def bytes_received(self, data, request, spider):
        """
        Count the bytes received so far and stop the download once ``self.max_bytes`` is reached.

        The handler returns without counting anything when any of the exemptions below applies.

        :param data: the chunk of bytes that was just received
        :param request: the request being downloaded; a truthy ``callback`` exempts it
        :param spider: the running spider, whose ``pluck``, ``dont_truncate``, ``root_path`` and ``unflatten``
            attributes are consulted
        :raises StopDownload: with ``fail=False``, once the running total reaches ``self.max_bytes``
        """
        # Truncation only applies when the spider has `pluck` set.
        if not spider.pluck:
            return

        # The spider explicitly opted out of truncation.
        if spider.dont_truncate:
            return

        # Only final requests are limited, i.e. those whose callback is the default `parse` method
        # (no explicit callback set on the request).
        if request.callback:
            return

        # ijson parses the value at `root_path`, which can extend to the end of the file.
        # https://github.com/ICRAR/ijson/issues/43
        if spider.root_path:
            return

        # XLSX files must be read in full.
        if spider.unflatten:
            return

        self.total_bytes_received += len(data)
        limit_reached = self.total_bytes_received >= self.max_bytes
        if limit_reached:
            # NOTE(review): in Scrapy, `fail=False` is documented as handing the partial response to the
            # callback rather than the errback - confirm against the Scrapy version in use.
            raise StopDownload(fail=False)
Beispiel #2
0
 def headers_received(self, headers, body_length, request, spider):
     """
     Store the received headers under ``self.meta["headers_received"]`` and stop the download.

     :raises StopDownload: always, with ``fail=True``
     """
     self.meta["headers_received"] = headers
     stop_signal = StopDownload(fail=True)
     raise stop_signal
Beispiel #3
0
 def bytes_received(self, data, request, spider):
     """
     Store the received chunk under ``self.meta["bytes_received"]`` and stop the download.

     :raises StopDownload: always, with ``fail=True``
     """
     self.meta["bytes_received"] = data
     stop_signal = StopDownload(fail=True)
     raise stop_signal
 def headers_received(self, headers, body_length, request, spider):
     """
     Run the parent class's ``headers_received`` handler, then stop the download.

     :raises StopDownload: always (unless the parent handler raises first), with ``fail=False``
     """
     parent = super()
     parent.headers_received(headers, body_length, request, spider)
     stop_signal = StopDownload(fail=False)
     raise stop_signal
Beispiel #5
0
 def bytes_received(self, data, request, spider):
     """
     Run the parent class's ``bytes_received`` handler, then stop the download.

     :raises StopDownload: always (unless the parent handler raises first), with ``fail=False``
     """
     parent = super()
     parent.bytes_received(data, request, spider)
     stop_signal = StopDownload(fail=False)
     raise stop_signal