def fetch(self):
    """Lazily trigger download of the data when requested.

    Resolution order:

    1. If a local file path is already known, return it unchanged.
    2. If a content hash is known, load the file via ``storage.load_file``
       into the context's work path and return the resulting path.
    3. If a response is available, stream its body in 8192-byte chunks to a
       new file in the work path (named by ``html_filename`` for
       ``text/html`` content, ``file_filename`` otherwise), hash it with
       ``sha1`` while writing, archive it via ``storage.archive_file`` and
       keep the value it returns as the content hash.

    The file path and the ``_remove_file`` flag are only recorded once the
    body has been written completely, so a failed download never leaves the
    object pointing at a truncated file.

    Returns:
        The local file path, or ``None`` when there is neither a known
        content hash nor a response to download from.

    Raises:
        Whatever ``open`` or ``response.iter_content`` raises; the partially
        written file is removed before the exception propagates.
    """
    # Local import: only needed to clean up after a failed download.
    import os

    if self._file_path is not None:
        return self._file_path

    temp_path = self.context.work_path
    if self._content_hash is not None:
        self._file_path = storage.load_file(self._content_hash,
                                            temp_path=temp_path)
        return self._file_path

    if self.response is None:
        return self._file_path

    if self.content_type == 'text/html':
        path = html_filename(self.url, temp_path)
    else:
        path = file_filename(self.url, temp_path)

    content_hash = sha1()
    try:
        with open(path, 'wb') as fh:
            for chunk in self.response.iter_content(chunk_size=8192):
                content_hash.update(chunk)
                fh.write(chunk)
    except BaseException:
        # The download was interrupted: drop the truncated file so it is
        # neither leaked nor mistaken for a complete download on a later
        # call. Removal is best-effort; the original error is what matters.
        try:
            os.remove(path)
        except OSError:
            pass
        raise

    # Publish the path only now that the file is complete.
    self._file_path = path
    self._remove_file = True
    chash = content_hash.hexdigest()
    self._content_hash = storage.archive_file(path, content_hash=chash)
    if self.http.cache and self.ok:
        self.context.set_tag(self.request_id, self.serialize())
    # NOTE(review): retrieved_at is assigned after serialize() has been
    # cached above; confirm the cached tag is not meant to carry it.
    self.retrieved_at = datetime.utcnow().isoformat()
    return self._file_path
def _stream_content(self):
    """Lazily trigger download of the data when requested.

    When there is no response, the file is loaded via ``storage.load_file``
    using the known content hash. Otherwise the response body is streamed
    in 8192-byte chunks into a file created with ``tempfile.mkstemp``,
    hashed with ``sha1`` while writing, and archived via
    ``storage.archive_file``; the value that call returns is kept as the
    content hash. If ``self.http.cache`` and ``self.ok`` are both truthy,
    the serialized object is stored under ``request_id`` with
    ``context.set_tag``.

    The file path and the ``_remove_file`` flag are only recorded once the
    body has been written completely, so a failed download neither leaks
    the temporary file nor leaves the object pointing at a truncated one.

    Returns:
        The local file path.

    Raises:
        Whatever ``open`` or ``response.iter_content`` raises; the
        temporary file is removed before the exception propagates.
    """
    if self.response is None:
        self._file_path = storage.load_file(self._content_hash)
        return self._file_path

    # mkstemp leaves deletion to the caller; only the name is needed here,
    # so the descriptor is closed right away.
    fd, path = tempfile.mkstemp()
    os.close(fd)

    content_hash = sha1()
    try:
        with open(path, 'wb') as fh:
            for chunk in self.response.iter_content(chunk_size=8192):
                content_hash.update(chunk)
                fh.write(chunk)
    except BaseException:
        # The download was interrupted: nothing else knows about this
        # temporary file yet, so remove it here. Removal is best-effort;
        # the original error is what matters.
        try:
            os.remove(path)
        except OSError:
            pass
        raise

    # Publish the path only now that the file is complete.
    self._file_path = path
    self._remove_file = True
    chash = content_hash.hexdigest()
    self._content_hash = storage.archive_file(path, content_hash=chash)
    if self.http.cache and self.ok:
        self.context.set_tag(self.request_id, self.serialize())
    return self._file_path
def store_file(self, file_path, content_hash=None):
    """Put a file into permanent storage so other stages can see it.

    Delegates directly to ``storage.archive_file``.

    Args:
        file_path: Path of the file to archive.
        content_hash: Optional hash forwarded unchanged to the storage
            layer; defaults to ``None``.

    Returns:
        Whatever ``storage.archive_file`` returns (presumably the content
        hash of the stored file -- confirm against the storage layer).
    """
    archived = storage.archive_file(file_path, content_hash=content_hash)
    return archived