Ejemplo n.º 1
0
def test_cleanup_archive(context):
    """Check that ``cleanup_archive`` removes a fetched response from storage.

    Fetches a small page over HTTP, serializes the response, confirms that
    the content hash resolves to a stored file, runs ``cleanup_archive`` and
    then confirms the same hash no longer resolves.

    NOTE(review): this performs a real request against httpbin.org, so it
    needs network access.
    """
    response = context.http.get(
        "https://httpbin.org/user-agent",
        headers={"User-Agent": "Memorious Test"},
    )
    payload = response.serialize()

    # Before cleanup the archived file must be loadable by its hash.
    assert storage.load_file(payload["content_hash"]) is not None

    cleanup_archive(context, payload)

    # After cleanup the same lookup must come back empty.
    assert storage.load_file(payload["content_hash"]) is None
Ejemplo n.º 2
0
    def fetch(self):
        """Return the local path of the response body, downloading it lazily.

        Resolution order:

        1. If a file path is already recorded, return it unchanged.
        2. If a content hash is known, load the file from ``storage`` into
           the context's work path and return that path.
        3. If a live response is attached, stream it to a new file in the
           work path (named via ``html_filename`` for ``text/html`` content
           and ``file_filename`` otherwise), hashing it with SHA-1 while
           writing, then archive the file in ``storage``.

        Returns:
            The local file path, or whatever ``self._file_path`` holds
            (``None`` at that point) when neither a hash nor a response is
            available.
        """
        # Fast path: the body has already been materialised.
        if self._file_path is not None:
            return self._file_path

        work_dir = self.context.work_path

        # A known hash means the body can be loaded from storage.
        if self._content_hash is not None:
            self._file_path = storage.load_file(self._content_hash,
                                                temp_path=work_dir)
            return self._file_path

        # Nothing to download from.
        if self.response is None:
            return self._file_path

        # Pick the naming helper based on the declared content type.
        name_for = (html_filename if self.content_type == 'text/html'
                    else file_filename)
        self._file_path = name_for(self.url, work_dir)

        # Stream to disk and hash in a single pass over the body.
        digest = sha1()
        with open(self._file_path, 'wb') as out:
            for block in self.response.iter_content(chunk_size=8192):
                digest.update(block)
                out.write(block)

        # Presumably tells later cleanup code that this file is ours to
        # delete -- the consumer of the flag is not visible here.
        self._remove_file = True

        self._content_hash = storage.archive_file(
            self._file_path, content_hash=digest.hexdigest())

        if self.http.cache and self.ok:
            self.context.set_tag(self.request_id, self.serialize())

        # NOTE(review): retrieved_at is assigned only after serialize() has
        # been stored in the tag above; if serialize() includes retrieved_at
        # the cached copy will not carry this timestamp -- confirm intent.
        self.retrieved_at = datetime.utcnow().isoformat()
        return self._file_path
Ejemplo n.º 3
0
    def load_file(self, content_hash, file_name=None):
        """Yield an open text-mode handle for a file held in ``storage``.

        This is a generator that yields exactly once; it is presumably
        wrapped as a context manager by a decorator that is not visible in
        this excerpt.

        Args:
            content_hash: Hash identifying the stored file.
            file_name: Optional file name forwarded to ``storage.load_file``.

        Yields:
            The file object, opened with mode ``'r'``.

        Raises:
            StorageFileMissing: If ``storage.load_file`` returns ``None``.
        """
        local_path = storage.load_file(content_hash, file_name=file_name)
        if local_path is None:
            raise StorageFileMissing(content_hash, file_name=file_name)

        # The finally clause runs once the consumer is done with the handle
        # (or an error is raised), so the local copy is always cleaned up.
        try:
            with open(local_path, 'r') as handle:
                yield handle
        finally:
            storage.cleanup_file(content_hash)
Ejemplo n.º 4
0
 def load_file(self, content_hash, file_name=None, read_mode='rb'):
     """Yield an open handle for a stored file, or ``None`` if it is missing.

     This is a generator that yields exactly once; it is presumably wrapped
     as a context manager by a decorator that is not visible in this
     excerpt.

     Args:
         content_hash: Hash identifying the stored file.
         file_name: Optional file name forwarded to ``storage.load_file``.
         read_mode: Mode passed to ``open``; defaults to binary read.

     Yields:
         The opened file object, or ``None`` when ``storage.load_file``
         returns ``None``.
     """
     local_path = storage.load_file(content_hash,
                                    file_name=file_name,
                                    temp_path=self.work_path)

     # Missing file: hand back None and skip cleanup, since nothing was
     # loaded into the work path.
     if local_path is None:
         yield None
         return

     try:
         with open(local_path, mode=read_mode) as handle:
             yield handle
     finally:
         # Always remove the local copy once the consumer is finished.
         storage.cleanup_file(content_hash, temp_path=self.work_path)
Ejemplo n.º 5
0
    def _stream_content(self):
        """Materialise the response body on disk and return its path.

        Without a live response the file is loaded from ``storage`` using
        the already-known content hash. With a response, the body is
        streamed into a fresh temporary file while a SHA-1 digest is
        computed, and the file is then archived in ``storage``.

        Returns:
            The path stored in ``self._file_path``.
        """
        # No live response: rely on the archived copy.
        if self.response is None:
            self._file_path = storage.load_file(self._content_hash)
            return self._file_path

        # mkstemp hands back an open descriptor; only the path is needed,
        # so close it straight away and reopen by name below.
        descriptor, self._file_path = tempfile.mkstemp()
        os.close(descriptor)

        # Write and hash the body in one pass.
        digest = sha1()
        with open(self._file_path, 'wb') as out:
            for block in self.response.iter_content(chunk_size=8192):
                digest.update(block)
                out.write(block)

        # Presumably tells later cleanup code that the temp file is ours to
        # delete -- the consumer of the flag is not visible here.
        self._remove_file = True

        self._content_hash = storage.archive_file(
            self._file_path, content_hash=digest.hexdigest())

        # Cache the serialized response only for successful requests when
        # caching is enabled on the HTTP helper.
        if self.http.cache and self.ok:
            self.context.set_tag(self.request_id, self.serialize())
        return self._file_path