    def store_objects(self,
                      upload: Upload,
                      compression: Optional[str] = None) -> Optional[str]:

        compression = compression or self._compression

        resource_id = f'{self._resource_id_source()}.tar.{compression}'

        name, objs, encoder = upload

        num_stored = 0
        with removing(create_tempfilename(resource_id)) as path:
            with self._open_archive(path, 'w') as archive:
                with NamedTemporaryFile() as fobj:
                    num_bytes = 0
                    for obj in objs:
                        encoded = encoder(obj)
                        fobj.write(encoded)
                        num_bytes += len(encoded)
                        num_stored += 1

                    if num_bytes > 0:
                        fobj.seek(0)
                        archive.add(fobj.name, name)

            if num_stored > 0:
                self._file_storage.store_file(resource_id, path)

        self.log_debug('stored %d objects at %s', num_stored, resource_id)
        return resource_id if num_stored > 0 else None
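The Upload triple, the encoder, and the helpers (_resource_id_source, _open_archive, create_tempfilename, removing) are not defined in the example above. A minimal sketch of what the unpacked triple and an encoder might look like; all names below are assumptions rather than the library's actual API:

import json
from typing import Callable, Iterable, NamedTuple

class Upload(NamedTuple):
    # (name, objs, encoder) as unpacked by store_objects above.
    name: str                         # member name inside the tar archive
    objs: Iterable[dict]              # objects to serialize
    encoder: Callable[[dict], bytes]  # one object -> encoded bytes

def jsonl_encoder(obj: dict) -> bytes:
    # One JSON document per line, matching the line-oriented readers further down.
    return json.dumps(obj).encode('utf-8') + b'\n'

# Hypothetical call, assuming `storage` is an instance of the class above:
# resource_id = storage.store_objects(Upload('emails', emails, jsonl_encoder),
#                                     compression='gz')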
Example #2
    def store_objects(self,
                      upload: Tuple[str, Iterable[dict]],
                      resource_id: Optional[str] = None) -> Optional[str]:

        resource_id = self._to_resource_id(resource_id)
        name, objs = upload

        num_stored = 0
        with removing(create_tempfilename()) as path:
            with self._open_archive(path, 'w') as archive:
                with NamedTemporaryFile() as fobj:
                    num_bytes = 0
                    for obj in objs:
                        serialized = to_json(obj)
                        encoded = serialized.encode(self._encoding)
                        fobj.write(encoded)
                        fobj.write(b'\n')
                        num_bytes += len(encoded) + 1
                        num_stored += 1
                        self.log_debug('stored obj %s', obj.get('_uid', ''))

                    if num_bytes > 0:
                        fobj.seek(0)
                        archive.add(fobj.name, name)

            if num_stored > 0:
                self._file_storage.store_file(resource_id, path)

        self.log_debug('stored %d objects at %s', num_stored, resource_id)
        return resource_id if num_stored > 0 else None
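Unlike the first variant, this one fixes the encoding to JSON lines via to_json, which is not shown. A stand-in consistent with how it is used here (dict in, str out) could be as simple as the following; the real helper may configure serialization differently:

import json

def to_json(obj: dict) -> str:
    # Minimal stand-in: the snippet only relies on getting a str back.
    return json.dumps(obj, ensure_ascii=False)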
    def _given_resource(self, resource_id: str, name: str, lines: bytes):
        client = self._storage._file_storage._client
        mode = f'w:{Path(resource_id).suffix[1:]}'
        with removing(create_tempfilename(resource_id)) as buffer_path:
            with tarfile_open(buffer_path, mode) as archive:
                tarinfo = TarInfo(name)
                tarinfo.size = len(lines)
                archive.addfile(tarinfo, BytesIO(lines))
            client.upload_object(buffer_path, resource_id)
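This test helper writes in-memory bytes straight into a tar member: TarInfo.size has to match the payload length, because addfile() reads exactly that many bytes from the supplied file object. The same pattern in isolation, with illustrative archive and member names:

from io import BytesIO
from tarfile import TarInfo, open as tarfile_open

payload = b'{"_uid": "1"}\n{"_uid": "2"}\n'
with tarfile_open('resource.tar.gz', 'w:gz') as archive:
    tarinfo = TarInfo('emails')
    tarinfo.size = len(payload)  # must equal the payload length
    archive.addfile(tarinfo, BytesIO(payload))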
    def fetch_objects(self, resource_id: str) -> Iterable[dict]:
        num_fetched = 0
        with removing(self._file_storage.fetch_file(resource_id)) as path:
            with gzip_open(path, 'rb') as fobj:
                for encoded in fobj:
                    serialized = encoded.decode(self._encoding)
                    obj = self._parse_jsonl(serialized)
                    if not obj:
                        continue
                    num_fetched += 1
                    self.log_debug('fetched email %s', obj.get('_uid'))
                    yield obj
        self.log_debug('fetched %d objects from %s', num_fetched, resource_id)
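_parse_jsonl is not part of the snippet; the `if not obj: continue` guard suggests it returns something falsy for blank or unparseable lines. A hypothetical stand-in, sketched as a method of the same class:

import json
from typing import Optional

def _parse_jsonl(self, serialized: str) -> Optional[dict]:
    line = serialized.strip()
    if not line:
        return None
    try:
        return json.loads(line)
    except json.JSONDecodeError:
        # The real implementation presumably logs the bad line.
        return None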
    def fetch_objects(self, resource_id: str,
                      download: Download) -> Iterable[dict]:

        name, decoder = download

        num_fetched = 0
        with removing(self._file_storage.fetch_file(resource_id)) as path:
            with self._open_archive(path, 'r') as archive:
                fobj = self._open_archive_file(archive, name)
                for encoded in fobj:
                    obj = decoder(encoded)
                    if obj is None:
                        continue
                    num_fetched += 1
                    yield obj
        self.log_debug('fetched %d objects from %s', num_fetched, resource_id)
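Download mirrors the Upload triple from the first example: the member name to read from the archive plus a per-line decoder, where None means "skip this line". A hypothetical definition and a JSON-lines decoder; the names are assumptions:

import json
from typing import Callable, NamedTuple, Optional

class Download(NamedTuple):
    name: str                                   # member name inside the archive
    decoder: Callable[[bytes], Optional[dict]]  # None means "skip this line"

def jsonl_decoder(encoded: bytes) -> Optional[dict]:
    line = encoded.strip()
    return json.loads(line) if line else None

# Hypothetical call:
# for obj in storage.fetch_objects(resource_id, Download('emails', jsonl_decoder)):
#     ...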
    def store_objects(self, objs: Iterable[dict]) -> Optional[str]:
        resource_id = str(uuid4())

        num_stored = 0
        with removing(create_tempfilename()) as path:
            with gzip_open(path, 'wb') as fobj:
                for obj in objs:
                    serialized = to_json(obj)
                    encoded = serialized.encode(self._encoding)
                    fobj.write(encoded)
                    fobj.write(b'\n')
                    num_stored += 1
                    self.log_debug('stored email %s', obj.get('_uid'))

            if num_stored > 0:
                self._file_storage.store_file(resource_id, path)

        self.log_debug('stored %d objects at %s', num_stored, resource_id)
        return resource_id if num_stored > 0 else None
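Stripped of the storage backend, the on-disk format this gzip-based pair (store_objects here, the gzip fetch_objects further up) agrees on is plain gzip-compressed JSON lines. A self-contained round trip, with an illustrative file name:

import gzip
import json

objs = [{'_uid': '1'}, {'_uid': '2'}]

with gzip.open('emails.jsonl.gz', 'wb') as fobj:
    for obj in objs:
        fobj.write(json.dumps(obj).encode('utf-8'))
        fobj.write(b'\n')

with gzip.open('emails.jsonl.gz', 'rb') as fobj:
    fetched = [json.loads(line) for line in fobj if line.strip()]

assert fetched == objs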
Example #7
    def test_removes_file_on_exception(self):
        with NamedTemporaryFile(delete=False) as fobj:
            with self.assertRaises(ValueError):
                with temporary.removing(fobj.name) as path:
                    raise ValueError
            self.assertFileDoesNotExist(path)
Example #8
    def test_removes_file_only_if_exists(self):
        with NamedTemporaryFile(delete=False) as fobj:
            with temporary.removing(fobj.name) as path:
                remove(path)
            self.assertFileDoesNotExist(path)
Example #9
    def test_removes_file_when_done(self):
        with NamedTemporaryFile(delete=False) as fobj:
            with temporary.removing(fobj.name) as path:
                pass
            self.assertFileDoesNotExist(path)
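Taken together, the three tests pin down the contract of temporary.removing: yield the path, delete the file on exit even when an exception escapes the block, and tolerate the file already being gone. A minimal context manager satisfying those tests, not necessarily the real implementation:

import os
from contextlib import contextmanager, suppress

@contextmanager
def removing(path):
    try:
        yield path
    finally:
        # Remove on the way out, whether the block succeeded or raised,
        # and ignore the case where the file was already removed.
        with suppress(FileNotFoundError):
            os.remove(path)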