def store_objects(self, upload: Upload, compression: Optional[str] = None) -> Optional[str]:
    """Encode the objects in *upload* into a compressed tar archive and persist it.

    The archive member name, the object iterable and the per-object encoder
    come from the *upload* tuple. When *compression* is omitted, the
    instance default ``self._compression`` is used instead.

    Returns the resource id the archive was stored under, or ``None`` when
    the iterable produced no objects (nothing is persisted in that case).
    """
    compression = compression or self._compression
    resource_id = f'{self._resource_id_source()}.tar.{compression}'
    name, items, encoder = upload
    total = 0
    with removing(create_tempfilename(resource_id)) as path:
        with self._open_archive(path, 'w') as archive:
            with NamedTemporaryFile() as buffer_file:
                payload_size = 0
                for item in items:
                    payload = encoder(item)
                    buffer_file.write(payload)
                    payload_size += len(payload)
                    total += 1
                if payload_size > 0:
                    # seek() flushes the buffered writes, so the archive
                    # reads the complete payload from buffer_file.name.
                    buffer_file.seek(0)
                    archive.add(buffer_file.name, name)
        # Upload only after the archive context has closed (and flushed)
        # the file at `path`, and before `removing` deletes it.
        if total > 0:
            self._file_storage.store_file(resource_id, path)
            self.log_debug('stored %d objects at %s', total, resource_id)
    return resource_id if total > 0 else None
def store_objects(self, upload: Tuple[str, Iterable[dict]], resource_id: Optional[str] = None) -> Optional[str]:
    """Serialize the objects in *upload* as JSON lines inside an archive and persist it.

    *upload* carries the archive member name and the dicts to store; each
    dict is written as one newline-terminated JSON line. A missing
    *resource_id* is resolved via ``self._to_resource_id``.

    Returns the resource id on success, or ``None`` when no objects were
    stored (nothing is persisted in that case).
    """
    resource_id = self._to_resource_id(resource_id)
    name, items = upload
    total = 0
    with removing(create_tempfilename()) as path:
        with self._open_archive(path, 'w') as archive:
            with NamedTemporaryFile() as buffer_file:
                payload_size = 0
                for item in items:
                    line = to_json(item).encode(self._encoding)
                    buffer_file.write(line)
                    buffer_file.write(b'\n')
                    payload_size += len(line) + 1  # +1 for the newline
                    total += 1
                    self.log_debug('stored obj %s', item.get('_uid', ''))
                if payload_size > 0:
                    # seek() flushes the buffered writes so the archive
                    # reads the complete payload from buffer_file.name.
                    buffer_file.seek(0)
                    archive.add(buffer_file.name, name)
        # Upload only after the archive context has closed (and flushed)
        # the file at `path`, and before `removing` deletes it.
        if total > 0:
            self._file_storage.store_file(resource_id, path)
            self.log_debug('stored %d objects at %s', total, resource_id)
    return resource_id if total > 0 else None
def _given_resource(self, resource_id: str, name: str, lines: bytes):
    """Test fixture: upload a tar archive containing *lines* as member *name*.

    The tar write mode is derived from the resource id's final suffix
    (e.g. ``foo.tar.gz`` -> ``w:gz``), and the finished archive is pushed
    to the storage client under *resource_id*.
    """
    client = self._storage._file_storage._client
    # Drop the leading dot of the suffix to form the tarfile mode string.
    mode = f'w:{Path(resource_id).suffix[1:]}'
    with removing(create_tempfilename(resource_id)) as buffer_path:
        with tarfile_open(buffer_path, mode) as archive:
            member = TarInfo(name)
            member.size = len(lines)
            archive.addfile(member, BytesIO(lines))
        # Upload after the archive has been closed and flushed to disk.
        client.upload_object(buffer_path, resource_id)
def fetch_objects(self, resource_id: str) -> Iterable[dict]:
    """Yield the objects stored under *resource_id* from a gzipped JSONL file.

    The fetched file is decompressed line by line; lines that
    ``self._parse_jsonl`` rejects (falsy result) are skipped silently.
    The temporary local copy is deleted once iteration finishes.
    """
    count = 0
    with removing(self._file_storage.fetch_file(resource_id)) as path:
        with gzip_open(path, 'rb') as stream:
            for raw_line in stream:
                text = raw_line.decode(self._encoding)
                parsed = self._parse_jsonl(text)
                if not parsed:
                    continue
                count += 1
                self.log_debug('fetched email %s', parsed.get('_uid'))
                yield parsed
    self.log_debug('fetched %d objects from %s', count, resource_id)
def fetch_objects(self, resource_id: str, download: Download) -> Iterable[dict]:
    """Yield decoded objects from the archive stored under *resource_id*.

    *download* supplies the archive member name to read and the per-line
    decoder; lines the decoder maps to ``None`` are skipped. The fetched
    local copy is deleted once iteration finishes.
    """
    name, decoder = download
    count = 0
    with removing(self._file_storage.fetch_file(resource_id)) as path:
        with self._open_archive(path, 'r') as archive:
            member_stream = self._open_archive_file(archive, name)
            for raw_line in member_stream:
                decoded = decoder(raw_line)
                if decoded is None:
                    continue
                count += 1
                yield decoded
    self.log_debug('fetched %d objects from %s', count, resource_id)
def store_objects(self, objs: Iterable[dict]) -> Optional[str]:
    """Store *objs* as a gzipped JSONL file under a freshly generated resource id.

    Each dict is serialized to one newline-terminated JSON line. Returns
    the new resource id, or ``None`` when the iterable was empty (nothing
    is persisted in that case).
    """
    resource_id = str(uuid4())
    total = 0
    with removing(create_tempfilename()) as path:
        with gzip_open(path, 'wb') as stream:
            for obj in objs:
                line = to_json(obj).encode(self._encoding)
                stream.write(line)
                stream.write(b'\n')
                total += 1
                self.log_debug('stored email %s', obj.get('_uid'))
        # Upload only after the gzip stream has been closed (flushed),
        # and before `removing` deletes the temporary file.
        if total > 0:
            self._file_storage.store_file(resource_id, path)
            self.log_debug('stored %d objects at %s', total, resource_id)
    return resource_id if total > 0 else None
def test_removes_file_on_exception(self):
    """The wrapped file is removed even when the body raises."""
    with NamedTemporaryFile(delete=False) as tmp:
        with self.assertRaises(ValueError):
            with temporary.removing(tmp.name) as path:
                raise ValueError
    self.assertFileDoesNotExist(path)
def test_removes_file_only_if_exists(self):
    """No error is raised when the file was already deleted inside the body."""
    with NamedTemporaryFile(delete=False) as tmp:
        with temporary.removing(tmp.name) as path:
            remove(path)
    self.assertFileDoesNotExist(path)
def test_removes_file_when_done(self):
    """The wrapped file is removed on normal exit from the context."""
    with NamedTemporaryFile(delete=False) as tmp:
        with temporary.removing(tmp.name) as path:
            pass
    self.assertFileDoesNotExist(path)