def startDecompressMessage(self):
    if self._isServer:
        if self._decompressor is None or self.client_no_context_takeover:
            self._decompressor = snappy.StreamDecompressor()
    else:
        if self._decompressor is None or self.server_no_context_takeover:
            self._decompressor = snappy.StreamDecompressor()
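# A minimal, self-contained sketch (not from the project above) of what
# "context takeover" means for a framed snappy stream: reusing one
# StreamDecompressor keeps framing state across messages, while the
# *_no_context_takeover flags force a fresh decompressor, so each message
# must then arrive as a complete, independently framed stream.
import snappy

compressor = snappy.StreamCompressor()
decompressor = snappy.StreamDecompressor()
framed = compressor.compress(b"hello ") + compressor.compress(b"world")
assert decompressor.decompress(framed) == b"hello world"

# Without context takeover, every message is framed from scratch:
msg = snappy.StreamCompressor().compress(b"standalone")
assert snappy.StreamDecompressor().decompress(msg) == b"standalone"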
def test_decompression(self):
    # test that we check for the initial stream identifier
    data = b"\x01" * 50
    self.assertRaises(snappy.UncompressError,
                      snappy.StreamDecompressor().decompress,
                      b"\x01\x36\x00\x00" +
                      struct.pack("<L", snappy._masked_crc32c(data)) +
                      data)
    self.assertEqual(
        snappy.StreamDecompressor().decompress(
            b"\xff\x06\x00\x00sNaPpY"
            b"\x01\x36\x00\x00" +
            struct.pack("<L", snappy._masked_crc32c(data)) +
            data),
        data)
    decompressor = snappy.StreamDecompressor()
    decompressor.decompress(b"\xff\x06\x00\x00sNaPpY")
    self.assertEqual(
        decompressor.copy().decompress(
            b"\x01\x36\x00\x00" +
            struct.pack("<L", snappy._masked_crc32c(data)) +
            data),
        data)

    # test that we throw errors for unknown unskippable chunks
    self.assertRaises(snappy.UncompressError,
                      decompressor.copy().decompress, b"\x03\x01\x00\x00")

    # test that we skip unknown skippable chunks
    self.assertEqual(b"",
                     decompressor.copy().decompress(b"\xfe\x01\x00\x00"))

    # test that we check CRCs
    compressed_data = snappy.compress(data)
    real_crc = struct.pack("<L", snappy._masked_crc32c(data))
    fake_crc = os.urandom(4)
    self.assertRaises(snappy.UncompressError,
                      decompressor.copy().decompress,
                      b"\x00\x0a\x00\x00" + fake_crc + compressed_data)
    self.assertEqual(
        decompressor.copy().decompress(
            b"\x00\x0a\x00\x00" + real_crc + compressed_data),
        data)

    # test that we buffer when we don't have enough
    uncompressed_data = os.urandom(100)
    compressor = snappy.StreamCompressor()
    compressed_data = (compressor.compress(uncompressed_data[:50]) +
                       compressor.compress(uncompressed_data[50:]))
    for split1 in range(len(compressed_data) - 1):
        for split2 in range(split1, len(compressed_data)):
            decompressor = snappy.StreamDecompressor()
            self.assertEqual(
                (decompressor.decompress(compressed_data[:split1]) +
                 decompressor.decompress(compressed_data[split1:split2]) +
                 decompressor.decompress(compressed_data[split2:])),
                uncompressed_data)
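# For reference, the chunk layout the test above exercises, per the snappy
# framing format: a 1-byte chunk type, a 3-byte little-endian payload length,
# then the payload; data chunks prefix the payload with a 4-byte masked
# CRC-32C of the uncompressed bytes. A sketch building a valid stream by
# hand (it reuses the private snappy._masked_crc32c helper, as the test does):
import struct
import snappy

def chunk(chunk_type, payload):
    # 1-byte type packed together with the 3-byte little-endian length
    return struct.pack("<L", (len(payload) << 8) | chunk_type) + payload

data = b"\x01" * 50
stream = (chunk(0xFF, b"sNaPpY") +   # stream identifier
          chunk(0x01, struct.pack("<L", snappy._masked_crc32c(data)) + data))  # uncompressed data chunk
assert snappy.StreamDecompressor().decompress(stream) == data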
async def get_object(self, session: aiohttp.ClientSession) -> None:
    """Method to get the object from S3 after the pre-signed URL has been obtained

    Args:
        session: The current aiohttp session

    Returns:
        None
    """
    try:
        decompressor = snappy.StreamDecompressor()
        async with session.get(self.presigned_s3_url) as response:
            if response.status != 200:
                # An error occurred
                body = await response.text()
                raise IOError(
                    f"Failed to get {self.object_details.dataset_path} from storage backend."
                    f" Status: {response.status}. Response: {body}")

            async with aiofiles.open(self.object_details.object_path, 'wb') as fd:
                while True:
                    chunk = await response.content.read(self.download_chunk_size)
                    if not chunk:
                        await fd.write(decompressor.flush())
                        break
                    await fd.write(decompressor.decompress(chunk))
    except Exception as err:
        logger.exception(err)
        raise IOError(
            f"Failed to get {self.object_details.dataset_path} from storage backend. {err}")
def _create_decompressor(self, alg):
    if alg == "snappy":
        return snappy.StreamDecompressor()
    elif alg == "lzma":
        return lzma.LZMADecompressor()
    raise InvalidConfigurationError(
        "invalid compression algorithm: {!r}".format(alg))
async def get_object(self, session: aiohttp.ClientSession,
                     progress_update_fn: Callable) -> None:
    """Method to get the object from S3 after the pre-signed URL has been obtained

    Args:
        session: The current aiohttp session
        progress_update_fn: A callable with arg "completed_bytes" (int) indicating
            how many bytes have been downloaded since it was last called

    Returns:
        None
    """
    try:
        decompressor = snappy.StreamDecompressor()
        async with session.get(self.presigned_s3_url) as response:
            if response.status != 200:
                # An error occurred
                body = await response.text()
                raise IOError(f"Failed to get {self.object_details.dataset_path} from storage backend."
                              f" Status: {response.status}. Response: {body}")

            async with aiofiles.open(self.object_details.object_path, 'wb') as fd:
                while True:
                    chunk = await response.content.read(self.download_chunk_size)
                    if not chunk:
                        await fd.write(decompressor.flush())
                        break
                    decompressed_chunk = decompressor.decompress(chunk)
                    await fd.write(decompressed_chunk)
                    progress_update_fn(completed_bytes=len(decompressed_chunk))
    except Exception as err:
        logger.exception(err)
        raise IOError(f"Failed to get {self.object_details.dataset_path} from storage backend. {err}")
def test_concatenation(self):
    data1 = os.urandom(snappy.snappy._CHUNK_MAX * 2)
    data2 = os.urandom(4096)
    decompressor = snappy.StreamDecompressor()
    self.assertEqual(
        decompressor.decompress(snappy.StreamCompressor().compress(data1) +
                                snappy.StreamCompressor().compress(data2)),
        data1 + data2)
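# This concatenation works because the framing format lets the stream
# identifier chunk reappear mid-stream, where it is simply validated and
# skipped. A quick sanity check along the same lines:
import snappy

part1 = snappy.StreamCompressor().compress(b"first")
part2 = snappy.StreamCompressor().compress(b"second")
d = snappy.StreamDecompressor()
assert d.decompress(part1) + d.decompress(part2) == b"firstsecond"
d.flush()  # raises UncompressError if the stream had ended mid-chunk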
def __init__(self, buffer=None):
    if not snappy:
        raise RuntimeError('python-snappy required for compression')
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    super().__init__(buffer)
def __init__(self, input):
    super(SnappyInputStream, self).__init__(input)
    try:
        import snappy
    except ImportError:
        raise errors.DependencyNotInstalledError(
            "python-snappy library is required for snappy support")
    self._decompressor = snappy.StreamDecompressor()
def __init__(self, infile, mode, **kwargs):
    import snappy

    self.details = {"size": 999999999}  # not true, but OK if we don't seek
    super().__init__(fs=None, path="snappy",
                     mode=mode.strip("b") + "b", **kwargs)
    self.infile = infile
    if "r" in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def __init__(self, input):
    try:
        import snappy
    except ImportError:
        raise errors.DependencyNotInstalledError(
            "python-snappy library is required for snappy support")
    self._decompressor = snappy.StreamDecompressor()
    self._input = input
    self._internal_buffer = compat.BytesIO()
    self._cursor = 0
def __init__(self, infile, mode, **kwargs):
    import snappy

    super().__init__(
        fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
    )
    self.infile = infile
    if "r" in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def __init__(self, infile, mode, **kwargs):
    import snappy

    self.details = {'size': 999999999}  # not true, but OK if we don't seek
    super().__init__(fs=None, path='snappy',
                     mode=mode.strip('b') + 'b', **kwargs)
    self.infile = infile
    if 'r' in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def decompressor(self, algorithm):
    if algorithm is None:
        return None
    if algorithm == "lzma":
        return lzma.LZMADecompressor()
    elif algorithm == "snappy":
        if not snappy:
            raise MissingLibraryError(
                "python-snappy is required when using snappy compression")
        return snappy.StreamDecompressor()
    else:
        raise InvalidConfigurationError(
            "invalid compression algorithm: {!r}".format(algorithm))
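# A hypothetical caller for a factory like the one above: both
# lzma.LZMADecompressor and snappy.StreamDecompressor expose decompress(data)
# and buffer incomplete input internally, so chunks can be fed at any
# granularity. (For snappy, a final flush() additionally verifies the stream
# did not end mid-chunk; LZMADecompressor exposes an .eof attribute instead.)
def decode_stream(decompressor, chunks):
    out = b""
    for chunk in chunks:
        out += decompressor.decompress(chunk)
    return out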
def codec(self, value):
    self._codec = value if six.PY3 else ord(value)
    if self.codec == 0:
        pass
    elif self.codec == 1:
        self._decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    elif self.codec == 2:
        self._decompressor = snappy.StreamDecompressor()
    elif self.codec == 3:
        self._decompressor = lzma.LZMADecompressor()
    else:
        raise ValueError("Unknown compression type")
def restore(script):
    # Build a list of metrics to restore from our object store and globbing
    metrics = search(script)

    # For each metric, find the date we want
    for i in metrics.keys():
        objs = metrics[i]
        d = findBackup(script, objs, script.options.date)
        logger.info("Restoring %s from timestamp %s" % (i, d))
        blobgz = script.store.get("%s%s/%s.wsp.%s"
                                  % (script.options.storage_path, i, d,
                                     script.options.algorithm))
        blobSHA = script.store.get("%s%s/%s.sha1"
                                   % (script.options.storage_path, i, d))
        if blobgz is None:
            logger.warning("Skipping missing file in object store: %s/%s.wsp.%s"
                           % (i, d, script.options.algorithm))
            continue

        # Decompress
        blobgz = StringIO(blobgz)
        blob = None
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="rb")
            blob = fd.read()
            fd.close()
        elif script.options.algorithm == "sz":
            decompressor = snappy.StreamDecompressor()
            blob = decompressor.decompress(blobgz.getvalue())
            try:
                decompressor.flush()
            except UncompressError as e:
                logger.error("Corrupt file in store: %s%s/%s.wsp.sz Error %s"
                             % (script.options.storage_path, i, d, str(e)))
                continue

        # Verify
        if blobSHA is None:
            logger.warning("Missing SHA1 checksum file...no verification")
        else:
            if hashlib.sha1(blob).hexdigest() != blobSHA:
                logger.warning("Backup does NOT verify, skipping metric %s" % i)
                continue

        heal(script, i, blob)

        # Clean up
        del blob
        blobgz.close()
def __init__(self, file_name_or_obj, file_mode,
             buffer_size=snappy._CHUNK_MAX):
    if isinstance(file_name_or_obj, str):
        self._file = open(file_name_or_obj, file_mode)
    else:
        self._file = file_name_or_obj
    self.buffer_pos = 0
    if file_mode == "wb":
        self.buffer = bytearray(buffer_size)
        self._compressor = snappy.StreamCompressor()
    else:
        self.buffer = None
        self._decompressor = snappy.StreamDecompressor()
def __init__(self, next_fp, mode):
    if snappy is None:
        raise io.UnsupportedOperation("Snappy is not available")

    if mode == "rb":
        self.decr = snappy.StreamDecompressor()
        self.encr = None
    elif mode == "wb":
        self.decr = None
        self.encr = snappy.StreamCompressor()
    else:
        raise io.UnsupportedOperation("unsupported mode for SnappyFile")

    super().__init__(next_fp)
    self.decr_done = False
def test_retrieve_snappy_file_obj(self, tmpdir):
    """
    Test the retrieve_file_obj method with a snappy file
    """
    # Setup the WAL
    source = tmpdir.join("wal_dir/000000080000ABFF000000C1")
    source.write("something".encode("utf-8"), ensure=True)
    # Create a simple CloudWalUploader obj
    uploader = CloudWalUploader(mock.MagicMock(), "test-server",
                                compression="snappy")
    open_file = uploader.retrieve_file_obj(source.strpath)
    # Check the in memory file received
    assert open_file
    # Decompress on the fly to check content
    assert snappy.StreamDecompressor().decompress(
        open_file.read()) == "something".encode("utf-8")
def restore(script):
    fetchArchiveFromHdfs(script)

    # Build a list of metrics to restore from our object store and globbing
    metrics = search(script)

    # For each metric, find the date we want
    for i in metrics.keys():
        objs = metrics[i]
        logger.info("Restoring %s from timestamp %s" % (i, script.options.date))
        blobgz = script.store.get("%s/%s.%s"
                                  % (script.options.storage_path, i,
                                     script.options.algorithm))
        if blobgz is None:
            logger.warning("Skipping missing file in object store: %s/%s.wsp.%s"
                           % (i, script.options.date, script.options.algorithm))
            continue

        # Decompress
        blobgz = StringIO(blobgz)
        blob = None
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="rb")
            blob = fd.read()
            fd.close()
        elif script.options.algorithm == "sz":
            decompressor = snappy.StreamDecompressor()
            blob = decompressor.decompress(blobgz.getvalue())
            try:
                decompressor.flush()
            except UncompressError as e:
                logger.error("Corrupt file in store: %s%s/%s.wsp.sz Error %s"
                             % (script.options.storage_path, i,
                                script.options.date, str(e)))
                continue

        heal(script, i, blob)

        # Clean up
        del blob
        blobgz.close()
def test_random(self):
    for _ in range(100):
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        data = b""
        compressed = b""
        for _ in range(random.randint(0, 3)):
            chunk = os.urandom(random.randint(0, snappy.snappy._CHUNK_MAX * 2))
            data += chunk
            compressed += compressor.add_chunk(
                chunk, compress=random.choice([True, False, None]))

        upper_bound = random.choice([256, snappy.snappy._CHUNK_MAX * 2])
        while compressed:
            size = random.randint(0, upper_bound)
            chunk, compressed = compressed[:size], compressed[size:]
            chunk = decompressor.decompress(chunk)
            self.assertEqual(data[:len(chunk)], chunk)
            data = data[len(chunk):]

        decompressor.flush()
        self.assertEqual(len(data), 0)
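# The compress=True/False/None argument exercised above maps to add_chunk's
# chunk-type choice: False forces an uncompressed data chunk, True forces a
# compressed one, and None lets the library pick whichever is smaller. A
# quick stand-alone check:
import snappy

c = snappy.StreamCompressor()
raw = c.add_chunk(b"abc" * 100, compress=False)  # force an uncompressed chunk
# The first call also prepends the stream identifier, so this decodes alone:
assert snappy.StreamDecompressor().decompress(raw) == b"abc" * 100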
def load_stream(self, stream):
    off = 0
    length = len(stream)
    streamd = snappy.StreamDecompressor()  # note: unused; blocks below are decoded with the raw snappy API
    dec = b""
    while off + 4 < length:
        piece = stream[off:off + 4]
        chunkLength = unpack('<BBBB', piece)
        if chunkLength[0] & 0xFF != 0:
            print(u"ooops!")
            return False
        chunkAsInt = unpack('<i', b'\0' + piece[1:])[0] >> 8
        if off + chunkAsInt > length:
            print(u"Bad chunk (off: %d cai: %d len: %d)" % (off, chunkAsInt, length))
            return False
        off += 4
        dec += snappy._uncompress(stream[off:off + chunkAsInt])
        off += chunkAsInt
    return dec
def test_snappy_framed(benchmark, file, use_cramjam: bool):
    """
    Uses snappy compression framed
    """
    import snappy

    data = bytearray(file.read_bytes())
    if use_cramjam:
        benchmark(
            round_trip,
            compress=cramjam.snappy.compress,
            decompress=cramjam.snappy.decompress,
            data=data,
        )
    else:
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        benchmark(
            round_trip,
            compress=compressor.compress,
            decompress=decompressor.decompress,
            data=data,
        )
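# The round_trip helper is not shown in this snippet; a plausible minimal
# version (an assumption, not the benchmark suite's actual code) is:
def round_trip(compress, decompress, data):
    # Compress then decompress, so the benchmark measures both directions.
    return decompress(compress(data))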
def __init__(self, socket):
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    self._socket = socket
    self._bootstrapped = None
def __init__(self, socket):
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    super(SnappySocket, self).__init__(socket)
def __init__(self, buffer=None):
    self._parser = Reader()
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    if buffer:
        self.feed(buffer)
def test_pull_objects_all(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)

    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)
    check_info = {obj1_target: obj1_source,
                  obj2_target: obj2_source}

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    # remove data from the local file cache
    os.remove(os.path.join(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt"))
    os.remove(os.path.join(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test2.txt"))
    shutil.rmtree(os.path.join(manifest.cache_mgr.cache_root, 'objects'))
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, 'objects'))

    with aioresponses() as mocked_responses:
        mocked_responses.get(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                "namespace": ds.namespace,
                "obj_id": obj_id_1,
                "dataset": ds.name
            },
            status=200)
        with open(obj1_source, 'rb') as data1:
            mocked_responses.get(
                f"https://dummyurl.com/{obj_id_1}?params=1",
                body=data1.read(),
                status=200,
                content_type='application/octet-stream')

        mocked_responses.get(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                "namespace": ds.namespace,
                "obj_id": obj_id_2,
                "dataset": ds.name
            },
            status=200)
        with open(obj2_source, 'rb') as data2:
            mocked_responses.get(
                f"https://dummyurl.com/{obj_id_2}?params=1",
                body=data2.read(),
                status=200,
                content_type='application/octet-stream')

        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.pull_all()
        assert len(result.success) == 2
        assert len(result.failure) == 0
        assert result.success[0].object_path != result.success[1].object_path
        assert result.success[0].object_path in [obj_to_push[0].object_path,
                                                 obj_to_push[1].object_path]
        assert result.success[1].object_path in [obj_to_push[0].object_path,
                                                 obj_to_push[1].object_path]

        assert os.path.isfile(obj1_target) is True
        assert os.path.isfile(obj2_target) is True

        decompressor = snappy.StreamDecompressor()
        for r in result.success:
            with open(check_info[r.object_path], 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(r.object_path, 'rt') as dd:
                dest1 = dd.read()

            assert source1.decode("utf-8") == dest1
def test_pull_objects(self, mock_config_file, mock_dataset_head):
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    os.makedirs(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "other_dir"))
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fdsfgfd")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)

    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test2.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file[0]):
        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_2,
                    "dataset": ds.name
                },
                status=200)
            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    body=data2.read(),
                    status=200,
                    content_type='application/octet-stream')

            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test1.txt"]
            }

            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in
            # real use, multiple jobs run in parallel and you only want to link once).
            m.link_revision()

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is False

            decompressor = snappy.StreamDecompressor()
            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            # Download other file
            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test2.txt"]
            }

            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in
            # real use, multiple jobs run in parallel and you only want to link once).
            m.link_revision()

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True

            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            with open(obj2_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj2_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
    with aioresponses() as mocked_responses:
        sb = get_storage_backend("gigantum_object_v1")

        ds = mock_dataset_with_cache_dir[0]
        sb.set_default_configuration(ds.namespace, "abcd", '1234')

        object_dir, compressed_dir = temp_directories

        obj1_id = uuid.uuid4().hex
        obj2_id = uuid.uuid4().hex

        obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
        obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
        assert os.path.isfile(obj1_src_path) is True
        assert os.path.isfile(obj2_src_path) is True

        obj1_compressed_path = os.path.join(compressed_dir, obj1_id)
        obj2_compressed_path = os.path.join(compressed_dir, obj2_id)
        helper_compress_file(obj1_src_path, obj1_compressed_path)
        helper_compress_file(obj2_src_path, obj2_compressed_path)

        assert os.path.isfile(obj1_src_path) is False
        assert os.path.isfile(obj2_src_path) is False
        assert os.path.isfile(obj1_compressed_path) is True
        assert os.path.isfile(obj2_compressed_path) is True

        check_info = {obj1_src_path: obj1_compressed_path,
                      obj2_src_path: obj2_compressed_path}

        objects = [PullObject(object_path=obj1_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile1.txt'),
                   PullObject(object_path=obj2_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile2.txt')]

        mocked_responses.get(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                "namespace": ds.namespace,
                "obj_id": obj1_id,
                "dataset": ds.name
            },
            status=400)

        mocked_responses.get(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                "namespace": ds.namespace,
                "obj_id": obj2_id,
                "dataset": ds.name
            },
            status=200)
        with open(obj2_compressed_path, 'rb') as data2:
            mocked_responses.get(
                f"https://dummyurl.com/{obj2_id}?params=1",
                body=data2.read(),
                status=200,
                content_type='application/octet-stream')

        result = sb.pull_objects(ds, objects, updater)
        assert len(result.success) == 1
        assert len(result.failure) == 1
        assert isinstance(result, PullResult) is True
        assert isinstance(result.success[0], PullObject) is True
        assert result.success[0].object_path == obj2_src_path
        assert result.failure[0].object_path == obj1_src_path

        assert os.path.isfile(result.success[0].object_path) is True
        assert os.path.isfile(result.failure[0].object_path) is False

        decompressor = snappy.StreamDecompressor()
        with open(check_info[result.success[0].object_path], 'rb') as dd:
            source1 = decompressor.decompress(dd.read())
            source1 += decompressor.flush()
        with open(result.success[0].object_path, 'rt') as dd:
            dest1 = dd.read()
        assert source1.decode("utf-8") == dest1
def test_download_dataset_files(self, mock_config_file_background_tests,
                                mock_dataset_head):
    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '500'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

        return "afakejobkey"

    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 1
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    helper_compress_file(obj1_target, obj1_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj1_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file_background_tests[0]):
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"],
                    'config_file': mock_config_file_background_tests[0]
                }

                gtmcore.dispatcher.dataset_jobs.download_dataset_files(**dl_kwargs)
                assert os.path.isfile(obj1_target) is True

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
def __init__(self, fp):
    self._comp = snappy.StreamDecompressor()
    self._fp = fp
    self._done = False
    self._pos = 0
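# A hedged sketch of a read() to go with the constructor above (the attribute
# names _comp, _fp, _done and _pos come from the snippet; everything else,
# including the 64 KiB read size and ignoring the size argument, is assumed):
def read(self, size=-1):
    out = []
    while not self._done:
        raw = self._fp.read(65536)
        if not raw:
            out.append(self._comp.flush())  # raises UncompressError on a truncated stream
            self._done = True
            break
        out.append(self._comp.decompress(raw))
    data = b"".join(out)
    self._pos += len(data)
    return data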