async def test_presigneds3download_get_presigned_s3_url_error(self, event_loop, mock_dataset_with_cache_dir):
    sb = get_storage_backend("gigantum_object_v1")
    sb.set_default_configuration("test-user", "abcd", '1234')
    ds = mock_dataset_with_cache_dir[0]

    object_id = "abcd1234"
    object_service_root = f"{sb._object_service_endpoint(ds)}/{ds.namespace}/{ds.name}"
    headers = sb._object_service_headers()
    download_chunk_size = 40000
    object_details = PullObject(object_path=f"/tmp/{object_id}",
                                revision=ds.git.repo.head.commit.hexsha,
                                dataset_path='myfile1.txt')
    psu = PresignedS3Download(object_service_root, headers, download_chunk_size, object_details)

    with aioresponses() as mocked_responses:
        async with aiohttp.ClientSession() as session:
            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                                 payload={
                                     "presigned_url": "https://dummyurl.com?params=2",
                                     "namespace": ds.namespace,
                                     "obj_id": object_id,
                                     "dataset": ds.name
                                 },
                                 status=500)

            with pytest.raises(IOError):
                await psu.get_presigned_s3_url(session)
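# For context, a minimal standalone sketch of the error-mapping pattern the
# test above asserts: a non-200 response from the object service is surfaced
# as an IOError. This is an illustrative assumption (hypothetical names), not
# the backend's actual PresignedS3Download implementation.
async def _fetch_presigned_url_sketch(session: aiohttp.ClientSession, request_url: str, headers: dict) -> str:
    async with session.get(request_url, headers=headers) as response:
        if response.status != 200:
            # The 500 mocked above would land here, triggering pytest.raises(IOError)
            raise IOError(f"Failed to get presigned URL: status {response.status}")
        response_data = await response.json()
        return response_data['presigned_url']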
def _gen_pull_objects(self, keys: List[str]) -> List[PullObject]:
    """Generate PullObject instances for a list of dataset keys.

    Args:
        keys: List of dataset-relative keys to pull

    Returns:
        A list of PullObject instances at the current dataset revision
    """
    result = list()
    revision = self.manifest.dataset_revision
    for key in keys:
        data = self.manifest.dataset_to_object_path(key)
        result.append(PullObject(object_path=data, revision=revision, dataset_path=key))

    return result
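# Example usage (hypothetical call site, shown for illustration only): given
# dataset-relative keys, this yields PullObject records mapping each key to
# its object-store path at the current revision, ready for pull_objects():
#
#     pull_objects = self._gen_pull_objects(['myfile1.txt', 'subdir/myfile2.txt'])
#     # pull_objects[0].dataset_path == 'myfile1.txt'
#     # pull_objects[0].revision == self.manifest.dataset_revision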
def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
    with aioresponses() as mocked_responses:
        sb = get_storage_backend("gigantum_object_v1")
        ds = mock_dataset_with_cache_dir[0]
        sb.set_default_configuration(ds.namespace, "abcd", '1234')

        object_dir, compressed_dir = temp_directories

        obj1_id = uuid.uuid4().hex
        obj2_id = uuid.uuid4().hex

        obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
        obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
        assert os.path.isfile(obj1_src_path) is True
        assert os.path.isfile(obj2_src_path) is True

        obj1_compressed_path = os.path.join(compressed_dir, obj1_id)
        obj2_compressed_path = os.path.join(compressed_dir, obj2_id)
        helper_compress_file(obj1_src_path, obj1_compressed_path)
        helper_compress_file(obj2_src_path, obj2_compressed_path)
        assert os.path.isfile(obj1_src_path) is False
        assert os.path.isfile(obj2_src_path) is False
        assert os.path.isfile(obj1_compressed_path) is True
        assert os.path.isfile(obj2_compressed_path) is True

        check_info = {obj1_src_path: obj1_compressed_path,
                      obj2_src_path: obj2_compressed_path}

        objects = [PullObject(object_path=obj1_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile1.txt'),
                   PullObject(object_path=obj2_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile2.txt')]

        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                             payload={
                                 "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                 "namespace": ds.namespace,
                                 "obj_id": obj1_id,
                                 "dataset": ds.name
                             },
                             status=400)
        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                             payload={
                                 "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                 "namespace": ds.namespace,
                                 "obj_id": obj2_id,
                                 "dataset": ds.name
                             },
                             status=200)

        with open(obj2_compressed_path, 'rb') as data2:
            mocked_responses.get(f"https://dummyurl.com/{obj2_id}?params=1",
                                 body=data2.read(), status=200,
                                 content_type='application/octet-stream')

        result = sb.pull_objects(ds, objects, updater)
        assert len(result.success) == 1
        assert len(result.failure) == 1
        assert isinstance(result, PullResult) is True
        assert isinstance(result.success[0], PullObject) is True
        assert result.success[0].object_path == obj2_src_path
        assert result.failure[0].object_path == obj1_src_path

        assert os.path.isfile(result.success[0].object_path) is True
        assert os.path.isfile(result.failure[0].object_path) is False

        decompressor = snappy.StreamDecompressor()
        with open(check_info[result.success[0].object_path], 'rb') as dd:
            source1 = decompressor.decompress(dd.read())
            source1 += decompressor.flush()

        with open(result.success[0].object_path, 'rt') as dd:
            dest1 = dd.read()

        assert source1.decode("utf-8") == dest1
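# The fixtures used above are assumed to look roughly like this: a minimal
# sketch consistent with the test's assertions, not necessarily the real
# helpers. helper_write_object writes a small object file and returns its
# path; helper_compress_file snappy-compresses it and removes the source,
# which is why the source-path isfile() checks flip to False afterwards.
def helper_write_object(object_dir: str, object_id: str, contents: str) -> str:
    object_path = os.path.join(object_dir, object_id)
    with open(object_path, 'wt') as f:
        f.write(contents)
    return object_path


def helper_compress_file(source_path: str, compressed_path: str) -> None:
    with open(source_path, 'rb') as source, open(compressed_path, 'wb') as compressed:
        # Framed stream format, matching the snappy.StreamDecompressor check in the test
        snappy.stream_compress(source, compressed)
    os.remove(source_path)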
def test_pull(self, mock_dataset_with_local_dir):
    def chunk_update_callback(completed_bytes: int):
        """Method to update the job's metadata and provide feedback to the UI"""
        assert type(completed_bytes) == int
        assert completed_bytes > 0

    ds = mock_dataset_with_local_dir[0]
    m = Manifest(ds, 'tester')
    assert len(m.manifest.keys()) == 0
    ds.backend.update_from_remote(ds, updater)
    m = Manifest(ds, 'tester')

    # Remove revision dir
    shutil.rmtree(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))

    keys = ['test1.txt', 'test2.txt', 'subdir/test3.txt']
    pull_objects = list()
    for key in keys:
        pull_objects.append(PullObject(object_path=m.dataset_to_object_path(key),
                                       revision=m.dataset_revision,
                                       dataset_path=key))
        # Remove objects
        os.remove(m.dataset_to_object_path(key))

    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt')) is False
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test2.txt')) is False
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'subdir', 'test3.txt')) is False

    for key in keys:
        assert os.path.isfile(m.dataset_to_object_path(key)) is False

    # Pull 1 File
    ds.backend.pull_objects(ds, [pull_objects[0]], chunk_update_callback)
    assert os.path.isdir(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt')) is True
    assert os.path.isfile(m.dataset_to_object_path('test1.txt')) is True

    # Pull all Files
    ds.backend.pull_objects(ds, pull_objects, chunk_update_callback)
    assert os.path.isdir(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test2.txt')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'subdir', 'test3.txt')) is True
    for key in keys:
        assert os.path.isfile(m.dataset_to_object_path(key)) is True
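# `updater` is assumed to be a module-level status callback that these tests
# pass to update_from_remote(); a plausible minimal version (the real fixture
# and its signature may differ):
def updater(msg: str) -> None:
    print(msg)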
def test_pull(self, mock_config_class, mock_public_bucket):
    im = mock_config_class[0]
    ds = im.create_dataset(USERNAME, USERNAME, 'dataset-1',
                           description="my dataset 1",
                           storage_type="public_s3_bucket")
    ds.backend.set_default_configuration(USERNAME, 'fakebearertoken', 'fakeidtoken')

    # Configure backend completely
    current_config = ds.backend_config
    current_config['Bucket Name'] = mock_public_bucket
    current_config['Prefix'] = ""
    ds.backend_config = current_config

    ds.backend.update_from_remote(ds, updater)
    m = Manifest(ds, 'tester')

    # Remove revision dir and objects from cache
    shutil.rmtree(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))

    keys = ['test-file-1.bin', 'metadata/test-file-3.bin', 'metadata/sub/test-file-5.bin']
    pull_objects = list()
    for key in keys:
        pull_objects.append(PullObject(object_path=m.dataset_to_object_path(key),
                                       revision=m.dataset_revision,
                                       dataset_path=key))
        # Remove objects
        os.remove(m.dataset_to_object_path(key))

    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test-file-1.bin')) is False
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'metadata', 'test-file-3.bin')) is False
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'metadata', 'sub', 'test-file-5.bin')) is False

    for key in keys:
        assert os.path.isfile(m.dataset_to_object_path(key)) is False

    # Pull 1 File (duplicate contents, so 2 files show up)
    ds.backend.pull_objects(ds, [pull_objects[0]], chunk_update_callback)
    assert os.path.isdir(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test-file-1.bin')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test-file-2.bin')) is True
    assert os.path.isfile(m.dataset_to_object_path('test-file-1.bin')) is True
    assert os.path.isfile(m.dataset_to_object_path('test-file-2.bin')) is True

    # Pull all Files
    ds.backend.pull_objects(ds, pull_objects, chunk_update_callback)
    assert os.path.isdir(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test-file-1.bin')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test-file-2.bin')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'metadata', 'test-file-3.bin')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'metadata', 'test-file-4.bin')) is True
    assert os.path.isfile(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'metadata', 'sub', 'test-file-5.bin')) is True
    for key in keys:
        assert os.path.isfile(m.dataset_to_object_path(key)) is True
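# The public_s3_bucket test above calls chunk_update_callback without defining
# it locally; presumably a module-level helper mirroring the nested callback in
# the earlier gigantum_object_v1 test, e.g.:
def chunk_update_callback(completed_bytes: int):
    """Verify the backend reports positive integer byte counts while downloading"""
    assert type(completed_bytes) == int
    assert completed_bytes > 0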