def test_delete_dataset_files_errors(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset-delete-2",
                           storage_type="gigantum_object_v1",
                           description="testing delete")
    m = Manifest(ds, 'default')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fdsfgfd")
    m.sweep_all_changes()

    revision = m.dataset_revision
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, revision, "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, revision, "test2.txt")) is True

    # Deleting a key that does not exist in the dataset should return an error
    query = """
    mutation myMutation {
      deleteDatasetFiles(input: {datasetOwner: "default", datasetName: "dataset-delete-2",
                                 keys: ["testdfdfdfdf.txt"]}) {
          success
      }
    }
    """
    result = fixture_working_dir[2].execute(query)
    assert 'errors' in result
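# The tests in this section rely on a `helper_append_file` utility that is not
# defined here. The sketch below is inferred from usage (content is written to
# <cache_root>/<revision>/<key>, with parent directories pre-created by the
# tests themselves); the suite's actual implementation may differ.
import os

def helper_append_file(cache_root, revision, rel_path, content):
    """Append `content` to a file under the revision directory, creating it if needed."""
    with open(os.path.join(cache_root, revision, rel_path), 'at') as fh:
        fh.write(content)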
def test_update_simple(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest

    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdf")
    status = manifest.status()
    assert len(status.created) == 1
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
    assert "test1.txt" in status.created

    manifest.update(status=status)
    time.sleep(2)
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0

    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdf")
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 1
    assert len(status.deleted) == 0

    manifest.update()
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
def test_compute_push_batches(self, mock_dataset_with_manifest_bg_tests):
    """Test computing push batches, verifying it works when some files have been deleted"""
    ds, manifest, working_dir = mock_dataset_with_manifest_bg_tests
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "other_dir/test3.txt", "test content 3")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test" * 4300000)
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test4.txt", "test content 4")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test5.txt", "test content 5")
    manifest.sweep_all_changes()
    assert len(manifest.manifest) == 6

    # Remove a file from the manifest
    manifest.delete(['test5.txt'])
    assert len(manifest.manifest) == 5

    key_batches, total_bytes, num_files = iom.compute_push_batches()
    assert num_files == 5
    assert total_bytes == (4 * 4300000) + (14 * 4)
    # The large file ends up in its own batch
    assert len(key_batches) == 2
    assert len(key_batches[0]) == 4
    assert len(key_batches[1]) == 1
    assert key_batches[1][0].dataset_path == 'test1.txt'
def test_status_deleted_files(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest

    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdf")
    status = manifest.status()
    assert len(status.created) == 1
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
    assert "test1.txt" in status.created

    manifest.update(status=status)
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0

    os.remove(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test1.txt"))
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 1

    manifest.update()
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
def test_push_objects_with_failure(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)

    with aioresponses() as mocked_responses:
        # First object: presigned URL request and upload both succeed
        mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj1}?params=1",
                                      "namespace": ds.namespace,
                                      "key_id": "hghghg",
                                      "obj_id": obj1,
                                      "dataset": ds.name},
                             status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj1}?params=1", payload={}, status=200)

        # Second object: the upload to the presigned URL fails with a 400
        mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj2}?params=1",
                                      "namespace": ds.namespace,
                                      "key_id": "hghghg",
                                      "obj_id": obj2,
                                      "dataset": ds.name},
                             status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj2}?params=1", payload={}, status=400)

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.push_objects()
        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        assert len(result.success) == 1
        assert len(result.failure) == 1
        assert result.success[0].object_path == obj_to_push[0].object_path
        assert result.failure[0].object_path == obj_to_push[1].object_path
def fixture_single_dataset():
    """A pytest fixture that creates a temporary working directory, a matching config file,
    the schema, and a populated environment component repository. Class scope modifier attached.
    """
    # Create temp dir
    config_file, temp_dir = _create_temp_work_dir()

    # Create user identity
    insert_cached_identity(temp_dir)

    # Create test client
    schema = graphene.Schema(query=LabbookQuery, mutation=LabbookMutations)

    # Create a dataset with a few files in its cache
    im = InventoryManager(config_file)
    ds = im.create_dataset('default', 'default', "test-dataset",
                           storage_type="gigantum_object_v1", description="Cats 2")
    m = Manifest(ds, 'default')
    cm_class = get_cache_manager_class(ds.client_config)
    cache_mgr = cm_class(ds, 'default')
    revision = ds.git.repo.head.commit.hexsha
    os.makedirs(os.path.join(cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(cache_mgr.cache_root, revision, "test1.txt", "asdfasdf")
    helper_append_file(cache_mgr.cache_root, revision, "test2.txt", "rtg")
    helper_append_file(cache_mgr.cache_root, revision, "test3.txt", "wer")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test4.txt", "dfasdfhfgjhg")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test5.txt", "fdghdfgsa")
    m.update()

    with patch.object(Configuration, 'find_default_config', lambda self: config_file):
        # Load user identity into app context
        app = Flask("lmsrvlabbook")
        app.config["LABMGR_CONFIG"] = Configuration()
        app.config["LABMGR_ID_MGR"] = get_identity_manager(Configuration())

        with app.app_context():
            # Within this block, current_app points to app. Set the current user
            # explicitly (this is normally done in the middleware).
            flask.g.user_obj = app.config["LABMGR_ID_MGR"].get_user_profile()

            # Create a test client
            client = Client(schema, middleware=[DataloaderMiddleware()],
                            context_value=ContextMock())

            yield config_file, temp_dir, client, ds, cache_mgr

    # Remove the temp dir
    shutil.rmtree(temp_dir)
def test_delete_dataset_while_linked(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="test", email="*****@*****.**")
    lb = inv_manager.create_labbook("test", "test", "labbook1", description="my first labbook")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    ds_root_dir = ds.root_dir
    lb_root_dir = lb.root_dir
    assert os.path.exists(ds_root_dir) is True
    assert os.path.exists(lb_root_dir) is True

    # Link dataset
    inv_manager.link_dataset_to_labbook(f"{ds_root_dir}/.git", "test", "dataset1", lb)

    m = Manifest(ds, 'test')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test2.txt")) is True

    dataset_delete_job = inv_manager.delete_dataset("test", "test", "dataset1")
    assert os.path.exists(ds_root_dir) is False
    assert os.path.exists(lb_root_dir) is True
    assert os.path.exists(m.cache_mgr.cache_root) is True
    assert dataset_delete_job.namespace == "test"
    assert dataset_delete_job.name == "dataset1"
    assert dataset_delete_job.cache_root == m.cache_mgr.cache_root

    # The dataset is still linked to a labbook, so the file cache should survive cleanup
    jobs.clean_dataset_file_cache("test", dataset_delete_job.namespace, dataset_delete_job.name,
                                  dataset_delete_job.cache_root, config_file=mock_config_file[0])
    assert os.path.exists(m.cache_mgr.cache_root) is True

    cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
    assert os.path.exists(cache_base) is True
def test_objects_to_push(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "other_dir/test4.txt", "test content 4")
    manifest.sweep_all_changes()

    # Modify file to have 2 objects with same key
    helper_append_file(manifest.cache_mgr.cache_root, iom.manifest.dataset_revision,
                       "test2.txt", "test content 22")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 4
    assert obj_to_push[0].dataset_path == "other_dir/test4.txt"
    assert obj_to_push[1].dataset_path == "test1.txt"
    assert obj_to_push[2].dataset_path == "test2.txt"
    assert obj_to_push[3].dataset_path == "test2.txt"
    assert obj_to_push[2].revision != obj_to_push[3].revision
    assert iom.num_objects_to_push() == 4
def test_objects_to_push_deduped(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content dup")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content dup")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test3.txt", "test content dup")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "other_dir/test4.txt", "test content 4")
    manifest.sweep_all_changes()

    # Write a .DS_Store file in the objects dir to make sure it gets skipped
    with open(os.path.join(manifest.cache_mgr.cache_root, 'objects', '.push', '.DS_Store'), 'wt') as ff:
        ff.write("")

    obj_to_push = iom.objects_to_push(remove_duplicates=True)
    assert len(obj_to_push) == 2
    assert obj_to_push[0].dataset_path == "other_dir/test4.txt"
    assert obj_to_push[1].dataset_path == "test1.txt"
    assert iom.num_objects_to_push(remove_duplicates=True) == 2
def test_move_rename_file(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "other_dir"))
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                             "other_dir", "nested_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdghndfdf")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test2.txt", "asdfdf")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "other_dir/nested_dir/test6.txt", "4456tyfg")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "other_dir/nested_dir/test7.txt", "fgfyytr")
    manifest.sweep_all_changes()
    num_records = len(ds.git.log())
    assert num_records == 6

    revision = manifest.dataset_revision
    cr = manifest.cache_mgr.cache_root
    assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True
    assert os.path.exists(os.path.join(cr, revision, "test2.txt")) is True
    assert os.path.exists(os.path.join(cr, revision, "other_dir", "nested_dir", "test6.txt")) is True
    assert os.path.exists(os.path.join(cr, revision, "other_dir", "nested_dir", "test7.txt")) is True

    # test renaming a file
    edges = manifest.move("test1.txt", "test1-moved.txt")
    assert len(edges) == 1
    assert edges[0]['key'] == 'test1-moved.txt'
    assert edges[0]['size'] == '14'
    assert edges[0]['is_local'] is True

    revision = manifest.dataset_revision
    assert os.path.exists(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                                       "test1.txt")) is False
    assert os.path.exists(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                                       "test1-moved.txt")) is True
    assert os.path.exists(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                                       "test2.txt")) is True
    assert os.path.exists(os.path.join(cr, revision, "other_dir", "nested_dir", "test6.txt")) is True
    assert os.path.exists(os.path.join(cr, revision, "other_dir", "nested_dir", "test7.txt")) is True
    assert len(ds.git.log()) == num_records + 2
def test_sweep_all_changes_remove_file_in_dir(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "dir1"))
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "dir1/test1.txt", "asdfasdfdf")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test2.txt", "dfdf")
    assert len(ds.git.log()) == 4

    status = manifest.status()
    assert len(status.created) == 3
    assert len(status.modified) == 0
    assert len(status.deleted) == 0

    manifest.sweep_all_changes()
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
    assert len(manifest.manifest.keys()) == 3
    assert 'dir1/' in manifest.manifest
    assert 'dir1/test1.txt' in manifest.manifest
    assert 'test2.txt' in manifest.manifest

    src = os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "dir1", "test1.txt")
    os.remove(src)
    time.sleep(1.5)

    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 1
    assert 'dir1/test1.txt' in status.deleted

    manifest.sweep_all_changes()
    status = manifest.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
    assert len(manifest.manifest.keys()) == 2
    assert 'dir1/' in manifest.manifest
    assert 'test2.txt' in manifest.manifest
def test_complete_dataset_upload_transaction_simple(self, mock_config_file_background_tests):
    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "new-ds",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "fake content!")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "moar fake content!")

    dl_kwargs = {
        'dispatcher': Dispatcher,
        'logged_in_username': "******",
        'logged_in_email': "*****@*****.**",
        'dataset_owner': "default",
        'dataset_name': "new-ds",
        'config_file': mock_config_file_background_tests[0]
    }

    assert len(m.manifest) == 0
    gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(**dl_kwargs)

    m = Manifest(ds, 'default')

    # make sure manifest got updated
    assert len(m.manifest) == 2
    assert 'test1.txt' in m.manifest
    assert 'test2.txt' in m.manifest
    assert m.manifest['test1.txt']['b'] == '13'
    assert len(m.manifest['test1.txt']['h']) == 128
    assert 'manifest-' in m.manifest['test1.txt']['fn']
    assert m.manifest['test2.txt']['b'] == '18'
    assert len(m.manifest['test2.txt']['h']) == 128
    assert 'manifest-' in m.manifest['test2.txt']['fn']
    assert m.manifest['test2.txt']['h'] != m.manifest['test1.txt']['h']

    # Make sure activity created
    assert len(ds.git.log()) == 6
    assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
    assert "Uploaded 2 new file(s)." in ds.git.log()[0]['message']
def test_move_dataset_file(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset-move",
                           storage_type="gigantum_object_v1", description="testing move")
    m = Manifest(ds, 'default')

    revision = m.dataset_revision
    helper_append_file(m.cache_mgr.cache_root, revision, "test1.txt", "asdfasdghndfdf")
    m.sweep_all_changes()

    revision = m.dataset_revision
    cr = m.cache_mgr.cache_root
    assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True

    query = """
    mutation myMutation {
      moveDatasetFile(input: {datasetOwner: "default", datasetName: "dataset-move",
                              srcPath: "test1.txt", dstPath: "test1-renamed.txt"}) {
          updatedEdges {
            node {
              id
              key
              isDir
              isLocal
              size
            }
          }
      }
    }
    """
    result = fixture_working_dir[2].execute(query)
    assert 'errors' not in result
    snapshot.assert_match(result)

    revision = m.dataset_revision
    cr = m.cache_mgr.cache_root
    assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is False
    assert os.path.exists(os.path.join(cr, revision, "test1-renamed.txt")) is True
def test_file_info_from_filesystem(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdf")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "other_dir/test4.txt", "dfasdfhfgjhg")

    file_info = manifest.gen_file_info("test1.txt")
    assert file_info['key'] == "test1.txt"
    assert file_info['size'] == '8'
    assert file_info['is_favorite'] is False
    assert file_info['is_local'] is True
    assert file_info['is_dir'] is False
    assert 'modified_at' in file_info

    file_info = manifest.gen_file_info("other_dir/test4.txt")
    assert file_info['key'] == "other_dir/test4.txt"
def test_status_created_files(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test_dir"))
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "other_dir"))
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                             "test_dir", "nested"))
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "asdfasdf")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test2.txt", "dfg")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test_dir/test3.txt", "asdffdgfghghfjjgh")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test_dir/nested/test4.txt", "565656565")
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "other_dir/test5.txt", "dfasdfhfgjhg")

    status = manifest.status()
    assert len(status.created) == 8
    assert len(status.modified) == 0
    assert len(status.deleted) == 0
    assert "test1.txt" in status.created
    assert "test2.txt" in status.created
    assert "test_dir/test3.txt" in status.created
    assert "test_dir/nested/test4.txt" in status.created
    assert "other_dir/test5.txt" in status.created
    assert "test_dir/" in status.created
    assert "test_dir/nested/" in status.created
    assert "other_dir/" in status.created
def test_delete_dataset(self, mock_config_file):
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="test", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    root_dir = ds.root_dir
    assert os.path.exists(root_dir) is True

    m = Manifest(ds, 'test')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test2.txt")) is True

    inv_manager.delete_dataset("test", "test", "dataset1")
    assert os.path.exists(root_dir) is False
    assert os.path.exists(m.cache_mgr.cache_root) is False

    cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
    assert os.path.exists(cache_base) is True
def test_sync__dataset(self, mock_config_file):
    def update_feedback(msg: str, has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None):
        """Method to update the job's metadata and provide feedback to the UI"""
        assert has_failures is None or has_failures is False
        assert failure_detail is None

    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '100'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        return "afakejobkey"

    username = '******'
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset(username, username, 'dataset-1', 'gigantum_object_v1')
    m = Manifest(ds, username)
    wf = DatasetWorkflow(ds)
    iom = IOManager(ds, m)
    assert len(glob.glob(f'{iom.push_dir}/*')) == 0
    wf.publish(username=username, feedback_callback=update_feedback)

    # Put a file into the dataset that needs to be pushed
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()
    assert len(glob.glob(f'{iom.push_dir}/*')) == 1

    with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
        with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
            wf.sync(username=username, feedback_callback=update_feedback)
    assert os.path.exists(wf.remote)
    assert len(glob.glob(f'{iom.push_dir}/*')) == 0
def test_compute_pull_batches(self, mock_dataset_with_manifest_bg_tests):
    ds, manifest, working_dir = mock_dataset_with_manifest_bg_tests
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "other_dir/test3.txt", "test content 3")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test" * 4300000)
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test4.txt", "test content 4")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test5.txt", "test content 5")
    manifest.sweep_all_changes()

    # Calling with neither keys nor pull_all=True should raise
    with pytest.raises(ValueError):
        iom.compute_pull_batches()

    # Remove all files so everything needs to be pulled
    rev_dir = os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision)
    object_dir = os.path.join(manifest.cache_mgr.cache_root, 'objects')
    shutil.rmtree(rev_dir)
    shutil.rmtree(object_dir)

    key_batches, total_bytes, num_files = iom.compute_pull_batches(pull_all=True)
    assert num_files == 5
    assert total_bytes == (4 * 4300000) + (14 * 4)
    # The large file ends up in its own batch
    assert len(key_batches) == 2
    assert len(key_batches[0]) == 4
    assert len(key_batches[1]) == 1
    assert key_batches[1][0] == 'test1.txt'
def test_file_distribution(self, fixture_single_dataset):
    """Test getting a Dataset's file type distribution"""
    ds = fixture_single_dataset[3]
    query = """
    {
      dataset(owner: "default", name: "test-dataset") {
        overview {
          fileTypeDistribution
        }
      }
    }
    """
    result = fixture_single_dataset[2].execute(query)
    assert 'errors' not in result
    assert len(result['data']['dataset']['overview']['fileTypeDistribution']) == 1
    assert result['data']['dataset']['overview']['fileTypeDistribution'] == ['1.00|.txt']

    # Add more files, including ones that should be excluded from the distribution
    m = Manifest(ds, 'default')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test55.csv", "22222")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "df.csv", "33333")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, ".hidden", "33333")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "noextension", "33333")
    m.update()

    result = fixture_single_dataset[2].execute(query)
    assert 'errors' not in result
    assert len(result['data']['dataset']['overview']['fileTypeDistribution']) == 2
    assert result['data']['dataset']['overview']['fileTypeDistribution'][0] == '0.71|.txt'
    assert result['data']['dataset']['overview']['fileTypeDistribution'][1] == '0.29|.csv'
def test_objects_to_push_ignore_other_branch(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "fdsfgfd")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    assert obj_to_push[0].dataset_path == "test1.txt"
    assert obj_to_push[1].dataset_path == "test2.txt"

    # Create a new branch and add a file there
    bm = BranchManager(ds, username=USERNAME)
    starting_branch = bm.active_branch
    bm.create_branch(title="test-branch")
    assert bm.active_branch == "test-branch"
    assert ds.is_repo_clean is True

    helper_append_file(manifest.cache_mgr.cache_root, iom.manifest.dataset_revision,
                       "test3.txt", "fdsfgfd")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 3
    assert obj_to_push[0].dataset_path == "test1.txt"
    assert obj_to_push[1].dataset_path == "test2.txt"
    assert obj_to_push[2].dataset_path == "test3.txt"

    # Go back to the original branch; the file on the other branch should not need a push
    bm.workon_branch(starting_branch)
    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    assert obj_to_push[0].dataset_path == "test1.txt"
    assert obj_to_push[1].dataset_path == "test2.txt"
def test_pull_objects_all_partial_download(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "other_dir/test3.txt", "1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 3
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
    _, obj_id_3 = obj_to_push[2].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path
    obj3_target = obj_to_push[2].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert "test3.txt" in obj_to_push[0].dataset_path
    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    assert os.path.exists(obj3_target) is True

    # Completely remove other_dir/test3.txt object
    os.remove(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                           "other_dir", "test3.txt"))
    helper_compress_file(obj1_target, obj1_source)

    # Remove link for test1.txt
    os.remove(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test1.txt"))

    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is True
    assert os.path.isfile(obj3_target) is True

    with aioresponses() as mocked_responses:
        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                                      "namespace": ds.namespace,
                                      "obj_id": obj_id_1,
                                      "dataset": ds.name},
                             status=200)
        with open(obj1_source, 'rb') as data1:
            mocked_responses.get(f"https://dummyurl.com/{obj_id_1}?params=1",
                                 body=data1.read(), status=200,
                                 content_type='application/octet-stream')

        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.pull_all()
        assert len(result.success) == 1
        assert len(result.failure) == 0
        assert result.success[0].object_path == obj1_target
        assert "test3.txt" in result.success[0].dataset_path

        assert os.path.isfile(obj1_target) is True
        assert os.path.isfile(obj2_target) is True
        assert os.path.isfile(obj3_target) is True

        filename = os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                                "other_dir", "test3.txt")
        assert os.path.isfile(filename) is True
        with open(filename, 'rt') as dd:
            assert dd.read() == "1"

        filename = os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test1.txt")
        assert os.path.isfile(filename) is True
        with open(filename, 'rt') as dd:
            assert dd.read() == "test content 1"

        filename = os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test2.txt")
        assert os.path.isfile(filename) is True
        with open(filename, 'rt') as dd:
            assert dd.read() == "test content 2"

        # Try pulling all again with nothing to pull
        result = iom.pull_all()
        assert len(result.success) == 0
        assert len(result.failure) == 0
        assert result.message == "Dataset already downloaded."
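# `helper_compress_file`, used above, is also assumed from context: after the
# call the original object file is gone and a snappy-compressed copy exists at
# the destination (the tests later verify it with snappy.StreamDecompressor).
# A hedged sketch under those assumptions, not the suite's actual implementation:
import os
import snappy

def helper_compress_file(source_path, dest_path):
    """Snappy-compress `source_path` into `dest_path`, then remove the original file."""
    with open(source_path, 'rb') as src, open(dest_path, 'wb') as dst:
        dst.write(snappy.StreamCompressor().add_chunk(src.read()))
    os.remove(source_path)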
def test_pull_objects_all(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)
    check_info = {obj1_target: obj1_source, obj2_target: obj2_source}

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    # remove data from the local file cache
    os.remove(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test1.txt"))
    os.remove(os.path.join(manifest.cache_mgr.cache_root, manifest.dataset_revision, "test2.txt"))
    shutil.rmtree(os.path.join(manifest.cache_mgr.cache_root, 'objects'))
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, 'objects'))

    with aioresponses() as mocked_responses:
        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                                      "namespace": ds.namespace,
                                      "obj_id": obj_id_1,
                                      "dataset": ds.name},
                             status=200)
        with open(obj1_source, 'rb') as data1:
            mocked_responses.get(f"https://dummyurl.com/{obj_id_1}?params=1",
                                 body=data1.read(), status=200,
                                 content_type='application/octet-stream')

        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                                      "namespace": ds.namespace,
                                      "obj_id": obj_id_2,
                                      "dataset": ds.name},
                             status=200)
        with open(obj2_source, 'rb') as data2:
            mocked_responses.get(f"https://dummyurl.com/{obj_id_2}?params=1",
                                 body=data2.read(), status=200,
                                 content_type='application/octet-stream')

        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.pull_all()
        assert len(result.success) == 2
        assert len(result.failure) == 0
        assert result.success[0].object_path != result.success[1].object_path
        assert result.success[0].object_path in [obj_to_push[0].object_path,
                                                 obj_to_push[1].object_path]
        assert result.success[1].object_path in [obj_to_push[0].object_path,
                                                 obj_to_push[1].object_path]

        assert os.path.isfile(obj1_target) is True
        assert os.path.isfile(obj2_target) is True

        # Verify each downloaded object decompresses back to the original content
        decompressor = snappy.StreamDecompressor()
        for r in result.success:
            with open(check_info[r.object_path], 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(r.object_path, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
def test_pull_objects(self, mock_dataset_with_manifest):
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    with aioresponses() as mocked_responses:
        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                                      "namespace": ds.namespace,
                                      "obj_id": obj_id_1,
                                      "dataset": ds.name},
                             status=200)
        with open(obj1_source, 'rb') as data1:
            mocked_responses.get(f"https://dummyurl.com/{obj_id_1}?params=1",
                                 body=data1.read(), status=200,
                                 content_type='application/octet-stream')

        mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                             payload={"presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                                      "namespace": ds.namespace,
                                      "obj_id": obj_id_2,
                                      "dataset": ds.name},
                             status=200)
        with open(obj2_source, 'rb') as data2:
            mocked_responses.get(f"https://dummyurl.com/{obj_id_2}?params=1",
                                 body=data2.read(), status=200,
                                 content_type='application/octet-stream')

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.pull_objects(keys=["test1.txt"])
        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        assert len(result.success) == 1
        assert len(result.failure) == 0
        assert result.success[0].object_path == obj_to_push[0].object_path
        assert os.path.isfile(obj1_target) is True
        assert os.path.isfile(obj2_target) is False
        with open(obj1_target, 'rt') as dd:
            assert "test content 1" == dd.read()

        result = iom.pull_objects(keys=["test2.txt"])
        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        assert len(result.success) == 1
        assert len(result.failure) == 0
        assert result.success[0].object_path == obj_to_push[1].object_path
        assert os.path.isfile(obj1_target) is True
        assert os.path.isfile(obj2_target) is True
        with open(obj1_target, 'rt') as dd:
            assert "test content 1" == dd.read()
        with open(obj2_target, 'rt') as dd:
            assert "test content 2" == dd.read()
def test_pull_objects(self, mock_config_file, mock_dataset_head):
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    os.makedirs(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "other_dir"))
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fdsfgfd")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test2.txt'))

    with patch.object(Configuration, 'find_default_config', lambda self: mock_config_file[0]):
        with aioresponses() as mocked_responses:
            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                                 payload={"presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                                          "namespace": ds.namespace,
                                          "obj_id": obj_id_1,
                                          "dataset": ds.name},
                                 status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(f"https://dummyurl.com/{obj_id_1}?params=1",
                                     body=data1.read(), status=200,
                                     content_type='application/octet-stream')

            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                                 payload={"presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                                          "namespace": ds.namespace,
                                          "obj_id": obj_id_2,
                                          "dataset": ds.name},
                                 status=200)
            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(f"https://dummyurl.com/{obj_id_2}?params=1",
                                     body=data2.read(), status=200,
                                     content_type='application/octet-stream')

            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test1.txt"]
            }
            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in real
            # use, multiple jobs run in parallel and you only want to link once).
            m.link_revision()
            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is False

            decompressor = snappy.StreamDecompressor()
            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            # Download other file
            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test2.txt"]
            }
            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in real
            # use, multiple jobs run in parallel and you only want to link once).
            m.link_revision()
            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True

            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            with open(obj2_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj2_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
def test_complete_dataset_upload_transaction_all_types(self, mock_config_file_background_tests):
    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "new-ds",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "fake content 1")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fake content 2")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test3.txt", "fake content 3")

    dl_kwargs = {
        'dispatcher': Dispatcher,
        'logged_in_username': "******",
        'logged_in_email': "*****@*****.**",
        'dataset_owner': "default",
        'dataset_name': "new-ds",
        'config_file': mock_config_file_background_tests[0]
    }

    assert len(m.manifest) == 0
    gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(**dl_kwargs)

    m = Manifest(ds, 'default')

    # make sure manifest got updated
    assert len(m.manifest) == 3
    assert 'test1.txt' in m.manifest
    assert 'test2.txt' in m.manifest
    assert 'test3.txt' in m.manifest
    hash1 = m.manifest['test1.txt']['h']

    # Make sure activity created
    assert len(ds.git.log()) == 6
    assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
    assert "Uploaded 3 new file(s)." in ds.git.log()[0]['message']

    # Modify one file, add a new one, and delete another
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "fake content changed")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test4.txt", "fake content 4")
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test3.txt"))

    gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(**dl_kwargs)

    m = Manifest(ds, 'default')

    # make sure manifest got updated
    assert len(m.manifest) == 3
    assert 'test1.txt' in m.manifest
    assert 'test2.txt' in m.manifest
    assert 'test4.txt' in m.manifest
    assert hash1 != m.manifest['test1.txt']['h']

    # Make sure activity created
    assert len(ds.git.log()) == 8
    assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
    assert "Uploaded 1 new file(s). Uploaded 1 modified file(s). 1 deleted file(s)." in \
        ds.git.log()[0]['message']
def test_update_dataset_link(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    lb = im.create_labbook('default', 'default', 'test-lb', 'testing dataset links')
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    manifest = Manifest(ds, 'default')
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "12345")
    manifest.sweep_all_changes()

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'default', None, None)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False

    overview_query = """
    {
      labbook(owner: "default", name: "test-lb") {
        linkedDatasets {
          name
          overview {
            localBytes
            totalBytes
          }
        }
      }
    }
    """

    query = """
    mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                        $a: String!, $du: String) {
      modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do,
                                datasetName: $dn, action: $a, datasetUrl: $du}) {
          newLabbookEdge {
            node {
              id
              name
              description
              linkedDatasets {
                name
              }
            }
          }
      }
    }
    """
    variables = {"lo": "default", "ln": "test-lb", "do": "default", "dn": "dataset100",
                 "a": "link", "du": ds.remote}
    result = fixture_working_dir[2].execute(query, variable_values=variables)
    assert "errors" not in result
    snapshot.assert_match(result)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'default', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) > 0

    # check overview
    result = fixture_working_dir[2].execute(overview_query)
    assert "errors" not in result
    assert result['data']['labbook']['linkedDatasets'][0]['overview']['localBytes'] == '5'
    assert result['data']['labbook']['linkedDatasets'][0]['overview']['totalBytes'] == '5'

    # Make change to published dataset
    git_dir = os.path.join(tempfile.gettempdir(), 'test_update_dataset_link_mutation')
    try:
        os.makedirs(git_dir)
        call_subprocess(['git', 'clone', ds.remote], cwd=git_dir, check=True)
        with open(os.path.join(git_dir, ds.name, 'test_file.dat'), 'wt') as tf:
            tf.write("Test File Contents")
        call_subprocess(['git', 'add', 'test_file.dat'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'commit', '-m', 'editing repo'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'push'], cwd=os.path.join(git_dir, ds.name), check=True)

        query = """
        mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!, $a: String!) {
          modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do,
                                    datasetName: $dn, action: $a}) {
              newLabbookEdge {
                node {
                  id
                  name
                  description
                  linkedDatasets {
                    name
                  }
                }
              }
          }
        }
        """
        variables = {"lo": "default", "ln": "test-lb", "do": "default", "dn": "dataset100",
                     "a": "update"}
        result = fixture_working_dir[2].execute(query, variable_values=variables)
        assert "errors" not in result
        snapshot.assert_match(result)

        # verify change is reflected
        assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

        # Verify activity record
        assert "Updated Dataset `default/dataset100` link to version" in lb.git.log()[0]['message']
    finally:
        if os.path.exists(git_dir):
            shutil.rmtree(git_dir)
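# `_MOCK_create_remote_repo2` (used in the test above) is assumed to stand in
# for publishing: it creates a local bare git repository and registers it as
# the dataset's remote so that the later clone/push calls and `ds.remote`
# lookups work. The signature and wiring below are assumptions inferred from
# usage, not the suite's actual mock:
import os
import tempfile
import uuid

def _MOCK_create_remote_repo2(dataset, username, visibility=None, access_token=None):
    """Back `dataset` with a local bare repo so it behaves like a published remote."""
    remote_dir = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.git")
    call_subprocess(['git', 'init', '--bare', remote_dir], cwd=tempfile.gettempdir(), check=True)
    call_subprocess(['git', 'remote', 'add', 'origin', remote_dir],
                    cwd=dataset.root_dir, check=True)
    call_subprocess(['git', 'push', '--set-upstream', 'origin', 'master'],
                    cwd=dataset.root_dir, check=True)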
def test_file_distribution_hidden(self, fixture_single_dataset):
    """Test the file type distribution when hidden files and directories are present"""
    ds = fixture_single_dataset[3]
    query = """
    {
      dataset(owner: "default", name: "test-dataset") {
        overview {
          fileTypeDistribution
        }
      }
    }
    """
    result = fixture_single_dataset[2].execute(query)
    assert 'errors' not in result
    assert result['data']['dataset']['overview']['fileTypeDistribution'] == ['1.00|.txt']

    # Add more files, including hidden files and files inside hidden directories
    m = Manifest(ds, 'default')
    os.makedirs(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, ".hiddendir"))
    os.makedirs(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, ".hiddendir", "subdir"))
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test55.csv", "22222")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "df.csv", "11")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, ".hidden", "343")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "noextension", "6t4")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, ".hiddendir/tester.png", "8544")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, ".hiddendir/subdir/blah.jpeg", "8544")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, ".hiddendir/subdir/.hiddenfile", "jhg")
    m.update()

    result = fixture_single_dataset[2].execute(query)
    assert 'errors' not in result
    assert len(result['data']['dataset']['overview']['fileTypeDistribution']) == 4
    assert result['data']['dataset']['overview']['fileTypeDistribution'][0] == '0.56|.txt'
    assert result['data']['dataset']['overview']['fileTypeDistribution'][1] == '0.22|.csv'
    assert result['data']['dataset']['overview']['fileTypeDistribution'][2] == '0.11|.jpeg'
    assert result['data']['dataset']['overview']['fileTypeDistribution'][3] == '0.11|.png'
def test_complete_dataset_upload_transaction_failure(self, mock_config_file_background_tests):
    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "new-ds",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    dispatcher_obj = Dispatcher()

    helper_write_big_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.dat", "12")
    helper_write_big_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.dat", "23")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "zztest3.txt", "fake content 3")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "zztest4.txt", "fake content 4")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "zztest5.txt", "fake content 5")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "zztest6.txt", "fake content 6")

    job_kwargs = {
        'dispatcher': Dispatcher,
        'logged_in_username': "******",
        'logged_in_email': "*****@*****.**",
        'dataset_owner': "default",
        'dataset_name': "new-ds",
        'config_file': mock_config_file_background_tests[0]
    }
    job_metadata = {
        'dataset': f"default|default|new-ds",
        'method': 'complete_dataset_upload_transaction'
    }

    assert len(m.manifest) == 0
    job_key = dispatcher_obj.dispatch_task(
        gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction,
        kwargs=job_kwargs, metadata=job_metadata)

    time.sleep(3)

    # Remove files to make them fail
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "zztest4.txt"))
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "zztest5.txt"))

    cnt = 0
    while cnt < 120:
        job_status = dispatcher_obj.query_task(job_key)
        if job_status.status == 'finished':
            break
        time.sleep(1)
        cnt += 1

    assert cnt < 119

    m = Manifest(ds, 'default')
    assert len(m.manifest) == 4
    assert 'test1.dat' in m.manifest
    assert 'test2.dat' in m.manifest
    assert 'zztest3.txt' in m.manifest
    assert 'zztest6.txt' in m.manifest
    assert 'zztest5.txt' not in m.manifest
    assert 'zztest4.txt' not in m.manifest

    assert job_status.meta['has_failures'] is True
    assert 'The following files failed to hash. Try re-uploading the files again:\nzztest4.txt \nzztest5.txt' == \
        job_status.meta['failure_detail']
    assert 'An error occurred while processing some files. Check details and re-upload.' == \
        job_status.meta['feedback']
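# `helper_write_big_file` is another assumed utility: it writes a file large
# enough that hashing it takes measurable time, which is what gives the test
# above a window to delete the small zztest*.txt files while the background
# job is still running. A sketch under that assumption (the repeat count is
# arbitrary and hypothetical):
def helper_write_big_file(cache_root, revision, rel_path, content_seed):
    """Write a large file by repeating `content_seed` many times."""
    with open(os.path.join(cache_root, revision, rel_path), 'wt') as fh:
        fh.write(content_seed * 10000000)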
def test_download_dataset_files(self, mock_config_file_background_tests, mock_dataset_head):
    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '500'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        with aioresponses() as mocked_responses:
            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                                 payload={"presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                                          "namespace": ds.namespace,
                                          "obj_id": obj_id_1,
                                          "dataset": ds.name},
                                 status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(f"https://dummyurl.com/{obj_id_1}?params=1",
                                     body=data1.read(), status=200,
                                     content_type='application/octet-stream')

            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
            return "afakejobkey"

    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 1
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    helper_compress_file(obj1_target, obj1_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj1_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file_background_tests[0]):
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"],
                    'config_file': mock_config_file_background_tests[0]
                }
                gtmcore.dispatcher.dataset_jobs.download_dataset_files(**dl_kwargs)
                assert os.path.isfile(obj1_target) is True

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
def test_download_dataset_files_file_fail(self, mock_config_file_background_tests):
    def dispatch_query_mock(self, job_key):
        # mock the job actually running and returning status
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished',
                         meta={'completed_bytes': '0', 'failure_keys': 'test1.txt'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
        return "afakejobkey"

    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 1
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    helper_compress_file(obj1_target, obj1_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj1_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file_background_tests[0]):
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"],
                    'config_file': mock_config_file_background_tests[0]
                }

                # No download responses are mocked, so pulling test1.txt should fail
                with pytest.raises(IOError):
                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(**dl_kwargs)
                assert os.path.isfile(obj1_target) is False