def test_link_unlink_dataset(self, mock_labbook):
    """Link a remote dataset into a labbook, then unlink it and verify full cleanup."""
    inv_manager = InventoryManager(mock_labbook[0])
    lb = mock_labbook[2]
    ds = inv_manager.create_dataset("test", "test", "dataset100", "gigantum_object_v1",
                                    description="my dataset")

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'test', None, None)

    # No submodules registered before linking
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False

    inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100', lb)
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True

    inv_manager.unlink_dataset_from_labbook('test', 'dataset100', lb)
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is False
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is False

    # .gitmodules remains but must be empty after unlink
    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) == 0
def test_delete_labbook_linked_dataset(self, mock_config_file):
    """Deleting a labbook with a linked dataset returns cleanup jobs for that dataset."""
    inv_manager = InventoryManager(mock_config_file[0])
    inv_manager.create_labbook("test", "test", "labbook1", description="my first labbook")
    lb = inv_manager.load_labbook("test", "test", "labbook1")

    auth = GitAuthor(name="test", email="*****@*****.**")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    inv_manager.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test", "dataset1", lb)

    dataset_delete_jobs = inv_manager.delete_labbook("test", "test", "labbook1")
    assert len(dataset_delete_jobs) == 1
    assert dataset_delete_jobs[0].namespace == "test"
    assert dataset_delete_jobs[0].name == "dataset1"

    # Labbook itself must be gone from the inventory
    with pytest.raises(InventoryException):
        inv_manager.load_labbook("test", "test", "labbook1")
def test_get_commits_behind(self, fixture_working_dir):
    """Test temporary field commitsBehind on dataset objects."""
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset("default", "default", "test-ds", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="default", email="*****@*****.**"))
    lb = im.create_labbook("default", "default", "test-lb")
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", 'default', 'test-ds', lb)

    query = """
                {
                  labbook(owner: "default", name:"test-lb") {
                    linkedDatasets{
                      name
                      commitsBehind
                    }
                  }
                }
                """
    r = fixture_working_dir[2].execute(query)
    assert 'errors' not in r
    assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
    assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 0

    # Commit to the dataset; the linked reference is now one commit behind
    ds.write_readme("test contents to make a commit")

    r = fixture_working_dir[2].execute(query)
    assert 'errors' not in r
    assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
    assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 1
def test_get_linked_datasets(self, mock_labbook):
    """get_linked_datasets reflects datasets as they are linked to a labbook."""
    inv_manager = InventoryManager(mock_labbook[0])
    lb = mock_labbook[2]

    # Nothing linked yet
    datasets = inv_manager.get_linked_datasets(lb)
    assert len(datasets) == 0

    ds = inv_manager.create_dataset("test", "test", "dataset100", "gigantum_object_v1",
                                    description="my dataset")

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'test', None, None)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False
    inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100', lb)
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True

    datasets = inv_manager.get_linked_datasets(lb)
    assert len(datasets) == 1
    assert datasets[0].name == ds.name
    assert datasets[0].namespace == ds.namespace
def test_create_rollback_branch_remove_linked_dataset(self, mock_create_labbooks):
    """test creating a rollback branch that removes a linked dataset"""
    lb, client = mock_create_labbooks[0], mock_create_labbooks[1]
    im = InventoryManager(config_file=lb.client_config.config_file)
    ds = im.create_dataset(UT_USERNAME, UT_USERNAME, 'test-ds',
                           storage_type='gigantum_object_v1')

    # Capture the commit to roll back to, BEFORE linking the dataset
    rollback_to = lb.git.commit_hash

    # Link dataset to project
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", UT_USERNAME, ds.name, lb)
    dataset_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets', UT_USERNAME, 'test-ds')
    assert os.path.exists(dataset_dir) is True

    q = f"""
    mutation makeFeatureBranch {{
        createExperimentalBranch(input: {{
            owner: "{UT_USERNAME}",
            labbookName: "{UT_LBNAME}",
            branchName: "rollback-branch",
            revision: "{rollback_to}",
            description: "testing rollback",
        }}) {{
            labbook{{
                name
                activeBranchName
                description
                branches {{
                    branchName
                }}
                linkedDatasets{{
                    name
                }}
            }}
        }}
    }}
    """
    r = client.execute(q)
    assert 'errors' not in r
    assert r['data']['createExperimentalBranch']['labbook']['activeBranchName'] \
        == 'rollback-branch'
    assert r['data']['createExperimentalBranch']['labbook']['description'] \
        == "testing rollback"
    # Rollback predates the link, so the dataset must be unlinked on the new branch
    assert r['data']['createExperimentalBranch']['labbook']['linkedDatasets'] == []
    assert lb.is_repo_clean
    assert os.path.exists(dataset_dir) is False
def test_delete_dataset_while_linked(self, mock_config_file):
    """Deleting a dataset that is still linked to a labbook keeps the file cache
    until the returned background job cleans it up (labbook still references it)."""
    inv_manager = InventoryManager(mock_config_file[0])
    auth = GitAuthor(name="test", email="*****@*****.**")
    lb = inv_manager.create_labbook("test", "test", "labbook1",
                                    description="my first labbook")
    ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                    description="my first dataset", author=auth)
    ds_root_dir = ds.root_dir
    lb_root_dir = lb.root_dir
    assert os.path.exists(ds_root_dir) is True
    assert os.path.exists(lb_root_dir) is True

    # Link dataset
    inv_manager.link_dataset_to_labbook(f"{ds_root_dir}/.git", "test", "dataset1", lb)

    m = Manifest(ds, 'test')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                                       "test2.txt")) is True

    dataset_delete_job = inv_manager.delete_dataset("test", "test", "dataset1")
    assert os.path.exists(ds_root_dir) is False
    assert os.path.exists(lb_root_dir) is True
    # Cache survives delete; cleanup is deferred to the background job
    assert os.path.exists(m.cache_mgr.cache_root) is True

    assert dataset_delete_job.namespace == "test"
    assert dataset_delete_job.name == "dataset1"
    assert dataset_delete_job.cache_root == m.cache_mgr.cache_root

    jobs.clean_dataset_file_cache("test", dataset_delete_job.namespace,
                                  dataset_delete_job.name, dataset_delete_job.cache_root,
                                  config_file=mock_config_file[0])

    # Still linked to the labbook, so the cleanup job must NOT remove the cache
    assert os.path.exists(m.cache_mgr.cache_root) is True
    cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
    assert os.path.exists(cache_base) is True
def test_download_dataset_files_linked(self, fixture_working_dir, snapshot):
    """downloadDatasetFiles mutation dispatches a background job with
    LINKED-dataset metadata when labbook owner/name are provided."""
    def dispatcher_mock(self, function_ref, kwargs, metadata, persist):
        # Validate everything the mutation hands to the dispatcher
        assert kwargs['logged_in_username'] == 'default'
        assert kwargs['access_token'] == 'asdf'
        assert kwargs['id_token'] == '1234'
        assert kwargs['dataset_owner'] == 'default'
        assert kwargs['dataset_name'] == 'dataset100'
        assert kwargs['labbook_owner'] == 'default'
        assert kwargs['labbook_name'] == 'test-lb'
        assert kwargs['all_keys'] is None
        assert kwargs['keys'] == ["test1.txt"]
        assert persist is True

        assert metadata['dataset'] == 'default|default|test-lb|LINKED|default|default|dataset100'
        assert metadata['labbook'] == 'default|default|test-lb'
        assert metadata['method'] == 'download_dataset_files'

        return JobResponseMock("rq:job:00923477-d46b-479c-ad0c-2b66f90b6b10")

    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    lb = im.create_labbook('default', 'default', "test-lb", description="tester")
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", 'default', 'dataset100', lb)

    flask.g.access_token = "asdf"
    flask.g.id_token = "1234"

    with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):
        query = """
                   mutation myMutation {
                     downloadDatasetFiles(input: {datasetOwner: "default", datasetName: "dataset100", keys: ["test1.txt"],
                                                  labbookOwner: "default", labbookName: "test-lb"}){
                         backgroundJobKey
                     }
                   }
                   """
        r = fixture_working_dir[2].execute(query)

        assert 'errors' not in r
        assert isinstance(r['data']['downloadDatasetFiles']['backgroundJobKey'], str)
        assert "rq:" in r['data']['downloadDatasetFiles']['backgroundJobKey']
def test_linked_to(self, mock_config_file):
    """linked_to() is None on a standalone dataset instance, but populated on
    instances loaded via get_linked_datasets from the linking labbook."""
    im = InventoryManager(mock_config_file[0])
    lb = im.create_labbook("test", "test", "lb1", "testing")
    ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                           description="my first dataset",
                           author=GitAuthor(name="test", email="*****@*****.**"))
    assert ds.linked_to() is None

    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test", "dataset1", lb)
    # The original, standalone dataset instance still reports no link
    assert ds.linked_to() is None

    linked_datasets = im.get_linked_datasets(lb)
    assert len(linked_datasets) == 1
    assert linked_datasets[0].linked_to() == "test|test|lb1"
def test_delete_labbook_with_linked_dataset_exists(self, fixture_working_dir_env_repo_scoped):
    """Test deleting a LabBook with a linked dataset, while the dataset still
    exists (shouldn't clean up)"""
    def dispatcher_mock(self, function_ref, kwargs, metadata):
        # If you get here, a cleanup job was scheduled, which shouldn't have
        # happened since dataset still there
        assert "CLEANUP SHOULD NOT HAVE BEEN SCHEDULED"

    im = InventoryManager(fixture_working_dir_env_repo_scoped[0])
    lb = im.create_labbook("default", "default", "labbook1",
                           description="Cats labbook 1")
    lb_root_dir = lb.root_dir
    assert os.path.exists(lb_root_dir)

    ds = im.create_dataset('default', 'default', "dataset2",
                           storage_type="gigantum_object_v1", description="test")
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "default", "dataset2", lb)

    delete_query = f"""
    mutation delete {{
        deleteLabbook(input: {{
            owner: "default",
            labbookName: "labbook1",
            confirm: true
        }}) {{
            success
        }}
    }}
    """
    with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):
        r = fixture_working_dir_env_repo_scoped[2].execute(delete_query)

    assert 'errors' not in r
    assert r['data']['deleteLabbook']['success'] is True
    assert not os.path.exists(lb_root_dir)
    # Dataset itself must survive the labbook delete
    assert os.path.exists(ds.root_dir)
def mutate_and_get_payload(cls, root, info, labbook_owner, labbook_name, dataset_owner,
                           dataset_name, action, dataset_url=None, client_mutation_id=None):
    """Link, unlink, or update a dataset reference on a labbook.

    `action` selects the operation: 'link' (from `dataset_url` or, if omitted,
    the local dataset), 'unlink', or 'update'. Returns a ModifyDatasetLink
    payload carrying an edge to the modified labbook.
    """
    logged_in_username = get_logged_in_username()
    im = InventoryManager()
    lb = im.load_labbook(logged_in_username, labbook_owner, labbook_name,
                         author=get_logged_in_author())

    with lb.lock():
        if action == 'link':
            if dataset_url:
                remote_domain = cls._get_remote_domain(dataset_url, dataset_owner,
                                                       dataset_name)
                if remote_domain:
                    # Make sure git creds are configured for the remote
                    admin_service = None
                    for remote in lb.client_config.config['git']['remotes']:
                        if remote_domain == remote:
                            admin_service = lb.client_config.config['git'][
                                'remotes'][remote]['admin_service']
                            break
                    if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                        token = parse_token(
                            info.context.headers.environ["HTTP_AUTHORIZATION"])
                    else:
                        raise ValueError("Authorization header not provided."
                                         " Must have a valid session to query for collaborators")
                    mgr = GitLabManager(remote_domain, admin_service, token)
                    mgr.configure_git_credentials(remote_domain, logged_in_username)
            else:
                # Link to local dataset
                ds = im.load_dataset(logged_in_username, dataset_owner, dataset_name)
                dataset_url = f"{ds.root_dir}/.git"

            # Link the dataset to the labbook
            ds = im.link_dataset_to_labbook(dataset_url, dataset_owner, dataset_name, lb)
            ds.namespace = dataset_owner

            # Preload the dataloader
            info.context.dataset_loader.prime(
                f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}", ds)

            # Relink the revision
            m = Manifest(ds, logged_in_username)
            m.link_revision()
        elif action == 'unlink':
            im.unlink_dataset_from_labbook(dataset_owner, dataset_name, lb)
        elif action == 'update':
            ds = im.update_linked_dataset_reference(dataset_owner, dataset_name, lb)
            m = Manifest(ds, logged_in_username)
            m.force_reload()

            info.context.dataset_loader.prime(
                f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}", ds)
        else:
            raise ValueError("Unsupported action. Use `link`, `unlink`, or `update`")

    info.context.labbook_loader.prime(
        f"{get_logged_in_username()}&{labbook_owner}&{labbook_name}", lb)
    edge = LabbookConnection.Edge(node=Labbook(owner=labbook_owner, name=labbook_name),
                                  cursor=base64.b64encode(f"{0}".encode('utf-8')))

    return ModifyDatasetLink(new_labbook_edge=edge)
def test_link_unlink_dataset_across_branches(self, mock_labbook):
    """Test to verify linked Dataset initialization works across branching in Projects

    - Create a project
    - Create a dataset
    - Link dataset on master
    - Switch to another branch
    - Unlink dataset: dataset is gone
    - Switch to master: dataset is available
    - Switch to other branch: dataset is gone
    - Switch to master: dataset is available
    """
    inv_manager = InventoryManager(mock_labbook[0])
    lb = mock_labbook[2]
    ds = inv_manager.create_dataset("test", "test", "dataset100", "gigantum_object_v1",
                                    description="my dataset")

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'test', None, None)
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False

    # link dataset and make sure it's there
    inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100', lb)
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True

    # Create a branch
    bm = BranchManager(lb, username="******")
    assert bm.active_branch == 'master'
    branch_name = bm.create_branch(title="test-branch")
    assert bm.active_branch == branch_name
    assert lb.is_repo_clean

    # Dataset still there
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True

    # Unlink dataset in branch
    inv_manager.unlink_dataset_from_labbook('test', 'dataset100', lb)

    # Dataset gone
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is False
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is False
    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) == 0

    # Switch back to master
    bm.workon_branch('master')
    assert bm.active_branch == 'master'
    assert lb.active_branch == 'master'
    assert lb.is_repo_clean

    # Dataset is back!
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True
    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) > 0

    # Switch back to branch
    bm.workon_branch('test-branch')
    assert bm.active_branch == 'test-branch'
    assert lb.active_branch == 'test-branch'
    assert lb.is_repo_clean

    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is False
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is False
    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) == 0

    # Switch back to master
    bm.workon_branch('master')
    assert bm.active_branch == 'master'
    assert lb.active_branch == 'master'
    assert lb.is_repo_clean

    # Dataset is back!
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True
    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) > 0
def test_checkout__linked_dataset(self, mock_labbook_lfs_disabled, mock_config_file):
    """ test checking out a branch in a project that pulls in a linked dataset"""
    def dispatcher_mock(self, function_ref, kwargs, metadata):
        assert kwargs['logged_in_username'] == 'other-test-user2'
        assert kwargs['dataset_owner'] == 'testuser'
        assert kwargs['dataset_name'] == 'test-ds'

        # Inject mocked config file
        kwargs['config_file'] = mock_config_file[0]

        # Stop patching so job gets scheduled for real
        dispatcher_patch.stop()

        # Call same method as in mutation
        d = Dispatcher()
        res = d.dispatch_task(
            gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
            kwargs=kwargs, metadata=metadata)
        return res

    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
    ds = im.create_dataset(username, username, 'test-ds',
                           storage_type='gigantum_object_v1')

    # Publish dataset
    dataset_wf = DatasetWorkflow(ds)
    dataset_wf.publish(username=username)

    # Publish project
    labbook_wf = LabbookWorkflow(lb)
    labbook_wf.publish(username=username)

    # Switch branches
    labbook_wf.labbook.checkout_branch(branch_name="dataset-branch", new=True)

    # Link to project
    im.link_dataset_to_labbook(dataset_wf.remote, username, username,
                               labbook_wf.labbook)

    # Publish branch
    labbook_wf.sync(username=username)

    # Import project
    other_user = '******'
    wf_other = LabbookWorkflow.import_from_remote(
        labbook_wf.remote, username=other_user, config_file=mock_config_file[0])

    # The remotes must be the same, cause it's the same remote repo
    assert wf_other.remote == labbook_wf.remote
    assert wf_other.repository != labbook_wf.repository
    assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

    # Dataset has not been imported yet for the second user
    with pytest.raises(InventoryException):
        im_other_user = InventoryManager(config_file=mock_config_file[0])
        ds = im_other_user.load_dataset(other_user, username, 'test-ds')

    # Patch dispatch_task so you can inject the mocked config file
    dispatcher_patch = patch.object(Dispatcher, 'dispatch_task', dispatcher_mock)
    dispatcher_patch.start()

    # Checkout the branch
    assert wf_other.labbook.active_branch == "master"
    wf_other.checkout(username=other_user, branch_name="dataset-branch")

    # Poll until the background import of the dataset completes
    cnt = 0
    while cnt < 20:
        try:
            im_other_user = InventoryManager(config_file=mock_config_file[0])
            ds = im_other_user.load_dataset(other_user, username, 'test-ds')
            break
        except InventoryException:
            cnt += 1
            time.sleep(1)

    assert cnt < 20
    assert ds.name == 'test-ds'
    assert ds.namespace == username
    assert mock_config_file[1] in ds.root_dir
    assert wf_other.labbook.active_branch == "dataset-branch"
def test_import_from_remote__linked_dataset(self, mock_labbook_lfs_disabled, mock_config_file):
    """ test importing a project with a linked dataset"""
    def dispatcher_mock(self, function_ref, kwargs, metadata):
        assert kwargs['logged_in_username'] == 'other-test-user2'
        assert kwargs['dataset_owner'] == 'testuser'
        assert kwargs['dataset_name'] == 'test-ds'

        # Inject mocked config file
        kwargs['config_file'] = mock_config_file[0]

        # Stop patching so job gets scheduled for real
        dispatcher_patch.stop()

        # Call same method as in mutation
        d = Dispatcher()
        res = d.dispatch_task(
            gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
            kwargs=kwargs, metadata=metadata)
        return res

    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
    ds = im.create_dataset(username, username, 'test-ds',
                           storage_type='gigantum_object_v1')

    # Publish dataset
    dataset_wf = DatasetWorkflow(ds)
    dataset_wf.publish(username=username)

    # Link to project
    im.link_dataset_to_labbook(dataset_wf.remote, username, username, lb)

    # Publish project
    labbook_wf = LabbookWorkflow(lb)
    labbook_wf.publish(username=username)

    # Patch dispatch_task so you can inject the mocked config file
    dispatcher_patch = patch.object(Dispatcher, 'dispatch_task', dispatcher_mock)
    dispatcher_patch.start()

    # Import project, triggering an auto-import of the dataset
    other_user = '******'
    wf_other = LabbookWorkflow.import_from_remote(
        labbook_wf.remote, username=other_user, config_file=mock_config_file[0])

    # The remotes must be the same, cause it's the same remote repo
    assert wf_other.remote == labbook_wf.remote
    # The actual path on disk will be different, though
    assert wf_other.repository != labbook_wf.repository
    # Check imported into namespace of original owner (testuser)
    assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

    # Poll until the background import of the dataset completes
    cnt = 0
    while cnt < 20:
        try:
            im_other_user = InventoryManager(config_file=mock_config_file[0])
            ds = im_other_user.load_dataset(other_user, username, 'test-ds')
            break
        except InventoryException:
            cnt += 1
            time.sleep(1)

    assert cnt < 20
    assert ds.name == 'test-ds'
    assert ds.namespace == username
    assert mock_config_file[1] in ds.root_dir
def test_verify_contents_linked_dataset(self, mock_dataset_with_local_dir):
    """verify_dataset_contents on a dataset linked into a labbook reports files
    that were modified on the backing filesystem."""
    class JobMock():
        def __init__(self):
            self.meta = dict()

        def save_meta(self):
            pass

    CURRENT_JOB = JobMock()

    def get_current_job_mock():
        return CURRENT_JOB

    with patch('gtmcore.dispatcher.jobs.get_current_job',
               side_effect=get_current_job_mock):
        ds = mock_dataset_with_local_dir[0]
        im = InventoryManager()

        ds.backend.update_from_remote(ds, lambda x: print(x))

        m = Manifest(ds, 'tester')
        assert len(m.manifest.keys()) == 4
        assert os.path.isfile(os.path.join(m.cache_mgr.cache_root,
                                           m.dataset_revision, 'test1.txt'))
        assert os.path.isfile(os.path.join(m.cache_mgr.cache_root,
                                           m.dataset_revision, 'test2.txt'))
        assert os.path.isfile(os.path.join(m.cache_mgr.cache_root,
                                           m.dataset_revision, 'subdir', 'test3.txt'))

        # Nothing modified yet
        modified_items = ds.backend.verify_contents(ds, lambda x: print(x))
        assert len(modified_items) == 0

        lb = im.create_labbook("tester", "tester", 'test-labbook')
        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "tester", ds.name, lb)

        dataset_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                   'tester', ds.name)
        ds = im.load_dataset_from_directory(dataset_dir)

        # Mutate a file behind the dataset's back
        test_dir = os.path.join(mock_dataset_with_local_dir[1], "local_data", "test_dir")
        with open(os.path.join(test_dir, 'test1.txt'), 'wt') as tf:
            tf.write("This file got changed in the filesystem")

        kwargs = {
            'logged_in_username': "******",
            'access_token': "asdf",
            'id_token': "1234",
            'dataset_owner': "tester",
            'dataset_name': 'dataset-1',
            'labbook_owner': "tester",
            'labbook_name': 'test-labbook'
        }
        jobs.verify_dataset_contents(**kwargs)
        job = gtmcore.dispatcher.jobs.get_current_job()

        assert 'modified_keys' in job.meta
        assert job.meta['modified_keys'] == ["test1.txt"]
        assert 'Validating contents of 3 files.' in job.meta['feedback']
def test_update_dataset_link(self, mock_labbook):
    """Pushing a new commit to the dataset remote and updating the linked
    reference pulls the change into the labbook's submodule checkout."""
    inv_manager = InventoryManager(mock_labbook[0])
    lb = mock_labbook[2]
    ds = inv_manager.create_dataset("test", "test", "dataset100", "gigantum_object_v1",
                                    description="my dataset")

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'test', None, None)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False
    inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100', lb)
    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'test', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

    # Make change to remote
    git_dir = os.path.join(tempfile.gettempdir(), 'test_update_dataset_link')
    try:
        os.makedirs(git_dir)
        call_subprocess(['git', 'clone', ds.remote], cwd=git_dir, check=True)
        with open(os.path.join(git_dir, ds.name, 'test_file.dat'), 'wt') as tf:
            tf.write("Test File Contents")
        call_subprocess(['git', 'add', 'test_file.dat'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'commit', '-m', 'editing repo'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'push'],
                        cwd=os.path.join(git_dir, ds.name), check=True)

        # Update dataset ref
        inv_manager.update_linked_dataset_reference(ds.namespace, ds.name, lb)

        # verify change is reflected
        assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

        # Verify activity record
        assert "Updated Dataset `test/dataset100` link to version" in lb.git.log()[0]['message']
    finally:
        if os.path.exists(git_dir):
            shutil.rmtree(git_dir)
def test_delete_labbook_with_linked_dataset(self, fixture_working_dir_env_repo_scoped):
    """Test deleting a LabBook with a linked dataset that has been deleted as
    well, should clean up"""
    class JobResponseMock(object):
        def __init__(self, key):
            self.key_str = key

    def dispatcher_mock(self, function_ref, kwargs, metadata):
        assert kwargs['logged_in_username'] == 'default'
        assert kwargs['dataset_owner'] == 'default'
        assert kwargs['dataset_name'] == 'dataset22'
        assert ".labmanager/datasets/default/default/dataset22" in kwargs['cache_location']
        assert metadata['method'] == 'clean_dataset_file_cache'

        # Marker file proves the cleanup job was actually scheduled
        with open("/tmp/mock_reached", 'wt') as tf:
            tf.write("reached")

        return JobResponseMock("rq:job:00923477-d46b-479c-ad0c-2dffcfdfb6b10")

    im = InventoryManager(fixture_working_dir_env_repo_scoped[0])
    lb = im.create_labbook("default", "default", "labbook1",
                           description="Cats labbook 1")
    lb_root_dir = lb.root_dir
    assert os.path.exists(lb_root_dir)
    assert os.path.exists("/tmp/mock_reached") is False

    ds = im.create_dataset('default', 'default', "dataset22",
                           storage_type="gigantum_object_v1", description="test")
    ds_root_dir = ds.root_dir
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "default", "dataset22", lb)

    # Delete the dataset first; deleting the labbook should then clean the cache
    im.delete_dataset('default', 'default', "dataset22")

    delete_query = f"""
    mutation delete {{
        deleteLabbook(input: {{
            owner: "default",
            labbookName: "labbook1",
            confirm: true
        }}) {{
            success
        }}
    }}
    """
    try:
        with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):
            r = fixture_working_dir_env_repo_scoped[2].execute(delete_query)

        assert 'errors' not in r
        assert r['data']['deleteLabbook']['success'] is True
        assert not os.path.exists(lb_root_dir)
        assert not os.path.exists(ds_root_dir)
        assert os.path.exists("/tmp/mock_reached") is True
    finally:
        if os.path.exists("/tmp/mock_reached"):
            os.remove("/tmp/mock_reached")