def mutate_and_get_payload(cls, root, info, owner, dataset_name, transaction_id,
                           cancel=False, rollback=False, client_mutation_id=None):
    username = get_logged_in_username()
    ds = InventoryManager().load_dataset(username, owner, dataset_name,
                                         author=get_logged_in_author())
    with ds.lock():
        if cancel and rollback:
            logger.warning(f"Cancelled tx {transaction_id}, doing git reset")
            # TODO: Add ability to reset
        else:
            logger.info(f"Done batch upload {transaction_id}, cancelled={cancel}")
            if cancel:
                logger.warning("Sweeping aborted batch upload.")

            m = f"Cancelled upload `{transaction_id}`. " if cancel else ''

            # Sweep up and process all files added during upload
            manifest = Manifest(ds, username)
            manifest.sweep_all_changes(upload=True, extra_msg=m)

    return CompleteDatasetUploadTransaction(success=True)
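# For reference, a minimal sketch of how the resolver above might be exercised from a
# GraphQL test, mirroring the style of the tests below. This is an assumption-based example:
# the `completeDatasetUploadTransaction` field name, its input keys, and the
# `fixture_working_dir` fixture are inferred from the resolver signature and the other tests
# in this module, not confirmed by the source.
def test_complete_dataset_upload_transaction_sketch(self, fixture_working_dir):
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset-upload",
                           storage_type="gigantum_object_v1", description="testing upload")
    m = Manifest(ds, 'default')
    # Simulate a file landing during an upload transaction
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")

    query = """
    mutation myMutation {
      completeDatasetUploadTransaction(input: {owner: "default",
                                               datasetName: "dataset-upload",
                                               transactionId: "fake-tx-1"}) {
        success
      }
    }
    """
    result = fixture_working_dir[2].execute(query)
    assert 'errors' not in result
    assert result['data']['completeDatasetUploadTransaction']['success'] is True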
def test_delete_dataset_files_errors(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset-delete-2",
                           storage_type="gigantum_object_v1", description="testing delete")
    m = Manifest(ds, 'default')
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fdsfgfd")
    m.sweep_all_changes()

    revision = m.dataset_revision
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, revision, "test1.txt")) is True
    assert os.path.exists(os.path.join(m.cache_mgr.cache_root, revision, "test2.txt")) is True

    query = """
    mutation myMutation {
      deleteDatasetFiles(input: {datasetOwner: "default",
                                 datasetName: "dataset-delete-2",
                                 keys: ["testdfdfdfdf.txt"]}) {
        success
      }
    }
    """
    result = fixture_working_dir[2].execute(query)
    assert 'errors' in result
def test_move_dataset_file(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    ds = im.create_dataset('default', 'default', "dataset-move",
                           storage_type="gigantum_object_v1", description="testing move")
    m = Manifest(ds, 'default')
    revision = m.dataset_revision
    helper_append_file(m.cache_mgr.cache_root, revision, "test1.txt", "asdfasdghndfdf")
    m.sweep_all_changes()

    revision = m.dataset_revision
    cr = m.cache_mgr.cache_root
    assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True

    query = """
    mutation myMutation {
      moveDatasetFile(input: {datasetOwner: "default",
                              datasetName: "dataset-move",
                              srcPath: "test1.txt",
                              dstPath: "test1-renamed.txt"}) {
        updatedEdges {
          node {
            id
            key
            isDir
            isLocal
            size
          }
        }
      }
    }
    """
    result = fixture_working_dir[2].execute(query)
    assert 'errors' not in result
    snapshot.assert_match(result)

    revision = m.dataset_revision
    cr = m.cache_mgr.cache_root
    assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is False
    assert os.path.exists(os.path.join(cr, revision, "test1-renamed.txt")) is True
def test_sync__dataset(self, mock_config_file):
    def update_feedback(msg: str, has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None):
        """Method to update the job's metadata and provide feedback to the UI"""
        assert has_failures is None or has_failures is False
        assert failure_detail is None

    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '100'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        return "afakejobkey"

    username = '******'
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset(username, username, 'dataset-1', 'gigantum_object_v1')
    m = Manifest(ds, username)
    wf = DatasetWorkflow(ds)

    iom = IOManager(ds, m)
    assert len(glob.glob(f'{iom.push_dir}/*')) == 0
    wf.publish(username=username, feedback_callback=update_feedback)

    # Put a file into the dataset that needs to be pushed
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    assert len(glob.glob(f'{iom.push_dir}/*')) == 1
    with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
        with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
            wf.sync(username=username, feedback_callback=update_feedback)
    assert os.path.exists(wf.remote)
    assert len(glob.glob(f'{iom.push_dir}/*')) == 0
def test_update_dataset_link(self, fixture_working_dir, snapshot):
    im = InventoryManager(fixture_working_dir[0])
    lb = im.create_labbook('default', 'default', 'test-lb', 'testing dataset links')
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    manifest = Manifest(ds, 'default')
    helper_append_file(manifest.cache_mgr.cache_root, manifest.dataset_revision,
                       "test1.txt", "12345")
    manifest.sweep_all_changes()

    # Fake publish to a local bare repo
    _MOCK_create_remote_repo2(ds, 'default', None, None)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False

    overview_query = """
    {
      labbook(owner: "default", name: "test-lb") {
        linkedDatasets {
          name
          overview {
            localBytes
            totalBytes
          }
        }
      }
    }
    """

    query = """
    mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                        $a: String!, $du: String) {
      modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln,
                                datasetOwner: $do, datasetName: $dn,
                                action: $a, datasetUrl: $du}) {
        newLabbookEdge {
          node {
            id
            name
            description
            linkedDatasets {
              name
            }
          }
        }
      }
    }
    """
    variables = {
        "lo": "default",
        "ln": "test-lb",
        "do": "default",
        "dn": "dataset100",
        "a": "link",
        "du": ds.remote
    }
    result = fixture_working_dir[2].execute(query, variable_values=variables)
    assert "errors" not in result
    snapshot.assert_match(result)

    assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
    dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         'default', 'dataset100')
    assert os.path.exists(dataset_submodule_dir) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True
    assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

    with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
        data = mf.read()
    assert len(data) > 0

    # check overview
    result = fixture_working_dir[2].execute(overview_query)
    assert "errors" not in result
    assert result['data']['labbook']['linkedDatasets'][0]['overview']['localBytes'] == '5'
    assert result['data']['labbook']['linkedDatasets'][0]['overview']['totalBytes'] == '5'

    # Make change to published dataset
    git_dir = os.path.join(tempfile.gettempdir(), 'test_update_dataset_link_mutation')
    try:
        os.makedirs(git_dir)
        call_subprocess(['git', 'clone', ds.remote], cwd=git_dir, check=True)
        with open(os.path.join(git_dir, ds.name, 'test_file.dat'), 'wt') as tf:
            tf.write("Test File Contents")
        call_subprocess(['git', 'add', 'test_file.dat'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'commit', '-m', 'editing repo'],
                        cwd=os.path.join(git_dir, ds.name), check=True)
        call_subprocess(['git', 'push'], cwd=os.path.join(git_dir, ds.name), check=True)

        query = """
        mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!, $a: String!) {
          modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln,
                                    datasetOwner: $do, datasetName: $dn, action: $a}) {
            newLabbookEdge {
              node {
                id
                name
                description
                linkedDatasets {
                  name
                }
              }
            }
          }
        }
        """
        variables = {
            "lo": "default",
            "ln": "test-lb",
            "do": "default",
            "dn": "dataset100",
            "a": "update"
        }
        result = fixture_working_dir[2].execute(query, variable_values=variables)
        assert "errors" not in result
        snapshot.assert_match(result)

        # verify change is reflected
        assert os.path.exists(os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

        # Verify activity record
        assert "Updated Dataset `default/dataset100` link to version" in lb.git.log()[0]['message']
    finally:
        if os.path.exists(git_dir):
            shutil.rmtree(git_dir)
def test_download_dataset_files_file_fail(self, mock_config_file_background_tests):
    def dispatch_query_mock(self, job_key):
        # mock the job actually running and returning status
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished',
                         meta={'completed_bytes': '0', 'failure_keys': 'test1.txt'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
        return "afakejobkey"

    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 1
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    helper_compress_file(obj1_target, obj1_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj1_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file_background_tests[0]):
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"],
                    'config_file': mock_config_file_background_tests[0]
                }

                with pytest.raises(IOError):
                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(**dl_kwargs)
                assert os.path.isfile(obj1_target) is False
def test_download_dataset_files(self, mock_config_file_background_tests, mock_dataset_head):
    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '500'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

        return "afakejobkey"

    im = InventoryManager(mock_config_file_background_tests[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 1
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    helper_compress_file(obj1_target, obj1_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj1_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))

    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_file_background_tests[0]):
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"],
                    'config_file': mock_config_file_background_tests[0]
                }

                gtmcore.dispatcher.dataset_jobs.download_dataset_files(**dl_kwargs)
                assert os.path.isfile(obj1_target) is True

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
def test_pull_objects(self, mock_config_file, mock_dataset_head):
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    m = Manifest(ds, 'default')
    iom = IOManager(ds, m)

    os.makedirs(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "other_dir"))
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "fdsfgfd")
    m.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
    obj1_target = obj_to_push[0].object_path
    obj2_target = obj_to_push[1].object_path

    obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
    obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

    assert os.path.exists(obj1_target) is True
    assert os.path.exists(obj2_target) is True
    helper_compress_file(obj1_target, obj1_source)
    helper_compress_file(obj2_target, obj2_source)
    assert os.path.isfile(obj1_target) is False
    assert os.path.isfile(obj2_target) is False
    assert os.path.isfile(obj1_source) is True
    assert os.path.isfile(obj2_source) is True

    # Clear out from linked dir
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test1.txt'))
    os.remove(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'test2.txt'))

    with patch.object(Configuration, 'find_default_config', lambda self: mock_config_file[0]):
        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)
            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id_2}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_2,
                    "dataset": ds.name
                },
                status=200)
            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    body=data2.read(),
                    status=200,
                    content_type='application/octet-stream')

            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test1.txt"]
            }

            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in real use,
            # multiple jobs run in parallel and you only want to link once).
            m.link_revision()

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is False

            decompressor = snappy.StreamDecompressor()
            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            # Download other file
            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test2.txt"]
            }

            gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

            # Manually link since this is disabled by default in the job (because in real use,
            # multiple jobs run in parallel and you only want to link once).
            m.link_revision()

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True

            with open(obj1_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj1_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1

            with open(obj2_source, 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(obj2_target, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
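# The tests above rely on two small helpers, `helper_append_file` and `helper_compress_file`,
# that are defined elsewhere in the test suite. Below is a minimal sketch of what they could
# look like, based only on how they are called here; the signatures and the snappy stream
# framing are assumptions, chosen to match the `snappy.StreamDecompressor` used in the
# assertions above, not the project's actual implementation.
def helper_append_file(cache_root, revision, filename, content):
    # Append `content` to a file inside the revision directory of the object cache
    revision_dir = os.path.join(cache_root, revision)
    os.makedirs(revision_dir, exist_ok=True)
    with open(os.path.join(revision_dir, filename), 'at') as fh:
        fh.write(content)


def helper_compress_file(path_to_compress, compressed_destination):
    # Snappy stream-compress the object file into `compressed_destination` and remove the
    # original, so the tests can verify the object is re-created on download
    with open(path_to_compress, 'rb') as src, open(compressed_destination, 'wb') as dst:
        snappy.stream_compress(src, dst)
    os.remove(path_to_compress)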
def test_push_objects(self, mock_config_file, mock_dataset_head):
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset('default', 'default', "dataset100",
                           storage_type="gigantum_object_v1", description="100")
    manifest = Manifest(ds, 'default')
    iom = IOManager(ds, manifest)

    revision = manifest.dataset_revision
    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)

    with aioresponses() as mocked_responses:
        mocked_responses.put(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj1}?params=1",
                "namespace": ds.namespace,
                "key_id": "hghghg",
                "obj_id": obj1,
                "dataset": ds.name
            },
            status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj1}?params=1", payload={}, status=200)

        mocked_responses.put(
            f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2}',
            payload={
                "presigned_url": f"https://dummyurl.com/{obj2}?params=1",
                "namespace": ds.namespace,
                "key_id": "hghghg",
                "obj_id": obj2,
                "dataset": ds.name
            },
            status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj2}?params=1", payload={}, status=200)

        job_kwargs = {
            'objs': obj_to_push,
            'logged_in_username': "******",
            'access_token': "faketoken",
            'id_token': "faketoken",
            'dataset_owner': ds.namespace,
            'dataset_name': ds.name,
            'config_file': ds.client_config.config_file,
        }
        gtmcore.dispatcher.dataset_jobs.push_dataset_objects(**job_kwargs)