Example 1
    def test_link_unlink_dataset(self, mock_labbook):
        """Test linking a dataset to a project and unlinking it again"""
        inv_manager = InventoryManager(mock_labbook[0])
        lb = mock_labbook[2]
        ds = inv_manager.create_dataset("test", "test", "dataset100", "gigantum_object_v1", description="my dataset")

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'test', None, None)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is False

        inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100', lb)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is True

        inv_manager.unlink_dataset_from_labbook('test', 'dataset100', lb)

        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is False
        assert os.path.exists(os.path.join(dataset_submodule_dir, '.gigantum')) is False
        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()

        assert len(data) == 0

    def test_delete_labbook_linked_dataset(self, mock_config_file):
        """Test deleting a labbook that has a linked dataset attached"""
        inv_manager = InventoryManager(mock_config_file[0])
        inv_manager.create_labbook("test",
                                   "test",
                                   "labbook1",
                                   description="my first labbook")
        lb = inv_manager.load_labbook("test", "test", "labbook1")

        auth = GitAuthor(name="test", email="*****@*****.**")
        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset1",
                                        "gigantum_object_v1",
                                        description="my first dataset",
                                        author=auth)

        inv_manager.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test",
                                            "dataset1", lb)

        dataset_delete_jobs = inv_manager.delete_labbook(
            "test", "test", "labbook1")
        assert len(dataset_delete_jobs) == 1
        assert dataset_delete_jobs[0].namespace == "test"
        assert dataset_delete_jobs[0].name == "dataset1"

        with pytest.raises(InventoryException):
            inv_manager.load_labbook("test", "test", "labbook1")
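
Several of these tests fake a publish with the fixture helper _MOCK_create_remote_repo2, which is not shown in the excerpts. A minimal sketch of such a helper, assuming all it needs to do is create a local bare repository, register it as the repository's origin remote, and push the current branch (the names and calls below are assumptions, not the project's actual fixture):

    import os
    import subprocess
    import tempfile
    import uuid

    def _MOCK_create_remote_repo2(repository, username, visibility, collaborators):
        # Create a throwaway bare repo to stand in for the remote
        remote_dir = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex[:8],
                                  f"{repository.name}.git")
        os.makedirs(remote_dir)
        subprocess.run(['git', 'init', '--bare', remote_dir], check=True)

        # add_remote() is assumed from the gtmcore repository API; after this,
        # ds.remote (consumed by the link calls above) resolves to the bare repo
        repository.add_remote('origin', remote_dir)
        subprocess.run(['git', 'push', 'origin', 'master'],
                       cwd=repository.root_dir, check=True)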
Example 3
    def test_get_commits_behind(self, fixture_working_dir):
        """Test temporar field commitsBehind on dataset objects"""
        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset("default", "default", "test-ds", "gigantum_object_v1",
                               description="my first dataset",
                               author=GitAuthor(name="default", email="*****@*****.**"))

        lb = im.create_labbook("default", "default", "test-lb")

        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", 'default', 'test-ds', lb)

        query = """
                {
                  labbook(owner: "default", name:"test-lb")
                  {
                    linkedDatasets{
                      name
                      commitsBehind
                    }
                  }
                }
                """
        r = fixture_working_dir[2].execute(query)
        assert 'errors' not in r
        assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
        assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 0

        ds.write_readme("test contents to make a commit")
        
        r = fixture_working_dir[2].execute(query)
        assert 'errors' not in r
        assert r['data']['labbook']['linkedDatasets'][0]['name'] == 'test-ds'
        assert r['data']['labbook']['linkedDatasets'][0]['commitsBehind'] == 1

    def test_get_linked_datasets(self, mock_labbook):
        """Test listing the datasets linked to a project"""
        inv_manager = InventoryManager(mock_labbook[0])
        lb = mock_labbook[2]

        datasets = inv_manager.get_linked_datasets(lb)
        assert len(datasets) == 0

        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset100",
                                        "gigantum_object_v1",
                                        description="my dataset")

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'test', None, None)

        assert os.path.exists(os.path.join(lb.root_dir,
                                           '.gitmodules')) is False

        inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100',
                                            lb)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True

        datasets = inv_manager.get_linked_datasets(lb)
        assert len(datasets) == 1
        assert datasets[0].name == ds.name
        assert datasets[0].namespace == ds.namespace
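
The commitsBehind field checked above counts how far the pinned submodule revision trails the dataset's current HEAD. Purely to illustrate that measure (this is not gtmcore's implementation), the same count can be reproduced with GitPython:

    from git import Repo

    def commits_behind(dataset_repo_path, pinned_commit):
        """Count commits reachable from HEAD but not from the pinned revision."""
        repo = Repo(dataset_repo_path)
        rev_range = f"{pinned_commit}..{repo.head.commit.hexsha}"
        return sum(1 for _ in repo.iter_commits(rev_range))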
Example 5
    def test_create_rollback_branch_remove_linked_dataset(
            self, mock_create_labbooks):
        """ test creating a rollback branch that removes a linked dataset"""
        lb, client = mock_create_labbooks[0], mock_create_labbooks[1]

        im = InventoryManager(config_file=lb.client_config.config_file)
        ds = im.create_dataset(UT_USERNAME,
                               UT_USERNAME,
                               'test-ds',
                               storage_type='gigantum_object_v1')

        rollback_to = lb.git.commit_hash

        # Link dataset to project
        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", UT_USERNAME, ds.name,
                                   lb)
        dataset_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                   UT_USERNAME, 'test-ds')
        assert os.path.exists(dataset_dir) is True

        q = f"""
        mutation makeFeatureBranch {{
            createExperimentalBranch(input: {{
                owner: "{UT_USERNAME}",
                labbookName: "{UT_LBNAME}",
                branchName: "rollback-branch",
                revision: "{rollback_to}",
                description: "testing rollback",
            }}) {{
                labbook{{
                    name
                    activeBranchName
                    description
                    branches {{
                        branchName
                    }}
                    linkedDatasets{{
                        name
                    }}
                }}
            }}
        }}
        """
        r = client.execute(q)
        assert 'errors' not in r
        assert r['data']['createExperimentalBranch']['labbook'][
            'activeBranchName'] == 'rollback-branch'
        assert r['data']['createExperimentalBranch']['labbook'][
            'description'] == "testing rollback"
        assert r['data']['createExperimentalBranch']['labbook'][
            'linkedDatasets'] == []

        assert lb.is_repo_clean

        assert os.path.exists(dataset_dir) is False

    def test_delete_dataset_while_linked(self, mock_config_file):
        """Test deleting a dataset while it is linked to a project"""
        inv_manager = InventoryManager(mock_config_file[0])
        auth = GitAuthor(name="test", email="*****@*****.**")
        lb = inv_manager.create_labbook("test",
                                        "test",
                                        "labbook1",
                                        description="my first labbook")
        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset1",
                                        "gigantum_object_v1",
                                        description="my first dataset",
                                        author=auth)
        ds_root_dir = ds.root_dir
        lb_root_dir = lb.root_dir
        assert os.path.exists(ds_root_dir) is True
        assert os.path.exists(lb_root_dir) is True

        # Link dataset
        inv_manager.link_dataset_to_labbook(f"{ds_root_dir}/.git", "test",
                                            "dataset1", lb)

        m = Manifest(ds, 'test')
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfasdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "dfg")

        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "test1.txt")) is True
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "test2.txt")) is True

        dataset_delete_job = inv_manager.delete_dataset(
            "test", "test", "dataset1")
        assert os.path.exists(ds_root_dir) is False
        assert os.path.exists(lb_root_dir) is True
        assert os.path.exists(m.cache_mgr.cache_root) is True
        assert dataset_delete_job.namespace == "test"
        assert dataset_delete_job.name == "dataset1"
        assert dataset_delete_job.cache_root == m.cache_mgr.cache_root

        jobs.clean_dataset_file_cache("test",
                                      dataset_delete_job.namespace,
                                      dataset_delete_job.name,
                                      dataset_delete_job.cache_root,
                                      config_file=mock_config_file[0])

        assert os.path.exists(m.cache_mgr.cache_root) is True

        cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
        assert os.path.exists(cache_base) is True

    def test_download_dataset_files_linked(self, fixture_working_dir,
                                           snapshot):
        """Test that downloading files from a linked dataset schedules a background job"""
        def dispatcher_mock(self, function_ref, kwargs, metadata, persist):
            assert kwargs['logged_in_username'] == 'default'
            assert kwargs['access_token'] == 'asdf'
            assert kwargs['id_token'] == '1234'
            assert kwargs['dataset_owner'] == 'default'
            assert kwargs['dataset_name'] == 'dataset100'
            assert kwargs['labbook_owner'] == 'default'
            assert kwargs['labbook_name'] == 'test-lb'
            assert kwargs['all_keys'] is None
            assert kwargs['keys'] == ["test1.txt"]
            assert persist is True

            assert metadata[
                'dataset'] == 'default|default|test-lb|LINKED|default|default|dataset100'
            assert metadata['labbook'] == 'default|default|test-lb'
            assert metadata['method'] == 'download_dataset_files'

            return JobResponseMock(
                "rq:job:00923477-d46b-479c-ad0c-2b66f90b6b10")

        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        lb = im.create_labbook('default',
                               'default',
                               "test-lb",
                               description="tester")
        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", 'default',
                                   'dataset100', lb)

        flask.g.access_token = "asdf"
        flask.g.id_token = "1234"

        with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):

            query = """
                       mutation myMutation {
                         downloadDatasetFiles(input: {datasetOwner: "default", datasetName: "dataset100", keys: ["test1.txt"], labbookOwner: "default", labbookName: "test-lb"}){
                             backgroundJobKey 
                         }
                       }
                       """
            r = fixture_working_dir[2].execute(query)
            assert 'errors' not in r
            assert isinstance(
                r['data']['downloadDatasetFiles']['backgroundJobKey'], str)
            assert "rq:" in r['data']['downloadDatasetFiles'][
                'backgroundJobKey']
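
The JobResponseMock used above is defined elsewhere in the test module (Example 16 below shows it in context); it is just a one-field wrapper around the background job key:

    class JobResponseMock(object):
        def __init__(self, key):
            self.key_str = key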
Example 8
    def test_linked_to(self, mock_config_file):
        """Test the linked_to() method on datasets linked into a project"""
        im = InventoryManager(mock_config_file[0])
        lb = im.create_labbook("test", "test", "lb1", "testing")
        ds = im.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                               description="my first dataset",
                               author=GitAuthor(name="test", email="*****@*****.**"))

        assert ds.linked_to() is None

        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "test", "dataset1", lb)

        assert ds.linked_to() is None

        linked_datasets = im.get_linked_datasets(lb)
        assert len(linked_datasets) == 1
        assert linked_datasets[0].linked_to() == "test|test|lb1"
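
Note that linked_to() still returns None on the original ds instance even after linking; only the copy loaded through get_linked_datasets() knows its containing project. The value is a pipe-delimited triple, so unpacking it is trivial (illustrative):

    owner, namespace, project_name = linked_datasets[0].linked_to().split('|')
    assert (owner, namespace, project_name) == ('test', 'test', 'lb1')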
Example 9
    def test_delete_labbook_with_linked_dataset_exists(
            self, fixture_working_dir_env_repo_scoped):
        """Test deleting a LabBook with a linked dataset, while the dataset still exists (shouldn't clean up)"""
        def dispatcher_mock(self, function_ref, kwargs, metadata):
            # If you get here, a cleanup job was scheduled, which shouldn't have happened since dataset still there
            assert "CLEANUP SHOULD NOT HAVE BEEN SCHEDULED"

        im = InventoryManager(fixture_working_dir_env_repo_scoped[0])
        lb = im.create_labbook("default",
                               "default",
                               "labbook1",
                               description="Cats labbook 1")
        lb_root_dir = lb.root_dir
        assert os.path.exists(lb_root_dir)

        ds = im.create_dataset('default',
                               'default',
                               "dataset2",
                               storage_type="gigantum_object_v1",
                               description="test")
        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "default",
                                   "dataset2", lb)

        delete_query = f"""
        mutation delete {{
            deleteLabbook(input: {{
                owner: "default",
                labbookName: "labbook1",
                confirm: true
            }}) {{
                success
            }}
        }}
        """
        with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):
            r = fixture_working_dir_env_repo_scoped[2].execute(delete_query)

        assert 'errors' not in r
        assert r['data']['deleteLabbook']['success'] is True
        assert not os.path.exists(lb_root_dir)
        assert os.path.exists(ds.root_dir)
Example 10
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               labbook_owner,
                               labbook_name,
                               dataset_owner,
                               dataset_name,
                               action,
                               dataset_url=None,
                               client_mutation_id=None):
        logged_in_username = get_logged_in_username()
        im = InventoryManager()
        lb = im.load_labbook(logged_in_username,
                             labbook_owner,
                             labbook_name,
                             author=get_logged_in_author())

        with lb.lock():
            if action == 'link':
                if dataset_url:
                    remote_domain = cls._get_remote_domain(
                        dataset_url, dataset_owner, dataset_name)

                    if remote_domain:
                        # Make sure git creds are configured for the remote
                        admin_service = None
                        for remote in lb.client_config.config['git'][
                                'remotes']:
                            if remote_domain == remote:
                                admin_service = lb.client_config.config['git'][
                                    'remotes'][remote]['admin_service']
                                break
                        if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                            token = parse_token(info.context.headers.
                                                environ["HTTP_AUTHORIZATION"])
                        else:
                            raise ValueError(
                                "Authorization header not provided."
                                " Must have a valid session to query for collaborators"
                            )
                        mgr = GitLabManager(remote_domain, admin_service,
                                            token)
                        mgr.configure_git_credentials(remote_domain,
                                                      logged_in_username)
                else:
                    # Link to local dataset
                    ds = im.load_dataset(logged_in_username, dataset_owner,
                                         dataset_name)
                    dataset_url = f"{ds.root_dir}/.git"

                # Link the dataset to the labbook
                ds = im.link_dataset_to_labbook(dataset_url, dataset_owner,
                                                dataset_name, lb)
                ds.namespace = dataset_owner

                # Preload the dataloader
                info.context.dataset_loader.prime(
                    f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}",
                    ds)

                # Relink the revision
                m = Manifest(ds, logged_in_username)
                m.link_revision()
            elif action == 'unlink':
                im.unlink_dataset_from_labbook(dataset_owner, dataset_name, lb)
            elif action == 'update':
                ds = im.update_linked_dataset_reference(
                    dataset_owner, dataset_name, lb)
                m = Manifest(ds, logged_in_username)
                m.force_reload()

                info.context.dataset_loader.prime(
                    f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}",
                    ds)
            else:
                raise ValueError(
                    "Unsupported action. Use `link`, `unlink`, or `update`")

            info.context.labbook_loader.prime(
                f"{get_logged_in_username()}&{labbook_owner}&{labbook_name}",
                lb)
            edge = LabbookConnection.Edge(node=Labbook(owner=labbook_owner,
                                                       name=labbook_name),
                                          cursor=base64.b64encode(
                                              f"{0}".encode('utf-8')))

        return ModifyDatasetLink(new_labbook_edge=edge)
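
A client drives this resolver with a mutation along the following lines; the camelCase field names are inferred from the snake_case arguments above via graphene's usual conversion, so treat this as a sketch rather than the schema's documented shape:

    query = """
    mutation linkDataset {
      modifyDatasetLink(input: {
        labbookOwner: "default",
        labbookName: "test-lb",
        datasetOwner: "default",
        datasetName: "dataset100",
        action: "link"
      }) {
        newLabbookEdge {
          node {
            name
          }
        }
      }
    }
    """
    r = client.execute(query)
    assert 'errors' not in r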

    def test_link_unlink_dataset_across_branches(self, mock_labbook):
        """Test to verify linked Dataset initialization works across branching in Projects

        - Create a project
        - Create a dataset
        - Link dataset on master
        - Switch to another branch
        - Unlink dataset: dataset is gone
        - Switch to master: dataset is available
        - Switch to other branch: dataset is gone
        - Switch to master: dataset is available
        """
        inv_manager = InventoryManager(mock_labbook[0])
        lb = mock_labbook[2]
        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset100",
                                        "gigantum_object_v1",
                                        description="my dataset")

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'test', None, None)

        assert os.path.exists(os.path.join(lb.root_dir,
                                           '.gitmodules')) is False

        # link dataset and make sure it's there
        inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100',
                                            lb)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True

        # Create a branch
        bm = BranchManager(lb, username="******")
        assert bm.active_branch == 'master'
        branch_name = bm.create_branch(title="test-branch")
        assert bm.active_branch == branch_name
        assert lb.is_repo_clean

        # Dataset still there
        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True

        # Unlink dataset in branch
        inv_manager.unlink_dataset_from_labbook('test', 'dataset100', lb)

        # Dataset gone
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is False
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is False
        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()

        assert len(data) == 0

        # Switch back to master
        bm.workon_branch('master')
        assert bm.active_branch == 'master'
        assert lb.active_branch == 'master'
        assert lb.is_repo_clean

        # Dataset is back!
        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True
        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()

        assert len(data) > 0

        # Switch back to branch
        bm.workon_branch('test-branch')
        assert bm.active_branch == 'test-branch'
        assert lb.active_branch == 'test-branch'
        assert lb.is_repo_clean

        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is False
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is False
        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()

        assert len(data) == 0

        # Switch back to master
        bm.workon_branch('master')
        assert bm.active_branch == 'master'
        assert lb.active_branch == 'master'
        assert lb.is_repo_clean

        # Dataset is back!
        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True
        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()

        assert len(data) > 0
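
For reference, the submodule entry that linking writes into .gitmodules looks roughly like the following; the path matches the assertions above, while the URL and exact formatting depend on the remote used (a sketch, not captured output):

    [submodule ".gigantum/datasets/test/dataset100"]
        path = .gigantum/datasets/test/dataset100
        url = /path/to/bare/remote/dataset100.git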
Example 12
    def test_checkout__linked_dataset(self, mock_labbook_lfs_disabled,
                                      mock_config_file):
        """ test checking out a branch in a project that pulls in a linked dataset"""
        def dispatcher_mock(self, function_ref, kwargs, metadata):
            assert kwargs['logged_in_username'] == 'other-test-user2'
            assert kwargs['dataset_owner'] == 'testuser'
            assert kwargs['dataset_name'] == 'test-ds'

            # Inject mocked config file
            kwargs['config_file'] = mock_config_file[0]

            # Stop patching so job gets scheduled for real
            dispatcher_patch.stop()

            # Call same method as in mutation
            d = Dispatcher()
            res = d.dispatch_task(
                gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
                kwargs=kwargs,
                metadata=metadata)

            return res

        username = '******'
        lb = mock_labbook_lfs_disabled[2]
        im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
        ds = im.create_dataset(username,
                               username,
                               'test-ds',
                               storage_type='gigantum_object_v1')

        # Publish dataset
        dataset_wf = DatasetWorkflow(ds)
        dataset_wf.publish(username=username)

        # Publish project
        labbook_wf = LabbookWorkflow(lb)
        labbook_wf.publish(username=username)

        # Switch branches
        labbook_wf.labbook.checkout_branch(branch_name="dataset-branch",
                                           new=True)

        # Link to project
        im.link_dataset_to_labbook(dataset_wf.remote, username, username,
                                   labbook_wf.labbook)

        # Publish branch
        labbook_wf.sync(username=username)

        # Import project
        other_user = '******'
        wf_other = LabbookWorkflow.import_from_remote(
            labbook_wf.remote,
            username=other_user,
            config_file=mock_config_file[0])

        # The remotes must be the same, since it's the same remote repo
        assert wf_other.remote == labbook_wf.remote
        assert wf_other.repository != labbook_wf.repository
        assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

        with pytest.raises(InventoryException):
            im_other_user = InventoryManager(config_file=mock_config_file[0])
            ds = im_other_user.load_dataset(other_user, username, 'test-ds')

        # Patch dispatch_task so you can inject the mocked config file
        dispatcher_patch = patch.object(Dispatcher, 'dispatch_task',
                                        dispatcher_mock)
        dispatcher_patch.start()

        # Checkout the branch
        assert wf_other.labbook.active_branch == "master"
        wf_other.checkout(username=other_user, branch_name="dataset-branch")

        cnt = 0
        while cnt < 20:
            try:
                im_other_user = InventoryManager(
                    config_file=mock_config_file[0])
                ds = im_other_user.load_dataset(other_user, username,
                                                'test-ds')
                break
            except InventoryException:
                cnt += 1
                time.sleep(1)

        assert cnt < 20
        assert ds.name == 'test-ds'
        assert ds.namespace == username
        assert mock_config_file[1] in ds.root_dir
        assert wf_other.labbook.active_branch == "dataset-branch"
Example 13
    def test_import_from_remote__linked_dataset(self,
                                                mock_labbook_lfs_disabled,
                                                mock_config_file):
        """ test importing a project with a linked dataset"""
        def dispatcher_mock(self, function_ref, kwargs, metadata):
            assert kwargs['logged_in_username'] == 'other-test-user2'
            assert kwargs['dataset_owner'] == 'testuser'
            assert kwargs['dataset_name'] == 'test-ds'

            # Inject mocked config file
            kwargs['config_file'] = mock_config_file[0]

            # Stop patching so job gets scheduled for real
            dispatcher_patch.stop()

            # Call same method as in mutation
            d = Dispatcher()
            res = d.dispatch_task(
                gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
                kwargs=kwargs,
                metadata=metadata)

            return res

        username = '******'
        lb = mock_labbook_lfs_disabled[2]
        im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
        ds = im.create_dataset(username,
                               username,
                               'test-ds',
                               storage_type='gigantum_object_v1')

        # Publish dataset
        dataset_wf = DatasetWorkflow(ds)
        dataset_wf.publish(username=username)

        # Link to project
        im.link_dataset_to_labbook(dataset_wf.remote, username, username, lb)

        # Publish project
        labbook_wf = LabbookWorkflow(lb)
        labbook_wf.publish(username=username)

        # Patch dispatch_task so you can inject the mocked config file
        dispatcher_patch = patch.object(Dispatcher, 'dispatch_task',
                                        dispatcher_mock)
        dispatcher_patch.start()

        # Import project, triggering an auto-import of the dataset
        other_user = '******'
        wf_other = LabbookWorkflow.import_from_remote(
            labbook_wf.remote,
            username=other_user,
            config_file=mock_config_file[0])

        # The remotes must be the same, since it's the same remote repo
        assert wf_other.remote == labbook_wf.remote
        # The actual path on disk will be different, though
        assert wf_other.repository != labbook_wf.repository
        # Check imported into namespace of original owner (testuser)
        assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

        cnt = 0
        while cnt < 20:
            try:
                im_other_user = InventoryManager(
                    config_file=mock_config_file[0])
                ds = im_other_user.load_dataset(other_user, username,
                                                'test-ds')
                break
            except InventoryException:
                cnt += 1
                time.sleep(1)

        assert cnt < 20
        assert ds.name == 'test-ds'
        assert ds.namespace == username
        assert mock_config_file[1] in ds.root_dir
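
The 20-iteration polling loop above (also used in Example 12) waits for the background check_and_import_dataset job to land. A small helper would make the intent explicit; this is a refactoring sketch, not code from the project:

    import time

    def wait_for_dataset(config_file, username, owner, name,
                         attempts=20, delay=1.0):
        """Poll until a background import makes the dataset loadable."""
        for _ in range(attempts):
            try:
                im = InventoryManager(config_file=config_file)
                return im.load_dataset(username, owner, name)
            except InventoryException:
                time.sleep(delay)
        raise TimeoutError(f"dataset {owner}/{name} never appeared for {username}")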
Example 14
    def test_verify_contents_linked_dataset(self, mock_dataset_with_local_dir):
        """Test verifying the contents of a dataset that is linked into a project"""
        class JobMock:
            def __init__(self):
                self.meta = dict()

            def save_meta(self):
                pass

        CURRENT_JOB = JobMock()

        def get_current_job_mock():
            return CURRENT_JOB

        with patch('gtmcore.dispatcher.jobs.get_current_job',
                   side_effect=get_current_job_mock):
            ds = mock_dataset_with_local_dir[0]
            im = InventoryManager()

            ds.backend.update_from_remote(ds, lambda x: print(x))

            m = Manifest(ds, 'tester')
            assert len(m.manifest.keys()) == 4
            assert os.path.isfile(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test1.txt'))
            assert os.path.isfile(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test2.txt'))
            assert os.path.isfile(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'subdir', 'test3.txt'))

            modified_items = ds.backend.verify_contents(ds, lambda x: print(x))
            assert len(modified_items) == 0

            lb = im.create_labbook("tester", "tester", 'test-labbook')
            im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "tester",
                                       ds.name, lb)

            dataset_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                       'tester', ds.name)
            ds = im.load_dataset_from_directory(dataset_dir)

            test_dir = os.path.join(mock_dataset_with_local_dir[1],
                                    "local_data", "test_dir")
            with open(os.path.join(test_dir, 'test1.txt'), 'wt') as tf:
                tf.write("This file got changed in the filesystem")

            kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "tester",
                'dataset_name': 'dataset-1',
                'labbook_owner': "tester",
                'labbook_name': 'test-labbook'
            }

            jobs.verify_dataset_contents(**kwargs)
            job = gtmcore.dispatcher.jobs.get_current_job()

            assert 'modified_keys' in job.meta
            assert job.meta['modified_keys'] == ["test1.txt"]
            assert 'Validating contents of 3 files.' in job.meta['feedback']

    def test_update_dataset_link(self, mock_labbook):
        """Test updating a linked dataset reference after the remote changes"""
        inv_manager = InventoryManager(mock_labbook[0])
        lb = mock_labbook[2]
        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset100",
                                        "gigantum_object_v1",
                                        description="my dataset")

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'test', None, None)

        assert os.path.exists(os.path.join(lb.root_dir,
                                           '.gitmodules')) is False

        inv_manager.link_dataset_to_labbook(ds.remote, 'test', 'dataset100',
                                            lb)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'test', 'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True
        assert os.path.exists(
            os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

        # Make change to remote
        git_dir = os.path.join(tempfile.gettempdir(),
                               'test_update_dataset_link')
        try:
            os.makedirs(git_dir)
            call_subprocess(['git', 'clone', ds.remote],
                            cwd=git_dir,
                            check=True)
            with open(os.path.join(git_dir, ds.name, 'test_file.dat'),
                      'wt') as tf:
                tf.write("Test File Contents")
            call_subprocess(['git', 'add', 'test_file.dat'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'commit', '-m', 'editing repo'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'push'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)

            # Update dataset ref
            inv_manager.update_linked_dataset_reference(
                ds.namespace, ds.name, lb)

            # verify change is reflected
            assert os.path.exists(
                os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

            # Verify activity record
            assert "Updated Dataset `test/dataset100` link to version" in lb.git.log(
            )[0]['message']
        finally:
            if os.path.exists(git_dir):
                shutil.rmtree(git_dir)
Example 16
    def test_delete_labbook_with_linked_dataset(
            self, fixture_working_dir_env_repo_scoped):
        """Test deleting a LabBook with a linked dataset that has been deleted as well, should clean up"""
        class JobResponseMock(object):
            def __init__(self, key):
                self.key_str = key

        def dispatcher_mock(self, function_ref, kwargs, metadata):
            assert kwargs['logged_in_username'] == 'default'
            assert kwargs['dataset_owner'] == 'default'
            assert kwargs['dataset_name'] == 'dataset22'
            assert ".labmanager/datasets/default/default/dataset22" in kwargs[
                'cache_location']
            assert metadata['method'] == 'clean_dataset_file_cache'

            with open("/tmp/mock_reached", 'wt') as tf:
                tf.write("reached")

            return JobResponseMock(
                "rq:job:00923477-d46b-479c-ad0c-2dffcfdfb6b10")

        im = InventoryManager(fixture_working_dir_env_repo_scoped[0])
        lb = im.create_labbook("default",
                               "default",
                               "labbook1",
                               description="Cats labbook 1")
        lb_root_dir = lb.root_dir
        assert os.path.exists(lb_root_dir)
        assert os.path.exists("/tmp/mock_reached") is False

        ds = im.create_dataset('default',
                               'default',
                               "dataset22",
                               storage_type="gigantum_object_v1",
                               description="test")
        ds_root_dir = ds.root_dir
        im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "default",
                                   "dataset22", lb)
        im.delete_dataset('default', 'default', "dataset22")

        delete_query = f"""
        mutation delete {{
            deleteLabbook(input: {{
                owner: "default",
                labbookName: "labbook1",
                confirm: true
            }}) {{
                success
            }}
        }}
        """
        try:
            with patch.object(Dispatcher, 'dispatch_task', dispatcher_mock):
                r = fixture_working_dir_env_repo_scoped[2].execute(
                    delete_query)

            assert 'errors' not in r
            assert r['data']['deleteLabbook']['success'] is True
            assert not os.path.exists(lb_root_dir)
            assert not os.path.exists(ds_root_dir)
            assert os.path.exists("/tmp/mock_reached") is True
        finally:
            if os.path.exists("/tmp/mock_reached"):
                os.remove("/tmp/mock_reached")
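
Distilled from the examples above, the core link/unlink round trip needs only a handful of InventoryManager calls. A minimal sketch, assuming config_file comes from one of the fixtures shown:

    im = InventoryManager(config_file)
    lb = im.create_labbook("owner", "owner", "demo-project", description="demo")
    ds = im.create_dataset("owner", "owner", "demo-dataset",
                           "gigantum_object_v1", description="demo")

    # Most examples link through the dataset's local git repo
    im.link_dataset_to_labbook(f"{ds.root_dir}/.git", "owner", "demo-dataset", lb)
    assert len(im.get_linked_datasets(lb)) == 1

    # Unlinking removes the submodule and empties .gitmodules again
    im.unlink_dataset_from_labbook("owner", "demo-dataset", lb)
    assert len(im.get_linked_datasets(lb)) == 0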