# Common stdlib and third-party imports assumed by the examples below
# (collected here for reference; project-internal imports such as the gtmcore
# and lmsrvlabbook modules, fixtures, and the helper_* test utilities are
# omitted because these snippets come from separate test modules):
import glob
import os
import shutil
import time
import uuid
from collections import namedtuple
from typing import Optional
from unittest.mock import patch

import flask
import graphene
import pytest
import snappy
from aioresponses import aioresponses
from flask import Flask
from graphene.test import Client

def test_delete_dataset_files_errors(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset-delete-2",
                               storage_type="gigantum_object_v1",
                               description="testing delete")
        m = Manifest(ds, 'default')

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        revision = m.dataset_revision
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, revision,
                         "test1.txt")) is True
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, revision,
                         "test2.txt")) is True

        query = """
                   mutation myMutation {
                     deleteDatasetFiles(input: {datasetOwner: "default", datasetName: "dataset-delete-2", 
                                                keys: ["testdfdfdfdf.txt"]}) {
                         success
                     }
                   }
                   """
        result = fixture_working_dir[2].execute(query)
        assert 'errors' in result
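
# `helper_append_file` is used by nearly every example in this collection but
# its definition is never shown. A minimal sketch consistent with how it is
# called (an assumption, not the canonical gtmcore test helper): append
# `content` to the file at `rel_path` under the revision directory of the cache.
def helper_append_file(cache_root, revision, rel_path, content):
    import os
    with open(os.path.join(cache_root, revision, rel_path), 'at') as fh:
        fh.write(content)
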
    def test_update_simple(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "asdfasdf")

        status = manifest.status()
        assert len(status.created) == 1
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        assert "test1.txt" in status.created

        manifest.update(status=status)
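        # Pause so the next write lands at a measurably newer mtime than the
        # update just recorded (status detection appears to be timestamp-based)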
        time.sleep(2)

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "asdfasdf")

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 1
        assert len(status.deleted) == 0

        manifest.update()

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

# Example 3
    def test_compute_push_batches(self, mock_dataset_with_manifest_bg_tests):
        """Test compute push batches, verifying it works OK when you've deleted some files"""
        ds, manifest, working_dir = mock_dataset_with_manifest_bg_tests
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "other_dir/test3.txt", "test content 3")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test" * 4300000)
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test4.txt", "test content 4")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test5.txt", "test content 5")
        manifest.sweep_all_changes()

        assert len(manifest.manifest) == 6

        # remove a file from the manifest
        manifest.delete(['test5.txt'])
        assert len(manifest.manifest) == 5

        key_batches, total_bytes, num_files = iom.compute_push_batches()
        assert num_files == 5
        assert total_bytes == (4 * 4300000) + (14 * 4)
        assert len(key_batches) == 2
        assert len(key_batches[0]) == 4
        assert len(key_batches[1]) == 1
        assert key_batches[1][0].dataset_path == 'test1.txt'
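
# The batch shape asserted above (four small files packed together, the large
# ~17 MB test1.txt isolated in its own batch) is consistent with greedy,
# size-capped batching over size-sorted entries. An illustrative sketch with an
# assumed cap, not IOManager's actual implementation:
def batch_by_size(entries, max_batch_bytes=10 * 1024 * 1024):
    """Greedily pack (key, size_bytes) pairs into batches under a byte cap."""
    batches, current, current_bytes = [], [], 0
    for key, size in sorted(entries, key=lambda e: e[1]):
        if current and current_bytes + size > max_batch_bytes:
            batches.append(current)
            current, current_bytes = [], 0
        current.append(key)
        current_bytes += size
    if current:
        batches.append(current)
    return batches
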
    def test_status_deleted_files(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "asdfasdf")

        status = manifest.status()
        assert len(status.created) == 1
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        assert "test1.txt" in status.created

        manifest.update(status=status)

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1.txt"))

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 1

        manifest.update()

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

# Example 5
    def test_push_objects_with_failure(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)

        with aioresponses() as mocked_responses:
            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj1}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj1,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj1}?params=1",
                                 payload={},
                                 status=200)

            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj2}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj2,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj2}?params=1",
                                 payload={},
                                 status=400)

            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.push_objects()
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1

            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert result.success[0].object_path == obj_to_push[0].object_path
            assert result.failure[0].object_path == obj_to_push[1].object_path

# Example 6
@pytest.fixture(scope="class")
def fixture_single_dataset():
    """A class-scoped pytest fixture that creates a temporary working directory,
    a config file to match, creates the GraphQL schema, and populates a test
    dataset with files.
    """
    # Create temp dir
    config_file, temp_dir = _create_temp_work_dir()

    # Create user identity
    insert_cached_identity(temp_dir)

    # Create test client
    schema = graphene.Schema(query=LabbookQuery, mutation=LabbookMutations)

    # Create a bunch of lab books
    im = InventoryManager(config_file)

    ds = im.create_dataset('default',
                           'default',
                           "test-dataset",
                           storage_type="gigantum_object_v1",
                           description="Cats 2")
    m = Manifest(ds, 'default')
    cm_class = get_cache_manager_class(ds.client_config)
    cache_mgr = cm_class(ds, 'default')
    revision = ds.git.repo.head.commit.hexsha

    os.makedirs(os.path.join(cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(cache_mgr.cache_root, revision, "test1.txt", "asdfasdf")
    helper_append_file(cache_mgr.cache_root, revision, "test2.txt", "rtg")
    helper_append_file(cache_mgr.cache_root, revision, "test3.txt", "wer")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test4.txt",
                       "dfasdfhfgjhg")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test5.txt",
                       "fdghdfgsa")
    m.update()

    with patch.object(Configuration, 'find_default_config',
                      lambda self: config_file):
        # Load User identity into app context
        app = Flask("lmsrvlabbook")
        app.config["LABMGR_CONFIG"] = Configuration()
        app.config["LABMGR_ID_MGR"] = get_identity_manager(Configuration())

        with app.app_context():
            # within this block, current_app points to app. Set current user explicitly (this is done in the middleware)
            flask.g.user_obj = app.config["LABMGR_ID_MGR"].get_user_profile()

            # Create a test client
            client = Client(schema,
                            middleware=[DataloaderMiddleware()],
                            context_value=ContextMock())

            yield config_file, temp_dir, client, ds, cache_mgr

    # Remove the temp_dir
    shutil.rmtree(temp_dir)
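
# An illustrative consumer of the fixture above (hypothetical test, shown only
# to document the order of the yielded tuple):
def test_uses_fixture(fixture_single_dataset):
    config_file, temp_dir, client, ds, cache_mgr = fixture_single_dataset
    result = client.execute('{ dataset(owner: "default", name: "test-dataset") { name } }')
    assert 'errors' not in result
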
    def test_delete_dataset_while_linked(self, mock_config_file):
        inv_manager = InventoryManager(mock_config_file[0])
        auth = GitAuthor(name="test", email="*****@*****.**")
        lb = inv_manager.create_labbook("test",
                                        "test",
                                        "labbook1",
                                        description="my first labbook")
        ds = inv_manager.create_dataset("test",
                                        "test",
                                        "dataset1",
                                        "gigantum_object_v1",
                                        description="my first dataset",
                                        author=auth)
        ds_root_dir = ds.root_dir
        lb_root_dir = lb.root_dir
        assert os.path.exists(ds_root_dir) is True
        assert os.path.exists(lb_root_dir) is True

        # Link dataset
        inv_manager.link_dataset_to_labbook(f"{ds_root_dir}/.git", "test",
                                            "dataset1", lb)

        m = Manifest(ds, 'test')
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfasdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "dfg")

        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "test1.txt")) is True
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "test2.txt")) is True

        dataset_delete_job = inv_manager.delete_dataset(
            "test", "test", "dataset1")
        assert os.path.exists(ds_root_dir) is False
        assert os.path.exists(lb_root_dir) is True
        assert os.path.exists(m.cache_mgr.cache_root) is True
        assert dataset_delete_job.namespace == "test"
        assert dataset_delete_job.name == "dataset1"
        assert dataset_delete_job.cache_root == m.cache_mgr.cache_root

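        # The dataset is still linked to labbook1, so the cleanup job should
        # leave the shared file cache in place (verified below)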
        jobs.clean_dataset_file_cache("test",
                                      dataset_delete_job.namespace,
                                      dataset_delete_job.name,
                                      dataset_delete_job.cache_root,
                                      config_file=mock_config_file[0])

        assert os.path.exists(m.cache_mgr.cache_root) is True

        cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
        assert os.path.exists(cache_base) is True

# Example 8
    def test_objects_to_push(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "other_dir/test4.txt", "test content 4")
        manifest.sweep_all_changes()

        # Modify file to have 2 objects with same key
        helper_append_file(manifest.cache_mgr.cache_root,
                           iom.manifest.dataset_revision, "test2.txt",
                           "test content 22")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()

        assert len(obj_to_push) == 4
        assert obj_to_push[0].dataset_path == "other_dir/test4.txt"
        assert obj_to_push[1].dataset_path == "test1.txt"
        assert obj_to_push[2].dataset_path == "test2.txt"
        assert obj_to_push[3].dataset_path == "test2.txt"
        assert obj_to_push[2].revision != obj_to_push[3].revision

        assert iom.num_objects_to_push() == 4

# Example 9
    def test_objects_to_push_deduped(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content dup")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content dup")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test3.txt", "test content dup")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "other_dir/test4.txt", "test content 4")
        manifest.sweep_all_changes()

        # Write a .DS_Store file in the objects dir to make sure it gets skipped
        with open(
                os.path.join(manifest.cache_mgr.cache_root, 'objects', '.push',
                             '.DS_Store'), 'wt') as ff:
            ff.write("")

        obj_to_push = iom.objects_to_push(remove_duplicates=True)

        assert len(obj_to_push) == 2
        assert obj_to_push[0].dataset_path == "other_dir/test4.txt"
        assert obj_to_push[1].dataset_path == "test1.txt"

        assert iom.num_objects_to_push(remove_duplicates=True) == 2
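
# Because test1/test2/test3 have identical content they hash to the same
# object, so deduplication collapses them to a single push entry. A minimal
# sketch of the idea (assumed, not IOManager's code): keep the first entry
# seen for each object_path.
def dedupe_push_objects(objs):
    seen, unique = set(), []
    for obj in objs:  # objs assumed sorted by dataset_path
        if obj.object_path in seen:
            continue
        seen.add(obj.object_path)
        unique.append(obj)
    return unique
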
# Example 10
    def test_move_rename_file(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "other_dir"))
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "other_dir", "nested_dir"))
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt",
                           "asdfasdghndfdf")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test2.txt", "asdfdf")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision,
                           "other_dir/nested_dir/test6.txt", "4456tyfg")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision,
                           "other_dir/nested_dir/test7.txt", "fgfyytr")
        manifest.sweep_all_changes()

        num_records = len(ds.git.log())
        assert num_records == 6

        revision = manifest.dataset_revision
        cr = manifest.cache_mgr.cache_root
        assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True
        assert os.path.exists(os.path.join(cr, revision, "test2.txt")) is True
        assert os.path.exists(
            os.path.join(cr, revision, "other_dir", "nested_dir",
                         "test6.txt")) is True
        assert os.path.exists(
            os.path.join(cr, revision, "other_dir", "nested_dir",
                         "test7.txt")) is True

        # test renaming a file
        edges = manifest.move("test1.txt", "test1-moved.txt")
        assert len(edges) == 1
        assert edges[0]['key'] == 'test1-moved.txt'
        assert edges[0]['size'] == '14'
        assert edges[0]['is_local'] is True

        revision = manifest.dataset_revision
        assert os.path.exists(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1.txt")) is False
        assert os.path.exists(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1-moved.txt")) is True
        assert os.path.exists(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test2.txt")) is True
        assert os.path.exists(
            os.path.join(cr, revision, "other_dir", "nested_dir",
                         "test6.txt")) is True
        assert os.path.exists(
            os.path.join(cr, revision, "other_dir", "nested_dir",
                         "test7.txt")) is True
        assert len(ds.git.log()) == num_records + 2

# Example 11
    def test_sweep_all_changes_remove_file_in_dir(self,
                                                  mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "dir1"))
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "dir1/test1.txt",
                           "asdfasdfdf")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test2.txt", "dfdf")

        assert len(ds.git.log()) == 4

        status = manifest.status()
        assert len(status.created) == 3
        assert len(status.modified) == 0
        assert len(status.deleted) == 0
        manifest.sweep_all_changes()
        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        assert len(manifest.manifest.keys()) == 3
        assert 'dir1/' in manifest.manifest
        assert 'dir1/test1.txt' in manifest.manifest
        assert 'test2.txt' in manifest.manifest

        src = os.path.join(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "dir1", "test1.txt")
        os.remove(src)
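        # Brief pause so the deletion registers at a newer timestamp before
        # status is re-read (an assumption about mtime-based change detection)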
        time.sleep(1.5)

        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 1
        assert 'dir1/test1.txt' in status.deleted

        manifest.sweep_all_changes()
        status = manifest.status()
        assert len(status.created) == 0
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        assert len(manifest.manifest.keys()) == 2
        assert 'dir1/' in manifest.manifest
        assert 'test2.txt' in manifest.manifest

# Example 12
    def test_complete_dataset_upload_transaction_simple(
            self, mock_config_file_background_tests):
        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "new-ds",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "fake content!")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "moar fake content!")

        dl_kwargs = {
            'dispatcher': Dispatcher,
            'logged_in_username': "******",
            'logged_in_email': "*****@*****.**",
            'dataset_owner': "default",
            'dataset_name': "new-ds",
            'config_file': mock_config_file_background_tests[0]
        }

        assert len(m.manifest) == 0
        gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(
            **dl_kwargs)

        m = Manifest(ds, 'default')

        # make sure manifest got updated
        assert len(m.manifest) == 2
        assert 'test1.txt' in m.manifest
        assert 'test2.txt' in m.manifest

        assert m.manifest['test1.txt']['b'] == '13'
        assert len(m.manifest['test1.txt']['h']) == 128
        assert 'manifest-' in m.manifest['test1.txt']['fn']

        assert m.manifest['test2.txt']['b'] == '18'
        assert len(m.manifest['test2.txt']['h']) == 128
        assert 'manifest-' in m.manifest['test2.txt']['fn']

        assert m.manifest['test2.txt']['h'] != m.manifest['test1.txt']['h']

        # Make sure activity created
        assert len(ds.git.log()) == 6
        assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
        assert "Uploaded 2 new file(s)." in ds.git.log()[0]['message']
    def test_move_dataset_file(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset-move",
                               storage_type="gigantum_object_v1",
                               description="testing move")
        m = Manifest(ds, 'default')

        revision = m.dataset_revision
        helper_append_file(m.cache_mgr.cache_root, revision, "test1.txt",
                           "asdfasdghndfdf")
        m.sweep_all_changes()

        revision = m.dataset_revision
        cr = m.cache_mgr.cache_root
        assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True

        query = """
                   mutation myMutation {
                     moveDatasetFile(input: {datasetOwner: "default", datasetName: "dataset-move", 
                                             srcPath: "test1.txt", dstPath: "test1-renamed.txt"}) {
                         updatedEdges {
                            node {
                              id
                              key
                              isDir
                              isLocal
                              size
                            }
                         }
                     }
                   }
                   """
        result = fixture_working_dir[2].execute(query)
        assert 'errors' not in result
        snapshot.assert_match(result)

        revision = m.dataset_revision
        cr = m.cache_mgr.cache_root
        assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is False
        assert os.path.exists(os.path.join(cr, revision,
                                           "test1-renamed.txt")) is True

# Example 14
    def test_file_info_from_filesystem(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "asdfasdf")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "other_dir/test4.txt",
                           "dfasdfhfgjhg")

        file_info = manifest.gen_file_info("test1.txt")
        assert file_info['key'] == "test1.txt"
        assert file_info['size'] == '8'
        assert file_info['is_favorite'] is False
        assert file_info['is_local'] is True
        assert file_info['is_dir'] is False
        assert 'modified_at' in file_info

        file_info = manifest.gen_file_info("other_dir/test4.txt")
        assert file_info['key'] == "other_dir/test4.txt"

# Example 15
    def test_status_created_files(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest

        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test_dir"))
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "other_dir"))
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test_dir", "nested"))
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "asdfasdf")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test2.txt", "dfg")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test_dir/test3.txt",
                           "asdffdgfghghfjjgh")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision,
                           "test_dir/nested/test4.txt", "565656565")
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "other_dir/test5.txt",
                           "dfasdfhfgjhg")

        status = manifest.status()
        assert len(status.created) == 8
        assert len(status.modified) == 0
        assert len(status.deleted) == 0

        assert "test1.txt" in status.created
        assert "test2.txt" in status.created
        assert "test_dir/test3.txt" in status.created
        assert "test_dir/nested/test4.txt" in status.created
        assert "other_dir/test5.txt" in status.created
        assert "test_dir/" in status.created
        assert "test_dir/nested/" in status.created
        assert "other_dir/" in status.created

# Example 16
    def test_delete_dataset(self, mock_config_file):
        inv_manager = InventoryManager(mock_config_file[0])
        auth = GitAuthor(name="test", email="*****@*****.**")
        ds = inv_manager.create_dataset("test", "test", "dataset1", "gigantum_object_v1",
                                        description="my first dataset",
                                        author=auth)
        root_dir = ds.root_dir
        assert os.path.exists(root_dir) is True

        m = Manifest(ds, 'test')
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfasdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt", "dfg")

        assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt")) is True
        assert os.path.exists(os.path.join(m.cache_mgr.cache_root, m.dataset_revision, "test2.txt")) is True

        inv_manager.delete_dataset("test", "test", "dataset1")
        assert os.path.exists(root_dir) is False
        assert os.path.exists(m.cache_mgr.cache_root) is False

        cache_base, _ = m.cache_mgr.cache_root.rsplit(os.path.sep, 1)
        assert os.path.exists(cache_base) is True

# Example 17
    def test_sync__dataset(self, mock_config_file):
        def update_feedback(msg: str,
                            has_failures: Optional[bool] = None,
                            failure_detail: Optional[str] = None,
                            percent_complete: Optional[float] = None):
            """Method to update the job's metadata and provide feedback to the UI"""
            assert has_failures is None or has_failures is False
            assert failure_detail is None

        def dispatch_query_mock(self, job_key):
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '100'})

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            return "afakejobkey"

        username = '******'
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset(username, username, 'dataset-1',
                               'gigantum_object_v1')
        m = Manifest(ds, username)
        wf = DatasetWorkflow(ds)

        iom = IOManager(ds, m)
        assert len(glob.glob(f'{iom.push_dir}/*')) == 0
        wf.publish(username=username, feedback_callback=update_feedback)

        # Put a file into the dataset that needs to be pushed
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                wf.sync(username=username, feedback_callback=update_feedback)
                assert os.path.exists(wf.remote)
                assert len(glob.glob(f'{iom.push_dir}/*')) == 0

# Example 18
    def test_compute_pull_batches(self, mock_dataset_with_manifest_bg_tests):
        ds, manifest, working_dir = mock_dataset_with_manifest_bg_tests
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "other_dir/test3.txt", "test content 3")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test" * 4300000)
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test4.txt", "test content 4")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test5.txt", "test content 5")
        manifest.sweep_all_changes()

        with pytest.raises(ValueError):
            iom.compute_pull_batches()

        # Remove all files so everything needs to be pulled
        rev_dir = os.path.join(manifest.cache_mgr.cache_root,
                               manifest.dataset_revision)
        object_dir = os.path.join(manifest.cache_mgr.cache_root, 'objects')
        shutil.rmtree(rev_dir)
        shutil.rmtree(object_dir)

        key_batches, total_bytes, num_files = iom.compute_pull_batches(
            pull_all=True)
        assert num_files == 5
        assert total_bytes == (4 * 4300000) + (14 * 4)
        assert len(key_batches) == 2
        assert len(key_batches[0]) == 4
        assert len(key_batches[1]) == 1
        assert key_batches[1][0] == 'test1.txt'

# Example 19
    def test_file_distribution(self, fixture_single_dataset):
        """Test getting a Dataset's fileTypeDistribution"""
        ds = fixture_single_dataset[3]
        query = """
                    {
                      dataset(owner: "default", name: "test-dataset") {
                        overview {
                          fileTypeDistribution
                        }
                      }
                    }
                    """
        result = fixture_single_dataset[2].execute(query)
        assert 'errors' not in result
        assert len(
            result['data']['dataset']['overview']['fileTypeDistribution']) == 1
        assert result['data']['dataset']['overview'][
            'fileTypeDistribution'] == ['1.00|.txt']

        # Add files with other extensions (hidden and extensionless files
        # should be excluded from the distribution)
        m = Manifest(ds, 'default')
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test55.csv", "22222")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "df.csv", "33333")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           ".hidden", "33333")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "noextension", "33333")
        m.update()

        result = fixture_single_dataset[2].execute(query)
        assert 'errors' not in result
        assert len(
            result['data']['dataset']['overview']['fileTypeDistribution']) == 2
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            0] == '0.71|.txt'
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            1] == '0.29|.csv'

# Example 20
    def test_objects_to_push_ignore_other_branch(self,
                                                 mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "fdsfgfd")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        assert obj_to_push[0].dataset_path == "test1.txt"
        assert obj_to_push[1].dataset_path == "test2.txt"

        # Create new branch and add a file there
        bm = BranchManager(ds, username=USERNAME)
        starting_branch = bm.active_branch
        bm.create_branch(title="test-branch")
        assert bm.active_branch == "test-branch"
        assert ds.is_repo_clean is True

        helper_append_file(manifest.cache_mgr.cache_root,
                           iom.manifest.dataset_revision, "test3.txt",
                           "fdsfgfd")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 3
        assert obj_to_push[0].dataset_path == "test1.txt"
        assert obj_to_push[1].dataset_path == "test2.txt"
        assert obj_to_push[2].dataset_path == "test3.txt"

        # Go back to the original branch; the file added on the other branch
        # should not need to be pushed
        bm.workon_branch(starting_branch)

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        assert obj_to_push[0].dataset_path == "test1.txt"
        assert obj_to_push[1].dataset_path == "test2.txt"

# Example 21
    def test_pull_objects_all_partial_download(self,
                                               mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "other_dir/test3.txt", "1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 3
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        _, obj_id_3 = obj_to_push[2].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path
        obj3_target = obj_to_push[2].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert "test3.txt" in obj_to_push[0].dataset_path

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True
        assert os.path.exists(obj3_target) is True

        # Completely remove other_dir/test3.txt object
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "other_dir", "test3.txt"))
        helper_compress_file(obj1_target, obj1_source)

        # Remove link for test1.txt
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1.txt"))

        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is True
        assert os.path.isfile(obj3_target) is True

        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)

            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.pull_all()
            assert len(result.success) == 1
            assert len(result.failure) == 0
            assert result.success[0].object_path == obj1_target
            assert "test3.txt" in result.success[0].dataset_path

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True
            assert os.path.isfile(obj3_target) is True

            filename = os.path.join(manifest.cache_mgr.cache_root,
                                    manifest.dataset_revision, "other_dir",
                                    "test3.txt")
            assert os.path.isfile(filename) is True
            with open(filename, 'rt') as dd:
                assert dd.read() == "1"

            filename = os.path.join(manifest.cache_mgr.cache_root,
                                    manifest.dataset_revision, "test1.txt")
            assert os.path.isfile(filename) is True
            with open(filename, 'rt') as dd:
                assert dd.read() == "test content 1"

            filename = os.path.join(manifest.cache_mgr.cache_root,
                                    manifest.dataset_revision, "test2.txt")
            assert os.path.isfile(filename) is True
            with open(filename, 'rt') as dd:
                assert dd.read() == "test content 2"

            # Try pulling all again with nothing to pull
            result = iom.pull_all()
            assert len(result.success) == 0
            assert len(result.failure) == 0
            assert result.message == "Dataset already downloaded."
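
# `helper_compress_file` (used above) simulates data living only in remote
# storage: after the call the cache object is gone and a snappy-compressed
# copy sits at the second path. A sketch under those assumptions, not the
# canonical helper:
def helper_compress_file(object_path, compressed_path):
    import os
    import snappy
    with open(object_path, 'rb') as fin, open(compressed_path, 'wb') as fout:
        fout.write(snappy.StreamCompressor().compress(fin.read()))
    os.remove(object_path)
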
# Example 22
    def test_pull_objects_all(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        check_info = {obj1_target: obj1_source, obj2_target: obj2_source}

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True

        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)

        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        # remove data from the local file cache
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1.txt"))
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test2.txt"))
        shutil.rmtree(os.path.join(manifest.cache_mgr.cache_root, 'objects'))
        os.makedirs(os.path.join(manifest.cache_mgr.cache_root, 'objects'))

        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)

            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_2,
                    "dataset": ds.name
                },
                status=200)

            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    body=data2.read(),
                    status=200,
                    content_type='application/octet-stream')

            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.pull_all()
            assert len(result.success) == 2
            assert len(result.failure) == 0
            assert result.success[0].object_path != result.success[
                1].object_path
            assert result.success[0].object_path in [
                obj_to_push[0].object_path, obj_to_push[1].object_path
            ]
            assert result.success[1].object_path in [
                obj_to_push[0].object_path, obj_to_push[1].object_path
            ]

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True

            decompressor = snappy.StreamDecompressor()
            for r in result.success:
                with open(check_info[r.object_path], 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(r.object_path, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

# Example 23
    def test_pull_objects(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True

        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)

        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)

            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_2,
                    "dataset": ds.name
                },
                status=200)

            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    body=data2.read(),
                    status=200,
                    content_type='application/octet-stream')

            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.pull_objects(keys=["test1.txt"])
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            assert len(result.success) == 1
            assert len(result.failure) == 0
            assert result.success[0].object_path == obj_to_push[0].object_path

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is False
            with open(obj1_target, 'rt') as dd:
                assert "test content 1" == dd.read()

            result = iom.pull_objects(keys=["test2.txt"])
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            assert len(result.success) == 1
            assert len(result.failure) == 0
            assert result.success[0].object_path == obj_to_push[1].object_path

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True
            with open(obj1_target, 'rt') as dd:
                assert "test content 1" == dd.read()
            with open(obj2_target, 'rt') as dd:
                assert "test content 2" == dd.read()

# Example 24
    def test_pull_objects(self, mock_config_file, mock_dataset_head):
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "other_dir"))
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True
        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        # Clear the files out of the linked revision directory
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test2.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file[0]):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')

                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_2,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj2_source, 'rb') as data2:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        body=data2.read(),
                        status=200,
                        content_type='application/octet-stream')

                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real
                # use, multiple jobs run in parallel and you only want to link once).
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is False

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                # Download other file
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test2.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real
                # use, multiple jobs run in parallel and you only want to link once).
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is True

                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                with open(obj2_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj2_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

# Example 25
    def test_complete_dataset_upload_transaction_all_types(
            self, mock_config_file_background_tests):
        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "new-ds",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "fake content 1")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fake content 2")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test3.txt", "fake content 3")

        dl_kwargs = {
            'dispatcher': Dispatcher,
            'logged_in_username': "******",
            'logged_in_email': "*****@*****.**",
            'dataset_owner': "default",
            'dataset_name': "new-ds",
            'config_file': mock_config_file_background_tests[0]
        }

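        # The manifest starts empty; the upload transaction job should sweep the
        # cache and register all three new files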
        assert len(m.manifest) == 0
        gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(
            **dl_kwargs)

        m = Manifest(ds, 'default')

        # make sure manifest got updated
        assert len(m.manifest) == 3
        assert 'test1.txt' in m.manifest
        assert 'test2.txt' in m.manifest
        assert 'test3.txt' in m.manifest
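        # Capture the hash so we can verify it changes when the file is modified below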
        hash1 = m.manifest['test1.txt']['h']

        # Make sure activity created
        assert len(ds.git.log()) == 6
        assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
        assert "Uploaded 3 new file(s)." in ds.git.log()[0]['message']

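        # Modify, add, and delete files to exercise all change types in one transaction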
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "fake content changed")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test4.txt", "fake content 4")
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "test3.txt"))

        gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction(
            **dl_kwargs)
        m = Manifest(ds, 'default')

        # make sure manifest got updated
        assert len(m.manifest) == 3
        assert 'test1.txt' in m.manifest
        assert 'test2.txt' in m.manifest
        assert 'test4.txt' in m.manifest
        assert hash1 != m.manifest['test1.txt']['h']

        # Make sure activity created
        assert len(ds.git.log()) == 8
        assert "_GTM_ACTIVITY_START_" in ds.git.log()[0]['message']
        assert "Uploaded 1 new file(s). Uploaded 1 modified file(s). 1 deleted file(s)." in ds.git.log(
        )[0]['message']
    def test_update_dataset_link(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        lb = im.create_labbook('default', 'default', 'test-lb',
                               'testing dataset links')
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        manifest = Manifest(ds, 'default')
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "12345")
        manifest.sweep_all_changes()

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'default', None, None)

        assert os.path.exists(os.path.join(lb.root_dir,
                                           '.gitmodules')) is False

        overview_query = """
                {
                  labbook(owner: "default", name:"test-lb")
                  {
                    linkedDatasets{
                      name
                      overview {
                          localBytes
                          totalBytes
                      }
                    }
                  }
                }
                """

        query = """
                   mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                                       $a: String!, $du: String) {
                     modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do, datasetName: $dn,
                                               action: $a, datasetUrl: $du}) {
                         newLabbookEdge {
                           node {
                             id
                             name
                             description
                             linkedDatasets {
                               name
                             }
                           }
                         }
                     }
                   }
                   """
        variables = {
            "lo": "default",
            "ln": "test-lb",
            "do": "default",
            "dn": "dataset100",
            "a": "link",
            "du": ds.remote
        }
        result = fixture_working_dir[2].execute(query,
                                                variable_values=variables)
        assert "errors" not in result
        snapshot.assert_match(result)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'default',
                                             'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True
        assert os.path.exists(
            os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()
        assert len(data) > 0

        # check overview
        result = fixture_working_dir[2].execute(overview_query)
        assert "errors" not in result
        assert result['data']['labbook']['linkedDatasets'][0]['overview'][
            'localBytes'] == '5'
        assert result['data']['labbook']['linkedDatasets'][0]['overview'][
            'totalBytes'] == '5'

        # Make change to published dataset
        git_dir = os.path.join(tempfile.gettempdir(),
                               'test_update_dataset_link_mutation')
        try:
            os.makedirs(git_dir)
            call_subprocess(['git', 'clone', ds.remote],
                            cwd=git_dir,
                            check=True)
            with open(os.path.join(git_dir, ds.name, 'test_file.dat'),
                      'wt') as tf:
                tf.write("Test File Contents")
            call_subprocess(['git', 'add', 'test_file.dat'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'commit', '-m', 'editing repo'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'push'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)

            query = """
                       mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                                           $a: String!) {
                         modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do, datasetName: $dn,
                                                   action: $a}) {
                             newLabbookEdge {
                               node {
                                 id
                                 name
                                 description
                                 linkedDatasets {
                                   name
                                 }
                               }
                             }
                         }
                       }
                       """
            variables = {
                "lo": "default",
                "ln": "test-lb",
                "do": "default",
                "dn": "dataset100",
                "a": "update"
            }
            result = fixture_working_dir[2].execute(query,
                                                    variable_values=variables)
            assert "errors" not in result
            snapshot.assert_match(result)

            # verify change is reflected
            assert os.path.exists(
                os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

            # Verify activity record
            assert "Updated Dataset `default/dataset100` link to version" in lb.git.log(
            )[0]['message']

        finally:
            if os.path.exists(git_dir):
                shutil.rmtree(git_dir)
    def test_file_distribution_hidden(self, fixture_single_dataset):
        """"""
        ds = fixture_single_dataset[3]
        query = """
                    {
                      dataset(owner: "default", name: "test-dataset") {
                        overview {
                          fileTypeDistribution
                        }
                      }
                    }
                    """
        result = fixture_single_dataset[2].execute(query)
        assert 'errors' not in result
        assert result['data']['dataset']['overview'][
            'fileTypeDistribution'] == ['1.00|.txt']

        # Add hidden directories and files alongside regular ones
        m = Manifest(ds, 'default')
        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         ".hiddendir"))
        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         ".hiddendir", "subdir"))
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test55.csv", "22222")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "df.csv", "11")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           ".hidden", "343")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "noextension", "6t4")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           ".hiddendir/tester.png", "8544")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           ".hiddendir/subdir/blah.jpeg", "8544")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           ".hiddendir/subdir/.hiddenfile", "jhg")
        m.update()

        result = fixture_single_dataset[2].execute(query)
        assert 'errors' not in result
        assert len(
            result['data']['dataset']['overview']['fileTypeDistribution']) == 4
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            0] == '0.56|.txt'
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            1] == '0.22|.csv'
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            2] == '0.11|.jpeg'
        assert result['data']['dataset']['overview']['fileTypeDistribution'][
            3] == '0.11|.png'
    def test_complete_dataset_upload_transaction_failure(
            self, mock_config_file_background_tests):
        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "new-ds",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        dispatcher_obj = Dispatcher()

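        # helper_write_big_file is a fixture defined elsewhere; it presumably writes a
        # file large enough to force multiple hashing batches in the background job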
        helper_write_big_file(m.cache_mgr.cache_root, m.dataset_revision,
                              "test1.dat", "12")
        helper_write_big_file(m.cache_mgr.cache_root, m.dataset_revision,
                              "test2.dat", "23")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "zztest3.txt", "fake content 3")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "zztest4.txt", "fake content 4")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "zztest5.txt", "fake content 5")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "zztest6.txt", "fake content 6")
        job_kwargs = {
            'dispatcher': Dispatcher,
            'logged_in_username': "******",
            'logged_in_email': "*****@*****.**",
            'dataset_owner': "default",
            'dataset_name': "new-ds",
            'config_file': mock_config_file_background_tests[0]
        }

        job_metadata = {
            'dataset': f"default|default|new-ds",
            'method': 'complete_dataset_upload_transaction'
        }
        assert len(m.manifest) == 0

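        # Dispatch the upload transaction as a real background job so that files can
        # be removed from the cache while it is still running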
        job_key = dispatcher_obj.dispatch_task(
            gtmcore.dispatcher.dataset_jobs.
            complete_dataset_upload_transaction,
            kwargs=job_kwargs,
            metadata=job_metadata)

        time.sleep(3)

        # Remove files to make them fail
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "zztest4.txt"))
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "zztest5.txt"))

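        # Poll the dispatcher until the job finishes, waiting up to ~120 seconds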
        cnt = 0
        while cnt < 120:
            job_status = dispatcher_obj.query_task(job_key)

            if job_status.status == 'finished':
                break

            time.sleep(1)
            cnt += 1

        assert cnt < 120, "background job did not finish before the timeout"

        m = Manifest(ds, 'default')
        assert len(m.manifest) == 4
        assert 'test1.dat' in m.manifest
        assert 'test2.dat' in m.manifest
        assert 'zztest3.txt' in m.manifest
        assert 'zztest6.txt' in m.manifest
        assert 'zztest5.txt' not in m.manifest
        assert 'zztest4.txt' not in m.manifest

        assert job_status.meta['has_failures'] is True
        assert 'The following files failed to hash. Try re-uploading the files again:\nzztest4.txt \nzztest5.txt' ==\
               job_status.meta['failure_detail']
        assert 'An error occurred while processing some files. Check details and re-upload.' == \
               job_status.meta['feedback']
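
# helper_compress_file is another external fixture. The assertions below expect the
# original object to disappear and a snappy-compressed copy to appear at the given
# destination, so a minimal sketch (an assumption, using python-snappy's
# stream_compress) might be:
#
#     def helper_compress_file(source, destination):
#         with open(source, 'rb') as src, open(destination, 'wb') as dst:
#             snappy.stream_compress(src, dst)
#         os.remove(source)
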
    def test_download_dataset_files(self, mock_config_file_background_tests,
                                    mock_dataset_head):
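        # Pretend the background job already finished and reported its downloaded bytes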
        def dispatch_query_mock(self, job_key):
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '500'})

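        # Instead of queueing a background job, run pull_objects inline with the
        # object service API mocked via aioresponses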
        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')
                gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

                return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

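        # Find the object generated by the sweep; it will stand in for the remote
        # copy once compressed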
        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        helper_compress_file(obj1_target, obj1_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj1_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file_background_tests[0]):
            with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
                with patch.object(Dispatcher, 'query_task',
                                  dispatch_query_mock):
                    dl_kwargs = {
                        'logged_in_username': "******",
                        'access_token': "asdf",
                        'id_token': "1234",
                        'dataset_owner': "default",
                        'dataset_name': "dataset100",
                        'labbook_owner': None,
                        'labbook_name': None,
                        'keys': ["test1.txt"],
                        'config_file': mock_config_file_background_tests[0]
                    }

                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                        **dl_kwargs)
                    assert os.path.isfile(obj1_target) is True

                    decompressor = snappy.StreamDecompressor()
                    with open(obj1_source, 'rb') as dd:
                        source1 = decompressor.decompress(dd.read())
                        source1 += decompressor.flush()
                    with open(obj1_target, 'rt') as dd:
                        dest1 = dd.read()
                    assert source1.decode("utf-8") == dest1
    def test_download_dataset_files_file_fail(
            self, mock_config_file_background_tests):
        def dispatch_query_mock(self, job_key):
            # mock the job actually running and returning status
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={
                                 'completed_bytes': '0',
                                 'failure_keys': 'test1.txt'
                             })

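        # Run pull_objects inline; without mocked responses the download fails and
        # the failure is surfaced through the job metadata above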
        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
            return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        helper_compress_file(obj1_target, obj1_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj1_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file_background_tests[0]):
            with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
                with patch.object(Dispatcher, 'query_task',
                                  dispatch_query_mock):
                    dl_kwargs = {
                        'logged_in_username': "******",
                        'access_token': "asdf",
                        'id_token': "1234",
                        'dataset_owner': "default",
                        'dataset_name': "dataset100",
                        'labbook_owner': None,
                        'labbook_name': None,
                        'keys': ["test1.txt"],
                        'config_file': mock_config_file_background_tests[0]
                    }

                    with pytest.raises(IOError):
                        gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                            **dl_kwargs)
                    assert os.path.isfile(obj1_target) is False