def _push_dataset_objects(self, dataset: Dataset, logged_in_username: str,
                          feedback_callback: Callable, access_token,
                          id_token) -> None:
    """Push a dataset's objects through an IOManager, then link the revision.

    Args:
        dataset: Dataset whose objects are pushed
        logged_in_username: username for the currently logged in user
        feedback_callback: callable handed to the push as `status_update_fn`
        access_token: token passed to the backend's default configuration
        id_token: token passed to the backend's default configuration

    Returns:
        None
    """
    # Configure the backend with the user's identity and tokens first
    backend = dataset.backend
    backend.set_default_configuration(logged_in_username, access_token,
                                      id_token)

    io_manager = IOManager(dataset, Manifest(dataset, logged_in_username))
    io_manager.push_objects(status_update_fn=feedback_callback)

    # link_revision() is invoked on the manifest held by the IO manager
    io_manager.manifest.link_revision()
    def test_push_objects_with_failure(self, mock_dataset_with_manifest):
        """Push two objects where the second upload is answered with HTTP 400.

        Both presigned-URL requests are mocked to succeed; the upload to the
        first presigned URL returns 200 and the upload to the second returns
        400. The push result must report the first object as a success and
        the second one as a failure.
        """
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        # Create a directory and two files in the current revision's cache
        revision = manifest.dataset_revision
        cache_root = manifest.cache_mgr.cache_root
        os.makedirs(os.path.join(cache_root, revision, "other_dir"))
        for filename, content in (("test1.txt", "test content 1"),
                                  ("test2.txt", "test content 2")):
            helper_append_file(cache_root, revision, filename, content)
        manifest.sweep_all_changes()

        pending = iom.objects_to_push()
        assert len(pending) == 2
        _, first_id = pending[0].object_path.rsplit('/', 1)
        _, second_id = pending[1].object_path.rsplit('/', 1)

        # (object id, status returned by the mocked upload to the presigned URL)
        upload_outcomes = ((first_id, 200), (second_id, 400))

        with aioresponses() as mocked_responses:
            for object_id, upload_status in upload_outcomes:
                presigned_url = f"https://dummyurl.com/{object_id}?params=1"
                # Request for the presigned URL always succeeds
                mocked_responses.put(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                    payload={
                        "presigned_url": presigned_url,
                        "namespace": ds.namespace,
                        "key_id": "hghghg",
                        "obj_id": object_id,
                        "dataset": ds.name
                    },
                    status=200)
                # Upload to the presigned URL succeeds or fails per object
                mocked_responses.put(presigned_url,
                                     payload={},
                                     status=upload_status)

            # The push directory holds one entry before and after the push
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.push_objects()
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1

            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert result.success[0].object_path == pending[0].object_path
            assert result.failure[0].object_path == pending[1].object_path
Exemple #3
0
def push_dataset_objects(objs: List[PushObject],
                         logged_in_username: str,
                         access_token: str,
                         id_token: str,
                         dataset_owner: str,
                         dataset_name: str,
                         config_file: str = None) -> None:
    """Method to push a collection of objects to a dataset's backend

    While the push runs, the number of completed bytes is accumulated in the
    current job's metadata. Once it finishes, the failed objects and the
    result message are stored in the job's metadata as well. Both steps are
    skipped when there is no current job.

    Args:
        objs: List of PushObject instances to push
        logged_in_username: username for the currently logged in user
        access_token: bearer token
        id_token: identity token
        dataset_owner: Owner of the dataset containing the objects to push
        dataset_name: Name of the dataset containing the objects to push
        config_file: config file (used for test mocking)

    Returns:
        None

    Raises:
        Exception: any error raised during the push is logged and re-raised
    """
    logger = LMLogger.get_logger()

    def progress_update_callback(completed_bytes: int) -> None:
        """Add the newly completed bytes to the running total in the job's metadata"""
        current_job = get_current_job()
        if current_job:
            # Start the counter at 0 the first time the callback fires
            previous_total = int(
                current_job.meta.setdefault('completed_bytes', 0))
            current_job.meta['completed_bytes'] = previous_total + completed_bytes
            current_job.save_meta()

    try:
        pid = os.getpid()
        # NOTE(review): the logged text never closes the "(" after the
        # function name; left untouched to keep the log output unchanged.
        logger.info(
            f"(Job {pid}) Starting push_dataset_objects(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        inventory = InventoryManager(config_file=config_file)
        dataset = inventory.load_dataset(logged_in_username, dataset_owner,
                                         dataset_name)

        dataset.namespace = dataset_owner
        dataset.backend.set_default_configuration(logged_in_username,
                                                  access_token, id_token)
        io_manager = IOManager(dataset, Manifest(dataset, logged_in_username))

        result = io_manager.push_objects(
            objs, progress_update_fn=progress_update_callback)

        job = get_current_job()
        if job:
            # One "object_path|dataset_path|revision" record per failed object
            failure_records = (
                f"{x.object_path}|{x.dataset_path}|{x.revision}"
                for x in result.failure)
            job.meta['failures'] = ",".join(failure_records)
            job.meta['message'] = result.message
            job.save_meta()

    except Exception as err:
        logger.exception(err)
        raise
    def test_push_objects_deduplicate(self, mock_dataset_with_manifest,
                                      mock_dataset_head):
        """Push three files, two with identical content, with duplicates removed.

        The two identical files share the same object id, so only two objects
        are mocked and pushed. The result must contain two distinct successes
        and no failures.
        """
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        # Create a directory and three files; test2 and test3 share content
        revision = manifest.dataset_revision
        cache_root = manifest.cache_mgr.cache_root
        os.makedirs(os.path.join(cache_root, revision, "other_dir"))
        for filename, content in (("test1.txt", "test content 1"),
                                  ("test2.txt", "test content dup"),
                                  ("test3.txt", "test content dup")):
            helper_append_file(cache_root, revision, filename, content)
        manifest.sweep_all_changes()

        all_objects = iom.objects_to_push()
        assert len(all_objects) == 3
        _, first_id = all_objects[0].object_path.rsplit('/', 1)
        _, second_id = all_objects[1].object_path.rsplit('/', 1)
        _, third_id = all_objects[2].object_path.rsplit('/', 1)
        assert first_id != second_id
        assert second_id == third_id

        # (object id, Etag header returned by the mocked upload)
        mocked_uploads = ((first_id, 'asdfasdf'), (second_id, '12341234'))

        with aioresponses() as mocked_responses:
            for object_id, etag in mocked_uploads:
                presigned_url = f"https://dummyurl.com/{object_id}?params=1"
                # Request for the presigned URL
                mocked_responses.put(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                    payload={
                        "presigned_url": presigned_url,
                        "namespace": ds.namespace,
                        "key_id": "hghghg",
                        "obj_id": object_id,
                        "dataset": ds.name
                    },
                    status=200)
                # Upload to the presigned URL
                mocked_responses.put(presigned_url,
                                     headers={'Etag': etag},
                                     status=200)

            # The push directory holds one entry before and after the push
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1
            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            unique_objects = iom.objects_to_push(remove_duplicates=True)
            result = iom.push_objects(unique_objects, chunk_update_callback)
            assert len(glob.glob(f'{iom.push_dir}/*')) == 1

            assert len(result.success) == 2
            assert len(result.failure) == 0
            assert isinstance(result, PushResult) is True
            assert isinstance(result.success[0], PushObject) is True

            # Both successes are distinct and come from the deduplicated list
            first_success = result.success[0].object_path
            second_success = result.success[1].object_path
            assert first_success != second_success
            expected_paths = [
                unique_objects[0].object_path, unique_objects[1].object_path
            ]
            assert first_success in expected_paths
            assert second_success in expected_paths