Ejemplo n.º 1
0
    def test_pull_objects(self, mock_dataset_with_manifest):
        """Pull two objects, one key at a time, through ``IOManager.pull_objects``.

        Two files are added to the dataset revision and swept into the manifest.
        Each resulting object is run through ``helper_compress_file`` into a
        scratch file (the asserts confirm the object file is gone afterwards),
        and the scratch file is served back through mocked HTTP responses. Each
        pull must report one success and no failures, and must restore the
        original content of exactly the requested file.
        """
        # The third element of the fixture is not needed by this test.
        ds, manifest, _ = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        def register_object(mocked_responses, obj_id, compressed_path):
            """Mock the signing request and the presigned download for one object."""
            presigned_url = f"https://dummyurl.com/{obj_id}?params=1"
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id}',
                payload={
                    "presigned_url": presigned_url,
                    "namespace": ds.namespace,
                    "obj_id": obj_id,
                    "dataset": ds.name
                },
                status=200)

            with open(compressed_path, 'rb') as compressed:
                mocked_responses.get(
                    presigned_url,
                    body=compressed.read(),
                    status=200,
                    content_type='application/octet-stream')

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path
        # The object id is the final path component of the object file.
        _, obj_id_1 = obj1_target.rsplit('/', 1)
        _, obj_id_2 = obj2_target.rsplit('/', 1)

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        try:
            assert os.path.exists(obj1_target)
            assert os.path.exists(obj2_target)

            helper_compress_file(obj1_target, obj1_source)
            helper_compress_file(obj2_target, obj2_source)

            assert not os.path.isfile(obj1_target)
            assert not os.path.isfile(obj2_target)
            assert os.path.isfile(obj1_source)
            assert os.path.isfile(obj2_source)

            with aioresponses() as mocked_responses:
                register_object(mocked_responses, obj_id_1, obj1_source)
                register_object(mocked_responses, obj_id_2, obj2_source)

                assert len(glob.glob(f'{iom.push_dir}/*')) == 1
                iom.dataset.backend.set_default_configuration(
                    "test-user", "abcd", '1234')

                # First pull: only test1.txt should come back.
                result = iom.pull_objects(keys=["test1.txt"])
                assert len(glob.glob(f'{iom.push_dir}/*')) == 1
                assert len(result.success) == 1
                assert len(result.failure) == 0
                assert result.success[0].object_path == obj_to_push[0].object_path

                assert os.path.isfile(obj1_target)
                assert not os.path.isfile(obj2_target)
                with open(obj1_target, 'rt') as dd:
                    assert "test content 1" == dd.read()

                # Second pull: test2.txt is restored and test1.txt is untouched.
                result = iom.pull_objects(keys=["test2.txt"])
                assert len(glob.glob(f'{iom.push_dir}/*')) == 1
                assert len(result.success) == 1
                assert len(result.failure) == 0
                assert result.success[0].object_path == obj_to_push[1].object_path

                assert os.path.isfile(obj1_target)
                assert os.path.isfile(obj2_target)
                with open(obj1_target, 'rt') as dd:
                    assert "test content 1" == dd.read()
                with open(obj2_target, 'rt') as dd:
                    assert "test content 2" == dd.read()
        finally:
            # The scratch files are created directly under /tmp with random
            # names and nothing else in this test removes them, so clean up
            # here to avoid leaving a pair behind on every run.
            for scratch_file in (obj1_source, obj2_source):
                if os.path.isfile(scratch_file):
                    os.remove(scratch_file)
Ejemplo n.º 2
0
    def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
        """Check that a rejected signing request fails only the affected object.

        Two objects are written and compressed into the scratch directories.
        The signing endpoint is mocked to answer 400 for the first object and
        200 for the second. The backend pull must then report the second
        object as a success (restored on disk with its original content) and
        the first as a failure (still absent on disk).
        """
        with aioresponses() as mocked_responses:
            backend = get_storage_backend("gigantum_object_v1")
            ds = mock_dataset_with_cache_dir[0]
            backend.set_default_configuration(ds.namespace, "abcd", '1234')

            object_dir, compressed_dir = temp_directories

            first_id = uuid.uuid4().hex
            second_id = uuid.uuid4().hex

            first_path = helper_write_object(object_dir, first_id, 'abcd')
            second_path = helper_write_object(object_dir, second_id, '1234')
            for written in (first_path, second_path):
                assert os.path.isfile(written) is True

            first_compressed = os.path.join(compressed_dir, first_id)
            second_compressed = os.path.join(compressed_dir, second_id)
            helper_compress_file(first_path, first_compressed)
            helper_compress_file(second_path, second_compressed)

            # After compression the asserts expect only the compressed copies to exist.
            for original in (first_path, second_path):
                assert os.path.isfile(original) is False
            for compressed in (first_compressed, second_compressed):
                assert os.path.isfile(compressed) is True

            # Maps each object path to the compressed file holding its content.
            check_info = {first_path: first_compressed,
                          second_path: second_compressed}

            objects = [
                PullObject(object_path=path,
                           revision=ds.git.repo.head.commit.hexsha,
                           dataset_path=dataset_path)
                for path, dataset_path in ((first_path, 'myfile1.txt'),
                                           (second_path, 'myfile2.txt'))
            ]

            # Signing is rejected (400) for the first object and accepted (200) for the second.
            for obj_id, signing_status in ((first_id, 400), (second_id, 200)):
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id}',
                    payload={
                        "presigned_url": f"https://dummyurl.com/{obj_id}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id,
                        "dataset": ds.name
                    },
                    status=signing_status)

            # Only the second object gets a download response.
            with open(second_compressed, 'rb') as payload:
                mocked_responses.get(f"https://dummyurl.com/{second_id}?params=1",
                                     body=payload.read(), status=200,
                                     content_type='application/octet-stream')

            result = backend.pull_objects(ds, objects, updater)
            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert isinstance(result, PullResult) is True

            pulled = result.success[0]
            assert isinstance(pulled, PullObject) is True
            assert pulled.object_path == second_path
            assert result.failure[0].object_path == first_path

            assert os.path.isfile(pulled.object_path) is True
            assert os.path.isfile(result.failure[0].object_path) is False

            # The restored file must match the decompressed payload that was served.
            decompressor = snappy.StreamDecompressor()
            with open(check_info[pulled.object_path], 'rb') as compressed_file:
                expected = decompressor.decompress(compressed_file.read())
                expected += decompressor.flush()
            with open(pulled.object_path, 'rt') as restored_file:
                actual = restored_file.read()
            assert expected.decode("utf-8") == actual
Ejemplo n.º 3
0
    def test_download_dataset_files(self, mock_config_file_background_tests,
                                    mock_dataset_head):
        """Download one dataset file through ``download_dataset_files``.

        ``Dispatcher.dispatch_task`` is patched to run ``pull_objects`` inline
        against mocked HTTP responses, and ``Dispatcher.query_task`` is patched
        to report the job as finished. After the download the object file must
        exist again and hold the decompressed content of the scratch file.
        """
        def dispatch_query_mock(self, job_key):
            # Report every queried job as finished.
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '500'})

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            # Run the pull synchronously instead of dispatching it, serving the
            # scratch file through mocked signing and download responses.
            presigned_url = f"https://dummyurl.com/{obj_id_1}?params=1"
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url": presigned_url,
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        presigned_url,
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')
                gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

                return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        obj1_target = obj_to_push[0].object_path
        # The object id is the final path component of the object file.
        _, obj_id_1 = obj1_target.rsplit('/', 1)

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        try:
            assert os.path.exists(obj1_target)
            helper_compress_file(obj1_target, obj1_source)
            assert not os.path.isfile(obj1_target)
            assert os.path.isfile(obj1_source)

            # Clear out from linked dir
            os.remove(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test1.txt'))

            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test1.txt"],
                'config_file': mock_config_file_background_tests[0]
            }

            config_patch = patch.object(
                Configuration, 'find_default_config',
                lambda self: mock_config_file_background_tests[0])
            dispatch_patch = patch.object(Dispatcher, 'dispatch_task',
                                          dispatch_mock)
            query_patch = patch.object(Dispatcher, 'query_task',
                                       dispatch_query_mock)

            with config_patch, dispatch_patch, query_patch:
                gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                    **dl_kwargs)
                assert os.path.isfile(obj1_target)

                # The restored object must match the decompressed scratch file.
                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
        finally:
            # The scratch file is created directly under /tmp with a random
            # name and nothing else in this test removes it.
            if os.path.isfile(obj1_source):
                os.remove(obj1_source)
Ejemplo n.º 4
0
    def test_download_dataset_files_file_fail(
            self, mock_config_file_background_tests):
        """Expect ``download_dataset_files`` to raise when a file fails to pull.

        ``Dispatcher.dispatch_task`` is patched to run ``pull_objects`` inline
        (with no HTTP responses mocked by this test), and
        ``Dispatcher.query_task`` is patched to report ``test1.txt`` under
        ``failure_keys``. The call must raise ``IOError`` and the object file
        must still be absent afterwards.
        """
        def dispatch_query_mock(self, job_key):
            # mock the job actually running and returning status
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={
                                 'completed_bytes': '0',
                                 'failure_keys': 'test1.txt'
                             })

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            # Run the pull synchronously instead of dispatching it.
            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
            return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        try:
            assert os.path.exists(obj1_target)
            helper_compress_file(obj1_target, obj1_source)
            assert not os.path.isfile(obj1_target)
            assert os.path.isfile(obj1_source)

            # Clear out from linked dir
            os.remove(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test1.txt'))

            dl_kwargs = {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': ["test1.txt"],
                'config_file': mock_config_file_background_tests[0]
            }

            config_patch = patch.object(
                Configuration, 'find_default_config',
                lambda self: mock_config_file_background_tests[0])
            dispatch_patch = patch.object(Dispatcher, 'dispatch_task',
                                          dispatch_mock)
            query_patch = patch.object(Dispatcher, 'query_task',
                                       dispatch_query_mock)

            with config_patch, dispatch_patch, query_patch:
                with pytest.raises(IOError):
                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                        **dl_kwargs)
                assert not os.path.isfile(obj1_target)
        finally:
            # The scratch file is created directly under /tmp with a random
            # name and nothing else in this test removes it.
            if os.path.isfile(obj1_source):
                os.remove(obj1_source)
Ejemplo n.º 5
0
    def test_pull_objects(self, mock_config_file, mock_dataset_head):
        """Pull two dataset files, one per call, through the ``pull_objects`` job.

        Two files are added to a new dataset and swept into the manifest. Each
        resulting object is run through ``helper_compress_file`` into a scratch
        file, which is then served back through mocked HTTP responses. After
        each job call the revision is linked manually, and the requested
        object must be restored with the decompressed content of its scratch
        file.
        """
        def register_object(mocked_responses, obj_id, compressed_path):
            """Mock the signing request and the presigned download for one object."""
            presigned_url = f"https://dummyurl.com/{obj_id}?params=1"
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id}',
                payload={
                    "presigned_url": presigned_url,
                    "namespace": ds.namespace,
                    "obj_id": obj_id,
                    "dataset": ds.name
                },
                status=200)

            with open(compressed_path, 'rb') as compressed:
                mocked_responses.get(
                    presigned_url,
                    body=compressed.read(),
                    status=200,
                    content_type='application/octet-stream')

        def pull_kwargs(key):
            """Build the job arguments for pulling a single key."""
            return {
                'logged_in_username': "******",
                'access_token': "asdf",
                'id_token': "1234",
                'dataset_owner': "default",
                'dataset_name': "dataset100",
                'labbook_owner': None,
                'labbook_name': None,
                'keys': [key]
            }

        def assert_restored(compressed_path, object_path):
            """Check the object file holds the decompressed scratch-file content."""
            # A fresh decompressor per file keeps each stream independent.
            decompressor = snappy.StreamDecompressor()
            with open(compressed_path, 'rb') as compressed:
                expected = decompressor.decompress(compressed.read())
                expected += decompressor.flush()
            with open(object_path, 'rt') as restored:
                actual = restored.read()
            assert expected.decode("utf-8") == actual

        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "other_dir"))
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path
        # The object id is the final path component of the object file.
        _, obj_id_1 = obj1_target.rsplit('/', 1)
        _, obj_id_2 = obj2_target.rsplit('/', 1)

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        try:
            assert os.path.exists(obj1_target)
            assert os.path.exists(obj2_target)
            helper_compress_file(obj1_target, obj1_source)
            helper_compress_file(obj2_target, obj2_source)
            assert not os.path.isfile(obj1_target)
            assert not os.path.isfile(obj2_target)
            assert os.path.isfile(obj1_source)
            assert os.path.isfile(obj2_source)

            # Clear out from linked dir
            os.remove(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test1.txt'))
            os.remove(
                os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                             'test2.txt'))

            with patch.object(Configuration, 'find_default_config',
                              lambda self: mock_config_file[0]):
                with aioresponses() as mocked_responses:
                    register_object(mocked_responses, obj_id_1, obj1_source)
                    register_object(mocked_responses, obj_id_2, obj2_source)

                    gtmcore.dispatcher.dataset_jobs.pull_objects(
                        **pull_kwargs("test1.txt"))

                    # Manually link since this is disabled by default in the
                    # job (because in real use, multiple jobs run in parallel
                    # and you only want to link once).
                    m.link_revision()

                    assert os.path.isfile(obj1_target)
                    assert not os.path.isfile(obj2_target)
                    assert_restored(obj1_source, obj1_target)

                    # Download other file
                    gtmcore.dispatcher.dataset_jobs.pull_objects(
                        **pull_kwargs("test2.txt"))

                    # Link again, for the same reason as after the first pull.
                    m.link_revision()

                    assert os.path.isfile(obj1_target)
                    assert os.path.isfile(obj2_target)
                    assert_restored(obj1_source, obj1_target)
                    assert_restored(obj2_source, obj2_target)
        finally:
            # The scratch files are created directly under /tmp with random
            # names and nothing else in this test removes them, so clean up
            # here to avoid leaving a pair behind on every run.
            for scratch_file in (obj1_source, obj2_source):
                if os.path.isfile(scratch_file):
                    os.remove(scratch_file)