Example #1
    async def test_presigneds3download_get_presigned_s3_url_error(self, event_loop, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        sb.set_default_configuration("test-user", "abcd", '1234')
        ds = mock_dataset_with_cache_dir[0]

        object_id = "abcd1234"
        object_service_root = f"{sb._object_service_endpoint(ds)}/{ds.namespace}/{ds.name}"

        headers = sb._object_service_headers()
        download_chunk_size = 40000
        object_details = PullObject(object_path=f"/tmp/{object_id}",
                                    revision=ds.git.repo.head.commit.hexsha,
                                    dataset_path='myfile1.txt')
        psu = PresignedS3Download(object_service_root, headers, download_chunk_size, object_details)

        with aioresponses() as mocked_responses:
            async with aiohttp.ClientSession() as session:
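                # The object service signing request is mocked to return a 500 error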
                mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                                     payload={
                                         "presigned_url": "https://dummyurl.com?params=2",
                                         "namespace": ds.namespace,
                                         "obj_id": object_id,
                                         "dataset": ds.name
                                     },
                                     status=500)
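                # A failed signing request should surface as an IOError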
                with pytest.raises(IOError):
                    await psu.get_presigned_s3_url(session)
Example #2
    def _gen_pull_objects(self, keys: List[str]) -> List[PullObject]:
        """

        Args:
            keys:

        Returns:

        """
        result = list()
        revision = self.manifest.dataset_revision
        for key in keys:
            data = self.manifest.dataset_to_object_path(key)
            result.append(PullObject(object_path=data, revision=revision, dataset_path=key))

        return result
Example #3
    def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
        with aioresponses() as mocked_responses:
            sb = get_storage_backend("gigantum_object_v1")
            ds = mock_dataset_with_cache_dir[0]
            sb.set_default_configuration(ds.namespace, "abcd", '1234')

            object_dir, compressed_dir = temp_directories

            obj1_id = uuid.uuid4().hex
            obj2_id = uuid.uuid4().hex

            obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
            obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
            assert os.path.isfile(obj1_src_path) is True
            assert os.path.isfile(obj2_src_path) is True

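            # Compress each object into the compressed directory (the helper removes the source file)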
            obj1_compressed_path = os.path.join(compressed_dir, obj1_id)
            obj2_compressed_path = os.path.join(compressed_dir, obj2_id)
            helper_compress_file(obj1_src_path, obj1_compressed_path)
            helper_compress_file(obj2_src_path, obj2_compressed_path)

            assert os.path.isfile(obj1_src_path) is False
            assert os.path.isfile(obj2_src_path) is False
            assert os.path.isfile(obj1_compressed_path) is True
            assert os.path.isfile(obj2_compressed_path) is True

            check_info = {obj1_src_path: obj1_compressed_path,
                          obj2_src_path: obj2_compressed_path}

            objects = [PullObject(object_path=obj1_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile1.txt'),
                       PullObject(object_path=obj2_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile2.txt')
                       ]

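            # The signing request for obj1 fails with a 400, so its download never starts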
            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                                 payload={
                                     "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                     "namespace": ds.namespace,
                                     "obj_id": obj1_id,
                                     "dataset": ds.name
                                 },
                                 status=400)

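            # The signing request for obj2 succeeds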
            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                                 payload={
                                     "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                     "namespace": ds.namespace,
                                     "obj_id": obj2_id,
                                     "dataset": ds.name
                                 },
                                 status=200)

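            # Serve obj2's compressed bytes when its presigned URL is fetched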
            with open(obj2_compressed_path, 'rb') as data2:
                mocked_responses.get(f"https://dummyurl.com/{obj2_id}?params=1",
                                     body=data2.read(), status=200,
                                     content_type='application/octet-stream')

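            # obj2 should pull successfully while obj1 fails during signing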
            result = sb.pull_objects(ds, objects, updater)
            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert isinstance(result, PullResult) is True
            assert isinstance(result.success[0], PullObject) is True
            assert result.success[0].object_path == obj2_src_path
            assert result.failure[0].object_path == obj1_src_path

            assert os.path.isfile(result.success[0].object_path) is True
            assert os.path.isfile(result.failure[0].object_path) is False

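            # Decompress the stored source data and confirm it matches the pulled file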
            decompressor = snappy.StreamDecompressor()
            with open(check_info[result.success[0].object_path], 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(result.success[0].object_path, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
Example #4
    def test_pull(self, mock_dataset_with_local_dir):
        def chunk_update_callback(completed_bytes: int):
            """Method to update the job's metadata and provide feedback to the UI"""
            assert type(completed_bytes) == int
            assert completed_bytes > 0

        ds = mock_dataset_with_local_dir[0]
        m = Manifest(ds, 'tester')
        assert len(m.manifest.keys()) == 0
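        # Populate the dataset from the remote so there are objects to pull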
        ds.backend.update_from_remote(ds, updater)
        m = Manifest(ds, 'tester')

        # Remove revision dir
        shutil.rmtree(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))

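        # Build a PullObject for each key so the files can be re-pulled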
        keys = ['test1.txt', 'test2.txt', 'subdir/test3.txt']
        pull_objects = list()
        for key in keys:
            pull_objects.append(
                PullObject(object_path=m.dataset_to_object_path(key),
                           revision=m.dataset_revision,
                           dataset_path=key))
            # Remove objects
            os.remove(m.dataset_to_object_path(key))

        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt')) is False
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test2.txt')) is False
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'subdir',
                         'test3.txt')) is False

        for key in keys:
            assert os.path.isfile(m.dataset_to_object_path(key)) is False

        # Pull 1 File
        ds.backend.pull_objects(ds, [pull_objects[0]], chunk_update_callback)
        assert os.path.isdir(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt')) is True
        assert os.path.isfile(m.dataset_to_object_path('test1.txt')) is True

        # Pull all Files
        ds.backend.pull_objects(ds, pull_objects, chunk_update_callback)
        assert os.path.isdir(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test2.txt')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision, 'subdir',
                         'test3.txt')) is True
        for key in keys:
            assert os.path.isfile(m.dataset_to_object_path(key)) is True
Example #5
    def test_pull(self, mock_config_class, mock_public_bucket):
        im = mock_config_class[0]
        ds = im.create_dataset(USERNAME,
                               USERNAME,
                               'dataset-1',
                               description="my dataset 1",
                               storage_type="public_s3_bucket")
        ds.backend.set_default_configuration(USERNAME, 'fakebearertoken',
                                             'fakeidtoken')

        # Configure backend completely
        current_config = ds.backend_config
        current_config['Bucket Name'] = mock_public_bucket
        current_config['Prefix'] = ""
        ds.backend_config = current_config

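        # Sync the dataset from the public bucket to populate the manifest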
        ds.backend.update_from_remote(ds, updater)
        m = Manifest(ds, 'tester')

        # Remove revision dir and objects from cache
        shutil.rmtree(os.path.join(m.cache_mgr.cache_root, m.dataset_revision))

        keys = [
            'test-file-1.bin', 'metadata/test-file-3.bin',
            'metadata/sub/test-file-5.bin'
        ]
        pull_objects = list()
        for key in keys:
            pull_objects.append(
                PullObject(object_path=m.dataset_to_object_path(key),
                           revision=m.dataset_revision,
                           dataset_path=key))
            # Remove objects
            os.remove(m.dataset_to_object_path(key))

        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test-file-1.bin')) is False
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'metadata', 'test-file-3.bin')) is False
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'metadata', 'sub', 'test-file-5.bin')) is False

        for key in keys:
            assert os.path.isfile(m.dataset_to_object_path(key)) is False

        # Pull 1 file (test-file-1.bin and test-file-2.bin have duplicate contents, so both appear)
        ds.backend.pull_objects(ds, [pull_objects[0]], chunk_update_callback)
        assert os.path.isdir(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test-file-1.bin')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test-file-2.bin')) is True
        assert os.path.isfile(
            m.dataset_to_object_path('test-file-1.bin')) is True
        assert os.path.isfile(
            m.dataset_to_object_path('test-file-2.bin')) is True

        # Pull all Files
        ds.backend.pull_objects(ds, pull_objects, chunk_update_callback)
        assert os.path.isdir(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision))
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test-file-1.bin')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test-file-2.bin')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'metadata', 'test-file-3.bin')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'metadata', 'test-file-4.bin')) is True
        assert os.path.isfile(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'metadata', 'sub', 'test-file-5.bin')) is True
        for key in keys:
            assert os.path.isfile(m.dataset_to_object_path(key)) is True