Example 1
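    # A 403 from the object service should mark the object to be skipped:
    # the presigned URL stays empty and skip_object is set to True.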
    async def test_presigneds3upload_get_presigned_s3_url_skip(self, event_loop, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        sb.set_default_configuration("test-user", "abcd", '1234')
        ds = mock_dataset_with_cache_dir[0]

        object_id = "abcd1234"
        object_service_root = f"{sb._object_service_endpoint(ds)}/{ds.namespace}/{ds.name}"

        headers = sb._object_service_headers()
        upload_chunk_size = 40000
        object_details = PushObject(object_path=f"/tmp/{object_id}",
                                    revision=ds.git.repo.head.commit.hexsha,
                                    dataset_path='myfile1.txt')
        psu = PresignedS3Upload(object_service_root, headers, upload_chunk_size, object_details)

        with aioresponses() as mocked_responses:
            async with aiohttp.ClientSession() as session:
                mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                                     payload={
                                         "presigned_url": "https://dummyurl.com?params=1",
                                         "key_id": "asdfasdf",
                                         "namespace": ds.namespace,
                                         "obj_id": object_id,
                                         "dataset": ds.name
                                     },
                                     status=403)

                await psu.get_presigned_s3_url(session)

        assert psu.presigned_s3_url == ""
        assert psu.skip_object is True
Example 2
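    # Lazily creates and caches the storage backend for this dataset,
    # applying the stored backend configuration on first access.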
    @property
    def backend(self) -> Union[ManagedStorageBackend, UnmanagedStorageBackend]:
        """Property to access the storage backend for this dataset"""
        if not self._backend:
            self._backend = get_storage_backend(self.storage_type)
            self._backend.configuration = self.backend_config

        return self._backend
Example 3
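    # Pushes two objects: each signing request returns a presigned URL and each
    # presigned upload succeeds, so both objects end up in result.success.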
    def test_push_objects(self, mock_dataset_with_cache_dir, temp_directories):
        with aioresponses() as mocked_responses:
            sb = get_storage_backend("gigantum_object_v1")

            ds = mock_dataset_with_cache_dir[0]

            sb.set_default_configuration(ds.namespace, "abcd", '1234')

            object_dir, compressed_dir = temp_directories

            obj1_id = uuid.uuid4().hex
            obj2_id = uuid.uuid4().hex

            obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
            obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
            assert os.path.isfile(obj1_src_path) is True
            assert os.path.isfile(obj2_src_path) is True

            objects = [PushObject(object_path=obj1_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile1.txt'),
                       PushObject(object_path=obj2_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile2.txt')
                       ]

            mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                                 payload={
                                         "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                         "namespace": ds.namespace,
                                         "key_id": "hghghg",
                                         "obj_id": obj1_id,
                                         "dataset": ds.name
                                 },
                                 status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj1_id}?params=1",
                                 payload={},
                                 status=200)

            mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                                 payload={
                                            "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                            "namespace": ds.namespace,
                                            "key_id": "hghghg",
                                            "obj_id": obj2_id,
                                            "dataset": ds.name
                                 },
                                 status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj2_id}?params=1",
                                 payload={},
                                 status=200)

            result = sb.push_objects(ds, objects, updater)
            assert len(result.success) == 2
            assert len(result.failure) == 0
            assert isinstance(result, PushResult) is True
            assert isinstance(result.success[0], PushObject) is True
            assert result.success[0].object_path != result.success[1].object_path
            assert result.success[0].object_path in [obj1_src_path, obj2_src_path]
            assert result.success[1].object_path in [obj1_src_path, obj2_src_path]
Example 4
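    # finalize_pull should report completion through the updater callback.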
    def test_finalize_pull(self, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        ds = mock_dataset_with_cache_dir[0]

        def check_updater(msg):
            assert msg == "Done pulling objects from tester/dataset-1"

        sb.finalize_pull(ds, check_updater)
Example 5
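    # The object service headers must carry the bearer token, identity token,
    # and JSON content negotiation headers.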
    def test_get_request_headers(self):
        sb = get_storage_backend("gigantum_object_v1")
        sb.set_default_configuration("test-user", "abcd", '1234')

        headers = sb._object_service_headers()

        assert headers['Authorization'] == "Bearer abcd"
        assert headers['Identity'] == "1234"
        assert headers['Accept'] == 'application/json'
        assert headers['Content-Type'] == 'application/json'
Example 6
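    # Loads the dataset type metadata from the storage backend on first access
    # and populates the Type attributes from it.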
    def _load_info(self):
        """Private method to load the metadata for the Type"""
        if not self._dataset_type_data:
            sb = get_storage_backend(self.storage_type)

            self._dataset_type_data = sb.metadata

        self.name = self._dataset_type_data['name']
        self.description = self._dataset_type_data['description']
        self.readme = self._dataset_type_data['readme']
        self.tags = self._dataset_type_data['tags']
        self.icon = self._dataset_type_data['icon']
        self.url = self._dataset_type_data['url']
        self.is_managed = self._dataset_type_data['is_managed']
Example 7
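    # prepare_pull requires username, bearer token, and id token in the
    # configuration; it raises ValueError until all three are present.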
    def test_prepare_pull(self, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        ds = mock_dataset_with_cache_dir[0]

        with pytest.raises(ValueError):
            sb.prepare_pull(ds, [], updater)

        sb.configuration['username'] = "******"
        with pytest.raises(ValueError):
            sb.prepare_pull(ds, [], updater)

        sb.configuration['gigantum_bearer_token'] = "asdf"
        with pytest.raises(ValueError):
            sb.prepare_pull(ds, [], updater)

        sb.configuration['gigantum_id_token'] = "1234"
        sb.prepare_pull(ds, [], updater)
Example 8
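    # Variant of the metadata loader that also records whether the backend is
    # managed and whether an unmanaged backend can update from the remote.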
    def _load_info(self):
        """Private method to load the metadata for the Type"""
        if not self._dataset_type_data:
            sb = get_storage_backend(self.storage_type)

            self._dataset_type_data = sb.metadata
            self._dataset_type_data['is_managed'] = sb.is_managed
            if sb.is_managed is False:
                self._dataset_type_data[
                    'can_update_unmanaged_from_remote'] = sb.can_update_from_remote
            else:
                self._dataset_type_data[
                    'can_update_unmanaged_from_remote'] = False

        self.name = self._dataset_type_data['name']
        self.description = self._dataset_type_data['description']
        self.readme = self._dataset_type_data['readme']
        self.tags = self._dataset_type_data['tags']
        self.icon = self._dataset_type_data['icon']
        self.url = self._dataset_type_data['url']
        self.is_managed = self._dataset_type_data['is_managed']
        self.can_update_unmanaged_from_remote = self._dataset_type_data[
            'can_update_unmanaged_from_remote']
Example 9
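    # One signing request fails with HTTP 400 while the other succeeds: the pull
    # result records one failure and one success, and the pulled object
    # decompresses back to the original content.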
    def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
        with aioresponses() as mocked_responses:
            sb = get_storage_backend("gigantum_object_v1")
            ds = mock_dataset_with_cache_dir[0]
            sb.set_default_configuration(ds.namespace, "abcd", '1234')

            object_dir, compressed_dir = temp_directories

            obj1_id = uuid.uuid4().hex
            obj2_id = uuid.uuid4().hex

            obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
            obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
            assert os.path.isfile(obj1_src_path) is True
            assert os.path.isfile(obj2_src_path) is True

            obj1_compressed_path = os.path.join(compressed_dir, obj1_id)
            obj2_compressed_path = os.path.join(compressed_dir, obj2_id)
            helper_compress_file(obj1_src_path, obj1_compressed_path)
            helper_compress_file(obj2_src_path, obj2_compressed_path)

            assert os.path.isfile(obj1_src_path) is False
            assert os.path.isfile(obj2_src_path) is False
            assert os.path.isfile(obj1_compressed_path) is True
            assert os.path.isfile(obj2_compressed_path) is True

            check_info = {obj1_src_path: obj1_compressed_path,
                          obj2_src_path: obj2_compressed_path}

            objects = [PullObject(object_path=obj1_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile1.txt'),
                       PullObject(object_path=obj2_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile2.txt')
                       ]

            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                                 payload={
                                         "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                         "namespace": ds.namespace,
                                         "obj_id": obj1_id,
                                         "dataset": ds.name
                                 },
                                 status=400)

            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                                 payload={
                                         "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                         "namespace": ds.namespace,
                                         "obj_id": obj2_id,
                                         "dataset": ds.name
                                 },
                                 status=200)

            with open(obj2_compressed_path, 'rb') as data2:
                mocked_responses.get(f"https://dummyurl.com/{obj2_id}?params=1",
                                     body=data2.read(), status=200,
                                     content_type='application/octet-stream')

            result = sb.pull_objects(ds, objects, updater)
            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert isinstance(result, PullResult) is True
            assert isinstance(result.success[0], PullObject) is True
            assert result.success[0].object_path == obj2_src_path
            assert result.failure[0].object_path == obj1_src_path

            assert os.path.isfile(result.success[0].object_path) is True
            assert os.path.isfile(result.failure[0].object_path) is False

            decompressor = snappy.StreamDecompressor()
            with open(check_info[result.success[0].object_path], 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(result.success[0].object_path, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
Example 10
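    # The object service endpoint for the default configuration is the hosted
    # Gigantum object API.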
    def test_get_service_endpoint(self, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        ds = mock_dataset_with_cache_dir[0]

        assert sb._object_service_endpoint(ds) == "https://api.gigantum.com/object-v1"
Example 11
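    # "gigantum_object_v1" resolves to a GigantumObjectStore backend.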
    def test_get_storage_backend(self):
        sb = get_storage_backend("gigantum_object_v1")

        assert isinstance(sb, GigantumObjectStore)
Example 12
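    # With a complete default configuration, prepare_push runs without raising.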
    def test_prepare_push_errors(self, mock_dataset_with_cache_dir):
        sb = get_storage_backend("gigantum_object_v1")
        ds = mock_dataset_with_cache_dir[0]

        sb.set_default_configuration("test-user", "abcd", '1234')
        sb.prepare_push(ds, [], updater)
Example 13
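    # An unknown storage type identifier raises GigantumException.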
    def test_get_storage_backend_invalid(self):
        with pytest.raises(GigantumException):
            get_storage_backend("dfsakjldfghkljfgds")
Example 14
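    # "local_filesystem" resolves to the unmanaged LocalFilesystem backend.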
    def test_get_storage_backend_unmanaged(self, mock_enable_unmanaged_for_testing):
        sb = get_storage_backend("local_filesystem")

        assert isinstance(sb, LocalFilesystem)
Example 15
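    # Same lookup with a local-filesystem dataset fixture in place; the result
    # is still a LocalFilesystem backend.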
    def test_get_storage_backend(self, mock_dataset_with_cache_dir_local):
        sb = get_storage_backend("local_filesystem")

        assert isinstance(sb, LocalFilesystem)
Example 16
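    # "public_s3_bucket" resolves to the PublicS3Bucket backend.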
    def test_get_storage_backend(self, mock_config_class):
        sb = get_storage_backend("public_s3_bucket")

        assert isinstance(sb, PublicS3Bucket)