def test_push_objects(self, mock_dataset_with_cache_dir, temp_directories):
    with aioresponses() as mocked_responses:
        sb = get_storage_backend("gigantum_object_v1")

        ds = mock_dataset_with_cache_dir[0]
        sb.set_default_configuration(ds.namespace, "abcd", '1234')

        object_dir, compressed_dir = temp_directories

        obj1_id = uuid.uuid4().hex
        obj2_id = uuid.uuid4().hex

        # Write two dummy objects to disk so there is something to push
        obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
        obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
        assert os.path.isfile(obj1_src_path) is True
        assert os.path.isfile(obj2_src_path) is True

        objects = [PushObject(object_path=obj1_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile1.txt'),
                   PushObject(object_path=obj2_src_path,
                              revision=ds.git.repo.head.commit.hexsha,
                              dataset_path='myfile2.txt')]

        # Mock the object service (which hands back a presigned URL) and the
        # subsequent S3 PUT for each object
        mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                             payload={
                                 "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                 "namespace": ds.namespace,
                                 "key_id": "hghghg",
                                 "obj_id": obj1_id,
                                 "dataset": ds.name
                             },
                             status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj1_id}?params=1", payload={}, status=200)

        mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                             payload={
                                 "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                 "namespace": ds.namespace,
                                 "key_id": "hghghg",
                                 "obj_id": obj2_id,
                                 "dataset": ds.name
                             },
                             status=200)
        mocked_responses.put(f"https://dummyurl.com/{obj2_id}?params=1", payload={}, status=200)

        result = sb.push_objects(ds, objects, updater)

        # Both objects should have pushed successfully
        assert len(result.success) == 2
        assert len(result.failure) == 0
        assert isinstance(result, PushResult) is True
        assert isinstance(result.success[0], PushObject) is True
        assert result.success[0].object_path != result.success[1].object_path
        assert result.success[0].object_path in [obj1_src_path, obj2_src_path]
        assert result.success[1].object_path in [obj1_src_path, obj2_src_path]
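# The test above relies on two helpers defined elsewhere in this test module.
# Minimal sketches consistent with how they are used above (assumptions, not
# the real implementations):

def helper_write_object(object_dir, object_id, contents):
    """Hypothetical sketch: write a dummy object file and return its path."""
    object_path = os.path.join(object_dir, object_id)
    with open(object_path, 'wt') as f:
        f.write(contents)
    return object_path


def updater(msg) -> None:
    """Hypothetical sketch: progress callback passed through to push_objects."""
    print(msg)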
async def test_presigneds3upload_get_presigned_s3_url_skip(self, event_loop, mock_dataset_with_cache_dir):
    sb = get_storage_backend("gigantum_object_v1")
    sb.set_default_configuration("test-user", "abcd", '1234')
    ds = mock_dataset_with_cache_dir[0]

    object_id = "abcd1234"
    object_service_root = f"{sb._object_service_endpoint(ds)}/{ds.namespace}/{ds.name}"

    headers = sb._object_service_headers()
    upload_chunk_size = 40000

    object_details = PushObject(object_path=f"/tmp/{object_id}",
                                revision=ds.git.repo.head.commit.hexsha,
                                dataset_path='myfile1.txt')
    psu = PresignedS3Upload(object_service_root, headers, upload_chunk_size, object_details)

    with aioresponses() as mocked_responses:
        async with aiohttp.ClientSession() as session:
            # Object service responds with a 403, so no presigned URL should be stored
            mocked_responses.put(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{object_id}',
                                 payload={
                                     "presigned_url": "https://dummyurl.com?params=1",
                                     "key_id": "asdfasdf",
                                     "namespace": ds.namespace,
                                     "obj_id": object_id,
                                     "dataset": ds.name
                                 },
                                 status=403)

            await psu.get_presigned_s3_url(session)
            assert psu.presigned_s3_url == ""
            assert psu.skip_object is True
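# Note on the behavior asserted above: a 403 from the object service does not
# raise; `get_presigned_s3_url` leaves `presigned_s3_url` empty and flags the
# object via `skip_object`, presumably so callers can skip objects that already
# exist upstream rather than failing the whole push.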
def get_failed_objects(self) -> List[PushObject]:
    """Get the failed objects from the underlying call to `push_objects`"""
    failed_objs: List[PushObject] = list()
    if self._job_status:
        if 'failures' in self._job_status.meta:
            fail_str = self._job_status.meta['failures']
            if len(fail_str) > 0:
                # Failures are serialized as comma-separated, pipe-delimited records
                failure_data = fail_str.split(',')
                for fd in failure_data:
                    obj_path, dataset_path, revision = fd.split("|")
                    failed_objs.append(PushObject(object_path=obj_path,
                                                  dataset_path=dataset_path,
                                                  revision=revision))

    return failed_objs
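# The loop above implies `_job_status.meta['failures']` is a comma-separated
# string of pipe-delimited records in the order object_path|dataset_path|revision,
# e.g. "/objects/ab12|myfile1.txt|<hexsha>,/objects/cd34|myfile2.txt|<hexsha>".
# A minimal sketch of the matching encoder (hypothetical helper, not part of
# the source, assuming that format):

def _encode_failures(failed: List[PushObject]) -> str:
    """Hypothetical sketch: serialize failed objects into the string parsed above."""
    return ','.join(f"{o.object_path}|{o.dataset_path}|{o.revision}" for o in failed)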
def objects_to_push(self, remove_duplicates: bool = False) -> List[PushObject]:
    """Return a list of named tuples of all objects that need to be pushed

    Returns:
        List[namedtuple]
    """
    objects = list()
    if os.path.exists(self.push_dir):
        push_files = [f for f in os.listdir(self.push_dir)
                      if os.path.isfile(os.path.join(self.push_dir, f))]

        if push_files:
            object_ids: List[str] = list()
            for pf in push_files:
                if os.path.basename(pf) == '.DS_Store':
                    continue

                if not self._commit_in_branch(pf):
                    continue

                with open(os.path.join(self.push_dir, pf), 'rt') as pfh:
                    lines = pfh.readlines()

                lines = sorted(lines)
                for line in lines:
                    line = line.strip()
                    dataset_path, object_path = line.split(',')
                    _, object_id = object_path.rsplit('/', 1)

                    # Handle de-duplicating objects if the backend supports it
                    if remove_duplicates is True:
                        if object_id in object_ids:
                            continue

                        object_ids.append(object_id)

                    objects.append(PushObject(dataset_path=dataset_path,
                                              object_path=object_path,
                                              revision=pf))

    objects = natsorted(objects, key=attrgetter('dataset_path'))
    return objects
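# `objects_to_push` implies an on-disk layout where each file in `self.push_dir`
# is named for the git revision it was queued under (the filename becomes the
# PushObject `revision`) and holds one `dataset_path,object_path` pair per line.
# A minimal sketch of the writing side (hypothetical helper, not part of the
# source, assuming that layout):

def _queue_push_record(push_dir: str, revision: str, dataset_path: str, object_path: str) -> None:
    """Hypothetical sketch: append a queue record to the push file for `revision`."""
    os.makedirs(push_dir, exist_ok=True)
    with open(os.path.join(push_dir, revision), 'at') as pfh:
        pfh.write(f"{dataset_path},{object_path}\n")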