def setUpClass(cls):
    super().setUpClass()
    pkg = Package()
    pkg._set_commit_message(cls.parent_commit_message)
    pkg._workflow = {
        'config': f's3://{cls.parent_bucket}/.quilt/workflows/config.yml?versionId=configVersion',
        'id': 'gamma',
        'schemas': {
            'top-secret': f's3://{cls.parent_bucket}/top-secret.schema.json?versionId=schemaVersion'
        },
    }
    pkg.set_meta({'meta': 'old meta'})

    cls.entries = cls.get_pkg_entries()
    for lk, entry in cls.entries.items():
        pkg.set(lk, entry)

    manifest_buf = io.BytesIO()
    pkg._dump(manifest_buf)
    cls.parent_manifest = manifest_buf.getvalue()
    cls.parent_top_hash = pkg.top_hash

    cls.src_params = {
        'parent': {
            'registry': cls.src_registry,
            'name': cls.parent_pkg_name,
            'top_hash': cls.parent_top_hash,
        },
    }
def prepare_pkg(self, *, copy_data):
    expected_pkg = Package()
    pkg_entries = self.entries.items()
    if copy_data:
        pkg_entries = [
            (
                lk,
                e.with_physical_key(
                    PhysicalKey(
                        self.dst_bucket,
                        f'{self.dst_pkg_name}/{lk}',
                        'dst_' + e.physical_key.version_id,
                    )
                ),
            )
            for lk, e in pkg_entries
        ]
    for lk, entry in pkg_entries:
        expected_pkg.set(lk, entry)
    expected_pkg._set_commit_message(None)
    return expected_pkg
class HashCalculationTest(unittest.TestCase):
    def setUp(self):
        self.pkg = Package()
        self.entry_with_hash = PackageEntry(
            PhysicalKey('test-bucket', 'with-hash', 'with-hash'),
            42,
            {'type': 'SHA256', 'value': '0' * 64},
            {},
        )
        self.entry_without_hash = PackageEntry(
            PhysicalKey('test-bucket', 'without-hash', 'without-hash'),
            42,
            None,
            {},
        )
        self.pkg.set('with-hash', self.entry_with_hash)
        self.pkg.set('without-hash', self.entry_without_hash)

    def test_calculate_pkg_hashes(self):
        boto_session = mock.MagicMock()
        with mock.patch.object(
            t4_lambda_pkgpush, 'calculate_pkg_entry_hash'
        ) as calculate_pkg_entry_hash_mock:
            t4_lambda_pkgpush.calculate_pkg_hashes(boto_session, self.pkg)

        calculate_pkg_entry_hash_mock.assert_called_once_with(mock.ANY, self.entry_without_hash)

    @mock.patch.object(t4_lambda_pkgpush, 'S3_HASH_LAMBDA_MAX_FILE_SIZE_BYTES', 1)
    def test_calculate_pkg_hashes_too_large_file_error(self):
        s3_client = mock.MagicMock()
        with pytest.raises(t4_lambda_pkgpush.FileTooLargeForHashing):
            t4_lambda_pkgpush.calculate_pkg_hashes(s3_client, self.pkg)

    def test_calculate_pkg_entry_hash(self):
        get_s3_client_mock = mock.MagicMock()
        s3_client_mock = get_s3_client_mock.return_value
        s3_client_mock.generate_presigned_url.return_value = 'https://example.com'

        with mock.patch(
            "t4_lambda_pkgpush.invoke_hash_lambda",
            return_value='0' * 64,
        ) as invoke_hash_lambda_mock:
            t4_lambda_pkgpush.calculate_pkg_entry_hash(get_s3_client_mock, self.entry_without_hash)

        get_s3_client_mock.assert_called_once_with(self.entry_without_hash.physical_key.bucket)
        invoke_hash_lambda_mock.assert_called_once_with(
            s3_client_mock.generate_presigned_url.return_value
        )
        s3_client_mock.generate_presigned_url.assert_called_once_with(
            ClientMethod='get_object',
            ExpiresIn=t4_lambda_pkgpush.S3_HASH_LAMBDA_SIGNED_URL_EXPIRES_IN_SECONDS,
            Params={
                'Bucket': self.entry_without_hash.physical_key.bucket,
                'Key': self.entry_without_hash.physical_key.path,
                'VersionId': self.entry_without_hash.physical_key.version_id,
            },
        )
        assert self.entry_without_hash.hash == {
            'type': 'SHA256',
            'value': invoke_hash_lambda_mock.return_value,
        }

    def test_invoke_hash_lambda(self):
        lambda_client_stubber = Stubber(t4_lambda_pkgpush.lambda_)
        lambda_client_stubber.activate()
        self.addCleanup(lambda_client_stubber.deactivate)
        test_hash = '0' * 64
        test_url = 'https://example.com'
        lambda_client_stubber.add_response(
            'invoke',
            service_response={
                'Payload': io.BytesIO(b'"%s"' % test_hash.encode()),
            },
            expected_params={
                'FunctionName': t4_lambda_pkgpush.S3_HASH_LAMBDA,
                'Payload': '"%s"' % test_url,
            },
        )
        assert t4_lambda_pkgpush.invoke_hash_lambda(test_url) == test_hash
        lambda_client_stubber.assert_no_pending_responses()

    def test_invoke_hash_lambda_error(self):
        lambda_client_stubber = Stubber(t4_lambda_pkgpush.lambda_)
        lambda_client_stubber.activate()
        self.addCleanup(lambda_client_stubber.deactivate)
        test_url = 'https://example.com'
        lambda_client_stubber.add_response(
            'invoke',
            service_response={
                'FunctionError': 'Unhandled',
                'Payload': io.BytesIO(b'some error info'),
            },
            expected_params={
                'FunctionName': t4_lambda_pkgpush.S3_HASH_LAMBDA,
                'Payload': '"%s"' % test_url,
            },
        )
        with pytest.raises(t4_lambda_pkgpush.S3HashLambdaUnhandledError):
            t4_lambda_pkgpush.invoke_hash_lambda(test_url)
        lambda_client_stubber.assert_no_pending_responses()
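# The two invoke_hash_lambda tests above pin down the helper's observable contract:
# it invokes S3_HASH_LAMBDA with the presigned URL JSON-encoded as the payload,
# returns the JSON-decoded hash string on success, and raises
# S3HashLambdaUnhandledError when the response carries 'FunctionError'. The sketch
# below is inferred from those stubs only; it is an illustration, not the actual
# t4_lambda_pkgpush implementation.
def _invoke_hash_lambda_sketch(url):
    import json

    # JSON-encode the presigned URL; for a plain string this matches '"%s"' % url.
    response = t4_lambda_pkgpush.lambda_.invoke(
        FunctionName=t4_lambda_pkgpush.S3_HASH_LAMBDA,
        Payload=json.dumps(url),
    )
    # boto3 reports unhandled lambda errors via the 'FunctionError' response field.
    if 'FunctionError' in response:
        raise t4_lambda_pkgpush.S3HashLambdaUnhandledError(response['Payload'].read())
    # The success payload is a JSON string containing the hex digest.
    return json.loads(response['Payload'].read())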
def _mock_package_build(self, entries, *, message=..., expected_workflow=...):
    if message is ...:
        message = self.dst_commit_message

    # Use a test package to verify manifest entries
    test_pkg = Package()
    test_pkg.set_meta(self.meta)

    # Mock hashing package objects
    for entry in entries:
        pkey = PhysicalKey.from_url(entry['physical_key'])
        hash_obj = {'type': 'SHA256', 'value': entry['hash']}
        test_entry = PackageEntry(pkey, entry['size'], hash_obj, entry.get('meta'))
        test_pkg.set(entry['logical_key'], entry=test_entry)

    mocked_workflow_data = 'some-workflow-data'
    test_pkg._workflow = mocked_workflow_data

    # Build the manifest from the test package
    test_pkg._set_commit_message(message)
    manifest = io.BytesIO()
    test_pkg.dump(manifest)
    manifest.seek(0)

    self.s3_stubber.add_response(
        'put_object',
        service_response={},
        expected_params={
            'Body': manifest.read(),
            'Bucket': self.dst_bucket,
            'Key': f'.quilt/packages/{test_pkg.top_hash}',
        },
    )
    self.s3_stubber.add_response(
        'put_object',
        service_response={},
        expected_params={
            'Body': str.encode(test_pkg.top_hash),
            'Bucket': self.dst_bucket,
            'Key': f'.quilt/named_packages/{self.dst_pkg_name}/{str(int(self.mock_timestamp))}',
        },
    )
    self.s3_stubber.add_response(
        'put_object',
        service_response={},
        expected_params={
            'Body': str.encode(test_pkg.top_hash),
            'Bucket': self.dst_bucket,
            'Key': f'.quilt/named_packages/{self.dst_pkg_name}/latest',
        },
    )

    with mock.patch(
        'quilt3.workflows.validate',
        return_value=mocked_workflow_data,
    ) as workflow_validate_mock:
        yield

    workflow_validate_mock.assert_called_once_with(
        registry=get_package_registry(self.dst_registry),
        workflow=expected_workflow,
        name=self.dst_pkg_name,
        pkg=mock.ANY,  # TODO: probably this should be more specific.
        message=message,
    )
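# Note: the helper above is a generator that yields once, so it is presumably wrapped
# with contextlib.contextmanager where it is defined or used. A hypothetical call site
# in a test would look roughly like:
#
#     with self._mock_package_build(entries, expected_workflow='gamma'):
#         ...  # invoke the packaging handler under test
#
# with the stubbed S3 put_object calls and the workflow validation asserted on exit.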
def create_package(
    manifest: pd.DataFrame,
    step_pkg_root: Path,
    filepath_columns: List[str] = ["filepath"],
    metadata_columns: List[str] = [],
) -> Tuple[Package, pd.DataFrame]:
    # Make a copy
    relative_manifest = manifest.copy(deep=True)

    # Create empty package
    pkg = Package()

    # Create associate mappings: List[Dict[str, str]]
    # This list is in index order, meaning that as the column values are walked we can
    # simply add a new associate to the already existing associate map at that list
    # index.
    associates = []

    # Create metadata reduction map.
    # This is used to clean up and standardize metadata access after object
    # construction. It maps each metadata column name to a boolean saying whether that
    # column's values should be reduced (collapsed) during the "clean up the package
    # metadata" step. If multiple files share the same metadata keys but, for one
    # reason or another, one packaged file's value for a key is a list while another's
    # is a single string, the result is a confusing, mixed return-value API for the
    # same _type_ of object. Example:
    #
    #   fov/
    #       obj1/
    #           {example_key: "hello"}
    #       obj2/
    #           {example_key: ["hello", "world"]}
    #
    # Commonly this happens when a manifest has rows of unique instances of a child
    # object but retains a reference to a parent object, e.g. rows of information
    # about unique cells that were all generated by the same algorithm, whose
    # information is stored in a column of each cell row. This can result in some
    # files (which only have one cell) getting a single string while other files
    # (which have more than one cell) getting a list of the same string repeated.
    # Why spend all this time reducing/collapsing the metadata? Besides sparing users
    # from calling `obj2.meta["example_key"][0]` every time they want the value, and
    # besides standardizing the metadata API, the biggest reason is that S3 objects
    # can only carry 2KB of metadata; without this reduction step, manifests are more
    # likely to hit that limit and cause a package distribution error.
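    # To make that collapse decision concrete, a purely illustrative example of the
    # check applied later in this function (the values below are made up, not from a
    # real manifest):
    #
    #   joined_values = ["algo-v1", "algo-v1", "algo-v1"]
    #   joined_values.count(joined_values[0]) == len(joined_values)  # True -> collapse to "algo-v1"
    #
    #   joined_values = ["cell-1", "cell-2"]
    #   joined_values.count(joined_values[0]) == len(joined_values)  # False -> keep the full list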
    metadata_reduction_map = {index_col: True for index_col in metadata_columns}

    # Set all files
    with tqdm(
        total=len(filepath_columns) * len(relative_manifest),
        desc="Constructing package",
    ) as pbar:
        for col in filepath_columns:
            # Update values to the logical key as they are set
            for i, val in enumerate(relative_manifest[col].values):
                # Fully resolve the path
                physical_key = Path(val).expanduser().resolve()

                # Try creating a logical key from the path of the file relative to the
                # step's local staging root.
                #
                # Ex:
                #   step_pkg_root = "local_staging/raw"
                #   physical_key = "local_staging/raw/images/some_file.tiff"
                #   produced logical_key = "images/some_file.tiff"
                try:
                    logical_key = str(
                        file_utils._filepath_rel2abs(physical_key).relative_to(
                            file_utils._filepath_rel2abs(step_pkg_root)
                        )
                    )
                except ValueError:
                    # Create a logical key by merging the column name and filename,
                    # removing any obvious "path"-type words from the column name.
                    #
                    # Ex:
                    #   physical_key = "/some/abs/path/some_file.tiff"
                    #   column = "SourceReadPath"
                    #   produced logical_key = "source/some_file.tiff"
                    stripped_col = col.lower().replace("read", "").replace("path", "")
                    logical_key = f"{stripped_col}/{physical_key.name}"

                if physical_key.is_file():
                    relative_manifest[col].values[i] = logical_key

                    # Create metadata dictionary to attach to object
                    meta = {}
                    for meta_col in metadata_columns:
                        # Short reference to current metadata value
                        v = relative_manifest[meta_col].values[i]

                        # Enforce a simple JSON-serializable type.
                        # First check whether the value is a numpy value; it likely is
                        # because pandas relies on numpy. All numpy types have the
                        # "dtype" attribute and can be cast to a Python type with the
                        # `item` function, details here:
                        # https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.item.html
                        if hasattr(v, "dtype"):
                            v = v.item()

                        # Cast to JSON serializable type
                        v = file_utils.make_json_serializable(
                            v, f"Value from column: {meta_col}, index: {i}"
                        )

                        # Update metadata with value
                        meta[meta_col] = [v]

                    # Check if object already exists
                    if logical_key in pkg:
                        # Join the two meta dictionaries
                        joined_meta = {}
                        for meta_col, curr_v in pkg[logical_key].meta.items():
                            # Join the values for the current iteration of the metadata
                            joined_values = [*curr_v, *meta[meta_col]]

                            # Only check whether the metadata at this index can be
                            # reduced while that is still undecided, i.e. while the
                            # boolean in the metadata reduction map is still True
                            # (this column can, so far, be reduced or collapsed).
                            # This also ensures we never override an earlier False
                            # value: if we first encounter an instance of the metadata
                            # that should not be reduced and later one that could be,
                            # this check prevents flipping back, keeping metadata
                            # access uniform across the dataset.
                            if metadata_reduction_map[meta_col]:
                                # Update the metadata reduction map for the current
                                # column: as long as it is still considered reducible
                                # (i.e. we entered this block), check whether it can
                                # still be reduced after the most recent addition.
                                # "We can reduce the metadata if the count of the
                                # first value (or any value) is the same as the length
                                # of the entire list of values."
                                # This runs quickly for small lists, as seen here:
                                # https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical
                                metadata_reduction_map[meta_col] = (
                                    joined_values.count(joined_values[0]) == len(joined_values)
                                )

                            # Attach the joined values to the joined metadata
                            joined_meta[meta_col] = joined_values

                        # Update meta
                        pkg[logical_key].set_meta(joined_meta)

                    # Object didn't already exist, simply set it
                    else:
                        pkg.set(logical_key, physical_key, meta)

                    # Update associates
                    try:
                        associates[i][col] = logical_key
                    except IndexError:
                        associates.append({col: logical_key})
                else:
                    relative_manifest[col].values[i] = logical_key
                    pkg.set_dir(logical_key, physical_key)

                # Update progress bar
                pbar.update()

    # Clean up package metadata
    pkg = _recursive_clean(pkg, metadata_reduction_map)

    # Attach associates
    for i, associate_mapping in tqdm(
        enumerate(associates),
        desc="Creating associate metadata blocks",
    ):
        for col, lk in associate_mapping.items():
            # Having dictionary expansion in this order means that "associates" will
            # override any prior existing `associates` key; this is assumed safe
            # because attach_associates was set to True.
            pkg[lk].set_meta({**pkg[lk].meta, "associates": associate_mapping})

    return pkg, relative_manifest
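# A minimal, hypothetical usage sketch of create_package. The column names, paths, and
# local staging layout below are illustrative only and assume the listed files exist
# on disk; the final collapsed-metadata shape also depends on _recursive_clean, which
# is defined elsewhere in this module.
if __name__ == "__main__":
    example_manifest = pd.DataFrame(
        {
            "filepath": [
                "local_staging/raw/images/a.tiff",
                "local_staging/raw/images/b.tiff",
            ],
            "Algorithm": ["algo-v1", "algo-v1"],
        }
    )
    example_pkg, example_relative_manifest = create_package(
        example_manifest,
        step_pkg_root=Path("local_staging/raw"),
        filepath_columns=["filepath"],
        metadata_columns=["Algorithm"],
    )
    # Each file lands under the "images/" logical prefix, and the "filepath" column of
    # example_relative_manifest now holds those logical keys.
    print(example_pkg)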