Code example #1
 def dump_row(row: JSON) -> str:
     row_columns = row.keys()
     # TinyQuery's errors are typically not helpful in debugging missing/
     # extra columns in the row JSON.
     assert row_columns == columns, row_columns
     row = {
         column_name: (json.dumps(column_value) if isinstance(
             column_value, Mapping) else column_value)
         for column_name, column_value in row.items()
     }
     return json.dumps(row)
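A minimal, self-contained sketch of the same technique. It assumes json and collections.abc.Mapping are imported as in the original module, and substitutes a hypothetical columns constant for the one the method closes over:

import json
from collections.abc import Mapping

columns = {'name', 'payload'}  # hypothetical stand-in for the real column set

def dump_row(row) -> str:
    assert row.keys() == columns, row.keys()
    # Pre-serialize nested objects so every column holds a scalar value
    row = {k: (json.dumps(v) if isinstance(v, Mapping) else v)
           for k, v in row.items()}
    return json.dumps(row)

print(dump_row({'name': 'x', 'payload': {'a': 1}}))
# {"name": "x", "payload": "{\"a\": 1}"}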
Code example #2
 def from_index(
     cls,
     field_types: CataloguedFieldTypes,
     hit: JSON,
     *,
     coordinates: Optional[
         DocumentCoordinates[CataloguedEntityReference]] = None
 ) -> 'Document':
     if coordinates is None:
         coordinates = DocumentCoordinates.from_hit(hit)
     if 'contents' in hit['_source']:
         file: JSON
         content_descriptions = [
             file['content_description']
             for file in hit['_source']['contents']['files']
         ]
         assert [] not in content_descriptions, 'Found empty list as content_description value'
     source = cls.translate_fields(hit['_source'],
                                   field_types[coordinates.entity.catalog],
                                   forward=False)
     # noinspection PyArgumentList
     # https://youtrack.jetbrains.com/issue/PY-28506
     self = cls(coordinates=coordinates,
                version=hit.get('_version', 0),
                contents=source.get('contents'),
                **cls._from_source(source))
     return self
Code example #3
def populate_tags(tf_config: JSON) -> JSON:
    """
    Add tags to all taggable resources and change the `name` tag to `Name`
    for tagged AWS resources.
    """
    taggable_resource_types = terraform.taggable_resource_types()
    try:
        resources = tf_config['resource']
    except KeyError:
        return tf_config
    else:
        return {
            k: v if k != 'resource' else [{
                resource_type: [{
                    resource_name: {
                        **arguments, 'tags':
                        _adjust_name_tag(
                            resource_type,
                            _tags(resource_name, **arguments.get('tags', {})))
                    }
                    if resource_type in taggable_resource_types else arguments
                } for resource_name, arguments in _normalize_tf(resource)]
            } for resource_type, resource in _normalize_tf(resources)]
            for k, v in tf_config.items()
        }
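The _normalize_tf helper is not part of this excerpt. A hedged sketch of the contract the comprehension above appears to rely on, given that Terraform JSON allows a block to be either a mapping or a list of single-entry mappings:

def _normalize_tf(block):
    # Yield uniform (name, value) pairs regardless of which form the
    # block takes in the input configuration.
    if isinstance(block, dict):
        yield from block.items()
    else:
        for item in block:
            yield from item.items()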
Code example #4
File: index_service.py Project: DataBiosphere/azul
 def flatten(value: JSON,
             *path: str) -> Iterable[Tuple[Tuple[str, ...], AnyJSON]]:
     for k, v in value.items():
         if isinstance(v, Mapping):
             yield from flatten(v, *path, k)
         else:
             yield (*path, k), v
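A quick, self-contained check of the behavior, assuming Mapping is imported from collections.abc; each leaf value keeps its full key path as a tuple:

doc = {'a': {'b': 1, 'c': {'d': 2}}, 'e': 3}
assert dict(flatten(doc)) == {('a', 'b'): 1, ('a', 'c', 'd'): 2, ('e',): 3}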
Code example #5
    def parse_stratification(self,
                             points: JSON) -> List[Mapping[str, List[str]]]:
        """
        >>> file = File(name='foo.txt', source='', project_id='1234', row_num=1)
        >>> file.parse_stratification({'species': 'human', 'organ': 'blood'})
        [{'species': ['human'], 'organ': ['blood']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'blood'})
        [{'species': ['human', 'mouse'], 'organ': ['blood']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood, mouse: brain'})
        [{'species': ['human'], 'organ': ['blood']}, {'species': ['mouse'], 'organ': ['brain']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: Row 1 'organ' values ['human'] differ from parent dimension.

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood, mouse: brain, cat: brain'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: Row 1 'organ' values ['cat', 'human', 'mouse'] differ from parent dimension.
        """
        strata = [{}]
        for dimension, values in points.items():
            if values:
                parsed_values = self.parse_values(values)
                if None in parsed_values:
                    # Add the value to all strata
                    assert len(parsed_values) == 1, parsed_values
                    for stratum in strata:
                        stratum[dimension] = parsed_values[None]
                else:
                    # Each value belongs to a separate stratum. Find the stratum
                    # with the matching multi-value point and split it into
                    # separate strata.
                    parents = list(parsed_values.keys())
                    for stratum in strata:
                        for dimension_, values_ in stratum.items():
                            if set(parents) == set(values_):
                                stratum[dimension_] = [parents.pop(0)]
                                while len(parents) > 0:
                                    new_stratum = deepcopy(stratum)
                                    new_stratum[dimension_] = [parents.pop(0)]
                                    strata.append(new_stratum)
                    # Put each value in its specified stratum
                    parents = set(parsed_values.keys())
                    for stratum in strata:
                        for parent, values_ in parsed_values.items():
                            if [parent] in stratum.values():
                                stratum[dimension] = values_
                                parents -= {parent}
                    require(
                        len(parents) == 0,
                        f'Row {self.row_num} {dimension!r} values {sorted(parents)} '
                        'differ from parent dimension.')
        return strata
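parse_values is not shown in this excerpt. Judging from the doctests above, its contract appears to be the following; this is an inferred sketch, not the project's actual implementation:

def parse_values(self, values: str):
    # 'human, mouse'               -> {None: ['human', 'mouse']}
    # 'human: blood, mouse: brain' -> {'human': ['blood'], 'mouse': ['brain']}
    if ':' not in values:
        return {None: [v.strip() for v in values.split(',')]}
    result = {}
    for pair in values.split(','):
        parent, _, value = pair.partition(':')
        result.setdefault(parent.strip(), []).append(value.strip())
    return result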
Code example #6
File: aggregate.py Project: DataBiosphere/azul
 def _transform_entity(self, entity: JSON) -> JSON:
     fqid = entity['uuid'], entity['version']
     return dict(size=(fqid, entity['size']),
                 file_format=entity['file_format'],
                 file_source=entity['file_source'],
                 is_intermediate=entity['is_intermediate'],
                 count=(fqid, 1),
                 content_description=entity['content_description'],
                 matrix_cell_count=(fqid, entity.get('matrix_cell_count')))
Code example #7
File: aggregate.py Project: DataBiosphere/azul
 def _accumulate(self, aggregate: MutableMapping[str, Optional[Accumulator]], entity: JSON):
     entity = self._transform_entity(entity)
     for field_, value in entity.items():
         try:
             accumulator = aggregate[field_]
         except KeyError:
             accumulator = self._get_accumulator(field_)
             aggregate[field_] = accumulator
         if accumulator is not None:
             accumulator.accumulate(value)
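The Accumulator interface is defined elsewhere in azul. For illustration, a hypothetical minimal accumulator compatible with the accumulate() call above:

class SetAccumulator:
    """Hypothetical stand-in: collects the distinct values it is fed."""

    def __init__(self) -> None:
        self.values = set()

    def accumulate(self, value) -> None:
        self.values.add(value)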
Code example #8
def _sanitize_tf(tf_config: JSON) -> JSON:
    """
    Avoid errors like

        Error: Missing block label

          on api_gateway.tf.json line 12:
          12:     "resource": []

        At least one object property is required, whose name represents the resource
        block's type.
    """
    return {k: v for k, v in tf_config.items() if v}
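Usage is straightforward: empty (falsy) blocks are dropped, non-empty ones pass through unchanged:

assert _sanitize_tf({'resource': [], 'data': {'aws_region': {'current': {}}}}) \
    == {'data': {'aws_region': {'current': {}}}}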
Code example #9
 def _flatten(self, output: Output, obj: JSON, *path: str) -> None:
     for key, value in obj.items():
         if key not in self.ignored_fields:
             new_path = *path, key
             if isinstance(value, dict):
                 self._flatten(output, value, *new_path)
             elif isinstance(value, list):
                 for item in value:
                     if isinstance(item, dict):
                         self._flatten(output, item, *new_path)
                     else:
                         self._set_value(output, item, *new_path)
             else:
                 self._set_value(output, value, *new_path)
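Unlike flatten in example #4, this variant also descends into lists. A self-contained rendition with hypothetical names, accumulating repeated leaves under dotted paths instead of calling _set_value:

def flatten_obj(out, obj, *path):
    for key, value in obj.items():
        new_path = *path, key
        if isinstance(value, dict):
            flatten_obj(out, value, *new_path)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    flatten_obj(out, item, *new_path)
                else:
                    out.setdefault('.'.join(new_path), []).append(item)
        else:
            out.setdefault('.'.join(new_path), []).append(value)

out = {}
flatten_obj(out, {'a': [{'b': 1}, {'b': 2}], 'c': 'x'})
assert out == {'a.b': [1, 2], 'c': ['x']}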
Code example #10
File: document.py Project: DataBiosphere/azul
 def from_json(cls,
               *,
               coordinates: C,
               document: JSON,
               version: Optional[InternalVersion],
               **kwargs
               ) -> 'Aggregate':
     self = super().from_json(coordinates=coordinates,
                              document=document,
                              version=version,
                              num_contributions=document['num_contributions'],
                              sources=map(DocumentSource.from_json, document['sources']),
                              bundles=document.get('bundles'))
     assert isinstance(self, Aggregate)
     return self
Code example #11
File: document.py Project: DataBiosphere/azul
 def from_json(cls,
               *,
               coordinates: C,
               document: JSON,
               version: Optional[InternalVersion],
               **kwargs,
               ) -> 'Document':
     # noinspection PyArgumentList
     # https://youtrack.jetbrains.com/issue/PY-28506
     self = cls(coordinates=coordinates,
                version=version,
                contents=document.get('contents'),
                **kwargs)
     assert document['entity_id'] == self.entity.entity_id
     return self
Code example #12
 def get_manifest(self, state: JSON) -> JSON:
     partition = ManifestPartition.from_json(
         state[self.partition_state_key])
     auth = state.get('authentication')
     result = self.service.get_manifest(
         format_=ManifestFormat(state['format_']),
         catalog=state['catalog'],
         filters=Filters.from_json(state['filters']),
         partition=partition,
         authentication=(None if auth is None
                         else Authentication.from_json(auth)),
         object_key=state['object_key'])
     if isinstance(result, ManifestPartition):
         assert not result.is_last, result
         return {**state, self.partition_state_key: result.to_json()}
     elif isinstance(result, Manifest):
         return {
             # The presence of this key terminates the step function loop
             self.manifest_state_key:
             result.to_json()
         }
     else:
         assert False, type(result)
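A hedged sketch of the driver loop implied by the comment on manifest_state_key: the surrounding Step Functions state machine (or an equivalent local loop, as in this hypothetical driver) re-invokes the handler until that key appears in the returned state:

state = initial_state  # hypothetical starting state
while self.manifest_state_key not in state:
    state = self.get_manifest(state)
manifest_json = state[self.manifest_state_key]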
Code example #13
 def _from_source(cls, source: JSON) -> Mapping[str, Any]:
     return dict(super()._from_source(source),
                 num_contributions=source['num_contributions'],
                 bundles=source.get('bundles'))
Code example #14
 def _deep_get(self, d: JSON, *path: str) -> Optional[JSON]:
     if d is not None and path:
         key, *path = path
         return self._deep_get(d.get(key), *path)
     else:
         return d
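The method only uses self for the recursive call, so a standalone version shows the behavior directly; note how d.get(key) yielding None short-circuits the rest of the path:

def deep_get(d, *path):
    if d is not None and path:
        key, *rest = path
        return deep_get(d.get(key), *rest)
    return d

assert deep_get({'a': {'b': 1}}, 'a', 'b') == 1
assert deep_get({'a': {'b': 1}}, 'a', 'x') is None
assert deep_get({'a': {'b': 1}}, 'x', 'b') is None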
Code example #15
 def drs_path(self, manifest_entry: JSON) -> Optional[str]:
     return manifest_entry.get('drs_path')
Code example #16
    def patch_resource_names(self, tf_config: JSON) -> JSON:
        """
        Some Chalice-generated resources have names that are incompatible with
        our convention for generating fully qualified resource names. This
        method transforms the given Terraform configuration to use names that
        are compatible with the convention.

        >>> from azul.doctests import assert_json
        >>> assert_json(chalice.patch_resource_names({
        ...     "resource": {
        ...         "aws_cloudwatch_event_rule": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "name": "indexercachehealth-event"  # leave
        ...             }
        ...         },
        ...         "aws_cloudwatch_event_target": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "rule": "${aws_cloudwatch_event_rule.indexercachehealth-event.name}",  # patch
        ...                 "target_id": "indexercachehealth-event",  # leave
        ...                 "arn": "${aws_lambda_function.indexercachehealth.arn}"
        ...             }
        ...         },
        ...         "aws_lambda_permission": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "function_name": "azul-indexer-prod-indexercachehealth",
        ...                 "source_arn": "${aws_cloudwatch_event_rule.indexercachehealth-event.arn}"  # patch
        ...             }
        ...         },
        ...         "aws_lambda_event_source_mapping": {
        ...             "contribute-sqs-event-source": {
        ...                 "batch_size": 1
        ...             }
        ...         }
        ...     }
        ... }))
        {
            "resource": {
                "aws_cloudwatch_event_rule": {
                    "indexercachehealth": {
                        "name": "indexercachehealth-event"
                    }
                },
                "aws_cloudwatch_event_target": {
                    "indexercachehealth": {
                        "rule": "${aws_cloudwatch_event_rule.indexercachehealth.name}",
                        "target_id": "indexercachehealth-event",
                        "arn": "${aws_lambda_function.indexercachehealth.arn}"
                    }
                },
                "aws_lambda_permission": {
                    "indexercachehealth": {
                        "function_name": "azul-indexer-prod-indexercachehealth",
                        "source_arn": "${aws_cloudwatch_event_rule.indexercachehealth.arn}"
                    }
                },
                "aws_lambda_event_source_mapping": {
                    "contribute-sqs-event-source": {
                        "batch_size": 1
                    }
                }
            }
        }
        """
        mapping = self.resource_name_mapping(tf_config)

        tf_config = {
            block_name: {
                resource_type: {
                    mapping.get((resource_type, name), name): resource
                    for name, resource in resources.items()
                }
                for resource_type, resources in block.items()
            } if block_name == 'resource' else block
            for block_name, block in tf_config.items()
        }

        def ref(resource_type, name):
            return '${' + resource_type + '.' + name + '.'

        ref_map = {
            ref(resource_type, name): ref(resource_type, new_name)
            for (resource_type, name), new_name in mapping.items()
        }

        def patch_refs(v: U) -> U:
            if isinstance(v, dict):
                return {k: patch_refs(w) for k, w in v.items()}
            elif isinstance(v, str):
                for old_ref, new_ref in ref_map.items():
                    if old_ref in v:
                        return v.replace(old_ref, new_ref)
                return v
            else:
                return v

        return patch_refs(tf_config)