Esempio n. 1
0
 def dump_row(row: JSON) -> str:
     """
     Serialize a single row to a JSON string.

     The keys of the row must equal `columns` (a name resolved from the
     surrounding scope). Every value that is itself a mapping is replaced
     with its JSON text before the row as a whole is serialized; all other
     values are passed through unchanged.
     """
     row_columns = row.keys()
     # TinyQuery's errors are typically not helpful in debugging missing/
     # extra columns in the row JSON.
     assert row_columns == columns, row_columns
     encoded_row = {}
     for column_name, column_value in row.items():
         if isinstance(column_value, Mapping):
             # Nested objects are stored as JSON text rather than as objects
             column_value = json.dumps(column_value)
         encoded_row[column_name] = column_value
     return json.dumps(encoded_row)
Esempio n. 2
0
 def from_index(
     cls,
     field_types: CataloguedFieldTypes,
     hit: JSON,
     *,
     coordinates: Optional[
         DocumentCoordinates[CataloguedEntityReference]] = None
 ) -> 'Document':
     """
     Create a document from a search hit.

     :param field_types: the field types, keyed by catalog; the entry for
                         the catalog of the document's entity is used to
                         translate the hit's source

     :param hit: the hit to convert; its `_source` entry is translated
                 with `translate_fields(..., forward=False)`

     :param coordinates: the coordinates of the document; derived from the
                         hit via `DocumentCoordinates.from_hit` if omitted
     """
     if coordinates is None:
         coordinates = DocumentCoordinates.from_hit(hit)
     raw_source = hit['_source']
     if 'contents' in raw_source:
         file: JSON
         files = raw_source['contents']['files']
         content_descriptions = [file['content_description'] for file in files]
         assert [] not in content_descriptions, 'Found empty list as content_description value'
     catalog_field_types = field_types[coordinates.entity.catalog]
     source = cls.translate_fields(raw_source,
                                   catalog_field_types,
                                   forward=False)
     # A hit without a `_version` entry is treated as version 0
     version = hit.get('_version', 0)
     contents = source.get('contents')
     # noinspection PyArgumentList
     # https://youtrack.jetbrains.com/issue/PY-28506
     return cls(coordinates=coordinates,
                version=version,
                contents=contents,
                **cls._from_source(source))
Esempio n. 3
0
def populate_tags(tf_config: JSON) -> JSON:
    """
    Return a copy of the given Terraform configuration in which every
    taggable resource carries tags, with the name tag adjusted by
    `_adjust_name_tag` (`name` becomes `Name` for tagged AWS resources).

    A configuration without a `resource` entry is returned as is. Resources
    of a type that is not taggable keep their arguments unchanged.
    """
    taggable_resource_types = terraform.taggable_resource_types()
    try:
        resources = tf_config['resource']
    except KeyError:
        return tf_config

    def with_tags(resource_type, resource_name, arguments):
        # Only resource types known to be taggable get a `tags` argument
        if resource_type not in taggable_resource_types:
            return arguments
        tags = _tags(resource_name, **arguments.get('tags', {}))
        return {**arguments, 'tags': _adjust_name_tag(resource_type, tags)}

    tagged_resources = [
        {
            resource_type: [
                {resource_name: with_tags(resource_type, resource_name, arguments)}
                for resource_name, arguments in _normalize_tf(resource)
            ]
        }
        for resource_type, resource in _normalize_tf(resources)
    ]
    # Everything but the `resource` entry is carried over untouched
    return {
        k: (tagged_resources if k == 'resource' else v)
        for k, v in tf_config.items()
    }
Esempio n. 4
0
 def flatten(value: JSON,
             *path) -> Iterable[Tuple[Tuple[str, ...], AnyJSON]]:
     """
     Yield a `(path, leaf)` pair for every value in the given mapping that
     is not itself a mapping. The path is the tuple of keys leading to the
     leaf, prefixed with the elements of `path`. Nested mappings are
     traversed depth-first, in iteration order.
     """
     # Explicit stack of (key prefix, partially consumed item iterator)
     pending = [(path, iter(value.items()))]
     while pending:
         prefix, entries = pending[-1]
         for key, child in entries:
             child_path = (*prefix, key)
             if isinstance(child, Mapping):
                 # Descend now, resume the current iterator afterwards
                 pending.append((child_path, iter(child.items())))
                 break
             yield child_path, child
         else:
             pending.pop()
Esempio n. 5
0
    def parse_stratification(self,
                             points: JSON) -> List[Mapping[str, List[str]]]:
        """
        Expand a mapping from dimension name to raw value string into a list
        of strata, each stratum mapping dimension names to lists of values.

        Dimensions whose raw value is falsy are skipped. The raw value of
        every other dimension is parsed with `self.parse_values`. If the
        parsed result has a `None` key, that must be its only key and its
        values are assigned to every stratum. Otherwise each key of the parsed
        result names a value of a previously processed dimension (the
        "parent"): a stratum in which some dimension holds exactly those
        parent values is split into one stratum per parent value, and each
        parsed list of values is then assigned to the strata that hold the
        matching single parent value.

        A requirement error is raised via `require` if any key of the parsed
        result could not be matched to a stratum.

        >>> file = File(name='foo.txt', source='', project_id='1234', row_num=1)
        >>> file.parse_stratification({'species': 'human', 'organ': 'blood'})
        [{'species': ['human'], 'organ': ['blood']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'blood'})
        [{'species': ['human', 'mouse'], 'organ': ['blood']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood, mouse: brain'})
        [{'species': ['human'], 'organ': ['blood']}, {'species': ['mouse'], 'organ': ['brain']}]

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: Row 1 'organ' values ['human'] differ from parent dimension.

        >>> file.parse_stratification({'species': 'human, mouse', 'organ': 'human: blood, mouse: brain, cat: brain'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: Row 1 'organ' values ['cat', 'human', 'mouse'] differ from parent dimension.
        """
        # Start out with a single stratum that has no dimensions yet
        strata = [{}]
        for dimension, values in points.items():
            if values:
                # NOTE(review): `parse_values` presumably returns a mapping
                # from parent value (or None) to a list of values; confirm
                # against its definition.
                parsed_values = self.parse_values(values)
                if None in parsed_values:
                    # Add the value to all stratum
                    assert len(parsed_values) == 1, parsed_values
                    for stratum in strata:
                        stratum[dimension] = parsed_values[None]
                else:
                    # Each value belongs to a separate stratum. Find the stratum
                    # with the matching multi-value point and split it into
                    # separate stratum.
                    parents = list(parsed_values.keys())
                    for stratum in strata:
                        for dimension_, values_ in stratum.items():
                            if set(parents) == set(values_):
                                # The existing stratum keeps the first parent
                                # value; every remaining parent value gets a
                                # deep copy of the stratum appended to `strata`
                                stratum[dimension_] = [parents.pop(0)]
                                while len(parents) > 0:
                                    new_stratum = deepcopy(stratum)
                                    new_stratum[dimension_] = [parents.pop(0)]
                                    strata.append(new_stratum)
                    # Put each value in its specified stratum
                    parents = set(parsed_values.keys())
                    for stratum in strata:
                        for parent, values_ in parsed_values.items():
                            if [parent] in stratum.values():
                                stratum[dimension] = values_
                                # Track which parents were matched at least once
                                parents -= {parent}
                    # Any parent left over did not match a stratum
                    require(
                        len(parents) == 0,
                        f'Row {self.row_num} {dimension!r} values {sorted(parents)} '
                        'differ from parent dimension.')
        return strata
Esempio n. 6
0
 def _transform_entity(self, entity: JSON) -> JSON:
     """
     Map the given entity to the fields that are to be accumulated.

     The `size`, `count` and `matrix_cell_count` fields are paired with the
     entity's `(uuid, version)` tuple; the remaining fields are copied from
     the entity as they are. `matrix_cell_count` is optional in the entity,
     all other keys are required.
     """
     fqid = (entity['uuid'], entity['version'])
     return {
         'size': (fqid, entity['size']),
         'file_format': entity['file_format'],
         'file_source': entity['file_source'],
         'is_intermediate': entity['is_intermediate'],
         'count': (fqid, 1),
         'content_description': entity['content_description'],
         'matrix_cell_count': (fqid, entity.get('matrix_cell_count')),
     }
Esempio n. 7
0
 def _accumulate(self, aggregate: MutableMapping[str, Optional[Accumulator]], entity: JSON):
     """
     Feed the fields of the given entity into the accumulators of the given
     aggregate.

     The entity is first passed through `_transform_entity`. For each
     resulting field, the accumulator is looked up in the aggregate. If the
     aggregate has no entry for the field, one is obtained from
     `_get_accumulator` and stored in the aggregate. An entry of `None`
     means that the field is not accumulated.

     :param aggregate: the accumulators by field name; updated in place

     :param entity: the entity whose field values are to be accumulated
     """
     entity = self._transform_entity(entity)
     for field_, value in entity.items():
         try:
             accumulator = aggregate[field_]
         except KeyError:
             # Only a missing entry should trigger the creation of an
             # accumulator. Any other failure of the lookup is a genuine
             # error and must not be masked.
             accumulator = self._get_accumulator(field_)
             aggregate[field_] = accumulator
         if accumulator is not None:
             accumulator.accumulate(value)
Esempio n. 8
0
def _sanitize_tf(tf_config: JSON) -> JSON:
    """
    Return a copy of the given Terraform configuration without the top-level
    entries whose value is falsy (an empty list, for example).

    This avoids errors like

        Error: Missing block label

          on api_gateway.tf.json line 12:
          12:     "resource": []

        At least one object property is required, whose name represents the resource
        block's type.
    """
    sanitized = {}
    for block_name, block in tf_config.items():
        if block:
            sanitized[block_name] = block
    return sanitized
Esempio n. 9
0
 def _flatten(self, output: Output, obj: JSON, *path: str) -> None:
     """
     Recursively walk the given object and pass every leaf value to
     `_set_value`, along with the output and the path of keys leading to
     that value.

     Keys listed in `self.ignored_fields` are skipped at every level. The
     items of a list share the path of the list itself; items that are
     dictionaries are walked recursively, all other items are treated as
     leaf values.
     """
     for key, value in obj.items():
         if key in self.ignored_fields:
             continue
         new_path = (*path, key)
         # Treat a lone value like a list of one so that dictionaries,
         # lists and scalars can share the dispatch below
         items = value if isinstance(value, list) else [value]
         for item in items:
             if isinstance(item, dict):
                 self._flatten(output, item, *new_path)
             else:
                 self._set_value(output, item, *new_path)
Esempio n. 10
0
 def from_json(cls,
               *,
               coordinates: C,
               document: JSON,
               version: Optional[InternalVersion],
               **kwargs
               ) -> 'Aggregate':
     """
     Create an aggregate from its JSON form by delegating to the parent
     class, supplying the number of contributions, the sources and the
     bundles found in the given document. `num_contributions` and `sources`
     are required entries of the document, `bundles` is optional.
     """
     num_contributions = document['num_contributions']
     # Lazily converted; the parent class consumes the iterator
     sources = map(DocumentSource.from_json, document['sources'])
     bundles = document.get('bundles')
     # NOTE(review): `kwargs` is accepted but not passed on to the parent
     # class; confirm that this is intended.
     self = super().from_json(coordinates=coordinates,
                              document=document,
                              version=version,
                              num_contributions=num_contributions,
                              sources=sources,
                              bundles=bundles)
     assert isinstance(self, Aggregate)
     return self
Esempio n. 11
0
 def from_json(cls,
               *,
               coordinates: C,
               document: JSON,
               version: Optional[InternalVersion],
               **kwargs,
               ) -> 'Document':
     """
     Create a document from its JSON form.

     The document's optional `contents` entry and any additional keyword
     arguments are passed to the constructor. The `entity_id` entry of the
     document must match the ID of the resulting instance's entity.
     """
     contents = document.get('contents')
     # noinspection PyArgumentList
     # https://youtrack.jetbrains.com/issue/PY-28506
     self = cls(coordinates=coordinates,
                version=version,
                contents=contents,
                **kwargs)
     expected_id, actual_id = document['entity_id'], self.entity.entity_id
     assert expected_id == actual_id
     return self
Esempio n. 12
0
 def get_manifest(self, state: JSON) -> JSON:
     """
     Perform one step of manifest generation and return the state for the
     next step.

     The request parameters and the current partition are read from the
     given state and passed to the service. If the service returns another
     partition, the result is the given state with the partition replaced.
     If the service returns a manifest, the result consists solely of the
     manifest, stored under `self.manifest_state_key`.
     """
     partition = ManifestPartition.from_json(
         state[self.partition_state_key])
     auth = state.get('authentication')
     result = self.service.get_manifest(
         format_=ManifestFormat(state['format_']),
         catalog=state['catalog'],
         filters=Filters.from_json(state['filters']),
         partition=partition,
         authentication=Authentication.from_json(auth) if auth is not None else None,
         object_key=state['object_key'])
     if isinstance(result, ManifestPartition):
         # The last partition would have yielded a manifest instead
         assert not result.is_last, result
         next_state = dict(state)
         next_state[self.partition_state_key] = result.to_json()
         return next_state
     if isinstance(result, Manifest):
         # The presence of this key terminates the step function loop
         return {self.manifest_state_key: result.to_json()}
     assert False, type(result)
Esempio n. 13
0
 def _from_source(cls, source: JSON) -> Mapping[str, Any]:
     """
     Extend the constructor arguments that the parent class extracts from
     the given source with the number of contributions (required in the
     source) and the bundles (optional in the source).
     """
     base = super()._from_source(source)
     num_contributions = source['num_contributions']
     bundles = source.get('bundles')
     return dict(base,
                 num_contributions=num_contributions,
                 bundles=bundles)
Esempio n. 14
0
 def _deep_get(self, d: JSON, *path: str) -> Optional[JSON]:
     """
     Follow the given path of keys into the given nested mapping and return
     the value found there. Return None as soon as a step along the path
     yields None, be it because of a missing key or a None value. With an
     empty path, the argument itself is returned.
     """
     node = d
     for key in path:
         if node is None:
             break
         node = node.get(key)
     return node
Esempio n. 15
0
 def drs_path(self, manifest_entry: JSON) -> Optional[str]:
     """
     Return the value of the `drs_path` entry in the given manifest entry,
     or None if there is no such entry.
     """
     path = manifest_entry.get('drs_path')
     return path
Esempio n. 16
0
    def patch_resource_names(self, tf_config: JSON) -> JSON:
        """
        Some Chalice-generated resources have names that are incompatible with
        our convention for generating fully qualified resource names. This
        method transforms the given Terraform configuration to use names that
        are compatible with the convention.

        The new names are taken from `self.resource_name_mapping`, a mapping
        from `(resource_type, name)` to the new name. Resources under the
        `resource` block are renamed accordingly, and every interpolation of
        the form `${resource_type.name.` that occurs in a string anywhere in
        the configuration, including strings nested in dictionaries and
        lists, is rewritten to refer to the new name. All such references in
        a string are rewritten, not just the first one. The given
        configuration is not modified, a new one is returned.

        >>> from azul.doctests import assert_json
        >>> assert_json(chalice.patch_resource_names({
        ...     "resource": {
        ...         "aws_cloudwatch_event_rule": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "name": "indexercachehealth-event"  # leave
        ...             }
        ...         },
        ...         "aws_cloudwatch_event_target": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "rule": "${aws_cloudwatch_event_rule.indexercachehealth-event.name}",  # patch
        ...                 "target_id": "indexercachehealth-event",  # leave
        ...                 "arn": "${aws_lambda_function.indexercachehealth.arn}"
        ...             }
        ...         },
        ...         "aws_lambda_permission": {
        ...             "indexercachehealth-event": {  # patch
        ...                 "function_name": "azul-indexer-prod-indexercachehealth",
        ...                 "source_arn": "${aws_cloudwatch_event_rule.indexercachehealth-event.arn}"  # patch
        ...             }
        ...         },
        ...         "aws_lambda_event_source_mapping": {
        ...             "contribute-sqs-event-source": {
        ...                 "batch_size": 1
        ...             }
        ...         }
        ...     }
        ... }))
        {
            "resource": {
                "aws_cloudwatch_event_rule": {
                    "indexercachehealth": {
                        "name": "indexercachehealth-event"
                    }
                },
                "aws_cloudwatch_event_target": {
                    "indexercachehealth": {
                        "rule": "${aws_cloudwatch_event_rule.indexercachehealth.name}",
                        "target_id": "indexercachehealth-event",
                        "arn": "${aws_lambda_function.indexercachehealth.arn}"
                    }
                },
                "aws_lambda_permission": {
                    "indexercachehealth": {
                        "function_name": "azul-indexer-prod-indexercachehealth",
                        "source_arn": "${aws_cloudwatch_event_rule.indexercachehealth.arn}"
                    }
                },
                "aws_lambda_event_source_mapping": {
                    "contribute-sqs-event-source": {
                        "batch_size": 1
                    }
                }
            }
        }
        """
        mapping = self.resource_name_mapping(tf_config)

        # First rename the resources themselves. Blocks other than `resource`
        # are carried over as they are; references in them are patched below.
        tf_config = {
            block_name: {
                resource_type: {
                    mapping.get((resource_type, name), name): resource
                    for name, resource in resources.items()
                }
                for resource_type, resources in block.items()
            } if block_name == 'resource' else block
            for block_name, block in tf_config.items()
        }

        def ref(resource_type, name):
            # The trailing dot prevents a name from matching a longer name
            # that it is a prefix of
            return '${' + resource_type + '.' + name + '.'

        ref_map = {
            ref(resource_type, name): ref(resource_type, new_name)
            for (resource_type, name), new_name in mapping.items()
        }

        def patch_refs(v: U) -> U:
            if isinstance(v, dict):
                return {k: patch_refs(item) for k, item in v.items()}
            elif isinstance(v, list):
                return [patch_refs(item) for item in v]
            elif isinstance(v, str):
                # A single string may refer to more than one renamed resource
                # so all replacements are applied, not just the first match
                for old_ref, new_ref in ref_map.items():
                    v = v.replace(old_ref, new_ref)
                return v
            else:
                return v

        return patch_refs(tf_config)