Example #1
0
 def reify(self,
           service_config: ServiceConfig,
           *,
           explicit_only: bool
           ) -> FiltersJSON:
     """
     Return a copy of the explicit filters, optionally extended with the
     implicit source filter.

     :param service_config: supplies the `source_id_facet` that is handed to
                            `_add_implicit_source_filter`

     :param explicit_only: if true, the copy of `self.explicit` is returned
                           as is and no implicit source filter is added
     """
     result = copy_json(self.explicit)
     if explicit_only:
         return result
     self._add_implicit_source_filter(result, service_config.source_id_facet)
     return result
Example #2
0
 def __init__(self, app_name, unit_test=False, spec=None):
     """
     Initialize the application and, if one is given, its top-level spec.

     :param app_name: passed through to the superclass constructor

     :param unit_test: stored unchanged in `self.unit_test`

     :param spec: optional top-level specification. It must not contain a
                  'paths' entry. A copy of it, with an empty 'paths' entry
                  added, is kept in `self.specs`. If omitted, `self.specs`
                  is None.
     """
     self.unit_test = unit_test
     if spec is None:
         self.specs: Optional[MutableJSON] = None
     else:
         assert 'paths' not in spec, 'The top-level spec must not define paths'
         # Work on a copy so that the caller's spec is not modified when
         # the 'paths' entry is added
         self.specs: Optional[MutableJSON] = copy_json(spec)
         self.specs['paths'] = {}
     debug_enabled = config.debug > 0
     super().__init__(app_name, debug=debug_enabled, configure_logs=False)
Example #3
0
    def _register_spec(self,
                       path: str,
                       path_spec: Optional[JSON],
                       method_spec: Optional[JSON],
                       methods: Iterable[str]):
        """
        Record a route's specifications under `self.specs['paths']`.

        :param path: the route path used as the key under 'paths'

        :param path_spec: optional spec for the path as a whole; a copy of
                          it becomes the entry for `path`. May only be given
                          if there is no entry for `path` yet.

        :param method_spec: optional spec for the methods of the route; a
                            separate copy is stored for each method. May
                            only be given once per path and method.

        :param methods: the HTTP method names the `method_spec` applies to
        """
        if path_spec is not None:
            all_paths = self.specs['paths']
            assert path not in all_paths, 'Only specify path_spec once per route path'
            all_paths[path] = copy_json(path_spec)

        if method_spec is None:
            return

        for method in methods:
            # OpenAPI requires HTTP method names be lower case
            verb = method.lower()
            # Create the entry for the path on demand, i.e. only once there
            # actually is a method to register under it
            route_specs = self.specs['paths'].setdefault(path, {})
            assert verb not in route_specs, \
                'Only specify method_spec once per route path and method'
            route_specs[verb] = copy_json(method_spec)
Example #4
0
 def add_doc(self, doc: JSON):
     """
     Add an Elasticsearch document to be transformed.

     Every non-file entity in the document's contents that has a
     'document_id' is registered in `self._entities` (with an empty set of
     relations, unless already present). The document's single file entity
     and each of its related files are then registered with relations to
     all of those non-file entities.

     :param doc: the document to add. It is not modified; the parts that
                 this method mutates are copied first.
     """
     doc_copy = copy_json(doc, 'contents', 'files')
     contents = doc_copy['contents']
     # Only the file entities are mutated below, so only they need to have
     # been copied; other contents are still shared with the given document
     assert contents['projects'] is doc['contents']['projects']
     assert contents['files'] is not doc['contents']['files']
     file_relations = set()
     for entity_type, entities in contents.items():
         if entity_type == 'files':
             # File entities are handled separately below
             continue
         # FIXME: Protocol entities lack document ID so we skip for now
         #        https://github.com/DataBiosphere/azul/issues/3084
         entities = (e for e in entities if 'document_id' in e)
         # Sorting entities is required for deterministic output since
         # the order of the inner entities in an aggregate document is
         # tied to the order with which contributions are returned by ES
         # during aggregation, which happens to be non-deterministic.
         for entity in sorted(entities, key=itemgetter('document_id')):
             pfb_entity = PFBEntity.from_json(name=entity_type,
                                              object_=entity,
                                              schema=self.schema)
             if pfb_entity not in self._entities:
                 self._entities[pfb_entity] = set()
             file_relations.add(PFBRelation.to_entity(pfb_entity))
     file_entity: MutableJSON = one(contents['files'])
     related_files = file_entity.pop('related_files', [])
     for entity in chain([file_entity], related_files):
         # Compare by identity: the file entity is mutated during the first
         # iteration, and all we need to know is whether this is that first
         # iteration.
         if entity is not file_entity:
             # Replace the file entity with a related file. The entity must
             # be wrapped in a list because slice-assigning the bare dict
             # would fill the list with the dict's keys instead.
             contents['files'][:] = [entity]
         entity['drs_uri'] = self.repository_plugin.drs_uri(
             entity.pop('drs_path'))
         # File entities are assumed to be unique
         pfb_entity = PFBEntity.from_json(name='files',
                                          object_=entity,
                                          schema=self.schema)
         assert pfb_entity not in self._entities
         # Terra streams PFBs and requires entities be defined before they are
         # referenced. Thus we add the file entity after all the entities
         # it relates to.
         self._entities[pfb_entity] = file_relations
Example #5
0
 def from_json(cls, json: JSON) -> 'Authentication':
     """
     Create an instance from its JSON representation.

     The entry under `cls._cls_field` names the class to instantiate, which
     is looked up in `cls._cls_for_name`. All remaining entries are passed
     to that class as keyword arguments. The given JSON is not modified.
     """
     kwargs = copy_json(json)
     cls_name = kwargs.pop(cls._cls_field)
     concrete_cls = cls._cls_for_name[cls_name]
     return concrete_cls(**kwargs)
Example #6
0
def dss_bundle_to_tdr(bundle: Bundle, source: TDRSourceRef) -> TDRBundle:
    """
    Build a TDR bundle from the given DSS bundle.

    Note that the given bundle is modified in place: renamed metadata files
    are added to `bundle.metadata_files`, manifest entry names are updated,
    and `bundle.manifest` is sorted by UUID.

    :param bundle: the DSS bundle to convert

    :param source: the TDR source used for the FQID of the resulting bundle
                   and for the DRS paths of the non-indexed manifest entries

    :return: a TDR bundle made from a converted copy of the manifest and a
             copy of the metadata files in which links.json was converted
    """
    # NOTE(review): This copy is taken before the entities are renamed
    # below. Presumably it is independent of `bundle.metadata_files`, in
    # which case it only has the original file names, while the lookup
    # `metadata[entry['name']]` further down uses manifest entry names that
    # may have been changed by the renaming. Confirm that both agree.
    metadata = copy_json(bundle.metadata_files)

    # Order entities by UUID for consistency with Plugin output.
    entities_by_type: Mapping[str, MutableJSONs] = defaultdict(list)
    for k, v in bundle.metadata_files.items():
        if k != 'links.json':
            # The entity type is the file name up to its last underscore
            entity_type = k.rsplit('_', 1)[0]
            entities_by_type[entity_type].append(v)
    for (entity_type, entities) in entities_by_type.items():
        entities.sort(key=lambda e: e['provenance']['document_id'])
        for i, entity in enumerate(entities):
            # Name each entity after its position in the sorted list. The
            # entries under the previous names are not removed.
            name = f'{entity_type}_{i}.json'
            bundle.metadata_files[name] = entity
            manifest_entry = find_manifest_entry(
                bundle, entity['provenance']['document_id'])
            manifest_entry['name'] = name

    bundle.manifest.sort(key=itemgetter('uuid'))

    # Convert the copy of links.json
    links_json = metadata['links.json']
    links_json['schema_type'] = 'links'  # DCP/1 uses 'link_bundle'
    for link in links_json['links']:
        # Replace the 'process' property with the ID and the concrete type
        # of the process
        process_id = link.pop('process')
        link['process_id'] = process_id
        link['process_type'] = find_concrete_type(
            bundle, find_file_name(bundle, process_id))
        link[
            'link_type'] = 'process_link'  # No supplementary files in DCP/1 bundles
        for component in ('input',
                          'output'):  # Protocols already in desired format
            del link[
                f'{component}_type']  # Replace abstract type with concrete types
            component_list = link[f'{component}s']
            # Replace each ID in the list, in place, with a dictionary
            # holding that ID and the concrete type looked up for it
            component_list[:] = [{
                f'{component}_id':
                component_id,
                f'{component}_type':
                find_concrete_type(bundle,
                                   find_file_name(bundle, component_id))
            } for component_id in component_list]

    # Convert a copy of the manifest, after the renaming and sorting above
    manifest: MutableJSONs = copy_jsons(bundle.manifest)
    links_entry = None
    for entry in manifest:
        entry['version'] = convert_version(entry['version'])
        entry['is_stitched'] = False
        if entry['name'] == 'links.json':
            links_entry = entry
        if entry['indexed']:
            entity_json = metadata[entry['name']]
            # Size of the entity JSON in TDR, not the size of pretty-printed
            # output file.
            entry['size'] = content_length(entity_json)
            # Only include mandatory checksums
            del entry['sha1']
            del entry['s3_etag']
            entry['crc32c'] = ''
            entry['sha256'] = ''
        else:
            # Entries that are not indexed get a DRS path instead
            entry['drs_path'] = drs_path(
                source.id, deterministic_uuid(bundle.uuid, entry['uuid']))
    manifest.sort(key=itemgetter('uuid'))

    # The manifest must have had an entry for links.json
    assert links_entry is not None
    # links.json has no FQID of its own in TDR since its FQID is used
    # for the entire bundle.
    links_entry['uuid'] = bundle.uuid
    return TDRBundle(fqid=SourcedBundleFQID(source=source,
                                            uuid=links_entry['uuid'],
                                            version=links_entry['version']),
                     manifest=manifest,
                     metadata_files=metadata)