def reify(self, service_config: ServiceConfig, *, explicit_only: bool) -> FiltersJSON:
    """
    Produce a mutable JSON rendering of these filters.

    :param service_config: supplies the facet name under which the implicit
                           source filter is added

    :param explicit_only: if True, return only the explicitly specified
                          filters; otherwise also merge in the implicit
                          source filter
    """
    result = copy_json(self.explicit)
    if explicit_only:
        return result
    self._add_implicit_source_filter(result, service_config.source_id_facet)
    return result
def __init__(self, app_name, unit_test=False, spec=None):
    """
    :param app_name: name passed through to the parent application class

    :param unit_test: flag stored on the instance; presumably toggles
                      test-only behavior elsewhere — confirm against callers

    :param spec: optional top-level OpenAPI specification. It must not
                 define any paths; those are registered per route later.
    """
    self.unit_test = unit_test
    if spec is None:
        self.specs: Optional[MutableJSON] = None
    else:
        # Routes contribute their own specs, so a pre-populated 'paths'
        # section would conflict with per-route registration.
        assert 'paths' not in spec, 'The top-level spec must not define paths'
        self.specs: Optional[MutableJSON] = copy_json(spec)
        self.specs['paths'] = {}
    super().__init__(app_name, debug=config.debug > 0, configure_logs=False)
def _register_spec(self,
                   path: str,
                   path_spec: Optional[JSON],
                   method_spec: Optional[JSON],
                   methods: Iterable[str]):
    """
    Add a route's specifications to the specification object.
    """
    paths = self.specs['paths']
    if path_spec is not None:
        assert path not in paths, 'Only specify path_spec once per route path'
        paths[path] = copy_json(path_spec)
    if method_spec is not None:
        for method in methods:
            # OpenAPI requires HTTP method names be lower case
            key = method.lower()
            # This may override duplicate specs from path_specs
            methods_for_path = paths.setdefault(path, {})
            assert key not in methods_for_path, \
                'Only specify method_spec once per route path and method'
            methods_for_path[key] = copy_json(method_spec)
def add_doc(self, doc: JSON):
    """
    Add an Elasticsearch document to be transformed.

    Non-file inner entities are converted to PFB entities and recorded;
    the document's file entity (and any of its related files) is then
    recorded with relations pointing at those entities.

    :param doc: an aggregate document whose 'contents' holds lists of
                inner entities keyed by entity type; 'contents.files' is
                expected to hold exactly one file entity
    """
    # Deep-copy only 'contents' and 'files' since only those are mutated below
    doc_copy = copy_json(doc, 'contents', 'files')
    contents = doc_copy['contents']
    assert contents['projects'] is doc['contents']['projects']
    assert contents['files'] is not doc['contents']['files']
    file_relations = set()
    for entity_type, entities in contents.items():
        # FIXME: Protocol entities lack document ID so we skip for now
        # https://github.com/DataBiosphere/azul/issues/3084
        entities = (e for e in entities if 'document_id' in e)
        # Sorting entities is required for deterministic output since
        # the order of the inner entities in an aggregate document is
        # tied to the order with which contributions are returned by ES
        # during aggregation, which happens to be non-deterministic.
        for entity in sorted(entities, key=itemgetter('document_id')):
            if entity_type != 'files':
                pfb_entity = PFBEntity.from_json(name=entity_type,
                                                object_=entity,
                                                schema=self.schema)
                if pfb_entity not in self._entities:
                    self._entities[pfb_entity] = set()
                file_relations.add(PFBRelation.to_entity(pfb_entity))
    file_entity: MutableJSON = one(contents['files'])
    related_files = file_entity.pop('related_files', [])
    for entity in chain([file_entity], related_files):
        # Use identity, not value equality: the first loop iteration yields
        # the main file entity itself; every related file must replace it
        if entity is not file_entity:
            # Replace the file entity with a related file. The original
            # `contents['files'][:] = entity` slice-assigned a dict, which
            # would fill the list with the dict's *keys*; wrap the entity
            # in a list so the list holds the entity itself.
            contents['files'][:] = [entity]
        entity['drs_uri'] = self.repository_plugin.drs_uri(entity.pop('drs_path'))
        # File entities are assumed to be unique
        pfb_entity = PFBEntity.from_json(name='files',
                                         object_=entity,
                                         schema=self.schema)
        assert pfb_entity not in self._entities
        # Terra streams PFBs and requires entities be defined before they are
        # referenced. Thus we add the file entity after all the entities
        # it relates to.
        self._entities[pfb_entity] = file_relations
def from_json(cls, json: JSON) -> 'Authentication':
    """
    Reconstitute an Authentication instance of the appropriate subclass
    from its JSON representation.

    :param json: a JSON object containing the class-discriminator field
                 alongside the subclass's constructor arguments
    """
    payload = copy_json(json)
    subclass_name = payload.pop(cls._cls_field)
    subclass = cls._cls_for_name[subclass_name]
    return subclass(**payload)
def dss_bundle_to_tdr(bundle: Bundle, source: TDRSourceRef) -> TDRBundle: metadata = copy_json(bundle.metadata_files) # Order entities by UUID for consistency with Plugin output. entities_by_type: Mapping[str, MutableJSONs] = defaultdict(list) for k, v in bundle.metadata_files.items(): if k != 'links.json': entity_type = k.rsplit('_', 1)[0] entities_by_type[entity_type].append(v) for (entity_type, entities) in entities_by_type.items(): entities.sort(key=lambda e: e['provenance']['document_id']) for i, entity in enumerate(entities): name = f'{entity_type}_{i}.json' bundle.metadata_files[name] = entity manifest_entry = find_manifest_entry( bundle, entity['provenance']['document_id']) manifest_entry['name'] = name bundle.manifest.sort(key=itemgetter('uuid')) links_json = metadata['links.json'] links_json['schema_type'] = 'links' # DCP/1 uses 'link_bundle' for link in links_json['links']: process_id = link.pop('process') link['process_id'] = process_id link['process_type'] = find_concrete_type( bundle, find_file_name(bundle, process_id)) link[ 'link_type'] = 'process_link' # No supplementary files in DCP/1 bundles for component in ('input', 'output'): # Protocols already in desired format del link[ f'{component}_type'] # Replace abstract type with concrete types component_list = link[f'{component}s'] component_list[:] = [{ f'{component}_id': component_id, f'{component}_type': find_concrete_type(bundle, find_file_name(bundle, component_id)) } for component_id in component_list] manifest: MutableJSONs = copy_jsons(bundle.manifest) links_entry = None for entry in manifest: entry['version'] = convert_version(entry['version']) entry['is_stitched'] = False if entry['name'] == 'links.json': links_entry = entry if entry['indexed']: entity_json = metadata[entry['name']] # Size of the entity JSON in TDR, not the size of pretty-printed # output file. 
entry['size'] = content_length(entity_json) # Only include mandatory checksums del entry['sha1'] del entry['s3_etag'] entry['crc32c'] = '' entry['sha256'] = '' else: entry['drs_path'] = drs_path( source.id, deterministic_uuid(bundle.uuid, entry['uuid'])) manifest.sort(key=itemgetter('uuid')) assert links_entry is not None # links.json has no FQID of its own in TDR since its FQID is used # for the entire bundle. links_entry['uuid'] = bundle.uuid return TDRBundle(fqid=SourcedBundleFQID(source=source, uuid=links_entry['uuid'], version=links_entry['version']), manifest=manifest, metadata_files=metadata)