def calculate_flat_provenance_types(
    prov_doc: ProvDocument,
    to_level: int = 0,
    including_primitives_types: bool = True,
    counting_wdf_as_two: bool = False,
    ignored_types: Iterable[str] = ϕ,
) -> MultiLevelTypeDict:
    """Compute the flat provenance types of the nodes in a document.

    The document is flattened first, so records held in bundles are indexed
    together with the top-level ones.  Level 0 maps every element to a
    one-item tuple holding the frozenset of types returned by
    ``get_element_types``.  Each further level ``k`` is built by taking the
    level ``k - 1`` type of a node, appending the type of a relation whose
    second formal attribute is that node, and storing the result for the
    node named by the relation's first formal attribute.  When a node
    receives several types at the same level they are combined with
    ``join_flat_types``.

    Args:
        prov_doc: The provenance document to analyse.
        to_level: Highest level to propagate to; ``0`` yields level 0 only.
        including_primitives_types: Passed through to ``get_element_types``.
        counting_wdf_as_two: When true, a ``PROV_DERIVATION`` relation is
            appended twice to the type and the result is stored at level
            ``k + 1`` instead of level ``k``.
        ignored_types: Type names handed (as a frozenset) to
            ``get_element_types``.

    Returns:
        A mapping from level number to a ``{node: flat type}`` dictionary.
    """
    # Bundles, if any, are merged into one flat document before indexing.
    flat_doc = prov_doc.flattened()

    excluded: FrozenSet[str] = frozenset(ignored_types)
    # node -> its own (level-0) types
    base_types: Dict[QualifiedName, Set[QualifiedName]] = defaultdict(set)
    # node -> {(relation type, node that receives this node's types)}
    incoming: Dict[
        QualifiedName, Set[Tuple[QualifiedName, QualifiedName]]
    ] = defaultdict(set)

    # Pass 1: index element types and relation end points.
    for record in flat_doc.get_records():
        if record.is_element():
            base_types[record.identifier] |= get_element_types(
                record, including_primitives_types, excluded
            )
            continue
        if not record.is_relation():
            continue

        relation = record.get_type()
        _names, arguments = zip(*record.formal_attributes)
        # The first two formal attributes are the two ends of the relation.
        receiver, origin = arguments[:2]
        if receiver is None or origin is None:
            continue
        incoming[origin].add((relation, receiver))

    # Pass 2: build the per-level type map, starting from level 0.
    fp_types: MultiLevelTypeDict = defaultdict(dict)
    fp_types[0] = {
        node: (frozenset(node_types),)
        for node, node_types in base_types.items()
    }

    for level in range(1, to_level + 1):
        # Only nodes that own a type at the previous level can propagate.
        for node, lower_type in fp_types[level - 1].items():
            for relation, receiver in incoming[node]:
                extended: FlatProvenanceType = lower_type + (
                    frozenset({relation}),
                )
                if counting_wdf_as_two and relation == PROV_DERIVATION:
                    # A derivation counts as two hops: repeat the relation
                    # and file the result one level further up.
                    # NOTE(review): when level == to_level this fills level
                    # to_level + 1, beyond the requested range - confirm
                    # that this is intended.
                    extended = extended + (frozenset({relation}),)
                    destination = fp_types[level + 1]
                else:
                    destination = fp_types[level]

                if receiver in destination:
                    destination[receiver] = join_flat_types(
                        destination[receiver], extended
                    )
                else:
                    destination[receiver] = extended

    return fp_types
def declare_directory(self, value: CWLObjectType) -> ProvEntity:
    """Declare a directory, and everything listed in it, as provenance.

    The directory is recorded twice: as a collection/dictionary entity in
    ``self.document`` and as an ORE aggregation inside a dedicated bundle.
    The bundle is then flattened, serialised as Turtle into the research
    object and registered as an annotation of the directory.  Every item of
    ``value["listing"]`` is declared through ``self.declare_artefact`` and
    linked to the directory with a key/entity pair (PROV dictionary) and a
    folder entry (ORE proxy).

    Args:
        value: The directory object.  An ``"@id"`` key is added when it is
            missing, and ``get_listing`` is called on it when it has no
            ``"listing"`` key.

    Returns:
        The collection entity created in ``self.document``.
    """
    # FIXME: Calculate a hash-like identifier for directory
    # so we get same value if it's the same filenames/hashes
    # in a different location.
    # Until then a fresh UUID identifies the directory; it is stored in the
    # value dictionary so that an existing "@id" is reused.
    folder_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))

    # Name of the annotation file that keeps the ORE Folder listing.
    ore_filename = f'{folder_id.replace("urn:uuid:", "directory-")}.ttl'
    ore_bundle = self.document.bundle(self.metadata_ns[ore_filename])

    folder_types = [
        (PROV_TYPE, WFPROV["Artifact"]),
        (PROV_TYPE, PROV["Collection"]),
        (PROV_TYPE, PROV["Dictionary"]),
        (PROV_TYPE, RO["Folder"]),
    ]
    folder = self.document.entity(folder_id, folder_types)

    # ORE description of the ro:Folder, kept in its own bundle.
    aggregation_types = [
        (PROV_TYPE, RO["Folder"]),
        (PROV_TYPE, ORE["Aggregation"]),
    ]
    folder_in_bundle = ore_bundle.entity(folder_id, aggregation_types)
    self.document.mentionOf(
        f"{folder_id}#ore", folder_id, ore_bundle.identifier
    )

    # Disabled in the original code:
    # dir_manifest = dir_bundle.entity(
    #     dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
    #                             ORE["describes"]: coll_b.identifier})

    folder_attrs = [(ORE["isDescribedBy"], ore_bundle.identifier)]
    aggregation_attrs: List[Tuple[Identifier, ProvEntity]] = []

    # FIXME: .listing might not be populated yet - hopefully
    # a later call to this method will sort that
    if "listing" not in value:
        get_listing(self.fsaccess, value)

    has_children = False
    children = cast(MutableSequence[CWLObjectType], value.get("listing", []))
    for child in children:
        has_children = True

        # Declare the child artefact and make it a member of the folder.
        child_entity = self.declare_artefact(child)
        self.document.membership(folder, child_entity)

        # The membership relation doubles as our ORE Proxy.
        member_id = uuid.uuid4().urn
        pair = self.document.entity(member_id)
        proxy = ore_bundle.entity(member_id)

        # PROV-O style Dictionary
        # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
        # ..as prov.py do not currently allow PROV-N extensions
        # like hadDictionaryMember(..)
        pair.add_asserted_type(PROV["KeyEntityPair"])
        pair_attrs = {
            PROV["pairKey"]: child["basename"],
            PROV["pairEntity"]: child_entity,
        }
        pair.add_attributes(pair_attrs)

        # The same member is also a
        # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
        proxy.add_asserted_type(RO["FolderEntry"])
        proxy.add_asserted_type(ORE["Proxy"])
        proxy_attrs = {
            RO["entryName"]: child["basename"],
            ORE["proxyIn"]: folder,
            ORE["proxyFor"]: child_entity,
        }
        proxy.add_attributes(proxy_attrs)

        folder_attrs.append((PROV["hadDictionaryMember"], pair))
        aggregation_attrs.append((ORE["aggregates"], proxy))

    folder.add_attributes(folder_attrs)
    folder_in_bundle.add_attributes(aggregation_attrs)

    # Save the ORE Folder description as annotation metadata as well.
    ore_doc = ProvDocument()
    ore_doc.add_namespace(ORE)
    ore_doc.add_namespace(RO)
    ore_doc.add_namespace(UUID)
    ore_doc.add_bundle(ore_bundle)
    flat_ore_doc = ore_doc.flattened()

    ore_path = str(PurePosixPath(METADATA, ore_filename))
    with self.research_object.write_bag_file(ore_path) as provenance_file:
        flat_ore_doc.serialize(
            provenance_file, format="rdf", rdf_format="turtle"
        )
    self.research_object.add_annotation(
        folder_id, [ore_filename], ORE["isDescribedBy"].uri
    )

    if not has_children:
        # A directory without entries is an empty collection/dictionary.
        folder.add_asserted_type(PROV["EmptyCollection"])
        folder.add_asserted_type(PROV["EmptyDictionary"])

    self.research_object.add_uri(folder.identifier.uri)
    return folder