Python ProvDocument.flattened Examples

Programming Language: Python

Namespace/Package Name: prov.model

Class/Type: ProvDocument

Method/Function: flattened

Examples at hotexamples.com: 2

Python ProvDocument.flattened - 2 examples found. These are the top-rated real-world Python examples of prov.model.ProvDocument.flattened, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ProvDocument(30)

agent(30)

deserialize(30)

entity(30)

add_namespace(30)

activity(30)

wasGeneratedBy(29)

used(19)

wasAssociatedWith(18)

wasAttributedTo(16)

bundle(14)

serialize(13)

set_default_namespace(11)

get_provn(8)

valid_qualified_name(8)

specializationOf(8)

wasDerivedFrom(8)

actedOnBehalfOf(7)

wasInformedBy(6)

wasRevisionOf(4)

get_records(4)

hadMember(4)

wasInvalidatedBy(4)

add_bundle(3)

get_record(3)

update(2)

wasStartedBy(2)

collection(2)

flattened(2)

unified(1)

start(1)

mentionOf(1)

membership(1)

generation(1)

derivation(1)

wasEndedBy(1)

wasInfluencedBy(1)

association(1)

alternateOf(1)

add_record(1)

influence(1)

Example #1

Show file

File: flatprovenancetypes.py Project: trungdong/provenance-kernel-evaluation

def calculate_flat_provenance_types(
    prov_doc: ProvDocument,
    to_level: int = 0,
    including_primitives_types: bool = True,
    counting_wdf_as_two: bool = False,
    ignored_types: Iterable[str] = ϕ,
) -> MultiLevelTypeDict:
    """Compute the flat provenance types of the nodes in a document.

    The result of ``prov_doc.flattened()`` is what gets analysed.  Level 0
    holds, for every element record, a 1-tuple containing the frozen set of
    types returned by ``get_element_types``.  A level ``k`` type is built by
    appending a relation's type to the level ``k - 1`` type of the node in
    the relation's second formal attribute, and is assigned to the node in
    the relation's first formal attribute.  When several relations
    contribute to the same node at the same level, the contributions are
    merged with ``join_flat_types``.

    Args:
        prov_doc: The provenance document to analyse.
        to_level: The last level visited by the propagation loop; with the
            default of 0 only the level-0 types are produced.
        including_primitives_types: Passed through to ``get_element_types``.
        counting_wdf_as_two: When true, a relation whose type equals
            ``PROV_DERIVATION`` has its type appended twice and the result
            is stored one level higher than usual.  As a consequence the
            returned mapping may contain an entry for ``to_level + 1``.
        ignored_types: Converted to a frozenset and passed through to
            ``get_element_types``.

    Returns:
        A mapping from level number to a dictionary of node identifier to
        the flat provenance type (a tuple of frozensets) at that level.
    """
    # Work on the flattened document so that bundles, if any, are merged in.
    flat_doc = prov_doc.flattened()

    # Index structures filled while scanning the records.
    element_types = defaultdict(
        set)  # type: Dict[QualifiedName, Set[QualifiedName]]
    incoming = defaultdict(
        set
    )  # type: Dict[QualifiedName, Set[Tuple[QualifiedName, QualifiedName]]]

    types_to_ignore: FrozenSet[str] = frozenset(ignored_types)

    # Pass 1: collect the types of elements and the edges of relations.
    for record in flat_doc.get_records():  # type: ProvRecord
        if record.is_element():
            element_types[record.identifier] |= get_element_types(
                record, including_primitives_types, types_to_ignore)
            continue
        if not record.is_relation():
            continue

        relation = record.get_type()
        # Only the values of the formal attributes matter here; the first
        # two are taken as the two ends of the relation.
        _, values = zip(*record.formal_attributes)
        head, tail = values[:2]
        if head is None or tail is None:
            # A relation with a missing end cannot carry a type along.
            continue
        incoming[tail].add((relation, head))

    # Pass 2: build the per-level type maps, starting with level 0.
    fp_types = defaultdict(dict)  # type: MultiLevelTypeDict
    fp_types[0] = {
        node: (frozenset(types), )
        for node, types in element_types.items()
    }

    for level in range(1, to_level + 1):
        # Only nodes that own a type at the previous level can propagate.
        for node, node_type in fp_types[level - 1].items():
            for relation, head in incoming[node]:
                step = (frozenset({relation}), )
                new_type = node_type + step  # type: FlatProvenanceType
                target_level = level
                if counting_wdf_as_two and relation == PROV_DERIVATION:
                    # A derivation counts as two steps: append its type
                    # once more and store it one level further up.
                    new_type = new_type + step
                    target_level = level + 1

                bucket = fp_types[target_level]
                if head in bucket:
                    bucket[head] = join_flat_types(bucket[head], new_type)
                else:
                    bucket[head] = new_type

    return fp_types

Example #2

Show file

    def declare_directory(self, value: CWLObjectType) -> ProvEntity:
        """Declare a directory, and every entry of its listing, as provenance.

        The directory is declared as an entity in ``self.document`` and again
        inside a dedicated bundle that holds its ORE folder description.  Each
        listing entry is declared through ``self.declare_artefact`` and linked
        to the directory.  The bundle is also written, flattened, as a Turtle
        file into the research object and registered there as an annotation.

        Args:
            value: The directory object.  An ``"@id"`` key is added when
                absent, and ``get_listing`` is called on it when it has no
                ``"listing"`` key.

        Returns:
            The entity created in ``self.document`` for the directory.
        """
        # FIXME: Derive a hash-like identifier from the directory content so
        # that identical filenames/hashes in another location map to the
        # same value.
        # Until then a fresh UUID is minted, but an identifier already stored
        # in the value dictionary is reused.
        folder_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))

        # Separate annotation file that will carry the ORE folder listing.
        ore_filename = folder_id.replace("urn:uuid:", "directory-") + ".ttl"
        ore_bundle = self.document.bundle(self.metadata_ns[ore_filename])

        folder = self.document.entity(
            folder_id,
            [
                (PROV_TYPE, asserted)
                for asserted in (
                    WFPROV["Artifact"],
                    PROV["Collection"],
                    PROV["Dictionary"],
                    RO["Folder"],
                )
            ],
        )
        # The ro:Folder as described in ORE terms; this copy lives in the
        # bundle that is saved separately.
        folder_in_bundle = ore_bundle.entity(
            folder_id,
            [(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])],
        )
        self.document.mentionOf(
            folder_id + "#ore", folder_id, ore_bundle.identifier)

        # Disabled: explicit ORE ResourceMap describing the bundle.
        # dir_manifest = dir_bundle.entity(
        #     dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
        #                             ORE["describes"]: coll_b.identifier})

        folder_attrs = [(ORE["isDescribedBy"], ore_bundle.identifier)]
        bundle_attrs = []  # type: List[Tuple[Identifier, ProvEntity]]

        # FIXME: .listing might not be populated yet - hopefully a later
        # call to this method will sort that out.
        has_entries = False

        if "listing" not in value:
            get_listing(self.fsaccess, value)
        listing = cast(MutableSequence[CWLObjectType],
                       value.get("listing", []))
        for entry in listing:
            has_entries = True
            # Declare the child artefact and make it a member of the folder.
            member = self.declare_artefact(entry)
            self.document.membership(folder, member)

            # The membership relation doubles as our ORE Proxy; it exists
            # both in the main document and in the bundle.
            proxy_id = uuid.uuid4().urn
            proxy = self.document.entity(proxy_id)
            proxy_in_bundle = ore_bundle.entity(proxy_id)

            # PROV-O style Dictionary, see
            # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
            # because prov.py does not currently allow PROV-N extensions
            # such as hadDictionaryMember(..)
            proxy.add_asserted_type(PROV["KeyEntityPair"])
            pair_attrs = {
                PROV["pairKey"]: entry["basename"],
                PROV["pairEntity"]: member,
            }
            proxy.add_attributes(pair_attrs)

            # The proxy is additionally a
            # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
            proxy_in_bundle.add_asserted_type(RO["FolderEntry"])
            proxy_in_bundle.add_asserted_type(ORE["Proxy"])
            entry_attrs = {
                RO["entryName"]: entry["basename"],
                ORE["proxyIn"]: folder,
                ORE["proxyFor"]: member,
            }
            proxy_in_bundle.add_attributes(entry_attrs)

            folder_attrs.append((PROV["hadDictionaryMember"], proxy))
            bundle_attrs.append((ORE["aggregates"], proxy_in_bundle))

        folder.add_attributes(folder_attrs)
        folder_in_bundle.add_attributes(bundle_attrs)

        # Save the ORE folder description as annotation metadata too.
        ore_doc = ProvDocument()
        for namespace in (ORE, RO, UUID):
            ore_doc.add_namespace(namespace)
        ore_doc.add_bundle(ore_bundle)
        flat_ore_doc = ore_doc.flattened()

        bag_path = str(PurePosixPath(METADATA, ore_filename))
        with self.research_object.write_bag_file(bag_path) as provenance_file:
            flat_ore_doc.serialize(
                provenance_file, format="rdf", rdf_format="turtle")
        self.research_object.add_annotation(
            folder_id, [ore_filename], ORE["isDescribedBy"].uri)

        if not has_entries:
            # Nothing was listed: mark the directory as empty.
            folder.add_asserted_type(PROV["EmptyCollection"])
            folder.add_asserted_type(PROV["EmptyDictionary"])
        self.research_object.add_uri(folder.identifier.uri)
        return folder