Example #1
0
def enforce_uniqueness_constraints(graph: ProvDocument) -> ProvDocument:
    """Return a copy of *graph* that satisfies the model uniqueness constraints.

    Nodes:
        Duplicate nodes are left to ``ProvDocument.unified``, which is
        relied upon to collapse nodes sharing the same id.

    Relations:
        At most one relation of a given type is kept between any two
        nodes; the first one encountered wins.

    Applying the constraints once the model is fully populated, rather
    than while populating it, keeps the model creation code simple.
    """
    seen_edges = set()
    kept = []

    for rel in graph.get_records(ProvRelation):
        # The first two formal attributes hold the endpoints of the relation.
        (_, start), (_, end) = rel.formal_attributes[:2]
        edge_key = (type(rel), start, end)
        if edge_key not in seen_edges:
            seen_edges.add(edge_key)
            kept.append(rel)

    # Elements go in after the de-duplicated relations.
    kept += graph.get_records(ProvElement)

    return ProvDocument(kept).unified()
def calculate_flat_provenance_types(
    prov_doc: ProvDocument,
    to_level: int = 0,
    including_primitives_types: bool = True,
    counting_wdf_as_two: bool = False,
    ignored_types: Iterable[str] = ϕ,
) -> MultiLevelTypeDict:
    """Compute the flat provenance types of the nodes of *prov_doc* per level.

    The document is flattened first.  Level-0 types are whatever
    ``get_element_types`` returns for each element record.  For every
    level ``k`` from 1 to *to_level*, each type found at level ``k - 1``
    is extended with the type of a relation and stored under the node
    given by the first formal attribute of that relation; when that node
    already has a type at the target level the two are merged with
    ``join_flat_types``.

    Args:
        prov_doc: The document to analyse.
        to_level: Highest level to propagate level-0 types to.
        including_primitives_types: Forwarded to ``get_element_types``.
        counting_wdf_as_two: When true, a relation whose type equals
            ``PROV_DERIVATION`` has its type appended twice and the
            result is stored at level ``k + 1`` instead of ``k``.  Note
            that this can populate level ``to_level + 1``.
        ignored_types: Frozen into a set and forwarded to
            ``get_element_types``.

    Returns:
        A mapping from level to ``{node: flat type}``, where a flat type
        is a tuple of frozensets.
    """
    # Work on a bundle-free view of the document.
    flat_doc = prov_doc.flattened()

    # Index structures: per-node level-0 types, and for each node the
    # (relation type, other end) pairs its types are propagated along.
    base_types: Dict[QualifiedName, Set[QualifiedName]] = defaultdict(set)
    incoming: Dict[
        QualifiedName, Set[Tuple[QualifiedName, QualifiedName]]
    ] = defaultdict(set)

    excluded: FrozenSet[str] = frozenset(ignored_types)

    for rec in flat_doc.get_records():  # type: ProvRecord
        if rec.is_element():
            base_types[rec.identifier] |= get_element_types(
                rec, including_primitives_types, excluded
            )
            continue
        if not rec.is_relation():
            continue

        rel_type = rec.get_type()
        _, values = zip(*rec.formal_attributes)
        # The first two formal attribute values are the relation's endpoints.
        predecessor, successor = values[:2]
        if predecessor is None or successor is None:
            continue
        incoming[successor].add((rel_type, predecessor))

    # The type map for this graph, seeded with level-0 flat types.
    fp_types = defaultdict(dict)  # type: MultiLevelTypeDict
    fp_types[0] = {
        node: (frozenset(node_types),)
        for node, node_types in base_types.items()
    }

    for level in range(1, to_level + 1):
        # Only nodes that have a (level - 1) type can propagate one.
        for node, node_type in fp_types[level - 1].items():
            for rel_type, predecessor in incoming[node]:
                step = (frozenset({rel_type}),)
                new_type = node_type + step  # type: FlatProvenanceType
                if counting_wdf_as_two and rel_type == PROV_DERIVATION:
                    # A derivation counts as two steps: repeat the relation
                    # type and land one level further.
                    new_type = new_type + step
                    target_level = level + 1
                else:
                    target_level = level

                if predecessor in fp_types[target_level]:
                    new_type = join_flat_types(
                        fp_types[target_level][predecessor], new_type
                    )
                fp_types[target_level][predecessor] = new_type

    return fp_types
Example #3
0
def count_record_types(prov_doc: ProvDocument) -> dict:
    """Count the records of *prov_doc* and of its bundles by record type.

    Returns:
        A dict keyed by the ``PROV_N_MAP`` entry of each record type, with
        the number of records of that type as value.
    """
    # Records held directly by the document.
    tally = Counter(
        ProvRecord.get_type(record) for record in prov_doc.get_records()
    )
    # Records held by each of the document's bundles.
    for bundle in prov_doc.bundles:
        tally.update(
            ProvRecord.get_type(record)
            for record in ProvBundle.get_records(bundle)
        )
    return {PROV_N_MAP[rec_type]: total for rec_type, total in tally.items()}
    def test_document_update_simple(self):
        # Target document: one entity plus one bundle holding one entity.
        target = ProvDocument()
        target.set_default_namespace(EX_URI)
        target.entity('e')
        target.bundle('b1').entity('e')

        # Source document: same entity, a bundle with the same id as the
        # target's, and one additional bundle.
        source = ProvDocument()
        source.set_default_namespace(EX_URI)
        source.entity('e')
        for bundle_id in ('b1', 'b2'):
            source.bundle(bundle_id).entity('e')

        # Updating from something that is not a document must be rejected.
        with self.assertRaises(ProvException):
            target.update(1)

        target.update(source)
        self.assertEqual(len(target.get_records()), 2)
        self.assertEqual(len(target.bundles), 2)
Example #5
0
    def test_document_update_simple(self):
        # Document being updated: a single entity and a single bundle.
        receiver = ProvDocument()
        receiver.set_default_namespace(EX_URI)
        receiver.entity('e')
        receiver.bundle('b1').entity('e')

        # Document providing the update: the same entity and two bundles,
        # one of which shares its id with the receiver's bundle.
        donor = ProvDocument()
        donor.set_default_namespace(EX_URI)
        donor.entity('e')
        donor.bundle('b1').entity('e')
        donor.bundle('b2').entity('e')

        # A non-document argument is expected to raise ProvException.
        with self.assertRaises(ProvException):
            receiver.update(1)

        receiver.update(donor)
        self.assertEqual(len(receiver.get_records()), 2)
        self.assertEqual(len(receiver.bundles), 2)