Example #1
0
    def from_uriref(justification_id: URIRef, *, graph: Graph):
        """Build a ``Justification`` from a TextJustification node in ``graph``.

        Args:
            justification_id: Node expected to carry
                ``rdf:type aida:TextJustification`` in ``graph``.
            graph: Graph the justification's properties are read from.

        Returns:
            A ``Justification`` whose ``child_id`` / ``parent_id`` come from
            ``aida:source`` / ``aida:sourceDocument`` (as strings) and whose
            span comes from ``aida:startOffset`` / ``aida:endOffsetInclusive``
            (as ints). A property that is absent from the graph is passed as
            ``None``.

        Raises:
            ValueError: If ``justification_id`` is not typed as a
                TextJustification in ``graph``.
        """
        aida = aida_namespace(graph=graph)

        if (justification_id, RDF.type, aida.TextJustification) not in graph:
            raise ValueError(
                f"{justification_id} does not have type TextJustification in graph."
            )

        def single_value(predicate):
            # any=False: per the rdflib docs, several values for one property
            # raise instead of one being picked arbitrarily.
            return graph.value(subject=justification_id,
                               predicate=predicate,
                               any=False)

        span_start = single_value(aida.startOffset)
        span_end = single_value(aida.endOffsetInclusive)
        source = single_value(aida.source)
        source_doc = single_value(aida.sourceDocument)

        return Justification(
            justification_id=justification_id,
            child_id=str(source) if source else None,
            parent_id=str(source_doc) if source_doc else None,
            # Compare against None explicitly: an rdflib Literal holding 0 is
            # falsy, so a truthiness test would drop a legitimate offset of 0
            # (a span that starts at the very beginning of the document).
            span_start=int(span_start) if span_start is not None else None,
            span_end=int(span_end) if span_end is not None else None,
            private_data=private_data(justification_id, graph=graph),
        )
Example #2
0
def main(
    aif_file: Path, out_dir: Path, db_path: Path, by_clusters: bool, verbose: bool
) -> Path:
    """Parse a Turtle AIF file and write a visualization of it into ``out_dir``.

    Args:
        aif_file: Path of the Turtle-serialized graph to visualize.
        out_dir: Directory the output files are written to.
        db_path: Passed to ``Hypothesis.visualize`` (cluster mode) or used to
            construct the ``Corpus`` (element mode).
        by_clusters: When true, build a ``Hypothesis`` from the graph and let
            it render a single HTML file; otherwise render every cluster,
            entity, event and relation through ``HtmlWriter`` and
            ``PrettyPrinter``.
        verbose: Cluster mode only; selects the ``_verbose`` output file name
            and is forwarded to ``Hypothesis.visualize``.

    Returns:
        ``out_dir``, unchanged.
    """
    graph: Graph = Graph()
    graph.parse(source=str(aif_file), format="turtle")
    aida = aida_namespace(graph)
    # Plain string: the message has no placeholders, so no f-prefix is needed.
    print("Starting visualization...")

    if by_clusters:
        out_dir.mkdir(exist_ok=True)
        name_suffix = "_visualization_verbose" if verbose else "_visualization"
        output_file = out_dir / f"{aif_file.stem}{name_suffix}.html"
        output_file.touch(exist_ok=True)
        hypothesis = Hypothesis.from_graph(graph)
        hypothesis.visualize(out_dir, output_file, db_path, verbose)
        return out_dir

    # Element mode: clusters first, then entities, events and relations.
    element_classes = (aida.SameAsCluster, aida.Entity, aida.Event, aida.Relation)
    element_ids = [
        element_id
        for element_class in element_classes
        for element_id in graph.subjects(predicate=RDF.type, object=element_class)
    ]
    elements = {
        element_id: Element.from_uriref(element_id, graph=graph)
        for element_id in tqdm(element_ids)
    }

    corpus = Corpus(db_path)
    renderer = HtmlWriter(corpus, elements, directory=out_dir)
    renderer.write_to_dir(output_file_name=f"{aif_file.stem}.html")
    pretty_printer = PrettyPrinter(corpus, elements, directory=out_dir)
    pretty_printer.write_to_dir(output_file_name=f"{aif_file.stem}.tsv")
    return out_dir
Example #3
0
    def from_uriref(element_id: URIRef, *, graph: Graph):
        """Assemble an ``Element`` from everything ``graph`` says about a node.

        Args:
            element_id: Node to describe.
            graph: Graph that is queried for the node's properties.

        Returns:
            An ``Element`` carrying the node's ``rdf:type``, its prototypes,
            cluster members, clusters, names and handles, its informative and
            ``justifiedBy`` justifications (only those typed as
            TextJustification), the statements that have the node as
            ``rdf:subject``, its link assertions, and its private data.
        """
        aida = aida_namespace(graph=graph)

        def objects_of(predicate):
            # Every object of (element_id, predicate, ?) as a list.
            return list(graph.objects(subject=element_id, predicate=predicate))

        def is_text_justification(node):
            return (node, RDF.type, aida.TextJustification) in graph

        # Only text justifications are kept; other justification types are
        # dropped before Justification.from_uriref is called on them.
        informative_ids = [
            node for node in objects_of(aida.informativeJustification)
            if is_text_justification(node)
        ]
        justified_ids = [
            node for node in objects_of(aida.justifiedBy)
            if is_text_justification(node)
        ]

        # Reified statements that talk about this element.
        statement_nodes = list(
            graph.subjects(predicate=RDF.subject, object=element_id))
        link_nodes = objects_of(aida.link)

        element_type = graph.value(subject=element_id,
                                   predicate=RDF.type,
                                   any=False)
        prototypes = objects_of(aida.prototype)
        members = objects_of(aida.clusterMember)
        clusters = objects_of(aida.cluster)
        names = objects_of(aida.hasName)
        handles = objects_of(aida.handle)

        informative_justifications = [
            Justification.from_uriref(node, graph=graph)
            for node in informative_ids
        ]
        justified_by = [
            Justification.from_uriref(node, graph=graph)
            for node in justified_ids
        ]
        statements = [
            Statement.from_uriref(node, graph=graph)
            for node in statement_nodes
        ]
        link_assertions = [
            LinkAssertion.from_uriref(node, graph=graph)
            for node in link_nodes
        ]

        return Element(
            element_id=element_id,
            element_type=element_type,
            prototypes=prototypes,
            members=members,
            clusters=clusters,
            names=names,
            handles=handles,
            informative_justifications=informative_justifications,
            justified_by=justified_by,
            statements=statements,
            link_assertions=link_assertions,
            private_data=private_data(element_id, graph=graph),
        )
Example #4
0
def private_data(subject_id: URIRef, *, graph: Graph):
    """Collect the JSON key/value pairs attached to ``subject_id`` as private data.

    Follows every ``aida:privateData`` node of ``subject_id``, parses each of
    that node's ``aida:jsonContent`` literals as JSON, and gathers the
    resulting ``(key, value)`` items.

    Args:
        subject_id: Node whose private data is wanted.
        graph: Graph to read from.

    Returns:
        A list of ``(key, value)`` tuples; empty when the subject has no
        private data.
    """
    aida = aida_namespace(graph=graph)
    collected = []
    holders = graph.objects(subject=subject_id, predicate=aida.privateData)
    for holder in holders:
        contents = graph.objects(subject=holder, predicate=aida.jsonContent)
        for content in contents:
            # .items() means each JSON document is expected to be an object.
            collected.extend(json.loads(str(content)).items())
    return collected
Example #5
0
    def from_uriref(link_id: URIRef, *, graph):
        """Build a ``LinkAssertion`` from a link node in ``graph``.

        Args:
            link_id: Node of the link assertion.
            graph: Graph the link's properties are read from.

        Returns:
            A ``LinkAssertion`` with the link's ``aida:linkTarget``,
            ``aida:system`` and confidence. The confidence is the
            ``aida:confidenceValue`` of the node reached through
            ``aida:confidence``, converted to ``float``. It is ``None`` when
            the link has no confidence node or that node has no value.
        """
        aida = aida_namespace(graph=graph)

        target = graph.value(subject=link_id, predicate=aida.linkTarget)
        system = graph.value(subject=link_id, predicate=aida.system)

        # The confidence value sits one hop away, on a separate confidence
        # node. Either hop may be missing; guard both so that float() is never
        # handed None (which would raise an unhelpful TypeError).
        confidence = None
        confidence_node = graph.value(subject=link_id, predicate=aida.confidence)
        if confidence_node is not None:
            confidence = graph.value(
                subject=confidence_node,
                predicate=aida.confidenceValue,
            )

        return LinkAssertion(
            link_id=link_id,
            link_confidence=float(confidence) if confidence is not None else None,
            link_target=target,
            link_system=system,
        )
Example #6
0
    def from_uriref(statement_id: URIRef, *, graph: Graph):
        """Build a ``Statement`` from a reified statement node in ``graph``.

        Args:
            statement_id: Node of the reified statement.
            graph: Graph the statement's properties are read from.

        Returns:
            A ``Statement`` with the node's ``rdf:subject``, ``rdf:predicate``
            and ``rdf:object``, plus a ``Justification`` for every text
            justification that supports it. That covers those attached
            directly through ``aida:justifiedBy`` and those contained in an
            attached CompoundJustification.
        """
        aida = aida_namespace(graph=graph)

        def has_type(node, rdf_class):
            return (node, RDF.type, rdf_class) in graph

        statement_subject = graph.value(subject=statement_id,
                                        predicate=RDF.subject,
                                        any=False)
        statement_predicate = graph.value(subject=statement_id,
                                          predicate=RDF.predicate,
                                          any=False)
        statement_object = graph.value(subject=statement_id,
                                       predicate=RDF.object,
                                       any=False)
        justifiedby_ids = list(
            graph.objects(subject=statement_id, predicate=aida.justifiedBy))

        textjustification_ids = [
            node for node in justifiedby_ids
            if has_type(node, aida.TextJustification)
        ]
        compoundjustification_ids = [
            node for node in justifiedby_ids
            if has_type(node, aida.CompoundJustification)
        ]

        # Members of a compound justification get the same type filter as the
        # direct ones: Justification.from_uriref is called on every collected
        # id, so only nodes typed as TextJustification are kept.
        for cj in compoundjustification_ids:
            textjustification_ids.extend(
                node for node in graph.objects(
                    subject=cj, predicate=aida.containedJustification)
                if has_type(node, aida.TextJustification))

        return Statement(
            statement_id=statement_id,
            subject=statement_subject,
            predicate=statement_predicate,
            object=statement_object,
            justified_by=[
                Justification.from_uriref(j, graph=graph)
                for j in textjustification_ids
            ],
        )
Example #7
0
def main(aif_file: Path, corpus_path: Path, out_dir: Path):
    """Write one HTML page per SameAsCluster found in a Turtle AIF file.

    For every cluster the page's element map holds the cluster itself, the
    nodes that point at it through ``aida:cluster``, and the ``members`` of
    those nodes. Pages are named by a zero-padded running index
    (``0000.html``, ``0001.html``, ...).

    Args:
        aif_file: Path of the Turtle-serialized graph.
        corpus_path: Used to construct the ``Corpus`` handed to ``HtmlWriter``.
        out_dir: Directory passed to ``HtmlWriter.write_to_dir``.
    """
    corpus = Corpus(corpus_path)
    graph = Graph()

    print('parsing graph...')
    graph.parse(source=str(aif_file), format="turtle")
    print('done.')
    aida = aida_namespace(graph)
    cluster_ids = list(
        graph.subjects(predicate=RDF.type, object=aida.SameAsCluster))
    cluster_element_maps = {}
    for cluster_id in tqdm(cluster_ids, desc="parsing elements in clusters"):
        cluster_element = Element.from_uriref(cluster_id, graph=graph)
        # cluster_id came straight out of the graph, so it is matched as-is;
        # re-wrapping it in URIRef() would turn a blank-node id into a URI
        # that no longer matches any triple.
        membership_ids = list(
            graph.subjects(predicate=aida.cluster, object=cluster_id))
        membership_elements = [
            Element.from_uriref(membership_id, graph=graph)
            for membership_id in membership_ids
        ]

        entity_ids = [
            m for membership in membership_elements for m in membership.members
        ]
        entity_elements = [
            Element.from_uriref(entity_id, graph=graph)
            for entity_id in entity_ids
        ]

        element_by_id = {
            e.element_id: e
            for e in [cluster_element] + membership_elements + entity_elements
        }
        cluster_element_maps[cluster_id] = element_by_id

    # enumerate(tqdm(...)) rather than tqdm(enumerate(...)): tqdm can then read
    # the length of the dict view and show a total instead of a bare counter.
    for i, element_by_id in enumerate(
            tqdm(cluster_element_maps.values(),
                 desc='writing clusters to .html')):
        writer = HtmlWriter(corpus=corpus, elements=element_by_id)
        writer.write_to_dir(out_dir, f"{i:04}.html")