def from_uriref(justification_id: URIRef, *, graph: Graph):
    """Build a ``Justification`` from the node *justification_id* in *graph*.

    Args:
        justification_id: Node that must carry
            ``rdf:type aida:TextJustification`` in *graph*.
        graph: Graph holding the justification's triples.

    Returns:
        A ``Justification`` whose ``child_id`` / ``parent_id`` are the string
        forms of ``aida:source`` / ``aida:sourceDocument`` and whose span is
        read from ``aida:startOffset`` / ``aida:endOffsetInclusive``. A field
        whose triple is absent is ``None``.

    Raises:
        ValueError: If the node is not typed as a TextJustification.
    """
    aida = aida_namespace(graph=graph)
    if (justification_id, RDF.type, aida.TextJustification) not in graph:
        raise ValueError(
            f"{justification_id} does not have type TextJustification in graph."
        )

    # ``any=False`` asks rdflib to complain instead of silently picking one
    # value when a property occurs more than once.
    span_start = graph.value(
        subject=justification_id, predicate=aida.startOffset, any=False
    )
    span_end = graph.value(
        subject=justification_id, predicate=aida.endOffsetInclusive, any=False
    )
    source = graph.value(
        subject=justification_id, predicate=aida.source, any=False
    )
    source_doc = graph.value(
        subject=justification_id, predicate=aida.sourceDocument, any=False
    )

    return Justification(
        justification_id=justification_id,
        # Truthiness is kept for the two source fields on purpose: an empty
        # source value is still reported as None, exactly as before.
        child_id=str(source) if source else None,
        parent_id=str(source_doc) if source_doc else None,
        # Offsets are compared against None explicitly: a typed literal with
        # numeric value 0 is falsy, so a truthiness test would turn a
        # legitimate offset of 0 into None.
        span_start=int(span_start) if span_start is not None else None,
        span_end=int(span_end) if span_end is not None else None,
        private_data=private_data(justification_id, graph=graph),
    )
def main(
    aif_file: Path, out_dir: Path, db_path: Path, by_clusters: bool, verbose: bool
) -> Path:
    """Parse the Turtle file *aif_file* and write its visualization to *out_dir*.

    Args:
        aif_file: Turtle-serialized graph to visualize.
        out_dir: Directory that receives the generated files.
        db_path: Passed to ``Hypothesis.visualize`` (cluster mode) or to
            ``Corpus`` (element mode).
        by_clusters: When true, build a ``Hypothesis`` from the graph and let
            it render a single HTML file; otherwise render every cluster,
            entity, event and relation to ``<stem>.html`` and ``<stem>.tsv``.
        verbose: Only used in cluster mode; selects the ``_verbose`` file
            name and is forwarded to ``Hypothesis.visualize``.

    Returns:
        *out_dir*, unchanged.
    """
    graph: Graph = Graph()
    graph.parse(source=str(aif_file), format="turtle")
    aida = aida_namespace(graph)
    print(f"Starting visualization...")

    if by_clusters:
        # Only this mode creates the directory and the (empty) output file
        # itself before handing over to the hypothesis renderer.
        out_dir.mkdir(exist_ok=True)
        output_file = out_dir / (
            f"{aif_file.stem}_visualization_verbose.html"
            if verbose
            else f"{aif_file.stem}_visualization.html"
        )
        output_file.touch(exist_ok=True)
        Hypothesis.from_graph(graph).visualize(
            out_dir, output_file, db_path, verbose
        )
        return out_dir

    def subjects_typed(rdf_class):
        # All nodes declared with ``rdf:type rdf_class``.
        return list(graph.subjects(predicate=RDF.type, object=rdf_class))

    entities = subjects_typed(aida.Entity)
    events = subjects_typed(aida.Event)
    relations = subjects_typed(aida.Relation)
    clusters = subjects_typed(aida.SameAsCluster)

    # Clusters come first so they lead the resulting mapping.
    elements = {}
    for node_id in tqdm(clusters + entities + events + relations):
        elements[node_id] = Element.from_uriref(node_id, graph=graph)

    corpus = Corpus(db_path)
    html_writer = HtmlWriter(corpus, elements, directory=out_dir)
    html_writer.write_to_dir(output_file_name=f"{aif_file.stem}.html")
    tsv_writer = PrettyPrinter(corpus, elements, directory=out_dir)
    tsv_writer.write_to_dir(output_file_name=f"{aif_file.stem}.tsv")
    return out_dir
def from_uriref(element_id: URIRef, *, graph: Graph):
    """Collect everything *graph* says about *element_id* into an ``Element``.

    Args:
        element_id: Node to describe.
        graph: Graph to read the node's triples from.

    Returns:
        An ``Element`` carrying the node's type, prototypes, cluster members,
        clusters, names, handles, text justifications (informative and
        ``justifiedBy``), the statements whose ``rdf:subject`` is this node,
        its link assertions and its private data.
    """
    aida = aida_namespace(graph=graph)

    def objects_of(predicate):
        # Every object of (element_id, predicate, ?).
        return list(graph.objects(subject=element_id, predicate=predicate))

    def text_justifications_of(predicate):
        # Objects of the predicate that are typed as TextJustification;
        # other justification kinds are left out.
        return [
            node
            for node in graph.objects(subject=element_id, predicate=predicate)
            if (node, RDF.type, aida.TextJustification) in graph
        ]

    informative_ids = text_justifications_of(aida.informativeJustification)
    justified_by_ids = text_justifications_of(aida.justifiedBy)
    statement_ids = list(
        graph.subjects(predicate=RDF.subject, object=element_id)
    )
    link_ids = objects_of(aida.link)

    element_type = graph.value(
        subject=element_id, predicate=RDF.type, any=False
    )
    prototypes = objects_of(aida.prototype)
    members = objects_of(aida.clusterMember)
    clusters = objects_of(aida.cluster)
    names = objects_of(aida.hasName)
    handles = objects_of(aida.handle)

    informative = [
        Justification.from_uriref(node, graph=graph) for node in informative_ids
    ]
    justified_by = [
        Justification.from_uriref(node, graph=graph) for node in justified_by_ids
    ]
    statements = [
        Statement.from_uriref(node, graph=graph) for node in statement_ids
    ]
    link_assertions = [
        LinkAssertion.from_uriref(node, graph=graph) for node in link_ids
    ]

    return Element(
        element_id=element_id,
        element_type=element_type,
        prototypes=prototypes,
        members=members,
        clusters=clusters,
        names=names,
        handles=handles,
        informative_justifications=informative,
        justified_by=justified_by,
        statements=statements,
        link_assertions=link_assertions,
        private_data=private_data(element_id, graph=graph),
    )
def private_data(subject_id: URIRef, *, graph: Graph):
    """Return the private-data JSON attached to *subject_id* as key/value pairs.

    Every ``aida:privateData`` node of the subject is visited, each of its
    ``aida:jsonContent`` literals is decoded with ``json.loads``, and the
    ``(key, value)`` items of the decoded objects are concatenated into one
    list. The list is empty when the subject has no private data.
    """
    aida = aida_namespace(graph=graph)
    return [
        item
        for data_node in graph.objects(
            subject=subject_id, predicate=aida.privateData
        )
        for content in graph.objects(
            subject=data_node, predicate=aida.jsonContent
        )
        for item in json.loads(str(content)).items()
    ]
def from_uriref(link_id: URIRef, *, graph):
    """Build a ``LinkAssertion`` from the link node *link_id* in *graph*.

    The target and system are read directly from the link node; the
    confidence is read from the ``aida:confidenceValue`` of the node's
    ``aida:confidence`` object and converted to ``float``.

    NOTE(review): when no confidence value is found, ``float()`` receives
    ``None`` and raises ``TypeError`` — confirm whether callers rely on that.
    """
    aida = aida_namespace(graph=graph)
    link_target = graph.value(subject=link_id, predicate=aida.linkTarget)
    link_system = graph.value(subject=link_id, predicate=aida.system)

    # The confidence is stored one hop away, on a separate confidence node.
    confidence_node = graph.value(subject=link_id, predicate=aida.confidence)
    confidence_value = graph.value(
        subject=confidence_node, predicate=aida.confidenceValue
    )

    return LinkAssertion(
        link_id=link_id,
        link_confidence=float(confidence_value),
        link_target=link_target,
        link_system=link_system,
    )
def from_uriref(statement_id: URIRef, *, graph: Graph):
    """Build a ``Statement`` from the reified statement node *statement_id*.

    Args:
        statement_id: Node carrying ``rdf:subject``, ``rdf:predicate`` and
            ``rdf:object`` triples.
        graph: Graph to read the statement from.

    Returns:
        A ``Statement`` with its subject, predicate and object plus the text
        justifications that support it. Text justifications are collected
        from the statement's direct ``aida:justifiedBy`` nodes first, then
        from the ``aida:containedJustification`` members of any
        ``aida:CompoundJustification`` among those nodes.
    """
    aida = aida_namespace(graph=graph)

    def is_text_justification(node) -> bool:
        return (node, RDF.type, aida.TextJustification) in graph

    statement_subject = graph.value(
        subject=statement_id, predicate=RDF.subject, any=False
    )
    statement_predicate = graph.value(
        subject=statement_id, predicate=RDF.predicate, any=False
    )
    statement_object = graph.value(
        subject=statement_id, predicate=RDF.object, any=False
    )

    justifiedby_ids = list(
        graph.objects(subject=statement_id, predicate=aida.justifiedBy)
    )
    textjustification_ids = [
        node for node in justifiedby_ids if is_text_justification(node)
    ]
    for node in justifiedby_ids:
        if (node, RDF.type, aida.CompoundJustification) not in graph:
            continue
        # Members of a compound justification get the same type filter as the
        # direct ones. Without it a non-text member would be handed to
        # Justification.from_uriref, which rejects such nodes with ValueError
        # and would abort loading the whole statement.
        textjustification_ids.extend(
            contained
            for contained in graph.objects(
                subject=node, predicate=aida.containedJustification
            )
            if is_text_justification(contained)
        )

    return Statement(
        statement_id=statement_id,
        subject=statement_subject,
        predicate=statement_predicate,
        object=statement_object,
        justified_by=[
            Justification.from_uriref(j, graph=graph)
            for j in textjustification_ids
        ],
    )
def main(aif_file: Path, corpus_path: Path, out_dir: Path):
    """Write one HTML page per ``SameAsCluster`` found in *aif_file*.

    For every cluster the page covers the cluster node itself, the nodes
    that point at it through ``aida:cluster``, and the members listed on
    those nodes. Pages are written to *out_dir* and named by the cluster's
    zero-padded position (``0000.html``, ``0001.html``, ...).

    Args:
        aif_file: Turtle-serialized graph to read.
        corpus_path: Passed to ``Corpus``.
        out_dir: Directory handed to ``HtmlWriter.write_to_dir``.
    """
    corpus = Corpus(corpus_path)
    graph = Graph()
    print('parsing graph...')
    graph.parse(source=str(aif_file), format="turtle")
    print('done.')
    aida = aida_namespace(graph)

    def load(node_id):
        return Element.from_uriref(node_id, graph=graph)

    cluster_ids = list(
        graph.subjects(predicate=RDF.type, object=aida.SameAsCluster)
    )
    elements_per_cluster = {}
    for cluster_id in tqdm(cluster_ids, desc="parsing elements in clusters"):
        cluster = load(cluster_id)
        membership_ids = list(
            graph.subjects(predicate=aida.cluster, object=URIRef(cluster_id))
        )
        memberships = [load(membership_id) for membership_id in membership_ids]

        member_ids = []
        for membership in memberships:
            member_ids.extend(membership.members)
        members = [load(member_id) for member_id in member_ids]

        # Later entries replace earlier ones that share an element_id.
        lookup = {}
        for element in (cluster, *memberships, *members):
            lookup[element.element_id] = element
        elements_per_cluster[cluster_id] = lookup

    for index, lookup in tqdm(enumerate(elements_per_cluster.values()),
                              desc='writing clusters to .html'):
        page_writer = HtmlWriter(corpus=corpus, elements=lookup)
        page_writer.write_to_dir(out_dir, f"{index:04}.html")