from collections import defaultdict
from csv import writer
from os import path

from rdflib import ConjunctiveGraph, Graph, Literal

# XCL2RDF, NS_RDFS, GraphvizVisualize, OutputQueries, phraseCleanup, log,
# IMG_OUTPUT_DIR and the extractSPO / extractConceptSet / extractRelationSet /
# getUrl / csvOutput / _createOutputDir / _addUseCaseSpecificUnusedConcepts
# helpers are assumed to be provided by the surrounding package.


def computeHybridOntology(ff, topConcepts):
    """ computes the hybrid ontology, which contains all relations found in
    the ontologies ff between concepts listed in topConcepts, and writes it
    to hybrid-graph.rdf
    @param[in] ff           list of input ontologies
    @param[in] topConcepts  concepts which are required to participate in
                            every hybrid ontology relation
    """
    g = ConjunctiveGraph()
    allTopConcepts = set(topConcepts)
    usedTopConcepts = set()
    for f in ff:
        for s, p, o in extractSPO(XCL2RDF.toRDF(open(f).read())):
            # only keep relations where both the subject and the object
            # are top concepts
            if s in allTopConcepts and o in allTopConcepts:
                g.add((getUrl(s), NS_RDFS['label'], Literal(s)))
                g.add((getUrl(p), NS_RDFS['label'], Literal(p)))
                g.add((getUrl(o), NS_RDFS['label'], Literal(o)))
                g.add((getUrl(s), getUrl(p), getUrl(o)))
                usedTopConcepts.add(s)
                usedTopConcepts.add(o)

    _addUseCaseSpecificUnusedConcepts(g)
    with open("hybrid-graph.rdf", "w") as f:
        f.write(g.serialize())

    unusedConcepts = allTopConcepts.difference(usedTopConcepts)
    print("# of unused concepts: %d" % len(unusedConcepts))
    print(", ".join(unusedConcepts))
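
# Usage sketch for computeHybridOntology (illustrative only): the glob
# pattern and the occurrence threshold below are assumptions, not part of
# this module. The top concepts are derived from the counts returned by
# computeStatistics() (defined further down).
def _demoHybridOntology():
    from glob import glob
    ontologyFiles = glob("ontologies/*.cxl")  # hypothetical input directory
    conceptCounts, _relationCounts = computeStatistics(ontologyFiles)
    # keep concepts occurring in at least three ontologies (assumed cut-off)
    topConcepts = [c for c, cnt in conceptCounts.items() if cnt >= 3]
    computeHybridOntology(ontologyFiles, topConcepts)  # writes hybrid-graph.rdf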
def computeOntologyStatistics(ff, cc, rc, ccCutOffCount, rcCutOffCount):
    """ computes per ontology statistics (R, P, F1)
    @param[in] ff             list of ontology files
    @param[in] cc             concept counts dictionary
    @param[in] rc             relation counts dictionary
    @param[in] ccCutOffCount  min cc required for a concept to be considered
    @param[in] rcCutOffCount  min rc required for a relation to be considered
    """
    goldStandardConcepts = set(
        c for c, cnt in cc.items() if cnt >= ccCutOffCount)
    goldStandardRelations = set(
        r for r, cnt in rc.items() if cnt >= rcCutOffCount)

    with open("ontology-stats.csv", "w") as csvFile:
        w = writer(csvFile)
        w.writerow(("ontology", "concept precision", "concept recall",
                    "concept F1", "relation precision", "relation recall",
                    "relation F1"))
        for f in ff:
            rdfOntology = XCL2RDF.toRDF(open(f).read())
            concepts = set(map(str, extractConceptSet(rdfOntology)))
            relations = set(map(str, extractRelationSet(rdfOntology)))

            cPrecision = len(goldStandardConcepts.intersection(concepts)) \
                / len(concepts)
            cRecall = len(goldStandardConcepts.intersection(concepts)) \
                / len(goldStandardConcepts)
            if (cPrecision + cRecall) == 0.:
                cF1 = "NaN"
            else:
                cF1 = 2 * cPrecision * cRecall / (cPrecision + cRecall)

            rPrecision = len(goldStandardRelations.intersection(relations)) \
                / len(relations)
            rRecall = len(goldStandardRelations.intersection(relations)) \
                / len(goldStandardRelations)
            if (rPrecision + rRecall) == 0.:
                rF1 = "NaN"
            else:
                rF1 = 2 * rPrecision * rRecall / (rPrecision + rRecall)

            w.writerow((path.basename(f), cPrecision, cRecall, cF1,
                        rPrecision, rRecall, rF1))
def computeStatistics(ff):
    """ computes the statistics (number of times a concept is used;
    number of times a relation name is used) based on the given list
    of ontologies
    @param[in] ff  a list of files containing the ontologies to be analyzed
    """
    conceptCounts = defaultdict(int)
    relationCounts = defaultdict(int)
    for f in ff:
        rdfOntology = XCL2RDF.toRDF(open(f).read())
        concepts = set(map(str, extractConceptSet(rdfOntology)))
        relations = set(map(str, extractRelationSet(rdfOntology)))
        # count each concept/relation at most once per ontology
        for c in concepts:
            conceptCounts[c] += 1
        for r in relations:
            relationCounts[r] += 1

    csvOutput(conceptCounts, relationCounts)
    return conceptCounts, relationCounts
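
# Usage sketch for the statistics pipeline (illustrative only): the file
# names and cut-off counts are assumptions. computeStatistics() derives the
# per-term counts, which computeOntologyStatistics() then turns into
# per-ontology precision/recall/F1 figures against the cut-off based
# gold standard.
def _demoStatisticsPipeline():
    files = ["group-a.cxl", "group-b.cxl", "group-c.cxl"]  # hypothetical files
    cc, rc = computeStatistics(files)  # also writes the counts via csvOutput()
    # a term enters the gold standard if at least two ontologies use it
    # (assumed thresholds)
    computeOntologyStatistics(files, cc, rc, ccCutOffCount=2, rcCutOffCount=2)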
def visualizeOntologies(ff):
    """ visualizes the given ontologies
    @param[in] ff  a list of files containing the ontologies to be visualized
    """
    _createOutputDir(IMG_OUTPUT_DIR)
    for f in ff:
        fName, _fExt = path.splitext(path.basename(f))
        rdfOntology = XCL2RDF.toRDF(open(f).read())
        g = GraphvizVisualize(
            rdfOntology, sparqlQuery=OutputQueries._labeledGraphSparqlQuery)
        g.graphTitle = fName
        g.createImage(path.join(IMG_OUTPUT_DIR, fName), "pdf")
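
# Usage sketch (illustrative; the glob pattern is an assumption): renders one
# PDF concept map per input ontology into IMG_OUTPUT_DIR.
def _demoVisualize():
    from glob import glob
    visualizeOntologies(glob("ontologies/*.cxl"))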
def _readOntology(fname):
    """ reads the given ontology using the correct format
    @param[in] fname  the ontology's file name
    @returns the ontology graph
    """
    if fname.endswith(".cxl"):
        return XCL2RDF.toRDF(open(fname).read())
    elif fname.endswith((".rdf", ".xml")):
        g = Graph()
        g.parse(fname, format="xml")
        return g
    else:
        raise ValueError("Unknown ontology format: %s" % fname)
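
# Usage sketch (illustrative file names): _readOntology dispatches on the
# file extension, so CXL concept maps and RDF/XML ontologies can be read
# through the same call.
def _demoReadOntology():
    cxlGraph = _readOntology("concept-map.cxl")  # parsed via XCL2RDF
    rdfGraph = _readOntology("ontology.rdf")     # parsed via rdflib
    return cxlGraph, rdfGraph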
def getConcepts(fname):
    """ extracts and cleans the concepts of the given ontology
    @param[in] fname  file name of the ontology to evaluate
    @returns the list of cleaned concept phrases
    """
    goldStd = XCL2RDF.toRDF(open(fname).read())
    goldStdConcepts = extractConceptSet(goldStd)

    result = []
    for concept in goldStdConcepts:
        cleaned_phrase = phraseCleanup.clean(concept)
        if ", ".join(cleaned_phrase) != concept:
            log.info("Replacing '%s' with '%s'"
                     % (concept, ", ".join(cleaned_phrase)))
        result.extend(cleaned_phrase)
    return result
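
# Usage sketch (illustrative file name): extracts the cleaned gold-standard
# concept list; phrases rewritten by phraseCleanup.clean() are logged and may
# contribute several list entries each.
def _demoGetConcepts():
    goldStandardConcepts = getConcepts("gold-standard.cxl")
    print("%d gold standard concepts" % len(goldStandardConcepts))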