예제 #1
0
def computeHybridOntology( ff, topConcepts ):
    """ computes the hybrid ontology and writes it to "hybrid-graph.rdf"

        A relation (s, p, o) found in one of the input ontologies is copied
        into the hybrid graph only if both s and o are top concepts. For
        every copied relation, rdfs:label triples are added for s, p and o.

        @param[in] ff list of input ontologies (file names)
        @param[in] topConcepts concepts which are required to participate in
                               every hybrid ontology relation
        @returns nothing; the serialized graph is written to
                 "hybrid-graph.rdf" and the top concepts that did not occur
                 in any copied relation are printed
    """
    g = ConjunctiveGraph()

    allTopConcepts = set( topConcepts )
    usedTopConcepts = set()

    for fname in ff:
        # read through a context manager so the handle is closed at once
        with open( fname ) as ontologyFile:
            content = ontologyFile.read()

        for s, p, o in extractSPO( XCL2RDF.toRDF( content ) ):
            if s in allTopConcepts and o in allTopConcepts:
                g.add( (getUrl(s), NS_RDFS['label'], Literal(s)) )
                g.add( (getUrl(p), NS_RDFS['label'], Literal(p)) )
                g.add( (getUrl(o), NS_RDFS['label'], Literal(o)) )
                g.add( (getUrl(s), getUrl(p), getUrl(o)) )
                usedTopConcepts.add( s )
                usedTopConcepts.add( o )

    # NOTE(review): presumably adds concepts that are not covered by any
    # relation -- confirm against the helper's implementation
    _addUseCaseSpecificUnusedConcepts(g)
    with open("hybrid-graph.rdf", "w") as out:
        out.write( g.serialize() )

    unusedConcepts = allTopConcepts.difference( usedTopConcepts )
    # single-argument print(...) behaves identically on Python 2 and 3
    print("# of unused concepts: %d" % len( unusedConcepts ))
    print(", ".join( unusedConcepts ))
예제 #2
0
def computeHybridOntology( ff, topConcepts ):
    """ computes the hybrid ontology and writes it to "hybrid-graph.rdf"

        A relation (s, p, o) found in one of the input ontologies is copied
        into the hybrid graph only if both s and o are top concepts. For
        every copied relation, rdfs:label triples are added for s, p and o.

        @param[in] ff list of input ontologies (file names)
        @param[in] topConcepts concepts which are required to participate in
                               every hybrid ontology relation
        @returns nothing; the serialized graph is written to
                 "hybrid-graph.rdf" and the top concepts that did not occur
                 in any copied relation are printed
    """
    g = ConjunctiveGraph()

    allTopConcepts = set( topConcepts )
    usedTopConcepts = set()

    for fname in ff:
        # read through a context manager so the handle is closed at once
        with open( fname ) as ontologyFile:
            content = ontologyFile.read()

        for s, p, o in extractSPO( XCL2RDF.toRDF( content ) ):
            if s in allTopConcepts and o in allTopConcepts:
                g.add( (getUrl(s), NS_RDFS['label'], Literal(s)) )
                g.add( (getUrl(p), NS_RDFS['label'], Literal(p)) )
                g.add( (getUrl(o), NS_RDFS['label'], Literal(o)) )
                g.add( (getUrl(s), getUrl(p), getUrl(o)) )
                usedTopConcepts.add( s )
                usedTopConcepts.add( o )

    # NOTE(review): presumably adds concepts that are not covered by any
    # relation -- confirm against the helper's implementation
    _addUseCaseSpecificUnusedConcepts(g)
    # distinct name: do not shadow the loop variable used above
    with open("hybrid-graph.rdf", "w") as out:
        out.write( g.serialize() )

    unusedConcepts = allTopConcepts.difference( usedTopConcepts )
    print("# of unused concepts: %d" % len(unusedConcepts))
    print(", ".join( unusedConcepts ))
예제 #3
0
def _precisionRecallF1(goldStandard, found):
    """ computes precision, recall and F1 of `found` against `goldStandard`
        @param[in] goldStandard set of reference terms
        @param[in] found        set of terms extracted from one ontology
        @returns a tuple (precision, recall, F1); a value that is undefined
                 (empty set in the denominator, or precision + recall == 0
                 for F1) is reported as the string "NaN"
    """
    hits = len(goldStandard.intersection(found))
    # guard the divisions: an empty ontology or an empty gold standard
    # would otherwise raise ZeroDivisionError and abort the whole report
    precision = hits / float(len(found)) if found else "NaN"
    recall = hits / float(len(goldStandard)) if goldStandard else "NaN"

    if not found or not goldStandard or (precision + recall) == 0.:
        f1 = "NaN"
    else:
        # both operands are floats, so this is a true division
        f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1


def computeOntologyStatistics(ff, cc, rc, ccCutOffCount, rcCutOffCount):
    """ computes per ontology statistics (R, P, F1) and writes them to
        "ontology-stats.csv" (one header row plus one row per ontology)
        @param[in] ff list of ontology files
        @param[in] cc concept counts dictionary
        @param[in] rc relation counts dictionary
        @param[in] ccCutOffCount min cc required for a term to be considered
        @param[in] rcCutOffCount min rc required for a term to be considered
    """
    goldStandardConcepts = set(
        c for c, cnt in cc.items() if cnt >= ccCutOffCount)
    goldStandardRelations = set(
        r for r, cnt in rc.items() if cnt >= rcCutOffCount)

    # the context manager closes the report even if an ontology fails
    with open("ontology-stats.csv", "w") as statsFile:
        w = writer(statsFile)
        w.writerow(
            ("ontology", "concept precision", "concept recall", "concept F1",
             "relation precision", "relation recall", "relation F1"))

        for fname in ff:
            # read the file once and close it right away
            with open(fname) as ontologyFile:
                content = ontologyFile.read()

            concepts = set(
                map(str, extractConceptSet(XCL2RDF.toRDF(content))))
            relations = set(
                map(str, extractRelationSet(XCL2RDF.toRDF(content))))

            cPrecision, cRecall, cF1 = _precisionRecallF1(
                goldStandardConcepts, concepts)
            rPrecision, rRecall, rF1 = _precisionRecallF1(
                goldStandardRelations, relations)

            w.writerow((path.basename(fname), cPrecision, cRecall, cF1,
                        rPrecision, rRecall, rF1))
예제 #4
0
def computeStatistics( ff ):
    """ computes the statistics (number of ontologies in which a concept is
        used; number of ontologies in which a relation name is used) based
        on the given list of ontologies and passes them to csvOutput
        @param[in] ff   a list of files containing the ontologies to be analyzed
        @returns a tuple (conceptCounts, relationCounts) of dictionaries
                 mapping each concept / relation name to its count
    """

    conceptCounts = defaultdict(int)
    relationCounts = defaultdict(int)
    for fname in ff:
        # read the file once and close the handle right away
        with open( fname ) as ontologyFile:
            content = ontologyFile.read()

        # sets: every term is counted at most once per ontology
        concepts  = set(map(str, extractConceptSet( XCL2RDF.toRDF( content ))))
        relations = set(map(str, extractRelationSet(XCL2RDF.toRDF( content ))))

        for c in concepts:
            conceptCounts[c] += 1

        for r in relations:
            relationCounts[r] += 1

    csvOutput( conceptCounts, relationCounts )
    return conceptCounts, relationCounts
예제 #5
0
def computeStatistics(ff):
    """ computes the statistics (number of ontologies in which a concept is
        used; number of ontologies in which a relation name is used) based
        on the given list of ontologies and passes them to csvOutput
        @param[in] ff   a list of files containing the ontologies to be analyzed
        @returns a tuple (conceptCounts, relationCounts) of dictionaries
                 mapping each concept / relation name to its count
    """

    conceptCounts = defaultdict(int)
    relationCounts = defaultdict(int)
    for fname in ff:
        # read the file once and close the handle right away
        with open(fname) as ontologyFile:
            content = ontologyFile.read()

        # sets: every term is counted at most once per ontology
        concepts = set(
            map(str, extractConceptSet(XCL2RDF.toRDF(content))))
        relations = set(
            map(str, extractRelationSet(XCL2RDF.toRDF(content))))

        for c in concepts:
            conceptCounts[c] += 1

        for r in relations:
            relationCounts[r] += 1

    csvOutput(conceptCounts, relationCounts)
    return conceptCounts, relationCounts
예제 #6
0
def visualizeOntologies( ff ):
    """ visualizes the given ontologies; for every ontology a PDF image
        named after the input file (without extension) is created in
        IMG_OUTPUT_DIR
        @param[in] ff   a list of files containing the ontologies to be visualized
    """
    _createOutputDir( IMG_OUTPUT_DIR )

    for fname in ff:
        # base name without extension: used as graph title and image name
        title = path.splitext( path.basename(fname) )[0]

        # close the input file as soon as its content has been read
        with open( fname ) as ontologyFile:
            rdfOntology = XCL2RDF.toRDF( ontologyFile.read() )

        g = GraphvizVisualize( rdfOntology, sparqlQuery=OutputQueries._labeledGraphSparqlQuery )
        g.graphTitle = title
        g.createImage( path.join(IMG_OUTPUT_DIR, title), "pdf" )
예제 #7
0
def visualizeOntologies(ff):
    """ visualizes the given ontologies; for every ontology a PDF image
        named after the input file (without extension) is created in
        IMG_OUTPUT_DIR
        @param[in] ff   a list of files containing the ontologies to be visualized
    """
    _createOutputDir(IMG_OUTPUT_DIR)

    for fname in ff:
        # base name without extension: used as graph title and image name
        title = path.splitext(path.basename(fname))[0]

        # close the input file as soon as its content has been read
        with open(fname) as ontologyFile:
            rdfOntology = XCL2RDF.toRDF(ontologyFile.read())

        g = GraphvizVisualize(
            rdfOntology, sparqlQuery=OutputQueries._labeledGraphSparqlQuery)
        g.graphTitle = title
        g.createImage(path.join(IMG_OUTPUT_DIR, title), "pdf")
def _readOntology( fname ):
    """ reads the given ontology using the correct 
        format 
        @param[in] the ontology's file name
        @returns the ontology graph
    """
    if fname.endswith(".cxl"):
        return XCL2RDF.toRDF( open( fname ).read() )
    elif fname.endswith(".rdf") or fname.endswith(".xml"):
        g = Graph()
        g.parse( fname, "xml" )
        return g
    else:
        raise "Unknown Ontology format error"
예제 #9
0
def _precisionRecallF1( goldStandard, found ):
    """ computes precision, recall and F1 of `found` against `goldStandard`
        @param[in] goldStandard set of reference terms
        @param[in] found        set of terms extracted from one ontology
        @returns a tuple (precision, recall, F1); a value that is undefined
                 (empty set in the denominator, or precision + recall == 0
                 for F1) is reported as the string "NaN"
    """
    hits = len( goldStandard.intersection( found ) )
    # guard the divisions: an empty ontology or an empty gold standard
    # would otherwise raise ZeroDivisionError and abort the whole report
    precision = hits / float( len(found) ) if found else "NaN"
    recall    = hits / float( len(goldStandard) ) if goldStandard else "NaN"

    if not found or not goldStandard or (precision + recall) == 0.:
        f1 = "NaN"
    else:
        f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1


def computeOntologyStatistics( ff, cc, rc, ccCutOffCount, rcCutOffCount):
    """ computes per ontology statistics (R, P, F1) and writes them to
        "ontology-stats.csv" (one header row plus one row per ontology)
        @param[in] ff list of ontology files
        @param[in] cc concept counts dictionary
        @param[in] rc relation counts dictionary
        @param[in] ccCutOffCount min cc required for a term to be considered
        @param[in] rcCutOffCount min rc required for a term to be considered
    """
    goldStandardConcepts  = set( c for c, cnt in cc.items() if cnt >= ccCutOffCount )
    goldStandardRelations = set( r for r, cnt in rc.items() if cnt >= rcCutOffCount )

    # the context manager closes the report even if an ontology fails
    with open("ontology-stats.csv", "w") as statsFile:
        w = writer(statsFile)
        w.writerow( ("ontology", "concept precision", "concept recall", "concept F1",
                                    "relation precision", "relation recall", "relation F1") )

        for fname in ff:
            # read the file once and close it right away
            with open( fname ) as ontologyFile:
                content = ontologyFile.read()

            concepts  = set(map(str, extractConceptSet( XCL2RDF.toRDF( content ))))
            relations = set(map(str, extractRelationSet(XCL2RDF.toRDF( content ))))

            cPrecision, cRecall, cF1 = _precisionRecallF1( goldStandardConcepts, concepts )
            rPrecision, rRecall, rF1 = _precisionRecallF1( goldStandardRelations, relations )

            w.writerow( (path.basename(fname), cPrecision, cRecall, cF1, rPrecision, rRecall, rF1) )
예제 #10
0
def getConcepts( fname ):
    """ extracts the concepts of the given ontology, cleans every concept
    with phraseCleanup.clean and returns the cleaned phrases; a concept
    that is changed by the cleanup is reported via log.info
    @param[in] fname        file name of the ontology to read
    @returns a list containing the cleaned phrases of all concepts
    """

    # close the input file as soon as its content has been read
    with open( fname ) as ontologyFile:
        goldStd = XCL2RDF.toRDF( ontologyFile.read() )
    goldStdConcepts  = extractConceptSet(goldStd)

    result = []
    for concept in goldStdConcepts:
        cleaned_phrase = phraseCleanup.clean( concept )
        # the cleanup may yield several phrases for a single concept
        joined_phrase = ", ".join(cleaned_phrase)
        if joined_phrase != concept:
            log.info("Replacing '%s' with '%s'" % (concept, joined_phrase) )
        result.extend( cleaned_phrase  )

    return result