예제 #1
0
def test_awe_match_pairs():
    """
    Test axiom weight estimation with per-prefix-pair match weights.

    Two nodes with the same label (X:1 and Y:1) are indexed by an engine
    configured with a ``match_weights`` entry for the X/Y prefix pair.
    The weights for (X:1, Y:1) must rank index 0 above index 1, and the
    index-0 weight must equal the index-1 weight obtained when the pair
    is queried in the opposite direction.
    """
    engine_config = {
        'match_weights': [{
            'prefix1': 'X',
            'prefix2': 'Y',
            'weights': [1.0, -1.0, 2.0, 0.0]
        }]
    }

    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(config=engine_config)

    # Same label under two different prefixes.
    for node_id in ('X:1', 'Y:1'):
        ont.add_node(node_id, 'foo 1')

    lexmap.index_ontology(ont)
    xref_graph = lexmap.get_xref_graph()
    print(lexmap.as_dataframe(xref_graph).to_csv(sep="\t"))

    forward = lexmap.weighted_axioms('X:1', 'Y:1', xref_graph)
    backward = lexmap.weighted_axioms('Y:1', 'X:1', xref_graph)
    logging.info('P_XY={} P_YX={}'.format(forward, backward))

    assert forward[0] > forward[1]
    # Swapping the arguments must swap the roles of indexes 0 and 1.
    assert forward[0] == backward[1]
예제 #2
0
def test_awe_scope_map():
    """
    Test axiom weight estimation across synonym scopes.

    X:1 and Y:1 share a *related* synonym, while Y:1 and Z:1 share an
    *exact* synonym.  For the X/Y pair the index-2 weight must be the
    largest of the four, and it must still be smaller than the index-2
    weight of the Y/Z pair, which is matched through exact synonyms.
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()
    ont.add_node('X:1', 'x1')
    ont.add_node('Y:1', 'y1')
    ont.add_node('Z:1', 'z1')

    # X:1 <-> Y:1 match only via a related synonym.
    ont.add_synonym(Synonym('X:1', val='related', pred='hasRelatedSynonym'))
    ont.add_synonym(Synonym('Y:1', val='related', pred='hasRelatedSynonym'))

    # Y:1 <-> Z:1 match via an exact synonym.
    ont.add_synonym(Synonym('Y:1', val='exact', pred='hasExactSynonym'))
    ont.add_synonym(Synonym('Z:1', val='exact', pred='hasExactSynonym'))

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    # The second value is the Y/Z result; label it accordingly.
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[2] > P_XY[0]
    assert P_XY[2] > P_XY[1]
    assert P_XY[2] > P_XY[3]
    assert P_XY[2] < P_YZ[2]
예제 #3
0
def test_awe_1_to_many_flat():
    """
    Test axiom weight estimation for a flat 1-to-many match.

    X:1 lexically matches two Y nodes (Y:1a through a related synonym,
    Y:1b through an exact synonym) that are not related to each other.
    With a ``'1m'`` cardinality weight configured for the X/Y prefix
    pair, the X:1/Y:1a weights must rank index 1 above both index 0 and
    index 2.
    """
    cardinality_config = {
        'cardinality_weights': [{
            'prefix1': 'X',
            'prefix2': 'Y',
            'cardinality': '1m',
            'weights': [-1.0, 1.0, -2.0, 0.0]
        }]
    }
    ont = Ontology()
    lexmap = LexicalMapEngine(config=cardinality_config)

    ont.add_node('X:1', 'foo 1')
    # Each Y node has its own label plus a synonym equal to X:1's label.
    many_side = (
        ('Y:1a', 'foo 1a', 'hasRelatedSynonym'),
        ('Y:1b', 'foo 1b', 'hasExactSynonym'),
    )
    for node_id, label, scope in many_side:
        ont.add_node(node_id, label)
        ont.add_synonym(Synonym(node_id, val='foo 1', pred=scope))

    lexmap.index_ontology(ont)
    xref_graph = lexmap.get_xref_graph()
    print(lexmap.as_dataframe(xref_graph).to_csv(sep="\t"))

    weights = lexmap.weighted_axioms('X:1', 'Y:1a', xref_graph)
    logging.info('P={}'.format(weights))
    assert weights[1] > weights[0]
    assert weights[1] > weights[2]
예제 #4
0
def test_awe_1_to_1():
    """
    Test axiom weight estimation for a 1-to-1 match.

    X:1 has a single Y counterpart (Y:1) but two Z counterparts
    (Z:1a, Z:1b), all with the same label.  With a ``'11'`` cardinality
    weight configured for the X/Y prefix pair, the index-2 weight of
    X:1/Y:1 must exceed the index-2 weight of X:1/Z:1a.
    """
    one_to_one_config = {
        'cardinality_weights': [{
            'prefix1': 'X',
            'prefix2': 'Y',
            'cardinality': '11',
            'weights': [-1.0, -1.0, 2.0, 0.0]
        }]
    }

    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(config=one_to_one_config)

    # Every node carries the same label so that all of them match lexically.
    for node_id in ('X:1', 'Y:1', 'Z:1a', 'Z:1b'):
        ont.add_node(node_id, 'foo 1')

    lexmap.index_ontology(ont)
    xref_graph = lexmap.get_xref_graph()
    print(lexmap.as_dataframe(xref_graph).to_csv(sep="\t"))

    weights_xy = lexmap.weighted_axioms('X:1', 'Y:1', xref_graph)
    weights_xz = lexmap.weighted_axioms('X:1', 'Z:1a', xref_graph)
    logging.info('P_XY={} P_XZ={}'.format(weights_xy, weights_xz))
    assert weights_xy[2] > weights_xz[2]
예제 #5
0
def test_awe_1_to_many_default():
    """
    Test axiom weight estimation for a flat 1-to-many match using a
    cardinality weight that names no prefixes.

    The ontology is the same as in the flat 1-to-many case: X:1 matches
    Y:1a through a related synonym and Y:1b through an exact synonym.
    The configured rule is for cardinality ``'m1'`` and omits
    ``prefix1``/``prefix2``.  The X:1/Y:1a weights must rank index 1
    above both index 0 and index 2.
    """
    default_config = {
        'cardinality_weights': [{
            'cardinality': 'm1',
            'weights': [1.0, -1.0, -2.0, 0.0]
        }]
    }
    ont = Ontology()
    lexmap = LexicalMapEngine(config=default_config)

    ont.add_node('X:1', 'foo 1')
    # Each Y node has its own label plus a synonym equal to X:1's label.
    many_side = (
        ('Y:1a', 'foo 1a', 'hasRelatedSynonym'),
        ('Y:1b', 'foo 1b', 'hasExactSynonym'),
    )
    for node_id, label, scope in many_side:
        ont.add_node(node_id, label)
        ont.add_synonym(Synonym(node_id, val='foo 1', pred=scope))

    lexmap.index_ontology(ont)
    xref_graph = lexmap.get_xref_graph()
    print(lexmap.as_dataframe(xref_graph).to_csv(sep="\t"))

    weights = lexmap.weighted_axioms('X:1', 'Y:1a', xref_graph)
    logging.info('P={}'.format(weights))
    assert weights[1] > weights[0]
    assert weights[1] > weights[2]
예제 #6
0
def test_lexmap_multi():
    """
    Test lexical mapping across several ontologies indexed together.

    Loads the four ``autopod-*`` JSON fixtures, indexes them in a single
    engine and checks that every edge of the resulting xref graph stores
    a value in the interval (0, 1] under ``lexmap.CONDITIONAL_PR``.
    Grouped mappings, unmapped nodes, cliques and the dataframe rendering
    are printed for manual inspection only; they are not asserted on.
    """
    factory = OntologyFactory()
    print("Creating ont")
    files = ['x', 'm', 'h', 'bto']
    onts = [
        factory.create('tests/resources/autopod-{}.json'.format(f))
        for f in files
    ]
    lexmap = LexicalMapEngine()
    lexmap.index_ontologies(onts)
    g = lexmap.get_xref_graph()
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    # Graph.edges(data=True) works on both networkx 1.x and 2.x, whereas
    # edges_iter() was removed in networkx 2.0.
    for x, y, d in g.edges(data=True):
        cl = nx.ancestors(g, x)
        print("{} '{}' <-> {} '{}' :: {} CLOSURE={}".format(
            x, lexmap.label(x), y, lexmap.label(y), d, len(cl)))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert 0 < cpr <= 1.0
    unmapped = lexmap.unmapped_nodes(g)
    print('U: {}'.format(len(unmapped)))
    unmapped = lexmap.unmapped_nodes(g, rs_threshold=4)
    print('U4: {}'.format(len(unmapped)))

    cliques = lexmap.cliques(g)
    maxc = max(cliques, key=len)
    print('CLIQUES: {}'.format(cliques))
    print('MAX CLIQUES: {}'.format(maxc))
    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))
예제 #7
0
def test_align():
    """
    Test lexical mapping between two ontologies created from handles.

    Creates ontologies from the ``'ma'`` and ``'zfa'`` handles, indexes
    both in one engine and prints the lexical map, all synonyms, every
    xref edge and the grouped mappings of every node.  The test makes no
    assertions; it only checks that the pipeline runs without raising.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont1 = factory.create('ma')
    ont2 = factory.create('zfa')
    lexmap = LexicalMapEngine()

    lexmap.index_ontology(ont1)
    lexmap.index_ontology(ont2)

    print(lexmap.lmap)
    print(ont1.all_synonyms())
    print(ont2.all_synonyms())
    g = lexmap.get_xref_graph()
    # Graph.edges(data=True) works on both networkx 1.x and 2.x, whereas
    # edges_iter() was removed in networkx 2.0.
    for x, y, d in g.edges(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
예제 #8
0
def test_awe_xref_weights():
    """
    Test axiom weight estimation when curated per-xref weights are given.

    Three nodes with the same label are indexed by an engine configured
    with ``xref_weights`` for the pairs (X:1, Y:1) and (Z:1, Y:1).  For
    both X:1/Y:1 and Y:1/Z:1 the index-0 weight must be the largest.
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'xref_weights': [
                {
                    'left': 'X:1',
                    'right': 'Y:1',
                    'weights': [100.0, 0.0, 0.0, 0.0]
                },
                {
                    'left': 'Z:1',
                    'right': 'Y:1',
                    'weights': [0.0, 100.0, 0.0, 0.0]
                },
            ]
        })
    ont.add_node('X:1', 'foo')
    ont.add_node('Y:1', 'foo')
    ont.add_node('Z:1', 'foo')

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    # NOTE(review): the config is keyed on (Z:1, Y:1) but the query is
    # (Y:1, Z:1); presumably the engine swaps indexes 0 and 1 for the
    # reversed direction - confirm against weighted_axioms.
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    # The second value is the Y/Z result; label it accordingly.
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[0] > P_XY[1]
    assert P_XY[0] > P_XY[2]
    assert P_XY[0] > P_XY[3]
    assert P_YZ[0] > P_YZ[1]
    assert P_YZ[0] > P_YZ[2]
    assert P_YZ[0] > P_YZ[3]
예제 #9
0
def test_awe_1_to_many_hier():
    """
    Test axiom weight estimation for a hierarchical 1-to-many match.

    X:1 shares its label with two Z nodes, and Z:1a is registered as the
    parent of Z:1b.  Using a default-configured engine, the X:1/Z:1a
    weights must rank index 0 above index 1, the X:1/Z:1b weights must
    rank index 1 above index 0, and the index-0 weight must be larger
    for Z:1a than for Z:1b.
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()

    # Every node carries the same label so that all of them match lexically.
    for node_id in ('X:1', 'Z:1a', 'Z:1b'):
        ont.add_node(node_id, 'foo 1')
    ont.add_parent('Z:1b', 'Z:1a')

    lexmap.index_ontology(ont)
    xref_graph = lexmap.get_xref_graph()
    print(lexmap.as_dataframe(xref_graph).to_csv(sep="\t"))

    weights_parent = lexmap.weighted_axioms('X:1', 'Z:1a', xref_graph)
    weights_child = lexmap.weighted_axioms('X:1', 'Z:1b', xref_graph)
    logging.info('P_a={} P_b={}'.format(weights_parent, weights_child))

    assert weights_parent[0] > weights_parent[1]
    assert weights_child[1] > weights_child[0]
    assert weights_parent[0] > weights_child[0]
예제 #10
0
def test_lexmap_basic():
    """
    Test basic lexical mapping on the ``lexmap_test.json`` fixture.

    The test runs in two phases:

    1. Index the fixture with a default engine and check that the
       expected xref edges exist and that every edge stores a value in
       (0, 1] under ``lexmap.CONDITIONAL_PR``.
    2. Build a second engine with a custom configuration (synsets,
       confidence values, ``meaningful_ids`` and a per-prefix override
       for ``AA``), add extra nodes to the same ontology, re-index and
       check the resulting edges and scores.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)

    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    # Graph.edges(data=True) works on both networkx 1.x and 2.x, whereas
    # edges_iter() was removed in networkx 2.0.
    for x, y, d in g.edges(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')  # case insensitivity
    assert g.has_edge('A:1', 'B:1')  # synonyms
    assert g.has_edge('B:1', 'A:1')  # bidirectional
    for x, y, d in g.edges(data=True):
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert 0 < cpr <= 1.0

    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    # Phase 2: a fresh engine with custom configuration.
    lexmap = LexicalMapEngine(
        config=dict(synsets=[dict(word="", synonym="ignoreme", weight=-2.0)],
                    normalized_form_confidence=0.25,
                    abbreviation_confidence=0.5,
                    meaningful_ids=True,
                    ontology_configurations=[
                        dict(prefix='AA', normalized_form_confidence=-1000)
                    ]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo',
                                   {'replaceme': 'zz'}) == 'foo zz'

    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))
    lexmap.index_ontology(ont)
    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6',
                          'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())
예제 #11
0
def main():
    """
    Command-line entry point for ontobio lexical mapping.

    Parses the command line, loads every ontology handle given, optionally
    reads a YAML configuration file and a CSV of curated per-xref weights,
    indexes the ontologies in a ``LexicalMapEngine`` and writes the
    resulting xref graph as OBO (``--to obo``) or TSV (anything else).
    When ``--unmapped`` is given, the unmapped nodes are additionally
    exported to that path as a tab-separated file.

    Raises:
        ValueError: if no ontology handle is passed on the command line.
    """
    parser = argparse.ArgumentParser(description='Wrapper for ontobio lexical mapping'
                                                 """
                                                 Lexically maps one or more ontologies. Ontologies can be local or remote,
                                                 any input handle can be specified, see docs for more details on handles.

                                                 If multiple ontologies are specified, then each ontology in the list is compared against the first one.

                                                 If a simgle ontology is specified, then all pairs in that ontology will be compared
                                                 
                                                 Output format to be documented - see lexmap.py for the various scoring attributes for now.
                                                 """,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-o', '--outfile', type=str, nargs='*', default=[], required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False, default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l', '--labels', type=str,
                        help='If set, then include node labels in results. DEPRECATED')
    parser.add_argument('-s', '--scoring', default='sim', type=str,
                        help='Score weighting scheme. Default=sim')
    parser.add_argument('-P', '--prefix', type=str, required=False,
                        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-c', '--config', type=str, required=False,
                        help='lexmap configuration file (yaml). See schema for details')
    parser.add_argument('-X', '--xref_weights', type=str, required=False,
                        help='csv of curated per-xref weights')
    parser.add_argument('-u', '--unmapped', type=str, required=False,
                        help='File to export unmapped nodes to')
    parser.add_argument('-A', '--all-by-all', dest='all_by_all', action='store_true',
                        help='compare all ontologies against all.')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    parser.add_argument('ontologies',nargs='*',
                        help='one or more ontologies to be aligned. Any input handle can be specified')

    args = parser.parse_args()

    # -v selects INFO, -vv (or more) selects DEBUG; the default is WARNING.
    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [filter_by_prefix(factory.create(h)) for h in args.ontologies]

    config = {}
    if args.config is not None:
        # safe_load avoids constructing arbitrary Python objects from the
        # file and, unlike a bare yaml.load(f), does not need an explicit
        # Loader argument.  An empty file yields None, so fall back to {}.
        with open(args.config, 'r') as f:
            config = yaml.safe_load(f) or {}

    if args.xref_weights is not None:
        xws = config.setdefault('xref_weights', [])
        df = pd.read_csv(args.xref_weights)
        df = df.fillna(0.0)
        for _, row in df.iterrows():
            w = float(row['weight'])
            # Index 2 receives the curated weight and index 3 its negation,
            # whatever the sign of w.
            WA = np.array((0.0, 0.0, w, -w))
            xws.append({'left':row['left'],
                        'right':row['right'],
                        'weights':WA})

    logging.info("ALL: {}".format(args.all_by_all))

    lexmap = LexicalMapEngine(config=config)
    if not onts:
        raise ValueError("must pass one or more ontologies")

    logging.info("Indexing ontologies: {}".format(onts))
    for ont in onts:
        lexmap.index_ontology(ont)

    # Default for a single ontology: compare it against itself.
    oid0 = onts[0].id
    pairs = [(oid0,oid0)]
    if len(onts) > 1:
        if args.all_by_all:
            logging.info("All vs ALL: {}".format(onts))
            # Keep each unordered pair once by requiring i.id < j.id.
            pairs = [(i.id, j.id) for i in onts for j in onts if i.id < j.id]
        else:
            logging.info("All vs first in list: {}".format(oid0))
            pairs = [(oid0, ont.id) for ont in onts[1:]]
    logging.info("Comparing the following pairs of ontologies: {}".format(pairs))
    lexmap.ontology_pairs = pairs

    # Merged ontology handed to the writers together with the xref graph.
    mo = Ontology()
    mo.merge(onts)

    g = lexmap.get_xref_graph()

    if args.to == 'obo':
        write_obo(g,mo,args)
    else:
        write_tsv(lexmap,g,mo,args)

    if args.unmapped:
        udf = lexmap.unmapped_dataframe(g)
        udf.to_csv(args.unmapped, sep="\t", index=False)
예제 #12
0
def main():
    """
    Command-line entry point for ontobio lexical mapping.

    Parses the command line, loads every ontology handle given, indexes
    the ontologies in a ``LexicalMapEngine`` and writes the resulting
    xref graph as OBO (``--to obo``) or TSV (anything else).

    Raises:
        ValueError: if no ontology handle is passed on the command line.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for ontobio lexical mapping'
        """
                                                 ...
                                                 """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l',
                        '--labels',
                        type=str,
                        help='If set, then include node labels in results')
    parser.add_argument(
        '-P',
        '--prefix',
        type=str,
        required=False,
        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    parser.add_argument('ontologies', nargs='*')

    args = parser.parse_args()

    # -v selects INFO, -vv (or more) selects DEBUG; the default is WARNING.
    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [factory.create(h) for h in args.ontologies]

    lexmap = LexicalMapEngine()
    if not onts:
        raise ValueError("must pass one or more ontologies")
    for ont in onts:
        lexmap.index_ontology(ont)

    # Merged ontology handed to the writers together with the xref graph.
    mo = Ontology()
    mo.merge(onts)

    g = lexmap.get_xref_graph()

    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(g, mo, args)