Esempio n. 1
0
def test_awe_scope_map():
    """
    Text axiom weight estimation, syn scopes
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()
    ont.add_node('X:1', 'x1')
    ont.add_node('Y:1', 'y1')
    ont.add_node('Z:1', 'z1')
    ont.add_synonym(Synonym('X:1', val='related', pred='hasRelatedSynonym'))
    ont.add_synonym(Synonym('Y:1', val='related', pred='hasRelatedSynonym'))

    ont.add_synonym(Synonym('Y:1', val='exact', pred='hasExactSynonym'))
    ont.add_synonym(Synonym('Z:1', val='exact', pred='hasExactSynonym'))

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    logging.info('P_XY={} P_XZ={}'.format(P_XY, P_YZ))
    assert P_XY[2] > P_XY[0]
    assert P_XY[2] > P_XY[1]
    assert P_XY[2] > P_XY[3]
    assert P_XY[2] < P_YZ[2]
Esempio n. 2
0
def test_awe_1_to_many_default():
    """
    As previous test, but with defaults
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'cardinality': 'm1',
                'weights': [1.0, -1.0, -2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]
Esempio n. 3
0
def test_awe_1_to_many_flat():
    """
    Text axiom weight estimation, for a 1-to-many situation, where the many are not inter-related
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'cardinality': '1m',
                'weights': [-1.0, 1.0, -2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]
Esempio n. 4
0
def test_mutable():
    """
    Test mutability of ontology class
    """
    ont = Ontology()
    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasExactSynonym')
    ont.add_synonym(syn)
    w = GraphRenderer.create('obo')
    w.write(ont)
    for n in ont.nodes():
        meta = ont._meta(n)
        print('{} -> {}'.format(n, meta))

    assert ont.label('TEST:1') == 'foo bar'
    assert ont.synonyms('TEST:1') == []
    assert ont.synonyms('TEST:4')[0].val == 'bar foo'
Esempio n. 5
0
def test_lexmap_basic():
    """
    Text lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)

    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')  # case insensitivity
    assert g.has_edge('A:1', 'B:1')  # synonyms
    assert g.has_edge('B:1', 'A:1')  # bidirectional
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0

    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    lexmap = LexicalMapEngine(
        config=dict(synsets=[dict(word="", synonym="ignoreme", weight=-2.0)],
                    normalized_form_confidence=0.25,
                    abbreviation_confidence=0.5,
                    meaningful_ids=True,
                    ontology_configurations=[
                        dict(prefix='AA', normalized_form_confidence=-1000)
                    ]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo',
                                   {'replaceme': 'zz'}) == 'foo zz'

    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))
    lexmap.index_ontology(ont)
    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6',
                          'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())