Esempio n. 1
0
def get_IC():
    """Fetch all distinct (term_id, info_content) pairs from the SSM table.

    Returns:
        The fetched rows transposed with ``zip(*rows)``: the first element
        groups the term ids and the second groups the matching
        information-content values. With no rows, the result is empty.
    """
    cursor = db.cursor()
    sql = """SELECT distinct term_id, info_content
               FROM SSM"""
    cursor.execute(sql)

    # Turn the row-oriented result set into column-oriented sequences.
    rows = cursor.fetchall()
    columns = zip(*rows)
    return columns
Esempio n. 2
0
def check_dist_between(cid1, cid2):
    """Return the stored path distance between two terms.

    Looks up the ``graph_path`` row whose ``term1_id`` and ``term2_id``
    equal the given ids, in that order.

    Args:
        cid1: id compared against ``graph_path.term1_id``.
        cid2: id compared against ``graph_path.term2_id``.

    Returns:
        The distance of the first matching row as an ``int``, or ``-1``
        when no row matches.
    """
    cur = db.cursor()
    # Bind the ids as query parameters instead of interpolating them into
    # the SQL text: the driver quotes/escapes them, so a value can neither
    # break the statement nor inject SQL.
    cur.execute(
        """SELECT distance
           FROM graph_path
           WHERE term1_id = %s and term2_id = %s""",
        (cid1, cid2),
    )
    row = cur.fetchone()
    if row is None:
        return -1
    return int(row[0])
Esempio n. 3
0
def get_description(id):
    """Return the definition text stored for a term.

    Args:
        id: term id compared against ``term_definition.term_id``. The name
            shadows the ``id`` builtin; it is kept so existing keyword
            callers continue to work.

    Returns:
        The ``term_definition`` value of the first matching row, or the
        string ``"NA"`` when no row matches.
    """
    cur = db.cursor()
    # Bind the id as a query parameter instead of interpolating it into the
    # SQL text, so the driver takes care of quoting/escaping.
    cur.execute(
        """SELECT term_definition
           FROM term_definition
           WHERE term_id = %s""",
        (id,),
    )
    row = cur.fetchone()
    if row is None:
        return "NA"
    return row[0]
Esempio n. 4
0
def load_synonyms():
    """Collect the synonym set of every term and pickle the list to disk.

    For each row of ``term`` a set is built holding the lower-cased term
    name plus every lower-cased ``term_synonym`` entry of that term. The
    list of sets, in the order the terms were fetched, is written to
    ``data/hpo_synonyms.pickle``. One progress line per term and a final
    ``done`` are printed to stdout.

    Returns:
        None.
    """
    cur = db.cursor()
    cur.execute("""SELECT id, name
           FROM term""")
    terms = cur.fetchall()

    syns = []
    for term_id, term_name in terms:
        # Bind the id as a query parameter rather than formatting it into
        # the SQL text.
        cur.execute(
            """SELECT term_synonym
               FROM term_synonym
               WHERE term_id = %s""",
            (term_id,),
        )
        names = cur.fetchall()

        # Single-argument print() emits the same line as the former pair of
        # print statements and is valid on both Python 2 and Python 3.
        print("getting synonyms for%s(%s) %d"
              % (term_name.lower(), term_id, len(names)))

        synset = {term_name.lower()}
        synset.update(row[0].lower() for row in names)
        syns.append(synset)

    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("data/hpo_synonyms.pickle", 'wb') as out:
        pickle.dump(syns, out)
    print("done")
Esempio n. 5
0
def find_hpo_term(term, adjust=0):
    """Resolve a text term to an HPO entry.

    Resolution is attempted in this order, stopping at the first hit:

    1. equality match on ``term.name`` (base score 1.0);
    2. equality match on ``term_synonym.term_synonym`` (base score 0.8),
       reporting the name of the term the synonym belongs to;
    3. if the term ends in ``s``, the lookup is retried without that final
       letter (plural handling) using an adjustment of -0.1.

    Args:
        term: text to look up.
        adjust: value added to the base score of a name or synonym hit.

    Returns:
        A tuple ``(hpoID, hpoTerm, score)``. If resolution fails, or the
        resulting score is negative, ``('0', 'null', 0.0)`` is returned.
    """
    # The term is handed to the driver as a bound parameter, which already
    # escapes it. Escaping it manually beforehand as well would escape it
    # twice, so any term containing a quote or backslash could never match.
    match = ()
    cur = db.cursor()

    # Step 1: match against the primary term name.
    cur.execute(
        """SELECT distinct id, name
           FROM term a
           WHERE name =%s and LENGTH(a.name)>0;""",
        (term,),
    )
    res = cur.fetchone()
    if res is not None:
        match = (str(res[0]), res[1], 1.0 + adjust)
    else:
        # Step 2: match against the synonyms.
        cur.execute(
            """SELECT a.term_id, a.term_synonym, b.name
               FROM term_synonym a, term b
               WHERE a.term_synonym=%s
                and b.id=a.term_id
                and LENGTH(a.term_synonym)>0""",
            (term,),
        )
        res = cur.fetchone()
        if res is not None:
            match = (str(res[0]), res[2], 0.8 + adjust)
        elif term.endswith('s'):
            # Step 3: plural - drop the trailing 's' and retry; this may
            # recurse again if the shortened term still ends in 's'.
            # NOTE(review): the retry always passes a fixed -0.1 and does
            # not carry over the caller's ``adjust``; kept as is to leave
            # existing scores unchanged - confirm whether this is intended.
            match = find_hpo_term(term[:-1], -0.1)

    # A further word-based scoring query (joining descriptor/word tables)
    # used to follow here; it is disabled because, per the original note,
    # HPO has no descriptor table.

    if not match or match[2] < 0.0:
        match = ('0', 'null', 0.0)

    return match