def get_IC():
    """Return the information-content table, transposed.

    Selects the distinct (term_id, info_content) rows from the SSM table and
    returns ``zip(*rows)``: the rows regrouped column-wise, i.e. one group
    holding the term ids and one holding the info_content values. When the
    table yields no rows the zip is empty.
    """
    cursor = db.cursor()
    cursor.execute("""SELECT distinct term_id, info_content FROM SSM""")
    rows = cursor.fetchall()
    # Transpose row tuples into per-column groups.
    return zip(*rows)
def check_dist_between(cid1, cid2):
    """Look up the stored distance between two terms in graph_path.

    Args:
        cid1: id matched against graph_path.term1_id.
        cid2: id matched against graph_path.term2_id.

    Returns:
        The distance of the first matching row as an int, or -1 when no row
        exists for the (cid1, cid2) pair. The lookup is directional: only
        rows with term1_id = cid1 and term2_id = cid2 are considered.
    """
    cur = db.cursor()
    # Bind the ids as query parameters (as find_hpo_term does) instead of
    # formatting them into the SQL text, so the driver handles quoting.
    query = """SELECT distance FROM graph_path
               WHERE term1_id = %s and term2_id = %s"""
    cur.execute(query, (cid1, cid2))
    res = cur.fetchone()
    if res is None:
        return -1
    return int(res[0])
def get_description(id):
    """Return the stored definition text for a term.

    Args:
        id: term id matched against term_definition.term_id.

    Returns:
        The term_definition column of the first matching row, or the string
        "NA" when the term has no row in term_definition.
    """
    cur = db.cursor()
    # Bind the id as a query parameter (as find_hpo_term does) instead of
    # formatting it into the SQL text, so the driver handles quoting.
    query = """SELECT term_definition FROM term_definition
               WHERE term_id = %s"""
    cur.execute(query, (id, ))
    res = cur.fetchone()
    if res is None:
        return "NA"
    return res[0]
def load_synonyms():
    """Build one synonym set per term and pickle the list of sets.

    For every row of the ``term`` table a set is created containing the
    term's lower-cased name plus every lower-cased ``term_synonym`` entry
    stored for that term id. The sets are collected in the order the terms
    were fetched and dumped to ``data/hpo_synonyms.pickle``.

    A progress line per term (name, id and number of synonyms found) and a
    final "done" are printed to stdout. Nothing is returned.
    """
    syns = []
    cur = db.cursor()
    cur.execute("""SELECT id, name FROM term """)
    terms = cur.fetchall()

    # Bound parameter instead of string formatting; the driver quotes the id.
    synonym_query = """SELECT term_synonym FROM term_synonym
                       WHERE term_id = %s"""
    for term_id, term_name in terms:
        lowered = term_name.lower()
        cur.execute(synonym_query, (term_id, ))
        names = cur.fetchall()

        # Single-argument print works as a statement and as a function; the
        # emitted line is the same text the two original prints produced.
        print("getting synonyms for" + lowered + '(' + str(term_id) + ') '
              + str(len(names)))

        synset = set([lowered])
        for name in names:
            synset.add(name[0].lower())
        syns.append(synset)

    # Context manager guarantees the pickle file is flushed and closed.
    with open("data/hpo_synonyms.pickle", 'wb') as handle:
        pickle.dump(syns, handle)
    print("done")
def find_hpo_term(term, adjust=0):
    """Resolve a text term to an HPO term.

    Lookup order:
      1. exact match on ``term.name``                -> score 1.0 + adjust
      2. exact match on ``term_synonym.term_synonym`` -> score 0.8 + adjust
      3. if the term ends in 's', retry without the trailing 's'; the retry
         is called with a fixed adjust of -0.1 (the incoming ``adjust`` is
         not accumulated).

    Args:
        term: text to look up.
        adjust: value added to the base score of a direct or synonym match.

    Returns:
        Tuple ``(hpoID, hpoTerm, score)`` where hpoID is a string. If
        resolution fails, or the resulting score is negative, returns
        ``('0', 'null', 0.0)``.
    """
    # The term is handed to cur.execute() as a bound parameter, so the driver
    # quotes it. It must NOT also be passed through MySQLdb.escape_string():
    # that escapes it twice, and terms containing quotes or backslashes would
    # then never match (and would be re-escaped on every recursive call).
    match = ()
    cur = db.cursor()

    # 1) exact match on the term name
    cur.execute(
        """SELECT distinct id, name FROM term a
           WHERE name =%s and LENGTH(a.name)>0;""",
        (term, ))
    res = cur.fetchone()
    if res is not None:
        match = (str(res[0]), res[1], 1.0 + adjust)
    else:
        # 2) exact match on a synonym; report the canonical term name
        cur.execute(
            """SELECT a.term_id, a.term_synonym, b.name
               FROM term_synonym a, term b
               WHERE a.term_synonym=%s and b.id=a.term_id
               and LENGTH(a.term_synonym)>0""",
            (term, ))
        res = cur.fetchone()
        if res is not None:
            match = (str(res[0]), res[2], 0.8 + adjust)
        elif term.endswith('s'):
            # 3) plural - can also be recursive: strip one trailing 's'
            match = find_hpo_term(term[:-1], -0.1)
        # NOTE: a further word-based scoring fallback (joining descriptor /
        # word tables and ranking by summed information content) used to
        # follow here; it is disabled because HPO has no descriptor table.

    if not match or match[2] < 0.0:
        match = ('0', 'null', 0.0)
    return match