#!/usr/bin/python
import sys
from urllib.parse import quote

from http_wrapper import HttpWrapper

# START_S_ID is the first command-line argument when one is given,
# otherwise None.
START_S_ID = sys.argv[1] if len(sys.argv) > 1 else None

hgnc_search_uri = 'http://rest.genenames.org'
db_uri = 'http://e3cb0988.ngrok.io'

# Get all preprocessed MeSH terms in DB.
# NOTE(review): the query below is pinned to S_ID = 28 and does not use
# START_S_ID -- confirm this is intended and not a debugging leftover.
db_http = HttpWrapper(db_uri)
_processed_query = {
    'query': 'SELECT * FROM LUNG_PROCESSED WHERE S_ID = 28',
}
all_processed = db_http.request('/sql', 'GET', _processed_query, '')

# Search using P_NAME from processed rows.

# S_ID and HGNC Map.
# Key: S_ID
# Value: HGNC doc max score
sid_hgnc_map = {}
password='', db='mesh', charset='utf8') elapsed_millis = get_current_millis() # Get all genes which does not have MESH_NAME. all_genes = None with db.cursor(pymysql.cursors.DictCursor) as cursor: query = 'SELECT * FROM PROSTATE_GENES WHERE MESH_NAME IS NULL ORDER BY S_ID' if START_S_ID is None \ else 'SELECT * FROM PROSTATE_GENES WHERE S_ID > %s AND MESH_NAME IS NULL ORDER BY S_ID' % (START_S_ID) cursor.execute(query) all_genes = cursor.fetchall() print('Find all genes time:', get_elapsed_seconds(get_current_millis(), elapsed_millis)) hgnc_http = HttpWrapper('http://rest.genenames.org') def get_max_score_doc(result_docs): return max(result_docs, key=lambda doc: doc['score']) # Fulfill gene's MESH_NAME. for gene in all_genes: elapsed_millis = get_current_millis() processeds = None with db.cursor(pymysql.cursors.DictCursor) as cursor: cursor.execute('SELECT * FROM PROSTATE_PROCESSED where S_ID = %s', (gene['S_ID'], )) processeds = cursor.fetchall() print('Get processed time:',
all_qualifiers = None with db.cursor(pymysql.cursors.DictCursor) as cursor: cursor.execute('SELECT * FROM MESH_QUALIFIER') all_qualifiers = cursor.fetchall() all_descriptors = None with db.cursor(pymysql.cursors.DictCursor) as cursor: cursor.execute('SELECT * FROM MESH_DESCRIPTOR') all_descriptors = cursor.fetchall() all_supplementals = None with db.cursor(pymysql.cursors.DictCursor) as cursor: cursor.execute('SELECT * FROM MESH_SUPPLEMENTAL') all_supplementals = cursor.fetchall() print('Find all mesh terms time:', get_elapsed_seconds(get_current_millis(), elapsed_millis)) hgnc_http = HttpWrapper(hgnc_search_uri) def get_max_score_doc(result_docs): return max(result_docs, key=lambda doc: doc['score']) def check_is_family(mesh_term): qualifiers = list( filter(lambda qualifier: qualifier['NAME'] == mesh_term, all_qualifiers)) descriptors = list( filter(lambda descriptor: descriptor['NAME'] == mesh_term, all_descriptors)) supplementals = list( filter(lambda supplemental: supplemental['NAME'] == mesh_term,
all_descriptors = cursor.fetchall() all_supplementals = None with db.cursor(pymysql.cursors.DictCursor) as cursor: cursor.execute('SELECT * FROM MESH_SUPPLEMENTAL') all_supplementals = cursor.fetchall() print('Find all mesh terms time:', get_elapsed_seconds(get_current_millis(), elapsed_millis)) # Search using P_NAME from processed rows. # S_ID and HGNC Map. # Key: S_ID # Value: HGNC doc max score sid_hgnc_map = dict() hgnc_http = HttpWrapper(hgnc_search_uri) def get_max_score_doc(result_docs): return max(result_docs, key=lambda doc: doc['score']) def check_is_family(mesh_term): qualifiers = list( filter(lambda qualifier: qualifier['NAME'] == mesh_term, all_qualifiers)) descriptors = list( filter(lambda descriptor: descriptor['NAME'] == mesh_term, all_descriptors)) supplementals = list( filter(lambda supplemental: supplemental['NAME'] == mesh_term,