예제 #1
0
#!/usr/bin/python

from http_wrapper import HttpWrapper
from urllib.parse import quote
import sys

# Optional first command-line argument; None when no argument was given.
# NOTE(review): presumably the S_ID to start processing from -- confirm
# against where START_S_ID is consumed.
START_S_ID = sys.argv[1] if len(sys.argv) > 1 else None

# Base URIs used by this script: the HGNC REST service and the HTTP
# endpoint that the '/sql' request below is sent to.
hgnc_search_uri = 'http://rest.genenames.org'
db_uri = 'http://e3cb0988.ngrok.io'

# Fetch the LUNG_PROCESSED rows whose S_ID is 28 through the DB endpoint.
# NOTE(review): the positional arguments look like (path, method, params,
# body) -- confirm against HttpWrapper.request.
db_http = HttpWrapper(db_uri)
all_processed = db_http.request(
    '/sql', 'GET',
    {'query': 'SELECT * FROM LUNG_PROCESSED WHERE S_ID = 28'},
    '')

# Search using P_NAME from processed rows.

# S_ID -> HGNC map.
#   key:   S_ID
#   value: HGNC doc max score
sid_hgnc_map = {}
예제 #2
0
                     password='',
                     db='mesh',
                     charset='utf8')

# Start timing the initial gene lookup.
elapsed_millis = get_current_millis()
# Get all genes which do not have a MESH_NAME yet, ordered by S_ID.
# When START_S_ID is set, only genes with a larger S_ID are fetched.
all_genes = None
with db.cursor(pymysql.cursors.DictCursor) as cursor:
    if START_S_ID is None:
        query = ('SELECT * FROM PROSTATE_GENES '
                 'WHERE MESH_NAME IS NULL ORDER BY S_ID')
        cursor.execute(query)
    else:
        # Bind START_S_ID as a query parameter instead of formatting it into
        # the SQL text, so an arbitrary string cannot inject SQL.  It is
        # coerced to int so the driver renders it as an unquoted number,
        # which keeps the comparison numeric exactly as before; a
        # non-numeric value now fails fast with ValueError.
        # NOTE(review): assumes S_ID values are integers -- confirm against
        # the PROSTATE_GENES schema.
        query = ('SELECT * FROM PROSTATE_GENES WHERE S_ID > %s '
                 'AND MESH_NAME IS NULL ORDER BY S_ID')
        cursor.execute(query, (int(START_S_ID), ))
    all_genes = cursor.fetchall()
print('Find all genes time:',
      get_elapsed_seconds(get_current_millis(), elapsed_millis))

# HTTP client for the HGNC REST service.
hgnc_http = HttpWrapper('http://rest.genenames.org')


def get_max_score_doc(result_docs):
    """Return the document in *result_docs* with the highest 'score'.

    Each document must be subscriptable with the key 'score'.  When several
    documents share the highest score, the first one encountered is
    returned.  An empty *result_docs* raises ValueError (from ``max``).
    """
    def score_of(candidate):
        return candidate['score']

    return max(result_docs, key=score_of)


# Fill in each gene's MESH_NAME.
for gene in all_genes:
    elapsed_millis = get_current_millis()
    processeds = None
    with db.cursor(pymysql.cursors.DictCursor) as cursor:
        cursor.execute('SELECT * FROM PROSTATE_PROCESSED where S_ID = %s',
                       (gene['S_ID'], ))
        processeds = cursor.fetchall()
    print('Get processed time:',
예제 #3
0
# Load every row of the three MeSH tables (qualifiers, descriptors,
# supplementals) into memory.  DictCursor yields each row as a dict keyed by
# column name, so the rows can be searched in Python afterwards.
all_qualifiers = None
with db.cursor(pymysql.cursors.DictCursor) as cursor:
    cursor.execute('SELECT * FROM MESH_QUALIFIER')
    all_qualifiers = cursor.fetchall()
all_descriptors = None
with db.cursor(pymysql.cursors.DictCursor) as cursor:
    cursor.execute('SELECT * FROM MESH_DESCRIPTOR')
    all_descriptors = cursor.fetchall()
all_supplementals = None
with db.cursor(pymysql.cursors.DictCursor) as cursor:
    cursor.execute('SELECT * FROM MESH_SUPPLEMENTAL')
    all_supplementals = cursor.fetchall()
# NOTE(review): elapsed_millis is expected to have been set before the
# queries above -- confirm it is assigned earlier in the script.
print('Find all mesh terms time:',
      get_elapsed_seconds(get_current_millis(), elapsed_millis))

# HTTP client pointed at the HGNC search service URI.
hgnc_http = HttpWrapper(hgnc_search_uri)


def get_max_score_doc(result_docs):
    """Pick the best-scoring document from *result_docs*.

    Documents are compared by their 'score' entry.  On a tie the earliest
    document wins, and an empty input raises ValueError, exactly as the
    built-in ``max`` does.
    """
    def score_of(candidate):
        return candidate['score']

    return max(result_docs, key=score_of)


def check_is_family(mesh_term):
    qualifiers = list(
        filter(lambda qualifier: qualifier['NAME'] == mesh_term,
               all_qualifiers))
    descriptors = list(
        filter(lambda descriptor: descriptor['NAME'] == mesh_term,
               all_descriptors))
    supplementals = list(
        filter(lambda supplemental: supplemental['NAME'] == mesh_term,
예제 #4
0
    all_descriptors = cursor.fetchall()
# Load every MESH_SUPPLEMENTAL row into memory; DictCursor yields each row as
# a dict keyed by column name.
all_supplementals = None
with db.cursor(pymysql.cursors.DictCursor) as cursor:
    cursor.execute('SELECT * FROM MESH_SUPPLEMENTAL')
    all_supplementals = cursor.fetchall()
# NOTE(review): elapsed_millis is expected to have been set before the MeSH
# queries -- confirm it is assigned earlier in the script.
print('Find all mesh terms time:',
      get_elapsed_seconds(get_current_millis(), elapsed_millis))

# Search using P_NAME from processed rows.

# S_ID -> HGNC map.
# Key: S_ID
# Value: HGNC doc max score
sid_hgnc_map = dict()

# HTTP client pointed at the HGNC search service URI.
hgnc_http = HttpWrapper(hgnc_search_uri)


def get_max_score_doc(result_docs):
    """Return the entry of *result_docs* whose 'score' value is largest.

    Ties resolve to the first such entry.  Raises ValueError when
    *result_docs* is empty, because ``max`` is called without a default.
    """
    def score_of(candidate):
        return candidate['score']

    return max(result_docs, key=score_of)


def check_is_family(mesh_term):
    qualifiers = list(
        filter(lambda qualifier: qualifier['NAME'] == mesh_term,
               all_qualifiers))
    descriptors = list(
        filter(lambda descriptor: descriptor['NAME'] == mesh_term,
               all_descriptors))
    supplementals = list(
        filter(lambda supplemental: supplemental['NAME'] == mesh_term,