Ejemplo n.º 1
0
def load_dictionary(dicttimestamp, server='postgres-cns-myaura'):
    """ Load one version of the dictionary from the database.

    The rows are read from the table named after the dictionary version
    (``dictionaries.dict_<dicttimestamp>`` on PostgreSQL,
    ``dict_<dicttimestamp>`` on MySQL). Only enabled rows are selected, and
    every row is left-joined with its parent row so that parent-less terms
    fall back to their own values.

    Args:
        dicttimestamp (string): the version of dictionary (ex: 20210131).
            Because it becomes part of a table name it may only contain
            letters, digits and underscores.
        server (string): the server name in db_config.ini. It must contain
            either 'postgres' or 'mysql'; that substring selects the backend.

    Returns:
        pandas.DataFrame: the dictionary, indexed by ``id``, with the columns
        ``id_parent``, ``dictionary``, ``token``, ``parent``, ``type``,
        ``source``, ``id_original`` and ``id_original_parent``.

    Raises:
        ValueError: if ``dicttimestamp`` is not a string made only of
            letters, digits and underscores.
        TypeError: if ``server`` names neither a PostgreSQL nor a MySQL
            server (kept as TypeError for backward compatibility).
    """
    import re  # local import so the function does not rely on a file-level one

    # The timestamp is spliced into the SQL text as part of a table name.
    # Identifiers cannot be sent as bound parameters, so reject anything that
    # could alter the statement before building it.
    if not isinstance(dicttimestamp, str) or not re.match(r'[A-Za-z0-9_]+\Z', dicttimestamp):
        raise ValueError(
            "Invalid dicttimestamp %r. It may only contain letters, digits and underscores (ex: 20210131)." % (dicttimestamp,))

    print('--- Loading {server:s} dictionary ({dicttimestamp:s}) ---'.format(server=server, dicttimestamp=dicttimestamp))
    #
    if 'postgres' in server:

        engine = db.connectToPostgreSQL(server=server)
        tablename = 'dictionaries.dict_%s' % (dicttimestamp)
        # COALESCE makes a term without a parent act as its own parent.
        sql = """
            SELECT
                d.id,
                COALESCE(d.id_parent,d.id) AS id_parent,
                d.dictionary,
                d.token,
                COALESCE(p.token, d.token) as parent,
                d.type,
                d.source,
                d.id_original,
                COALESCE(p.id_original, d.id_original) as id_original_parent
                FROM %s d
                LEFT JOIN %s p ON d.id_parent = p.id
                WHERE d.enabled > 0""" % (tablename, tablename)

    elif 'mysql' in server:

        engine = db.connectToMySQL(server=server)
        tablename = 'dict_%s' % (dicttimestamp)
        # Same query as the PostgreSQL branch, written with MySQL's IFNULL.
        sql = """
            SELECT
            d.id,
            IFNULL(d.id_parent,d.id) AS id_parent,
            d.dictionary,
            d.token,
            IFNULL(p.token, d.token) as parent,
            d.type,
            d.source,
            d.id_original,
            IFNULL(p.id_original, d.id_original) as id_original_parent
            FROM %s d
            LEFT JOIN %s p ON d.id_parent = p.id
            WHERE d.enabled = True""" % (tablename, tablename)
    else:
        raise TypeError("Invalid server name. The name of the server must contain either a 'mysql' or 'postgres' string.")

    df = pd.read_sql(sql, engine, index_col='id')

    return df
Ejemplo n.º 2
0
        dicttimestamp = defline.split('=')[1].strip()

    # Load Dictionary
    dfD = load_dictionary(dicttimestamp=dicttimestamp,
                          server='cns-postgres-myaura')
    # Build Parser Vocabulary
    tdp = build_term_parser(dfD)

    dict_token = dfD['token'].to_dict()
    dict_id_parent = dfD['id_parent'].to_dict()
    dict_parent = dfD['parent'].to_dict()
    # dict_dictionary = dfD['dictionary'].to_dict()
    dict_type = dfD['type'].to_dict()
    # dict_source = dfD['source'].to_dict()

    engine_pubmed = db.connectToPostgreSQL(server='cns-postgres-myaura')
    # engine_prediction_result = db.connectToPostgreSQL('postgres_cns_test')

    # db_pubmed = 'pubmed_medline17'
    # db_mention = 'ddi_pubmed_mentions'
    mention_table = 'mention_pubmed_epilepsy_%s.mention' % (dicttimestamp)
    psql_mention = db.connectToPostgreSQL('cns-postgres-myaura')
    # mongo_mention, _ = db.connectToMongoDB(server='mongo_ddi', db=db_mention)

    for i in range(10000):
        offset = i * 100
        print('> Parsing PubMedID: %d - %d' % (i * 100, (i + 1) * 100))
        i += 1

        # SQL Query
        sql = """SELECT pmid, article_title, abstract_text, pub_year FROM pubmed.view_epilepsy offset %d limit 100""" % (
Ejemplo n.º 3
0
# Script entry point: resolve the dictionary version, derive the mention and
# co-mention table names from it, then page through the mention table
# 100 rows at a time.
if __name__ == '__main__':

    #
    # Init
    #
    # Hard-coded version; it is overwritten below whenever var.sh is read
    # successfully (there is no fallback if opening the file fails).
    dicttimestamp = '20180706'  # raw_input("dict timestamp [yyyymmdd]:") #'20171221' # datetime.today().strftime('%Y%m%d')
    # Read the first line of ../scripts/var.sh (relative to this file) and take
    # the second '='-separated field, stripped, as the dictionary version.
    with open(
            os.path.join(os.path.dirname(__file__), '..', 'scripts',
                         'var.sh')) as varfile:
        defline = varfile.readline()
        dicttimestamp = defline.split('=')[1].strip()

    # Schema-qualified table names; the schema name carries the version.
    mention_table = 'mention_pubmed_epilepsy_%s.mention' % (dicttimestamp)
    comention_table = 'mention_pubmed_epilepsy_%s.comention' % (dicttimestamp)
    psql_mention = db.connectToPostgreSQL('cns-postgres-myaura')

    # Starts empty; presumably filled further down (not visible here).
    inserts = {}

    # At most 10000 pages of 100 rows each.
    for i in range(10000):
        offset = i * 100
        print('> Parsing PubMedID: %d - %d' % (i * 100, (i + 1) * 100))
        # NOTE(review): rebinding the loop variable does not change what
        # range() yields next; it only makes the rest of this iteration see
        # i + 1. Confirm that is intended.
        i += 1

        # SQL Query
        # NOTE(review): OFFSET/LIMIT is used without ORDER BY, so the database
        # is free to return rows in a different order per page - confirm the
        # paging is stable enough for this job.
        sql = """SELECT pmid, year_publication, match FROM %s offset %d limit 100""" % (
            mention_table, offset)
        q = psql_mention.execute(sql)
        # No pmid found
        if q.rowcount < 1:
Ejemplo n.º 4
0
# Widen pandas' console output so wide DataFrames print without being cut:
# up to 500 columns and a display width of 1000 characters.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from termdictparser import Sentences
from load_dictionary import load_dictionary, build_term_parser

# import utils

# Script entry point: resolve the dictionary version, open the database
# connections, then load the dictionary and build the term parser from it.
if __name__ == '__main__':

    #
    # Init
    #
    #
    # Hard-coded version; it is overwritten below whenever var.sh is read
    # successfully (there is no fallback if opening the file fails).
    dicttimestamp = '20180706'  # raw_input("dict timestamp [yyyymmdd]:") #'20171221' # datetime.today().strftime('%Y%m%d')
    # Source of the text to parse; presumably queried further down (not
    # visible here).
    src_table = 'clinical_trials.view_clinical_trials'
    engine_src_text = db.connectToPostgreSQL(server='cns-postgres-myaura')

    # Read the first line of ../scripts/var.sh (relative to this file) and take
    # the second '='-separated field, stripped, as the dictionary version.
    with open(
            os.path.join(os.path.dirname(__file__), '..', 'scripts',
                         'var.sh')) as varfile:
        defline = varfile.readline()
        dicttimestamp = defline.split('=')[1].strip()

    # Schema-qualified mention table; the schema name carries the version.
    mention_table = 'mention_clinical_trials_%s.mention' % (dicttimestamp)
    # NOTE(review): second connection to the same server name as
    # engine_src_text - confirm two separate handles are really needed.
    psql_mention = db.connectToPostgreSQL('cns-postgres-myaura')

    # Load Dictionary
    dfD = load_dictionary(dicttimestamp=dicttimestamp,
                          server='cns-postgres-myaura')
    # Build Parser Vocabulary
    tdp = build_term_parser(dfD)