Example #1
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(LOGFILE)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    download_eco(args)
    download_uniprots(args)

    # UniProt uses ECO IDs in GOAs, not GO evidence codes, so get a mapping of
    # ECO IDs to GO evidence codes
    eco_map = mk_eco_map(args)
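
Example #1 ends by calling mk_eco_map(args), which is expected to return a dict keyed by ECO ID with GO evidence codes as values. The helper itself is not part of the snippet; the following is only a minimal sketch of such a function, assuming a hypothetical two-column, tab-separated mapping file (ECO_MAP_FILE is an invented name and path), not the loader's actual implementation.

ECO_MAP_FILE = '../data/eco_mappings.tsv'  # hypothetical path, not from the original loader

def mk_eco_map(args):
  """Return a dict mapping ECO IDs (e.g. 'ECO:0000501') to GO evidence codes (e.g. 'IEA').

  Sketch only; the real TCRD helper may parse eco.obo or a different mapping file.
  args is kept for signature compatibility with the call in Example #1.
  """
  eco_map = {}
  with open(ECO_MAP_FILE) as ifh:
    for line in ifh:
      line = line.strip()
      if not line or line.startswith('#'):
        continue
      eco_id, go_code = line.split('\t')[:2]
      eco_map[eco_id] = go_code
  return eco_map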
Example #2
  loglevel = int(args['--loglevel'])
  if args['--logfile']:
    logfile = args['--logfile']
  else:
    logfile = LOGFILE
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  for cfgd in CONFIG:
    name = cfgd['name']
    #download(name)
    parsed_ont = cfgd['parse_function'](cfgd['DOWNLOAD_DIR']+cfgd['FILENAME'])
    cfgd['load_function'](dba, logger, logfile, parsed_ont, cfgd)
    
  elapsed = time.time() - start_time
  print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
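
The loop at the end of Example #2 is driven by a CONFIG list whose entries pair a download location and filename with parse and load callables. CONFIG itself is not shown anywhere in these snippets; the entry below is only a guess at its shape, with parse_do/load_do as hypothetical stand-ins and the paths purely illustrative.

def parse_do(fn):
  """Parse an ontology file and return a list of term dicts (sketch)."""
  terms = []
  # real parsing of the downloaded OBO file would go here
  return terms

def load_do(dba, logger, logfile, parsed_ont, cfgd):
  """Insert parsed terms via the DBAdaptor (sketch; assumes an ins_do() method exists)."""
  for term in parsed_ont:
    rv = dba.ins_do(term)
    if not rv:
      logger.error("Error inserting DO term. See logfile {} for details.".format(logfile))

CONFIG = [
  {'name': 'Disease Ontology',
   'DOWNLOAD_DIR': '../data/DiseaseOntology/',  # illustrative
   'FILENAME': 'doid.obo',
   'parse_function': parse_do,
   'load_function': load_do},
]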
Example #3
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(LOGFILE)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    load(args, dba, logger, logfile)

    # Dataset and Provenance
    dataset_id = dba.ins_dataset({
        'name':
        'IDG Eligible Targets List',
        'source':
Example #4
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    #do_glygen(dba, logger, logfile)
    do_tiga(dba, logger, logfile)

    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'ExtLinks',
Example #5
  if args['--logfile']:
    logfile =  args['--logfile']
  else:
    logfile = LOGFILE
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  start_time = time.time()

  rv = dba.upd_tdls_null()
  if type(rv) == int:
    print(f"\nSet tdl to NULL for {rv} target rows")
  else:
    print(f"Error setting target.tdl values to NULL. See logfile {logfile} for details.")
    exit(1)
  rv = dba.del_dataset('TDLs')
  if rv:
Example #6
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    # for the time being, this has to be done manually because Lars is forcing https
    # -SLM 20210227
    #print("\nDownloading new JensenLab files...")
    #download_pmscores(args)
    #download_DISEASES(args)

    start_time = time.time()
Example #7
#!/usr/bin/env python3
# Time-stamp: <2020-12-01 17:43:38 smathias>
__author__    = "Steve Mathias"
__email__     = "smathias@salud.unm.edu"
__org__       = "Translational Informatics Division, UNM School of Medicine"
__copyright__ = "Copyright 2020, Steve Mathias"
__license__   = "Creative Commons Attribution-NonCommercial (CC BY-NC)"
__version__   = "1.0.0"

import os,sys,time
from TCRD.DBAdaptor import DBAdaptor

PROGRAM = os.path.basename(sys.argv[0])
DBNAME = 'tcrd6'

if __name__ == '__main__':
  print("\n{} (v{}) [{}]:\n".format(PROGRAM, __version__, time.strftime("%c")))

  dba = DBAdaptor({'dbname': DBNAME})
  dbi = dba.get_dbinfo()
  print("Connected to TCRD database {} (schema ver {}; data ver {})\n".format(dbi['dbname'], dbi['schema_ver'], dbi['data_ver']))
Example #8
  if args['--logfile']:
    logfile =  args['--logfile']
  else:
    logfile = LOGFILE
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  # Dataset and Provenance
  # This has to be done first because the dataset id is needed for xrefs
  dataset_id = dba.ins_dataset( {'name': 'HGNC', 'source': 'Custom download file from https://www.genenames.org/download/custom/', 'app': PROGRAM, 'app_version': __version__, 'url': 'http://www.genenames.org/', 'comments': 'File downloaded with the following column data: HGNC ID, Approved symbol, Approved name, Status, Chromosome, UniProt ID, NCBI Gene ID, Mouse genome database ID'} )
  assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
  provs = [ {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'sym', 'comment': "This is only updated with HGNC data if data from UniProt is absent."},
            {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'geneid', 'comment': "This is only updated with HGNC data if data from UniProt is absent."},
            {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'chr'},
            {'dataset_id': dataset_id, 'table_name': 'xref', 'where_clause': f"dataset_id ={dataset_id}", 'comment': 'These are MGI xrefs only.'} ]
  for prov in provs:
    rv = dba.ins_provenance(prov)
Example #9
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    # ChEMBL MySQL connection
    f = open('/home/smathias/.dbirc', 'r')
    pw = f.readline().strip()
    f.close()
    chembldb = mysql.connector.connect(host='localhost',
                                       port=3306,
                                       db=CHEMBL_DB,
Example #10
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    load(args, dba, logger, logfile)
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'DRGC Resources',
        'source': 'RSS APIs at ',
        'app': PROGRAM,
Example #11
  if args['--logfile']:
    logfile =  args['--logfile']
  else:
    logfile = LOGFILE
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  print("\nDownloading update files...")
  download_pmscores(args)
  download_DISEASES(args)
  download_mentions(args)
  download_do(args)

  start_time = time.time()
  print("\nUpdating JensenLab PubMed Text-mining Scores...")
  # delete existing pmscores
  rv = dba.del_all_rows('pmscore')
Example #12
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    load(args, dba, logger, logfile)
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'PubMed',
        'source': 'NCBI E-Utils',
        'app': PROGRAM,
Example #13
  if args['--logfile']:
    logfile =  args['--logfile']
  else:
    logfile = LOGFILE
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
        
  download(args)
  load(args, dba, logger, logfile)
  
  # Dataset and Provenance
  dataset_id = dba.ins_dataset( {'name': 'IMPC Phenotypes', 'source': "Files %s and %s from ftp://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/latest/results/"%(os.path.basename(GENO_PHENO_FILE), os.path.basename(STAT_RES_FILE)), 'app': PROGRAM, 'app_version': __version__} )
  assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
  provs = [ {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'IMPC'"} ]
  for prov in provs:
    rv = dba.ins_provenance(prov)
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."
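
Every snippet above indexes into an args dict with keys such as '--dbhost', '--loglevel' and '--quiet'. That dict comes from command-line parsing in the full loader scripts; a plausible reconstruction with docopt is sketched below, with the usage text, option names and defaults chosen for illustration rather than copied from any particular loader.

#!/usr/bin/env python3
"""Sketch of the docopt-style argument handling the loaders appear to use.

Usage:
  load-Example.py [--dbhost=<str>] [--dbname=<str>] [--logfile=<file>] [--loglevel=<int>] [--debug] [--quiet]

Options:
  --dbhost=<str>    Database host [default: localhost]
  --dbname=<str>    Database name [default: tcrd6]
  --logfile=<file>  Log file path (falls back to a module-level LOGFILE)
  --loglevel=<int>  Python logging level [default: 30]
  --debug           Leave logger propagation on (console logging)
  --quiet           Suppress progress output
"""
from docopt import docopt  # third-party package; assumed available

if __name__ == '__main__':
  args = docopt(__doc__)
  # args is a plain dict, e.g. {'--dbhost': 'localhost', '--dbname': 'tcrd6',
  #                             '--logfile': None, '--loglevel': '30',
  #                             '--debug': False, '--quiet': False}
  print(args)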
Example #14
    uptdls = dba.get_uniprots_tdls()
    ct = len(uptdls)
    exp_ct = 0
    print(f"\nExporting UniProts/TDLs for {ct} TCRD targets")
    with open(OUTFILE, 'w') as ofh:
        ofh.write(f"UniProt_accession\tPharos_target\tTDL\n")
        for d in uptdls:
            ofh.write(f"{d['uniprot']}\t{d['uniprot']}\t{d['tdl']}\n")
            exp_ct += 1
            slmf.update_progress(exp_ct / ct)
    print(f"Wrote {exp_ct} lines to file {OUTFILE}")


if __name__ == '__main__':
    start_time = time.time()
    print("\n{} (v{}) [{}]:\n".format(PROGRAM, __version__,
                                      time.strftime("%c")))
    dba = DBAdaptor({'dbhost': 'localhost', 'dbname': DBNAME})
    dbi = dba.get_dbinfo()
    print(
        f"Connected to TCRD database {DBNAME} (schema ver {dbi['schema_ver']}; data ver {dbi['data_ver']})"
    )
    run(dba)
    # Add version number to filename and archive mapping file to old_versions dir
    mmver = '.'.join(dbi['data_ver'].split('.')[:2])
    archivefn = ARCHIVE_FILEPAT.format(mmver)
    shutil.copy(OUTFILE, archivefn)
    elapsed = time.time() - start_time
    print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM,
                                                  slmf.secs2str(elapsed)))
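
Example #2 and Example #14 rely on two helpers from an slmf module, slmf.secs2str() and slmf.update_progress(), that are not included in these snippets. The definitions below are minimal guesses that match the call signatures used above; the real module may format its output differently.

import sys

def update_progress(progress):
  """Render an in-place text progress bar for a float in [0, 1] (sketch)."""
  bar_len = 40
  filled = int(round(bar_len * progress))
  pct = round(progress * 100, 1)
  sys.stdout.write("\rProgress: [{}{}] {}%".format('#' * filled, '-' * (bar_len - filled), pct))
  if progress >= 1:
    sys.stdout.write("\n")
  sys.stdout.flush()

def secs2str(t):
  """Format a duration in seconds as H:MM:SS.mmm (sketch)."""
  hours = int(t // 3600)
  minutes = int(t // 60) % 60
  seconds = int(t) % 60
  millis = int(round((t - int(t)) * 1000))
  return "{}:{:02d}:{:02d}.{:03d}".format(hours, minutes, seconds, millis)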
Example #15
    logger.setLevel(loglevel)
    if not args['--debug']:
        logger.propagate = False  # turns off console logging
    fh = logging.FileHandler(logfile)
    fmtr = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    fh.setFormatter(fmtr)
    logger.addHandler(fh)

    dba_params = {
        'dbhost': args['--dbhost'],
        'dbname': args['--dbname'],
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    #download()
    # have to download manually for now due to:
    # (venv) [smathias@juniper 20210915]$ wget https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv
    # --2021-10-27 14:52:43--  https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv
    # Resolving unmtid-shinyapps.net... 3.129.66.110
    # Connecting to unmtid-shinyapps.net|3.129.66.110|:443... connected.
    # ERROR: cannot verify unmtid-shinyapps.net’s certificate, issued by “/C=US/O=Let's Encrypt/CN=R3”:
Example #16
  if args['--logfile']:
    logfile =  args['--logfile']
  else:
    logfile = LOGFILE
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(logfile)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  download(args)
  start_time = time.time()
  load(args, dba, logger, logfile)
  # Dataset
  dataset_id = dba.ins_dataset( {'name': 'OMIM', 'source': 'Files {} downloaded from omim.org'.format(", ".join([GENEMAP_FILE, TITLES_FILE, PS_FILE])), 'app': PROGRAM, 'app_version': __version__, 'url': 'http://omim.org/'} )
  assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
  # Provenance
  provs = [ {'dataset_id': dataset_id, 'table_name': 'omim'},
            {'dataset_id': dataset_id, 'table_name': 'omim_ps'},
            {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'OMIM'"} ]