# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    # Suppress console output; log only via the file handler below.
    logger.propagate = False
# NOTE(review): this fragment logs to the LOGFILE constant; sibling scripts
# in this family honor a --logfile option — confirm that is not intended here.
handler = logging.FileHandler(LOGFILE)
handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'))
logger.addHandler(handler)

# Connect to the target TCRD database.
dba_params = {'dbhost': args['--dbhost'],
              'dbname': args['--dbname'],
              'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
msg = "Connected to TCRD database {} (schema ver {}; data ver {})".format(
    args['--dbname'], dbi['schema_ver'], dbi['data_ver'])
logger.info(msg)
if not args['--quiet']:
    print(msg)

# Fetch the input files needed for the load.
download_eco(args)
download_uniprots(args)
# UniProt uses ECO IDs in GOAs, not GO evidence codes, so get a mapping of
# ECO IDs to GO evidence codes
eco_map = mk_eco_map(args)
# Resolve run-time options and set up file-only logging.
loglevel = int(args['--loglevel'])
logfile = args['--logfile'] if args['--logfile'] else LOGFILE
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fh.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'))
logger.addHandler(fh)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'],
              'dbname': args['--dbname'],
              'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
msg = "Connected to TCRD database {} (schema ver {}; data ver {})".format(
    args['--dbname'], dbi['schema_ver'], dbi['data_ver'])
logger.info(msg)
if not args['--quiet']:
    print(msg)

# Parse and load each configured ontology in turn.
for cfgd in CONFIG:
    name = cfgd['name']
    #download(name)
    parsed_ont = cfgd['parse_function'](cfgd['DOWNLOAD_DIR'] + cfgd['FILENAME'])
    cfgd['load_function'](dba, logger, logfile, parsed_ont, cfgd)

elapsed = time.time() - start_time
print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
logger.setLevel(loglevel) if not args['--debug']: logger.propagate = False # turns off console logging fh = logging.FileHandler(LOGFILE) fmtr = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') fh.setFormatter(fmtr) logger.addHandler(fh) dba_params = { 'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__ } dba = DBAdaptor(dba_params) dbi = dba.get_dbinfo() logger.info( "Connected to TCRD database {} (schema ver {}; data ver {})".format( args['--dbname'], dbi['schema_ver'], dbi['data_ver'])) if not args['--quiet']: print("Connected to TCRD database {} (schema ver {}; data ver {})". format(args['--dbname'], dbi['schema_ver'], dbi['data_ver'])) load(args, dba, logger, logfile) # Dataset and Provenance dataset_id = dba.ins_dataset({ 'name': 'IDG Eligible Targets List', 'source':
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)
# Connect to the TCRD database named on the command line.
dba_params = {
    'dbhost': args['--dbhost'],
    'dbname': args['--dbname'],
    'logger_name': __name__
}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info(
    "Connected to TCRD database {} (schema ver {}; data ver {})".format(
        args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".
          format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
start_time = time.time()
# GlyGen load is disabled; only the TIGA load runs in this revision.
#do_glygen(dba, logger, logfile)
do_tiga(dba, logger, logfile)
# Dataset
# NOTE(review): this call is truncated in the excerpt; the dict continues
# beyond this view.
dataset_id = dba.ins_dataset({
    'name': 'ExtLinks',
# Resolve logfile/loglevel options and set up file-only logging.
if args['--logfile']:
    logfile = args['--logfile']
else:
    logfile = LOGFILE
loglevel = int(args['--loglevel'])
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)
# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
start_time = time.time()
# Reset every target.tdl value before TDLs are recomputed; upd_tdls_null()
# returns a row count on success, something else on failure.
rv = dba.upd_tdls_null()
if type(rv) == int:
    print(f"\nSet tdl to NULL for {rv} target rows")
else:
    print(f"Error setting target.tdl values to NULL. See logfile {logfile} for details.")
    exit(1)
# Remove the previous 'TDLs' dataset record before reloading.
# NOTE(review): truncated in this excerpt — the body of the `if rv:` branch
# continues beyond this view.
rv = dba.del_dataset('TDLs')
if rv:
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    # Keep output out of the console; the file handler below is the sink.
    logger.propagate = False
handler = logging.FileHandler(logfile)
handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'))
logger.addHandler(handler)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'],
              'dbname': args['--dbname'],
              'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
msg = "Connected to TCRD database {} (schema ver {}; data ver {})".format(
    args['--dbname'], dbi['schema_ver'], dbi['data_ver'])
logger.info(msg)
if not args['--quiet']:
    print(msg)

# for the time being, this has to be done manually because Lars is forcing https
# -SLM 20210227
#print("\nDownloading new JensenLab files...")
#download_pmscores(args)
#download_DISEASES(args)
start_time = time.time()
#!/usr/bin/env python3
# Time-stamp: <2020-12-01 17:43:38 smathias>
"""Connect to the TCRD database and print its schema/data versions."""
__author__ = "Steve Mathias"
__email__ = "smathias @salud.unm.edu"
__org__ = "Translational Informatics Division, UNM School of Medicine"
__copyright__ = "Copyright 2020, Steve Mathias"
__license__ = "Creative Commons Attribution-NonCommercial (CC BY-NC)"
__version__ = "1.0.0"

import os
import sys
import time

from TCRD.DBAdaptor import DBAdaptor

PROGRAM = os.path.basename(sys.argv[0])
DBNAME = 'tcrd6'

if __name__ == '__main__':
    # BUG FIX: banner previously read "{} v{}) [{}]" — an unbalanced closing
    # paren; "(v{})" matches the banner used by sibling scripts.
    print("\n{} (v{}) [{}]:\n".format(PROGRAM, __version__, time.strftime("%c")))
    dba = DBAdaptor({'dbname': DBNAME})
    dbi = dba.get_dbinfo()
    print("Connected to TCRD database {} (schema ver {}; data ver {})\n".format(dbi['dbname'], dbi['schema_ver'], dbi['data_ver']))
# Resolve logfile/loglevel options and set up file-only logging.
if args['--logfile']:
    logfile = args['--logfile']
else:
    logfile = LOGFILE
loglevel = int(args['--loglevel'])
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
# BUG FIX: this previously opened logging.FileHandler(LOGFILE), silently
# ignoring a user-supplied --logfile; use the resolved `logfile` instead.
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

# Dataset and Provenance
# This has to be done first because the dataset id is needed for xrefs
dataset_id = dba.ins_dataset(
    {'name': 'HGNC',
     'source': 'Custom download file from https://www.genenames.org/download/custom/',
     'app': PROGRAM, 'app_version': __version__,
     'url': 'http://www.genenames.org/',
     'comments': 'File downloaded with the following column data: HGNC ID, Approved symbol, Approved name, Status, Chromosome, UniProt ID, NCBI Gene ID, Mouse genome database ID'}
)
assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
# Provenance rows tying HGNC-derived columns to the dataset inserted above.
provs = [
    {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'sym',
     'comment': "This is only updated with HGNC data if data from UniProt is absent."},
    {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'geneid',
     'comment': "This is only updated with HGNC data if data from UniProt is absent."},
    {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'chr'},
    # NOTE(review): no space after '=' in this where_clause ("dataset_id =<id>")
    # — confirm the consumer tolerates that formatting.
    {'dataset_id': dataset_id, 'table_name': 'xref',
     'where_clause': f"dataset_id ={dataset_id}",
     'comment': 'These are MGI xrefs only.'}
]
for prov in provs:
    # NOTE(review): sibling scripts assert on this return value; the check may
    # continue beyond this excerpt.
    rv = dba.ins_provenance(prov)
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)
# Connect to the TCRD database named on the command line.
dba_params = {
    'dbhost': args['--dbhost'],
    'dbname': args['--dbname'],
    'logger_name': __name__
}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info(
    "Connected to TCRD database {} (schema ver {}; data ver {})".format(
        args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".
          format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
# ChEMBL MySQL connection
# NOTE(review): reads the MySQL password as the first line of a plain-text
# dotfile; consider a with-block so the handle is closed on error.
f = open('/home/smathias/.dbirc', 'r')
pw = f.readline().strip()
f.close()
# NOTE(review): truncated in this excerpt; the connect() call continues beyond.
chembldb = mysql.connector.connect(host='localhost', port=3306, db=CHEMBL_DB,
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)
# Connect to the TCRD database named on the command line.
dba_params = {
    'dbhost': args['--dbhost'],
    'dbname': args['--dbname'],
    'logger_name': __name__
}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info(
    "Connected to TCRD database {} (schema ver {}; data ver {})".format(
        args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".
          format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
start_time = time.time()
load(args, dba, logger, logfile)
# Dataset
# NOTE(review): the 'source' value ends at 'RSS APIs at ' and the dict is
# truncated in this excerpt; both continue beyond this view.
dataset_id = dba.ins_dataset({
    'name': 'DRGC Resources',
    'source': 'RSS APIs at ',
    'app': PROGRAM,
# Resolve logfile/loglevel options and set up file-only logging.
if args['--logfile']:
    logfile = args['--logfile']
else:
    logfile = LOGFILE
loglevel = int(args['--loglevel'])
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
# BUG FIX: this previously opened logging.FileHandler(LOGFILE), silently
# ignoring a user-supplied --logfile; use the resolved `logfile` instead.
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

# Fetch all JensenLab update files before loading.
print("\nDownloading update files...")
download_pmscores(args)
download_DISEASES(args)
download_mentions(args)
download_do(args)

start_time = time.time()
print("\nUpdating JensenLab PubMed Text-mining Scores...")
# delete existing pmscores
# NOTE(review): `pmscore` is a bare (apparently undefined) name here — it
# likely should be the table-name string 'pmscore'; confirm against
# DBAdaptor.del_all_rows before changing.
rv = dba.del_all_rows(pmscore)
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)
# Connect to the TCRD database named on the command line.
dba_params = {
    'dbhost': args['--dbhost'],
    'dbname': args['--dbname'],
    'logger_name': __name__
}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info(
    "Connected to TCRD database {} (schema ver {}; data ver {})".format(
        args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".
          format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
start_time = time.time()
load(args, dba, logger, logfile)
# Dataset
# NOTE(review): this call is truncated in the excerpt; the dict continues
# beyond this view.
dataset_id = dba.ins_dataset({
    'name': 'PubMed',
    'source': 'NCBI E-Utils',
    'app': PROGRAM,
# Resolve logfile/loglevel options and set up file-only logging.
if args['--logfile']:
    logfile = args['--logfile']
else:
    logfile = LOGFILE
loglevel = int(args['--loglevel'])
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
# BUG FIX: this previously opened logging.FileHandler(LOGFILE), silently
# ignoring a user-supplied --logfile; use the resolved `logfile` instead.
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

download(args)
load(args, dba, logger, logfile)

# Dataset and Provenance
dataset_id = dba.ins_dataset(
    {'name': 'IMPC Phenotypes',
     'source': "Files %s and %s from ftp://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/latest/results/"%(os.path.basename(GENO_PHENO_FILE), os.path.basename(STAT_RES_FILE)),
     'app': PROGRAM, 'app_version': __version__}
)
assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
provs = [
    {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'IMPC'"}
]
for prov in provs:
    rv = dba.ins_provenance(prov)
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."
# NOTE(review): this fragment appears to be the tail of the run(dba) function
# called below — its `def` line is outside this excerpt.
# Export a UniProt-accession / Pharos-target / TDL mapping to OUTFILE.
uptdls = dba.get_uniprots_tdls()
ct = len(uptdls)
exp_ct = 0
print(f"\nExporting UniProts/TDLs for {ct} TCRD targets")
with open(OUTFILE, 'w') as ofh:
    ofh.write(f"UniProt_accession\tPharos_target\tTDL\n")
    for d in uptdls:
        # NOTE(review): the UniProt accession is written for both the
        # UniProt_accession and Pharos_target columns — confirm intended.
        ofh.write(f"{d['uniprot']}\t{d['uniprot']}\t{d['tdl']}\n")
        exp_ct += 1
        slmf.update_progress(exp_ct / ct)
print(f"Wrote {exp_ct} lines to file {OUTFILE}")

if __name__ == '__main__':
    start_time = time.time()
    print("\n{} (v{}) [{}]:\n".format(PROGRAM, __version__, time.strftime("%c")))
    dba = DBAdaptor({'dbhost': 'localhost', 'dbname': DBNAME})
    dbi = dba.get_dbinfo()
    print(
        f"Connected to TCRD database {DBNAME} (schema ver {dbi['schema_ver']}; data ver {dbi['data_ver']})"
    )
    run(dba)
    # Add version number to filename and archive mapping file to old_versions dir
    mmver = '.'.join(dbi['data_ver'].split('.')[:2])
    archivefn = ARCHIVE_FILEPAT.format(mmver)
    shutil.copy(OUTFILE, archivefn)
    elapsed = time.time() - start_time
    print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
# Configure file-only logging for this run.
logger.setLevel(loglevel)
if not args['--debug']:
    # Keep output out of the console; the file handler below is the sink.
    logger.propagate = False
handler = logging.FileHandler(logfile)
handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'))
logger.addHandler(handler)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'],
              'dbname': args['--dbname'],
              'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
msg = "Connected to TCRD database {} (schema ver {}; data ver {})".format(
    args['--dbname'], dbi['schema_ver'], dbi['data_ver'])
logger.info(msg)
if not args['--quiet']:
    print(msg)

#download()
# have to download manually for now due to:
# (venv) [smathias@juniper 20210915]$ wget https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv
# --2021-10-27 14:52:43--  https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv
# Resolving unmtid-shinyapps.net... 3.129.66.110
# Connecting to unmtid-shinyapps.net|3.129.66.110|:443... connected.
# ERROR: cannot verify unmtid-shinyapps.net's certificate, issued by "/C=US/O=Let's Encrypt/CN=R3":
# Resolve logfile/loglevel options and set up file-only logging.
if args['--logfile']:
    logfile = args['--logfile']
else:
    logfile = LOGFILE
loglevel = int(args['--loglevel'])
logger = logging.getLogger(__name__)
logger.setLevel(loglevel)
if not args['--debug']:
    logger.propagate = False  # turns off console logging
# BUG FIX: this previously opened logging.FileHandler(LOGFILE), silently
# ignoring a user-supplied --logfile; use the resolved `logfile` instead.
fh = logging.FileHandler(logfile)
fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
fh.setFormatter(fmtr)
logger.addHandler(fh)

# Connect to the TCRD database named on the command line.
dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
dba = DBAdaptor(dba_params)
dbi = dba.get_dbinfo()
logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

download(args)
start_time = time.time()
load(args, dba, logger, logfile)

# Dataset
dataset_id = dba.ins_dataset(
    {'name': 'OMIM',
     'source': 'Files {} downloaded from omim.org'.format(", ".join([GENEMAP_FILE, TITLES_FILE, PS_FILE])),
     'app': PROGRAM, 'app_version': __version__,
     'url': 'http://omim.org/'}
)
assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
# Provenance
provs = [
    {'dataset_id': dataset_id, 'table_name': 'omim'},
    {'dataset_id': dataset_id, 'table_name': 'omim_ps'},
    {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'OMIM'"}
]