Example #1
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    load(args, dba, logger, logfile)

    # Dataset and Provenance
    dataset_id = dba.ins_dataset({
        'name': 'IDG Eligible Targets List',
        'source': f'IDG generated data in file {IDG_LIST_FILE}.',
        'app': PROGRAM,
        'app_version': __version__,
        'comments': 'IDG Target Flags are archived on GitHub in repo https://github.com/druggablegenome/IDGTargets.',
        'url': 'https://github.com/druggablegenome/IDGTargets'
    })
    assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
    # Provenance
    provs = [{
        'dataset_id': dataset_id,
        'table_name': 'target',
        'column_name': 'idg'
    }, {
        'dataset_id': dataset_id,
        'table_name': 'target',
Example #2
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    download_eco(args)
    download_uniprots(args)

    # UniProt uses ECO IDs in GOAs, not GO evidence codes, so get a mapping of
    # ECO IDs to GO evidence codes
    eco_map = mk_eco_map(args)
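
    # A minimal sketch of what mk_eco_map might build, assuming the ECO
    # ontology OBO file carries "xref: GOECO:<code>" lines in its [Term]
    # stanzas (ECO_OBO_FILE is a hypothetical path; the real helper may
    # parse a different artifact):
    def mk_eco_map_sketch():
        eco_map = {}
        eco_id = None
        with open(ECO_OBO_FILE) as f:
            for line in f:
                line = line.strip()
                if line == '[Term]':
                    eco_id = None
                elif line.startswith('id: ECO:'):
                    eco_id = line[4:]
                elif line.startswith('xref: GOECO:') and eco_id:
                    # e.g. "xref: GOECO:EXP" => eco_map['ECO:...'] = 'EXP'
                    eco_map[eco_id] = line[12:].split()[0]
        return eco_map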

    # Human proteins
    # Dataset and Provenance
    # This has to be done first because the dataset id is needed for xrefs and aliases
    dataset_id = dba.ins_dataset({
        'name': 'UniProt',
        'source': f"UniProt XML file {UP_HUMAN_FILE} from {UP_BASE_URL}",
        'app': PROGRAM,
        'app_version': __version__,
        'url': 'https://www.uniprot.org'
    })
    assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
    provs = [{
        'dataset_id': dataset_id,
        'table_name': 'target',
        'column_name': 'name'
    }, {
        'dataset_id': dataset_id,
        'table_name': 'target',
        'column_name': 'description'
    }, {
        'dataset_id': dataset_id,
        'table_name': 'target',
Example #3
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    #do_glygen(dba, logger, logfile)
    do_tiga(dba, logger, logfile)

    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'ExtLinks',
        'source': 'Tested links to target/protein info in external resources.',
        'app': PROGRAM,
        'app_version': __version__
    })
    assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
    # Provenance
    rv = dba.ins_provenance({
        'dataset_id': dataset_id,
        'table_name': 'extlink'
    })
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."

    elapsed = time.time() - start_time
    print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM,
                                                  slmf.secs2str(elapsed)))
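
    # slmf.secs2str pretty-prints the elapsed seconds above; a minimal
    # equivalent with assumed H:MM:SS behavior (the real helper may differ):
    def secs2str_sketch(t):
        # e.g. secs2str_sketch(3725) -> '1:02:05'
        h, rem = divmod(int(t), 3600)
        m, s = divmod(rem, 60)
        return f"{h}:{m:02d}:{s:02d}"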
Example #4
  if isinstance(rv, int):
    print(f"\nSet tdl to NULL for {rv} target rows")
  else:
    print(f"Error setting target.tdl values to NULL. See logfile {logfile} for details.")
    exit(1)
  rv = dba.del_dataset('TDLs')
  if rv:
    print(f"Deleted previous 'TDLs' dataset")
  else:
    print(f"Error deleting 'TDLs' dataset. See logfile {logfile} for details.")
    exit(1)
  
  load_tdls(dba, logfile, logger)
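
  # A hypothetical sketch of the IDG TDL classification that load_tdls
  # implements (rules paraphrased from the published Tclin/Tchem/Tbio/Tdark
  # definitions; the field names below are illustrative, not TCRD schema):
  def classify_tdl_sketch(t):
    if t['has_moa_drug']:             # approved drug with a known mechanism
      return 'Tclin'
    if t['has_qualifying_activity']:  # small-molecule activity above cutoffs
      return 'Tchem'
    dark_criteria = [t['pubmed_score'] < 5,
                     t['generif_count'] <= 3,
                     t['antibody_count'] <= 50]
    if sum(dark_criteria) >= 2:       # little-known targets are Tdark
      return 'Tdark'
    return 'Tbio'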
  
  # Dataset
  dataset_id = dba.ins_dataset( {'name': 'TDLs',
                                 'source': 'IDG-KMC generated data by Steve Mathias at UNM.',
                                 'app': PROGRAM, 'app_version': __version__,
                                 'comments': 'TDLs are calculated by the loading app from data in TCRD.'} )
  assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
  # Provenance
  rv = dba.ins_provenance({'dataset_id': dataset_id, 'table_name': 'target', 'column_name': 'tdl'})
  assert rv, f"Error inserting provenance. See logfile {logfile} for details."

  # Add version number to filename and archive mapping file to old_versions dir
  mmver = '.'.join( dbi['data_ver'].split('.')[:2] )
  outfn = OUTFILE_PAT.format(mmver)
  export_uniprot_mapping(dba, outfn)
  shutil.copy(outfn, '/usr/local/apache2/htdocs/tcrd/download/PharosTCRD_UniProt_Mapping.tsv')
  print(f"Copied {outfn} to /usr/local/apache2/htdocs/tcrd/download/PharosTCRD_UniProt_Mapping.tsv")
  
  elapsed = time.time() - start_time
  print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
Example #5
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(LOGFILE)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  # Dataset and Provenance
  # This has to be done first because the dataset id is needed for xrefs
  dataset_id = dba.ins_dataset( {'name': 'HGNC',
                                 'source': 'Custom download file from https://www.genenames.org/download/custom/',
                                 'app': PROGRAM, 'app_version': __version__,
                                 'url': 'http://www.genenames.org/',
                                 'comments': 'File downloaded with the following column data: HGNC ID, Approved symbol, Approved name, Status, Chromosome, UniProt ID, NCBI Gene ID, Mouse genome database ID'} )
  assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
  provs = [ {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'sym', 'comment': "This is only updated with HGNC data if data from UniProt is absent."},
            {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'geneid', 'comment': "This is only updated with HGNC data if data from UniProt is absent."},
            {'dataset_id': dataset_id, 'table_name': 'protein', 'column_name': 'chr'},
            {'dataset_id': dataset_id, 'table_name': 'xref', 'where_clause': f"dataset_id = {dataset_id}", 'comment': 'These are MGI xrefs only.'} ]
  for prov in provs:
    rv = dba.ins_provenance(prov)
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."

  load(args, dba, dataset_id, logger, logfile)
    
  elapsed = time.time() - start_time
  print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
Example #6
    download_mappings()
    up2chembl = parse_mappings(DOWNLOAD_DIR + UNIPROT2CHEMBL_FILE)
    if not args['--quiet']:
        print("  Got {} UniProt to ChEMBL 'SINGLE PROTEIN' mappings".format(
            len(up2chembl)))
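
    # A minimal sketch of parse_mappings, assuming the standard ChEMBL
    # chembl_uniprot_mapping.txt layout: '#'-prefixed header, then
    # tab-separated UniProt accession, ChEMBL target ID, description and
    # target type:
    def parse_mappings_sketch(fn):
        up2chembl = {}
        with open(fn) as f:
            for line in f:
                if line.startswith('#'):
                    continue
                up, chembl_id, _, ttype = line.rstrip('\n').split('\t')
                if ttype == 'SINGLE PROTEIN':
                    # one accession can map to multiple ChEMBL targets
                    up2chembl.setdefault(up, []).append(chembl_id)
        return up2chembl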

    # process and load new data
    load(args, dba, up2chembl, chembldb, logfile, logger)
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'ChEMBL',
        'source': 'ChEMBL MySQL database {}'.format(CHEMBL_DB),
        'app': PROGRAM,
        'app_version': __version__,
        'url': 'https://www.ebi.ac.uk/chembl/',
        'comments': "The ChEMBL activities in TCRD are from two sources only: 'Scientific Literature' and 'Patent Bioactivity Data', and are also filtered for family-specific cutoffs."
    })
    assert dataset_id, f"Error inserting ChEMBL dataset. See logfile {logfile} for details."
    dataset_id2 = dba.ins_dataset({
        'name': 'ChEMBL Info',
        'source': 'IDG-KMC generated data by Steve Mathias at UNM.',
        'app': PROGRAM,
        'app_version': __version__,
Example #7
        'logger_name': __name__
    }
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    load(args, dba, logger, logfile)
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'DRGC Resources',
        'source': 'RSS APIs at ',
        'app': PROGRAM,
        'app_version': __version__
    })
    assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
    # Provenance
    rv = dba.ins_provenance({
        'dataset_id': dataset_id,
        'table_name': 'drgc_resource'
    })
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."
    elapsed = time.time() - start_time
    print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM,
                                                  slmf.secs2str(elapsed)))
Example #8
    dba = DBAdaptor(dba_params)
    dbi = dba.get_dbinfo()
    logger.info(
        "Connected to TCRD database {} (schema ver {}; data ver {})".format(
            args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
    if not args['--quiet']:
        print("Connected to TCRD database {} (schema ver {}; data ver {})".
              format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

    start_time = time.time()
    load(args, dba, logger, logfile)
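
    # The load step presumably pulls article metadata via NCBI E-Utils; a
    # standalone sketch of one EFetch request (batching, rate limiting and
    # api_key handling omitted; not the loader's actual code):
    import urllib.request
    EFETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    def fetch_pubmed_xml_sketch(pmids):
        # e.g. fetch_pubmed_xml_sketch(['12345']) -> PubmedArticleSet XML
        url = f"{EFETCH_URL}?db=pubmed&retmode=xml&id={','.join(pmids)}"
        with urllib.request.urlopen(url) as resp:
            return resp.read().decode()
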
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'PubMed',
        'source': 'NCBI E-Utils',
        'app': PROGRAM,
        'app_version': __version__,
        'url': 'https://www.ncbi.nlm.nih.gov/pubmed'
    })
    assert dataset_id, "Error inserting dataset See logfile {} for details.".format(
        logfile)
    # Provenance
    rv = dba.ins_provenance({'dataset_id': dataset_id, 'table_name': 'pubmed'})
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."
    rv = dba.ins_provenance({
        'dataset_id': dataset_id,
        'table_name': 'protein2pubmed'
    })
    assert rv, "Error inserting provenance. See logfile {} for details.".format(
        logfile)
    elapsed = time.time() - start_time
Example #9
  loglevel = int(args['--loglevel'])
  logger = logging.getLogger(__name__)
  logger.setLevel(loglevel)
  if not args['--debug']:
    logger.propagate = False # turns off console logging
  fh = logging.FileHandler(LOGFILE)
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
        
  download(args)
  load(args, dba, logger, logfile)
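
  # A sketch of what download(args) might do for the two files named in the
  # dataset below (FTP path taken from the dataset source string; retrieval
  # via urllib is an assumption):
  import os, urllib.request
  IMPC_BASE = 'ftp://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/latest/results/'
  def download_impc_sketch(fn):
    urllib.request.urlretrieve(IMPC_BASE + os.path.basename(fn), fn)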
  
  # Dataset and Provenance
  dataset_id = dba.ins_dataset( {'name': 'IMPC Phenotypes',
                                 'source': "Files %s and %s from ftp://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/latest/results/" % (os.path.basename(GENO_PHENO_FILE), os.path.basename(STAT_RES_FILE)),
                                 'app': PROGRAM, 'app_version': __version__} )
  assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
  provs = [ {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'IMPC'"} ]
  for prov in provs:
    rv = dba.ins_provenance(prov)
    assert rv, f"Error inserting provenance. See logfile {logfile} for details."
  
  elapsed = time.time() - start_time
  print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
Example #10
    # --2021-10-27 14:52:43--  https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv
    # Resolving unmtid-shinyapps.net... 3.129.66.110
    # Connecting to unmtid-shinyapps.net|3.129.66.110|:443... connected.
    # ERROR: cannot verify unmtid-shinyapps.net’s certificate, issued by “/C=US/O=Let's Encrypt/CN=R3”:
    # Issued certificate has expired.
    # -SLM 20211027
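    # If that recurs, one workaround (an assumption, not necessarily what was
    # done here) is to skip verification for this single fetch:
    #   wget --no-check-certificate https://unmtid-shinyapps.net/download/TIGA/20210915/tiga_gene-trait_stats.tsv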

    start_time = time.time()
    load(dba, logger, logfile)
    # Dataset
    dataset_id = dba.ins_dataset({
        'name': 'TIGA',
        'source': f'TIGA Download files {TIGA_FILE} and {TIGA_PROV_FILE} from {BASE_URL}',
        'app': PROGRAM,
        'app_version': __version__,
        'url': 'https://unmtid-shinyapps.net/shiny/tiga/'
    })
    assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
    # Provenance
    provs = [{
        'dataset_id': dataset_id,
        'table_name': 'tiga'
    }, {
        'dataset_id': dataset_id,
        'table_name': 'tiga_provenance'
    }]
    for prov in provs:
Example #11
  fmtr = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
  fh.setFormatter(fmtr)
  logger.addHandler(fh)

  dba_params = {'dbhost': args['--dbhost'], 'dbname': args['--dbname'], 'logger_name': __name__}
  dba = DBAdaptor(dba_params)
  dbi = dba.get_dbinfo()
  logger.info("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
  if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})".format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))

  download(args)
  start_time = time.time()
  load(args, dba, logger, logfile)
  # Dataset
  dataset_id = dba.ins_dataset( {'name': 'OMIM',
                                 'source': 'Files {} downloaded from omim.org'.format(", ".join([GENEMAP_FILE, TITLES_FILE, PS_FILE])),
                                 'app': PROGRAM, 'app_version': __version__,
                                 'url': 'http://omim.org/'} )
  assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
  # Provenance
  provs = [ {'dataset_id': dataset_id, 'table_name': 'omim'},
            {'dataset_id': dataset_id, 'table_name': 'omim_ps'},
            {'dataset_id': dataset_id, 'table_name': 'phenotype', 'where_clause': "ptype = 'OMIM'"} ]
  for prov in provs:
    rv = dba.ins_provenance(prov)
    assert rv, "Error inserting provenance. See logfile {} for details.".format(logfile)
  elapsed = time.time() - start_time
  print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))