# -*- coding: utf-8 -*-

"""Constants for Bio2BEL Entrez."""

import os

from bio2bel import get_data_dir

MODULE_NAME = 'ncbigene'

#: Directory returned by bio2bel for this module's data
DATA_DIR = get_data_dir(MODULE_NAME)

# gene_info: remote URL and the matching path under DATA_DIR
GENE_INFO_URL = 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz'
GENE_INFO_DATA_PATH = os.path.join(DATA_DIR, 'gene_info.gz')

# gene2refseq: remote URL and the paths under DATA_DIR for the full file,
# the "human" file, and the "human.slim" file
GENE2REFSEQ_URL = 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2refseq.gz'
GENE2REFSEQ_DATA_PATH = os.path.join(DATA_DIR, 'gene2refseq.gz')
GENE2REFSEQ_HUMAN_DATA_PATH = os.path.join(DATA_DIR, 'gene2refseq.human')
GENE2REFSEQ_HUMAN_SLIM_DATA_PATH = os.path.join(
    DATA_DIR,
    'gene2refseq.human.slim',
)

# HomoloGene: remote URLs (release number file and data file) and the path
# under DATA_DIR for the data file
HOMOLOGENE_BUILD_URL = 'ftp://ftp.ncbi.nih.gov/pub/HomoloGene/current/RELEASE_NUMBER'
HOMOLOGENE_URL = 'ftp://ftp.ncbi.nih.gov/pub/HomoloGene/current/homologene.data'
HOMOLOGENE_DATA_PATH = os.path.join(DATA_DIR, 'homologene.data')

#: Columns from gene_info.gz that are used
GENE_INFO_COLUMNS = [
    '#tax_id',
    'GeneID',
    'Symbol',
    'dbXrefs',
    'description',
    'type_of_gene',
]
"""This module contains all the constants used in the PathwayForte repo."""

import logging
import os
import time

from bio2bel import get_data_dir

logger = logging.getLogger(__name__)

# Directory holding this file, with symbolic links resolved
dir_path = os.path.dirname(os.path.realpath(__file__))

# Absolute path of the parent directory of ``dir_path``
SOURCE = os.path.abspath(os.path.join(dir_path, os.pardir))

# Data folder where gene sets files are: a 'data' directory that sits next to
# SOURCE (i.e. inside SOURCE's parent directory)
DATA = os.path.join(
    os.path.abspath(os.path.join(SOURCE, os.pardir)),
    'data',
)

# Directory returned by bio2bel for the 'pathwayforte' module
BIO2BEL_DATA_DIR = get_data_dir('pathwayforte')

# Cancer Data Sets
CANCER_DATA_SETS = {
    'brca',
    'lihc',
    'kirc',
    'prad',
    'ov',
}

TCGA_DATASETS = os.path.join(DATA, 'tcga_datasets')

# Raw expression matrix from TCGA; the '{}' path component is a placeholder
# left for the caller to fill in via str.format
EXPRESSION_MATRIX = os.path.join(
    TCGA_DATASETS,
    '{}',
    'expression_matrix_full.txt',
)

# File with phenotype classes (e.g., tumor vs normal)
# -*- coding: utf-8 -*-

"""Constants for Bio2BEL ExCAPE-DB."""

import os

from bio2bel import get_data_dir

MODULE = 'excape'

#: Directory returned by bio2bel for this module's data
DATA_DIR = get_data_dir(MODULE)

#: Remote location of the xz-compressed TSV dataset
URL = 'https://zenodo.org/record/173258/files/pubchem.chembl.dataset4publication_inchi_smiles.tsv.xz?download=1'

#: Path under DATA_DIR carrying the same file name as the remote dataset
PATH = os.path.join(
    DATA_DIR,
    'pubchem.chembl.dataset4publication_inchi_smiles.tsv.xz',
)

#: Column names, each annotated with its meaning
HEADER = [
    'Ambit_InchiKey',  # hash key
    'Original_Entry_ID',  # source database ID
    'pXC50',  # measurement value (float)
    'DB',  # source database + version
    'InChI',  # structural information
    'SMILES',  # structural information as well
    'Entrez_ID',  # identifier from the Entrez database for the target
    'Tax_ID',  # species
    'Gene_Symbol',  # pretty name for the gene
    'Ortholog_Group',  # gene group classifier
    'Activity_Flag',  # active / not active
    'Original_Assay_ID',  # identifier of the original assay
]