def make_goa_df_getter(url, path, skiprows=15):
    """Build a data frame getter for a tab-separated GOA annotation file.

    The column labels are taken from ``GAF_COLUMNS``; all arguments are handed
    to :func:`make_df_getter` unchanged.

    :param url: The URL passed on to :func:`make_df_getter`
    :param path: The path passed on to :func:`make_df_getter`
    :param skiprows: Number of leading lines to skip before the records start.
     Defaults to 15, the value that used to be hard-coded here, so existing
     callers are unaffected. NOTE(review): the length of the header in front
     of the records may differ between files - confirm the default for each
     source that is wired in.
    :return: The getter produced by :func:`make_df_getter`
    """
    return make_df_getter(
        url,
        path,
        sep='\t',
        names=GAF_COLUMNS,
        skiprows=skiprows,
    )
IS_PART_OF, KEGG, KEGG_REACTOME_PATH, KEGG_REACTOME_URL, KEGG_WP_PATH, KEGG_WP_URL, MAPPING_TYPE, SOURCE_ID, SOURCE_RESOURCE, SPECIAL_MAPPINGS_PATH, SPECIAL_MAPPINGS_URL, TARGET_ID, TARGET_RESOURCE, WP_REACTOME_PATH, WP_REACTOME_URL, ) __all__ = [ 'get_mapping_dict', 'get_equivalent_pairs', 'load_compath_mapping_dfs', 'get_equivalent_mappings_dict', ] Identifier = Tuple[str, str] EquivalenceMapping = Mapping[Identifier, List[Identifier]] get_kegg_reactome_df = make_df_getter(KEGG_REACTOME_URL, KEGG_REACTOME_PATH) get_wp_reactome_df = make_df_getter(WP_REACTOME_URL, WP_REACTOME_PATH) get_kegg_wp_df = make_df_getter(KEGG_WP_URL, KEGG_WP_PATH) get_special_mappings_df = make_df_getter(SPECIAL_MAPPINGS_URL, SPECIAL_MAPPINGS_PATH) def get_mapping_dict(df: pd.DataFrame, mapping_type: str) -> Mapping[Identifier, List[Identifier]]: """Create a dictionary with ComPath mappings for each pathway.""" mapping_dict = defaultdict(list) for index, row in df.iterrows(): if row[MAPPING_TYPE] != mapping_type: continue if row[SOURCE_RESOURCE] != KEGG and row[TARGET_RESOURCE] != KEGG: mapping_dict[(row[SOURCE_RESOURCE], row[SOURCE_ID])].append((row[TARGET_RESOURCE], row[TARGET_ID]))
# -*- coding: utf-8 -*-

"""Downloaders for Bio2BEL HMDD"""

from bio2bel.downloading import make_df_getter
from .constants import HMDD_COLUMNS, HMDD_PATH, HMDD_URL

__all__ = [
    'get_hmdd_df',
]

# The source is read as a tab-separated table.
# NOTE(review): HMDD_COLUMNS is imported but not handed over as ``names=`` -
# confirm whether the file carries its own header row.
get_hmdd_df = make_df_getter(HMDD_URL, HMDD_PATH, sep='\t')
"""Loads the HMDD into a data frame

1) Index
2) miRNA ID
3) MeSHDisease term
4) PubMed ID
5) Description
"""
SPECIES_URL, ) __all__ = [ 'get_species_df', 'download_definitions', 'get_aliases_df', 'get_mirbase_alias_to_id', ] logger = logging.getLogger(__name__) get_species_df = make_df_getter( SPECIES_URL, SPECIES_PATH, sep='\t', names=SPECIES_HEADER, skiprows=1, ) download_definitions = make_downloader(DEFINITIONS_URL, DEFINITIONS_PATH) get_aliases_df = make_df_getter( ALIASES_URL, ALIASES_PATH, sep='\t', ) def get_mirbase_alias_to_id( ) -> Tuple[Mapping[str, str], Mapping[str, List[str]]]:
import os import pickle from typing import Optional import pandas as pd from bio2bel.downloading import make_df_getter from .constants import MAPPINGS_PATH, MAPPINGS_URL, SLIM_MAPPINGS_PATH __all__ = [ 'get_mappings_df', 'get_slim_mappings_df', ] get_mappings_df = make_df_getter(MAPPINGS_URL, MAPPINGS_PATH, sep='\t', header=None) """Returns a file with the following columns: 1. UniProtKB-AC 2. UniProtKB-ID 3. GeneID (EntrezGene) 4. RefSeq 5. GI 6. PDB 7. GO 8. UniRef100 9. UniRef90 10. UniRef50 11. UniParc 12. PIR 13. NCBI-taxon
# -*- coding: utf-8 -*-

"""Parsers for FlyBase."""

from bio2bel.downloading import make_df_getter
from .constants import GENE_MAPPING_PATH, GENE_MAPPING_URL

__all__ = ['get_mapping_df']

# Getter for the gene mapping table: a gzip-compressed, tab-separated file in
# which '#' starts a comment. Only the columns at positions 1 and 2 are kept
# and they are labelled ``symbol`` and ``flybase_id``.
# NOTE(review): confirm that positions 1 and 2 really hold the symbol and the
# FlyBase identifier in the current release of the file.
# (The keyword arguments are presumably forwarded to ``pandas.read_csv``.)
get_mapping_df = make_df_getter(
    GENE_MAPPING_URL,
    GENE_MAPPING_PATH,
    sep='\t',
    comment='#',
    compression='gzip',
    names=['symbol', 'flybase_id'],
    usecols=[1, 2],
    na_filter=False,
)
GENE2REFSEQ_COLUMNS, GENE2REFSEQ_DATA_PATH, GENE2REFSEQ_HUMAN_DATA_PATH, GENE2REFSEQ_HUMAN_SLIM_DATA_PATH, GENE2REFSEQ_URL, GENE_INFO_COLUMNS, GENE_INFO_DATA_PATH, GENE_INFO_URL, HOMOLOGENE_COLUMNS, HOMOLOGENE_DATA_PATH, HOMOLOGENE_URL, ) __all__ = [ 'get_gene_info_df', 'get_homologene_df', 'get_refseq_df', 'get_human_refseq_slim_df', ] get_gene_info_df = make_df_getter( GENE_INFO_URL, GENE_INFO_DATA_PATH, sep='\t', na_values=['-', 'NEWENTRY'], usecols=GENE_INFO_COLUMNS, ) get_homologene_df = make_df_getter( HOMOLOGENE_URL, HOMOLOGENE_DATA_PATH, sep='\t', names=HOMOLOGENE_COLUMNS, ) """Download the HomoloGene data. Columns: 1) HID (HomoloGene group id)
from bio2bel.downloading import make_df_getter
from .constants import GENES_PATH, GENES_URL

__all__ = ['get_genes_df']

# One entry per retained column: (position in the source file, label used in
# the resulting data frame). Keeping both in a single table prevents the
# positions and the labels from drifting apart.
_GENE_COLUMNS = [
    (0, 'rgd_id'),  # RGD identifier
    (1, 'symbol'),  # Symbol
    (2, 'name'),  # Name
    (3, 'description'),  # Description
    (20, 'entrez_id'),  # NCBI_GENE_ID
    (36, 'gene_type'),  # Gene type
]

# Getter for the tab-separated genes file; '#' starts a comment and the row
# given by ``header=0`` is replaced by the labels above.
get_genes_df = make_df_getter(
    GENES_URL,
    GENES_PATH,
    sep='\t',
    usecols=[position for position, _ in _GENE_COLUMNS],
    names=[label for _, label in _GENE_COLUMNS],
    header=0,
    comment='#',
)
# -*- coding: utf-8 -*-

"""Getter for the markers data frame."""

from bio2bel.downloading import make_df_getter
from ..constants import MARKERS_PATH, MARKERS_URL

__all__ = ['get_marker_df']

# One entry per retained column: (position in the source file, label used in
# the resulting data frame).
_MARKER_COLUMNS = [
    (0, 'mgi_id'),  # MGI ID
    (6, 'symbol'),  # SYMBOL
    (8, 'name'),  # name
    (9, 'marker_type'),  # marker type
    (10, 'feature_type'),  # feature type
]

# Getter for the tab-separated markers file. The first line of the file is
# skipped and the selected columns receive the labels listed above.
get_marker_df = make_df_getter(
    MARKERS_URL,
    MARKERS_PATH,
    sep='\t',
    usecols=[position for position, _ in _MARKER_COLUMNS],
    names=[label for _, label in _MARKER_COLUMNS],
    skiprows=[0],
)
# -*- coding: utf-8 -*-

"""Parsers and downloaders for Bio2BEL Antibody Registry."""

from bio2bel.downloading import make_df_getter
from .constants import HEADER, PATH, URL

__all__ = ['df_getter']

# Only this many leading columns are kept; their labels are the matching
# leading entries of HEADER.
_N_COLUMNS = 3

# The first line of the file is skipped because the labels are supplied
# explicitly.
# NOTE(review): ``chunksize`` presumably makes the getter yield the table in
# pieces of 250,000 rows rather than a single data frame - confirm against
# ``make_df_getter``.
df_getter = make_df_getter(
    URL,
    PATH,
    names=HEADER[:_N_COLUMNS],
    skiprows=1,
    usecols=list(range(_N_COLUMNS)),
    chunksize=250_000,
)
from .constants import ( GENE_PERTURBATIONS_DATA_PATH, GENE_PERTURBATIONS_DATA_URL, GENE_PERTURBATIONS_METADATA_PATH, GENE_PERTURBATIONS_METADATA_URL, ) __all__ = [ 'get_gene_perturbations_metadata_df', 'get_gene_perturbations_json', 'get_gene_perturbations_metadata_preprocessed_df', 'get_gene_perturbations_preprocessed_df', ] get_gene_perturbations_metadata_df = make_df_getter( GENE_PERTURBATIONS_METADATA_URL, GENE_PERTURBATIONS_METADATA_PATH, ) get_gene_perturbations_json = make_json_getter( GENE_PERTURBATIONS_DATA_URL, GENE_PERTURBATIONS_DATA_PATH, ) hgnc_gene_symbol_update = { 'PARK2': 'PRKN', 'ERO1L': 'ERO1A', 'RFWD2': 'COP1', 'CYR61': 'CCN1', 'FAM60A': 'SINHCAF', 'PRKCDBP': 'CAVIN3', 'VPRBP': 'DCAF1',
# -*- coding: utf-8 -*-

"""Parsers and downloaders for Bio2BEL GWAS Catalog."""

from bio2bel.downloading import make_df_getter
from .constants import PATH, URL

__all__ = ['df_getter']

# Labels of the columns that are retained from the tab-separated file; every
# other column is dropped while reading.
_COLUMNS = (
    'PUBMEDID',
    'MAPPED_GENE',
    'SNPS',
    'CONTEXT',
    'INTERGENIC',
    'RISK ALLELE FREQUENCY',
    'PVALUE_MLOG',
    'OR or BETA',
    '95% CI (TEXT)',
    'MAPPED_TRAIT',
    'MAPPED_TRAIT_URI',
)

df_getter = make_df_getter(
    URL,
    PATH,
    sep='\t',
    usecols=list(_COLUMNS),
)