Esempio n. 1
0
def make_goa_df_getter(url, path, *, skiprows=15):
    """Build a getter through ``make_df_getter`` configured for a tab-separated file with ``GAF_COLUMNS``.

    :param url: Passed to ``make_df_getter`` as its first positional argument
    :param path: Passed to ``make_df_getter`` as its second positional argument
    :param skiprows: Passed to ``make_df_getter`` as the ``skiprows`` option. Defaults to 15, the value that
     was previously hard-coded, so existing callers are unaffected; override it for files whose leading
     header block has a different length.
    :return: Whatever ``make_df_getter`` returns (presumably a callable that loads the file into a data
     frame -- confirm against the definition of ``make_df_getter``)
    """
    return make_df_getter(
        url,
        path,
        sep='\t',
        names=GAF_COLUMNS,
        skiprows=skiprows,
    )
Esempio n. 2
0
    IS_PART_OF, KEGG, KEGG_REACTOME_PATH, KEGG_REACTOME_URL, KEGG_WP_PATH, KEGG_WP_URL, MAPPING_TYPE, SOURCE_ID,
    SOURCE_RESOURCE, SPECIAL_MAPPINGS_PATH, SPECIAL_MAPPINGS_URL, TARGET_ID, TARGET_RESOURCE, WP_REACTOME_PATH,
    WP_REACTOME_URL,
)

# Names exported by this module.
__all__ = [
    'get_mapping_dict',
    'get_equivalent_pairs',
    'load_compath_mapping_dfs',
    'get_equivalent_mappings_dict',
]

# A pair of strings; get_mapping_dict builds these as (resource, identifier).
Identifier = Tuple[str, str]
# Each identifier mapped to a list of identifiers.
EquivalenceMapping = Mapping[Identifier, List[Identifier]]

# One getter per mapping file, each built from that file's URL and path constants with no extra options.
get_kegg_reactome_df = make_df_getter(
    KEGG_REACTOME_URL,
    KEGG_REACTOME_PATH,
)
get_wp_reactome_df = make_df_getter(
    WP_REACTOME_URL,
    WP_REACTOME_PATH,
)
get_kegg_wp_df = make_df_getter(
    KEGG_WP_URL,
    KEGG_WP_PATH,
)
get_special_mappings_df = make_df_getter(
    SPECIAL_MAPPINGS_URL,
    SPECIAL_MAPPINGS_PATH,
)


def get_mapping_dict(df: pd.DataFrame, mapping_type: str) -> Mapping[Identifier, List[Identifier]]:
    """Create a dictionary with ComPath mappings for each pathway.

    :param df: A data frame of mappings; each row is read through the MAPPING_TYPE, SOURCE_RESOURCE,
     SOURCE_ID, TARGET_RESOURCE, and TARGET_ID columns
    :param mapping_type: Only rows whose MAPPING_TYPE value equals this are used
    """
    # Missing keys start out as empty lists, so entries can be appended without initialisation
    mapping_dict = defaultdict(list)

    for index, row in df.iterrows():
        # Ignore rows of any other mapping type
        if row[MAPPING_TYPE] != mapping_type:
            continue

        # Neither side is KEGG: file the (resource, id) of the target under the (resource, id) of the source
        if row[SOURCE_RESOURCE] != KEGG and row[TARGET_RESOURCE] != KEGG:
            mapping_dict[(row[SOURCE_RESOURCE], row[SOURCE_ID])].append((row[TARGET_RESOURCE], row[TARGET_ID]))
Esempio n. 3
0
# -*- coding: utf-8 -*-
"""Downloaders for Bio2BEL HMDD"""

from bio2bel.downloading import make_df_getter
from .constants import HMDD_COLUMNS, HMDD_PATH, HMDD_URL

__all__ = [
    'get_hmdd_df',
]

# Getter built by make_df_getter from the HMDD URL and path constants, with a tab separator.
get_hmdd_df = make_df_getter(HMDD_URL, HMDD_PATH, sep='\t')
"""Loads the HMDD into a data frame

    1) Index
    2) miRNA ID
    3) MeSHDisease term
    4) PubMed ID
    5) Description
"""
Esempio n. 4
0
    SPECIES_URL,
)

# Names exported by this module.
__all__ = [
    'get_species_df',
    'download_definitions',
    'get_aliases_df',
    'get_mirbase_alias_to_id',
]

logger = logging.getLogger(__name__)

# Species file: tab separator, column names taken from SPECIES_HEADER, skiprows=1.
# (The keyword options are presumably forwarded to the underlying reader -- confirm in make_df_getter.)
get_species_df = make_df_getter(SPECIES_URL, SPECIES_PATH, sep='\t', names=SPECIES_HEADER, skiprows=1)

# Definitions file: built with make_downloader rather than make_df_getter, and given no options.
download_definitions = make_downloader(DEFINITIONS_URL, DEFINITIONS_PATH)

# Aliases file: tab separator only.
get_aliases_df = make_df_getter(ALIASES_URL, ALIASES_PATH, sep='\t')


def get_mirbase_alias_to_id(
) -> Tuple[Mapping[str, str], Mapping[str, List[str]]]:
Esempio n. 5
0
import os
import pickle
from typing import Optional

import pandas as pd

from bio2bel.downloading import make_df_getter
from .constants import MAPPINGS_PATH, MAPPINGS_URL, SLIM_MAPPINGS_PATH

# Names exported by this module.
__all__ = [
    'get_mappings_df',
    'get_slim_mappings_df',
]

# Getter for the mappings file: tab separator, and header=None so no row is treated as a header.
get_mappings_df = make_df_getter(
    MAPPINGS_URL,
    MAPPINGS_PATH,
    sep='\t',
    header=None,
)
"""Returns a file with the following columns:
1. UniProtKB-AC
2. UniProtKB-ID
3. GeneID (EntrezGene)
4. RefSeq
5. GI
6. PDB
7. GO
8. UniRef100
9. UniRef90
10. UniRef50
11. UniParc
12. PIR
13. NCBI-taxon
Esempio n. 6
0
# -*- coding: utf-8 -*-
"""Parsers for FlyBase."""

from bio2bel.downloading import make_df_getter
from .constants import GENE_MAPPING_PATH, GENE_MAPPING_URL

__all__ = ['get_mapping_df']

# Getter for the gene mapping file. Options passed to make_df_getter: tab separator, '#' as the comment
# character, gzip compression, columns at positions 1 and 2 named 'symbol' and 'flybase_id', and
# na_filter switched off.
get_mapping_df = make_df_getter(
    GENE_MAPPING_URL,
    GENE_MAPPING_PATH,
    sep='\t',
    comment='#',
    compression='gzip',
    names=['symbol', 'flybase_id'],
    usecols=[1, 2],
    na_filter=False,
)
Esempio n. 7
0
    GENE2REFSEQ_COLUMNS, GENE2REFSEQ_DATA_PATH, GENE2REFSEQ_HUMAN_DATA_PATH, GENE2REFSEQ_HUMAN_SLIM_DATA_PATH,
    GENE2REFSEQ_URL, GENE_INFO_COLUMNS, GENE_INFO_DATA_PATH, GENE_INFO_URL, HOMOLOGENE_COLUMNS, HOMOLOGENE_DATA_PATH,
    HOMOLOGENE_URL,
)

# Names exported by this module.
__all__ = [
    'get_gene_info_df',
    'get_homologene_df',
    'get_refseq_df',
    'get_human_refseq_slim_df',
]

# Gene info file: tab separator, '-' and 'NEWENTRY' given as NA markers, columns limited to GENE_INFO_COLUMNS.
get_gene_info_df = make_df_getter(
    GENE_INFO_URL, GENE_INFO_DATA_PATH,
    sep='\t',
    na_values=['-', 'NEWENTRY'],
    usecols=GENE_INFO_COLUMNS,
)

get_homologene_df = make_df_getter(HOMOLOGENE_URL, HOMOLOGENE_DATA_PATH, sep='\t', names=HOMOLOGENE_COLUMNS)
"""Download the HomoloGene data.

Columns:

    1) HID (HomoloGene group id)
Esempio n. 8
0
from bio2bel.downloading import make_df_getter
from .constants import GENES_PATH, GENES_URL

__all__ = ['get_genes_df']

# Column positions kept from the genes file and the name given to each, in order:
#    0 -> rgd_id       (RGD identifier)
#    1 -> symbol       (Symbol)
#    2 -> name         (Name)
#    3 -> description  (Description)
#   20 -> entrez_id    (NCBI_GENE_ID)
#   36 -> gene_type    (Gene type)
get_genes_df = make_df_getter(
    GENES_URL,
    GENES_PATH,
    sep='\t',
    usecols=[0, 1, 2, 3, 20, 36],
    names=['rgd_id', 'symbol', 'name', 'description', 'entrez_id', 'gene_type'],
    header=0,
    comment='#',
)
Esempio n. 9
0
# -*- coding: utf-8 -*-

from bio2bel.downloading import make_df_getter

from ..constants import MARKERS_PATH, MARKERS_URL

__all__ = ['get_marker_df']

# Column positions kept from the markers file and the name given to each, in order:
#    0 -> mgi_id        (MGI ID)
#    6 -> symbol        (SYMBOL)
#    8 -> name          (name)
#    9 -> marker_type   (marker type)
#   10 -> feature_type  (feature type)
get_marker_df = make_df_getter(
    MARKERS_URL,
    MARKERS_PATH,
    sep='\t',
    usecols=[0, 6, 8, 9, 10],
    names=['mgi_id', 'symbol', 'name', 'marker_type', 'feature_type'],
    skiprows=[0],
)
Esempio n. 10
0
# -*- coding: utf-8 -*-
"""Parsers and downloaders for Bio2BEL Antibody Registry."""

from bio2bel.downloading import make_df_getter
from .constants import HEADER, PATH, URL

__all__ = ['df_getter']

# Getter limited to the first three columns, named after the first three entries of HEADER.
# NOTE(review): if these options reach pandas.read_csv unchanged, chunksize makes the reader hand back
# an iterator of frames rather than a single DataFrame -- confirm that callers expect that.
df_getter = make_df_getter(URL, PATH, names=HEADER[:3], skiprows=1, usecols=[0, 1, 2], chunksize=250_000)
Esempio n. 11
0
from .constants import (
    GENE_PERTURBATIONS_DATA_PATH,
    GENE_PERTURBATIONS_DATA_URL,
    GENE_PERTURBATIONS_METADATA_PATH,
    GENE_PERTURBATIONS_METADATA_URL,
)

# Names exported by this module.
__all__ = [
    'get_gene_perturbations_metadata_df',
    'get_gene_perturbations_json',
    'get_gene_perturbations_metadata_preprocessed_df',
    'get_gene_perturbations_preprocessed_df',
]

# The metadata goes through make_df_getter and the data through make_json_getter; neither gets extra options.
get_gene_perturbations_metadata_df = make_df_getter(
    GENE_PERTURBATIONS_METADATA_URL, GENE_PERTURBATIONS_METADATA_PATH,
)
get_gene_perturbations_json = make_json_getter(
    GENE_PERTURBATIONS_DATA_URL, GENE_PERTURBATIONS_DATA_PATH,
)

hgnc_gene_symbol_update = {
    'PARK2': 'PRKN',
    'ERO1L': 'ERO1A',
    'RFWD2': 'COP1',
    'CYR61': 'CCN1',
    'FAM60A': 'SINHCAF',
    'PRKCDBP': 'CAVIN3',
    'VPRBP': 'DCAF1',
Esempio n. 12
0
# -*- coding: utf-8 -*-
"""Parsers and downloaders for Bio2BEL GWAS Catalog."""

from bio2bel.downloading import make_df_getter
from .constants import PATH, URL

__all__ = ['df_getter']

# Getter for the tab-separated catalog file, restricted to the columns selected by name below.
df_getter = make_df_getter(
    URL,
    PATH,
    sep='\t',
    usecols=[
        'PUBMEDID',
        'MAPPED_GENE',
        'SNPS',
        'CONTEXT',
        'INTERGENIC',
        'RISK ALLELE FREQUENCY',
        'PVALUE_MLOG',
        'OR or BETA',
        '95% CI (TEXT)',
        'MAPPED_TRAIT',
        'MAPPED_TRAIT_URI',
    ],
)