コード例 #1
0
def update_drugbank_mappings():
    """Rewrite ``drugbank_mappings.tsv`` from PyOBO xrefs and names.

    The table has one row per DrugBank-to-CHEMBL xref, per DrugBank-to-CHEBI
    xref (from both the DrugBank and the CHEBI side), and per DrugBank name.

    Prerequisites: PyOBO (https://github.com/pyobo/pyobo) must be installed
    and the DrugBank download (https://www.drugbank.ca/releases/latest) must
    be placed in ~/.obo/drugbank/. Obtaining the DrugBank download requires
    signing up for an account and waiting for approval.
    """
    import pyobo

    to_chembl = pyobo.get_filtered_xrefs('drugbank', 'chembl.compound')
    to_chebi = pyobo.get_filtered_xrefs('drugbank', 'chebi')
    from_chebi = pyobo.get_filtered_xrefs('chebi', 'drugbank')
    names = pyobo.get_id_name_mapping('drugbank')

    # Each record is [DrugBank ID, namespace, value, resource the row came from].
    records = [
        [db_id, 'CHEMBL', chembl_id, 'drugbank']
        for db_id, chembl_id in to_chembl.items()
    ]
    records.extend(
        [db_id, 'CHEBI', chebi_id, 'drugbank']
        for db_id, chebi_id in to_chebi.items()
    )
    # The CHEBI-side mapping is keyed by CHEBI ID, so the pair is flipped here.
    records.extend(
        [db_id, 'CHEBI', chebi_id, 'chebi']
        for chebi_id, db_id in from_chebi.items()
    )
    records.extend(
        [db_id, 'NAME', db_name, 'drugbank']
        for db_id, db_name in names.items()
    )

    out_file = os.path.join(path, 'drugbank_mappings.tsv')
    column_names = ['DRUGBANK_ID', 'NAMESPACE', 'ID', 'SOURCE']
    records.sort()
    table = [column_names, *records]
    write_unicode_csv(out_file, table, delimiter='\t')
コード例 #2
0
def get_gene_associations_df(identifier: str, *, rows: Optional[int] = None) -> pd.DataFrame:
    """Get gene associations for the given GO identifier as a dataframe.

    - filtered for human only (taxonomy identifier ``9606``)
    - filtered for proteins only (subjects with a ``UniProtKB:`` prefix)
    - add HGNC identifier and entrez identifier

    :param identifier: The GO identifier passed on to :func:`get_gene_associations`
    :param rows: Optional row limit passed on to :func:`get_gene_associations`
    :return: A dataframe with the columns ``source_name``, ``target_id`` (without the
        ``GO:`` prefix), ``target_label``, ``negated``, ``uniprot_id``, ``hgnc_id``, and
        ``ncbigene_id``. Rows whose UniProt identifier has no HGNC identifier are dropped;
        rows whose HGNC identifier has no ncbigene xref keep a missing ``ncbigene_id``.
    """
    associations = get_gene_associations(identifier, rows=rows)
    records = [
        (
            e['subject']['id'],
            e['subject']['label'],
            e['subject']['taxon']['id'][len('NCBITaxon:'):],
            e['object']['id'],
            e['object']['label'],
            e['negated'],
        )
        for e in associations
    ]
    df = pd.DataFrame(
        records,
        columns=[
            'source_id',
            'source_name',
            'taxonomy_id',
            'target_id',
            'target_label',
            'negated',
        ],
    )
    df = df[df['taxonomy_id'] == '9606']
    # Copy after filtering so the column assignments below act on an independent
    # frame rather than on a slice of the unfiltered one.
    df = df[df['source_id'].str.startswith('UniProtKB:')].copy()
    df['uniprot_id'] = df['source_id'].str[len('UniProtKB:'):]
    df = df.drop(columns=['source_id', 'taxonomy_id'])

    df['hgnc_id'] = df['uniprot_id'].map(get_hgnc_id)
    df = df[df['hgnc_id'].notna()].copy()

    # Use .get so an HGNC identifier without an ncbigene xref yields a missing
    # value instead of aborting the whole call with a KeyError.
    hgnc_to_ncbigene = pyobo.get_filtered_xrefs('hgnc', 'ncbigene')
    df['ncbigene_id'] = df['hgnc_id'].map(hgnc_to_ncbigene.get)
    df['target_id'] = df['target_id'].str[len('GO:'):]
    return df
コード例 #3
0
    def test_get_target_xrefs(self):
        """Test that filtered CHEBI-to-KEGG xrefs are a dict of prefix-free identifiers."""
        kegg_xrefs = get_filtered_xrefs('chebi', 'kegg', url=TEST_CHEBI_OBO_PATH, local=True)

        # Check the container type before iterating, so that a wrong return type
        # shows up as an assertion failure rather than an AttributeError.
        self.assertIsInstance(kegg_xrefs, dict)

        for key, value in kegg_xrefs.items():
            # Keys are CHEBI identifiers and must not carry the prefix in any casing.
            for prefix in ('CHEBI:', 'CHEBI', 'chebi:', 'chebi'):
                self.assertFalse(key.startswith(prefix), msg=f'key {key!r} has prefix {prefix!r}')
            # Values are KEGG identifiers and must not carry the prefix either.
            for prefix in ('KEGG:', 'KEGG', 'kegg:', 'kegg'):
                self.assertFalse(value.startswith(prefix), msg=f'value {value!r} has prefix {prefix!r}')
コード例 #4
0
ファイル: test_extract.py プロジェクト: shunsunsun/pyobo
    def test_get_target_xrefs(self):
        """Check that CHEBI-to-KEGG xrefs come back as a dict without prefixes."""
        with chebi_patch:
            kegg_xrefs = get_filtered_xrefs('chebi', 'kegg')

        for source_id, target_id in kegg_xrefs.items():
            # Neither side of the mapping may still carry its namespace prefix.
            for prefix in ('CHEBI:', 'CHEBI', 'chebi:', 'chebi'):
                self.assertFalse(source_id.startswith(prefix))
            for prefix in ('KEGG:', 'KEGG', 'kegg:', 'kegg'):
                self.assertFalse(target_id.startswith(prefix))

        self.assertIsInstance(kegg_xrefs, dict)
コード例 #5
0
    def test_get_target_xrefs(self):
        """Verify that filtered CHEBI/KEGG xrefs are prefix-free and returned as a dict."""
        chebi_prefixes = ("CHEBI:", "CHEBI", "chebi:", "chebi")
        kegg_prefixes = ("KEGG:", "KEGG", "kegg:", "kegg")

        with chebi_patch:
            kegg_xrefs = get_filtered_xrefs("chebi", "kegg")

        for chebi_id, kegg_id in kegg_xrefs.items():
            # Check every prefix variant in turn: first on the key, then on the value.
            for chebi_prefix in chebi_prefixes:
                self.assertFalse(chebi_id.startswith(chebi_prefix))
            for kegg_prefix in kegg_prefixes:
                self.assertFalse(kegg_id.startswith(kegg_prefix))

        self.assertIsInstance(kegg_xrefs, dict)
コード例 #6
0
def mutual_mapping_graph(
    prefixes: Iterable[str],
    skip_sources: Optional[Iterable[str]] = None,
    skip_targets: Optional[Iterable[str]] = None,
) -> nx.Graph:
    """Build the undirected graph of xrefs between every pair of the given prefixes.

    Nodes are ``(prefix, identifier)`` pairs; each xref contributes one edge.

    :param prefixes: A list of prefixes to use with :func:`pyobo.get_filtered_xrefs` to get xrefs.
    :param skip_sources: An optional list of prefixes to skip as the source for xrefs
    :param skip_targets: An optional list of prefixes to skip as the target for xrefs
    :return: The undirected mapping graph containing mappings between entries in the given namespaces.
    """
    ordered_prefixes = sorted(prefixes)
    excluded_sources = set(skip_sources) if skip_sources is not None else set()
    excluded_targets = set(skip_targets) if skip_targets is not None else set()

    graph = nx.Graph()
    for source, target in itt.product(ordered_prefixes, repeat=2):
        # A namespace is never mapped onto itself.
        if source == target:
            continue
        if source in excluded_sources or target in excluded_targets:
            continue
        xrefs = pyobo.get_filtered_xrefs(source, target)
        graph.add_edges_from(
            ((source, source_id), (target, target_id))
            for source_id, target_id in xrefs.items()
        )
    return graph
コード例 #7
0
ファイル: manager.py プロジェクト: bio2bel/wikipathways
    def populate(self, paths: Optional[Mapping[str, str]] = None):
        """Populate the database with species, proteins, and pathways parsed from GMT files.

        :param paths: mapping from tax identifiers to paths to GMT files. When omitted or
            empty, the paths listed in ``infos`` are used.
        :raises TypeError: if ``paths`` is non-empty but is not a dict
        :raises ValueError: if the parsed pathways do not carry exactly one version, or if
            the mappings from hgnc to ncbigene could not be loaded
        """
        if not paths:
            logger.info('No paths given.')
            paths = {info.taxonomy_id: info.path for info in infos.values()}
            logger.info(f'Using default paths at {paths}.')
        elif not isinstance(paths, dict):
            raise TypeError('Invalid type for paths. Shoudl be dict.')

        # Only the file paths are needed for parsing; the taxonomy keys are not used.
        pathways = [
            pathway
            for path in paths.values()
            for pathway in parse_wikipathways_gmt(path)
        ]

        versions = {
            version
            for _identifier, version, _revision, _name, _species_name, _entries in pathways
        }
        if not versions:
            # Distinguish "nothing parsed" from a real version conflict.
            raise ValueError('no pathways were parsed, so no version could be determined')
        if len(versions) > 1:
            raise ValueError('got multiple versions')
        version = next(iter(versions))

        taxonomy_name_to_id = get_name_id_mapping('ncbitaxon')
        species_names = {
            SPECIES_REMAPPING.get(species_name, species_name)
            for _identifier, _version, _revision, _name, species_name, _entries in pathways
        }
        species_name_to_species = {}
        for species_name in tqdm(species_names, desc=f'v{version} serializing species'):
            taxonomy_id = taxonomy_name_to_id[species_name]
            species = species_name_to_species[species_name] = Species(taxonomy_id=taxonomy_id, name=species_name)
            self.session.add(species)

        hgnc_id_to_entrez_id = get_filtered_xrefs('hgnc', 'ncbigene')
        if not hgnc_id_to_entrez_id:
            raise ValueError('Mappings from hgnc to ncbigene couldnt be loaded')

        # Pathway entries are ncbigene identifiers, so the xref mapping is inverted.
        entrez_id_to_hgnc_id = {v: k for k, v in hgnc_id_to_entrez_id.items()}
        hgnc_id_to_name = get_id_name_mapping('hgnc')

        missing_entrez_ids = set()
        all_entrez_ids = {
            entrez_id
            for _identifier, _version, _revision, _name, _species, pathway_entrez_ids in pathways
            for entrez_id in pathway_entrez_ids
        }
        entrez_id_protein = {}
        for entrez_id in tqdm(all_entrez_ids, desc=f'v{version} serializing proteins'):
            hgnc_id = entrez_id_to_hgnc_id.get(entrez_id)
            # Use .get so an HGNC identifier that is absent from the name mapping is
            # recorded as missing below instead of raising a KeyError.
            hgnc_symbol = hgnc_id_to_name.get(hgnc_id) if hgnc_id else None

            if not hgnc_symbol:
                logger.debug('ncbigene:%s has no HGNC identifier', entrez_id)
                missing_entrez_ids.add(entrez_id)

            entrez_id_protein[entrez_id] = protein = self.get_or_create_protein(
                entrez_id=entrez_id,
                hgnc_symbol=hgnc_symbol,
                hgnc_id=hgnc_id,
            )
            self.session.add(protein)

        logger.info(f'Proteins: {len(entrez_id_protein)}')
        logger.info(f"Proteins w/o HGNC mapping: {len(missing_entrez_ids)}")

        for (
            wikipathways_id, _version, revision,
            pathway_name, species_name, pathway_entrez_ids,
        ) in tqdm(pathways, desc=f'v{version} serializing pathways'):
            proteins = [
                entrez_id_protein[entrez_id]
                for entrez_id in pathway_entrez_ids
            ]

            pathway = self.get_or_create_pathway(
                identifier=wikipathways_id,
                name=pathway_name.strip(),
                revision=revision,
                # Species were stored under their remapped names above.
                species=species_name_to_species[SPECIES_REMAPPING.get(species_name, species_name)],
                proteins=proteins,
            )
            self.session.add(pathway)

        self.session.commit()
コード例 #8
0
ファイル: pid.py プロジェクト: amanchoudhri/bio2bel
def _map_hgnc_to_entrez(hgnc_id):
    """Look up the ncbigene xref of an HGNC identifier; a missing entry yields ``None``."""
    hgnc_to_ncbigene = get_filtered_xrefs('hgnc', 'ncbigene')
    return hgnc_to_ncbigene.get(hgnc_id)
コード例 #9
0
import pybel
import pybel.dsl
from pybel import BELGraph
from ..compath import CompathManager, CompathPathwayMixin, CompathProteinMixin
from ..utils import get_data_dir

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Name of this module; also passed to get_data_dir() to obtain its data directory.
MODULE_NAME = 'pid'
DIRECTORY = get_data_dir(MODULE_NAME)

# Location of the NCI-Pathway-Info.xlsx spreadsheet in the NCIP
# pathway-interaction-database repository on GitHub.
URL = 'https://github.com/NCIP/pathway-interaction-database/raw/master/download/NCI-Pathway-Info.xlsx'

# Lookup tables built once, when this module is imported.
# NOTE(review): these calls presumably load or download resources, which would
# make importing this module slow - confirm against the helpers' implementation.
chebi_id_to_name = get_id_name_mapping('chebi')
hgnc_name_to_id = get_name_id_mapping('hgnc')
hgnc_id_to_entrez_id = get_filtered_xrefs('hgnc', 'ncbigene')

# Maps a relation string to the (unbound) BELGraph method used to add it.
relation_to_adder = {
    'controls-state-change-of': BELGraph.add_regulates,
}

# Maps a namespace string to the pybel.dsl class used for entities in that namespace.
namespace_to_dsl = {
    'cas': pybel.dsl.Abundance,
    'uniprot': pybel.dsl.Protein,
    'hprd': pybel.dsl.Protein,
    'chebi': pybel.dsl.Abundance,
    'hgnc': pybel.dsl.Protein,
}

# Starts empty at import time.
# NOTE(review): presumably collects entries that could not be mapped - confirm against usage.
UNMAPPED = set()