def test_Abundance(self):
        """Check the BEL strings of an abundance with a plain name and one whose name contains a space."""
        cases = (
            ('water', 'a(CHEBI:water)'),
            ('test name', 'a(CHEBI:"test name")'),
        )
        for name, expected in cases:
            node = Abundance(namespace='CHEBI', name=name)
            self.assertEqual(expected, str(node))
Example #2
0
 def test_str_has_both(self):
     """Compare ``as_bel()`` of an abundance with the expected string, passing the identifier through ``ensure_quotes``."""
     ns, ident = n(), n()
     node = Abundance(namespace=ns, identifier=ident)
     expected = 'a({namespace}:{identifier})'.format(
         namespace=ns,
         identifier=ensure_quotes(ident),
     )
     self.assertEqual(expected, node.as_bel())
Example #3
0
 def test_str_has_identifier(self):
     """Compare ``as_bel()`` of an identifier-only abundance with the plain ``a(namespace:identifier)`` form."""
     ns, ident = n(), n()
     node = Abundance(namespace=ns, identifier=ident)
     expected = 'a({namespace}:{identifier})'.format(namespace=ns, identifier=ident)
     self.assertEqual(expected, node.as_bel())
Example #4
0
 def setUp(self):
     """Create a graph holding two INCHIKEY abundances and two EGID proteins."""
     self.graph: BELGraph = BELGraph()

     chemicals = (
         Abundance('INCHIKEY', 'AAAAZQPHATYWOK-YRBRRWAQNA-N'),
         Abundance('INCHIKEY', 'AAABHMIRDIOYOK-NPVYFSBINA-N'),
     )
     for chemical in chemicals:
         self.graph.add_node_from_data(chemical)

     targets = (
         Protein('EGID', '2740'),
         Protein('EGID', '2778'),
     )
     for target in targets:
         self.graph.add_node_from_data(target)
Example #5
0
    def test_simple(self):
        """Test that ``_simple_po_to_dict`` builds abundances from concept dictionaries.

        A concept carrying a name, an identifier, or both is expected to equal the
        matching ``Abundance``; a concept with only a namespace must raise ``ValueError``.
        """
        namespace, name, identifier = n(), n(), n()

        def _tokens(concept):
            """Wrap a concept dictionary in the abundance token dictionary."""
            return {FUNCTION: ABUNDANCE, CONCEPT: concept}

        cases = (
            (
                dict(namespace=namespace, name=name),
                {NAMESPACE: namespace, NAME: name},
            ),
            (
                dict(namespace=namespace, name=name, identifier=identifier),
                {NAMESPACE: namespace, NAME: name, IDENTIFIER: identifier},
            ),
            (
                dict(namespace=namespace, identifier=identifier),
                {NAMESPACE: namespace, IDENTIFIER: identifier},
            ),
        )
        for abundance_kwargs, concept in cases:
            self.assertEqual(
                Abundance(**abundance_kwargs),
                _simple_po_to_dict(_tokens(concept)),
            )

        with self.assertRaises(ValueError):
            _simple_po_to_dict(_tokens({NAMESPACE: namespace}))
Example #6
0
    def as_bel(self) -> Optional[BaseEntity]:
        """Convert this term to a BEL node.

        :return: The result of ``gobp`` for the ``biological_process`` namespace; for
         ``cellular_component``, a ``NamedComplexAbundance`` when ``is_complex`` is truthy
         and an ``Abundance`` otherwise; ``None`` for any other namespace.
        """
        if self.namespace == 'biological_process':
            return gobp(name=self.name, identifier=self.go_id)

        if self.namespace != 'cellular_component':
            return None

        # Both cellular-component node types take the same arguments
        node_cls = NamedComplexAbundance if self.is_complex else Abundance
        return node_cls(namespace='go', name=self.name, identifier=self.go_id)
Example #7
0
 def test_reaction(self):
     """Add an identified reaction and check the resulting node and edge counts."""
     graph = BELGraph()

     reactant_ids = ('17478', '15377', '57540')
     product_ids = ('29067', '15378', '57945')
     reaction = Reaction(
         namespace='rhea',
         identifier='44104',
         reactants=[
             Abundance(namespace='chebi', identifier=chebi_id)
             for chebi_id in reactant_ids
         ],
         products=[
             Abundance(namespace='chebi', identifier=chebi_id)
             for chebi_id in product_ids
         ],
     )

     graph.add_node_from_data(reaction)
     self.assertEqual(7, graph.number_of_nodes())
     self.assertEqual(6, graph.number_of_edges())
Example #8
0
# Module-level node fixtures. adgrb1, adgrb2 and akt1 are defined outside this excerpt.
adgrb_complex = ComplexAbundance([adgrb1, adgrb2])
achlorhydria = Pathology(namespace='MESHD', name='Achlorhydria')

# Derive the RNA and gene for akt1, then attach variants to the gene.
akt1_rna = akt1.get_rna()
akt1_gene = akt1_rna.get_gene()
akt_methylated = akt1_gene.with_variants(GeneModification('Me'))
akt1_phe_508_del = akt1_gene.with_variants(Hgvs('p.Phe508del'))

# NOTE(review): the variable names suggest hgnc() returns a protein node — confirm.
cftr = hgnc('CFTR')
cftr_protein_unspecified_variant = cftr.with_variants(HgvsUnspecified())
cftr_protein_phe_508_del = cftr.with_variants(Hgvs('p.Phe508del'))

adenocarcinoma = Pathology('MESHD', 'Adenocarcinoma')
interleukin_23_complex = NamedComplexAbundance('GO', 'interleukin-23 complex')

oxygen_atom = Abundance(namespace='CHEBI', name='oxygen atom')
hydrogen_peroxide = Abundance('CHEBI', 'hydrogen peroxide')

# tmprss2_gene is reused as the 5' partner of the fusion below.
tmprss2_gene = Gene('HGNC', 'TMPRSS2')

# Gene fusion whose 5' and 3' ranges are both fully enumerated.
tmprss2_erg_gene_fusion = GeneFusion(
    partner_5p=tmprss2_gene,
    range_5p=EnumeratedFusionRange('c', 1, 79),
    partner_3p=Gene('HGNC', 'ERG'),
    range_3p=EnumeratedFusionRange('c', 312, 5034)
)

bcr_jak2_gene_fusion = GeneFusion(
    partner_5p=Gene('HGNC', 'BCR'),
    range_5p=EnumeratedFusionRange('c', '?', 1875),
    partner_3p=Gene('HGNC', 'JAK2'),
Example #9
0
from pybel.dsl import (
    Abundance,
    BiologicalProcess,
    ComplexAbundance,
    NamedComplexAbundance,
    Pathology,
    Protein,
    ProteinModification,
)
from pybel.language import activity_mapping
from pybel.testing.constants import test_jgif_path
from tests.constants import TestGraphMixin

# 20 is the numeric value of logging.INFO.
logging.getLogger('pybel.parser').setLevel(20)

# Named nodes that are also members of jgif_expected_nodes.
calcium = Abundance('SCHEM', 'Calcium')
calcineurin_complex = NamedComplexAbundance('SCOMP', 'Calcineurin Complex')
foxo3 = Protein('HGNC', 'FOXO3')
tcell_proliferation = BiologicalProcess(
    'GO', 'CD8-positive, alpha-beta T cell proliferation')
il15 = Protein('HGNC', 'IL15')
il2rg = Protein('MGI', 'Il2rg')
jgif_expected_nodes = {
    calcium,
    calcineurin_complex,
    foxo3,
    tcell_proliferation,
    il15,
    il2rg,
    Protein('HGNC', 'CXCR6'),
    Protein('HGNC', 'IL15RA'),
Example #10
0
from pybel.typing import EdgeData


def _rel(x):
    """Build a dictionary holding only the relation ``x`` under the ``RELATION`` key."""
    edge_data = {RELATION: x}
    return edge_data


def _rela(x, y=None):
    """Build a dictionary with relation ``x`` and ``activity(y)`` under the ``OBJECT`` key."""
    edge_data = {RELATION: x}
    edge_data[OBJECT] = activity(y)
    return edge_data


def _assoc(y):
    """Build a dictionary for an ``ASSOCIATION`` relation whose ``association_type`` is ``y``."""
    edge_data = {RELATION: ASSOCIATION}
    edge_data['association_type'] = y
    return edge_data


# Placeholder nodes, one or two per entity type, built from two positional arguments.
a1 = Abundance('CHEBI', '1')
p1 = Protein('HGNC', '1')
pf1 = Protein('INTERPRO', '1')
d1 = Pathology('MESH', '1')
b1 = BiologicalProcess('GO', '1')
b2 = BiologicalProcess('GO', '2')
m1 = MicroRna('MIRBASE', '1')
r1 = Rna('HGNC', '1')
r2 = Rna('HGNC', '2')
nca1 = NamedComplexAbundance('FPLX', '1')
pop1 = Population('taxonomy', '1')

# Proteins given by explicit identifier, plus the RNA and gene derived from p3.
p2 = Protein('HGNC', identifier='9236')
p3 = Protein('HGNC', identifier='9212')
r3 = p3.get_rna()
g3 = r3.get_gene()
Example #11
0
def get_neurommsig_bel(
    df: pd.DataFrame,
    disease: str,
    nift_values: Mapping[str, str],
) -> BELGraph:
    """Generate the NeuroMMSig BEL graph.

    For every row of every pathway group in ``df``, the gene is associated with
    each of its non-blank SNPs and with each of its recognized clinical features,
    and each recognized clinical feature is associated with the row's non-blank
    clinical SNPs. Every edge carries the canned evidence and citation and a
    ``MeSHDisease`` annotation.

    :param df: a data frame that can be grouped by ``PATHWAY_COLUMN_NAME``, sorted by
     ``GENE_COLUMN_NAME`` and sliced with the module-level ``columns``
    :param disease: the disease name, used in the graph metadata and as the ``MeSHDisease`` annotation
    :param nift_values: a dictionary of lower-cased to normal names in NIFT
    :return: the populated BEL graph
    """
    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    graph = BELGraph(
        name=f'NeuroMMSigDB for {disease}',
        description=f'SNP and Clinical Features for Subgraphs in {disease}',
        authors='Daniel Domingo-Fernández, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        version=time.strftime('%Y%m%d'),
    )

    def _associate(source, target) -> None:
        """Add an association edge with the canned provenance and the disease annotation."""
        graph.add_association(
            source,
            target,
            evidence=CANNED_EVIDENCE,
            citation=CANNED_CITATION,
            # A fresh dictionary per edge so that no two edges share the same object
            annotations={
                'MeSHDisease': disease,
            },
        )

    for _pathway, pathway_df in df.groupby(PATHWAY_COLUMN_NAME):
        sorted_pathway_df = pathway_df.sort_values(GENE_COLUMN_NAME)
        sliced_df = sorted_pathway_df[columns].itertuples()

        for _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps in sliced_df:
            gene = ensure_quotes(gene)

            all_snps = itt.chain(
                lit_snps or [],
                gwas_snps or [],
                ld_block_snps or [],
                clinical_snps or [],
            )
            for snp in all_snps:
                if not snp.strip():
                    continue
                _associate(Gene('HGNC', gene), Gene('DBSNP', snp))

            # Blank clinical SNPs are skipped for gene-SNP edges above, so skip them for
            # SNP-feature edges as well instead of creating blank DBSNP nodes.
            nonblank_clinical_snps = [
                clinical_snp
                for clinical_snp in clinical_snps or []
                if clinical_snp.strip()
            ]

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                lowered_feature = clinical_feature.lower()
                if lowered_feature not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    fixed_feature = nift_values[lowered_feature]
                    fixed_caps.add((clinical_feature, fixed_feature))
                    clinical_feature = fixed_feature  # fix capitalization

                _associate(Gene('HGNC', gene), Abundance('NIFT', clinical_feature))

                for clinical_snp in nonblank_clinical_snps:
                    _associate(Gene('DBSNP', clinical_snp), Abundance('NIFT', clinical_feature))

    # Sort before logging so that repeated runs produce the same log output
    if missing_features:
        logger.warning('Missing Features in %s', disease)
        for feature in sorted(missing_features):
            logger.warning(feature)

    if fixed_caps:
        logger.warning('Fixed capitalization')
        for broken, fixed in sorted(fixed_caps):
            logger.warning('%s -> %s', broken, fixed)

    return graph
Example #12
0
 def as_cas_bel(self) -> Abundance:
     """Return an abundance in the ``cas`` namespace identified by this drug's CAS number."""
     # https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000237
     cas_number = self.cas_number
     return Abundance(namespace='cas', identifier=cas_number)
Example #13
0
# -*- coding: utf-8 -*-

from bio2bel_hmdb.enrich import *
from pybel import BELGraph
from pybel.dsl import Abundance, Pathology, Protein
from tests.constants import DatabaseMixin

# Fixtures for the protein-enrichment test: a metabolite and the protein expected to be linked to it.
hmdb_tuple1 = Abundance('HMDB', 'HMDB00008')
protein_tuple = Protein('UP', 'P50440')

# Fixtures for the disease-enrichment test (hmdb_tuple2 is added to the graph there).
# NOTE(review): the original comment said "tissues", but disease_tuple is a Pathology — confirm.
hmdb_tuple2 = Abundance('HMDB', 'HMDB00064')
disease_tuple = Pathology('HMDB_D', 'Lung Cancer')


class TestEnrich(DatabaseMixin):
    """Test enriching graphs that contain HMDB metabolites."""

    def test_enrich_metabolites_proteins(self):
        """Check node and edge counts before and after enriching a metabolite with proteins."""
        graph = BELGraph()
        graph.add_node_from_data(hmdb_tuple1)

        # Before enrichment the graph holds only the metabolite
        self.assertEqual(1, graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())

        enrich_metabolites_proteins(graph, self.manager)

        self.assertEqual(4, graph.number_of_nodes())
        self.assertEqual(3, graph.number_of_edges())
        self.assertTrue(graph.has_edge(protein_tuple, hmdb_tuple1))

    def test_enrich_metabolites_diseases(self):
        """Start from a graph that holds only the second HMDB metabolite."""
        graph = BELGraph()
        graph.add_node_from_data(hmdb_tuple2)
Example #14
0
 def test_abundance_as_bel(self):
     """Check that the name ``YFG`` is written to BEL without quotation marks."""
     node = Abundance(namespace='HGNC', name='YFG')
     expected = 'a(HGNC:YFG)'
     self.assertEqual(expected, node.as_bel())
Example #15
0
 def as_drugbank_bel(self) -> Abundance:
     """Return an abundance in the ``MODULE_NAME`` namespace with this drug's name and DrugBank identifier."""
     drug_name = self.name
     drugbank_id = self.drugbank_id
     return Abundance(namespace=MODULE_NAME, name=drug_name, identifier=drugbank_id)
Example #16
0
 def test_abundance_as_bel_quoted(self):
     """Check that the name ``YFG-1`` is written to BEL inside quotation marks."""
     node = Abundance(namespace='HGNC', name='YFG-1')
     expected = 'a(HGNC:"YFG-1")'
     self.assertEqual(expected, node.as_bel())
Example #17
0
 def test_as_tuple(self):
     """Check that a node hashes to the same value as its BEL string."""
     node = Abundance(namespace=n(), name=n())
     node_hash = hash(node)
     bel_hash = hash(node.as_bel())
     self.assertEqual(node_hash, bel_hash)
Example #18
0
 def test_abundance_as_no_quotes(self):
     """Check that a hyphenated namespace and a dotted name are written to BEL without quotes."""
     node = Abundance(namespace='a-c', name='d.e.f')
     expected = 'a(a-c:d.e.f)'
     self.assertEqual(expected, node.as_bel())
Example #19
0
# Gene / RNA / protein triples sharing the same HGNC name ('1', '2', '3').
g1 = Gene(namespace=HGNC, name='1')
r1 = Rna(namespace=HGNC, name='1')
p1 = Protein(HGNC, name='1')

g2 = Gene(HGNC, name='2')
r2 = Rna(HGNC, name='2')
p2 = Protein(HGNC, name='2')

g3 = Gene(namespace=HGNC, name='3')
r3 = Rna(namespace=HGNC, name='3')
p3 = Protein(namespace=HGNC, name='3')

# Gene and microRNA sharing the name '4'.
g4 = Gene(namespace=HGNC, name='4')
m4 = MicroRna(namespace=HGNC, name='4')

# An abundance and a pathology sharing the name '5' in different namespaces.
a5 = Abundance(namespace=CHEBI, name='5')
p5 = Pathology(namespace=GO, name='5')


class TestCollapseProteinInteractions(unittest.TestCase):
    """Tests on graphs with several edges between the same pair of proteins."""

    def test_protein_interaction_1(self):
        """Build a graph with a POSITIVE_CORRELATION edge and an INCREASES edge from p1 to p2."""
        graph = BELGraph()

        for node in (p1, p2, a5, p5):
            graph.add_node_from_data(node)

        for relation in (POSITIVE_CORRELATION, INCREASES):
            graph.add_qualified_edge(p1, p2, relation=relation, citation=n(), evidence=n())
Example #20
0
 def as_smiles_bel(self) -> Abundance:
     """Return an abundance in the ``smiles`` namespace identified by this drug's SMILES string."""
     smiles = self.smiles
     return Abundance(namespace='smiles', identifier=smiles)
 def test_reaction(self):
     """Check the BEL string of a reaction with one reactant and one product."""
     reactant = Abundance(namespace='CHEBI', name='A')
     product = Abundance(namespace='CHEBI', name='B')
     node = Reaction(reactants=[reactant], products=[product])
     expected = 'rxn(reactants(a(CHEBI:A)), products(a(CHEBI:B)))'
     self.assertEqual(expected, str(node))
Example #22
0
 def as_inchikey_bel(self) -> Abundance:
     """Return an abundance in the ``inchikey`` namespace identified by this drug's InChI-key."""
     # https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000387
     inchikey = self.inchikey
     return Abundance(namespace='inchikey', identifier=inchikey)
Example #23
0
 def as_pubchem_compound_bel(self) -> Abundance:
     """Return an abundance in the ``pubchem.compound`` namespace identified by this drug's PubChem compound id."""
     compound_id = self.pubchem_compound_id
     return Abundance(namespace='pubchem.compound', identifier=compound_id)
Example #24
0
def normalize_graph_names(graph: BELGraph, database: str) -> None:
    """Normalize graph names.

    Build a replacement node for every named node in the graph (lower-cased name with
    surrounding quotes and whitespace stripped, plus database-specific fixes), then
    pass the collected mappings to ``relabel_nodes`` and ``multi_relabel``.

    :param graph: the BEL graph whose nodes are inspected and relabeled
    :param database: the source database; compared against ``REACTOME``, ``WIKIPATHWAYS``
     and the literal ``'wikipathways'`` to select database-specific handling
    """
    # Victim to Survivor (one to one node) mapping
    one_to_one_mapping = {}
    # Victim to Survivors (one to many nodes) mapping
    one_to_many_mapping = defaultdict(set)

    for node in graph.nodes():

        # Skip ListAbundances and Reactions since they do not have a name
        # (nodes of any other type with an empty name are skipped as well)
        if isinstance(node, ListAbundance) or isinstance(
                node, Reaction) or not node.name:
            continue

        # Normalize names: Lower case name and strip quotes or white spaces
        lower_name = node.name.lower().strip('"').strip()

        # Dealing with Genes/miRNAs
        if isinstance(node, CentralDogma):

            ##################
            # miRNA entities #
            ##################

            if lower_name.startswith("mir"):

                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        # NOTE(review): this loop rebinds lower_name, so the suffix handling
                        # below sees the last list element (if any), and the node is then also
                        # added to one_to_one_mapping — confirm this is intended.
                        for lower_name in reactome_cell:
                            one_to_many_mapping[node].add(
                                MicroRna(
                                    node.namespace,
                                    name=lower_name.replace("mir-", "mir"),
                                    identifier=node.identifier,
                                ), )

                    # Drop a trailing " genes" / " gene" suffix from the name
                    if lower_name.endswith(' genes'):
                        lower_name = lower_name[:-len(' genes')]
                    elif lower_name.endswith(' gene'):
                        lower_name = lower_name[:-len(' gene')]
                    # NOTE(review): unlike the other replacements, this node is built
                    # without an identifier — confirm.
                    one_to_one_mapping[node] = MicroRna(
                        node.namespace,
                        name=lower_name.replace(
                            "mir-", "mir"),  # Special case for Reactome
                    )
                    continue

                # Databases other than Reactome (the Reactome branch above always continues).
                # NOTE(review): this uses node.name rather than lower_name, so the name is
                # not lower-cased here — confirm.
                one_to_one_mapping[node] = MicroRna(
                    node.namespace,
                    name=node.name.replace("mir-", "mir"),
                    identifier=node.identifier,
                )

            ##################
            # Genes entities #
            ##################

            else:
                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        # lower_name is rebound per list element; it is not read again
                        # for this node because of the continue below
                        for lower_name in reactome_cell:
                            if lower_name in BLACK_LIST_REACTOME:  # Filter entities in black list
                                continue
                            elif lower_name.startswith(
                                    "("):  # remove redundant parentheses
                                lower_name = lower_name.strip("(").strip(")")

                            one_to_many_mapping[node].add(
                                Protein(node.namespace,
                                        name=lower_name,
                                        identifier=node.identifier), )
                    else:
                        one_to_one_mapping[node] = Protein(
                            node.namespace,
                            name=lower_name,
                            identifier=node.identifier)

                    continue

                # WikiPathways names listed in WIKIPATHWAYS_BIOL_PROCESS become biological
                # processes; other WikiPathways and KEGG genes need no further processing
                elif database == WIKIPATHWAYS and lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Every remaining non-miRNA CentralDogma node is replaced by a Protein
                one_to_one_mapping[node] = Protein(node.namespace,
                                                   name=lower_name,
                                                   identifier=node.identifier)

        #######################
        # Metabolite entities #
        #######################

        elif isinstance(node, Abundance):

            # NOTE(review): this compares against the literal 'wikipathways' while the gene
            # branch above uses the WIKIPATHWAYS constant — confirm they are the same value.
            if database == 'wikipathways':
                # Biological processes that are captured as abundance in
                # BEL since they were characterized wrong in WikiPathways
                if lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Abundances to BiologicalProcesses
                elif (node.namespace
                      in {'WIKIDATA', 'WIKIPATHWAYS', 'REACTOME'}
                      and lower_name not in WIKIPATHWAYS_METAB):
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Fix naming in duplicate entity
                if lower_name in WIKIPATHWAYS_NAME_NORMALIZATION:
                    lower_name = WIKIPATHWAYS_NAME_NORMALIZATION[lower_name]

            elif database == REACTOME:
                # Curated proteins that were coded as metabolites
                if lower_name in REACTOME_PROT:
                    one_to_one_mapping[node] = Protein(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Flat multiple identifiers (this is not trivial because most of ChEBI names contain commas,
                # so a clever way to fix some of the entities is to check that all identifiers contain letters)
                # Note: str.isalpha() requires every part to be non-empty and purely alphabetic
                elif "," in lower_name and all(
                        string.isalpha() for string in lower_name.split(",")):
                    for string in lower_name.split(","):
                        one_to_many_mapping[node].add(
                            Abundance(node.namespace,
                                      name=string,
                                      identifier=node.identifier), )
                    continue

            # Default for abundances: same node with the normalized name
            one_to_one_mapping[node] = Abundance(node.namespace,
                                                 name=lower_name,
                                                 identifier=node.identifier)

        #################################
        # Biological Processes entities #
        #################################

        elif isinstance(node, BiologicalProcess):
            # KEGG normalize name by removing the title prefix
            if lower_name.startswith('title:'):
                lower_name = lower_name[len('title:'):]

            one_to_one_mapping[node] = BiologicalProcess(
                node.namespace,
                name=lower_name,
                identifier=node.identifier,
            )

    # Apply the one-to-one replacements first, then the one-to-many replacements
    relabel_nodes(graph, one_to_one_mapping)
    multi_relabel(graph, one_to_many_mapping)