Example #1
0
def get_bel() -> pybel.BELGraph:
    """Build a BEL graph from the ComPath mapping table.

    Each row returned by ``get_df()`` is unpacked into a source entry, a
    relation, and a target entry. Both entries become ``BiologicalProcess``
    nodes, and the relation decides which kind of edge connects them.

    :return: a graph with one edge per mapping row
    :raises ValueError: if a row's relation is neither ``isPartOf`` nor
        ``equivalentTo``
    """
    bel_graph = pybel.BELGraph(
        name='ComPath Mappings',
        version='1.1.0',
        description=(
            'Hierarchical and equivalence relations between entries in KEGG, Reactome, PathBank,'
            ' and WikiPathways.'
        ),
    )

    for row in get_df().values:
        source_ns, source_id, source_name, relation, target_ns, target_id, target_name = row

        subject_node = BiologicalProcess(namespace=source_ns, identifier=source_id, name=source_name)
        object_node = BiologicalProcess(namespace=target_ns, identifier=target_id, name=target_name)

        # Choose the edge constructor first so that unknown relations fail loudly.
        if relation == 'equivalentTo':
            add_edge = bel_graph.add_equivalence
        elif relation == 'isPartOf':
            add_edge = bel_graph.add_part_of
        else:
            raise ValueError(f'invalid mapping with relation: {relation}')

        add_edge(subject_node, object_node)

    return bel_graph
Example #2
0
def gobp(name: str, identifier: str) -> BiologicalProcess:
    """Create a ``BiologicalProcess`` node in the ``go`` namespace.

    :param name: name passed through to the node
    :param identifier: identifier passed through to the node
    :return: the newly constructed node
    """
    go_term = {
        'namespace': 'go',
        'name': name,
        'identifier': identifier,
    }
    return BiologicalProcess(**go_term)
 def test_bioprocess(self):
     """Check the string form of a name-only GO biological process node."""
     expected_bel = 'bp(GO:apoptosis)'
     apoptosis = BiologicalProcess(namespace='GO', name='apoptosis')
     self.assertEqual(expected_bel, str(apoptosis))
Example #4
0
     Gene('HGNC', 'NCF1'),
     Protein('HGNC', 'HBP1')
 ]),
 Protein('HGNC', 'HBP1'),
 ComplexAbundance([Protein('HGNC', 'FOS'), Protein('HGNC', 'JUN')]),
 Protein('HGNC', 'FOS'),
 Protein('HGNC', 'JUN'),
 Rna('HGNC', 'CFTR', variants=Hgvs('r.1521_1523delcuu')),
 Rna('HGNC', 'CFTR'),
 Rna('HGNC', 'CFTR', variants=Hgvs('r.1653_1655delcuu')),
 CompositeAbundance([
     interleukin_23_complex,
     il6
 ]),
 il6,
 BiologicalProcess('GO', 'cell cycle arrest'),
 hydrogen_peroxide,
 Protein('HGNC', 'CAT'),
 Gene('HGNC', 'CAT'),
 Protein('HGNC', 'HMGCR'),
 BiologicalProcess('GO', 'cholesterol biosynthetic process'),
 Gene('HGNC', 'APP', variants=Hgvs('c.275341G>C')),
 Gene('HGNC', 'APP'),
 Pathology('MESHD', 'Alzheimer Disease'),
 ComplexAbundance([Protein('HGNC', 'F3'), Protein('HGNC', 'F7')]),
 Protein('HGNC', 'F3'),
 Protein('HGNC', 'F7'),
 Protein('HGNC', 'F9'),
 Protein('HGNC', 'GSK3B', variants=ProteinModification('Ph', 'Ser', 9)),
 Protein('HGNC', 'GSK3B'),
 Pathology('MESHD', 'Psoriasis'),
Example #5
0
    ComplexAbundance,
    NamedComplexAbundance,
    Pathology,
    Protein,
    ProteinModification,
)
from pybel.language import activity_mapping
from pybel.testing.constants import test_jgif_path
from tests.constants import TestGraphMixin

# Keep the parser logger at INFO (numeric level 20) so its DEBUG records are dropped.
logging.getLogger('pybel.parser').setLevel(logging.INFO)

# Named node fixtures; each one is also listed in ``jgif_expected_nodes`` below.
calcium = Abundance('SCHEM', 'Calcium')
calcineurin_complex = NamedComplexAbundance('SCOMP', 'Calcineurin Complex')
foxo3 = Protein('HGNC', 'FOXO3')
tcell_proliferation = BiologicalProcess(
    'GO', 'CD8-positive, alpha-beta T cell proliferation')
il15 = Protein('HGNC', 'IL15')
# MGI-namespaced entry; distinct from the HGNC ``IL2RG`` protein in the expected set.
il2rg = Protein('MGI', 'Il2rg')
jgif_expected_nodes = {
    calcium,
    calcineurin_complex,
    foxo3,
    tcell_proliferation,
    il15,
    il2rg,
    Protein('HGNC', 'CXCR6'),
    Protein('HGNC', 'IL15RA'),
    BiologicalProcess('GO', 'lymphocyte chemotaxis'),
    Protein('HGNC', 'IL2RG'),
    Protein('HGNC', 'ZAP70'),
    NamedComplexAbundance('SCOMP', 'T Cell Receptor Complex'),
Example #6
0
    return {RELATION: x}


def _rela(x, y=None):
    """Build an edge-data dict with relation ``x`` and an activity object modifier.

    :param x: value stored under the ``RELATION`` key
    :param y: argument forwarded to ``activity``; its result is stored under ``OBJECT``
    :return: a two-entry dictionary
    """
    edge_data = {RELATION: x}
    edge_data[OBJECT] = activity(y)
    return edge_data


def _assoc(y):
    """Build an edge-data dict for an ``ASSOCIATION`` relation of type ``y``.

    :param y: value stored under the ``'association_type'`` key
    :return: a two-entry dictionary
    """
    edge_data = {RELATION: ASSOCIATION}
    edge_data['association_type'] = y
    return edge_data


# Small fixture nodes with placeholder labels ('1', '2').
# NOTE(review): the second positional argument is presumably the node name — confirm.
a1 = Abundance('CHEBI', '1')
p1 = Protein('HGNC', '1')
pf1 = Protein('INTERPRO', '1')
d1 = Pathology('MESH', '1')
b1 = BiologicalProcess('GO', '1')
b2 = BiologicalProcess('GO', '2')
m1 = MicroRna('MIRBASE', '1')
r1 = Rna('HGNC', '1')
r2 = Rna('HGNC', '2')
nca1 = NamedComplexAbundance('FPLX', '1')
pop1 = Population('taxonomy', '1')

# Proteins given by identifier only (no name is passed).
p2 = Protein('HGNC', identifier='9236')
p3 = Protein('HGNC', identifier='9212')
# RNA and gene nodes derived from p3 through the nodes' own conversion methods.
r3 = p3.get_rna()
g3 = r3.get_gene()

# Complexes assembled from the nodes above: p2 with g3, p1 with p2, a1 with p2.
c1 = ComplexAbundance([p2, g3])
c2 = ComplexAbundance([p1, p2])
c3 = ComplexAbundance([a1, p2])
Example #7
0
        # PyBEL manager
        cls.pybel_manager = pybel.Manager(engine=cls.engine,
                                          session=cls.session)
        cls.pybel_manager.create_all()

    @classmethod
    def tearDownClass(cls):
        """Close the class-level session, then run the parent class teardown.

        Any further cleanup (such as removing the temporary database) is left
        to the parent implementation.
        """
        db_session = cls.session
        db_session.close()
        super().tearDownClass()


# Nodes shared by the test graph built in ``get_enrichment_graph``:
# two HGNC proteins, one HGNC gene, and one WikiPathways pathway.
protein_a = Protein(namespace=HGNC, identifier='2976', name='DNMT1')
protein_b = Protein(namespace=HGNC, identifier='9173', name='POLA1')
gene_c = Gene(namespace=HGNC, identifier='8903', name='PGLS')
pathway_a = BiologicalProcess(namespace=WIKIPATHWAYS,
                              identifier='WP1604',
                              name='Codeine and Morphine Metabolism')


def get_enrichment_graph():
    """Build a small test graph for enrichment.

    The graph chains the module-level nodes: ``protein_a`` increases
    ``protein_b``, ``protein_b`` decreases ``gene_c``, and ``gene_c`` is part
    of ``pathway_a``.

    :return: the populated graph
    """
    enrichment_graph = BELGraph(name='My test graph for enrichment', version='0.0.1')

    # Both causal edges carry the same placeholder citation and empty evidence.
    provenance = {'citation': '1234', 'evidence': ''}
    enrichment_graph.add_increases(protein_a, protein_b, **provenance)
    enrichment_graph.add_decreases(protein_b, gene_c, **provenance)

    enrichment_graph.add_part_of(gene_c, pathway_a)
    return enrichment_graph
Example #8
0
def normalize_graph_names(graph: BELGraph, database: str) -> None:
    """Normalize the names of the nodes in a graph.

    The function walks over every node that has a name (``ListAbundance`` and
    ``Reaction`` nodes are skipped) and builds a replacement node whose name
    is derived from the lower-cased, quote- and whitespace-stripped original.
    Depending on the node type and on ``database``, the replacement may also
    change the node class (for example an ``Abundance`` becoming a
    ``BiologicalProcess``), or one node may be replaced by several.

    Replacements are collected in two mappings and applied at the very end:
    one-to-one replacements through ``relabel_nodes`` and one-to-many
    replacements through ``multi_relabel``. Nothing is returned.

    :param graph: the graph whose nodes are relabeled
    :param database: name of the source database; compared against
        ``REACTOME``, ``WIKIPATHWAYS`` and the literal ``'wikipathways'``
    """
    # Victim to Survivor (one to one node) mapping
    one_to_one_mapping = {}
    # Victim to Survivors (one to many nodes) mapping
    one_to_many_mapping = defaultdict(set)

    for node in graph.nodes():

        # Skip ListAbundances and Reactions since they do not have a name
        if isinstance(node, ListAbundance) or isinstance(
                node, Reaction) or not node.name:
            continue

        # Normalize names: Lower case name and strip quotes or white spaces
        lower_name = node.name.lower().strip('"').strip()

        # Dealing with Genes/miRNAs
        if isinstance(node, CentralDogma):

            ##################
            # miRNA entities #
            ##################

            if lower_name.startswith("mir"):

                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        # The loop rebinds ``lower_name`` to each list element in turn.
                        for lower_name in reactome_cell:
                            one_to_many_mapping[node].add(
                                MicroRna(
                                    node.namespace,
                                    name=lower_name.replace("mir-", "mir"),
                                    identifier=node.identifier,
                                ), )

                    # NOTE(review): there is no ``else``/``continue`` after the list case,
                    # so a list result also reaches the one-to-one assignment below, with
                    # ``lower_name`` left bound to the last list element (if any). The gene
                    # branch further down uses if/else instead — confirm this is intended.
                    if lower_name.endswith(' genes'):
                        lower_name = lower_name[:-len(' genes')]
                    elif lower_name.endswith(' gene'):
                        lower_name = lower_name[:-len(' gene')]
                    # NOTE(review): unlike the other replacements, no identifier is passed here.
                    one_to_one_mapping[node] = MicroRna(
                        node.namespace,
                        name=lower_name.replace(
                            "mir-", "mir"),  # Special case for Reactome
                    )
                    continue

                # Reached for every database except Reactome (which always continues above).
                # NOTE(review): this uses the original ``node.name`` rather than ``lower_name``,
                # so the case is kept and only a lower-case "mir-" is rewritten — confirm.
                one_to_one_mapping[node] = MicroRna(
                    node.namespace,
                    name=node.name.replace("mir-", "mir"),
                    identifier=node.identifier,
                )

            ##################
            # Genes entities #
            ##################

            else:
                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        for lower_name in reactome_cell:
                            if lower_name in BLACK_LIST_REACTOME:  # Filter entities in black list
                                continue
                            elif lower_name.startswith(
                                    "("):  # remove redundant parentheses
                                lower_name = lower_name.strip("(").strip(")")

                            one_to_many_mapping[node].add(
                                Protein(node.namespace,
                                        name=lower_name,
                                        identifier=node.identifier), )
                    else:
                        one_to_one_mapping[node] = Protein(
                            node.namespace,
                            name=lower_name,
                            identifier=node.identifier)

                    continue

                # WikiPathways and KEGG do not require any processing of genes
                # Exception: WikiPathways names listed in WIKIPATHWAYS_BIOL_PROCESS are
                # re-typed as biological processes instead of proteins.
                elif database == WIKIPATHWAYS and lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Default for gene-like nodes: replace with a Protein using the normalized name.
                one_to_one_mapping[node] = Protein(node.namespace,
                                                   name=lower_name,
                                                   identifier=node.identifier)

        #######################
        # Metabolite entities #
        #######################

        elif isinstance(node, Abundance):

            # NOTE(review): this compares against the string literal 'wikipathways' while the
            # gene branch above uses the WIKIPATHWAYS constant — confirm both are equal.
            if database == 'wikipathways':
                # Biological processes that are captured as abundance in
                # BEL since they were characterized wrong in WikiPathways
                if lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Abundances to BiologicalProcesses
                elif (node.namespace
                      in {'WIKIDATA', 'WIKIPATHWAYS', 'REACTOME'}
                      and lower_name not in WIKIPATHWAYS_METAB):
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Fix naming in duplicate entity
                if lower_name in WIKIPATHWAYS_NAME_NORMALIZATION:
                    lower_name = WIKIPATHWAYS_NAME_NORMALIZATION[lower_name]

            elif database == REACTOME:
                # Curated proteins that were coded as metabolites
                if lower_name in REACTOME_PROT:
                    one_to_one_mapping[node] = Protein(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Flat multiple identifiers (this is not trivial because most of ChEBI names contain commas,
                # so a clever way to fix some of the entities is to check that all identifiers contain letters)
                # ``str.isalpha`` is False for parts containing spaces, digits or punctuation,
                # so only names made purely of comma-separated letter runs are split.
                elif "," in lower_name and all(
                        string.isalpha() for string in lower_name.split(",")):
                    for string in lower_name.split(","):
                        one_to_many_mapping[node].add(
                            Abundance(node.namespace,
                                      name=string,
                                      identifier=node.identifier), )
                    continue

            # Default for abundances: same class, normalized (and possibly remapped) name.
            one_to_one_mapping[node] = Abundance(node.namespace,
                                                 name=lower_name,
                                                 identifier=node.identifier)

        #################################
        # Biological Processes entities #
        #################################

        elif isinstance(node, BiologicalProcess):
            # KEGG normalize name by removing the title prefix
            if lower_name.startswith('title:'):
                lower_name = lower_name[len('title:'):]

            one_to_one_mapping[node] = BiologicalProcess(
                node.namespace,
                name=lower_name,
                identifier=node.identifier,
            )

    # Apply the collected replacements: one-to-one first, then one-to-many.
    relabel_nodes(graph, one_to_one_mapping)
    multi_relabel(graph, one_to_many_mapping)
Example #9
0
     'AKT1',
     variants=[Hgvs('c.1521_1523delCTT'),
               Hgvs('p.Phe508del')]),
 Gene('HGNC', 'NCF1'),
 ComplexAbundance([Gene('HGNC', 'NCF1'),
                   Protein('HGNC', 'HBP1')]),
 Protein('HGNC', 'HBP1'),
 ComplexAbundance([Protein('HGNC', 'FOS'),
                   Protein('HGNC', 'JUN')]),
 Protein('HGNC', 'FOS'),
 Protein('HGNC', 'JUN'),
 Rna('HGNC', 'CFTR', variants=Hgvs('r.1521_1523delcuu')),
 Rna('HGNC', 'CFTR'),
 Rna('HGNC', 'CFTR', variants=Hgvs('r.1653_1655delcuu')),
 CompositeAbundance([interleukin_23_complex, il6]), il6,
 BiologicalProcess('GO', 'cell cycle arrest'), hydrogen_peroxide,
 Protein('HGNC', 'CAT'),
 Gene('HGNC', 'CAT'),
 Protein('HGNC', 'HMGCR'),
 BiologicalProcess('GO', 'cholesterol biosynthetic process'),
 Gene('HGNC', 'APP', variants=Hgvs('c.275341G>C')),
 Gene('HGNC', 'APP'),
 Pathology('MESHD', 'Alzheimer Disease'),
 ComplexAbundance([Protein('HGNC', 'F3'),
                   Protein('HGNC', 'F7')]),
 Protein('HGNC', 'F3'),
 Protein('HGNC', 'F7'),
 Protein('HGNC', 'F9'),
 Protein('HGNC', 'GSK3B', variants=ProteinModification('Ph', 'Ser', 9)),
 Protein('HGNC', 'GSK3B'),
 Pathology('MESHD', 'Psoriasis'),