Ejemplo n.º 1
0
 def test_rna_fusion_specified(self):
     """Check the BEL string of an RNA fusion whose two partner ranges are given explicitly."""
     five_prime_partner = rna(namespace='HGNC', name='TMPRSS2')
     three_prime_partner = rna(namespace='HGNC', name='ERG')

     fusion = rna_fusion(
         partner_5p=five_prime_partner,
         range_5p=fusion_range('r', 1, 79),
         partner_3p=three_prime_partner,
         range_3p=fusion_range('r', 312, 5034),
     )

     expected_bel = 'r(fus(HGNC:TMPRSS2, "r.1_79", HGNC:ERG, "r.312_5034"))'
     self.assertEqual(expected_bel, str(fusion))
Ejemplo n.º 2
0
    def test_cna(self):
        """Run ``run_cna`` from several roots of ``network3`` and check the value reported per target.

        Each assertion reads ``result[i][2].value`` for the i-th requested target.
        """
        network = self.network3  # Defined in test.constants.TestNetworks

        # Root A, single target C.
        from_a = run_cna(
            graph=network,
            root=protein(HGNC, 'a'),
            targets=[gene(HGNC, 'c')],
        )
        self.assertEqual(-1, from_a[0][2].value)  # A -| C

        # Root D, targets C and F.
        from_d = run_cna(
            graph=network,
            root=rna(HGNC, 'd'),
            targets=[gene(HGNC, 'c'), gene(HGNC, 'f')],
        )
        self.assertEqual(-1, from_d[0][2].value)  # D -| C
        self.assertEqual(-1, from_d[1][2].value)  # D -| F

        # Root E, targets C and F.
        from_e = run_cna(
            graph=network,
            root=protein(HGNC, 'e'),
            targets=[gene(HGNC, 'c'), gene(HGNC, 'f')],
        )
        self.assertEqual(1, from_e[0][2].value)  # E -> C
        self.assertEqual(1, from_e[1][2].value)  # E -> F

        # Root E, target G: the expected value is 0.
        no_effect = run_cna(
            graph=network,
            root=protein(HGNC, 'e'),
            targets=[protein(HGNC, 'g')],
        )
        self.assertEqual(0, no_effect[0][2].value)  # E -> G
Ejemplo n.º 3
0
    def test_causalr_rank_hypothesis_1(self):
        """Rank the CausalR hypothesis for regulator ``a`` on ``network4`` and check the upregulated summary."""
        network = self.network4  # Defined in test.constants.TestNetworks

        # Observed regulation per node, as passed to ``node_to_regulation``.
        observations = {
            protein(HGNC, 'a'): 0,
            protein(HGNC, 'b'): 1,
            gene(HGNC, 'c'): -1,
            rna(HGNC, 'd'): -1,
            protein(HGNC, 'e'): -1,
            gene(HGNC, 'f'): 1,
            protein(HGNC, 'g'): 1,
            protein(HGNC, 'h'): 1,
            protein(HGNC, 'i'): 1,
            protein(HGNC, 'j'): 1,
        }

        # Only the first element of the returned pair (the upregulated hypothesis) is checked here.
        upregulated, _downregulated = rank_causalr_hypothesis(
            graph=network,
            node_to_regulation=observations,
            regulator_node=protein(HGNC, 'a'),
        )

        self.assertEqual(5, upregulated['score'])
        self.assertEqual(6, upregulated['correct'])
        # Original note attributes the incorrect prediction to gene(HGNC, 'f').
        self.assertEqual(1, upregulated['incorrect'])
        # Original note attributes the ambiguous prediction to node 'h' (a protein in the input above).
        self.assertEqual(1, upregulated['ambiguous'])
Ejemplo n.º 4
0
    def test_no_infer_rna_fusion(self):
        """Test that enriching origins adds no node or edge to a graph holding only an RNA fusion."""
        five_prime = rna(n(), n())
        three_prime = rna(n(), n())
        fusion_node = rna_fusion(partner_5p=five_prime, partner_3p=three_prime)

        bel_graph = BELGraph()
        bel_graph.add_node_from_data(fusion_node)

        # Before enrichment: just the fusion node, no edges.
        self.assertEqual(1, bel_graph.number_of_nodes())
        self.assertEqual(0, bel_graph.number_of_edges())

        enrich_protein_and_rna_origins(bel_graph)

        # After enrichment the counts must be unchanged.
        self.assertEqual(1, bel_graph.number_of_nodes())
        self.assertEqual(0, bel_graph.number_of_edges())
Ejemplo n.º 5
0
    def test_translation(self):
        """Test parsing a ``translatedTo`` relation from a located RNA to its protein.

        3.3.3 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_translatedto
        """
        bel_statement = 'r(HGNC:AKT1,loc(GO:intracellular)) >> p(HGNC:AKT1)'
        parsed = self.parser.relation.parseString(bel_statement)

        # The RNA subject carries its location; the protein object has only a concept.
        expected_source = {
            FUNCTION: RNA,
            CONCEPT: {NAMESPACE: 'HGNC', NAME: 'AKT1'},
            LOCATION: {NAMESPACE: 'GO', NAME: 'intracellular'},
        }
        expected_target = {
            FUNCTION: PROTEIN,
            CONCEPT: {NAMESPACE: 'HGNC', NAME: 'AKT1'},
        }
        self.assertEqual(
            {SOURCE: expected_source, RELATION: TRANSLATED_TO, TARGET: expected_target},
            parsed.asDict(),
        )

        # Both endpoints must have been added to the graph.
        self.assertEqual(2, self.graph.number_of_nodes())

        akt1_rna = rna(name='AKT1', namespace='HGNC')
        self.assertIn(akt1_rna, self.graph)

        akt1_protein = protein(name='AKT1', namespace='HGNC')
        self.assertIn(akt1_protein, self.graph)

        # Exactly one edge, running from the RNA to the protein.
        self.assertEqual(1, self.graph.number_of_edges())
        self.assertTrue(self.graph.has_edge(akt1_rna, akt1_protein))

        edges_by_key = self.parser.graph[akt1_rna][akt1_protein]
        self.assertEqual(1, len(edges_by_key))

        # Pull out the data dictionary of the single edge between the two nodes.
        only_key = next(iter(edges_by_key))
        edge_data = edges_by_key[only_key]

        self.assertIn(RELATION, edge_data)
        self.assertEqual(TRANSLATED_TO, edge_data[RELATION])

        # Serialising the edge back to BEL uses the long relation name.
        self.assertEqual(
            'r(HGNC:AKT1, loc(GO:intracellular)) translatedTo p(HGNC:AKT1)',
            edge_to_bel(akt1_rna, akt1_protein, data=edge_data),
        )
Ejemplo n.º 6
0
    def test_no_infer_on_rna_variants(self):
        """Test origin enrichment on a graph built from a single RNA variant.

        Adding the variant yields two nodes and one edge; enrichment is expected to add
        exactly one more node and one more edge.
        """
        variant_rna = rna('HGNC', n(), variants=[hgvs(n())])

        bel_graph = BELGraph()
        bel_graph.add_node_from_data(variant_rna)

        # State right after insertion of the variant.
        self.assertEqual(2, bel_graph.number_of_nodes())
        self.assertEqual(1, bel_graph.number_of_edges())

        enrich_protein_and_rna_origins(bel_graph)

        # State after enrichment.
        self.assertEqual(3, bel_graph.number_of_nodes())
        self.assertEqual(2, bel_graph.number_of_edges())
Ejemplo n.º 7
0
def node_to_bel(node: Dict, hgnc_manager: Manager, pathway_id) -> BaseEntity:
    """Create a BEL node from a pathway node dictionary.

    :param node: node dictionary; the keys ``node_types``, ``uri_id`` and ``name`` are required,
        ``identifier`` and ``identifiers`` are used when present
    :param hgnc_manager: manager passed through to ``get_valid_gene_identifier``
    :param pathway_id: pathway identifier, passed to the helper checks and used in the log message
    :return: the BEL node, or ``None`` (after a debug log) when no known node type matches
    """
    node_types = node['node_types']
    uri_id = node['uri_id']

    # Use the explicit identifier when the node has one, otherwise fall back to the URI id.
    identifier = check_multiple(node.get('identifier', uri_id), 'identifier', pathway_id)

    uri_id = check_multiple(uri_id, 'uri_id', pathway_id)
    # Parse the URI once; every non-gene branch below takes its namespace from this result.
    _, _, namespace, _ = parse_id_uri(uri_id)

    name = check_multiple(node['name'], 'name', pathway_id)

    # Get the dictionary of multiple identifiers (the node itself when there is no dedicated entry)
    node_ids_dict = node.get('identifiers', node)

    if any(node_type in node_types for node_type in ('Protein', 'Rna', 'GeneProduct')):
        # Gene-like nodes replace namespace, name and identifier with the validated gene values.
        namespace, name, identifier = get_valid_gene_identifier(node_ids_dict, hgnc_manager, pathway_id)
        if 'Protein' in node_types:
            return protein(namespace=namespace.upper(), name=name, identifier=identifier)
        if 'Rna' in node_types:
            return rna(namespace=namespace.upper(), name=name, identifier=identifier)
        # Only 'GeneProduct' is left.
        # NOTE(review): this branch uses the HGNC constant rather than the returned namespace — confirm intended.
        return gene(namespace=HGNC, name=name, identifier=identifier)

    if 'Metabolite' in node_types:
        return abundance(namespace=namespace.upper(), name=name, identifier=identifier)

    if '/wikipathways/WP' in str(uri_id) and {'DataNode'} == node_types:
        # A bare DataNode whose URI points at a pathway is represented as a biological process.
        return bioprocess(namespace=namespace.upper(), name=name, identifier=identifier)

    if 'DataNode' in node_types:
        return abundance(namespace=namespace.upper(), name=name, identifier=identifier)

    logger.debug('Unknown %s [pathway=%s]', node_types, pathway_id)
    return None
Ejemplo n.º 8
0
    def test_transcription(self):
        """Test parsing a ``transcribedTo`` relation between a gene and its RNA.

        3.3.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_transcribedto
        """
        bel_statement = 'g(HGNC:AKT1) :> r(HGNC:AKT1)'
        parsed = self.parser.relation.parseString(bel_statement)

        # The parse result in list form: subject, relation, object.
        self.assertEqual(
            [
                [GENE, ['HGNC', 'AKT1']],
                TRANSCRIBED_TO,
                [RNA, ['HGNC', 'AKT1']],
            ],
            parsed.asList(),
        )

        akt1_gene = gene('HGNC', 'AKT1')
        self.assert_has_node(akt1_gene)

        akt1_rna = rna('HGNC', 'AKT1')
        self.assert_has_node(akt1_rna)

        # The edge must carry the transcribedTo relation.
        self.assert_has_edge(akt1_gene, akt1_rna, **{RELATION: TRANSCRIBED_TO})
Ejemplo n.º 9
0
    def test_update_matrix_repression(self):
        """Test updating the matrix with RNA repression."""
        source = protein(namespace='HGNC', name='A', identifier='1')
        target = rna(namespace='HGNC', name='B', identifier='2')

        # Zero-filled square matrix labelled by the two node names.
        labels = {'A', 'B'}
        matrices = {"repression": DataFrame(0, index=labels, columns=labels)}

        update_spia_matrices(matrices, source, target, {'relation': 'decreases'})

        # Only the cell at column 'A', row 'B' is expected to be set.
        repression = matrices["repression"]
        self.assertEqual(repression['A']['B'], 1)
        self.assertEqual(repression['A']['A'], 0)
        self.assertEqual(repression['B']['A'], 0)
        self.assertEqual(repression['B']['B'], 0)
    def test_convert_promote_translation(self):
        """Test the conversion of a bel statement like A -> r(B)"""
        # example from Colorectal Cancer Model v2.0.6 @ scai
        # act(p(HGNC:CTNNB1), ma(tscript)) increases r(HGNC:BIRC5)
        regulator = protein('HGNC', name='CTNNB1')
        transcript = rna('HGNC', name='BIRC5')

        # Build the input graph holding the single "increases" statement.
        source_graph = BELGraph()
        source_graph.add_increases(
            regulator,
            transcript,
            evidence='10.1038/s41586-018-0368-8',
            citation='18075512',
            subject_modifier=activity('tscript'),
        )

        # The reified graph must match the hand-built expectation.
        expected = self.help_make_simple_expected_graph(
            regulator,
            transcript,
            PROMOTES_TRANSLATION,
            0,
            self.help_causal_increases,
        )
        actual = reify_bel_graph(source_graph)

        self.help_test_graphs_equal(expected, actual)
Ejemplo n.º 11
0
from pybel import BELGraph
from pybel.constants import POSITIVE_CORRELATION, RELATION
from pybel.dsl import CompositeAbundance, Protein, gene, hgvs, pathology, protein_fusion, rna, rna_fusion
from pybel.struct.mutation import (
    enrich_protein_and_rna_origins,
    prune_protein_rna_origins,
    remove_associations,
    remove_isolated_list_abundances,
    remove_pathologies,
)
from pybel.struct.mutation.utils import remove_isolated_nodes, remove_isolated_nodes_op
from pybel.testing.utils import n

# Module-level fixtures: the gene, RNA and protein nodes for HGNC:TREM2.
trem2_gene = gene(namespace='HGNC', name='TREM2')
trem2_rna = rna(namespace='HGNC', name='TREM2')
trem2_protein = Protein(namespace='HGNC', name='TREM2')


class TestDeletions(unittest.TestCase):
    """Test cases for deletion functions."""
    def test_remove_pathologies(self):
        """Test removal of pathologies."""
        g = BELGraph()

        p1, p2, p3 = (Protein(namespace='HGNC', name=n()) for _ in range(3))
        d1, d2 = (pathology(namespace='MESH', name=n()) for _ in range(2))

        g.add_increases(p1, p2, citation=n(), evidence=n())
        g.add_increases(p2, p3, citation=n(), evidence=n())
        g.add_positive_correlation(p1, d1, citation=n(), evidence=n())
Ejemplo n.º 12
0
# Correlations of the MTHFR C677T node with folic acid (negative) and Alzheimer disease (positive).
example_graph.add_negative_correlation(mthfr_c677t, folic_acid, citation=c2, evidence=e2)
example_graph.add_positive_correlation(mthfr_c677t, alzheimer_disease, citation=c2, evidence=e2)

# Citation reference and evidence text for the NDUFB6 statements below.
c3 = '17948130'
e3 = 'A polymorphism in the NDUFB6 promoter region that creates a possible DNA methylation site (rs629566, A/G) was ' \
     'associated with a decline in muscle NDUFB6 expression with age. Although young subjects with the rs629566 G/G ' \
     'genotype exhibited higher muscle NDUFB6 expression, this genotype was associated with reduced expression in' \
     ' elderly subjects. This was subsequently explained by the finding of increased DNA methylation in the promoter ' \
     'of elderly, but not young, subjects carrying the rs629566 G/G genotype. Furthermore, the degree of DNA' \
     ' methylation correlated negatively with muscle NDUFB6 expression, which in turn was associated with insulin ' \
     'sensitivity.'
# NOTE(review): the evidence text is replaced by the string form of its built-in hash. String hashes are
# salted per interpreter process unless PYTHONHASHSEED is fixed, so this value differs between runs —
# confirm that nothing relies on it being stable.
e3 = str(hash(e3))

# The methylated rs629566 variant, plus the NDUFB6 gene and RNA nodes.
rs629566 = gene('DBSNP', 'rs629566', variants=[gmod('Me')])
ndufb6_gene = gene('HGNC', 'NDUFB6')
ndufb6_rna = rna('HGNC', 'NDUFB6')

# Link the gene to its variant, then add the variant's negative correlation with the RNA, with annotations.
example_graph.add_unqualified_edge(ndufb6_gene, rs629566, HAS_VARIANT)
example_graph.add_negative_correlation(rs629566, ndufb6_rna, citation=c3, evidence=e3,
                                       annotations={'Confidence': 'Low', 'Number': '50'})

"""
SET Evidence = "% Entrez Gene summary: Rat: SUMMARY: precursor protein of kinin which is found in plasma; cysteine protease inhibitor and a major acute phase reactant [RGD] OMIM summary: (summary is not available from this source) kininogens; Endogenous peptides present in most body fluids. Certain enzymes convert them to active kinins which are involved in inflammation, blood clotting, complement reactions, etc. Kininogens belong to the cystatin superfamily. They are cysteine proteinase inhibitors. High-molecular-weight kininogen (hmwk) is split by plasma kallikrein to produce bradykinin. Low-molecular-weight kininogen (lmwk) is split by tissue kallikrein to produce kallidin. kinins; Inflammatory mediators that cause dilation of blood vessels and altered vascular permeability.  Kinins are small peptides produced from kininogen by kallikrein and are broken down by kininases. Act on phospholipase and increase arachidonic acid release and thus prostaglandin (PGE2) production. bradykinin; Vasoactive nonapeptide (RPPGFSPFR) formed by action of proteases on kininogens. Very similar to kallidin (which has the same sequence but with an additional N terminal lysine). Bradykinin is a very potent vasodilator and increases permeability of post capillary venules, it acts on endothelial cells to activate phospholipase A2. It is also spasmogenic for some smooth muscle and will cause pain. kallidin; Decapeptide (lysyl bradykinin, amino acid sequence KRPPGFSPFR) produced in kidney. Like bradykinin, an inflammatory mediator (a kinin), causes dilation of renal blood vessels and increased water excretion."
SET Species = 9606
SET Citation = {"Other","Genstruct Kininogen Overview","Genstruct Kininogen Overview","","",""}

bp(GOBP:"inflammatory response") increases rxn(reactants(p(HGNC:KNG1)),products(a(SCHEM:Kallidin)))
path(SDIS:"tissue damage") increases rxn(reactants(p(HGNC:KNG1)),products(a(SCHEM:Kallidin)))
a(SCHEM:Kallidin) increases cat(p(HGNC:BDKRB1))
cat(p(HGNC:BDKRB1)) increases cat(p(SFAM:"PLA2 Family"))
"""
Ejemplo n.º 13
0
        super(ManagerMixin, self).setUp()

        self.db_fd, self.db_file = tempfile.mkstemp()

        self.connection = 'sqlite:///' + self.db_file
        self.manager = Manager(connection=self.connection)

    def tearDown(self):
        """Close the file descriptor in ``self.db_fd`` and delete the file at ``self.db_file``."""
        # Close the descriptor before removing the file it refers to.
        os.close(self.db_fd)
        os.unlink(self.db_file)


# Module-level HGNC nodes named 'a' through 'j': proteins, except gene 'c', RNA 'd' and gene 'f'.
protein_a = protein(namespace=HGNC, name='a')
protein_b = protein(namespace=HGNC, name='b')
gene_c = gene(namespace=HGNC, name='c')
rna_d = rna(namespace=HGNC, name='d')
protein_e = protein(namespace=HGNC, name='e')
gene_f = gene(namespace=HGNC, name='f')
protein_g = protein(namespace=HGNC, name='g')
protein_h = protein(namespace=HGNC, name='h')
protein_i = protein(namespace=HGNC, name='i')
protein_j = protein(namespace=HGNC, name='j')


def make_graph_1() -> BELGraph:
    graph = BELGraph(
        name='Lab course example',
        version='1.1.0',
        description='',
        authors='LSI',
        contact='*****@*****.**',
Ejemplo n.º 14
0
"""Tests for collapse functions."""

import unittest

from pybel import BELGraph
from pybel.constants import DIRECTLY_INCREASES
from pybel.dsl import gene, mirna, pathology, pmod, protein, rna
from pybel.struct.mutation.collapse import collapse_all_variants, collapse_nodes, collapse_to_genes
from pybel.testing.utils import n

# Namespace keywords used by the fixture nodes below.
HGNC = 'HGNC'
GO = 'GO'
CHEBI = 'CHEBI'

# Entry '1': gene, RNA, protein, and the same protein carrying a pmod('Ph') variant.
g1 = gene(HGNC, '1')
r1 = rna(HGNC, '1')
p1 = protein(HGNC, '1')
p1_phosphorylated = protein(HGNC, '1', variants=[pmod('Ph')])

# Entry '2': gene, RNA and protein.
g2 = gene(HGNC, '2')
r2 = rna(HGNC, '2')
p2 = protein(HGNC, '2')

# Entry '3': gene, RNA and protein.
g3 = gene(HGNC, '3')
r3 = rna(HGNC, '3')
p3 = protein(HGNC, '3')

# Entry '4': gene and miRNA.
g4 = gene(HGNC, '4')
m4 = mirna(HGNC, '4')

# Entry '5' is a pathology in the GO namespace (not a protein, despite the ``p`` prefix).
p5 = pathology(GO, '5')
Ejemplo n.º 15
0
 def test_rna_fusion_unspecified(self):
     """Check the BEL string of an RNA fusion built without ranges: both are rendered as "?"."""
     five_prime_partner = rna(namespace='HGNC', name='TMPRSS2')
     three_prime_partner = rna(namespace='HGNC', name='ERG')
     fusion = rna_fusion(partner_5p=five_prime_partner, partner_3p=three_prime_partner)

     expected_bel = 'r(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))'
     self.assertEqual(expected_bel, str(fusion))
Ejemplo n.º 16
0
def node_to_bel(node: Dict, graph, hgnc_manager: HgncManager,
                chebi_manager: ChebiManager, species) -> BaseEntity:
    """Convert node dictionary to BEL node object.

    :param node: node dictionary; ``entity_type`` is required, ``complex_components`` and
        ``display_name`` are read for complexes
    :param graph: BEL graph; only touched for pathway nodes, which are added to it
    :param hgnc_manager: manager passed through to ``get_valid_node_parameters``
    :param chebi_manager: manager passed through to ``get_valid_node_parameters``
    :param species: species value passed through to ``get_valid_node_parameters``
    :return: the BEL node, or ``None`` (after a warning) when the entity type is not recognized
    """
    node_types = node['entity_type']

    identifier, name, namespace = get_valid_node_parameters(
        node, hgnc_manager, chebi_manager, species)

    # Marker namespace for several proteins in one entry: build a composite instead.
    if namespace == 'hgnc_multiple_entry':
        return composite_abundance(process_multiple_proteins(identifier))

    if 'Protein' in node_types:
        return protein(namespace=namespace.upper(),
                       name=name,
                       identifier=identifier)

    if 'Dna' in node_types:
        return gene(namespace=namespace.upper(),
                    name=name,
                    identifier=identifier)

    if 'Rna' in node_types:
        return rna(namespace=namespace.upper(),
                   name=name,
                   identifier=identifier)

    # Small molecules and generic physical entities are both plain abundances.
    if 'SmallMolecule' in node_types or 'PhysicalEntity' in node_types:
        return abundance(namespace=namespace.upper(),
                         name=name,
                         identifier=identifier)

    if 'Complex' in node_types:
        # Convert each listed component recursively; a missing or empty list yields no members.
        members = {
            node_to_bel(component, graph, hgnc_manager, chebi_manager, species)
            for component in node.get('complex_components') or ()
        }

        if members:
            return complex_abundance(name=node.get('display_name'),
                                     members=members,
                                     identifier=identifier,
                                     namespace=namespace.upper())
        # No components available: fall back to a complex known only by name.
        return NamedComplexAbundance(name=node.get('display_name'),
                                     identifier=identifier,
                                     namespace=namespace.upper())

    if 'Pathway' in node_types:
        bioprocess_node = bioprocess(identifier=identifier,
                                     name=name,
                                     namespace=namespace.upper())
        # Pathway nodes are also registered in the graph before being returned.
        graph.add_node_from_data(bioprocess_node)
        return bioprocess_node

    # The message needs a placeholder for node_types; without one the logging call
    # reports a formatting error instead of emitting the warning.
    log.warning('Entity type not recognized: %s', node_types)
    return None
Ejemplo n.º 17
0
# -*- coding: utf-8 -*-
"""Tests for Bio2BEL miRTarBase."""

from bio2bel_mirtarbase.manager import _build_entrez_map
from bio2bel_mirtarbase.models import Evidence, HGNC, MIRBASE, Mirna, NCBIGENE, Species, Target
from pybel import BELGraph
from pybel.constants import FUNCTION, IDENTIFIER, NAME, NAMESPACE
from pybel.dsl import BaseAbundance, mirna, rna
from tests.constants import TemporaryFilledCacheMixin

# Gene symbol used for the HIF1A fixture built by name below.
hif1a_symbol = 'HIF1A'

# RNA nodes built four ways: HGNC by name, HGNC by identifier, NCBIGENE by name, NCBIGENE by identifier.
# NOTE(review): the NCBIGENE "name" variant passes '3091', the same string used as the identifier
# on the next line — presumably intentional; confirm.
hif1a_hgnc_name = rna(name=hif1a_symbol, namespace=HGNC)
hif1a_hgnc_identifier = rna(identifier='4910', namespace=HGNC)
hif1a_entrez_name = rna(name='3091', namespace=NCBIGENE)
hif1a_entrez_identifier = rna(identifier='3091', namespace=NCBIGENE)
# miRNA nodes in the MIRBASE namespace.
mi2_data = mirna(name='hsa-miR-20a-5p', namespace=MIRBASE)
mi5_data = mirna(name='mmu-miR-124-3p', namespace=MIRBASE)


class TestBuildDatabase(TemporaryFilledCacheMixin):
    """Test the database."""
    def test_count_human_genes(self):
        """Test the number of genes in Bio2BEL HGNC."""
        self.assertEqual(2, self.hgnc_manager.count_human_genes())

    def test_count_mirnas(self):
        """Test the number of miRNAs."""
        self.assertEqual(5, self.manager.count_mirnas())

    def test_count_targets(self):