def test_phosphorylation_one_site_with_evidence():
    """Check a single-site phosphorylation edge and the evidence it carries.

    One directlyIncreases edge onto a target with a single ``pmod`` variant
    must produce exactly one Phosphorylation statement whose evidence mirrors
    the edge's text, citation, hash and annotations.
    """
    evidence_text = 'Some evidence.'
    pmid = '123456'
    kinase = protein(name='MAP2K1', namespace='HGNC')
    substrate = protein(
        name='MAPK1',
        namespace='HGNC',
        variants=[pmod('Ph', position=185, code='Thr')],
    )
    graph = BELGraph()
    edge_hash = graph.add_directly_increases(
        kinase,
        substrate,
        evidence=evidence_text,
        citation=pmid,
        annotations={"TextLocation": 'Abstract'},
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Phosphorylation)
    assert stmt.residue == 'T'
    assert stmt.position == '185'
    enzyme = stmt.enz
    target = stmt.sub
    assert enzyme.name == 'MAP2K1'
    assert enzyme.mods == []
    assert target.name == 'MAPK1'
    # The modification is expressed by the statement, not as a condition on
    # the substrate agent.
    assert target.mods == []

    # Evidence checks
    assert len(stmt.evidence) == 1
    ev = stmt.evidence[0]
    assert ev.source_api == 'bel'
    assert ev.source_id == edge_hash
    assert ev.pmid == pmid
    assert ev.text == evidence_text
    expected_bel = ('p(HGNC:MAP2K1) directlyIncreases '
                    'p(HGNC:MAPK1, pmod(Ph, Thr, 185))')
    assert ev.annotations == {'bel': expected_bel}
    assert ev.epistemics == {'direct': True, 'section_type': 'abstract'}
def test_conversion():
    """Check that an enzyme acting on a reaction becomes a Conversion."""
    pip2_name = '1-Phosphatidyl-D-myo-inositol 4,5-bisphosphate'
    enzyme = protein(name='PLCG1', namespace='HGNC')
    substrate = abundance('SCHEM', pip2_name)
    dag = abundance('SCHEM', 'Diacylglycerol')
    ip3 = abundance('SCHEM', 'Inositol 1,4,5-trisphosphate')
    rxn = reaction(
        reactants=substrate,
        products=[dag, ip3],
    )
    graph = BELGraph()
    graph.add_directly_increases(
        enzyme,
        rxn,
        subject_modifier=activity(name='activity'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Conversion)
    assert stmt.subj.name == 'PLCG1'
    assert stmt.subj.activity.activity_type == 'activity'
    assert stmt.subj.activity.is_active is True
    assert len(stmt.obj_from) == 1
    assert isinstance(stmt.obj_from[0], Agent)
    assert stmt.obj_from[0].name == pip2_name
    assert len(stmt.obj_to) == 2
    # The products are not in alphabetical order: PyBEL sorts the nodes based
    # on their BEL, and Inositol 1,4,5-trisphosphate gets quoted.
    product_names = [stmt.obj_to[0].name, stmt.obj_to[1].name]
    assert product_names[0] == 'Inositol 1,4,5-trisphosphate'
    assert product_names[1] == 'Diacylglycerol'
    assert len(stmt.evidence) == 1
def test_regulate_amount1_prot_obj():
    """Check that protein-increases-protein yields one IncreaseAmount."""
    regulator = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        regulator,
        target,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, IncreaseAmount)
    assert len(stmt.evidence) == 1
def test_controlled_transloc_loc_cond():
    """Controlled translocations are currently not handled.

    An edge whose object carries a translocation modifier must produce no
    statements.
    """
    regulator = protein(name='MAP2K1', namespace='HGNC')
    moved = protein(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    secretion_like = translocation(
        from_loc=Entity('GOCC', 'intracellular'),
        to_loc=Entity('GOCC', 'extracellular space'),
    )
    graph.add_increases(
        regulator,
        moved,
        object_modifier=secretion_like,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert not pbp.statements
def test_regulate_amount2_rna_obj():
    """Check that protein-increases-RNA yields one IncreaseAmount."""
    # FIXME: Create a transcription-specific statement for p->rna
    regulator = protein(name='MAP2K1', namespace='HGNC')
    transcript = rna(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        regulator,
        transcript,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, IncreaseAmount)
    assert len(stmt.evidence) == 1
def test_regulate_amount3_deg():
    """Check that increasing degradation of a protein is a DecreaseAmount."""
    # FIXME: Create a stability-specific statement for p->deg(p(Foo))
    regulator = protein(name='MAP2K1', namespace='HGNC')
    degraded = protein(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        regulator,
        degraded,
        object_modifier=degradation(),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, DecreaseAmount)
    assert len(stmt.evidence) == 1
def test_activation_bioprocess():
    """Check that protein-increases-bioprocess yields one Activation."""
    regulator = protein(name='BAX', namespace='HGNC')
    process = bioprocess(name='apoptosis', namespace='GOBP')
    graph = BELGraph()
    graph.add_increases(
        regulator,
        process,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Activation)
    assert stmt.subj.name == 'BAX'
    assert stmt.obj.name == 'apoptosis'
    # FIXME: Update when GO lookup is implemented
    assert stmt.obj.db_refs == {}
    assert len(stmt.evidence) == 1
def test_subject_transloc_loc_cond():
    """Translocations of the subject are treated as location conditions on
    the subject (using the to_loc location as the condition)."""
    regulator = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    relocation = translocation(
        from_loc=Entity('GOCC', 'intracellular'),
        to_loc=Entity('GOCC', 'extracellular space'),
    )
    graph = BELGraph()
    graph.add_increases(
        regulator,
        target,
        subject_modifier=relocation,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, IncreaseAmount)
    assert stmt.subj.name == 'MAP2K1'
    assert stmt.subj.location == 'extracellular space'
    assert stmt.obj.name == 'MAPK1'
def test_gap():
    """Check that an active protein directly decreasing gtp activity is a Gap."""
    rasa1 = protein(name='RASA1', namespace='HGNC')
    kras = protein(name='KRAS', namespace='HGNC')
    graph = BELGraph()
    graph.add_directly_decreases(
        rasa1,
        kras,
        subject_modifier=activity(name='activity'),
        object_modifier=activity(name='gtp'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Gap)
    assert stmt.gap.name == 'RASA1'
    assert stmt.ras.name == 'KRAS'
    assert stmt.gap.activity.activity_type == 'activity'
    assert stmt.gap.activity.is_active is True
    assert stmt.ras.activity is None
    assert len(stmt.evidence) == 1
def test_indirect_gef_is_activation():
    """Check that an indirect increase of gtp activity is an Activation.

    The edge uses ``increases`` rather than ``directlyIncreases``; the
    expected statement is an Activation with ``gtpbound`` object activity.
    """
    gef = protein(name='SOS1', namespace='HGNC')
    ras = protein(name='KRAS', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        gef,
        ras,
        subject_modifier=activity(name='activity'),
        object_modifier=activity(name='gtp'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Activation)
    assert stmt.subj.name == 'SOS1'
    assert stmt.obj.name == 'KRAS'
    assert stmt.subj.activity.activity_type == 'activity'
    assert stmt.subj.activity.is_active is True
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'gtpbound'
    assert len(stmt.evidence) == 1
def test_gtpactivation():
    """Check that a gtp-active subject directly increasing kinase activity
    yields a GtpActivation."""
    ras = protein(name='KRAS', namespace='HGNC')
    raf = protein(name='BRAF', namespace='HGNC')
    graph = BELGraph()
    graph.add_directly_increases(
        ras,
        raf,
        subject_modifier=activity(name='gtp'),
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, GtpActivation)
    subject_agent = stmt.subj
    assert subject_agent.name == 'KRAS'
    assert subject_agent.activity.activity_type == 'gtpbound'
    assert subject_agent.activity.is_active is True
    assert stmt.obj.name == 'BRAF'
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'kinase'
    assert len(stmt.evidence) == 1
def test_phosphorylation_two_sites():
    """Check that a target with two pmod variants yields two statements."""
    kinase = protein(name='MAP2K1', namespace='HGNC')
    substrate = protein(
        name='MAPK1',
        namespace='HGNC',
        variants=[
            pmod('Ph', position=185, code='Thr'),
            pmod('Ph', position=187, code='Tyr'),
        ],
    )
    graph = BELGraph()
    graph.add_directly_increases(
        kinase,
        substrate,
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 2
    thr_stmt, tyr_stmt = pbp.statements[0], pbp.statements[1]
    assert thr_stmt.residue == 'T'
    assert thr_stmt.position == '185'
    assert tyr_stmt.residue == 'Y'
    assert tyr_stmt.position == '187'
    # Neither substrate agent carries the modification as a condition.
    assert thr_stmt.sub.mods == []
    assert tyr_stmt.sub.mods == []
    assert len(pbp.statements[0].evidence) == 1
def test_subject_transloc_active_form():
    """ActiveForms where the subject is a translocation--should draw on the
    to-location of the subject."""
    moved = protein(name='MAP2K1', namespace='HGNC')
    activated = protein(name='MAP2K1', namespace='HGNC')
    relocation = translocation(
        from_loc=Entity('GOCC', 'intracellular'),
        to_loc=Entity('GOCC', 'extracellular space'),
    )
    graph = BELGraph()
    graph.add_increases(
        moved,
        activated,
        subject_modifier=relocation,
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, ActiveForm)
    agent = stmt.agent
    assert agent.name == 'MAP2K1'
    assert agent.location == 'extracellular space'
    assert agent.activity is None
    assert stmt.activity == 'kinase'
    assert stmt.is_active is True
def test_regulate_activity():
    """Check that kinase-active subject increasing kinase activity is an
    Activation with the subject activity kept as a condition."""
    regulator = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        regulator,
        target,
        subject_modifier=activity(name='kin'),
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Activation)

    subject_agent = stmt.subj
    assert subject_agent.name == 'MAP2K1'
    assert isinstance(subject_agent.activity, ActivityCondition)
    assert subject_agent.activity.activity_type == 'kinase'
    assert subject_agent.activity.is_active

    object_agent = stmt.obj
    assert object_agent.name == 'MAPK1'
    assert object_agent.activity is None
    assert stmt.obj_activity == 'kinase'
    assert len(stmt.evidence) == 1
def test_regulate_amount4_subj_act():
    """Check IncreaseAmount statements whose subject has an activity.

    Runs the same scenario for two BEL activity names and checks the
    activity type recorded on the subject's ActivityCondition.
    """
    regulator = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')

    # (BEL activity name, activity type expected on the subject agent)
    scenarios = (
        ('tscript', 'transcription'),
        ('act', 'activity'),
    )
    for bel_activity, expected_type in scenarios:
        graph = BELGraph()
        graph.add_increases(
            regulator,
            target,
            subject_modifier=activity(name=bel_activity),
            evidence="Some evidence.",
            citation='123456',
        )
        pbp = bel.process_pybel_graph(graph)
        assert pbp.statements
        assert len(pbp.statements) == 1
        stmt = pbp.statements[0]
        assert isinstance(stmt, IncreaseAmount)
        subject_agent = stmt.subj
        assert subject_agent.name == 'MAP2K1'
        assert isinstance(subject_agent.activity, ActivityCondition)
        assert subject_agent.activity.activity_type == expected_type
        assert subject_agent.activity.is_active
        assert len(stmt.evidence) == 1
def convert_to_bel(
    nodes: Dict[str, Dict],
    interactions: List[Dict],
    pathway_info: Dict,
    hgnc_manager: HgncManager,
    chebi_manager: ChebiManager,
) -> BELGraph:
    """Convert RDF graph dictionary into BEL graph.

    :param nodes: node dictionaries, passed on to ``nodes_to_bel``
    :param interactions: one mapping per interaction; each is read through
        its ``'participants'`` and ``'metadata'`` keys
    :param pathway_info: pathway metadata; the keys ``'uri_reactome_id'``,
        ``'comment'`` and ``'display_name'`` are read
    :param hgnc_manager: HGNC manager, passed on to ``nodes_to_bel``
    :param chebi_manager: ChEBI manager, passed on to ``nodes_to_bel``
    :return: the populated BEL graph
    """
    uri_id = pathway_info['uri_reactome_id']
    if uri_id != UNKNOWN:
        # Only the last component of the parsed URI is needed here.
        _, _, _, identifier = parse_id_uri(uri_id)
    else:
        identifier = UNKNOWN

    description = pathway_info['comment']
    if isinstance(description, set):
        # Sets have no stable iteration order; sort so that the resulting
        # description is reproducible between runs.
        description = '\n'.join(sorted(description))
    elif isinstance(description, list):
        description = '\n'.join(description)

    graph = BELGraph(
        name=pathway_info['display_name'],
        version='1.0.0',
        description=description,
        authors="Josep Marín-Llaó, Daniel Domingo-Fernández & Sarah Mubeen",
        contact='*****@*****.**',
    )
    add_bel_metadata(graph)
    graph.graph['pathway_id'] = identifier

    bel_nodes = nodes_to_bel(nodes, graph, hgnc_manager, chebi_manager)

    for interaction in interactions:
        add_edges(
            graph,
            interaction['participants'],
            bel_nodes,
            interaction['metadata'],
        )

    return graph
def test_get_node_by_namespace(self):
    """Test getting nodes with a given namespace."""
    graph = BELGraph()
    a = Protein(namespace='N1', name='a')
    b = Protein(namespace='N1', name='b')
    c = Protein(namespace='N2', name='c')
    d = Protein(namespace='N3', name='d')
    for entity in (a, b, c, d):
        graph.add_node_from_data(entity)

    # A single namespace given as a string
    n1_nodes = set(get_nodes_by_namespace(graph, 'N1'))
    self.assertIn(a, n1_nodes)
    self.assertIn(b, n1_nodes)
    self.assertNotIn(c, n1_nodes)
    self.assertNotIn(d, n1_nodes)

    # Several namespaces given as a tuple
    n1_n2_nodes = set(get_nodes_by_namespace(graph, ('N1', 'N2')))
    self.assertIn(a, n1_n2_nodes)
    self.assertIn(b, n1_n2_nodes)
    self.assertIn(c, n1_n2_nodes)
    self.assertNotIn(d, n1_n2_nodes)
def test_object_has_secretion(self):
    """p(MGI:Il4) increases sec(p(MGI:Cxcl1))"""
    graph = BELGraph()
    il4 = graph.add_node_from_data(protein(name='Il4', namespace='MGI'))
    cxcl1 = graph.add_node_from_data(protein(name='Cxcl1', namespace='MGI'))

    evidence_text = (
        'Compared with controls treated with culture medium alone, IL-4 and IL-5 induced significantly '
        'higher levels of MIP-2 and KC production; IL-4 also increased the production of MCP-1 '
        '(Fig. 2, A and B)....we only tested the effects of IL-3, IL-4, IL-5, and IL-13 on chemokine '
        'expression and cellular infiltration....Recombinant cytokines were used, ... to treat naive '
        'BALB/c mice.'
    )
    graph.add_increases(
        il4,
        cxcl1,
        citation='10072486',
        evidence=evidence_text,
        annotations={'Species': '10090', 'MeSH': 'bronchoalveolar lavage fluid'},
        object_modifier=secretion(),
    )

    # Subject side: unmodified, only has an outgoing causal edge
    self.assertFalse(is_translocated(graph, il4))
    self.assertFalse(is_degraded(graph, il4))
    self.assertFalse(has_activity(graph, il4))
    self.assertFalse(has_causal_in_edges(graph, il4))
    self.assertTrue(has_causal_out_edges(graph, il4))

    # Object side: carries the secretion modifier, only has an incoming
    # causal edge
    self.assertTrue(is_translocated(graph, cxcl1))
    self.assertFalse(is_degraded(graph, cxcl1))
    self.assertFalse(has_activity(graph, cxcl1))
    self.assertTrue(has_causal_in_edges(graph, cxcl1))
    self.assertFalse(has_causal_out_edges(graph, cxcl1))
def test_insert_annotation(self, mock_get):
    """Check creating, querying and dropping the cell line annotation."""
    cell_name = '1321N1 cell'
    cell_identifier = 'CLO_0001072'

    # The store starts empty
    self.assertEqual(0, self.manager.count_annotations())
    self.assertEqual(0, self.manager.count_annotation_entries())

    annotation = self.manager.get_or_create_annotation(CELL_LINE_URL)
    self.assertIsNotNone(annotation)
    self.assertEqual(CELL_LINE_URL, annotation.url)

    # Look up a single entry by name
    single_entry = self.manager.get_namespace_entry(CELL_LINE_URL, cell_name)
    self.assertEqual(cell_name, single_entry.name)
    self.assertEqual(cell_identifier, single_entry.identifier)

    # Look up entries by a list of names
    named_entries = self.manager.get_annotation_entries_by_names(
        CELL_LINE_URL, [cell_name])
    self.assertIsNotNone(named_entries)
    self.assertEqual(1, len(named_entries))
    first_named = named_entries[0]
    self.assertEqual(cell_name, first_named.name)
    self.assertEqual(cell_identifier, first_named.identifier)

    # Resolve the same entry through a graph's edge-data annotations
    graph = BELGraph()
    graph.annotation_url[CELL_LINE_KEYWORD] = CELL_LINE_URL
    data = {ANNOTATIONS: {CELL_LINE_KEYWORD: {cell_name: True}}}

    by_url = dict(
        self.manager._iter_from_annotations_dict(
            graph, annotations_dict=data[ANNOTATIONS]))
    self.assertIn(CELL_LINE_URL, by_url)
    self.assertIn(cell_name, by_url[CELL_LINE_URL])

    data_entries = self.manager._get_annotation_entries_from_data(graph, data)
    self.assertIsNotNone(data_entries)
    self.assertEqual(1, len(data_entries))
    first_from_data = data_entries[0]
    self.assertEqual(cell_name, first_from_data.name)
    self.assertEqual(cell_identifier, first_from_data.identifier)

    # Dropping by URL leaves the store empty again
    self.manager.drop_namespace_by_url(CELL_LINE_URL)
    self.assertEqual(0, self.manager.count_annotations())
    self.assertEqual(0, self.manager.count_annotation_entries())
def test_p1_active(self):
    """cat(p(HGNC:HSD11B1)) increases deg(a(CHEBI:cortisol))"""
    graph = BELGraph()
    enzyme = graph.add_node_from_data(protein(name='HSD11B1', namespace='HGNC'))
    cortisol = graph.add_node_from_data(
        abundance(name='cortisol', namespace='CHEBI', identifier='17650'))

    online_citation = {
        CITATION_TYPE: CITATION_TYPE_ONLINE,
        CITATION_REFERENCE: 'https://www.ncbi.nlm.nih.gov/gene/3290',
    }
    summary = (
        "Entrez Gene Summary: Human: The protein encoded by this gene is a microsomal enzyme that "
        "catalyzes the conversion of the stress hormone cortisol to the inactive metabolite cortisone. "
        "In addition, the encoded protein can catalyze the reverse reaction, the conversion of cortisone "
        "to cortisol. Too much cortisol can lead to central obesity, and a particular variation in this "
        "gene has been associated with obesity and insulin resistance in children. Two transcript "
        "variants encoding the same protein have been found for this gene."
    )
    graph.add_qualified_edge(
        enzyme,
        cortisol,
        relation=INCREASES,
        citation=online_citation,
        evidence=summary,
        annotations={'Species': '9606'},
        subject_modifier=activity('cat'),
        object_modifier=degradation(),
    )

    # Subject carries only the activity modifier
    self.assertFalse(is_translocated(graph, enzyme))
    self.assertFalse(is_degraded(graph, enzyme))
    self.assertTrue(has_activity(graph, enzyme))

    # Object carries only the degradation modifier
    self.assertFalse(is_translocated(graph, cortisol))
    self.assertTrue(is_degraded(graph, cortisol))
    self.assertFalse(has_activity(graph, cortisol))
def test_no_infer_on_rna_variants(self):
    """Test that expansion doesn't occur on RNA variants."""
    rna_variant = rna('HGNC', n(), variants=[hgvs(n())])
    graph = BELGraph()
    graph.add_node_from_data(rna_variant)

    # Counts right after adding the RNA variant
    self.assertEqual(2, graph.number_of_nodes())
    self.assertEqual(1, graph.number_of_edges())

    enrich_protein_and_rna_origins(graph)

    # Enrichment adds exactly one node and one edge
    self.assertEqual(3, graph.number_of_nodes())
    self.assertEqual(2, graph.number_of_edges())
def test_orthologus_mapping(self):
    """Check that an orthology edge maps the HGNC node to its MGI partner."""
    mouse_node = 'Ccl2'
    human_node = 'CCL2'
    graph = BELGraph()
    graph.add_node(mouse_node, attr_dict={NAMESPACE: 'MGI', NAME: 'Ccl2'})
    graph.add_node(human_node, attr_dict={NAMESPACE: 'HGNC', NAME: 'CCL2'})
    graph.add_edge(human_node, mouse_node, **{RELATION: ORTHOLOGOUS})

    mapping = get_mapped_nodes(graph, 'HGNC', {'CCL2'})

    self.assertEqual(1, len(mapping))
    self.assertIn(human_node, mapping)
    self.assertEqual({mouse_node}, mapping[human_node])
def test_node_exclusion_tuples(self):
    """Check exclusion predicates built from one, two and zero nodes."""
    graph = BELGraph()
    u = protein(name='S100b', namespace='MGI')
    v = abundance(name='nitric oxide', namespace='CHEBI')
    w = abundance(name='cortisol', namespace='CHEBI', identifier='17650')
    for entity in (u, v, w):
        graph.add_node_from_data(entity)

    # (nodes to exclude, whether u / v / w should pass the predicate)
    scenarios = (
        ([u], (False, True, True)),
        ([u, v], (False, False, True)),
        ([], (True, True, True)),
    )
    for excluded, expectations in scenarios:
        predicate = node_exclusion_predicate_builder(excluded)
        for entity, should_pass in zip((u, v, w), expectations):
            check = self.assertTrue if should_pass else self.assertFalse
            check(predicate(graph, entity))
def test_multiple(self):
    """Test building a node predicate with multiple functions."""
    predicate = function_inclusion_filter_builder([GENE, PROTEIN])

    protein_node = protein(n(), n())
    gene_node = gene(n(), n())
    process_node = bioprocess(n(), n())

    graph = BELGraph()
    for entity in (protein_node, gene_node, process_node):
        graph.add_node_from_data(entity)

    protein_key = protein_node.as_tuple()
    gene_key = gene_node.as_tuple()
    process_key = process_node.as_tuple()

    self.assertIn(protein_key, graph)
    self.assertIn(gene_key, graph)
    self.assertIn(process_key, graph)

    # Genes and proteins are included, bioprocesses are not
    self.assertTrue(predicate(graph, protein_key))
    self.assertTrue(predicate(graph, gene_key))
    self.assertFalse(predicate(graph, process_key))

    # Inverting the predicate flips every answer
    inverted = invert_node_predicate(predicate)
    self.assertFalse(inverted(graph, protein_key))
    self.assertFalse(inverted(graph, gene_key))
    self.assertTrue(inverted(graph, process_key))
def test_node_inclusion_data(self):
    """Check ``one_of`` predicates built from one, two and zero nodes."""
    graph = BELGraph()
    u = protein(name='S100b', namespace='MGI')
    v = abundance(name='nitric oxide', namespace='CHEBI')
    w = abundance(name='cortisol', namespace='CHEBI', identifier='17650')
    for entity in (u, v, w):
        graph.add_node_from_data(entity)

    # (nodes to include, whether u / v / w should pass the predicate)
    scenarios = (
        ([u], (True, False, False)),
        ([u, v], (True, True, False)),
        ([], (False, False, False)),
    )
    for included, expectations in scenarios:
        predicate = one_of(included)
        for entity, should_pass in zip((u, v, w), expectations):
            check = self.assertTrue if should_pass else self.assertFalse
            check(predicate(entity))
def test_subject_has_secretion(self):
    """sec(p(MGI:S100b)) increases a(CHEBI:"nitric oxide")"""
    graph = BELGraph()
    s100b = graph.add_node_from_data(protein(name='S100b', namespace='MGI'))
    nitric_oxide = graph.add_node_from_data(
        abundance(name='nitric oxide', namespace='CHEBI'))

    evidence_text = (
        'S100B protein is also secreted by astrocytes and acts on these cells to stimulate nitric oxide '
        'secretion in an autocrine manner.'
    )
    graph.add_increases(
        s100b,
        nitric_oxide,
        citation='11180510',
        evidence=evidence_text,
        annotations={'Species': '10090', 'Cell': 'astrocyte'},
        subject_modifier=secretion(),
    )

    # Subject side: carries the secretion modifier, only has an outgoing
    # causal edge
    self.assertTrue(is_translocated(graph, s100b))
    self.assertFalse(is_degraded(graph, s100b))
    self.assertFalse(has_activity(graph, s100b))
    self.assertFalse(has_causal_in_edges(graph, s100b))
    self.assertTrue(has_causal_out_edges(graph, s100b))

    # Object side: unmodified, only has an incoming causal edge
    self.assertFalse(is_translocated(graph, nitric_oxide))
    self.assertFalse(is_degraded(graph, nitric_oxide))
    self.assertFalse(has_activity(graph, nitric_oxide))
    self.assertTrue(has_causal_in_edges(graph, nitric_oxide))
    self.assertFalse(has_causal_out_edges(graph, nitric_oxide))
def get_tau_references(graph: BELGraph, hgnc_gene_symbol: str = 'MAPT') -> List[Tuple[str, str]]:
    """Get a sorted list of references for edges involving the given protein.

    An edge contributes its citation when its data has a ``CITATION`` entry
    and a truthy ``LINE`` entry, and its source or target satisfies
    ``is_hgnc_protein`` for ``hgnc_gene_symbol``.

    :param graph: A BEL graph
    :param hgnc_gene_symbol: The HGNC gene symbol to look for
    :return: De-duplicated ``(citation type, citation reference)`` pairs in
        sorted order
    """
    # ``sorted`` already returns a list, and the set comprehension removes
    # duplicate citations before sorting.
    return sorted({
        (
            data[CITATION][CITATION_TYPE],
            data[CITATION][CITATION_REFERENCE],
        )
        for source, target, data in graph.edges(data=True)
        if (
            CITATION in data
            and data.get(LINE)
            and (is_hgnc_protein(source, hgnc_gene_symbol) or is_hgnc_protein(target, hgnc_gene_symbol))
        )
    })
def test_separate_unstable(self):
    """Check detection of a correlation triangle with one negative edge."""
    graph = BELGraph()
    a = Protein('HGNC', 'A')
    b = Protein('HGNC', 'B')
    c = Protein('HGNC', 'C')
    d = Protein('HGNC', 'D')
    for entity in (a, b, c, d):
        graph.add_node_from_data(entity)

    correlations = (
        (a, b, POSITIVE_CORRELATION),
        (a, c, POSITIVE_CORRELATION),
        (c, b, NEGATIVE_CORRELATION),
    )
    for source, target, relation in correlations:
        graph.add_edge(source, target, **{RELATION: relation})

    infer_missing_two_way_edges(graph)

    correlation_graph = get_correlation_graph(graph)

    for entity in (a, b, c):
        self.assertIn(entity, correlation_graph)

    self.assertTrue(correlation_graph.has_edge(a, b))
    self.assertTrue(correlation_graph.has_edge(a, c))
    self.assertTrue(correlation_graph.has_edge(b, c))
    self.assertIn(POSITIVE_CORRELATION, correlation_graph[a][b])
    self.assertIn(POSITIVE_CORRELATION, correlation_graph[a][c])
    self.assertIn(NEGATIVE_CORRELATION, correlation_graph[c][b])

    triangles = tuple(get_correlation_triangles(correlation_graph))
    self.assertEqual(1, len(triangles))
    self.assertEqual((a, b, c), triangles[0])

    unstable = tuple(get_separate_unstable_correlation_triples(graph))
    self.assertEqual(1, len(unstable))
    self.assertEqual((a, b, c), unstable[0])
def add_to_bel_graph(self, graph: BELGraph) -> str:
    """Add this interaction to a BEL graph.

    An ``activity_flag`` of ``'A'`` is added with ``graph.add_increases``;
    any other flag is added with ``graph.add_inhibits``. Both edge kinds
    share the same citation, evidence and annotations.

    :param graph: The BEL graph to which the edge is added
    :return: Whatever the graph's edge-adding method returns
    """
    # Only the edge-adding method differs between the two cases, so select
    # it first and build the shared arguments once.
    if self.activity_flag == 'A':
        add_edge = graph.add_increases
    else:
        add_edge = graph.add_inhibits

    return add_edge(
        self.chemical.as_pybel(),
        self.target.as_pybel(),
        citation='28316655',
        evidence='from ExCAPE-DB',
        annotations={
            'pXC50': self.pxc50,
            'activity_flag': self.activity_flag,
        },
    )
def collapse_to_protein_interactions(graph: BELGraph) -> BELGraph:
    """Collapse to a graph made of only causal gene/protein edges.

    Works on a copy of ``graph``: the copy is passed to
    ``collapse_to_genes``, then filtered down to edges that satisfy both
    ``has_polarity`` and the gene-to-gene check below.
    """
    collapsed: BELGraph = graph.copy()
    collapse_to_genes(collapsed)

    def is_edge_ppi(_: BELGraph, u: BaseEntity, v: BaseEntity, __: str) -> bool:
        """Check if both endpoints of an edge are genes."""
        if not isinstance(u, Gene):
            return False
        return isinstance(v, Gene)

    predicates = [has_polarity, is_edge_ppi]
    return get_subgraph_by_edge_filter(collapsed, edge_predicates=predicates)
def _get_filtered_variants_of(
    graph: BELGraph,
    node: Protein,
    modifications: Collection[str],
) -> Set[Protein]:
    """Collect the variants of ``node`` that carry a requested modification.

    A target is kept when the edge starts at ``node``, its relation is
    ``HAS_VARIANT``, the target passes
    ``pybel.struct.has_protein_modification``, and at least one of its
    ``ProteinModification`` variants has a name in ``modifications``.
    """

    def _has_requested_modification(candidate) -> bool:
        """Check whether any protein modification name was requested."""
        for variant in candidate.variants:
            if isinstance(variant, ProteinModification) and variant.name in modifications:
                return True
        return False

    selected = set()
    for source, target, _key, data in graph.edges(keys=True, data=True):
        is_variant_edge = source == node and data[RELATION] == HAS_VARIANT
        if not is_variant_edge:
            continue
        if pybel.struct.has_protein_modification(target) and _has_requested_modification(target):
            selected.add(target)
    return selected
def test_gtpactivation():
    """Check that a gtp-active source directly increasing kinase activity
    yields a GtpActivation."""
    ras = Protein(name='KRAS', namespace='HGNC')
    raf = Protein(name='BRAF', namespace='HGNC')
    graph = BELGraph()
    graph.add_directly_increases(
        ras,
        raf,
        source_modifier=activity(name='gtp'),
        target_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    # Show the offending statement if the type is not the expected one.
    assert isinstance(stmt, GtpActivation), stmt
    subject_agent = stmt.subj
    assert subject_agent.name == 'KRAS'
    assert subject_agent.activity.activity_type == 'gtpbound'
    assert subject_agent.activity.is_active is True
    assert stmt.obj.name == 'BRAF'
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'kinase'
    assert len(stmt.evidence) == 1
def test_active_form():
    """Check that a modified protein increasing its own activity is an
    ActiveForm carrying the modification as a condition."""
    modified_p53 = protein(
        name='TP53',
        namespace='HGNC',
        variants=[pmod('Ph', position=33, code='Ser')],
    )
    plain_p53 = protein(name='TP53', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        modified_p53,
        plain_p53,
        object_modifier=activity(name='tscript'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, ActiveForm)
    assert stmt.activity == 'transcription'
    assert stmt.is_active is True

    agent = stmt.agent
    assert agent.name == 'TP53'
    assert len(agent.mods) == 1
    condition = agent.mods[0]
    assert condition.mod_type == 'phosphorylation'
    assert condition.residue == 'S'
    assert condition.position == '33'
    assert len(stmt.evidence) == 1
def test_doi_evidence():
    """Test processing edges with DOI citations."""
    doi = '123456'
    kinase = Protein(name='MAP2K1', namespace='HGNC')
    substrate = Protein(name='MAPK1', namespace='HGNC')
    graph = BELGraph()
    graph.annotation_list['TextLocation'] = {'Abstract'}
    graph.add_directly_increases(
        kinase,
        substrate,
        evidence='Some evidence.',
        citation=('doi', doi),
        annotations={"TextLocation": 'Abstract'},
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    evidences = pbp.statements[0].evidence
    assert len(evidences) == 1
    ev = evidences[0]
    # A DOI citation sets no PMID; the DOI is recorded in text_refs instead.
    assert ev.pmid is None
    assert 'DOI' in ev.text_refs
    assert ev.text_refs['DOI'] == doi
def test_indirect_gef_is_activation():
    """Check that an indirect increase of gtp activity is an Activation.

    The edge uses ``increases`` rather than ``directlyIncreases``; the
    expected statement is an Activation with ``gtpbound`` object activity.
    """
    gef = Protein(name='SOS1', namespace='HGNC')
    ras = Protein(name='KRAS', namespace='HGNC')
    graph = BELGraph()
    graph.add_increases(
        gef,
        ras,
        source_modifier=activity(),
        target_modifier=activity(name='gtp'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Activation)
    assert stmt.subj.name == 'SOS1'
    assert stmt.obj.name == 'KRAS'
    assert stmt.subj.activity.activity_type == 'activity'
    assert stmt.subj.activity.is_active is True
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'gtpbound'
    assert len(stmt.evidence) == 1
def add_to_bel_graph(self, graph: BELGraph) -> str:
    """Add this relationship as an edge to the BEL graph.

    :param graph: The BEL graph to which the ``decreases`` edge is added
    :return: Whatever ``graph.add_decreases`` returns
    """
    source = self.compound.as_bel()
    target = self.umls.as_bel()
    sider_annotations = {
        'Database': 'SIDER',
        'SIDER_MEDDRA_TYPE': self.meddra_type.name,
        'SIDER_DETECTION': self.detection.name,
    }
    return graph.add_decreases(
        source,
        target,
        citation='26481350',
        evidence='Extracted from SIDER',
        annotations=sider_annotations,
    )
def test_convert_dephosphorylates(self):
    """Test the conversion of a BEL statement like ``act(p(X)) -| p(Y, pmod(Ph))``."""
    source_graph = BELGraph()
    source_graph.add_directly_decreases(
        cdk5,
        p_tau,
        evidence=n(),
        citation=n(),
        subject_modifier=activity('kin'),
    )

    reified_edge = 0
    expected = self.help_make_simple_expected_graph(
        cdk5,
        p_tau,
        PHOSPHORYLATES,
        reified_edge,
        self.help_causal_decreases,
    )

    actual = reify_bel_graph(source_graph)

    self.help_test_graphs_equal(expected, actual)
def test_phosphorylation_one_site_with_evidence():
    """Check a single-site phosphorylation edge and the evidence it carries.

    One directlyIncreases edge onto a target with a single ``pmod`` variant
    must produce exactly one Phosphorylation statement whose evidence mirrors
    the edge's text, citation, hash and annotations.
    """
    graph = BELGraph()
    source_text = 'Some evidence.'
    source_pmid = '123456'
    mek = protein(name='MAP2K1', namespace='HGNC')
    phospho_erk = protein(
        name='MAPK1',
        namespace='HGNC',
        variants=[pmod('Ph', position=185, code='Thr')],
    )
    edge_hash = graph.add_directly_increases(
        mek,
        phospho_erk,
        evidence=source_text,
        citation=source_pmid,
        annotations={"TextLocation": 'Abstract'},
    )

    processor = bel.process_pybel_graph(graph)
    assert processor.statements
    assert len(processor.statements) == 1
    phosphorylation = processor.statements[0]
    assert isinstance(phosphorylation, Phosphorylation)
    assert phosphorylation.residue == 'T'
    assert phosphorylation.position == '185'
    assert phosphorylation.enz.name == 'MAP2K1'
    assert phosphorylation.enz.mods == []
    assert phosphorylation.sub.name == 'MAPK1'
    assert phosphorylation.sub.mods == []

    # Check evidence
    assert len(phosphorylation.evidence) == 1
    evidence = phosphorylation.evidence[0]
    assert evidence.source_api == 'bel'
    assert evidence.source_id == edge_hash
    assert evidence.pmid == source_pmid
    assert evidence.text == source_text
    assert evidence.annotations == {
        'bel': 'p(HGNC:MAP2K1) directlyIncreases '
               'p(HGNC:MAPK1, pmod(Ph, Thr, 185))',
    }
    assert evidence.epistemics == {'direct': True, 'section_type': 'abstract'}
def test_infer_on_sialic_acid_example(self):
    """Test enriching and then pruning protein/RNA origins for TREM2."""
    graph = BELGraph()
    graph.add_node_from_data(trem2_protein)

    # Initially only the protein is present
    self.assert_in_graph(trem2_protein, graph)
    self.assert_not_in_graph(trem2_gene, graph)
    self.assert_not_in_graph(trem2_rna, graph)

    # Enrichment adds the gene and the RNA
    enrich_protein_and_rna_origins(graph)
    self.assert_in_graph(trem2_gene, graph)
    self.assert_in_graph(trem2_rna, graph)

    # Pruning removes them again and leaves the protein intact
    prune_protein_rna_origins(graph)
    self.assert_not_in_graph(trem2_gene, graph)
    self.assert_not_in_graph(trem2_rna, graph)
    self.assert_in_graph(trem2_protein, graph)

    protein_key = trem2_protein.as_tuple()
    self.assertIn(FUNCTION, graph.node[protein_key])
    self.assertIn(PROTEIN, graph.node[protein_key][FUNCTION])
def test_strip_annotations(self):
    """Test the strip_annotation function."""
    # NOTE(review): both endpoints are built with name 'X', so they
    # presumably resolve to the same node -- confirm this is intended.
    x = protein(namespace='HGNC', name='X')
    y = protein(namespace='HGNC', name='X')
    edge_key = 1

    graph = BELGraph()
    graph.add_qualified_edge(
        x,
        y,
        relation=INCREASES,
        citation='123456',
        evidence='Fake',
        annotations={
            'A': {'B': True},
        },
        key=edge_key,
    )

    source_key = x.as_tuple()
    target_key = y.as_tuple()
    self.assertIn(ANNOTATIONS, graph.edge[source_key][target_key][edge_key])

    strip_annotations(graph)

    self.assertNotIn(ANNOTATIONS, graph.edge[source_key][target_key][edge_key])
def test_separate_unstable(self):
    """Check detection of a correlation triangle with one negative edge."""
    graph = BELGraph()
    a = (PROTEIN, 'HGNC', 'A')
    b = (PROTEIN, 'HGNC', 'B')
    c = (PROTEIN, 'HGNC', 'C')
    d = (PROTEIN, 'HGNC', 'D')
    for node_tuple in (a, b, c, d):
        graph.add_simple_node(*node_tuple)

    correlations = (
        (a, b, POSITIVE_CORRELATION),
        (a, c, POSITIVE_CORRELATION),
        (c, b, NEGATIVE_CORRELATION),
    )
    for source, target, relation in correlations:
        graph.add_edge(source, target, **{RELATION: relation})

    infer_missing_two_way_edges(graph)

    correlation_graph = get_correlation_graph(graph)

    for node_tuple in (a, b, c):
        self.assertIn(node_tuple, correlation_graph)

    self.assertTrue(correlation_graph.has_edge(a, b))
    self.assertTrue(correlation_graph.has_edge(a, c))
    self.assertTrue(correlation_graph.has_edge(b, c))
    self.assertIn(POSITIVE_CORRELATION, correlation_graph.edge[a][b])
    self.assertIn(POSITIVE_CORRELATION, correlation_graph.edge[a][c])
    self.assertIn(NEGATIVE_CORRELATION, correlation_graph.edge[c][b])

    triangles = tuple(get_correlation_triangles(correlation_graph))
    self.assertEqual(1, len(triangles))
    self.assertEqual((a, b, c), triangles[0])

    unstable = tuple(get_separate_unstable_correlation_triples(graph))
    self.assertEqual(1, len(unstable))
    self.assertEqual((a, b, c), unstable[0])
def test_simple(self, mock):
    """This test checks that the network can be added and dropped"""
    edge_annotations = {
        'Disease': {
            'Disease1': True,
        },
        'Cell': {
            'Cell1': True,
        },
    }
    graph = BELGraph(name='test', version='0.0.0')
    graph.add_increases(
        yfg1,
        yfg2,
        evidence=test_evidence_text,
        citation=test_citation_dict,
        annotations=edge_annotations,
    )

    make_dummy_namespaces(self.manager, graph)
    make_dummy_annotations(self.manager, graph)

    # Insert the graph, then drop it again by its identifier
    stored = self.manager.insert_graph(graph, store_parts=True)
    self.manager.drop_network_by_id(stored.id)
def test_triangle_has_namespace(self):
    """Check equivalence lookups on a triangle with one extra HGNC node."""
    graph = BELGraph()
    a = Protein(namespace='A', name='CD33')
    b = Protein(namespace='B', identifier='1659')
    c = Protein(namespace='C', identifier='1659')
    d = Protein(namespace='HGNC', identifier='1659')

    # a, b and c form a triangle; d hangs off c
    for left, right in ((a, b), (b, c), (c, a), (c, d)):
        graph.add_equivalence(left, right)

    members = (a, b, c, d)

    # Every member sees the full set of equivalent nodes
    for member in members:
        self.assertEqual({a, b, c, d}, graph.get_equivalent_nodes(member))

    # Every member is reported as having the HGNC namespace
    for member in members:
        self.assertTrue(graph.node_has_namespace(member, 'HGNC'))
def debug_pathway_info(bel_graph: BELGraph, pathway_path: str, **kwargs):
    """Log debug information about the pathway graph representation.

    :param bel_graph: bel graph
    :param pathway_path: path of the pathway; only its base name is logged
    :param kwargs: if a ``statistics`` entry is given, its ``'RDF nodes'``
        and ``'RDF interactions'`` values are logged as well
    """
    logger.debug('Pathway id: %s', os.path.basename(pathway_path))
    logger.debug('Pathway Name: %s', bel_graph.name)

    node_count = bel_graph.number_of_nodes()
    edge_count = bel_graph.number_of_edges()
    logger.debug('Nodes imported to BEL: %s', node_count)
    logger.debug('Edges imported to BEL: %s', edge_count)

    if 'statistics' not in kwargs:
        return

    rdf_statistics = kwargs['statistics']
    logger.debug('RDF Nodes statistics: %s', rdf_statistics['RDF nodes'])
    logger.debug('RDF Edges statistics: %s', rdf_statistics['RDF interactions'])
def get_leaves_by_type(
    graph: BELGraph,
    func: Optional[str] = None,
    prune_threshold: int = 1,
) -> Iterable[BaseEntity]:
    """Iterate over all nodes in graph with at most ``prune_threshold`` connections.

    Useful for gene and RNA. Allows for optional filter by function type.

    :param graph: A BEL graph
    :param func: If set, filters by the node's function from :mod:`pybel.constants` like
        :data:`pybel.constants.GENE`, :data:`pybel.constants.RNA`, :data:`pybel.constants.PROTEIN`, or
        :data:`pybel.constants.BIOPROCESS`
    :param prune_threshold: Yields nodes whose in-degree plus out-degree is less than or equal to this
        number. Defaults to :code:`1`
    :return: An iterable over the matching nodes
    """
    # Iterate over the nodes themselves. Iterating ``graph.nodes(data=True)``
    # yields ``(node, data)`` pairs, which have no ``function`` attribute and
    # cannot be passed to the degree lookups.
    for node in graph:
        if func and func != node.function:
            continue
        if graph.in_degree(node) + graph.out_degree(node) <= prune_threshold:
            yield node
def test_complex_stmt_with_activation():
    """Check that a complex subject yields a Complex plus an Activation.

    The Activation's subject is the first complex member, with the second
    member attached as a bound condition.
    """
    raf = protein(name='BRAF', namespace='HGNC')
    mek = protein(name='MAP2K1', namespace='HGNC')
    erk = protein(name='MAPK1', namespace='HGNC')
    raf_mek = complex_abundance([raf, mek])
    graph = BELGraph()
    graph.add_directly_increases(
        raf_mek,
        erk,
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    pbp = bel.process_pybel_graph(graph)
    assert pbp.statements
    assert len(pbp.statements) == 2

    complex_stmt = pbp.statements[0]
    assert isinstance(complex_stmt, Complex)
    assert len(complex_stmt.agent_list()) == 2
    member_names = sorted([ag.name for ag in complex_stmt.agent_list()])
    assert member_names == ['BRAF', 'MAP2K1']
    assert complex_stmt.evidence

    activation_stmt = pbp.statements[1]
    assert isinstance(activation_stmt, Activation)
    assert activation_stmt.subj.name == 'BRAF'
    assert activation_stmt.subj.bound_conditions[0].agent.name == 'MAP2K1'
    assert activation_stmt.obj.name == 'MAPK1'
    assert activation_stmt.obj.activity is None
    assert activation_stmt.obj_activity == 'kinase'