def test_subject_transloc_active_form():
    """Check that a translocated subject produces an ActiveForm at the to-location.

    The graph holds a single ``increases`` edge from MAP2K1 to MAP2K1 whose
    source carries a translocation and whose target carries ``kin`` activity.
    """
    source = Protein(name='MAP2K1', namespace='HGNC')
    target = Protein(name='MAP2K1', namespace='HGNC')
    origin = Entity(namespace='GOCC', name='intracellular')
    destination = Entity(namespace='GOCC', name='extracellular space')
    movement = translocation(from_loc=origin, to_loc=destination)

    graph = BELGraph()
    graph.add_increases(
        source,
        target,
        source_modifier=movement,
        target_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )

    processor = bel.process_pybel_graph(graph)
    assert processor.statements
    assert len(processor.statements) == 1

    active_form = processor.statements[0]
    assert isinstance(active_form, ActiveForm)
    # The agent takes the translocation's to-location and has no activity of its own
    assert active_form.agent.name == 'MAP2K1'
    assert active_form.agent.location == 'extracellular space'
    assert active_form.agent.activity is None
    assert active_form.activity == 'kinase'
    assert active_form.is_active is True
def test_get_annotation_values(self):
    """Check the annotation keys, counts, and values read from ``sialic_acid_graph``."""
    confidence_values = {
        Entity(namespace='Confidence', identifier='High'),
        Entity(namespace='Confidence', identifier='Low'),
    }
    species_values = {
        Entity(namespace='Species', identifier='9606'),
    }
    expected = {
        'Confidence': confidence_values,
        'Species': species_values,
    }

    # Annotation keys in use
    self.assertEqual({'Confidence', 'Species'}, get_annotations(sialic_acid_graph))

    # Per-key usage counts
    counts = dict(count_annotations(sialic_acid_graph))
    self.assertEqual({'Confidence': 8, 'Species': 8}, counts)

    # Values grouped by annotation key
    values_by_key = get_annotation_values_by_annotation(sialic_acid_graph)
    self.assertEqual(expected, values_by_key)

    # Values for one annotation key only
    confidence_only = get_annotation_values(sialic_acid_graph, 'Confidence')
    self.assertEqual(expected['Confidence'], confidence_only)
def test_object_has_translocation(self):
    """Test predicates on ``p(HGNC:EGF) increases tloc(p(HGNC:VCP), GO:0005634, GO:0005737)``.

    Only the target (VCP) carries the translocation modifier, so only it
    should be reported as translocated; the source (EGF) should only have a
    causal out-edge and the target only a causal in-edge.
    """
    g = BELGraph()
    # The name was previously misspelled 'EFG'; the docstring and the evidence
    # text (epidermal growth factor) both refer to EGF.
    u = protein(name='EGF', namespace='HGNC')
    v = protein(name='VCP', namespace='HGNC')

    g.add_increases(
        u,
        v,
        citation='10855792',
        evidence="Although found predominantly in the cytoplasm and, less abundantly, in the nucleus, VCP can be "
                 "translocated from the nucleus after stimulation with epidermal growth factor.",
        annotations={'Species': '9606'},
        target_modifier=translocation(
            from_loc=Entity(namespace='GO', identifier='0005634'),
            to_loc=Entity(namespace='GO', identifier='0005737'),
        ),
    )

    # Source node: no modifiers, outgoing causal edge only
    self.assertFalse(is_translocated(g, u))
    self.assertFalse(is_degraded(g, u))
    self.assertFalse(has_activity(g, u))
    self.assertFalse(has_causal_in_edges(g, u))
    self.assertTrue(has_causal_out_edges(g, u))

    # Target node: translocated, incoming causal edge only
    self.assertTrue(is_translocated(g, v))
    self.assertFalse(is_degraded(g, v))
    self.assertFalse(has_activity(g, v))
    self.assertTrue(has_causal_in_edges(g, v))
    self.assertFalse(has_causal_out_edges(g, v))
def test_mixed_2(self):
    """Insert an edge whose subject and object both carry an activity with a location."""
    hdac4 = Protein(namespace='HGNC', name='HDAC4')
    mef2a = Protein(namespace='HGNC', name='MEF2A')
    # NOTE(review): the literal begins with a double-quote character (the
    # original opener was four quotes); kept as-is here.
    evidence_text = (
        '"In the nucleus, HDAC4 associates with the myocyte enhancer factor MEF2A. '
        'Binding of HDAC4 to MEF2A results in the repression of MEF2A transcriptional activation, '
        'a function that requires the deacetylase domain of HDAC4.'
    )

    self.graph.add_directly_increases(
        hdac4,
        mef2a,
        citation='10487761',
        evidence=evidence_text,
        annotations={'Species': '9606'},
        subject_modifier=activity('cat', location=Entity(namespace='GO', name='nucleus')),
        object_modifier=activity('tscript', location=Entity(namespace='GO', name='nucleus')),
    )

    make_dummy_namespaces(self.manager, self.graph)
    make_dummy_annotations(self.manager, self.graph)

    network = self.manager.insert_graph(self.graph)

    self.assertEqual(2, network.nodes.count())
    self.assertEqual(1, network.edges.count())

    # Fetched but not inspected within this block
    edge = network.edges.first()
def test_enrich_wikipathway_protein(self):
    """Enrich two HGNC proteins with WikiPathways memberships and check the resulting graph."""
    manager = self.wikipathways_manager

    # The manager must hold some proteins and pathways
    self.assertNotEqual(0, manager.count_proteins())
    self.assertNotEqual(0, manager.count_pathways())

    graph = BELGraph()

    pathway_1 = dsl.BiologicalProcess(
        namespace='wikipathways',
        identifier='WP1604',
        name='Codeine and Morphine Metabolism',
    )
    pathway_2 = dsl.BiologicalProcess(
        namespace='wikipathways',
        identifier='WP536',
        name='Calcium Regulation in the Cardiac Cell',
    )
    protein_1 = dsl.Protein(
        namespace='hgnc',
        identifier='12553',
        name='UGT2B4',
        xrefs=[Entity(namespace='ncbigene', identifier='7363')],
    )
    # NOTE(review): this xref repeats the ncbigene identifier used for UGT2B4
    # above -- confirm whether that is intended.
    protein_2 = dsl.Protein(
        namespace='hgnc',
        identifier='12554',
        name='UGT2B7',
        xrefs=[Entity(namespace='ncbigene', identifier='7363')],
    )

    hgnc_ids = {protein_1.identifier, protein_2.identifier}

    # Each identifier must resolve on its own; the failure message lists what is available
    for hgnc_id in hgnc_ids:
        found = manager.get_protein_by_hgnc_id(hgnc_id)
        self.assertIsNotNone(
            found,
            msg=f'IDs available: {",".join(p.hgnc_id for p in self.wikipathways_manager.list_proteins())}',
        )

    proteins = manager.get_proteins_by_hgnc_ids(hgnc_ids)
    self.assertEqual(2, len(proteins))

    pathways = manager.get_pathways_by_hgnc_ids(hgnc_ids)
    self.assertEqual(2, len(pathways), msg=', '.join(map(repr, pathways)))

    for node in (protein_1, protein_2):
        graph.add_node_from_data(node)

    manager.enrich_proteins(graph)

    expected_nodes = {
        pathway_1,
        protein_1,
        protein_2,
        pathway_2,
        dsl.Protein(namespace='hgnc', identifier='6264', name='KCNJ3'),
        dsl.Protein(namespace='hgnc', identifier='4403', name='GNG11'),
        dsl.Protein(namespace='hgnc', identifier='4411', name='GNGT1'),
        dsl.Protein(namespace='hgnc', identifier='10001', name='RGS5'),
        dsl.Protein(namespace='hgnc', identifier='50056', name='MIR6869'),
    }
    self.assertEqual(expected_nodes, set(graph), msg='Wrong nodes in graph')

    self.assertEqual(8, graph.number_of_edges())
def test_get_subgraphs_by_annotation_with_sentinel(self):
    """Check that passing a sentinel adds a third subgraph keyed by that sentinel."""
    sentinel = n()
    subgraphs = get_subgraphs_by_annotation(
        self.graph,
        annotation='subgraph',
        sentinel=sentinel,
    )

    self.assertEqual(3, len(subgraphs))

    # Two annotated groups plus the sentinel key
    for identifier in ('1', '2'):
        self.assertIn(Entity(namespace='subgraph', identifier=identifier), subgraphs)
    self.assertIn(sentinel, subgraphs)
def test_get_agent_with_translocation():
    """Check that an agent built with a translocation modifier is placed at the to-location."""
    mapk1 = protein(name='MAPK1', namespace='HGNC')

    # Example edge data: intracellular -> extracellular space
    origin = Entity('GOCC', 'intracellular')
    destination = Entity('GOCC', 'extracellular space')
    modifier = translocation(from_loc=origin, to_loc=destination)

    result = pb.get_agent(mapk1, modifier)

    assert isinstance(result, Agent)
    assert result.name == 'MAPK1'
    assert result.location == 'extracellular space'
def test_unmappable_name(self, *_):
    """Test a name that cannot be resolved to an identifier.

    The expected entity moves to the ``doid`` namespace but keeps only its
    name, with no identifier.
    """
    expected = {
        'Disease': [Entity(namespace='doid', name='Failure')],
    }
    given = {
        'Disease': [Entity(namespace='Disease', name='Failure')],
    }
    self._help(expected, given)
def test_unmappable_category(self, *_):
    """Test a category with no mapping; the expected output equals the input."""
    expected = {
        'Custom Annotation': [Entity(namespace='Custom Annotation', identifier="Custom Value")],
    }
    given = {
        'Custom Annotation': [Entity(namespace='Custom Annotation', identifier="Custom Value")],
    }
    self._help(expected, given)
def test_unmappable_identifier(self, *_):
    """Test an identifier that cannot be resolved.

    The expected entity moves to the ``doid`` namespace and keeps the
    identifier it was given.
    """
    expected = {
        'Disease': [Entity(namespace='doid', identifier='Failure')],
    }
    given = {
        'Disease': [Entity(namespace='Disease', identifier='Failure')],
    }
    self._help(expected, given)
def test_controlled_transloc_loc_cond():
    """Check that a translocation on the object produces no statements.

    Controlled translocations are currently not handled.
    """
    source = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')

    graph = BELGraph()
    movement = translocation(
        from_loc=Entity('GOCC', 'intracellular'),
        to_loc=Entity('GOCC', 'extracellular space'),
    )
    graph.add_increases(
        source,
        target,
        object_modifier=movement,
        evidence="Some evidence.",
        citation='123456',
    )

    processor = bel.process_pybel_graph(graph)
    assert not processor.statements
def test_lookup_by_name(self, *_):
    """Test that an entity given only by name gains its identifier."""
    expected = {
        'Disease': [
            Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    given = {
        'Disease': [Entity(namespace='mesh', name='Parkinson Disease')],
    }
    self._help(expected, given)
def test_subject_degradation_location(self):
    """Compare ``self.add_edge`` results for source degradation with and without a location.

    Matching modifiers must compare equal; a modifier with a location must
    differ from one without.
    """
    # No location on either side
    bare_first = self.add_edge(source_modifier=degradation())
    bare_second = self.add_edge(source_modifier=degradation())
    self.assertEqual(bare_first, bare_second)

    # The same location on both sides
    located_first = self.add_edge(
        source_modifier=degradation(location=Entity(name='somewhere', namespace='GO')),
    )
    located_second = self.add_edge(
        source_modifier=degradation(location=Entity(name='somewhere', namespace='GO')),
    )
    self.assertEqual(located_first, located_second)

    # A location on one side only
    without_location = self.add_edge(source_modifier=degradation())
    with_location = self.add_edge(
        source_modifier=degradation(location=Entity(name='somewhere', namespace='GO')),
    )
    self.assertNotEqual(without_location, with_location)
def test_get_qualified_edge(self):
    """Add one qualified edge, then read back its citation, evidence, and annotations."""
    source = protein(namespace='TEST', name='YFG')
    target = protein(namespace='TEST', name='YFG2')
    for node in (source, target):
        self.graph.add_node_from_data(node)

    evidence_text = n()
    pmid = n()

    edge_key = self.graph.add_increases(
        source,
        target,
        citation=pmid,
        evidence=evidence_text,
        annotations={
            'Species': '9606',
            'Confidence': 'Very High',
        },
    )

    # Citation is stored as a dict with a PubMed namespace and the given identifier
    stored_citation = self.graph.get_edge_citation(source, target, edge_key)
    self.assertIsNotNone(stored_citation)
    self.assertIsInstance(stored_citation, dict)
    self.assertIn(NAMESPACE, stored_citation)
    self.assertEqual(CITATION_TYPE_PUBMED, stored_citation[NAMESPACE])
    self.assertIn(IDENTIFIER, stored_citation)
    self.assertEqual(pmid, stored_citation[IDENTIFIER])

    # Evidence comes back as the same string
    stored_evidence = self.graph.get_edge_evidence(source, target, edge_key)
    self.assertIsNotNone(stored_evidence)
    self.assertIsInstance(stored_evidence, str)
    self.assertEqual(evidence_text, stored_evidence)

    # Annotation values come back as Entity objects under their keys
    stored_annotations = self.graph.get_edge_annotations(source, target, edge_key)
    self.assertIsNotNone(stored_annotations)
    self.assertIsInstance(stored_annotations, dict)
    self.assertIn('Species', stored_annotations)
    self.assertIn(
        Entity(namespace='Species', identifier='9606'),
        stored_annotations['Species'],
    )
    self.assertIn('Confidence', stored_annotations)
    self.assertIn(
        Entity(namespace='Confidence', identifier='Very High'),
        stored_annotations['Confidence'],
    )
def test_lookup_by_name_as_identifier(self, *_):
    """Test lookup by name when the name was put in the identifier slot by mistake."""
    expected = {
        'Disease': [
            Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    given = {
        'Disease': [Entity(namespace='mesh', identifier='Parkinson Disease')],
    }
    self._help(expected, given)
def test_missing_information(self):
    """Test that ``Entity`` and ``protein`` raise ValueError when given only a namespace."""
    for factory in (Entity, protein):
        with self.assertRaises(ValueError):
            factory(namespace='test')
def test_different_key_and_namespace(self):
    """Check the emitted lines when the annotation key differs from the entity's namespace."""
    # Three fresh random values, generated in the same order as the names
    key, namespace, value = (n() for _ in range(3))

    self.graph.annotation_curie.add(key)

    source = Protein(namespace='HGNC', name='YFG1')
    target = Protein(namespace='HGNC', name='YFG')
    self.graph.add_increases(
        source,
        target,
        citation=self.citation,
        evidence=self.evidence,
        annotations={
            key: Entity(namespace=namespace, identifier=value),
        },
    )

    self.assertEqual(2, self.graph.number_of_nodes())
    self.assertEqual(1, self.graph.number_of_edges())

    # The SET line uses the key on the left and "namespace:value" on the right
    expected_lines = [
        f'SET Citation = {{"{CITATION_TYPE_PUBMED}", "{self.citation}"}}\n',
        f'SET SupportingText = "{self.evidence}"',
        f'SET {key} = "{namespace}:{value}"',
        'p(HGNC:YFG1) increases p(HGNC:YFG)',
        f'UNSET {key}',
        'UNSET SupportingText',
        'UNSET Citation\n',
        ('#' * 80),
    ]

    self._help_check_lines(expected_lines)
def test_missing_information(self):
    """Test that entity and abundance constructors reject missing, empty, or non-string values."""
    # (expected exception, constructor, keyword arguments), in the original order
    cases = [
        (ValueError, Entity, {'namespace': 'test'}),
        (ValueError, Protein, {'namespace': 'test'}),
        (ValueError, Protein, {'namespace': ''}),
        (TypeError, Protein, {'namespace': 'uniprot', 'name': 1234}),
        (TypeError, Protein, {'namespace': 'uniprot', 'identifier': 1234}),
        (ValueError, Protein, {'namespace': 'uniprot', 'name': ''}),
        (ValueError, Protein, {'namespace': 'uniprot', 'identifier': ''}),
        (ValueError, Protein, {'namespace': 'uniprot', 'identifier': '12345', 'name': ''}),
        (ValueError, Protein, {'namespace': 'uniprot', 'identifier': '', 'name': '123'}),
    ]
    for expected_error, factory, kwargs in cases:
        with self.assertRaises(expected_error):
            factory(**kwargs)
def _process_findable_annotations(x, y, prefix, names):
    """Split named annotations into grounded entities and ungrounded triples.

    Each name in ``names`` is looked up in the name-to-identifier mapping for
    ``prefix``. Both output lists are extended in place.

    :param x: list receiving ``(Entity, polarity)`` pairs for names with a truthy identifier
    :param y: list receiving ``(prefix, name, polarity)`` triples for all other names
    :param prefix: namespace prefix used for the lookup and for the new entities
    :param names: mapping from annotation name to polarity
    """
    lookup = get_name_id_mapping(prefix)
    for label, polarity in names.items():
        found = lookup.get(label)
        if not found:
            # No usable identifier: keep the raw name as an ungrounded annotation
            y.append((prefix, label, polarity))
            continue
        x.append((Entity(namespace=prefix, identifier=found, name=label), polarity))
def test_upgrade_category_and_namespace(self, *_):
    """Test upgrading the category key and the entity namespace together."""
    expected = {
        'Disease': [
            Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    given = {
        'MeSHDisease': [
            Entity(namespace='MeSHDisease', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    self._help(expected, given)
def test_upgrade_category(self, *_):
    """Test upgrading only the category key; the entity itself is unchanged."""
    expected = {
        'Disease': [
            Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    given = {
        'MeSHDisease': [
            Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease'),
        ],
    }
    self._help(expected, given)
def test_subject_transloc_loc_cond():
    """Check that a subject translocation becomes a location condition on the subject.

    The condition uses the translocation's ``to_loc``.
    """
    source = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    movement = translocation(
        from_loc=Entity('GOCC', 'intracellular'),
        to_loc=Entity('GOCC', 'extracellular space'),
    )

    graph = BELGraph()
    graph.add_increases(
        source,
        target,
        subject_modifier=movement,
        evidence="Some evidence.",
        citation='123456',
    )

    processor = bel.process_pybel_graph(graph)
    assert processor.statements
    assert len(processor.statements) == 1

    statement = processor.statements[0]
    assert isinstance(statement, IncreaseAmount)
    assert statement.subj.name == 'MAP2K1'
    assert statement.subj.location == 'extracellular space'
    assert statement.obj.name == 'MAPK1'
def _process_annotations(
    data,
    remove_ungrounded: bool = False,
    skip_namespaces: Optional[Collection[str]] = None,
) -> None:
    """Process the annotations in a PyBEL edge data dictionary.

    ``data[ANNOTATIONS]`` is rewritten in four passes, in this order:

    1. ``CellLine`` entities whose namespace is ``'CellLine'`` are grounded
       with ``pyobo.ground`` against the listed namespaces.
    2. ``TextLocation`` entities are replaced through ``text_location_labels``.
    3. Category names are remapped through ``_BEL_ANNOTATION_PREFIX_CATEGORY_MAP``.
    4. For every category not in ``CATEGORY_BLACKLIST``, entity namespaces are
       remapped through ``_BEL_ANNOTATION_PREFIX_MAP`` and each entity is
       passed to ``_process_concept``.

    :param data: edge data dictionary; ``data[ANNOTATIONS]`` must exist and is modified in place
    :param remove_ungrounded: if true, cell lines that cannot be grounded are dropped;
        otherwise they are logged and kept unchanged
    :param skip_namespaces: forwarded unchanged to ``_process_concept``
    :raises TypeError: if an entity in a non-blacklisted category is not a dict
    """
    cell_line_entities = data[ANNOTATIONS].get('CellLine')
    if cell_line_entities:
        ne = []
        for entity in cell_line_entities:
            # NOTE(review): entities whose namespace is not 'CellLine' are not
            # copied into the rebuilt list -- confirm this is intended.
            if entity[NAMESPACE] == 'CellLine':
                _namespaces = [
                    'efo',
                    # 'clo',  # FIXME implement CLO in PyOBO then uncomment
                ]
                g_prefix, g_identifier, g_name = pyobo.ground(
                    _namespaces, entity[IDENTIFIER])
                if g_prefix and g_identifier:
                    # Grounded: replace with a new entity built from the grounding result
                    ne.append(
                        Entity(namespace=g_prefix, identifier=g_identifier, name=g_name))
                elif not remove_ungrounded:
                    # Not grounded: keep the original entity unless asked to drop it
                    logger.warning('could not ground CellLine: "%s"', entity[IDENTIFIER])
                    ne.append(entity)
        data[ANNOTATIONS]['CellLine'] = ne

    # fix text locations: swap in the mapped value when the identifier is a
    # key of text_location_labels, otherwise keep the entity as it is
    text_location = data[ANNOTATIONS].get('TextLocation')
    if text_location:
        data[ANNOTATIONS]['TextLocation'] = [
            text_location_labels.get(entity.identifier, entity)
            for entity in text_location
        ]

    # remap category names; categories without a mapping keep their name
    data[ANNOTATIONS] = {
        _BEL_ANNOTATION_PREFIX_CATEGORY_MAP.get(category, category): entities
        for category, entities in data[ANNOTATIONS].items()
    }

    # fix namespaces that were categories before
    for category, entities in data[ANNOTATIONS].items():
        if category in CATEGORY_BLACKLIST:
            continue
        ne = []
        for entity in entities:
            if not isinstance(entity, dict):
                raise TypeError(f'entity should be a dict. got: {entity}')
            nn = _BEL_ANNOTATION_PREFIX_MAP.get(entity[NAMESPACE])
            if nn is not None:
                entity[NAMESPACE] = nn
            # The entity is handed to _process_concept whether or not its namespace was remapped
            _process_concept(concept=entity, skip_namespaces=skip_namespaces)
            ne.append(entity)
        # Assigning to an existing key does not change the dict's size, so it is safe mid-iteration
        data[ANNOTATIONS][category] = ne
def test_subject_translocation_custom_to_loc(self, mock):
    """Insert an edge whose subject translocation goes from a TEST location to a GO location."""
    thrombin = Protein(name='F2', namespace='HGNC')
    endothelin = Protein(name='EDN1', namespace='HGNC')
    evidence_text = 'In endothelial cells, ET-1 secretion is detectable under basal conditions, whereas thrombin induces its secretion.'
    movement = translocation(
        from_loc=Entity(namespace='TEST', name='A'),
        to_loc=Entity(namespace='GO', name='extracellular space'),
    )

    self.graph.add_increases(
        thrombin,
        endothelin,
        evidence=evidence_text,
        citation='10473669',
        subject_modifier=movement,
    )

    make_dummy_namespaces(self.manager, self.graph)

    network = self.manager.insert_graph(self.graph)

    self.assertEqual(2, network.nodes.count())
    self.assertEqual(1, network.edges.count())

    # Fetched but not inspected within this block
    edge = network.edges.first()
def _help_test_non_standard_namespace(self, statement):
    """Parse ``statement`` and compare it with the MOD:PhosRes modification at Ser473.

    The expected dictionary must equal both the DSL object and the parse result.
    """
    parsed = self.parser.parseString(statement)

    expected = {
        KIND: PMOD,
        CONCEPT: Entity(namespace='MOD', name='PhosRes'),
        PMOD_CODE: 'Ser',
        PMOD_POSITION: 473,
    }
    from_dsl = ProteinModification(
        name='PhosRes',
        namespace='MOD',
        code='Ser',
        position=473,
    )

    self.assertEqual(expected, from_dsl)
    self.assertEqual(expected, parsed.asDict())
def test_get_subgraphs_by_annotation(self):
    """Split the graph on the ``subgraph`` annotation and check the nodes and edges of each part."""
    subgraphs = get_subgraphs_by_annotation(self.graph, annotation='subgraph')

    self.assertEqual(2, len(subgraphs))
    self.assertIn(Entity(namespace='subgraph', identifier='1'), subgraphs)
    self.assertIn(Entity(namespace='subgraph', identifier='2'), subgraphs)

    # Subgraph "1": all four nodes; edges a-b, a-c and b-d only
    first = subgraphs[Entity(namespace='subgraph', identifier='1')]
    self.assertIsInstance(first, BELGraph)
    self.assertIn('test', first.namespace_url)
    self.assertIn('subgraph', first.annotation_url)

    for node in (a, b, c, d):
        self.assertIn(node, first)

    self.assertIn(b, first[a])
    self.assertIn(c, first[a])
    self.assertIn(d, first[b])
    self.assertNotIn(d, first[a])
    self.assertNotIn(d, first[c])

    # Subgraph "2": node c is absent; edges a-b, b-d and a-d
    second = subgraphs[Entity(namespace='subgraph', identifier='2')]
    self.assertIsInstance(second, BELGraph)
    self.assertIn('test', second.namespace_url)
    self.assertIn('subgraph', second.annotation_url)

    self.assertIn(a, second)
    self.assertIn(b, second)
    self.assertNotIn(c, second)
    self.assertIn(d, second)

    self.assertIn(b, second[a])
    self.assertNotIn(c, second[a])
    self.assertIn(d, second[b])
    self.assertIn(d, second[a])
def _help_test_non_standard_namespace(self, statement):
    """Parse ``statement`` and compare it with the MOD:PhosRes modification at Ser473.

    The expected dictionary must equal both the ``pmod`` DSL result and the
    parse result.
    """
    parsed = self.parser.parseString(statement)

    expected = {
        KIND: PMOD,
        IDENTIFIER: Entity('MOD', 'PhosRes'),
        PMOD_CODE: 'Ser',
        PMOD_POSITION: 473,
    }
    from_dsl = pmod(name='PhosRes', namespace='MOD', code='Ser', position=473)

    self.assertEqual(expected, from_dsl)
    self.assertEqual(expected, parsed.asDict())
def test_subject_location(self):
    """Insert an edge whose subject carries a location modifier and check the stored counts."""
    source = Protein(name='YFG', namespace='HGNC')
    target = Protein(name='YFG2', namespace='HGNC')
    # Random evidence and citation, drawn in the same order as before
    evidence_text = n()
    citation_id = n()
    nucleus = location(
        Entity(namespace='GO', name='nucleus', identifier='GO:0005634'),
    )

    self.graph.add_increases(
        source,
        target,
        evidence=evidence_text,
        citation=citation_id,
        subject_modifier=nucleus,
    )

    make_dummy_namespaces(self.manager, self.graph)

    network = self.manager.insert_graph(self.graph)

    self.assertEqual(2, network.nodes.count())
    self.assertEqual(1, network.edges.count())

    # Fetched but not inspected within this block
    edge = network.edges.first()
def _process_annotations(data, add_free_annotations: bool = False):
    """Ground the annotations of an edge data dictionary, in place.

    Each ``prefix -> names`` entry of ``data[ANNOTATIONS]`` is handled by the
    first rule that matches:

    - ``'CellLine'`` names are looked up in the ``efo`` name-to-identifier mapping.
    - Prefixes in ``_BEL_ANNOTATION_PREFIX_MAP`` are looked up under the mapped prefix.
    - Prefixes for which ``normalize_prefix`` returns a truthy value are looked
      up under that normalized prefix.
    - Anything else is logged once per prefix and kept as a free annotation.

    ``data[ANNOTATIONS]`` is then replaced by a ``namespace -> identifier ->
    polarity`` mapping holding only the grounded annotations.

    :param data: edge data dictionary; ``data[ANNOTATIONS]`` must exist
    :param add_free_annotations: if true, ungrounded annotations are stored under
        ``data['free_annotations']`` as ``prefix -> name -> polarity``
    """
    grounded = []  # (Entity, polarity) pairs
    ungrounded = []  # (prefix, name, polarity) triples

    for prefix, names in data[ANNOTATIONS].items():
        if prefix == 'CellLine':
            efo_name_to_id = get_name_id_mapping('efo')
            # FIXME implement CLO import, then fall back to a 'clo' lookup
            # when EFO has no match
            for name, polarity in names.items():
                identifier = efo_name_to_id.get(name)
                if identifier is not None:
                    grounded.append(
                        (Entity(namespace='efo', identifier=identifier, name=name), polarity)
                    )
                else:
                    # Record the *name*: the identifier is None on this branch,
                    # so storing it would lose the annotation and collapse all
                    # ungrounded cell lines onto one None key.
                    ungrounded.append(('efo', name, polarity))
            continue

        if prefix in _BEL_ANNOTATION_PREFIX_MAP:
            mapped_prefix = _BEL_ANNOTATION_PREFIX_MAP[prefix]
            _process_findable_annotations(grounded, ungrounded, mapped_prefix, names)
            continue

        # Normalize once and reuse the result
        prefix_norm = normalize_prefix(prefix)
        if prefix_norm:
            _process_findable_annotations(grounded, ungrounded, prefix_norm, names)
            continue

        # Unhandled prefix: warn the first time only, then keep the raw values
        if prefix not in _UNHANDLED_ANNOTATION:
            logger.warning('unhandled annotation: %s', prefix)
            _UNHANDLED_ANNOTATION.add(prefix)
        if isinstance(names, dict):
            ungrounded.extend(
                (prefix, name, polarity)
                for name, polarity in names.items()
            )
        else:
            ungrounded.append((prefix, names, True))

    data[ANNOTATIONS] = defaultdict(dict)
    for entity, polarity in grounded:
        data[ANNOTATIONS][entity.namespace][entity.identifier] = polarity

    if add_free_annotations:
        data['free_annotations'] = defaultdict(dict)
        for prefix, name, polarity in ungrounded:
            data['free_annotations'][prefix][name] = polarity
def test_upgrade_by_name(self, *_):
    """Test upgrading categories and namespaces while looking identifiers up by name.

    Each input entity has only a name and uses its legacy category as the
    namespace. The expected output uses the upgraded category and namespace
    and has the identifier filled in.
    """
    # The species name literal had been masked with asterisks; it is restored
    # to the label for taxon 9606 so that the name can actually be resolved.
    expected = {
        'Disease': [Entity(namespace='mesh', identifier='D010300', name='Parkinson Disease')],
        'Anatomy': [Entity(namespace='mesh', identifier='D013378', name='Substantia Nigra')],
        'Species': [Entity(namespace='ncbitaxon', identifier='9606', name='Homo sapiens')],
    }
    original = {
        'MeSHDisease': [Entity(namespace='MeSHDisease', name='Parkinson Disease')],
        'MeSHAnatomy': [Entity(namespace='MeSHAnatomy', name='Substantia Nigra')],
        'Species': [Entity(namespace='Species', name='Homo sapiens')],
    }
    self._help(expected, original)