def test_GO_annotate_genelist2_vs_enriched(self):
    """Annotation with the enriched terms must match the enrichment graph.

    Submits genelist2 using ``job_req`` as is, collects the terms whose
    ``q`` is below the job's ``qvalue``, resubmits the same genes as an
    ``annot`` job with those terms as the custom slim, and checks that
    both jobs produce graphs with identical node and edge sets.
    """
    submit_url = urls.reverse('GOnet-submit-form')
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
        input_lines = fh.read()

    # First job: submission with the base request parameters.
    c.post(submit_url,
           dict(job_req, **{'paste_data': input_lines}),
           follow=True)
    enrich_job = GOnetSubmission.objects.latest('submit_time')
    df = enrich_job.enrich_res_df
    enriched_terms = df[df['q'] < enrich_job.qvalue]['term']
    custom_annotation = '\n'.join(enriched_terms)

    # Second job: annotate the same genes against the enriched terms.
    annot_req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'custom',
            'custom_terms': custom_annotation
        })
    c.post(submit_url, annot_req, follow=True)
    annot_job = GOnetSubmission.objects.latest('submit_time')

    G_enrich = cyjs.cyjs2nx(json.loads(enrich_job.network))
    G_annot = cyjs.cyjs2nx(json.loads(annot_job.network))
    self.assertSetEqual(set(G_enrich.nodes), set(G_annot.nodes))
    self.assertSetEqual(set(G_enrich.edges), set(G_annot.edges))
def test_GO_enrichment_qval01_celcomp(self):
    """Check expected GO terms for genelist2 at qvalue 0.01, cellular_component.

    Submits genelist2 with ``qvalue`` 0.01 and the ``cellular_component``
    namespace, then asserts that a fixed set of GO term IDs appears among
    the ``GO:``-prefixed nodes of the resulting network.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
        input_lines = fh.read()
    req = dict(job_req, **{
        'paste_data': input_lines,
        'qvalue': 0.01,
        'namespace': 'cellular_component'
    })
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    G = cyjs.cyjs2nx(json.loads(sn.network))

    # Only GO term nodes are of interest; other nodes are ignored.
    enriched = {n for n in G.nodes() if n.startswith('GO:')}
    for term in ['GO:0000786', 'GO:0044815', 'GO:0032993', 'GO:0000785']:
        self.assertIn(term, enriched)
def test_resolution(self):
    """Check how submitted names are resolved, de-duplicated and reported.

    Submits a mixed list of names and accessions, then verifies the
    ``submit_name`` mapping stored in ``parsed_data``, the
    ``duplicate_of`` entry, the contents of the id-mapping download and
    the ``identified`` / ``ambiguous`` node attributes of the network.
    """
    # CMKBR7 is synonym of CCR7
    # O14804 is PNR
    input_lines = '\n'.join([
        "PNR", "KIAA0457", "ELDF10", "CCR7", "AIM", "O14804", "CMKBR7",
        "Q9UNH8", "P28068", "FOO"
    ])
    req = dict(job_req, **{'paste_data': input_lines})
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')

    res_d = sn.parsed_data['submit_name'].to_dict()
    self.assertDictEqual(
        res_d, {
            'O75787': 'ELDF10',
            'P32248': 'CCR7',
            'Q9NRI5': 'KIAA0457',
            'O14804': 'PNR',
            '_00000': 'O14804',
            '_00001': 'CMKBR7',
            'Q9UNH8': 'Q9UNH8',
            'P28068': 'P28068',
            'P26358': 'AIM',
            '_00002': 'FOO'
        })
    self.assertEqual(sn.parsed_data.loc['_00000', 'duplicate_of'],
                     'O14804')

    # Test id mapping response
    idmap_resp = c.get(
        urls.reverse('GOnet-input-idmap', args=(str(sn.id), )))
    res = pd.read_csv(io.StringIO(idmap_resp.content.decode()),
                      sep='\t', index_col=0)
    self.assertEqual(res.loc['CMKBR7', 'Notes'], 'same as CCR7')
    self.assertEqual(res.loc['O14804', 'Notes'], 'same as PNR')
    self.assertEqual(res.loc['AIM', 'Description'],
                     'DNA (cytosine-5)-methyltransferase 1')
    self.assertIn('ambiguous', res.loc['PNR', 'Notes'])
    self.assertIn('not recognized', res.loc['FOO', 'Notes'])

    # Test tricky Ensembl IDs
    self.assertEqual(res.loc['P28068', 'Ensembl_ID'], 'ENSG00000242574')

    # Check graph attributes (G.nodes[...] replaces the removed G.node)
    G = cyjs.cyjs2nx(json.loads(sn.network))
    self.assertEqual(G.nodes['_00002']['data']['identified'], False)
    self.assertEqual(G.nodes['P32248']['data']['identified'], True)
    self.assertEqual(G.nodes['O14804']['data']['ambiguous'], True)
def test_GO_annotate_genelist1(self):
    """Annotate genelist1 with ``goslim_immunol`` and check one expected edge.

    Asserts that the resulting network has an edge from ``GO:0042254``
    to ``Q9HC36``.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist1.lst'), 'r') as fh:
        input_lines = fh.read()
    req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'goslim_immunol'
        })
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    G = cyjs.cyjs2nx(json.loads(sn.network))
    self.assertTrue(G.has_edge('GO:0042254', 'Q9HC36'))
def test_GO_annotate_genelist2_custom_terms(self):
    """Annotate genelist2 against the custom term list and check outputs.

    This test has its own name because a second method in this file is
    also called ``test_GO_annotate_genelist2``; sharing that name meant
    only the later definition was kept and this test never ran.

    Verifies the HTTP status, the GO-term predecessors of selected gene
    nodes, the ``tot_gn`` attribute of ``GO:0071300`` and the contents
    of the CSV and TXT result downloads.
    """
    # Context managers make sure both fixture files are closed.
    with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
        input_lines = fh.read()
    with open(pkg_file(__name__, 'data/custom_annotation.txt'), 'r') as fh:
        custom_annotation = fh.read()

    req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'custom',
            'custom_terms': custom_annotation
        })
    resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    self.assertEqual(resp.status_code, 200)
    sn = GOnetSubmission.objects.latest('submit_time')
    net_dict = json.loads(sn.network)
    G = cyjs.cyjs2nx(net_dict)
    self.assertListEqual(list(G.predecessors('P29376')), ['GO:0071300'])
    self.assertListEqual(list(G.predecessors('Q5TBA9')), ['GO:0016043'])
    self.assertListEqual(list(G.predecessors('P16403')), ['GO:0065003'])

    # Test node GO:0071300 (cellular response to retinoic acid)
    term_node = [
        node for node in net_dict['elements']['nodes']
        if node['data']['id'] == 'GO:0071300'
    ][0]
    self.assertEqual(term_node['data']['tot_gn'],
                     len(O.get_attr('GO:0071300', 'human')))

    # Test CSV response
    csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
    res = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                      sep=',', index_col=1)
    self.assertIn('LTK', res.loc['GO:0032526', 'Genes'])

    # Test TXT response: the first line for GO:0032526 must mention LTK
    txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
    term_lines = [
        line for line in io.StringIO(txt_resp.content.decode())
        if line.strip().startswith('GO:0032526')
    ]
    self.assertTrue(term_lines)
    self.assertIn('LTK', term_lines[0])
def test_GO_annotate_genelist2(self):
    """Annotate genelist2 with ``goslim_immunol`` and check all outputs.

    Verifies the HTTP status, one expected edge, that the numbers of
    positive and negative ``expr:user_supplied`` values on non-GO nodes
    match the second column of the input file, and the contents of the
    CSV and TXT result downloads.
    """
    fixture = pkg_file(__name__, 'data/genelist2.tsv')
    # Use a context manager so the fixture file handle is always closed.
    with open(fixture, 'r') as fh:
        input_lines = fh.read()
    input_data_df = pd.read_csv(fixture, sep='\t', header=None)

    req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'goslim_immunol'
        })
    resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    self.assertEqual(resp.status_code, 200)
    sn = GOnetSubmission.objects.latest('submit_time')
    net = json.loads(sn.network)
    G = cyjs.cyjs2nx(net)
    self.assertTrue(G.has_edge('GO:0007165', 'P29376'))

    # Test recognition of user-supplied contrast values
    contrasts = [
        float(node['data']['expr:user_supplied'])
        for node in net['elements']['nodes']
        if not node['data']['name'].startswith('GO:')
    ]
    self.assertEqual(sum(1 for value in contrasts if value > 0),
                     np.sum(input_data_df[1] > 0) - 1)  # -1 for HIST1H2AM
    self.assertEqual(sum(1 for value in contrasts if value < 0),
                     np.sum(input_data_df[1] < 0))

    # Test CSV response
    csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
    res_df = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                         sep=',', index_col=0)
    self.assertIn('GO:0007165', set(res_df['GO_term_ID']))
    self.assertEqual(res_df.index[0], 1)

    # Test TXT response: the first token of every line is collected
    txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
    goterms = {
        line.split()[0]
        for line in io.StringIO(txt_resp.content.decode())
    }
    self.assertIn('GO:0007165', goterms)
def test_GO_annotate_genelist8_large(self):
    """Annotate the first 2500 lines of genelist8 with ``goslim_generic``.

    Checks one node symbol in the network and one expression value for
    the ``DICE-CD4 T cell (stim)`` cell type, then fetches the TXT and
    CSV downloads; those are only required to decode (TXT) and to parse
    as CSV without raising.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist8.tsv'), 'r') as fh:
        input_lines = fh.read().split('\n')
    input_str = '\n'.join(input_lines[:2500])
    req = dict(
        job_req, **{
            'paste_data': input_str,
            'analysis_type': 'annot',
            'slim': 'goslim_generic'
        })
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    G = cyjs.cyjs2nx(json.loads(sn.network))
    self.assertEqual(G.nodes['Q8TCT6']['data']['nodesymbol'], 'SPPL3')

    # Check expression values (DICE-DB)
    resp = c.get(
        urls.reverse('GOnet-get-expression',
                     kwargs={
                         'jobid': str(sn.id),
                         'celltype': 'DICE-CD4 T cell (stim)'
                     }))
    expr_vals = json.loads(resp.content.decode())
    self.assertAlmostEqual(expr_vals['A1XBS5'], 0.09010716525639434)

    # Test TXT response (no content assertions; it only has to decode)
    txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
    txt_resp.content.decode()

    # Test CSV response (no content assertions; it only has to parse)
    csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
    pd.read_csv(io.StringIO(csv_resp.content.decode()),
                sep=',', index_col=0)
def test_GO_annot_mouse_genes(self):
    """Annotate genelist7 as mouse genes with ``goslim_generic``.

    Checks that ``GO:0006950`` is a predecessor of ``MGI:1351618``, the
    length of ``xgenes`` on ``GO:0048856`` and the identifier and
    description attributes stored on the ``MGI:1351618`` node.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist7.txt'), 'r') as fh:
        input_lines = fh.read()
    req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'goslim_generic',
            'organism': 'mouse'
        })
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    G = cyjs.cyjs2nx(json.loads(sn.network))

    self.assertIn('GO:0006950', G.predecessors('MGI:1351618'))
    self.assertEqual(len(G.nodes['GO:0048856']['data']['xgenes']), 35)

    gene_data = G.nodes['MGI:1351618']['data']
    self.assertEqual(gene_data['ensembl_id'], 'ENSMUSG00000014905')
    self.assertEqual(gene_data['uniprot_id'], 'Q9QYI6')
    self.assertEqual(gene_data['mgi_id'], 'MGI:1351618')
    self.assertEqual(gene_data['desc'],
                     'DnaJ heat shock protein family (Hsp40) member B9')
def test_GO_annot_goslim_generic(self):
    """Annotate genelist2 with ``goslim_generic`` in ``cellular_component``.

    Checks one expected term-gene edge and the number of ``slimterms``
    on two gene nodes; the second is looked up by its ``nodesymbol`` and
    must be unique.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
        input_lines = fh.read()
    req = dict(
        job_req, **{
            'paste_data': input_lines,
            'analysis_type': 'annot',
            'slim': 'goslim_generic',
            'namespace': 'cellular_component'
        })
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    G = cyjs.cyjs2nx(json.loads(sn.network))

    # CCR7 in plasma membrane
    self.assertTrue(G.has_edge('GO:0005886', 'P32248'))
    self.assertEqual(len(G.nodes['O60282']['data']['slimterms']), 7)

    ids = [
        n for n in G.nodes()
        if G.nodes[n]['data']['nodesymbol'] == 'ZNF761'
    ]
    self.assertEqual(len(ids), 1)
    znf_node = G.nodes[ids[0]]
    self.assertEqual(len(znf_node['data']['slimterms']), 4)
def test_GO_enrichment_default(self):
    """Run genelist2 with ``job_req`` as is and check every output.

    Covers the enrichment table, the CSV and TXT downloads, selected
    edge and node attributes of the network, expression values for two
    cell types and the id-mapping download.
    """
    # Use a context manager so the fixture file handle is always closed.
    with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
        input_lines = fh.read()
    req = dict(job_req, **{'paste_data': input_lines})
    c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
    sn = GOnetSubmission.objects.latest('submit_time')
    job_id = str(sn.id)
    self.assertIn('GO:0006334',
                  set(sn.enrich_res_df.query('q<0.05')['term']))
    net_dict = json.loads(sn.network)
    # The result is not inspected; the conversion just has to succeed.
    cyjs.cyjs2nx(net_dict)
    nodes = net_dict['elements']['nodes']
    edges = net_dict['elements']['edges']

    # Test CSV response
    csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(job_id, )))
    res = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                      sep=',', index_col=0)
    self.assertEqual(res.index[0], 1)
    self.assertListEqual(
        list(res.columns),
        ['GO_term_ID', 'GO_term_def', 'P', 'P_FDR_adj', 'NofGenes',
         'Genes'])
    res = res.set_index('GO_term_ID')
    self.assertEqual(res.index.name, 'GO_term_ID')
    self.assertIn('GO:0006334', set(res.index))
    self.assertEqual(res.loc['GO:2000520', 'Genes'], "CCR7|HAVCR2")

    # Test TXT response: the first token of every line is collected
    txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(job_id, )))
    enrichterms = {
        line.split()[0]
        for line in io.StringIO(txt_resp.content.decode())
    }
    self.assertIn('GO:0006334', enrichterms)

    # Test specific terms
    t_edge_name = 'GO:0006334 (interacts with) HIST1H2BC'
    e = [edge for edge in edges if edge['data']['name'] == t_edge_name][0]
    self.assertDictEqual(
        e['data']['specific_terms'],
        {'GO:0006334': {
            'refs': ['PMID:21873635', 'PMID:422550', 'PMID:9119399'],
            'specific_term_name': 'nucleosome assembly'}})

    # Test relation
    # "histone H3-K27 trimethylation" is_a "histone H3-K27 methylation"
    t_edge_name = 'GO:0070734 (interacts with) GO:0098532'
    e = [edge for edge in edges if edge['data']['name'] == t_edge_name][0]
    self.assertEqual(e['data']['relation'], 'is_a')

    # Test node ABCB1
    n = [node for node in nodes
         if node['data']['nodesymbol'] == 'ABCB1'][0]
    self.assertAlmostEqual(n['data']['expr:user_supplied'], 0.6021704444)
    self.assertEqual(n['data']['ensembl_id'], 'ENSG00000085563')

    # Test node GO:0098532 (histone H3-K27 trimethylation)
    n = [node for node in nodes if node['data']['id'] == 'GO:0098532'][0]
    self.assertLess(n['data']['P'], 8.0e-07)  # Should be rather significant
    self.assertLess(n['data']['Padj'], 0.01)
    self.assertEqual(n['data']['tot_gn'], 6)

    # Test resolved attribute
    n = [node for node in nodes
         if node['data']['nodesymbol'] == 'LPPR2'][0]
    self.assertEqual(n['data']['uniprot_id'], 'Q96GM1')
    self.assertEqual(n['data']['ensembl_id'], 'ENSG00000105520')
    self.assertEqual(n['data']['desc'],
                     'Phospholipid phosphatase-related protein type 2')
    self.assertEqual(n['data']['primname'], 'PLPPR2')

    n = [node for node in nodes
         if node['data']['nodesymbol'] == 'LTC4S'][0]
    self.assertEqual(n['data']['uniprot_id'], 'Q16873')
    self.assertEqual(n['data']['ensembl_id'], 'ENSG00000213316')

    # Check expression values (protein atlas)
    resp = c.get(
        urls.reverse('GOnet-get-expression',
                     kwargs={'jobid': job_id,
                             'celltype': 'HPA-adipose tissue'}))
    expr_vals = json.loads(resp.content.decode())
    self.assertEqual(expr_vals['O14503'], 170.0)

    # Check expression values (DICE-DB)
    resp = c.get(
        urls.reverse('GOnet-get-expression',
                     kwargs={'jobid': job_id,
                             'celltype': 'DICE-Th1Th17'}))
    expr_vals = json.loads(resp.content.decode())
    self.assertAlmostEqual(expr_vals['P10721'], 9.893615)

    # Test id mapping response
    idmap_resp = c.get(urls.reverse('GOnet-input-idmap', args=(job_id, )))
    res = pd.read_csv(io.StringIO(idmap_resp.content.decode()),
                      sep='\t', index_col=0)
    self.assertEqual(res.loc['HIST1H2AM', 'Notes'], 'same as HIST1H2AG')
    self.assertEqual(res.loc['LPPR2', 'Preferred_name'], 'PLPPR2')