def test_parse_gg_summary_flat(self):
    """parse_gg_summary_flat yields one record per line of gg_summary"""
    first = GreengenesRecord({'prokMSA_id': '1',
                              'ncbi_acc_w_ver': 'xyzf'})
    second = GreengenesRecord({'prokMSA_id': '25',
                               'ncbi_acc_w_ver': 'abcd',
                               'country': 'australia'})
    third = GreengenesRecord({'prokMSA_id': '50',
                              'ncbi_acc_w_ver': '223xx'})
    exp = [first, second, third]

    # The parser is a generator; materialise it so it can be compared
    # against the expected list in one assertion.
    summary_file = StringIO(gg_summary)
    obs = list(parse_gg_summary_flat(summary_file))

    self.assertEqual(obs, exp)
def test_write_gg_record(self):
    """write_gg_record emits every field as key=value between BEGIN and END"""
    expected_lines = [
        'BEGIN',
        'prokmsa_id=123',
        'gg_id=',
        'hugenholtz_tax_string=',
        'ncbi_acc_w_ver=xyz',
        'ncbi_gi=333',
        'n_pos_aligned=',
        'n_pos_unaligned=',
        'db_name=',
        'gold_id=',
        'decision=',
        'prokmsaname=',
        'isolation_source=',
        'clone=foo',
        'organism=',
        'strain=',
        'specific_host=',
        'authors=',
        'title=',
        'pubmed=123',
        'journal=',
        'study_id=',
        'submit_date=',
        'country=',
        'ncbi_tax_string=',
        'silva_tax_string=',
        'rdp_tax_string=',
        'greengenes_tax_string=',
        'non_acgt_percent=0.5',
        'perc_ident_to_invariant_core=',
        'small_gap_intrusions=',
        'bellerophon=',
        'bel3_div_ratio=',
        'chim_slyr_a=',
        'chim_slyr_b=',
        'chim_slyr_a_tax=',
        'chim_slyr_b_tax=',
        'aligned_seq=',
        'unaligned_seq=',
        'END',
        '',
    ]
    # Both sides are sorted before comparison, so only the set of emitted
    # lines is checked, not their order.
    expected_lines.sort()

    populated_fields = {
        'prokmsa_id': 123,
        'ncbi_acc_w_ver': 'xyz',
        'ncbi_gi': '333',
        'pubmed': 123,
        'clone': 'foo',
        'non_acgt_percent': '0.5',
    }
    record = GreengenesRecord(populated_fields)

    buf = StringIO()
    write_gg_record(buf, record)

    observed_lines = buf.getvalue().splitlines()
    observed_lines.sort()
    self.assertEqual(observed_lines, expected_lines)
def get_genbank_summary(r):
    """Build a GreengenesRecord summarising r.

    Every (field, parser) pair in parse_funs is applied in order: the parser
    is called with r and its result is stored in the record under field.
    """
    summary = GreengenesRecord()
    for field_name, extract in parse_funs:
        summary[field_name] = extract(r)
    return summary
def parse_gg_summary_flat(open_file):
    """Yield a GreengenesRecord for each data line of a flat summary file.

    The first line of open_file must be a header beginning with '#'. The rest
    of that line, stripped and split on tabs, gives the field names. Each
    following line is stripped, split on tabs and paired positionally with
    the header; pairing stops at the shorter of the two, so fields missing
    from the end of a line are simply not set on the record.

    This is a generator, so the header is read and validated only once
    iteration begins.

    open_file -- an object providing readline() and line iteration

    Raises ValueError if the first line does not start with '#' (this
    includes empty input).
    """
    header_line = open_file.readline()
    if not header_line.startswith('#'):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Missing the header!")

    header = header_line[1:].strip().split('\t')

    # Parenthesised single-argument print emits the same text on Python 2
    # (print statement) and Python 3 (print function).
    print("WARNING: NOT SETTING TYPES CURRENTLY")

    for line in open_file:
        record = GreengenesRecord()
        for key, value in zip(header, line.strip().split('\t')):
            record[key] = value
        # Type coercion is currently disabled (see the warning above), so
        # values are stored as the strings read from the file.
        # record.setTypes()
        yield record
def test_get_genbank_summary(self):
    """get_genbank_summary extracts the expected fields from self.gb1"""
    expected_fields = {
        'ncbi_acc_w_ver': 'AGIY01000001.1',
        'ncbi_gi': '354825968',
        'gold_id': 'Gi05850',
        'decision': 'named_isolate',
        'isolation_source': 'anaerobic digested sludge',
        'organism': 'Methanolinea tarda NOBI-1',
        'strain': 'NOBI-1',
        'prokmsaname': 'Methanolinea tarda NOBI-1',
        'specific_host': 'Methanolinea tarda NOBI-1 ctg73, whole genome shotgun sequence.',
        'authors': 'Lucas,S., Han,J., Lapidus,A., Cheng,J.-F., Goodwin,L., Pitluck,S., Peters,L., Land,M.L., Hauser,L., Imachi,H., Sekiguchi,Y., Kamagata,Y., Cadillo-Quiroz,H., Zinder,S., Liu,W.T., Tamaki,H. and Woyke,T.J.',
        'title': 'The draft genome of Methanolinea tarda NOBI-1',
        'submit_date': '31-OCT-2011',
        'country': 'Japan: Nagaoka',
        # 'NCBI_tax_id' ('882090') is intentionally not part of the
        # expected record.
        'ncbi_tax_string': 'Archaea; Euryarchaeota; Methanomicrobia; Methanomicrobiales; Methanoregulaceae; Methanolinea',
    }
    exp = GreengenesRecord(expected_fields)

    obs = get_genbank_summary(self.gb1)

    self.assertEqual(obs, exp)
def setUp(self):
    """Create the GreengenesRecord fixture stored on self.ggrecord"""
    initial_fields = {'prokmsa_id': 123}
    self.ggrecord = GreengenesRecord(initial_fields)
class GreengenesRecordTests(TestCase):
    """Tests of the GreengenesRecord object"""

    def setUp(self):
        """Start every test from a record with only prokmsa_id set"""
        initial_fields = {'prokmsa_id': 123}
        self.ggrecord = GreengenesRecord(initial_fields)

    def test_init(self):
        """A new record holds every known field, None unless supplied"""
        unset_fields = [
            'ncbi_acc_w_ver',
            'ncbi_gi',
            'gg_id',
            'db_name',
            'gold_id',
            'decision',
            'prokmsaname',
            'isolation_source',
            'clone',
            'organism',
            'strain',
            'specific_host',
            'authors',
            'title',
            'pubmed',
            'journal',
            'study_id',
            'submit_date',
            'country',
            'ncbi_tax_string',
            'silva_tax_string',
            'rdp_tax_string',
            'greengenes_tax_string',
            'hugenholtz_tax_string',
            'non_acgt_percent',
            'perc_ident_to_invariant_core',
            'small_gap_intrusions',
            'bellerophon',
            'bel3_div_ratio',
            'chim_slyr_a',
            'chim_slyr_b',
            'chim_slyr_a_tax',
            'chim_slyr_b_tax',
            'aligned_seq',
            'unaligned_seq',
            'n_pos_aligned',
            'n_pos_unaligned',
        ]
        exp = dict.fromkeys(unset_fields, None)
        exp['prokmsa_id'] = 123

        obs = self.ggrecord
        self.assertEqual(obs, exp)

    def test_setTypes(self):
        """setTypes leaves already well-typed values unchanged"""
        record = self.ggrecord
        record['ncbi_acc_w_ver'] = 'asd'
        record.setTypes()

        self.assertEqual(record['prokmsa_id'], 123)
        self.assertEqual(record['ncbi_acc_w_ver'], 'asd')

    def test_getARBRules(self):
        """getARBRules output matches the reference arbrules text"""
        observed_rules = self.ggrecord.getARBRules().split("\n\n")
        observed_rules.sort()

        # The last chunk of the reference text is excluded from the
        # comparison.
        expected_rules = arbrules.split("\n\n")[:-1]
        expected_rules.sort()

        # NOTE(review): zip stops at the shorter list, so a difference in
        # the number of rules would not be reported here -- confirm whether
        # that is intended.
        for observed, expected in zip(observed_rules, expected_rules):
            self.assertEqual(observed, expected)

    def test_toGreengenesFormat(self):
        """toGreengenesFormat emits the same lines as exp_testrecord"""
        observed_lines = self.ggrecord.toGreengenesFormat().splitlines()
        observed_lines.sort()

        expected_lines = exp_testrecord.splitlines()
        expected_lines.sort()

        self.assertEqual(observed_lines, expected_lines)

    def test_sanityCheck(self):
        """sanityCheck returns None when valid, raises ValueError otherwise"""
        record = self.ggrecord
        self.assertEqual(record.sanityCheck(), None)

        # A non-numeric prokmsa_id must be rejected.
        record['prokmsa_id'] = "bad"
        self.assertRaises(ValueError, record.sanityCheck)