Esempio n. 1
0
    def test_parse_gg_summary_flat(self):
        """Flat gg summary text parses into the expected GreengenesRecords."""
        expected = [
            GreengenesRecord(fields) for fields in (
                {'prokMSA_id': '1', 'ncbi_acc_w_ver': 'xyzf'},
                {'prokMSA_id': '25', 'ncbi_acc_w_ver': 'abcd',
                 'country': 'australia'},
                {'prokMSA_id': '50', 'ncbi_acc_w_ver': '223xx'},
            )
        ]

        # The parser is a generator; materialise it so it can be compared
        # against the expected list in one assertion.
        handle = StringIO(gg_summary)
        observed = list(parse_gg_summary_flat(handle))

        self.assertEqual(observed, expected)
Esempio n. 2
0
 def test_write_gg_record(self):
     """write_gg_record emits the expected set of lines for a record.

     Line order is not checked: both the expected and the observed lines
     are sorted before being compared.
     """
     expected_lines = [
         'BEGIN',
         'prokmsa_id=123',
         'gg_id=',
         'hugenholtz_tax_string=',
         'ncbi_acc_w_ver=xyz',
         'ncbi_gi=333',
         'n_pos_aligned=',
         'n_pos_unaligned=',
         'db_name=',
         'gold_id=',
         'decision=',
         'prokmsaname=',
         'isolation_source=',
         'clone=foo',
         'organism=',
         'strain=',
         'specific_host=',
         'authors=',
         'title=',
         'pubmed=123',
         'journal=',
         'study_id=',
         'submit_date=',
         'country=',
         'ncbi_tax_string=',
         'silva_tax_string=',
         'rdp_tax_string=',
         'greengenes_tax_string=',
         'non_acgt_percent=0.5',
         'perc_ident_to_invariant_core=',
         'small_gap_intrusions=',
         'bellerophon=',
         'bel3_div_ratio=',
         'chim_slyr_a=',
         'chim_slyr_b=',
         'chim_slyr_a_tax=',
         'chim_slyr_b_tax=',
         'aligned_seq=',
         'unaligned_seq=',
         'END',
         '',
     ]
     expected_lines.sort()

     # Only a handful of fields are populated; the rest are expected to be
     # written out with empty values.
     record = GreengenesRecord({
         'prokmsa_id': 123,
         'ncbi_acc_w_ver': 'xyz',
         'ncbi_gi': '333',
         'pubmed': 123,
         'clone': 'foo',
         'non_acgt_percent': '0.5'
     })

     buf = StringIO()
     write_gg_record(buf, record)

     # Rewind so that everything that was written can be read back.
     buf.seek(0)
     written = buf.read()
     observed_lines = written.splitlines()
     observed_lines.sort()

     self.assertEqual(observed_lines, expected_lines)
Esempio n. 3
0
def get_genbank_summary(r):
    """Collect the GenBank summary fields for r into a GreengenesRecord.

    Every (field, parser) pair in parse_funs is applied in order: the
    parser is called with r and its result is stored under the field name.
    r is presumably a parsed GenBank record -- confirm against the callers.
    """
    summary = GreengenesRecord()
    for field, extract in parse_funs:
        summary[field] = extract(r)
    return summary
Esempio n. 4
0
def parse_gg_summary_flat(open_file):
    """Parse a flat, tab-delimited greengenes summary file from flat_files.

    The first line must be a header that starts with '#'; the rest of that
    line, split on tabs, gives the column names. Every following line
    yields one GreengenesRecord whose keys are the column names and whose
    values are the corresponding tab-separated fields, kept as strings
    (no type conversion is applied).

    open_file -- file-like object supporting readline() and line iteration

    Raises ValueError if the first line does not start with '#'. Because
    this function is a generator, the header check (and the warning that is
    printed) only happen once iteration begins.
    """
    header_line = open_file.readline()
    if not header_line.startswith('#'):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Missing the header!")

    header = header_line[1:].strip().split('\t')

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("WARNING: NOT SETTING TYPES CURRENTLY")
    for line in open_file:
        record = GreengenesRecord()

        # zip stops at the shorter sequence, so columns missing from the end
        # of a line are simply left unset on the record.
        for key, value in zip(header, line.strip().split('\t')):
            record[key] = value

        # TODO: type conversion is disabled for now -- record.setTypes()
        yield record
Esempio n. 5
0
 def test_get_genbank_summary(self):
     """get_genbank_summary on self.gb1 returns the expected record."""
     expected_fields = {
         'ncbi_acc_w_ver': 'AGIY01000001.1',
         'ncbi_gi': '354825968',
         'gold_id': 'Gi05850',
         'decision': 'named_isolate',
         'isolation_source': 'anaerobic digested sludge',
         'organism': 'Methanolinea tarda NOBI-1',
         'strain': 'NOBI-1',
         'prokmsaname': 'Methanolinea tarda NOBI-1',
         'specific_host': ('Methanolinea tarda NOBI-1 ctg73, '
                           'whole genome shotgun sequence.'),
         'authors': ('Lucas,S., Han,J., Lapidus,A., Cheng,J.-F., Goodwin,L., '
                     'Pitluck,S., Peters,L., Land,M.L., Hauser,L., Imachi,H., '
                     'Sekiguchi,Y., Kamagata,Y., Cadillo-Quiroz,H., Zinder,S., '
                     'Liu,W.T., Tamaki,H. and Woyke,T.J.'),
         'title': 'The draft genome of Methanolinea tarda NOBI-1',
         'submit_date': '31-OCT-2011',
         'country': 'Japan: Nagaoka',
         # NCBI_tax_id ('882090') is not part of the expected record.
         'ncbi_tax_string': ('Archaea; Euryarchaeota; Methanomicrobia; '
                             'Methanomicrobiales; Methanoregulaceae; '
                             'Methanolinea'),
     }
     expected = GreengenesRecord(expected_fields)

     observed = get_genbank_summary(self.gb1)

     self.assertEqual(observed, expected)
Esempio n. 6
0
 def setUp(self):
     """Store a GreengenesRecord with prokmsa_id 123 on self.ggrecord."""
     fields = {'prokmsa_id': 123}
     self.ggrecord = GreengenesRecord(fields)