예제 #1
0
    def test_parse_gg_summary_flat(self):
        """Flat gg summary text (see flat_files.py) parses to the expected records"""
        expected_fields = [
            {'prokMSA_id':'1', 'ncbi_acc_w_ver':'xyzf'},
            {'prokMSA_id':'25', 'ncbi_acc_w_ver':'abcd',
             'country':'australia'},
            {'prokMSA_id':'50', 'ncbi_acc_w_ver':'223xx'},
        ]
        exp = [GreengenesRecord(fields) for fields in expected_fields]

        # the parser is a generator, so drain it before comparing
        summary_file = StringIO(gg_summary)
        obs = [record for record in parse_gg_summary_flat(summary_file)]

        self.assertEqual(obs, exp)
예제 #2
0
 def test_write_gg_record(self):
     """A written gg record has the expected lines (order-insensitive)"""
     expected_lines = [
         'BEGIN', 'prokmsa_id=123', 'gg_id=', 'hugenholtz_tax_string=',
         'ncbi_acc_w_ver=xyz', 'ncbi_gi=333', 'n_pos_aligned=',
         'n_pos_unaligned=', 'db_name=', 'gold_id=', 'decision=',
         'prokmsaname=', 'isolation_source=', 'clone=foo', 'organism=',
         'strain=', 'specific_host=', 'authors=', 'title=', 'pubmed=123',
         'journal=', 'study_id=', 'submit_date=', 'country=',
         'ncbi_tax_string=', 'silva_tax_string=', 'rdp_tax_string=',
         'greengenes_tax_string=', 'non_acgt_percent=0.5',
         'perc_ident_to_invariant_core=', 'small_gap_intrusions=',
         'bellerophon=', 'bel3_div_ratio=', 'chim_slyr_a=', 'chim_slyr_b=',
         'chim_slyr_a_tax=', 'chim_slyr_b_tax=', 'aligned_seq=',
         'unaligned_seq=', 'END', '',
     ]
     # both sides are sorted so the comparison ignores line order
     expected_lines.sort()

     populated_fields = {
         'prokmsa_id': 123,
         'ncbi_acc_w_ver': 'xyz',
         'ncbi_gi': '333',
         'pubmed': 123,
         'clone': 'foo',
         'non_acgt_percent': '0.5',
     }
     record = GreengenesRecord(populated_fields)

     # write into an in-memory buffer, then rewind to read it back
     buf = StringIO()
     write_gg_record(buf, record)
     buf.seek(0)
     written = buf.read()
     observed_lines = sorted(written.splitlines())

     self.assertEqual(observed_lines, expected_lines)
예제 #3
0
def get_genbank_summary(r):
    """Build a GreengenesRecord summarising the record ``r``

    Each (field, function) pair in ``parse_funs`` is applied in order: the
    function is called with ``r`` and its result is stored under the field.
    Returns the populated GreengenesRecord.
    """
    summary = GreengenesRecord()

    for field_name, extract in parse_funs:
        summary[field_name] = extract(r)

    return summary
예제 #4
0
파일: parse.py 프로젝트: wasade/Greengenes
def parse_gg_summary_flat(open_file):
    """Parse a flat greengenes summary file from flat_files

    The first line must be a header that starts with '#' and lists the
    tab-delimited field names. Every following line is read as one
    tab-delimited row of values.

    open_file : an open file-like object supporting readline() and
        iteration over lines

    Yields one GreengenesRecord per row, with values assigned to the header
    names by position.

    Raises ValueError if the first line does not start with '#'. Because
    this is a generator, the header is only read (and the error only raised)
    once iteration starts.
    """
    header_line = open_file.readline()
    if not header_line.startswith('#'):
        # call form of raise is valid on both Python 2 and Python 3
        raise ValueError("Missing the header!")

    # drop the leading '#' and surrounding whitespace before splitting
    header = header_line[1:].strip().split('\t')

    # parenthesised single-argument print emits the same text on Python 2
    # and Python 3
    print("WARNING: NOT SETTING TYPES CURRENTLY")
    for line in open_file:
        record = GreengenesRecord()

        # zip() pairs by position and stops at the shorter sequence, so
        # header fields with no matching value in the row are not assigned
        for key, value in zip(header, line.strip().split('\t')):
            record[key] = value

        # type coercion via record.setTypes() is not applied yet (see the
        # warning printed above)
        yield record
예제 #5
0
 def test_get_genbank_summary(self):
     """Summary built from the gb1 fixture matches the expected record"""
     expected_fields = {
         'ncbi_acc_w_ver':'AGIY01000001.1',
         'ncbi_gi':'354825968',
         'gold_id':'Gi05850',
         'decision':'named_isolate',
         'isolation_source':'anaerobic digested sludge',
         'organism':'Methanolinea tarda NOBI-1',
         'strain':'NOBI-1',
         'prokmsaname':'Methanolinea tarda NOBI-1',
         'specific_host':'Methanolinea tarda NOBI-1 ctg73, whole genome shotgun sequence.',
         'authors':'Lucas,S., Han,J., Lapidus,A., Cheng,J.-F., Goodwin,L., Pitluck,S., Peters,L., Land,M.L., Hauser,L., Imachi,H., Sekiguchi,Y., Kamagata,Y., Cadillo-Quiroz,H., Zinder,S., Liu,W.T., Tamaki,H. and Woyke,T.J.',
         'title':'The draft genome of Methanolinea tarda NOBI-1',
         'submit_date':'31-OCT-2011',
         'country':'Japan: Nagaoka',
         # 'NCBI_tax_id':'882090' is currently not part of the expected record
         'ncbi_tax_string':'Archaea; Euryarchaeota; Methanomicrobia; Methanomicrobiales; Methanoregulaceae; Methanolinea',
     }
     exp = GreengenesRecord(expected_fields)

     obs = get_genbank_summary(self.gb1)

     self.assertEqual(obs, exp)
예제 #6
0
 def setUp(self):
     """Create the record fixture: only prokmsa_id is supplied"""
     initial_fields = {'prokmsa_id':123}
     self.ggrecord = GreengenesRecord(initial_fields)
예제 #7
0
class GreengenesRecordTests(TestCase):
    """Tests for the GreengenesRecord object"""

    def setUp(self):
        """Create the record fixture: only prokmsa_id is supplied"""
        initial_fields = {'prokmsa_id':123}
        self.ggrecord = GreengenesRecord(initial_fields)

    def test_init(self):
        """A new record holds the given field and None for all others"""
        unset_fields = [
            'ncbi_acc_w_ver',
            'ncbi_gi',
            'gg_id',
            'db_name',
            'gold_id',
            'decision',
            'prokmsaname',
            'isolation_source',
            'clone',
            'organism',
            'strain',
            'specific_host',
            'authors',
            'title',
            'pubmed',
            'journal',
            'study_id',
            'submit_date',
            'country',
            'ncbi_tax_string',
            'silva_tax_string',
            'rdp_tax_string',
            'greengenes_tax_string',
            'hugenholtz_tax_string',
            'non_acgt_percent',
            'perc_ident_to_invariant_core',
            'small_gap_intrusions',
            'bellerophon',
            'bel3_div_ratio',
            'chim_slyr_a',
            'chim_slyr_b',
            'chim_slyr_a_tax',
            'chim_slyr_b_tax',
            'aligned_seq',
            'unaligned_seq',
            'n_pos_aligned',
            'n_pos_unaligned',
        ]
        # every field not passed to the constructor is expected to be None
        exp = dict.fromkeys(unset_fields, None)
        exp['prokmsa_id'] = 123

        self.assertEqual(self.ggrecord, exp)

    def test_setTypes(self):
        """setTypes leaves these already-typed values as they were"""
        record = self.ggrecord
        record['ncbi_acc_w_ver'] = 'asd'
        record.setTypes()

        self.assertEqual(record['prokmsa_id'], 123)
        self.assertEqual(record['ncbi_acc_w_ver'], 'asd')

    def test_getARBRules(self):
        """ARB rules from the record match the reference rules"""
        observed_rules = self.ggrecord.getARBRules().split("\n\n")
        observed_rules.sort()

        # the final chunk of the reference text is dropped before comparing
        expected_rules = arbrules.split("\n\n")[:-1]
        expected_rules.sort()

        # NOTE(review): zip stops at the shorter list, so a difference in
        # the number of rules is not detected by this loop
        for observed_rule, expected_rule in zip(observed_rules, expected_rules):
            self.assertEqual(observed_rule, expected_rule)

    def test_toGreengenesFormat(self):
        """Greengenes-format output has the expected lines (any order)"""
        formatted = self.ggrecord.toGreengenesFormat()
        observed_lines = sorted(formatted.splitlines())
        expected_lines = sorted(exp_testrecord.splitlines())

        self.assertEqual(observed_lines, expected_lines)

    def test_sanityCheck(self):
        """sanityCheck returns None when valid, raises on a bad prokmsa_id"""
        check_result = self.ggrecord.sanityCheck()
        self.assertEqual(check_result, None)

        # a non-numeric id must be rejected
        self.ggrecord['prokmsa_id'] = "bad"
        self.assertRaises(ValueError, self.ggrecord.sanityCheck)