def run(self):
    '''It runs the annotation statistics analysis.

    For every path listed under the 'pickle' key of the analysis inputs, the
    sequences are read from that path's last_version file and
    do_annotation_statistics writes its report to '<basename>.txt' inside
    the 'result' output directory.
    '''
    output_dir = self._create_output_dirs()['result']
    pickle_paths = self._get_input_fpaths()['pickle']
    for seq_path in pickle_paths:
        output_fpath = join(output_dir, seq_path.basename + '.txt')
        # Context managers make sure both handles are closed (and the report
        # is flushed to disk) even when reading or the statistics code
        # raises; previously neither file was ever closed explicitly.
        with open(seq_path.last_version, 'r') as seq_fhand:
            # The sequences are consumed while the input handle is still
            # open, in case seqs_in_file reads lazily.
            seqs = seqs_in_file(seq_fhand)
            with open(output_fpath, 'w') as stats_fhand:
                do_annotation_statistics(seqs, stats_fhand)
def test_annotation_statistics():
    '''It tests the seq annotation statistics.

    Two sequences carrying ORF, intron, microsatellite and SNV features plus
    ortholog and GO annotations are given to do_annotation_statistics. The
    report it writes to an in-memory file handle is then compared with the
    expected text, line by line.
    '''
    # Features for the two test sequences.
    orf = SeqFeature(FeatureLocation(1, 3), type='orf',
                     qualifiers={'strand': 'reverse'})
    intron = SeqFeature(FeatureLocation(2, 2), type='intron')
    ssr1 = SeqFeature(FeatureLocation(2, 2), type='microsatellite',
                      qualifiers={'unit': 'GAA'})
    ssr2 = SeqFeature(FeatureLocation(7, 10), type='microsatellite',
                      qualifiers={'unit': 'GAA'})
    snv = SeqFeature(FeatureLocation(7, 7), type='snv',
                     qualifiers={'alleles': {('T', 0): '', ('G', 0): ''}})
    snv2 = SeqFeature(FeatureLocation(7, 7), type='snv',
                      qualifiers={'alleles': {('T', 0): '', ('R', 0): ''}})
    feats1 = [intron, ssr1]
    feats2 = [orf, ssr2, snv, snv2]

    # Ortholog and GO annotations; only the first sequence has a description.
    annots1 = {'arabidopsis-orthologs': ['arab_1']}
    annots2 = {'melon-orthologs': ['melon_1'],
               'arabidopsis-orthologs': ['arab_2'],
               'GOs': ['Go1', 'Go2']}
    seq1 = SeqWithQuality(Seq('ACTG'), description='hola', features=feats1,
                          annotations=annots1)
    seq2 = SeqWithQuality(Seq('ACTG'), features=feats2, annotations=annots2)
    seqs = [seq1, seq2]

    stats_fhand = StringIO()
    do_annotation_statistics(seqs, stats_fhand)
    result = stats_fhand.getvalue()

    # NOTE(review): the layout of this literal (line breaks and column
    # padding) is kept exactly as found in the file; confirm it against the
    # real output of do_annotation_statistics.
    expected = '''Annotation statistics --------------------- Number of sequences: 2 Sequences with description: 1 Sequences with ORF: 1 Number of ORFs: 1 Sequences with intron: 1 Number of introns: 1 Orthologs _________ Sequences with melon orthologs: 1 Number of melon orthologs: 1 Sequences with arabidopsis orthologs: 2 Number of arabidopsis orthologs: 2 GO terms ________ Sequences with GOs: 1 Number of GOs: 2 SNVs ____ Sequences with SNVs: 1 SNVs found: 2 SNV types: \ttransversion: 1 \tunknown: 1 SNV locations: \tunknown: 2 Microsatellites _______________ Sequences with microsatellites: 2 | dinucleotide| trinucleotide|tetranucleotide|pentanucleotide| hexanucleotide| Total| -------------------------------------------------------------------------------------------------- utr3| 0| 0| 0| 0| 0| 0| utr5| 0| 1| 0| 0| 0| 1| orf| 0| 0| 0| 0| 0| 0| unknown| 0| 1| 0| 0| 0| 1| 
-------------------------------------------------------------------------------------------------- total| 0| 2| 0| 0| 0| | '''

    result = result.splitlines()
    expected = expected.splitlines()
    # Compare line by line first so that a failure points at the first
    # differing line instead of dumping both reports.
    for result_line, expected_line in zip(result, expected):
        assert result_line == expected_line
    # zip() stops at the shorter list, so the lengths must be checked too;
    # otherwise an empty or truncated report would pass unnoticed.
    assert len(result) == len(expected)