def run(self):
        'It runs the annotation statistics for every input pickle file.'
        # Results go to the 'result' directory reported by the analysis.
        output_dir = self._create_output_dirs()['result']
        inputs = self._get_input_fpaths()
        pickle_paths = inputs['pickle']

        for seq_path in pickle_paths:
            # One report per input, named after the input's basename.
            output_fpath = join(output_dir, seq_path.basename + '.txt')
            # Both handles are closed deterministically, even if the
            # statistics step raises; this also guarantees the report is
            # flushed to disk before the next file is processed.
            with open(seq_path.last_version, 'r') as seq_fhand:
                seqs = seqs_in_file(seq_fhand)
                with open(output_fpath, 'w') as stats_fhand:
                    do_annotation_statistics(seqs, stats_fhand)
Example #2
0
    def test_annotation_statistics():
        'It tests the seq annotation statistics'

        orf = SeqFeature(FeatureLocation(1, 3), type='orf',
                         qualifiers={'strand':'reverse'})
        intron = SeqFeature(FeatureLocation(2, 2), type='intron')
        ssr1 = SeqFeature(FeatureLocation(2, 2), type='microsatellite',
                          qualifiers={'unit':'GAA'})
        ssr2 = SeqFeature(FeatureLocation(7, 10), type='microsatellite',
                          qualifiers={'unit':'GAA'})
        snv = SeqFeature(FeatureLocation(7, 7), type='snv',
                          qualifiers={'alleles':{('T',0):'', ('G',0):''}})
        snv2 = SeqFeature(FeatureLocation(7, 7), type='snv',
                          qualifiers={'alleles':{('T',0):'', ('R',0):''}})

        feats1 = [intron, ssr1]
        feats2 = [orf, ssr2, snv, snv2]

        annots1 = {'arabidopsis-orthologs':['arab_1']}

        annots2 = {'melon-orthologs':['melon_1'],
                   'arabidopsis-orthologs':['arab_2'],
                   'GOs':['Go1', 'Go2']}

        seq1 = SeqWithQuality(Seq('ACTG'), description='hola', features=feats1,
                              annotations=annots1)
        seq2 = SeqWithQuality(Seq('ACTG'), features=feats2, annotations=annots2)
        seqs = [seq1, seq2]

        stats_fhand = StringIO()

        do_annotation_statistics(seqs, stats_fhand)
        result = stats_fhand.getvalue()

        expected = '''Annotation statistics
---------------------
Number of sequences: 2
Sequences with description: 1
Sequences with ORF: 1
Number of ORFs: 1
Sequences with intron: 1
Number of introns: 1

Orthologs
_________
Sequences with melon orthologs: 1
Number of melon orthologs: 1
Sequences with arabidopsis orthologs: 2
Number of arabidopsis orthologs: 2

GO terms
________
Sequences with GOs: 1
Number of GOs: 2

SNVs
____
Sequences with SNVs: 1
SNVs found: 2
SNV types:
\ttransversion: 1
\tunknown: 1
SNV locations:
\tunknown: 2

Microsatellites
_______________
Sequences with microsatellites: 2
        |   dinucleotide|  trinucleotide|tetranucleotide|pentanucleotide| hexanucleotide|   Total|
--------------------------------------------------------------------------------------------------
    utr3|              0|              0|              0|              0|              0|       0|
    utr5|              0|              1|              0|              0|              0|       1|
     orf|              0|              0|              0|              0|              0|       0|
 unknown|              0|              1|              0|              0|              0|       1|
--------------------------------------------------------------------------------------------------
   total|              0|              2|              0|              0|              0|        |

'''

        result = result.splitlines()
        expected = expected.splitlines()
        for index, line in enumerate(result):
            assert line == expected[index]