Ejemplo n.º 1
0
    def test_stats_valid_megahit_fasta_with_contig_filtering(self, tmpdir):
        """Check FastaStats against a MEGAHIT fixture parsed with a threshold of 700.

        The expectations below are written for three contigs of 2473, 767 and
        730 bp. The value 700 is passed as the second constructor argument
        (presumably the minimum contig length -- confirm against FastaStats).
        """
        # Join through the fixture so the copy lands inside the per-test
        # directory; plain string concatenation drops the path separator and
        # creates a sibling file next to it instead.
        contig_file = copy_fixture('SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
                                   str(tmpdir.join('contigs.fasta')))
        with open(contig_file) as f:
            fstats = gen_stats_report.FastaStats(f, 700, 'megahit')
            fstats.parse_file()
            contig_lengths = 2473 + 767 + 730
            expected_report = {
                'Min length 1000 bp': {
                    'num_contigs': 1,
                    'total_base_pairs': 2473
                },
                'Min length 10000 bp': {
                    'num_contigs': 0,
                    'total_base_pairs': 0
                },
                'Min length 50000 bp': {
                    'num_contigs': 0,
                    'total_base_pairs': 0
                },
                'num_contigs': 3,
                'total_assembled_pairs': contig_lengths,
                'largest_contig': 2473,
                'n50': 1,
                'l50': 3
            }
            assert fstats.get_largest_contig() == expected_report['largest_contig']
            assert fstats.get_n50() == expected_report['n50']
            assert fstats.get_l50() == expected_report['l50']
            # Compare with ``==``: ``assert value, expected`` would treat the
            # expected total as the failure message and only check truthiness.
            assert fstats.get_total_pairs() == expected_report['total_assembled_pairs']
            assert fstats.get_filtered_stats(2000) == {'num_contigs': 1, 'total_base_pairs': 2473}
            assert fstats.gen_report() == expected_report
Ejemplo n.º 2
0
 def test_stats_empty_fasta(self, tmpdir):
     """Check that an empty contigs file yields zero for every statistic queried here."""
     # Join through the fixture so the file is created inside the per-test
     # directory; plain string concatenation drops the path separator and
     # creates a sibling file next to it instead.
     contig_file = write_empty_file(str(tmpdir.join('contigs.fasta')))
     with open(contig_file) as f:
         fstats = gen_stats_report.FastaStats(f, 500, 'metaspades')
         fstats.parse_file()
         assert fstats.get_largest_contig() == 0
         assert fstats.get_n50() == 0
         assert fstats.get_l50() == 0
         assert fstats.get_total_pairs() == 0
         assert fstats.get_filtered_stats(100) == {'num_contigs': 0, 'total_base_pairs': 0}
Ejemplo n.º 3
0
 def test_stats_valid_metaspades_fasta_with_contig_filtering(self, tmpdir):
     """Check FastaStats against a metaSPAdes fixture parsed with a threshold of 100.

     The expectations below are written for three contigs of 262, 245 and
     116 bp. The value 100 is passed as the second constructor argument
     (presumably the minimum contig length -- confirm against FastaStats).
     """
     # Join through the fixture so the copy lands inside the per-test
     # directory; plain string concatenation drops the path separator and
     # creates a sibling file next to it instead.
     contig_file = copy_fixture('ERP0102/ERP010229/ERR8665/ERR866589/metaspades/001/contigs.fasta',
                                str(tmpdir.join('contigs.fasta')))
     with open(contig_file) as f:
         fstats = gen_stats_report.FastaStats(f, 100, 'metaspades')
         fstats.parse_file()
         expected_report = {
             'Min length 1000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'Min length 10000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'Min length 50000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'num_contigs': 3,
             'total_assembled_pairs': 262 + 245 + 116,
             'largest_contig': 262,
             'n50': 2,
             'l50': 2
         }
         assert fstats.get_largest_contig() == expected_report['largest_contig']
         assert fstats.get_n50() == expected_report['n50']
         assert fstats.get_l50() == expected_report['l50']
         assert fstats.get_total_pairs() == expected_report['total_assembled_pairs']
         assert fstats.get_filtered_stats(100) == {'num_contigs': 3, 'total_base_pairs': 262 + 245 + 116}
         assert fstats.gen_report() == expected_report
Ejemplo n.º 4
0
 def test_unsupported_assemblers(self):
     """Constructing FastaStats with an unsupported assembler name must raise ValueError."""
     for bad_name in ('minia', 'invalid_assembler'):
         # Each name gets its own raises-context so every one is checked.
         with pytest.raises(ValueError):
             gen_stats_report.FastaStats('contigs.fasta', 500, bad_name)
Ejemplo n.º 5
0
 def test_supported_assemblers(self):
     """Each supported assembler name is accepted and exposed via ``assembler``."""
     for name in ('metaspades', 'spades', 'megahit'):
         stats = gen_stats_report.FastaStats('contigs.fasta', 500, name)
         assert stats.assembler == name