Beispiel #1
0
    def test_main_metaspades(self, tmpdir):
        """Run ``gen_stats_report.main`` on the metaspades fixture and check the JSON report.

        Every working file is created inside the pytest ``tmpdir`` (via
        ``tmpdir.join``) so nothing is written next to the temporary directory.
        """
        contig_file = copy_fixture(
            'ERP0102/ERP010229/ERR8665/ERR866589/metaspades/001/contigs.fasta',
            str(tmpdir.join('contigs.fasta')))
        coverage_file = copy_fixture(
            'ERP0102/ERP010229/ERR8665/ERR866589/metaspades/001/coverage.tab',
            str(tmpdir.join('coverage.tab')))
        output_file = write_empty_file(str(tmpdir.join('output.json')))

        base_count = 106000
        args = gen_stats_report.parse_args(
            [str(base_count), contig_file, coverage_file, output_file, '0', 'metaspades'])
        try:
            gen_stats_report.main(args)
        except SystemExit:
            # A SystemExit from main() is tolerated; the report written to
            # output_file is what gets verified below.
            pass

        expected_report = {
            'Base count': base_count,
            'Coverage': 0.01,
            'Min length 1000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
            'Min length 10000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
            'Min length 50000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
            'num_contigs': 5,
            'total_assembled_pairs': 262 + 245 + 116 + 87 + 60,
            'largest_contig': 262,
            'n50': 2,
            'l50': 4
        }
        with open(output_file) as output:
            report = json.load(output)
        assert expected_report == report
Beispiel #2
0
    def test_main_megahit(self, tmpdir):
        """Run ``gen_stats_report.main`` on the megahit fixture and check the JSON report.

        Every working file is created inside the pytest ``tmpdir`` (via
        ``tmpdir.join``) so nothing is written next to the temporary directory.
        """
        contig_file = copy_fixture(
            'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
            str(tmpdir.join('contigs.fasta')))
        coverage_file = copy_fixture(
            'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/coverage.tab',
            str(tmpdir.join('coverage.tab')))
        output_file = write_empty_file(str(tmpdir.join('output.json')))

        base_count = 106000
        args = gen_stats_report.parse_args(
            [str(base_count), contig_file, coverage_file, output_file, '0', 'megahit'])
        try:
            gen_stats_report.main(args)
        except SystemExit:
            # A SystemExit from main() is tolerated; the report written to
            # output_file is what gets verified below.
            pass

        expected_report = {
            'Base count': base_count,
            'Coverage': 14.2,
            'Min length 1000 bp': {'num_contigs': 1, 'total_base_pairs': 2473},
            'Min length 10000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
            'Min length 50000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
            'num_contigs': 22,
            'total_assembled_pairs': 11716,
            'largest_contig': 2473,
            'n50': 9,
            'l50': 14
        }
        with open(output_file) as output:
            report = json.load(output)
        assert expected_report == report
Beispiel #3
0
 def test_raises_error_on_invalid_basecount(self, tmpdir):
     """A base count of ``'0'`` must make the stats report fail with a ValueError.

     The exception message is read from ``exc.value`` (the raised exception
     itself) rather than from the ``ExceptionInfo`` wrapper, whose string form
     is not the bare message.
     """
     contig_file = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
         str(tmpdir.join('contigs.fasta')))
     coverage_file = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/coverage.tab',
         str(tmpdir.join('coverage.tab')))
     output_file = write_empty_file(str(tmpdir.join('output.json')))
     with pytest.raises(ValueError) as exc:
         args = gen_stats_report.parse_args(['0', contig_file, coverage_file, output_file, '0', 'metaspades'])
         gen_stats_report.main(args)
     # The ValueError must be the base-count validation error, not some other
     # ValueError raised while processing the input files.
     assert 'Base count (0) cannot be <= 0.' in str(exc.value)
Beispiel #4
0
 def test_compress_file_should_make_output_dir(self, tmpdir):
     """compress_file must produce its output even when the target directory is new.

     The destination sits in an ``extra_dir`` sub-directory that this test
     never creates itself.
     """
     source = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
         str(tmpdir) + '/contigs.fasta')
     parent_dir, file_name = os.path.split(source)
     target = os.path.join(parent_dir, 'extra_dir', file_name + '.gzip')
     compress_file(source, target)
     assert os.path.exists(target)
Beispiel #5
0
    def test_stats_valid_megahit_fasta_with_contig_filtering(self, tmpdir):
        """Check ``FastaStats`` results for the megahit fixture built with ``(f, 700, 'megahit')``.

        The second constructor argument is presumably the minimum contig
        length; the expected totals only cover the contigs 2473, 767 and 730.
        The fixture copy is placed inside the pytest ``tmpdir``.
        """
        contig_file = copy_fixture(
            'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
            str(tmpdir.join('contigs.fasta')))
        with open(contig_file) as f:
            fstats = gen_stats_report.FastaStats(f, 700, 'megahit')
            fstats.parse_file()
            contig_lengths = 2473 + 767 + 730
            expected_report = {
                'Min length 1000 bp': {
                    'num_contigs': 1,
                    'total_base_pairs': 2473
                },
                'Min length 10000 bp': {
                    'num_contigs': 0,
                    'total_base_pairs': 0
                },
                'Min length 50000 bp': {
                    'num_contigs': 0,
                    'total_base_pairs': 0
                },
                'num_contigs': 3,
                'total_assembled_pairs': contig_lengths,
                'largest_contig': 2473,
                'n50': 1,
                'l50': 3
            }
            assert fstats.get_largest_contig() == expected_report['largest_contig']
            assert fstats.get_n50() == expected_report['n50']
            assert fstats.get_l50() == expected_report['l50']
            # Compare with ==; ``assert value, expected`` would only check that
            # the value is truthy and use ``expected`` as the failure message.
            assert fstats.get_total_pairs() == expected_report['total_assembled_pairs']
            assert fstats.get_filtered_stats(2000) == {'num_contigs': 1, 'total_base_pairs': 2473}
            assert fstats.gen_report() == expected_report
Beispiel #6
0
 def test_write_md5_should_output_md5file(self, tmpdir):
     """write_md5 must leave a ``<file>.md5`` next to the input holding the expected digest."""
     source = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
         str(tmpdir) + '/contigs.fasta')
     write_md5(source)
     checksum_path = source + '.md5'
     with open(checksum_path) as handle:
         digest = handle.read()
     assert digest == 'dc94b51a736f6a43e146f1c1133d7aab'
Beispiel #7
0
 def test_compress_file_should_be_gzip(self, tmpdir):
     """The output of compress_file must gunzip back to the exact input bytes.

     Both files are read through context managers so their handles are closed
     even when the comparison fails.
     """
     tmpdir = str(tmpdir)
     fasta_file = copy_fixture('SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
                               tmpdir + '/contigs.fasta')
     out_file = fasta_file + '.gzip'
     compress_file(fasta_file, out_file)
     with gzip.open(out_file, 'rb') as compressed:
         out_file_content = compressed.read()
     with open(fasta_file, 'rb') as original:
         original_content = original.read()
     assert original_content == out_file_content
Beispiel #8
0
 def test_compress_file_should_be_smaller(self, tmpdir):
     """The file written by compress_file must exist and be smaller than its input."""
     source = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
         str(tmpdir) + '/contigs.fasta')
     compressed = source + '.gzip'
     compress_file(source, compressed)
     assert os.path.exists(compressed)
     original_size = os.stat(source).st_size
     compressed_size = os.stat(compressed).st_size
     assert original_size > compressed_size
Beispiel #9
0
 def test_trim_megahit_assembler(self, tmpdir):
     """Trim the megahit fixture with ``min_contig_length`` 400 and validate the result.

     Both the input copy and the trimmed output are placed inside the pytest
     ``tmpdir``; the checks themselves are done by
     ``trim_and_validate_against_trim``.
     """
     assembler = 'megahit'
     min_contig_length = 400
     fasta_file = copy_fixture(
         'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
         str(tmpdir.join('contigs.fasta')))
     output_file = str(tmpdir.join('output.fasta'))
     trim_and_validate_against_trim(fasta_file, output_file, min_contig_length, assembler)
Beispiel #10
0
 def test_trim_metaspades_assembler(self, tmpdir):
     """Trim the metaspades fixture with ``min_contig_length`` 200 and validate the result.

     Both the input copy and the trimmed output are placed inside the pytest
     ``tmpdir``; the checks themselves are done by
     ``trim_and_validate_against_trim``.
     """
     assembler = 'metaspades'
     min_contig_length = 200
     fasta_file = copy_fixture(
         'ERP0102/ERP010229/ERR8665/ERR866589/metaspades/001/contigs.fasta',
         str(tmpdir.join('contigs.fasta')))
     output_file = str(tmpdir.join('output.fasta'))
     trim_and_validate_against_trim(fasta_file, output_file, min_contig_length, assembler)
Beispiel #11
0
 def test_main_should_duplicate_fasta_if_no_trimming_required(self, tmpdir):
     """With a threshold argument of ``'0'`` the ``.fasta`` output must equal the input byte for byte.

     Files are read through context managers so no handle is left open when
     the comparison fails.
     """
     tmpdir = str(tmpdir)
     fasta_file = copy_fixture('SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
                               tmpdir + '/contigs.fasta')
     output_name = os.path.join(tmpdir, 'output')
     args = parse_args([fasta_file, '0', output_name, 'megahit'])
     fasta_trim_main(args)
     with open(fasta_file, 'rb') as original:
         original_content = original.read()
     with open(output_name + '.fasta', 'rb') as produced:
         produced_content = produced.read()
     assert original_content == produced_content
Beispiel #12
0
 def test_main_should_output_files(self, tmpdir):
     """fasta_trim_main must write its output files for a threshold argument of ``'500'``.

     The existence checks are real assertions; previously the results of
     ``os.path.exists`` were discarded, so the test could never fail.
     """
     tmpdir = str(tmpdir)
     fasta_file = copy_fixture('SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/final.contigs.fa',
                               tmpdir + '/contigs.fasta')
     output_name = os.path.join(tmpdir, 'output')
     args = parse_args([fasta_file, '500', output_name, 'megahit'])
     fasta_trim_main(args)
     # The '.fasta' suffix matches what the no-trimming test reads back.
     # NOTE(review): the '.gz' and '.gz.md5' names are assumed to be derived
     # from that same output path -- confirm against fasta_trim_main.
     for suffix in ('.fasta', '.fasta.gz', '.fasta.gz.md5'):
         expected_path = output_name + suffix
         assert os.path.exists(expected_path), 'missing output file: ' + expected_path
Beispiel #13
0
    def test_coverage_report_fixture_empty_fasta_file(self, tmpdir):
        """``calc_coverage`` must return 14.2 for the megahit coverage fixture and an empty contig file.

        Every working file is created inside the pytest ``tmpdir`` (via
        ``tmpdir.join``) so nothing is written next to the temporary directory.
        """
        coverage_file = copy_fixture(
            'SRP0741/SRP074153/SRR6257/SRR6257420/megahit/001/coverage.tab',
            str(tmpdir.join('tmp.tab')))
        contig_file = write_empty_file(str(tmpdir.join('contigs.fasta')))
        output_file = write_empty_file(str(tmpdir.join('output.json')))

        # Opening in append mode and closing straight away leaves the copied
        # coverage data untouched.
        open(coverage_file, 'a').close()
        args = gen_stats_report.parse_args(
            ['106000', contig_file, coverage_file, output_file, '500', 'metaspades'])
        coverage = gen_stats_report.calc_coverage(args)
        assert coverage == 14.2
Beispiel #14
0
 def test_stats_valid_metaspades_fasta_with_contig_filtering(self, tmpdir):
     """Check ``FastaStats`` results for the metaspades fixture built with ``(f, 100, 'metaspades')``.

     The second constructor argument is presumably the minimum contig length;
     the expected totals only cover the contigs 262, 245 and 116. The fixture
     copy is placed inside the pytest ``tmpdir``.
     """
     contig_file = copy_fixture(
         'ERP0102/ERP010229/ERR8665/ERR866589/metaspades/001/contigs.fasta',
         str(tmpdir.join('contigs.fasta')))
     with open(contig_file) as f:
         fstats = gen_stats_report.FastaStats(f, 100, 'metaspades')
         fstats.parse_file()
         expected_report = {
             'Min length 1000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'Min length 10000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'Min length 50000 bp': {'num_contigs': 0, 'total_base_pairs': 0},
             'num_contigs': 3,
             'total_assembled_pairs': 262 + 245 + 116,
             'largest_contig': 262,
             'n50': 2,
             'l50': 2
         }
         assert fstats.get_largest_contig() == expected_report['largest_contig']
         assert fstats.get_n50() == expected_report['n50']
         assert fstats.get_l50() == expected_report['l50']
         assert fstats.get_total_pairs() == expected_report['total_assembled_pairs']
         assert fstats.get_filtered_stats(100) == {'num_contigs': 3, 'total_base_pairs': 262 + 245 + 116}
         assert fstats.gen_report() == expected_report