コード例 #1
0
    def test_compute_sequence_stats_no_unknowns(self):
        """Check seq stats for a taxonomy when no unknown keywords are given.

        The same stats are expected whether the unknown-keyword argument is
        left out entirely or passed as an empty list.
        """
        expected = {
            "1": [3, 4, "AGGT"],
            "2": [3, 4, "AGGA"],
            "3": [5, 4, "AGGC"],
        }

        # Third argument omitted.
        observed = compute_sequence_stats(self.fasta1, self.tax_map2)
        self.assertEqual(observed, expected)

        # Third argument supplied as an empty list.
        observed = compute_sequence_stats(self.fasta1, self.tax_map2, [])
        self.assertEqual(observed, expected)
コード例 #2
0
    def test_compute_sequence_stats_no_unknowns(self):
        """Seq stats for a taxonomy when no unknown keywords are in play.

        The computation is run twice -- first without the keyword argument,
        then with an empty keyword list -- and both runs must give the same
        stats.
        """
        expected = {
            '1': [3, 4, 'AGGT'],
            '2': [3, 4, 'AGGA'],
            '3': [5, 4, 'AGGC'],
        }
        # () -> call without the third argument; ([],) -> call with [].
        for extra_args in ((), ([],)):
            observed = compute_sequence_stats(
                self.fasta1, self.tax_map2, *extra_args)
            self.assertEqual(observed, expected)
コード例 #3
0
    def test_compute_sequence_stats_single_depth(self):
        """Seq stats for a taxonomy that has only a single level.

        Two unknown-keyword lists are tried in turn, each paired with the
        stats expected for it.
        """
        cases = [
            (
                ["Z"],
                {"1": [3, 4, "AGGT"], "3": [3, 4, "AGGC"], "2": [1, 4, "AGGA"]},
            ),
            (
                ["A"],
                {"1": [2, 4, "AGGT"], "3": [4, 4, "AGGC"], "2": [0, 4, "AGGA"]},
            ),
        ]
        for unknown_kws, expected in cases:
            observed = compute_sequence_stats(
                self.fasta1, self.tax_map3, unknown_kws
            )
            self.assertEqual(observed, expected)
コード例 #4
0
    def test_compute_sequence_stats_single_depth(self):
        """Verify seq stats when the taxonomy contains just one level.

        The stats are checked once with 'Z' and once with 'A' as the only
        unknown keyword.
        """
        # Unknown keyword 'Z'.
        expected_with_z = {
            '1': [3, 4, 'AGGT'],
            '3': [3, 4, 'AGGC'],
            '2': [1, 4, 'AGGA'],
        }
        observed = compute_sequence_stats(self.fasta1, self.tax_map3, ['Z'])
        self.assertEqual(observed, expected_with_z)

        # Unknown keyword 'A'.
        expected_with_a = {
            '1': [2, 4, 'AGGT'],
            '3': [4, 4, 'AGGC'],
            '2': [0, 4, 'AGGA'],
        }
        observed = compute_sequence_stats(self.fasta1, self.tax_map3, ['A'])
        self.assertEqual(observed, expected_with_a)
コード例 #5
0
    def test_compute_sequence_stats_missing_taxonomy(self):
        """Seq stats for a FASTA record that has no taxonomy mapping entry.

        Sequence '4' is appended to the FASTA fixture before the call. The
        test checks both the returned stats and the message printed to
        stdout.
        """
        expected_stats = {
            '1': [3, 4, 'AGGT'],
            '3': [2, 4, 'AGGC'],
            '2': [3, 4, 'AGGA'],
            '4': [0, 8, 'AACCGGTT']
        }
        expected_message = ("Found sequence id '4' in the FASTA file "
                            "that wasn't in the taxonomy mapping file")

        # Swap stdout for an in-memory buffer so the printed message can be
        # inspected. Technique taken from:
        # http://stackoverflow.com/questions/4219717/how-to-assert-output-
        #     with-nosetest-unittest-in-python/4220278#4220278
        original_stdout = sys.stdout
        try:
            captured = StringIO()
            sys.stdout = captured

            # Add a record whose id is absent from the taxonomy map.
            for fasta_line in (">4", "AACCGGTT"):
                self.fasta1.append(fasta_line)

            observed_stats = compute_sequence_stats(
                self.fasta1, self.tax_map1, ['Z'])
            self.assertEqual(observed_stats, expected_stats)

            printed = captured.getvalue().strip()
            self.assertEqual(printed, expected_message)
        finally:
            # Always restore the real stdout, even when an assertion fails.
            sys.stdout = original_stdout
コード例 #6
0
 def test_compute_sequence_stats_unequal_read_lengths(self):
     """Seq stats when the reads do not all have the same length."""
     unknown_kws = ['Z']
     expected = {}
     expected['1'] = [3, 6, 'AGGTAC']
     expected['3'] = [2, 7, 'AGGCAAA']
     expected['2'] = [3, 2, 'AG']

     observed = compute_sequence_stats(
         self.fasta2, self.tax_map1, unknown_kws)
     self.assertEqual(observed, expected)
コード例 #7
0
 def test_compute_sequence_stats_empty_taxonomy_levels(self):
     """Seq stats when some taxonomy levels are empty."""
     unknown_kws = ['Z']
     expected = {}
     expected['1'] = [3, 6, 'AGGTAC']
     expected['3'] = [3, 7, 'AGGCAAA']
     expected['2'] = [3, 2, 'AG']

     observed = compute_sequence_stats(
         self.fasta2, self.tax_map4, unknown_kws)
     self.assertEqual(observed, expected)
コード例 #8
0
def main():
    """Write the input sequences to a FASTA file, sorted by taxonomic depth.

    Parses the command-line options, computes per-sequence stats from the
    input FASTA file and taxonomy map (passing 'Incertae_sedis' and
    'unidentified' as the third argument of compute_sequence_stats), sorts
    the result with sort_seqs_by_taxonomic_depth, and writes one FASTA
    record per sorted entry to the output path.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Read each input inside a ``with`` block so its handle is closed as soon
    # as the lines are in memory, instead of being left to the garbage
    # collector.
    # NOTE(review): mode 'U' is kept from the original for universal-newline
    # handling on older interpreters; Python 3.11+ rejects it -- confirm the
    # target interpreter before changing it.
    with open(opts.input_fasta_fp, 'U') as fasta_f:
        fasta_lines = fasta_f.readlines()
    with open(opts.input_taxonomy_map, 'U') as tax_map_f:
        tax_map_lines = tax_map_f.readlines()

    seq_stats = compute_sequence_stats(
        fasta_lines, tax_map_lines, ['Incertae_sedis', 'unidentified'])
    seq_stats_sorted = sort_seqs_by_taxonomic_depth(seq_stats)

    # Write out our sorted sequences. Element 0 of each entry is used as the
    # record header and element 3 as the sequence line. The ``with`` block
    # closes the output file even if a write raises.
    with open(opts.output_fp, 'w') as out_fasta_f:
        for seq in seq_stats_sorted:
            out_fasta_f.write('>' + seq[0] + '\n' + seq[3] + '\n')
コード例 #9
0
def main():
    """Sort the input sequences by taxonomic depth and write them as FASTA.

    Steps:
      1. Parse the command-line options described by ``script_info``.
      2. Read the input FASTA file and the taxonomy map into lists of lines.
      3. Compute sequence stats, passing 'Incertae_sedis' and 'unidentified'
         as the third argument of compute_sequence_stats.
      4. Sort the stats with sort_seqs_by_taxonomic_depth.
      5. Write each sorted entry to the output path as a FASTA record.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Context managers close the input handles deterministically; the
    # original left both files open after ``readlines()``.
    # NOTE(review): 'U' mode is preserved for compatibility with older
    # interpreters; Python 3.11+ raises on it -- confirm before modernizing.
    with open(opts.input_fasta_fp, 'U') as fasta_f:
        fasta_lines = fasta_f.readlines()
    with open(opts.input_taxonomy_map, 'U') as taxonomy_map_f:
        taxonomy_map_lines = taxonomy_map_f.readlines()

    seq_stats = compute_sequence_stats(
            fasta_lines,
            taxonomy_map_lines,
            ['Incertae_sedis', 'unidentified'])
    seq_stats_sorted = sort_seqs_by_taxonomic_depth(seq_stats)

    # Write out our sorted sequences: index 0 supplies the header text and
    # index 3 the sequence. The file is closed even when a write fails.
    with open(opts.output_fp, 'w') as out_fasta_f:
        for seq in seq_stats_sorted:
            out_fasta_f.write('>' + seq[0] + '\n' + seq[3] + '\n')
コード例 #10
0
    def test_compute_sequence_stats_missing_taxonomy(self):
        """Check seq stats for a sequence absent from the taxonomy mapping.

        A record with id '4' is added to the FASTA fixture. The test asserts
        on the returned stats and on the message printed to stdout.
        """
        expected_stats = {
            "1": [3, 4, "AGGT"],
            "3": [2, 4, "AGGC"],
            "2": [3, 4, "AGGA"],
            "4": [0, 8, "AACCGGTT"],
        }
        expected_message = (
            "Found sequence id '4' in the FASTA file "
            "that wasn't in the taxonomy mapping file"
        )

        # Redirect stdout to a buffer so the print output can be examined.
        # The approach comes from:
        # http://stackoverflow.com/questions/4219717/how-to-assert-output-
        #     with-nosetest-unittest-in-python/4220278#4220278
        stdout_backup = sys.stdout
        try:
            buffer = StringIO()
            sys.stdout = buffer

            # Header line first, then the sequence line.
            self.fasta1.append(">4")
            self.fasta1.append("AACCGGTT")

            observed_stats = compute_sequence_stats(
                self.fasta1, self.tax_map1, ["Z"]
            )
            self.assertEqual(observed_stats, expected_stats)

            printed_text = buffer.getvalue().strip()
            self.assertEqual(printed_text, expected_message)
        finally:
            # Put the real stdout back regardless of the test outcome.
            sys.stdout = stdout_backup
コード例 #11
0
 def test_compute_sequence_stats_multiple_unknown_keywords(self):
     """Seq stats when more than one unknown keyword is supplied."""
     unknown_kws = ["Z", "F"]
     expected = {
         "1": [3, 4, "AGGT"],
         "3": [2, 4, "AGGC"],
         "2": [3, 4, "AGGA"],
     }
     observed = compute_sequence_stats(self.fasta1, self.tax_map2, unknown_kws)
     self.assertEqual(observed, expected)
コード例 #12
0
 def test_compute_sequence_stats_multiple_unknown_keywords(self):
     """Check seq stats for a taxonomy given several unknown keywords."""
     expected = dict([
         ('1', [3, 4, 'AGGT']),
         ('3', [2, 4, 'AGGC']),
         ('2', [3, 4, 'AGGA']),
     ])
     # Both 'Z' and 'F' are passed as unknown keywords.
     observed = compute_sequence_stats(
         self.fasta1, self.tax_map2, ['Z', 'F'])
     self.assertEqual(observed, expected)
コード例 #13
0
 def test_compute_sequence_stats_unequal_depths(self):
     """Seq stats when the taxonomic depths differ between sequences."""
     unknown_kws = ["Z"]
     expected = {
         "1": [3, 4, "AGGT"],
         "3": [2, 4, "AGGC"],
         "2": [3, 4, "AGGA"],
     }
     observed = compute_sequence_stats(self.fasta1, self.tax_map1, unknown_kws)
     self.assertEqual(observed, expected)
コード例 #14
0
 def test_compute_sequence_stats_empty_taxonomy_levels(self):
     """Check seq stats for taxonomies that contain empty levels."""
     expected = {
         "1": [3, 6, "AGGTAC"],
         "3": [3, 7, "AGGCAAA"],
         "2": [3, 2, "AG"],
     }
     # "Z" is the only unknown keyword for this case.
     observed = compute_sequence_stats(self.fasta2, self.tax_map4, ["Z"])
     self.assertEqual(observed, expected)
コード例 #15
0
 def test_compute_sequence_stats_unequal_read_lengths(self):
     """Check seq stats for reads of differing lengths."""
     expected = {
         "1": [3, 6, "AGGTAC"],
         "3": [2, 7, "AGGCAAA"],
         "2": [3, 2, "AG"],
     }
     # "Z" is the only unknown keyword for this case.
     observed = compute_sequence_stats(self.fasta2, self.tax_map1, ["Z"])
     self.assertEqual(observed, expected)
コード例 #16
0
 def test_compute_sequence_stats_all_unknown(self):
     """Seq stats when every taxonomy level is an unknown keyword.

     The first item of each expected entry is 0 in this case.
     """
     unknown_kws = ["A", "B", "C", "D", "F", "Z"]
     expected = {
         "1": [0, 4, "AGGT"],
         "2": [0, 4, "AGGA"],
         "3": [0, 4, "AGGC"],
     }
     observed = compute_sequence_stats(self.fasta1, self.tax_map2, unknown_kws)
     self.assertEqual(observed, expected)
コード例 #17
0
 def test_compute_sequence_stats_all_unknown(self):
     """Check seq stats when all taxonomy levels count as unknown.

     Every expected entry starts with 0 here.
     """
     every_level_unknown = ['A', 'B', 'C', 'D', 'F', 'Z']
     expected = {}
     expected['1'] = [0, 4, 'AGGT']
     expected['2'] = [0, 4, 'AGGA']
     expected['3'] = [0, 4, 'AGGC']

     observed = compute_sequence_stats(
         self.fasta1, self.tax_map2, every_level_unknown)
     self.assertEqual(observed, expected)
コード例 #18
0
 def test_compute_sequence_stats_unequal_depths(self):
     """Check seq stats for sequences with unequal taxonomic depths."""
     expected = dict([
         ('1', [3, 4, 'AGGT']),
         ('3', [2, 4, 'AGGC']),
         ('2', [3, 4, 'AGGA']),
     ])
     # 'Z' is the only unknown keyword for this case.
     observed = compute_sequence_stats(
         self.fasta1, self.tax_map1, ['Z'])
     self.assertEqual(observed, expected)