def test_compute_sequence_stats_no_unknowns(self):
    """Seq stats match whether the unknown-keyword list is omitted or empty."""
    expected = {"1": [3, 4, "AGGT"], "2": [3, 4, "AGGA"], "3": [5, 4, "AGGC"]}

    # Unknown-keywords argument left out entirely.
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map2), expected)

    # Unknown-keywords argument given explicitly as an empty list.
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map2, []), expected)
def test_compute_sequence_stats_no_unknowns(self):
    """Check seq stats when no taxonomy keyword is treated as unknown."""
    expected_stats = {
        '1': [3, 4, 'AGGT'],
        '2': [3, 4, 'AGGA'],
        '3': [5, 4, 'AGGC'],
    }

    # First without the optional third argument ...
    default_result = compute_sequence_stats(self.fasta1, self.tax_map2)
    self.assertEqual(default_result, expected_stats)

    # ... then with an empty list passed for it.
    empty_list_result = compute_sequence_stats(
        self.fasta1, self.tax_map2, [])
    self.assertEqual(empty_list_result, expected_stats)
def test_compute_sequence_stats_single_depth(self):
    """Check seq stats for the one-level taxonomy fixture (tax_map3)."""
    # Each case pairs an unknown-keyword list with the stats expected for it.
    cases = (
        (["Z"], {"1": [3, 4, "AGGT"], "3": [3, 4, "AGGC"], "2": [1, 4, "AGGA"]}),
        (["A"], {"1": [2, 4, "AGGT"], "3": [4, 4, "AGGC"], "2": [0, 4, "AGGA"]}),
    )
    for unknown_kws, expected in cases:
        observed = compute_sequence_stats(
            self.fasta1, self.tax_map3, unknown_kws)
        self.assertEqual(observed, expected)
def test_compute_sequence_stats_single_depth(self):
    """Seq stats on a taxonomy that has only a single level."""
    # Treating 'Z' as the unknown keyword.
    expected_with_z = {
        '1': [3, 4, 'AGGT'],
        '3': [3, 4, 'AGGC'],
        '2': [1, 4, 'AGGA'],
    }
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map3, ['Z']),
        expected_with_z)

    # Treating 'A' as the unknown keyword.
    expected_with_a = {
        '1': [2, 4, 'AGGT'],
        '3': [4, 4, 'AGGC'],
        '2': [0, 4, 'AGGA'],
    }
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map3, ['A']),
        expected_with_a)
def test_compute_sequence_stats_missing_taxonomy(self):
    """Seq stats for a FASTA record that has no taxonomy mapping entry."""
    # Swap sys.stdout for an in-memory buffer so the printed message can be
    # inspected. Technique adapted from:
    # http://stackoverflow.com/questions/4219717/how-to-assert-output-
    # with-nosetest-unittest-in-python/4220278#4220278
    original_stdout = sys.stdout
    try:
        captured = StringIO()
        sys.stdout = captured

        # Add a fourth record (id '4') to the FASTA fixture.
        for fasta_line in (">4", "AACCGGTT"):
            self.fasta1.append(fasta_line)

        expected_stats = {
            '1': [3, 4, 'AGGT'],
            '3': [2, 4, 'AGGC'],
            '2': [3, 4, 'AGGA'],
            '4': [0, 8, 'AACCGGTT'],
        }
        self.assertEqual(
            compute_sequence_stats(self.fasta1, self.tax_map1, ['Z']),
            expected_stats)

        expected_message = ("Found sequence id '4' in the FASTA file "
                            "that wasn't in the taxonomy mapping file")
        printed = captured.getvalue().strip()
        self.assertEqual(printed, expected_message)
    finally:
        # Always put the real stdout back, even when an assertion fails.
        sys.stdout = original_stdout
def test_compute_sequence_stats_unequal_read_lengths(self):
    """Seq stats when the reads in the FASTA fixture differ in length."""
    expected_stats = {
        '1': [3, 6, 'AGGTAC'],
        '3': [2, 7, 'AGGCAAA'],
        '2': [3, 2, 'AG'],
    }
    self.assertEqual(
        compute_sequence_stats(self.fasta2, self.tax_map1, ['Z']),
        expected_stats)
def test_compute_sequence_stats_empty_taxonomy_levels(self):
    """Seq stats when some taxonomy levels in the mapping are empty."""
    unknown_keywords = ['Z']
    expected_stats = {
        '1': [3, 6, 'AGGTAC'],
        '3': [3, 7, 'AGGCAAA'],
        '2': [3, 2, 'AG'],
    }
    observed_stats = compute_sequence_stats(
        self.fasta2, self.tax_map4, unknown_keywords)
    self.assertEqual(observed_stats, expected_stats)
def main():
    """Write the input FASTA sequences to a new file in sorted order.

    Command-line options are parsed from ``script_info``. The FASTA file and
    the taxonomy map are read into lists of lines and passed to
    ``compute_sequence_stats`` together with the unknown keywords
    'Incertae_sedis' and 'unidentified'. The result is ordered by
    ``sort_seqs_by_taxonomic_depth`` and each entry is written to
    ``opts.output_fp`` as a '>'-prefixed header line followed by a second
    line.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Read the inputs inside context managers so the handles are closed
    # promptly instead of being left to the garbage collector.
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 and rejected
    # from 3.11 on -- confirm the target interpreter before upgrading.
    with open(opts.input_fasta_fp, 'U') as fasta_f:
        fasta_lines = fasta_f.readlines()
    with open(opts.input_taxonomy_map, 'U') as tax_map_f:
        tax_map_lines = tax_map_f.readlines()

    seq_stats = compute_sequence_stats(
        fasta_lines, tax_map_lines, ['Incertae_sedis', 'unidentified'])
    seq_stats_sorted = sort_seqs_by_taxonomic_depth(seq_stats)

    # Write out our sorted sequences; the file is closed even if a write
    # fails part-way through.
    with open(opts.output_fp, 'w') as out_fasta_f:
        for seq in seq_stats_sorted:
            # presumably seq[0] is the sequence id and seq[3] the sequence
            # string -- verify against sort_seqs_by_taxonomic_depth.
            out_fasta_f.write('>' + seq[0] + '\n' + seq[3] + '\n')
def test_compute_sequence_stats_missing_taxonomy(self):
    """Check seq stats and the printed notice for an unmapped sequence id."""
    # Redirect stdout into a buffer so the print output can be asserted on.
    # Approach borrowed from:
    # http://stackoverflow.com/questions/4219717/how-to-assert-output-
    # with-nosetest-unittest-in-python/4220278#4220278
    stdout_backup = sys.stdout
    try:
        buffer = StringIO()
        sys.stdout = buffer

        # Extra FASTA record with id '4'.
        self.fasta1.append(">4")
        self.fasta1.append("AACCGGTT")

        expected = {
            "1": [3, 4, "AGGT"],
            "3": [2, 4, "AGGC"],
            "2": [3, 4, "AGGA"],
            "4": [0, 8, "AACCGGTT"],
        }
        observed = compute_sequence_stats(self.fasta1, self.tax_map1, ["Z"])
        self.assertEqual(observed, expected)

        message = buffer.getvalue().strip()
        self.assertEqual(
            message,
            "Found sequence id '4' in the FASTA file "
            "that wasn't in the taxonomy mapping file")
    finally:
        # Restore the real stdout regardless of the test outcome.
        sys.stdout = stdout_backup
def test_compute_sequence_stats_multiple_unknown_keywords(self):
    """Seq stats when more than one unknown keyword is supplied."""
    unknown_keywords = ["Z", "F"]
    expected = {"1": [3, 4, "AGGT"], "3": [2, 4, "AGGC"], "2": [3, 4, "AGGA"]}
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map2, unknown_keywords),
        expected)
def test_compute_sequence_stats_multiple_unknown_keywords(self):
    """Check seq stats with two unknown keywords ('Z' and 'F')."""
    expected_stats = {
        '1': [3, 4, 'AGGT'],
        '3': [2, 4, 'AGGC'],
        '2': [3, 4, 'AGGA'],
    }
    observed_stats = compute_sequence_stats(
        self.fasta1, self.tax_map2, ['Z', 'F'])
    self.assertEqual(observed_stats, expected_stats)
def test_compute_sequence_stats_unequal_depths(self):
    """Seq stats when taxonomic depths differ between sequences."""
    expected = {"1": [3, 4, "AGGT"], "3": [2, 4, "AGGC"], "2": [3, 4, "AGGA"]}
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map1, ["Z"]), expected)
def test_compute_sequence_stats_empty_taxonomy_levels(self):
    """Check seq stats against the taxonomy fixture with empty levels."""
    expected = {
        "1": [3, 6, "AGGTAC"],
        "3": [3, 7, "AGGCAAA"],
        "2": [3, 2, "AG"],
    }
    self.assertEqual(
        compute_sequence_stats(self.fasta2, self.tax_map4, ["Z"]), expected)
def test_compute_sequence_stats_unequal_read_lengths(self):
    """Check seq stats for reads of differing lengths."""
    unknown_keywords = ["Z"]
    expected = {
        "1": [3, 6, "AGGTAC"],
        "3": [2, 7, "AGGCAAA"],
        "2": [3, 2, "AG"],
    }
    observed = compute_sequence_stats(
        self.fasta2, self.tax_map1, unknown_keywords)
    self.assertEqual(observed, expected)
def test_compute_sequence_stats_all_unknown(self):
    """Seq stats when every listed keyword A, B, C, D, F, Z is unknown."""
    # One single-letter keyword per character, in the same order as before.
    unknown_keywords = list("ABCDFZ")
    expected = {"1": [0, 4, "AGGT"], "2": [0, 4, "AGGA"], "3": [0, 4, "AGGC"]}
    self.assertEqual(
        compute_sequence_stats(self.fasta1, self.tax_map2, unknown_keywords),
        expected)
def test_compute_sequence_stats_all_unknown(self):
    """Check seq stats when all taxonomy levels count as unknown."""
    all_keywords = ['A', 'B', 'C', 'D', 'F', 'Z']
    expected_stats = {
        '1': [0, 4, 'AGGT'],
        '2': [0, 4, 'AGGA'],
        '3': [0, 4, 'AGGC'],
    }
    observed_stats = compute_sequence_stats(
        self.fasta1, self.tax_map2, all_keywords)
    self.assertEqual(observed_stats, expected_stats)
def test_compute_sequence_stats_unequal_depths(self):
    """Check seq stats for sequences of unequal taxonomic depth."""
    unknown_keywords = ['Z']
    expected_stats = {
        '1': [3, 4, 'AGGT'],
        '3': [2, 4, 'AGGC'],
        '2': [3, 4, 'AGGA'],
    }
    observed_stats = compute_sequence_stats(
        self.fasta1, self.tax_map1, unknown_keywords)
    self.assertEqual(observed_stats, expected_stats)