def test_truncate_rev_primers(self):
    """Check truncate_rev_primers log counts and written output.

    Runs truncate_rev_primers against the fixture FASTA files in four
    configurations: default settings, zero primer mismatches, zero
    mismatches with the "truncate_remove" option, and labels whose sample
    IDs are absent from the primer mapping. For each run, the returned log
    dict and the data captured by the FakeOutFile are compared against the
    expected fixtures stored on the test case.
    """
    rev_primers = {
        'PC.481': ['CTCTCCG'],
        'PC.634': ['CTCTCAG'],
        'PC.635': ['CTCTCAG'],
        'PC.636': ['CTCTCAG'],
        'PC.354': ['CTCTCAG'],
    }

    # NOTE(review): the "U" open mode is kept for behavior parity; it was
    # removed in Python 3.11, so it must become "r" if this suite is run on
    # a newer interpreter.

    # Default options: 5 of the 6 input sequences are expected in the output.
    out_f = FakeOutFile()
    # Each input handle is opened in a context manager so it is closed even
    # when the call under test raises.
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)
    expected_log_data = {
        'seqs_written': 5,
        'total_seqs': 6,
        'sample_id_not_found': 0,
        'reverse_primer_not_found': 0,
    }
    self.assertEqual(log_data, expected_log_data)
    # Note that because these are short sequences, two mismatches allows
    # for a very short truncation of one of the sequences
    self.assertEqual(out_f.data, self.expected_truncation_default_settings)

    # With zero mismatches allowed, the reverse primer is not found in
    # 2 seqs, which are therefore not truncated.
    out_f = FakeOutFile()
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers,
                                        primer_mismatches=0)
    expected_log_data = {
        'seqs_written': 5,
        'total_seqs': 6,
        'sample_id_not_found': 0,
        'reverse_primer_not_found': 2,
    }
    self.assertEqual(log_data, expected_log_data)
    self.assertEqual(out_f.data, self.expected_truncation_zero_mismatches)

    # With zero mismatches and the truncate_remove option, only 3 of the
    # sequences should be written.
    out_f = FakeOutFile()
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers,
                                        truncate_option="truncate_remove",
                                        primer_mismatches=0)
    expected_log_data = {
        'seqs_written': 3,
        'total_seqs': 6,
        'sample_id_not_found': 0,
        'reverse_primer_not_found': 2,
    }
    self.assertEqual(log_data, expected_log_data)
    self.assertEqual(
        out_f.data,
        self.expected_truncation_zero_mismatches_truncate_remove)

    # Sample IDs missing from the primer mapping should be counted in the
    # log. A fresh mapping is built so this run does not share the dict
    # object used by the earlier calls.
    out_f = FakeOutFile()
    rev_primers = {
        'PC.481': ['CTCTCCG'],
        'PC.634': ['CTCTCAG'],
        'PC.635': ['CTCTCAG'],
        'PC.636': ['CTCTCAG'],
        'PC.354': ['CTCTCAG'],
    }
    with open(self.fasta_badlabels_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)
    expected_log_data = {
        'seqs_written': 5,
        'total_seqs': 5,
        'sample_id_not_found': 5,
        'reverse_primer_not_found': 0,
    }
    self.assertEqual(log_data, expected_log_data)
    # No matches to sample IDs, so sequences are written unmodified
    self.assertEqual(out_f.data, self.sample_fasta_file_bad_labels_data)
def test_truncate_rev_primers(self):
    """Check truncate_rev_primers log counts and written output.

    Exercises truncate_rev_primers on the fixture FASTA files with default
    settings, with zero primer mismatches, with zero mismatches plus the
    "truncate_remove" option, and with labels whose sample IDs are not in
    the primer mapping. Each run asserts on the returned log dict and on
    the data captured by the FakeOutFile.
    """
    rev_primers = {'PC.481': ['CTCTCCG'], 'PC.634': ['CTCTCAG'],
                   'PC.635': ['CTCTCAG'], 'PC.636': ['CTCTCAG'],
                   'PC.354': ['CTCTCAG']}

    # NOTE(review): "U" is kept as the open mode to preserve behavior; it
    # was removed in Python 3.11 and would need to become "r" there.

    # Default options: the log should report 5 written out of 6 total.
    out_f = FakeOutFile()
    # Input files are opened with a context manager so the handle is always
    # released, including when the call or an assertion setup fails.
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)
    expected_log_data = {'seqs_written': 5, 'total_seqs': 6,
                         'sample_id_not_found': 0,
                         'reverse_primer_not_found': 0}
    self.assertEqual(log_data, expected_log_data)
    # Note that because these are short sequences, two mismatches allows
    # for a very short truncation of one of the sequences
    self.assertEqual(out_f.data, self.expected_truncation_default_settings)

    # Zero mismatches allowed: 2 seqs should not be truncated because
    # their reverse primer is not found.
    out_f = FakeOutFile()
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers,
                                        primer_mismatches=0)
    expected_log_data = {'seqs_written': 5, 'total_seqs': 6,
                         'sample_id_not_found': 0,
                         'reverse_primer_not_found': 2}
    self.assertEqual(log_data, expected_log_data)
    self.assertEqual(out_f.data, self.expected_truncation_zero_mismatches)

    # Zero mismatches with truncate_remove: only 3 seqs total should be
    # written.
    out_f = FakeOutFile()
    with open(self.fasta_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers,
                                        truncate_option="truncate_remove",
                                        primer_mismatches=0)
    expected_log_data = {'seqs_written': 3, 'total_seqs': 6,
                         'sample_id_not_found': 0,
                         'reverse_primer_not_found': 2}
    self.assertEqual(log_data, expected_log_data)
    self.assertEqual(
        out_f.data,
        self.expected_truncation_zero_mismatches_truncate_remove)

    # Should count sample ids not found in log. The mapping is rebuilt so
    # this run gets its own dict object, as in the original test.
    out_f = FakeOutFile()
    rev_primers = {'PC.481': ['CTCTCCG'], 'PC.634': ['CTCTCAG'],
                   'PC.635': ['CTCTCAG'], 'PC.636': ['CTCTCAG'],
                   'PC.354': ['CTCTCAG']}
    with open(self.fasta_badlabels_fp, "U") as fasta_f:
        log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)
    expected_log_data = {'seqs_written': 5, 'total_seqs': 5,
                         'sample_id_not_found': 5,
                         'reverse_primer_not_found': 0}
    self.assertEqual(log_data, expected_log_data)
    # No matches to sample IDs, so sequences are written unmodified
    self.assertEqual(out_f.data, self.sample_fasta_file_bad_labels_data)