def test_truncate_rev_primers(self):
        """Check truncate_rev_primers output and log data for several options.

        Four scenarios are exercised against the fixture FASTA files:
        default options, zero allowed primer mismatches, zero mismatches
        combined with the "truncate_remove" option, and an input whose
        labels match none of the sample IDs in the primer mapping.
        """

        # Sample ID -> list of reverse primers; shared by every scenario
        rev_primers = {
            'PC.481': ['CTCTCCG'],
            'PC.634': ['CTCTCAG'],
            'PC.635': ['CTCTCAG'],
            'PC.636': ['CTCTCAG'],
            'PC.354': ['CTCTCAG']
        }

        # Scenario 1: default options, no reverse primer goes unfound.
        # Input files are opened in a with-block so the handle is closed
        # deterministically instead of being left to garbage collection.
        # NOTE(review): the "U" mode is kept from the original; it was
        # removed in Python 3.11, switch to "r" if this file is ported.
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as fasta_f:
            log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)

        expected_log_data = {
            'seqs_written': 5,
            'total_seqs': 6,
            'sample_id_not_found': 0,
            'reverse_primer_not_found': 0
        }

        self.assertEqual(log_data, expected_log_data)

        # Note that because these are short sequences, two mismatches allows
        # for a very short truncation of one of the sequences
        self.assertEqual(out_f.data, self.expected_truncation_default_settings)

        # Scenario 2: zero mismatches allowed, so not every sequence is
        # truncated; the log reports 2 reverse primers not found.
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as fasta_f:
            log_data = truncate_rev_primers(fasta_f,
                                            out_f,
                                            rev_primers,
                                            primer_mismatches=0)

        expected_log_data = {
            'seqs_written': 5,
            'total_seqs': 6,
            'sample_id_not_found': 0,
            'reverse_primer_not_found': 2
        }

        self.assertEqual(log_data, expected_log_data)

        # With zero mismatches allowed, 2 seqs should not be truncated
        self.assertEqual(out_f.data, self.expected_truncation_zero_mismatches)

        # Scenario 3: zero mismatches and the truncate_remove option, so
        # only 3 of the sequences should be written.
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as fasta_f:
            log_data = truncate_rev_primers(fasta_f,
                                            out_f,
                                            rev_primers,
                                            truncate_option="truncate_remove",
                                            primer_mismatches=0)

        expected_log_data = {
            'seqs_written': 3,
            'total_seqs': 6,
            'sample_id_not_found': 0,
            'reverse_primer_not_found': 2
        }

        self.assertEqual(log_data, expected_log_data)

        # With zero mismatches allowed, 3 seqs total should be written
        self.assertEqual(
            out_f.data,
            self.expected_truncation_zero_mismatches_truncate_remove)

        # Scenario 4: labels that match no sample ID; default options.
        # Every sequence should be counted under sample_id_not_found.
        out_f = FakeOutFile()
        with open(self.fasta_badlabels_fp, "U") as fasta_f:
            log_data = truncate_rev_primers(fasta_f, out_f, rev_primers)

        expected_log_data = {
            'seqs_written': 5,
            'total_seqs': 5,
            'sample_id_not_found': 5,
            'reverse_primer_not_found': 0
        }

        self.assertEqual(log_data, expected_log_data)

        # No matches to sample IDs, so sequences are written unmodified
        self.assertEqual(out_f.data, self.sample_fasta_file_bad_labels_data)
    def test_truncate_rev_primers(self):
        """Verify truncate_rev_primers results and log data per option set.

        Runs the function over the fixture FASTA data with default options,
        with primer_mismatches=0, with primer_mismatches=0 plus
        truncate_option="truncate_remove", and finally over a FASTA file
        whose labels do not match any sample ID in the primer mapping.
        """

        # Sample ID -> reverse primers; the same mapping serves all four runs
        rev_primers = {'PC.481': ['CTCTCCG'], 'PC.634': ['CTCTCAG'],
                       'PC.635': ['CTCTCAG'], 'PC.636': ['CTCTCAG'],
                       'PC.354': ['CTCTCAG']}

        # Default options: no reverse primer should be reported missing.
        # Each input file is opened in a with-block so its handle is closed
        # as soon as the call returns rather than leaking until collection.
        # NOTE(review): "U" mode is preserved from the original code; it no
        # longer exists in Python 3.11+, use "r" if this file is ported.
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as seqs_f:
            log_data = truncate_rev_primers(seqs_f, out_f, rev_primers)

        expected_log_data = {'seqs_written': 5, 'total_seqs': 6,
                             'sample_id_not_found': 0,
                             'reverse_primer_not_found': 0}

        self.assertEqual(log_data, expected_log_data)

        # Note that because these are short sequences, two mismatches allows
        # for a very short truncation of one of the sequences
        self.assertEqual(out_f.data, self.expected_truncation_default_settings)

        # Zero mismatches allowed: not all seqs get truncated, and the log
        # should report 2 reverse primers not found.
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as seqs_f:
            log_data = truncate_rev_primers(seqs_f, out_f, rev_primers,
                                            primer_mismatches=0)

        expected_log_data = {'seqs_written': 5, 'total_seqs': 6,
                             'sample_id_not_found': 0,
                             'reverse_primer_not_found': 2}

        self.assertEqual(log_data, expected_log_data)

        # With zero mismatches allowed, 2 seqs should not be truncated
        self.assertEqual(out_f.data, self.expected_truncation_zero_mismatches)

        # With zero mismatches and truncate_remove option, should only write
        # 3 of the sequences
        out_f = FakeOutFile()
        with open(self.fasta_fp, "U") as seqs_f:
            log_data = truncate_rev_primers(seqs_f, out_f, rev_primers,
                                            truncate_option="truncate_remove",
                                            primer_mismatches=0)

        expected_log_data = {'seqs_written': 3, 'total_seqs': 6,
                             'sample_id_not_found': 0,
                             'reverse_primer_not_found': 2}

        self.assertEqual(log_data, expected_log_data)

        # With zero mismatches allowed, 3 seqs total should be written
        self.assertEqual(out_f.data,
                         self.expected_truncation_zero_mismatches_truncate_remove)

        # Labels matching no sample ID (default options): every sequence
        # should be counted under sample_id_not_found in the log.
        out_f = FakeOutFile()
        with open(self.fasta_badlabels_fp, "U") as seqs_f:
            log_data = truncate_rev_primers(seqs_f, out_f, rev_primers)

        expected_log_data = {'seqs_written': 5, 'total_seqs': 5,
                             'sample_id_not_found': 5,
                             'reverse_primer_not_found': 0}

        self.assertEqual(log_data, expected_log_data)

        # No matches to sample IDs, so sequences are written unmodified
        self.assertEqual(out_f.data, self.sample_fasta_file_bad_labels_data)