def test_process_barcode_paired_stitched_reverse_primer_match(self):
    """ Stitched read, reverse primer match, numpy quality scores.

    The record is expected in the oriented barcode and read outputs;
    the not-oriented outputs must stay empty.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        np.arange(3, 23, dtype=np.int8),
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'AAAAAA'))
    ]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'GATCG'))
    ]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With reverse primer match, should write in order of read2, read 1
    process_barcode_paired_stitched(
        record,
        barcode_sink,
        read_sink,
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=False,
    )

    # Expected quality strings are the integer scores shown as ASCII
    # characters at an offset of 33 (3 -> '$', ..., 22 -> '7').
    self.assertEqual(
        barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'TCGCGAT', '+', "567'&%$", ''],
    )
    self.assertEqual(
        read_sink.data.split('\n'),
        ['@HWI-ST830', 'TCGATCGATCGAT', '+', '43210/.-,+*)(', ''],
    )

    # Nothing may be written to the not-oriented outputs.
    self.assertEqual(unoriented_barcode_sink.data.split('\n'), [''])
    self.assertEqual(unoriented_read_sink.data.split('\n'), [''])
def test_process_barcode_paired_stitched(self):
    """ Stitched read, no primer match, numpy quality scores.

    The oriented outputs must stay empty, and the barcode and trimmed
    read are expected in the not-oriented outputs.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        np.arange(3, 23, dtype=np.int8),
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    rev_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With no matches, should write to the not_oriented files, and keep
    # in the same order of output file
    options = dict(
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=True,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=True,
    )
    process_barcode_paired_stitched(
        record, barcode_sink, read_sink, **options)

    # Oriented outputs receive nothing.
    self.assertEqual(barcode_sink.data.split('\n'), [''])
    self.assertEqual(read_sink.data.split('\n'), [''])

    # Expected quality strings are the integer scores shown as ASCII
    # characters at an offset of 33 (3 -> '$', ..., 22 -> '7').
    self.assertEqual(
        unoriented_barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'CGATGAT', '+', '7654&%$', ''],
    )
    self.assertEqual(
        unoriented_read_sink.data.split('\n'),
        ['@HWI-ST830', 'GATCGATCGATCG', '+', "'()*+,-./0123", ''],
    )
def test_process_barcode_paired_stitched_forward_primer_match(self):
    """ Stitched read, forward primer match, string quality scores.

    The record is expected in the oriented barcode and read outputs;
    the not-oriented outputs must stay empty.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        "1234567890ABCDEFGHIJ",
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'GATCGA'))
    ]
    rev_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With forward primer match, should write in order of read 1, read 2
    process_barcode_paired_stitched(
        record,
        barcode_sink,
        read_sink,
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=True,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=True,
    )

    self.assertEqual(
        barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'CGATGAT', '+', 'JIHG321', ''],
    )
    self.assertEqual(
        read_sink.data.split('\n'),
        ['@HWI-ST830', 'GATCGATCGATCG', '+', '4567890ABCDEF', ''],
    )

    # Nothing may be written to the not-oriented outputs.
    self.assertEqual(unoriented_barcode_sink.data.split('\n'), [''])
    self.assertEqual(unoriented_read_sink.data.split('\n'), [''])
def test_process_barcode_paired_stitched(self):
    """ Stitched read, no primer match, string quality scores.

    The oriented outputs must stay empty, and the barcode and trimmed
    read are expected in the not-oriented outputs.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        "1234567890ABCDEFGHIJ",
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    rev_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With no matches, should write to the not_oriented files, and keep
    # in the same order of output file
    options = dict(
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=True,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=True,
    )
    process_barcode_paired_stitched(
        record, barcode_sink, read_sink, **options)

    # Oriented outputs receive nothing.
    self.assertEqual(barcode_sink.data.split('\n'), [''])
    self.assertEqual(read_sink.data.split('\n'), [''])

    self.assertEqual(
        unoriented_barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'CGATGAT', '+', 'JIHG321', ''],
    )
    self.assertEqual(
        unoriented_read_sink.data.split('\n'),
        ['@HWI-ST830', 'GATCGATCGATCG', '+', '4567890ABCDEF', ''],
    )
def test_process_barcode_paired_stitched_reverse_primer_match(self):
    """ Stitched read, reverse primer match, string quality scores.

    The record is expected in the oriented barcode and read outputs;
    the not-oriented outputs must stay empty.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        "1234567890ABCDEFGHIJ",
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'AAAAAA'))
    ]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'GATCG'))
    ]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With reverse primer match, should write in order of read2, read 1
    process_barcode_paired_stitched(
        record,
        barcode_sink,
        read_sink,
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=False,
    )

    self.assertEqual(
        barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'TCGCGAT', '+', 'HIJ4321', ''],
    )
    self.assertEqual(
        read_sink.data.split('\n'),
        ['@HWI-ST830', 'TCGATCGATCGAT', '+', 'GFEDCBA098765', ''],
    )

    # Nothing may be written to the not-oriented outputs.
    self.assertEqual(unoriented_barcode_sink.data.split('\n'), [''])
    self.assertEqual(unoriented_read_sink.data.split('\n'), [''])
def test_process_barcode_paired_stitched_forward_primer_match(self):
    """ Stitched read, forward primer match, numpy quality scores.

    The record is expected in the oriented barcode and read outputs;
    the not-oriented outputs must stay empty.
    """
    record = [
        "HWI-ST830",
        "ATCGATCGATCGATCGATCG",
        np.arange(3, 23, dtype=np.int8),
    ]
    read_sink = FakeOutFile()
    barcode_sink = FakeOutFile()
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'GATCGA'))
    ]
    rev_patterns = [compile(''.join(self.iupac[base] for base in 'ATA'))]
    unoriented_barcode_sink = FakeOutFile()
    unoriented_read_sink = FakeOutFile()

    # With forward primer match, should write in order of read 1, read 2
    process_barcode_paired_stitched(
        record,
        barcode_sink,
        read_sink,
        bc1_len=3,
        bc2_len=4,
        rev_comp_bc1=True,
        rev_comp_bc2=True,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq_out_not_oriented=unoriented_read_sink,
        switch_bc_order=True,
    )

    # Expected quality strings are the integer scores shown as ASCII
    # characters at an offset of 33 (3 -> '$', ..., 22 -> '7').
    self.assertEqual(
        barcode_sink.data.split('\n'),
        ['@HWI-ST830', 'CGATGAT', '+', '7654&%$', ''],
    )
    self.assertEqual(
        read_sink.data.split('\n'),
        ['@HWI-ST830', 'GATCGATCGATCG', '+', "'()*+,-./0123", ''],
    )

    # Nothing may be written to the not-oriented outputs.
    self.assertEqual(unoriented_barcode_sink.data.split('\n'), [''])
    self.assertEqual(unoriented_read_sink.data.split('\n'), [''])