def test_fastq_bam(self):
    # Round-trip check: paired FASTQ -> BAM (header fields given first on the
    # command line, then through a header file) -> FASTQ, comparing each
    # stage against fixture files from this test's input directory.
    fixtures = util.file.get_test_input_path(self)

    # Fixture inputs and expected results
    fq_in_1 = os.path.join(fixtures, 'in1.fastq')
    fq_in_2 = os.path.join(fixtures, 'in2.fastq')
    header_in = os.path.join(fixtures, 'inHeader.txt')
    sam_java17 = os.path.join(fixtures, 'expected.java1_7.sam')
    sam_java18 = os.path.join(fixtures, 'expected.java1_8.sam')
    fq_expected_1 = os.path.join(fixtures, 'expected.fastq1')

    # Scratch output files
    bam_from_cli = util.file.mkstempfname('.bam')
    bam_from_header = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fq_out_1 = util.file.mkstempfname('.fastq')
    fq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # Stage 1: in1.fastq + in2.fastq -> BAM, header values passed as options
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args([
        fq_in_1, fq_in_2, bam_from_cli,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ])
    opts.func_main(opts)

    # Dump the BAM (header included) to SAM so it can be compared as text
    tools.samtools.SamtoolsTool().view(['-h'], bam_from_cli, sam_out)

    # picard.sam.FastqToSam orders the header fields differently under
    # java 1.8 than under 1.7/1.6, so a match against either fixture passes.
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in (sam_java17, sam_java18)))

    # Stage 2: in1.fastq + in2.fastq + inHeader.txt -> BAM, header from file
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args(
        [fq_in_1, fq_in_2, bam_from_header, '--header', header_in])
    opts.func_main(opts)

    # Stage 3: BAM -> two FASTQ files plus header text, trimming one base
    # from read 1
    to_fastq = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
    opts = to_fastq.parse_args([
        bam_from_header, fq_out_1, fq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ])
    opts.func_main(opts)

    # Read 1 must match the pre-trimmed fixture; read 2 and the header must
    # come back identical to what went in.
    self.assertEqualContents(fq_out_1, fq_expected_1)
    self.assertEqualContents(fq_out_2, fq_in_2)
    self.assertEqualContents(header_out, header_in)
def test_fastq_bam(self):
    # Round-trip check: paired FASTQ -> BAM (header fields given first on the
    # command line, then through a header file) -> FASTQ, comparing each
    # stage against fixture files from this test's input directory.
    fixtures = util.file.get_test_input_path(self)

    # Fixture inputs and expected results
    fq_in_1 = os.path.join(fixtures, 'in1.fastq')
    fq_in_2 = os.path.join(fixtures, 'in2.fastq')
    header_in = os.path.join(fixtures, 'inHeader.txt')
    sam_expected = os.path.join(fixtures, 'expected.sam')
    fq_expected_1 = os.path.join(fixtures, 'expected.fastq1')

    # Scratch output files
    bam_from_cli = util.file.mkstempfname('.bam')
    bam_from_header = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fq_out_1 = util.file.mkstempfname('.fastq')
    fq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # Stage 1: in1.fastq + in2.fastq -> BAM, header values passed as options
    to_bam = read_utils.parser_fastq_to_bam()
    opts = to_bam.parse_args([
        fq_in_1, fq_in_2, bam_from_cli,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ])
    read_utils.main_fastq_to_bam(opts)

    # Dump the BAM (header included) to SAM and compare it to the fixture
    tools.samtools.SamtoolsTool().execute(
        'view', ['-h', bam_from_cli], stdout=sam_out)
    self.assertEqualContents(sam_out, sam_expected)

    # Stage 2: in1.fastq + in2.fastq + inHeader.txt -> BAM, header from file
    to_bam = read_utils.parser_fastq_to_bam()
    opts = to_bam.parse_args(
        [fq_in_1, fq_in_2, bam_from_header, '--header', header_in])
    read_utils.main_fastq_to_bam(opts)

    # Stage 3: BAM -> two FASTQ files plus header text, trimming one base
    # from read 1
    to_fastq = read_utils.parser_bam_to_fastq()
    opts = to_fastq.parse_args([
        bam_from_header, fq_out_1, fq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ])
    read_utils.main_bam_to_fastq(opts)

    # Read 1 must match the pre-trimmed fixture; read 2 and the header must
    # come back identical to what went in.
    self.assertEqualContents(fq_out_1, fq_expected_1)
    self.assertEqualContents(fq_out_2, fq_in_2)
    self.assertEqualContents(header_out, header_in)
def test_fastq_bam(self):
    # Round-trip check: paired FASTQ -> BAM (header fields given first on the
    # command line, then through a header file) -> FASTQ, comparing each
    # stage against fixture files from this test's input directory.
    fixtures = util.file.get_test_input_path(self)

    # Fixture inputs and expected results
    fq_in_1 = os.path.join(fixtures, 'in1.fastq')
    fq_in_2 = os.path.join(fixtures, 'in2.fastq')
    header_in = os.path.join(fixtures, 'inHeader.txt')
    sam_java17 = os.path.join(fixtures, 'expected.java1_7.sam')
    sam_java18 = os.path.join(fixtures, 'expected.java1_8.sam')
    sam_java18_v15 = os.path.join(fixtures, 'expected.java1_8_v1.5.sam')
    fq_expected_1 = os.path.join(fixtures, 'expected.fastq1')

    # Scratch output files
    bam_from_cli = util.file.mkstempfname('.bam')
    bam_from_header = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fq_out_1 = util.file.mkstempfname('.fastq')
    fq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')
    header_out_filtered = util.file.mkstempfname('.fix.txt')

    # Stage 1: in1.fastq + in2.fastq -> BAM, header values passed as options
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args([
        fq_in_1, fq_in_2, bam_from_cli,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ])
    opts.func_main(opts)

    # Developer note: if these comparisons need loosening again for things
    # that are not real bugs (i.e. the test is too brittle), prefer replacing
    # most of this with code that reads the header, sorts it, and checks the
    # sorted RG-line values for equality (with stricter equality on the
    # non-header lines). The current approach is admittedly hacky.

    # Dump the BAM (header included) to SAM so it can be compared as text
    tools.samtools.SamtoolsTool().view(['-h'], bam_from_cli, sam_out)

    # picard.sam.FastqToSam orders the header fields differently under
    # java 1.8 than under 1.7/1.6, so a match against any fixture passes.
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in (sam_java17, sam_java18, sam_java18_v15)))

    # Stage 2: in1.fastq + in2.fastq + inHeader.txt -> BAM, header from file
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args(
        [fq_in_1, fq_in_2, bam_from_header, '--header', header_in])
    opts.func_main(opts)

    # Stage 3: BAM -> two FASTQ files plus header text, trimming one base
    # from read 1
    to_fastq = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
    opts = to_fastq.parse_args([
        bam_from_header, fq_out_1, fq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ])
    opts.func_main(opts)

    # Drop any "@PG" lines from the emitted header before comparing it to
    # the input header. This is a stopgap for testing; replace it later.
    with open(header_out, 'rt') as raw, \
            open(header_out_filtered, 'wt') as kept:
        kept.writelines(
            row for row in raw if not row.startswith('@PG'))

    # Read 1 must match the pre-trimmed fixture; read 2 and the filtered
    # header must come back identical to what went in.
    self.assertEqualContents(fq_out_1, fq_expected_1)
    self.assertEqualContents(fq_out_2, fq_in_2)
    self.assertEqualContents(header_out_filtered, header_in)
def test_fastq_bam(self):
    # Round-trip check: paired FASTQ -> BAM (header fields given first on the
    # command line, then through a header file) -> FASTQ, comparing each
    # stage against fixture files from this test's input directory.
    fixtures = util.file.get_test_input_path(self)

    # Fixture inputs and expected results
    fq_in_1 = os.path.join(fixtures, 'in1.fastq')
    fq_in_2 = os.path.join(fixtures, 'in2.fastq')
    header_in = os.path.join(fixtures, 'inHeader.txt')
    sam_java17 = os.path.join(fixtures, 'expected.java1_7.sam')
    sam_java18 = os.path.join(fixtures, 'expected.java1_8.sam')
    sam_java18_v15 = os.path.join(fixtures, 'expected.java1_8_v1.5.sam')
    fq_expected_1 = os.path.join(fixtures, 'expected.fastq1')

    # Scratch output files
    bam_from_cli = util.file.mkstempfname('.bam')
    bam_from_header = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fq_out_1 = util.file.mkstempfname('.fastq')
    fq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # Stage 1: in1.fastq + in2.fastq -> BAM, header values passed as options
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args([
        fq_in_1, fq_in_2, bam_from_cli,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ])
    opts.func_main(opts)

    # Dump the BAM (header included) to SAM so it can be compared as text
    tools.samtools.SamtoolsTool().view(['-h'], bam_from_cli, sam_out)

    # picard.sam.FastqToSam orders the header fields differently under
    # java 1.8 than under 1.7/1.6, so a match against any fixture passes.
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in (sam_java17, sam_java18, sam_java18_v15)))

    # Stage 2: in1.fastq + in2.fastq + inHeader.txt -> BAM, header from file
    to_bam = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    opts = to_bam.parse_args(
        [fq_in_1, fq_in_2, bam_from_header, '--header', header_in])
    opts.func_main(opts)

    # Stage 3: BAM -> two FASTQ files plus header text, trimming one base
    # from read 1
    to_fastq = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
    opts = to_fastq.parse_args([
        bam_from_header, fq_out_1, fq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ])
    opts.func_main(opts)

    # Read 1 must match the pre-trimmed fixture; read 2 and the header must
    # come back identical to what went in.
    self.assertEqualContents(fq_out_1, fq_expected_1)
    self.assertEqualContents(fq_out_2, fq_in_2)
    self.assertEqualContents(header_out, header_in)