def test_fastq_bam(self):
    # Converts the paired FASTQ fixtures to BAM twice: first with the header
    # values supplied as command-line options (the result is dumped to SAM
    # and compared with the accepted expected files), then with the header
    # taken from inHeader.txt.
    input_dir = util.file.get_test_input_path(self)

    # Fixture inputs and expected outputs
    fastq_in_1 = os.path.join(input_dir, 'in1.fastq')
    fastq_in_2 = os.path.join(input_dir, 'in2.fastq')
    header_in = os.path.join(input_dir, 'inHeader.txt')
    sam_java17 = os.path.join(input_dir, 'expected.java1_7.sam')
    sam_java18 = os.path.join(input_dir, 'expected.java1_8.sam')
    sam_java18_v15 = os.path.join(input_dir, 'expected.java1_8_v1.5.sam')
    fastq_expected_1 = os.path.join(input_dir, 'expected.fastq1')

    # Scratch outputs. Not every name is used below; the calls are kept, in
    # the same order, in case mkstempfname has side effects.
    bam_from_cmd = util.file.mkstempfname('.bam')
    bam_from_txt = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fastq_out_1 = util.file.mkstempfname('.fastq')
    fastq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')
    header_fix_out = util.file.mkstempfname('.fix.txt')

    # in1.fastq, in2.fastq -> out.bam; header params from command-line
    cmdline = [
        fastq_in_1, fastq_in_2, bam_from_cmd,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ]
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(cmdline)
    parsed.func_main(parsed)

    # Note for developers: if you're fixing the tests to handle non-bugs
    # (ie our testing here is too brittle), let's just replace a lot of this
    # in the future with code that just reads the header, sorts it, and
    # tests for equality of sorted values in the RG line (and stricter
    # equality in the non-header lines). This is kind of hacky.

    # samtools view for out.sam and compare to expected
    sam_tool = tools.samtools.SamtoolsTool()
    sam_tool.view(['-h'], bam_from_cmd, sam_out)

    # picard.sam.FastqToSam outputs header fields in different order for
    # java version 1.8 vs 1.7/1.6, so accept a match against any of them
    accepted_sams = (sam_java17, sam_java18, sam_java18_v15)
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in accepted_sams
    ))

    # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(
        [fastq_in_1, fastq_in_2, bam_from_txt, '--header', header_in])
    parsed.func_main(parsed)
def test_fastq_bam(self):
    # Round trip: FASTQ pair -> BAM (header from command-line options, SAM
    # text checked against the expected files), FASTQ pair -> BAM (header
    # from inHeader.txt), then that BAM -> FASTQ pair + header file, which
    # are compared with the inputs / expected fixture.
    input_dir = util.file.get_test_input_path(self)

    # Fixture inputs and expected outputs
    fastq_in_1 = os.path.join(input_dir, 'in1.fastq')
    fastq_in_2 = os.path.join(input_dir, 'in2.fastq')
    header_in = os.path.join(input_dir, 'inHeader.txt')
    sam_java17 = os.path.join(input_dir, 'expected.java1_7.sam')
    sam_java18 = os.path.join(input_dir, 'expected.java1_8.sam')
    fastq_expected_1 = os.path.join(input_dir, 'expected.fastq1')

    # Scratch outputs
    bam_from_cmd = util.file.mkstempfname('.bam')
    bam_from_txt = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fastq_out_1 = util.file.mkstempfname('.fastq')
    fastq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # in1.fastq, in2.fastq -> out.bam; header params from command-line
    to_bam_cmdline = [
        fastq_in_1, fastq_in_2, bam_from_cmd,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ]
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(to_bam_cmdline)
    parsed.func_main(parsed)

    # samtools view for out.sam and compare to expected
    sam_tool = tools.samtools.SamtoolsTool()
    sam_tool.view(['-h'], bam_from_cmd, sam_out)

    # picard.sam.FastqToSam outputs header fields in different order for
    # java version 1.8 vs 1.7/1.6, so accept a match against either
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in (sam_java17, sam_java18)
    ))

    # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(
        [fastq_in_1, fastq_in_2, bam_from_txt, '--header', header_in])
    parsed.func_main(parsed)

    # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
    to_fastq_cmdline = [
        bam_from_txt, fastq_out_1, fastq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ]
    cli = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
    parsed = cli.parse_args(to_fastq_cmdline)
    parsed.func_main(parsed)

    # compare out1.fastq, out2.fastq, outHeader.txt to in and expected
    self.assertEqualContents(fastq_out_1, fastq_expected_1)  # 1 base trimmed
    self.assertEqualContents(fastq_out_2, fastq_in_2)
    self.assertEqualContents(header_out, header_in)
def test_fastq_bam(self):
    # Round trip: FASTQ pair -> BAM (header from command-line options, SAM
    # text compared with expected.sam), FASTQ pair -> BAM (header from
    # inHeader.txt), then that BAM -> FASTQ pair + header file, which are
    # compared with the inputs / expected fixture.
    input_dir = util.file.get_test_input_path(self)

    # Fixture inputs and expected outputs
    fastq_in_1 = os.path.join(input_dir, 'in1.fastq')
    fastq_in_2 = os.path.join(input_dir, 'in2.fastq')
    header_in = os.path.join(input_dir, 'inHeader.txt')
    sam_expected = os.path.join(input_dir, 'expected.sam')
    fastq_expected_1 = os.path.join(input_dir, 'expected.fastq1')

    # Scratch outputs
    bam_from_cmd = util.file.mkstempfname('.bam')
    bam_from_txt = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fastq_out_1 = util.file.mkstempfname('.fastq')
    fastq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # in1.fastq, in2.fastq -> out.bam; header params from command-line
    to_bam_cmdline = [
        fastq_in_1, fastq_in_2, bam_from_cmd,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ]
    cli = read_utils.parser_fastq_to_bam()
    parsed = cli.parse_args(to_bam_cmdline)
    read_utils.main_fastq_to_bam(parsed)

    # samtools view for out.sam and compare to expected
    sam_tool = tools.samtools.SamtoolsTool()
    sam_tool.execute('view', ['-h', bam_from_cmd], stdout=sam_out)
    self.assertEqualContents(sam_out, sam_expected)

    # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
    cli = read_utils.parser_fastq_to_bam()
    parsed = cli.parse_args(
        [fastq_in_1, fastq_in_2, bam_from_txt, '--header', header_in])
    read_utils.main_fastq_to_bam(parsed)

    # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
    to_fastq_cmdline = [
        bam_from_txt, fastq_out_1, fastq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ]
    cli = read_utils.parser_bam_to_fastq()
    parsed = cli.parse_args(to_fastq_cmdline)
    read_utils.main_bam_to_fastq(parsed)

    # compare out1.fastq, out2.fastq, outHeader.txt to in and expected
    self.assertEqualContents(fastq_out_1, fastq_expected_1)  # 1 base trimmed
    self.assertEqualContents(fastq_out_2, fastq_in_2)
    self.assertEqualContents(header_out, header_in)
def test_fastq_bam(self):
    # Converts the paired FASTQ fixtures to BAM with header values given as
    # command-line options, checks the BAM's header and reads against the
    # expected SAM via the header/read comparison helpers, then converts
    # again with the header taken from inHeader.txt.
    input_dir = util.file.get_test_input_path(self)

    # Fixture inputs and expected outputs
    fastq_in_1 = os.path.join(input_dir, 'in1.fastq')
    fastq_in_2 = os.path.join(input_dir, 'in2.fastq')
    header_in = os.path.join(input_dir, 'inHeader.txt')
    sam_java17 = os.path.join(input_dir, 'expected.java1_7.sam')
    sam_java18 = os.path.join(input_dir, 'expected.java1_8.sam')
    sam_java18_v15 = os.path.join(input_dir, 'expected.java1_8_v1.5.sam')
    fastq_expected_1 = os.path.join(input_dir, 'expected.fastq1')

    # Scratch outputs. Not every name is used below; the calls are kept, in
    # the same order, in case mkstempfname has side effects.
    bam_from_cmd = util.file.mkstempfname('.bam')
    bam_from_txt = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fastq_out_1 = util.file.mkstempfname('.fastq')
    fastq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')
    header_fix_out = util.file.mkstempfname('.fix.txt')

    # in1.fastq, in2.fastq -> out.bam; header params from command-line
    to_bam_cmdline = [
        fastq_in_1, fastq_in_2, bam_from_cmd,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ]
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(to_bam_cmdline)
    parsed.func_main(parsed)

    # Header comparison tolerates any of these alternative "VN" values.
    alt_header_values = {"VN": ["1.4", "1.5", "1.6", "1.7"]}
    self.assertEqualSamHeaders(
        bam_from_cmd,
        sam_java17,
        other_allowed_values=alt_header_values,
    )
    assert_equal_bam_reads(self, bam_from_cmd, sam_java17)

    # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(
        [fastq_in_1, fastq_in_2, bam_from_txt, '--header', header_in])
    parsed.func_main(parsed)
def test_deplete_blastn_bam(self):
    # Builds two BLAST nucleotide databases from the fixture FASTA files,
    # converts the FASTQ fixtures to a BAM, runs deplete_blastn_bam against
    # both databases, and checks that the SAM dump of the result is
    # byte-identical to one of the accepted expected files.
    import shutil  # local import: only needed for scratch-directory cleanup

    tempDir = tempfile.mkdtemp()
    # mkdtemp leaves deletion to the caller; remove the scratch directory
    # once the test finishes, whether it passes or fails.
    self.addCleanup(shutil.rmtree, tempDir, ignore_errors=True)
    myInputDir = util.file.get_test_input_path(self)

    # Make blast databases
    makeblastdbPath = tools.blast.MakeblastdbTool().install_and_get_path()
    dbnames = ['humanChr1Subset.fa', 'humanChr9Subset.fa']
    refDbs = []
    for dbname in dbnames:
        refDb = os.path.join(tempDir, dbname)
        # Symlink the FASTA into the scratch dir and build the db from there.
        os.symlink(os.path.join(myInputDir, dbname), refDb)
        refDbs.append(refDb)
        subprocess.check_call(
            [makeblastdbPath, '-dbtype', 'nucl', '-in', refDb])

    # convert the input fastq's to a bam
    inFastq1 = os.path.join(myInputDir, "in1.fastq")
    inFastq2 = os.path.join(myInputDir, "in2.fastq")
    inBam = os.path.join(tempDir, 'in.bam')
    parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    args = parser.parse_args([
        inFastq1, inFastq2, inBam,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ])
    args.func_main(args)

    # Run deplete_blastn_bam
    outBam = os.path.join(tempDir, 'out.bam')
    parser = taxon_filter.parser_deplete_blastn_bam(argparse.ArgumentParser())
    args = parser.parse_args(
        [inBam, refDbs[0], refDbs[1], outBam, "--chunkSize", "1"])
    args.func_main(args)

    # samtools view for out.sam and compare to expected
    outSam = os.path.join(tempDir, 'out.sam')
    samtools = tools.samtools.SamtoolsTool()
    samtools.view(['-h'], outBam, outSam)

    # Echo the SAM so it shows up in the test output; iterate the file
    # directly instead of loading every line into a list first.
    with open(outSam, "r") as outSamFile:
        for line in outSamFile:
            print(line)

    # the header field ordering may be different with Java 1.8
    expectedNames = (
        'expected.sam',
        'expected_1_8.sam',
        'expected_alt_v1.5.sam',
        'expected_1_8_v1.5.sam',
    )
    self.assertTrue(
        any(
            filecmp.cmp(
                outSam, os.path.join(myInputDir, name), shallow=False)
            for name in expectedNames
        ),
        'out.sam does not match any of the expected SAM files')
def test_fastq_bam(self):
    # Round trip: FASTQ pair -> BAM (header from command-line options, SAM
    # text checked against the expected files), FASTQ pair -> BAM (header
    # from inHeader.txt), then that BAM -> FASTQ pair + header file, which
    # are compared with the inputs / expected fixture.
    input_dir = util.file.get_test_input_path(self)

    # Fixture inputs and expected outputs
    fastq_in_1 = os.path.join(input_dir, 'in1.fastq')
    fastq_in_2 = os.path.join(input_dir, 'in2.fastq')
    header_in = os.path.join(input_dir, 'inHeader.txt')
    sam_java17 = os.path.join(input_dir, 'expected.java1_7.sam')
    sam_java18 = os.path.join(input_dir, 'expected.java1_8.sam')
    sam_java18_v15 = os.path.join(input_dir, 'expected.java1_8_v1.5.sam')
    fastq_expected_1 = os.path.join(input_dir, 'expected.fastq1')

    # Scratch outputs
    bam_from_cmd = util.file.mkstempfname('.bam')
    bam_from_txt = util.file.mkstempfname('.bam')
    sam_out = util.file.mkstempfname('.sam')
    fastq_out_1 = util.file.mkstempfname('.fastq')
    fastq_out_2 = util.file.mkstempfname('.fastq')
    header_out = util.file.mkstempfname('.txt')

    # in1.fastq, in2.fastq -> out.bam; header params from command-line
    to_bam_cmdline = [
        fastq_in_1, fastq_in_2, bam_from_cmd,
        '--sampleName', 'FreeSample',
        '--JVMmemory', '1g',
        '--picardOptions',
        'LIBRARY_NAME=Alexandria',
        'PLATFORM=9.75',
        'SEQUENCING_CENTER=KareemAbdul-Jabbar',
    ]
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(to_bam_cmdline)
    parsed.func_main(parsed)

    # samtools view for out.sam and compare to expected
    sam_tool = tools.samtools.SamtoolsTool()
    sam_tool.view(['-h'], bam_from_cmd, sam_out)

    # picard.sam.FastqToSam outputs header fields in different order for
    # java version 1.8 vs 1.7/1.6, so accept a match against any of them
    accepted_sams = (sam_java17, sam_java18, sam_java18_v15)
    self.assertTrue(any(
        filecmp.cmp(sam_out, candidate, shallow=False)
        for candidate in accepted_sams
    ))

    # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
    cli = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
    parsed = cli.parse_args(
        [fastq_in_1, fastq_in_2, bam_from_txt, '--header', header_in])
    parsed.func_main(parsed)

    # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
    to_fastq_cmdline = [
        bam_from_txt, fastq_out_1, fastq_out_2,
        '--outHeader', header_out,
        '--JVMmemory', '1g',
        '--picardOptions', 'READ1_TRIM=1',
    ]
    cli = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
    parsed = cli.parse_args(to_fastq_cmdline)
    parsed.func_main(parsed)

    # compare out1.fastq, out2.fastq, outHeader.txt to in and expected
    self.assertEqualContents(fastq_out_1, fastq_expected_1)  # 1 base trimmed
    self.assertEqualContents(fastq_out_2, fastq_in_2)
    self.assertEqualContents(header_out, header_in)