예제 #1
0
    def test_fastq_bam(self):
        """Convert paired FASTQ input to BAM and check the resulting SAM text.

        The conversion is run twice through the parser returned by
        ``read_utils.parser_fastq_to_bam``: once with the read-group values
        given on the command line (that output is dumped with
        ``samtools view -h`` and compared byte-for-byte against the known-good
        SAM files), and once with the header supplied via ``--header``.
        """
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        # NOTE(review): expectedFastq1, outFastq1, outFastq2, outHeader and
        # outHeaderFix are not referenced later in this method; the
        # assignments are kept so the same helper calls are still made.
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expected1_8Sam_v15 = os.path.join(myInputDir, 'expected.java1_8_v1.5.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')
        outHeaderFix = util.file.mkstempfname('.fix.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1,
            inFastq2,
            outBamCmd,
            '--sampleName',
            'FreeSample',
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # Note for developers: if you're fixing the tests to handle non-bugs
        # (ie our testing here is too brittle), let's just replace a lot of this
        # in the future with code that just reads the header, sorts it, and
        # tests for equality of sorted values in the RG line (and stricter
        # equality in the non-header lines). This is kind of hacky.

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so any of the known-good files is
        #    accepted. The message names the files involved so a failure is
        #    more informative than a bare "False is not true".
        expectedSams = (expected1_7Sam, expected1_8Sam, expected1_8Sam_v15)
        self.assertTrue(
            any(filecmp.cmp(outSam, expected, shallow=False)
                for expected in expectedSams),
            '%s does not match any expected SAM file: %s' %
            (outSam, ', '.join(expectedSams)))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt, '--header', inHeader])
        args.func_main(args)
예제 #2
0
    def test_fastq_bam(self):
        """Convert paired FASTQ input to BAM and check the resulting SAM text.

        The conversion is run twice through the parser returned by
        ``read_utils.parser_fastq_to_bam``: once with the read-group values
        given on the command line (that output is dumped with
        ``samtools view -h`` and compared byte-for-byte against the known-good
        SAM files), and once with the header supplied via ``--header``.
        """
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        # NOTE(review): expectedFastq1, outFastq1, outFastq2, outHeader and
        # outHeaderFix are not referenced later in this method; the
        # assignments are kept so the same helper calls are still made.
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expected1_8Sam_v15 = os.path.join(myInputDir, 'expected.java1_8_v1.5.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')
        outHeaderFix = util.file.mkstempfname('.fix.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1,
            inFastq2,
            outBamCmd,
            '--sampleName',
            'FreeSample',
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # Note for developers: if you're fixing the tests to handle non-bugs
        # (ie our testing here is too brittle), let's just replace a lot of this
        # in the future with code that just reads the header, sorts it, and
        # tests for equality of sorted values in the RG line (and stricter
        # equality in the non-header lines). This is kind of hacky.

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so any of the known-good files is
        #    accepted. The message names the files involved so a failure is
        #    more informative than a bare "False is not true".
        expectedSams = (expected1_7Sam, expected1_8Sam, expected1_8Sam_v15)
        self.assertTrue(
            any(filecmp.cmp(outSam, expected, shallow=False)
                for expected in expectedSams),
            '%s does not match any expected SAM file: %s' %
            (outSam, ', '.join(expectedSams)))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt, '--header', inHeader])
        args.func_main(args)
예제 #3
0
    def test_fastq_bam(self):
        """Round-trip paired FASTQ files through BAM and compare with fixtures.

        Steps exercised:
          1. FASTQ pair -> BAM with read-group values from the command line;
             the ``samtools view -h`` dump must equal one of the expected SAMs.
          2. FASTQ pair -> BAM with the header taken from ``inHeader.txt``.
          3. That BAM -> FASTQ pair plus header file, passing ``READ1_TRIM=1``
             to Picard; the outputs are compared with the expected files.
        """
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1, inFastq2, outBamCmd,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so either known-good file is
        #    accepted. The message names the files involved so a failure is
        #    more informative than a bare "False is not true".
        expectedSams = (expected1_7Sam, expected1_8Sam)
        self.assertTrue(
            any(filecmp.cmp(outSam, expected, shallow=False)
                for expected in expectedSams),
            '%s does not match any expected SAM file: %s' %
            (outSam, ', '.join(expectedSams)))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt,
                                  '--header', inHeader])
        args.func_main(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
        args = parser.parse_args([
            outBamTxt, outFastq1, outFastq2,
            '--outHeader', outHeader,
            '--JVMmemory', '1g',
            '--picardOptions', 'READ1_TRIM=1',
        ])
        args.func_main(args)

        # compare out1.fastq, out2.fastq and outHeader.txt with the expected
        # and input files
        self.assertEqualContents(outFastq1, expectedFastq1)  # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeader, inHeader)
예제 #4
0
    def test_fastq_bam(self):
        """Round-trip paired FASTQ files through BAM and compare with fixtures.

        Runs fastq_to_bam with command-line header values (checked against
        ``expected.sam``), fastq_to_bam with a header file, and finally
        bam_to_fastq with ``READ1_TRIM=1`` on the second BAM.
        """
        input_dir = util.file.get_test_input_path(self)

        # Fixture inputs and expected outputs
        fastq1_in = os.path.join(input_dir, 'in1.fastq')
        fastq2_in = os.path.join(input_dir, 'in2.fastq')
        header_in = os.path.join(input_dir, 'inHeader.txt')
        sam_expected = os.path.join(input_dir, 'expected.sam')
        fastq1_expected = os.path.join(input_dir, 'expected.fastq1')

        # Temporary output paths
        bam_from_cmd = util.file.mkstempfname('.bam')
        bam_from_txt = util.file.mkstempfname('.bam')
        sam_out = util.file.mkstempfname('.sam')
        fastq1_out = util.file.mkstempfname('.fastq')
        fastq2_out = util.file.mkstempfname('.fastq')
        header_out = util.file.mkstempfname('.txt')

        # Step 1: FASTQ pair -> BAM, header values given as options
        to_bam_parser = read_utils.parser_fastq_to_bam()
        cmdline_header_argv = [
            fastq1_in,
            fastq2_in,
            bam_from_cmd,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ]
        read_utils.main_fastq_to_bam(to_bam_parser.parse_args(cmdline_header_argv))

        # Dump the BAM (with header) as SAM text and compare to the fixture
        samtools = tools.samtools.SamtoolsTool()
        samtools.execute('view', ['-h', bam_from_cmd], stdout=sam_out)
        self.assertEqualContents(sam_out, sam_expected)

        # Step 2: FASTQ pair + inHeader.txt -> BAM, header read from the file
        to_bam_parser = read_utils.parser_fastq_to_bam()
        file_header_argv = [fastq1_in, fastq2_in, bam_from_txt, '--header', header_in]
        read_utils.main_fastq_to_bam(to_bam_parser.parse_args(file_header_argv))

        # Step 3: BAM -> FASTQ pair + header file, trimming 1 base from read 1
        to_fastq_parser = read_utils.parser_bam_to_fastq()
        to_fastq_argv = [
            bam_from_txt,
            fastq1_out,
            fastq2_out,
            '--outHeader', header_out,
            '--JVMmemory', '1g',
            '--picardOptions', 'READ1_TRIM=1',
        ]
        read_utils.main_bam_to_fastq(to_fastq_parser.parse_args(to_fastq_argv))

        # Read 1 is checked against the trimmed fixture; read 2 and the header
        # are checked against the original inputs.
        self.assertEqualContents(fastq1_out, fastq1_expected)
        self.assertEqualContents(fastq2_out, fastq2_in)
        self.assertEqualContents(header_out, header_in)
예제 #5
0
    def test_fastq_bam(self):
        """Convert paired FASTQ input to BAM and compare header and reads.

        The first conversion takes its read-group values from the command
        line; its header and reads are compared with ``expected.java1_7.sam``,
        allowing several alternative ``VN`` values. The second conversion
        takes its header from ``inHeader.txt``.
        """
        input_dir = util.file.get_test_input_path(self)

        # Fixture inputs and expected outputs
        # NOTE(review): several of the names below are not referenced later
        # in this method; every original assignment is kept.
        fastq1_in = os.path.join(input_dir, 'in1.fastq')
        fastq2_in = os.path.join(input_dir, 'in2.fastq')
        header_in = os.path.join(input_dir, 'inHeader.txt')
        sam_java17 = os.path.join(input_dir, 'expected.java1_7.sam')
        sam_java18 = os.path.join(input_dir, 'expected.java1_8.sam')
        sam_java18_v15 = os.path.join(input_dir, 'expected.java1_8_v1.5.sam')
        fastq1_expected = os.path.join(input_dir, 'expected.fastq1')

        # Temporary output paths
        bam_from_cmd = util.file.mkstempfname('.bam')
        bam_from_txt = util.file.mkstempfname('.bam')
        sam_out = util.file.mkstempfname('.sam')
        fastq1_out = util.file.mkstempfname('.fastq')
        fastq2_out = util.file.mkstempfname('.fastq')
        header_out = util.file.mkstempfname('.txt')
        header_fix_out = util.file.mkstempfname('.fix.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        to_bam_parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        cmdline_header_argv = [
            fastq1_in, fastq2_in, bam_from_cmd,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ]
        parsed = to_bam_parser.parse_args(cmdline_header_argv)
        parsed.func_main(parsed)

        # Header comparison accepts any of these values for the VN field
        vn_alternatives = {"VN": ["1.4", "1.5", "1.6", "1.7"]}
        self.assertEqualSamHeaders(bam_from_cmd,
                                   sam_java17,
                                   other_allowed_values=vn_alternatives)
        assert_equal_bam_reads(self, bam_from_cmd, sam_java17)

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        to_bam_parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        file_header_argv = [fastq1_in, fastq2_in, bam_from_txt, '--header', header_in]
        parsed = to_bam_parser.parse_args(file_header_argv)
        parsed.func_main(parsed)
예제 #6
0
    def test_deplete_blastn_bam(self):
        """Run deplete_blastn_bam against two BLAST databases and check the SAM.

        Builds nucleotide BLAST databases from two FASTA fixtures, converts
        the paired FASTQ fixtures to a BAM, runs the deplete_blastn_bam
        command on it with ``--chunkSize 1``, and compares the
        ``samtools view -h`` dump of the result with the accepted expected
        SAM files.
        """
        # NOTE(review): tempDir is not removed inside this method; confirm
        # that the surrounding test harness cleans it up.
        tempDir = tempfile.mkdtemp()
        myInputDir = util.file.get_test_input_path(self)

        # Make blast databases
        makeblastdbPath = tools.blast.MakeblastdbTool().install_and_get_path()
        dbnames = ['humanChr1Subset.fa', 'humanChr9Subset.fa']
        refDbs = []
        for dbname in dbnames:
            refDb = os.path.join(tempDir, dbname)
            # makeblastdb is run on a symlink inside tempDir rather than on
            # the fixture path itself
            os.symlink(os.path.join(myInputDir, dbname), refDb)
            refDbs.append(refDb)
            subprocess.check_call([makeblastdbPath, '-dbtype', 'nucl', '-in', refDb])

        # convert the input fastq's to a bam
        inFastq1 = os.path.join(myInputDir, "in1.fastq")
        inFastq2 = os.path.join(myInputDir, "in2.fastq")
        inBam = os.path.join(tempDir, 'in.bam')
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1,
            inFastq2,
            inBam,
            '--sampleName',
            'FreeSample',
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # Run deplete_blastn_bam
        outBam = os.path.join(tempDir, 'out.bam')
        args = taxon_filter.parser_deplete_blastn_bam(argparse.ArgumentParser()).parse_args(
            [inBam, refDbs[0], refDbs[1], outBam, "--chunkSize", "1"])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        outSam = os.path.join(tempDir, 'out.sam')
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBam, outSam)

        # Print the produced SAM; iterating the file object yields the same
        # lines as readlines() without building the whole list first.
        with open(outSam, "r") as outSamFile:
            for line in outSamFile:
                print(line)

        # the header field ordering may be different with Java 1.8, so any of
        # the known-good files is accepted. The message names the files
        # involved so a failure is more informative than "False is not true".
        expectedSams = [
            os.path.join(myInputDir, expectedName)
            for expectedName in ('expected.sam',
                                 'expected_1_8.sam',
                                 'expected_alt_v1.5.sam',
                                 'expected_1_8_v1.5.sam')
        ]
        self.assertTrue(
            any(filecmp.cmp(outSam, expected, shallow=False)
                for expected in expectedSams),
            '%s does not match any expected SAM file: %s' %
            (outSam, ', '.join(expectedSams)))
예제 #7
0
    def test_fastq_bam(self):
        """Round-trip paired FASTQ files through BAM and compare with fixtures.

        Steps exercised:
          1. FASTQ pair -> BAM with read-group values from the command line;
             the ``samtools view -h`` dump must equal one of the expected SAMs.
          2. FASTQ pair -> BAM with the header taken from ``inHeader.txt``.
          3. That BAM -> FASTQ pair plus header file, passing ``READ1_TRIM=1``
             to Picard; the outputs are compared with the expected files.
        """
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expected1_8Sam_v15 = os.path.join(myInputDir,
                                          'expected.java1_8_v1.5.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1,
            inFastq2,
            outBamCmd,
            '--sampleName',
            'FreeSample',
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so any of the known-good files is
        #    accepted. The message names the files involved so a failure is
        #    more informative than a bare "False is not true".
        expectedSams = (expected1_7Sam, expected1_8Sam, expected1_8Sam_v15)
        self.assertTrue(
            any(filecmp.cmp(outSam, expected, shallow=False)
                for expected in expectedSams),
            '%s does not match any expected SAM file: %s' %
            (outSam, ', '.join(expectedSams)))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args(
            [inFastq1, inFastq2, outBamTxt, '--header', inHeader])
        args.func_main(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
        args = parser.parse_args([
            outBamTxt,
            outFastq1,
            outFastq2,
            '--outHeader',
            outHeader,
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'READ1_TRIM=1',
        ])
        args.func_main(args)

        # compare out1.fastq, out2.fastq and outHeader.txt with the expected
        # and input files
        self.assertEqualContents(outFastq1, expectedFastq1)  # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeader, inHeader)