Ejemplo n.º 1
0
    def test_fastq_bam(self) :
        myInputDir = util.file.get_test_input_path(self)
        
        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')
        
        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamCmd,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
            ])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so compare both
        self.assertTrue(filecmp.cmp(outSam, expected1_7Sam, shallow=False) or
                        filecmp.cmp(outSam, expected1_8Sam, shallow=False))
 
        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt,
            '--header', inHeader])
        args.func_main(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
        args = parser.parse_args([outBamTxt, outFastq1, outFastq2,
            '--outHeader', outHeader,
            '--JVMmemory', '1g',
            '--picardOptions', 'READ1_TRIM=1',
            ])
        args.func_main(args)

        # compare to out1.fastq, out2.fastq, outHeader.txt to in and expected
        self.assertEqualContents(outFastq1, expectedFastq1) # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeader, inHeader)
Ejemplo n.º 2
0
    def test_fastq_bam(self) :
        myInputDir = util.file.get_test_input_path(self)
        
        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expectedSam = os.path.join(myInputDir, 'expected.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')
        
        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam()
        args = parser.parse_args([inFastq1, inFastq2, outBamCmd,
            '--sampleName', 'FreeSample',
            '--JVMmemory', '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
            ])
        read_utils.main_fastq_to_bam(args)

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.execute('view', ['-h', outBamCmd], stdout=outSam)
        self.assertEqualContents(outSam, expectedSam)

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam()
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt,
            '--header', inHeader])
        read_utils.main_fastq_to_bam(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq()
        args = parser.parse_args([outBamTxt, outFastq1, outFastq2,
            '--outHeader', outHeader,
            '--JVMmemory', '1g',
            '--picardOptions', 'READ1_TRIM=1',
            ])
        read_utils.main_bam_to_fastq(args)

        # compare to out1.fastq, out2.fastq, outHeader.txt to in and expected
        self.assertEqualContents(outFastq1, expectedFastq1) # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeader, inHeader)
Ejemplo n.º 3
0
    def test_fastq_bam(self):
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expected1_8Sam_v15 = os.path.join(myInputDir, 'expected.java1_8_v1.5.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')
        outHeaderFix = util.file.mkstempfname('.fix.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1,
                                  inFastq2,
                                  outBamCmd,
                                  '--sampleName',
                                  'FreeSample',
                                  '--JVMmemory',
                                  '1g',
                                  '--picardOptions',
                                  'LIBRARY_NAME=Alexandria',
                                  'PLATFORM=9.75',
                                  'SEQUENCING_CENTER=KareemAbdul-Jabbar',])
        args.func_main(args)

        # Note for developers: if you're fixing the tests to handle non-bugs
        # (ie our testing here is too brittle), let's just replace a lot of this
        # in the future with code that just reads the header, sorts it, and
        # tests for equality of sorted values in the RG line (and stricter
        # equality in the non-header lines). This is kind of hacky.

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so compare both
        self.assertTrue(filecmp.cmp(outSam,
                                    expected1_7Sam,
                                    shallow=False) or filecmp.cmp(outSam,
                                                                  expected1_8Sam,
                                                                  shallow=False) or
                                                      filecmp.cmp(outSam,
                                                                  expected1_8Sam_v15,
                                                                  shallow=False))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([inFastq1, inFastq2, outBamTxt, '--header', inHeader])
        args.func_main(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
        args = parser.parse_args([outBamTxt,
                                  outFastq1,
                                  outFastq2,
                                  '--outHeader',
                                  outHeader,
                                  '--JVMmemory',
                                  '1g',
                                  '--picardOptions',
                                  'READ1_TRIM=1',])
        args.func_main(args)

        # filter out any "PG" lines from header for testing purposes
        # I don't like this... let's replace later.
        with open(outHeader, 'rt') as inf:
            with open(outHeaderFix, 'wt') as outf:
                for line in inf:
                    if not line.startswith('@PG'):
                        outf.write(line)

        # compare to out1.fastq, out2.fastq, outHeader.txt to in and expected
        self.assertEqualContents(outFastq1, expectedFastq1)  # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeaderFix, inHeader)
Ejemplo n.º 4
0
    def test_fastq_bam(self):
        myInputDir = util.file.get_test_input_path(self)

        # Define file names
        inFastq1 = os.path.join(myInputDir, 'in1.fastq')
        inFastq2 = os.path.join(myInputDir, 'in2.fastq')
        inHeader = os.path.join(myInputDir, 'inHeader.txt')
        expected1_7Sam = os.path.join(myInputDir, 'expected.java1_7.sam')
        expected1_8Sam = os.path.join(myInputDir, 'expected.java1_8.sam')
        expected1_8Sam_v15 = os.path.join(myInputDir,
                                          'expected.java1_8_v1.5.sam')
        expectedFastq1 = os.path.join(myInputDir, 'expected.fastq1')
        outBamCmd = util.file.mkstempfname('.bam')
        outBamTxt = util.file.mkstempfname('.bam')
        outSam = util.file.mkstempfname('.sam')
        outFastq1 = util.file.mkstempfname('.fastq')
        outFastq2 = util.file.mkstempfname('.fastq')
        outHeader = util.file.mkstempfname('.txt')

        # in1.fastq, in2.fastq -> out.bam; header params from command-line
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args([
            inFastq1,
            inFastq2,
            outBamCmd,
            '--sampleName',
            'FreeSample',
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'LIBRARY_NAME=Alexandria',
            'PLATFORM=9.75',
            'SEQUENCING_CENTER=KareemAbdul-Jabbar',
        ])
        args.func_main(args)

        # samtools view for out.sam and compare to expected
        samtools = tools.samtools.SamtoolsTool()
        samtools.view(['-h'], outBamCmd, outSam)
        # picard.sam.FastqToSam outputs header fields in different order for
        #    java version 1.8 vs 1.7/1.6, so compare both
        self.assertTrue(
            filecmp.cmp(outSam, expected1_7Sam, shallow=False)
            or filecmp.cmp(outSam, expected1_8Sam, shallow=False)
            or filecmp.cmp(outSam, expected1_8Sam_v15, shallow=False))

        # in1.fastq, in2.fastq, inHeader.txt -> out.bam; header from txt
        parser = read_utils.parser_fastq_to_bam(argparse.ArgumentParser())
        args = parser.parse_args(
            [inFastq1, inFastq2, outBamTxt, '--header', inHeader])
        args.func_main(args)

        # out.bam -> out1.fastq, out2.fastq, outHeader.txt; trim 1 base from 1
        parser = read_utils.parser_bam_to_fastq(argparse.ArgumentParser())
        args = parser.parse_args([
            outBamTxt,
            outFastq1,
            outFastq2,
            '--outHeader',
            outHeader,
            '--JVMmemory',
            '1g',
            '--picardOptions',
            'READ1_TRIM=1',
        ])
        args.func_main(args)

        # compare to out1.fastq, out2.fastq, outHeader.txt to in and expected
        self.assertEqualContents(outFastq1, expectedFastq1)  # 1 base trimmed
        self.assertEqualContents(outFastq2, inFastq2)
        self.assertEqualContents(outHeader, inHeader)