Beispiel #1
0
    def testHaplotyperPath(self):
        """Run the haplotyper without the grid and check the phased output.

        A fresh ``testLib`` sample backed by ``TestHaplotyper.inputBam`` is
        added to the test pool, haplotypes are called, and the phased file
        registered in the pool's ``beagleFiles`` is compared with the
        expected output path before the SNPs in that file are checked.
        """
        Grid.useGrid = False
        expOutFile = "../testFiles/output/testPool/out.testPoolSL2.40ch11_22900-24100.BEAGLE.PL.phased.gz"

        TestHaplotyper.testPool.vcf = {}
        TestHaplotyper.sample = Sample.Sample(TestHaplotyper.testPool,
                                              "testLib")
        TestHaplotyper.testPool.addSample(TestHaplotyper.sample)
        # The bam file is flagged as already sorted, indexed, etc.
        # (presumably so the pipeline skips those preprocessing steps).
        TestHaplotyper.sample.bam = BamFile.BamFile(TestHaplotyper.testPool,
                                                    TestHaplotyper.sample,
                                                    TestHaplotyper.inputBam,
                                                    sortedBam=True,
                                                    headerLine=True,
                                                    duplicates=False,
                                                    mdTag=True,
                                                    index=True)

        TestHaplotyper.haplotyper.callHaplotypes()
        # dict.values() is a non-indexable view on Python 3, so materialise
        # it before taking the first element (still an IndexError if empty).
        beagleFiles = list(TestHaplotyper.testPool.beagleFiles.values())
        createdOutFile = beagleFiles[0].phasedFile
        createdPath = os.path.abspath(createdOutFile)
        expectedPath = os.path.abspath(expOutFile)
        self.assertEqual(createdPath, expectedPath,
                         createdPath + " not is " + expectedPath)
        self.checkNoOfSnps(expOutFile)
Beispiel #2
0
    def _executeBwa(self, forwardFq, reversedFq=None):
        """Map the reads of one sample against the reference genome with BWA.

        If the reference genome has no BWA index yet (no ``.pac`` file next
        to it), ``bwa index`` is run first. Each fastq file is then aligned
        with :py:meth:`_bwaAlign`, the alignments are combined with
        ``bwa samse`` (single end) or ``bwa sampe`` (paired end) into a sam
        file that is stored as ``forwardFq.sample.bam``, and the intermediate
        alignment files are removed again.

        :param forwardFq: The forward fastq file to map against the reference genome
        :type forwardFq: an instance of :py:class:`FastqFile.FastqFile`
        :param reversedFq: The reversed fastq file to map against the reference genome
        :type reversedFq: an instance of :py:class:`FastqFile.FastqFile`, None if the data has no paired end reads.

        """
        refGenome = Program.config.getPath("refGenome")
        pairedEnd = reversedFq is not None

        if not os.path.exists(refGenome + ".pac"):
            # Pass an argument list: a single command string without
            # shell=True would be looked up as one (non-existent) program.
            exitStatus = subprocess.call(["bwa", "index", refGenome])
            # Any non-zero status (not only 1) means indexing failed.
            if exitStatus != 0:
                print(
                    "ERROR: Failed to create a bwa index file for the reference genome, do I have permissions to write in the directory of the reference genome?"
                )
                exit(1)

        ##Build the command
        cmd = Program.config.getPath("bwa")  # @UndefinedVariable
        if pairedEnd:
            # NOTE(review): the configured BWA arguments are only added for
            # paired end data and are concatenated directly in front of the
            # reference genome path -- confirm getProgramArguments returns a
            # trailing space.
            cmd = cmd + " sampe " + self.getProgramArguments("BWA")
        else:
            cmd = cmd + " samse "

        cmd = cmd + refGenome

        #add the .sai files to the command
        forwardMapped = self._bwaAlign(forwardFq)
        cmd = cmd + " " + forwardMapped
        if pairedEnd:
            reversedMapped = self._bwaAlign(reversedFq)
            cmd = cmd + " " + reversedMapped

        #add the high quality reads to the command
        cmd = cmd + " " + forwardFq.fileName
        if pairedEnd:
            cmd = cmd + " " + reversedFq.fileName

        #add the output file to the command
        forwardFq.sample.bam = BamFile.BamFile(forwardFq.pool,
                                               forwardFq.sample,
                                               sam=True)
        cmd = cmd + " > " + forwardFq.sample.bam.getFile()

        ##Execute the command
        self.execute(cmd, "BWA", forwardFq.sample.bam)

        ##Remove the intermediate alignment files
        self.execute("rm " + forwardMapped, "rm", forwardFq)
        if pairedEnd:
            self.execute("rm " + reversedMapped, "rm", reversedFq)
Beispiel #3
0
 def testAddMdTag(self):
     """Add MD tags to the test bam file and check the size of the result.

     After ``addMdTag`` has run, the expected output file must exist and
     ``samtools view -h`` on it must print exactly 1283 lines.
     """
     expectedLines = "1283"
     TestConversionTools.sample.bam = BamFile.BamFile(
         TestConversionTools.testPool, TestConversionTools.sample,
         TestConversionTools.bamFile)
     self.convTools.addMdTag(TestConversionTools.sample)
     self.assertTrue(os.path.exists(TestConversionTools.expBamOutFile),
                     "output file not created...")

     countCmd = (Program.config.getPath("samtools") + " view -h " +
                 TestConversionTools.expBamOutFile + " | wc -l")
     # universal_newlines=True makes communicate() return text instead of
     # bytes on Python 3, so the output can be compared and concatenated
     # with str values.
     output, _ = Popen(countCmd,
                       shell=True,
                       stdout=PIPE,
                       stderr=PIPE,
                       universal_newlines=True).communicate()
     # strip() rather than rstrip(): some wc implementations left-pad the
     # count with spaces.
     lineCount = output.strip()
     self.assertEqual(
         lineCount, expectedLines,
         "number of lines: " + lineCount + " is not " + expectedLines)
Beispiel #4
0
    def testHaplotyperPathGrid(self):
        """Run BEAGLE through ``Haplotyper.executeBeagleCluster`` and check the SNPs.

        A fresh ``testLib`` sample backed by ``TestHaplotyper.inputBam`` is
        added to the test pool before the cluster execution is started; the
        resulting vcf file is then handed to ``checkNoOfSnps``.
        """
        expVcfFile = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"

        pool = TestHaplotyper.testPool
        pool.vcf = {}

        lib = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = lib
        pool.addSample(lib)

        # Flags describing the state of the input bam file.
        bamFlags = dict(sortedBam=True,
                        headerLine=True,
                        duplicates=False,
                        mdTag=True,
                        index=True)
        lib.bam = BamFile.BamFile(pool, lib, TestHaplotyper.inputBam,
                                  **bamFlags)

        Haplotyper.executeBeagleCluster(pool)
        self.checkNoOfSnps(expVcfFile)
Beispiel #5
0
    def testSamtoolsMultiple(self):
        """Call SNVs with samtools mpileup on a pool that holds two samples.

        A second sample (``testLib2``) using the same input bam file is added
        to the test pool. The vcf file registered under key ``None`` must be
        the expected vcf file, and its SNPs are checked with
        ``checkNoOfSnps``.
        """
        pool = TestSnvCaller.testPool

        # Second sample on top of the one that is already in the pool.
        extraSample = Sample.Sample(pool, "testLib2")
        TestSnvCaller.sample2 = extraSample
        extraSample.bam = BamFile.BamFile(pool, extraSample,
                                          TestSnvCaller.inputBam)
        pool.addSample(extraSample)

        # Run the caller and compare the produced file with the expectation.
        caller = SamtoolsMpileup.SamtoolsMpileup(pool)
        caller.callSnvs()
        createdPath = os.path.abspath(pool.vcf[None].fileName)
        expectedPath = os.path.abspath(TestSnvCaller.expVcfFile)
        self.assertEqual(createdPath, expectedPath,
                         createdPath + " not is " + expectedPath)

        # Verify the SNPs in the resulting file.
        self.checkNoOfSnps(TestSnvCaller.expVcfFile)
Beispiel #6
0
 def setUp(self):
     """Empty the test output directory and build a fresh pool with one sample.

     All handlers of the root logger are closed first (presumably so that
     log files inside the output directory can be deleted). Afterwards the
     reference genome path is configured and a ``testLib`` sample backed by
     ``TestSnvCaller.inputBam`` is added to a new ``testPool``.
     """
     outputDir = "../testFiles/output/"

     for logHandler in logging.getLogger().handlers:
         logHandler.close()

     # Remove everything left behind by a previous test.
     for entry in os.listdir(outputDir):
         entryPath = os.path.join(outputDir, entry)
         if not os.path.isdir(entryPath):
             os.unlink(entryPath)
             continue
         shutil.rmtree(entryPath)

     pool = Pool.Pool("testPool", outputDir)
     TestSnvCaller.testPool = pool
     Program.config.setPath("refGenome",
                            "../testFiles/input/smallRefGenome.fa")

     lib = Sample.Sample(pool, "testLib")
     TestSnvCaller.sample = lib
     pool.addSample(lib)
     # Flags describing the state of the input bam file.
     bamFlags = dict(sortedBam=True,
                     headerLine=True,
                     duplicates=False,
                     mdTag=True,
                     index=True)
     lib.bam = BamFile.BamFile(pool, lib, TestSnvCaller.inputBam, **bamFlags)
Beispiel #7
0
 def testCreateBamIndex(self):
     """Sort the test bam file, index it and check the created ``.bai`` file.

     The sample's bam file name must equal the expected output file, the
     index file must exist next to it and must be exactly 96 bytes large.
     """
     expectedIndexSize = 96
     sample = TestConversionTools.sample
     sample.bam = BamFile.BamFile(TestConversionTools.testPool, sample,
                                  TestConversionTools.bamFile)
     self.convTools.sortBam(sample)
     self.convTools.createBamIndex(sample)

     # The sample must now point at the expected bam file.
     createdPath = os.path.abspath(sample.bam.fileName)
     expectedPath = os.path.abspath(TestConversionTools.expBamOutFile)
     self.assertEqual(createdPath, expectedPath,
                      createdPath + " not is " + expectedPath)

     # The index has to exist and to have the expected size.
     indexFile = TestConversionTools.expBamOutFile + ".bai"
     self.assertTrue(os.path.isfile(indexFile),
                     os.path.abspath(indexFile) + " is not created")
     actualIndexSize = os.path.getsize(indexFile)
     self.assertEqual(
         expectedIndexSize, actualIndexSize,
         "filesize: " + str(actualIndexSize) + " is not " +
         str(expectedIndexSize))
Beispiel #8
0
 def setbam(self, fileName):
     """Setter which stores the bam file as a :py:class:`BamFile.BamFile`.

     :param fileName: passed on to :py:class:`BamFile.BamFile` (presumably the path to the bam file)

     """
     bamFile = BamFile.BamFile(self.pool, self, fileName)
     self.bam = bamFile
Beispiel #9
0
    def findFastqFiles(self, directory, inFormat):
        """Recursively register all input files of one format found below a directory.

        Despite its name the method handles three input formats:

        * ``"bam"``: every ``.bam``/``.bam.gz`` file becomes a sample of its
          own, which is appended to ``self.samples`` and added to the pool.
        * ``"vcf"``: every ``.vcf``/``.vcf.gz``/``.bcf``/``.bcf.gz`` file is
          stored in ``self.pool.vcf`` under its chromosome. A vcf file that
          is the only entry of its directory is stored under ``None``.
        * ``"fq"``: all ``.fq``/``.fq.gz`` files located directly in one
          directory are combined into a single sample, named after the
          first file without its ``_1``/``_2`` and fastq suffix.

        Subdirectories are processed by recursive calls, so every directory
        with fastq files yields its own sample.

        :param directory: the directory to search for input files
        :type directory: str -- path to the directory
        :param inFormat: the kind of input to look for: "bam", "fq" or "vcf"
        :type inFormat: str

        """
        fastqFiles = []
        # os.listdir returns the entries in arbitrary order; sorting makes
        # the forward/reversed assignment below deterministic ("_1" files
        # sort in front of their "_2" mates).
        entries = sorted(os.listdir(directory))
        singleEntry = len(entries) == 1
        for entry in entries:
            fileName = os.path.join(directory, entry)
            if os.path.isdir(fileName):
                self.findFastqFiles(fileName, inFormat)
            elif inFormat == "bam":
                if fileName.endswith((".bam", ".bam.gz")):
                    newSamp = Sample.Sample(
                        self.pool,
                        os.path.basename(os.path.splitext(fileName)[0]))
                    self.samples.append(newSamp)
                    newSamp.bam = BamFile.BamFile(self.pool,
                                                  newSamp,
                                                  fileName,
                                                  sortedBam=True,
                                                  headerLine=True,
                                                  duplicates=False,
                                                  mdTag=True,
                                                  index=True)
                    self.pool.addSample(newSamp)
            elif inFormat == "fq":
                if fileName.endswith((".fq", ".fq.gz")):
                    fastqFiles.append(fileName)
            elif inFormat == "vcf":
                if fileName.endswith((".vcf", ".vcf.gz")):
                    if singleEntry:
                        chrom = None
                    else:
                        chrom = self.getChromosomeFromVcf(fileName)
                    self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool,
                                                           fileName,
                                                           bcf=False,
                                                           filtered=True,
                                                           phased=True,
                                                           chrom=chrom)
                elif fileName.endswith((".bcf", ".bcf.gz")):
                    chrom = Tools.getChromosomeOfFile(
                        Program.config.getPath("refGenome"), fileName)
                    self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool,
                                                           fileName,
                                                           bcf=True,
                                                           filtered=True,
                                                           phased=True,
                                                           chrom=chrom)

        # Only the "fq" format collects files; without any there is no
        # sample to create for this directory.
        if not fastqFiles:
            return

        #create a library name from the file name; the most specific ending
        #is tried first and exactly one ending is removed
        libName = os.path.basename(fastqFiles[0])
        for ending in ("_1.fq.gz", "_2.fq.gz", "_1.fq", "_2.fq", ".fq.gz",
                       ".fq"):
            if libName.endswith(ending):
                libName = libName[:-len(ending)]
                break

        #create the sample
        sample = Sample.Sample(self.pool, libName)
        self.pool.addSample(sample)

        #add the fastq files to the sample
        if len(fastqFiles) == 1:
            sample.setForwardFq(fastqFiles[0])
        elif len(fastqFiles) == 2:
            sample.setForwardFq(fastqFiles[0])
            sample.setReversedFq(fastqFiles[1])
        else:
            if fastqFiles[0].endswith("_1.fq"):
                suffix = "_1.fq"
            elif fastqFiles[0].endswith("_1.fq.gz"):
                suffix = "_1.fq.gz"
            else:
                print(
                    "WARNING: files do not end with _1.fq or _1.fq.gz or _2.fq or _2.fq.gz, using all files in one directory as 1 sample with only forward reads"
                )
                suffix = fastqFiles[0][-3:]

            #split into forward and reversed fastq files for every suffix
            #case (not only after the warning) and wrap them in FastqFile
            #objects
            forward = [
                FastqFile.FastqFile(self.pool, sample, fastqFile)
                for fastqFile in fastqFiles if fastqFile.endswith(suffix)
            ]
            reversedFastq = [
                FastqFile.FastqFile(self.pool, sample, fastqFile,
                                    forward=False)
                for fastqFile in fastqFiles if not fastqFile.endswith(suffix)
            ]

            #add the fastq files to the sample
            sample.forwardFq = forward
            sample.reversedFq = reversedFastq