Esempio n. 1
0
 def execute(self, executable):
     """Run the program selected by the command line argument.

     Dispatches on ``executable``; the recognised values are "haplotyping",
     "snvCalling", "mapping", "allelicDiversity" and "findLoci". Any other
     value (or an snvCaller other than "samtools"/"GATK") does nothing.
     For "allelicDiversity" and "findLoci" the required input files are
     checked first; when one is missing a hint is printed and the process
     exits.

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     if executable == "haplotyping":
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if Program.config.snvCaller == "samtools":  # @UndefinedVariable
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif Program.config.snvCaller == "GATK":  # @UndefinedVariable
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if Program.config.gffFile is None:  # @UndefinedVariable
             print("When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         allelicDiverityCalculator = AllelicDiversity.AllelicDiversity(self.pool, Program.config.gffFile)  # @UndefinedVariable
         allelicDiverityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if Program.config.phenoData is None:  # @UndefinedVariable
             print("When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>")
             exit()
         if Program.config.gffFile is None:  # @UndefinedVariable
             # This branch guards the gff file, so the hint must name the gff
             # file (it previously repeated the phenotype-data wording).
             print("When finding loci, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)
Esempio n. 2
0
    def findLoci(self, pool):
        """Compute p-values for every phenotype on every chromosome of the pool.

        Runs Haplotyper.executeBeagleMultiThread on the pool, converts the
        configured phenotype data with convertExcelToCsv (the result is
        stored back in Program.config.phenoData) and reads it. Then, for
        each phenotype and each (chrom, vcf file) entry of ``pool.vcf``, the
        gff and vcf files are read and the result of findLociInPheno is
        passed to writePvaluesToFile.

        :param pool: the pool whose vcf files and output directory are used
        """
        Haplotyper.executeBeagleMultiThread(pool)

        phenotypeReader = Readers.PhenotypeReader()
        csvPath = self.convertExcelToCsv(Program.config.phenoData,
                                         pool.outputDir)
        Program.config.phenoData = csvPath
        phenotypeReader.readFile(csvPath)

        # NOTE: the accession table is loaded, but nothing below uses it;
        # the key conversion of phenotype alleles is currently disabled.
        accessionConverter = Readers.AccessionConverter()
        scriptDir = os.path.dirname(os.path.realpath(__file__))
        accessionConverter.readFile(scriptDir + "/convertToAccession.txt")

        for phenotype in phenotypeReader.phenotypes:
            for (chromosome, chromVcf) in pool.vcf.items():
                # A fresh gff reader per phenotype/chromosome combination
                # gives the phenotype its own contigs object.
                annotation = Readers.GffReader(chrom=chromosome)
                annotation.readFile(
                    Program.config.gffFile)  # @UndefinedVariable
                phenotype.contigs = annotation.contigs

                variantReader = Readers.VcfReader(phenotype.contigs.values())
                variantReader.readFile(chromVcf.getFile())

                pValues = self.findLociInPheno(phenotype)
                self.writePvaluesToFile(pValues, chromosome, pool,
                                        phenotype.description)
Esempio n. 3
0
 def testExecuteBeagleGrid(self):
     """Run executeBeagleCluster on the test pool and check the output vcf.

     The file name stored in the pool's vcf entry must resolve to the
     expected output path, after which checkNoOfSnps is run on that file.
     """
     expected = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
     produced = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
     producedAbs = os.path.abspath(produced)
     expectedAbs = os.path.abspath(expected)
     self.assertEqual(producedAbs, expectedAbs,
                      producedAbs + " not is " + expectedAbs)
     # Check if the file contains exactly one snp
     self.checkNoOfSnps(expected)
Esempio n. 4
0
    def findLoci(self, pool):
        """Compute p-values for every phenotype on every chromosome of the pool.

        Runs Haplotyper.executeBeagleMultiThread on the pool, converts the
        configured phenotype data with convertExcelToCsv (the result is
        stored back in Program.config.phenoData) and reads it. Then, for
        each phenotype and each (chrom, vcf file) entry of ``pool.vcf``, the
        gff and vcf files are read and the result of findLociInPheno is
        passed to writePvaluesToFile.

        :param pool: the pool whose vcf files and output directory are used
        """
        Haplotyper.executeBeagleMultiThread(pool)

        phenotypeReader = Readers.PhenotypeReader()
        csvPath = self.convertExcelToCsv(Program.config.phenoData, pool.outputDir)
        Program.config.phenoData = csvPath
        phenotypeReader.readFile(csvPath)

        # NOTE: the accession table is loaded, but nothing below uses it;
        # the key conversion of phenotype alleles is currently disabled.
        accessionConverter = Readers.AccessionConverter()
        scriptDir = os.path.dirname(os.path.realpath(__file__))
        accessionConverter.readFile(scriptDir + "/convertToAccession.txt")

        for phenotype in phenotypeReader.phenotypes:
            for (chromosome, chromVcf) in pool.vcf.items():
                # A fresh gff reader per phenotype/chromosome combination
                # gives the phenotype its own contigs object.
                annotation = Readers.GffReader(chrom=chromosome)
                annotation.readFile(Program.config.gffFile)  # @UndefinedVariable
                phenotype.contigs = annotation.contigs

                variantReader = Readers.VcfReader(phenotype.contigs.values())
                variantReader.readFile(chromVcf.getFile())

                pValues = self.findLociInPheno(phenotype)
                self.writePvaluesToFile(pValues, chromosome, pool, phenotype.description)
Esempio n. 5
0
    def getAllelicDiversity(self):
        """Calculate the allelic diversity and write the output to csv files.

        Beagle is executed first (through the cluster entry point when
        Grid.useGrid is set, multi-threaded otherwise). For every key of
        ``self.pool.vcf`` a tab separated table is written with one row per
        contig: the contig id, its reference haplotype ("-" when the contig
        has no ``refHaplotype`` attribute) and two haplotype columns per
        accession ("-" when the contig has no haplotypes). The table goes to
        ``<outputDir>/allelicDiversity.csv`` when the key is None and to
        ``<outputDir>/<key>_allelicDiversity.csv`` otherwise.

        Failures are logged per vcf entry and the loop continues with the
        next entry; no exception is propagated to the caller.
        """
        if Grid.useGrid == True:
            Haplotyper.executeBeagleCluster(self.pool)
        else:
            Haplotyper.executeBeagleMultiThread(self.pool)

        for vcf in self.pool.vcf:
            if vcf is None:
                logging.info("Starting to calculate the allelic diversity")
                outputFile = self.pool.outputDir + "/allelicDiversity.csv"
            else:
                logging.info("calculating allelic diversity of " + vcf)
                outputFile = self.pool.outputDir + "/" + vcf + "_" + "allelicDiversity.csv"
            try:
                self.vcfFile = self.pool.vcf[vcf].getFile()
                self._parseFiles(vcf)
                haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
                # Materialise the values before indexing: on Python 3 a dict
                # view cannot be subscripted. An empty mapping still raises
                # IndexError, which is reported below as "no SNPs found".
                accessions = list(list(haplotypes.values())[0].keys())

                with open(outputFile, "w") as outWriter:
                    outWriter.write("contig\toriginal\t")
                    for accession in accessions:
                        outWriter.write(accession + "_1\t" + accession +
                                        "_2\t")
                    outWriter.write("\n")
                    for contigId in self.allContigs:
                        outWriter.write(contigId + "\t")
                        try:
                            outWriter.write(
                                self.allContigs[contigId].refHaplotype + "\t")
                        except AttributeError:
                            outWriter.write("-\t")
                        # The membership test does not depend on the
                        # accession or haplotype index, so do it once per row.
                        hasHaplotypes = contigId in haplotypes
                        for accession in accessions:
                            for i in range(2):
                                if hasHaplotypes:
                                    outWriter.write(
                                        haplotypes[contigId][accession][i] +
                                        "\t")
                                else:
                                    outWriter.write("-\t")
                        outWriter.write("\n")
            except IndexError:
                if vcf is None:
                    logging.warning("No SNPs within contigs found")
                else:
                    logging.warning("No SNPs within contigs found of " + vcf)

            except Exception as ex:
                # Best effort: log the failure and go on with the next vcf.
                if vcf is None:
                    logging.error(
                        "an error occured during parsing the vcf file")
                else:
                    logging.error("an error occured during parsing " + vcf)
                logging.error(ex)
                traceback.print_exc()
Esempio n. 6
0
    def testHaplotyperPathGrid(self):
        """Run executeBeagleCluster on a pool that only has a sample with a bam file.

        The pool's vcf dict is emptied, a new sample "testLib" with the
        input bam is registered, and checkNoOfSnps is run on the expected
        output vcf afterwards.
        """
        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.bam = BamFile.BamFile(
            pool,
            sample,
            TestHaplotyper.inputBam,
            sortedBam=True,
            headerLine=True,
            duplicates=False,
            mdTag=True,
            index=True,
        )

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: no output path assertion here; only checkNoOfSnps is run.
        expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
        self.checkNoOfSnps(expectedVcf)
Esempio n. 7
0
 def testExecuteBeagleMultiThread(self):
     """Run executeBeagleMultiThread on the test pool and check the output vcf.

     The file name stored in the pool's vcf entry must resolve to the
     expected output path, after which checkNoOfSnps is run on that file.
     """
     expected = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleMultiThread(TestHaplotyper.testPool)
     produced = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
     producedAbs = os.path.abspath(produced)
     expectedAbs = os.path.abspath(expected)
     self.assertEqual(producedAbs, expectedAbs,
                      producedAbs + " not is " + expectedAbs)
     self.checkNoOfSnps(expected)
Esempio n. 8
0
 def testExecuteBeagleGrid(self):
     """Run executeBeagleCluster on the test pool and check the output vcf.

     The file name stored in the pool's vcf entry must resolve to the
     expected output path, after which checkNoOfSnps is run on that file.
     """
     expected = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
     produced = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
     producedAbs = os.path.abspath(produced)
     expectedAbs = os.path.abspath(expected)
     self.assertEqual(producedAbs, expectedAbs,
                      producedAbs + " not is " + expectedAbs)
     # Check if the file contains exactly one snp
     self.checkNoOfSnps(expected)
Esempio n. 9
0
 def getAllelicDiversity(self):
     """Calculate the allelic diversity and write the output to csv files.

     Beagle is executed first (through the cluster entry point when
     Grid.useGrid is set, multi-threaded otherwise). For every key of
     ``self.pool.vcf`` a tab separated table is written with one row per
     contig: the contig id, its reference haplotype ("-" when the contig
     has no ``refHaplotype`` attribute) and two haplotype columns per
     accession ("-" when the contig has no haplotypes). The table goes to
     ``<outputDir>/allelicDiversity.csv`` when the key is None and to
     ``<outputDir>/<key>_allelicDiversity.csv`` otherwise.

     Failures are logged per vcf entry and the loop continues with the
     next entry; no exception is propagated to the caller.
     """
     if Grid.useGrid == True:
         Haplotyper.executeBeagleCluster(self.pool)
     else:
         Haplotyper.executeBeagleMultiThread(self.pool)

     for vcf in self.pool.vcf:
         if vcf is None:
             logging.info("Starting to calculate the allelic diversity")
             outputFile = self.pool.outputDir + "/allelicDiversity.csv"
         else:
             logging.info("calculating allelic diversity of " + vcf)
             outputFile = self.pool.outputDir + "/"+vcf + "_" + "allelicDiversity.csv"
         try:
             self.vcfFile = self.pool.vcf[vcf].getFile()
             self._parseFiles(vcf)
             haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
             # Materialise the values before indexing: on Python 3 a dict
             # view cannot be subscripted. An empty mapping still raises
             # IndexError, which is reported below as "no SNPs found".
             accessions = list(list(haplotypes.values())[0].keys())

             with open(outputFile, "w") as outWriter:
                 outWriter.write("contig\toriginal\t")
                 for accession in accessions:
                     outWriter.write(accession + "_1\t" + accession + "_2\t")
                 outWriter.write("\n")
                 for contigId in self.allContigs:
                     outWriter.write(contigId + "\t")
                     try:
                         outWriter.write(self.allContigs[contigId].refHaplotype + "\t")
                     except AttributeError:
                         outWriter.write("-\t")
                     # The membership test does not depend on the
                     # accession or haplotype index, so do it once per row.
                     hasHaplotypes = contigId in haplotypes
                     for accession in accessions:
                         for i in range(2):
                             if hasHaplotypes:
                                 outWriter.write(haplotypes[contigId][accession][i] + "\t")
                             else:
                                 outWriter.write("-\t")
                     outWriter.write("\n")
         except IndexError:
             if vcf is None:
                 logging.warning("No SNPs within contigs found")
             else:
                 logging.warning("No SNPs within contigs found of " + vcf)

         except Exception as ex:
             # Best effort: log the failure and go on with the next vcf.
             if vcf is None:
                 logging.error("an error occured during parsing the vcf file")
             else:
                 logging.error("an error occured during parsing " + vcf)
             logging.error(ex)
             traceback.print_exc()
             
Esempio n. 10
0
    def testHaplotyperFullPathGrid(self):
        """Run executeBeagleCluster on a pool that only has a sample with fastq files.

        The pool's vcf dict is emptied, a new sample "testLib" gets a
        forward and a reversed gzipped fastq file, and checkNoOfSnps is run
        on the expected output vcf afterwards.
        """
        expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
        forwardFq = "../testFiles/input/test.fq.gz"
        reversedFq = "../testFiles/input/revTest.fq.gz"

        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.setForwardFq(forwardFq)
        sample.setReversedFq(reversedFq)
        sample.reversedFq.forward = False

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: the output path assertion is disabled in this test; only
        # checkNoOfSnps is run on the expected file.
        # Check if the file contains exactly one snp
        self.checkNoOfSnps(expectedVcf)
Esempio n. 11
0
    def testHaplotyperFullPathGrid(self):
        """Run executeBeagleCluster on a pool that only has a sample with fastq files.

        The pool's vcf dict is emptied, a new sample "testLib" gets a
        forward and a reversed gzipped fastq file, and checkNoOfSnps is run
        on the expected output vcf afterwards.
        """
        expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
        forwardFq = "../testFiles/input/test.fq.gz"
        reversedFq = "../testFiles/input/revTest.fq.gz"

        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.setForwardFq(forwardFq)
        sample.setReversedFq(reversedFq)
        sample.reversedFq.forward = False

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: the output path assertion is disabled in this test; only
        # checkNoOfSnps is run on the expected file.
        # Check if the file contains exactly one snp
        self.checkNoOfSnps(expectedVcf)
Esempio n. 12
0
    def testHaplotyperPathGrid(self):
        """Run executeBeagleCluster on a pool that only has a sample with a bam file.

        The pool's vcf dict is emptied, a new sample "testLib" with the
        input bam is registered, and checkNoOfSnps is run on the expected
        output vcf afterwards.
        """
        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.bam = BamFile.BamFile(
            pool,
            sample,
            TestHaplotyper.inputBam,
            sortedBam=True,
            headerLine=True,
            duplicates=False,
            mdTag=True,
            index=True,
        )

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: no output path assertion here; only checkNoOfSnps is run.
        expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
        self.checkNoOfSnps(expectedVcf)
Esempio n. 13
0
 def execute(self, executable):
     """Run the program selected by the command line argument.

     Dispatches on ``executable``; the recognised values are "haplotyping",
     "snvCalling", "mapping", "allelicDiversity" and "findLoci". Any other
     value (or an snvCaller other than "samtools"/"GATK") does nothing.
     For "allelicDiversity" and "findLoci" the required input files are
     checked first; when one is missing a hint is printed and the process
     exits.

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     if executable == "haplotyping":
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if Program.config.snvCaller == "samtools":  # @UndefinedVariable
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif Program.config.snvCaller == "GATK":  # @UndefinedVariable
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if Program.config.gffFile is None:  # @UndefinedVariable
             print(
                 "When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         allelicDiverityCalculator = AllelicDiversity.AllelicDiversity(
             self.pool, Program.config.gffFile)  # @UndefinedVariable
         allelicDiverityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if Program.config.phenoData is None:  # @UndefinedVariable
             print(
                 "When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>"
             )
             exit()
         if Program.config.gffFile is None:  # @UndefinedVariable
             # This branch guards the gff file, so the hint must name the gff
             # file (it previously repeated the phenotype-data wording).
             print(
                 "When finding loci, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)
Esempio n. 14
0
    def setUp(self):
        """Empty the test output directory and rebuild the shared fixtures.

        Every entry below ../testFiles/output/ is removed (directories
        recursively). Afterwards the class level test pool, its vcf entry
        for TestHaplotyper.chrIndex and the haplotyper are created anew and
        the "refGenome" path is set on the program configuration.
        """
        outputDir = "../testFiles/output/"
        for entry in os.listdir(outputDir):
            entryPath = os.path.join(outputDir, entry)
            if not os.path.isdir(entryPath):
                os.unlink(entryPath)
            else:
                shutil.rmtree(entryPath)

        pool = Pool.Pool("testPool", outputDir)
        TestHaplotyper.testPool = pool
        Program.config.setPath("refGenome",
                               "../testFiles/input/smallRefGenome.fa")
        chrom = TestHaplotyper.chrIndex
        pool.vcf[chrom] = VcfFile.VcfFile(pool,
                                          TestHaplotyper.inputVcf,
                                          bcf=False,
                                          chrom=chrom)
        TestHaplotyper.haplotyper = Haplotyper.Haplotyper(pool, chrom)
Esempio n. 15
0
 def testExecuteBeagleMultiThread(self):
     """Run executeBeagleMultiThread on the test pool and check the output vcf.

     The file name stored in the pool's vcf entry must resolve to the
     expected output path, after which checkNoOfSnps is run on that file.
     """
     expected = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleMultiThread(TestHaplotyper.testPool)
     produced = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
     producedAbs = os.path.abspath(produced)
     expectedAbs = os.path.abspath(expected)
     self.assertEqual(producedAbs, expectedAbs,
                      producedAbs + " not is " + expectedAbs)
     self.checkNoOfSnps(expected)