Beispiel #1
0
    def findLoci(self, pool):
        """Compute and write the p-values of every phenotype for every vcf of the pool.

        Beagle is run on the pool first. The phenotype input is then converted
        with ``convertExcelToCsv`` (the converted path is stored back in
        ``Program.config.phenoData``) and read. For each phenotype and each
        ``(chrom, vcfFile)`` entry of ``pool.vcf`` the gff and vcf files are
        read, ``findLociInPheno`` is called and its result is handed to
        ``writePvaluesToFile``.

        :param pool: the pool providing ``vcf`` (chromosome -> vcf file) and ``outputDir``
        """
        # read the input files
        Haplotyper.executeBeagleMultiThread(pool)

        phenReader = Readers.PhenotypeReader()
        Program.config.phenoData = self.convertExcelToCsv(
            Program.config.phenoData, pool.outputDir)
        phenReader.readFile(Program.config.phenoData)

        # NOTE(review): nothing below consults the converter any more; the table
        # is still read so that any side effects of loading it are preserved.
        converter = Readers.AccessionConverter()
        converter.readFile(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "convertToAccession.txt"))

        for phenotype in phenReader.phenotypes:
            for chrom, vcfFile in pool.vcf.items():
                # A fresh set of contigs is read for every phenotype/chromosome
                # pair before the vcf reader is given those contigs.
                gffReader = Readers.GffReader(chrom=chrom)
                gffReader.readFile(
                    Program.config.gffFile)  # @UndefinedVariable
                phenotype.contigs = gffReader.contigs

                vcfReader = Readers.VcfReader(phenotype.contigs.values())
                vcfReader.readFile(vcfFile.getFile())

                pVals = self.findLociInPheno(phenotype)
                self.writePvaluesToFile(pVals, chrom, pool,
                                        phenotype.description)
Beispiel #2
0
 def execute(self, executable):
     """Run the program selected on the command line.

     Supported values are ``haplotyping``, ``snvCalling``, ``mapping``,
     ``allelicDiversity`` and ``findLoci``. Any other value, or an
     ``snvCaller`` other than ``samtools``/``GATK``, does nothing.
     ``allelicDiversity`` and ``findLoci`` print a message and exit when a
     required input file has not been configured.

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     if executable == "haplotyping":
         # Explicit comparison kept: the type of Grid.useGrid is not visible here.
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if Program.config.snvCaller == "samtools":  # @UndefinedVariable
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif Program.config.snvCaller == "GATK":  # @UndefinedVariable
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if Program.config.gffFile is None:  # @UndefinedVariable
             print("When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         allelicDiverityCalculator = AllelicDiversity.AllelicDiversity(self.pool, Program.config.gffFile)  # @UndefinedVariable
         allelicDiverityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if Program.config.phenoData is None:  # @UndefinedVariable
             print("When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>")
             exit()
         if Program.config.gffFile is None:  # @UndefinedVariable
             # The message used to ask for phenotype data although the gff
             # file is what is missing in this branch.
             print("When finding loci, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)
Beispiel #3
0
    def findLoci(self, pool):
        """Find loci for all phenotypes and write their p-values per chromosome.

        Steps, in order: run Beagle on the pool, convert the phenotype input
        via ``convertExcelToCsv`` (storing the new path in
        ``Program.config.phenoData``), read the phenotypes, and then, for each
        phenotype and each ``(chrom, vcfFile)`` pair in ``pool.vcf``, read the
        gff and vcf files, call ``findLociInPheno`` and pass the result to
        ``writePvaluesToFile``.

        :param pool: the pool providing ``vcf`` (chromosome -> vcf file) and ``outputDir``
        """
        # read the input files
        Haplotyper.executeBeagleMultiThread(pool)

        phenReader = Readers.PhenotypeReader()
        Program.config.phenoData = self.convertExcelToCsv(Program.config.phenoData, pool.outputDir)
        phenReader.readFile(Program.config.phenoData)

        # NOTE(review): the converter is loaded but not used further down; the
        # read is kept so that any side effects of loading the table remain.
        moduleDir = os.path.dirname(os.path.realpath(__file__))
        converter = Readers.AccessionConverter()
        converter.readFile(os.path.join(moduleDir, "convertToAccession.txt"))

        for phenotype in phenReader.phenotypes:
            for chrom, vcfFile in pool.vcf.items():
                # The contigs are re-read from the gff file for every
                # phenotype/chromosome pair before the vcf is read onto them.
                gffReader = Readers.GffReader(chrom=chrom)
                gffReader.readFile(Program.config.gffFile)  # @UndefinedVariable
                phenotype.contigs = gffReader.contigs

                vcfReader = Readers.VcfReader(phenotype.contigs.values())
                vcfReader.readFile(vcfFile.getFile())

                pVals = self.findLociInPheno(phenotype)
                self.writePvaluesToFile(pVals, chrom, pool, phenotype.description)
Beispiel #4
0
    def getAllelicDiversity(self):
        """Calculate the allelic diversity and write one tab-separated file per vcf.

        Beagle is run first (on the cluster when ``Grid.useGrid`` is set,
        multi-threaded otherwise). For every key of ``self.pool.vcf`` the files
        are parsed and a table is written into ``self.pool.outputDir``: a
        header line, then one line per contig holding the contig id, the
        reference haplotype and two haplotype columns per accession. ``-``
        is written where a value is not available.

        Problems are logged per vcf; the remaining vcf files are still processed.
        """
        # Explicit comparison kept: the type of Grid.useGrid is not visible here.
        if Grid.useGrid == True:
            Haplotyper.executeBeagleCluster(self.pool)
        else:
            Haplotyper.executeBeagleMultiThread(self.pool)

        for vcf in self.pool.vcf:
            if vcf is None:
                logging.info("Starting to calculate the allelic diversity")
                outputFile = self.pool.outputDir + "/allelicDiversity.csv"
            else:
                logging.info("calculating allelic diversity of " + vcf)
                outputFile = self.pool.outputDir + "/" + vcf + "_" + "allelicDiversity.csv"
            try:
                self.vcfFile = self.pool.vcf[vcf].getFile()
                self._parseFiles(vcf)
                haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
                # list() makes the indexing work when values() returns a view
                # (Python 3). An empty result still raises IndexError, which is
                # reported below as "No SNPs within contigs found".
                accessions = list(list(haplotypes.values())[0].keys())

                with open(outputFile, "w") as outWriter:
                    header = ["contig\toriginal\t"]
                    for accession in accessions:
                        header.append(accession + "_1\t" + accession + "_2\t")
                    header.append("\n")
                    outWriter.write("".join(header))

                    for contigId in self.allContigs:
                        cells = [contigId + "\t"]
                        try:
                            cells.append(
                                self.allContigs[contigId].refHaplotype + "\t")
                        except AttributeError:
                            # Contig without a reference haplotype.
                            cells.append("-\t")
                        # The membership test does not depend on the accession,
                        # so it is done once per contig.
                        if contigId in haplotypes:
                            contigHaplotypes = haplotypes[contigId]
                            for accession in accessions:
                                for i in range(2):
                                    cells.append(
                                        contigHaplotypes[accession][i] + "\t")
                        else:
                            cells.append("-\t" * (2 * len(accessions)))
                        cells.append("\n")
                        outWriter.write("".join(cells))
            except IndexError:
                if vcf is None:
                    logging.warning("No SNPs within contigs found")
                else:
                    logging.warning("No SNPs within contigs found of " + vcf)

            except Exception as ex:
                # Best effort: log the failure and continue with the next vcf.
                if vcf is None:
                    logging.error(
                        "an error occured during parsing the vcf file")
                else:
                    logging.error("an error occured during parsing " + vcf)
                logging.error(ex)
                traceback.print_exc()
Beispiel #5
0
 def testExecuteBeagleMultiThread(self):
     # The multi-threaded Beagle run must register the expected filtered vcf
     # for the tested chromosome and that file must pass the SNP count check.
     expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleMultiThread(TestHaplotyper.testPool)
     producedVcf = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName

     # Compare absolute paths so the check does not depend on how each path is written.
     producedPath = os.path.abspath(producedVcf)
     expectedPath = os.path.abspath(expectedVcf)
     failureMessage = producedPath + " not is " + expectedPath
     self.assertEqual(producedPath, expectedPath, failureMessage)

     self.checkNoOfSnps(expectedVcf)
Beispiel #6
0
 def getAllelicDiversity(self):
     """The method getAllelicDiversity calculates the allelic diversity and writes the output to a file.

     Beagle is run first (on the cluster when ``Grid.useGrid`` is set,
     multi-threaded otherwise). For every key of ``self.pool.vcf`` one
     tab-separated file is written into ``self.pool.outputDir``: a header
     line followed by one line per contig with the contig id, the reference
     haplotype and two haplotype columns per accession. ``-`` is written
     where a value is not available.

     Problems are logged per vcf; the remaining vcf files are still processed.
     """
     # Explicit comparison kept: the type of Grid.useGrid is not visible here.
     if Grid.useGrid == True:
         Haplotyper.executeBeagleCluster(self.pool)
     else:
         Haplotyper.executeBeagleMultiThread(self.pool)

     for vcf in self.pool.vcf:
         if vcf is None:
             logging.info("Starting to calculate the allelic diversity")
             outputFile = self.pool.outputDir + "/allelicDiversity.csv"
         else:
             logging.info("calculating allelic diversity of " + vcf)
             outputFile = self.pool.outputDir + "/" + vcf + "_" + "allelicDiversity.csv"
         try:
             self.vcfFile = self.pool.vcf[vcf].getFile()
             self._parseFiles(vcf)
             haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
             # values() may be a view (Python 3), which cannot be indexed;
             # list() keeps the IndexError on an empty result that the
             # "No SNPs within contigs found" handler below relies on.
             firstContigHaplotypes = list(haplotypes.values())[0]
             accessions = list(firstContigHaplotypes.keys())

             with open(outputFile, "w") as outWriter:
                 outWriter.write("contig\toriginal\t")
                 for accession in accessions:
                     outWriter.write(accession + "_1\t" + accession + "_2\t")
                 outWriter.write("\n")
                 for contigId in self.allContigs:
                     outWriter.write(contigId + "\t")
                     try:
                         outWriter.write(self.allContigs[contigId].refHaplotype + "\t")
                     except AttributeError:
                         # Contig without a reference haplotype.
                         outWriter.write("-\t")
                     # Whether the contig has haplotypes does not depend on
                     # the accession, so it is checked once per contig.
                     hasHaplotypes = contigId in haplotypes
                     for accession in accessions:
                         for i in range(2):
                             if hasHaplotypes:
                                 outWriter.write(haplotypes[contigId][accession][i] + "\t")
                             else:
                                 outWriter.write("-\t")
                     outWriter.write("\n")
         except IndexError:
             if vcf is None:
                 logging.warning("No SNPs within contigs found")
             else:
                 logging.warning("No SNPs within contigs found of " + vcf)

         except Exception as ex:
             # Best effort: log the failure and continue with the next vcf.
             if vcf is None:
                 logging.error("an error occured during parsing the vcf file")
             else:
                 logging.error("an error occured during parsing " + vcf)
             logging.error(ex)
             traceback.print_exc()
             
Beispiel #7
0
 def execute(self, executable):
     """The method execute checks which program has to be executed and executes this program.

     Recognised values are ``haplotyping``, ``snvCalling``, ``mapping``,
     ``allelicDiversity`` and ``findLoci``; any other value does nothing, and
     so does ``snvCalling`` with a caller other than ``samtools``/``GATK``.
     ``allelicDiversity`` and ``findLoci`` print a message and exit when a
     required input file has not been configured.

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     config = Program.config  # @UndefinedVariable

     if executable == "haplotyping":
         # Explicit comparison kept: the type of Grid.useGrid is not visible here.
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if config.snvCaller == "samtools":
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif config.snvCaller == "GATK":
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if config.gffFile is None:
             print(
                 "When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         allelicDiverityCalculator = AllelicDiversity.AllelicDiversity(
             self.pool, config.gffFile)
         allelicDiverityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if config.phenoData is None:
             print(
                 "When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>"
             )
             exit()
         if config.gffFile is None:
             # This branch reports the missing gff file; the message used to
             # ask for phenotype data instead.
             print(
                 "When finding loci, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)
Beispiel #8
0
 def testExecuteBeagleMultiThread(self):
     # Run Beagle multi-threaded on the test pool, then verify that the vcf
     # registered for the tested chromosome is the expected filtered file.
     wantedFile = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleMultiThread(TestHaplotyper.testPool)
     actualFile = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName

     actualAbs = os.path.abspath(actualFile)
     wantedAbs = os.path.abspath(wantedFile)
     self.assertEqual(
         actualAbs,
         wantedAbs,
         actualAbs + " not is " + wantedAbs,
     )
     # The expected file must also pass the SNP count check.
     self.checkNoOfSnps(wantedFile)