def preprocess(self, lib):
    """
    Determine and execute the preprocessing steps required for one library.

    The steps are executed in this order:
      * lib.forward and lib.reversed are passed through
        DirUtils.fileRegexToList (the result is used as a list of files)
      * sff input (lib.format == "sff") is converted to fastq, file by file
      * a small report is created for the raw input files
      * when there is more than one forward file, the forward files and
        (if present) the reversed files are merged into a single file each
        and a new small report is created; otherwise the single file is
        taken out of its list
      * lib.avgReadlength is read from the first small report
      * platform specific preprocessing ("illumina" or "454")
      * contamination filtering

    :param lib: the library to preprocess. Its forward, reversed and
                avgReadlength attributes are overwritten in place.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # line breaks were lost; in particular confirm that filterContamination
    # is meant to run for every sequencing platform.
    logging.info("Preprocessing of %s", lib.libName)
    LaTeX.ltxSection("Preprocessing of " + lib.libName)

    lib.forward = DirUtils.fileRegexToList(lib.forward)
    lib.reversed = DirUtils.fileRegexToList(lib.reversed)

    if lib.format == "sff":
        for idx, sffFile in enumerate(lib.forward):
            lib.forward[idx] = SffToFastqConverter.SffToFastqConverter(
                lib.outputDir, sffFile=sffFile).execute()

    # This report describes the files as they were before any merging; it is
    # the one used further down to fill lib.avgReadlength.
    smallReport = FastqSmallReport.FastqSmallReport()
    smallReport.createSmallReport(lib.forward, lib.reversed)

    if len(lib.forward) > 1:
        lib.forward = FastqCommands.MergeCommand(
            lib.outputDir, direction="forward", fastqFiles=lib.forward).execute()
        if lib.reversed is not None:
            # Merge the reversed files themselves. Passing lib.forward here
            # would hand over the forward file that was merged just above.
            lib.reversed = FastqCommands.MergeCommand(
                lib.outputDir, direction="reversed", fastqFiles=lib.reversed).execute()
        FastqSmallReport.FastqSmallReport().createSmallReport(lib.forward, lib.reversed)
    else:
        # Exactly one file per direction: unwrap it from its list.
        lib.forward = lib.forward[0]
        if lib.reversed is not None:
            lib.reversed = lib.reversed[0]

    # list(...) keeps the lookup working when keys() returns a view instead
    # of a list; the selected entry is the same as with keys()[0].
    firstKey = list(smallReport.fastqInfo.keys())[0]
    # Index 2 of the entry is presumably the average read length -- confirm
    # against FastqSmallReport.
    lib.avgReadlength = float(smallReport.fastqInfo[firstKey][2])

    if lib.sequencingPlatform == "illumina":
        self.illuminaPreprocess(lib)
    elif lib.sequencingPlatform == "454":
        lib.forward = FastqMcfTrimming.FastqTrimmer(
            lib.outputDir, forward=lib.forward, noTrim=True).execute()
        FastqSmallReport.FastqSmallReport().createSmallReport(lib.forward, lib.reversed)

    self.filterContamination(lib)
def doAssembly(self, pool):
    """
    Run the configured assembler on a pool and check the insert sizes.

    The global option "assembler" selects what is executed:
      * not set (None) or "wgs": WgsAssembler, writing to
        <pool.outputDir>assembly/
      * "allpaths": AllpathsAssembler, writing to
        <pool.outputDir>allPathsAssembly/
    For any other value no assembler is started and self.assembly is not
    assigned by this method.

    Afterwards InsertSizeChecker.checkInsertSize is called for every library
    in pool.libs that has reversed reads; libraries without reversed reads
    are skipped.

    :param pool: the pool to assemble; its outputDir and libs are read.
    """
    logging.info("Executing assembly")
    LaTeX.ltxSection("Assembly")

    # Look the option up once and reuse it for both comparisons.
    assemblerName = Configuration.instance.getGlobalOption("assembler")
    if assemblerName in (None, "wgs"):
        wgsOutputDir = pool.outputDir + "assembly/"
        self.assembly = WgsAssembler.WgsAssembler().doAssembly(wgsOutputDir, pool)
    elif assemblerName == "allpaths":
        allpathsOutputDir = pool.outputDir + "allPathsAssembly/"
        self.assembly = AllpathsAssembler.AllpathsAssembler().doAssembly(allpathsOutputDir, pool)

    for library in pool.libs:
        # Only libraries with reversed reads get an insert size check.
        if library.reversed is not None:
            logging.info("Calculating insert sizes for " + library.libName)
            checker = InsertSizeChecker.InsertSizeChecker()
            checker.checkInsertSize(
                library.outputDir,
                library.rawForward,
                library.rawReversed,
                self.assembly,
                library.libName,
                library.insertSize,
            )
def doGenomeSizeEstimation(self, outputDir, pool):
    """
    Run the main flow of the genome size estimation.

    This main flow contains the following steps:
      * Execute Jellyfish count
      * Execute Jellyfish stats
      * Create a histogram of the unique kmers with Jellyfish histo
      * Draw a histogram of the unique kmers
      * Estimate the genome size with the BGI method

    The results of the Jellyfish steps are stored on self
    (jellyFishCountsFile, jellyFishStatsFile, jellyfishHistoFile), as are the
    path of the plot (genSizeHistoPlot) and the integer value returned by
    drawHisto (peak). At the end self is appended to
    Reporter.instance.objects.

    :param outputDir: directory for all output of this step; it is created
                      when it does not exist yet.
    :param pool: the pool handed to Jellyfish count and to
                 calculateGenomeSize.
    """
    logging.info("Starting genome size estimation")
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)

    # The title is passed without LaTeX markup, like the other ltxSection
    # calls; a trailing "}" here would end up inside the section title.
    LaTeX.ltxSection("Genome size estimation")

    self.jellyFishCountsFile = JellyFish.JellyFishCount(outputDir, pool=pool).execute()
    self.jellyFishStatsFile = JellyFish.JellyFishStats(
        outputDir, jellyFishCountsFile=self.jellyFishCountsFile).execute()
    self.jellyfishHistoFile = JellyFish.JellyFishHisto(
        outputDir, jellyFishCountsFile=self.jellyFishCountsFile).execute()

    # os.path.join gives the same path as plain concatenation when outputDir
    # ends with a separator and still yields a valid path when it does not.
    self.genSizeHistoPlot = os.path.join(outputDir, "kmer_graph.png")
    self.peak = int(self.drawHisto(self.jellyfishHistoFile, self.genSizeHistoPlot))
    self.calculateGenomeSize(pool, self.jellyFishStatsFile, self.jellyfishHistoFile)

    Reporter.instance.objects.append(self)