def getCommands(self):
    """Build the samtools mpileup / bcftools variant-calling commands.

    Two shell pipelines are generated for the first input file:

    1. ``samtools mpileup | bcftools view`` writing ``<basename>.raw.vcf``
    2. ``bcftools view | vcfutils varFilter -D100`` writing
       ``<basename>.flt.vcf``

    When ``self.regionstr`` is non-empty it is passed to mpileup with ``-r``
    and is also inserted into both output file names.

    Side effects: resets ``self.commands`` and ``self.output_files``, sets
    ``self.basename`` and appends both output paths to
    ``self.expected_output_files``.

    Returns:
        ``self.commands``, holding two ``AnalysisCommand`` objects ranked
        1 and 2.

    Raises:
        Exception: if ``self.checkInputFiles()`` returns ``False``.
    """
    self.commands = []
    self.output_files = []

    self.checkDiskSpace()

    # The reference FASTA is deliberately not parsed here: nothing in this
    # method reads the sequence dictionary, so loading it was pure overhead
    # on every call.

    # Compared against False explicitly (as before) so that a None return
    # is still treated as success.
    if self.checkInputFiles() == False:
        raise Exception("Input files [%s] don't exist = can't continue" % (self.input_files))

    fileparts = FileUtils.getFileParts(self.input_files[0])
    self.basename = fileparts['basename']

    # Both outputs share one stem; a region, when given, becomes part of it.
    stem = self.working_dir + "/" + self.basename
    regstr = ""
    if self.regionstr != "":
        regstr = " -r " + self.regionstr
        stem = stem + "." + self.regionstr

    outfile1 = stem + ".raw.vcf"
    outfile2 = stem + ".flt.vcf"

    self.expected_output_files.append(outfile1)
    self.expected_output_files.append(outfile2)

    # The command strings are kept byte-for-byte as before (including the
    # doubled spaces) so anything comparing or logging them is unaffected.
    command1 = (self.samtools + " mpileup -uf " + self.refgenome + " "
                + self.input_files[0] + " " + regstr + " | "
                + self.bcftools + " view " + " -bvcg - > " + outfile1)
    command2 = (self.bcftools + " view " + outfile1 + " | "
                + self.vcfutils + " varFilter -D100 > " + outfile2)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Command %s" % command1)
    print("Command %s" % command2)

    for command in (command1, command2):
        self.commands.append(
            AnalysisCommand(command=command,
                            command_rank=len(self.commands) + 1))

    return self.commands
def getCommands(self):
    """Build mpileup commands for the whole genome, one region per chunk.

    Every sequence returned by ``FastaFile.getSequenceDict`` is split into
    consecutive 1-based, inclusive regions of at most ``self.chunk`` bases.
    For each region a fresh "SamtoolsMpileup" analysis is created, given
    this analysis' input files and reference genome, and its commands are
    collected into ``self.commands``.

    Returns:
        ``self.commands`` with the commands of every per-region analysis,
        in sequence/region order.

    Raises:
        ValueError: if ``self.chunk`` is not positive (the region loop
            could never advance).
        Exception: if ``self.checkInputFiles()`` returns ``False``.
    """
    chunk = self.chunk
    # Fail fast: a chunk of 0 or less would make the loop below spin forever.
    if chunk <= 0:
        raise ValueError("chunk size must be positive, got %r" % (chunk,))

    self.commands = []
    self.output_files = []

    self.checkDiskSpace()

    print("Reading genome file")
    seqs = FastaFile.getSequenceDict(self.refgenome, False)

    # Compared against False explicitly (as before) so that a None return
    # is still treated as success.
    if self.checkInputFiles() == False:
        raise Exception("Input files [%s] don't exist = can't continue" % (self.input_files))

    fileparts = FileUtils.getFileParts(self.input_files[0])
    self.basename = fileparts['basename']

    for seq in seqs:
        seqlen = seqs[seq]['len']
        start = 1
        # "<=" (not "<"): with "<" the final base was dropped whenever
        # (seqlen - 1) was a multiple of chunk, and 1 bp sequences were
        # skipped entirely.
        while start <= seqlen:
            end = min(start + chunk - 1, seqlen)
            regionstr = "%s:%d-%d" % (seq, start, end)

            tmpana = AnalysisFactory.createAnalysisFromModuleName("SamtoolsMpileup")
            tmpana.setInputFiles(self.input_files, self.input_types)
            tmpana.refgenome = self.refgenome
            tmpana.regionstr = regionstr
            tmpana.init()

            self.commands.extend(tmpana.getCommands())

            start += chunk

    return self.commands
def getCommands(self):
    """Build the makeblastdb command for the first input file.

    The database is titled with the input file's basename and written next
    to the input file (``<dirname>/<basename>``). The database type comes
    from an optional ``self.dbtype`` attribute and falls back to ``"prot"``,
    which was the previously hard-coded value, when the attribute is
    missing, ``None`` or empty.

    Side effects: sets ``self.basename`` and appends the command string to
    ``self.commands``.
    NOTE(review): ``self.commands`` is not reset here, so calling this
    twice appends the command twice - confirm whether that is intended.

    Returns:
        ``self.commands`` with the new command string appended.

    Raises:
        Exception: if ``self.checkInputFiles()`` returns ``False``.
    """
    self.checkDiskSpace()

    # Compared against False explicitly (as before) so that a None return
    # is still treated as success.
    if self.checkInputFiles() == False:
        raise Exception("Input files [%s] don't exist = can't continue" % (self.input_files))

    fileparts = FileUtils.getFileParts(self.input_files[0])
    self.basename = fileparts['basename']

    # makeblastdb accepts "nucl" or "prot"; keep "prot" as the default so
    # existing callers that never set dbtype get the same command as before.
    dbtype = getattr(self, "dbtype", None) or "prot"

    command = (self.makeblastdb + " -in " + self.input_files[0]
               + " -input_type fasta -dbtype " + dbtype
               + " -title " + self.basename
               + " -parse_seqids -out " + fileparts['dirname'] + "/" + self.basename)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Command %s" % command)

    self.commands.append(command)

    return self.commands
def init(self):
    """Initialise the FastQC analysis and register its expected outputs.

    The FastQC output directory name is derived from the first input file:

    * ``.fastq`` extension: ``<filestub>_fastqc/``
    * ``.gz`` extension:    ``<filestub without ".fastq">_fastqc/``
    * anything else:        ``<basename>_fastqc/``

    The directory is stored in ``self.fastqc_dir`` and every entry of
    ``self.expected_output_filelist`` is registered, prefixed with that
    directory, through ``AnalysisUtils.addExpectedOutputFile``.

    Raises:
        Exception: if there are no input files.
    """
    super(FastQCAnalysis, self).init()

    if not self.input_files:
        raise Exception(
            "No input files for FastQCAnalysis module. Can't init")

    fileparts = FileUtils.getFileParts(self.input_files[0].input_file)
    fileext = fileparts['fileext']

    # Named fastqc_dir rather than dir to avoid shadowing the builtin.
    if fileext == ".fastq":
        fastqc_dir = fileparts['filestub'] + "_fastqc/"
    elif fileext == ".gz":
        fastqc_dir = fileparts['filestub'].replace(".fastq", "") + "_fastqc/"
    else:
        fastqc_dir = fileparts['basename'] + "_fastqc/"

    self.fastqc_dir = fastqc_dir

    for filename in self.expected_output_filelist:
        AnalysisUtils.addExpectedOutputFile(self, fastqc_dir + filename)
def init(self):
    """Validate that at least one input file was supplied.

    Raises:
        Exception: if ``self.input_files`` is empty (or ``None``).
    """
    # Truthiness check also covers None, which len() would reject with an
    # unrelated TypeError.
    if not self.input_files:
        raise Exception("No input files for BlastOutput6Parser module. Can't init")

    # The result is not used within this method; the call is kept so any
    # error FileUtils raises for an unusable first input still surfaces here.
    fileparts = FileUtils.getFileParts(self.input_files[0])
def getCommands(self):
    """Build the bowtie alignment and samtools indexing commands.

    If ``self.commands`` is already populated it is returned unchanged.
    Otherwise, for every input file:

    * a ``.gz`` input first gets a ``gunzip -c`` command that writes the
      decompressed copy into ``self.working_dir``; that copy is recorded in
      ``self.temp_output_files`` and used as the aligner input;
    * command 1 pipes bowtie through ``samtools view -bS`` into
      ``samtools sort``, writing ``<working_dir>/<filestub>`` (.bam);
    * command 2 runs ``samtools index`` on the sorted BAM.

    ``<filestub>.bam`` and ``<filestub>.bam.bai`` are registered in
    ``self.expected_output_files``.

    NOTE(review): the gunzip command is appended as a plain string while
    the other commands are ``AnalysisCommand`` objects - confirm that
    whatever consumes ``self.commands`` accepts both.

    Returns:
        ``self.commands``.

    Raises:
        Exception: if the bowtie or samtools binary is missing, if the
            input file check returns ``False``, or if ``self.param`` is
            ``None``.
    """
    if self.commands:
        return self.commands

    logging.info(" ========> Analysis %20s Getting commands", self.name)

    self.commands = []
    self.expected_output_files = []
    self.temp_output_files = []

    tmpdir = self.working_dir

    btbin = self.bowtiebindir + self.bowtiebinname
    stbin = self.samtoolsbindir + self.samtoolsbinname

    self.calculateSpaceNeeded()

    # Compared against False explicitly (as before) so that a None return
    # from the helpers is still treated as success.
    for binary in (btbin, stbin):
        if FileUtils.fileExists(binary) == False:
            raise Exception("Binary file [%s] doesn't exist = can't continue" % binary)

    if AnalysisUtils.checkInputFiles(self) == False:
        raise Exception("Input files [%s] don't exist = can't continue" % (self.input_files))

    AnalysisUtils.checkDiskSpace(self)

    for fobj in self.input_files:
        f = fobj.input_file

        try:
            # Checked before anything is queued for this file, so a missing
            # parameter no longer leaves a stray gunzip command behind.
            if self.param is None:
                raise Exception(
                    "No parameters entered for bowtie = needs -x <genomeindex>")

            if f.endswith(".gz"):
                # Strip only the trailing ".gz"; str.replace would also
                # remove any ".gz" occurring earlier in the path.
                fparts = FileUtils.getFileParts(f[:-len(".gz")])
                unzipped = tmpdir + "/" + fparts['basename']

                self.commands.append("gunzip -c " + f + " > " + unzipped)
                # Record the file gunzip actually writes (in the working
                # dir), not a path alongside the original input.
                self.temp_output_files.append(unzipped)

                f = unzipped

            fstub = FileUtils.getFileParts(f)['filestub']
            samtoolsoutfile = tmpdir + "/" + fstub + ".bam"

            command1 = (btbin + " " + self.param + " " + f + " | "
                        + stbin + " view -bS - | "
                        + stbin + " sort - " + tmpdir + "/" + fstub)
            logging.info(" ========> Analysis %20s command 1 : %s",
                         self.name, command1)

            command2 = stbin + " index " + samtoolsoutfile
            logging.info(" ========> Analysis %20s command 2 : %s",
                         self.name, command2)

            self.expected_output_files.append(
                AnalysisExpectedOutputFile(expected_output_file=fstub + ".bam"))
            self.expected_output_files.append(
                AnalysisExpectedOutputFile(expected_output_file=fstub + ".bam.bai"))

            self.commands.append(AnalysisCommand(command=command1))
            self.commands.append(AnalysisCommand(command=command2))

        except Exception as e:
            logging.info(
                " ========> Analysis %20s Failed building command list [%s]",
                self.name, e)
            raise

    return self.commands