def run_transdecoder_predict(self, infasta, longorfs_dir, out_dir=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run TransDecoder.Predict on a fasta file.

    Parameters
    ----------
    infasta: str
        Path to the input fasta file (passed as ``-t``).
    longorfs_dir: str
        Existing directory passed as ``-O``.
    out_dir: str
        Directory the result files are moved into. When omitted, nothing is
        moved and the current working directory is returned.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Id attached to this command in the logs.
    kwargs: dict
        Extra options; these override ``-t`` and ``-O`` if they collide.

    :return: out_dir on success, empty string on failure
    :rtype: string
    """
    # Stop on invalid input instead of running a command that cannot succeed.
    if not pu.check_files_exist(infasta):
        pu.print_boldred("Please check input file:" + infasta)
        return ""
    if not pu.check_paths_exist(longorfs_dir):
        pu.print_boldred("Path {} doesn't exist".format(longorfs_dir))
        return ""

    # Results are only relocated when the caller asked for a specific directory.
    move_flag = bool(out_dir)
    if not out_dir:
        out_dir = os.getcwd()
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    newOpts = {"-t": infasta, "-O": longorfs_dir}
    mergedOpts = {**newOpts, **kwargs}

    # execute Predict
    status = self.run_transdecoder('TransDecoder.Predict', verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **mergedOpts)
    if not status:
        pu.print_boldred("Transdecoder failed")
        return ""

    # move output files (looked up relative to the working directory) to out_dir
    if move_flag:
        outfile_prefix = pu.get_filename(infasta) + ".transdecoder"
        for extension in (".bed", ".cds", ".gff3", ".pep"):
            result_name = outfile_prefix + extension
            pe.move_file(result_name, os.path.join(out_dir, result_name), verbose)

    return out_dir
def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Build a salmon index and store its path in self.

    index_path: str
        path to the output directory
    index_name: str
        index name
    fasta: str
        path to the fasta file passed to salmon as ``-t``
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Extra options to pass to salmon. ``-t`` and ``-i`` set here take
        precedence over the same keys in kwargs.

    :return: status of salmon index
    :rtype: bool
    """
    # check input
    if not pu.check_files_exist(fasta):
        pu.print_boldred("{} does not exist. Exiting".format(fasta))
        return False

    # create out dir
    if not pu.check_paths_exist(index_path):
        if not pu.mkdir(index_path):
            print("ERROR in building salmon index. Failed to create index directory.")
            return False

    indexOut = os.path.join(index_path, index_name)
    newOpts = {"-t": fasta, "-i": indexOut}
    mergedOpts = {**kwargs, **newOpts}

    # call salmon
    status = self.run_salmon("index", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **mergedOpts)

    # The index counts as built only if the command succeeded and the index
    # location exists afterwards.
    if status and pu.check_paths_exist(indexOut):
        self.salmon_index = indexOut
        self.passedArgumentDict['-i'] = self.salmon_index
        pu.print_green("salmon index is:" + self.salmon_index)
        return True

    pu.print_boldred("Failed to create salmon index")
    return False
def create_lock(self, target_list, message):
    """
    Create a temporary .Lock file next to each target file and write a
    timestamped message in it.

    Parameters
    ----------
    target_list : List
        List of target files.
    message : Str
        Message to write in file.

    Returns
    -------
    templist : List
        A list of .Lock file names corresponding to the target files.
    """
    templist = []
    for f in target_list:
        temp_path = pu.get_file_directory(f)
        if not pu.check_paths_exist(temp_path):
            pu.mkdir(temp_path)
        prefix = pu.get_filename(f) + '_'
        # delete=False keeps the file on disk after the handle is closed; the
        # with-block guarantees the handle is closed and the message flushed.
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix='.Lock', dir=temp_path, delete=False) as temp:
            # TODO: dump command in lock
            timestamp = pu.get_timestamp()
            temp.write(str.encode(timestamp + '\t' + message))
            templist.append(temp.name)
    return templist
def createMikadoGTFlist(self, out_file, out_dir, searchPath, searchQuery="*.gtf", strand=False):
    """Create a file to be used by mikado configure.

    out_file: str
        Output file name; ".txt" is appended.
    out_dir: str
        Directory to write the list file to (created if missing).
    searchPath: str
        Path passed to pe.find_files to look for gtf files.
    searchQuery: str
        Query used for the search. Default: "*.gtf"
    strand: bool
        Value written as the third column of every row.

    :return: Path to the written list file
    :rtype: string
    """
    files = pe.find_files(searchPath, searchQuery)

    # create out dir
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    outFilePath = os.path.join(out_dir, out_file + ".txt")

    # One tab-separated row per file: path, basename, strand flag.
    # Files for which no basename is returned are skipped.
    gtfs = []
    for gtf_path in files:
        thisName = pu.get_file_basename(gtf_path)
        if thisName:
            gtfs.append("\t".join([gtf_path, thisName, str(strand)]))

    # with-block closes the handle even if the write fails
    with open(outFilePath, "w") as list_file:
        list_file.write("\n".join(gtfs))

    pu.print_green("Mikado list file written to:" + outFilePath)
    return outFilePath
def build_index(self, in_fasta, dbname, out_dir=None, threads=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Build a diamond database with ``makedb`` and remember its path in self.index.

    in_fasta: str
        Path to the input fasta file.
    dbname: str
        Name of the database inside out_dir.
    out_dir: str
        Output directory; the current working directory when omitted.
    threads: int
        Thread count; self.threads when omitted.
    verbose, quiet, logs, objectid:
        Forwarded to run_diamond.
    kwargs: dict
        Extra options; these override ``--in``, ``-d`` and ``--threads``.

    :return: True if the index already exists or was built, False otherwise
    :rtype: bool
    """
    # the fasta must be present
    if not pu.check_files_exist(in_fasta):
        pu.print_boldred("Input fasta: {} not found...\n diamond makedb failed".format(in_fasta))
        return False

    # resolve and create the output directory
    target_dir = out_dir if out_dir else os.getcwd()
    if not pu.check_paths_exist(target_dir):
        pu.mkdir(target_dir)

    # reuse an index that is already there
    db_path = os.path.join(target_dir, dbname)
    self.index = db_path
    if self.check_index():
        pu.print_green("Diamond index: {} exists, using it...".format(self.index))
        self.index = db_path
        return True

    n_threads = threads if threads else self.threads

    # caller supplied options win over the ones assembled here
    diamond_opts = {"--in": in_fasta, "-d": db_path, "--threads": str(n_threads)}
    diamond_opts.update(kwargs)

    built = self.run_diamond("makedb", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **diamond_opts)
    if not built:
        return False

    self.index = db_path
    return True
def perform_qc(self,sra_object,out_dir="",out_suffix="_bbduk",objectid="NA"):
    """Run bbduk on fastq files specified by the sra_object.

    sra_object: SRA
        An SRA object whose fastq files will be used
    out_dir: str
        Path to output directory. Default is sra_object.directory
    out_suffix: string
        Suffix for the output fastq files
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

    :return: Returns the path of fastq files after QC. tuple has one item for
             single end files and 2 for paired. On failure the tuple is ("",).
    :rtype: tuple
    """
    # make out_dir
    if not out_dir:
        out_dir = sra_object.directory
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    if sra_object.layout == 'PAIRED':
        fq1 = sra_object.fastq_path
        fq2 = sra_object.fastq2_path
        out_file1Path = os.path.join(out_dir, pu.get_file_basename(fq1) + out_suffix + ".fastq")
        out_file2Path = os.path.join(out_dir, pu.get_file_basename(fq2) + out_suffix + ".fastq")
        internal_kwargs = {"in": fq1, "in2": fq2, "out": out_file1Path, "out2": out_file2Path}

        # run bbduk
        status = self.run(objectid=objectid, target=[out_file1Path, out_file2Path], **internal_kwargs)

        # In a dry run no files are produced, so their absence is not an error.
        if status and (pu.check_files_exist(out_file1Path, out_file2Path) or _dryrun):
            return (out_file1Path, out_file2Path)
        # Every failure yields a tuple so callers can always index the result.
        return ("",)

    fq = sra_object.fastq_path
    out_filePath = os.path.join(out_dir, pu.get_file_basename(fq) + out_suffix + ".fastq")
    internal_kwargs = {"in": fq, "out": out_filePath}

    # run bbduk
    status = self.run(objectid=objectid, target=out_filePath, **internal_kwargs)

    if status and (pu.check_files_exist(out_filePath) or _dryrun):
        return (out_filePath,)
    return ("",)
def test_portcullis():
    """Run portcullis full on the test genome/bam and check that the returned directory exists."""
    portcullis = tools.Portcullis()
    result_dir = portcullis.run_portcullisFull(
        testVars.genome,
        testVars.portcullisBam,
        out_dir=testVars.testDir,
        threads=8,
    )
    assert pu.check_paths_exist(result_dir) == True, "Failed portcullis run"
def init_from_path(self,path):
    """Initialise this object by scanning a directory for fastq and sra files.

    path: str
        Directory to scan.

    :raises Exception: if path does not exist, or if neither fastq nor sra
                       files are reported as present after scanning.
    """
    if not pu.check_paths_exist(path):
        raise Exception("Please provide a valid path to scan for RNA-Seq data")

    # scan path for fastq
    self.search_fastq(path)
    # scan path for sra
    self.search_sra(path)

    if not (self.fastqFilesExistsLocally() or self.sraFileExistsLocally()):
        # format() keeps the path and the following sentence separated and
        # also works when path is not a plain str
        raise Exception("No files found at: {}. Please provide a valid path to scan for RNA-Seq data".format(path))
def init_from_path(self, path):
    """Initialise this object from a directory containing fastq or sra files.

    path: str
        Directory to scan.

    :raises Exception: if path does not exist, or if neither the fastq search
                       nor the sra search succeeds.
    """
    if not pu.check_paths_exist(path):
        raise Exception("Please provide a valid path to scan for RNA-Seq data")

    # the sra search is only attempted when the fastq search found nothing
    found = self.search_fastq(path) or self.search_sra(path)
    if not found:
        raise Exception("Please provide a valid path to scan for RNA-Seq data")
def test_transdecoder():
    """Run TransDecoder LongOrfs then Predict on the small cdna set and check the returned directory exists."""
    decoder = tools.Transdecoder()
    orfs_dir = decoder.run_transdecoder_longorfs(
        testVars.cdna_small,
        out_dir=testVars.testDir + "/longorfsout",
    )
    predict_dir = decoder.run_transdecoder_predict(
        testVars.cdna_small,
        orfs_dir,
        out_dir=testVars.testDir + "/predout",
    )
    assert pu.check_paths_exist(predict_dir) == True, "TransDecoder failed"
def perform_alignment(self, sra_object, out_suffix="_bowtie2", out_dir="", objectid="NA"):
    """Align the reads of an SRA object and return a sorted bam.

    Parameters
    ----------
    sra_object: SRA
        Supplies fastq paths, layout, srr_accession and the default directory.
    out_suffix: string
        Suffix for the output sam file
    out_dir: string
        Directory to save the results. Default value is sra_object.directory
    objectid: str
        Accepted for interface consistency; the command is run with
        sra_object.srr_accession as its id.

    :return: Result of Samtools.sam_sorted_bam on the produced sam file, or an
             empty string on failure.
    :rtype: string
    """
    if not out_dir:
        out_dir = sra_object.directory
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    # path of the sam file the aligner writes
    sam_path = os.path.join(out_dir, sra_object.srr_accession + out_suffix + ".sam")

    # read inputs depend on the layout; the output option is always last
    if sra_object.layout == 'PAIRED':
        aligner_opts = {"-1": sra_object.fastq_path, "-2": sra_object.fastq2_path}
    else:
        aligner_opts = {"-U": sra_object.fastq_path}
    aligner_opts["-S"] = sam_path

    ok = self.run(None, objectid=sra_object.srr_accession, target=sam_path, **aligner_opts)
    if not ok:
        return ""

    # a missing sam file is only an error outside of a dry run
    if not (pu.check_files_exist(sam_path) or _dryrun):
        return ""

    # convert to sorted bam before returning
    return tools.Samtools().sam_sorted_bam(sam_path)
def generate_multiqc(directory, tempDir, outDir="", coverage='a', verbose=False, cleanup=False):
    """
    Generate reports using multiqc

    Parameters
    ----------
    directory : str
        path to directory containing logs.
    tempDir : str
        temp dir; stdout extracted from the logs is written here.
    outDir : str, optional
        output dir. The default is "", which is replaced by 'MultiQC_out'.
    coverage : char, optional
        commands to use in pyrpipe log: fa(i)led (p)assed or (a)ll. The default is 'a'.
    verbose : bool, optional
        print messages. Accepted but currently unused here. The default is False.
    cleanup : bool, optional
        remove temp files. Accepted but currently unused here. The default is False.

    Returns
    -------
    None.

    """
    # search all _pyrpipe.log files under directory; the raw string keeps the
    # backslash literal (same value as before, without the invalid-escape warning)
    log_files = pu.find_files(directory, r".*_pyrpipe\.log$", recursive=True)

    if not outDir:
        outDir = 'MultiQC_out'

    # create tempdir
    if not pu.check_paths_exist(tempDir):
        pu.mkdir(tempDir)

    for log_file in log_files:
        # dump stdout from logs to temp directory
        stdout = getStdoutFromLog(log_file, None, coverage)
        # id = last '_'-separated token before '_pyrpipe' in the log path
        fid = log_file.split('_pyrpipe')[0].split('_')[-1]
        for program in stdout:
            tempFile = os.path.join(tempDir, program + "_" + fid + ".txt")
            with open(tempFile, "w") as handle:
                handle.write(stdout[program])

    # run multiqc
    # NOTE(review): analysis runs on `directory`, not on tempDir where the
    # extracted stdout was written -- confirm this is intended.
    mc.run(analysis_dir=directory, outdir=outDir)
def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
    """Run kallisto quant on the fastq files of an SRA object.

    sra_object: SRA
        SRA object containing paths to fastq files
    out_suffix: str
        suffix inserted into the name of the abundance file
    out_dir: str
        path to output directory; defaults to "kallisto_out" under
        sra_object.directory
    objectid: str
        Accepted for interface consistency; the command is run with
        sra_object.srr_accession as its id.

    :return: Path to the (renamed) abundance file, empty string on failure
    :rtype: string
    """
    if not out_dir:
        out_dir = os.path.join(sra_object.directory, "kallisto_out")
    elif not pu.check_paths_exist(out_dir):
        # only a caller-supplied directory is created here
        pu.mkdir(out_dir)

    # positional reads and options; single-end runs carry an extra flag
    paired = sra_object.layout == 'PAIRED'
    if paired:
        reads = (sra_object.fastq_path, sra_object.fastq2_path)
    else:
        reads = (sra_object.fastq_path,)
    quant_opts = {"-o": out_dir}
    if not paired:
        quant_opts["--single"] = ""
    quant_opts["-i"] = self.index

    # file written by the tool and the name it is moved to afterwards
    produced = os.path.join(out_dir, "abundance.tsv")
    final = os.path.join(out_dir, "abundance" + out_suffix + ".tsv")

    # nothing to do when the final file is already there
    if not _force and pu.check_files_exist(final):
        pu.print_green('Target files {} already exist.'.format(final))
        return final

    # call kallisto
    ok = self.run(*reads, subcommand='quant', objectid=sra_object.srr_accession, target=produced, **quant_opts)
    if not ok:
        return ""

    # in a dry run nothing is moved or verified
    if _dryrun:
        return final

    pe.move_file(produced, final)
    return final if pu.check_files_exist(final) else ""
def perform_assembly(self, bam_file, out_dir=None, out_suffix="_stringtie", objectid="NA"):
    """Assemble transcripts from a bam file.

    Parameters
    ----------
    bam_file: string
        path to the bam file
    out_dir: string
        Output directory; defaults to the directory of bam_file
    out_suffix: string
        Suffix for the output gtf file
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

    :return: Returns the path to output GTF file, empty string on failure
    :rtype: string
    """
    base_name = pu.get_file_basename(bam_file)

    if not out_dir:
        out_dir = pu.get_file_directory(bam_file)
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    gtf_path = os.path.join(out_dir, base_name + out_suffix + ".gtf")

    # output option first, then the bam as positional argument under "--"
    run_opts = {"-o": gtf_path, "--": (bam_file, )}

    # call stringtie
    ok = self.run(None, objectid=objectid, target=gtf_path, **run_opts)
    if not ok:
        return ""

    # a missing gtf is only an error outside of a dry run
    if not (pu.check_files_exist(gtf_path) or _dryrun):
        return ""
    return gtf_path
def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to build kallisto index

    index_path: str
        path to the output directory
    index_name: str
        index name
    fasta: str
        path to the fasta file, passed as positional argument
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Extra options to pass to kallisto. The positional fasta ("--") and
        ``-i`` set here take precedence over the same keys in kwargs.

    :return: Status of kallisto index
    :rtype: bool
    """
    # check input
    if not pu.check_files_exist(fasta):
        pu.print_boldred("{} does not exist. Exiting".format(fasta))
        return False

    # create out dir
    if not pu.check_paths_exist(index_path):
        if not pu.mkdir(index_path):
            print("ERROR in building kallisto index. Failed to create index directory.")
            return False

    indexOut = os.path.join(index_path, index_name)
    newOpts = {"--": (fasta,), "-i": indexOut}
    mergedOpts = {**kwargs, **newOpts}

    # call kallisto
    status = self.run_kallisto("index", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **mergedOpts)

    # success requires both a good exit status and the index file on disk
    if status and pu.check_files_exist(indexOut):
        self.kallisto_index = indexOut
        self.passedArgumentDict['-i'] = self.kallisto_index
        pu.print_green("kallisto_index is:" + self.kallisto_index)
        return True

    # every failure path reports and returns an explicit False (never None)
    pu.print_boldred("Failed to create kallisto index")
    return False
def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
    """Run salmon quant on the fastq files of an SRA object.

    sra_object: SRA
        An SRA object with valid fastq files
    out_suffix: str
        suffix inserted into the name of the quant file
    out_dir: str
        path to outdir; defaults to "salmon_out" under sra_object.directory
    objectid: str
        Accepted for interface consistency; the command is run with
        sra_object.srr_accession as its id.

    :return: Path to salmon out file, empty string on failure
    :rtype: string
    """
    if not out_dir:
        out_dir = os.path.join(sra_object.directory, "salmon_out")
    elif not pu.check_paths_exist(out_dir):
        # only a caller-supplied directory is created here
        pu.mkdir(out_dir)

    # options: output dir, library type, reads (by layout), index
    quant_opts = {"-o": out_dir, "-l": "A"}
    if sra_object.layout == 'PAIRED':
        quant_opts["-1"] = sra_object.fastq_path
        quant_opts["-2"] = sra_object.fastq2_path
    else:
        quant_opts["-r"] = sra_object.fastq_path
    quant_opts["-i"] = self.index

    # file that is moved after the run and the name it is moved to
    produced = os.path.join(out_dir, "quant.sf")
    final = os.path.join(out_dir, "quant" + out_suffix + ".sf")

    # nothing to do when the final file is already there
    if not _force and pu.check_files_exist(final):
        pu.print_green('Target files {} already exist.'.format(final))
        return final

    # call salmon
    # NOTE(review): target is the renamed file here, while the file moved
    # below is quant.sf -- confirm this is what self.run expects.
    ok = self.run(None, subcommand='quant', objectid=sra_object.srr_accession, target=final, **quant_opts)
    if not ok:
        return ""

    # in a dry run nothing is moved or verified
    if _dryrun:
        return final

    pe.move_file(produced, final)
    return final if pu.check_files_exist(final) else ""
def multiqc():
    """Command-line entry point: parse the options after the subcommand and
    generate a MultiQC report from a pyrpipe log file.

    Arguments are read from sys.argv[2:].
    """
    print("Generating html report with multiqc")
    parser = argparse.ArgumentParser(
        description='pyrpipe diagnostic utility\nGenerate report with multiqc.',
        usage='''pyrpipe_diagnostic multiqc [<args>] <logfile> ''')
    parser.add_argument('-o', help='out directory \ndefault: <./>', action="store")
    parser.add_argument('-c', help='Dump command options [(a)ll,fa(i)l,(p)ass]\ndefault: a', default='a', action="store")
    parser.add_argument('-v', help='verbose', action="store_true")
    parser.add_argument('-f', help='Filter by programs. Provide a comma-separated list e.g., prefetch,STAR,bowtie2 \ndefault None')
    parser.add_argument('-t', help='Temporary directory. \ndefault ./tmp', action="store")
    # -r is a boolean switch, so its default is False (not a directory)
    parser.add_argument('-r', help='Remove stdout files after processing. \ndefault False', action="store_true")
    parser.add_argument('logfile', help='The log file generated by pyrpipe', action="store")
    args = parser.parse_args(sys.argv[2:])

    logFile = args.logfile
    if args.v:
        print("Generating MultiQC report")

    # output directory defaults to the current working directory
    outDir = os.getcwd() if args.o is None else args.o

    # optional comma-separated program filter
    filters = [] if args.f is None else args.f.split(',')

    # temp dir defaults to ./tmp and is created when missing
    tempDir = args.t if args.t is not None else os.path.join(os.getcwd(), "tmp")
    if not pu.check_paths_exist(tempDir):
        pu.mkdir(tempDir)

    # run multiqc
    generateMultiqcReport(logFile, filters, tempDir, outDir=outDir, coverage=args.c, verbose=args.v, cleanup=args.r)
def runMikadoSerialise(self, jsonconf, blastTargets, orfs, xml, out_dir="", verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper to run mikado serialise.

    jsonconf: str
        Configuration file passed as ``--json-conf``.
    blastTargets, orfs, xml: str
        Input files; all three must exist.
    out_dir: str
        Output directory; the current working directory when omitted.
    verbose, quiet, logs, objectid:
        Forwarded to runMikado.
    kwargs: dict
        Extra options; the options assembled here take precedence.

    :return: out_dir on success, empty string on failure
    :rtype: string
    """
    # all input files must be present
    inputs_ok = pu.check_files_exist(blastTargets, orfs, xml)
    if not inputs_ok:
        print("Please check the input to mikado.")
        return ""

    out_dir = out_dir or os.getcwd()

    # start from the caller's options, then force the ones set here
    serialise_opts = dict(kwargs)
    serialise_opts.update({
        "--json-conf": jsonconf,
        "--blast_targets": blastTargets,
        "--xml": xml,
        "--orfs": orfs,
        "--output-dir": out_dir,
    })

    ok = self.runMikado("serialise", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **serialise_opts)
    if not ok:
        print("Mikado serialise failed for:" + jsonconf)
        return ""

    # the output directory has to exist after the run
    return out_dir if pu.check_paths_exist(out_dir) else ""
def runMikadoPick(self, yamlconf, threads=None, out_dir=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper to run mikado pick.

    yamlconf: str
        Configuration file passed as ``--json-conf``; must exist.
    threads: int
        Passed as ``--procs``; self.threads when omitted.
    out_dir: str
        Output directory; the current working directory when omitted.
    verbose, quiet, logs, objectid:
        Forwarded to runMikado.
    kwargs: dict
        Extra options; these override the options assembled here.

    :return: out_dir on success, empty string on failure
    :rtype: string
    """
    # the configuration file must be present
    if not pu.check_files_exist(yamlconf):
        print("Please check the input to mikado.")
        return ""

    out_dir = out_dir or os.getcwd()
    threads = threads or self.threads

    # options set here first, caller's options on top
    pick_opts = {
        "--procs": str(threads),
        "--json-conf": yamlconf,
        "--output-dir": out_dir,
    }
    pick_opts.update(kwargs)

    ok = self.runMikado("pick", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **pick_opts)
    if not ok:
        print("Mikado pick failed for:" + yamlconf)
        return ""

    # the output directory has to exist after the run
    return out_dir if pu.check_paths_exist(out_dir) else ""
def benchmark():
    """Command-line entry point: parse the options after the subcommand and
    generate a benchmark report from a pyrpipe log file.

    Arguments are read from sys.argv[2:].
    """
    print("Generating benchmarks")
    parser = argparse.ArgumentParser(
        description='pyrpipe diagnostic utility\nGenerate benchmark report.',
        # usage names this subcommand (it previously advertised "report")
        usage='''pyrpipe_diagnostic benchmark [<args>] <logfile> ''')
    parser.add_argument('-o', help='out file \ndefault: same as input logfile', action="store")
    parser.add_argument('-e', help='report output type: [MD,PDF,HTML] \ndefault: PDF', default='PDF', action="store")
    parser.add_argument('-v', help='verbose', action="store_true")
    parser.add_argument('-f', help='Filter by programs. Provide a comma-separated list e.g., prefetch,STAR,bowtie2 \ndefault None')
    parser.add_argument('-t', help='Temporary directory. \ndefault ./tmp', action="store")
    parser.add_argument('logfile', help='The log file generated by pyrpipe', action="store")
    args = parser.parse_args(sys.argv[2:])

    logFile = args.logfile
    envLog = checkEnvLog(logFile)

    if args.v:
        print("Generating benchmarks")

    # output name defaults to the log file's basename; the report type is
    # appended as extension in both cases
    outFile = pu.get_file_basename(args.logfile) if args.o is None else args.o
    outFile += '.' + args.e

    # optional comma-separated program filter
    filters = [] if args.f is None else args.f.split(',')

    # temp dir defaults to ./tmp and is created when missing
    tempDir = args.t if args.t is not None else os.path.join(os.getcwd(), "tmp")
    if not pu.check_paths_exist(tempDir):
        pu.mkdir(tempDir)

    generateBenchmarkReport(logFile, envLog, filters, tempDir, outFile=outFile, verbose=args.v)
def __init__(self,log_file,env_log,out_dir=""):
    """Set up a benchmark run: validate the logs, parse them and prepare the
    'benchmark_reports' directory.

    log_file: str
        Path to the log file; must exist.
    env_log: str
        Path to the environment log; must exist.
    out_dir: str
        Parent of the report directory; the current working directory when omitted.

    :raises Exception: if an input file is missing or the report directory
                       cannot be created.
    """
    inputs_ok = pu.check_files_exist(log_file, env_log)
    if not inputs_ok:
        raise Exception("Please check input for benchmark report. {} {}".format(log_file, env_log))

    out_dir = out_dir or os.getcwd()

    self.log_file = log_file
    self.env_log = env_log
    self.runtimes_by_prog = {}
    self.runtimes_by_object = {}

    # parse the logs before touching the file system
    pu.print_blue("parsing log...")
    self.parse_logs()
    pu.print_blue("done.")

    # the directory is only created when it does not exist yet
    self.benchmark_dir = os.path.join(out_dir, 'benchmark_reports')
    if not (pu.check_paths_exist(self.benchmark_dir) or pu.mkdir(self.benchmark_dir)):
        raise Exception("Error running benchmarks. Can not create output directory {}".format(self.benchmark_dir))
def createMikadoGTFlist(self, out_file, out_dir, searchPath, searchQuery="*.gtf", strand=False):
    """Create a file to be used by mikado configure

    out_file: str
        outfile name; ".txt" is appended
    out_dir: str
        path to out_dir (created if missing)
    searchPath: str
        Path where gtf/gff files will be searched (recursively)
    searchQuery: str
        Query to perform search. Default: "*.gtf"
    strand: bool
        Stranded flag, written as the third column of every row. Default false

    :return: Path to the written list file
    :rtype: string
    """
    files = pe.find_files(searchPath, searchQuery, recursive=True)

    # create out dir
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    outFilePath = os.path.join(out_dir, out_file + ".txt")

    # One tab-separated row per file: path, basename, strand flag.
    # Files for which no basename is returned are skipped.
    gtfs = []
    for gtf_path in files:
        thisName = pu.get_file_basename(gtf_path)
        if thisName:
            gtfs.append("\t".join([gtf_path, thisName, str(strand)]))

    # with-block closes the handle even if the write fails
    with open(outFilePath, "w") as list_file:
        list_file.write("\n".join(gtfs))

    pu.print_green("Mikado list file written to:" + outFilePath)
    return outFilePath
def runMikadoPrepare(self, jsonconf, out_dir="", verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper to run mikado prepare.

    jsonconf: str
        Configuration file passed as ``--json-conf``; must exist.
    out_dir: str
        Output directory; the current working directory when omitted.
    verbose, quiet, logs, objectid:
        Forwarded to runMikado.
    kwargs: dict
        Extra options; the options assembled here take precedence.

    :return: out_dir on success, empty string on failure
    :rtype: string
    """
    # the configuration file must be present
    if not pu.check_files_exist(jsonconf):
        print("Please check the input configuration to mikado.")
        return ""

    out_dir = out_dir or os.getcwd()

    # start from the caller's options, then force the ones set here
    prepare_opts = dict(kwargs)
    prepare_opts.update({"--output-dir": out_dir, "--json-conf": jsonconf})

    ok = self.runMikado("prepare", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **prepare_opts)
    if not ok:
        print("Mikado prepare failed for:" + jsonconf)
        return ""

    # the output directory has to exist after the run
    return out_dir if pu.check_paths_exist(out_dir) else ""
def perform_assembly(self,sra_object=None,bam_file=None,out_dir="trinity_out_dir",max_memory=None,max_intron=10000,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run trinity with sra object or BAM file as input.

    Parameters
    ----------
    sra_object: SRA
        object of SRA class; takes precedence over bam_file when both are given
    bam_file: string
        path to bam file (genome guided mode)
    out_dir: string
        Name of the output directory, created under the input's directory.
        "_trinity" is appended if the name does not contain "trinity".
    max_memory: int or string
        Amount of memory in gigabytes; "G" is appended here, so pass e.g. 2
        rather than "2G". Defaults to self.max_memory.
    max_intron: int
        specify the "--genome_guided_max_intron" argument
    threads: int
        Number of threads to use. Defaults to self.threads.
    overwrite: bool
        Accepted but currently unused by this function.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options to pass to Trinity; these override the options assembled here.

    :return: Path to the Trinity output directory, empty string on failure
    :rtype: string
    """
    # add trinity to outdir
    if "trinity" not in out_dir:
        out_dir += "_trinity"

    if not threads:
        threads = self.threads
    if not max_memory:
        max_memory = self.max_memory

    # input specific options go first, mode specific extras last
    if sra_object is not None:
        out_dir = os.path.join(sra_object.location, out_dir)
        if sra_object.layout == 'PAIRED':
            input_opts = {"--seqType": "fq", "--left": sra_object.localfastq1Path, "--right": sra_object.localfastq2Path}
        else:
            input_opts = {"--seqType": "fq", "--single": sra_object.localfastqPath}
        extra_opts = {}
    elif bam_file is not None:
        if not pu.check_files_exist(bam_file):
            pu.print_boldred("Input to trinity does not exist:" + bam_file)
            return ""
        out_dir = os.path.join(pu.get_file_directory(bam_file), out_dir)
        input_opts = {"--genome_guided_bam": bam_file}
        extra_opts = {"--genome_guided_max_intron": max_intron}
    else:
        pu.print_boldred("Please provide valid input to run trinity")
        return ""

    new_opts = {
        **input_opts,
        "--output": out_dir,
        "--max_memory": str(max_memory) + "G",
        "--CPU": str(threads),
        **extra_opts,
    }
    merged_opts = {**new_opts, **kwargs}

    # call trinity, forwarding the caller's logging/verbosity settings
    status = self.run_trinity(valid_args_list=None, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **merged_opts)

    # success requires a good status and the output directory on disk
    if status and pu.check_paths_exist(out_dir):
        return out_dir
    return ""
def perform_assembly(self,bam_file,out_dir="",out_suffix="_cufflinks",reference_gtf=None,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run cufflinks with BAM file as input.

    Parameters
    ----------
    bam_file: string
        path to bam file
    out_dir: string
        output directory; defaults to the directory of bam_file
    out_suffix: string
        Suffix for the output gtf file
    reference_gtf: str
        Path to reference gtf (passed as ``-g``)
    threads: int
        Number of threads to use. Defaults to self.threads.
    overwrite: bool
        When False and the output gtf already exists, it is returned without
        running cufflinks.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options to pass to cufflinks; these override the options assembled here.

    :return: Returns the path to output GTF file, empty string on failure
    :rtype: string
    """
    # create path to output file
    fname = pu.get_file_basename(bam_file)
    if not out_dir:
        out_dir = pu.get_file_directory(bam_file)
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    out_gtf_file = os.path.join(out_dir, fname + out_suffix + ".gtf")

    # Handle overwrite: reuse an existing result when asked to
    if not overwrite and os.path.isfile(out_gtf_file):
        print("The file " + out_gtf_file + " already exists. Exiting..")
        return out_gtf_file

    if not threads:
        threads = self.threads

    # Add output dir, input bam and thread count
    new_opts = {"-o": out_dir, "--": (bam_file,), "-p": str(threads)}

    # add ref gtf
    if reference_gtf:
        if not pu.check_files_exist(reference_gtf):
            pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
            return ""
        new_opts["-g"] = reference_gtf

    merged_opts = {**new_opts, **kwargs}

    # call cufflinks
    status = self.run_cufflinks(verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **merged_opts)
    if not status:
        # explicit empty string instead of an implicit None
        return ""

    # move out_dir/transcripts.gtf to the requested output name
    pe.move_file(os.path.join(out_dir, "transcripts.gtf"), out_gtf_file)

    if pu.check_files_exist(out_gtf_file):
        return out_gtf_file
    return ""
def runMikadoConfigure(self, listFile, genome, mode, scoring, junctions, out_file, out_dir="", verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Wrapper to run mikado configure
    Make sure the paths in list file are global.

    Parameters
    ----------
    listFile: str
        Path to the list file (``--list``); must exist.
    genome: str
        Path to the reference genome (``--reference``); must exist.
    mode: str
        Value passed as ``--mode``.
    scoring: str
        Path to the scoring file (``--scoring``); must exist.
    junctions: str
        Path to the junctions file (``--junctions``); must exist.
    out_file: str
        Name of the configuration file; ".yaml" is appended.
    out_dir: str
        Output directory. Defaults to the working directory at call time.
    verbose, quiet, logs, objectid:
        Forwarded to runMikado.
    kwargs: dict
        Extra options; the options assembled here take precedence.

    :return: Path to the created configuration file, empty string on failure
    :rtype: string
    :raises Exception: if out_dir cannot be created
    """
    # check all file exists
    if not pu.check_files_exist(listFile, genome, junctions, scoring):
        print("Please check mikado input")
        return ""

    # Resolve the default here: a default of os.getcwd() in the signature is
    # evaluated once at definition time and ignores later directory changes.
    if not out_dir:
        out_dir = os.getcwd()

    # create out dir
    if not pu.check_paths_exist(out_dir):
        if not pu.mkdir(out_dir):
            raise Exception("Exception in mikado configure.")

    outFilePath = os.path.join(out_dir, out_file + ".yaml")

    newOpts = {
        "--list": listFile,
        "--reference": genome,
        "--mode": mode,
        "--scoring": scoring,
        "--junctions": junctions,
        "--": (outFilePath, )
    }

    # merge with kwargs
    mergedOpts = {**kwargs, **newOpts}

    status = self.runMikado("configure", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **mergedOpts)
    if not status:
        pu.print_boldred(
            "Mikado configure failed.\nPlease make sure the paths in list file are global."
        )
        return ""

    # the configuration file has to exist after the run
    if not pu.check_files_exist(outFilePath):
        return ""
    return outFilePath
def run_portcullisFull(self, reference_fasta, bam_file, out_dir="", delete_bam=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Run portcullis full.

    Parameters
    ----------
    reference_fasta: string
        Path to the reference fasta file
    bam_file: string
        Path to input bam file
    out_dir: string
        Path to the output dir; "portcullis_out" under the current directory
        when not given.
    delete_bam: bool
        Delete the input bam file after a successful run
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Extra options to pass to portcullis; the positional inputs and ``-o``
        set here take precedence.

    :return: out_dir on success, empty string on failure
    :rtype: string
    """
    inputs_ok = pu.check_files_exist(reference_fasta, bam_file)
    if not inputs_ok:
        print("Please check input for portcullis.")
        return ""

    out_dir = out_dir or os.path.join(os.getcwd(), "portcullis_out")

    # caller's options first, then positional inputs, then the output dir
    full_opts = dict(kwargs)
    full_opts["--"] = (reference_fasta, bam_file)
    full_opts["-o"] = out_dir

    ok = self.run_portcullis("full", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **full_opts)
    if not ok:
        print("portcullis full failed for:" + bam_file)
        return ""

    # the output directory has to exist after the run
    if not pu.check_paths_exist(out_dir):
        return ""

    # a failed deletion is reported but does not fail the call
    if delete_bam and not pe.deleteFileFromDisk(bam_file):
        print("Error deleting bam file:" + bam_file)

    return out_dir
def sam_to_bam(self, sam_file, out_dir="", out_suffix="", delete_sam=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Convert a sam file to a bam file named after the input sam.

    sam_file: string
        Path to the input sam file
    out_dir: string
        Output directory; defaults to the directory of sam_file
    out_suffix: string
        Suffix for the output bam file
    delete_sam: bool
        delete the sam file after conversion
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Extra options to pass to samtools view; the options set here take precedence.

    :return: Returns the path to the bam file. Returns empty string if operation failed.
    :rtype: string
    """
    if not out_dir:
        out_dir = pu.get_file_directory(sam_file)
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    # output keeps the input's basename
    bam_path = os.path.join(out_dir, pu.get_file_basename(sam_file) + out_suffix + '.bam')

    # caller's options first, then input, output and the bam flag
    view_opts = dict(kwargs)
    view_opts.update({"--": (sam_file, ), "-o": bam_path, "-b": ""})

    ok = self.run_samtools("view", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **view_opts)
    if not ok:
        print("Sam to bam failed for:" + sam_file)
        return ""

    # the bam file has to exist after the run
    if not pu.check_files_exist(bam_path):
        return ""

    # a failed deletion is reported but does not fail the call
    if delete_sam and not pe.deleteFileFromDisk(sam_file):
        print("Error deleting sam file:" + sam_file)

    return bam_path
def merge_bam(self, *args, out_file="merged", out_dir="", delete_bams=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Merge multiple bam files into a single file.

    Parameters
    ----------
    args: tuple
        Paths to bam files to combine; at least two are required
    out_file: string
        Output file name to save the results. .bam will be added at the end.
    out_dir: string
        Path where to save the merged bam file. Default path is the same as the first bam_file's
    delete_bams: bool
        Delete the input bam files after a successful merge
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Extra options to pass to samtools merge; the positional arguments set here take precedence.

    :return: Returns the path to the merged bam file, empty string on failure.
    :rtype: string
    """
    if len(args) < 2:
        print("Please supply at least 2 files to merge")
        return ""

    if not out_dir:
        out_dir = pu.get_file_directory(args[0])
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    merged_path = os.path.join(out_dir, out_file + ".bam")

    # positional arguments: output file first, then every input
    merge_opts = dict(kwargs)
    merge_opts["--"] = (merged_path, ) + args

    ok = self.run_samtools("merge", verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, **merge_opts)
    if not ok:
        print("Bam merge failed for:" + merged_path)
        return ""

    # the merged file has to exist after the run
    if not pu.check_files_exist(merged_path):
        return ""

    # failed deletions are reported but do not fail the call
    if delete_bams:
        for source_bam in args:
            removed = pe.deleteFileFromDisk(source_bam)
            if not removed:
                print("Error deleting sam file:" + source_bam)

    return merged_path
def perform_qc(self,sra_object,out_dir="",out_suffix="_trimgalore",objectid="NA"):
    """Function to perform qc using trimgalore.
    The function perform_qc() is consistent for all QC classes.

    Parameters
    ----------
    sra_object: SRA
        An SRA object whose fastq files will be used
    out_dir: str
        Path to output directory. Default is sra_object.directory
    out_suffix: string
        Suffix for the output fastq files
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

    :return: Returns the path of fastq files after QC. tuple has one item for
             single end files and two for paired. On failure the tuple is ("",).
    :rtype: tuple
    """
    if not out_dir:
        out_dir = sra_object.directory
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    # get layout
    if sra_object.layout == 'PAIRED':
        fq1 = sra_object.fastq_path
        fq2 = sra_object.fastq2_path
        internal_args = (fq1, fq2)
        internal_kwargs = {"--paired": "", "-o": out_dir}

        # The run is expected to leave <input>_val_1.fq and <input>_val_2.fq
        # in out_dir; these are moved to the target names below.
        file1 = os.path.join(out_dir, pu.get_file_basename(fq1) + "_val_1.fq")
        file2 = os.path.join(out_dir, pu.get_file_basename(fq2) + "_val_2.fq")
        # targets
        out_file1 = os.path.join(out_dir, pu.get_file_basename(fq1) + out_suffix + ".fastq")
        out_file2 = os.path.join(out_dir, pu.get_file_basename(fq2) + out_suffix + ".fastq")

        # check if final files already exists
        if not _force and pu.check_files_exist(out_file1, out_file2):
            pu.print_green('Target files {}, {} already exist.'.format(out_file1, out_file2))
            return out_file1, out_file2

        # run trimgalore
        status = self.run(*internal_args, objectid=objectid, target=[file1, file2], **internal_kwargs)
        if not status:
            return ("",)

        # nothing is moved or verified in a dry run
        if not _dryrun:
            pe.move_file(file1, out_file1, verbose=False)
            pe.move_file(file2, out_file2, verbose=False)
            if not pu.check_files_exist(out_file1, out_file2):
                # tuple (not a bare string) so callers can always index the result
                return ("",)
        return out_file1, out_file2

    fq = sra_object.fastq_path
    internal_args = (fq,)
    internal_kwargs = {"-o": out_dir}

    # The run is expected to leave <input>_trimmed.fq in out_dir; it is moved
    # to the target name below.
    trimmed_file = os.path.join(out_dir, pu.get_file_basename(fq) + "_trimmed.fq")
    # target
    out_file = os.path.join(out_dir, pu.get_file_basename(fq) + out_suffix + ".fastq")

    # check if final files already exists
    if not _force and pu.check_files_exist(out_file):
        pu.print_green('Target files {} already exist.'.format(out_file))
        return (out_file,)

    # run trimgalore
    status = self.run(*internal_args, objectid=objectid, target=trimmed_file, **internal_kwargs)
    if not status:
        return ("",)

    # nothing is moved or verified in a dry run
    if not _dryrun:
        pe.move_file(trimmed_file, out_file)
        if not pu.check_files_exist(out_file):
            return ("",)
    return (out_file,)