def call_cuffmerge_and_cuffdiff(logger, ws_client, hs, ws_id, num_threads, list_file,
                                gtf_file, bam_files, t_labels, genome_id, expressionset_id,
                                alignmentset_id, sampleset_id, params, directory, token):
    ## Adding Advanced options for cuffmerge command
    cuffmerge_dir = os.path.join(directory, "cuffmerge")
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(str(num_threads), cuffmerge_dir,
                                                           gtf_file, list_file)
    merged_gtf = None  # stays None if cuffmerge does not produce merged.gtf
    try:
        logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        script_util.runProgram(logger, "cuffmerge", cuffmerge_command, None, directory)
        if os.path.exists(os.path.join(cuffmerge_dir, "merged.gtf")):
            merged_gtf = os.path.join(cuffmerge_dir, "merged.gtf")
    except Exception, e:
        raise Exception("Error executing cuffmerge {0},{1}".format(cuffmerge_command, cuffmerge_dir))
def call_stringtieBall(directory, num_threads, m_gtf_file, alignment_file):
    # Run StringTie in Ballgown mode (-e -B) against the merged GTF.
    strdiff_command = " -p {0} -o {1} -e -B -G {2} {3}".format(str(num_threads), directory,
                                                               m_gtf_file, alignment_file)
    try:
        logger.info("Executing: stringtie {0}".format(strdiff_command))
        script_util.runProgram(logger, "stringtie", strdiff_command, None, directory)
    except Exception, e:
        raise Exception("Error executing StringTie differential expression {0},{1}".format(
            strdiff_command, directory))
def call_tablemaker(directory, num_threads, m_gtf_file, alignment_file):
    # Run Tablemaker (-q -W) to produce Ballgown input tables.
    tm_command = " -p {0} -o {1} -q -W -G {2} {3}".format(str(num_threads), directory,
                                                          m_gtf_file, alignment_file)
    try:
        logger.info("Executing: tablemaker {0}".format(tm_command))
        script_util.runProgram(logger, "tablemaker", tm_command, None, directory)
    except Exception, e:
        raise Exception("Error executing tablemaker {0},{1}".format(tm_command, directory))
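# --- Hedged usage sketch (added for illustration; not part of the original
# module). Shows how the wrappers above are meant to chain: merge first, then
# quantify each BAM against the merged GTF. All paths and the use_stringtie
# switch are hypothetical placeholders; os and the functions above are assumed
# to be in scope.
def example_quantify_all(directory, num_threads, merged_gtf, bam_files, use_stringtie=True):
    for bam in bam_files:
        sample = os.path.splitext(os.path.basename(bam))[0]
        out_dir = os.path.join(directory, sample)   # one output dir per sample
        if use_stringtie:
            call_stringtieBall(out_dir, num_threads, merged_gtf, bam)   # StringTie -e -B
        else:
            call_tablemaker(out_dir, num_threads, merged_gtf, bam)      # Tablemaker -q -W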
def BuildBowtie2Index(self, ctx, params):
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN BuildBowtie2Index
    user_token = ctx["token"]
    ws_client = Workspace(url=self.__WS_URL, token=user_token)
    try:
        self.__LOGGER.info("Downloading KBaseGenome.ContigSet object from workspace")
        try:
            ## Check the bowtie_dir: remove its files if it exists; create it if it doesn't
            bowtie_dir = self.__BOWTIE_DIR
            if os.path.exists(bowtie_dir):
                files = glob.glob("%s/*" % bowtie_dir)
                for f in files:
                    os.remove(f)
            if not os.path.exists(bowtie_dir):
                os.makedirs(bowtie_dir)
            ## Dump the FASTA object to a file in bowtie_dir
            dumpfasta = ("--workspace_service_url {0} --workspace_name {1} "
                         "--working_directory {2} --output_file_name {3} "
                         "--object_name {4} --shock_service_url {5} --token '{6}'").format(
                self.__WS_URL, params["ws_id"], bowtie_dir, params["reference"],
                params["reference"], self.__SHOCK_URL, user_token)
            script_util.runProgram(self.__LOGGER, self.__SCRIPT_TYPE["ContigSet_to_fasta"],
                                   dumpfasta, self.__SCRIPTS_DIR, os.getcwd())
        except Exception, e:
            raise KBaseRNASeqException("Error Creating FASTA object from the workspace {0},{1},{2}".format(
                params["reference"], os.getcwd(), e))
        ## Run bowtie2-build on the command line
        try:
            bowtie_index_cmd = "{0} {1}".format(params["reference"], params["reference"])
            script_util.runProgram(self.__LOGGER, "bowtie2-build", bowtie_index_cmd, None, bowtie_dir)
        except Exception, e:
            raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params["reference"], e))
def call_stringtieBall(working_dir, directory, num_threads, m_gtf_file, alignment_file):
    # Variant that runs from working_dir and writes Ballgown output to directory.
    print "Inside stringtieBall"
    strdiff_command = " -p {0} -o {1} -e -B -G {2} {3}".format(str(num_threads), directory,
                                                               m_gtf_file, alignment_file)
    try:
        print "Executing: stringtie {0}".format(strdiff_command)
        script_util.runProgram(None, "stringtie", strdiff_command, None, working_dir)
    except Exception, e:
        raise Exception("Error executing StringTie differential expression {0},{1}".format(
            strdiff_command, working_dir))
def call_cuffmerge(directory, num_threads, gtf_file, list_file):
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(str(num_threads), directory,
                                                           gtf_file, list_file)
    merged_gtf = None
    try:
        logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        script_util.runProgram(logger, "cuffmerge", cuffmerge_command, None, directory)
        # The merged annotation for this variant lands directly in `directory`.
        if os.path.exists(os.path.join(directory, "merged.gtf")):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception, e:
        raise Exception("Error executing cuffmerge {0},{1}".format(cuffmerge_command, directory))
    return merged_gtf
def call_stringtiemerge(working_dir, directory, num_threads, gtf_file, list_file):
    strmerge_command = " -p {0} -o {1} --merge -G {2} {3}".format(str(num_threads), directory,
                                                                  gtf_file, list_file)
    merged_gtf = None
    try:
        print "Executing: stringtie {0}".format(strmerge_command)
        script_util.runProgram(None, "stringtie", strmerge_command, None, working_dir)
        if os.path.exists(os.path.join(directory, "merged.gtf")):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception, e:
        raise Exception("Error executing StringTie merge {0},{1}".format(strmerge_command, working_dir))
    return merged_gtf  # callers assign this (see the StringTie branch in runEach below)
def call_tablemaker(working_dir, directory, num_threads, m_gtf_file, alignment_file):
    print "Inside Tablemaker"
    print "Args passed : {0} , {1} , {2} , {3} , {4} ".format(working_dir, directory,
                                                              num_threads, m_gtf_file,
                                                              alignment_file)
    tm_command = " -p {0} -o {1} -q -W -G {2} {3}".format(str(num_threads), directory,
                                                          m_gtf_file, alignment_file)
    try:
        print "Executing: tablemaker {0}".format(tm_command)
        script_util.runProgram(None, "tablemaker", tm_command, None, working_dir)
    except Exception, e:
        # No logger is in scope in this variant, so report via traceback instead.
        print "".join(traceback.format_exc())
        raise Exception("Error executing tablemaker {0},{1}".format(tm_command, working_dir))
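# --- Hedged sketch (illustration only): how the five-argument variants above
# dispatch between the StringTie and Cufflinks/Tablemaker paths, mirroring the
# used_tool branching in the differential-expression runEach further below.
# Argument names are placeholders; the merge wrappers are assumed to return
# the merged GTF path.
def example_merge_and_quantify(work_dir, merge_dir, out_dir, num_threads,
                               gtf_file, assembly_list, bam_file, used_tool):
    if used_tool == 'StringTie':
        merged_gtf = call_stringtiemerge(work_dir, merge_dir, num_threads, gtf_file, assembly_list)
        call_stringtieBall(work_dir, out_dir, num_threads, merged_gtf, bam_file)
    else:  # 'Cufflinks'
        merged_gtf = call_cuffmerge(work_dir, merge_dir, num_threads, gtf_file, assembly_list)
        call_tablemaker(work_dir, out_dir, num_threads, merged_gtf, bam_file)
    return merged_gtf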
def runEach(self, task_list):
    ### Call Cuffmerge function
    cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
    merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir, self.num_threads,
                                            self.details['gtf_file'],
                                            self.details['gtf_list_file'])
    ### Run Cuffdiff
    output_dir = os.path.join(self.directory, self.method_params['output_obj_name'])
    cuffdiff_command = (' -p ' + str(self.num_threads))
    ### Setting Advanced parameters for Cuffdiff
    if ('time_series' in self.method_params and self.method_params['time_series'] != 0):
        cuffdiff_command += (' -T ')
    if ('min_alignment_count' in self.method_params
            and self.method_params['min_alignment_count'] is not None):
        cuffdiff_command += (' -c ' + str(self.method_params['min_alignment_count']))
    if ('multi_read_correct' in self.method_params
            and self.method_params['multi_read_correct'] != 0):
        cuffdiff_command += (' --multi-read-correct ')
    if ('library_type' in self.method_params
            and self.method_params['library_type'] is not None):
        cuffdiff_command += (' --library-type ' + self.method_params['library_type'])
    if ('library_norm_method' in self.method_params
            and self.method_params['library_norm_method'] is not None):
        cuffdiff_command += (' --library-norm-method ' + self.method_params['library_norm_method'])
    try:
        cuffdiff_command += " -o {0} -L {1} -u {2} {3}".format(output_dir, self.t_labels,
                                                               merged_gtf, self.bam_files)
        self.logger.info("Executing: cuffdiff {0}".format(cuffdiff_command))
        ret = script_util.runProgram(None, "cuffdiff", cuffdiff_command, None, self.directory)
        for line in ret["result"].splitlines(False):
            self.logger.info(line)
        # Collapse consecutive "> Processing Locus ..." progress lines so each
        # locus is logged only once.
        prev_value = ''
        for line in ret["stderr"].splitlines(False):
            if line.startswith('> Processing Locus'):
                cur_value = line.split()[-1]
                if prev_value != cur_value:
                    prev_value = cur_value
                    self.logger.info(line)
            else:
                prev_value = ''
                self.logger.info(line)
    except Exception, e:
        raise Exception("Error executing cuffdiff {0},{1}".format(cuffdiff_command, e))
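# --- Hedged sketch (illustration only): the stderr loop above collapses
# consecutive cuffdiff "> Processing Locus ..." progress lines that end in the
# same locus token, so each locus is logged once. The same logic as a
# standalone, testable helper:
def dedupe_locus_lines(lines):
    kept = []
    prev_value = ''
    for line in lines:
        if line.startswith('> Processing Locus'):
            cur_value = line.split()[-1]
            if prev_value == cur_value:
                continue                # repeat of the previous locus; skip it
            prev_value = cur_value
        else:
            prev_value = ''             # any other line resets the tracker
        kept.append(line)
    return kept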
def call_cuffmerge(working_dir, directory, num_threads, gtf_file, list_file):
    print "Entering cuffmerge"
    print "Args passed {0},{1},{2},{3}".format(directory, num_threads, gtf_file, list_file)
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(str(num_threads), directory,
                                                           gtf_file, list_file)
    merged_gtf = None
    try:
        print "Executing: cuffmerge {0}".format(cuffmerge_command)
        # Note: this variant unpacks a (result, stderr) pair from runProgram,
        # whereas other call sites treat the return value as a dict.
        r, e = script_util.runProgram(None, "cuffmerge", cuffmerge_command, None, working_dir)
        print r + "\n" + e
        if os.path.exists(os.path.join(directory, "merged.gtf")):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception, e:
        print "".join(traceback.format_exc())
        raise Exception("Error executing cuffmerge {0},{1}".format(cuffmerge_command,
                                                                   "".join(traceback.format_exc())))
    return merged_gtf  # callers assign this (see runEach above)
def runEach(self, task_list):
    ### Call the merge step for the tool that produced the expression data.
    # diffexp_dir, merge_dir, num_threads, gtf_file and assembly_file are
    # expected to be provided by the surrounding class.
    used_tool = self.details['used_tool']
    if used_tool == 'StringTie':
        merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir, merge_dir, num_threads,
                                                     self.details['gtf_file'], assembly_file)
        run_tool = "StringTie"
        tool_version = "1.2.3"
    elif used_tool == 'Cufflinks':
        merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir, merge_dir, num_threads,
                                                gtf_file, assembly_file)
        run_tool = "Tablemaker"
        tool_version = '2.0.9'
    # NOTE: this call unconditionally overwrites the merged_gtf chosen above.
    cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
    merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir, self.num_threads,
                                            self.details['gtf_file'],
                                            self.details['gtf_list_file'])
    ### Run DiffExpforBallgown
    output_dir = os.path.join(self.directory, self.method_params['output_obj_name'])
    diffexp_command = (' -p ' + str(self.num_threads))
    ### Setting Advanced parameters for DiffExpforBallgown
    if ('time_series' in self.method_params and self.method_params['time_series'] != 0):
        diffexp_command += (' -T ')
    if ('min_alignment_count' in self.method_params
            and self.method_params['min_alignment_count'] is not None):
        diffexp_command += (' -c ' + str(self.method_params['min_alignment_count']))
    if ('multi_read_correct' in self.method_params
            and self.method_params['multi_read_correct'] != 0):
        diffexp_command += (' --multi-read-correct ')
    if ('library_type' in self.method_params
            and self.method_params['library_type'] is not None):
        diffexp_command += (' --library-type ' + self.method_params['library_type'])
    if ('library_norm_method' in self.method_params
            and self.method_params['library_norm_method'] is not None):
        diffexp_command += (' --library-norm-method ' + self.method_params['library_norm_method'])
    try:
        diffexp_command += " -o {0} -L {1} -u {2} {3}".format(output_dir, self.t_labels,
                                                              merged_gtf, self.bam_files)
        self.logger.info("Executing: diffexp {0}".format(diffexp_command))
        ret = script_util.runProgram(None, "diffexp", diffexp_command, None, self.directory)
        for line in ret["result"].splitlines(False):
            self.logger.info(line)
        # Collapse repeated "> Processing Locus ..." progress lines.
        prev_value = ''
        for line in ret["stderr"].splitlines(False):
            if line.startswith('> Processing Locus'):
                cur_value = line.split()[-1]
                if prev_value != cur_value:
                    prev_value = cur_value
                    self.logger.info(line)
            else:
                prev_value = ''
                self.logger.info(line)
    except Exception, e:
        raise Exception("Error executing diffexp {0},{1}".format(diffexp_command, e))
def BuildBowtie2Index(self, ctx, params):
    """
    :param params: instance of type "Bowtie2IndexParams" -> structure:
        parameter "ws_id" of String, parameter "reference" of String,
        parameter "output_obj_name" of String
    :returns: instance of type "ResultsToReport" (Object for Report type) ->
        structure: parameter "report_name" of String, parameter "report_ref"
        of String
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN BuildBowtie2Index
    user_token = ctx['token']
    ws_client = Workspace(url=self.__WS_URL, token=user_token)
    hs = HandleService(url=self.__HS_URL, token=user_token)
    try:
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        bowtie_dir = os.path.join(self.__SCRATCH, 'tmp')
        handler_util.setupWorkingDir(self.__LOGGER, bowtie_dir)
        ## Update the provenance
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # Add additional info to provenance here; in this case the input data object reference
        provenance[0]['input_ws_objects'] = [params['ws_id'] + '/' + params['reference']]
        try:
            ref_id, outfile_ref_name = rnaseq_util.get_fa_from_genome(
                self.__LOGGER, ws_client, self.__SERVICES, params['ws_id'],
                bowtie_dir, params['reference'])
        except Exception, e:
            self.__LOGGER.exception("".join(traceback.format_exc()))
            raise ValueError('Unable to get FASTA for object {}'.format(
                "".join(traceback.format_exc())))
        ## Run bowtie2-build on the command line
        try:
            if outfile_ref_name:
                bowtie_index_cmd = "{0} {1}".format(outfile_ref_name, params['reference'])
            else:
                bowtie_index_cmd = "{0} {1}".format(params['reference'], params['reference'])
            self.__LOGGER.info("Executing: bowtie2-build {0}".format(bowtie_index_cmd))
            cmdline_output = script_util.runProgram(self.__LOGGER, "bowtie2-build",
                                                    bowtie_index_cmd, None, bowtie_dir)
            if 'result' in cmdline_output:
                report = cmdline_output['result']
        except Exception, e:
            raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(
                params['reference'], e))
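# --- Hedged sketch (illustration only): bowtie2-build normally writes six
# <base>.*.bt2 files (very large genomes get .bt2l instead); a check like this
# could confirm the index exists before reporting success. The helper name is
# hypothetical, not part of the original service.
def bowtie2_index_complete(bowtie_dir, base):
    expected = ['.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2', '.rev.1.bt2', '.rev.2.bt2']
    return all(os.path.exists(os.path.join(bowtie_dir, base + ext)) for ext in expected)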
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(self.logger,
                                                 shock_service_url=self.urls['shock_service_url'],
                                                 shock_id=read1_id, filename=read1_name,
                                                 directory=directory, token=token)
            script_util.download_file_from_shock(self.logger,
                                                 shock_service_url=self.urls['shock_service_url'],
                                                 shock_id=read2_id, filename=read2_name,
                                                 directory=directory, token=token)
            tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir, gtf_file, bowtie2_base,
                                                              os.path.join(directory, read1_name),
                                                              os.path.join(directory, read2_name))
        except Exception, e:
            raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        try:
            self.logger.info("Executing: tophat {0}".format(tophat_cmd))
            cmdline_output, cmd_err = script_util.runProgram(self.logger, "tophat", tophat_cmd,
                                                             None, directory)
        except Exception, e:
            raise Exception("Failed to run command {0}\n{1}".format(tophat_cmd, e))
        try:
            bam_file = output_dir + "/accepted_hits.bam"
            align_stats_cmd = "flagstat {0}".format(bam_file)
            stats = script_util.runProgram(self.logger, "samtools", align_stats_cmd, None, directory)
            # Parse the alignment statistics out of stats['result']
            stats_data = {}
            stats_data = script_util.extractAlignmentStatsInfo(self.logger, "samtools", ws_client,
                                                               ws_id, None, stats['result'], None)
        except Exception, e:
            raise Exception("Failed to create RNASeqAlignmentStats: {0}".format(bam_file))
        # Zip the tophat output folder
        out_file_path = os.path.join(directory, "%s.zip" % output_name)
        try:
            self.logger.info("Zipping the output files to {0}".format(out_file_path))
            script_util.zip_files(self.logger, output_dir, out_file_path)
        except Exception, e:
            raise Exception("Failed to compress the index: {0}".format(out_file_path))
        ## Upload the file using handle service
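# --- Hedged sketch (illustration only): `samtools flagstat` lines have the
# shape "12345 + 0 in total (QC-passed reads + QC-failed reads)". A minimal
# parser for the QC-passed counts, in the spirit of what
# extractAlignmentStatsInfo consumes above; the function name is hypothetical.
def parse_flagstat(text):
    stats = {}
    for line in text.splitlines():
        fields = line.split()
        if len(fields) >= 4 and fields[1] == '+':
            # fields: [qc_passed, '+', qc_failed, description...]
            stats[' '.join(fields[3:])] = int(fields[0])
    return stats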
class HiSat2SampleSet(HiSat2):
    def __init__(self, logger, directory, urls, max_cores):
        super(HiSat2SampleSet, self).__init__(logger, directory, urls, max_cores)
        # user defined shared variables across methods
        self.sample = None
        self.sampleset_info = None

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        hisat2_dir = self.directory
        try:
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            annotation_name = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                                     params['genome_id'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object info and IDs
        sampleset_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'],
                                                     params['sampleset_id'])[0]
        self.sampleset_info = sampleset_info
        ### Get the workspace object ids for the objects ###
        sampleset_id = str(sampleset_info[6]) + '/' + str(sampleset_info[0]) + '/' + str(sampleset_info[4])
        annotation_id = script_util.ws_get_ref(logger, ws_client, params['ws_id'],
                                               params['genome_id'])
        sample_type = sampleset_info[2].split('-')[0]
        ### Check that the input is a sample set or reads set
        if not (sample_type == 'KBaseRNASeq.RNASeqSampleSet' or sample_type == 'KBaseSets.ReadsSet'):
            raise HiSat2SampleSetException('RNASeqSampleSet or ReadsSet is required')
        (reads, r_label) = rnaseq_util.get_reads_conditions(logger, sample, sample_type)
        self.num_jobs = len(reads)
        ref_id, fasta_file = rnaseq_util.get_fa_from_genome(logger, ws_client, self.urls,
                                                            params['ws_id'], hisat2_dir,
                                                            params['genome_id'])
        hisat2base = os.path.basename(fasta_file)
        hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
        try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2-build", hisat2base_cmd,
                                                    None, hisat2_dir)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
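# --- Hedged sketch (illustration only): prepare() above sets num_jobs from the
# reads list; a plausible shape for the per-read task list that the framework
# would then fan out to runEach. The dict keys match the task_params fields
# consumed by the runEach methods below; the function itself is hypothetical.
def example_build_task_list(reads, r_labels, ws_id, hisat2_dir, annotation_id, sampleset_id):
    task_list = []
    for read, label in zip(reads, r_labels):
        task_list.append({'job_id': read,
                          'label': label,
                          'ws_id': ws_id,
                          'hisat2_dir': hisat2_dir,
                          'annotation_id': annotation_id,
                          'sampleset_id': sampleset_id})
    return task_list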
            lib_type = 'PairedEnd'
            if ('orientation' in params and params['orientation'] is not None):
                hisat2_cmd += (' --' + params['orientation'])
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(logger,
                                                     shock_service_url=services['shock_service_url'],
                                                     shock_id=read1_id, filename=read1_name,
                                                     directory=input_direc, token=token)
                script_util.download_file_from_shock(logger,
                                                     shock_service_url=services['shock_service_url'],
                                                     shock_id=read2_id, filename=read2_name,
                                                     directory=input_direc, token=token)
                # Both mates were downloaded to input_direc, so both paths point there.
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name),
                    hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        try:
            logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2", hisat2_cmd, None, directory)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2_cmd))
        try:
            # HISAT2 reports a bowtie2-style alignment summary on stderr, so the
            # bowtie2 stats extractor is reused here.
            stats_data = {}
            stats_data = script_util.extractAlignmentStatsInfo(logger, "bowtie2", ws_client,
                                                               ws_id, None,
                                                               cmdline_output['stderr'], None)
            bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
            logger.info("Executing: sam_to_bam {0}".format(bam_file))
            sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
            script_util.runProgram(logger, "samtools", sam_to_bam, None, directory)
            final_bam_prefix = os.path.join(output_dir, "accepted_hits")
            logger.info("Executing: Sorting bam file {0}".format(bam_file))
            sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix)
            script_util.runProgram(logger, "samtools", sort_bam_cmd, None, directory)
        except Exception, e:
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            bowtie2_base = os.path.join(directory,
                                        handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
            ### Adding advanced options to the TopHat call
            tophat_cmd = (' -p ' + str(self.num_threads))
            if ('max_intron_length' in params and params['max_intron_length'] is not None):
                tophat_cmd += (' -I ' + str(params['max_intron_length']))
            if ('min_intron_length' in params and params['min_intron_length'] is not None):
                tophat_cmd += (' -i ' + str(params['min_intron_length']))
            if ('min_anchor_length' in params and params['min_anchor_length'] is not None):
                tophat_cmd += (' -a ' + str(params['min_anchor_length']))
            if ('read_edit_dist' in params and params['read_edit_dist'] is not None):
                tophat_cmd += (' --read-edit-dist ' + str(params['read_edit_dist']))
            if ('read_gap_length' in params and params['read_gap_length'] is not None):
                tophat_cmd += (' --read-gap-length ' + str(params['read_gap_length']))
            if ('read_mismatches' in params and params['read_mismatches'] is not None):
                tophat_cmd += (' -N ' + str(params['read_mismatches']))
            if ('library_type' in params and params['library_type'] is not None):
                tophat_cmd += (' --library-type ' + params['library_type'])
            if ('report_secondary_alignments' in params
                    and int(params['report_secondary_alignments']) == 1):
                tophat_cmd += ' --report-secondary-alignments'
            if ('no_coverage_search' in params and int(params['no_coverage_search']) == 1):
                tophat_cmd += ' --no-coverage-search'
            if ('preset_options' in params and params['preset_options'] is not None):
                tophat_cmd += ' --' + params['preset_options']
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, directory, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir, gtf_file, bowtie2_base,
                                                              ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # TODO: the orientation could be read from the PairedEndLibrary object
                if ('orientation' in params and params['orientation'] is not None):
                    tophat_cmd += (' --' + params['orientation'])
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir, gtf_file,
                                                                  bowtie2_base, ds['fwd'], ds['rev'])
            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(self.logger, "tophat", tophat_cmd,
                                                                 None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}\n{1}".format(tophat_cmd, e))
try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger,os.path.join(bowtie2_dir,bw_index_files),bowtie2_dir) mv_dir= handler_util.get_dir(bowtie2_dir) if mv_dir is not None: script_util.move_files(logger,mv_dir,bowtie2_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error: Please contact [email protected]") ### Build Index for the fasta file fasta_file =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")+".fa") bowtie2base =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")) bowtie2base_cmd = '{0} {1}'.format(fasta_file,bowtie2base) try: logger.info("Building Index for Hisat2 {0}".format(bowtie2base_cmd)) cmdline_output = script_util.runProgram(logger,"bowtie2-build",bowtie2base_cmd,None,bowtie2_dir) except Exception,e: raise Exception("Failed to run command {0}".format(bowtie2base_cmd)) ### Check if GTF object exists in the workspace pull the gtf ref_id = bowtie_index['data']['genome_id'] genome_name = ws_client.get_object_info_new({"objects": [{'ref' : ref_id }] })[0][1] ws_gtf = genome_name+"_GTF" gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",bowtie2_dir,token) if gtf_file is None: rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],ref_id,genome_name,bowtie2_dir,token) # Determine the num_threads provided by the user otherwise default the number of threads to 2 self.num_jobs = 1 logger.info(" Number of threads used by each process {0}".format(self.num_threads)) task_param = {'job_id' : params['sampleset_id'], 'label' : r_label, 'ws_id' : params['ws_id'],
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token,
                )
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name),
                    bowtie2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        try:
            logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
            cmdline_output = script_util.runProgram(logger, "bowtie2", bowtie2_cmd, None, directory)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(bowtie2_cmd))
        try:
            stats_data = {}
            stats_data = script_util.extractAlignmentStatsInfo(
                logger, "bowtie2", ws_client, ws_id, None, cmdline_output["stderr"], None)
            bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
            logger.info("Executing: sam_to_bam {0}".format(bam_file))
            sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
            script_util.runProgram(logger, "samtools", sam_to_bam, None, directory)
            final_bam_prefix = os.path.join(output_dir, "accepted_hits")
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(directory, sample_name.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)
            ### Adding advanced options to the Bowtie2 call
            bowtie2_cmd = ''
            bowtie2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params and params['quality_score'] is not None):
                bowtie2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params and params['alignment_type'] is not None):
                bowtie2_cmd += (' --' + params['alignment_type'])
            if (('preset_options' in params and params['preset_options'] is not None)
                    and ('alignment_type' in params and params['alignment_type'] is not None)):
                if (params['alignment_type'] == 'local'):
                    bowtie2_cmd += (' --' + params['preset_options'] + '-local')
                else:
                    bowtie2_cmd += (' --' + params['preset_options'])
            if ('trim5' in params and params['trim5'] is not None):
                bowtie2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                bowtie2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                bowtie2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                bowtie2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                bowtie2_cmd += (' --maxins ' + str(params['maxins']))
            out_file = output_dir + "/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'], bowtie2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # TODO: the orientation could be read from the PairedEndLibrary object
                if ('orientation' in params and params['orientation'] is not None):
                    bowtie2_cmd += (' --' + params['orientation'])
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],
                                                                     bowtie2_base, out_file)
            try:
                self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                cmdline_output = script_util.runProgram(self.logger, "bowtie2", bowtie2_cmd,
                                                        None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}".format(bowtie2_cmd))
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                # Both mates were downloaded to input_direc, so both paths point there.
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name),
                    hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
        try:
            logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2", hisat2_cmd, None, directory)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2_cmd))
        try:
            # HISAT2 prints a bowtie2-style alignment summary on stderr, so the
            # bowtie2 stats extractor is reused.
            stats_data = {}
            stats_data = script_util.extractAlignmentStatsInfo(
                logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None)
            bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
            logger.info("Executing: sam_to_bam {0}".format(bam_file))
            sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
            script_util.runProgram(logger, "samtools", sam_to_bam, None, directory)
            final_bam_prefix = os.path.join(output_dir, "accepted_hits")
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['hisat2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(self.logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(self.logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(directory, sample_name.split('.')[0] + "_hisat2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_hisat2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".1.ht2")
            hisat2_base = os.path.join(directory, base)
            ### Adding advanced options to the HISAT2 call
            hisat2_cmd = ''
            hisat2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params and params['quality_score'] is not None):
                hisat2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params and params['alignment_type'] is not None):
                hisat2_cmd += (' --' + params['alignment_type'])
            if ('trim5' in params and params['trim5'] is not None):
                hisat2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                hisat2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                hisat2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                hisat2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                hisat2_cmd += (' --maxins ' + str(params['maxins']))
            if ('min_intron_length' in params and params['min_intron_length'] is not None):
                hisat2_cmd += (' --min-intronlen ' + str(params['min_intron_length']))
            if ('max_intron_length' in params and params['max_intron_length'] is not None):
                hisat2_cmd += (' --max-intronlen ' + str(params['max_intron_length']))
            if ('no_spliced_alignment' in params and params['no_spliced_alignment'] != 0):
                hisat2_cmd += (' --no-spliced-alignment')
            if ('transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0):
                hisat2_cmd += (' --transcriptome-mapping-only')
            if ('tailor_alignments' in params and params['tailor_alignments'] is not None):
                hisat2_cmd += (' --' + params['tailor_alignments'])
            out_file = output_dir + "/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token)
                self.logger.info(ds)
            except Exception, e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'], hisat2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # TODO: the orientation could be read from the PairedEndLibrary object
                if ('orientation' in params and params['orientation'] is not None):
                    hisat2_cmd += (' --' + params['orientation'])
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],
                                                                    hisat2_base, out_file)
            try:
                self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                cmdline_output = script_util.runProgram(self.logger, "hisat2", hisat2_cmd,
                                                        None, directory)
            except Exception, e:
                logger.exception(e)
                raise Exception("Failed to run command {0}".format(hisat2_cmd))
class Bowtie2(ExecutionBase):

    def __init__(self, logger, directory, urls, max_cores):
        pprint(self.__class__)
        super(Bowtie2, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        self.num_threads = None
        self.tool_used = "Bowtie2"
        self.tool_version = "2.2.6"

    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(directory, sample_name.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)

            ### Adding advanced options to the bowtie2 command
            bowtie2_cmd = ''
            bowtie2_cmd += (' -p {0}'.format(self.num_threads))
            if 'quality_score' in params and params['quality_score'] is not None:
                bowtie2_cmd += (' --' + params['quality_score'])
            if 'alignment_type' in params and params['alignment_type'] is not None:
                bowtie2_cmd += (' --' + params['alignment_type'])
            if ('preset_options' in params and params['preset_options'] is not None) and \
               ('alignment_type' in params and params['alignment_type'] is not None):
                if params['alignment_type'] == 'local':
                    bowtie2_cmd += (' --' + params['preset_options'] + '-local')
                else:
                    bowtie2_cmd += (' --' + params['preset_options'])
            if 'trim5' in params and params['trim5'] is not None:
                bowtie2_cmd += (' --trim5 ' + str(params['trim5']))
            if 'trim3' in params and params['trim3'] is not None:
                bowtie2_cmd += (' --trim3 ' + str(params['trim3']))
            if 'np' in params and params['np'] is not None:
                bowtie2_cmd += (' --np ' + str(params['np']))
            if 'minins' in params and params['minins'] is not None:
                bowtie2_cmd += (' --minins ' + str(params['minins']))
            if 'maxins' in params and params['maxins'] is not None:
                bowtie2_cmd += (' --maxins ' + str(params['maxins']))

            out_file = output_dir + "/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))

            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'], bowtie2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if 'orientation' in params and params['orientation'] is not None:
                        bowtie2_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if 'orientation' in params and params['orientation'] is not None:
                        bowtie2_cmd += (' --' + params['orientation'])
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'], bowtie2_base, out_file)

            try:
                self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                cmdline_output = script_util.runProgram(self.logger, "bowtie2", bowtie2_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}".format(bowtie2_cmd))
            try:
                stats_data = {}
                stats_data = script_util.extractAlignmentStatsInfo(self.logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None)
                bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
                logger.info("Executing: sam_to_bam {0}".format(bam_file))
                sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
                script_util.runProgram(self.logger, "samtools", sam_to_bam, None, directory)
                final_bam_prefix = os.path.join(output_dir, "accepted_hits")
                logger.info("Executing: Sorting bam file {0}".format(bam_file))
                sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix)
                script_util.runProgram(self.logger, "samtools", sort_bam_cmd, None, directory)
            except Exception, e:
                raise Exception("Error Running the bowtie2 command {0},{1} {2}".format(bowtie2_cmd, directory, "".join(traceback.format_exc())))
class Cufflinks(ExecutionBase):

    def __init__(self, logger, directory, urls, max_cores):
        pprint(self.__class__)
        super(Cufflinks, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        self.num_threads = None
        self.tool_used = "Cufflinks"
        self.tool_version = "1.2.3"

    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['cufflinks_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
        alignment_name = ws_client.get_object_info([{"ref": s_alignment}], includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(directory, "run_cufflinks_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            input_direc = os.path.join(directory, alignment_name.split('_alignment')[0] + "_cufflinks_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = alignment_name.split('_alignment')[0] + "_cufflinks_expression"
            output_dir = os.path.join(directory, output_name)
            # Download the alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id, filename=a_filename, directory=input_direc, token=token)
            except Exception, e:
                raise Exception("Unable to download shock file, {0},{1}".format(a_filename, "".join(traceback.format_exc())))
            try:
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir):
                    os.mkdir(input_dir)
                script_util.unzip_files(logger, os.path.join(input_direc, a_filename), input_dir)
            except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files error")
            input_file = os.path.join(input_dir, "accepted_hits.bam")

            ### Adding advanced options to the cufflinks command
            tool_opts = {k: str(v) for k, v in params.iteritems()
                         if k not in ('ws_id', 'alignmentset_id', 'num_threads') and v is not None}
            cufflinks_command = (' -p ' + str(self.num_threads))
            if 'max_intron_length' in params and params['max_intron_length'] is not None:
                cufflinks_command += (' --max-intron-length ' + str(params['max_intron_length']))
            if 'min_intron_length' in params and params['min_intron_length'] is not None:
                cufflinks_command += (' --min-intron-length ' + str(params['min_intron_length']))
            if 'overhang_tolerance' in params and params['overhang_tolerance'] is not None:
                cufflinks_command += (' --overhang-tolerance ' + str(params['overhang_tolerance']))
            cufflinks_command += " -o {0} -G {1} {2}".format(output_dir, gtf_file, input_file)
            logger.info("Executing: cufflinks {0}".format(cufflinks_command))
            print "Executing: cufflinks {0}".format(cufflinks_command)
            ret = script_util.runProgram(None, "cufflinks", cufflinks_command, None, directory)
            result = ret["result"]
            for line in result.splitlines(False):
                self.logger.info(line)
            # Collapse the very chatty "> Processing Locus ..." progress lines:
            # only log a locus line when it differs from the previous one.
            stderr = ret["stderr"]
            prev_value = ''
            for line in stderr.splitlines(False):
                if line.startswith('> Processing Locus'):
                    words = line.split()
                    cur_value = words[len(words) - 1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        self.logger.info(line)
                else:
                    prev_value = ''
                    self.logger.info(line)

            ## Parse output files
            try:
                g_output_file = os.path.join(output_dir, "genes.fpkm_tracking")
                # Cufflinks doesn't produce TPM; we infer it from FPKM
                # (see discussion @ https://www.biostars.org/p/160989/)
                exp_dict, tpm_exp_dict = rnaseq_util.parse_FPKMtracking_calc_TPM(g_output_file)
            except Exception, e:
                logger.exception("".join(traceback.format_exc()))
                raise Exception("Error parsing FPKMtracking")
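# Background for the TPM inference above: Cufflinks reports FPKM only, and TPM
# is FPKM renormalized so the per-sample values sum to one million (see the
# biostars thread linked in the comment). A minimal sketch of that conversion
# (a hypothetical helper, not the actual
# rnaseq_util.parse_FPKMtracking_calc_TPM implementation):

def fpkm_to_tpm_sketch(fpkm_by_gene):
    """Convert a {gene_id: FPKM} dict into {gene_id: TPM}.

    TPM_i = FPKM_i / sum_j(FPKM_j) * 1e6
    """
    total = float(sum(fpkm_by_gene.values()))
    if total == 0:
        return dict((g, 0.0) for g in fpkm_by_gene)
    return dict((g, v / total * 1e6) for g, v in fpkm_by_gene.items())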
    def runEach(self, task_list):
        logger = self.logger
        logger.info('in DiffExpfoBallgown.runEach()')
        ### Run the merge step appropriate for the tool that produced the alignments
        used_tool = self.details['used_tool']
        if used_tool == "Ballgown (Bioconductor)":
            #merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir, merge_dir, num_threads, self.details['gtf_file'], assembly_file)
            #run_tool = "StringTie"
            #tool_version = "1.2.3"
            # For now, take no action for StringTie processing
            logger.info('Exiting immediately - StringTie case')
            return
        elif used_tool == 'Cufflinks':
            run_tool = "Tablemaker"
            tool_version = '2.0.9'
            cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
            merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir, self.num_threads,
                                                    self.details['gtf_file'], self.details['gtf_list_file'])

        ### Run DiffExpforBallgown
        output_dir = os.path.join(self.directory, self.method_params['output_obj_name'])
        diffexp_command = (' -p ' + str(self.num_threads))

        ### Setting advanced parameters for DiffExpforBallgown
        if 'time_series' in self.method_params and self.method_params['time_series'] != 0:
            diffexp_command += (' -T ')
        if 'min_alignment_count' in self.method_params and self.method_params['min_alignment_count'] is not None:
            diffexp_command += (' -c ' + str(self.method_params['min_alignment_count']))
        if 'multi_read_correct' in self.method_params and self.method_params['multi_read_correct'] != 0:
            diffexp_command += (' --multi-read-correct ')
        if 'library_type' in self.method_params and self.method_params['library_type'] is not None:
            diffexp_command += (' --library-type ' + self.method_params['library_type'])
        if 'library_norm_method' in self.method_params and self.method_params['library_norm_method'] is not None:
            diffexp_command += (' --library-norm-method ' + self.method_params['library_norm_method'])
        try:
            diffexp_command += " -o {0} -L {1} -u {2} {3}".format(output_dir, self.t_labels, merged_gtf, self.bam_files)
            logger.info("Executing: diffexp {0}".format(diffexp_command))
            ret = script_util.runProgram(None, "diffexp", diffexp_command, None, self.directory)
            result = ret["result"]
            for line in result.splitlines(False):
                logger.info(line)
            # Collapse repeated "> Processing Locus ..." progress lines, as above.
            stderr = ret["stderr"]
            prev_value = ''
            for line in stderr.splitlines(False):
                if line.startswith('> Processing Locus'):
                    words = line.split()
                    cur_value = words[len(words) - 1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        logger.info(line)
                else:
                    prev_value = ''
                    logger.info(line)
        except Exception, e:
            logger.exception(e)
            raise Exception("Error executing diffexp {0},{1}".format(diffexp_command, e))
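# For reference, a fully assembled diffexp invocation built from the options
# above looks roughly like the following (all paths, labels and values are
# illustrative only):
#
#   diffexp -p 4 -T -c 10 --library-type fr-unstranded \
#       -o <output_dir> -L cond1,cond2 -u merged.gtf sample1.bam,sample2.bam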
                    script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id, filename=read2_name, directory=input_direc, token=token)
                    hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc, read1_name), os.path.join(input_direc, read2_name), hisat2_base, out_file)
                except Exception, e:
                    raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            try:
                self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                cmdline_output = script_util.runProgram(self.logger, "hisat2", hisat2_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}".format(hisat2_cmd))
            try:
                stats_data = {}
                # hisat2 emits a bowtie2-style alignment summary on stderr, so the
                # bowtie2 stats parser is reused here.
                stats_data = script_util.extractAlignmentStatsInfo(self.logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None)
                bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
                logger.info("Executing: sam_to_bam {0}".format(bam_file))
                sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
                script_util.runProgram(self.logger, "samtools", sam_to_bam, None, directory)
                final_bam_prefix = os.path.join(output_dir, "accepted_hits")
                logger.info("Executing: Sorting bam file {0}".format(bam_file))
                sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix)
                script_util.runProgram(self.logger, "samtools", sort_bam_cmd, None, directory)
            except Exception, e:
                raise Exception("Error Running the hisat2 command {0},{1} {2}".format(hisat2_cmd, directory, "".join(traceback.format_exc())))

            # Zip the hisat2 output folder
            out_file_path = os.path.join(directory, "%s.zip" % output_name)
            try:
                logger.info("Zipping the output files to {0}".format(out_file_path))
                script_util.zip_files(self.logger, output_dir, out_file_path)
            except Exception, e:
                raise Exception("Failed to compress the alignment output: {0}".format(out_file_path))
            ## Upload the file using handle service
                    script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id, filename=read2_name, directory=directory, token=token)
                    bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(directory, read1_name), os.path.join(directory, read2_name), bowtie2_base, out_file)
                except Exception, e:
                    raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            try:
                logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                cmdline_output = script_util.runProgram(logger, "bowtie2", bowtie2_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}".format(bowtie2_cmd))
            try:
                stats_data = {}
                stats_data = script_util.extractAlignmentStatsInfo(logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None)
                bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
                logger.info("Executing: sam_to_bam {0}".format(bam_file))
                sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
                script_util.runProgram(logger, "samtools", sam_to_bam, None, directory)
class StringTie(ExecutionBase):

    def __init__(self, logger, directory, urls, max_cores):
        pprint(self.__class__)
        super(StringTie, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        self.num_threads = None
        self.tool_used = "StringTie"
        self.tool_version = "1.2.3"

    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
        alignment_name = ws_client.get_object_info([{"ref": s_alignment}], includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(directory, "run_Stringtie_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            input_direc = os.path.join(directory, alignment_name.split('_alignment')[0] + "_stringtie_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = alignment_name.split('_alignment')[0] + "_stringtie_expression"
            output_dir = os.path.join(directory, output_name)
            # Download the alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id, filename=a_filename, directory=input_direc, token=token)
            except Exception, e:
                raise Exception("Unable to download shock file, {0},{1}".format(a_filename, "".join(traceback.format_exc())))
            try:
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir):
                    os.mkdir(input_dir)
                script_util.unzip_files(logger, os.path.join(input_direc, a_filename), input_dir)
            except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files error")
            input_file = os.path.join(input_dir, "accepted_hits.bam")

            ### Adding advanced options to the stringtie command
            tool_opts = {k: str(v) for k, v in params.iteritems()
                         if k not in ('ws_id', 'alignmentset_id', 'num_threads') and v is not None}
            stringtie_command = (' -p ' + str(self.num_threads))
            if 'label' in params and params['label'] is not None:
                stringtie_command += (' -l ' + str(params['label']))
            if 'min_isoform_abundance' in params and params['min_isoform_abundance'] is not None:
                stringtie_command += (' -f ' + str(params['min_isoform_abundance']))
            if 'min_length' in params and params['min_length'] is not None:
                stringtie_command += (' -m ' + str(params['min_length']))
            if 'a_juncs' in params and params['a_juncs'] is not None:
                stringtie_command += (' -a ' + str(params['a_juncs']))
            if 'j_min_reads' in params and params['j_min_reads'] is not None:
                stringtie_command += (' -j ' + str(params['j_min_reads']))
            if 'c_min_read_coverage' in params and params['c_min_read_coverage'] is not None:
                stringtie_command += (' -c ' + str(params['c_min_read_coverage']))
            if 'gap_sep_value' in params and params['gap_sep_value'] is not None:
                stringtie_command += (' -g ' + str(params['gap_sep_value']))
            if 'disable_trimming' in params and params['disable_trimming'] != 0:
                stringtie_command += (' -t ')
            if 'ballgown_mode' in params and params['ballgown_mode'] != 0:
                stringtie_command += (' -B ')
            if 'skip_reads_with_no_ref' in params and params['skip_reads_with_no_ref'] != 0:
                stringtie_command += (' -e ')
            t_file_name = os.path.join(output_dir, "transcripts.gtf")
            g_output_file = os.path.join(output_dir, "genes.fpkm_tracking")
            stringtie_command += " -o {0} -A {1} -G {2} {3}".format(t_file_name, g_output_file, gtf_file, input_file)
            logger.info("Executing: stringtie {0}".format(stringtie_command))
            print "Executing: stringtie {0}".format(stringtie_command)
            ret = script_util.runProgram(None, "stringtie", stringtie_command, None, directory)

            ## Parse output files
            try:
                exp_dict = rnaseq_util.parse_FPKMtracking(g_output_file, 'StringTie', 'FPKM')
                tpm_exp_dict = rnaseq_util.parse_FPKMtracking(g_output_file, 'StringTie', 'TPM')
            except Exception, e:
                logger.exception("".join(traceback.format_exc()))
                raise Exception("Error parsing FPKMtracking")
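# genes.fpkm_tracking is a tab-delimited table with a header row; the two
# parse_FPKMtracking calls above are assumed to extract one numeric column
# (FPKM or TPM) keyed by gene id. A minimal sketch under that assumption (the
# 'gene_id' header name and the helper itself are illustrative, not the actual
# rnaseq_util implementation):

def read_tracking_column_sketch(path, column):
    """Return {gene_id: float(value)} for one column of a *.fpkm_tracking file."""
    values = {}
    with open(path) as fh:
        header = fh.readline().rstrip('\n').split('\t')
        id_idx, col_idx = header.index('gene_id'), header.index(column)
        for line in fh:
            fields = line.rstrip('\n').split('\t')
            values[fields[id_idx]] = float(fields[col_idx])
    return values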
            script_util.move_files(logger, mv_dir, bowtie2_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")

        ### Build the bowtie2 index for the fasta file
        fasta_file = os.path.join(bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa") + ".fa")
        bowtie2base = os.path.join(bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa"))
        bowtie2base_cmd = '{0} {1}'.format(fasta_file, bowtie2base)
        try:
            logger.info("Building Index for Bowtie2 {0}".format(bowtie2base_cmd))
            cmdline_output = script_util.runProgram(logger, "bowtie2-build", bowtie2base_cmd, None, bowtie2_dir)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(bowtie2base_cmd))

        ### Check if a GTF object exists in the workspace and pull the gtf
        ref_id = bowtie_index['data']['genome_id']
        genome_name = ws_client.get_object_info_new({"objects": [{'ref': ref_id}]})[0][1]
        ws_gtf = genome_name + "_GTF"
        gtf_file = script_util.check_and_download_existing_handle_obj(logger, ws_client, self.urls, params['ws_id'], ws_gtf, "KBaseRNASeq.GFFAnnotation", bowtie2_dir, token)
        if gtf_file is None:
            if 'j_min_reads' in params and params['j_min_reads'] is not None:
                stringtie_command += (' -j ' + str(params['j_min_reads']))
            if 'c_min_read_coverage' in params and params['c_min_read_coverage'] is not None:
                stringtie_command += (' -c ' + str(params['c_min_read_coverage']))
            if 'gap_sep_value' in params and params['gap_sep_value'] is not None:
                stringtie_command += (' -g ' + str(params['gap_sep_value']))
            if 'disable_trimming' in params and params['disable_trimming'] != 0:
                stringtie_command += (' -t ')
            if 'ballgown_mode' in params and params['ballgown_mode'] != 0:
                stringtie_command += (' -B ')
            if 'skip_reads_with_no_ref' in params and params['skip_reads_with_no_ref'] != 0:
                stringtie_command += (' -e ')
            t_file_name = os.path.join(output_dir, "transcripts.gtf")
            g_output_file = os.path.join(output_dir, "genes.fpkm_tracking")
            stringtie_command += " -o {0} -A {1} -G {2} {3}".format(t_file_name, g_output_file, gtf_file, input_file)
            logger.info("Executing: stringtie {0}".format(stringtie_command))
            ret = script_util.runProgram(None, "stringtie", stringtie_command, None, directory)

            ## Parse output files
            try:
                exp_dict = script_util.parse_FPKMtracking(g_output_file, 'StringTie', 'FPKM')
                tpm_exp_dict = script_util.parse_FPKMtracking(g_output_file, 'StringTie', 'TPM')
            except Exception, e:
                logger.exception("".join(traceback.format_exc()))
                raise Exception("Error parsing FPKMtracking")

            ## Compress the output and upload to shock
            try:
                logger.info("Zipping Stringtie output")
                out_file_path = os.path.join(directory, "%s.zip" % output_name)
                script_util.zip_files(logger, output_dir, out_file_path)
            except Exception, e:
                logger.exception("".join(traceback.format_exc()))
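# script_util.zip_files is assumed to archive the whole output directory into a
# single zip for upload. A minimal standard-library sketch of that step (an
# illustrative equivalent, not the script_util implementation):

import os
import zipfile

def zip_dir_sketch(src_dir, zip_path):
    """Write every file under src_dir into zip_path, preserving relative paths."""
    zf = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
    try:
        for root, _, files in os.walk(src_dir):
            for name in files:
                full = os.path.join(root, name)
                zf.write(full, os.path.relpath(full, src_dir))
    finally:
        zf.close()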
class HiSat2Sample(HiSat2):

    def __init__(self, logger, directory, urls, max_cores):
        super(HiSat2Sample, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        self.sample_info = None
        self.num_threads = 1

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        hisat2_dir = self.directory

        try:
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
            annotation_name = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['genome_id'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError("Error Downloading objects from the workspace")

        ### Get object info and IDs
        sample_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
        self.sample_info = sample_info

        ### Get the workspace object ids for the objects ###
        sample_id = str(sample_info[6]) + '/' + str(sample_info[0]) + '/' + str(sample_info[4])
        annotation_id = script_util.ws_get_ref(logger, ws_client, params['ws_id'], params['genome_id'])
        sample_type = sample_info[2].split('-')[0]
        lib_types = ['KBaseAssembly.SingleEndLibrary', 'KBaseAssembly.PairedEndLibrary',
                     'KBaseFile.SingleEndLibrary', 'KBaseFile.PairedEndLibrary']
        ### Check that the sample is one of the supported Library types
        if not sample_type in lib_types:
            raise HiSat2SampleException('A SingleEndLibrary or PairedEndLibrary typed object is required')
        r_label = 'Single'
        self.num_jobs = 1

        ### Get the genome id for the selected genome and fetch its fasta file
        ref_id, fasta_file = rnaseq_util.get_fa_from_genome(logger, ws_client, self.urls, params['ws_id'], hisat2_dir, params['genome_id'])

        ### Build the hisat2 index for the fasta file
        hisat2base = os.path.basename(fasta_file)
        hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
        try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2-build", hisat2base_cmd, None, hisat2_dir)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
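# For reference, hisat2-build takes "<reference.fa> <index_base>" and writes a
# set of <index_base>.*.ht2 files, so the command assembled above expands to
# something like (illustrative path):
#
#   hisat2-build /path/to/genome.fa genome.fa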
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            bowtie2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

            ### Adding advanced options to the tophat command
            tophat_cmd = (' -p ' + str(self.num_threads))
            if 'max_intron_length' in params and params['max_intron_length'] is not None:
                tophat_cmd += (' -I ' + str(params['max_intron_length']))
            if 'min_intron_length' in params and params['min_intron_length'] is not None:
                tophat_cmd += (' -i ' + str(params['min_intron_length']))
            if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
                tophat_cmd += (' -a ' + str(params['min_anchor_length']))
            if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
                tophat_cmd += (' --read-edit-dist ' + str(params['read_edit_dist']))
            if 'read_gap_length' in params and params['read_gap_length'] is not None:
                tophat_cmd += (' --read-gap-length ' + str(params['read_gap_length']))
            if 'read_mismatches' in params and params['read_mismatches'] is not None:
                tophat_cmd += (' -N ' + str(params['read_mismatches']))
            if 'library_type' in params and params['library_type'] is not None:
                tophat_cmd += (' --library-type ' + params['library_type'])
            if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
                tophat_cmd += ' --report-secondary-alignments'
            if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
                tophat_cmd += ' --no-coverage-search'
            if 'preset_options' in params and params['preset_options'] is not None:
                tophat_cmd += ' --' + params['preset_options']

            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, directory, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))

            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir, gtf_file, bowtie2_base, ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if 'orientation' in params and params['orientation'] is not None:
                        tophat_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if 'orientation' in params and params['orientation'] is not None:
                        tophat_cmd += (' --' + params['orientation'])
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(self.logger, "tophat", tophat_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}: {1}".format(tophat_cmd, e))
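# For reference, a fully assembled paired-end invocation built from the options
# above looks roughly like the following (all paths and values are illustrative
# only):
#
#   tophat -p 4 -I 50000 --library-type fr-unstranded \
#       -o <output_dir> -G annotation.gtf <bowtie2_index_base> reads_1.fq reads_2.fq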