print "Executing: stringtie {0}".format(stringtie_command) ret = script_util.runProgram(None,"stringtie",stringtie_command,None,directory) ##Parse output files try: exp_dict = rnaseq_util.parse_FPKMtracking( g_output_file, 'StringTie', 'FPKM' ) tpm_exp_dict = rnaseq_util.parse_FPKMtracking( g_output_file, 'StringTie', 'TPM' ) except Exception,e: raise Exception(e) logger.exception("".join(traceback.format_exc())) raise Exception("Error parsing FPKMtracking") ## compress and upload to shock try: logger.info("Zipping Stringtie output") print "Zipping Stringtie output" out_file_path = os.path.join(directory,"%s.zip" % output_name) script_util.zip_files(logger,output_dir,out_file_path) except Exception,e: raise Exception(e) logger.exception("".join(traceback.format_exc())) raise Exception("Error executing stringtie") try: handle = script_util.upload_file_to_shock(logger,out_file_path)['handle'] except Exception, e: raise Exception(e) logger.exception("".join(traceback.format_exc())) raise Exception("Error while zipping the output objects: {0}".format(out_file_path)) ## Save object to workspace try: logger.info("Saving Stringtie object to workspace") es_obj = { 'id' : output_name, 'type' : 'RNA-Seq',
"Error Creating FASTA object from the workspace {0},{1},{2}".format( params["reference"], os.getcwd(), e ) ) ## Run the bowtie_indexing on the command line try: bowtie_index_cmd = "{0} {1}".format(params["reference"], params["reference"]) script_util.runProgram(self.__LOGGER, "bowtie2-build", bowtie_index_cmd, None, bowtie_dir) except Exception, e: raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params["reference"], e)) ## Zip the Index files try: script_util.zip_files(self.__LOGGER, bowtie_dir, "%s.zip" % params["output_obj_name"]) except Exception, e: raise KBaseRNASeqException("Failed to compress the index: {0}".format(e)) ## Upload the file using handle service try: bowtie_handle = script_util.create_shock_handle( self.__LOGGER, "%s.zip" % params["output_obj_name"], self.__SHOCK_URL, self.__HS_URL, "Zip", user_token, ) except Exception, e: raise KBaseRNASeqException("Failed to upload the index: {0}".format(e)) bowtie2index = {"handle": bowtie_handle}
## Run the bowtie_indexing on the command line try: if outfile_ref_name: bowtie_index_cmd = "{0} {1}".format(outfile_ref_name,params['reference']) else: bowtie_index_cmd = "{0} {1}".format(params['reference'],params['reference']) self.__LOGGER.info("Executing: bowtie2-build {0}".format(bowtie_index_cmd)) cmdline_output = script_util.runProgram(self.__LOGGER,"bowtie2-build",bowtie_index_cmd,None,bowtie_dir) if 'result' in cmdline_output: report = cmdline_output['result'] except Exception,e: raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params['reference'],e)) ## Zip the Index files try: script_util.zip_files(self.__LOGGER, bowtie_dir,os.path.join(self.__SCRATCH ,"%s.zip" % params['output_obj_name'])) out_file_path = os.path.join(self.__SCRATCH,"%s.zip" % params['output_obj_name']) except Exception, e: raise KBaseRNASeqException("Failed to compress the index: {0}".format(e)) ## Upload the file using handle service try: bowtie_handle = hs.upload(out_file_path) except Exception, e: raise KBaseRNASeqException("Failed to upload the Zipped Bowtie2Indexes file: {0}".format(e)) bowtie2index = { "handle" : bowtie_handle ,"size" : os.path.getsize(out_file_path),'genome_id' : ref_id} ## Save object to workspace self.__LOGGER.info( "Saving bowtie indexes object to workspace") res= ws_client.save_objects( {"workspace":params['ws_id'], "objects": [{
# NOTE(review): mid-method excerpt — the first statements sit inside a try
# block begun outside this view; stdout, stderr, index_type, hs, gs,
# ws_client, params, blast_dir, KBaseGenomeUtilException and __INDEX_CMD
# are defined elsewhere; the save_objects call is truncated at the end.
    self.__LOGGER.info(stdout)
    if stderr is not None and len(stderr) > 0:
        self.__LOGGER.error("Indexing error: " + stderr)
        raise KBaseGenomeUtilException("Indexing error: " + stderr)
except Exception, e:
    raise KBaseGenomeUtilException("Failed to run indexing program (%s) : %s " %(self.__INDEX_CMD, e))
# collapse index_type: a nucleotide target was indexed as both nt and aa
if index_type == 'nucleotide':
    index_type = 'both'
else:
    index_type = 'protein'
#os.remove(target_nt_fn)
#os.remove(target_aa_fn)
# compress
try:
    script_util.zip_files(self.__LOGGER, blast_dir, "%s.zip" % params['blastindex_name'])
except Exception, e:
    raise KBaseGenomeUtilException("Failed to compress the index: %s" %(e))
try:
    handle = hs.upload("%s.zip" % (params['blastindex_name']))
except Exception, e:
    raise KBaseGenomeUtilException("Failed to upload the index: %s" %(e))
# assemble the BlastIndex object to be saved to the workspace
bi = {'handle' : handle, 'genome_set' : gs, 'index_type' : index_type, 'index_program' : params['index_program']}
if 'description' in params:
    bi['description'] = params['description']
# NOTE(review): after the both/protein collapse above, index_type can never
# be 'none' here — this branch looks unreachable; confirm intended ordering.
if index_type == 'none':
    err_msg = 'No sequences were indexed'
    bi['description'] = err_msg
res= ws_client.save_objects(
final_bam_prefix = os.path.join(output_dir, "accepted_hits") logger.info("Executing: Sorting bam file {0}".format(bam_file)) sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix) script_util.runProgram(logger, "samtools", sort_bam_cmd, None, directory) except Exception, e: #logger.exception("Error Running the bowtie2 command {0},{1} {2}".format(bowtie2_cmd,directory," ".join(traceback.print_exc()))) raise Exception( "Error Running the bowtie2 command {0},{1} {2}".format( bowtie2_cmd, directory, " ".join(traceback.print_exc()))) # Zip tophat folder try: out_file_path = os.path.join(directory, "%s.zip" % output_name) logger.info("Zipping the output files".format(out_file_path)) script_util.zip_files(logger, output_dir, out_file_path) except Exception, e: #logger.exception("Failed to compress the index: {0}".format(out_file_path)) raise Exception( "Failed to compress the index: {0}".format(out_file_path)) ## Upload the file using handle service try: bowtie2_handle = hs.upload(out_file_path) except Exception, e: logger.exception( "Failed to upload zipped output file".format(out_file_path)) #raise Exception("Failed to upload zipped output file".format(out_file_path)) #### Replace version with get_version command##### bowtie2_out = { "file": bowtie2_handle,
class DiffExpforBallgown(ExecutionBase):
    """Differential-expression task for Ballgown.

    prepare() fills in self.details, the thread count and the task list;
    runEach() is a deliberate no-op for the Ballgown/StringTie case and
    otherwise runs cuffmerge followed by the external 'diffexp' program,
    then zips its output directory.
    """

    def __init__(self, logger, directory, urls):
        logger.info("in DiffExprforBallgown, type logger is " + pformat(type(logger)))
        logger.info(" urls are " + pformat(urls))
        pprint(self.__class__)
        # BUGFIX: super(self.__class__, self) recurses infinitely if this
        # class is ever subclassed; name the lookup class explicitly.
        super(DiffExpforBallgown, self).__init__(logger, directory, urls)
        # shared state used across prepare()/runEach()
        self.num_threads = 1
        self.num_cores = 1
        self.tool_used = None
        self.tool_version = None

    def prepare(self):
        """Set tool details, thread count and the task list for runEach().

        NOTE(review): the original body also recovered ws_client/hs/token/
        params locals and carried a large commented-out cuffmerge/stringtie
        section; those locals were consumed only by the commented-out code
        and have been dropped together with it.
        """
        logger = self.logger
        logger.info('in DiffExpfoBallgown.prepare(), method params are')
        logger.info(pformat(self.method_params))
        self.num_threads = mp.cpu_count()
        self.num_jobs = 1
        self.details = {}
        # Question (from original author): where does this really get set?
        self.details["used_tool"] = "Ballgown (Bioconductor)"
        self.details["tool_version"] = "3.4"
        self.task_list = [self.__class__]
        logger.info('exiting ')

    def runEach(self, task_list):
        """Run cuffmerge + 'diffexp' for one task and zip the output dir."""
        logger = self.logger
        ### Call Cuffmerge function
        used_tool = self.details['used_tool']
        logger.info('in DiffExpfoBallgown.runEach()')
        if used_tool == "Ballgown (Bioconductor)":
            # For now, take no action for StringTie processing
            logger.info('Exiting immediately - StringTie case')
            return
        elif used_tool == 'Cufflinks':
            # NOTE(review): diffexp_dir, merge_dir, num_threads, gtf_file and
            # assembly_file are not defined in this method — this branch
            # would raise NameError if ever reached; looks like leftover
            # copy-paste from the Cuffdiff class. Left in place pending
            # confirmation.
            merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir, merge_dir,
                                                    num_threads, gtf_file,
                                                    assembly_file)
            run_tool = "Tablemaker"
            tool_version = '2.0.9'
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run DiffExpforBallgown
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        diffexp_command = (' -p ' + str(self.num_threads))
        ### Setting Advanced parameters for DiffExpforBallgown
        if ('time_series' in self.method_params and
                self.method_params['time_series'] != 0):
            diffexp_command += (' -T ')
        if ('min_alignment_count' in self.method_params and
                self.method_params['min_alignment_count'] is not None):
            diffexp_command += (
                ' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params and
                self.method_params['multi_read_correct'] != 0):
            diffexp_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params and
                self.method_params['library_type'] is not None):
            diffexp_command += (' --library-type ' +
                                self.method_params['library_type'])
        if ('library_norm_method' in self.method_params and
                self.method_params['library_norm_method'] is not None):
            diffexp_command += (' --library-norm-method ' +
                                self.method_params['library_norm_method'])
        try:
            diffexp_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            logger.info("Executing: diffexp {0}".format(diffexp_command))
            ret = script_util.runProgram(None, "diffexp", diffexp_command,
                                         None, self.directory)
            for line in ret["result"].splitlines(False):
                logger.info(line)
            # Echo stderr, but log repeated '> Processing Locus' progress
            # lines only when the locus id actually changes.
            prev_value = ''
            for line in ret["stderr"].splitlines(False):
                if line.startswith('> Processing Locus'):
                    cur_value = line.split()[-1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        logger.info(line)
                else:
                    prev_value = ''
                    logger.info(line)
        except Exception as e:
            # BUGFIX: the original did `raise Exception(e)` first, making
            # this descriptive raise unreachable dead code.
            raise Exception("Error executing diffexp {0},{1}".format(
                diffexp_command, e))
        try:
            logger.info("Zipping DiffExpforBallgown output")
            out_file_path = os.path.join(
                self.directory,
                "{0}.zip".format(self.method_params['output_obj_name']))
            script_util.zip_files(logger, output_dir, out_file_path)
        except Exception:
            # BUGFIX: the original message said "Error executing diffexp",
            # which is misleading for a zip failure.
            raise Exception(
                "Error zipping diffexp output {0}".format(out_file_path))
# NOTE(review): mid-function excerpt — b_tasks, diffexp_dir, params, hs,
# logger, run_tool, tool_version, condition and genome_id are defined
# outside this view; the cm_obj dict is truncated at the end of the span.
results=run_diffexp_for_ballgown_in_parallel(b_tasks)
print results
#expr_file, single_ballgown_dir = results
#### Check if all the jobs passed
ballgownobject_name = params['expressionset_id']+"_DifferentialExpression_Ballgown"
ballgown_dir = os.path.join(diffexp_dir,"ballgown")
#reportObj=script_util.create_RNASeq_ExpressionSet_and_build_report(logger,ws_client,TOOL_USED, TOOL_VERSION,tool_opts,params['ws_id'],align_names,expressionset_id,annotation_id,sampleset_id,results,expressionSet_name)
### Save Ballgown differential Expression object to workspace
#except Exception,e:
#   raise Exception("Error executing diffexp {0},{1}".format(cuffdiff_command,directory))
## compress and upload to shock
try:
    logger.info("Zipping differential expression output for ballgown")
    out_file_path = os.path.join(diffexp_dir,"{0}.zip".format(params['output_obj_name']))
    script_util.zip_files(logger,ballgown_dir,out_file_path)
except Exception,e:
    raise Exception("Error zipping dir {0}".format(ballgown_dir))
try:
    handle = hs.upload(out_file_path)
except Exception, e:
    # NOTE(review): traceback.print_exc() returns None, so the join below
    # raises TypeError inside the handler; traceback.format_exc() was
    # probably intended.
    print " ".join(traceback.print_exc())
    raise Exception("Failed to upload the diffexp output files: {0}".format(out_file_path))
output_name = params['output_obj_name']
## Save object to workspace
try:
    logger.info("Saving diffexp object to workspace")
    cm_obj = { "tool_used" : run_tool,
               "tool_version" : tool_version,
               "condition" : condition,
               "genome_id" : genome_id,
#expr_file, single_ballgown_dir = results #### Check if all the jobs passed ballgownobject_name = params[ 'expressionset_id'] + "_DifferentialExpression_Ballgown" ballgown_dir = os.path.join(diffexp_dir, "ballgown") #reportObj=script_util.create_RNASeq_ExpressionSet_and_build_report(logger,ws_client,TOOL_USED, TOOL_VERSION,tool_opts,params['ws_id'],align_names,expressionset_id,annotation_id,sampleset_id,results,expressionSet_name) ### Save Ballgown differential Expression object to workspace #except Exception,e: # raise Exception("Error executing diffexp {0},{1}".format(cuffdiff_command,directory)) ## compress and upload to shock try: logger.info("Zipping differential expression output for ballgown") out_file_path = os.path.join( diffexp_dir, "{0}.zip".format(params['output_obj_name'])) script_util.zip_files(logger, ballgown_dir, out_file_path) except Exception, e: raise Exception("Error zipping dir {0}".format(ballgown_dir)) try: handle = hs.upload(out_file_path) except Exception, e: print " ".join(traceback.print_exc()) raise Exception( "Failed to upload the diffexp output files: {0}".format( out_file_path)) output_name = params['output_obj_name'] ## Save object to workspace try: logger.info("Saving diffexp object to workspace") cm_obj = { "tool_used": run_tool,
class Cuffdiff(ExecutionBase):
    """Cuffdiff differential-expression task.

    prepare() fetches the expression-set details and builds the replicate
    BAM / label lists; runEach() runs cuffmerge, assembles the cuffdiff
    command line, executes it and zips the output directory.
    """

    def __init__(self, logger, directory, urls, max_cores):
        # BUGFIX: super(self.__class__, self) recurses infinitely if this
        # class is ever subclassed; name the lookup class explicitly.
        super(Cuffdiff, self).__init__(logger, directory, urls, max_cores)
        # shared state used across prepare()/runEach()
        self.num_threads = None
        self.tool_used = "Cuffdiff"
        self.tool_version = "1.2.3"

    def prepare(self):
        """Collect parameters, expression-set details and replicate files."""
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        cuffdiff_dir = self.directory
        self.details = rnaseq_util.get_details_for_diff_exp(
            logger, ws_client, hs, params['ws_id'], self.urls, cuffdiff_dir,
            params['expressionset_id'], token)
        self.num_threads = mp.cpu_count()
        self.num_jobs = 1
        als = []
        for l in self.details['labels']:
            # comma-separated accepted_hits.bam paths, one per replicate
            # subdirectory of this condition label
            rep_files = ",".join([
                os.path.join(cuffdiff_dir + '/' + l, sub + '/accepted_hits.bam')
                for sub in os.listdir(os.path.join(cuffdiff_dir, l))
                if os.path.isdir(os.path.join(cuffdiff_dir, l + '/' + sub))
            ])
            als.append(rep_files)
        # space-separated condition groups, as cuffdiff expects
        self.bam_files = " ".join(als)
        self.t_labels = ",".join(self.details['labels'])
        self.task_list = [self.__class__]

    def runEach(self, task_list):
        """Run cuffmerge + cuffdiff for one task and zip the output dir."""
        ### Call Cuffmerge function
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run Cuffdiff
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        cuffdiff_command = (' -p ' + str(self.num_threads))
        ### Setting Advanced parameters for Cuffdiff
        if ('time_series' in self.method_params and
                self.method_params['time_series'] != 0):
            cuffdiff_command += (' -T ')
        if ('min_alignment_count' in self.method_params and
                self.method_params['min_alignment_count'] is not None):
            cuffdiff_command += (
                ' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params and
                self.method_params['multi_read_correct'] != 0):
            cuffdiff_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params and
                self.method_params['library_type'] is not None):
            cuffdiff_command += (' --library-type ' +
                                 self.method_params['library_type'])
        if ('library_norm_method' in self.method_params and
                self.method_params['library_norm_method'] is not None):
            cuffdiff_command += (' --library-norm-method ' +
                                 self.method_params['library_norm_method'])
        try:
            cuffdiff_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            self.logger.info(
                "Executing: cuffdiff {0}".format(cuffdiff_command))
            ret = script_util.runProgram(None, "cuffdiff", cuffdiff_command,
                                         None, self.directory)
            for line in ret["result"].splitlines(False):
                self.logger.info(line)
            # Echo stderr, but log repeated '> Processing Locus' progress
            # lines only when the locus id actually changes.
            prev_value = ''
            for line in ret["stderr"].splitlines(False):
                if line.startswith('> Processing Locus'):
                    cur_value = line.split()[-1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        self.logger.info(line)
                else:
                    prev_value = ''
                    self.logger.info(line)
        except Exception as e:
            # BUGFIX: the original did `raise Exception(e)` first, making
            # this descriptive raise unreachable dead code.
            raise Exception("Error executing cuffdiff {0},{1}".format(
                cuffdiff_command, e))
        try:
            self.logger.info("Zipping Cuffdiff output")
            out_file_path = os.path.join(
                self.directory,
                "{0}.zip".format(self.method_params['output_obj_name']))
            script_util.zip_files(self.logger, output_dir, out_file_path)
        except Exception:
            # BUGFIX: the original message said "Error executing cuffdiff",
            # which is misleading for a zip failure.
            raise Exception(
                "Error zipping cuffdiff output {0}".format(out_file_path))