def _CallStringtie(logger, services, ws_client, hs, ws_id, num_threads,
                   s_alignment, gtf_file, directory, genome_id, annotation_id,
                   sample_id, alignmentset_id, params, token):
    """Download one sample alignment from Shock and unpack it for StringTie.

    The alignment object referenced by ``s_alignment`` is fetched through
    ``ws_client``; its file is downloaded into
    ``<directory>/<sample>_stringtie_input`` and unzipped into a sub-directory
    named after the alignment object.

    Args:
        logger: logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: mapping that must contain ``'shock_service_url'``.
        ws_client: workspace client providing ``get_object_info`` and
            ``get_objects``.
        s_alignment: workspace reference of the alignment object.
        directory: working directory under which input folders are created.
        token: auth token forwarded to the Shock download.
        hs, ws_id, num_threads, gtf_file, genome_id, annotation_id, sample_id,
        alignmentset_id, params: accepted for interface compatibility; not
            referenced by the visible part of this function.

    Raises:
        Exception: when the download or the unzip step fails, or when any
            other step raises (re-raised after logging).

    NOTE(review): the visible source stops after the unzip step; no StringTie
    invocation is present here.
    """
    print("Downloading Read Sample{0}".format(s_alignment))
    alignment_name = ws_client.get_object_info(
        [{"ref": s_alignment}], includeMetadata=None)[0][1]
    if not logger:
        logger = handler_util.create_logger(
            directory, "run_Stringtie_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        sample_prefix = alignment_name.split('_alignment')[0]
        input_direc = os.path.join(
            directory, sample_prefix + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        # NOTE(review): output_dir and condition are not referenced in the
        # visible part of this function.
        output_name = sample_prefix + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)

        # Download Alignment from shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception:
            # The original spelled this "".join.tracback.format_exc(), which
            # raised AttributeError and masked the real download failure.
            raise Exception(
                "Unable to download shock file, {0},{1}".format(
                    a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(
                logger, os.path.join(input_direc, a_filename), input_dir)
        except Exception:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
    except Exception:
        # The outer try had no handler; log and propagate unchanged.
        logger.exception(
            "Error preparing StringTie input for {0}".format(alignment_name))
        raise
def _CalldiffExpCallforBallgown(logger, services, ws_client, hs, ws_id,
                                num_threads, alignment_file, transcripts_gtf,
                                merged_gtf, used_tool, directory, gtf_file):
    """Prepare the per-sample Ballgown directory and run the table-building tool.

    Creates ``<directory>/ballgown/<X>_<Y>`` where ``X`` and ``Y`` are the
    third-last and second-last ``/``-separated components of
    ``transcripts_gtf``. Depending on ``used_tool`` it then calls
    ``call_stringtieBall`` (``'StringTie'``) or ``call_tablemaker``
    (``'Cufflinks'``); any other value runs no tool.

    Args:
        logger: logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        num_threads, merged_gtf, alignment_file: forwarded to the tool call.
        transcripts_gtf: path of the sample's transcripts file; also the first
            element of the returned tuple.
        used_tool: ``'StringTie'`` or ``'Cufflinks'``.
        directory: working directory that holds the ``ballgown`` folder.
        services, ws_client, hs, ws_id, gtf_file: accepted for interface
            compatibility; not referenced here.

    Returns:
        tuple: ``(transcripts_gtf, output_dir)``.

    Raises:
        Exception: wrapping any failure, after it has been logged.
    """
    import errno  # local import so the block does not depend on file-level imports

    def ensure_dir(path):
        # The "ballgown" directory is shared by every call on the same
        # working directory, so an exists()-then-mkdir() check can race;
        # attempt the mkdir and tolerate "already exists".
        try:
            os.mkdir(path)
        except OSError as err:
            if err.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    print("Running Differential Expression steps for {0}".format(
        transcripts_gtf))
    if not logger:
        logger = handler_util.create_logger(
            directory,
            "run_diffExpCallforBallgown_" + str(hex(uuid.getnode())))
    try:
        ballgown_dir = os.path.join(directory, "ballgown")
        ensure_dir(ballgown_dir)
        print(ballgown_dir)
        print(transcripts_gtf)

        path_parts = transcripts_gtf.split("/")
        output_name = path_parts[-3] + "_" + path_parts[-2]
        output_dir = os.path.join(ballgown_dir, output_name)
        ensure_dir(output_dir)
        print(output_dir)

        if used_tool == 'StringTie':
            print("Entering StringTie")
            # NOTE(review): StringTie is handed ballgown_dir while tablemaker
            # gets the per-sample output_dir -- confirm this is intended.
            call_stringtieBall(directory, ballgown_dir, num_threads,
                               merged_gtf, alignment_file)
        elif used_tool == 'Cufflinks':
            print("Entering Tablemaker")
            # Report the arguments that are really passed (output_dir, not
            # ballgown_dir as the old message claimed).
            print("Args passed to table maker : {0}, {1} ,{2} ,{3}, {4}".format(
                directory, output_dir, num_threads, merged_gtf,
                alignment_file))
            call_tablemaker(directory, output_dir, num_threads, merged_gtf,
                            alignment_file)

        # tablemaker is pointed at output_dir, so look there as well as in
        # the shared ballgown_dir that the original check used.
        ctab_found = any(
            os.path.exists(os.path.join(folder, "t_data.ctab"))
            for folder in (output_dir, ballgown_dir))
        if ctab_found:
            logger.info(
                "Running Differential Expression for Sample {0} completed successfully"
                .format(transcripts_gtf))
            print(
                "Running Differential Expression for Sample {0} completed successfully"
                .format(transcripts_gtf))
        print(transcripts_gtf + ' : ' + output_dir)
        return (transcripts_gtf, output_dir)
    except Exception as e:
        # logger.exception already appends the active traceback.
        logger.exception(e)
        raise Exception(
            "Error executing ballgown differential expression {0},{1}".format(
                transcripts_gtf, directory))
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, condition, directory, genome_id,
                sampleset_id, params, token):
    """Download one read sample and assemble its HISAT2 argument string.

    The read library named ``read_sample`` is looked up in workspace
    ``ws_id``; its file(s) are downloaded from Shock into
    ``<directory>/<sample>_hisat2_input`` and an argument string is built that
    writes ``accepted_hits.sam`` into ``<directory>/<sample>_hisat2_alignment``.

    Args:
        logger: logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: mapping that must contain ``'shock_service_url'``.
        ws_client: workspace client providing ``get_objects`` and
            ``get_object_info_new``.
        ws_id: workspace holding the read sample.
        sample_type: overwritten with the type read from the workspace object
            info, so the passed value is ignored.
        num_threads: value for HISAT2 ``-p``.
        read_sample: name of the read library object.
        directory: working directory; also searched for the ``.1.ht2`` index.
        params: mapping of optional HISAT2 settings.
        token: auth token forwarded to the Shock download.
        hs, condition, genome_id, sampleset_id: accepted for interface
            compatibility; not referenced by the visible part of this function.

    Raises:
        Exception: when a download fails or any other step raises
            (re-raised after logging).

    NOTE(review): the visible source ends once the argument string is
    assembled; running HISAT2 and returning a result are not present here.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = handler_util.create_logger(
            directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects(
            [{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]

        sample_prefix = read_sample.split('.')[0]
        input_direc = os.path.join(directory, sample_prefix + "_hisat2_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = sample_prefix + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        hisat2_base = os.path.join(
            directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))

        # Advanced HISAT2 options, appended in the same order as before.
        hisat2_cmd = ' -p {0}'.format(num_threads)
        # Parameters whose value is itself the flag name.
        for key in ('quality_score', 'alignment_type'):
            if key in params and params[key] is not None:
                hisat2_cmd += ' --' + params[key]
        # Parameters passed as "--flag value".
        valued_flags = (
            ('trim5', '--trim5'),
            ('trim3', '--trim3'),
            ('np', '--np'),
            ('minins', '--minins'),
            ('maxins', '--maxins'),
            ('min_intron_length', '--min-intronlen'),
            ('max_intron_length', '--max-intronlen'),
        )
        for key, flag in valued_flags:
            if key in params and params[key] is not None:
                hisat2_cmd += ' ' + flag + ' ' + str(params[key])
        # Switches enabled by any value other than 0.
        switch_flags = (
            ('no_spliced_alignment', '--no-spliced-alignment'),
            ('transcriptome_mapping_only', '--transcriptome-mapping-only'),
        )
        for key, flag in switch_flags:
            if key in params and params[key] != 0:
                hisat2_cmd += ' ' + flag
        if 'tailor_alignments' in params and params['tailor_alignments'] is not None:
            hisat2_cmd += ' --' + params['tailor_alignments']

        out_file = output_dir + "/accepted_hits.sam"
        # NOTE(review): lib_type and the finished hisat2_cmd are not consumed
        # by the visible part of this function.
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base,
                    out_file)
            except Exception:
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if 'orientation' in params and params['orientation'] is not None:
                hisat2_cmd += ' --' + params['orientation']
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                # Both mates are downloaded into input_direc; the second mate
                # was previously looked up in output_dir, where it never exists.
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name),
                    hisat2_base, out_file)
            except Exception:
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
    except Exception:
        # The outer try had no handler; log and propagate unchanged.
        logger.exception(
            "Error preparing HISAT2 run for {0}".format(read_sample))
        raise
def runEach(self, task_params):
    """Download and unpack the alignment for one StringTie task.

    Reads the clients and token from ``self.common_params``, the task
    settings from ``task_params``, downloads the alignment referenced by
    ``task_params['job_id']`` from Shock into
    ``<stringtie_dir>/<sample>_stringtie_input`` and unzips it into a
    sub-directory named after the alignment object.

    Args:
        task_params: mapping with the keys ``job_id``, ``gtf_file``,
            ``stringtie_dir``, ``genome_id``, ``annotation_id``,
            ``sample_id``, ``alignmentset_id`` and ``ws_id``.

    Raises:
        KeyError: when a required key is missing from ``task_params`` or
            ``self.common_params``.
        Exception: when the download or the unzip step fails, or when any
            other step raises (re-raised after logging).

    NOTE(review): the visible source stops after the unzip step; several
    values read below are not used within it.
    """
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    s_alignment = task_params['job_id']
    gtf_file = task_params['gtf_file']
    directory = task_params['stringtie_dir']
    genome_id = task_params['genome_id']
    annotation_id = task_params['annotation_id']
    sample_id = task_params['sample_id']
    alignmentset_id = task_params['alignmentset_id']
    ws_id = task_params['ws_id']

    print("Downloading Sample Alignment from workspace {0}".format(s_alignment))
    alignment_name = ws_client.get_object_info(
        [{"ref": s_alignment}], includeMetadata=None)[0][1]
    # Create the fallback logger BEFORE the first logging call; the original
    # called logger.info first, which crashed whenever self.logger was unset.
    if not logger:
        logger = handler_util.create_logger(
            directory, "run_Stringtie_" + alignment_name)
    logger.info(
        "Downloading Sample Alignment from workspace {0}".format(s_alignment))
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        sample_prefix = alignment_name.split('_alignment')[0]
        input_direc = os.path.join(
            directory, sample_prefix + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = sample_prefix + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)

        # Download Alignment from shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=self.urls['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception:
            raise Exception(
                "Unable to download shock file, {0},{1}".format(
                    a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(
                logger, os.path.join(input_direc, a_filename), input_dir)
        except Exception as e:
            # A leading `raise Exception(e)` used to make the logging and the
            # descriptive error unreachable; log first, then raise one error
            # that carries both the description and the original message.
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error: {0}".format(e))
    except Exception:
        # The outer try had no handler; log and propagate unchanged.
        logger.exception(
            "Error preparing StringTie input for {0}".format(alignment_name))
        raise
def _CalldiffExpCallforBallgown(logger, services, ws_client, hs, ws_id,
                                num_threads, alignment_file, transcripts_gtf,
                                merged_gtf, used_tool, directory, gtf_file):
    """Prepare the per-sample Ballgown directory and run the table-building tool.

    Creates ``<directory>/ballgown/<X>_<Y>`` where ``X`` and ``Y`` are the
    third-last and second-last ``/``-separated components of
    ``transcripts_gtf``. Depending on ``used_tool`` it then calls
    ``call_stringtieBall`` (``'StringTie'``) or ``call_tablemaker``
    (``'Cufflinks'``); any other value runs no tool.

    Args:
        logger: logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        num_threads, merged_gtf, alignment_file: forwarded to the tool call.
        transcripts_gtf: path of the sample's transcripts file; also the first
            element of the returned tuple.
        used_tool: ``'StringTie'`` or ``'Cufflinks'``.
        directory: working directory that holds the ``ballgown`` folder.
        services, ws_client, hs, ws_id, gtf_file: accepted for interface
            compatibility; not referenced here.

    Returns:
        tuple: ``(transcripts_gtf, output_dir)``.

    Raises:
        Exception: wrapping any failure, after it has been logged.
    """
    import errno  # local import so the block does not depend on file-level imports

    def ensure_dir(path):
        # The "ballgown" directory is shared by every call on the same
        # working directory, so an exists()-then-mkdir() check can race;
        # attempt the mkdir and tolerate "already exists".
        try:
            os.mkdir(path)
        except OSError as err:
            if err.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    print("Running Differential Expression steps for {0}".format(
        transcripts_gtf))
    if not logger:
        logger = handler_util.create_logger(
            directory,
            "run_diffExpCallforBallgown_" + str(hex(uuid.getnode())))
    try:
        ballgown_dir = os.path.join(directory, "ballgown")
        ensure_dir(ballgown_dir)
        print(ballgown_dir)
        print(transcripts_gtf)

        path_parts = transcripts_gtf.split("/")
        output_name = path_parts[-3] + "_" + path_parts[-2]
        output_dir = os.path.join(ballgown_dir, output_name)
        ensure_dir(output_dir)
        print(output_dir)

        if used_tool == 'StringTie':
            print("Entering StringTie")
            # NOTE(review): StringTie is handed ballgown_dir while tablemaker
            # gets the per-sample output_dir -- confirm this is intended.
            call_stringtieBall(directory, ballgown_dir, num_threads,
                               merged_gtf, alignment_file)
        elif used_tool == 'Cufflinks':
            print("Entering Tablemaker")
            # Report the arguments that are really passed (output_dir, not
            # ballgown_dir as the old message claimed).
            print("Args passed to table maker : {0}, {1} ,{2} ,{3}, {4}".format(
                directory, output_dir, num_threads, merged_gtf,
                alignment_file))
            call_tablemaker(directory, output_dir, num_threads, merged_gtf,
                            alignment_file)

        # tablemaker is pointed at output_dir, so look there as well as in
        # the shared ballgown_dir that the original check used.
        ctab_found = any(
            os.path.exists(os.path.join(folder, "t_data.ctab"))
            for folder in (output_dir, ballgown_dir))
        if ctab_found:
            logger.info(
                "Running Differential Expression for Sample {0} completed successfully"
                .format(transcripts_gtf))
            print(
                "Running Differential Expression for Sample {0} completed successfully"
                .format(transcripts_gtf))
        print(transcripts_gtf + ' : ' + output_dir)
        return (transcripts_gtf, output_dir)
    except Exception as e:
        # logger.exception already appends the active traceback.
        logger.exception(e)
        raise Exception(
            "Error executing ballgown differential expression {0},{1}".format(
                transcripts_gtf, directory))
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, condition, directory, genome_id,
                sampleset_id, params, token):
    """Download one read sample and assemble its HISAT2 argument string.

    The read library named ``read_sample`` is looked up in workspace
    ``ws_id``; its file(s) are downloaded from Shock into
    ``<directory>/<sample>_hisat2_input`` and an argument string is built that
    writes ``accepted_hits.sam`` into ``<directory>/<sample>_hisat2_alignment``.

    Args:
        logger: logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: mapping that must contain ``'shock_service_url'``.
        ws_client: workspace client providing ``get_objects`` and
            ``get_object_info_new``.
        ws_id: workspace holding the read sample.
        sample_type: overwritten with the type read from the workspace object
            info, so the passed value is ignored.
        num_threads: value for HISAT2 ``-p``.
        read_sample: name of the read library object.
        directory: working directory; also searched for the ``.1.ht2`` index.
        params: mapping of optional HISAT2 settings.
        token: auth token forwarded to the Shock download.
        hs, condition, genome_id, sampleset_id: accepted for interface
            compatibility; not referenced by the visible part of this function.

    Raises:
        Exception: when a download fails or any other step raises
            (re-raised after logging).

    NOTE(review): the visible source ends once the argument string is
    assembled; running HISAT2 and returning a result are not present here.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = handler_util.create_logger(
            directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects(
            [{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]

        sample_prefix = read_sample.split('.')[0]
        input_direc = os.path.join(directory, sample_prefix + "_hisat2_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = sample_prefix + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        hisat2_base = os.path.join(
            directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))

        # Advanced HISAT2 options, appended in the same order as before.
        hisat2_cmd = ' -p {0}'.format(num_threads)
        # Parameters whose value is itself the flag name.
        for key in ('quality_score', 'alignment_type'):
            if key in params and params[key] is not None:
                hisat2_cmd += ' --' + params[key]
        # Parameters passed as "--flag value".
        valued_flags = (
            ('trim5', '--trim5'),
            ('trim3', '--trim3'),
            ('np', '--np'),
            ('minins', '--minins'),
            ('maxins', '--maxins'),
            ('min_intron_length', '--min-intronlen'),
            ('max_intron_length', '--max-intronlen'),
        )
        for key, flag in valued_flags:
            if key in params and params[key] is not None:
                hisat2_cmd += ' ' + flag + ' ' + str(params[key])
        # Switches enabled by any value other than 0.
        switch_flags = (
            ('no_spliced_alignment', '--no-spliced-alignment'),
            ('transcriptome_mapping_only', '--transcriptome-mapping-only'),
        )
        for key, flag in switch_flags:
            if key in params and params[key] != 0:
                hisat2_cmd += ' ' + flag
        if 'tailor_alignments' in params and params['tailor_alignments'] is not None:
            hisat2_cmd += ' --' + params['tailor_alignments']

        out_file = output_dir + "/accepted_hits.sam"
        # NOTE(review): lib_type and the finished hisat2_cmd are not consumed
        # by the visible part of this function.
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base,
                    out_file)
            except Exception:
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if 'orientation' in params and params['orientation'] is not None:
                hisat2_cmd += ' --' + params['orientation']
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                # Both mates are downloaded into input_direc; the second mate
                # was previously looked up in output_dir, where it never exists.
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name),
                    hisat2_base, out_file)
            except Exception:
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
    except Exception:
        # The outer try had no handler; log and propagate unchanged.
        logger.exception(
            "Error preparing HISAT2 run for {0}".format(read_sample))
        raise
def runEach(self, task_params):
    """Download and unpack the alignment for one StringTie task.

    Reads the clients and token from ``self.common_params``, the task
    settings from ``task_params``, downloads the alignment referenced by
    ``task_params['job_id']`` from Shock into
    ``<stringtie_dir>/<sample>_stringtie_input`` and unzips it into a
    sub-directory named after the alignment object.

    Args:
        task_params: mapping with the keys ``job_id``, ``gtf_file``,
            ``stringtie_dir``, ``genome_id``, ``annotation_id``,
            ``sample_id``, ``alignmentset_id`` and ``ws_id``.

    Raises:
        KeyError: when a required key is missing from ``task_params`` or
            ``self.common_params``.
        Exception: when the download or the unzip step fails, or when any
            other step raises (re-raised after logging).

    NOTE(review): the visible source stops after the unzip step; several
    values read below are not used within it.
    """
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    s_alignment = task_params['job_id']
    gtf_file = task_params['gtf_file']
    directory = task_params['stringtie_dir']
    genome_id = task_params['genome_id']
    annotation_id = task_params['annotation_id']
    sample_id = task_params['sample_id']
    alignmentset_id = task_params['alignmentset_id']
    ws_id = task_params['ws_id']

    print("Downloading Sample Alignment from workspace {0}".format(s_alignment))
    alignment_name = ws_client.get_object_info(
        [{"ref": s_alignment}], includeMetadata=None)[0][1]
    # Create the fallback logger BEFORE the first logging call; the original
    # called logger.info first, which crashed whenever self.logger was unset.
    if not logger:
        logger = handler_util.create_logger(
            directory, "run_Stringtie_" + alignment_name)
    logger.info(
        "Downloading Sample Alignment from workspace {0}".format(s_alignment))
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        sample_prefix = alignment_name.split('_alignment')[0]
        input_direc = os.path.join(
            directory, sample_prefix + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = sample_prefix + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)

        # Download Alignment from shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=self.urls['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception:
            raise Exception(
                "Unable to download shock file, {0},{1}".format(
                    a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(
                logger, os.path.join(input_direc, a_filename), input_dir)
        except Exception as e:
            # A leading `raise Exception(e)` used to make the logging and the
            # descriptive error unreachable; log first, then raise one error
            # that carries both the description and the original message.
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error: {0}".format(e))
    except Exception:
        # The outer try had no handler; log and propagate unchanged.
        logger.exception(
            "Error preparing StringTie input for {0}".format(alignment_name))
        raise