def _CallCufflinks(logger, services, ws_client, hs, ws_id, num_threads, s_alignment,
                   gtf_file, directory, genome_id, annotation_id, sample_id,
                   alignmentset_id, params, token):
    print "Downloading Read Sample {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = create_logger(directory, "run_Cufflinks_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        output_name = alignment_name.split('_alignment')[0] + "_cufflinks_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=directory,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0}".format(a_filename))
        try:
            input_dir = os.path.join(directory, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(directory, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unable to unzip alignment files")
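# Illustrative sketch, not part of the service code: script_util.unzip_files is
# assumed here to extract a zip archive into a target directory. A minimal
# standard-library stand-in with that assumed behaviour looks like this:
import os
import zipfile

def unzip_files_sketch(logger, zip_path, target_dir):
    """Hypothetical stand-in for script_util.unzip_files (assumed semantics)."""
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    archive = zipfile.ZipFile(zip_path)
    try:
        archive.extractall(target_dir)
    finally:
        archive.close()
    logger.info("Extracted {0} into {1}".format(zip_path, target_dir))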
def _CallStringtie(logger, services, ws_client, hs, ws_id, num_threads, s_alignment,
                   gtf_file, directory, genome_id, annotation_id, sample_id,
                   alignmentset_id, params, token):
    print "Downloading Read Sample {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = handler_util.create_logger(directory, "run_Stringtie_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        input_direc = os.path.join(directory,
                                   alignment_name.split('_alignment')[0] + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = alignment_name.split('_alignment')[0] + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0},{1}".format(
                a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(input_direc, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
def runEach(self, task_params):
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    s_alignment = task_params['job_id']
    gtf_file = task_params['gtf_file']
    directory = task_params['stringtie_dir']
    genome_id = task_params['genome_id']
    annotation_id = task_params['annotation_id']
    sample_id = task_params['sample_id']
    alignmentset_id = task_params['alignmentset_id']
    ws_id = task_params['ws_id']
    print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = handler_util.create_logger(directory, "run_Stringtie_" + alignment_name)
    logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        input_direc = os.path.join(directory,
                                   alignment_name.split('_alignment')[0] + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = alignment_name.split('_alignment')[0] + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=self.urls['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0},{1}".format(
                a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(input_direc, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
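# Illustrative only: the task_params dict consumed by runEach above is assumed
# to look roughly like the following; every value is a hypothetical placeholder.
example_task_params = {
    'job_id': '123/4/1',                       # workspace ref of the alignment object
    'gtf_file': '/mnt/scratch/genome_GTF_Annotation.gtf',
    'stringtie_dir': '/mnt/scratch/stringtie',
    'genome_id': '123/2/1',
    'annotation_id': '123/3/1',
    'sample_id': '123/5/1',
    'alignmentset_id': '123/6/1',
    'ws_id': 'my_workspace',
}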
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                read_sample, condition, directory, genome_id, sampleset_id, params,
                token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = handler_util.create_logger(directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample {0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        input_direc = os.path.join(directory, read_sample.split('.')[0] + "_hisat2_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = read_sample.split('.')[0] + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        hisat2_base = os.path.join(directory,
                                   handler_util.get_file_with_suffix(directory, ".1.ht2"))
        # Adding advanced options to the HISAT2 call
        hisat2_cmd = ''
        hisat2_cmd += ' -p {0}'.format(num_threads)
        if 'quality_score' in params and params['quality_score'] is not None:
            hisat2_cmd += ' --' + params['quality_score']
        if 'alignment_type' in params and params['alignment_type'] is not None:
            hisat2_cmd += ' --' + params['alignment_type']
        if 'trim5' in params and params['trim5'] is not None:
            hisat2_cmd += ' --trim5 ' + str(params['trim5'])
        if 'trim3' in params and params['trim3'] is not None:
            hisat2_cmd += ' --trim3 ' + str(params['trim3'])
        if 'np' in params and params['np'] is not None:
            hisat2_cmd += ' --np ' + str(params['np'])
        if 'minins' in params and params['minins'] is not None:
            hisat2_cmd += ' --minins ' + str(params['minins'])
        if 'maxins' in params and params['maxins'] is not None:
            hisat2_cmd += ' --maxins ' + str(params['maxins'])
        if 'min_intron_length' in params and params['min_intron_length'] is not None:
            hisat2_cmd += ' --min-intronlen ' + str(params['min_intron_length'])
        if 'max_intron_length' in params and params['max_intron_length'] is not None:
            hisat2_cmd += ' --max-intronlen ' + str(params['max_intron_length'])
        if 'no_spliced_alignment' in params and params['no_spliced_alignment'] != 0:
            hisat2_cmd += ' --no-spliced-alignment'
        if 'transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0:
            hisat2_cmd += ' --transcriptome-mapping-only'
        if 'tailor_alignments' in params and params['tailor_alignments'] is not None:
            hisat2_cmd += ' --' + params['tailor_alignments']
        out_file = output_dir + "/accepted_hits.sam"
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if 'orientation' in params and params['orientation'] is not None:
                hisat2_cmd += ' --' + params['orientation']
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                # Both mates are downloaded into input_direc, so both paths are
                # built from input_direc (the original joined read2 against output_dir).
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name), hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
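# The aligner wrappers in this file repeat the "append a flag when the
# parameter is set" check for every option. A small helper along these lines
# (an optional refactoring sketch, not existing code) would centralize that:
def append_option(cmd, params, key, flag, takes_value=True):
    """Append ' flag value' (or a bare ' flag') to cmd when params[key] is set."""
    if key in params and params[key] is not None:
        if takes_value:
            return cmd + ' {0} {1}'.format(flag, params[key])
        return cmd + ' ' + flag
    return cmd

# e.g. hisat2_cmd = append_option(hisat2_cmd, params, 'trim5', '--trim5')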
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                 read_sample, condition, directory, bowtie2index_id, genome_id,
                 sampleset_id, params, token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Bowtie2_" + read_sample)
    logger.info("Downloading Read Sample {0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        out_file = output_dir + "/accepted_hits.sam"
        bowtie2_base = os.path.join(directory,
                                    handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        # Adding advanced options to the Bowtie2 call
        bowtie2_cmd = ''
        bowtie2_cmd += ' -p {0}'.format(num_threads)
        if 'quality_score' in params and params['quality_score'] is not None:
            bowtie2_cmd += ' --' + params['quality_score']
        if 'alignment_type' in params and params['alignment_type'] is not None:
            bowtie2_cmd += ' --' + params['alignment_type']
        if ('preset_options' in params and params['preset_options'] is not None) and \
           ('alignment_type' in params and params['alignment_type'] is not None):
            if params['alignment_type'] == 'local':
                bowtie2_cmd += ' --' + params['preset_options'] + '-local'
            else:
                bowtie2_cmd += ' --' + params['preset_options']
        if 'trim5' in params and params['trim5'] is not None:
            bowtie2_cmd += ' --trim5 ' + str(params['trim5'])
        if 'trim3' in params and params['trim3'] is not None:
            bowtie2_cmd += ' --trim3 ' + str(params['trim3'])
        if 'np' in params and params['np'] is not None:
            bowtie2_cmd += ' --np ' + str(params['np'])
        if 'minins' in params and params['minins'] is not None:
            bowtie2_cmd += ' --minins ' + str(params['minins'])
        if 'maxins' in params and params['maxins'] is not None:
            bowtie2_cmd += ' --maxins ' + str(params['maxins'])
        if 'orientation' in params and params['orientation'] is not None:
            bowtie2_cmd += ' --' + params['orientation']
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(directory, read_name), bowtie2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name), bowtie2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):
    returnVal = False
    # Get the input data's Shock id and filename.
    cuffdiff_shock_id = s_res[0]['data']['file']['id']
    cuffdiff_file_name = s_res[0]['data']['file']['file_name']
    filesize = None
    dx = script_util.download_file_from_shock(logger, shock_url, cuffdiff_shock_id,
                                              cuffdiff_file_name, scratch, filesize,
                                              user_token)
    # Decompress the archive and keep it in a directory
    zip_file = join(scratch, cuffdiff_file_name)
    dstnExtractFolder1 = join(scratch, "cuffdiffData")
    dstnExtractFolder = join(dstnExtractFolder1, "cuffdiff")
    if not os.path.exists(dstnExtractFolder):
        os.makedirs(dstnExtractFolder)
    unzipStatus = script_util.unzip_files(logger, zip_file, dstnExtractFolder)
    if unzipStatus == False:
        logger.info("Problem extracting the archive")
        return returnVal
    foldersinExtractFolder = os.listdir(dstnExtractFolder)
    if len(foldersinExtractFolder) == 0:
        logger.info("Problem extracting the archive")
        return returnVal
    cuffdiff_dir = dstnExtractFolder
    return cuffdiff_dir
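# Usage sketch (hypothetical caller): extract_cuffdiff_data returns the path of
# the extracted cuffdiff directory on success and False on failure, so the
# return value must be checked before being used as a path.
def _example_extract_call(logger, shock_url, scratch, s_res, user_token):
    cuffdiff_dir = extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token)
    if cuffdiff_dir is False:
        raise Exception("Could not extract the cuffdiff archive")
    return cuffdiff_dir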
def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                read_sample, gtf_file, condition, directory, bowtie2index_id,
                genome_id, sampleset_id, params, token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Tophat_" + read_sample)
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        output_name = read_sample.split('.')[0] + "_tophat_alignment"
        output_dir = os.path.join(directory, output_name)
        bowtie2_base = os.path.join(directory,
                                    handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        # Adding advanced options to the TopHat command
        tophat_cmd = ' -p ' + str(num_threads)
        if 'max_intron_length' in params and params['max_intron_length'] is not None:
            tophat_cmd += ' -I ' + str(params['max_intron_length'])
        if 'min_intron_length' in params and params['min_intron_length'] is not None:
            tophat_cmd += ' -i ' + str(params['min_intron_length'])
        if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
            tophat_cmd += ' -a ' + str(params['min_anchor_length'])
        if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
            tophat_cmd += ' --read-edit-dist ' + str(params['read_edit_dist'])
        if 'read_gap_length' in params and params['read_gap_length'] is not None:
            tophat_cmd += ' --read-gap-length ' + str(params['read_gap_length'])
        if 'read_mismatches' in params and params['read_mismatches'] is not None:
            tophat_cmd += ' -N ' + str(params['read_mismatches'])
        if 'library_type' in params and params['library_type'] is not None:
            tophat_cmd += ' --library-type ' + params['library_type']
        if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
            tophat_cmd += ' --report-secondary-alignments'
        if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
            tophat_cmd += ' --no-coverage-search'
        if 'preset_options' in params and params['preset_options'] is not None:
            tophat_cmd += ' --' + params['preset_options']
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token)
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                    output_dir, gtf_file, bowtie2_base,
                    os.path.join(directory, read_name))
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token)
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base,
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name))
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
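# The wrappers above only assemble the tool's argument string; the excerpt does
# not show how it is executed. One plausible invocation (a sketch assuming the
# tophat/bowtie2/hisat2 binary is on PATH; the actual service may run it
# differently) would be:
import subprocess

def run_tool_sketch(logger, binary, arg_string):
    """Run 'binary arg_string' through the shell and raise on a non-zero exit."""
    cmd = '{0} {1}'.format(binary, arg_string)
    logger.info("Executing: " + cmd)
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise Exception("{0} failed: {1}".format(binary, stderr))
    return stdout

# e.g. run_tool_sketch(logger, 'tophat', tophat_cmd)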
raise ValueError("Please ensure you have atleast 2 expressions to run diffExpCallforBallgown in Set mode") ### Check if the gtf file exists in the workspace. if exists download the file from that annotation_id = e_sample['data']['genome_id'] logger.info("Check if the gtf file exists in the workspace".format(annotation_id)) annotation_name = ws_client.get_object_info([{"ref" :annotation_id}],includeMetadata=None)[0][1] gtf_obj_name = annotation_name+"_GTF_Annotation" ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[gtf_obj_name]) if not ret is None: logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_name)) gtf_obj= ws_client.get_objects([{'name' : gtf_obj_name,'workspace' : params['ws_id']}])[0] gtf_info = ws_client.get_object_info_new({"objects": [{'name': gtf_obj_name, 'workspace': params['ws_id']}]})[0] gtf_annotation_id = str(gtf_info[6]) + '/' + str(gtf_info[0]) + '/' + str(gtf_info[4]) gtf_id=gtf_obj['data']['handle']['id'] gtf_name=gtf_obj['data']['handle']['file_name'] try: script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=diffexp_dir,token=token) gtf_file = os.path.join(diffexp_dir,gtf_name) except Exception,e: raise Exception( "Unable to download shock file, {0}".format(gtf_name)) else: fasta_file= script_util.generate_fasta(logger,services,token,annotation_id,diffexp_dir,annotation_name) logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow())) gtf_file = script_util.create_gtf_annotation(logger,ws_client,hs,services,params['ws_id'],annotation_id,gtf_obj_name,fasta_file,diffexp_dir,token) m_expr_ids = e_sample['data']['mapped_expression_ids'] m_align_exp = [] labels = [] expressions = [] counter = 0
    ret = script_util.if_obj_exists(None, ws_client, params['ws_id'],
                                    "KBaseRNASeq.GFFAnnotation", [ws_gtf])
    print ret
    if ret is not None:
        logger.info("GFF Annotation exists for Genome Annotation {0}.... Skipping step".format(params['genome_id']))
        annot_name, annot_id = ret[0]
        gtf_obj = ws_client.get_objects([{'ref': annot_id}])[0]
        gtf_id = gtf_obj['data']['handle']['id']
        gtf_name = gtf_obj['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=gtf_id,
                filename=gtf_name,
                directory=hisat2_dir,
                token=token)
            gtf_file = os.path.join(hisat2_dir, gtf_name)
        except Exception, e:
            raise Exception("Unable to download shock file, {0}".format(gtf_name))
    else:
        script_util.create_gtf_annotation(logger, ws_client, hs, services,
                                          params['ws_id'], annotation_id,
                                          params['genome_id'], fasta_file,
                                          hisat2_dir, token)
    # Use the number of threads provided by the user; otherwise default to 2.
    if 'num_threads' in params and params['num_threads'] is not None:
        num_threads = int(params['num_threads'])
def blast_against_genome(self, ctx, params):
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN blast_against_genome
    # TODO: Rename blast_search
    try:
        self.__LOGGER.info("Preparing FA")
        if len(params['query']) > 5:
            sequence = params['query']
        else:
            self.__LOGGER.error("The input sequence is too short!")
            raise KBaseGenomeUtilException("The input sequence is too short!")

        if not os.path.exists(self.__TEMP_DIR):
            os.makedirs(self.__TEMP_DIR)

        # Generate the input file for the query sequence
        query_fn = "%s/%s" % (self.__TEMP_DIR, self.__QUERY_FA)
        target = open(query_fn, 'w')
        if sequence.startswith(">"):
            target.write(sequence)
        else:
            seqes = sequence.split("\n")
            for i in range(len(seqes)):
                target.write(">query_seq_%d\n" % (i))
                target.write(seqes[i] + "\n")
        target.close()

        user_token = ctx['token']
        svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        err_msg = ""

        blast_dir = self.__BLAST_DIR
        if os.path.exists(blast_dir):
            files = glob.glob("%s/*" % blast_dir)
            for f in files:
                os.remove(f)
        if not os.path.exists(blast_dir):
            os.makedirs(blast_dir)

        target_fn = "%s/%s" % (blast_dir, self.__GENOME_FA)
        if 'target_seqs' in params:
            # Build the index directly from the given sequences and throw them away
            sequence = params['target_seqs']
            target = open(target_fn, 'w')
            if sequence.startswith(">"):
                target.write(sequence)
            else:
                seqes = sequence.split("\n")
                for i in range(len(seqes)):
                    target.write(">target_seq_%d\n" % (i))
                    target.write(seqes[i] + "\n")
            target.close()

            if self.__INDEX_TYPE[params['blast_program']] == 'protein_db':
                formatdb_type = 'T'
            elif self.__INDEX_TYPE[params['blast_program']] == 'transcript_db':
                formatdb_type = 'F'
            else:
                self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))
            cmdstring = "%s -i %s -p %s -o T" % (self.__INDEX_CMD, target_fn, formatdb_type)
            tool_process = subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = tool_process.communicate()
            if stdout is not None and len(stdout) > 0:
                self.__LOGGER.info(stdout)
            if stderr is not None and len(stderr) > 0:
                self.__LOGGER.error("Index error: " + stderr)
                raise KBaseGenomeUtilException("Index error: " + stderr)
        else:
            try:
                blast_indexes = ws_client.get_object_subset(
                    [{'name': params['blastindex_name'],
                      'workspace': params['ws_id'],
                      'included': ['handle', 'index_type']}])
            except:
                self.__LOGGER.error("Couldn't find %s:%s from the workspace"
                                    % (params['ws_id'], params['blastindex_name']))
                raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace"
                                               % (params['ws_id'], params['blastindex_name']))
            if len(blast_indexes) < 1:
                self.__LOGGER.error("Couldn't find %s:%s from the workspace"
                                    % (params['ws_id'], params['blastindex_name']))
                raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace"
                                               % (params['ws_id'], params['blastindex_name']))

            # TODO: Add error handling
            zip_fn = blast_indexes[0]['data']['handle']['file_name']
            target_fn = "%s/%s" % (blast_dir, zip_fn[:-4])  # remove '.zip'
            if self.__INDEX_TYPE[params['blast_program']] == 'protein_db':
                target_fn += '_aa.fa'
                if blast_indexes[0]['data']['index_type'] in ('none', 'nucleotide'):
                    self.__LOGGER.error("The index object does not contain amino acid sequence indexes")
                    raise KBaseGenomeUtilException(
                        "The index object does not contain amino acid sequence indexes. "
                        "This index will only work with blastn (nucleotide query, nucleotide index), "
                        "tblastn (protein query, nucleotide index) and "
                        "tblastx (nucleotide query, nucleotide index)")
            elif self.__INDEX_TYPE[params['blast_program']] == 'transcript_db':
                target_fn += '_nt.fa'
                if blast_indexes[0]['data']['index_type'] in ('none', 'protein'):
                    self.__LOGGER.error("The index object does not contain nucleotide sequence indexes")
                    raise KBaseGenomeUtilException(
                        "The index object does not contain nucleotide sequence indexes. "
                        "This index will only work with blastp (protein query, protein index) "
                        "and blastx (nucleotide query, protein index)")
            else:
                self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))

            self.__LOGGER.info("Downloading the genome index")
            try:
                script_util.download_file_from_shock(
                    self.__LOGGER,
                    shock_service_url=blast_indexes[0]['data']['handle']['url'],
                    shock_id=blast_indexes[0]['data']['handle']['id'],
                    filename=blast_indexes[0]['data']['handle']['file_name'],
                    directory='.',
                    token=user_token)
            except Exception, e:
                self.__LOGGER.error("Downloading error from shock: Please contact [email protected]")
                raise KBaseGenomeUtilException("Downloading error from shock: Please contact [email protected]")
            try:
                script_util.unzip_files(self.__LOGGER, zip_fn, blast_dir)
            except Exception, e:
                self.__LOGGER.error("Unzip indexfile error: Please contact [email protected]")
                raise KBaseGenomeUtilException("Unzip indexfile error: Please contact [email protected]")
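# blast_against_genome writes the query and the target sequences to FASTA with
# the same inline logic twice. A factored-out version of that logic (a
# hypothetical helper, not present in the service) would look like:
def write_fasta_sketch(sequence, path, prefix):
    """Write raw or FASTA-formatted input to path, adding headers when missing."""
    out = open(path, 'w')
    try:
        if sequence.startswith(">"):
            out.write(sequence)
        else:
            for i, seq in enumerate(sequence.split("\n")):
                out.write(">%s_%d\n" % (prefix, i))
                out.write(seq + "\n")
    finally:
        out.close()

# e.g. write_fasta_sketch(params['query'], query_fn, 'query_seq')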
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
            r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            sample_type = r_sample_info[2].split('-')[0]
            output_name = read_sample.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            #if not os.path.exists(output_dir): os.makedirs(output_dir)
            #out_file = output_dir + "/accepted_hits.sam"
            bowtie2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

            ### Adding advanced options to the TopHat call
            tophat_cmd = ' -p ' + str(self.num_threads)
            if 'max_intron_length' in params and params['max_intron_length'] is not None:
                tophat_cmd += ' -I ' + str(params['max_intron_length'])
            if 'min_intron_length' in params and params['min_intron_length'] is not None:
                tophat_cmd += ' -i ' + str(params['min_intron_length'])
            if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
                tophat_cmd += ' -a ' + str(params['min_anchor_length'])
            if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
                tophat_cmd += ' --read-edit-dist ' + str(params['read_edit_dist'])
            if 'read_gap_length' in params and params['read_gap_length'] is not None:
                tophat_cmd += ' --read-gap-length ' + str(params['read_gap_length'])
            if 'read_mismatches' in params and params['read_mismatches'] is not None:
                tophat_cmd += ' -N ' + str(params['read_mismatches'])
            if 'library_type' in params and params['library_type'] is not None:
                tophat_cmd += ' --library-type ' + params['library_type']
            if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
                tophat_cmd += ' --report-secondary-alignments'
            if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
                tophat_cmd += ' --no-coverage-search'
            if 'preset_options' in params and params['preset_options'] is not None:
                tophat_cmd += ' --' + params['preset_options']

            if sample_type == 'KBaseAssembly.SingleEndLibrary':
                lib_type = 'SingleEnd'
                read_id = r_sample['data']['handle']['id']
                read_name = r_sample['data']['handle']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read_id, filename=read_name,
                                                         directory=directory, token=token)
                    tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir, gtf_file, bowtie2_base,
                                                                  os.path.join(directory, read_name))
                except Exception as e:
                    self.logger.exception(e)
                    raise Exception("Unable to download shock file, {0}".format(read_name))
            if sample_type == 'KBaseAssembly.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if 'orientation' in params and params['orientation'] is not None:
                    tophat_cmd += ' --' + params['orientation']
                read1_id = r_sample['data']['handle_1']['id']
                read1_name = r_sample['data']['handle_1']['file_name']
                read2_id = r_sample['data']['handle_2']['id']
                read2_name = r_sample['data']['handle_2']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read1_id, filename=read1_name,
                                                         directory=directory, token=token)
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read2_id, filename=read2_name,
                                                         directory=directory, token=token)
                    tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir, gtf_file, bowtie2_base,
                                                                      os.path.join(directory, read1_name),
                                                                      os.path.join(directory, read2_name))
                except Exception as e:
                    raise Exception("Unable to download shock file, {0} or {1}".format(read1_name, read2_name))
        except Exception as e:
            # Closes the outer try following the handler pattern used by the
            # sibling runEach implementations.
            logger.error("".join(traceback.format_exc()))
            raise
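# --- Illustrative sketch, not part of the service code ----------------------
# Once tophat_cmd is fully assembled above, the service hands it to its own
# process runner (not shown in this excerpt). A minimal stand-in, assuming
# the 'tophat' binary is on PATH, could look like this:
import shlex
import subprocess

def run_tophat_sketch(tophat_cmd, logger):
    cmd = 'tophat' + tophat_cmd  # tophat_cmd already starts with a space
    logger.info("Executing: {0}".format(cmd))
    proc = subprocess.Popen(shlex.split(cmd),
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise Exception("TopHat failed with exit code {0}: {1}".format(proc.returncode, err))
    return out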
    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots
        params = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        # Set up workspace client
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        # Read the input cuffdiff workspace object and get the file handle for the cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if the workspace returned any data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        # Get the input data's Shock id and filename
        cuffdiff_shock_id = s_res[0]['data']['file']['id']
        cuffdiff_file_name = s_res[0]['data']['file']['file_name']
        #cuffdiff_file_name = None
        filesize = None

        # Download the tar file
        dx = script_util.download_file_from_shock(self.__LOGGER, self.__SHOCK_URL,
                                                  cuffdiff_shock_id, cuffdiff_file_name,
                                                  self.__SCRATCH, filesize, user_token)

        # Decompress the tar file into its own directory
        # (renamed from 'tarfile' to avoid shadowing the stdlib module)
        tarball = join(self.__SCRATCH, cuffdiff_file_name)
        dstnExtractFolder = join(self.__SCRATCH, "cuffdiffData")
        if not os.path.exists(dstnExtractFolder):
            os.makedirs(dstnExtractFolder)

        untarStatus = script_util2.untar_files(self.__LOGGER, tarball, dstnExtractFolder)
        if untarStatus == False:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        foldersinExtractFolder = os.listdir(dstnExtractFolder)
        if len(foldersinExtractFolder) == 0:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        # Run the R scripts and update the cummerbund output json file
        cuffdiff_dir = join(dstnExtractFolder, foldersinExtractFolder[0])
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Prepare the output object and its plot list
        outputobject = dict()
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [
            {'file': "dispersionplot.R",
             'title': "Dispersion plot",
             'description': "Dispersion plot"},
            {'file': "pcaplot.R",
             'title': "PCA plot",
             'description': "PCA plot"},
            {'file': "fpkmscvplot.R",
             'title': "FPKM SCV plot",
             'description': "FPKM SCV plot"}
        ]

        # Iterate through the plot list and generate the images and json files
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                                                 plot['file'], self.__SHOCK_URL, self.__HS_URL,
                                                 user_token, cummerbundplotset, plot['title'],
                                                 plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])

        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset
        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace": params['workspace_name'],
            "objects": [{
                "type": "KBaseRNASeq.cummerbund_output",
                "data": outputobject,
                "name": params["ws_cummerbund_output"]}]
        })
        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
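# --- Minimal sketch, assuming the contract of script_util2.untar_files ------
# The check on untarStatus above implies the helper returns True/False rather
# than raising. A standard-library stand-in with that contract (illustrative
# only; the real helper may differ) could be written as:
import tarfile

def untar_files_sketch(logger, tarball_path, dest_dir):
    try:
        archive = tarfile.open(tarball_path)
        try:
            archive.extractall(dest_dir)  # note: trusts member paths in the archive
        finally:
            archive.close()
        return True
    except Exception:
        logger.exception("Failed to extract {0}".format(tarball_path))
        return False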
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
            r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            sample_type = r_sample_info[2].split('-')[0]
            input_direc = os.path.join(directory, read_sample.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)

            ### Adding advanced options to the Bowtie2 call
            bowtie2_cmd = ''
            bowtie2_cmd += ' -p {0}'.format(self.num_threads)
            if 'quality_score' in params and params['quality_score'] is not None:
                bowtie2_cmd += ' --' + params['quality_score']
            if 'alignment_type' in params and params['alignment_type'] is not None:
                bowtie2_cmd += ' --' + params['alignment_type']
            if ('preset_options' in params and params['preset_options'] is not None) and \
               ('alignment_type' in params and params['alignment_type'] is not None):
                if params['alignment_type'] == 'local':
                    bowtie2_cmd += ' --' + params['preset_options'] + '-local'
                else:
                    bowtie2_cmd += ' --' + params['preset_options']
            if 'trim5' in params and params['trim5'] is not None:
                bowtie2_cmd += ' --trim5 ' + str(params['trim5'])
            if 'trim3' in params and params['trim3'] is not None:
                bowtie2_cmd += ' --trim3 ' + str(params['trim3'])
            if 'np' in params and params['np'] is not None:
                bowtie2_cmd += ' --np ' + str(params['np'])
            if 'minins' in params and params['minins'] is not None:
                bowtie2_cmd += ' --minins ' + str(params['minins'])
            if 'maxins' in params and params['maxins'] is not None:
                bowtie2_cmd += ' --maxins ' + str(params['maxins'])
            out_file = output_dir + "/accepted_hits.sam"
            if sample_type == 'KBaseAssembly.SingleEndLibrary':
                lib_type = 'SingleEnd'
                read_id = r_sample['data']['handle']['id']
                read_name = r_sample['data']['handle']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read_id, filename=read_name,
                                                         directory=input_direc, token=token)
                    bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc, read_name),
                                                                  bowtie2_base, out_file)
                except Exception as e:
                    self.logger.exception(e)
                    raise Exception("Unable to download shock file, {0}".format(read_name))
            if sample_type == 'KBaseAssembly.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if 'orientation' in params and params['orientation'] is not None:
                    bowtie2_cmd += ' --' + params['orientation']
                read1_id = r_sample['data']['handle_1']['id']
                read1_name = r_sample['data']['handle_1']['file_name']
                read2_id = r_sample['data']['handle_2']['id']
                read2_name = r_sample['data']['handle_2']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read1_id, filename=read1_name,
                                                         directory=input_direc, token=token)
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read2_id, filename=read2_name,
                                                         directory=input_direc, token=token)
                    # Both mates were downloaded to input_direc, so both -1 and -2
                    # paths must point there.
                    bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc, read1_name),
                                                                         os.path.join(input_direc, read2_name),
                                                                         bowtie2_base, out_file)
                except Exception as e:
                    raise Exception("Unable to download shock file, {0} or {1}".format(read1_name, read2_name))
        except Exception as e:
            # Closes the outer try following the handler pattern used by the
            # sibling runEach implementations.
            logger.error("".join(traceback.format_exc()))
            raise
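# --- Illustrative sketch, not part of the service code ----------------------
# handler_util.get_file_with_suffix above locates the '<base>.rev.1.bt2' file
# so the bowtie2 index base name can be recovered. A hypothetical glob-based
# equivalent (the helper's real implementation is not shown in this excerpt):
import glob
import os

def find_bowtie2_base_sketch(directory):
    suffix = '.rev.1.bt2'
    hits = glob.glob(os.path.join(directory, '*' + suffix))
    if not hits:
        raise Exception("No bowtie2 index ({0}) found in {1}".format(suffix, directory))
    # Strip the suffix to recover the index base passed to 'bowtie2 -x'.
    return hits[0][:-len(suffix)]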