def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type, num_threads, read_sample,
                condition, directory, genome_id, sampleset_id, params, token):
    """Fetch one reads sample and assemble the HISAT2 argument string for it.

    The sample object and its type are looked up in the workspace,
    ``<sample>_hisat2_input`` and ``<sample>_hisat2_alignment`` directories
    are created under ``directory``, the read file(s) are downloaded from
    Shock into the input directory, and the option string is built from
    ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: Mapping that provides ``'shock_service_url'``.
        ws_client: Workspace client; ``get_objects`` and
            ``get_object_info_new`` are called on it.
        hs, condition, genome_id, sampleset_id: Accepted for interface
            compatibility; not read in this function.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored on input -- it is overwritten with the type
            reported by the workspace for ``read_sample``.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the reads object; the text before its first
            ``.`` names the per-sample directories.
        directory: Working directory; it is searched for a ``.1.ht2`` file
            via ``handler_util.get_file_with_suffix``.
        params: Mapping of optional alignment settings. Valued options are
            skipped when absent or ``None``; the two switch options are
            added when present and not equal to ``0``.
        token: Auth token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = handler_util.create_logger(directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split('-')[0]

    sample_prefix = read_sample.split('.')[0]
    input_direc = os.path.join(directory, sample_prefix + "_hisat2_input")
    if not os.path.exists(input_direc):
        os.mkdir(input_direc)
    output_dir = os.path.join(directory, sample_prefix + "_hisat2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    hisat2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))

    ### Adding advanced options to the HISAT2 call
    hisat2_cmd = ' -p {0}'.format(num_threads)
    # Options whose value is itself the flag name (e.g. "--<quality_score>").
    for key in ('quality_score', 'alignment_type'):
        if params.get(key) is not None:
            hisat2_cmd += ' --' + params[key]
    # Options of the form "--flag <value>".
    for key, flag in (
        ('trim5', '--trim5'),
        ('trim3', '--trim3'),
        ('np', '--np'),
        ('minins', '--minins'),
        ('maxins', '--maxins'),
        ('min_intron_length', '--min-intronlen'),
        ('max_intron_length', '--max-intronlen'),
    ):
        if params.get(key) is not None:
            hisat2_cmd += ' ' + flag + ' ' + str(params[key])
    # Switches: emitted whenever the key is present with any value other than 0.
    for key, flag in (
        ('no_spliced_alignment', '--no-spliced-alignment'),
        ('transcriptome_mapping_only', '--transcriptome-mapping-only'),
    ):
        if key in params and params[key] != 0:
            hisat2_cmd += ' ' + flag
    if params.get('tailor_alignments') is not None:
        hisat2_cmd += ' --' + params['tailor_alignments']

    out_file = output_dir + "/accepted_hits.sam"

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read_id, filename=read_name, directory=input_direc, token=token)
        except Exception:
            # Record the underlying failure before replacing it with the generic message.
            logger.exception("Unable to download shock file , {0}".format(read_name))
            raise Exception("Unable to download shock file , {0}".format(read_name))
        hisat2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(input_direc, read_name), hisat2_base, out_file)
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        # Mate orientation only applies to paired-end reads.
        if params.get('orientation') is not None:
            hisat2_cmd += ' --' + params['orientation']
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read1_id, filename=read1_name, directory=input_direc, token=token)
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read2_id, filename=read2_name, directory=input_direc, token=token)
        except Exception:
            logger.exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        # Both mates are downloaded into input_direc, so both paths must point
        # there (the second mate previously pointed at output_dir).
        hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(input_direc, read1_name), os.path.join(input_direc, read2_name),
            hisat2_base, out_file)

    # NOTE(review): hisat2_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type, num_threads, read_sample,
                 condition, directory, bowtie2index_id, genome_id, sampleset_id, params, token):
    """Fetch one reads sample and assemble the Bowtie2 argument string for it.

    The sample object and its type are looked up in the workspace, a
    ``<sample>_bowtie2_alignment`` directory is created under ``directory``,
    the read file(s) are downloaded from Shock into ``directory``, and the
    option string is built from ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: Mapping that provides ``'shock_service_url'``.
        ws_client: Workspace client; ``get_objects`` and
            ``get_object_info_new`` are called on it.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: Accepted for
            interface compatibility; not read in this function.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored on input -- it is overwritten with the type
            reported by the workspace for ``read_sample``.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the reads object; the text before its first
            ``.`` names the output directory.
        directory: Working directory; it is searched for a ``.rev.1.bt2``
            file via ``handler_util.get_file_with_suffix``.
        params: Mapping of optional alignment settings; entries that are
            absent or ``None`` are skipped.
        token: Auth token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        # Same factory the HISAT2 wrapper uses; the bare name was not qualified.
        logger = handler_util.create_logger(directory, "run_Bowtie2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split('-')[0]

    output_dir = os.path.join(directory, read_sample.split('.')[0] + "_bowtie2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    out_file = output_dir + "/accepted_hits.sam"
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to the Bowtie2 call
    bowtie2_cmd = ' -p {0}'.format(num_threads)
    alignment_type = params.get('alignment_type')
    if params.get('quality_score') is not None:
        bowtie2_cmd += ' --' + params['quality_score']
    if alignment_type is not None:
        bowtie2_cmd += ' --' + alignment_type
    # A preset is only emitted together with an alignment type; in local mode
    # the preset name carries a "-local" suffix.
    if params.get('preset_options') is not None and alignment_type is not None:
        if alignment_type == 'local':
            bowtie2_cmd += ' --' + params['preset_options'] + '-local'
        else:
            bowtie2_cmd += ' --' + params['preset_options']
    # Options of the form "--flag <value>".
    for key in ('trim5', 'trim3', 'np', 'minins', 'maxins'):
        if params.get(key) is not None:
            bowtie2_cmd += ' --' + key + ' ' + str(params[key])
    if params.get('orientation') is not None:
        bowtie2_cmd += ' --' + params['orientation']

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read_id, filename=read_name, directory=directory, token=token)
        except Exception:
            # Record the underlying failure before replacing it with the generic message.
            logger.exception("Unable to download shock file , {0}".format(read_name))
            raise Exception("Unable to download shock file , {0}".format(read_name))
        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(directory, read_name), bowtie2_base, out_file)
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read1_id, filename=read1_name, directory=directory, token=token)
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read2_id, filename=read2_name, directory=directory, token=token)
        except Exception:
            logger.exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(directory, read1_name), os.path.join(directory, read2_name),
            bowtie2_base, out_file)

    # NOTE(review): bowtie2_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type, num_threads, read_sample,
                gtf_file, condition, directory, bowtie2index_id, genome_id, sampleset_id,
                params, token):
    """Fetch one reads sample and assemble the TopHat argument string for it.

    The sample object and its type are looked up in the workspace, the read
    file(s) are downloaded from Shock into ``directory``, and the option
    string is built from ``params``. The output directory path is computed
    but not created here.

    Args:
        logger: Logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: Mapping that provides ``'shock_service_url'``.
        ws_client: Workspace client; ``get_objects`` and
            ``get_object_info_new`` are called on it.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: Accepted for
            interface compatibility; not read in this function.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored on input -- it is overwritten with the type
            reported by the workspace for ``read_sample``.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the reads object; the text before its first
            ``.`` names the output directory.
        gtf_file: Value passed to ``-G``.
        directory: Working directory; it is searched for a ``.rev.1.bt2``
            file via ``handler_util.get_file_with_suffix``.
        params: Mapping of optional alignment settings; entries that are
            absent or ``None`` are skipped.
        token: Auth token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        # Same factory the HISAT2 wrapper uses; the bare name was not qualified.
        logger = handler_util.create_logger(directory, "run_Tophat_" + read_sample)

    r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split('-')[0]

    output_dir = os.path.join(directory, read_sample.split('.')[0] + "_tophat_alignment")
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to tophat command
    tophat_cmd = ' -p ' + str(num_threads)
    # Options of the form "<flag> <value>".
    for key, flag in (
        ('max_intron_length', '-I'),
        ('min_intron_length', '-i'),
        ('min_anchor_length', '-a'),
        ('read_edit_dist', '--read-edit-dist'),
        ('read_gap_length', '--read-gap-length'),
        ('read_mismatches', '-N'),
    ):
        if params.get(key) is not None:
            tophat_cmd += ' ' + flag + ' ' + str(params[key])
    if params.get('library_type') is not None:
        tophat_cmd += ' --library-type ' + params['library_type']
    # Switches are enabled by the value 1; a None value is treated as "not set"
    # instead of failing inside int().
    for key, flag in (
        ('report_secondary_alignments', '--report-secondary-alignments'),
        ('no_coverage_search', '--no-coverage-search'),
    ):
        if params.get(key) is not None and int(params[key]) == 1:
            tophat_cmd += ' ' + flag
    if params.get('preset_options') is not None:
        tophat_cmd += ' --' + params['preset_options']

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read_id, filename=read_name, directory=directory, token=token)
        except Exception:
            # Record the underlying failure before replacing it with the generic message.
            logger.exception("Unable to download shock file , {0}".format(read_name))
            raise Exception("Unable to download shock file , {0}".format(read_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
            output_dir, gtf_file, bowtie2_base, os.path.join(directory, read_name))
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read1_id, filename=read1_name, directory=directory, token=token)
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read2_id, filename=read2_name, directory=directory, token=token)
        except Exception:
            logger.exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read1_name), os.path.join(directory, read2_name))

    # NOTE(review): tophat_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type, num_threads, read_sample,
                condition, directory, genome_id, sampleset_id, params, token):
    """Fetch one reads sample and assemble the HISAT2 argument string for it.

    The sample object and its type are looked up in the workspace,
    ``<sample>_hisat2_input`` and ``<sample>_hisat2_alignment`` directories
    are created under ``directory``, the read file(s) are downloaded from
    Shock into the input directory, and the option string is built from
    ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: Mapping that provides ``'shock_service_url'``.
        ws_client: Workspace client; ``get_objects`` and
            ``get_object_info_new`` are called on it.
        hs, condition, genome_id, sampleset_id: Accepted for interface
            compatibility; not read in this function.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored on input -- it is overwritten with the type
            reported by the workspace for ``read_sample``.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the reads object; the text before its first
            ``.`` names the per-sample directories.
        directory: Working directory; it is searched for a ``.1.ht2`` file
            via ``handler_util.get_file_with_suffix``.
        params: Mapping of optional alignment settings. Valued options are
            skipped when absent or ``None``; the two switch options are
            added when present and not equal to ``0``.
        token: Auth token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = handler_util.create_logger(directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split('-')[0]

    sample_prefix = read_sample.split('.')[0]
    input_direc = os.path.join(directory, sample_prefix + "_hisat2_input")
    if not os.path.exists(input_direc):
        os.mkdir(input_direc)
    output_dir = os.path.join(directory, sample_prefix + "_hisat2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    hisat2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))

    ### Adding advanced options to the HISAT2 call
    hisat2_cmd = ' -p {0}'.format(num_threads)
    # Options whose value is itself the flag name (e.g. "--<quality_score>").
    for key in ('quality_score', 'alignment_type'):
        if params.get(key) is not None:
            hisat2_cmd += ' --' + params[key]
    # Options of the form "--flag <value>".
    for key, flag in (
        ('trim5', '--trim5'),
        ('trim3', '--trim3'),
        ('np', '--np'),
        ('minins', '--minins'),
        ('maxins', '--maxins'),
        ('min_intron_length', '--min-intronlen'),
        ('max_intron_length', '--max-intronlen'),
    ):
        if params.get(key) is not None:
            hisat2_cmd += ' ' + flag + ' ' + str(params[key])
    # Switches: emitted whenever the key is present with any value other than 0.
    for key, flag in (
        ('no_spliced_alignment', '--no-spliced-alignment'),
        ('transcriptome_mapping_only', '--transcriptome-mapping-only'),
    ):
        if key in params and params[key] != 0:
            hisat2_cmd += ' ' + flag
    if params.get('tailor_alignments') is not None:
        hisat2_cmd += ' --' + params['tailor_alignments']

    out_file = output_dir + "/accepted_hits.sam"

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read_id, filename=read_name, directory=input_direc, token=token)
        except Exception:
            # Record the underlying failure before replacing it with the generic message.
            logger.exception("Unable to download shock file , {0}".format(read_name))
            raise Exception("Unable to download shock file , {0}".format(read_name))
        hisat2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(input_direc, read_name), hisat2_base, out_file)
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        # Mate orientation only applies to paired-end reads.
        if params.get('orientation') is not None:
            hisat2_cmd += ' --' + params['orientation']
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read1_id, filename=read1_name, directory=input_direc, token=token)
            script_util.download_file_from_shock(
                logger, shock_service_url=services['shock_service_url'],
                shock_id=read2_id, filename=read2_name, directory=input_direc, token=token)
        except Exception:
            logger.exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        # Both mates are downloaded into input_direc, so both paths must point
        # there (the second mate previously pointed at output_dir).
        hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(input_direc, read1_name), os.path.join(input_direc, read2_name),
            hisat2_base, out_file)

    # NOTE(review): hisat2_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
raise Bowtie2SampleException('Either of the Library typed objects SingleEndLibrary or PairedEndLibrary is required') r_label = 'Single' ### Get the Bw index file bw_index_files = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],params['bowtie_index'],"KBaseRNASeq.Bowtie2Indexes",bowtie2_dir,token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger,os.path.join(bowtie2_dir,bw_index_files),bowtie2_dir) mv_dir= handler_util.get_dir(bowtie2_dir) if mv_dir is not None: script_util.move_files(logger,mv_dir,bowtie2_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error: Please contact [email protected]") ### Build Index for the fasta file fasta_file =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")+".fa") bowtie2base =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")) bowtie2base_cmd = '{0} {1}'.format(fasta_file,bowtie2base) try: logger.info("Building Index for Hisat2 {0}".format(bowtie2base_cmd)) cmdline_output = script_util.runProgram(logger,"bowtie2-build",bowtie2base_cmd,None,bowtie2_dir) except Exception,e: raise Exception("Failed to run command {0}".format(bowtie2base_cmd)) ### Check if GTF object exists in the workspace pull the gtf ref_id = bowtie_index['data']['genome_id'] genome_name = ws_client.get_object_info_new({"objects": [{'ref' : ref_id }] })[0][1] ws_gtf = genome_name+"_GTF" gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",bowtie2_dir,token) if gtf_file is None: rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],ref_id,genome_name,bowtie2_dir,token) # Determine the num_threads provided by the user otherwise default the number of threads to 2
.format(",".join(missing_objs), params['ws_id'])) ### Build Hisat2 index fasta_file = script_util.generate_fasta(logger, services, token, annotation_id, hisat2_dir, params['genome_id']) logger.info("Sanitizing the fasta file to correct id names {}".format( datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format( datetime.datetime.utcnow())) hisat2base = os.path.join( hisat2_dir, handler_util.get_file_with_suffix(hisat2_dir, ".fasta")) hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base) try: logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd)) cmdline_output = script_util.runProgram(logger, "hisat2-build", hisat2base_cmd, None, hisat2_dir) except Exception, e: raise Exception("Failed to run command {0}".format(hisat2base_cmd)) ws_gtf = params['genome_id'] + "_GTF" ret = script_util.if_obj_exists(None, ws_client, params['ws_id'], "KBaseRNASeq.GFFAnnotation", [ws_gtf]) print ret if not ret is None: logger.info( "GFF Annotation Exist for Genome Annotation {0}.... Skipping step "
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type, num_threads, read_sample,
                 condition, directory, bowtie2index_id, genome_id, sampleset_id, params, token):
    """Fetch one reads sample and assemble the Bowtie2 argument string for it.

    The sample object and its type are looked up in the workspace, a
    ``<sample>_bowtie2_alignment`` directory is created under ``directory``,
    the read file(s) are downloaded from Shock into ``directory``, and the
    option string is built from ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``handler_util.create_logger``.
        services: Mapping that provides ``'shock_service_url'``.
        ws_client: Workspace client; ``get_objects`` and
            ``get_object_info_new`` are called on it.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: Accepted for
            interface compatibility; not read in this function.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored on input -- it is overwritten with the type
            reported by the workspace for ``read_sample``.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the reads object; the text before its first
            ``.`` names the output directory.
        directory: Working directory; it is searched for a ``.rev.1.bt2``
            file via ``handler_util.get_file_with_suffix``.
        params: Mapping of optional alignment settings; entries that are
            absent or ``None`` are skipped.
        token: Auth token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        # Same factory the HISAT2 wrapper uses; the bare name was not qualified.
        logger = handler_util.create_logger(directory, "run_Bowtie2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects([{"name": read_sample, "workspace": ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{"name": read_sample, "workspace": ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split("-")[0]

    output_dir = os.path.join(directory, read_sample.split(".")[0] + "_bowtie2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    out_file = output_dir + "/accepted_hits.sam"
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to the Bowtie2 call
    bowtie2_cmd = " -p {0}".format(num_threads)
    alignment_type = params.get("alignment_type")
    if params.get("quality_score") is not None:
        bowtie2_cmd += " --" + params["quality_score"]
    if alignment_type is not None:
        bowtie2_cmd += " --" + alignment_type
    # A preset is only emitted together with an alignment type; in local mode
    # the preset name carries a "-local" suffix.
    if params.get("preset_options") is not None and alignment_type is not None:
        if alignment_type == "local":
            bowtie2_cmd += " --" + params["preset_options"] + "-local"
        else:
            bowtie2_cmd += " --" + params["preset_options"]
    # Options of the form "--flag <value>".
    for key in ("trim5", "trim3", "np", "minins", "maxins"):
        if params.get(key) is not None:
            bowtie2_cmd += " --" + key + " " + str(params[key])
    if params.get("orientation") is not None:
        bowtie2_cmd += " --" + params["orientation"]

    if sample_type == "KBaseAssembly.SingleEndLibrary":
        lib_type = "SingleEnd"
        read_id = r_sample["data"]["handle"]["id"]
        read_name = r_sample["data"]["handle"]["file_name"]
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services["shock_service_url"],
                shock_id=read_id, filename=read_name, directory=directory, token=token)
        except Exception:
            # Record the underlying failure before replacing it with the generic message.
            logger.exception("Unable to download shock file , {0}".format(read_name))
            raise Exception("Unable to download shock file , {0}".format(read_name))
        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(directory, read_name), bowtie2_base, out_file)
    elif sample_type == "KBaseAssembly.PairedEndLibrary":
        lib_type = "PairedEnd"
        read1_id = r_sample["data"]["handle_1"]["id"]
        read1_name = r_sample["data"]["handle_1"]["file_name"]
        read2_id = r_sample["data"]["handle_2"]["id"]
        read2_name = r_sample["data"]["handle_2"]["file_name"]
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=services["shock_service_url"],
                shock_id=read1_id, filename=read1_name, directory=directory, token=token)
            script_util.download_file_from_shock(
                logger, shock_service_url=services["shock_service_url"],
                shock_id=read2_id, filename=read2_name, directory=directory, token=token)
        except Exception:
            logger.exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(directory, read1_name), os.path.join(directory, read2_name),
            bowtie2_base, out_file)

    # NOTE(review): bowtie2_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
def runEach(self, task_params):
    """Download one reads sample and assemble its TopHat argument string.

    Reads clients, token, options and thread count from ``self``
    (``common_params``, ``method_params``, ``logger``, ``num_threads``,
    ``urls``) and the per-task values from ``task_params``.

    Args:
        task_params: Mapping with the keys ``job_id`` (reads object name),
            ``label``, ``tophat_dir`` (working directory), ``ws_id``,
            ``reads_type``, ``annotation_id``, ``sampleset_id`` and
            ``gtf_file``. A missing key raises ``KeyError``.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.
    """
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    read_sample = task_params['job_id']
    # Several of these are not read below; the lookups are kept so a missing
    # key still fails immediately.
    condition = task_params['label']
    directory = task_params['tophat_dir']
    ws_id = task_params['ws_id']
    reads_type = task_params['reads_type']
    genome_id = task_params['annotation_id']
    sampleset_id = task_params['sampleset_id']
    gtf_file = task_params['gtf_file']

    print("Downloading Read Sample{0}".format(read_sample))
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # The type string is "<Module.Type>-<version>"; keep only the part before '-'.
    sample_type = r_sample_info[2].split('-')[0]

    # The output directory path is only computed here, not created.
    output_dir = os.path.join(directory, read_sample.split('.')[0] + "_tophat_alignment")
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to the tophat command
    tophat_cmd = ' -p ' + str(self.num_threads)
    # Options of the form "<flag> <value>".
    for key, flag in (
        ('max_intron_length', '-I'),
        ('min_intron_length', '-i'),
        ('min_anchor_length', '-a'),
        ('read_edit_dist', '--read-edit-dist'),
        ('read_gap_length', '--read-gap-length'),
        ('read_mismatches', '-N'),
    ):
        if params.get(key) is not None:
            tophat_cmd += ' ' + flag + ' ' + str(params[key])
    if params.get('library_type') is not None:
        tophat_cmd += ' --library-type ' + params['library_type']
    # Switches are enabled by the value 1; a None value is treated as "not set"
    # instead of failing inside int().
    for key, flag in (
        ('report_secondary_alignments', '--report-secondary-alignments'),
        ('no_coverage_search', '--no-coverage-search'),
    ):
        if params.get(key) is not None and int(params[key]) == 1:
            tophat_cmd += ' ' + flag
    if params.get('preset_options') is not None:
        tophat_cmd += ' --' + params['preset_options']

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                self.logger, shock_service_url=self.urls['shock_service_url'],
                shock_id=read_id, filename=read_name, directory=directory, token=token)
        except Exception as e:
            self.logger.exception(e)
            raise Exception("Unable to download shock file , {0}".format(read_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
            output_dir, gtf_file, bowtie2_base, os.path.join(directory, read_name))
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        # Mate orientation only applies to paired-end reads.
        if params.get('orientation') is not None:
            tophat_cmd += ' --' + params['orientation']
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            script_util.download_file_from_shock(
                self.logger, shock_service_url=self.urls['shock_service_url'],
                shock_id=read1_id, filename=read1_name, directory=directory, token=token)
            script_util.download_file_from_shock(
                self.logger, shock_service_url=self.urls['shock_service_url'],
                shock_id=read2_id, filename=read2_name, directory=directory, token=token)
        except Exception as e:
            # Log the underlying failure, as the single-end branch does.
            self.logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read1_name), os.path.join(directory, read2_name))

    # NOTE(review): tophat_cmd is fully assembled here, but the code shown
    # neither runs nor returns it -- confirm where it is consumed.
def runEach(self, task_params):
    """Download one reads sample, build its Bowtie2 arguments and run Bowtie2.

    Reads clients, token, options and thread count from ``self``
    (``common_params``, ``method_params``, ``logger``, ``num_threads``) and
    the per-task values from ``task_params``. Per-sample
    ``<name>_bowtie2_input`` and ``<name>_bowtie2_alignment`` directories are
    created under the working directory, and the SAM output path is
    ``accepted_hits.sam`` inside the alignment directory.

    Args:
        task_params: Mapping with the keys ``job_id`` (reads object name),
            ``label``, ``bowtie2_dir`` (working directory), ``ws_id``,
            ``annotation_id`` and ``sampleset_id``. A missing key raises
            ``KeyError``.

    Raises:
        Exception: If the reads cannot be downloaded or the ``bowtie2``
            command fails.
    """
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    read_sample = task_params['job_id']
    # Several of these are not read below; the lookups are kept so a missing
    # key still fails immediately.
    condition = task_params['label']
    directory = task_params['bowtie2_dir']
    ws_id = task_params['ws_id']
    genome_id = task_params['annotation_id']
    sampleset_id = task_params['sampleset_id']

    print("Downloading Read Sample{0}".format(read_sample))
    logger.info("Downloading Read Sample{0}".format(read_sample))

    # The object itself is not read below; the fetch is kept so an unknown
    # sample still fails at this point.
    r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
    sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
    sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)

    sample_prefix = sample_name.split('.')[0]
    input_direc = os.path.join(directory, sample_prefix + "_bowtie2_input")
    if not os.path.exists(input_direc):
        os.mkdir(input_direc)
    output_dir = os.path.join(directory, sample_prefix + "_bowtie2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
    bowtie2_base = os.path.join(directory, base)

    ### Adding advanced options to the Bowtie2 call
    bowtie2_cmd = ' -p {0}'.format(self.num_threads)
    alignment_type = params.get('alignment_type')
    if params.get('quality_score') is not None:
        bowtie2_cmd += ' --' + params['quality_score']
    if alignment_type is not None:
        bowtie2_cmd += ' --' + alignment_type
    # A preset is only emitted together with an alignment type; in local mode
    # the preset name carries a "-local" suffix.
    if params.get('preset_options') is not None and alignment_type is not None:
        if alignment_type == 'local':
            bowtie2_cmd += ' --' + params['preset_options'] + '-local'
        else:
            bowtie2_cmd += ' --' + params['preset_options']
    # Options of the form "--flag <value>".
    for key in ('trim5', 'trim3', 'np', 'minins', 'maxins'):
        if params.get(key) is not None:
            bowtie2_cmd += ' --' + key + ' ' + str(params[key])

    out_file = output_dir + "/accepted_hits.sam"

    try:
        sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
        ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token)
    except Exception as e:
        self.logger.exception(e)
        raise Exception("Unable to download reads file , {0}".format(read_sample))

    if sample_type in ('KBaseAssembly.SingleEndLibrary', 'KBaseFile.SingleEndLibrary'):
        lib_type = 'SingleEnd'
        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'], bowtie2_base, out_file)
    if sample_type in ('KBaseAssembly.PairedEndLibrary', 'KBaseFile.PairedEndLibrary'):
        lib_type = 'PairedEnd'
        # The orientation and mate arguments belong on the Bowtie2 command; this
        # branch previously referenced hisat2_cmd / hisat2_base, which are not
        # defined here.
        # TODO: for KBaseFile.PairedEndLibrary the orientation can be read from
        # the PEL object.
        if params.get('orientation') is not None:
            bowtie2_cmd += ' --' + params['orientation']
        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            ds['fwd'], ds['rev'], bowtie2_base, out_file)

    try:
        self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
        cmdline_output = script_util.runProgram(
            self.logger, "bowtie2", bowtie2_cmd, None, directory)
    except Exception as e:
        # Record the underlying failure before replacing it with the generic message.
        self.logger.exception(e)
        raise Exception("Failed to run command {0}".format(bowtie2_cmd))
def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                read_sample, gtf_file, condition, directory, bowtie2index_id,
                genome_id, sampleset_id, params, token):
    """Download one read library from Shock and assemble its TopHat arguments.

    The library object named ``read_sample`` is fetched from workspace
    ``ws_id``; its workspace type (text before the first ``-``) replaces the
    ``sample_type`` argument.  The read file(s) are downloaded into
    ``directory`` and a TopHat argument string is built from ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``create_logger(directory, "run_Tophat_" + read_sample)``.
        services: Mapping that provides ``shock_service_url``.
        ws_client: Client exposing ``get_objects`` and
            ``get_object_info_new``.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: Accepted
            for interface compatibility; not used in this block.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored; overwritten from the workspace object info.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the read library object.
        gtf_file: Path passed to ``-G``.
        directory: Directory searched for the ``.rev.1.bt2`` file and used
            as the download target.
        params: Mapping of optional TopHat settings; entries that are
            absent or ``None`` are skipped.
        token: Token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.  Errors
            from the workspace lookups propagate unchanged.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = create_logger(directory, "run_Tophat_" + read_sample)

    r_sample = ws_client.get_objects(
        [{"name": read_sample, "workspace": ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{"name": read_sample, "workspace": ws_id}]})[0]
    # Index 2 of the object info carries "<type>-<version>"; keep the type.
    sample_type = r_sample_info[2].split("-")[0]

    output_name = read_sample.split(".")[0] + "_tophat_alignment"
    output_dir = os.path.join(directory, output_name)
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to tophat command
    tophat_cmd = " -p " + str(num_threads)
    valued_options = (
        ("max_intron_length", " -I "),
        ("min_intron_length", " -i "),
        ("min_anchor_length", " -a "),
        ("read_edit_dist", " --read-edit-dist "),
        ("read_gap_length", " --read-gap-length "),
        ("read_mismatches", " -N "),
    )
    for key, flag in valued_options:
        value = params.get(key)
        if value is not None:
            tophat_cmd += flag + str(value)

    library_type = params.get("library_type")
    if library_type is not None:
        tophat_cmd += " --library-type " + library_type

    # A toggle that is present but None counts as "off" instead of
    # crashing in int(None).
    toggles = (
        ("report_secondary_alignments", " --report-secondary-alignments"),
        ("no_coverage_search", " --no-coverage-search"),
    )
    for key, flag in toggles:
        value = params.get(key)
        if value is not None and int(value) == 1:
            tophat_cmd += flag

    preset_options = params.get("preset_options")
    if preset_options is not None:
        tophat_cmd += " --" + preset_options

    shock_url = services["shock_service_url"]
    if sample_type == "KBaseAssembly.SingleEndLibrary":
        read_id = r_sample["data"]["handle"]["id"]
        read_name = r_sample["data"]["handle"]["file_name"]
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=shock_url, shock_id=read_id,
                filename=read_name, directory=directory, token=token)
        except Exception as e:
            # Record the underlying cause before replacing it.
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0}".format(read_name))
        tophat_cmd += " -o {0} -G {1} {2} {3}".format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read_name))
    elif sample_type == "KBaseAssembly.PairedEndLibrary":
        read1_id = r_sample["data"]["handle_1"]["id"]
        read1_name = r_sample["data"]["handle_1"]["file_name"]
        read2_id = r_sample["data"]["handle_2"]["id"]
        read2_name = r_sample["data"]["handle_2"]["file_name"]
        try:
            for shock_id, file_name in ((read1_id, read1_name),
                                        (read2_id, read2_name)):
                script_util.download_file_from_shock(
                    logger, shock_service_url=shock_url, shock_id=shock_id,
                    filename=file_name, directory=directory, token=token)
        except Exception as e:
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
        tophat_cmd += " -o {0} -G {1} {2} {3} {4}".format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read1_name),
            os.path.join(directory, read2_name))
    # NOTE(review): tophat_cmd is assembled but never executed or returned
    # here -- confirm whether the invocation step is missing.
def runEach(self, task_params):
    """Align one read sample with HISAT2.

    Looks up the read library named by ``task_params['job_id']``, creates
    per-sample input and output directories under
    ``task_params['hisat2_dir']``, downloads the reads, builds the HISAT2
    argument string from ``self.method_params`` and runs it through
    ``script_util.runProgram``.

    Args:
        task_params: Mapping providing ``job_id``, ``label``,
            ``hisat2_dir``, ``ws_id``, ``annotation_id`` and
            ``sampleset_id``.

    Raises:
        Exception: If the reads cannot be downloaded or the hisat2 command
            fails.  Errors from the workspace lookups propagate unchanged.
    """
    ws_client = self.common_params['ws_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    read_sample = task_params['job_id']
    directory = task_params['hisat2_dir']
    ws_id = task_params['ws_id']
    # Not used below; still read so that a task missing these keys fails
    # here exactly as before (assuming plain dict-like containers).
    hs = self.common_params['hs_client']
    condition = task_params['label']
    genome_id = task_params['annotation_id']
    sampleset_id = task_params['sampleset_id']

    print("Downloading Read Sample{0}".format(read_sample))
    logger.info("Downloading Read Sample{0}".format(read_sample))

    # The object itself is not used below; the lookup is kept so a missing
    # sample still fails before any directory is created.
    r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
    sample_type = script_util.ws_get_type_name(
        logger, ws_client, ws_id, read_sample)
    sample_name = script_util.ws_get_obj_name4file(
        logger, ws_client, ws_id, read_sample)

    sample_stem = sample_name.split('.')[0]
    input_direc = os.path.join(directory, sample_stem + "_hisat2_input")
    if not os.path.exists(input_direc):
        os.mkdir(input_direc)
    output_dir = os.path.join(directory, sample_stem + "_hisat2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    print(directory)
    base = handler_util.get_file_with_suffix(directory, ".1.ht2")
    print(base)
    hisat2_base = os.path.join(directory, base)

    ### Adding advanced options to the hisat2 call
    hisat2_cmd = ' -p {0}'.format(self.num_threads)
    for key in ('quality_score', 'alignment_type'):
        value = params.get(key)
        if value is not None:
            hisat2_cmd += ' --' + value
    valued_options = (
        ('trim5', ' --trim5 '),
        ('trim3', ' --trim3 '),
        ('np', ' --np '),
        ('minins', ' --minins '),
        ('maxins', ' --maxins '),
        ('min_intron_length', ' --min-intronlen '),
        ('max_intron_length', ' --max-intronlen '),
    )
    for key, flag in valued_options:
        value = params.get(key)
        if value is not None:
            hisat2_cmd += flag + str(value)
    # Toggles: an unset (None) value means "off", like every other option;
    # any value other than None/0 switches the flag on.
    toggles = (
        ('no_spliced_alignment', ' --no-spliced-alignment'),
        ('transcriptome_mapping_only', ' --transcriptome-mapping-only'),
    )
    for key, flag in toggles:
        if params.get(key) not in (None, 0):
            hisat2_cmd += flag
    tailor_alignments = params.get('tailor_alignments')
    if tailor_alignments is not None:
        hisat2_cmd += ' --' + tailor_alignments

    out_file = output_dir + "/accepted_hits.sam"

    try:
        sample_ref = script_util.ws_get_ref(
            logger, ws_client, ws_id, read_sample)
        ds = script_util.ru_reads_download(
            logger, sample_ref, input_direc, token)
        logger.info(ds)
    except Exception as e:
        logger.exception(e)
        raise Exception(
            "Unable to download reads file , {0}".format(read_sample))

    single_end_types = ('KBaseAssembly.SingleEndLibrary',
                        'KBaseFile.SingleEndLibrary')
    paired_end_types = ('KBaseAssembly.PairedEndLibrary',
                        'KBaseFile.PairedEndLibrary')
    if sample_type in single_end_types:
        hisat2_cmd += " -U {0} -x {1} -S {2}".format(
            ds['fwd'], hisat2_base, out_file)
    elif sample_type in paired_end_types:
        # TODO: for KBaseFile libraries the orientation can be read from
        # the PEL object
        orientation = params.get('orientation')
        if orientation is not None:
            hisat2_cmd += ' --' + orientation
        hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            ds['fwd'], ds['rev'], hisat2_base, out_file)

    try:
        logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
        script_util.runProgram(logger, "hisat2", hisat2_cmd, None, directory)
    except Exception as e:
        logger.exception(e)
        raise Exception("Failed to run command {0}".format(hisat2_cmd))
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type,
                 num_threads, read_sample, condition, directory,
                 bowtie2index_id, genome_id, sampleset_id, params, token):
    """Download one read library from Shock and assemble its Bowtie2 arguments.

    The library object named ``read_sample`` is fetched from workspace
    ``ws_id``; its workspace type (text before the first ``-``) replaces the
    ``sample_type`` argument.  An output directory
    ``<sample>_bowtie2_alignment`` is created under ``directory`` when
    missing, the read file(s) are downloaded into ``directory`` and the
    Bowtie2 argument string is built from ``params``.

    Args:
        logger: Logger to use; when falsy one is created with
            ``create_logger(directory, "run_Bowtie2_" + read_sample)``.
        services: Mapping that provides ``shock_service_url``.
        ws_client: Client exposing ``get_objects`` and
            ``get_object_info_new``.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: Accepted
            for interface compatibility; not used in this block.
        ws_id: Workspace that holds ``read_sample``.
        sample_type: Ignored; overwritten from the workspace object info.
        num_threads: Value passed to ``-p``.
        read_sample: Name of the read library object.
        directory: Directory searched for the ``.rev.1.bt2`` file and used
            as the download target.
        params: Mapping of optional Bowtie2 settings; entries that are
            absent or ``None`` are skipped.
        token: Token forwarded to the Shock download helper.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.  Errors
            from the workspace lookups propagate unchanged.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        logger = create_logger(directory, "run_Bowtie2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects(
        [{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # Index 2 of the object info carries "<type>-<version>"; keep the type.
    sample_type = r_sample_info[2].split('-')[0]

    output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
    output_dir = os.path.join(directory, output_name)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    out_file = output_dir + "/accepted_hits.sam"
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to Bowtie2Call
    bowtie2_cmd = ' -p {0}'.format(num_threads)
    quality_score = params.get('quality_score')
    alignment_type = params.get('alignment_type')
    preset_options = params.get('preset_options')
    if quality_score is not None:
        bowtie2_cmd += ' --' + quality_score
    if alignment_type is not None:
        bowtie2_cmd += ' --' + alignment_type
    # A preset is only emitted together with an alignment type; in local
    # mode the preset name gets the "-local" suffix.
    if preset_options is not None and alignment_type is not None:
        if alignment_type == 'local':
            bowtie2_cmd += ' --' + preset_options + '-local'
        else:
            bowtie2_cmd += ' --' + preset_options
    # These option names are identical to their params keys.
    for key in ('trim5', 'trim3', 'np', 'minins', 'maxins'):
        value = params.get(key)
        if value is not None:
            bowtie2_cmd += ' --{0} '.format(key) + str(value)
    orientation = params.get('orientation')
    if orientation is not None:
        bowtie2_cmd += ' --' + orientation

    shock_url = services['shock_service_url']
    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=shock_url, shock_id=read_id,
                filename=read_name, directory=directory, token=token)
        except Exception as e:
            # Record the underlying cause before replacing it.
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0}".format(read_name))
        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(directory, read_name), bowtie2_base, out_file)
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            for shock_id, file_name in ((read1_id, read1_name),
                                        (read2_id, read2_name)):
                script_util.download_file_from_shock(
                    logger, shock_service_url=shock_url, shock_id=shock_id,
                    filename=file_name, directory=directory, token=token)
        except Exception as e:
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(directory, read1_name),
            os.path.join(directory, read2_name),
            bowtie2_base, out_file)
    # NOTE(review): bowtie2_cmd is assembled but never executed or returned
    # here -- confirm whether the invocation step is missing.
def runEach(self, task_params):
    """Align one read sample with TopHat.

    Looks up the read library named by ``task_params['job_id']``, downloads
    its reads into ``task_params['tophat_dir']``, builds the TopHat argument
    string from ``self.method_params`` and runs it through
    ``script_util.runProgram``.

    Args:
        task_params: Mapping providing ``job_id``, ``label``,
            ``tophat_dir``, ``ws_id``, ``annotation_id``, ``sampleset_id``
            and ``gtf_file``.

    Raises:
        Exception: If the reads cannot be downloaded or the tophat command
            fails.  Errors from the workspace lookups propagate unchanged.
    """
    ws_client = self.common_params['ws_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    read_sample = task_params['job_id']
    directory = task_params['tophat_dir']
    ws_id = task_params['ws_id']
    gtf_file = task_params['gtf_file']
    # Not used below; still read so that a task missing these keys fails
    # here exactly as before (assuming plain dict-like containers).
    hs = self.common_params['hs_client']
    condition = task_params['label']
    genome_id = task_params['annotation_id']
    sampleset_id = task_params['sampleset_id']

    print("Downloading Read Sample{0}".format(read_sample))
    logger.info("Downloading Read Sample{0}".format(read_sample))

    # The object itself is not used below; the lookup is kept so a missing
    # sample still fails before anything is downloaded.
    r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
    sample_type = script_util.ws_get_type_name(
        logger, ws_client, ws_id, read_sample)
    sample_name = script_util.ws_get_obj_name4file(
        logger, ws_client, ws_id, read_sample)

    output_name = sample_name.split('.')[0] + "_tophat_alignment"
    output_dir = os.path.join(directory, output_name)
    bowtie2_base = os.path.join(
        directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to the tophat call
    tophat_cmd = ' -p ' + str(self.num_threads)
    valued_options = (
        ('max_intron_length', ' -I '),
        ('min_intron_length', ' -i '),
        ('min_anchor_length', ' -a '),
        ('read_edit_dist', ' --read-edit-dist '),
        ('read_gap_length', ' --read-gap-length '),
        ('read_mismatches', ' -N '),
    )
    for key, flag in valued_options:
        value = params.get(key)
        if value is not None:
            tophat_cmd += flag + str(value)
    library_type = params.get('library_type')
    if library_type is not None:
        tophat_cmd += ' --library-type ' + library_type
    # A toggle that is present but None counts as "off" instead of
    # crashing in int(None).
    toggles = (
        ('report_secondary_alignments', ' --report-secondary-alignments'),
        ('no_coverage_search', ' --no-coverage-search'),
    )
    for key, flag in toggles:
        value = params.get(key)
        if value is not None and int(value) == 1:
            tophat_cmd += flag
    preset_options = params.get('preset_options')
    if preset_options is not None:
        tophat_cmd += ' --' + preset_options

    try:
        sample_ref = script_util.ws_get_ref(
            logger, ws_client, ws_id, read_sample)
        ds = script_util.ru_reads_download(
            logger, sample_ref, directory, token)
    except Exception as e:
        logger.exception(e)
        raise Exception(
            "Unable to download reads file , {0}".format(read_sample))

    single_end_types = ('KBaseAssembly.SingleEndLibrary',
                        'KBaseFile.SingleEndLibrary')
    paired_end_types = ('KBaseAssembly.PairedEndLibrary',
                        'KBaseFile.PairedEndLibrary')
    if sample_type in single_end_types:
        tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
            output_dir, gtf_file, bowtie2_base, ds['fwd'])
    elif sample_type in paired_end_types:
        # TODO: for KBaseFile libraries the orientation can be read from
        # the PEL object
        orientation = params.get('orientation')
        if orientation is not None:
            tophat_cmd += ' --' + orientation
        tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
            output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

    try:
        logger.info("Executing: tophat {0}".format(tophat_cmd))
        cmdline_output, cmd_err = script_util.runProgram(
            logger, "tophat", tophat_cmd, None, directory)
    except Exception as e:
        logger.exception(e)
        # runProgram did not return, so no captured output exists; the
        # exception text fills the error slot of the message.
        raise Exception("Failed to run command {0}\n{1}\n{2}".format(
            tophat_cmd, "", e))
def runEach(self, task_params):
    """Download one read sample and assemble its Bowtie2 arguments.

    Looks up the read library named by ``task_params['job_id']``, creates
    per-sample input and output directories under
    ``task_params['bowtie2_dir']``, downloads the read file(s) from Shock
    into the input directory and builds the Bowtie2 argument string from
    ``self.method_params``.

    Args:
        task_params: Mapping providing ``job_id``, ``label``,
            ``bowtie2_dir``, ``ws_id``, ``reads_type``, ``annotation_id``
            and ``sampleset_id``.

    Raises:
        Exception: If a read file cannot be downloaded from Shock.  Errors
            from the workspace lookups propagate unchanged.
    """
    ws_client = self.common_params['ws_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    read_sample = task_params['job_id']
    directory = task_params['bowtie2_dir']
    ws_id = task_params['ws_id']
    # Not used below; still read so that a task missing these keys fails
    # here exactly as before (assuming plain dict-like containers).
    hs = self.common_params['hs_client']
    condition = task_params['label']
    reads_type = task_params['reads_type']
    genome_id = task_params['annotation_id']
    sampleset_id = task_params['sampleset_id']

    print("Downloading Read Sample{0}".format(read_sample))
    logger.info("Downloading Read Sample{0}".format(read_sample))

    r_sample = ws_client.get_objects(
        [{'name': read_sample, 'workspace': ws_id}])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
    # Index 2 of the object info carries "<type>-<version>"; keep the type.
    sample_type = r_sample_info[2].split('-')[0]

    sample_stem = read_sample.split('.')[0]
    input_direc = os.path.join(directory, sample_stem + "_bowtie2_input")
    if not os.path.exists(input_direc):
        os.mkdir(input_direc)
    output_dir = os.path.join(directory, sample_stem + "_bowtie2_alignment")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
    bowtie2_base = os.path.join(directory, base)

    ### Adding advanced options to Bowtie2Call
    bowtie2_cmd = ' -p {0}'.format(self.num_threads)
    quality_score = params.get('quality_score')
    alignment_type = params.get('alignment_type')
    preset_options = params.get('preset_options')
    if quality_score is not None:
        bowtie2_cmd += ' --' + quality_score
    if alignment_type is not None:
        bowtie2_cmd += ' --' + alignment_type
    # A preset is only emitted together with an alignment type; in local
    # mode the preset name gets the "-local" suffix.
    if preset_options is not None and alignment_type is not None:
        if alignment_type == 'local':
            bowtie2_cmd += ' --' + preset_options + '-local'
        else:
            bowtie2_cmd += ' --' + preset_options
    # These option names are identical to their params keys.
    for key in ('trim5', 'trim3', 'np', 'minins', 'maxins'):
        value = params.get(key)
        if value is not None:
            bowtie2_cmd += ' --{0} '.format(key) + str(value)

    out_file = output_dir + "/accepted_hits.sam"
    shock_url = self.urls['shock_service_url']

    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger, shock_service_url=shock_url, shock_id=read_id,
                filename=read_name, directory=input_direc, token=token)
        except Exception as e:
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0}".format(read_name))
        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
            os.path.join(input_direc, read_name), bowtie2_base, out_file)
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        orientation = params.get('orientation')
        if orientation is not None:
            bowtie2_cmd += ' --' + orientation
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            for shock_id, file_name in ((read1_id, read1_name),
                                        (read2_id, read2_name)):
                script_util.download_file_from_shock(
                    logger, shock_service_url=shock_url, shock_id=shock_id,
                    filename=file_name, directory=input_direc, token=token)
        except Exception as e:
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
        # Both mates were downloaded into input_direc, so both paths must
        # point there.
        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
            os.path.join(input_direc, read1_name),
            os.path.join(input_direc, read2_name),
            bowtie2_base, out_file)
    # NOTE(review): bowtie2_cmd is assembled but never executed or returned
    # here -- confirm whether the invocation step is missing.
reads = sample['data']['sample_ids'] reads_type= sample['data']['Library_type'] if reads_type == 'PairedEnd': r_type = 'KBaseAssembly.PairedEndLibrary' else: r_type = 'KBaseAssembly.SingleEndLibrary' e_ws_objs = script_util.if_ws_obj_exists(None,ws_client,params['ws_id'],r_type,reads) missing_objs = [i for i in reads if not i in e_ws_objs] if len(e_ws_objs) != len(reads): raise Exception('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id'])) ### Build Hisat2 index fasta_file = script_util.generate_fasta(logger,services,token,annotation_id,hisat2_dir,params['genome_id']) logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow())) hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fasta")) hisat2base_cmd = '{0} {1}'.format(fasta_file,hisat2base) try: logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd)) cmdline_output = script_util.runProgram(logger,"hisat2-build",hisat2base_cmd,None,hisat2_dir) except Exception,e: raise Exception("Failed to run command {0}".format(hisat2base_cmd)) ws_gtf = params['genome_id']+"_GTF" ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf]) print ret if not ret is None: logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(params['genome_id'])) annot_name,annot_id = ret[0] gtf_obj=ws_client.get_objects([{'ref' : annot_id}])[0] gtf_id=gtf_obj['data']['handle']['id'] gtf_name=gtf_obj['data']['handle']['file_name']
bw_name = bowtie_index['data']['handle']['file_name'] genome_id = bowtie_index['data']['genome_id'] annotation_gtf = ws_client.get_object_info([{"ref" :genome_id}],includeMetadata=None)[0][1] shared_files={} shared_files[bw_name] = bw_id script_util.download_shock_files(logger,self.urls['shock_service_url'],tophat_dir,shared_files,token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger,os.path.join(tophat_dir,bw_name),tophat_dir) mv_dir= handler_util.get_dir(tophat_dir) if mv_dir is not None: script_util.move_files(logger,mv_dir,tophat_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error") fasta_file =os.path.join(tophat_dir,(handler_util.get_file_with_suffix(tophat_dir,".fa")+".fa")) bowtie2base =os.path.join(tophat_dir,handler_util.get_file_with_suffix(tophat_dir,".rev.1.bt2")) ### Check if GTF annotation object exist or skip this step ### Check if the gtf object exists in the workspace ### Only run create_gtf_annotation if object doesnt exist ws_gtf = annotation_gtf+"_GTF_Annotation" genome_name = script_util.ws_get_obj_name( logger, ws_client, params['ws_id'], genome_id ) gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",tophat_dir,token) if gtf_file is None: gtf_file = rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],genome_id,genome_name,tophat_dir,token) #ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf]) # this line should be safe from reference #if not ret is None: # logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_gtf)) # annot_name,annot_id = ret[0]
script_util.download_shock_files(logger, self.urls['shock_service_url'], tophat_dir, shared_files, token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger, os.path.join(tophat_dir, bw_name), tophat_dir) mv_dir = handler_util.get_dir(tophat_dir) if mv_dir is not None: script_util.move_files(logger, mv_dir, tophat_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error") fasta_file = os.path.join( tophat_dir, (handler_util.get_file_with_suffix(tophat_dir, ".fa") + ".fa")) bowtie2base = os.path.join( tophat_dir, handler_util.get_file_with_suffix(tophat_dir, ".rev.1.bt2")) ### Check if GTF annotation object exist or skip this step ### Check if the gtf object exists in the workspace ### Only run create_gtf_annotation if object doesnt exist ws_gtf = annotation_gtf + "_GTF_Annotation" gtf_file = script_util.check_and_download_existing_handle_obj( logger, ws_client, self.urls, params['ws_id'], ws_gtf, "KBaseRNASeq.GFFAnnotation", tophat_dir, token) if gtf_file is None: gtf_file = rnaseq_util.create_gtf_annotation_from_genome( logger, ws_client, hs, self.urls, params['ws_id'], ref_id, genome_name, tophat_dir, token)
def _tophat_option_string(num_threads, params):
    """Build the leading option portion of the tophat command line.

    The string starts with `` -p <num_threads>`` and gains one switch for
    every advanced option in ``params`` that is present and not ``None``.
    Switch order matches the order in which the options are listed below.

    Args:
        num_threads: value rendered after ``-p`` via ``str()``.
        params: mapping of option names to values; only membership tests and
            item lookups are performed on it.

    Returns:
        The option string, beginning with a single space.

    Raises:
        ValueError/TypeError: if a 0/1 toggle holds a value ``int()`` cannot
            convert (``None`` is treated as "not set" and does not raise).
    """
    # Options whose value follows the switch, rendered with str().
    valued_switches = (
        ('max_intron_length', ' -I '),
        ('min_intron_length', ' -i '),
        ('min_anchor_length', ' -a '),
        ('read_edit_dist', ' --read-edit-dist '),
        ('read_gap_length', ' --read-gap-length '),
        ('read_mismatches', ' -N '),
    )
    # 0/1 toggles that contribute a bare switch when the value equals 1.
    toggle_switches = (
        ('report_secondary_alignments', ' --report-secondary-alignments'),
        ('no_coverage_search', ' --no-coverage-search'),
    )

    def is_set(key):
        return key in params and params[key] is not None

    cmd = ' -p ' + str(num_threads)
    for key, switch in valued_switches:
        if is_set(key):
            cmd += switch + str(params[key])
    if is_set('library_type'):
        # Concatenated directly (no str()), as the value is expected to be
        # text already.
        cmd += ' --library-type ' + params['library_type']
    for key, switch in toggle_switches:
        # A key that is present but None used to reach int(None) and raise
        # TypeError; it is now handled like every other unset option.
        if is_set(key) and int(params[key]) == 1:
            cmd += switch
    if is_set('preset_options'):
        cmd += ' --' + params['preset_options']
    return cmd


def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, gtf_file, condition, directory,
                bowtie2index_id, genome_id, sampleset_id, params, token):
    """Fetch one read sample and assemble the tophat command line for it.

    The sample object and its type are looked up in the workspace, the read
    file(s) are downloaded from Shock into ``directory``, and the tophat
    arguments (options, output directory, GTF file, bowtie2 index base and
    read files) are accumulated in a local command string.

    NOTE(review): the definition of this function in SOURCE is cut off right
    after the command string is assembled. The ``try:`` that opened its body
    has no visible handler and no code that executes tophat or returns a
    value is visible, so this body covers the visible logic only and
    returns ``None``. Confirm against the complete function before relying
    on it.

    Args:
        logger: logger to use; when falsy a new one is created through
            ``handler_util.create_logger``.
        services: mapping that provides ``'shock_service_url'``.
        ws_client: workspace client offering ``get_objects`` and
            ``get_object_info_new``.
        hs: not used by the visible code.
        ws_id: workspace holding ``read_sample``.
        sample_type: ignored on input; overwritten with the type name read
            from the workspace object info.
        num_threads: value passed to tophat's ``-p`` option.
        read_sample: name of the read library object in the workspace.
        gtf_file: path passed to tophat's ``-G`` option.
        condition: not used by the visible code.
        directory: working directory for the index lookup and downloads.
        bowtie2index_id: not used by the visible code.
        genome_id: not used by the visible code.
        sampleset_id: not used by the visible code.
        params: mapping of advanced tophat options.
        token: auth token forwarded to the Shock download helper.

    Raises:
        Exception: when a read file cannot be downloaded from Shock.
    """
    # The parenthesised form prints the same text under the Python 2 print
    # statement used by the rest of this file.
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        # Go through handler_util like the sibling _CallHisat2 does, rather
        # than relying on a bare create_logger name being in scope.
        logger = handler_util.create_logger(directory,
                                            "run_Tophat_" + read_sample)

    r_sample = ws_client.get_objects([{
        'name': read_sample,
        'workspace': ws_id
    }])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{
            'name': read_sample,
            'workspace': ws_id
        }]})[0]
    # Element 2 of the object info is split on '-' and the leading part is
    # what the library-type comparisons below are made against.
    sample_type = r_sample_info[2].split('-')[0]

    output_name = read_sample.split('.')[0] + "_tophat_alignment"
    # The directory is only named here; the makedirs call is commented out
    # in SOURCE.
    output_dir = os.path.join(directory, output_name)
    bowtie2_base = os.path.join(
        directory,
        handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    tophat_cmd = _tophat_option_string(num_threads, params)

    # NOTE(review): lib_type is assigned below but not read anywhere in the
    # visible part of this function.
    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=read_id,
                filename=read_name,
                directory=directory,
                token=token)
        except Exception as e:
            # Record the real cause before replacing it with the generic
            # message callers already see.
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0}".format(read_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read_name))
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            for shock_id, file_name in ((read1_id, read1_name),
                                        (read2_id, read2_name)):
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=shock_id,
                    filename=file_name,
                    directory=directory,
                    token=token)
        except Exception as e:
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
        tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
            output_dir, gtf_file, bowtie2_base,
            os.path.join(directory, read1_name),
            os.path.join(directory, read2_name))
bw_name = bowtie_index['data']['handle']['file_name'] genome_id = bowtie_index['data']['genome_id'] annotation_gtf = ws_client.get_object_info([{"ref" :genome_id}],includeMetadata=None)[0][1] shared_files={} shared_files[bw_name] = bw_id script_util.download_shock_files(logger,self.urls['shock_service_url'],tophat_dir,shared_files,token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger,os.path.join(tophat_dir,bw_name),tophat_dir) mv_dir= handler_util.get_dir(tophat_dir) if mv_dir is not None: script_util.move_files(logger,mv_dir,tophat_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error: Please contact [email protected]") fasta_file =os.path.join(tophat_dir,(handler_util.get_file_with_suffix(tophat_dir,".fa")+".fa")) bowtie2base =os.path.join(tophat_dir,handler_util.get_file_with_suffix(tophat_dir,".rev.1.bt2")) ### Check if GTF annotation object exist or skip this step ### Check if the gtf object exists in the workspace ### Only run create_gtf_annotation if object doesnt exist ws_gtf = annotation_gtf+"_GTF_Annotation" ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf]) if not ret is None: logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_gtf)) annot_name,annot_id = ret[0] gtf_obj=ws_client.get_objects([{'ref' : annot_id}])[0] gtf_id=gtf_obj['data']['handle']['id'] gtf_name=gtf_obj['data']['handle']['file_name'] try: script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=tophat_dir,token=token)
token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger, os.path.join(bowtie2_dir, bw_index_files), bowtie2_dir) mv_dir = handler_util.get_dir(bowtie2_dir) if mv_dir is not None: script_util.move_files(logger, mv_dir, bowtie2_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error") ### Build Index for the fasta file fasta_file = os.path.join( bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa") + ".fa") bowtie2base = os.path.join( bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa")) bowtie2base_cmd = '{0} {1}'.format(fasta_file, bowtie2base) try: logger.info( "Building Index for Hisat2 {0}".format(bowtie2base_cmd)) cmdline_output = script_util.runProgram(logger, "bowtie2-build", bowtie2base_cmd, None, bowtie2_dir) except Exception, e: raise Exception( "Failed to run command {0}".format(bowtie2base_cmd)) ### Check if GTF object exists in the workspace pull the gtf ref_id = bowtie_index['data']['genome_id'] genome_name = ws_client.get_object_info_new(
def runEach(self, task_params): ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] read_sample = task_params['job_id'] condition = task_params['label'] directory = task_params['tophat_dir'] ws_id = task_params['ws_id'] genome_id = task_params['annotation_id'] sampleset_id = task_params['sampleset_id'] gtf_file = task_params['gtf_file'] print "Downloading Read Sample{0}".format(read_sample) logger.info("Downloading Read Sample{0}".format(read_sample)) try: #r_sample = ws_client.get_objects( # [{ 'name' : read_sample, 'workspace' : ws_id}])[0] r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0] #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0] #sample_type = r_sample_info[2].split('-')[0] sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample) sample_name = script_util.ws_get_obj_name4file( self.logger, ws_client, ws_id, read_sample) output_name = sample_name.split('.')[0] + "_tophat_alignment" output_dir = os.path.join(directory, output_name) #if not os.path.exists(output_dir): os.makedirs(output_dir) #out_file = output_dir +"/accepted_hits.sam" bowtie2_base = os.path.join( directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2")) ### Adding advanced options to Bowtie2Call tophat_cmd = (' -p ' + str(self.num_threads)) if ('max_intron_length' in params and params['max_intron_length'] is not None): tophat_cmd += (' -I ' + str(params['max_intron_length'])) if ('min_intron_length' in params and params['min_intron_length'] is not None): tophat_cmd += (' -i ' + str(params['min_intron_length'])) if ('min_anchor_length' in params and params['min_anchor_length'] is not None): tophat_cmd += (' -a ' + str(params['min_anchor_length'])) if ('read_edit_dist' in params and params['read_edit_dist'] is not None): tophat_cmd += (' --read-edit-dist 
' + str(params['read_edit_dist'])) if ('read_gap_length' in params and params['read_gap_length'] is not None): tophat_cmd += (' --read-gap-length ' + str(params['read_gap_length'])) if ('read_mismatches' in params and params['read_mismatches'] is not None): tophat_cmd += (' -N ' + str(params['read_mismatches'])) if ('library_type' in params and params['library_type'] is not None): tophat_cmd += (' --library-type ' + params['library_type']) if ('report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1): tophat_cmd += ' --report-secondary-alignments' if ('no_coverage_search' in params and int(params['no_coverage_search']) == 1): tophat_cmd += ' --no-coverage-search' if ('preset_options' in params and params['preset_options'] is not None): tophat_cmd += ' --' + params['preset_options'] #out_file = output_dir +"/accepted_hits.sam" try: sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample) ds = script_util.ru_reads_download(self.logger, sample_ref, directory, token) except Exception, e: self.logger.exception(e) raise Exception( "Unable to download reads file , {0}".format(read_sample)) if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary': lib_type = 'SingleEnd' tophat_cmd += ' -o {0} -G {1} {2} {3}'.format( output_dir, gtf_file, bowtie2_base, ds['fwd']) if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': lib_type = 'PairedEnd' if sample_type == 'KBaseAssembly.PairedEndLibrary': if ('orientation' in params and params['orientation'] is not None): tophat_cmd += (' --' + params['orientation']) else: # TODO: the following can be read from PEL object if ('orientation' in params and params['orientation'] is not None): tophat_cmd += (' --' + params['orientation']) tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format( output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev']) # if sample_type == 'KBaseAssembly.SingleEndLibrary' or 
sample_type == 'KBaseFile.SingleEndLibrary': # lib_type = 'SingleEnd' # if sample_type == 'KBaseAssembly.SingleEndLibrary': # read_id = r_sample['data']['handle']['id'] # read_name = r_sample['data']['handle']['file_name'] # else: # read_id = r_sample['data']['lib']['file']['id'] # read_name = r_sample['data']['lib']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token) # tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name)) # except Exception,e: # self.logger.exception(e) # raise Exception( "Unable to download shock file , {0}".format(read_name)) # if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': # lib_type = 'PairedEnd' # if sample_type == 'KBaseAssembly.PairedEndLibrary': # if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['handle_1']['id'] # read1_name = r_sample['data']['handle_1']['file_name'] # read2_id = r_sample['data']['handle_2']['id'] # read2_name = r_sample['data']['handle_2']['file_name'] # else: # # TODO: the following can be read from PEL object # if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['lib1']['file']['id'] # read1_name = r_sample['data']['lib1']['file']['file_name'] # read2_id = r_sample['data']['lib2']['file']['id'] # read2_name = r_sample['data']['lib2']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token) # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, 
directory=directory,token=token) # tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name)) # except Exception,e: # raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) try: self.logger.info("Executing: tophat {0}".format(tophat_cmd)) cmdline_output, cmd_err = script_util.runProgram( self.logger, "tophat", tophat_cmd, None, directory) except Exception, e: raise Exception("Failed to run command {0}\n{1}\n{2}".format( tophat_cmd, cmdline_output, cmd_err))