# NOTE(review): Python 2 function collapsed onto one physical line in this
# snippet and truncated — the outer `try:` opened after the logger check has
# no matching `except` in the visible code, so the tail of the function is
# missing here. Left byte-identical for that reason.
# BUG(review): the shock-download error path does .format(i_name), but the
# only assignment to i_name is commented out earlier on this line, so a failed
# download raises NameError instead of the intended message — it should format
# a_filename instead.
def _CallCufflinks(logger,services,ws_client,hs,ws_id,num_threads,s_alignment,gtf_file,directory,genome_id,annotation_id,sample_id,alignmentset_id,params,token): print "Downloading Read Sample{0}".format(s_alignment) alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1] if not logger: logger = create_logger(directory,"run_Cufflinks_"+alignment_name) try: alignment = ws_client.get_objects( [{ 'ref' : s_alignment }])[0] #alignment_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0] #sample_type = r_sample_info[2].split('-')[0] output_name = alignment_name.split('_alignment')[0]+"_cufflinks_expression" output_dir = os.path.join(directory,output_name) #Download Alignment from shock a_file_id = alignment['data']['file']['id'] a_filename = alignment['data']['file']['file_name'] condition = alignment['data']['condition'] #i_name = alignment_name+"_"+a_filename #if replicate_id in alignment['data'] : replicate_id = alignment['data']['replicate_id'] try: script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=directory,token=token) except Exception,e: raise Exception( "Unable to download shock file, {0}".format(i_name)) try: input_dir = os.path.join(directory,alignment_name) if not os.path.exists(input_dir): os.mkdir(input_dir) script_util.unzip_files(logger,os.path.join(directory,a_filename), input_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip alignment files")
# NOTE(review): collapsed/truncated Python 2 snippet — the outer `try:` has no
# matching `except` in the visible code, so the function tail is missing here.
# Left byte-identical for that reason.
# BUG(review): the download error path calls "".join.tracback.format_exc() —
# 'tracback' is a typo and str.join is never actually called, so formatting
# the error message itself raises AttributeError. The intended expression is
# "".join(traceback.format_exc()).
def _CallStringtie(logger,services,ws_client,hs,ws_id,num_threads,s_alignment,gtf_file,directory,genome_id,annotation_id,sample_id,alignmentset_id,params,token): print "Downloading Read Sample{0}".format(s_alignment) alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1] if not logger: logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name) try: alignment = ws_client.get_objects( [{ 'ref' : s_alignment }])[0] input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input") if not os.path.exists(input_direc) : os.mkdir(input_direc) output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression" output_dir = os.path.join(directory,output_name) #Download Alignment from shock a_file_id = alignment['data']['file']['id'] a_filename = alignment['data']['file']['file_name'] condition = alignment['data']['condition'] try: script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token) except Exception,e: raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join.tracback.format_exc())) try: input_dir = os.path.join(input_direc,alignment_name) if not os.path.exists(input_dir): os.mkdir(input_dir) script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip alignment files error")
# NOTE(review): collapsed/truncated Python 2 method — the outer `try:` has no
# matching `except` in the visible code, and the final string literal is split
# across two physical lines by the snippet extraction. Left byte-identical.
# BUG(review): in the unzip handler, `raise Exception(e)` makes the following
# logger.error(...) and the second raise unreachable — the traceback is never
# logged and the "Unzip alignment files error" message is never used. The
# raise-wrapped Exception(e) also discards the original traceback.
def runEach(self,task_params): ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] s_alignment = task_params['job_id'] gtf_file = task_params['gtf_file'] directory = task_params['stringtie_dir'] genome_id = task_params['genome_id'] annotation_id = task_params['annotation_id'] sample_id = task_params['sample_id'] alignmentset_id = task_params['alignmentset_id'] ws_id = task_params['ws_id'] print "Downloading Sample Alignment from workspace {0}".format(s_alignment) logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment)) alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1] if not logger: logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name) try: alignment = ws_client.get_objects( [{ 'ref' : s_alignment }])[0] input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input") if not os.path.exists(input_direc) : os.mkdir(input_direc) output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression" output_dir = os.path.join(directory,output_name) #Download Alignment from shock a_file_id = alignment['data']['file']['id'] a_filename = alignment['data']['file']['file_name'] condition = alignment['data']['condition'] try: script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token) except Exception,e: raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join(traceback.format_exc()))) try: input_dir = os.path.join(input_direc,alignment_name) if not os.path.exists(input_dir): os.mkdir(input_dir) script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir) except Exception, e: raise Exception(e) logger.error("".join(traceback.format_exc())) raise Exception("Unzip alignment files 
error")
# NOTE(review): auto-formatted duplicate of the _CallCufflinks snippet above,
# likewise truncated — the outer `try:` has no matching `except` in the
# visible code. Left byte-identical for that reason.
# BUG(review): the shock-download error path does .format(i_name), but the
# only assignment to i_name is commented out, so a failed download raises
# NameError instead of the intended message — should format a_filename.
def _CallCufflinks(logger, services, ws_client, hs, ws_id, num_threads, s_alignment, gtf_file, directory, genome_id, annotation_id, sample_id, alignmentset_id, params, token): print "Downloading Read Sample{0}".format(s_alignment) alignment_name = ws_client.get_object_info([{ "ref": s_alignment }], includeMetadata=None)[0][1] if not logger: logger = create_logger(directory, "run_Cufflinks_" + alignment_name) try: alignment = ws_client.get_objects([{'ref': s_alignment}])[0] #alignment_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0] #sample_type = r_sample_info[2].split('-')[0] output_name = alignment_name.split( '_alignment')[0] + "_cufflinks_expression" output_dir = os.path.join(directory, output_name) #Download Alignment from shock a_file_id = alignment['data']['file']['id'] a_filename = alignment['data']['file']['file_name'] condition = alignment['data']['condition'] #i_name = alignment_name+"_"+a_filename #if replicate_id in alignment['data'] : replicate_id = alignment['data']['replicate_id'] try: script_util.download_file_from_shock( logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id, filename=a_filename, directory=directory, token=token) except Exception, e: raise Exception( "Unable to download shock file, {0}".format(i_name)) try: input_dir = os.path.join(directory, alignment_name) if not os.path.exists(input_dir): os.mkdir(input_dir) script_util.unzip_files(logger, os.path.join(directory, a_filename), input_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip alignment files")
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):
    """Download a cuffdiff result archive from Shock and unpack it.

    Parameters:
        logger     -- logger used for progress/error reporting.
        shock_url  -- base URL of the Shock service.
        scratch    -- local scratch directory the archive is downloaded into.
        s_res      -- workspace object list; s_res[0]['data']['file'] carries
                      the Shock handle ({'id': ..., 'file_name': ...}).
        user_token -- auth token passed through to the Shock download.

    Returns:
        The extraction directory path (<scratch>/cuffdiffData/cuffdiff) on
        success, or False when extraction fails or produces no files.
    """
    # Shock id and filename of the archived cuffdiff output.
    cuffdiff_shock_id = s_res[0]['data']['file']['id']
    cuffdiff_file_name = s_res[0]['data']['file']['file_name']
    # filesize is unknown; passed as None, matching the helper's signature.
    script_util.download_file_from_shock(logger, shock_url, cuffdiff_shock_id,
                                         cuffdiff_file_name, scratch,
                                         None, user_token)
    # Decompress the archive into <scratch>/cuffdiffData/cuffdiff.
    # (local renamed from `zipfile` to avoid shadowing the stdlib module)
    zip_path = join(scratch, cuffdiff_file_name)
    dstnExtractFolder1 = join(scratch, "cuffdiffData")
    dstnExtractFolder = join(dstnExtractFolder1, "cuffdiff")
    if not os.path.exists(dstnExtractFolder):
        os.makedirs(dstnExtractFolder)
    unzipStatus = script_util.unzip_files(logger, zip_path, dstnExtractFolder)
    # `== False` kept deliberately: unzip_files may return None, which the
    # original code treats as success.
    if unzipStatus == False:
        logger.info("Problem extracting the archive")
        return False
    if len(os.listdir(dstnExtractFolder)) == 0:
        logger.info("Problem extracting the archive")
        return False
    return dstnExtractFolder
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):
    """Fetch a cuffdiff archive from Shock and unpack it under *scratch*.

    Returns the extraction directory on success, otherwise False.
    """
    failure = False
    # Shock handle (id + filename) of the archived cuffdiff output.
    shock_file = s_res[0]['data']['file']
    archive_id = shock_file['id']
    archive_name = shock_file['file_name']
    script_util.download_file_from_shock(logger, shock_url, archive_id,
                                         archive_name, scratch, None,
                                         user_token)
    # Unpack into <scratch>/cuffdiffData/cuffdiff.
    archive_path = join(scratch, archive_name)
    extract_dir = join(join(scratch, "cuffdiffData"), "cuffdiff")
    if not os.path.exists(extract_dir):
        os.makedirs(extract_dir)
    extract_status = script_util.unzip_files(logger, archive_path,
                                             extract_dir)
    if extract_status == False:
        logger.info("Problem extracting the archive")
        return failure
    if len(os.listdir(extract_dir)) == 0:
        logger.info("Problem extracting the archive")
        return failure
    return extract_dir
class Bowtie2SampleSet(Bowtie2): def __init__(self, logger, directory, urls, max_cores): super(self.__class__, self).__init__(logger, directory, urls, max_cores) # user defined shared variables across methods self.sample = None self.sampleset_info = None #self.num_threads = None def prepare(self): # for quick testing, we recover parameters here ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] bowtie2_dir = self.directory try: #sample,bowtie_index = ws_client.get_objects( # [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']}, # { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}]) sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['sampleset_id'])[0] bowtie_index = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['bowtie_index'])[0] self.sample = sample except Exception, e: logger.exception("".join(traceback.format_exc())) raise ValueError(" Error Downloading objects from the workspace ") ### Get obejct IDs #sampleset_info,bowtie_index_info = ws_client.get_object_info_new({"objects": [ # {'name': params['sampleset_id'], 'workspace': params['ws_id']}, # {'name': params['bowtie_index'], 'workspace': params['ws_id']} # ]}) sampleset_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['sampleset_id'])[0] #bowtie_index_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['bowtie_index'])[0] ### Get the workspace object ids for the objects ### sampleset_id = str(sampleset_info[6]) + '/' + str( sampleset_info[0]) + '/' + str(sampleset_info[4]) #bowtie_index_id = str(bowtie_index_info[6]) + '/' + str(bowtie_index_info[0]) + '/' + str(bowtie_index_info[4]) self.sampleset_info = sampleset_info ### Get the workspace object ids for the objects ### sample_type = sampleset_info[2].split('-')[0] ### Check if the Library objects exist in the 
same workspace if not (sample_type == 'KBaseRNASeq.RNASeqSampleSet' or sample_type == 'KBaseSets.ReadsSet'): raise Bowtie2SampleSetException( 'RNASeqSampleSet or ReadsSet is required') #logger.info("Check if the Library objects do exist in the current workspace") (reads, r_label) = rnaseq_util.get_reads_conditions(logger, sample, sample_type) #reads = sample['data']['sample_ids'] #r_label = sample['data']['condition'] #reads_type= sample['data']['Library_type'] #e_ws_objs = script_util.if_ws_obj_exists_notype(None,ws_client,params['ws_id'],reads) # removed read type as it will be added only if it satisfies input types #missing_objs = [i for i in reads if not i in e_ws_objs] #if len(e_ws_objs) != len(reads): # raise Bowtie2SampleSetException('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id'])) self.num_jobs = len(reads) bw_index_files = script_util.check_and_download_existing_handle_obj( logger, ws_client, self.urls, params['ws_id'], params['bowtie_index'], "KBaseRNASeq.Bowtie2Indexes", bowtie2_dir, token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger, os.path.join(bowtie2_dir, bw_index_files), bowtie2_dir) mv_dir = handler_util.get_dir(bowtie2_dir) if mv_dir is not None: script_util.move_files(logger, mv_dir, bowtie2_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error")
# NOTE(review): collapsed/truncated Python 2 method — the outer `try:` opened
# at the top has no matching `except` in the visible code, so the tail of the
# method is missing here. Left byte-identical for that reason.
# NOTE(review): the handler around get_object_subset is a bare `except:`
# (swallows everything, including KeyboardInterrupt), and both failure paths
# log params['blastindex_name'] but raise with params['genome_ids'][0] — the
# logged and raised messages should agree (blastindex_name looks intended).
# TODO confirm against the full file before changing.
def blast_against_genome(self, ctx, params): # ctx is the context object # return variables are: returnVal #BEGIN blast_against_genome # TODO: Rename blast_search try: self.__LOGGER.info( "Preparing FA") if len(params['query']) > 5: sequence=params['query'] else: self.__LOGGER.error("The input sequence is too short!") raise KBaseGenomeUtilException("The input sequence is too short!") if not os.path.exists(self.__TEMP_DIR): os.makedirs(self.__TEMP_DIR) #print "generate input file for query sequence\n" query_fn = "%s/%s" %(self.__TEMP_DIR, self.__QUERY_FA) target=open(query_fn,'w') if sequence.startswith(">"): target.write(sequence) else: seqes = sequence.split("\n") for i in range(len(seqes)): target.write(">query_seq_%d\n" %(i)) target.write(seqes[i]) target.close() user_token=ctx['token'] svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token ws_client=Workspace(url=self.__WS_URL, token=user_token) err_msg = "" blast_dir =self.__BLAST_DIR if os.path.exists(blast_dir): files=glob.glob("%s/*" % blast_dir) for f in files: os.remove(f) if not os.path.exists(blast_dir): os.makedirs(blast_dir) target_fn = "%s/%s" %( blast_dir, self.__GENOME_FA) if 'target_seqs' in params: # let's build index directly and throw away sequence = params['target_seqs'] target=open(target_fn,'w') if sequence.startswith(">"): target.write(sequence) else: seqes = sequence.split("\n") for i in range(len(seqes)): target.write(">target_seq_%d\n" %(i)) target.write(seqes[i]) target.close() if(self.__INDEX_TYPE[params['blast_program']] == 'protein_db'): formatdb_type='T' elif(self.__INDEX_TYPE[params['blast_program']] == 'transcript_db'): formatdb_type='F' else: self.__LOGGER.error("{0} is not yet supported".format(params['blast_program'])) raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program'])) cmdstring="%s -i %s -p %s -o T" %(self.__INDEX_CMD, target_fn, formatdb_type) # TODO: replace it to subprocess.Popen tool_process = 
subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True) stdout, stderr = tool_process.communicate() if stdout is not None and len(stdout) > 0: self.__LOGGER.info(stdout) if stderr is not None and len(stderr) > 0: self.__LOGGER.error("Index error: " + stderr) raise KBaseGenomeUtilException("Index error: " + stderr) else: try: blast_indexes=ws_client.get_object_subset([{'name':params['blastindex_name'], 'workspace': params['ws_id'], 'included':['handle', 'index_type']}]) except: self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name'])) raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['genome_ids'][0])) if len(blast_indexes) < 1: self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name'])) raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['genome_ids'][0])) # TODO: Add err handling zip_fn = blast_indexes[0]['data']['handle']['file_name'] target_fn = "%s/%s" %(blast_dir, zip_fn[:-4]) # remove '.zip' if(self.__INDEX_TYPE[params['blast_program']] == 'protein_db'): target_fn += '_aa.fa' if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "nucleotide": self.__LOGGER.error("The index object does not contain amino acid sequence indexes") raise KBaseGenomeUtilException("The index object does not contain amino acid sequence indexes") elif(self.__INDEX_TYPE[params['blast_program']] == 'transcript_db'): target_fn += '_nt.fa' if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "protein": self.__LOGGER.error("The index object does not contain nucleotide sequence indexes") raise KBaseGenomeUtilException("The index object does not contain nucleotide sequence indexes") else: self.__LOGGER.error("{0} is not yet supported".format(params['blast_program'])) raise KBaseGenomeUtilException("{0} is not yet 
supported".format(params['blast_program'])) # TODO: Add err handling zip_fn = blast_indexes[0]['data']['handle']['file_name'] #pprint(blast_indexes[0]) self.__LOGGER.info("Downloading the genome index") #hs = HandleService(url=self.__HS_URL, token=user_token) try: script_util.download_file_from_shock(self.__LOGGER, shock_service_url= blast_indexes[0]['data']['handle']['url'], shock_id= blast_indexes[0]['data']['handle']['id'], filename= blast_indexes[0]['data']['handle']['file_name'], directory= '.', token = user_token) except Exception, e: self.__LOGGER.error("Downloading error from shock: Please contact [email protected]") raise KBaseGenomeUtilException("Downloading error from shock: Please contact [email protected]") try: script_util.unzip_files(self.__LOGGER, zip_fn, blast_dir) except Exception, e: self.__LOGGER.error("Unzip indexfile error: Please contact [email protected]") raise KBaseGenomeUtilException("Unzip indexfile error: Please contact [email protected]")
# NOTE(review): near-duplicate of the blast_against_genome snippet above
# (this copy carries longer index-mismatch error messages), likewise
# truncated — the outer `try:` has no matching `except` in the visible code.
# Left byte-identical for that reason.
# NOTE(review): same issues as the copy above: bare `except:` around
# get_object_subset, and failure paths that log params['blastindex_name'] but
# raise with params['genome_ids'][0] — the messages should agree. TODO confirm
# against the full file before changing.
def blast_against_genome(self, ctx, params): # ctx is the context object # return variables are: returnVal #BEGIN blast_against_genome # TODO: Rename blast_search try: self.__LOGGER.info( "Preparing FA") if len(params['query']) > 5: sequence=params['query'] else: self.__LOGGER.error("The input sequence is too short!") raise KBaseGenomeUtilException("The input sequence is too short!") if not os.path.exists(self.__TEMP_DIR): os.makedirs(self.__TEMP_DIR) #print "generate input file for query sequence\n" query_fn = "%s/%s" %(self.__TEMP_DIR, self.__QUERY_FA) target=open(query_fn,'w') if sequence.startswith(">"): target.write(sequence) else: seqes = sequence.split("\n") for i in range(len(seqes)): target.write(">query_seq_%d\n" %(i)) target.write(seqes[i]) target.close() user_token=ctx['token'] svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token ws_client=Workspace(url=self.__WS_URL, token=user_token) err_msg = "" blast_dir =self.__BLAST_DIR if os.path.exists(blast_dir): files=glob.glob("%s/*" % blast_dir) for f in files: os.remove(f) if not os.path.exists(blast_dir): os.makedirs(blast_dir) target_fn = "%s/%s" %( blast_dir, self.__GENOME_FA) if 'target_seqs' in params: # let's build index directly and throw away sequence = params['target_seqs'] target=open(target_fn,'w') if sequence.startswith(">"): target.write(sequence) else: seqes = sequence.split("\n") for i in range(len(seqes)): target.write(">target_seq_%d\n" %(i)) target.write(seqes[i]) target.close() if(self.__INDEX_TYPE[params['blast_program']] == 'protein_db'): formatdb_type='T' elif(self.__INDEX_TYPE[params['blast_program']] == 'transcript_db'): formatdb_type='F' else: self.__LOGGER.error("{0} is not yet supported".format(params['blast_program'])) raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program'])) cmdstring="%s -i %s -p %s -o T" %(self.__INDEX_CMD, target_fn, formatdb_type) # TODO: replace it to subprocess.Popen tool_process = 
subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True) stdout, stderr = tool_process.communicate() if stdout is not None and len(stdout) > 0: self.__LOGGER.info(stdout) if stderr is not None and len(stderr) > 0: self.__LOGGER.error("Index error: " + stderr) raise KBaseGenomeUtilException("Index error: " + stderr) else: try: blast_indexes=ws_client.get_object_subset([{'name':params['blastindex_name'], 'workspace': params['ws_id'], 'included':['handle', 'index_type']}]) except: self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name'])) raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['genome_ids'][0])) if len(blast_indexes) < 1: self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name'])) raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['genome_ids'][0])) # TODO: Add err handling zip_fn = blast_indexes[0]['data']['handle']['file_name'] target_fn = "%s/%s" %(blast_dir, zip_fn[:-4]) # remove '.zip' if(self.__INDEX_TYPE[params['blast_program']] == 'protein_db'): target_fn += '_aa.fa' if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "nucleotide": self.__LOGGER.error("The index object does not contain amino acid sequence indexes") raise KBaseGenomeUtilException("The index object does not contain amino acid sequence indexes. 
This index will only work with blastn (nucleotide query, nucleotide index), tblastx(protein query, nucleotide index) and tblastx(nucleotide query, nucleotide index)") elif(self.__INDEX_TYPE[params['blast_program']] == 'transcript_db'): target_fn += '_nt.fa' if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "protein": self.__LOGGER.error("The index object does not contain nucleotide sequence indexes") raise KBaseGenomeUtilException("The index object does not contain nucleotide sequence indexes. This index will only work with blastp (protein query, protein index) and blastx(nucleotide query, protein index)") else: self.__LOGGER.error("{0} is not yet supported".format(params['blast_program'])) raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program'])) # TODO: Add err handling zip_fn = blast_indexes[0]['data']['handle']['file_name'] #pprint(blast_indexes[0]) self.__LOGGER.info("Downloading the genome index") #hs = HandleService(url=self.__HS_URL, token=user_token) try: script_util.download_file_from_shock(self.__LOGGER, shock_service_url= blast_indexes[0]['data']['handle']['url'], shock_id= blast_indexes[0]['data']['handle']['id'], filename= blast_indexes[0]['data']['handle']['file_name'], directory= '.', token = user_token) except Exception, e: self.__LOGGER.error("Downloading error from shock: Please contact [email protected]") raise KBaseGenomeUtilException("Downloading error from shock: Please contact [email protected]") try: script_util.unzip_files(self.__LOGGER, zip_fn, blast_dir) except Exception, e: self.__LOGGER.error("Unzip indexfile error: Please contact [email protected]") raise KBaseGenomeUtilException("Unzip indexfile error: Please contact [email protected]")
# NOTE(review): fragment — this span begins mid-statement and depends on names
# bound outside the visible code (a_id, e_id, labels, counter, diffexp_dir,
# list_file, files, services, token), so it cannot be safely restructured in
# isolation; left byte-identical. Per the inline comments, the counter-based
# replicate handling is a stopgap until replicate_id is supplied by the
# calling methods.
[{'ref' : a_id},{'ref': e_id}]) ### Get the condition name, replicate_id , shock_id and shock_filename condition = a_obj['data']['condition'] if 'replicate_id' in a_obj['data'] : replicate_id = a_obj['data']['replicate_id'] files[a_obj['data']['file']['file_name']] = a_obj['data']['file']['id'] files[e_obj['data']['file']['file_name']] = e_obj['data']['file']['id'] if not condition in labels: labels.append(condition) else : counter += 1 #### comment it when replicate_id is available from methods s_path = os.path.join(diffexp_dir,condition+"/"+str(counter)) ### Comment this line when replicate_id is available from the methods if not os.path.exists(s_path): os.makedirs(s_path) try: script_util.download_shock_files(logger,services['shock_service_url'],s_path,files,token) except Exception,e: raise Exception( "Unable to download shock file, {0}".format(e)) try: script_util.unzip_files(logger,os.path.join(s_path,a_obj['data']['file']['file_name']),s_path) script_util.unzip_files(logger,os.path.join(s_path,e_obj['data']['file']['file_name']),s_path) e_file_path = os.path.join(s_path,"transcripts.gtf") a_file_path = os.path.join(s_path,"accepted_hits.bam") if os.path.exists(a_file_path) : print a_file_path if os.path.exists(e_file_path) : print e_file_path list_file.write("{0}\n".format(e_file_path)) except Exception, e: logger.exception("".join(traceback.format_exc())) raise Exception("Unzip file error") list_file.close() print labels #output_dir = os.path.join(cuffdiff_dir, params['output_obj_name']) for l in labels: #rep_files=",".join([ os.path.join(diffexp_dir+'/'+l,sub+'/accepted_hits.bam') for sub in os.listdir(os.path.join(diffexp_dir,l)) if os.path.isdir(os.path.join(diffexp_dir,l+'/'+sub))])
class Bowtie2Sample(Bowtie2): def __init__(self, logger, directory, urls, max_cores): #super(Bowtie2Sample, self).__init__(logger, directory, urls) super(self.__class__, self).__init__(logger, directory, urls, max_cores) # user defined shared variables across methods self.sample_info = None #self.sampleset_info = None self.num_threads = 1 def prepare(self): # for quick testing, we recover parameters here ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] bowtie2_dir = self.directory try: #sample,bowtie_index = ws_client.get_objects( # [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']}, # { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}]) sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['sampleset_id'])[0] bowtie_index = script_util.ws_get_obj(logger, ws_client, params['ws_id'], params['bowtie_index'])[0] self.sample = sample except Exception, e: logger.exception("".join(traceback.format_exc())) raise ValueError(" Error Downloading objects from the workspace ") ### Get obejct IDs #sample_info,bowtie_index_info = ws_client.get_object_info_new({"objects": [ # {'name': params['sampleset_id'], 'workspace': params['ws_id']}, # {'name': params['bowtie_index'], 'workspace': params['ws_id']} # ]}) sample_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['sampleset_id'])[0] bowtie_index_info = script_util.ws_get_obj_info( logger, ws_client, params['ws_id'], params['bowtie_index'])[0] self.sample_info = sample_info ### Get the workspace object ids for the objects ### sample_id = str(sample_info[6]) + '/' + str( sample_info[0]) + '/' + str(sample_info[4]) bowtie_index_id = str(bowtie_index_info[6]) + '/' + str( bowtie_index_info[0]) + '/' + str(bowtie_index_info[4]) sample_type = sample_info[2].split('-')[0] lib_types = [ 'KBaseAssembly.SingleEndLibrary', 
'KBaseAssembly.PairedEndLibrary', 'KBaseFile.SingleEndLibrary', 'KBaseFile.PairedEndLibrary' ] ### Check if the Library objects exist in the same workspace if not sample_type in lib_types: #'KBaseAssembly.SingleEndLibrary' or sample_type != 'KBaseAssembly.PairedEndLibrary': raise Bowtie2SampleException( 'Either of the Library typed objects SingleEndLibrary or PairedEndLibrary is required' ) r_label = 'Single' ### Get the Bw index file bw_index_files = script_util.check_and_download_existing_handle_obj( logger, ws_client, self.urls, params['ws_id'], params['bowtie_index'], "KBaseRNASeq.Bowtie2Indexes", bowtie2_dir, token) try: logger.info("Unzipping Bowtie2 Indices") script_util.unzip_files(logger, os.path.join(bowtie2_dir, bw_index_files), bowtie2_dir) mv_dir = handler_util.get_dir(bowtie2_dir) if mv_dir is not None: script_util.move_files(logger, mv_dir, bowtie2_dir) except Exception, e: logger.error("".join(traceback.format_exc())) raise Exception("Unzip indexfile error")
# NOTE(review): auto-formatted duplicate of the runEach snippet above,
# likewise truncated — the outer `try:` has no matching `except` in the
# visible code, and the final statement is split across two physical lines by
# the snippet extraction. Left byte-identical for that reason.
# BUG(review): in the unzip handler, `raise Exception(e)` makes the following
# logger.error(...) and the trailing raise unreachable — the traceback is
# never logged and the "Unzip alignment files error" message is never used.
def runEach(self, task_params): ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] s_alignment = task_params['job_id'] gtf_file = task_params['gtf_file'] directory = task_params['stringtie_dir'] genome_id = task_params['genome_id'] annotation_id = task_params['annotation_id'] sample_id = task_params['sample_id'] alignmentset_id = task_params['alignmentset_id'] ws_id = task_params['ws_id'] print "Downloading Sample Alignment from workspace {0}".format( s_alignment) logger.info("Downloading Sample Alignment from workspace {0}".format( s_alignment)) alignment_name = ws_client.get_object_info([{ "ref": s_alignment }], includeMetadata=None)[0][1] if not logger: logger = handler_util.create_logger( directory, "run_Stringtie_" + alignment_name) try: alignment = ws_client.get_objects([{'ref': s_alignment}])[0] input_direc = os.path.join( directory, alignment_name.split('_alignment')[0] + "_stringtie_input") if not os.path.exists(input_direc): os.mkdir(input_direc) output_name = alignment_name.split( '_alignment')[0] + "_stringtie_expression" output_dir = os.path.join(directory, output_name) #Download Alignment from shock a_file_id = alignment['data']['file']['id'] a_filename = alignment['data']['file']['file_name'] condition = alignment['data']['condition'] try: script_util.download_file_from_shock( logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id, filename=a_filename, directory=input_direc, token=token) except Exception, e: raise Exception( "Unable to download shock file, {0},{1}".format( a_filename, "".join(traceback.format_exc()))) try: input_dir = os.path.join(input_direc, alignment_name) if not os.path.exists(input_dir): os.mkdir(input_dir) script_util.unzip_files(logger, os.path.join(input_direc, a_filename), input_dir) except Exception, e: raise Exception(e) logger.error("".join(traceback.format_exc())) raise 
Exception("Unzip alignment files error")
# NOTE(review): fragment — auto-formatted duplicate of part of the cuffdiff
# download span above; it begins mid-function and depends on names bound
# outside the visible code (condition, labels, counter, diffexp_dir, files,
# services, token, a_obj, e_obj, list_file), so it cannot be safely
# restructured in isolation; left byte-identical.
if not condition in labels: labels.append(condition) else: counter += 1 #### comment it when replicate_id is available from methods s_path = os.path.join( diffexp_dir, condition + "/" + str(counter) ) ### Comment this line when replicate_id is available from the methods if not os.path.exists(s_path): os.makedirs(s_path) try: script_util.download_shock_files(logger, services['shock_service_url'], s_path, files, token) except Exception, e: raise Exception("Unable to download shock file, {0}".format(e)) try: script_util.unzip_files( logger, os.path.join(s_path, a_obj['data']['file']['file_name']), s_path) script_util.unzip_files( logger, os.path.join(s_path, e_obj['data']['file']['file_name']), s_path) e_file_path = os.path.join(s_path, "transcripts.gtf") a_file_path = os.path.join(s_path, "accepted_hits.bam") if os.path.exists(a_file_path): print a_file_path if os.path.exists(e_file_path): print e_file_path list_file.write("{0}\n".format(e_file_path)) except Exception, e: logger.exception("".join(traceback.format_exc())) raise Exception("Unzip file error") list_file.close()
class TophatSampleSet(Tophat):
    """Tophat runner for a whole RNASeqSampleSet / ReadsSet of read libraries."""

    def __init__(self, logger, directory, urls, max_cores):
        super(TophatSampleSet, self).__init__(logger, directory, urls, max_cores)
        # Shared state populated by prepare() and consumed by later stages.
        self.sample = None
        self.bowtie2index_id = None
        #self.num_threads = None

    def prepare(self):
        """Fetch the sample set and Bowtie2 index, then unpack the index locally.

        :raises ValueError: when the workspace objects cannot be retrieved.
        :raises TophatSampleSetException: when the input is not a sample/reads set.
        :raises Exception: when unzipping the Bowtie2 index fails.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        work_dir = self.directory
        try:
            sample_obj = script_util.ws_get_obj(
                logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
            index_obj = script_util.ws_get_obj(
                logger, ws_client, params['ws_id'], params['bowtie_index'])[0]
            self.sample = sample_obj
        except Exception as e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        # Validate the object type: only set-shaped inputs are accepted here.
        sample_info = script_util.ws_get_obj_info(
            logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
        sample_type = sample_info[2].split('-')[0]
        if sample_type not in ('KBaseRNASeq.RNASeqSampleSet', 'KBaseSets.ReadsSet'):
            raise TophatSampleSetException('RNASeqSampleSet or ReadsSet is required')
        (reads, r_label) = rnaseq_util.get_reads_conditions(
            logger, sample_obj, sample_type)
        # Resolve stable workspace references for downstream bookkeeping.
        self.bowtie2index_id = script_util.ws_get_ref(
            logger, ws_client, params['ws_id'], params['bowtie_index'])
        sampleset_id = script_util.ws_get_ref(
            logger, ws_client, params['ws_id'], params['sampleset_id'])
        bw_id = index_obj['data']['handle']['id']
        bw_name = index_obj['data']['handle']['file_name']
        genome_id = index_obj['data']['genome_id']
        annotation_gtf = ws_client.get_object_info(
            [{"ref": genome_id}], includeMetadata=None)[0][1]
        # Pull the zipped Bowtie2 index archive from Shock into the work dir.
        script_util.download_shock_files(
            logger, self.urls['shock_service_url'], work_dir,
            {bw_name: bw_id}, token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(
                logger, os.path.join(work_dir, bw_name), work_dir)
            # If the archive unpacked into a nested directory, flatten it.
            nested = handler_util.get_dir(work_dir)
            if nested is not None:
                script_util.move_files(logger, nested, work_dir)
        except Exception as e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")
class TophatSample(Tophat):
    """Tophat runner for a single read library (single-threaded)."""

    def __init__(self, logger, directory, urls, max_cores):
        super(TophatSample, self).__init__(logger, directory, urls, max_cores)
        # Shared state populated by prepare() and consumed by later stages.
        self.bowtie2index_id = None
        self.num_threads = 1

    def prepare(self):
        """Fetch the read library and Bowtie2 index, then unpack the index.

        :raises ValueError: when the workspace objects cannot be retrieved.
        :raises Exception: when unzipping the Bowtie2 index fails.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        work_dir = self.directory
        try:
            sample_obj = script_util.ws_get_obj(
                logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
            index_obj = script_util.ws_get_obj(
                logger, ws_client, params['ws_id'], params['bowtie_index'])[0]
            self.sample = sample_obj
        except Exception as e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        # NOTE(review): sample_type is looked up but never checked here,
        # unlike TophatSampleSet.prepare — confirm whether validation was
        # intended before relying on this path for arbitrary object types.
        sample_type = script_util.ws_get_type_name(
            logger, ws_client, params['ws_id'], params['sampleset_id'])
        # Resolve stable workspace references for downstream bookkeeping.
        self.bowtie2index_id = script_util.ws_get_ref(
            logger, ws_client, params['ws_id'], params['bowtie_index'])
        sampleset_id = script_util.ws_get_ref(
            logger, ws_client, params['ws_id'], params['sampleset_id'])
        bw_id = index_obj['data']['handle']['id']
        bw_name = index_obj['data']['handle']['file_name']
        genome_id = index_obj['data']['genome_id']
        annotation_gtf = ws_client.get_object_info(
            [{"ref": genome_id}], includeMetadata=None)[0][1]
        # Pull the zipped Bowtie2 index archive from Shock into the work dir.
        script_util.download_shock_files(
            logger, self.urls['shock_service_url'], work_dir,
            {bw_name: bw_id}, token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(
                logger, os.path.join(work_dir, bw_name), work_dir)
            # If the archive unpacked into a nested directory, flatten it.
            nested = handler_util.get_dir(work_dir)
            if nested is not None:
                script_util.move_files(logger, nested, work_dir)
        except Exception as e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")