def call_cuffmerge_and_cuffdiff(
    logger,
    ws_client,
    hs,
    ws_id,
    num_threads,
    list_file,
    gtf_file,
    bam_files,
    t_labels,
    genome_id,
    expressionset_id,
    alignmentset_id,
    sampleset_id,
    params,
    directory,
    token,
):
    """Run cuffmerge inside *directory* and return the merged GTF path.

    Merges the transcript assemblies listed in *list_file* against the
    reference annotation *gtf_file* using *num_threads* workers; output
    goes to ``<directory>/cuffmerge``.

    Returns the path to ``merged.gtf`` on success, or None when cuffmerge
    completed without producing one.  Raises Exception when the cuffmerge
    invocation fails.

    NOTE(review): most parameters (ws_client, hs, bam_files, ...) are
    unused in this body; presumably kept for signature compatibility with
    a larger pipeline entry point -- confirm before removing any.
    """
    ## Adding Advanced options for cuffmerge command
    cuffmerge_dir = os.path.join(directory, "cuffmerge")
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(str(num_threads), cuffmerge_dir, gtf_file, list_file)
    merged_gtf = None
    try:
        logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        script_util.runProgram(logger, "cuffmerge", cuffmerge_command, None, directory)
        if os.path.exists(cuffmerge_dir + "/merged.gtf"):
            merged_gtf = os.path.join(cuffmerge_dir, "merged.gtf")
    except Exception as e:
        # Include the underlying error; the original message dropped it.
        raise Exception("Error executing cuffmerge {0},{1},{2}".format(cuffmerge_command, cuffmerge_dir, e))
    # Bug fix: the computed path was previously discarded; callers of the
    # sibling call_cuffmerge() assign the return value, so return it here too.
    return merged_gtf
def call_stringtieBall(directory, num_threads, m_gtf_file, alignment_file):
    """Run ``stringtie -e -B`` (Ballgown mode) on one alignment file.

    Writes Ballgown output tables into *directory*, quantifying
    *alignment_file* against the merged annotation *m_gtf_file* with
    *num_threads* workers.  Raises Exception when stringtie fails.

    NOTE(review): relies on a module-level ``logger`` -- confirm it is
    defined in the enclosing module.
    """
    strdiff_command = " -p {0} -o {1} -e -B -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        logger.info("Executing: stringtie {0}".format(strdiff_command))
        script_util.runProgram(logger, "stringtie", strdiff_command, None, directory)
    except Exception as e:
        # Include the underlying error (previously dropped).
        raise Exception(
            "Error executing StringTie differential expression {0},{1},{2}".format(
                strdiff_command, directory, e))
def call_tablemaker(directory, num_threads, m_gtf_file, alignment_file):
    """Run ``tablemaker -q -W`` on one alignment file.

    Quantifies *alignment_file* against the merged annotation
    *m_gtf_file*, writing output tables into *directory*.
    Raises Exception when the tablemaker invocation fails.

    NOTE(review): relies on a module-level ``logger`` -- confirm it is
    defined in the enclosing module.
    """
    tm_command = " -p {0} -o {1} -q -W -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        logger.info("Executing: tablemaker {0}".format(tm_command))
        script_util.runProgram(logger, "tablemaker", tm_command, None, directory)
    except Exception as e:
        # Include the underlying error (previously dropped).
        raise Exception("Error executing tablemaker {0},{1},{2}".format(
            tm_command, directory, e))
    def BuildBowtie2Index(self, ctx, params):
        """Build a Bowtie2 index for the ContigSet named in *params*.

        Dumps the FASTA for ``params['reference']`` from workspace
        ``params['ws_id']`` into the bowtie directory via the
        ``ContigSet_to_fasta`` script, then runs ``bowtie2-build`` on it.

        NOTE(review): this copy appears truncated -- the outer ``try:``
        below has no matching except/finally in view and ``returnVal``
        is never returned; confirm against the full source.
        """
        # ctx is the context object
        # return variables are: returnVal
        # BEGIN BuildBowtie2Index
        user_token = ctx["token"]
        print "start"
        print user_token

        # svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        # hs = HandleService(url=self.__HS_URL, token=user_token)
        try:
            self.__LOGGER.info("Downloading KBaseGenome.ContigSet object from workspace")
            try:
                # 	assembly = ws_client.get_objects(
                #                    	[{'name' : params['reference'],
                #                      	'workspace' : params['ws_id']}])['data']
                # except Exception,e:
                #   raise KBaseRNASeqException("Error Downloading FASTA object from the workspace {0}".format(params['reference']))

                ## Check if the bowtie_dir is present; remove files in bowtie_dir if exists ; create a new dir if doesnt exists
                bowtie_dir = self.__BOWTIE_DIR
                if os.path.exists(bowtie_dir):
                    files = glob.glob("%s/*" % bowtie_dir)
                    for f in files:
                        os.remove(f)
                if not os.path.exists(bowtie_dir):
                    os.makedirs(bowtie_dir)

                ## dump fasta object to a file in bowtie_dir
                # CLI arguments for the ContigSet_to_fasta dump script; the
                # output file is named after the reference object itself.
                dumpfasta = "--workspace_service_url {0} --workspace_name {1} --working_directory {2} --output_file_name {3} --object_name {4} --shock_service_url {5} --token '{6}'".format(
                    self.__WS_URL,
                    params["ws_id"],
                    bowtie_dir,
                    params["reference"],
                    params["reference"],
                    self.__SHOCK_URL,
                    user_token,
                )
                print dumpfasta

                script_util.runProgram(
                    self.__LOGGER, self.__SCRIPT_TYPE["ContigSet_to_fasta"], dumpfasta, self.__SCRIPTS_DIR, os.getcwd()
                )
            except Exception, e:
                raise KBaseRNASeqException(
                    "Error Creating  FASTA object from the workspace {0},{1},{2}".format(
                        params["reference"], os.getcwd(), e
                    )
                )

                ## Run the bowtie_indexing on the  command line
            try:
                # bowtie2-build <reference_in> <bt2_index_base>; both names
                # come from the FASTA dumped above as params['reference'].
                bowtie_index_cmd = "{0} {1}".format(params["reference"], params["reference"])
                script_util.runProgram(self.__LOGGER, "bowtie2-build", bowtie_index_cmd, None, bowtie_dir)
            except Exception, e:
                raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params["reference"], e))
def call_stringtieBall(working_dir, directory, num_threads, m_gtf_file, alignment_file):
    """Run ``stringtie -e -B`` (Ballgown mode) from *working_dir*.

    Ballgown tables are written to *directory*; *alignment_file* is
    quantified against the merged annotation *m_gtf_file*.
    Raises Exception when the stringtie invocation fails.
    """
    # Bug fix: the original body mixed tabs and spaces (TabError under
    # python -tt / Python 3); indentation normalized to spaces.
    print("Inside stringtieBall")
    strdiff_command = " -p {0} -o {1} -e -B -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        print("Executing: stringtie {0}".format(strdiff_command))
        script_util.runProgram(None, "stringtie", strdiff_command, None, working_dir)
    except Exception as e:
        # Include the underlying error (previously dropped).
        raise Exception(
            "Error executing StringTie differential expression {0},{1},{2}".format(
                strdiff_command, working_dir, e))
def call_cuffmerge(directory, num_threads, gtf_file, list_file):
    """Run cuffmerge into *directory*; return the merged.gtf path (or None).

    Merges the assemblies listed in *list_file* against the reference
    annotation *gtf_file* with *num_threads* workers.
    Raises Exception when the cuffmerge invocation fails.

    NOTE(review): relies on a module-level ``logger`` -- confirm it is
    defined in the enclosing module.
    """
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(
        str(num_threads), directory, gtf_file, list_file)
    merged_gtf = None
    try:
        logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        script_util.runProgram(logger, "cuffmerge", cuffmerge_command, None, directory)
        # Bug fix: this test used the undefined name ``cuffmerge_dir``; the
        # resulting NameError was swallowed by the except below and turned a
        # successful run into a bogus failure.  The output dir is *directory*.
        if os.path.exists(directory + "/merged.gtf"):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception as e:
        raise Exception("Error executing cuffmerge {0},{1},{2}".format(cuffmerge_command, directory, e))
    # Bug fix: callers assign the result of call_cuffmerge(); return the path.
    return merged_gtf
def call_stringtiemerge(working_dir, directory, num_threads, gtf_file, list_file):
    """Run ``stringtie --merge`` from *working_dir*; return merged.gtf path or None.

    Merges the assemblies in *list_file* against *gtf_file*, writing the
    result to *directory*.  Raises Exception when stringtie fails.
    """
    strmerge_command = " -p {0} -o {1} --merge -G {2} {3}".format(
        str(num_threads), directory, gtf_file, list_file)
    merged_gtf = None
    try:
        print("Executing: stringtie {0}".format(strmerge_command))
        script_util.runProgram(None, "stringtie", strmerge_command, None, working_dir)
        if os.path.exists(directory + "/merged.gtf"):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception as e:
        raise Exception("Error executing StringTie merge {0},{1},{2}".format(
            strmerge_command, working_dir, e))
    # Bug fix: callers assign this function's result (see runEach), but the
    # path was never returned.
    return merged_gtf
def call_tablemaker(working_dir, directory, num_threads, m_gtf_file, alignment_file):
    """Run ``tablemaker -q -W`` from *working_dir*, writing into *directory*.

    Quantifies *alignment_file* against the merged annotation
    *m_gtf_file*.  Raises Exception when the tablemaker invocation fails.
    """
    print("Inside Tablemaker")
    print("Args passed : {0} , {1} , {2} , {3} , {4} ".format(
        working_dir, directory, num_threads, m_gtf_file, alignment_file))
    tm_command = " -p {0} -o {1} -q -W -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        print("Executing: tablemaker {0}".format(tm_command))
        script_util.runProgram(None, "tablemaker", tm_command, None, working_dir)
    except Exception as e:
        # Bug fix: ``logger.exception(e)`` referenced an undefined global
        # ``logger`` here, so a NameError masked the real failure; dump the
        # traceback instead (traceback is already used elsewhere in this file).
        print("".join(traceback.format_exc()))
        raise Exception("Error executing tablemaker {0},{1},{2}".format(
            tm_command, working_dir, e))
Exemple #9
0
def call_stringtieBall(working_dir, directory, num_threads, m_gtf_file,
                       alignment_file):
    """Run ``stringtie -e -B`` (Ballgown mode) from *working_dir*.

    Ballgown tables are written to *directory*; *alignment_file* is
    quantified against the merged annotation *m_gtf_file*.
    Raises Exception when the stringtie invocation fails.
    """
    print("Inside stringtieBall")
    strdiff_command = " -p {0} -o {1} -e -B -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        print("Executing: stringtie {0}".format(strdiff_command))
        script_util.runProgram(None, "stringtie", strdiff_command, None,
                               working_dir)
    except Exception as e:
        # Include the underlying error (previously dropped).
        raise Exception(
            "Error executing StringTie differential expression {0},{1},{2}".format(
                strdiff_command, working_dir, e))
Exemple #10
0
def call_stringtiemerge(working_dir, directory, num_threads, gtf_file,
                        list_file):
    """Run ``stringtie --merge`` and return the merged.gtf path (or None).

    Merges the assemblies listed in *list_file* against *gtf_file*,
    writing the result into *directory*; the command runs from
    *working_dir*.  Raises Exception when stringtie fails.
    """
    strmerge_command = " -p {0} -o {1} --merge -G {2} {3}".format(
        str(num_threads), directory, gtf_file, list_file)
    merged_gtf = None
    try:
        print("Executing: stringtie {0}".format(strmerge_command))
        script_util.runProgram(None, "stringtie", strmerge_command, None,
                               working_dir)
        if os.path.exists(directory + "/merged.gtf"):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception as e:
        raise Exception("Error executing StringTie merge {0},{1},{2}".format(
            strmerge_command, working_dir, e))
    # Bug fix: callers assign this function's result (merged_gtf =
    # rnaseq_util.call_stringtiemerge(...)), but the path was never returned.
    return merged_gtf
Exemple #11
0
def call_tablemaker(working_dir, directory, num_threads, m_gtf_file,
                    alignment_file):
    """Run ``tablemaker -q -W`` from *working_dir*, writing into *directory*.

    Quantifies *alignment_file* against the merged annotation
    *m_gtf_file*.  Raises Exception when the tablemaker invocation fails.
    """
    print("Inside Tablemaker")
    print("Args passed : {0} , {1} , {2} , {3} , {4} ".format(
        working_dir, directory, num_threads, m_gtf_file, alignment_file))
    tm_command = " -p {0} -o {1} -q -W -G {2} {3}".format(
        str(num_threads), directory, m_gtf_file, alignment_file)
    try:
        print("Executing: tablemaker {0}".format(tm_command))
        script_util.runProgram(None, "tablemaker", tm_command, None,
                               working_dir)
    except Exception as e:
        # Bug fix: ``logger.exception(e)`` referenced an undefined global
        # ``logger`` (this function otherwise uses print), so a NameError
        # masked the real failure; dump the traceback instead.
        print("".join(traceback.format_exc()))
        raise Exception("Error executing tablemaker {0},{1},{2}".format(
            tm_command, working_dir, e))
Exemple #12
0
def call_cuffmerge_and_cuffdiff(logger, ws_client, hs, ws_id, num_threads,
                                list_file, gtf_file, bam_files, t_labels,
                                genome_id, expressionset_id, alignmentset_id,
                                sampleset_id, params, directory, token):
    """Run cuffmerge into ``<directory>/cuffmerge``; return merged.gtf path.

    Merges the assemblies listed in *list_file* against *gtf_file* with
    *num_threads* workers.  Returns the path to ``merged.gtf`` on success,
    or None when cuffmerge produced no merged file.  Raises Exception when
    the cuffmerge invocation fails.

    NOTE(review): many parameters are unused here; presumably kept for
    pipeline signature compatibility -- confirm before removing.
    """
    ## Adding Advanced options for cuffmerge command
    cuffmerge_dir = os.path.join(directory, "cuffmerge")
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(
        str(num_threads), cuffmerge_dir, gtf_file, list_file)
    merged_gtf = None
    try:
        logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        script_util.runProgram(logger, "cuffmerge", cuffmerge_command, None,
                               directory)
        if os.path.exists(cuffmerge_dir + "/merged.gtf"):
            merged_gtf = os.path.join(cuffmerge_dir, "merged.gtf")
    except Exception as e:
        # Include the underlying error (previously dropped).
        raise Exception("Error executing cuffmerge {0},{1},{2}".format(
            cuffmerge_command, cuffmerge_dir, e))
    # Bug fix: the merged GTF path was computed but never returned.
    return merged_gtf
Exemple #13
0
    def runEach(self, task_list):
        """Run cuffmerge, then cuffdiff, for one task and log tool output.

        Builds the cuffdiff command from self.method_params (advanced
        options), executes it in self.directory, logs stdout, and then
        logs the '> Processing Locus' progress lines from stderr.
        Raises Exception when cuffdiff fails.
        """
        ### Call Cuffmerge function
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run Cuffdiff
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        cuffdiff_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for Cuffdiff

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            cuffdiff_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            cuffdiff_command += (
                ' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            cuffdiff_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            cuffdiff_command += (' --library-type ' +
                                 self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            cuffdiff_command += (' --library-norm-method ' +
                                 self.method_params['library_norm_method'])
        try:
            cuffdiff_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            self.logger.info(
                "Executing: cuffdiff {0}".format(cuffdiff_command))
            ret = script_util.runProgram(None, "cuffdiff", cuffdiff_command,
                                         None, self.directory)
            result = ret["result"]
            for line in result.splitlines(False):
                self.logger.info(line)
            # Bug fix: this stderr scan was nested inside the stdout loop,
            # re-logging all of stderr once per stdout line; run it once.
            stderr = ret["stderr"]
            prev_value = ''
            for line in stderr.splitlines(False):
                if line.startswith('> Processing Locus'):
                    words = line.split()
                    cur_value = words[len(words) - 1]
                    # NOTE(review): both branches log the line, so this
                    # prev_value de-dup currently has no effect -- confirm
                    # whether repeats were meant to be suppressed.
                    if prev_value != cur_value:
                        prev_value = cur_value
                        self.logger.info(line)
                    else:
                        prev_value = ''
                        self.logger.info(line)
        except Exception as e:
            # Bug fix: a bare ``raise Exception(e)`` made the informative
            # raise below it unreachable; keep the detailed message.
            raise Exception("Error executing cuffdiff {0},{1}".format(
                cuffdiff_command, e))
def call_cuffmerge(working_dir, directory, num_threads, gtf_file, list_file):
    """Run cuffmerge from *working_dir* into *directory*; return merged.gtf path.

    Returns the path to ``merged.gtf`` on success, or None when cuffmerge
    produced no merged file.  Raises Exception (with the formatted
    traceback) when the cuffmerge invocation fails.
    """
    print("Entering cuffmerge")
    print("Args passed {0},{1},{2},{3}".format(directory, num_threads, gtf_file, list_file))
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(
        str(num_threads), directory, gtf_file, list_file)
    merged_gtf = None
    try:
        print("Executing: cuffmerge {0}".format(cuffmerge_command))
        # runProgram returns a (result, stderr) pair in this variant.
        r, e = script_util.runProgram(None, "cuffmerge", cuffmerge_command, None, working_dir)
        print(r + "\n" + e)
        if os.path.exists(directory + "/merged.gtf"):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception:
        print("".join(traceback.format_exc()))
        raise Exception("Error executing cuffmerge {0},{1}".format(cuffmerge_command, "".join(traceback.format_exc())))
    # Bug fix: callers assign the result of call_cuffmerge(); return the path.
    return merged_gtf
    def runEach(self, task_list):
        """Run the merge step for one task, then the Ballgown diffexp tool.

        NOTE(review): this scraped copy references several names that are
        not defined in this scope (diffexp_dir, merge_dir, num_threads,
        gtf_file, assembly_file), and the tool-specific merged_gtf is
        immediately overwritten by the call_cuffmerge() result below, so
        the used_tool branch looks like dead/incomplete code -- confirm
        against the original repository before relying on it.
        """
        ### Call Cuffmerge function
        used_tool = self.details['used_tool']
        if used_tool == 'StringTie':
            merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir, merge_dir, num_threads, self.details['gtf_file'], assembly_file)
            run_tool = "StringTie"
            tool_version = "1.2.3"
        elif used_tool == 'Cufflinks':
            merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir, merge_dir, num_threads, gtf_file, assembly_file)
            run_tool = "Tablemaker"
            tool_version = '2.0.9'
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir, self.num_threads, self.details['gtf_file'], self.details['gtf_list_file'])
        ### Run DiffExpforBallgown
        output_dir = os.path.join(self.directory, self.method_params['output_obj_name'])
        diffexp_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for DiffExpforBallgown

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            diffexp_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            diffexp_command += (' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            diffexp_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            diffexp_command += (' --library-type ' + self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            diffexp_command += (' --library-norm-method ' + self.method_params['library_norm_method'])
        try:
            diffexp_command += " -o {0} -L {1} -u {2} {3}".format(output_dir, self.t_labels, merged_gtf, self.bam_files)
            self.logger.info("Executing: diffexp {0}".format(diffexp_command))
            ret = script_util.runProgram(None, "diffexp", diffexp_command, None, self.directory)
            result = ret["result"]
            for line in result.splitlines(False):
                self.logger.info(line)
            # Bug fix: the stderr scan was nested inside the stdout loop,
            # re-logging all of stderr once per stdout line; run it once.
            stderr = ret["stderr"]
            prev_value = ''
            for line in stderr.splitlines(False):
                if line.startswith('> Processing Locus'):
                    words = line.split()
                    cur_value = words[len(words) - 1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        self.logger.info(line)
                    else:
                        prev_value = ''
                        self.logger.info(line)
        except Exception as e:
            # Bug fix: ``raise Exception(e)`` made the informative raise
            # below it unreachable; keep the detailed message instead.
            raise Exception("Error executing diffexp {0},{1}".format(diffexp_command, e))
    def BuildBowtie2Index(self, ctx, params):
        """
        :param params: instance of type "Bowtie2IndexParams" -> structure:
           parameter "ws_id" of String, parameter "reference" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN BuildBowtie2Index
	user_token=ctx['token']
        ws_client=Workspace(url=self.__WS_URL, token=user_token)
	hs = HandleService(url=self.__HS_URL, token=user_token)
	try:
	    	if not os.path.exists(self.__SCRATCH): os.makedirs(self.__SCRATCH)
                bowtie_dir = os.path.join(self.__SCRATCH ,'tmp') 
	        handler_util.setupWorkingDir(self.__LOGGER,bowtie_dir)
		## Update the provenance
	     	provenance = [{}]
        	if 'provenance' in ctx:
            		provenance = ctx['provenance']
        	# add additional info to provenance here, in this case the input data object reference
        	provenance[0]['input_ws_objects']=[params['ws_id']+'/'+params['reference']]
		
		try:
			ref_id, outfile_ref_name = rnaseq_util.get_fa_from_genome(self.__LOGGER,ws_client,self.__SERVICES,params['ws_id'],bowtie_dir,params['reference'])
                except Exception, e:
			self.__LOGGER.exception("".join(traceback.format_exc()))
                        raise ValueError('Unable to get FASTA for object {}'.format("".join(traceback.format_exc())))
	        ## Run the bowtie_indexing on the  command line
		try:
	    		if outfile_ref_name:
				bowtie_index_cmd = "{0} {1}".format(outfile_ref_name,params['reference'])
			else:
				bowtie_index_cmd = "{0} {1}".format(params['reference'],params['reference']) 
	    	        self.__LOGGER.info("Executing: bowtie2-build {0}".format(bowtie_index_cmd))  	
			cmdline_output = script_util.runProgram(self.__LOGGER,"bowtie2-build",bowtie_index_cmd,None,bowtie_dir)
			if 'result' in cmdline_output:
				report = cmdline_output['result']
		except Exception,e:
			raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params['reference'],e))
Exemple #17
0
def call_cuffmerge(working_dir, directory, num_threads, gtf_file, list_file):
    #cuffmerge_dir = os.path.join(directory,"cuffmerge")
    print "Entering cuffmerge"
    print "Args passed {0},{1},{2},{3}".format(directory, num_threads,
                                               gtf_file, list_file)
    cuffmerge_command = " -p {0} -o {1} -g {2} {3}".format(
        str(num_threads), directory, gtf_file, list_file)
    merged_gtf = None
    try:
        #logger.info("Executing: cuffmerge {0}".format(cuffmerge_command))
        print "Executing: cuffmerge {0}".format(cuffmerge_command)
        r, e = script_util.runProgram(None, "cuffmerge", cuffmerge_command,
                                      None, working_dir)
        print r + "\n" + e
        if os.path.exists(directory + "/merged.gtf"):
            merged_gtf = os.path.join(directory, "merged.gtf")
    except Exception, e:
        print "".join(traceback.format_exc())
        raise Exception("Error executing cuffmerge {0},{1}".format(
            cuffmerge_command, "".join(traceback.format_exc())))
Exemple #18
0
    def runEach(self, task_list):
        """Run cuffmerge, then cuffdiff, for one task and log tool output.

        Builds the cuffdiff command from self.method_params (advanced
        options), executes it in self.directory, logs stdout, and then
        logs the '> Processing Locus' progress lines from stderr.
        Raises Exception when cuffdiff fails.
        """
        ### Call Cuffmerge function
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run Cuffdiff
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        cuffdiff_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for Cuffdiff

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            cuffdiff_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            cuffdiff_command += (' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            cuffdiff_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            cuffdiff_command += (' --library-type ' + self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            cuffdiff_command += (' --library-norm-method ' + self.method_params['library_norm_method'])
        try:
            cuffdiff_command += " -o {0} -L {1} -u {2} {3}".format(output_dir, self.t_labels, merged_gtf, self.bam_files)
            self.logger.info("Executing: cuffdiff {0}".format(cuffdiff_command))
            ret = script_util.runProgram(None, "cuffdiff", cuffdiff_command, None, self.directory)
            result = ret["result"]
            for line in result.splitlines(False):
                self.logger.info(line)
            # Bug fix: the stderr scan was nested inside the stdout loop,
            # re-logging all of stderr once per stdout line; run it once.
            stderr = ret["stderr"]
            prev_value = ''
            for line in stderr.splitlines(False):
                if line.startswith('> Processing Locus'):
                    words = line.split()
                    cur_value = words[len(words) - 1]
                    if prev_value != cur_value:
                        prev_value = cur_value
                        self.logger.info(line)
                    else:
                        prev_value = ''
                        self.logger.info(line)
        except Exception as e:
            # Bug fix: ``raise Exception(e)`` made the informative raise
            # below it unreachable; keep the detailed message instead.
            raise Exception("Error executing cuffdiff {0},{1}".format(cuffdiff_command, e))
Exemple #19
0
                        read2_name = r_sample['data']['handle_2']['file_name']
                        try:
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
                        except Exception,e:
                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                try:
                        self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                        cmdline_output, cmd_err = script_util.runProgram(self.logger,"tophat",tophat_cmd,None,directory)
                except Exception,e:
                        raise Exception("Failed to run command {0}\n{1}\n{2}".format(tophat_cmd,cmdline_output,cmd_err))
	 	try:
                	bam_file = output_dir+"/accepted_hits.bam"
                	align_stats_cmd="flagstat {0}".format(bam_file)
                	stats = script_util.runProgram(logger,"samtools",align_stats_cmd,None,directory)
			#print stats
			stats_data = {}
                	# Pass it to the stats['result']
                	#stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats"
                	stats_data =script_util.extractAlignmentStatsInfo(logger,"samtools",ws_client,ws_id,None,stats['result'],None)
            	except Exception , e :
                	raise Exception("Failed to create RNASeqAlignmentStats: {0}".format(bam_file))
                # Zip tophat folder
                out_file_path = os.path.join(directory,"%s.zip" % output_name)
            	try:
                        logger.info("Zipping the output files".format(out_file_path))
                	script_util.zip_files(logger, output_dir,out_file_path)
            	except Exception, e:
                	raise Exception("Failed to compress the index: {0}".format(out_file_path))
                ## Upload the file using handle service
class HiSat2SampleSet(HiSat2):
    def __init__(self, logger, directory, urls, max_cores):
        """Create a sample-set HISAT2 runner.

        Delegates the common setup to the HiSat2 base class and
        initialises the state shared between prepare() and runEach().
        """
        super(HiSat2SampleSet, self).__init__(logger, directory, urls,
                                              max_cores)
        # Populated by prepare(); read by the per-sample run methods.
        self.sample = None
        self.sampleset_info = None

    def prepare(self):
        """Stage inputs for a sample-set HISAT2 run.

        Fetches the sample set and genome objects from the workspace,
        validates the sample-set type, downloads the genome FASTA and
        builds the HISAT2 index in ``self.directory``.  Side effects:
        sets ``self.sample``, ``self.sampleset_info`` and
        ``self.num_jobs`` (one job per read library).

        Raises:
            ValueError: when the workspace objects cannot be downloaded.
            HiSat2SampleSetException: when the object is neither a
                KBaseRNASeq.RNASeqSampleSet nor a KBaseSets.ReadsSet.
            Exception: when the hisat2-build command fails.
        """
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        hisat2_dir = self.directory

        try:
            #sample,annotation_name = ws_client.get_objects(
            #                         [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']},
            #                         { 'name' : params['genome_id'], 'workspace' : params['ws_id']}])
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            annotation_name = script_util.ws_get_obj(logger, ws_client,
                                                     params['ws_id'],
                                                     params['genome_id'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object Info and IDs
        #sampleset_info,annotation_info = ws_client.get_object_info_new({"objects": [
        #                                   {'name': params['sampleset_id'], 'workspace': params['ws_id']},
        #                                   {'name': params['genome_id'], 'workspace': params['ws_id']}
        #                                   ]})
        sampleset_info = script_util.ws_get_obj_info(logger, ws_client,
                                                     params['ws_id'],
                                                     params['sampleset_id'])[0]
        self.sampleset_info = sampleset_info
        ### Get the workspace object ids for the objects ###
        # Numeric reference "wsid/objid/version" built from the info tuple.
        sampleset_id = str(sampleset_info[6]) + '/' + str(
            sampleset_info[0]) + '/' + str(sampleset_info[4])
        #annotation_id = str(annotation_info[6]) + '/' + str(annotation_info[0]) + '/' + str(annotation_info[4])
        annotation_id = script_util.ws_get_ref(logger, ws_client,
                                               params['ws_id'],
                                               params['genome_id'])
        # Type name without the version suffix, e.g. "KBaseRNASeq.RNASeqSampleSet".
        sample_type = sampleset_info[2].split('-')[0]

        ### Check if the Library objects exist in the same workspace
        if not (sample_type == 'KBaseRNASeq.RNASeqSampleSet'
                or sample_type == 'KBaseSets.ReadsSet'):
            raise HiSat2SampleSetException(
                'RNASeqSampleSet or ReadsSet is required')
        #logger.info("Check if the Library objects do exist in the current workspace")
        #reads = sample['data']['sample_ids']
        #r_label = sample['data']['condition']
        (reads,
         r_label) = rnaseq_util.get_reads_conditions(logger, sample,
                                                     sample_type)
        #e_ws_objs = script_util.if_ws_obj_exists_notype(None,ws_client,params['ws_id'],reads)
        #missing_objs = [i for i in reads if not i in e_ws_objs]
        #if len(e_ws_objs) != len(reads):
        #    raise HiSat2SampleSetException('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id']))

        # One alignment job per read library in the sample set.
        self.num_jobs = len(reads)
        ref_id, fasta_file = rnaseq_util.get_fa_from_genome(
            logger, ws_client, self.urls, params['ws_id'], hisat2_dir,
            params['genome_id'])
        hisat2base = os.path.basename(fasta_file)
        #hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fa"))
        hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
        try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2-build",
                                                    hisat2base_cmd, None,
                                                    hisat2_dir)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
Exemple #21
0
                       lib_type = 'PairedEnd'
               	if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                       read1_id = r_sample['data']['handle_1']['id']
                       read1_name = r_sample['data']['handle_1']['file_name']
                       read2_id = r_sample['data']['handle_2']['id']
                       read2_name = r_sample['data']['handle_2']['file_name']
                       try:
                               script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
                               script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
                               hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(output_dir,read2_name),hisat2_base,out_file)
                       except Exception,e:
                               #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                               raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
               try:
                       logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                       cmdline_output = script_util.runProgram(logger,"hisat2",hisat2_cmd,None,directory)
 		except Exception,e:
                       raise Exception("Failed to run command {0}".format(hisat2_cmd))
                       #logger.exception("Failed to run command {0}".format(hisat2_cmd))
               try:
                       stats_data = {}
                       stats_data = script_util.extractAlignmentStatsInfo(logger,"bowtie2",ws_client,ws_id,None,cmdline_output['stderr'],None)
                       bam_file = os.path.join(output_dir,"accepted_hits_unsorted.bam")
                       logger.info("Executing: sam_to_bam  {0}".format(bam_file))
                       sam_to_bam = "view -bS -o {0} {1}".format(bam_file,out_file)
                       script_util.runProgram(logger,"samtools",sam_to_bam,None,directory)
                       final_bam_prefix = os.path.join(output_dir,"accepted_hits")
                       logger.info("Executing: Sorting bam file  {0}".format(bam_file))
                       sort_bam_cmd  = "sort {0} {1}".format(bam_file,final_bam_prefix)
                       script_util.runProgram(logger,"samtools",sort_bam_cmd,None,directory)
               except Exception,e:
Exemple #22
0
    def runEach(self, task_params):
        """Align a single read sample with TopHat against the prepared index.

        Downloads the read library named in ``task_params``, builds the
        TopHat command line from the user-supplied method parameters and
        runs it against the Bowtie2 index found in the task directory.

        Args:
            task_params: dict with keys ``job_id`` (read sample name),
                ``label`` (condition), ``tophat_dir`` (working directory),
                ``ws_id``, ``annotation_id``, ``sampleset_id`` and
                ``gtf_file``.

        Raises:
            Exception: if the reads cannot be downloaded or TopHat fails.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        # Per-task inputs prepared by the dispatcher.
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print("Downloading Read Sample{0}".format(read_sample))
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            # Bowtie2 index basename: strip the ".rev.1.bt2" suffix from the
            # index file that prepare() built in the task directory.
            bowtie2_base = os.path.join(
                directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

            ### Adding advanced options to the TopHat call
            tophat_cmd = ' -p ' + str(self.num_threads)
            if 'max_intron_length' in params and params['max_intron_length'] is not None:
                tophat_cmd += ' -I ' + str(params['max_intron_length'])
            if 'min_intron_length' in params and params['min_intron_length'] is not None:
                tophat_cmd += ' -i ' + str(params['min_intron_length'])
            if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
                tophat_cmd += ' -a ' + str(params['min_anchor_length'])
            if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
                tophat_cmd += ' --read-edit-dist ' + str(params['read_edit_dist'])
            if 'read_gap_length' in params and params['read_gap_length'] is not None:
                tophat_cmd += ' --read-gap-length ' + str(params['read_gap_length'])
            if 'read_mismatches' in params and params['read_mismatches'] is not None:
                tophat_cmd += ' -N ' + str(params['read_mismatches'])
            if 'library_type' in params and params['library_type'] is not None:
                tophat_cmd += ' --library-type ' + params['library_type']
            if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
                tophat_cmd += ' --report-secondary-alignments'
            if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
                tophat_cmd += ' --no-coverage-search'
            if 'preset_options' in params and params['preset_options'] is not None:
                tophat_cmd += ' --' + params['preset_options']

            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, directory, token)
            except Exception as e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))

            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir, gtf_file, bowtie2_base, ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # TODO: for KBaseFile.PairedEndLibrary the orientation could be
                # read from the object itself instead of the method params
                # (both branches of the original code did the same thing).
                if 'orientation' in params and params['orientation'] is not None:
                    tophat_cmd += ' --' + params['orientation']
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(self.logger, "tophat", tophat_cmd, None, directory)
            except Exception as e:
                # Bug fix: runProgram may raise before cmdline_output/cmd_err
                # are bound; the old handler referenced them and died with a
                # NameError that masked the real failure.
                raise Exception("Failed to run command {0}".format(tophat_cmd))
        except Exception:
            # NOTE(review): the original outer try's handler was lost in this
            # copy of the file; log the traceback and re-raise so the
            # dispatcher sees the task failure.
            logger.exception("".join(traceback.format_exc()))
            raise
	try:
                logger.info("Unzipping Bowtie2 Indices")
                script_util.unzip_files(logger,os.path.join(bowtie2_dir,bw_index_files),bowtie2_dir)
                mv_dir= handler_util.get_dir(bowtie2_dir)
                if mv_dir is not None:
                        script_util.move_files(logger,mv_dir,bowtie2_dir)
        except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip indexfile error: Please contact [email protected]")
	### Build Index for the fasta file 
        fasta_file =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")+".fa")
        bowtie2base =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa"))
        bowtie2base_cmd = '{0} {1}'.format(fasta_file,bowtie2base)
	try:
            logger.info("Building Index for Hisat2 {0}".format(bowtie2base_cmd))
            cmdline_output = script_util.runProgram(logger,"bowtie2-build",bowtie2base_cmd,None,bowtie2_dir)
        except Exception,e:
            raise Exception("Failed to run command {0}".format(bowtie2base_cmd))
        ### Check if GTF object exists in the workspace pull the gtf
        ref_id = bowtie_index['data']['genome_id']
        genome_name = ws_client.get_object_info_new({"objects": [{'ref' : ref_id }] })[0][1]
	ws_gtf = genome_name+"_GTF"
	gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",bowtie2_dir,token)
        if gtf_file is None:
             rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],ref_id,genome_name,bowtie2_dir,token)
	# Determine the num_threads provided by the user otherwise default the number of threads to 2
        self.num_jobs = 1
        logger.info(" Number of threads used by each process {0}".format(self.num_threads))
	task_param = {'job_id' : params['sampleset_id'],
                      'label' : r_label,
                      'ws_id' : params['ws_id'],
             logger,
             shock_service_url=services["shock_service_url"],
             shock_id=read2_id,
             filename=read2_name,
             directory=directory,
             token=token,
         )
         bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
             os.path.join(directory, read1_name), os.path.join(directory, read2_name), bowtie2_base, out_file
         )
     except Exception, e:
         # logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
         raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
 try:
     logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
     cmdline_output = script_util.runProgram(logger, "bowtie2", bowtie2_cmd, None, directory)
     # print cmdline_output
 except Exception, e:
     # logger.exception("Failed to upload the index")
     raise Exception("Failed to upload the index")
 try:
     # stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats"
     stats_data = {}
     stats_data = script_util.extractAlignmentStatsInfo(
         logger, "bowtie2", ws_client, ws_id, None, cmdline_output["stderr"], None
     )
     bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
     logger.info("Executing: sam_to_bam  {0}".format(bam_file))
     sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
     script_util.runProgram(logger, "samtools", sam_to_bam, None, directory)
     final_bam_prefix = os.path.join(output_dir, "accepted_hits")
Exemple #25
0
    def runEach(self, task_params):
        """Align a single read sample with Bowtie2 against the prepared index.

        Downloads the read library named in ``task_params``, assembles the
        Bowtie2 command line from the user-supplied method parameters and
        runs it, producing ``accepted_hits.sam`` in the sample's output
        directory.

        Args:
            task_params: dict with keys ``job_id`` (read sample name),
                ``label`` (condition), ``bowtie2_dir`` (working directory),
                ``ws_id``, ``annotation_id`` and ``sampleset_id``.

        Raises:
            Exception: if the reads cannot be downloaded or Bowtie2 fails.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        # Per-task inputs prepared by the dispatcher.
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print("Downloading Read Sample{0}".format(read_sample))
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(directory, sample_name.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            # Bowtie2 index basename: strip the ".rev.1.bt2" suffix from the
            # index file that prepare() built in the task directory.
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)

            ### Adding advanced options to the Bowtie2 call
            bowtie2_cmd = ''
            bowtie2_cmd += ' -p {0}'.format(self.num_threads)
            if 'quality_score' in params and params['quality_score'] is not None:
                bowtie2_cmd += ' --' + params['quality_score']
            if 'alignment_type' in params and params['alignment_type'] is not None:
                bowtie2_cmd += ' --' + params['alignment_type']
            if ('preset_options' in params and params['preset_options'] is not None) and \
               ('alignment_type' in params and params['alignment_type'] is not None):
                # Preset flags get a "-local" suffix in local alignment mode.
                if params['alignment_type'] == 'local':
                    bowtie2_cmd += ' --' + params['preset_options'] + '-local'
                else:
                    bowtie2_cmd += ' --' + params['preset_options']
            if 'trim5' in params and params['trim5'] is not None:
                bowtie2_cmd += ' --trim5 ' + str(params['trim5'])
            if 'trim3' in params and params['trim3'] is not None:
                bowtie2_cmd += ' --trim3 ' + str(params['trim3'])
            if 'np' in params and params['np'] is not None:
                bowtie2_cmd += ' --np ' + str(params['np'])
            if 'minins' in params and params['minins'] is not None:
                bowtie2_cmd += ' --minins ' + str(params['minins'])
            if 'maxins' in params and params['maxins'] is not None:
                bowtie2_cmd += ' --maxins ' + str(params['maxins'])

            out_file = output_dir + "/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token)
            except Exception as e:
                self.logger.exception(e)
                raise Exception("Unable to download reads file , {0}".format(read_sample))

            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'], bowtie2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # Bug fix: this branch previously appended the orientation and
                # pairing options to hisat2_cmd / hisat2_base (copy-paste from
                # the HISAT2 runner), which are undefined here and raised
                # NameError at runtime.
                # TODO: for KBaseFile.PairedEndLibrary the orientation could be
                # read from the object itself instead of the method params.
                if 'orientation' in params and params['orientation'] is not None:
                    bowtie2_cmd += ' --' + params['orientation']
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'], bowtie2_base, out_file)

            try:
                self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                cmdline_output = script_util.runProgram(self.logger, "bowtie2", bowtie2_cmd, None, directory)
            except Exception as e:
                raise Exception("Failed to run command {0}".format(bowtie2_cmd))
        except Exception:
            # NOTE(review): the original outer try's handler was lost in this
            # copy of the file; log the traceback and re-raise so the
            # dispatcher sees the task failure.
            logger.exception("".join(traceback.format_exc()))
            raise
Exemple #26
0
             filename=read2_name,
             directory=input_direc,
             token=token)
         hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
             os.path.join(input_direc, read1_name),
             os.path.join(output_dir, read2_name), hisat2_base,
             out_file)
     except Exception, e:
         #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
         raise Exception(
             "Unable to download shock file , {0} or {1}".format(
                 read1_name, read2_name))
 try:
     logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
     cmdline_output = script_util.runProgram(logger, "hisat2",
                                             hisat2_cmd, None,
                                             directory)
 except Exception, e:
     raise Exception("Failed to run command {0}".format(hisat2_cmd))
     #logger.exception("Failed to run command {0}".format(hisat2_cmd))
 try:
     stats_data = {}
     stats_data = script_util.extractAlignmentStatsInfo(
         logger, "bowtie2", ws_client, ws_id, None,
         cmdline_output['stderr'], None)
     bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
     logger.info("Executing: sam_to_bam  {0}".format(bam_file))
     sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
     script_util.runProgram(logger, "samtools", sam_to_bam, None,
                            directory)
     final_bam_prefix = os.path.join(output_dir, "accepted_hits")
Exemple #27
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['hisat2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            r_sample = script_util.ws_get_obj(self.logger, ws_client, ws_id,
                                              read_sample)[0]
            sample_type = script_util.ws_get_type_name(self.logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(
                directory,
                sample_name.split('.')[0] + "_hisat2_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_hisat2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir): os.mkdir(output_dir)
            print directory
            base = handler_util.get_file_with_suffix(directory, ".1.ht2")
            print base
            hisat2_base = os.path.join(directory, base)
            ### Adding advanced options to Bowtie2Call
            hisat2_cmd = ''
            hisat2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params
                    and params['quality_score'] is not None):
                hisat2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params
                    and params['alignment_type'] is not None):
                hisat2_cmd += (' --' + params['alignment_type'])
            if ('trim5' in params and params['trim5'] is not None):
                hisat2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                hisat2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                hisat2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                hisat2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                hisat2_cmd += (' --maxins ' + str(params['maxins']))
            #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                hisat2_cmd += (' --min-intronlen ' +
                               str(params['min_intron_length']))
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                hisat2_cmd += (' --max-intronlen ' +
                               str(params['max_intron_length']))
            if ('no_spliced_alignment' in params
                    and params['no_spliced_alignment'] != 0):
                hisat2_cmd += (' --no-spliced-alignment')
            if ('transcriptome_mapping_only' in params
                    and params['transcriptome_mapping_only'] != 0):
                hisat2_cmd += (' --transcriptome-mapping-only')
            if ('tailor_alignments' in params
                    and params['tailor_alignments'] is not None):
                hisat2_cmd += (' --' + params['tailor_alignments'])
            out_file = output_dir + "/accepted_hits.sam"
            ####
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   input_direc, token)
                self.logger.info(ds)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    ds['fwd'], hisat2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    ds['fwd'], ds['rev'], hisat2_base, out_file)
            #if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
            #        lib_type = 'SingleEnd'
            #        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            #            read_id = r_sample['data']['handle']['id']
            #            read_name =  r_sample['data']['handle']['file_name']
            #        else:
            #            read_id = r_sample['data']['lib']['file']['id']
            #            read_name =  r_sample['data']['lib']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file)
            #        except Exception,e:
            #                self.logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0}".format(read_name))
            #if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
            #        lib_type = 'PairedEnd'
            #        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['handle_1']['id']
            #            read1_name = r_sample['data']['handle_1']['file_name']
            #            read2_id = r_sample['data']['handle_2']['id']
            #            read2_name = r_sample['data']['handle_2']['file_name']
            #        else:
            #            # TODO: the following can be read from PEL object
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['lib1']['file']['id']
            #            read1_name = r_sample['data']['lib1']['file']['file_name']
            #            read2_id = r_sample['data']['lib2']['file']['id']
            #            read2_name = r_sample['data']['lib2']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),hisat2_base,out_file)
            #        except Exception,e:
            #                logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                cmdline_output = script_util.runProgram(
                    self.logger, "hisat2", hisat2_cmd, None, directory)
            except Exception, e:
                logger.exception(e)
                raise Exception("Failed to run command {0}".format(hisat2_cmd))
Exemple #28
0
class Bowtie2(ExecutionBase):
    def __init__(self, logger, directory, urls, max_cores):
        pprint(self.__class__)
        super(Bowtie2, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        #self.sample = None
        #self.sampleset_info = None
        self.num_threads = None
        self.tool_used = "Bowtie2"
        self.tool_version = "2.2.6"

    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id,
                                              read_sample)[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(
                directory,
                sample_name.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir): os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)

            ### Adding advanced options to Bowtie2Call
            bowtie2_cmd = ''
            bowtie2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params
                    and params['quality_score'] is not None):
                bowtie2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params
                    and params['alignment_type'] is not None):
                bowtie2_cmd += (' --' + params['alignment_type'])
            if ('preset_options' in params and params['preset_options']
                    is not None) and ('alignment_type' in params and
                                      params['alignment_type'] is not None):
                if (params['alignment_type'] == 'local'):
                    bowtie2_cmd += (' --' + params['preset_options'] +
                                    '-local')
                else:
                    bowtie2_cmd += (' --' + params['preset_options'])
            if ('trim5' in params and params['trim5'] is not None):
                bowtie2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                bowtie2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                bowtie2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                bowtie2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                bowtie2_cmd += (' --maxins ' + str(params['maxins']))

            out_file = output_dir + "/accepted_hits.sam"
            ####
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   input_direc, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    ds['fwd'], bowtie2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    ds['fwd'], ds['rev'], hisat2_base, out_file)
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    ds['fwd'], ds['rev'], bowtie2_base, out_file)
            ###
#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                cmdline_output = script_util.runProgram(
                    self.logger, "bowtie2", bowtie2_cmd, None, directory)
            except Exception, e:
                raise Exception(
                    "Failed to run command {0}".format(bowtie2_cmd))
            try:
                stats_data = {}
                stats_data = script_util.extractAlignmentStatsInfo(
                    self.logger, "bowtie2", ws_client, ws_id, None,
                    cmdline_output['stderr'], None)
                bam_file = os.path.join(output_dir,
                                        "accepted_hits_unsorted.bam")
                logger.info("Executing: sam_to_bam  {0}".format(bam_file))
                sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
                script_util.runProgram(self.logger, "samtools", sam_to_bam,
                                       None, directory)
                final_bam_prefix = os.path.join(output_dir, "accepted_hits")
                logger.info(
                    "Executing: Sorting bam file  {0}".format(bam_file))
                sort_bam_cmd = "sort {0} {1}".format(bam_file,
                                                     final_bam_prefix)
                script_util.runProgram(self.logger, "samtools", sort_bam_cmd,
                                       None, directory)
            except Exception, e:
                raise Exception(
                    "Error Running the bowtie2 command {0},{1} {2}".format(
                        bowtie2_cmd, directory,
                        " ".join(traceback.print_exc())))
Exemple #29
0
class Cufflinks(ExecutionBase):
    """Runs Cufflinks expression quantification for each alignment of an
    alignment set (see runEach)."""

    def __init__(self, logger, directory, urls, max_cores):
        pprint(self.__class__)
        super(Cufflinks, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        #self.sample = None
        #self.sampleset_info = None
        # num_threads is populated by the execution framework before
        # runEach is invoked -- TODO confirm against ExecutionBase
        self.num_threads = None
        self.tool_used = "Cufflinks"
        self.tool_version = "1.2.3"

    def runEach(self, task_params):
        """Download one alignment from Shock, run cufflinks against the
        supplied GTF, and parse genes.fpkm_tracking into FPKM/TPM dicts.

        task_params keys used: job_id (alignment ref), gtf_file,
        cufflinks_dir, genome_id, annotation_id, sample_id,
        alignmentset_id, ws_id.

        NOTE(review): this fragment is truncated -- the outer ``try:``
        below has no matching ``except`` within view.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['cufflinks_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(
            s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(
            s_alignment))
        alignment_name = ws_client.get_object_info([{
            "ref": s_alignment
        }],
                                                   includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(
                directory, "run_cufflinks_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            input_direc = os.path.join(
                directory,
                alignment_name.split('_alignment')[0] + "_cufflinks_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = alignment_name.split(
                '_alignment')[0] + "_cufflinks_expression"
            output_dir = os.path.join(directory, output_name)
            #Download Alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=self.urls['shock_service_url'],
                    shock_id=a_file_id,
                    filename=a_filename,
                    directory=input_direc,
                    token=token)
            except Exception, e:
                raise Exception(
                    "Unable to download shock file, {0},{1}".format(
                        a_filename, "".join(traceback.format_exc())))
            try:
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,
                                        os.path.join(input_direc, a_filename),
                                        input_dir)
            except Exception, e:
                # NOTE(review): everything after this `raise` is
                # unreachable -- the traceback is never logged and the
                # "Unzip alignment files  error" message is never raised.
                raise Exception(e)
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files  error")

            input_file = os.path.join(input_dir, "accepted_hits.bam")
            ### Adding advanced options to tophat command
            # NOTE(review): tool_opts is built here but not used within
            # this fragment.
            tool_opts = {
                k: str(v)
                for k, v in params.iteritems()
                if not k in ('ws_id', 'alignmentset_id',
                             'num_threads') and v is not None
            }
            cufflinks_command = (' -p ' + str(self.num_threads))
            if 'max_intron_length' in params and params[
                    'max_intron_length'] is not None:
                cufflinks_command += (' --max-intron-length ' +
                                      str(params['max_intron_length']))
            if 'min_intron_length' in params and params[
                    'min_intron_length'] is not None:
                cufflinks_command += (' --min-intron-length ' +
                                      str(params['min_intron_length']))
            if 'overhang_tolerance' in params and params[
                    'overhang_tolerance'] is not None:
                cufflinks_command += (' --overhang-tolerance ' +
                                      str(params['overhang_tolerance']))

            cufflinks_command += " -o {0} -G {1} {2}".format(
                output_dir, gtf_file, input_file)
            #cufflinks_command += " -o {0} -A {1} -G {2} {3}".format(t_file_name,g_output_file,gtf_file,input_file)
            logger.info("Executing: cufflinks {0}".format(cufflinks_command))
            print "Executing: cufflinks {0}".format(cufflinks_command)
            ret = script_util.runProgram(None, "cufflinks", cufflinks_command,
                                         None, directory)
            result = ret["result"]
            # NOTE(review): the stderr-parsing loop below is nested inside
            # the stdout loop, so stderr is re-scanned once per stdout
            # line -- presumably an indentation slip; confirm intent
            # before changing.
            for line in result.splitlines(False):
                self.logger.info(line)
                stderr = ret["stderr"]
                prev_value = ''
                for line in stderr.splitlines(False):
                    if line.startswith('> Processing Locus'):
                        words = line.split()
                        cur_value = words[len(words) - 1]
                        if prev_value != cur_value:
                            prev_value = cur_value
                            self.logger.info(line)
                        else:
                            prev_value = ''
                            self.logger.info(line)

            ##Parse output files
            try:
                g_output_file = os.path.join(output_dir, "genes.fpkm_tracking")
                #exp_dict = rnaseq_util.parse_FPKMtracking( g_output_file, 'Cufflinks', 'FPKM' )
                #tpm_exp_dict = script_util.parse_FPKMtracking(g_output_file,'Cufflinks','TPM')
                # Cufflinks doesn't produce TPM, we infer from FPKM
                # (see discussion @ https://www.biostars.org/p/160989/)
                exp_dict, tpm_exp_dict = rnaseq_util.parse_FPKMtracking_calc_TPM(
                    g_output_file)
            except Exception, e:
                # NOTE(review): as above, the two lines after this `raise`
                # are unreachable dead code.
                raise Exception(e)
                logger.exception("".join(traceback.format_exc()))
                raise Exception("Error parsing FPKMtracking")
Exemple #30
0
    def runEach(self, task_list):
        logger = self.logger
        ### Call Cuffmerge function
        used_tool = self.details['used_tool']
        logger.info('in DiffExpfoBallgown.runEach()')
        if used_tool == "Ballgown (Bioconductor)":
            #merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir,merge_dir,num_threads,self.details['gtf_file'],assembly_file)
            #run_tool =  "StringTie"
            #tool_version = "1.2.3"
            # For now, take no action for StringTie processing
            logger.info('Exiting immediately - StringTie case')
            return
        elif used_tool == 'Cufflinks':
            merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir, merge_dir,
                                                    num_threads, gtf_file,
                                                    assembly_file)
            run_tool = "Tablemaker"
            tool_version = '2.0.9'
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run DiffExpforBallgown
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        diffexp_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for DiffExpforBallgown

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            diffexp_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            diffexp_command += (' -c ' +
                                str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            diffexp_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            diffexp_command += (' --library-type ' +
                                self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            diffexp_command += (' --library-norm-method ' +
                                self.method_params['library_norm_method'])
        try:
            diffexp_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            logger.info("Executing: diffexp {0}".format(diffexp_command))
            ret = script_util.runProgram(None, "diffexp", diffexp_command,
                                         None, self.directory)
            result = ret["result"]
            #error =  ret['stderr']
            #print result
            for line in result.splitlines(False):
                logger.info(line)
                stderr = ret["stderr"]
                prev_value = ''
                for line in stderr.splitlines(False):
                    if line.startswith('> Processing Locus'):
                        words = line.split()
                        cur_value = words[len(words) - 1]
                        if prev_value != cur_value:
                            prev_value = cur_value
                            logger.info(line)
                        else:
                            prev_value = ''
                            logger.info(line)
        except Exception, e:
            raise Exception(e)
            raise Exception("Error executing diffexp {0},{1}".format(
                diffexp_command, e))
Exemple #31
0
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
                                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(output_dir,read2_name),hisat2_base,out_file)
                        except Exception,e:
                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                try:
                        self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                        cmdline_output = script_util.runProgram(self.logger,"hisat2",hisat2_cmd,None,directory)
                except Exception,e:
                        raise Exception("Failed to run command {0}".format(hisat2_cmd))
                try:
                        stats_data = {}
                        stats_data = script_util.extractAlignmentStatsInfo(self.logger,"bowtie2",ws_client,ws_id,None,cmdline_output['stderr'],None)
                        bam_file = os.path.join(output_dir,"accepted_hits_unsorted.bam")
                        logger.info("Executing: sam_to_bam  {0}".format(bam_file))
                        sam_to_bam = "view -bS -o {0} {1}".format(bam_file,out_file)
                        script_util.runProgram(self.logger,"samtools",sam_to_bam,None,directory)
                        final_bam_prefix = os.path.join(output_dir,"accepted_hits")
                        logger.info("Executing: Sorting bam file  {0}".format(bam_file))
                        sort_bam_cmd  = "sort {0} {1}".format(bam_file,final_bam_prefix)
                        script_util.runProgram(self.logger,"samtools",sort_bam_cmd,None,directory)
                except Exception,e:
                        raise Exception("Error Running the hisat2 command {0},{1} {2}".format(hisat2_cmd,directory," ".join(traceback.print_exc())))

                # Zip tophat folder
                out_file_path = os.path.join(directory,"%s.zip" % output_name)
                try:
                        logger.info("Zipping the output files".format(out_file_path))
                        script_util.zip_files(self.logger, output_dir,out_file_path)
                except Exception, e:
                        raise Exception("Failed to compress the index: {0}".format(out_file_path))
                ## Upload the file using handle service
Exemple #32
0
                    filename=read2_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name), bowtie2_base,
                    out_file)
            except Exception, e:
                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
        try:
            logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
            cmdline_output = script_util.runProgram(logger, "bowtie2",
                                                    bowtie2_cmd, None,
                                                    directory)
#print cmdline_output
        except Exception, e:
            #logger.exception("Failed to upload the index")
            raise Exception("Failed to upload the index")
        try:
            #stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats"
            stats_data = {}
            stats_data = script_util.extractAlignmentStatsInfo(
                logger, "bowtie2", ws_client, ws_id, None,
                cmdline_output['stderr'], None)
            bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam")
            logger.info("Executing: sam_to_bam  {0}".format(bam_file))
            sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file)
            script_util.runProgram(logger, "samtools", sam_to_bam, None,
Exemple #33
0
class StringTie(ExecutionBase):
    """Driver for the StringTie transcript assembler (tool version 1.2.3).

    Per-run shared state is held on the instance; the per-alignment work
    happens in runEach().
    """

    def __init__(self, logger, directory, urls, max_cores):
        """Initialize the StringTie runner.

        :param logger: logger shared with the execution framework
        :param directory: working directory for intermediate/output files
        :param urls: dict of KBase service URLs (shock, workspace, ...)
        :param max_cores: upper bound on cores the framework may allocate
        """
        # Removed debug leftover: pprint(self.__class__) printed the class
        # object to stdout on every construction.
        super(StringTie, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        # num_threads is populated by the framework before runEach() runs.
        self.num_threads = None
        self.tool_used = "StringTie"
        self.tool_version = "1.2.3"

    def runEach(self, task_params):
        """Process one alignment: download it from Shock, run StringTie on
        the extracted BAM, and parse the resulting FPKM/TPM tracking tables.

        NOTE(review): this method is truncated in the visible chunk -- the
        code that zips/uploads the expression output presumably follows
        (compare the similar fragment elsewhere in this file).

        :param task_params: dict of per-task inputs; keys used here:
            job_id (alignment ref), gtf_file, stringtie_dir, genome_id,
            annotation_id, sample_id, alignmentset_id, ws_id
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        # job_id is a workspace ref to a single RNASeqAlignment object.
        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(
            s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(
            s_alignment))
        # Element [1] of the object-info tuple is the object name.
        alignment_name = ws_client.get_object_info([{
            "ref": s_alignment
        }],
                                                   includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(
                directory, "run_Stringtie_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            # Directory names are derived from the alignment name:
            # "<sample>_alignment" -> "<sample>_stringtie_input" /
            # "<sample>_stringtie_expression".
            input_direc = os.path.join(
                directory,
                alignment_name.split('_alignment')[0] + "_stringtie_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = alignment_name.split(
                '_alignment')[0] + "_stringtie_expression"
            output_dir = os.path.join(directory, output_name)
            #Download Alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=self.urls['shock_service_url'],
                    shock_id=a_file_id,
                    filename=a_filename,
                    directory=input_direc,
                    token=token)
            except Exception, e:
                raise Exception(
                    "Unable to download shock file, {0},{1}".format(
                        a_filename, "".join(traceback.format_exc())))
            try:
                # The downloaded alignment is a zip archive; extract it into
                # its own subdirectory so the BAM path below is predictable.
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,
                                        os.path.join(input_direc, a_filename),
                                        input_dir)
            except Exception, e:
                # NOTE(review): the two statements after this raise are
                # unreachable -- the error log and the friendlier message
                # never execute.
                raise Exception(e)
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files  error")

            input_file = os.path.join(input_dir, "accepted_hits.bam")
            ### Adding advanced options to the stringtie command
            # NOTE(review): tool_opts is built but never used below; the
            # command line is assembled flag-by-flag instead.
            tool_opts = {
                k: str(v)
                for k, v in params.iteritems()
                if not k in ('ws_id', 'alignmentset_id',
                             'num_threads') and v is not None
            }
            stringtie_command = (' -p ' + str(self.num_threads))
            # Map user params onto StringTie flags, only when provided.
            if 'label' in params and params['label'] is not None:
                stringtie_command += (' -l ' + str(params['label']))
            if 'min_isoform_abundance' in params and params[
                    'min_isoform_abundance'] is not None:
                stringtie_command += (' -f ' +
                                      str(params['min_isoform_abundance']))
            if 'min_length' in params and params['min_length'] is not None:
                stringtie_command += (' -m ' + str(params['min_length']))
            if 'a_juncs' in params and params['a_juncs'] is not None:
                stringtie_command += (' -a ' + str(params['a_juncs']))
            if 'j_min_reads' in params and params['j_min_reads'] is not None:
                stringtie_command += (' -j ' + str(params['j_min_reads']))
            if 'c_min_read_coverage' in params and params[
                    'c_min_read_coverage'] is not None:
                stringtie_command += (' -c ' +
                                      str(params['c_min_read_coverage']))
            if 'gap_sep_value' in params and params[
                    'gap_sep_value'] is not None:
                stringtie_command += (' -g ' + str(params['gap_sep_value']))
            # Boolean-ish flags: any non-zero value enables them.
            if 'disable_trimming' in params and params['disable_trimming'] != 0:
                stringtie_command += (' -t ')
            if 'ballgown_mode' in params and params['ballgown_mode'] != 0:
                stringtie_command += (' -B ')
            if 'skip_reads_with_no_ref' in params and params[
                    'skip_reads_with_no_ref'] != 0:
                stringtie_command += (' -e ')
            # Fixed outputs: assembled transcripts + gene abundance table.
            t_file_name = os.path.join(output_dir, "transcripts.gtf")
            g_output_file = os.path.join(output_dir, "genes.fpkm_tracking")
            stringtie_command += " -o {0} -A {1} -G {2} {3}".format(
                t_file_name, g_output_file, gtf_file, input_file)
            logger.info("Executing: stringtie {0}".format(stringtie_command))
            print "Executing: stringtie {0}".format(stringtie_command)
            # NOTE(review): logger is passed as None here, unlike other
            # runProgram calls in this file -- confirm that is intentional.
            ret = script_util.runProgram(None, "stringtie", stringtie_command,
                                         None, directory)
            ##Parse output files
            try:
                exp_dict = rnaseq_util.parse_FPKMtracking(
                    g_output_file, 'StringTie', 'FPKM')
                tpm_exp_dict = rnaseq_util.parse_FPKMtracking(
                    g_output_file, 'StringTie', 'TPM')
            except Exception, e:
                # NOTE(review): as above, the statements after this raise
                # are unreachable.
                raise Exception(e)
                logger.exception("".join(traceback.format_exc()))
                raise Exception("Error parsing FPKMtracking")
Exemple #34
0
         script_util.move_files(logger, mv_dir, bowtie2_dir)
 except Exception, e:
     logger.error("".join(traceback.format_exc()))
     raise Exception("Unzip indexfile error")
 ### Build Index for the fasta file
 fasta_file = os.path.join(
     bowtie2_dir,
     handler_util.get_file_with_suffix(bowtie2_dir, ".fa") + ".fa")
 bowtie2base = os.path.join(
     bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa"))
 bowtie2base_cmd = '{0} {1}'.format(fasta_file, bowtie2base)
 try:
     logger.info(
         "Building Index for Hisat2 {0}".format(bowtie2base_cmd))
     cmdline_output = script_util.runProgram(logger, "bowtie2-build",
                                             bowtie2base_cmd, None,
                                             bowtie2_dir)
 except Exception, e:
     raise Exception(
         "Failed to run command {0}".format(bowtie2base_cmd))
 ### Check if GTF object exists in the workspace pull the gtf
 ref_id = bowtie_index['data']['genome_id']
 genome_name = ws_client.get_object_info_new(
     {"objects": [{
         'ref': ref_id
     }]})[0][1]
 ws_gtf = genome_name + "_GTF"
 gtf_file = script_util.check_and_download_existing_handle_obj(
     logger, ws_client, self.urls, params['ws_id'], ws_gtf,
     "KBaseRNASeq.GFFAnnotation", bowtie2_dir, token)
 if gtf_file is None:
Exemple #35
0
                     stringtie_command += (' -j '+str(params['j_min_reads']))
                if 'c_min_read_coverage' in params  and params['c_min_read_coverage'] is not None:
                     stringtie_command += (' -c '+str(params['c_min_read_coverage']))
                if 'gap_sep_value' in params  and params['gap_sep_value'] is not None:
                     stringtie_command += (' -g '+str(params['gap_sep_value']))
                if 'disable_trimming' in params  and params['disable_trimming'] != 0:
                     stringtie_command += (' -t ')
                if 'ballgown_mode' in params  and params['ballgown_mode'] != 0:
                     stringtie_command += (' -B ')
                if 'skip_reads_with_no_ref' in params  and params['skip_reads_with_no_ref'] != 0:
                     stringtie_command += (' -e ')
                t_file_name = os.path.join(output_dir,"transcripts.gtf")
		g_output_file = os.path.join(output_dir,"genes.fpkm_tracking")
                stringtie_command += " -o {0} -A {1} -G {2} {3}".format(t_file_name,g_output_file,gtf_file,input_file)
                logger.info("Executing: stringtie {0}".format(stringtie_command))
                ret = script_util.runProgram(None,"stringtie",stringtie_command,None,directory)
                ##Parse output files
		try:
                	exp_dict = script_util.parse_FPKMtracking(g_output_file,'StringTie','FPKM')
                	tpm_exp_dict = script_util.parse_FPKMtracking(g_output_file,'StringTie','TPM')
		except Exception,e:
                        logger.exception("".join(traceback.format_exc()))
                        raise Exception("Error parsing FPKMtracking")

                ##  compress and upload to shock
                try:
                        logger.info("Zipping Stringtie output")
                        out_file_path = os.path.join(directory,"%s.zip" % output_name)
                        script_util.zip_files(logger,output_dir,out_file_path)
                except Exception,e:
                        logger.exception("".join(traceback.format_exc()))
Exemple #36
0
class HiSat2Sample(HiSat2):
    """Single-sample variant of the HiSat2 aligner driver."""

    def __init__(self, logger, directory, urls, max_cores):
        """Set up base-class state, then the per-sample shared fields."""
        super(HiSat2Sample, self).__init__(logger, directory, urls, max_cores)
        # Shared across prepare()/runEach(): a single-sample run uses one
        # worker thread and caches the sample's object-info tuple here.
        self.num_threads = 1
        self.sample_info = None

    def prepare(self):
        """Fetch the sample and genome objects, validate the library type,
        and build the HISAT2 index for the genome fasta.

        NOTE(review): truncated in the visible chunk -- whatever follows the
        index build (e.g. GTF handling) is not shown here.
        """
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        hisat2_dir = self.directory

        try:
            #sample,annotation_name = ws_client.get_objects(
            #                         [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']},
            #                         { 'name' : params['genome_id'], 'workspace' : params['ws_id']}])
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            annotation_name = script_util.ws_get_obj(logger, ws_client,
                                                     params['ws_id'],
                                                     params['genome_id'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object Info and  IDs
        #sample_info,annotation_info = ws_client.get_object_info_new({"objects": [
        #                                   {'name': params['sampleset_id'], 'workspace': params['ws_id']},
        #                                   {'name': params['genome_id'], 'workspace': params['ws_id']}
        #                                   ]})
        sample_info = script_util.ws_get_obj_info(logger, ws_client,
                                                  params['ws_id'],
                                                  params['sampleset_id'])[0]
        self.sample_info = sample_info
        ### Get the workspace object ids for the objects ###
        # The ref is assembled as "wsid/objid/version" -- presumably
        # object-info indices [6], [0], [4]; verify against the workspace API.
        sample_id = str(sample_info[6]) + '/' + str(
            sample_info[0]) + '/' + str(sample_info[4])
        #annotation_id = str(annotation_info[6]) + '/' + str(annotation_info[0]) + '/' + str(annotation_info[4])
        annotation_id = script_util.ws_get_ref(logger, ws_client,
                                               params['ws_id'],
                                               params['genome_id'])
        sample_type = sample_info[2].split('-')[0]
        lib_types = [
            'KBaseAssembly.SingleEndLibrary', 'KBaseAssembly.PairedEndLibrary',
            'KBaseFile.SingleEndLibrary', 'KBaseFile.PairedEndLibrary'
        ]
        ### Check if the Library objects exist in the same workspace
        if not sample_type in lib_types:  #'KBaseAssembly.SingleEndLibrary' or sample_type != 'KBaseAssembly.PairedEndLibrary':
            raise HiSat2SampleException(
                'Either of the Library typed objects SingleEndLibrary or PairedEndLibrary is required'
            )
        # Single-sample mode: exactly one alignment job will be scheduled.
        r_label = 'Single'
        self.num_jobs = 1
        ### Get the Genome Id for the genome selected and get fasta file
        ref_id, fasta_file = rnaseq_util.get_fa_from_genome(
            logger, ws_client, self.urls, params['ws_id'], hisat2_dir,
            params['genome_id'])

        ### Build Index for the fasta file
        hisat2base = os.path.basename(fasta_file)
        #hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fa"))
        hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
        try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger, "hisat2-build",
                                                    hisat2base_cmd, None,
                                                    hisat2_dir)
        except Exception, e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
Exemple #37
0
    def runEach(self, task_params):
        """Align one read sample with TopHat against the prebuilt Bowtie2
        index: download the reads, assemble the tophat command line from the
        user params, and run it.

        NOTE(review): the enclosing class and the tail of this method are
        outside the visible chunk -- post-processing of the alignment is
        not shown here.

        :param task_params: dict of per-task inputs; keys used here:
            job_id (reads ref), label, tophat_dir, ws_id, annotation_id,
            sampleset_id, gtf_file
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id,
                                              read_sample)[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            #if not os.path.exists(output_dir): os.makedirs(output_dir)
            #out_file = output_dir +"/accepted_hits.sam"
            # Recover the Bowtie2 index basename by stripping the
            # ".rev.1.bt2" suffix from the index file in the working dir.
            bowtie2_base = os.path.join(
                directory,
                handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
            ### Adding advanced options to the tophat command
            tophat_cmd = (' -p ' + str(self.num_threads))
            # Optional numeric/string params map 1:1 onto tophat flags.
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                tophat_cmd += (' -I ' + str(params['max_intron_length']))
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                tophat_cmd += (' -i ' + str(params['min_intron_length']))
            if ('min_anchor_length' in params
                    and params['min_anchor_length'] is not None):
                tophat_cmd += (' -a ' + str(params['min_anchor_length']))
            if ('read_edit_dist' in params
                    and params['read_edit_dist'] is not None):
                tophat_cmd += (' --read-edit-dist ' +
                               str(params['read_edit_dist']))
            if ('read_gap_length' in params
                    and params['read_gap_length'] is not None):
                tophat_cmd += (' --read-gap-length ' +
                               str(params['read_gap_length']))
            if ('read_mismatches' in params
                    and params['read_mismatches'] is not None):
                tophat_cmd += (' -N ' + str(params['read_mismatches']))
            if ('library_type' in params
                    and params['library_type'] is not None):
                tophat_cmd += (' --library-type ' + params['library_type'])
            # Flag-style params: enabled when the int value is 1.
            if ('report_secondary_alignments' in params
                    and int(params['report_secondary_alignments']) == 1):
                tophat_cmd += ' --report-secondary-alignments'
            if ('no_coverage_search' in params
                    and int(params['no_coverage_search']) == 1):
                tophat_cmd += ' --no-coverage-search'
            if ('preset_options' in params
                    and params['preset_options'] is not None):
                tophat_cmd += ' --' + params['preset_options']
            #out_file = output_dir +"/accepted_hits.sam"
            try:
                # Download the reads via the reads utility into `directory`;
                # ds holds local paths ('fwd', and 'rev' for paired-end).
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   directory, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                # NOTE(review): both branches below append the same
                # --<orientation> flag; only the TODO distinguishes them.
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

# Superseded shock-based download path, kept for reference:
#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(
                    self.logger, "tophat", tophat_cmd, None, directory)
            except Exception, e:
                # NOTE(review): if runProgram itself raised, cmdline_output
                # and cmd_err are unbound here, so this raise becomes a
                # NameError that masks the original failure.
                raise Exception("Failed to run command {0}\n{1}\n{2}".format(
                    tophat_cmd, cmdline_output, cmd_err))