コード例 #1
0
    def Hisat2Call(self, ctx, params):
        """
        :param params: instance of type "Hisat2Params" -> structure:
           parameter "ws_id" of String, parameter "sampleset_id" of String,
           parameter "genome_id" of String, parameter "num_threads" of Long,
           parameter "quality_score" of String, parameter "skip" of Long,
           parameter "trim3" of Long, parameter "trim5" of Long, parameter
           "np" of Long, parameter "minins" of Long, parameter "maxins" of
           Long, parameter "orientation" of String, parameter
           "min_intron_length" of Long, parameter "max_intron_length" of
           Long, parameter "no_spliced_alignment" of type "bool" (indicates
           true or false values, false <= 0, true >=1), parameter
           "transcriptome_mapping_only" of type "bool" (indicates true or
           false values, false <= 0, true >=1), parameter "tailor_alignments"
           of String
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN Hisat2Call
        # Runs HISAT2 on the workspace object params['sampleset_id'] found in
        # workspace params['ws_id'].  The object's workspace type decides
        # which runner is used: a KBaseRNASeq.RNASeqSampleSet goes to
        # HiSat2SampleSet, anything else to HiSat2Sample.
        # NOTE(review): the docstring above and the code outside the
        # BEGIN/END markers look generated from the service spec - confirm
        # before hand-editing them.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        hisat2_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, hisat2_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            # Decide between set mode and single-sample mode from the
            # object's type string; everything after the first '-' is
            # dropped (presumably the type version - TODO confirm).
            wsc = common_params['ws_client']
            readsobj_info = wsc.get_object_info_new(
                {"objects": [{'name': params['sampleset_id'],
                              'workspace': params['ws_id']}]})
            readsobj_type = readsobj_info[0][2].split('-')[0]
            if readsobj_type == 'KBaseRNASeq.RNASeqSampleSet':
                self.__LOGGER.info("HiSat2 SampleSet Case")
                runner = HiSat2SampleSet(self.__LOGGER, hisat2_dir,
                                         self.__SERVICES)
            else:
                runner = HiSat2Sample(self.__LOGGER, hisat2_dir,
                                      self.__SERVICES)
            returnVal = runner.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, hisat2_dir)
        #END Hisat2Call

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method Hisat2Call return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #2
0
    def TophatCall(self, ctx, params):
        """
        :param params: instance of type "TophatParams" -> structure:
           parameter "ws_id" of String, parameter "read_sample" of String,
           parameter "genome_id" of String, parameter "bowtie2_index" of
           String, parameter "read_mismatches" of Long, parameter
           "read_gap_length" of Long, parameter "read_edit_dist" of Long,
           parameter "min_intron_length" of Long, parameter
           "max_intron_length" of Long, parameter "num_threads" of Long,
           parameter "report_secondary_alignments" of String, parameter
           "no_coverage_search" of String, parameter "library_type" of
           String, parameter "annotation_gtf" of type
           "ws_referenceAnnotation_id" (Id for KBaseRNASeq.GFFAnnotation @id
           ws KBaseRNASeq.GFFAnnotation)
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN TophatCall
        # Runs Tophat on the workspace object params['sampleset_id'] found in
        # workspace params['ws_id'].  A KBaseRNASeq.RNASeqSampleSet is
        # handled by TophatSampleSet, any other type by TophatSample.
        # NOTE(review): the docstring lists "read_sample" but this code reads
        # params['sampleset_id'] - confirm which key callers actually send.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        tophat_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, tophat_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            # Decide between set mode and single-sample mode from the
            # object's type string; everything after the first '-' is
            # dropped (presumably the type version - TODO confirm).
            wsc = common_params['ws_client']
            obj_info = wsc.get_object_info_new(
                {"objects": [{'name': params['sampleset_id'],
                              'workspace': params['ws_id']}]})
            obj_type = obj_info[0][2].split('-')[0]
            if obj_type == 'KBaseRNASeq.RNASeqSampleSet':
                self.__LOGGER.info("Tophat SampleSet Case")
                runner = TophatSampleSet(self.__LOGGER, tophat_dir,
                                         self.__SERVICES)
            else:
                self.__LOGGER.info("Tophat Sample Case")
                runner = TophatSample(self.__LOGGER, tophat_dir,
                                      self.__SERVICES)
            returnVal = runner.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, tophat_dir)

        #END TophatCall

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method TophatCall return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #3
0
    def StringTieCall(self, ctx, params):
        """
        :param params: instance of type "StringTieParams" -> structure:
           parameter "ws_id" of String, parameter "sample_alignment" of
           String, parameter "num-threads" of Long, parameter "label" of
           String, parameter "min_isoform_abundance" of Double, parameter
           "a_juncs" of Long, parameter "min_length" of Long, parameter
           "j_min_reads" of Double, parameter "c_min_read_coverage" of
           Double, parameter "gap_sep_value" of Long, parameter
           "disable_trimming" of type "bool" (indicates true or false values,
           false <= 0, true >=1), parameter "ballgown_mode" of type "bool"
           (indicates true or false values, false <= 0, true >=1), parameter
           "skip_reads_with_no_ref" of type "bool" (indicates true or false
           values, false <= 0, true >=1), parameter "merge" of String
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN StringTieCall
        # Runs StringTie on the workspace object params['alignmentset_id']
        # found in workspace params['ws_id'].  A
        # KBaseRNASeq.RNASeqAlignmentSet is handled by StringTieSampleSet,
        # any other type by StringTieSample.
        # NOTE(review): the docstring lists "sample_alignment" and
        # "num-threads", but this code reads params['alignmentset_id'] and
        # params['num_threads'] - confirm which keys callers actually send.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        stringtie_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, stringtie_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            # Decide between set mode and single-alignment mode from the
            # object's type string; everything after the first '-' is
            # dropped (presumably the type version - TODO confirm).
            wsc = common_params['ws_client']
            obj_info = wsc.get_object_info_new(
                {"objects": [{'name': params['alignmentset_id'],
                              'workspace': params['ws_id']}]})
            obj_type = obj_info[0][2].split('-')[0]
            if obj_type == 'KBaseRNASeq.RNASeqAlignmentSet':
                self.__LOGGER.info("StringTie AlignmentSet Case")
                runner = StringTieSampleSet(self.__LOGGER, stringtie_dir,
                                            self.__SERVICES)
            else:
                runner = StringTieSample(self.__LOGGER, stringtie_dir,
                                         self.__SERVICES)
            returnVal = runner.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, stringtie_dir)
        #END StringTieCall

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method StringTieCall return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #4
0
    def Bowtie2Call(self, ctx, params):
        """
        :param params: instance of type "Bowtie2Params" -> structure:
           parameter "ws_id" of String, parameter "sampleset_id" of String,
           parameter "genome_id" of String, parameter "bowtie_index" of
           String, parameter "phred33" of String, parameter "phred64" of
           String, parameter "local" of String, parameter "very-fast" of
           String, parameter "fast" of String, parameter "very-sensitive" of
           String, parameter "sensitive" of String, parameter
           "very-fast-local" of String, parameter "very-sensitive-local" of
           String, parameter "fast-local" of String, parameter
           "fast-sensitive" of String
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN Bowtie2Call
        # Runs Bowtie2 on the workspace object params['sampleset_id'] found
        # in workspace params['ws_id'].  A KBaseRNASeq.RNASeqSampleSet is
        # handled by Bowtie2SampleSet, any other type by Bowtie2Sample.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        bowtie2_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, bowtie2_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            # Decide between set mode and single-sample mode from the
            # object's type string; everything after the first '-' is
            # dropped (presumably the type version - TODO confirm).
            wsc = common_params['ws_client']
            readsobj_info = wsc.get_object_info_new(
                {"objects": [{'name': params['sampleset_id'],
                              'workspace': params['ws_id']}]})
            readsobj_type = readsobj_info[0][2].split('-')[0]
            if readsobj_type == 'KBaseRNASeq.RNASeqSampleSet':
                self.__LOGGER.info("Bowtie2 SampleSet Case")
                runner = Bowtie2SampleSet(self.__LOGGER, bowtie2_dir,
                                          self.__SERVICES)
            else:
                runner = Bowtie2Sample(self.__LOGGER, bowtie2_dir,
                                       self.__SERVICES)
            returnVal = runner.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, bowtie2_dir)
        #END Bowtie2Call

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method Bowtie2Call return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #5
0
    def CufflinksCall(self, ctx, params):
        """
        :param params: instance of type "CufflinksParams" -> structure:
           parameter "ws_id" of String, parameter "sample_alignment" of
           String, parameter "num_threads" of Long, parameter
           "min-intron-length" of Long, parameter "max-intron-length" of
           Long, parameter "overhang-tolerance" of Long
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN CufflinksCall
        # Runs Cufflinks on the workspace object params['alignmentset_id']
        # found in workspace params['ws_id'].  A
        # KBaseRNASeq.RNASeqAlignmentSet is handled by CufflinksSampleSet,
        # any other type by CufflinksSample.
        # NOTE(review): the docstring lists "sample_alignment" but this code
        # reads params['alignmentset_id'] - confirm which key callers send.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        cufflinks_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, cufflinks_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            # Decide between set mode and single-alignment mode from the
            # object's type string; everything after the first '-' is
            # dropped (presumably the type version - TODO confirm).
            wsc = common_params['ws_client']
            obj_info = wsc.get_object_info_new(
                {"objects": [{'name': params['alignmentset_id'],
                              'workspace': params['ws_id']}]})
            obj_type = obj_info[0][2].split('-')[0]
            if obj_type == 'KBaseRNASeq.RNASeqAlignmentSet':
                self.__LOGGER.info("Cufflinks AlignmentSet Case")
                runner = CufflinksSampleSet(self.__LOGGER, cufflinks_dir,
                                            self.__SERVICES)
            else:
                runner = CufflinksSample(self.__LOGGER, cufflinks_dir,
                                         self.__SERVICES)
            returnVal = runner.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, cufflinks_dir)
        #END CufflinksCall

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method CufflinksCall return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #6
0
    def BuildBowtie2Index(self, ctx, params):
        """
        :param params: instance of type "Bowtie2IndexParams" -> structure:
           parameter "ws_id" of String, parameter "reference" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "ResultsToReport" (Object for Report type)
           -> structure: parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN BuildBowtie2Index
	user_token=ctx['token']
        ws_client=Workspace(url=self.__WS_URL, token=user_token)
	hs = HandleService(url=self.__HS_URL, token=user_token)
	try:
	    	if not os.path.exists(self.__SCRATCH): os.makedirs(self.__SCRATCH)
                bowtie_dir = os.path.join(self.__SCRATCH ,'tmp') 
	        handler_util.setupWorkingDir(self.__LOGGER,bowtie_dir)
		## Update the provenance
	     	provenance = [{}]
        	if 'provenance' in ctx:
            		provenance = ctx['provenance']
        	# add additional info to provenance here, in this case the input data object reference
        	provenance[0]['input_ws_objects']=[params['ws_id']+'/'+params['reference']]
		
		try:
			ref_id, outfile_ref_name = rnaseq_util.get_fa_from_genome(self.__LOGGER,ws_client,self.__SERVICES,params['ws_id'],bowtie_dir,params['reference'])
                except Exception, e:
			self.__LOGGER.exception("".join(traceback.format_exc()))
                        raise ValueError('Unable to get FASTA for object {}'.format("".join(traceback.format_exc())))
	        ## Run the bowtie_indexing on the  command line
		try:
	    		if outfile_ref_name:
				bowtie_index_cmd = "{0} {1}".format(outfile_ref_name,params['reference'])
			else:
				bowtie_index_cmd = "{0} {1}".format(params['reference'],params['reference']) 
	    	        self.__LOGGER.info("Executing: bowtie2-build {0}".format(bowtie_index_cmd))  	
			cmdline_output = script_util.runProgram(self.__LOGGER,"bowtie2-build",bowtie_index_cmd,None,bowtie_dir)
			if 'result' in cmdline_output:
				report = cmdline_output['result']
		except Exception,e:
			raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params['reference'],e))
コード例 #7
0
    def DiffExpCallforBallgown(self, ctx, params):
        """
        :param params: instance of type "DifferentialExpParams" -> structure:
           parameter "ws_id" of String, parameter "expressionset_id" of type
           "RNASeqExpressionSet" (Set object for RNASeqExpression objects
           @optional sample_ids condition tool_used tool_version tool_opts
           @metadata ws tool_used @metadata ws tool_version @metadata ws
           alignmentSet_id) -> structure: parameter "tool_used" of String,
           parameter "tool_version" of String, parameter "tool_opts" of
           mapping from String to String, parameter "alignmentSet_id" of type
           "ws_alignmentSet_id" (The workspace id for a RNASeqAlignmentSet
           object @id ws KBaseRNASeq.RNASeqAlignmentSet), parameter
           "sampleset_id" of type "ws_Sampleset_id" (Id for
           KBaseRNASeq.RNASeqSampleSet @id ws KBaseRNASeq.RNASeqSampleSet),
           parameter "genome_id" of String, parameter "sample_ids" of list of
           String, parameter "condition" of list of String, parameter
           "sample_expression_ids" of list of type "ws_expression_sample_id"
           (Id for expression sample @id ws KBaseRNASeq.RNASeqExpression),
           parameter "mapped_expression_objects" of list of mapping from
           String to String, parameter "mapped_expression_ids" of list of
           mapping from String to type "ws_expression_sample_id" (Id for
           expression sample @id ws KBaseRNASeq.RNASeqExpression), parameter
           "output_obj_name" of String, parameter "num_threads" of Long
        :returns: instance of type "RNASeqDifferentialExpression" (Object
           RNASeqDifferentialExpression file structure @optional tool_opts
           tool_version sample_ids comments) -> structure: parameter
           "tool_used" of String, parameter "tool_version" of String,
           parameter "tool_opts" of list of mapping from String to String,
           parameter "file" of type "Handle" (@optional hid file_name type
           url remote_md5 remote_sha1) -> structure: parameter "hid" of type
           "HandleId" (Id for the handle object @id handle), parameter
           "file_name" of String, parameter "id" of String, parameter "type"
           of String, parameter "url" of String, parameter "remote_md5" of
           String, parameter "remote_sha1" of String, parameter "sample_ids"
           of list of String, parameter "condition" of list of String,
           parameter "genome_id" of String, parameter "expressionSet_id" of
           type "ws_expressionSet_id" (Id for expression sample set @id ws
           KBaseRNASeq.RNASeqExpressionSet), parameter "alignmentSet_id" of
           type "ws_alignmentSet_id" (The workspace id for a
           RNASeqAlignmentSet object @id ws KBaseRNASeq.RNASeqAlignmentSet),
           parameter "sampleset_id" of type "ws_Sampleset_id" (Id for
           KBaseRNASeq.RNASeqSampleSet @id ws KBaseRNASeq.RNASeqSampleSet),
           parameter "comments" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN DiffExpCallforBallgown
        # Delegates the whole job to call_diffExpCallforBallgown.runMethod,
        # giving it workspace/handle-service clients built from the caller's
        # token and a scratch working directory that is removed afterwards.
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        hs = HandleService(url=self.__HS_URL, token=user_token)
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        diffexp_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, diffexp_dir)
        try:
            returnVal = call_diffExpCallforBallgown.runMethod(
                self.__LOGGER, user_token, ws_client, hs, self.__SERVICES,
                diffexp_dir, params)
            # Debug echo of the result on stdout, kept from the original;
            # the parenthesised form is valid in both Python 2 and 3.
            print(returnVal)
        finally:
            # Clean up the directory created above.  The previous code passed
            # the undefined name `stringtie_dir` here, which raised NameError
            # on every call before the result could be returned.
            handler_util.cleanup(self.__LOGGER, diffexp_dir)
        #END DiffExpCallforBallgown

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method DiffExpCallforBallgown return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #8
0
    def CuffdiffCall(self, ctx, params):
        """
        :param params: instance of type "CuffdiffParams" -> structure:
           parameter "ws_id" of String, parameter "rnaseq_exp_details" of
           type "RNASeqSampleSet" (Object to Describe the RNASeq SampleSet
           @optional platform num_replicates source publication_Id
           external_source_date sample_ids @metadata ws sampleset_id
           @metadata ws platform @metadata ws num_samples @metadata ws
           num_replicates @metadata ws length(condition)) -> structure:
           parameter "sampleset_id" of String, parameter "sampleset_desc" of
           String, parameter "domain" of String, parameter "platform" of
           String, parameter "num_samples" of Long, parameter
           "num_replicates" of Long, parameter "sample_ids" of list of
           String, parameter "condition" of list of String, parameter
           "source" of String, parameter "Library_type" of String, parameter
           "publication_Id" of String, parameter "external_source_date" of
           String, parameter "output_obj_name" of String, parameter
           "time-series" of String, parameter "library-type" of String,
           parameter "library-norm-method" of String, parameter
           "multi-read-correct" of String, parameter "min-alignment-count" of
           Long, parameter "dispersion-method" of String, parameter
           "no-js-tests" of String, parameter "frag-len-mean" of Long,
           parameter "frag-len-std-dev" of Long, parameter
           "max-mle-iterations" of Long, parameter "compatible-hits-norm" of
           String, parameter "no-length-correction" of String
        :returns: instance of type "RNASeqDifferentialExpression" (Object
           RNASeqDifferentialExpression file structure @optional tool_opts
           tool_version sample_ids comments) -> structure: parameter
           "tool_used" of String, parameter "tool_version" of String,
           parameter "tool_opts" of list of mapping from String to String,
           parameter "file" of type "Handle" (@optional hid file_name type
           url remote_md5 remote_sha1) -> structure: parameter "hid" of type
           "HandleId" (Id for the handle object @id handle), parameter
           "file_name" of String, parameter "id" of String, parameter "type"
           of String, parameter "url" of String, parameter "remote_md5" of
           String, parameter "remote_sha1" of String, parameter "sample_ids"
           of list of String, parameter "condition" of list of String,
           parameter "genome_id" of String, parameter "expressionSet_id" of
           type "ws_expressionSet_id" (Id for expression sample set @id ws
           KBaseRNASeq.RNASeqExpressionSet), parameter "alignmentSet_id" of
           type "ws_alignmentSet_id" (The workspace id for a
           RNASeqAlignmentSet object @id ws KBaseRNASeq.RNASeqAlignmentSet),
           parameter "sampleset_id" of type "ws_Sampleset_id" (Id for
           KBaseRNASeq.RNASeqSampleSet @id ws KBaseRNASeq.RNASeqSampleSet),
           parameter "comments" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN CuffdiffCall
        # Runs Cuffdiff through the Cuffdiff runner class inside a scratch
        # working directory that is removed afterwards.
        if not os.path.exists(self.__SCRATCH):
            os.makedirs(self.__SCRATCH)
        cuffdiff_dir = os.path.join(self.__SCRATCH, "tmp")
        handler_util.setupWorkingDir(self.__LOGGER, cuffdiff_dir)
        try:
            # Clients and credentials handed to the runner.
            token = ctx['token']
            common_params = {
                'ws_client': Workspace(url=self.__WS_URL, token=token),
                'hs_client': HandleService(url=self.__HS_URL, token=token),
                'user_token': token,
            }
            # Forward the thread count only when the caller supplied one.
            num_threads = params.get('num_threads')
            if num_threads is not None:
                common_params['num_threads'] = num_threads

            cuff = Cuffdiff(self.__LOGGER, cuffdiff_dir, self.__SERVICES)
            returnVal = cuff.run(common_params, params)
        finally:
            # Always remove the scratch working directory, including when
            # the run above raised.
            handler_util.cleanup(self.__LOGGER, cuffdiff_dir)
        #END CuffdiffCall

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method CuffdiffCall return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
コード例 #9
0
    def collect(self):
        """Run the Ballgown differential-expression step and publish results.

        Reads its inputs from ``self.method_params``, ``self.common_params``,
        ``self.directory``, ``self.urls`` and ``self.details``, then, in
        order:

        1. downloads the expression set data via
           ``rnaseq_util.get_info_and_download_for_ballgown``;
        2. writes the sample-directory/group table and runs Ballgown;
        3. loads the resulting differential-expression matrix and saves the
           Ballgown output to the workspace;
        4. filters the ``<expression set name>_FPKM_ExpressionMatrix`` object
           down to the selected genes and saves it under
           ``params['filtered_expr_matrix']``;
        5. saves a volcano-plot report.

        The summary dict is stored on ``self.returnVal``; nothing is
        returned.

        Raises:
            Exception: if the expression matrix cannot be fetched from the
                workspace, or the filtered matrix cannot be saved.
        """
        params = self.method_params
        ws_client = self.common_params['ws_client']
        hs_client = self.common_params['hs_client']
        token = self.common_params['user_token']

        ws_id = params['ws_id']
        # Hard-coded location of the R scripts handed to the Ballgown run.
        rscripts_dir = '/kb/module/rscripts'

        diffexp_dir = self.directory
        logger = self.logger
        logger.info(
            'in DiffExpforBallgown.collect, method params (params) are')
        logger.info(pformat(params))
        output_object_name = params['output_obj_name']
        output_csv = "ballgown_diffexp.tsv"
        volcano_plot_file = "volcano_plot.png"
        stringtie_dir_prefix = "StringTie_outdir_"

        # Download the expression set; the helper is given the prefix above
        # for the StringTie sub-directories it creates.
        ballgown_set_info = rnaseq_util.get_info_and_download_for_ballgown(
            logger, ws_client, hs_client, ws_id, self.urls, diffexp_dir,
            stringtie_dir_prefix, params['expressionset_id'], token)
        logger.info('back from download_for_ballgown(), ballgown_set_info are')
        logger.info(pformat(ballgown_set_info))

        sample_dir_group_file = "sample_dir_group_table"  # output file
        group_list = rnaseq_util.create_sample_dir_group_file(
            logger, ws_client, ws_id, ballgown_set_info['subdirs'],
            params['group_name1'], params['expr_ids1'], params['group_name2'],
            params['expr_ids2'], sample_dir_group_file)

        ballgown_output_dir = os.path.join(diffexp_dir, "ballgown_out")
        logger.info("ballgown output dir is {0}".format(ballgown_output_dir))
        handler_util.setupWorkingDir(logger, ballgown_output_dir)

        logger.info("about to run_ballgown_diff_exp")
        rnaseq_util.run_ballgown_diff_exp(logger, rscripts_dir, diffexp_dir,
                                          sample_dir_group_file,
                                          ballgown_output_dir, output_csv,
                                          volcano_plot_file)

        logger.info(
            "back from run_ballgown_diff_exp, about to load diff exp matrix file"
        )
        # Read the matrix file before the output directory is zipped.
        diff_expr_matrix = rnaseq_util.load_diff_expr_matrix(
            ballgown_output_dir, output_csv)

        logger.info("about to load ballgout output into workspace")
        de_ws_save_obj_data = rnaseq_util.load_ballgown_output_into_ws(
            logger,
            ws_id,
            ws_client,
            hs_client,
            token,
            diffexp_dir,
            ballgown_output_dir,
            self.details["used_tool"],
            self.details["tool_version"],
            # NOTE(review): passed as the sample ids - the original author
            # questioned whether that is right.
            ballgown_set_info['sample_expression_ids'],
            group_list,  # conditions
            ballgown_set_info['genome_id'],  # genome_id
            ballgown_set_info['expressionset_id'],  # expressionset_id
            ballgown_set_info['alignmentSet_id'],  # alignmentset_id
            ballgown_set_info['sampleset_id'],  # sampleset_id
            output_object_name)
        logger.info(
            "back from loading ballgown output into workspace, object save data is "
        )
        logger.info(pformat(de_ws_save_obj_data))

        # No caller-supplied limit (key absent or None) means "effectively
        # unlimited".  sys.maxsize exists on both Python 2.6+ and Python 3.
        max_num_genes = params.get('maximum_num_genes')
        if max_num_genes is None:
            max_num_genes = sys.maxsize

        # this returns a list of gene ids passing the specified cuts, ordered by
        # descending fold_change
        selected_gene_list = rnaseq_util.filter_genes_diff_expr_matrix(
            diff_expr_matrix, params['fold_scale_type'],
            params['alpha_cutoff'], params['fold_change_cutoff'],
            max_num_genes)
        # TODO: if selected_gene_list is empty, report that and take no
        # further action.

        # get the unfiltered expression matrix
        expression_set_id_name = script_util.ws_get_obj_name(
            logger, ws_client, ws_id, params['expressionset_id'])
        em_name = expression_set_id_name + "_FPKM_ExpressionMatrix"
        logger.info("about to fetch expression matrix  {0}".format(em_name))
        try:
            emw = script_util.ws_get_obj(logger, ws_client, ws_id, em_name)[0]
        except Exception:
            # Log the underlying traceback before replacing it with the
            # generic error callers already see.
            fetch_error = (
                "unable to retrieve expression matrix object {0} from workspace {1}"
                .format(em_name, ws_id))
            logger.exception(fetch_error)
            raise Exception(fetch_error)
        emo = emw["data"]
        # filter it
        filtered_emo = rnaseq_util.filter_expr_matrix_object(
            emo, selected_gene_list)
        # save it
        logger.info("saving emo em_name {0}".format(
            params["filtered_expr_matrix"]))
        try:
            ret = ws_client.save_objects({
                'workspace': ws_id,
                'objects': [{
                    'type': 'KBaseFeatureValues.ExpressionMatrix',
                    'data': filtered_emo,
                    'name': params["filtered_expr_matrix"]
                }]
            })
        except Exception:
            # Same policy as above: keep the message, keep the traceback.
            logger.exception("failed to save object ")
            raise Exception("failed to save object ")
        logger.info("ws save return:\n" + pformat(ret))

        logger.info("saving volcano plot as report object")
        report_object_name = expression_set_id_name + "_plot_report"

        output_obj_ref = script_util.ws_get_ref(logger, ws_client, ws_id,
                                                output_object_name)
        em_obj_ref = script_util.ws_get_ref(logger, ws_client, ws_id,
                                            params["filtered_expr_matrix"])

        plot_report_object_name = rnaseq_util.create_and_save_volcano_plot_report(
            logger, ws_client, ws_id, self.urls['callback_url'], token,
            ballgown_output_dir, volcano_plot_file, output_obj_ref, em_obj_ref,
            report_object_name)

        # Elements 6, 0 and 4 of the saved report's info are joined into the
        # "a/b/c" report reference; element 1 is used as the report name.
        report_ref = "{0}/{1}/{2}".format(plot_report_object_name[6],
                                          plot_report_object_name[0],
                                          plot_report_object_name[4])
        returnVal = {
            'diff_expr_object': output_object_name,
            # The key spelling ("maxtrix") is kept as-is: it is part of the
            # result that consumers read.
            'filtered_expression_maxtrix': params["filtered_expr_matrix"],
            'report_name': plot_report_object_name[1],
            'report_ref': report_ref,
            'workspace': ws_id
        }

        self.returnVal = returnVal