def calcSquareSum(self, ctx, params):
    """
    :param params: instance of type "CalcSquareSumParams"
       (===================== main =====================) -> structure:
       parameter "n" of Long
    :returns: instance of type "CalcSquareSumInputOutput" -> structure:
       parameter "square_sum" of Long
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN calcSquareSum
    kbp = KBParallel(os.environ['SDK_CALLBACK_URL'], token=ctx['token'])
    returnVal = kbp.run({
        'prepare_method': {
            'module_name': 'ParallelSquareSum',
            'method_name': 'calcSquareSumPrepare',
            'service_ver': 'dev'
        },
        'is_local': 1,
        'global_params': params,
        'time_limit': 1000000
    })
    #END calcSquareSum

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method calcSquareSum return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]

def manyHellos(self, ctx, input_params):
    """
    :param input_params: instance of type "ManyHellosInputParams"
       (hello_msg - what to print as the message, time_limit - how long the
       program will run, in seconds, workspace - used to store report(s).)
       -> structure: parameter "hello_msg" of String, parameter "num_jobs"
       of Long, parameter "time_limit" of Long, parameter "workspace" of
       String
    :returns: instance of type "ManyHellos_globalResult" -> structure:
       parameter "output" of String, parameter "jobs" of list of tuple of
       size 2: parameter "job_number" of Long, parameter "message" of String
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN manyHellos
    print("Hi this is manyHellos()!")
    print("hello_msg is ", input_params["hello_msg"])
    print("time_limit is ", input_params["time_limit"])
    print("num_jobs is ", input_params["num_jobs"])
    print("workspace is ", input_params["workspace"])

    kbp = KBParallel(os.environ['SDK_CALLBACK_URL'], token=ctx['token'])
    returnVal = kbp.run({
        'method': {
            'module_name': 'ManyHellos',
            'method_name': 'manyHellos',
            'service_ver': 'dev'
        },
        'is_local': 1,
        'global_params': {
            'msg': input_params["hello_msg"],
            'num_jobs': input_params["num_jobs"],
            'workspace': input_params["workspace"]
        },
        'time_limit': input_params["time_limit"]
    })
    print("this is manyHellos(), signing off! Bye!")
    #END manyHellos

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method manyHellos return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]

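
# The two narrative methods above differ only in which key names the work for
# KBParallel: calcSquareSum passes a 'prepare_method' (its *Prepare method
# presumably expands the request into subtasks), while manyHellos passes a
# 'method' directly. A minimal sketch of the shared kbp.run() input shape, with
# placeholder values; only keys that actually appear in the two examples are shown.
example_run_input = {
    'method': {                      # or 'prepare_method', as in calcSquareSum above
        'module_name': 'ManyHellos',
        'method_name': 'manyHellos',
        'service_ver': 'dev'
    },
    'is_local': 1,
    'global_params': {},             # passed through to the subjobs
    'time_limit': 1000000
}
# kbp = KBParallel(os.environ['SDK_CALLBACK_URL'], token=ctx['token'])
# returnVal = kbp.run(example_run_input)
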
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
             provenance):
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = provenance

    # from the provenance, extract out the version to run by exact hash if possible
    self.my_version = 'release'
    if len(provenance) > 0:
        if 'subactions' in provenance[0]:
            self.my_version = self.get_version_from_subactions(
                'kb_BatchApp', provenance[0]['subactions'])
    print('Running kb_BatchApp version = ' + self.my_version)

    self.ws = Workspace(self.workspace_url)
    self.parallel_runner = KBParallel(self.callback_url)

def __init__(self, config, provenance):
    self.config = config
    self.workspace_url = config['workspace-url']
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.srv_wiz_url = config['srv-wiz-url']
    self.parallel_runner = KBParallel(self.callback_url)
    self.provenance = provenance
    self.star_utils = STARUtils(self.scratch, self.workspace_url,
                                self.callback_url, self.srv_wiz_url,
                                provenance)
    self.set_api_client = SetAPI(self.srv_wiz_url, service_ver='dev')
    self.qualimap = kb_QualiMap(self.callback_url, service_ver='dev')
    self.star_idx_dir = None
    self.star_out_dir = None

    # from the provenance, extract out the version to run by exact hash if possible
    self.my_version = 'release'
    if len(provenance) > 0:
        if 'subactions' in provenance[0]:
            self.my_version = self.get_version_from_subactions(
                'kb_STAR', provenance[0]['subactions'])
    print('Running kb_STAR version = ' + self.my_version)

def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
             context):
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = context.provenance()
    self.job_id = None

    rpc_context = context.get('rpc_context')
    if rpc_context is not None and hasattr(rpc_context, 'get'):
        current_call_ctx = rpc_context.get('call_stack')
        if len(current_call_ctx):
            self.job_id = current_call_ctx[0].get('job_id')

    # from the provenance, extract out the version to run by exact hash if possible
    self.my_version = 'release'
    if len(self.provenance) > 0:
        if 'subactions' in self.provenance[0]:
            self.my_version = self.get_version_from_subactions(
                'kb_BatchApp', self.provenance[0]['subactions'])
    print('Running kb_BatchApp version = ' + self.my_version)

    self.ws = Workspace(self.workspace_url)
    self.parallel_runner = KBParallel(self.callback_url, service_ver='dev')

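
# The constructor above only probes a few fields of the SDK context object. A
# hedged sketch of the nested shape it expects, with illustrative values; in a
# real run the context is a MethodContext-like mapping and provenance() is a
# method rather than a plain key.
example_context_fields = {
    'provenance': [{
        'subactions': [                               # scanned by get_version_from_subactions()
            {'name': 'kb_BatchApp', 'commit': 'local-docker-image'}
        ]
    }],
    'rpc_context': {
        'call_stack': [{'job_id': 'JOB-1'}]           # first frame's job_id becomes self.job_id
    }
}
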
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
             provenance):
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.au = AssemblyUtil(self.callback_url)
    self.dfu = DataFileUtil(self.callback_url, service_ver='beta')
    self.scratch = scratch_dir
    self.working_dir = scratch_dir
    self.prog_runner = Program_Runner(self.STAR_BIN, self.scratch)
    self.provenance = provenance
    self.ws_client = Workspace(self.workspace_url)

    self.parallel_runner = KBParallel(self.callback_url)
    self.qualimap = kb_QualiMap(self.callback_url, service_ver='dev')
    self.set_api_client = SetAPI(self.srv_wiz_url, service_ver='dev')
    self.eu = ExpressionUtils(self.callback_url, service_ver='beta')

class BatchRunner(object):
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 context):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = context.provenance()
        self.job_id = None

        rpc_context = context.get('rpc_context')
        if rpc_context is not None and hasattr(rpc_context, 'get'):
            current_call_ctx = rpc_context.get('call_stack')
            if len(current_call_ctx):
                self.job_id = current_call_ctx[0].get('job_id')

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(self.provenance) > 0:
            if 'subactions' in self.provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_BatchApp', self.provenance[0]['subactions'])
        print('Running kb_BatchApp version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.parallel_runner = KBParallel(self.callback_url, service_ver='dev')

    def get_version_from_subactions(self, module_name, subactions):
        # go through each subaction looking for this module
        if not subactions:
            return 'release'  # default to release if we can't find anything
        for sa in subactions:
            if 'name' in sa:
                if sa['name'] == module_name:
                    # local-docker-image implies that we are running in kb-test, so return 'dev'
                    if sa['commit'] == 'local-docker-image':
                        return 'dev'
                    # to check that it is a valid hash, make sure it is the right
                    # length and made up of valid hash characters
                    if re.match('[a-fA-F0-9]{40}$', sa['commit']):
                        return sa['commit']
        # again, default to setting this to release
        return 'release'

    def run(self, params):
        # raises an exception if there's a failure; see that function for details
        self.validate_params(params)

        app_info = {
            'module_name': params['module_name'],
            'function_name': params['method_name'],
            'version': params['service_ver']
        }
        params_list = params.get('batch_params')
        print('Running on set of parameters =')
        pprint(params_list)

        tasks = []
        for input_params in params_list:
            tasks.append(
                self.build_single_execution_task(app_info, input_params))

        batch_run_params = {
            'tasks': tasks,
            'runner': 'parallel',
            'max_retries': 2
        }
        if self.job_id is not None:
            batch_run_params['parent_job_id'] = self.job_id
        # TODO check if this should be given in input
        batch_run_params['concurrent_local_tasks'] = 0
        batch_run_params['concurrent_njsw_tasks'] = 5

        print("======================== BATCH_RUN_PARAMS ====================")
        pprint(batch_run_params)
        print("================================================================")

        batch_results = self.parallel_runner.run_batch(batch_run_params)
        print('Batch run results=')
        pprint(batch_results)

        results = {'batch_results': dict()}
        for result in batch_results['results']:
            results['batch_results'][
                result['result_package']['run_context']['job_id']] = result
        results['report_name'], results['report_ref'] = build_report(
            self.callback_url, self.scratch_dir, results['batch_results'],
            len(params_list), params['wsid'])
        return results

    def build_single_execution_task(self, app_info, params):
        task_params = copy.deepcopy(params.get('params')[0])
        retVal = {'parameters': task_params}
        retVal.update(app_info)
        return retVal

    def clean(self, run_output_info):
        """
        Not really necessary on a single run, but if we are running multiple
        local subjobs, we should clean up files that have already been saved
        back up to KBase.
        """
        pass

    def validate_params(self, params):
        """
        Things to validate:
        * params.module_name and params.method_name are real (maybe just let
          that go and assume they're ok)
        * params.wsid is a real workspace id and the current user has
          write-access
        * params.batch_params is a list with len > 0
        """
        if params.get("batch_params", None) is None or (
                isinstance(params["batch_params"], list)
                and len(params["batch_params"]) == 0):
            raise ValueError("batch_params must be a list with a length >= 1")
        if params.get("module_name") is None:
            raise ValueError("module_name must be an existing KBase app module!")
        elif "." in params["module_name"] or "/" in params["module_name"]:
            raise ValueError(
                "module_name should just be the name of the module, NOT the full module.method")
        if params.get("method_name") is None:
            raise ValueError("method_name must be an existing KBase app method!")
        elif "." in params["method_name"] or "/" in params["method_name"]:
            raise ValueError(
                "method_name should just be the name of the method, NOT the full module.method")
        if params.get("service_ver") is None or not isinstance(
                params["service_ver"], basestring):
            raise ValueError("service_ver must be a valid string!")
        if params.get("wsid") is None:
            raise ValueError("A workspace id must be provided to associate each subjob!")
        return params

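
# A hedged sketch of what a call into this runner might look like, based only on
# the fields validate_params() and build_single_execution_task() read above; the
# module/method names, refs and workspace id are placeholders.
batch_app_params = {
    'module_name': 'SomeModule',        # module name only, no 'Module.method'
    'method_name': 'some_method',
    'service_ver': 'release',
    'wsid': 12345,                      # workspace id used when building the report
    'batch_params': [
        # one entry per subjob; build_single_execution_task() takes params['params'][0]
        {'params': [{'input_ref': '1/2/3', 'output_name': 'out_1'}]},
        {'params': [{'input_ref': '1/4/3', 'output_name': 'out_2'}]},
    ],
}
# runner = BatchRunner(scratch_dir, workspace_url, callback_url, srv_wiz_url, ctx)
# results = runner.run(batch_app_params)
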
def run_batch(self, reads_refs, params):
    """
    Runs HISAT2 in batch mode.
    reads_refs should be a list of dicts, where each looks like the following:
    {
        "ref": reads object reference,
        "condition": condition for that ref (string)
    }
    """
    # build task list and send it to KBParallel
    tasks = list()
    set_name = get_object_names(
        [params["sampleset_ref"]],
        self.workspace_url)[params["sampleset_ref"]]
    for idx, reads_ref in enumerate(reads_refs):
        single_param = dict(params)  # need a copy of the params
        single_param["build_report"] = 0
        single_param["sampleset_ref"] = reads_ref["ref"]
        if "condition" in reads_ref:
            single_param["condition"] = reads_ref["condition"]
        else:
            single_param["condition"] = "unspecified"

        tasks.append({
            "module_name": "kb_hisat2",
            "function_name": "run_hisat2",
            "version": self.my_version,
            "parameters": single_param
        })

    # UNCOMMENT BELOW FOR LOCAL TESTING
    batch_run_params = {
        "tasks": tasks,
        "runner": "parallel",
        # "concurrent_local_tasks": 3,
        # "concurrent_njsw_tasks": 0,
        "max_retries": 2
    }
    parallel_runner = KBParallel(self.callback_url)
    results = parallel_runner.run_batch(batch_run_params)["results"]
    alignment_items = list()
    alignments = dict()
    for idx, result in enumerate(results):
        # idx of the result is the same as the idx of the inputs AND reads_refs
        if result["is_error"] != 0:
            raise RuntimeError("Failed a parallel run of HISAT2! {}".format(
                result["result_package"]["error"]))
        reads_ref = tasks[idx]["parameters"]["sampleset_ref"]
        alignment_items.append({
            "ref": result["result_package"]["result"][0]["alignment_objs"][reads_ref]["ref"],
            "label": reads_refs[idx].get(
                "condition", params.get("condition", "unspecified"))
        })
        alignments[reads_ref] = result["result_package"]["result"][0][
            "alignment_objs"][reads_ref]

    # build the final alignment set
    output_ref = self.upload_alignment_set(
        alignment_items, set_name + params["alignmentset_suffix"],
        params["ws_name"])
    return (alignments, output_ref)

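
# The loop above (and the process_batch_result methods further down) walk the
# same KBParallel run_batch() result layout. A hedged sketch of that shape with
# made-up values; only the fields these snippets actually read are shown, and the
# contents of result_package['result'][0] are whatever the subjob's method
# returned (alignment_objs here, output_info in the Bowtie2/STAR examples below).
example_batch_results = {
    "results": [
        {
            "is_error": 0,                       # non-zero marks a failed subjob
            "result_package": {
                "error": None,                   # read only when is_error != 0 above
                "result": [{"alignment_objs": {"55/10/1": {"ref": "55/20/1"}}}],
                "run_context": {"location": "local", "job_id": "JOB-1"}
            }
        },
        # ... one entry per task, in the same order as the submitted task list
    ]
}
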
class BatchRunner(object):
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 provenance):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_BatchApp', provenance[0]['subactions'])
        print('Running kb_BatchApp version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.parallel_runner = KBParallel(self.callback_url)

    def get_version_from_subactions(self, module_name, subactions):
        # go through each subaction looking for this module
        if not subactions:
            return 'release'  # default to release if we can't find anything
        for sa in subactions:
            if 'name' in sa:
                if sa['name'] == module_name:
                    # local-docker-image implies that we are running in kb-test, so return 'dev'
                    if sa['commit'] == 'local-docker-image':
                        return 'dev'
                    # to check that it is a valid hash, make sure it is the right
                    # length and made up of valid hash characters
                    if re.match('[a-fA-F0-9]{40}$', sa['commit']):
                        return sa['commit']
        # again, default to setting this to release
        return 'release'

    def run(self, params):
        # validated_params = self.validate_params(params)
        validated_params = params
        num_params = len(validated_params.get('batch_params'))
        app_info = {
            'module_name': validated_params.get('app_id'),
            'function_name': validated_params.get('method'),
            'version': validated_params.get('service_ver')
        }

        if num_params >= 1:
            params_list = validated_params.get('batch_params')
            print('Running on set of parameters =')
            pprint(params_list)

            tasks = []
            for input_params in params_list:
                tasks.append(
                    self.build_single_execution_task(app_info, input_params))

            batch_run_params = {
                'tasks': tasks,
                'runner': 'parallel',
                'max_retries': 2
            }
            # TODO check if this should be given in input
            batch_run_params['concurrent_local_tasks'] = 1
            batch_run_params['concurrent_njsw_tasks'] = 0

            print("======================== BATCH_RUN_PARAMS ====================")
            pprint(batch_run_params)
            print("================================================================")

            results = self.parallel_runner.run_batch(batch_run_params)
            print('Batch run results=')
            pprint(results)
            return results

        raise ValueError('Improper number of method parameters')

    def build_single_execution_task(self, app_info, params):
        task_params = copy.deepcopy(params.get('params')[0])
        retVal = {'parameters': task_params}
        retVal.update(app_info)
        return retVal

    def clean(self, run_output_info):
        '''
        Not really necessary on a single run, but if we are running multiple
        local subjobs, we should clean up files that have already been saved
        back up to kbase
        '''
        pass

    def validate_params(self, params):
        # TODO Add validation if needed
        return params

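
# The runners above pin a subjob version by scanning provenance subactions. A
# minimal sketch of the structure get_version_from_subactions() walks, with
# placeholder values; only 'subactions', 'name' and 'commit' are read.
example_provenance = [{
    'subactions': [
        {'name': 'SomeOtherModule', 'commit': 'local-docker-image'},
        {'name': 'kb_BatchApp',
         'commit': 'a94a8fe5ccb19ba61c4c0873d391e987982fbbd3'},   # 40-char hash
    ]
}]
# get_version_from_subactions('kb_BatchApp', example_provenance[0]['subactions'])
# would return the 40-character commit hash, so the exact image is reused;
# 'local-docker-image' maps to 'dev', anything else falls back to 'release'.
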
class Bowtie2Aligner(object):
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 provenance):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_Bowtie2', provenance[0]['subactions'])
        print('Running kb_Bowtie2 version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.bowtie2 = Bowtie2Runner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)

    def get_version_from_subactions(self, module_name, subactions):
        # go through each subaction looking for this module
        if not subactions:
            return 'release'  # default to release if we can't find anything
        for sa in subactions:
            if 'name' in sa:
                if sa['name'] == module_name:
                    # local-docker-image implies that we are running in kb-test, so return 'dev'
                    if sa['commit'] == 'local-docker-image':
                        return 'dev'
                    # to check that it is a valid hash, make sure it is the right
                    # length and made up of valid hash characters
                    if re.match('[a-fA-F0-9]{40}$', sa['commit']):
                        return sa['commit']
        # again, default to setting this to release
        return 'release'

    def align(self, params):
        validated_params = self.validate_params(params)
        input_info = self.determine_input_info(validated_params)
        # input info provides information on the input and tells us if we should
        # run as a single_library or as a set:
        #     input_info = {'run_mode': '', 'info': [..], 'ref': '55/1/2'}
        assembly_or_genome_ref = validated_params['assembly_or_genome_ref']

        if input_info['run_mode'] == 'single_library':
            if 'output_alignment_name' not in validated_params:
                suffix = '_alignment'
                if 'output_alignment_suffix' in validated_params:
                    suffix = validated_params['output_alignment_suffix']
                validated_params['output_alignment_name'] = input_info['info'][1] + suffix
            single_lib_result = self.single_reads_lib_run(
                input_info,
                assembly_or_genome_ref,
                validated_params,
                create_report=validated_params['create_report'])
            return single_lib_result

        if input_info['run_mode'] == 'sample_set':
            reads = self.fetch_reads_refs_from_sampleset(
                input_info['ref'], input_info['info'], validated_params)
            self.build_bowtie2_index(assembly_or_genome_ref,
                                     validated_params['output_workspace'])

            print('Running on set of reads=')
            pprint(reads)

            tasks = []
            for r in reads:
                tasks.append(
                    self.build_single_execution_task(
                        r['ref'], params, r['alignment_output_name'],
                        r['condition']))

            batch_run_params = {
                'tasks': tasks,
                'runner': 'parallel',
                'max_retries': 2
            }
            if validated_params['concurrent_local_tasks'] is not None:
                batch_run_params['concurrent_local_tasks'] = \
                    validated_params['concurrent_local_tasks']
            if validated_params['concurrent_njsw_tasks'] is not None:
                batch_run_params['concurrent_njsw_tasks'] = \
                    validated_params['concurrent_njsw_tasks']

            results = self.parallel_runner.run_batch(batch_run_params)
            print('Batch run results=')
            pprint(results)

            batch_result = self.process_batch_result(results, validated_params,
                                                     reads, input_info['info'])
            return batch_result

        raise ValueError('Improper run mode')

    def build_single_execution_task(self, reads_lib_ref, params, output_name,
                                    condition):
        task_params = copy.deepcopy(params)
        task_params['input_ref'] = reads_lib_ref
        task_params['output_alignment_name'] = output_name
        task_params['create_report'] = 0
        task_params['condition_label'] = condition

        return {
            'module_name': 'kb_Bowtie2',
            'function_name': 'align_reads_to_assembly_app',
            'version': self.my_version,
            'parameters': task_params
        }

    def single_reads_lib_run(self, read_lib_info, assembly_or_genome_ref,
                             validated_params, create_report=False,
                             bowtie2_index_info=None):
        ''' run on one reads '''
        # download reads and prepare any bowtie2 index files
        input_configuration = self.prepare_single_run(
            read_lib_info, assembly_or_genome_ref, bowtie2_index_info,
            validated_params['output_workspace'])

        # run the actual program
        run_output_info = self.run_bowtie2_align_cli(input_configuration,
                                                     validated_params)

        # process the result and save the output
        upload_results = self.save_read_alignment_output(
            run_output_info, input_configuration, validated_params)
        run_output_info['upload_results'] = upload_results

        report_info = None
        if create_report:
            report_info = self.create_report_for_single_run(
                run_output_info, input_configuration, validated_params)

        self.clean(run_output_info)

        return {'output_info': run_output_info, 'report_info': report_info}

    def build_bowtie2_index(self, assembly_or_genome_ref, ws_for_cache):
        bowtie2IndexBuilder = Bowtie2IndexBuilder(self.scratch_dir,
                                                  self.workspace_url,
                                                  self.callback_url,
                                                  self.srv_wiz_url,
                                                  self.provenance)
        return bowtie2IndexBuilder.get_index({
            'ref': assembly_or_genome_ref,
            'ws_for_cache': ws_for_cache
        })

    def prepare_single_run(self, input_info, assembly_or_genome_ref,
                           bowtie2_index_info, ws_for_cache):
        ''' Given a reads ref and an assembly, setup the bowtie2 index '''
        # first setup the bowtie2 index of the assembly
        input_configuration = {'bowtie2_index_info': bowtie2_index_info}
        if not bowtie2_index_info:
            bowtie2IndexBuilder = Bowtie2IndexBuilder(self.scratch_dir,
                                                      self.workspace_url,
                                                      self.callback_url,
                                                      self.srv_wiz_url,
                                                      self.provenance)
            index_result = bowtie2IndexBuilder.get_index({
                'ref': assembly_or_genome_ref,
                'ws_for_cache': ws_for_cache
            })
            input_configuration['bowtie2_index_info'] = index_result

        # next download the reads
        read_lib_ref = input_info['ref']
        read_lib_info = input_info['info']
        reads_params = {
            'read_libraries': [read_lib_ref],
            'interleaved': 'false',
            'gzipped': None
        }
        ru = ReadsUtils(self.callback_url)
        reads = ru.download_reads(reads_params)['files']

        input_configuration['reads_lib_type'] = \
            self.get_type_from_obj_info(read_lib_info).split('.')[1]
        input_configuration['reads_files'] = reads[read_lib_ref]
        input_configuration['reads_lib_ref'] = read_lib_ref

        return input_configuration

    def run_bowtie2_align_cli(self, input_configuration, validated_params):
        # pprint('======== input_configuration =====')
        # pprint(input_configuration)
        options = []
        run_output_info = {}

        # set the bowtie2 index location
        bt2_index_dir = input_configuration['bowtie2_index_info']['output_dir']
        bt2_index_basename = input_configuration['bowtie2_index_info']['index_files_basename']
        options.extend(['-x', bt2_index_basename])

        # set the input reads
        if input_configuration['reads_lib_type'] == 'SingleEndLibrary':
            options.extend(['-U', input_configuration['reads_files']['files']['fwd']])
            run_output_info['library_type'] = 'single_end'
        elif input_configuration['reads_lib_type'] == 'PairedEndLibrary':
            options.extend(['-1', input_configuration['reads_files']['files']['fwd']])
            options.extend(['-2', input_configuration['reads_files']['files']['rev']])
            run_output_info['library_type'] = 'paired_end'

        # setup the output file name
        output_dir = os.path.join(
            self.scratch_dir,
            'bowtie2_alignment_output_' + str(int(time.time() * 10000)))
        output_sam_file = os.path.join(output_dir, 'reads_alignment.sam')
        os.makedirs(output_dir)
        options.extend(['-S', output_sam_file])
        run_output_info['output_sam_file'] = output_sam_file
        run_output_info['output_dir'] = output_dir

        # parse all the other parameters
        if 'quality_score' in validated_params:
            options.append('--' + str(validated_params['quality_score']))
        if 'alignment_type' in validated_params:
            options.append('--' + str(validated_params['alignment_type']))
        if 'preset_options' in validated_params:
            if 'alignment_type' in validated_params and \
                    validated_params['alignment_type'] == 'local':
                options.append('--' + str(validated_params['preset_options'] + '-local'))
            else:
                options.append('--' + str(validated_params['preset_options']))
        if 'trim5' in validated_params:
            options.extend(['--trim5', str(validated_params['trim5'])])
        if 'trim3' in validated_params:
            options.extend(['--trim3', str(validated_params['trim3'])])
        if 'np' in validated_params:
            options.extend(['--np', str(validated_params['np'])])
        if 'minins' in validated_params:
            options.extend(['--minins', str(validated_params['minins'])])
        if 'maxins' in validated_params:
            options.extend(['--maxins', str(validated_params['maxins'])])

        # unfortunately, bowtie2 expects the index files to be in the current directory, and
        # you cannot configure it otherwise. So run bowtie out of the index directory, but
        # place the output SAM file somewhere else
        self.bowtie2.run('bowtie2', options, cwd=bt2_index_dir)

        return run_output_info

    def save_read_alignment_output(self, run_output_info, input_configuration,
                                   validated_params):
        rau = ReadsAlignmentUtils(self.callback_url)
        destination_ref = validated_params['output_workspace'] + '/' + \
            validated_params['output_alignment_name']
        condition = 'unknown'
        if 'condition_label' in validated_params:
            condition = validated_params['condition_label']
        upload_params = {
            'file_path': run_output_info['output_sam_file'],
            'destination_ref': destination_ref,
            'read_library_ref': input_configuration['reads_lib_ref'],
            'assembly_or_genome_ref': validated_params['assembly_or_genome_ref'],
            'condition': condition
        }
        upload_results = rau.upload_alignment(upload_params)
        return upload_results

    def clean(self, run_output_info):
        '''
        Not really necessary on a single run, but if we are running multiple
        local subjobs, we should clean up files that have already been saved
        back up to kbase
        '''
        pass

    def create_report_for_single_run(self, run_output_info, input_configuration,
                                     validated_params):
        # first run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': run_output_info['upload_results']['obj_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create report
        report_text = 'Ran on a single reads library.\n\n'
        alignment_info = self.get_obj_info(
            run_output_info['upload_results']['obj_ref'])
        report_text += 'Created ReadsAlignment: ' + str(alignment_info[1]) + '\n'
        report_text += ' ' + run_output_info['upload_results']['obj_ref'] + '\n'

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message': report_text,
            'objects_created': [{
                'ref': run_output_info['upload_results']['obj_ref'],
                'description': 'ReadsAlignment'
            }],
            'report_object_name': 'kb_Bowtie2_' + str(uuid.uuid4()),
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name': validated_params['output_workspace']
        })
        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
    def process_batch_result(self, batch_result, validated_params, reads,
                             input_set_info):
        n_jobs = len(batch_result['results'])
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        # reads alignment set items
        items = []
        objects_created = []

        for k in range(0, len(batch_result['results'])):
            job = batch_result['results'][k]
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                # Note: could add a label to the alignment here?
                items.append({'ref': ra_ref, 'label': reads[k]['condition']})
                objects_created.append({'ref': ra_ref})

            if result_package['run_context']['location'] == 'local':
                ran_locally += 1
            if result_package['run_context']['location'] == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        alignment_set_data = {'description': '', 'items': items}
        alignment_set_save_params = {
            'data': alignment_set_data,
            'workspace': validated_params['output_workspace'],
            'output_object_name':
                str(input_set_info[1]) + validated_params['output_obj_name_suffix']
        }

        set_api = SetAPI(self.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(alignment_set_save_params)
        print('Saved ReadsAlignment=')
        pprint(save_result)
        objects_created.append({
            'ref': save_result['set_ref'],
            'description': 'Set of all reads alignments generated'
        })
        set_name = save_result['set_info'][1]

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': save_result['set_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create the report
        report_text = 'Ran on SampleSet or ReadsSet.\n\n'
        report_text += 'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n'
        report_text += 'Total ReadsLibraries = ' + str(n_jobs) + '\n'
        report_text += ' Successful runs = ' + str(n_success) + '\n'
        report_text += ' Failed runs = ' + str(n_error) + '\n'
        report_text += ' Ran on main node = ' + str(ran_locally) + '\n'
        report_text += ' Ran on remote worker = ' + str(ran_njsw) + '\n\n'

        print('Report text=')
        print(report_text)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message': report_text,
            'objects_created': objects_created,
            'report_object_name': 'kb_Bowtie2_' + str(uuid.uuid4()),
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name': validated_params['output_workspace']
        })

        result = {
            'report_info': {
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            }
        }
        result['batch_output_info'] = batch_result

        return result

    def validate_params(self, params):
        validated_params = {}

        required_string_fields = [
            'input_ref', 'assembly_or_genome_ref', 'output_obj_name_suffix',
            'output_workspace'
        ]
        for field in required_string_fields:
            if field in params and params[field]:
                validated_params[field] = params[field]
            else:
                raise ValueError('"' + field +
                                 '" field required to run bowtie2 aligner app')

        optional_fields = [
            'quality_score', 'alignment_type', 'preset_options', 'trim5',
            'trim3', 'condition_label', 'np', 'minins', 'maxins',
            'output_alignment_suffix', 'output_alignment_name'
        ]
        for field in optional_fields:
            if field in params:
                if params[field] is not None:
                    validated_params[field] = params[field]

        validated_params['create_report'] = True
        if 'create_report' in params and params['create_report'] is not None:
            if int(params['create_report']) == 1:
                validated_params['create_report'] = True
            elif int(params['create_report']) == 0:
                validated_params['create_report'] = False
            else:
                raise ValueError(
                    '"create_report" field, if present, should be set to a boolean value: 0 or 1')

        validated_params['concurrent_local_tasks'] = None
        validated_params['concurrent_njsw_tasks'] = None
        if 'concurrent_local_tasks' in params and params['concurrent_local_tasks'] is not None:
            validated_params['concurrent_local_tasks'] = int(params['concurrent_local_tasks'])
        if 'concurrent_njsw_tasks' in params and params['concurrent_njsw_tasks'] is not None:
            validated_params['concurrent_njsw_tasks'] = int(params['concurrent_njsw_tasks'])

        return validated_params

    def fetch_reads_refs_from_sampleset(self, ref, info, validated_params):
        """
        Note: adapted from kbaseapps/kb_hisat2 - file_util.py

        From the given object ref, return a list of all reads objects that are a
        part of that object. E.g., if ref is a ReadsSet, return a list of all
        PairedEndLibrary or SingleEndLibrary refs that are a member of that
        ReadsSet. This is returned as a list of dictionaries as follows:
        {
            "ref": reads object reference,
            "condition": condition string associated with that reads object
        }
        The only one required is "ref", all other keys may or may not be present,
        based on the reads object or object type in initial ref variable. E.g. a
        RNASeqSampleSet might have condition info for each reads object, but a
        single PairedEndLibrary may not have that info.

        If ref is already a Reads library, just returns a list with ref as a
        single element.
        """
        obj_type = self.get_type_from_obj_info(info)
        refs = list()
        refs_for_ws_info = list()
        if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
            print("Looking up reads references in ReadsSet object")
            set_api = SetAPI(self.srv_wiz_url)
            reads_set = set_api.get_reads_set_v1({
                'ref': ref,
                'include_item_info': 0,
                'include_set_item_ref_paths': 1
            })

            for reads in reads_set["data"]["items"]:
                refs.append({
                    'ref': reads['ref_path'],
                    'condition': reads['label']
                })
                refs_for_ws_info.append({'ref': reads['ref_path']})
        else:
            raise ValueError("Unable to fetch reads reference from object {} "
                             "which is a {}".format(ref, obj_type))

        # get object info so we can name things properly
        infos = self.ws.get_object_info3({'objects': refs_for_ws_info})['infos']

        name_ext = '_alignment'
        if 'output_alignment_suffix' in validated_params \
                and validated_params['output_alignment_suffix'] is not None:
            ext = validated_params['output_alignment_suffix'].replace(' ', '')
            if ext:
                name_ext = ext

        unique_name_lookup = {}
        for k in range(0, len(refs)):
            refs[k]['info'] = infos[k]
            name = infos[k][1]
            if name not in unique_name_lookup:
                unique_name_lookup[name] = 1
            else:
                unique_name_lookup[name] += 1
                name = name + '_' + str(unique_name_lookup[name])
            name = name + name_ext
            refs[k]['alignment_output_name'] = name

        return refs

    def determine_input_info(self, validated_params):
        ''' get info on the input_ref object and determine if we run once or run on a set '''
        info = self.get_obj_info(validated_params['input_ref'])
        obj_type = self.get_type_from_obj_info(info)
        if obj_type in [
                'KBaseAssembly.PairedEndLibrary', 'KBaseAssembly.SingleEndLibrary',
                'KBaseFile.PairedEndLibrary', 'KBaseFile.SingleEndLibrary'
        ]:
            return {
                'run_mode': 'single_library',
                'info': info,
                'ref': validated_params['input_ref']
            }
        if obj_type == 'KBaseRNASeq.RNASeqSampleSet':
            return {
                'run_mode': 'sample_set',
                'info': info,
                'ref': validated_params['input_ref']
            }
        if obj_type == 'KBaseSets.ReadsSet':
            return {
                'run_mode': 'sample_set',
                'info': info,
                'ref': validated_params['input_ref']
            }
        raise ValueError('Object type of input_ref is not valid, was: ' + str(obj_type))

    def get_type_from_obj_info(self, info):
        return info[2].split('-')[0]

    def get_obj_info(self, ref):
        return self.ws.get_object_info3({'objects': [{'ref': ref}]})['infos'][0]

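
# A hedged sketch of a parameter dict that would satisfy validate_params() in
# the Bowtie2Aligner class above; the refs, workspace name, and suffixes are
# placeholders, and the commented-out call shows the assumed entry point.
example_bowtie2_params = {
    'input_ref': '55/3/1',                 # reads library, ReadsSet, or RNASeqSampleSet
    'assembly_or_genome_ref': '55/2/1',
    'output_obj_name_suffix': '_alignment_set',
    'output_workspace': 'my_workspace',
    'output_alignment_suffix': '_alignment',
    'create_report': 1,
    'concurrent_local_tasks': 1,           # optional knobs passed through to KBParallel
    'concurrent_njsw_tasks': 2,
}
# aligner = Bowtie2Aligner(scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance)
# result = aligner.align(example_bowtie2_params)
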
class STAR_Aligner(object):
    def __init__(self, config, provenance):
        self.config = config
        self.workspace_url = config['workspace-url']
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.srv_wiz_url = config['srv-wiz-url']
        self.parallel_runner = KBParallel(self.callback_url)
        self.provenance = provenance
        self.star_utils = STARUtils(self.scratch, self.workspace_url,
                                    self.callback_url, self.srv_wiz_url,
                                    provenance)
        self.set_api_client = SetAPI(self.srv_wiz_url, service_ver='dev')
        self.qualimap = kb_QualiMap(self.callback_url, service_ver='dev')
        self.star_idx_dir = None
        self.star_out_dir = None

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_STAR', provenance[0]['subactions'])
        print('Running kb_STAR version = ' + self.my_version)

    def run_align(self, params):
        # 0. create the star folders
        if self.star_idx_dir is None:
            (idx_dir, out_dir) = self.star_utils.create_star_dirs(self.scratch)
            self.star_idx_dir = idx_dir
            self.star_out_dir = out_dir

        # 1. validate & process the input parameters
        validated_params = self.star_utils.process_params(params)
        input_obj_info = self.star_utils.determine_input_info(validated_params)

        # 2. convert the input parameters (from refs to file paths, especially)
        input_params = self.star_utils.convert_params(validated_params)

        returnVal = {"report_ref": None, "report_name": None}

        if input_obj_info['run_mode'] == 'single_library':
            returnVal = self.star_run_single(input_params)

        if input_obj_info['run_mode'] == 'sample_set':
            # returnVal = self.star_run_batch_parallel(input_params)
            returnVal = self.star_run_batch_sequential(input_params)

        return returnVal

    def star_run_single(self, input_params):
        """
        Performs a single run of STAR against a single reads reference.
        The rest of the info is taken from the params dict - see the spec for details.
        """
        log('--->\nrunning STAR_Aligner.star_run_single\n' +
            'params:\n{}'.format(json.dumps(input_params, indent=1)))

        # 0. get index
        self.get_index(input_params)

        # 1. Prepare for mapping
        rds = None
        reads_refs = input_params[STARUtils.SET_READS]
        for r in reads_refs:
            if r['ref'] == input_params[STARUtils.PARAM_IN_READS]:
                rds = r
                break
        reads_info = self.star_utils._get_reads_info(
            rds, input_params[STARUtils.PARAM_IN_READS])
        rds_name = rds['alignment_output_name'].replace(
            input_params['alignment_suffix'], '')

        alignment_objs = list()
        alignment_ref = None
        singlerun_output_info = {}
        report_info = {'name': None, 'ref': None}
        ret_val = None

        rds_files = list()
        ret_fwd = reads_info["file_fwd"]
        if ret_fwd is not None:
            rds_files.append(ret_fwd)
            if reads_info.get('file_rev', None) is not None:
                rds_files.append(reads_info['file_rev'])

        input_params[STARUtils.PARAM_IN_OUTFILE_PREFIX] = rds_name + '_'

        # 2. After all is set, do the alignment and upload the output.
        star_mp_ret = self.run_star_mapping(input_params, rds_files, rds_name)

        if star_mp_ret.get('star_output', None) is not None:
            bam_sort = ''
            if input_params.get('outSAMtype', None) == 'BAM':
                bam_sort = 'sortedByCoord'
            output_bam_file = '{}_Aligned.{}.out.bam'.format(rds_name, bam_sort)
            output_bam_file = os.path.join(star_mp_ret['star_output'],
                                           output_bam_file)

            # Upload the alignment
            upload_results = self.star_utils.upload_STARalignment(
                input_params, rds, reads_info, output_bam_file)
            alignment_ref = upload_results['obj_ref']
            alignment_obj = {
                'ref': alignment_ref,
                'name': rds['alignment_output_name']
            }
            alignment_objs.append({
                'reads_ref': rds['ref'],
                'AlignmentObj': alignment_obj
            })

            singlerun_output_info['index_dir'] = self.star_idx_dir
            singlerun_output_info['output_dir'] = star_mp_ret['star_output']
            singlerun_output_info['output_bam_file'] = output_bam_file
            singlerun_output_info['upload_results'] = upload_results

            if input_params.get("create_report", 0) == 1:
                report_info = self.star_utils.generate_report_for_single_run(
                    singlerun_output_info, input_params)

            ret_val = {
                'alignmentset_ref': None,
                'output_directory': singlerun_output_info['output_dir'],
                'output_info': singlerun_output_info,
                'alignment_objs': alignment_objs,
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            }
        else:
            ret_val = {
                'alignmentset_ref': None,
                'output_directory': None,
                'output_info': None,
                'alignment_objs': None,
                'report_name': None,
                'report_ref': None
            }

        if ret_fwd is not None:
            os.remove(ret_fwd)
            if reads_info.get('file_rev', None) is not None:
                os.remove(reads_info["file_rev"])

        return ret_val

    def star_run_batch_sequential(self, input_params):
        """
        star_run_batch_sequential: running the STAR align by looping
        """
        log('--->\nrunning STAR_Aligner.star_run_batch_sequential\n' +
            'params:\n{}'.format(json.dumps(input_params, indent=1)))

        self.get_index(input_params)

        reads_refs = input_params[STARUtils.SET_READS]
        single_input_params = copy.deepcopy(input_params)

        # 1. Run the mapping one by one
        alignment_items = []
        alignment_objs = []
        rds_names = []
        for r in reads_refs:
            single_input_params[STARUtils.PARAM_IN_READS] = r['ref']
            single_input_params['create_report'] = 0
            single_ret = self.star_run_single(single_input_params)
            item = single_ret['alignment_objs'][0]
            a_obj = item['AlignmentObj']
            r_ref = item['reads_ref']
            alignment_objs.append(item)
            alignment_items.append({
                'ref': a_obj['ref'],
                'label': r.get('condition',
                               single_input_params.get('condition', 'unspecified'))
            })
            rds_names.append(r['alignment_output_name'].replace(
                single_input_params['alignment_suffix'], ''))

        # 2. Process all the results after mapping is done
        (set_result, report_info) = self._batch_sequential_post_processing(
            alignment_items, rds_names, input_params)
        set_result['output_directory'] = self.star_out_dir

        result = {
            'alignmentset_ref': set_result['set_ref'],
            'output_info': set_result,
            'alignment_objs': alignment_objs,
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        return result

    def _batch_sequential_post_processing(self, alignment_items, rds_names, params):
        ''' process the mapping results of all the reads in the readsset_ref '''
        # 1. Save the alignment set
        set_name_map = self.star_utils.get_object_names(
            [params[STARUtils.PARAM_IN_READS]])
        set_name = set_name_map[params[STARUtils.PARAM_IN_READS]]
        output_alignmentset_name = set_name + params['alignmentset_suffix']

        save_result = self.star_utils.upload_alignment_set(
            alignment_items, output_alignmentset_name, params['output_workspace'])
        result_obj_ref = save_result['set_ref']

        index_dir = os.path.join(self.scratch, STARUtils.STAR_IDX_DIR)
        output_dir = os.path.join(self.scratch, STARUtils.STAR_OUT_DIR)

        # 2. Extract the ReadsPerGene counts if necessary
        self._extract_readsPerGene(params, rds_names, output_dir)

        # 3. Reporting...
        report_info = {'name': None, 'ref': None}

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc({'input_ref': result_obj_ref})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']
        qc_result = [{
            'shock_id': qc_result_zip_info['shock_id'],
            'name': qc_result_zip_info['index_html_file_name'],
            'label': qc_result_zip_info['name']
        }]

        # create the report
        report_text = 'Ran on SampleSet or ReadsSet.\n\n'
        report_text += 'Created ReadsAlignmentSet: ' + str(output_alignmentset_name) + '\n\n'

        report_info = self.star_utils._generate_star_report(
            result_obj_ref, report_text, qc_result, params['output_workspace'],
            index_dir, output_dir)

        return (save_result, report_info)

    def star_run_batch_parallel(self, input_params):
        """
        star_run_batch_parallel: running the STAR align in batch, in parallel
        """
        log('--->\nrunning STAR_Aligner.star_run_batch_parallel\n' +
            'params:\n{}'.format(json.dumps(input_params, indent=1)))

        reads_refs = input_params[STARUtils.SET_READS]

        # build task list and send it to KBParallel
        tasks = []
        for r in reads_refs:
            tasks.append(
                self.build_single_execution_task(r['ref'], input_params))

        batch_run_params = {
            'tasks': tasks,
            'runner': 'parallel',
            'max_retries': 2
        }
        if input_params.get('concurrent_local_tasks', None) is not None:
            batch_run_params['concurrent_local_tasks'] = input_params['concurrent_local_tasks']
        if input_params.get('concurrent_njsw_tasks', None) is not None:
            batch_run_params['concurrent_njsw_tasks'] = input_params['concurrent_njsw_tasks']

        results = self.parallel_runner.run_batch(batch_run_params)
        print('Batch run results=')
        pprint(results)

        batch_result = self.process_batch_result(results, input_params, reads_refs)
        batch_result['output_directory'] = self.star_out_dir
        return batch_result

    def process_batch_result(self, batch_result, params, reads_refs):
        n_jobs = len(batch_result['results'])
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        set_name_map = self.star_utils.get_object_names(
            [params[STARUtils.PARAM_IN_READS]])
        set_name = set_name_map[params[STARUtils.PARAM_IN_READS]]

        # reads alignment set items
        alignment_items = []
        alignment_objs = []
        rds_names = []

        for k in range(0, len(batch_result['results'])):
            reads_ref = reads_refs[k]
            rds_names.append(reads_ref['alignment_output_name'].replace(
                params['alignment_suffix'], ''))
            job = batch_result['results'][k]
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                alignment_items.append({
                    'ref': ra_ref,
                    'label': reads_ref.get(
                        'condition', params.get('condition', 'unspecified'))
                })
                alignment_objs.append({'ref': ra_ref})

            if result_package['run_context']['location'] == 'local':
                ran_locally += 1
            if result_package['run_context']['location'] == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        output_alignmentset_name = set_name + params['alignmentset_suffix']
        save_result = self.star_utils.upload_alignment_set(
            alignment_items, output_alignmentset_name, params['output_workspace'])
        result_obj_ref = save_result['set_ref']

        index_dir = os.path.join(self.scratch, STARUtils.STAR_IDX_DIR)
        output_dir = os.path.join(self.scratch, STARUtils.STAR_OUT_DIR)

        # Extract the ReadsPerGene counts if necessary
        self._extract_readsPerGene(params, rds_names, output_dir)

        # Reporting...
        report_info = {'name': None, 'ref': None}

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc({'input_ref': result_obj_ref})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']
        qc_result = [{
            'shock_id': qc_result_zip_info['shock_id'],
            'name': qc_result_zip_info['index_html_file_name'],
            'label': qc_result_zip_info['name']
        }]

        # create the report
        report_text = 'Ran on SampleSet or ReadsSet.\n\n'
        report_text += 'Created ReadsAlignmentSet: ' + str(output_alignmentset_name) + '\n\n'
        report_text += 'Total ReadsLibraries = ' + str(n_jobs) + '\n'
        report_text += ' Successful runs = ' + str(n_success) + '\n'
        report_text += ' Failed runs = ' + str(n_error) + '\n'
        report_text += ' Ran on main node = ' + str(ran_locally) + '\n'
        report_text += ' Ran on remote worker = ' + str(ran_njsw) + '\n\n'

        report_info = self.star_utils._generate_star_report(
            result_obj_ref, report_text, qc_result, params['output_workspace'],
            index_dir, output_dir)

        result = {
            'alignmentset_ref': result_obj_ref,
            'output_info': batch_result,
            'alignment_objs': alignment_objs,
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        return result

    def _extract_readsPerGene(self, params, rds_names, output_dir):
        # Extract the ReadsPerGene counts if 'quantMode' was set during the STAR run
        gene_count_files = []
        if (params.get('quantMode', None) is not None
                and (params['quantMode'] == 'Both'
                     or 'GeneCounts' in params['quantMode'])):
            for reads_name in rds_names:
                gene_count_files.append('{}/{}_ReadsPerGene.out.tab'.format(
                    reads_name, reads_name))

            extract_geneCount_matrix(gene_count_files, output_dir)

    def build_single_execution_task(self, rds_ref, params):
        task_params = copy.deepcopy(params)
        task_params[STARUtils.PARAM_IN_READS] = rds_ref
        task_params['create_report'] = 0
        if 'condition' in rds_ref:
            task_params['condition'] = rds_ref['condition']
        else:
            task_params['condition'] = 'unspecified'

        return {
            'module_name': 'STAR',
            'function_name': 'run_star',
            'version': self.my_version,
            # 'version': 'dev',
            'parameters': task_params
        }

    def get_version_from_subactions(self, module_name, subactions):
        # go through each subaction looking for this module
        if not subactions:
            return 'dev'  # 'release'  # default to release if we can't find anything
        for sa in subactions:
            if 'name' in sa:
                if sa['name'] == module_name:
                    # local-docker-image implies that we are running in kb-test, so return 'dev'
                    if sa['commit'] == 'local-docker-image':
                        return 'dev'
                    # to check that it is a valid hash, make sure it is the right
                    # length and made up of valid hash characters
                    if re.match('[a-fA-F0-9]{40}$', sa['commit']):
                        return sa['commit']
        # again, default to setting this to release
        return 'dev'  # 'release'

    def run_star_indexing(self, input_params):
        """
        Runs STAR in genomeGenerate mode to build the index files and directory
        for STAR mapping. It creates a directory as defined by self.star_idx_dir
        in the scratch area that houses the index files.
        """
        ret_params = copy.deepcopy(input_params)
        ret_params[STARUtils.PARAM_IN_STARMODE] = 'genomeGenerate'

        # build the indexing parameters
        params_idx = self.star_utils._get_indexing_params(ret_params, self.star_idx_dir)

        ret = 1
        try:
            if ret_params[STARUtils.PARAM_IN_STARMODE] == 'genomeGenerate':
                ret = self.star_utils._exec_indexing(params_idx)
            else:
                ret = 0
            while ret != 0:
                time.sleep(1)
        except ValueError as eidx:
            log('STAR genome indexing raised error:\n')
            pprint(eidx)
        else:
            ret = 0

        return (ret, params_idx[STARUtils.STAR_IDX_DIR])

    def run_star_mapping(self, params, rds_files, rds_name):
        """
        Runs STAR in alignReads mode for STAR mapping.
        It creates a directory as defined by self.star_out_dir with a subfolder
        named after the reads
        """
        params_mp = self.star_utils._get_mapping_params(
            params, rds_files, rds_name, self.star_idx_dir, self.star_out_dir)

        retVal = {}
        params_mp[STARUtils.PARAM_IN_STARMODE] = 'alignReads'
        try:
            ret = self.star_utils._exec_mapping(params_mp)
            while ret != 0:
                time.sleep(1)
        except ValueError as emp:
            log('STAR mapping raised error:\n')
            pprint(emp)
            retVal = {'star_idx': self.star_idx_dir, 'star_output': None}
        else:
            # no exception raised by STAR mapping and STAR returned 0,
            # so move on to saving and reporting
            retVal = {
                'star_idx': self.star_idx_dir,
                'star_output': params_mp.get('align_output')
            }

        return retVal

    def get_index(self, input_params):
        '''
        get_index: generate the index if not yet existing
        '''
        gnm_ref = input_params[STARUtils.PARAM_IN_GENOME]
        if input_params.get('sjdbGTFfile', None) is None:
            input_params['sjdbGTFfile'] = self.star_utils._get_genome_gtf_file(
                gnm_ref, self.star_idx_dir)

        if not os.path.isfile(
                os.path.join(self.star_idx_dir, 'genomeParameters.txt')):
            # fetch genome fasta and GTF from refs to file location(s)
            input_params[STARUtils.PARAM_IN_FASTA_FILES] = \
                self.star_utils._get_genome_fasta(gnm_ref)
            # generate the indices
            (idx_ret, idx_dir) = self.run_star_indexing(input_params)
            if idx_ret != 0:
                raise ValueError("Failed to generate genome indices, aborting...")