def _handle_post_job_actions( self, step, job, replacement_dict ):
     # Create new PJA associations with the created job, to be run on completion.
     # PJA Parameter Replacement (only applies to immediate actions-- rename specifically, for now)
     # Pass along replacement dict with the execution of the PJA so we don't have to modify the object.
     for pja in step.post_job_actions:
         if pja.action_type in ActionBox.immediate_actions:
             ActionBox.execute( self.trans.app, self.trans.sa_session, pja, job, replacement_dict )
         else:
             job.add_post_job_action( pja )
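The pattern above separates immediate post-job actions (run right away, with the replacement dict available) from deferred ones (attached to the job and run at completion, as JobWrapper.finish does near the end of this page). A minimal standalone sketch of that dispatch, using toy stand-ins for Galaxy's ActionBox, Job and PostJobAction classes (ToyActionBox, ToyJob and ToyPJA are illustrative, not Galaxy APIs):

# Standalone sketch of the immediate-vs-deferred dispatch shown above.
# ToyActionBox, ToyJob and ToyPJA are illustrative stand-ins, not Galaxy classes.
class ToyPJA:
    def __init__(self, action_type):
        self.action_type = action_type

class ToyActionBox:
    # RenameDatasetAction is treated as immediate in the listings above;
    # EmailAction is deferred until the job finishes.
    immediate_actions = {"RenameDatasetAction"}

    @classmethod
    def execute(cls, app, sa_session, pja, job, replacement_dict=None):
        # An immediate action can use the replacement dict right away,
        # e.g. to expand "${sample}" in a rename template.
        print("executing %s with %s" % (pja.action_type, replacement_dict))

class ToyJob:
    def __init__(self):
        self.post_job_actions = []

    def add_post_job_action(self, pja):
        # Deferred actions are stored on the job and executed on completion.
        self.post_job_actions.append(pja)

def handle_post_job_actions(step_pjas, job, replacement_dict):
    for pja in step_pjas:
        if pja.action_type in ToyActionBox.immediate_actions:
            ToyActionBox.execute(None, None, pja, job, replacement_dict)
        else:
            job.add_post_job_action(pja)

job = ToyJob()
handle_post_job_actions(
    [ToyPJA("RenameDatasetAction"), ToyPJA("EmailAction")],
    job,
    {"sample": "S1"},
)
assert [p.action_type for p in job.post_job_actions] == ["EmailAction"]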
Example #2
 def _handle_post_job_actions(self, step, job, replacement_dict):
     # Create new PJA associations with the created job, to be run on completion.
     # PJA Parameter Replacement (only applies to immediate actions-- rename specifically, for now)
     # Pass along replacement dict with the execution of the PJA so we don't have to modify the object.
     for pja in step.post_job_actions:
         if pja.action_type in ActionBox.immediate_actions:
             ActionBox.execute(self.trans.app, self.trans.sa_session, pja,
                               job, replacement_dict)
         else:
             job.add_post_job_action(pja)
Example #3
    def _remap_job_on_rerun(self, trans, galaxy_session, rerun_remap_job_id, current_job, out_data):
        """
        Re-connect dependent datasets for a job that is being rerun (because it failed initially).

        If a job fails, the user has the option to try the job again with changed parameters.
        To be able to resume jobs that depend on this job's output datasets, we change the dependent jobs'
        input datasets to be those of the job that is being rerun.
        """
        try:
            old_job = trans.sa_session.query(trans.app.model.Job).get(rerun_remap_job_id)
            assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, current_job.id)
            assert old_job.tool_id == current_job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, current_job.id, old_job.tool_id, current_job.tool_id)
            if trans.user is not None:
                assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, current_job.id, old_job.user_id, trans.user.id)
            elif trans.user is None and type(galaxy_session) == trans.model.GalaxySession:
                assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, current_job.id, old_job.session_id, galaxy_session.id)
            else:
                raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, current_job.id))
            # Duplicate PJAs before remap.
            for pjaa in old_job.post_job_actions:
                current_job.add_post_job_action(pjaa.post_job_action)
            if old_job.workflow_invocation_step:
                replacement_dict = {}
                for parameter in old_job.workflow_invocation_step.workflow_invocation.input_parameters:
                    if parameter.type == WorkflowRequestInputParameter.types.REPLACEMENT_PARAMETERS:
                        replacement_dict[parameter.name] = parameter.value
                for pja in old_job.workflow_invocation_step.workflow_step.post_job_actions:
                    # execute immediate actions here, with workflow context.
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(trans.app, trans.sa_session, pja, current_job, replacement_dict)
            for p in old_job.parameters:
                if p.name.endswith('|__identifier__'):
                    current_job.parameters.append(p.copy())
            remapped_hdas = self.__remap_data_inputs(old_job=old_job, current_job=current_job)
            for jtod in old_job.output_datasets:
                for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                    if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (
                            trans.user is None and job_to_remap.session_id == galaxy_session.id):
                        self.__remap_parameters(job_to_remap, jtid, jtod, out_data)
                        trans.sa_session.add(job_to_remap)
                        trans.sa_session.add(jtid)
                jtod.dataset.visible = False
                trans.sa_session.add(jtod)
            for jtodc in old_job.output_dataset_collection_instances:
                hdca = jtodc.dataset_collection_instance
                hdca.collection.replace_failed_elements(remapped_hdas)
                if hdca.implicit_collection_jobs:
                    for job in hdca.implicit_collection_jobs.jobs:
                        if job.job_id == old_job.id:
                            job.job_id = current_job.id
        except Exception:
            log.exception('Cannot remap rerun dependencies.')
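The docstring above describes re-pointing dependent jobs at the rerun job's outputs. A schematic sketch of that idea using plain data structures instead of Galaxy's SQLAlchemy models (ToyDependentJob and remap_inputs are invented for this illustration):

# Toy illustration of rerun remapping: downstream jobs that consumed the old
# (failed) job's output datasets are re-pointed at the rerun job's outputs.
# ToyDependentJob stands in for Galaxy's Job/JobToInputDatasetAssociation pair.
from dataclasses import dataclass, field

@dataclass
class ToyDependentJob:
    input_dataset_ids: dict = field(default_factory=dict)  # input name -> dataset id

def remap_inputs(dependent_jobs, old_to_new_dataset_id):
    """Point every dependent job's inputs at the rerun job's datasets."""
    for dep in dependent_jobs:
        for name, ds_id in dep.input_dataset_ids.items():
            if ds_id in old_to_new_dataset_id:
                dep.input_dataset_ids[name] = old_to_new_dataset_id[ds_id]

# The failed job produced dataset 10; the rerun produced dataset 42 instead.
downstream = [ToyDependentJob({"input1": 10}), ToyDependentJob({"other": 7})]
remap_inputs(downstream, {10: 42})
assert downstream[0].input_dataset_ids == {"input1": 42}
assert downstream[1].input_dataset_ids == {"other": 7}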
Example #4
    def _handle_post_job_actions( self, step, job, replacement_dict ):
        # Create new PJA associations with the created job, to be run on completion.
        # PJA Parameter Replacement (only applies to immediate actions-- rename specifically, for now)
        # Pass along replacement dict with the execution of the PJA so we don't have to modify the object.

        # Combine workflow and runtime post job actions into the effective post
        # job actions for this execution.
        effective_post_job_actions = step.post_job_actions[:]
        for key, value in self.runtime_post_job_actions.iteritems():
            effective_post_job_actions.append( self.__to_pja( key, value, None ) )
        for pja in effective_post_job_actions:
            if pja.action_type in ActionBox.immediate_actions:
                ActionBox.execute( self.trans.app, self.trans.sa_session, pja, job, replacement_dict )
            else:
                job.add_post_job_action( pja )
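This variant folds runtime post-job actions into the step-level list before dispatching. The listing does not show __to_pja, so the sketch below assumes a simple value layout ('action_type', 'output_name', 'action_arguments') and a key naming scheme of action type plus output name; both are assumptions made for illustration only:

# Hedged sketch: fold a runtime post-job-actions mapping into the step-level
# list before dispatching.  SketchPJA, to_pja and the value layout below are
# assumptions for illustration; the listing's __to_pja helper is not shown.
class SketchPJA:
    def __init__(self, action_type, output_name=None, action_arguments=None):
        self.action_type = action_type
        self.output_name = output_name
        self.action_arguments = action_arguments or {}

def to_pja(key, value):
    return SketchPJA(
        action_type=value.get("action_type", key),
        output_name=value.get("output_name"),
        action_arguments=value.get("action_arguments", {}),
    )

step_pjas = [SketchPJA("EmailAction")]
runtime_pjas = {
    "RenameDatasetActionout_file1": {            # assumed key scheme: type + output name
        "action_type": "RenameDatasetAction",
        "output_name": "out_file1",
        "action_arguments": {"newname": "final result"},
    },
}

effective = step_pjas[:]
for key, value in runtime_pjas.items():          # .items() replaces Python 2's iteritems()
    effective.append(to_pja(key, value))

assert [p.action_type for p in effective] == ["EmailAction", "RenameDatasetAction"]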
Example #5
    def _execute_workflow(self, sample):
        for key, value in sample.workflow['mappings'].items():
            if 'hda' not in value and 'ldda' in value:
                # If HDA is already here, it's an external input, we're not copying anything.
                ldda = self.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(value['ldda'])
                if ldda.dataset.state in ['new', 'upload', 'queued', 'running', 'empty', 'discarded']:
                    log.error("Cannot import dataset '%s' to user history since its state is '%s'.  " % (ldda.name, ldda.dataset.state))
                elif ldda.dataset.state in ['ok', 'error']:
                    hda = ldda.to_history_dataset_association(target_history=sample.history, add_to_history=True)
                    sample.workflow['mappings'][key]['hda'] = hda.id
                    self.sa_session.add(sample)
                    self.sa_session.flush()
        workflow_dict = sample.workflow
        import copy
        new_wf_dict = copy.deepcopy(workflow_dict)
        for key in workflow_dict['mappings']:
            if not isinstance(key, int):
                new_wf_dict['mappings'][int(key)] = workflow_dict['mappings'][key]
        workflow_dict = new_wf_dict
        fk_trans = FakeTrans(self.app, history=sample.history, user=sample.request.user)
        workflow = self.sa_session.query(self.app.model.Workflow).get(workflow_dict['id'])
        if not workflow:
            log.error("Workflow mapping failure.")
            return
        if len(workflow.steps) == 0:
            log.error("Workflow cannot be run because it does not have any steps")
            return
        if workflow.has_cycles:
            log.error("Workflow cannot be run because it contains cycles")
            return
        if workflow.has_errors:
            log.error("Workflow cannot be run because of validation errors in some steps")
            return
        # Build the state for each step
        errors = {}
        # Build a fake dictionary prior to execution.
        # Prepare each step
        for step in workflow.steps:
            step.upgrade_messages = {}
            # Construct modules
            if step.type == 'tool' or step.type is None:
                # Restore the tool state for the step
                step.module = module_factory.from_workflow_step(fk_trans, step)
                # Fix any missing parameters
                step.upgrade_messages = step.module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                step.module.add_dummy_datasets(connections=step.input_connections)
                # Store state with the step
                step.state = step.module.state
                # Error dict
                if step.tool_errors:
                    errors[step.id] = step.tool_errors
            else:
                # Non-tool specific stuff?
                step.module = module_factory.from_workflow_step(fk_trans, step)
                step.state = step.module.get_runtime_state()
            # Connections by input name
            step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
        for step in workflow.steps:
            step.upgrade_messages = {}
            # Connections by input name
            step.input_connections_by_name = \
                dict((conn.input_name, conn) for conn in step.input_connections)
            # Extract just the arguments for this step by prefix
            step_errors = None
            if step.type == 'tool' or step.type is None:
                module = module_factory.from_workflow_step(fk_trans, step)
                # Fix any missing parameters
                step.upgrade_messages = module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                module.add_dummy_datasets(connections=step.input_connections)
                # Get the tool
                tool = module.tool
                # Get the state
                step.state = state = module.state
            if step_errors:
                errors[step.id] = state.inputs["__errors__"] = step_errors
        # Run each step, connecting outputs to inputs
        workflow_invocation = self.app.model.WorkflowInvocation()
        workflow_invocation.workflow = workflow
        outputs = odict()
        for i, step in enumerate(workflow.steps):
            job = None
            if step.type == 'tool' or step.type is None:
                tool = self.app.toolbox.get_tool(step.tool_id)

                def callback(input, prefixed_name, **kwargs):
                    if isinstance(input, DataToolParameter):
                        if prefixed_name in step.input_connections_by_name:
                            conn = step.input_connections_by_name[prefixed_name]
                            return outputs[conn.output_step.id][conn.output_name]
                visit_input_values(tool.inputs, step.state.inputs, callback)
                job, out_data = tool.execute(fk_trans, step.state.inputs, history=sample.history)
                outputs[step.id] = out_data
                for pja in step.post_job_actions:
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(self.app, self.sa_session, pja, job, replacement_dict=None)
                    else:
                        job.add_post_job_action(pja)
            else:
                job, out_data = step.module.execute(fk_trans, step.state)
                outputs[step.id] = out_data
                if step.id in workflow_dict['mappings']:
                    data = self.sa_session.query(self.app.model.HistoryDatasetAssociation).get(workflow_dict['mappings'][str(step.id)]['hda'])
                    outputs[step.id]['output'] = data
            workflow_invocation_step = self.app.model.WorkflowInvocationStep()
            workflow_invocation_step.workflow_invocation = workflow_invocation
            workflow_invocation_step.workflow_step = step
            workflow_invocation_step.job = job
        self.sa_session.add(workflow_invocation)
        self.sa_session.flush()
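One detail worth noting in the example above: after a JSON round-trip the integer step ids in sample.workflow['mappings'] come back as strings, so the code deep-copies the dict and re-adds each entry under an int key while leaving the original string key in place (later lookups use both step.id and str(step.id)). A minimal standalone sketch of that normalization:

# Step ids become strings after a JSON round-trip; re-key them as ints so that
# later membership tests like `step.id in workflow_dict['mappings']` succeed.
# The original string keys are intentionally kept, because the per-step lookup
# in the example above indexes with str(step.id).
import copy
import json

workflow_dict = json.loads('{"id": 5, "mappings": {"3": {"ldda": 17}}}')

new_wf_dict = copy.deepcopy(workflow_dict)
for key in workflow_dict["mappings"]:
    if not isinstance(key, int):
        new_wf_dict["mappings"][int(key)] = workflow_dict["mappings"][key]
workflow_dict = new_wf_dict

assert 3 in workflow_dict["mappings"]      # int key now present
assert "3" in workflow_dict["mappings"]    # original string key preserved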
Example #6
    def _execute_workflow(self, sample):
        for key, value in sample.workflow['mappings'].items():
            if 'hda' not in value and 'ldda' in value:
                # If HDA is already here, it's an external input, we're not copying anything.
                ldda = self.sa_session.query(
                    self.app.model.LibraryDatasetDatasetAssociation).get(
                        value['ldda'])
                if ldda.dataset.state in [
                        'new', 'upload', 'queued', 'running', 'empty',
                        'discarded'
                ]:
                    log.error(
                        "Cannot import dataset '%s' to user history since its state is '%s'.  "
                        % (ldda.name, ldda.dataset.state))
                elif ldda.dataset.state in ['ok', 'error']:
                    hda = ldda.to_history_dataset_association(
                        target_history=sample.history, add_to_history=True)
                    sample.workflow['mappings'][key]['hda'] = hda.id
                    self.sa_session.add(sample)
                    self.sa_session.flush()
        workflow_dict = sample.workflow
        import copy
        new_wf_dict = copy.deepcopy(workflow_dict)
        for key in workflow_dict['mappings']:
            if not isinstance(key, int):
                new_wf_dict['mappings'][int(
                    key)] = workflow_dict['mappings'][key]
        workflow_dict = new_wf_dict
        fk_trans = FakeTrans(self.app,
                             history=sample.history,
                             user=sample.request.user)
        workflow = self.sa_session.query(self.app.model.Workflow).get(
            workflow_dict['id'])
        if not workflow:
            log.error("Workflow mapping failure.")
            return
        if len(workflow.steps) == 0:
            log.error(
                "Workflow cannot be run because it does not have any steps")
            return
        if workflow.has_cycles:
            log.error("Workflow cannot be run because it contains cycles")
            return
        if workflow.has_errors:
            log.error(
                "Workflow cannot be run because of validation errors in some steps"
            )
            return
        # Build the state for each step
        errors = {}
        # Build a fake dictionary prior to execution.
        # Prepare each step
        for step in workflow.steps:
            step.upgrade_messages = {}
            # Construct modules
            if step.type == 'tool' or step.type is None:
                # Restore the tool state for the step
                step.module = module_factory.from_workflow_step(fk_trans, step)
                # Fix any missing parameters
                step.upgrade_messages = step.module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                step.module.add_dummy_datasets(
                    connections=step.input_connections)
                # Store state with the step
                step.state = step.module.state
                # Error dict
                if step.tool_errors:
                    errors[step.id] = step.tool_errors
            else:
                # Non-tool specific stuff?
                step.module = module_factory.from_workflow_step(fk_trans, step)
                step.state = step.module.get_runtime_state()
            # Connections by input name
            step.input_connections_by_name = dict(
                (conn.input_name, conn) for conn in step.input_connections)
        for step in workflow.steps:
            step.upgrade_messages = {}
            # Connections by input name
            step.input_connections_by_name = \
                dict((conn.input_name, conn) for conn in step.input_connections)
            # Extract just the arguments for this step by prefix
            step_errors = None
            if step.type == 'tool' or step.type is None:
                module = module_factory.from_workflow_step(fk_trans, step)
                # Fix any missing parameters
                step.upgrade_messages = module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                module.add_dummy_datasets(connections=step.input_connections)
                # Get the tool
                tool = module.tool
                # Get the state
                step.state = state = module.state
            if step_errors:
                errors[step.id] = state.inputs["__errors__"] = step_errors
        # Run each step, connecting outputs to inputs
        workflow_invocation = self.app.model.WorkflowInvocation()
        workflow_invocation.workflow = workflow
        outputs = odict()
        for i, step in enumerate(workflow.steps):
            job = None
            if step.type == 'tool' or step.type is None:
                tool = self.app.toolbox.get_tool(step.tool_id)

                def callback(input, prefixed_name, **kwargs):
                    if isinstance(input, DataToolParameter):
                        if prefixed_name in step.input_connections_by_name:
                            conn = step.input_connections_by_name[
                                prefixed_name]
                            return outputs[conn.output_step.id][
                                conn.output_name]

                visit_input_values(tool.inputs, step.state.inputs, callback)
                job, out_data = tool.execute(fk_trans,
                                             step.state.inputs,
                                             history=sample.history)
                outputs[step.id] = out_data
                for pja in step.post_job_actions:
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(self.app,
                                          self.sa_session,
                                          pja,
                                          job,
                                          replacement_dict=None)
                    else:
                        job.add_post_job_action(pja)
            else:
                job, out_data = step.module.execute(fk_trans, step.state)
                outputs[step.id] = out_data
                if step.id in workflow_dict['mappings']:
                    data = self.sa_session.query(
                        self.app.model.HistoryDatasetAssociation).get(
                            workflow_dict['mappings'][str(step.id)]['hda'])
                    outputs[step.id]['output'] = data
            workflow_invocation_step = self.app.model.WorkflowInvocationStep()
            workflow_invocation_step.workflow_invocation = workflow_invocation
            workflow_invocation_step.workflow_step = step
            workflow_invocation_step.job = job
        self.sa_session.add(workflow_invocation)
        self.sa_session.flush()
    def create(self, trans, payload, **kwd):
        """
        POST /api/workflows

        We're not creating workflows from the api.  Just execute for now.

        However, we will import them if installed_repository_file is specified
        """

        # ------------------------------------------------------------------------------- #
        ### RPARK: dictionary containing which workflows to change and edit ###
        param_map = {}
        if payload.has_key("parameters"):
            param_map = payload["parameters"]
        # ------------------------------------------------------------------------------- #

        if "workflow_id" not in payload:
            # create new
            if "installed_repository_file" in payload:
                workflow_controller = trans.webapp.controllers["workflow"]
                result = workflow_controller.import_workflow(trans=trans, cntrller="api", **payload)
                return result
            trans.response.status = 403
            return "Either workflow_id or installed_repository_file must be specified"
        if "installed_repository_file" in payload:
            trans.response.status = 403
            return "installed_repository_file may not be specified with workflow_id"
        stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
            trans.security.decode_id(payload["workflow_id"])
        )
        if stored_workflow.user != trans.user and not trans.user_is_admin():
            if (
                trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation)
                .filter_by(user=trans.user, stored_workflow=stored_workflow)
                .count()
                == 0
            ):
                trans.response.status = 400
                return "Workflow is not owned by or shared with current user"
        workflow = stored_workflow.latest_workflow
        if payload["history"].startswith("hist_id="):
            # Passing an existing history to use.
            history = trans.sa_session.query(self.app.model.History).get(
                trans.security.decode_id(payload["history"][8:])
            )
            if history.user != trans.user and not trans.user_is_admin():
                trans.response.status = 400
                return "Invalid History specified."
        else:
            history = self.app.model.History(name=payload["history"], user=trans.user)
            trans.sa_session.add(history)
            trans.sa_session.flush()
        ds_map = payload["ds_map"]
        add_to_history = "no_add_to_history" not in payload
        for k in ds_map:
            try:
                if ds_map[k]["src"] == "ldda":
                    ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                        trans.security.decode_id(ds_map[k]["id"])
                    )
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), ldda.dataset
                    )
                    hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
                elif ds_map[k]["src"] == "ld":
                    ldda = (
                        trans.sa_session.query(self.app.model.LibraryDataset)
                        .get(trans.security.decode_id(ds_map[k]["id"]))
                        .library_dataset_dataset_association
                    )
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), ldda.dataset
                    )
                    hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
                elif ds_map[k]["src"] == "hda":
                    # Get dataset handle, add to dict and history if necessary
                    hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                        trans.security.decode_id(ds_map[k]["id"])
                    )
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), hda.dataset
                    )
                else:
                    trans.response.status = 400
                    return "Unknown dataset source '%s' specified." % ds_map[k]["src"]
                if add_to_history and hda.history != history:
                    hda = hda.copy()
                    history.add_dataset(hda)
                ds_map[k]["hda"] = hda
            except AssertionError:
                trans.response.status = 400
                return "Invalid Dataset '%s' Specified" % ds_map[k]["id"]
        if not workflow:
            trans.response.status = 400
            return "Workflow not found."
        if len(workflow.steps) == 0:
            trans.response.status = 400
            return "Workflow cannot be run because it does not have any steps"
        if workflow.has_cycles:
            trans.response.status = 400
            return "Workflow cannot be run because it contains cycles"
        if workflow.has_errors:
            trans.response.status = 400
            return "Workflow cannot be run because of validation errors in some steps"
        # Build the state for each step
        rval = {}
        for step in workflow.steps:
            step_errors = None
            if step.type == "tool" or step.type is None:
                step.module = module_factory.from_workflow_step(trans, step)
                # Check for missing parameters
                step.upgrade_messages = step.module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                step.module.add_dummy_datasets(connections=step.input_connections)
                step.state = step.module.state

                ####################################################
                ####################################################
                # RPARK: IF TOOL_NAME IN PARAMETER MAP #
                if step.tool_id in param_map:
                    change_param = param_map[step.tool_id]["param"]
                    change_value = param_map[step.tool_id]["value"]
                    step.state.inputs[change_param] = change_value
                ####################################################
                ####################################################

                if step.tool_errors:
                    trans.response.status = 400
                    return "Workflow cannot be run because of validation errors in some steps: %s" % step_errors
                if step.upgrade_messages:
                    trans.response.status = 400
                    return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
            else:
                # This is an input step.  Make sure we have an available input.
                if step.type == "data_input" and str(step.id) not in ds_map:
                    trans.response.status = 400
                    return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
                step.module = module_factory.from_workflow_step(trans, step)
                step.state = step.module.get_runtime_state()
            step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
        # Run each step, connecting outputs to inputs
        workflow_invocation = self.app.model.WorkflowInvocation()
        workflow_invocation.workflow = workflow
        outputs = util.odict.odict()
        rval["history"] = trans.security.encode_id(history.id)
        rval["outputs"] = []
        for i, step in enumerate(workflow.steps):
            job = None
            if step.type == "tool" or step.type is None:
                tool = self.app.toolbox.get_tool(step.tool_id)

                def callback(input, value, prefixed_name, prefixed_label):
                    if isinstance(input, DataToolParameter):
                        if prefixed_name in step.input_connections_by_name:
                            conn = step.input_connections_by_name[prefixed_name]
                            return outputs[conn.output_step.id][conn.output_name]

                visit_input_values(tool.inputs, step.state.inputs, callback)
                job, out_data = tool.execute(trans, step.state.inputs, history=history)
                outputs[step.id] = out_data
                for pja in step.post_job_actions:
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(self.app, trans.sa_session, pja, job, replacement_dict=None)
                    else:
                        job.add_post_job_action(pja)
                for v in out_data.itervalues():
                    rval["outputs"].append(trans.security.encode_id(v.id))
            else:
                # This is an input step.  Use the dataset inputs from ds_map.
                job, out_data = step.module.execute(trans, step.state)
                outputs[step.id] = out_data
                outputs[step.id]["output"] = ds_map[str(step.id)]["hda"]
            workflow_invocation_step = self.app.model.WorkflowInvocationStep()
            workflow_invocation_step.workflow_invocation = workflow_invocation
            workflow_invocation_step.workflow_step = step
            workflow_invocation_step.job = job
        trans.sa_session.add(workflow_invocation)
        trans.sa_session.flush()
        return rval
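Read together with the checks above, the handler expects a request body roughly like the sketch below. Every id and name here is a placeholder: ids must be Galaxy-encoded ids, 'src' is one of 'ldda', 'ld' or 'hda', ds_map is keyed by step id, and the optional 'parameters' block is keyed by tool id with 'param'/'value' entries.

# Illustrative request body for POST /api/workflows as parsed by the handler
# above.  Every id and name here is a placeholder, not a real Galaxy id.
payload = {
    "workflow_id": "ebfb8f50c6abde6d",        # encoded StoredWorkflow id
    "history": "hist_id=1cd8e2f6b131e891",    # or a plain name to create a new history
    "ds_map": {                               # keyed by workflow step id
        "1": {"src": "hda", "id": "33b43b4e7093c91f"},
        "2": {"src": "ld", "id": "a799d38679e985db"},
    },
    # Optional: override one parameter per tool id before execution.
    "parameters": {
        "cat1": {"param": "queries_0|input2", "value": "10"},
    },
    # Optional flag: when present, input datasets are not added to the target history.
    # "no_add_to_history": True,
}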
Example #8
    def create(self, trans, payload, **kwd):
        """
        POST /api/workflows

        We're not creating workflows from the api.  Just execute for now.

        However, we will import them if installed_repository_file is specified
        """

        # ------------------------------------------------------------------------------- #
        ### RPARK: dictionary containing which workflows to change and edit ###
        param_map = {}
        if (payload.has_key('parameters')):
            param_map = payload['parameters']
        # ------------------------------------------------------------------------------- #

        if 'workflow_id' not in payload:
            # create new
            if 'installed_repository_file' in payload:
                workflow_controller = trans.webapp.controllers['workflow']
                result = workflow_controller.import_workflow(trans=trans,
                                                             cntrller='api',
                                                             **payload)
                return result
            trans.response.status = 403
            return "Either workflow_id or installed_repository_file must be specified"
        if 'installed_repository_file' in payload:
            trans.response.status = 403
            return "installed_repository_file may not be specified with workflow_id"
        stored_workflow = trans.sa_session.query(
            self.app.model.StoredWorkflow).get(
                trans.security.decode_id(payload['workflow_id']))
        if stored_workflow.user != trans.user and not trans.user_is_admin():
            if trans.sa_session.query(
                    trans.app.model.StoredWorkflowUserShareAssociation
            ).filter_by(user=trans.user,
                        stored_workflow=stored_workflow).count() == 0:
                trans.response.status = 400
                return ("Workflow is not owned by or shared with current user")
        workflow = stored_workflow.latest_workflow
        if payload['history'].startswith('hist_id='):
            #Passing an existing history to use.
            history = trans.sa_session.query(self.app.model.History).get(
                trans.security.decode_id(payload['history'][8:]))
            if history.user != trans.user and not trans.user_is_admin():
                trans.response.status = 400
                return "Invalid History specified."
        else:
            history = self.app.model.History(name=payload['history'],
                                             user=trans.user)
            trans.sa_session.add(history)
            trans.sa_session.flush()
        ds_map = payload['ds_map']
        add_to_history = 'no_add_to_history' not in payload
        for k in ds_map:
            try:
                if ds_map[k]['src'] == 'ldda':
                    ldda = trans.sa_session.query(
                        self.app.model.LibraryDatasetDatasetAssociation).get(
                            trans.security.decode_id(ds_map[k]['id']))
                    assert trans.user_is_admin(
                    ) or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), ldda.dataset)
                    hda = ldda.to_history_dataset_association(
                        history, add_to_history=add_to_history)
                elif ds_map[k]['src'] == 'ld':
                    ldda = trans.sa_session.query(
                        self.app.model.LibraryDataset).get(
                            trans.security.decode_id(
                                ds_map[k]
                                ['id'])).library_dataset_dataset_association
                    assert trans.user_is_admin(
                    ) or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), ldda.dataset)
                    hda = ldda.to_history_dataset_association(
                        history, add_to_history=add_to_history)
                elif ds_map[k]['src'] == 'hda':
                    # Get dataset handle, add to dict and history if necessary
                    hda = trans.sa_session.query(
                        self.app.model.HistoryDatasetAssociation).get(
                            trans.security.decode_id(ds_map[k]['id']))
                    assert trans.user_is_admin(
                    ) or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), hda.dataset)
                else:
                    trans.response.status = 400
                    return "Unknown dataset source '%s' specified." % ds_map[
                        k]['src']
                if add_to_history and hda.history != history:
                    hda = hda.copy()
                    history.add_dataset(hda)
                ds_map[k]['hda'] = hda
            except AssertionError:
                trans.response.status = 400
                return "Invalid Dataset '%s' Specified" % ds_map[k]['id']
        if not workflow:
            trans.response.status = 400
            return "Workflow not found."
        if len(workflow.steps) == 0:
            trans.response.status = 400
            return "Workflow cannot be run because it does not have any steps"
        if workflow.has_cycles:
            trans.response.status = 400
            return "Workflow cannot be run because it contains cycles"
        if workflow.has_errors:
            trans.response.status = 400
            return "Workflow cannot be run because of validation errors in some steps"
        # Build the state for each step
        rval = {}
        for step in workflow.steps:
            step_errors = None
            if step.type == 'tool' or step.type is None:
                step.module = module_factory.from_workflow_step(trans, step)
                # Check for missing parameters
                step.upgrade_messages = step.module.check_and_update_state()
                # Any connected input needs to have value DummyDataset (these
                # are not persisted so we need to do it every time)
                step.module.add_dummy_datasets(
                    connections=step.input_connections)
                step.state = step.module.state

                ####################################################
                ####################################################
                # RPARK: IF TOOL_NAME IN PARAMETER MAP #
                if step.tool_id in param_map:
                    change_param = param_map[step.tool_id]['param']
                    change_value = param_map[step.tool_id]['value']
                    step.state.inputs[change_param] = change_value
                ####################################################
                ####################################################

                if step.tool_errors:
                    trans.response.status = 400
                    return "Workflow cannot be run because of validation errors in some steps: %s" % step_errors
                if step.upgrade_messages:
                    trans.response.status = 400
                    return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
            else:
                # This is an input step.  Make sure we have an available input.
                if step.type == 'data_input' and str(step.id) not in ds_map:
                    trans.response.status = 400
                    return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
                step.module = module_factory.from_workflow_step(trans, step)
                step.state = step.module.get_runtime_state()
            step.input_connections_by_name = dict(
                (conn.input_name, conn) for conn in step.input_connections)
        # Run each step, connecting outputs to inputs
        workflow_invocation = self.app.model.WorkflowInvocation()
        workflow_invocation.workflow = workflow
        outputs = util.odict.odict()
        rval['history'] = trans.security.encode_id(history.id)
        rval['outputs'] = []
        for i, step in enumerate(workflow.steps):
            job = None
            if step.type == 'tool' or step.type is None:
                tool = self.app.toolbox.get_tool(step.tool_id)

                def callback(input, value, prefixed_name, prefixed_label):
                    if isinstance(input, DataToolParameter):
                        if prefixed_name in step.input_connections_by_name:
                            conn = step.input_connections_by_name[
                                prefixed_name]
                            return outputs[conn.output_step.id][
                                conn.output_name]

                visit_input_values(tool.inputs, step.state.inputs, callback)
                job, out_data = tool.execute(trans,
                                             step.state.inputs,
                                             history=history)
                outputs[step.id] = out_data
                for pja in step.post_job_actions:
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(self.app,
                                          trans.sa_session,
                                          pja,
                                          job,
                                          replacement_dict=None)
                    else:
                        job.add_post_job_action(pja)
                for v in out_data.itervalues():
                    rval['outputs'].append(trans.security.encode_id(v.id))
            else:
                #This is an input step.  Use the dataset inputs from ds_map.
                job, out_data = step.module.execute(trans, step.state)
                outputs[step.id] = out_data
                outputs[step.id]['output'] = ds_map[str(step.id)]['hda']
            workflow_invocation_step = self.app.model.WorkflowInvocationStep()
            workflow_invocation_step.workflow_invocation = workflow_invocation
            workflow_invocation_step.workflow_step = step
            workflow_invocation_step.job = job
        trans.sa_session.add(workflow_invocation)
        trans.sa_session.flush()
        return rval
class JobWrapper( object ):
    """
    Wraps a 'model.Job' with convenience methods for running processes and
    state management.
    """
    def __init__( self, job, queue ):
        self.job_id = job.id
        self.session_id = job.session_id
        self.user_id = job.user_id
        self.tool = queue.app.toolbox.tools_by_id.get( job.tool_id, None )
        self.queue = queue
        self.app = queue.app
        self.sa_session = self.app.model.context
        self.extra_filenames = []
        self.command_line = None
        # Tool versioning variables
        self.version_string_cmd = None
        self.version_string = ""
        self.galaxy_lib_dir = None
        # With job outputs in the working directory, we need the working
        # directory to be set before prepare is run, or else premature deletion
        # and job recovery fail.
        # Create the working dir if necessary
        try:
            self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory))
        except ObjectInvalid:
            raise Exception('Unable to create job working directory, job failure')
        self.output_paths = None
        self.output_hdas_and_paths = None
        self.tool_provided_job_metadata = None
        # Wrapper holding the info required to restore and clean up from files used for setting metadata externally
        self.external_output_metadata = metadata.JobExternalOutputMetadataWrapper( job )
        self.params = None
        if job.params:
            self.params = from_json_string( job.params )

        self.__user_system_pwent = None
        self.__galaxy_system_pwent = None

    def get_job_runner( self ):
        return self.tool.get_job_runner( self.params )

    def get_job( self ):
        return self.sa_session.query( model.Job ).get( self.job_id )

    def get_id_tag(self):
        # For compatibility with drmaa, which uses job_id right now, and TaskWrapper
        return str(self.job_id)

    def get_param_dict( self ):
        """
        Restore the dictionary of parameters from the database.
        """
        job = self.get_job()
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        return param_dict

    def get_version_string_path( self ):
        return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id))

    def prepare( self ):
        """
        Prepare the job to run by creating the working directory and the
        config files.
        """
        self.sa_session.expunge_all() #this prevents the metadata reverting that has been seen in conjunction with the PBS job runner

        if not os.path.exists( self.working_directory ):
            os.mkdir( self.working_directory )

        # Restore parameters from the database
        job = self.get_job()
        if job.user is None and job.galaxy_session is None:
            raise Exception( 'Job %s has no user and no session.' % job.id )

        incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        incoming = self.tool.params_from_strings( incoming, self.app )
        # Do any validation that could not be done at job creation
        self.tool.handle_unvalidated_param_values( incoming, self.app )
        # Restore input / output data lists
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )

        # Set up output dataset association for export history jobs. Because job
        # uses a Dataset rather than an HDA or LDA, it's necessary to set up a
        # fake dataset association that provides the needed attributes for
        # preparing a job.
        class FakeDatasetAssociation ( object ):
            def __init__( self, dataset=None ):
                self.dataset = dataset
                self.file_name = dataset.file_name
                self.metadata = dict()
                self.children = []
        special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first()
        if not special:
            special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if special:
            out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset )
            
        # These can be passed on the command line if wanted as $userId $userEmail
        if job.history and job.history.user: # check for anonymous user!
            userId = '%d' % job.history.user.id
            userEmail = str(job.history.user.email)
        else:
            userId = 'Anonymous'
            userEmail = 'Anonymous'
        incoming['__user_id__'] = incoming['userId'] = userId
        incoming['__user_email__'] = incoming['userEmail'] = userEmail
        # Build params, done before hook so hook can use
        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory )
        # Certain tools require tasks to be completed prior to job execution
        # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
        self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict )
        # Run the before queue ("exec_before_job") hook
        self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data,
                             out_data=out_data, tool=self.tool, param_dict=incoming)
        self.sa_session.flush()
        # Build any required config files
        config_filenames = self.tool.build_config_files( param_dict, self.working_directory )
        # FIXME: Build the param file (might return None, DEPRECATED)
        param_filename = self.tool.build_param_file( param_dict, self.working_directory )
        # Build the job's command line
        self.command_line = self.tool.build_command_line( param_dict )
        # FIXME: for now, tools get Galaxy's lib dir in their path
        if self.command_line and self.command_line.startswith( 'python' ):
            self.galaxy_lib_dir = os.path.abspath( "lib" ) # cwd = galaxy root
        # Shell fragment to inject dependencies
        if self.app.config.use_tool_dependencies:
            self.dependency_shell_commands = self.tool.build_dependency_shell_commands()
        else:
            self.dependency_shell_commands = None
        # We need command_line persisted to the db in order for Galaxy to re-queue the job
        # if the server was stopped and restarted before the job finished
        job.command_line = self.command_line
        self.sa_session.add( job )
        self.sa_session.flush()
        # Return list of all extra files
        extra_filenames = config_filenames
        if param_filename is not None:
            extra_filenames.append( param_filename )
        self.param_dict = param_dict
        self.extra_filenames = extra_filenames
        self.version_string_cmd = self.tool.version_string_cmd
        return extra_filenames

    def fail( self, message, exception=False ):
        """
        Indicate job failure by setting state and message on all output
        datasets.
        """
        job = self.get_job()
        self.sa_session.refresh( job )
        # if the job was deleted, don't fail it
        if not job.state == job.states.DELETED:
            # Check if the failure is due to an exception
            if exception:
                # Save the traceback immediately in case we generate another
                # below
                job.traceback = traceback.format_exc()
                # Get the exception and let the tool attempt to generate
                # a better message
                etype, evalue, tb =  sys.exc_info()
                m = self.tool.handle_job_failure_exception( evalue )
                if m:
                    message = m
            if self.app.config.outputs_to_working_directory:
                for dataset_path in self.get_output_fnames():
                    try:
                        shutil.move( dataset_path.false_path, dataset_path.real_path )
                        log.debug( "fail(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                    except ( IOError, OSError ), e:
                        log.error( "fail(): Missing output file in working directory: %s" % e )
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset = dataset_assoc.dataset
                self.sa_session.refresh( dataset )
                dataset.state = dataset.states.ERROR
                dataset.blurb = 'tool error'
                dataset.info = message
                dataset.set_size()
                dataset.dataset.set_total_size()
                if dataset.ext == 'auto':
                    dataset.extension = 'data'
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                self.sa_session.add( dataset )
                self.sa_session.flush()
            job.state = job.states.ERROR
            job.command_line = self.command_line
            job.info = message
            self.sa_session.add( job )
            self.sa_session.flush()
        #Perform email action even on failure.
        for pja in [pjaa.post_job_action for pjaa in job.post_job_actions if pjaa.post_job_action.action_type == "EmailAction"]:
            ActionBox.execute(self.app, self.sa_session, pja, job)
        # If the job was deleted, call tool specific fail actions (used for e.g. external metadata) and clean up
        if self.tool:
            self.tool.job_failed( self, message, exception )
        if self.app.config.cleanup_job == 'always' or (self.app.config.cleanup_job == 'onsuccess' and job.state == job.states.DELETED):
            self.cleanup()
    def finish( self, stdout, stderr ):
        """
        Called to indicate that the associated command has been run. Updates
        the output datasets based on stderr and stdout from the command, and
        the contents of the output files.
        """
        # default post job setup
        self.sa_session.expunge_all()
        job = self.get_job()

        try:
            self.reclaim_ownership()
        except:
            self.fail( job.info )
            log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )

        # if the job was deleted, don't finish it
        if job.state == job.states.DELETED or job.state == job.states.ERROR:
            #ERROR at this point means the job was deleted by an administrator.
            return self.fail( job.info )
        if stderr:
            job.state = job.states.ERROR
        else:
            job.state = job.states.OK
        if self.version_string_cmd:
            version_filename = self.get_version_string_path()
            if os.path.exists(version_filename):
                self.version_string = open(version_filename).read()
                os.unlink(version_filename)

        if self.app.config.outputs_to_working_directory and not self.__link_file_check():
            for dataset_path in self.get_output_fnames():
                try:
                    shutil.move( dataset_path.false_path, dataset_path.real_path )
                    log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                except ( IOError, OSError ):
                    # this can happen if Galaxy is restarted during the job's
                    # finish method - the false_path file has already moved,
                    # and when the job is recovered, it won't be found.
                    if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                        log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                    else:
                        return self.fail( "Job %s's output dataset(s) could not be read" % job.id )
        job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
        job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
        def in_directory( file, directory ):
            # Make both absolute.
            directory = os.path.abspath( directory )
            file = os.path.abspath( file )

            #Return true, if the common prefix of both is equal to directory
            #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
            return os.path.commonprefix( [ file, directory ] ) == directory
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
            #should this also be checking library associations? - can a library item be added from a history before the job has ended? - let's not allow this to occur
            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations: #need to update all associated output hdas, i.e. history was shared with job running
                #
                # If HDA is to be copied from the working directory, do it now so that other attributes are correctly set.
                #
                if isinstance( dataset, model.HistoryDatasetAssociation ):
                    joda = self.sa_session.query( model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
                    if joda and job_tool:
                        hda_tool_output = job_tool.outputs.get( joda.name, None )
                        if hda_tool_output and hda_tool_output.from_work_dir:
                            # Copy from working dir to HDA.
                            source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
                            if in_directory( source_file, self.working_directory ):
                                try:
                                    shutil.move( source_file, dataset.file_name )
                                    log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                                except ( IOError, OSError ):
                                    log.debug( "finish(): Could not move %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                            else:
                                # Security violation.
                                log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, self.working_directory ) )

                dataset.blurb = 'done'
                dataset.peek  = 'no peek'
                dataset.info = ( dataset.info  or '' ) + context['stdout'] + context['stderr']
                dataset.tool_version = self.version_string
                dataset.set_size()
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                if context['stderr']:
                    dataset.blurb = "error"
                elif dataset.has_data():
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get( 'ext', 'data' )
                        dataset.init_meta( copy_from=dataset )
                    #if a dataset was copied, it won't appear in our dictionary:
                    #either use the metadata from originating output dataset, or call set_meta on the copies
                    #it would be quicker to just copy the metadata from the originating output dataset,
                    #but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                    if not self.app.config.set_metadata_externally or \
                     ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) \
                       and self.app.config.retry_metadata_internally ):
                        dataset.set_meta( overwrite = False )
                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
                        dataset._state = model.Dataset.states.FAILED_METADATA
                    else:
                        # Load metadata from file.
                        # We need to stop allowing metadata to be edited while the job is still running:
                        # if it is edited, the metadata changed on the running output will no longer match
                        # the metadata that was stored to disk for use by the external process,
                        # and the changes made by the user will be lost without warning or notice.
                        dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                    try:
                        assert context.get( 'line_count', None ) is not None
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                        else:
                            dataset.set_peek( line_count=context['line_count'] )
                    except Exception:
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( is_multi_byte=True )
                        else:
                            dataset.set_peek()
                    try:
                        # set the name if provided by the tool
                        dataset.name = context['name']
                    except Exception:
                        pass
                else:
                    dataset.blurb = "empty"
                    if dataset.ext == 'auto':
                        dataset.extension = 'txt'
                self.sa_session.add( dataset )
            if context['stderr']:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
            else:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
            # If any of the rest of the finish method below raises an
            # exception, the fail method will run and set the datasets to
            # ERROR.  The user would never see that the datasets are in error if
            # they were flushed as OK here, since upon doing so the history
            # panel stops checking for updates.  So allow the
            # self.sa_session.flush() at the bottom of this method to set
            # the state instead.

        for pja in job.post_job_actions:
            ActionBox.execute(self.app, self.sa_session, pja.post_job_action, job)
        # Flush all the dataset and job changes above.  Dataset state changes
        # will now be seen by the user.
        self.sa_session.flush()
        # Save stdout and stderr
        if len( stdout ) > 32768:
            log.error( "stdout for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stdout = stdout[:32768]
        if len( stderr ) > 32768:
            log.error( "stderr for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stderr = stderr[:32768]
        # custom post process setup
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
        # Why not re-use self.param_dict here?  We probably should: building it from strings
        # causes tools.parameters.basic.UnvalidatedValue to be used in the following methods
        # instead of the validated and transformed values, e.g. when running workflows.
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        # Check for and move associated_files
        self.tool.collect_associated_files(out_data, self.working_directory)
        gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if gitd:
            self.tool.collect_associated_files({'' : gitd}, self.working_directory)
        # Create generated output children and primary datasets and add to param_dict
        collected_datasets = {
            'children': self.tool.collect_child_datasets(out_data, self.working_directory),
            'primary': self.tool.collect_primary_datasets(out_data, self.working_directory)
        }
        param_dict.update({'__collected_datasets__':collected_datasets})
        # Certain tools require tasks to be completed after job execution
        # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
        self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict, job = job )
        # Call 'exec_after_process' hook
        self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,
                             out_data=out_data, param_dict=param_dict,
                             tool=self.tool, stdout=stdout, stderr=stderr )
        job.command_line = self.command_line

        collected_bytes = 0
        # Once datasets are collected, set the total dataset size (includes extra files)
        for dataset_assoc in job.output_datasets:
            dataset_assoc.dataset.dataset.set_total_size()
            collected_bytes += dataset_assoc.dataset.dataset.get_total_size()

        if job.user:
            job.user.total_disk_usage += collected_bytes

        # fix permissions
        for path in [ dp.real_path for dp in self.get_output_fnames() ]:
            util.umask_fix_perms( path, self.app.config.umask, 0666, self.app.config.gid )
        self.sa_session.flush()
        log.debug( 'job %d ended' % self.job_id )
        if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ):
            self.cleanup()
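
# The from_work_dir handling above relies on in_directory() to ensure a tool
# cannot ask for outputs to be copied from outside the job working directory.
# A minimal sketch of such a containment check, as an illustration only (the
# real helper imported by this module may differ):

import os


def in_directory_sketch(file_path, directory):
    """Return True if file_path resolves to a location inside directory."""
    # Resolve symlinks and relative components first, so '../' tricks and
    # symlinked paths cannot escape the directory.
    directory = os.path.realpath(directory)
    file_path = os.path.realpath(file_path)
    return file_path.startswith(directory + os.sep)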
Beispiel #11
0
    def create(self, trans, payload, **kwd):
        """
        POST /api/workflows

        We're not creating workflows from the api.  Just execute for now.

        However, we will import them if installed_repository_file is specified
        """

        # Pull parameters out of payload.
        workflow_id = payload['workflow_id']
        param_map = payload.get('parameters', {})
        ds_map = payload['ds_map']
        add_to_history = 'no_add_to_history' not in payload
        history_param = payload['history']

        # Get/create workflow.
        if not workflow_id:
            # create new
            if 'installed_repository_file' in payload:
                workflow_controller = trans.webapp.controllers[ 'workflow' ]
                result = workflow_controller.import_workflow( trans=trans,
                                                              cntrller='api',
                                                              **payload)
                return result
            trans.response.status = 403
            return "Either workflow_id or installed_repository_file must be specified"
        if 'installed_repository_file' in payload:
            trans.response.status = 403
            return "installed_repository_file may not be specified with workflow_id"

        # Get workflow + accessibility check.
        stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
                        trans.security.decode_id(workflow_id))
        if stored_workflow.user != trans.user and not trans.user_is_admin():
            if trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation).filter_by(user=trans.user, stored_workflow=stored_workflow).count() == 0:
                trans.response.status = 400
                return("Workflow is not owned by or shared with current user")
        workflow = stored_workflow.latest_workflow

        # Get target history.
        if history_param.startswith('hist_id='):
            # An existing history was passed as 'hist_id=<encoded id>'.
            history = trans.sa_session.query(self.app.model.History).get(
                    trans.security.decode_id(history_param[8:]))
            if history.user != trans.user and not trans.user_is_admin():
                trans.response.status = 400
                return "Invalid History specified."
        else:
            # Send workflow outputs to new history.
            history = self.app.model.History(name=history_param, user=trans.user)
            trans.sa_session.add(history)
            trans.sa_session.flush()

        # Set workflow inputs.
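        # Each ds_map entry identifies its source: 'ldda' (library dataset dataset
        # association), 'ld' (library dataset) or 'hda' (history dataset association).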
        for k in ds_map:
            try:
                if ds_map[k]['src'] == 'ldda':
                    ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                            trans.security.decode_id(ds_map[k]['id']))
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), ldda.dataset )
                    hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
                elif ds_map[k]['src'] == 'ld':
                    ldda = trans.sa_session.query(self.app.model.LibraryDataset).get(
                            trans.security.decode_id(ds_map[k]['id'])).library_dataset_dataset_association
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), ldda.dataset )
                    hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
                elif ds_map[k]['src'] == 'hda':
                    # Get dataset handle, add to dict and history if necessary
                    hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                            trans.security.decode_id(ds_map[k]['id']))
                    assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), hda.dataset )
                else:
                    trans.response.status = 400
                    return "Unknown dataset source '%s' specified." % ds_map[k]['src']
                if add_to_history and hda.history != history:
                    hda = hda.copy()
                    history.add_dataset(hda)
                ds_map[k]['hda'] = hda
            except AssertionError:
                trans.response.status = 400
                return "Invalid Dataset '%s' Specified" % ds_map[k]['id']

        # Sanity checks.
        if not workflow:
            trans.response.status = 400
            return "Workflow not found."
        if len( workflow.steps ) == 0:
            trans.response.status = 400
            return "Workflow cannot be run because it does not have any steps"
        if workflow.has_cycles:
            trans.response.status = 400
            return "Workflow cannot be run because it contains cycles"
        if workflow.has_errors:
            trans.response.status = 400
            return "Workflow cannot be run because of validation errors in some steps"

        # Build the state for each step
        rval = {}
        for step in workflow.steps:
            step_errors = None
            if step.type == 'tool' or step.type is None:
                step.module = module_factory.from_workflow_step( trans, step )
                # Check for missing parameters
                step.upgrade_messages = step.module.check_and_update_state()
                # Any connected input needs to have its value set to a DummyDataset
                # (these are not persisted, so we need to do this every time).
                step.module.add_dummy_datasets( connections=step.input_connections )
                step.state = step.module.state
                _update_step_parameters(step, param_map)
                if step.tool_errors:
                    trans.response.status = 400
                    return "Workflow cannot be run because of validation errors in some steps: %s" % step_errors
                if step.upgrade_messages:
                    trans.response.status = 400
                    return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
            else:
                # This is an input step.  Make sure we have an available input.
                if step.type == 'data_input' and str(step.id) not in ds_map:
                    trans.response.status = 400
                    return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
                step.module = module_factory.from_workflow_step( trans, step )
                step.state = step.module.get_runtime_state()
            step.input_connections_by_name = dict( ( conn.input_name, conn ) for conn in step.input_connections )

        # Run each step, connecting outputs to inputs
        workflow_invocation = self.app.model.WorkflowInvocation()
        workflow_invocation.workflow = workflow
        outputs = util.odict.odict()
        rval['history'] = trans.security.encode_id(history.id)
        rval['outputs'] = []
        for step in workflow.steps:
            job = None
            if step.type == 'tool' or step.type is None:
                tool = self.app.toolbox.get_tool( step.tool_id )

                def callback( input, value, prefixed_name, prefixed_label ):
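                    # Replace each connected DataToolParameter value with the output
                    # dataset produced by the upstream step it is connected to.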
                    if isinstance( input, DataToolParameter ):
                        if prefixed_name in step.input_connections_by_name:
                            conn = step.input_connections_by_name[ prefixed_name ]
                            return outputs[ conn.output_step.id ][ conn.output_name ]
                visit_input_values( tool.inputs, step.state.inputs, callback )
                job, out_data = tool.execute( trans, step.state.inputs, history=history)
                outputs[ step.id ] = out_data

                # Do post-job actions.
                replacement_params = payload.get('replacement_params', {})
                for pja in step.post_job_actions:
                    if pja.action_type in ActionBox.immediate_actions:
                        ActionBox.execute(trans.app, trans.sa_session, pja, job, replacement_dict=replacement_params)
                    else:
                        job.add_post_job_action(pja)

                for v in out_data.itervalues():
                    rval['outputs'].append(trans.security.encode_id(v.id))
            else:
                # This is an input step.  Use the dataset inputs from ds_map.
                job, out_data = step.module.execute( trans, step.state)
                outputs[step.id] = out_data
                outputs[step.id]['output'] = ds_map[str(step.id)]['hda']
            workflow_invocation_step = self.app.model.WorkflowInvocationStep()
            workflow_invocation_step.workflow_invocation = workflow_invocation
            workflow_invocation_step.workflow_step = step
            workflow_invocation_step.job = job
        trans.sa_session.add( workflow_invocation )
        trans.sa_session.flush()
        return rval
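
# A minimal client-side sketch for calling this endpoint with the requests
# library.  The base URL, the 'key' query parameter and every id below are
# assumptions for illustration; only the payload fields and the returned
# 'history'/'outputs' keys come from the handler above.
import requests

payload = {
    'workflow_id': 'ebfb8f50c6abde6d',                          # hypothetical encoded workflow id
    'history': 'hist_id=1cd8e2f6b131e891',                      # or a plain name to create a new history
    'ds_map': {'1': {'src': 'hda', 'id': '2f94e8ae9edff68a'}},  # input step id -> dataset
}
response = requests.post('https://galaxy.example.org/api/workflows',
                         params={'key': 'YOUR_API_KEY'},        # assumed auth mechanism
                         json=payload)
print(response.json())   # expected keys per the handler above: 'history', 'outputs'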