Example #1
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None,
                mapping_over_collection=False,
                execution_cache=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        self._check_access(tool, trans)
        app = trans.app
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)
        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections = self._collect_inputs(
            tool, trans, incoming, history, current_user_roles)

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        # format="input" previously would give you a random extension from
        # the input extensions, now it should just give "input" as the output
        # format.
        input_ext = 'data' if tool.profile < 16.04 else "input"
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in reversed(inp_data.items()):
            if not data:
                data = NoneDataset(datatypes_registry=app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            if tool.profile < 16.04:
                input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr(data, "element_identifier", None)
            if identifier is not None:
                incoming["%s|__identifier__" % name] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        (chrom_info, db_dataset) = app.genome_builds.get_chrom_info(
            input_dbkey,
            trans=trans,
            custom_build_hack_get_len_from_fasta_conversion=(tool.id != 'CONVERTER_fasta_to_len'))
        if db_dataset:
            inp_data.update({"chromInfo": db_dataset})
        incoming["chromInfo"] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = app.security_agent.history_get_default_permissions(
                history)

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names(input_names)

        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters(trans, tool, incoming)
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator(app)

        def handle_output(name, output, hidden=None):
            if output.parent:
                parent_to_child_pairs.append((output.parent, name))
                child_dataset_names.add(name)
            # What is the following hack for? Need to document under what
            # conditions the following can occur.
            # HACK: the output data has already been created;
            #       this happens e.g. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query(
                    app.model.HistoryDatasetAssociation).get(dataid)
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(output, wrapped_params.params,
                                              inp_data,
                                              inp_dataset_collections,
                                              input_ext)
                data = app.model.HistoryDatasetAssociation(extension=ext,
                                                           create_dataset=True,
                                                           flush=False)
                if hidden is None:
                    hidden = output.hidden
                if hidden:
                    data.visible = False
                trans.sa_session.add(data)
                trans.app.security_agent.set_all_dataset_permissions(
                    data.dataset, output_permissions, new=True)

            # Must flush before setting object store id currently.
            # TODO: optimize this.
            trans.sa_session.flush()
            object_store_populator.set_object_store_id(data)

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance(metadata_source, string_types):
                    metadata_source = inp_data.get(metadata_source)

            if metadata_source is not None:
                data.init_meta(copy_from=metadata_source)
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name(output, data, tool, on_text,
                                             trans, incoming, history,
                                             wrapped_params.params, job_params)
            # Store output
            out_data[name] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict(out_data)
                output_action_params.update(incoming)
                output.actions.apply_action(data, output_action_params)
            # Also set the default values of actions of type metadata
            self.set_metadata_defaults(output, data, tool, on_text, trans,
                                       incoming, history,
                                       wrapped_params.params, job_params)
            # Flush all datasets at once.
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = app.dataset_collections_service
                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    element_identifiers = []
                    input_collections = dict([
                        (k, v[0][0])
                        for k, v in inp_dataset_collections.iteritems()
                    ])
                    known_outputs = output.known_outputs(
                        input_collections, collections_manager.type_registry)
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(
                                current_collection_type.split(":")[1:])
                            name_to_index = dict(
                                (value["name"], index)
                                for (index, value) in enumerate(current_element_identifiers))
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(dict(
                                        name=parent_id,
                                        collection_type=current_collection_type,
                                        src="new_collection",
                                        element_identifiers=[],
                                    ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[index]["element_identifiers"]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output(effective_output_name,
                                                output_part_def.output_def,
                                                hidden=True)
                        # TODO: this shouldn't exist in the top-level of the history at all
                        # but for now we are still working around that by hiding the contents
                        # there.
                        # Following hack causes the dataset not to be added to the history...
                        child_dataset_names.add(effective_output_name)

                        if set_output_history:
                            history.add_dataset(element,
                                                set_hid=set_output_hid,
                                                quota=False)
                        trans.sa_session.add(element)
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(
                            elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(
                            element_identifiers=element_identifiers)

                    collection_type = output.structure.collection_type
                    if collection_type is None:
                        collection_type_source = output.structure.collection_type_source
                        if collection_type_source is None:
                            # TODO: Not a new problem, but this should be determined
                            # sooner.
                            raise Exception(
                                "Could not determine collection type to create."
                            )
                        if collection_type_source not in input_collections:
                            raise Exception(
                                "Could not find collection type source with name [%s]."
                                % collection_type_source)

                        collection_type = input_collections[
                            collection_type_source].collection.collection_type

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=collection_type,
                            **element_kwds)
                        out_collections[name] = dc
                    else:
                        hdca_name = self.get_output_name(
                            output, None, tool, on_text, trans, incoming,
                            history, wrapped_params.params, job_params)
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=collection_type,
                            trusted_identifiers=True,
                            **element_kwds)
                        # name here is name of the output element - not name
                        # of the hdca.
                        out_collection_instances[name] = hdca
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output(name, output)
                    log.info("Handled output named %s for tool %s %s" %
                             (name, tool.id, handle_output_timer))

        add_datasets_timer = ExecutionTimer()
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        datasets_to_persist = []
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children or datasets that already exist (e.g. created by the async controller)
                data = out_data[name]
                datasets_to_persist.append(data)
        if set_output_history:
            # Set HID and add to history.
            # This is brand new and certainly empty so don't worry about quota.
            # TOOL OPTIMIZATION NOTE - from above loop to the job create below 99%+
            # of execution time happens within history.add_datasets.
            history.add_datasets(trans.sa_session,
                                 datasets_to_persist,
                                 set_hid=set_output_hid,
                                 quota=False,
                                 flush=False)
        else:
            for data in datasets_to_persist:
                trans.sa_session.add(data)

        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[parent_name]
            child_dataset = out_data[child_name]
            parent_dataset.children.append(child_dataset)

        log.info("Added output datasets to history %s" % add_datasets_timer)
        job_setup_timer = ExecutionTimer()
        # Create the job object
        job, galaxy_session = self._new_job_for_session(trans, tool, history)
        self._record_inputs(trans, tool, job, incoming, inp_data,
                            inp_dataset_collections, current_user_roles)
        self._record_outputs(job, out_data, out_collections,
                             out_collection_instances)

        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps(job_params)
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add(job)
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query(
                    app.model.Job).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (
                    rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (
                    old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (
                        old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type(
                        galaxy_session) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (
                        old_job.id, job.id, old_job.session_id,
                        galaxy_session.id)
                else:
                    raise Exception(
                        '(%s/%s): Remapping via the API is not (yet) supported'
                        % (old_job.id, job.id))
                # Duplicate PJAs before remap.
                for pjaa in old_job.post_job_actions:
                    job.add_post_job_action(pjaa.post_job_action)
                for jtod in old_job.output_datasets:
                    for (job_to_remap,
                         jtid) in [(jtid.job, jtid)
                                   for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id
                                == trans.user.id) or (trans.user is None and
                                                      job_to_remap.session_id
                                                      == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [
                                    dep_jtod.dataset for dep_jtod in
                                    job_to_remap.output_datasets
                            ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            input_values = dict([
                                (p.name, json.loads(p.value))
                                for p in job_to_remap.parameters
                            ])
                            update_param(jtid.name, input_values,
                                         str(out_data[jtod.name].id))
                            for p in job_to_remap.parameters:
                                p.value = json.dumps(input_values[p.name])
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info(
                                'Job %s input HDA %s remapped to new HDA %s' %
                                (job_to_remap.id, jtod.dataset.id,
                                 jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')

        log.info("Setup for job %s complete, ready to flush %s" %
                 (job.log_str(), job_setup_timer))

        job_flush_timer = ExecutionTimer()
        trans.sa_session.flush()
        log.info("Flushed transaction for job %s %s" %
                 (job.log_str(), job_flush_timer))
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[name]
            redirect_url = tool.parse_redirect_url(dataset, incoming)
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get('GALAXY_URL', None)
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state(app.model.Job.states.OK)
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add(job)
            trans.sa_session.flush()
            trans.response.send_redirect(
                url_for(controller='tool_runner',
                        action='redirect',
                        redirect_url=redirect_url))
        else:
            # Put the job in the queue if tracking in memory
            app.job_queue.put(job.id, job.tool_id)
            trans.log_event("Added job to the job queue, id: %s" % str(job.id),
                            tool_id=job.tool_id)
            return job, out_data
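The collection branch above accumulates a nested element_identifiers structure by walking each output's parent_ids and creating placeholder "new_collection" entries on demand. The following self-contained sketch shows the same bookkeeping in isolation; it uses plain dicts and lists with illustrative names and is not part of the method above (the real code also carries the HDA objects created by handle_output):

    # Simplified, runnable stand-in for the parent_ids walk in the collection branch.
    def descend(identifiers, parent_id, collection_type):
        # Find the entry named parent_id, creating a placeholder sub-collection if
        # missing, and return the list that nested elements are appended to.
        name_to_index = dict((value["name"], index) for index, value in enumerate(identifiers))
        if parent_id not in name_to_index:
            index = len(identifiers)
            identifiers.append(dict(
                name=parent_id,
                collection_type=collection_type,
                src="new_collection",
                element_identifiers=[],
            ))
        else:
            index = name_to_index[parent_id]
        return identifiers[index]["element_identifiers"]

    element_identifiers = []
    # Two elements nested under parent "sample1" of a hypothetical paired sub-collection:
    descend(element_identifiers, "sample1", "paired").append(
        {"__object__": "<HDA for forward reads>", "name": "forward"})
    descend(element_identifiers, "sample1", "paired").append(
        {"__object__": "<HDA for reverse reads>", "name": "reverse"})
    # element_identifiers now holds one "sample1" entry whose element_identifiers
    # list contains the "forward" and "reverse" elements.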
Example #2
File: __init__.py  Project: ashvark/galaxy
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False, execution_cache=None ):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        self._check_access( tool, trans )
        app = trans.app
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)
        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections = self._collect_inputs(tool, trans, incoming, history, current_user_roles)

        # Build name for output datasets based on tool name and input names
        on_text = self._get_on_text( inp_data )

        # format="input" previously would give you a random extension from
        # the input extensions, now it should just give "input" as the output
        # format.
        input_ext = 'data' if tool.profile < 16.04 else "input"
        input_dbkey = incoming.get( "dbkey", "?" )
        preserved_tags = []
        for name, data in reversed(inp_data.items()):
            if not data:
                data = NoneDataset( datatypes_registry=app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            if tool.profile < 16.04:
                input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr( data, "element_identifier", None )
            if identifier is not None:
                incoming[ "%s|__identifier__" % name ] = identifier

            for tag in data.tags:
                if tag.user_tname == 'name':
                    preserved_tags.append(tag)

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = app.security_agent.history_get_default_permissions( history )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = self._wrapped_params( trans, tool, incoming )

        out_data = odict()
        input_collections = dict( (k, v[0][0]) for k, v in inp_dataset_collections.items() )
        output_collections = OutputCollections(
            trans,
            history,
            tool=tool,
            tool_action=self,
            input_collections=input_collections,
            mapping_over_collection=mapping_over_collection,
            on_text=on_text,
            incoming=incoming,
            params=wrapped_params.params,
            job_params=job_params,
        )

        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( app )

        def handle_output( name, output, hidden=None ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            # What is the following hack for? Need to document under what
            # conditions the following can occur.
            # HACK: the output data has already been created;
            #       this happens e.g. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(
                    output,
                    wrapped_params.params,
                    inp_data,
                    inp_dataset_collections,
                    input_ext
                )
                data = app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, flush=False )
                if hidden is None:
                    hidden = output.hidden
                if hidden:
                    data.visible = False
                trans.sa_session.add( data )
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions, new=True )

            for tag in preserved_tags:
                data.tags.append(tag.copy())

            # Must flush before setting object store id currently.
            # TODO: optimize this.
            trans.sa_session.flush()
            object_store_populator.set_object_store_id( data )

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance( metadata_source, string_types ):
                    metadata_source = inp_data.get( metadata_source )

            if metadata_source is not None:
                data.init_meta( copy_from=metadata_source )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Store output
            out_data[ name ] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict( out_data )
                output_action_params.update( incoming )
                output.actions.apply_action( data, output_action_params )
            # Also set the default values of actions of type metadata
            self.set_metadata_defaults( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Flush all datasets at once.
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = app.dataset_collections_service
                    element_identifiers = []
                    known_outputs = output.known_outputs( input_collections, collections_manager.type_registry )
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(current_collection_type.split(":")[1:])
                            name_to_index = dict((value["name"], index) for (index, value) in enumerate(current_element_identifiers))
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(dict(
                                        name=parent_id,
                                        collection_type=current_collection_type,
                                        src="new_collection",
                                        element_identifiers=[],
                                    ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[ index ][ "element_identifiers" ]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output( effective_output_name, output_part_def.output_def, hidden=True )
                        # TODO: this shouldn't exist in the top-level of the history at all
                        # but for now we are still working around that by hiding the contents
                        # there.
                        # Following hack causes the dataset not to be added to the history...
                        child_dataset_names.add( effective_output_name )

                        history.add_dataset( element, set_hid=set_output_hid, quota=False )
                        trans.sa_session.add( element )
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(element_identifiers=element_identifiers)

                    output_collections.create_collection(
                        output=output,
                        name=name,
                        tags=preserved_tags,
                        **element_kwds
                    )
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output( name, output )
                    log.info("Handled output named %s for tool %s %s" % (name, tool.id, handle_output_timer))

        add_datasets_timer = ExecutionTimer()
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        datasets_to_persist = []
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children or datasets that already exist (e.g. created by the async controller)
                data = out_data[ name ]
                datasets_to_persist.append( data )
        # Set HID and add to history.
        # This is brand new and certainly empty so don't worry about quota.
        # TOOL OPTIMIZATION NOTE - from above loop to the job create below 99%+
        # of execution time happens within history.add_datasets.
        history.add_datasets( trans.sa_session, datasets_to_persist, set_hid=set_output_hid, quota=False, flush=False )

        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )

        log.info("Added output datasets to history %s" % add_datasets_timer)
        job_setup_timer = ExecutionTimer()
        # Create the job object
        job, galaxy_session = self._new_job_for_session( trans, tool, history )
        self._record_inputs( trans, tool, job, incoming, inp_data, inp_dataset_collections, current_user_roles )
        self._record_outputs( job, out_data, output_collections )
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps( job_params )
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add( job )
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( app.model.Job ).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
                else:
                    raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
                # Duplicate PJAs before remap.
                for pjaa in old_job.post_job_actions:
                    job.add_post_job_action(pjaa.post_job_action)
                for jtod in old_job.output_datasets:
                    for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            input_values = dict( [ ( p.name, json.loads( p.value ) ) for p in job_to_remap.parameters ] )
                            update_param( jtid.name, input_values, str( out_data[ jtod.name ].id ) )
                            for p in job_to_remap.parameters:
                                p.value = json.dumps( input_values[ p.name ] )
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')

        log.info("Setup for job %s complete, ready to flush %s" % (job.log_str(), job_setup_timer))

        job_flush_timer = ExecutionTimer()
        trans.sa_session.flush()
        log.info("Flushed transaction for job %s %s" % (job.log_str(), job_flush_timer))
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state( app.model.Job.states.OK )
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add( job )
            trans.sa_session.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Put the job in the queue if tracking in memory
            app.job_queue.put( job.id, job.tool_id )
            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
            return job, out_data
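Compared with example #1, this variant also copies 'name' tags from the input datasets onto every output (the preserved_tags list above) and hands collection creation off to an OutputCollections helper. A minimal stand-in sketch of the tag propagation, using plain Python classes rather than the actual Galaxy tag model instances (of which only user_tname and copy() are used above; the user_value attribute here is purely illustrative):

    # Illustrative stand-in for the tag objects; attribute names mirror those used above.
    class Tag(object):
        def __init__(self, user_tname, user_value):
            self.user_tname = user_tname
            self.user_value = user_value

        def copy(self):
            return Tag(self.user_tname, self.user_value)

    input_tags = [Tag('name', 'sample_A'), Tag('group', 'control')]
    preserved_tags = [t for t in input_tags if t.user_tname == 'name']  # keep only 'name' tags
    output_tags = [t.copy() for t in preserved_tags]                    # what handle_output does per output
    print([(t.user_tname, t.user_value) for t in output_tags])          # [('name', 'sample_A')]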