def __init__(self,
              dataset,
              datatypes_registry=None,
              tool=None,
              name=None,
              dataset_path=None,
              identifier=None):
     if not dataset:
         try:
             # TODO: allow this to work when working with grouping
             ext = tool.inputs[name].extensions[0]
         except Exception:
             ext = 'data'
         self.dataset = wrap_with_safe_string(
             NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
             no_wrap_classes=ToolParameterValueWrapper)
     else:
         # Tool wrappers should not normally be accessing .dataset directly,
         # so we will wrap it and keep the original around for file paths
         # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
         self.unsanitized = dataset
         self.dataset = wrap_with_safe_string(
             dataset, no_wrap_classes=ToolParameterValueWrapper)
         self.metadata = self.MetadataWrapper(dataset.metadata)
     self.datatypes_registry = datatypes_registry
     self.false_path = getattr(dataset_path, "false_path", None)
     self.false_extra_files_path = getattr(dataset_path,
                                           "false_extra_files_path", None)
     self._element_identifier = identifier
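
The constructor above reads false_path and false_extra_files_path from the optional dataset_path argument via getattr with a None default, so it also works when no path rewriting is in play. A minimal, self-contained sketch of that pattern; FakeDatasetPath and the literal paths are illustrative stand-ins, not Galaxy's API:

from collections import namedtuple

# Hypothetical stand-in exposing only the two attributes the wrapper reads.
FakeDatasetPath = namedtuple("FakeDatasetPath",
                             ["false_path", "false_extra_files_path"])

dataset_path = FakeDatasetPath(
    false_path="/job/working/dataset_1.dat",
    false_extra_files_path="/job/working/dataset_1_files",
)

# getattr with a default tolerates dataset_path being None entirely.
print(getattr(dataset_path, "false_path", None))  # /job/working/dataset_1.dat
print(getattr(None, "false_path", None))          # None
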
Example #2
 def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
     output_dataset_paths = dataset_path_rewrites( output_paths )
     for name, hda in output_datasets.items():
         # Write outputs to the working directory (for security purposes)
         # if desired.
         real_path = hda.file_name
         if real_path in output_dataset_paths:
             dataset_path = output_dataset_paths[ real_path ]
             param_dict[name] = DatasetFilenameWrapper( hda, dataset_path=dataset_path )
             try:
                 open( dataset_path.false_path, 'w' ).close()
             except EnvironmentError:
                 pass  # May well not exist - e.g. Pulsar.
         else:
             param_dict[name] = DatasetFilenameWrapper( hda )
         # Provide access to a path to store additional files
         # TODO: path munging for cluster/dataset server relocatability
         param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
         for child in hda.children:
             param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
     for out_name, output in self.tool.outputs.iteritems():
         if out_name not in param_dict and output.filters:
             # Assume the reason we lack this output is because a filter
             # failed to pass; for tool writing convenience, provide a
             # NoneDataset
             ext = getattr( output, "format", None )  # populate only for output datasets (not collections)
             param_dict[ out_name ] = NoneDataset( datatypes_registry=self.app.datatypes_registry, ext=ext )
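
Example #2 pre-creates an empty file at each rewritten (false) output path and swallows EnvironmentError, since the path may not exist on the local filesystem (e.g. when a remote runner such as Pulsar stages outputs). A self-contained sketch of that touch-and-tolerate pattern; the function name and paths are illustrative:

import os
import tempfile


def touch_placeholder(false_path):
    # Create an empty placeholder file, ignoring environment errors such as a
    # missing parent directory.
    try:
        open(false_path, 'w').close()
    except EnvironmentError:
        pass  # may well not exist locally


touch_placeholder(os.path.join(tempfile.gettempdir(), "example_dataset_1.dat"))
touch_placeholder("/nonexistent/dir/example_dataset_1.dat")  # silently ignored
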
Example #3
 def __init__(self,
              dataset,
              datatypes_registry=None,
              tool=None,
              name=None,
              dataset_path=None,
              identifier=None,
              formats=None):
     if not dataset:
         try:
             # TODO: allow this to work when working with grouping
             ext = tool.inputs[name].extensions[0]
         except Exception:
             ext = 'data'
         self.dataset = wrap_with_safe_string(
             NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
             no_wrap_classes=ToolParameterValueWrapper)
     else:
         # Tool wrappers should not normally be accessing .dataset directly,
         # so we will wrap it and keep the original around for file paths
         # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
         if formats:
             target_ext, converted_dataset = dataset.find_conversion_destination(
                 formats)
             if target_ext and converted_dataset:
                 dataset = converted_dataset
         self.unsanitized = dataset
         self.dataset = wrap_with_safe_string(
             dataset, no_wrap_classes=ToolParameterValueWrapper)
         self.metadata = self.MetadataWrapper(dataset.metadata)
         if hasattr(dataset, 'tags'):
             self.groups = {
                 tag.user_value.lower()
                 for tag in dataset.tags if tag.user_tname == 'group'
             }
         else:
             # May be a 'FakeDatasetAssociation'
             self.groups = set()
     self.datatypes_registry = datatypes_registry
     self.false_path = getattr(dataset_path, "false_path", None)
     self.false_extra_files_path = getattr(dataset_path,
                                           "false_extra_files_path", None)
     self._element_identifier = identifier
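
Example #3 also collects the lower-cased values of any 'group' tags on the dataset into a set. A self-contained sketch of that set comprehension with stand-in tag objects (real Galaxy tag models carry more fields than shown here):

from collections import namedtuple

FakeTag = namedtuple("FakeTag", ["user_tname", "user_value"])

tags = [
    FakeTag("group", "Control"),
    FakeTag("name", "sample1"),
    FakeTag("group", "TREATED"),
]
groups = {tag.user_value.lower() for tag in tags if tag.user_tname == "group"}
print(groups)  # {'control', 'treated'}
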
Example #4
 def __init__(self,
              dataset,
              datatypes_registry=None,
              tool=None,
              name=None,
              dataset_path=None):
     if not dataset:
         try:
             # TODO: allow this to work when working with grouping
             ext = tool.inputs[name].extensions[0]
         except Exception:
             ext = 'data'
         self.dataset = NoneDataset(datatypes_registry=datatypes_registry,
                                    ext=ext)
     else:
         self.dataset = dataset
         self.metadata = self.MetadataWrapper(dataset.metadata)
     self.false_path = getattr(dataset_path, "false_path", None)
     self.false_extra_files_path = getattr(dataset_path,
                                           "false_extra_files_path", None)
Example #5
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None,
                mapping_over_collection=False,
                execution_cache=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        self._check_access(tool, trans)
        app = trans.app
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)
        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections = self._collect_inputs(
            tool, trans, incoming, history, current_user_roles)

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        # format='input' previously would give you a random extension from
        # the input extensions, now it should just give "input" as the output
        # format.
        input_ext = 'data' if tool.profile < 16.04 else "input"
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in reversed(inp_data.items()):
            if not data:
                data = NoneDataset(datatypes_registry=app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            if tool.profile < 16.04:
                input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr(data, "element_identifier", None)
            if identifier is not None:
                incoming["%s|__identifier__" % name] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        (chrom_info, db_dataset) = app.genome_builds.get_chrom_info(
            input_dbkey,
            trans=trans,
            custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len')
        if db_dataset:
            inp_data.update({"chromInfo": db_dataset})
        incoming["chromInfo"] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = app.security_agent.history_get_default_permissions(
                history)

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names(input_names)

        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters(trans, tool, incoming)
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator(app)

        def handle_output(name, output, hidden=None):
            if output.parent:
                parent_to_child_pairs.append((output.parent, name))
                child_dataset_names.add(name)
            # What is the following hack for? Need to document under what
            # conditions the following can occur. ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query(
                    app.model.HistoryDatasetAssociation).get(dataid)
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(output, wrapped_params.params,
                                              inp_data,
                                              inp_dataset_collections,
                                              input_ext)
                data = app.model.HistoryDatasetAssociation(extension=ext,
                                                           create_dataset=True,
                                                           flush=False)
                if hidden is None:
                    hidden = output.hidden
                if hidden:
                    data.visible = False
                trans.sa_session.add(data)
                trans.app.security_agent.set_all_dataset_permissions(
                    data.dataset, output_permissions, new=True)

            # Must flush before setting object store id currently.
            # TODO: optimize this.
            trans.sa_session.flush()
            object_store_populator.set_object_store_id(data)

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance(metadata_source, string_types):
                    metadata_source = inp_data.get(metadata_source)

            if metadata_source is not None:
                data.init_meta(copy_from=metadata_source)
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name(output, data, tool, on_text,
                                             trans, incoming, history,
                                             wrapped_params.params, job_params)
            # Store output
            out_data[name] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict(out_data)
                output_action_params.update(incoming)
                output.actions.apply_action(data, output_action_params)
            # Also set the default values of actions of type metadata
            self.set_metadata_defaults(output, data, tool, on_text, trans,
                                       incoming, history,
                                       wrapped_params.params, job_params)
            # Flush all datasets at once.
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = app.dataset_collections_service
                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    element_identifiers = []
                    input_collections = dict([
                        (k, v[0][0])
                        for k, v in inp_dataset_collections.iteritems()
                    ])
                    known_outputs = output.known_outputs(
                        input_collections, collections_manager.type_registry)
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(
                                current_collection_type.split(":")[1:])
                            name_to_index = {
                                value["name"]: index
                                for index, value in enumerate(current_element_identifiers)
                            }
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(
                                        dict(
                                            name=parent_id,
                                            collection_type=current_collection_type,
                                            src="new_collection",
                                            element_identifiers=[],
                                        ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[
                                index]["element_identifiers"]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output(effective_output_name,
                                                output_part_def.output_def,
                                                hidden=True)
                        # TODO: this shouldn't exist in the top-level of the history at all
                        # but for now we are still working around that by hiding the contents
                        # there.
                        # Following hack causes dataset to not be added to history...
                        child_dataset_names.add(effective_output_name)

                        if set_output_history:
                            history.add_dataset(element,
                                                set_hid=set_output_hid,
                                                quota=False)
                        trans.sa_session.add(element)
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(
                            element_identifiers=element_identifiers)

                    collection_type = output.structure.collection_type
                    if collection_type is None:
                        collection_type_source = output.structure.collection_type_source
                        if collection_type_source is None:
                            # TODO: Not a new problem, but this should be determined
                            # sooner.
                            raise Exception(
                                "Could not determine collection type to create."
                            )
                        if collection_type_source not in input_collections:
                            raise Exception(
                                "Could not find collection type source with name [%s]."
                                % collection_type_source)

                        collection_type = input_collections[
                            collection_type_source].collection.collection_type

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=collection_type,
                            **element_kwds)
                        out_collections[name] = dc
                    else:
                        hdca_name = self.get_output_name(
                            output, None, tool, on_text, trans, incoming,
                            history, wrapped_params.params, job_params)
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=collection_type,
                            trusted_identifiers=True,
                            **element_kwds)
                        # name here is name of the output element - not name
                        # of the hdca.
                        out_collection_instances[name] = hdca
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output(name, output)
                    log.info("Handled output named %s for tool %s %s" %
                             (name, tool.id, handle_output_timer))

        add_datasets_timer = ExecutionTimer()
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        datasets_to_persist = []
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[name]
                datasets_to_persist.append(data)
        if set_output_history:
            # Set HID and add to history.
            # This is brand new and certainly empty so don't worry about quota.
            # TOOL OPTIMIZATION NOTE - from above loop to the job create below 99%+
            # of execution time happens within history.add_datasets.
            history.add_datasets(trans.sa_session,
                                 datasets_to_persist,
                                 set_hid=set_output_hid,
                                 quota=False,
                                 flush=False)
        else:
            for data in datasets_to_persist:
                trans.sa_session.add(data)

        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[parent_name]
            child_dataset = out_data[child_name]
            parent_dataset.children.append(child_dataset)

        log.info("Added output datasets to history %s" % add_datasets_timer)
        job_setup_timer = ExecutionTimer()
        # Create the job object
        job, galaxy_session = self._new_job_for_session(trans, tool, history)
        self._record_inputs(trans, tool, job, incoming, inp_data,
                            inp_dataset_collections, current_user_roles)
        self._record_outputs(job, out_data, out_collections,
                             out_collection_instances)

        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps(job_params)
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add(job)
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query(
                    app.model.Job).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (
                    rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (
                    old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (
                        old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type(
                        galaxy_session) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (
                        old_job.id, job.id, old_job.session_id,
                        galaxy_session.id)
                else:
                    raise Exception(
                        '(%s/%s): Remapping via the API is not (yet) supported'
                        % (old_job.id, job.id))
                # Duplicate PJAs before remap.
                for pjaa in old_job.post_job_actions:
                    job.add_post_job_action(pjaa.post_job_action)
                for jtod in old_job.output_datasets:
                    for (job_to_remap,
                         jtid) in [(jtid.job, jtid)
                                   for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id
                                == trans.user.id) or (trans.user is None and
                                                      job_to_remap.session_id
                                                      == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [
                                    dep_jtod.dataset for dep_jtod in
                                    job_to_remap.output_datasets
                            ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            input_values = dict([
                                (p.name, json.loads(p.value))
                                for p in job_to_remap.parameters
                            ])
                            update_param(jtid.name, input_values,
                                         str(out_data[jtod.name].id))
                            for p in job_to_remap.parameters:
                                p.value = json.dumps(input_values[p.name])
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info(
                                'Job %s input HDA %s remapped to new HDA %s' %
                                (job_to_remap.id, jtod.dataset.id,
                                 jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')

        log.info("Setup for job %s complete, ready to flush %s" %
                 (job.log_str(), job_setup_timer))

        job_flush_timer = ExecutionTimer()
        trans.sa_session.flush()
        log.info("Flushed transaction for job %s %s" %
                 (job.log_str(), job_flush_timer))
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[name]
            redirect_url = tool.parse_redirect_url(dataset, incoming)
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get('GALAXY_URL', None)
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state(app.model.Job.states.OK)
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add(job)
            trans.sa_session.flush()
            trans.response.send_redirect(
                url_for(controller='tool_runner',
                        action='redirect',
                        redirect_url=redirect_url))
        else:
            # Put the job in the queue if tracking in memory
            app.job_queue.put(job.id, job.tool_id)
            trans.log_event("Added job to the job queue, id: %s" % str(job.id),
                            tool_id=job.tool_id)
            return job, out_data
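
Example #5 labels outputs with on_text_for_names(input_names), a helper whose body is not shown here; the sketch below assumes it behaves like the inline naming chain that appears in Example #7 further down:

def on_text_for_names(input_names):
    # Presumed behavior, mirroring the if/elif chain in Example #7.
    if len(input_names) == 1:
        return input_names[0]
    elif len(input_names) == 2:
        return '%s and %s' % tuple(input_names[0:2])
    elif len(input_names) == 3:
        return '%s, %s, and %s' % tuple(input_names[0:3])
    elif len(input_names) > 3:
        return '%s, %s, and others' % tuple(input_names[0:2])
    return ""


print(on_text_for_names(['data 1', 'data 2']))                       # data 1 and data 2
print(on_text_for_names(['data 1', 'data 2', 'data 3', 'data 4']))   # data 1, data 2, and others
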
Example #6
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None,
                mapping_over_collection=False):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        assert tool.allow_user_access(
            trans.user), "User (%s) is not allowed to access this tool." % (
                trans.user)
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans(trans, create=True)

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections(
            tool, incoming)
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets(tool, incoming, trans)

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get("dbkey", "?")
        inp_items = inp_data.items()
        inp_items.reverse()
        for name, data in inp_items:
            if not data:
                data = NoneDataset(
                    datatypes_registry=trans.app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr(data, "element_identifier", None)
            if identifier is not None:
                incoming["%s|__identifier__" % name] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        (chrom_info, db_dataset) = trans.app.genome_builds.get_chrom_info(
            input_dbkey,
            trans=trans,
            custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len')
        if db_dataset:
            inp_data.update({"chromInfo": db_dataset})
        incoming["chromInfo"] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions(
                history)

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names(input_names)

        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters(trans, tool, incoming)
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator(trans.app)

        def handle_output(name, output):
            if output.parent:
                parent_to_child_pairs.append((output.parent, name))
                child_dataset_names.add(name)
            ## What is the following hack for? Need to document under what
            ## conditions the following can occur. ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query(
                    trans.app.model.HistoryDatasetAssociation).get(dataid)
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(output, wrapped_params.params,
                                              inp_data, input_ext)
                data = trans.app.model.HistoryDatasetAssociation(
                    extension=ext,
                    create_dataset=True,
                    sa_session=trans.sa_session)
                if output.hidden:
                    data.visible = False
                # Commit the dataset immediately so it gets database assigned unique id
                trans.sa_session.add(data)
                trans.sa_session.flush()
                trans.app.security_agent.set_all_dataset_permissions(
                    data.dataset, output_permissions)

            object_store_populator.set_object_store_id(data)

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance(metadata_source, basestring):
                    metadata_source = inp_data[metadata_source]

            if metadata_source is not None:
                data.init_meta(copy_from=metadata_source)
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name(output, data, tool, on_text,
                                             trans, incoming, history,
                                             wrapped_params.params, job_params)
            # Store output
            out_data[name] = data
            if output.actions:
                #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict(out_data)
                output_action_params.update(incoming)
                output.actions.apply_action(data, output_action_params)
            # Store all changes to database
            trans.sa_session.flush()
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = trans.app.dataset_collections_service

                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    elements = odict()
                    input_collections = dict([
                        (k, v[0])
                        for k, v in inp_dataset_collections.iteritems()
                    ])
                    known_outputs = output.known_outputs(
                        input_collections, collections_manager.type_registry)
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output(effective_output_name,
                                                output_part_def.output_def)
                        # Following hack causes dataset to not be added to history...
                        child_dataset_names.add(effective_output_name)

                        if set_output_history:
                            history.add_dataset(element,
                                                set_hid=set_output_hid)
                        trans.sa_session.add(element)
                        trans.sa_session.flush()

                        elements[output_part_def.element_identifier] = element

                    if output.dynamic_structure:
                        assert not elements  # known_outputs must have been empty
                        elements = collections_manager.ELEMENTS_UNINITIALIZED

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=output.structure.collection_type,
                            elements=elements,
                        )
                        out_collections[name] = dc
                    else:
                        hdca_name = self.get_output_name(
                            output, None, tool, on_text, trans, incoming,
                            history, wrapped_params.params, job_params)
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=output.structure.collection_type,
                            elements=elements,
                        )
                        # name here is name of the output element - not name
                        # of the hdca.
                        out_collection_instances[name] = hdca
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output(name, output)
                    log.info("Handled output %s" % handle_output_timer)
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[name]
                if set_output_history:
                    history.add_dataset(data, set_hid=set_output_hid)
                trans.sa_session.add(data)
                trans.sa_session.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[parent_name]
            child_dataset = out_data[child_name]
            parent_dataset.children.append(child_dataset)
        # Store data after custom code runs
        trans.sa_session.flush()
        # Create the job object
        job = trans.app.model.Job()

        if hasattr(trans, "get_galaxy_session"):
            galaxy_session = trans.get_galaxy_session()
            # If we're submitting from the API, there won't be a session.
            if type(galaxy_session) == trans.model.GalaxySession:
                job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except Exception:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, (dataset_collection,
                   reduced) in inp_dataset_collections.iteritems():
            # TODO: Does this work if nested in repeat/conditional?
            if reduced:
                incoming[name] = "__collection_reduce__|%s" % dataset_collection.id
            # Should verify security? We check security of individual
            # datasets below?
            job.add_input_dataset_collection(name, dataset_collection)
        for name, value in tool.params_to_strings(incoming,
                                                  trans.app).iteritems():
            job.add_parameter(name, value)
        current_user_roles = trans.get_current_user_roles()
        access_timer = ExecutionTimer()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset(
                        current_user_roles, dataset.dataset):
                    raise Exception("User does not have permission to use a dataset (%s) provided for input." % dataset.id)
                job.add_input_dataset(name, dataset)
            else:
                job.add_input_dataset(name, None)
        log.info("Verified access to datasets %s" % access_timer)
        for name, dataset in out_data.iteritems():
            job.add_output_dataset(name, dataset)
        for name, dataset_collection in out_collections.iteritems():
            job.add_implicit_output_dataset_collection(name,
                                                       dataset_collection)
        for name, dataset_collection_instance in out_collection_instances.iteritems():
            job.add_output_dataset_collection(name,
                                              dataset_collection_instance)
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps(job_params)
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add(job)
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query(
                    trans.app.model.Job).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (
                    rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (
                    old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (
                        old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type(
                        galaxy_session) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (
                        old_job.id, job.id, old_job.session_id,
                        galaxy_session.id)
                else:
                    raise Exception(
                        '(%s/%s): Remapping via the API is not (yet) supported'
                        % (old_job.id, job.id))
                for jtod in old_job.output_datasets:
                    for (job_to_remap,
                         jtid) in [(jtid.job, jtid)
                                   for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id
                                == trans.user.id) or (trans.user is None and
                                                      job_to_remap.session_id
                                                      == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [
                                    dep_jtod.dataset for dep_jtod in
                                    job_to_remap.output_datasets
                            ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str(
                                        jtod.dataset.id):
                                    p.value = str(out_data[jtod.name].id)
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info(
                                'Job %s input HDA %s remapped to new HDA %s' %
                                (job_to_remap.id, jtod.dataset.id,
                                 jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')
Example #7
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None ):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        def make_dict_copy( from_dict ):
            """
            Makes a copy of input dictionary from_dict such that all values that are dictionaries
            result in creation of a new dictionary ( a sort of deepcopy ).  Lists are copied
            recursively as well ( via make_list_copy ); other values are copied by reference.
            """
            copy_from_dict = {}
            for key, value in from_dict.items():
                if type( value ).__name__ == 'dict':
                    copy_from_dict[ key ] = make_dict_copy( value )
                elif isinstance( value, list ):
                    copy_from_dict[ key ] = make_list_copy( value )
                else:
                    copy_from_dict[ key ] = value
            return copy_from_dict
        def make_list_copy( from_list ):
            new_list = []
            for value in from_list:
                if isinstance( value, dict ):
                    new_list.append( make_dict_copy( value ) )
                elif isinstance( value, list ):
                    new_list.append( make_list_copy( value ) )
                else:
                    new_list.append( value )
            return new_list
        def wrap_values( inputs, input_values, skip_missing_values = False ):
            # Wrap tool inputs as necessary
            for input in inputs.itervalues():
                if input.name not in input_values and skip_missing_values:
                    continue
                if isinstance( input, Repeat ):
                    for d in input_values[ input.name ]:
                        wrap_values( input.inputs, d, skip_missing_values = skip_missing_values )
                elif isinstance( input, Conditional ):
                    values = input_values[ input.name ]
                    current = values[ "__current_case__" ]
                    wrap_values( input.cases[current].inputs, values, skip_missing_values = skip_missing_values )
                elif isinstance( input, DataToolParameter ) and input.multiple:
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetListWrapper( input_values[ input.name ],
                                                         datatypes_registry = trans.app.datatypes_registry,
                                                         tool = tool,
                                                         name = input.name )
                elif isinstance( input, DataToolParameter ):
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ],
                                                             datatypes_registry = trans.app.datatypes_registry,
                                                             tool = tool,
                                                             name = input.name )
                elif isinstance( input, SelectToolParameter ):
                    input_values[ input.name ] = galaxy.tools.SelectToolParameterWrapper( input, input_values[ input.name ], tool.app, other_values = incoming )
                else:
                    input_values[ input.name ] = galaxy.tools.InputValueWrapper( input, input_values[ input.name ], incoming )
        
        # Set history.
        if not history:
            history = trans.history
        
        out_data = odict()
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset( datatypes_registry = trans.app.datatypes_registry )
                continue
                
            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data
            
            else: # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext
            
            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for( input_dbkey )
        if db_dataset:
            db_datasets[ "chromInfo" ] = db_dataset
            incoming[ "chromInfo" ] = db_dataset.file_name
        else:
            # For custom builds, chrom info resides in converted dataset; for built-in builds, chrom info resides in tool-data/shared.
            chrom_info = None
            if trans.user and ( 'dbkeys' in trans.user.preferences ) and ( input_dbkey in from_json_string( trans.user.preferences[ 'dbkeys' ] ) ):
                # Custom build.
                custom_build_dict = from_json_string( trans.user.preferences[ 'dbkeys' ] )[ input_dbkey ]
                if 'fasta' in custom_build_dict:
                    build_fasta_dataset = trans.app.model.HistoryDatasetAssociation.get( custom_build_dict[ 'fasta' ] )
                    chrom_info = build_fasta_dataset.get_converted_dataset( trans, 'len' ).file_name
            
            if not chrom_info:
                # Default to built-in build.
                chrom_info = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey )
            incoming[ "chromInfo" ] = chrom_info
        inp_data.update( db_datasets )
        
        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )
        # Build name for output datasets based on tool name and input names
        if len( input_names ) == 1:
            on_text = input_names[0]
        elif len( input_names ) == 2:
            on_text = '%s and %s' % tuple(input_names[0:2])
        elif len( input_names ) == 3:
            on_text = '%s, %s, and %s' % tuple(input_names[0:3])
        elif len( input_names ) > 3:
            on_text = '%s, %s, and others' % tuple(input_names[0:2])
        else:
            on_text = ""
        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        params = None  # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        # Keep track of parent / child relationships, we'll create all the 
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_id = None
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval( filter.text.strip(), globals(), incoming ):
                        break #do not create this dataset
                except Exception as e:
                    log.debug( 'Dataset output filter failed: %s' % e )
            else: #all filters passed
                if output.parent:
                    parent_to_child_pairs.append( ( output.parent, name ) )
                    child_dataset_names.add( name )
                ## What is the following hack for? Need to document under what 
                ## conditions the following can occur. ([email protected])
                # HACK: the output data has already been created
                #      this happens i.e. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                    assert data is not None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    if output.format_source is not None and output.format_source in inp_data:
                        try:
                            ext = inp_data[output.format_source].ext
                        except Exception:
                            pass
                    
                    #process change_format tags
                    if output.change_format:
                        if params is None:
                            params = make_dict_copy( incoming )
                            wrap_values( tool.inputs, params, skip_missing_values = not tool.check_values )
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall( 'when' ):
                                check = when_elem.get( 'input', None )
                                if check is not None:
                                    try:
                                        if '$' not in check:
                                            #allow a simple name or more complex specifications
                                            check = '${%s}' % check
                                        if str( fill_template( check, context = params ) ) == when_elem.get( 'value', None ):
                                            ext = when_elem.get( 'format', ext )
                                    except Exception:  # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct
                                        continue
                                else:
                                    check = when_elem.get( 'input_dataset', None )
                                    if check is not None:
                                        check = inp_data.get( check, None )
                                        if check is not None:
                                            if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
                                                ext = when_elem.get( 'format', ext )
                    data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                    if output.hidden:
                        data.visible = False
                    # Commit the dataset immediately so it gets database assigned unique id
                    trans.sa_session.add( data )
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )
                # Create an empty file immediately.  The first dataset will be
                # created in the "default" store, all others will be created in
                # the same store as the first.
                data.dataset.object_store_id = object_store_id
                try:
                    trans.app.object_store.create( data.dataset )
                except ObjectInvalid:
                    raise Exception('Unable to create output dataset: object store is full')
                object_store_id = data.dataset.object_store_id      # these will be the same thing after the first output
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested. 
                if output.metadata_source:
                    data.init_meta( copy_from=inp_data[output.metadata_source] )
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state 
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                if output.label:
                    if params is None:
                        params = make_dict_copy( incoming )
                        # wrapping the params allows the tool config to contain things like
                        # <outputs>
                        #     <data format="input" name="output" label="Blat on ${<input_param>.name}" />
                        # </outputs>
                        wrap_values( tool.inputs, params, skip_missing_values = not tool.check_values )
                    #tool (only needing to be set once) and on_string (set differently for each label) are overwritten for each output dataset label being determined
                    params['tool'] = tool
                    params['on_string'] = on_text
                    params['time'] = logging.time.strftime('%X %x', logging.time.gmtime())
                    data.name = fill_template( output.label, context=params )
                else:
                    data.name = tool.name 
                    if on_text:
                        data.name += ( " on " + on_text )
                # Store output 
                out_data[ name ] = data
                if output.actions:
                    #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                    output_action_params = dict( out_data )
                    output_action_params.update( incoming )
                    output.actions.apply_action( data, output_action_params )
                # Store all changes to database
                trans.sa_session.flush()
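The <change_format> handling above walks each <when> element and swaps the output extension on a match. Below is a minimal standalone sketch of that lookup, using the simpler direct parameter comparison that the older execute() variants later in this listing use; the XML fragment and the "out_format" parameter name are invented for illustration, so this is not Galaxy's implementation.

import xml.etree.ElementTree as ET

# Hypothetical <change_format> block; real tools declare this in their XML config.
CHANGE_FORMAT_XML = """
<change_format>
    <when input="out_format" value="interval" format="interval" />
    <when input="out_format" value="fasta" format="fasta" />
</change_format>
"""

def resolve_extension(default_ext, incoming):
    """Return the output extension after applying any matching <when> clause."""
    ext = default_ext
    change_elem = ET.fromstring(CHANGE_FORMAT_XML)
    for when_elem in change_elem.findall('when'):
        check = when_elem.get('input')
        # Compare the referenced parameter's submitted value against the
        # <when value="..."> attribute; the last matching clause wins.
        if check is not None and str(incoming.get(check)) == when_elem.get('value'):
            ext = when_elem.get('format', ext)
    return ext

# The user picked "fasta" in the tool form, so the default "tabular" is overridden.
assert resolve_extension('tabular', {'out_format': 'fasta'}) == 'fasta'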
Example #8
0
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans(trans, create=True)

        out_data = odict()
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections(
            tool, incoming, trans)
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets(tool, incoming, trans)

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset(
                    datatypes_registry=trans.app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        (chrom_info, db_dataset) = trans.app.genome_builds.get_chrom_info(
            input_dbkey,
            trans=trans,
            custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len')
        if db_dataset:
            inp_data.update({"chromInfo": db_dataset})
        incoming["chromInfo"] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions(
                history)

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names(input_names)

        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters(trans, tool, incoming)
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_id = None
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval(filter.text.strip(), globals(), incoming):
                        break  # do not create this dataset
                except Exception, e:
                    log.debug('Dataset output filter failed: %s' % e)
            else:  # all filters passed
                if output.parent:
                    parent_to_child_pairs.append((output.parent, name))
                    child_dataset_names.add(name)
                ## What is the following hack for? Need to document under what
                ## conditions can the following occur? ([email protected])
                # HACK: the output data has already been created
                #      this happens i.e. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(dataid)
                    assert data != None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    if output.format_source is not None and output.format_source in inp_data:
                        try:
                            input_dataset = inp_data[output.format_source]
                            input_extension = input_dataset.ext
                            ext = input_extension
                        except Exception, e:
                            pass

                    #process change_format tags
                    if output.change_format:
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall('when'):
                                check = when_elem.get('input', None)
                                if check is not None:
                                    try:
                                        if '$' not in check:
                                            #allow a simple name or more complex specifications
                                            check = '${%s}' % check
                                        if str(fill_template(check, context=wrapped_params.params)) == when_elem.get('value', None):
                                            ext = when_elem.get('format', ext)
                                    except:  # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct
                                        continue
                                else:
                                    check = when_elem.get('input_dataset', None)
                                    if check is not None:
                                        check = inp_data.get(check, None)
                                        if check is not None:
                                            if str(getattr(check, when_elem.get('attribute'))) == when_elem.get('value', None):
                                                ext = when_elem.get('format', ext)
                    data = trans.app.model.HistoryDatasetAssociation(
                        extension=ext,
                        create_dataset=True,
                        sa_session=trans.sa_session)
                    if output.hidden:
                        data.visible = False
                    # Commit the dataset immediately so it gets database assigned unique id
                    trans.sa_session.add(data)
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions(
                        data.dataset, output_permissions)
                # Create an empty file immediately.  The first dataset will be
                # created in the "default" store, all others will be created in
                # the same store as the first.
                data.dataset.object_store_id = object_store_id
                try:
                    trans.app.object_store.create(data.dataset)
                except ObjectInvalid:
                    raise Exception(
                        'Unable to create output dataset: object store is full'
                    )
                object_store_id = data.dataset.object_store_id  # these will be the same thing after the first output
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested.
                if output.metadata_source:
                    data.init_meta(copy_from=inp_data[output.metadata_source])
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                data.name = self.get_output_name(output, data, tool, on_text,
                                                 trans, incoming, history,
                                                 wrapped_params.params,
                                                 job_params)
                # Store output
                out_data[name] = data
                if output.actions:
                    #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                    output_action_params = dict(out_data)
                    output_action_params.update(incoming)
                    output.actions.apply_action(data, output_action_params)
                # Store all changes to database
                trans.sa_session.flush()
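This variant delegates output naming to on_text_for_names(input_names). The sketch below is not that helper itself, only a reimplementation of the inline naming rules spelled out in the older execute() implementations further down this page, to show the kind of "on data ..." string it yields.

def on_text_sketch(input_names):
    """Reproduce the inline 'on ...' naming rules shown in the older executes below."""
    if len(input_names) == 1:
        return input_names[0]
    elif len(input_names) == 2:
        return '%s and %s' % tuple(input_names[0:2])
    elif len(input_names) == 3:
        return '%s, %s, and %s' % tuple(input_names[0:3])
    elif len(input_names) > 3:
        return '%s, %s, and others' % tuple(input_names[0:2])
    return ""

print(on_text_sketch(['data 1', 'data 2']))                      # data 1 and data 2
print(on_text_sketch(['data 1', 'data 2', 'data 3', 'data 4']))  # data 1, data 2, and others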
Example #9
0
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False, execution_cache=None ):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        app = trans.app
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)
        current_user_roles = execution_cache.current_user_roles

        assert tool.allow_user_access( trans.user ), "User (%s) is not allowed to access this tool." % ( trans.user )
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )
        if history not in trans.sa_session:
            history = trans.sa_session.query( app.model.History ).get( history.id )

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans, current_user_roles=current_user_roles )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in reversed(inp_data.items()):
            if not data:
                data = NoneDataset( datatypes_registry=app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr( data, "element_identifier", None )
            if identifier is not None:
                incoming[ "%s|__identifier__" % name ] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( app )

        def handle_output( name, output, hidden=None ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            # What is the following hack for? Need to document under what
            # conditions can the following occur? ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
Example #10
0
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )

        out_data = odict()
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( trans.app )

        def handle_output( name, output ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            ## What is the following hack for? Need to document under what
            ## conditions can the following occur? ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format( output, wrapped_params.params, inp_data, input_ext )
                data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                if output.hidden:
                    data.visible = False
                # Commit the dataset immediately so it gets database assigned unique id
                trans.sa_session.add( data )
                trans.sa_session.flush()
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )

            object_store_populator.set_object_store_id( data )

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.
            if output.metadata_source:
                data.init_meta( copy_from=inp_data[output.metadata_source] )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Store output
            out_data[ name ] = data
            if output.actions:
                #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict( out_data )
                output_action_params.update( incoming )
                output.actions.apply_action( data, output_action_params )
            # Store all changes to database
            trans.sa_session.flush()

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                handle_output( name, output )
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[ name ]
                if set_output_history:
                    history.add_dataset( data, set_hid=set_output_hid )
                trans.sa_session.add( data )
                trans.sa_session.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )
        # Store data after custom code runs
        trans.sa_session.flush()
        # Create the job object
        job = trans.app.model.Job()

        if hasattr( trans, "get_galaxy_session" ):
            galaxy_session = trans.get_galaxy_session()
            # If we're submitting from the API, there won't be a session.
            if type( galaxy_session ) == trans.model.GalaxySession:
                job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, ( dataset_collection, reduced ) in inp_dataset_collections.iteritems():
            # TODO: Does this work if nested in repeat/conditional?
            if reduced:
                incoming[ name ] = "__collection_reduce__|%s" % dataset_collection.id
            # Should verify security? We check security of individual
            # datasets below?
            job.add_input_dataset_collection( name, dataset_collection )
        for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
            job.add_parameter( name, value )
        current_user_roles = trans.get_current_user_roles()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset( current_user_roles, dataset.dataset ):
                    raise "User does not have permission to use a dataset (%s) provided for input." % data.id
                job.add_input_dataset( name, dataset )
            else:
                job.add_input_dataset( name, None )
        for name, dataset in out_data.iteritems():
            job.add_output_dataset( name, dataset )
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps( job_params )
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add( job )
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( trans.app.model.Job ).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
                else:
                    raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
                for jtod in old_job.output_datasets:
                    for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str(jtod.dataset.id):
                                    p.value = str(out_data[jtod.name].id)
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception, e:
                log.exception('Cannot remap rerun dependencies.')
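The output filter loop in these execute() methods leans on Python's for/else: the else branch ("all filters passed") runs only when the loop finishes without hitting break, and a filter expression that raises is merely logged and treated as passing. A tiny self-contained sketch of that control flow, with a hypothetical "advanced" tool parameter:

def passes_filters(filter_texts, incoming):
    """Return True when no filter expression evaluates falsy."""
    for filter_text in filter_texts:
        try:
            if not eval(filter_text.strip(), globals(), incoming):
                break  # do not create this dataset
        except Exception as e:
            # Mirrors log.debug(...) above: a broken filter does not block the output.
            print('Dataset output filter failed: %s' % e)
    else:  # all filters passed
        return True
    return False

# Only create the output when the (made-up) "advanced" checkbox was ticked.
print(passes_filters(["advanced == 'true'"], {'advanced': 'true'}))   # True
print(passes_filters(["advanced == 'true'"], {'advanced': 'false'}))  # False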
Example #11
0
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )

        out_data = odict()
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id!='CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_id = None
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval( filter.text.strip(), globals(), incoming ):
                        break  # do not create this dataset
                except Exception, e:
                    log.debug( 'Dataset output filter failed: %s' % e )
            else:  # all filters passed
                if output.parent:
                    parent_to_child_pairs.append( ( output.parent, name ) )
                    child_dataset_names.add( name )
                ## What is the following hack for? Need to document under what
                ## conditions can the following occur? ([email protected])
                # HACK: the output data has already been created
                #      this happens i.e. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                    assert data != None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    if output.format_source is not None and output.format_source in inp_data:
                        try:
                            input_dataset = inp_data[output.format_source]
                            input_extension = input_dataset.ext
                            ext = input_extension
                        except Exception, e:
                            pass

                    #process change_format tags
                    if output.change_format:
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall( 'when' ):
                                check = when_elem.get( 'input', None )
                                if check is not None:
                                    try:
                                        if '$' not in check:
                                            #allow a simple name or more complex specifications
                                            check = '${%s}' % check
                                        if str( fill_template( check, context=wrapped_params.params ) ) == when_elem.get( 'value', None ):
                                            ext = when_elem.get( 'format', ext )
                                    except:  # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct
                                        continue
                                else:
                                    check = when_elem.get( 'input_dataset', None )
                                    if check is not None:
                                        check = inp_data.get( check, None )
                                        if check is not None:
                                            if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
                                                ext = when_elem.get( 'format', ext )
                    data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                    if output.hidden:
                        data.visible = False
                    # Commit the dataset immediately so it gets database assigned unique id
                    trans.sa_session.add( data )
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )
                # Create an empty file immediately.  The first dataset will be
                # created in the "default" store, all others will be created in
                # the same store as the first.
                data.dataset.object_store_id = object_store_id
                try:
                    trans.app.object_store.create( data.dataset )
                except ObjectInvalid:
                    raise Exception('Unable to create output dataset: object store is full')
                object_store_id = data.dataset.object_store_id      # these will be the same thing after the first output
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested.
                if output.metadata_source:
                    data.init_meta( copy_from=inp_data[output.metadata_source] )
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
                # Store output
                out_data[ name ] = data
                if output.actions:
                    #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                    output_action_params = dict( out_data )
                    output_action_params.update( incoming )
                    output.actions.apply_action( data, output_action_params )
                # Store all changes to database
                trans.sa_session.flush()
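An earlier variant on this page folds the extension choice into a determine_output_format() helper, while this one keeps it inline: the declared format wins, "input" falls back to the last input's extension, and format_source copies the extension of a named input dataset when it is present. Below is a rough sketch of that precedence; resolve_output_ext and FakeDataset are illustrative names, not Galaxy code.

class FakeDataset(object):
    """Stand-in for an HDA that only carries an extension."""
    def __init__(self, ext):
        self.ext = ext

def resolve_output_ext(output_format, format_source, inp_data, input_ext):
    ext = output_format
    if ext == "input":
        ext = input_ext              # match the (last) input's type
    if format_source is not None and format_source in inp_data:
        source = inp_data[format_source]
        if source is not None and getattr(source, 'ext', None):
            ext = source.ext         # copy the named input's extension
    return ext

inp_data = {'query': FakeDataset('fastqsanger')}
print(resolve_output_ext('input', None, inp_data, 'tabular'))    # tabular
print(resolve_output_ext('data', 'query', inp_data, 'tabular'))  # fastqsanger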
Example #12
0
    def execute(self, tool, trans, incoming={}, set_output_hid=True ):
        def make_dict_copy( from_dict ):
            """
            Makes a copy of input dictionary from_dict such that all values that are dictionaries
            result in creation of a new dictionary ( a sort of deepcopy ).  We may need to handle 
            other complex types ( e.g., lists, etc ), but not sure... 
            """
            copy_from_dict = {}
            for key, value in from_dict.items():
                if type( value ).__name__ == 'dict':
                    copy_from_dict[ key ] = make_dict_copy( value )
                else:
                    copy_from_dict[ key ] = value
            return copy_from_dict
        def wrap_values( inputs, input_values ):
            # Wrap tool inputs as necessary
            for input in inputs.itervalues():
                if isinstance( input, Repeat ):
                    for d in input_values[ input.name ]:
                        wrap_values( input.inputs, d )
                elif isinstance( input, Conditional ):
                    values = input_values[ input.name ]
                    current = values[ "__current_case__" ]
                    wrap_values( input.cases[current].inputs, values )
                elif isinstance( input, DataToolParameter ):
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ],
                                                             datatypes_registry = trans.app.datatypes_registry,
                                                             tool = tool,
                                                             name = input.name )
                elif isinstance( input, SelectToolParameter ):
                    input_values[ input.name ] = galaxy.tools.SelectToolParameterWrapper( input, input_values[ input.name ], tool.app, other_values = incoming )
                else:
                    input_values[ input.name ] = galaxy.tools.InputValueWrapper( input, input_values[ input.name ], incoming )
        out_data = {}
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if data:
                input_names.append( 'data %s' % data.hid )
                input_ext = data.ext
            else:
                data = NoneDataset( datatypes_registry = trans.app.datatypes_registry )
            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for( input_dbkey )
        if db_dataset:
            db_datasets[ "chromInfo" ] = db_dataset
            incoming[ "chromInfo" ] = db_dataset.file_name
        else:
            incoming[ "chromInfo" ] = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey )
        inp_data.update( db_datasets )
        
        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( trans.history )
        # Build name for output datasets based on tool name and input names
        if len( input_names ) == 1:
            on_text = input_names[0]
        elif len( input_names ) == 2:
            on_text = '%s and %s' % tuple(input_names[0:2])
        elif len( input_names ) == 3:
            on_text = '%s, %s, and %s' % tuple(input_names[0:3])
        elif len( input_names ) > 3:
            on_text = '%s, %s, and others' % tuple(input_names[0:2])
        else:
            on_text = ""
        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # Keep track of parent / child relationships, we'll create all the 
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        for name, output in tool.outputs.items():
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            ## What is the following hack for? Need to document under what 
            ## conditions can the following occur? ([email protected])
            # HACK: the output data has already been created
            #      this happens i.e. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.app.model.HistoryDatasetAssociation.get( dataid )
                assert data != None
                out_data[name] = data
            else:
                # the type should match the input
                ext = output.format
                if ext == "input":
                    ext = input_ext
                #process change_format tags
                if output.change_format:
                    for change_elem in output.change_format:
                        for when_elem in change_elem.findall( 'when' ):
                            check = incoming.get( when_elem.get( 'input' ), None )
                            if check is not None:
                                if check == when_elem.get( 'value', None ):
                                    ext = when_elem.get( 'format', ext )
                            else:
                                check = when_elem.get( 'input_dataset', None )
                                if check is not None:
                                    check = inp_data.get( check, None )
                                    if check is not None:
                                        if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
                                            ext = when_elem.get( 'format', ext )
                data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True )
                # Commit the dataset immediately so it gets database assigned unique id
                data.flush()
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )
            # Create an empty file immediately
            open( data.file_name, "w" ).close()
            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested. 
            if output.metadata_source:
                data.init_meta( copy_from=inp_data[output.metadata_source] )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state 
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            if output.label:
                params = make_dict_copy( incoming )
                # wrapping the params allows the tool config to contain things like
                # <outputs>
                #     <data format="input" name="output" label="Blat on ${<input_param>.name}" />
                # </outputs>
                wrap_values( tool.inputs, params )
                params['tool'] = tool
                params['on_string'] = on_text
                data.name = fill_template( output.label, context=params )
            else:
                data.name = tool.name 
                if on_text:
                    data.name += ( " on " + on_text )
            # Store output 
            out_data[ name ] = data
            # Store all changes to database
            trans.app.model.flush()
        # Add all the top-level (non-child) datasets to the history
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming: #don't add children; or already existing datasets, i.e. async created
                data = out_data[ name ]
                trans.history.add_dataset( data, set_hid = set_output_hid )
                data.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )
        # Store data after custom code runs 
        trans.app.model.flush()
        # Create the job object
        job = trans.app.model.Job()
        job.session_id = trans.get_galaxy_session().id
        job.history_id = trans.history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
            job.add_parameter( name, value )
        for name, dataset in inp_data.iteritems():
            if dataset:
                # TODO, Nate: Make sure the permitted actions here are appropriate.
                if not trans.app.security_agent.allow_action( trans.user, dataset.permitted_actions.DATASET_ACCESS, dataset=dataset ):
                    raise "User does not have permission to use a dataset (%s) provided for input." % data.id
                job.add_input_dataset( name, dataset )
            else:
                job.add_input_dataset( name, None )
        for name, dataset in out_data.iteritems():
            job.add_output_dataset( name, dataset )
        trans.app.model.flush()
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.state = JOB_OK
            job.info = "Redirected to: %s" % redirect_url
            job.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Queue the job for execution
            trans.app.job_queue.put( job.id, tool )
            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
            return out_data
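make_dict_copy() in this example only recurses into values that are themselves dicts, which is exactly the caveat its docstring raises about lists and other complex types. A short, runnable illustration of that behavior (the function body mirrors the one above):

def make_dict_copy(from_dict):
    copy_from_dict = {}
    for key, value in from_dict.items():
        if isinstance(value, dict):
            copy_from_dict[key] = make_dict_copy(value)   # nested dicts are copied
        else:
            copy_from_dict[key] = value                   # everything else is shared
    return copy_from_dict

original = {'cond': {'value': 1}, 'names': ['a']}
copied = make_dict_copy(original)
copied['cond']['value'] = 2     # does not touch the original nested dict
copied['names'].append('b')     # but the list object is shared
print(original['cond']['value'])  # 1
print(original['names'])          # ['a', 'b']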
Example #13
0
    def execute(self, tool, trans, incoming={}, set_output_hid=True ):
        def make_dict_copy( from_dict ):
            """
            Makes a copy of input dictionary from_dict such that all values that are dictionaries
            result in creation of a new dictionary ( a sort of deepcopy ).  We may need to handle 
            other complex types ( e.g., lists, etc ), but not sure... 
            """
            copy_from_dict = {}
            for key, value in from_dict.items():
                if type( value ).__name__ == 'dict':
                    copy_from_dict[ key ] = make_dict_copy( value )
                else:
                    copy_from_dict[ key ] = value
            return copy_from_dict
        def wrap_values( inputs, input_values ):
            # Wrap tool inputs as necessary
            for input in inputs.itervalues():
                if isinstance( input, Repeat ):
                    for d in input_values[ input.name ]:
                        wrap_values( input.inputs, d )
                elif isinstance( input, Conditional ):
                    values = input_values[ input.name ]
                    current = values[ "__current_case__" ]
                    wrap_values( input.cases[current].inputs, values )
                elif isinstance( input, DataToolParameter ):
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ],
                                                             datatypes_registry = trans.app.datatypes_registry,
                                                             tool = tool,
                                                             name = input.name )
                elif isinstance( input, SelectToolParameter ):
                    input_values[ input.name ] = galaxy.tools.SelectToolParameterWrapper( input, input_values[ input.name ], tool.app, other_values = incoming )
                else:
                    input_values[ input.name ] = galaxy.tools.InputValueWrapper( input, input_values[ input.name ], incoming )
        out_data = {}
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if data:
                input_names.append( 'data %s' % data.hid )
                input_ext = data.ext
            else:
                data = NoneDataset( datatypes_registry = trans.app.datatypes_registry )
            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for( input_dbkey )
        if db_dataset:
            db_datasets[ "chromInfo" ] = db_dataset
            incoming[ "chromInfo" ] = db_dataset.file_name
        else:
            incoming[ "chromInfo" ] = os.path.join( trans.app.config.tool_data_path, 'shared','ucsc','chrom', "%s.len" % input_dbkey )
        inp_data.update( db_datasets )
        
        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( trans.history )
        # Build name for output datasets based on tool name and input names
        if len( input_names ) == 1:
            on_text = input_names[0]
        elif len( input_names ) == 2:
            on_text = '%s and %s' % tuple(input_names[0:2])
        elif len( input_names ) == 3:
            on_text = '%s, %s, and %s' % tuple(input_names[0:3])
        elif len( input_names ) > 3:
            on_text = '%s, %s, and others' % tuple(input_names[0:2])
        else:
            on_text = ""
        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # Keep track of parent / child relationships, we'll create all the 
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval( filter.text, globals(), incoming ):
                        break #do not create this dataset
                except Exception, e:
                    log.debug( 'Dataset output filter failed: %s' % e )
            else: #all filters passed
                if output.parent:
                    parent_to_child_pairs.append( ( output.parent, name ) )
                    child_dataset_names.add( name )
                ## What is the following hack for? Need to document under what 
                ## conditions can the following occur? ([email protected])
                # HACK: the output data has already been created
                #      this happens i.e. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                    assert data != None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    #process change_format tags
                    if output.change_format:
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall( 'when' ):
                                check = incoming.get( when_elem.get( 'input' ), None )
                                if check is not None:
                                    if check == when_elem.get( 'value', None ):
                                        ext = when_elem.get( 'format', ext )
                                else:
                                    check = when_elem.get( 'input_dataset', None )
                                    if check is not None:
                                        check = inp_data.get( check, None )
                                        if check is not None:
                                            if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
                                                ext = when_elem.get( 'format', ext )
                    data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                    # Commit the dataset immediately so it gets database assigned unique id
                    trans.sa_session.add( data )
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )
                # Create an empty file immediately
                open( data.file_name, "w" ).close()
                # Fix permissions
                util.umask_fix_perms( data.file_name, trans.app.config.umask, 0666 )
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested. 
                if output.metadata_source:
                    data.init_meta( copy_from=inp_data[output.metadata_source] )
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state 
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                if output.label:
                    params = make_dict_copy( incoming )
                    # wrapping the params allows the tool config to contain things like
                    # <outputs>
                    #     <data format="input" name="output" label="Blat on ${<input_param>.name}" />
                    # </outputs>
                    wrap_values( tool.inputs, params )
                    params['tool'] = tool
                    params['on_string'] = on_text
                    data.name = fill_template( output.label, context=params )
                else:
                    data.name = tool.name 
                    if on_text:
                        data.name += ( " on " + on_text )
                # Store output 
                out_data[ name ] = data
                # Store all changes to database
                trans.sa_session.flush()
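
The output-naming branch above (the on_text construction from input_names) is a small pure function. A minimal standalone sketch of the same branching follows; the on_text_for_names helper referenced in a later example presumably does something equivalent, but this version is only illustrative:

def on_text_for_names(input_names):
    # Mirror the branching above: 1, 2, 3, or more than 3 input names.
    if len(input_names) == 1:
        return input_names[0]
    elif len(input_names) == 2:
        return '%s and %s' % tuple(input_names[0:2])
    elif len(input_names) == 3:
        return '%s, %s, and %s' % tuple(input_names[0:3])
    elif len(input_names) > 3:
        return '%s, %s, and others' % tuple(input_names[0:2])
    return ""

# Example: on_text_for_names(['data 1', 'data 2', 'data 5', 'data 7'])
# returns 'data 1, data 2, and others'.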
Example #14
    def execute(self,
                tool,
                trans,
                incoming={},
                return_job=False,
                set_output_hid=True,
                set_output_history=True,
                history=None,
                job_params=None,
                rerun_remap_job_id=None):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        def make_dict_copy(from_dict):
            """
            Makes a copy of the input dictionary from_dict such that all values that are
            dictionaries result in the creation of a new dictionary (a sort of deep copy).
            Lists are handled as well via make_list_copy; other complex types may still
            need handling.
            """
            copy_from_dict = {}
            for key, value in from_dict.items():
                if type(value).__name__ == 'dict':
                    copy_from_dict[key] = make_dict_copy(value)
                elif isinstance(value, list):
                    copy_from_dict[key] = make_list_copy(value)
                else:
                    copy_from_dict[key] = value
            return copy_from_dict

        def make_list_copy(from_list):
            new_list = []
            for value in from_list:
                if isinstance(value, dict):
                    new_list.append(make_dict_copy(value))
                elif isinstance(value, list):
                    new_list.append(make_list_copy(value))
                else:
                    new_list.append(value)
            return new_list

        def wrap_values(inputs, input_values, skip_missing_values=False):
            # Wrap tool inputs as necessary
            for input in inputs.itervalues():
                if input.name not in input_values and skip_missing_values:
                    continue
                if isinstance(input, Repeat):
                    for d in input_values[input.name]:
                        wrap_values(input.inputs,
                                    d,
                                    skip_missing_values=skip_missing_values)
                elif isinstance(input, Conditional):
                    values = input_values[input.name]
                    current = values["__current_case__"]
                    wrap_values(input.cases[current].inputs,
                                values,
                                skip_missing_values=skip_missing_values)
                elif isinstance(input, DataToolParameter) and input.multiple:
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetListWrapper( input_values[ input.name ],
                                                         datatypes_registry = trans.app.datatypes_registry,
                                                         tool = tool,
                                                         name = input.name )
                elif isinstance(input, DataToolParameter):
                    input_values[ input.name ] = \
                        galaxy.tools.DatasetFilenameWrapper( input_values[ input.name ],
                                                             datatypes_registry = trans.app.datatypes_registry,
                                                             tool = tool,
                                                             name = input.name )
                elif isinstance(input, SelectToolParameter):
                    input_values[
                        input.name] = galaxy.tools.SelectToolParameterWrapper(
                            input,
                            input_values[input.name],
                            tool.app,
                            other_values=incoming)
                else:
                    input_values[input.name] = galaxy.tools.InputValueWrapper(
                        input, input_values[input.name], incoming)

        # Set history.
        if not history:
            history = tool.get_default_history_by_trans(trans, create=True)

        out_data = odict()
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets(tool, incoming, trans)

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset(
                    datatypes_registry=trans.app.datatypes_registry)
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association(None)
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append('data %s' % data.hid)
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for(input_dbkey)
        if db_dataset:
            db_datasets["chromInfo"] = db_dataset
            incoming["chromInfo"] = db_dataset.file_name
        else:
            # For custom builds, chrom info resides in converted dataset; for built-in builds, chrom info resides in tool-data/shared.
            chrom_info = None
            if trans.user and ('dbkeys' in trans.user.preferences) and (
                    input_dbkey in from_json_string(
                        trans.user.preferences['dbkeys'])):
                # Custom build.
                custom_build_dict = from_json_string(
                    trans.user.preferences['dbkeys'])[input_dbkey]
                if 'fasta' in custom_build_dict:
                    build_fasta_dataset = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(
                            custom_build_dict['fasta'])
                    chrom_info = build_fasta_dataset.get_converted_dataset(
                        trans, 'len').file_name

            if not chrom_info:
                # Default to built-in build.
                chrom_info = os.path.join(trans.app.config.len_file_path,
                                          "%s.len" % input_dbkey)
            incoming["chromInfo"] = chrom_info
        inp_data.update(db_datasets)

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions(
                history)
        # Build name for output datasets based on tool name and input names
        if len(input_names) == 1:
            on_text = input_names[0]
        elif len(input_names) == 2:
            on_text = '%s and %s' % tuple(input_names[0:2])
        elif len(input_names) == 3:
            on_text = '%s, %s, and %s' % tuple(input_names[0:3])
        elif len(input_names) > 3:
            on_text = '%s, %s, and others' % tuple(input_names[0:2])
        else:
            on_text = ""
        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        params = None  #wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_id = None
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval(filter.text.strip(), globals(), incoming):
                        break  #do not create this dataset
                except Exception as e:
                    log.debug('Dataset output filter failed: %s' % e)
            else:  #all filters passed
                if output.parent:
                    parent_to_child_pairs.append((output.parent, name))
                    child_dataset_names.add(name)
                ## What is the following hack for? Need to document under what
                ## conditions the following can occur. ([email protected])
                # HACK: the output data has already been created;
                #      this happens e.g. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(dataid)
                    assert data is not None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    if output.format_source is not None and output.format_source in inp_data:
                        try:
                            input_dataset = inp_data[output.format_source]
                            input_extension = input_dataset.ext
                            ext = input_extension
                        except Exception:
                            pass

                    #process change_format tags
                    if output.change_format:
                        if params is None:
                            params = make_dict_copy(incoming)
                            wrap_values(
                                tool.inputs,
                                params,
                                skip_missing_values=not tool.check_values)
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall('when'):
                                check = when_elem.get('input', None)
                                if check is not None:
                                    try:
                                        if '$' not in check:
                                            #allow a simple name or more complex specifications
                                            check = '${%s}' % check
                                        if str(
                                                fill_template(check,
                                                              context=params)
                                        ) == when_elem.get('value', None):
                                            ext = when_elem.get('format', ext)
                                    except Exception:  # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct
                                        continue
                                else:
                                    check = when_elem.get(
                                        'input_dataset', None)
                                    if check is not None:
                                        check = inp_data.get(check, None)
                                        if check is not None:
                                            if str(
                                                    getattr(
                                                        check,
                                                        when_elem.get(
                                                            'attribute'))
                                            ) == when_elem.get('value', None):
                                                ext = when_elem.get(
                                                    'format', ext)
                    data = trans.app.model.HistoryDatasetAssociation(
                        extension=ext,
                        create_dataset=True,
                        sa_session=trans.sa_session)
                    if output.hidden:
                        data.visible = False
                    # Commit the dataset immediately so it gets database assigned unique id
                    trans.sa_session.add(data)
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions(
                        data.dataset, output_permissions)
                # Create an empty file immediately.  The first dataset will be
                # created in the "default" store, all others will be created in
                # the same store as the first.
                data.dataset.object_store_id = object_store_id
                try:
                    trans.app.object_store.create(data.dataset)
                except ObjectInvalid:
                    raise Exception(
                        'Unable to create output dataset: object store is full'
                    )
                object_store_id = data.dataset.object_store_id  # these will be the same thing after the first output
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested.
                if output.metadata_source:
                    data.init_meta(copy_from=inp_data[output.metadata_source])
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                if output.label:
                    if params is None:
                        params = make_dict_copy(incoming)
                        # wrapping the params allows the tool config to contain things like
                        # <outputs>
                        #     <data format="input" name="output" label="Blat on ${<input_param>.name}" />
                        # </outputs>
                        wrap_values(tool.inputs,
                                    params,
                                    skip_missing_values=not tool.check_values)
                    #tool (only needing to be set once) and on_string (set differently for each label) are overwritten for each output dataset label being determined
                    params['tool'] = tool
                    params['on_string'] = on_text
                    data.name = fill_template(output.label, context=params)
                else:
                    if params is None:
                        params = make_dict_copy(incoming)
                        wrap_values(tool.inputs,
                                    params,
                                    skip_missing_values=not tool.check_values)
                    data.name = self._get_default_data_name(
                        data,
                        tool,
                        on_text=on_text,
                        trans=trans,
                        incoming=incoming,
                        history=history,
                        params=params,
                        job_params=job_params)
                # Store output
                out_data[name] = data
                if output.actions:
                    #Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                    output_action_params = dict(out_data)
                    output_action_params.update(incoming)
                    output.actions.apply_action(data, output_action_params)
                # Store all changes to database
                trans.sa_session.flush()
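
The change_format handling in this example reduces to: for each <when> element, compare a templated parameter value (or an attribute of an input dataset) against the element's value attribute and, on a match, switch the output extension. A minimal sketch of the parameter-based branch, assuming a plain params dict instead of Cheetah-wrapped values and ignoring the input_dataset branch:

import xml.etree.ElementTree as ET

def resolve_extension(change_format_xml, params, default_ext):
    # Simplified stand-in for the loop above; no templating, no dataset attributes.
    ext = default_ext
    root = ET.fromstring(change_format_xml)
    for when_elem in root.findall('when'):
        check = when_elem.get('input')
        if check is not None and str(params.get(check)) == when_elem.get('value'):
            ext = when_elem.get('format', ext)
    return ext

xml = '<change_format><when input="out_format" value="bam" format="bam"/></change_format>'
print(resolve_extension(xml, {'out_format': 'bam'}, 'sam'))  # prints 'bam'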
Example #15
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False, execution_cache=None ):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        self._check_access( tool, trans )
        app = trans.app
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)
        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections = self._collect_inputs(tool, trans, incoming, history, current_user_roles)

        # Build name for output datasets based on tool name and input names
        on_text = self._get_on_text( inp_data )

        # format="input" previously would give you a random extension from
        # the input extensions, now it should just give "input" as the output
        # format.
        input_ext = 'data' if tool.profile < 16.04 else "input"
        input_dbkey = incoming.get( "dbkey", "?" )
        preserved_tags = []
        for name, data in reversed(inp_data.items()):
            if not data:
                data = NoneDataset( datatypes_registry=app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            if tool.profile < 16.04:
                input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr( data, "element_identifier", None )
            if identifier is not None:
                incoming[ "%s|__identifier__" % name ] = identifier

            for tag in data.tags:
                if tag.user_tname == 'name':
                    preserved_tags.append(tag)

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = app.security_agent.history_get_default_permissions( history )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = self._wrapped_params( trans, tool, incoming )

        out_data = odict()
        input_collections = dict( (k, v[0][0]) for k, v in inp_dataset_collections.items() )
        output_collections = OutputCollections(
            trans,
            history,
            tool=tool,
            tool_action=self,
            input_collections=input_collections,
            mapping_over_collection=mapping_over_collection,
            on_text=on_text,
            incoming=incoming,
            params=wrapped_params.params,
            job_params=job_params,
        )

        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( app )

        def handle_output( name, output, hidden=None ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            # What is the following hack for? Need to document under what
            # conditions the following can occur. ([email protected])
            # HACK: the output data has already been created;
            #      this happens e.g. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format(
                    output,
                    wrapped_params.params,
                    inp_data,
                    inp_dataset_collections,
                    input_ext
                )
                data = app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, flush=False )
                if hidden is None:
                    hidden = output.hidden
                if hidden:
                    data.visible = False
                trans.sa_session.add( data )
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions, new=True )

            for tag in preserved_tags:
                data.tags.append(tag.copy())

            # Must flush before setting object store id currently.
            # TODO: optimize this.
            trans.sa_session.flush()
            object_store_populator.set_object_store_id( data )

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance( metadata_source, string_types ):
                    metadata_source = inp_data.get( metadata_source )

            if metadata_source is not None:
                data.init_meta( copy_from=metadata_source )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Store output
            out_data[ name ] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict( out_data )
                output_action_params.update( incoming )
                output.actions.apply_action( data, output_action_params )
            # Also set the default values of actions of type metadata
            self.set_metadata_defaults( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Flush all datasets at once.
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = app.dataset_collections_service
                    element_identifiers = []
                    known_outputs = output.known_outputs( input_collections, collections_manager.type_registry )
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(current_collection_type.split(":")[1:])
                            name_to_index = dict((value["name"], index) for (index, value) in enumerate(current_element_identifiers))
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(dict(
                                        name=parent_id,
                                        collection_type=current_collection_type,
                                        src="new_collection",
                                        element_identifiers=[],
                                    ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[ index ][ "element_identifiers" ]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output( effective_output_name, output_part_def.output_def, hidden=True )
                        # TODO: this shouldn't exist in the top-level of the history at all
                        # but for now we are still working around that by hiding the contents
                        # there.
                        # Following hack causes the dataset not to be added to the history...
                        child_dataset_names.add( effective_output_name )

                        history.add_dataset( element, set_hid=set_output_hid, quota=False )
                        trans.sa_session.add( element )
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(element_identifiers=element_identifiers)

                    output_collections.create_collection(
                        output=output,
                        name=name,
                        tags=preserved_tags,
                        **element_kwds
                    )
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output( name, output )
                    log.info("Handled output named %s for tool %s %s" % (name, tool.id, handle_output_timer))

        add_datasets_timer = ExecutionTimer()
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        datasets_to_persist = []
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[ name ]
                datasets_to_persist.append( data )
        # Set HID and add to history.
        # This is brand new and certainly empty so don't worry about quota.
        # TOOL OPTIMIZATION NOTE - from above loop to the job create below 99%+
        # of execution time happens within history.add_datasets.
        history.add_datasets( trans.sa_session, datasets_to_persist, set_hid=set_output_hid, quota=False, flush=False )

        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )

        log.info("Added output datasets to history %s" % add_datasets_timer)
        job_setup_timer = ExecutionTimer()
        # Create the job object
        job, galaxy_session = self._new_job_for_session( trans, tool, history )
        self._record_inputs( trans, tool, job, incoming, inp_data, inp_dataset_collections, current_user_roles )
        self._record_outputs( job, out_data, output_collections )
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps( job_params )
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add( job )
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( app.model.Job ).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
                else:
                    raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
                # Duplicate PJAs before remap.
                for pjaa in old_job.post_job_actions:
                    job.add_post_job_action(pjaa.post_job_action)
                for jtod in old_job.output_datasets:
                    for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            input_values = dict( [ ( p.name, json.loads( p.value ) ) for p in job_to_remap.parameters ] )
                            update_param( jtid.name, input_values, str( out_data[ jtod.name ].id ) )
                            for p in job_to_remap.parameters:
                                p.value = json.dumps( input_values[ p.name ] )
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')

        log.info("Setup for job %s complete, ready to flush %s" % (job.log_str(), job_setup_timer))

        job_flush_timer = ExecutionTimer()
        trans.sa_session.flush()
        log.info("Flushed transaction for job %s %s" % (job.log_str(), job_flush_timer))
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state( app.model.Job.states.OK )
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add( job )
            trans.sa_session.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Put the job in the queue if tracking in memory
            app.job_queue.put( job.id, job.tool_id )
            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
            return job, out_data
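
Examples #14 and #15 both pin every output of a job to the object store chosen for the first output: the first dataset is created with object_store_id unset so the backend picks a store, and that id is then reused for the remaining outputs. A stripped-down sketch of the pattern (this is not Galaxy's ObjectStorePopulator, just the idea behind it):

class SimpleObjectStorePopulator(object):
    """Remember the store chosen for the first dataset and reuse it."""

    def __init__(self, object_store):
        self.object_store = object_store
        self.object_store_id = None

    def set_object_store_id(self, data):
        # First call: object_store_id is None, so the backend chooses a store.
        # Later calls: reuse whatever the first dataset was assigned.
        data.dataset.object_store_id = self.object_store_id
        self.object_store.create(data.dataset)
        self.object_store_id = data.dataset.object_store_id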
Example #16
    def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        assert tool.allow_user_access( trans.user ), "User (%s) is not allowed to access this tool." % ( trans.user )
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )

        out_data = odict()
        out_collections = {}
        out_collection_instances = {}
        # Track input dataset collections - but replace with simply lists so collect
        # input datasets can process these normally.
        inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        inp_items = inp_data.items()
        inp_items.reverse()
        for name, data in inp_items:
            if not data:
                data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance(data, LibraryDatasetDatasetAssociation):
                data = data.to_history_dataset_association( None )
                inp_data[name] = data

            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

            identifier = getattr( data, "element_identifier", None )
            if identifier is not None:
                incoming[ "%s|__identifier__" % name ] = identifier

        # Collect chromInfo dataset and add as parameters to incoming
        ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
        if db_dataset:
            inp_data.update( { "chromInfo": db_dataset } )
        incoming[ "chromInfo" ] = chrom_info

        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )
        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_populator = ObjectStorePopulator( trans.app )

        def handle_output( name, output ):
            if output.parent:
                parent_to_child_pairs.append( ( output.parent, name ) )
                child_dataset_names.add( name )
            # What is the following hack for? Need to document under what
            # conditions the following can occur. ([email protected])
            # HACK: the output data has already been created;
            #      this happens e.g. as a result of the async controller
            if name in incoming:
                dataid = incoming[name]
                data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                assert data is not None
                out_data[name] = data
            else:
                ext = determine_output_format( output, wrapped_params.params, inp_data, input_ext )
                data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                if output.hidden:
                    data.visible = False
                # Commit the dataset immediately so it gets database assigned unique id
                trans.sa_session.add( data )
                trans.sa_session.flush()
                trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )

            object_store_populator.set_object_store_id( data )

            # This may not be necessary with the new parent/child associations
            data.designation = name
            # Copy metadata from one of the inputs if requested.

            # metadata source can be either a string referencing an input
            # or an actual object to copy.
            metadata_source = output.metadata_source
            if metadata_source:
                if isinstance( metadata_source, basestring ):
                    metadata_source = inp_data[metadata_source]

            if metadata_source is not None:
                data.init_meta( copy_from=metadata_source )
            else:
                data.init_meta()
            # Take dbkey from LAST input
            data.dbkey = str(input_dbkey)
            # Set state
            # FIXME: shouldn't this be NEW until the job runner changes it?
            data.state = data.states.QUEUED
            data.blurb = "queued"
            # Set output label
            data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
            # Store output
            out_data[ name ] = data
            if output.actions:
                # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                output_action_params = dict( out_data )
                output_action_params.update( incoming )
                output.actions.apply_action( data, output_action_params )
            # Store all changes to database
            trans.sa_session.flush()
            return data

        for name, output in tool.outputs.items():
            if not filter_output(output, incoming):
                if output.collection:
                    collections_manager = trans.app.dataset_collections_service
                    # As far as I can tell - this is always true - but just verify
                    assert set_output_history, "Cannot create dataset collection for this kind of tool."

                    element_identifiers = []
                    input_collections = dict( [ (k, v[0]) for k, v in inp_dataset_collections.iteritems() ] )
                    known_outputs = output.known_outputs( input_collections, collections_manager.type_registry )
                    # Just to echo TODO elsewhere - this should be restructured to allow
                    # nested collections.
                    for output_part_def in known_outputs:
                        # Add elements to top-level collection, unless nested...
                        current_element_identifiers = element_identifiers
                        current_collection_type = output.structure.collection_type

                        for parent_id in (output_part_def.parent_ids or []):
                            # TODO: replace following line with formal abstractions for doing this.
                            current_collection_type = ":".join(current_collection_type.split(":")[1:])
                            name_to_index = dict(map(lambda (index, value): (value["name"], index), enumerate(current_element_identifiers)))
                            if parent_id not in name_to_index:
                                if parent_id not in current_element_identifiers:
                                    index = len(current_element_identifiers)
                                    current_element_identifiers.append(dict(
                                        name=parent_id,
                                        collection_type=current_collection_type,
                                        src="new_collection",
                                        element_identifiers=[],
                                    ))
                                else:
                                    index = name_to_index[parent_id]
                            current_element_identifiers = current_element_identifiers[ index ][ "element_identifiers" ]

                        effective_output_name = output_part_def.effective_output_name
                        element = handle_output( effective_output_name, output_part_def.output_def )
                        # Following hack causes the dataset not to be added to the history...
                        child_dataset_names.add( effective_output_name )

                        if set_output_history:
                            history.add_dataset( element, set_hid=set_output_hid )
                        trans.sa_session.add( element )
                        trans.sa_session.flush()

                        current_element_identifiers.append({
                            "__object__": element,
                            "name": output_part_def.element_identifier,
                        })
                        log.info(element_identifiers)

                    if output.dynamic_structure:
                        assert not element_identifiers  # known_outputs must have been empty
                        element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
                    else:
                        element_kwds = dict(element_identifiers=element_identifiers)

                    if mapping_over_collection:
                        dc = collections_manager.create_dataset_collection(
                            trans,
                            collection_type=output.structure.collection_type,
                            **element_kwds
                        )
                        out_collections[ name ] = dc
                    else:
                        hdca_name = self.get_output_name( output, None, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
                        hdca = collections_manager.create(
                            trans,
                            history,
                            name=hdca_name,
                            collection_type=output.structure.collection_type,
                            trusted_identifiers=True,
                            **element_kwds
                        )
                        # name here is name of the output element - not name
                        # of the hdca.
                        out_collection_instances[ name ] = hdca
                else:
                    handle_output_timer = ExecutionTimer()
                    handle_output( name, output )
                    log.info("Handled output %s" % handle_output_timer)
        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
                data = out_data[ name ]
                if set_output_history:
                    history.add_dataset( data, set_hid=set_output_hid )
                trans.sa_session.add( data )
                trans.sa_session.flush()
        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )
        # Store data after custom code runs
        trans.sa_session.flush()
        # Create the job object
        job = trans.app.model.Job()

        if hasattr( trans, "get_galaxy_session" ):
            galaxy_session = trans.get_galaxy_session()
            # If we're submitting from the API, there won't be a session.
            if type( galaxy_session ) == trans.model.GalaxySession:
                job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except Exception:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, ( dataset_collection, reduced ) in inp_dataset_collections.iteritems():
            # TODO: Does this work if nested in repeat/conditional?
            if reduced:
                incoming[ name ] = "__collection_reduce__|%s" % dataset_collection.id
            # Should verify security? We check security of individual
            # datasets below?
            job.add_input_dataset_collection( name, dataset_collection )
        for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
            job.add_parameter( name, value )
        current_user_roles = trans.get_current_user_roles()
        access_timer = ExecutionTimer()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset( current_user_roles, dataset.dataset ):
                    raise Exception("User does not have permission to use a dataset (%s) provided for input." % dataset.id)
                job.add_input_dataset( name, dataset )
            else:
                job.add_input_dataset( name, None )
        log.info("Verified access to datasets %s" % access_timer)
        for name, dataset in out_data.iteritems():
            job.add_output_dataset( name, dataset )
        for name, dataset_collection in out_collections.iteritems():
            job.add_implicit_output_dataset_collection( name, dataset_collection )
        for name, dataset_collection_instance in out_collection_instances.iteritems():
            job.add_output_dataset_collection( name, dataset_collection_instance )
        job.object_store_id = object_store_populator.object_store_id
        if job_params:
            job.params = dumps( job_params )
        job.set_handler(tool.get_job_handler(job_params))
        trans.sa_session.add( job )
        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
        # This functionality requires tracking jobs in the database.
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( trans.app.model.Job ).get(rerun_remap_job_id)
                assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
                else:
                    raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
                for jtod in old_job.output_datasets:
                    for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                        if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str(jtod.dataset.id):
                                    p.value = str(out_data[jtod.name].id)
                            jtid.dataset = out_data[jtod.name]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                            trans.sa_session.add(job_to_remap)
                            trans.sa_session.add(jtid)
                    jtod.dataset.visible = False
                    trans.sa_session.add(jtod)
            except Exception:
                log.exception('Cannot remap rerun dependencies.')
        trans.sa_session.flush()
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.set_state( trans.app.model.Job.states.OK )
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add( job )
            trans.sa_session.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Put the job in the queue if tracking in memory
            trans.app.job_queue.put( job.id, job.tool_id )
            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
            return job, out_data
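
A minimal, standalone sketch of the URL handoff performed in the REDIRECT_URL branch above: the tool-supplied redirect target gets the current instance's GALAXY_URL appended as a query parameter so the external application can send the user back. The function name and URLs below are illustrative stand-ins, not part of Galaxy's API.

def build_redirect_url(redirect_url, galaxy_url):
    # Mirror the "&GALAXY_URL=%s" append used above; like that code, this
    # assumes redirect_url already carries a query string.
    assert galaxy_url is not None, "GALAXY_URL parameter missing in tool config."
    return redirect_url + "&GALAXY_URL=%s" % galaxy_url

# Example with hypothetical values:
#   build_redirect_url("http://epigraph.example.org/run?job=42", "http://galaxy.example.org")
#   -> "http://epigraph.example.org/run?job=42&GALAXY_URL=http://galaxy.example.org"
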
Example #17
    def execute(self, tool, trans, incoming={}, set_output_hid=True):
        def make_dict_copy(from_dict):
            """
            Makes a copy of input dictionary from_dict such that all values that are dictionaries
            result in creation of a new dictionary ( a sort of deep copy ).  We may need to handle
            other complex types ( e.g., lists ), but we are not sure yet.
            """
            copy_from_dict = {}
            for key, value in from_dict.items():
                if type(value).__name__ == 'dict':
                    copy_from_dict[key] = make_dict_copy(value)
                else:
                    copy_from_dict[key] = value
            return copy_from_dict
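        # A tiny illustration (hypothetical values) of what make_dict_copy does:
        # each nested dict gets a fresh container, while non-dict leaves keep
        # pointing at the same objects:
        #   src = {'cond': {'case': 1}}
        #   dst = make_dict_copy(src)
        #   dst['cond'] is src['cond']   -> False
        #   dst['cond']['case'] == 1     -> True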

        def wrap_values(inputs, input_values):
            # Wrap tool inputs as necessary
            for input in inputs.itervalues():
                if isinstance(input, Repeat):
                    for d in input_values[input.name]:
                        wrap_values(input.inputs, d)
                elif isinstance(input, Conditional):
                    values = input_values[input.name]
                    current = values["__current_case__"]
                    wrap_values(input.cases[current].inputs, values)
                elif isinstance(input, DataToolParameter):
                    input_values[input.name] = galaxy.tools.DatasetFilenameWrapper(
                        input_values[input.name],
                        datatypes_registry=trans.app.datatypes_registry,
                        tool=tool,
                        name=input.name)
                elif isinstance(input, SelectToolParameter):
                    input_values[input.name] = galaxy.tools.SelectToolParameterWrapper(
                        input,
                        input_values[input.name],
                        tool.app,
                        other_values=incoming)
                else:
                    input_values[input.name] = galaxy.tools.InputValueWrapper(
                        input, input_values[input.name], incoming)
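        # The two helpers above support output label templating further down:
        # make_dict_copy isolates the incoming params dict at each nesting
        # level, and wrap_values swaps raw values for wrapper objects so that
        # ${...} references in <data label="..."> resolve correctly.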

        out_data = {}
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets(tool, incoming, trans)

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get("dbkey", "?")
        for name, data in inp_data.items():
            if data:
                input_names.append('data %s' % data.hid)
                input_ext = data.ext
            else:
                data = NoneDataset(
                    datatypes_registry=trans.app.datatypes_registry)
            if data.dbkey not in [None, '?']:
                input_dbkey = data.dbkey

        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for(input_dbkey)
        if db_dataset:
            db_datasets["chromInfo"] = db_dataset
            incoming["chromInfo"] = db_dataset.file_name
        else:
            incoming["chromInfo"] = os.path.join(
                trans.app.config.tool_data_path, 'shared', 'ucsc', 'chrom',
                "%s.len" % input_dbkey)
        inp_data.update(db_datasets)

        # Determine output dataset permission/roles list
        existing_datasets = [inp for inp in inp_data.values() if inp]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(
                existing_datasets)
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions(
                trans.history)
        # Build name for output datasets based on tool name and input names
        if len(input_names) == 1:
            on_text = input_names[0]
        elif len(input_names) == 2:
            on_text = '%s and %s' % tuple(input_names[0:2])
        elif len(input_names) == 3:
            on_text = '%s, %s, and %s' % tuple(input_names[0:3])
        elif len(input_names) > 3:
            on_text = '%s, %s, and others' % tuple(input_names[0:2])
        else:
            on_text = ""
        # Add the dbkey to the incoming parameters
        incoming["dbkey"] = input_dbkey
        # Keep track of parent / child relationships; we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval(filter.text, globals(), incoming):
                        break  #do not create this dataset
                except Exception, e:
                    log.debug('Dataset output filter failed: %s' % e)
            else:  #all filters passed
                if output.parent:
                    parent_to_child_pairs.append((output.parent, name))
                    child_dataset_names.add(name)
                ## What is the following hack for? We need to document under what
                ## conditions the following can occur.
                # HACK: the output data has already been created;
                #       this happens e.g. as a result of the async controller
                if name in incoming:
                    dataid = incoming[name]
                    data = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(dataid)
                    assert data is not None
                    out_data[name] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    #process change_format tags
                    if output.change_format:
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall('when'):
                                check = incoming.get(when_elem.get('input'), None)
                                if check is not None:
                                    if check == when_elem.get('value', None):
                                        ext = when_elem.get('format', ext)
                                else:
                                    check = when_elem.get('input_dataset', None)
                                    if check is not None:
                                        check = inp_data.get(check, None)
                                        if check is not None:
                                            if str(getattr(check, when_elem.get('attribute'))) == when_elem.get('value', None):
                                                ext = when_elem.get('format', ext)
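                    # The change_format loop above matches tool XML shaped
                    # roughly like (illustrative parameter names):
                    #   <change_format>
                    #       <when input="out_format" value="fasta" format="fasta" />
                    #   </change_format>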
                    data = trans.app.model.HistoryDatasetAssociation(
                        extension=ext,
                        create_dataset=True,
                        sa_session=trans.sa_session)
                    # Commit the dataset immediately so it gets a database-assigned unique id
                    trans.sa_session.add(data)
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions(
                        data.dataset, output_permissions)
                # Create an empty file immediately
                open(data.file_name, "w").close()
                # Fix permissions
                util.umask_fix_perms(data.file_name, trans.app.config.umask,
                                     0666)
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested.
                if output.metadata_source:
                    data.init_meta(copy_from=inp_data[output.metadata_source])
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str(input_dbkey)
                # Set state
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                if output.label:
                    params = make_dict_copy(incoming)
                    # wrapping the params allows the tool config to contain things like
                    # <outputs>
                    #     <data format="input" name="output" label="Blat on ${<input_param>.name}" />
                    # </outputs>
                    wrap_values(tool.inputs, params)
                    params['tool'] = tool
                    params['on_string'] = on_text
                    data.name = fill_template(output.label, context=params)
                else:
                    data.name = tool.name
                    if on_text:
                        data.name += (" on " + on_text)
                # Store output
                out_data[name] = data
                # Store all changes to database
                trans.sa_session.flush()