Example #1
    def metadata_command(self, output_datasets, output_collections=None):
        output_collections = output_collections or {}
        metadata_compute_strategy = get_metadata_compute_strategy(
            self.app.config, self.job.id)
        self.metadata_compute_strategy = metadata_compute_strategy

        exec_dir = None
        dataset_files_path = self.app.model.Dataset.file_path
        config_root = self.app.config.root
        config_file = None
        datatypes_config = os.path.join(self.job_working_directory, 'metadata',
                                        'registry.xml')
        safe_makedirs(os.path.join(self.job_working_directory, 'metadata'))
        self.app.datatypes_registry.to_xml_file(path=datatypes_config)
        job_metadata = os.path.join(self.tool_working_directory,
                                    self.tool.provided_metadata_file)
        output_fnames = [
            DatasetPath(o.dataset.id, o.dataset.file_name, None)
            for o in output_datasets.values()
        ]
        command = metadata_compute_strategy.setup_external_metadata(
            output_datasets,
            output_collections,
            self.app.model.session,
            exec_dir=exec_dir,
            tmp_dir=self.job_working_directory,  # set in jobs/runners.py - better if it were the default.
            dataset_files_path=dataset_files_path,
            config_root=config_root,
            config_file=config_file,
            datatypes_config=datatypes_config,
            job_metadata=job_metadata,
            output_fnames=output_fnames,
            tool=self.tool,
            job=self.job,
            object_store_conf=self.app.object_store.to_dict(),
            max_metadata_value_size=10000)
        return command
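To make the flow above easier to follow, here is a minimal, self-contained sketch of the pattern: a compute strategy hands back a shell snippet, and the caller appends it after the tool command so metadata collection runs inside the same job script. All names below (FakeStrategy, build_job_script, the 'python metadata/set.py' command) are illustrative stand-ins, not Galaxy's API.

    import os

    class FakeStrategy:
        """Illustrative stand-in for a metadata compute strategy."""

        def setup_external_metadata(self, metadata_dir):
            # The real strategy writes its config files into metadata_dir
            # and returns the command line that collects the metadata.
            return 'python metadata/set.py'

    def build_job_script(job_working_directory, tool_command):
        metadata_dir = os.path.join(job_working_directory, 'metadata')
        os.makedirs(metadata_dir, exist_ok=True)
        metadata_command = FakeStrategy().setup_external_metadata(metadata_dir)
        # Run the tool first, then collect metadata in the same script.
        return '{} && {}'.format(tool_command, metadata_command)

    print(build_job_script('/tmp/job_1', './tool.sh'))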
Example #2
    def execute_via_app(self,
                        tool,
                        app,
                        session_id,
                        history_id,
                        user=None,
                        incoming=None,
                        set_output_hid=False,
                        overwrite=True,
                        history=None,
                        job_params=None):
        """
        Execute using application.
        """
        incoming = incoming or {}
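        # Locate the dataset to set metadata on: the first "input*" parameter
        # that is an HDA or LDDA wins; any other "input*" value raises. This
        # assumes at least one "input*" parameter is present in `incoming`.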
        for name, value in incoming.items():
            # Why are we looping here and not just using a fixed input name? Needed?
            if not name.startswith("input"):
                continue
            if isinstance(value, app.model.HistoryDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'hda'
                break
            elif isinstance(value, app.model.LibraryDatasetDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'ldda'
                break
            else:
                raise Exception(
                    'The dataset to set metadata on could not be determined.')

        sa_session = app.model.context

        # Create the job object
        job = app.model.Job()
        job.galaxy_version = app.config.version_major
        job.session_id = session_id
        job.history_id = history_id
        job.tool_id = tool.id
        if user:
            job.user_id = user.id
        if job_params:
            job.params = dumps(job_params)
        start_job_state = job.state  # should be job.states.NEW
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except AttributeError:
            job.tool_version = "1.0.1"
        job.dynamic_tool = tool.dynamic_tool
        # Set the job state to something other than NEW, or else the job
        # tracker in the database may pick it up before the input/output
        # parameters have been added.
        job.state = job.states.WAITING
        sa_session.add(job)
        sa_session.flush()  # ensure job.id is available

        # add parameters to job_parameter table
        # Store original dataset state, so we can restore it. A separate table might be better (no chance of 'losing' the original state)?
        incoming['__ORIGINAL_DATASET_STATE__'] = dataset.state
        input_paths = [
            DatasetPath(dataset.id, real_path=dataset.file_name, mutable=False)
        ]
        app.object_store.create(job,
                                base_dir='job_work',
                                dir_only=True,
                                extra_dir=str(job.id))
        job_working_dir = app.object_store.get_filename(job,
                                                        base_dir='job_work',
                                                        dir_only=True,
                                                        extra_dir=str(job.id))
        datatypes_config = os.path.join(job_working_dir, 'registry.xml')
        app.datatypes_registry.to_xml_file(path=datatypes_config)
        external_metadata_wrapper = get_metadata_compute_strategy(
            app.config, job.id, tool_id=tool.id)
        output_datasets_dict = {
            dataset_name: dataset,
        }
        validate_outputs = asbool(incoming.get("validate", False))
        cmd_line = external_metadata_wrapper.setup_external_metadata(
            output_datasets_dict, {},
            sa_session,
            exec_dir=None,
            tmp_dir=job_working_dir,
            dataset_files_path=app.model.Dataset.file_path,
            output_fnames=input_paths,
            config_root=app.config.root,
            config_file=app.config.config_file,
            datatypes_config=datatypes_config,
            job_metadata=os.path.join(job_working_dir, 'working',
                                      tool.provided_metadata_file),
            include_command=False,
            max_metadata_value_size=app.config.max_metadata_value_size,
            validate_outputs=validate_outputs,
            job=job,
            kwds={'overwrite': overwrite})
        incoming['__SET_EXTERNAL_METADATA_COMMAND_LINE__'] = cmd_line
        for name, value in tool.params_to_strings(incoming, app).items():
            job.add_parameter(name, value)
        # add the dataset to job_to_input_dataset table
        if type == 'hda':
            job.add_input_dataset(dataset_name, dataset)
        elif type == 'ldda':
            job.add_input_library_dataset(dataset_name, dataset)
        # Need a special state here to show that metadata is being set while
        # still allowing the job to run: if the state were 'running', the set
        # metadata job would never start, as it would wait for its input (the
        # dataset to set metadata on) to reach a ready state.
        dataset._state = dataset.states.SETTING_METADATA
        job.state = start_job_state  # job inputs have been configured, restore initial job state
        sa_session.flush()

        # clear e.g. converted files
        dataset.datatype.before_setting_metadata(dataset)

        return job, {}
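The __ORIGINAL_DATASET_STATE__ bookkeeping above is easy to miss: the dataset's current state is serialized into the job parameters so that, once metadata has been set, the finishing code can restore it. A minimal, self-contained sketch of that round trip (names and values are illustrative only):

    from json import dumps, loads

    def stash_original_state(incoming, dataset_state):
        # Mirrors the pattern above: record the state before switching the
        # dataset to SETTING_METADATA, then persist it with the job params.
        incoming['__ORIGINAL_DATASET_STATE__'] = dataset_state
        return dumps(incoming)

    def restore_original_state(serialized_params):
        # What the job-finish code would read back to restore the dataset.
        return loads(serialized_params)['__ORIGINAL_DATASET_STATE__']

    params_blob = stash_original_state({'input1': 'hda_42'}, 'ok')
    assert restore_original_state(params_blob) == 'ok'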