Example No. 1
 def __test_evaluation_with_path_rewrites(self):
     # Various things can cause dataset paths to be rewritten (Task
     # splitting, config.outputs_to_working_directory). This tests that
     # functionality.
     self._setup_test_bwa_job()
     job_path_1 = "%s/dataset_1.dat" % self.test_directory
     job_path_2 = "%s/dataset_2.dat" % self.test_directory
     self._set_compute_environment(
         input_paths=[DatasetPath(1, '/galaxy/files/dataset_1.dat', false_path=job_path_1)],
         output_paths=[DatasetPath(2, '/galaxy/files/dataset_2.dat', false_path=job_path_2)],
     )
     command_line, extra_filenames, _ = self.evaluator.build()
     self.assertEqual(command_line, "bwa --thresh=4 --in=%s --out=%s" % (job_path_1, job_path_2))
Example No. 2
def test_dataset_path():
    dataset_path_1 = DatasetPath(1, "/galaxy/database/files/dataset_1.dat")
    assert dataset_path_1.dataset_id == 1
    assert dataset_path_1.real_path == "/galaxy/database/files/dataset_1.dat"
    assert dataset_path_1.false_path is None
    assert dataset_path_1.mutable
    assert str(dataset_path_1) == "/galaxy/database/files/dataset_1.dat"

    dataset_path_2 = DatasetPath(
        2,
        "/galaxy/database/files/dataset_2.dat",
        false_path="/mnt/galaxyData/files/dataset_2.dat",
        mutable=False)
    assert dataset_path_2.dataset_id == 2
    assert dataset_path_2.real_path == "/galaxy/database/files/dataset_2.dat"
    assert dataset_path_2.false_path == "/mnt/galaxyData/files/dataset_2.dat"
    assert not dataset_path_2.mutable
    assert str(dataset_path_2) == "/mnt/galaxyData/files/dataset_2.dat"
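
The assertions above pin down the path-selection rule: str() on a DatasetPath yields the rewritten (false) path when one is set and the real path otherwise. Below is a minimal sketch of that rule using a hypothetical SimpleDatasetPath stand-in, not the actual Galaxy class:

class SimpleDatasetPath:
    # Hypothetical stand-in for illustration; the real DatasetPath carries more
    # state (mutability flags, dataset UUID, object store id, ...).
    def __init__(self, dataset_id, real_path, false_path=None, mutable=True):
        self.dataset_id = dataset_id
        self.real_path = real_path
        self.false_path = false_path
        self.mutable = mutable

    def __str__(self):
        # Command lines should see the job-local (false) path when a rewrite exists.
        return self.false_path if self.false_path is not None else self.real_path

assert str(SimpleDatasetPath(1, "/real/dataset_1.dat")) == "/real/dataset_1.dat"
assert str(SimpleDatasetPath(2, "/real/dataset_2.dat", false_path="/mnt/dataset_2.dat")) == "/mnt/dataset_2.dat"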
Example No. 3
def test_dataset_false_extra_files_path():
    dataset = MockDataset()

    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_files_path = "/new/path/dataset_123_files"
    dataset_path = DatasetPath(123,
                               MOCK_DATASET_PATH,
                               false_path=new_path,
                               false_extra_files_path=new_files_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    assert wrapper.extra_files_path == new_files_path
Example No. 4
 def get_input_path(self, dataset: DatasetInstance):
     real_path = dataset.file_name
     false_path = self.dataset_path_rewriter.rewrite_dataset_path(dataset, 'input')
     return DatasetPath(
         dataset.dataset.id,
         real_path=real_path,
         false_path=false_path,
         mutable=False,
         dataset_uuid=dataset.dataset.uuid,
         object_store_id=dataset.dataset.object_store_id,
     )
Example No. 5
    def compute_outputs(self):
        dataset_path_rewriter = self.dataset_path_rewriter

        job = self.job
        # Job output datasets are a combination of history, library, and jeha datasets.
        special = self.sa_session.query(JobExportHistoryArchive).filter_by(job=job).first()
        false_path = None

        results = []
        for da in job.output_datasets + job.output_library_datasets:
            da_false_path = dataset_path_rewriter.rewrite_dataset_path(da.dataset, 'output')
            mutable = da.dataset.dataset.external_filename is None
            dataset_path = DatasetPath(da.dataset.dataset.id, da.dataset.file_name, false_path=da_false_path, mutable=mutable)
            results.append((da.name, da.dataset, dataset_path))

        self._output_paths = [t[2] for t in results]
        self._output_hdas_and_paths = {t[0]: t[1:] for t in results}
        if special:
            false_path = dataset_path_rewriter.rewrite_dataset_path(special, 'output')
            dsp = DatasetPath(special.dataset.id, special.dataset.file_name, false_path)
            self._output_paths.append(dsp)
            self._output_hdas_and_paths["output_file"] = (special.fda, dsp)
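
The comprehensions that populate _output_paths and _output_hdas_and_paths build two views of the same results list: a flat list of output DatasetPath objects and a mapping from output name to its (dataset, path) pair. A small self-contained sketch of those shapes, with placeholder strings in place of real model objects:

# Placeholder tuples standing in for (da.name, da.dataset, dataset_path).
results = [
    ("out_file1", "hda_1", "path_1"),
    ("out_file2", "hda_2", "path_2"),
]
output_paths = [t[2] for t in results]                  # flat list of paths
output_hdas_and_paths = {t[0]: t[1:] for t in results}  # name -> (dataset, path)
assert output_paths == ["path_1", "path_2"]
assert output_hdas_and_paths["out_file1"] == ("hda_1", "path_1")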
Example No. 6
def test_dataset_false_extra_files_path():
    dataset = cast(DatasetInstance, MockDataset())

    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(dataset_path)))
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_files_path = "/new/path/dataset_123_files"
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(false_path=new_path, false_extra_files_path=new_files_path)))
    assert wrapper.extra_files_path == new_files_path
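
Both versions of this test establish the same fallback order: a false_extra_files_path on the DatasetPath (or compute environment) takes precedence, otherwise the dataset's own extra_files_path is used, and setting false_path alone does not change it. A hypothetical helper (not part of Galaxy) capturing that rule:

def resolve_extra_files_path(dataset_extra_files_path, false_extra_files_path=None):
    # The rewritten extra-files directory wins whenever one was provided.
    return false_extra_files_path or dataset_extra_files_path

assert resolve_extra_files_path("/data/dataset_123_files") == "/data/dataset_123_files"
assert resolve_extra_files_path("/data/dataset_123_files", "/new/path/dataset_123_files") == "/new/path/dataset_123_files"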
Example No. 7
    def metadata_command(self, output_datasets, output_collections=None):
        output_collections = output_collections or {}
        metadata_compute_strategy = get_metadata_compute_strategy(
            self.app.config, self.job.id)
        self.metadata_compute_strategy = metadata_compute_strategy

        exec_dir = None
        dataset_files_path = self.app.model.Dataset.file_path
        config_root = self.app.config.root
        config_file = None
        datatypes_config = os.path.join(self.job_working_directory, 'metadata',
                                        'registry.xml')
        safe_makedirs(os.path.join(self.job_working_directory, 'metadata'))
        self.app.datatypes_registry.to_xml_file(path=datatypes_config)
        job_metadata = os.path.join(self.tool_working_directory,
                                    self.tool.provided_metadata_file)
        output_fnames = [
            DatasetPath(o.dataset.id, o.dataset.file_name, None)
            for o in output_datasets.values()
        ]
        command = metadata_compute_strategy.setup_external_metadata(
            output_datasets,
            output_collections,
            self.app.model.session,
            exec_dir=exec_dir,
            tmp_dir=self.job_working_directory,  # set in jobs/runners.py; better if this were the default.
            dataset_files_path=dataset_files_path,
            config_root=config_root,
            config_file=config_file,
            datatypes_config=datatypes_config,
            job_metadata=job_metadata,
            output_fnames=output_fnames,
            tool=self.tool,
            job=self.job,
            object_store_conf=self.app.object_store.to_dict(),
            max_metadata_value_size=10000)
        return command
Example No. 8
    def execute_via_app(self,
                        tool,
                        app,
                        session_id,
                        history_id,
                        user=None,
                        incoming=None,
                        set_output_hid=False,
                        overwrite=True,
                        history=None,
                        job_params=None):
        """
        Execute using application.
        """
        incoming = incoming or {}
        for name, value in incoming.items():
            # Why are we looping here and not just using a fixed input name? Needed?
            if not name.startswith("input"):
                continue
            if isinstance(value, app.model.HistoryDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'hda'
                break
            elif isinstance(value, app.model.LibraryDatasetDatasetAssociation):
                dataset = value
                dataset_name = name
                type = 'ldda'
                break
            else:
                raise Exception(
                    'The dataset to set metadata on could not be determined.')

        sa_session = app.model.context

        # Create the job object
        job = app.model.Job()
        job.galaxy_version = app.config.version_major
        job.session_id = session_id
        job.history_id = history_id
        job.tool_id = tool.id
        if user:
            job.user_id = user.id
        if job_params:
            job.params = dumps(job_params)
        start_job_state = job.state  # should be job.states.NEW
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except AttributeError:
            job.tool_version = "1.0.1"
        job.dynamic_tool = tool.dynamic_tool
        # We need to set the job state to something other than NEW, or else job
        # tracking in the database could pick it up before the input/output
        # parameters have been added.
        job.state = job.states.WAITING
        sa_session.add(job)
        sa_session.flush()  # ensure job.id is available

        # add parameters to job_parameter table
        # Store original dataset state, so we can restore it. A separate table might be better (no chance of 'losing' the original state)?
        incoming['__ORIGINAL_DATASET_STATE__'] = dataset.state
        input_paths = [
            DatasetPath(dataset.id, real_path=dataset.file_name, mutable=False)
        ]
        app.object_store.create(job,
                                base_dir='job_work',
                                dir_only=True,
                                extra_dir=str(job.id))
        job_working_dir = app.object_store.get_filename(job,
                                                        base_dir='job_work',
                                                        dir_only=True,
                                                        extra_dir=str(job.id))
        datatypes_config = os.path.join(job_working_dir, 'registry.xml')
        app.datatypes_registry.to_xml_file(path=datatypes_config)
        external_metadata_wrapper = get_metadata_compute_strategy(
            app.config, job.id, tool_id=tool.id)
        output_datasets_dict = {
            dataset_name: dataset,
        }
        validate_outputs = asbool(incoming.get("validate", False))
        cmd_line = external_metadata_wrapper.setup_external_metadata(
            output_datasets_dict, {},
            sa_session,
            exec_dir=None,
            tmp_dir=job_working_dir,
            dataset_files_path=app.model.Dataset.file_path,
            output_fnames=input_paths,
            config_root=app.config.root,
            config_file=app.config.config_file,
            datatypes_config=datatypes_config,
            job_metadata=os.path.join(job_working_dir, 'working',
                                      tool.provided_metadata_file),
            include_command=False,
            max_metadata_value_size=app.config.max_metadata_value_size,
            validate_outputs=validate_outputs,
            job=job,
            kwds={'overwrite': overwrite})
        incoming['__SET_EXTERNAL_METADATA_COMMAND_LINE__'] = cmd_line
        for name, value in tool.params_to_strings(incoming, app).items():
            job.add_parameter(name, value)
        # add the dataset to job_to_input_dataset table
        if type == 'hda':
            job.add_input_dataset(dataset_name, dataset)
        elif type == 'ldda':
            job.add_input_library_dataset(dataset_name, dataset)
        # Need a special state here to show that metadata is being set while still
        # allowing the job to run: if the state were 'running', the set-metadata job
        # would never start, since it would wait for its input (the dataset whose
        # metadata is being set) to reach a ready state.
        dataset._state = dataset.states.SETTING_METADATA
        job.state = start_job_state  # job inputs have been configured, restore initial job state
        sa_session.flush()

        # clear e.g. converted files
        dataset.datatype.before_setting_metadata(dataset)

        return job, {}