def __test_evaluation_with_path_rewrites(self):
    # Various things can cause dataset paths to be rewritten (Task
    # splitting, config.outputs_to_working_directory). This tests that
    # functionality.
    self._setup_test_bwa_job()
    job_path_1 = "%s/dataset_1.dat" % self.test_directory
    job_path_2 = "%s/dataset_2.dat" % self.test_directory
    self._set_compute_environment(
        input_paths=[DatasetPath(1, '/galaxy/files/dataset_1.dat', false_path=job_path_1)],
        output_paths=[DatasetPath(2, '/galaxy/files/dataset_2.dat', false_path=job_path_2)],
    )
    command_line, extra_filenames, _ = self.evaluator.build()
    self.assertEqual(command_line, "bwa --thresh=4 --in=%s --out=%s" % (job_path_1, job_path_2))
def test_dataset_path():
    dataset_path_1 = DatasetPath(1, "/galaxy/database/files/dataset_1.dat")
    assert dataset_path_1.dataset_id == 1
    assert dataset_path_1.real_path == "/galaxy/database/files/dataset_1.dat"
    assert dataset_path_1.false_path is None
    assert dataset_path_1.mutable
    assert str(dataset_path_1) == "/galaxy/database/files/dataset_1.dat"

    dataset_path_2 = DatasetPath(
        2,
        "/galaxy/database/files/dataset_2.dat",
        false_path="/mnt/galaxyData/files/dataset_2.dat",
        mutable=False,
    )
    assert dataset_path_2.dataset_id == 2
    assert dataset_path_2.real_path == "/galaxy/database/files/dataset_2.dat"
    assert dataset_path_2.false_path == "/mnt/galaxyData/files/dataset_2.dat"
    assert not dataset_path_2.mutable
    assert str(dataset_path_2) == "/mnt/galaxyData/files/dataset_2.dat"
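
# Illustrative sketch only (not the real Galaxy class): a minimal object exhibiting the
# behaviour the test above asserts -- str() returns false_path when one is set, otherwise
# real_path. The class and attribute defaults here are assumptions for illustration.
class _DatasetPathSketch:
    def __init__(self, dataset_id, real_path, false_path=None, mutable=True):
        self.dataset_id = dataset_id
        self.real_path = real_path
        self.false_path = false_path
        self.mutable = mutable

    def __str__(self):
        # Prefer the rewritten (false) path when the job runs against a staged copy.
        return self.false_path if self.false_path is not None else self.real_path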
def test_dataset_false_extra_files_path():
    dataset = MockDataset()
    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_files_path = "/new/path/dataset_123_files"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path, false_extra_files_path=new_files_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    assert wrapper.extra_files_path == new_files_path
def get_input_path(self, dataset: DatasetInstance):
    real_path = dataset.file_name
    false_path = self.dataset_path_rewriter.rewrite_dataset_path(dataset, 'input')
    return DatasetPath(
        dataset.dataset.id,
        real_path=real_path,
        false_path=false_path,
        mutable=False,
        dataset_uuid=dataset.dataset.uuid,
        object_store_id=dataset.dataset.object_store_id,
    )
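
# Hedged usage sketch: a caller (e.g. a job wrapper assembling a command line) might
# collect rewritten input paths like this. `compute_env` and `job_input_datasets` are
# hypothetical names, not part of the method above.
#
#     input_paths = [compute_env.get_input_path(hda) for hda in job_input_datasets]
#     command_paths = [str(p) for p in input_paths]  # str() yields false_path when set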
def compute_outputs(self):
    dataset_path_rewriter = self.dataset_path_rewriter

    job = self.job
    # Job output datasets are combination of history, library, and jeha datasets.
    special = self.sa_session.query(JobExportHistoryArchive).filter_by(job=job).first()
    false_path = None

    results = []
    for da in job.output_datasets + job.output_library_datasets:
        da_false_path = dataset_path_rewriter.rewrite_dataset_path(da.dataset, 'output')
        mutable = da.dataset.dataset.external_filename is None
        dataset_path = DatasetPath(da.dataset.dataset.id, da.dataset.file_name, false_path=da_false_path, mutable=mutable)
        results.append((da.name, da.dataset, dataset_path))

    self._output_paths = [t[2] for t in results]
    self._output_hdas_and_paths = {t[0]: t[1:] for t in results}
    if special:
        false_path = dataset_path_rewriter.rewrite_dataset_path(special, 'output')
        dsp = DatasetPath(special.dataset.id, special.dataset.file_name, false_path)
        self._output_paths.append(dsp)
        self._output_hdas_and_paths["output_file"] = (special.fda, dsp)
def test_dataset_false_extra_files_path():
    dataset = cast(DatasetInstance, MockDataset())
    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(dataset_path)))
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH

    new_files_path = "/new/path/dataset_123_files"
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(false_path=new_path, false_extra_files_path=new_files_path)))
    assert wrapper.extra_files_path == new_files_path
def metadata_command(self, output_datasets, output_collections=None):
    output_collections = output_collections or {}
    metadata_compute_strategy = get_metadata_compute_strategy(self.app.config, self.job.id)
    self.metadata_compute_strategy = metadata_compute_strategy
    exec_dir = None
    dataset_files_path = self.app.model.Dataset.file_path
    config_root = self.app.config.root
    config_file = None
    datatypes_config = os.path.join(self.job_working_directory, 'metadata', 'registry.xml')
    safe_makedirs(os.path.join(self.job_working_directory, 'metadata'))
    self.app.datatypes_registry.to_xml_file(path=datatypes_config)
    job_metadata = os.path.join(self.tool_working_directory, self.tool.provided_metadata_file)
    output_fnames = [DatasetPath(o.dataset.id, o.dataset.file_name, None) for o in output_datasets.values()]
    command = metadata_compute_strategy.setup_external_metadata(
        output_datasets,
        output_collections,
        self.app.model.session,
        exec_dir=exec_dir,
        tmp_dir=self.job_working_directory,  # set in jobs/runners.py - better if was default
        dataset_files_path=dataset_files_path,
        config_root=config_root,
        config_file=config_file,
        datatypes_config=datatypes_config,
        job_metadata=job_metadata,
        output_fnames=output_fnames,
        tool=self.tool,
        job=self.job,
        object_store_conf=self.app.object_store.to_dict(),
        max_metadata_value_size=10000,
    )
    return command
def execute_via_app(self, tool, app, session_id, history_id, user=None,
                    incoming=None, set_output_hid=False, overwrite=True,
                    history=None, job_params=None):
    """
    Execute using application.
    """
    incoming = incoming or {}
    for name, value in incoming.items():
        # Why are we looping here and not just using a fixed input name? Needed?
        if not name.startswith("input"):
            continue
        if isinstance(value, app.model.HistoryDatasetAssociation):
            dataset = value
            dataset_name = name
            type = 'hda'
            break
        elif isinstance(value, app.model.LibraryDatasetDatasetAssociation):
            dataset = value
            dataset_name = name
            type = 'ldda'
            break
        else:
            raise Exception('The dataset to set metadata on could not be determined.')

    sa_session = app.model.context

    # Create the job object
    job = app.model.Job()
    job.galaxy_version = app.config.version_major
    job.session_id = session_id
    job.history_id = history_id
    job.tool_id = tool.id
    if user:
        job.user_id = user.id
    if job_params:
        job.params = dumps(job_params)
    start_job_state = job.state  # should be job.states.NEW
    try:
        # For backward compatibility, some tools may not have versions yet.
        job.tool_version = tool.version
    except AttributeError:
        job.tool_version = "1.0.1"
    job.dynamic_tool = tool.dynamic_tool
    job.state = job.states.WAITING  # we need to set job state to something other than NEW, or else when tracking jobs in db it will be picked up before we have added input / output parameters
    sa_session.add(job)
    sa_session.flush()  # ensure job.id is available

    # add parameters to job_parameter table
    # Store original dataset state, so we can restore it. A separate table might be better (no chance of 'losing' the original state)?
    incoming['__ORIGINAL_DATASET_STATE__'] = dataset.state
    input_paths = [DatasetPath(dataset.id, real_path=dataset.file_name, mutable=False)]
    app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
    job_working_dir = app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
    datatypes_config = os.path.join(job_working_dir, 'registry.xml')
    app.datatypes_registry.to_xml_file(path=datatypes_config)
    external_metadata_wrapper = get_metadata_compute_strategy(app.config, job.id, tool_id=tool.id)
    output_datatasets_dict = {
        dataset_name: dataset,
    }
    validate_outputs = asbool(incoming.get("validate", False))
    cmd_line = external_metadata_wrapper.setup_external_metadata(
        output_datatasets_dict,
        {},
        sa_session,
        exec_dir=None,
        tmp_dir=job_working_dir,
        dataset_files_path=app.model.Dataset.file_path,
        output_fnames=input_paths,
        config_root=app.config.root,
        config_file=app.config.config_file,
        datatypes_config=datatypes_config,
        job_metadata=os.path.join(job_working_dir, 'working', tool.provided_metadata_file),
        include_command=False,
        max_metadata_value_size=app.config.max_metadata_value_size,
        validate_outputs=validate_outputs,
        job=job,
        kwds={'overwrite': overwrite},
    )
    incoming['__SET_EXTERNAL_METADATA_COMMAND_LINE__'] = cmd_line
    for name, value in tool.params_to_strings(incoming, app).items():
        job.add_parameter(name, value)
    # add the dataset to job_to_input_dataset table
    if type == 'hda':
        job.add_input_dataset(dataset_name, dataset)
    elif type == 'ldda':
        job.add_input_library_dataset(dataset_name, dataset)
    # Need a special state here to show that metadata is being set and also allow the job to run,
    # i.e. if state was set to 'running' the set metadata job would never run, as it would wait for
    # input (the dataset to set metadata on) to be in a ready state
    dataset._state = dataset.states.SETTING_METADATA
    job.state = start_job_state  # job inputs have been configured, restore initial job state
    sa_session.flush()

    # clear e.g. converted files
    dataset.datatype.before_setting_metadata(dataset)

    return job, {}