def setUp(self):
    super(TestStager, self).setUp()
    from .test_utils import get_test_tool
    self.tool = get_test_tool()
    self.client = MockClient(self.temp_directory, self.tool)
    inputs = self.__setup_inputs()
    self.client_job_description = ClientJobDescription(
        tool=self.tool,
        command_line="run_test.exe",
        config_files=[],
        input_files=inputs,
        client_outputs=ClientOutputs("/galaxy/database/working_directory/1", []),
        working_directory="/galaxy/database/working_directory/1",
        dependencies_description=DependenciesDescription(requirements=[TEST_REQUIREMENT_1, TEST_REQUIREMENT_2]),
        env=[TEST_ENV_1],
        rewrite_paths=False,
    )
    self.job_config = dict(
        configs_directory="/pulsar/staging/1/configs",
        working_directory="/pulsar/staging/1/working",
        outputs_directory="/pulsar/staging/1/outputs",
        system_properties=dict(
            separator="\\",
        ),
    )
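
# Illustrative sketch (an assumption, not from the source): the "separator"
# system property above presumably models a remote Pulsar server whose path
# separator differs from the client's (here a Windows-style "\\"). Remote
# paths under the configured staging directories would then be assembled
# roughly like this; "config.txt" is a hypothetical file name.
def _example_remote_config_path(job_config):
    separator = job_config["system_properties"]["separator"]
    return separator.join([job_config["configs_directory"], "config.txt"])
# _example_remote_config_path(self.job_config) == "/pulsar/staging/1/configs\\config.txt"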
def queue_job(self, job_wrapper):
    job_destination = job_wrapper.job_destination
    self._populate_parameter_defaults(job_destination)

    command_line, client, remote_job_config, compute_environment = self.__prepare_job(job_wrapper, job_destination)

    if not command_line:
        return

    try:
        dependencies_description = PulsarJobRunner.__dependencies_description(client, job_wrapper)
        rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
        unstructured_path_rewrites = {}
        output_names = []
        if compute_environment:
            unstructured_path_rewrites = compute_environment.unstructured_path_rewrites
            output_names = compute_environment.output_names()

        if self.app.config.metadata_strategy == "legacy":
            # Drop this branch in 19.09.
            metadata_directory = job_wrapper.working_directory
        else:
            metadata_directory = os.path.join(job_wrapper.working_directory, "metadata")

        client_job_description = ClientJobDescription(
            command_line=command_line,
            input_files=self.get_input_files(job_wrapper),
            client_outputs=self.__client_outputs(client, job_wrapper),
            working_directory=job_wrapper.tool_working_directory,
            metadata_directory=metadata_directory,
            tool=job_wrapper.tool,
            config_files=job_wrapper.extra_filenames,
            dependencies_description=dependencies_description,
            env=client.env,
            rewrite_paths=rewrite_paths,
            arbitrary_files=unstructured_path_rewrites,
            touch_outputs=output_names,
        )
        job_id = pulsar_submit_job(client, client_job_description, remote_job_config)
        log.info("Pulsar job submitted with job_id %s" % job_id)
        job_wrapper.set_job_destination(job_destination, job_id)
        job_wrapper.change_state(model.Job.states.QUEUED)
    except Exception:
        job_wrapper.fail("failure running job", exception=True)
        log.exception("failure running job %d", job_wrapper.job_id)
        return

    pulsar_job_state = AsynchronousJobState()
    pulsar_job_state.job_wrapper = job_wrapper
    pulsar_job_state.job_id = job_id
    pulsar_job_state.old_state = True
    pulsar_job_state.running = False
    pulsar_job_state.job_destination = job_destination
    self.monitor_job(pulsar_job_state)
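
# Shape sketch (an assumption, not from the source): the unstructured_path_rewrites
# mapping passed as arbitrary_files above appears to be a dict from local
# "unstructured" paths (e.g. index files referenced by tool parameters) to
# their remote rewrites. Both key and value here are made-up placeholders,
# patterned on the rewritten-index assertion in the integration test below.
example_unstructured_path_rewrites = {
    "/galaxy/indices/bwa/human.fa": "staging/123456/unstructured/abc123/bwa/human.fa",
}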
def _run_client_for_job(args):
    if args.job_id is None:
        args.job_id = str(uuid.uuid4())

    output_patterns = []
    output_patterns.extend(args.output_pattern)
    for output in args.output:
        output_patterns.append(fnmatch.translate(output))

    client_options = extract_client_options(args)
    client, client_manager = client_info(args, client_options)
    try:
        working_directory = args.working_directory
        client_outputs = ClientOutputs(
            working_directory=working_directory,
            dynamic_outputs=output_patterns,
        )
        job_description = ClientJobDescription(
            command_line=args.command,
            working_directory=working_directory,
            client_outputs=client_outputs,
        )
        submit_job(client, job_description)
        waiter = Waiter(client, client_manager)
        result_status = waiter.wait()
        pulsar_outputs = PulsarOutputs.from_status_response(result_status)
        if args.result_json:
            with open(args.result_json, "w") as f:
                f.write(json_dumps(result_status))
        finish_args = dict(
            client=client,
            job_completed_normally=True,
            cleanup_job=args.cleanup,
            client_outputs=client_outputs,
            pulsar_outputs=pulsar_outputs,
        )
        failed = finish_job(**finish_args)
        return failed
    finally:
        client_manager.shutdown()
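
# A minimal driving sketch (an assumption, not part of the source): the
# attribute names mirror those _run_client_for_job reads above. Whatever
# connection options extract_client_options/client_info consume (e.g. a
# Pulsar URL) are omitted, so this is illustrative rather than runnable
# against a real server.
from argparse import Namespace

example_args = Namespace(
    job_id=None,                                  # a UUID is generated when None
    output_pattern=[],                            # regex patterns, used as-is
    output=["*.dat"],                             # shell globs, translated via fnmatch
    working_directory="/tmp/pulsar_client_work",  # hypothetical path
    command="echo 'Hello Pulsar'",
    result_json=None,                             # set to a path to dump the raw status
    cleanup=True,
)
# failed = _run_client_for_job(example_args)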
def run(options):
    waiter = None
    try:
        temp_directory = tempfile.mkdtemp()
        temp_index_dir = os.path.join(temp_directory, "idx", "bwa")
        temp_index_dir_sibbling = os.path.join(temp_directory, "idx", "seq")
        temp_shared_dir = os.path.join(temp_directory, "shared", "test1")
        temp_work_dir = os.path.join(temp_directory, "w")
        temp_tool_dir = os.path.join(temp_directory, "t")
        __makedirs([temp_tool_dir, temp_work_dir, temp_index_dir, temp_index_dir_sibbling, temp_shared_dir])

        temp_input_path = os.path.join(temp_directory, "dataset_0.dat")
        temp_input_extra_path = os.path.join(temp_directory, "dataset_0_files", "input_subdir", "extra")
        temp_index_path = os.path.join(temp_index_dir, "human.fa")
        temp_config_path = os.path.join(temp_work_dir, "config.txt")
        temp_tool_path = os.path.join(temp_directory, "t", "script.py")
        temp_output_path = os.path.join(temp_directory, "dataset_1.dat")
        temp_output2_path = os.path.join(temp_directory, "dataset_2.dat")
        temp_output3_path = os.path.join(temp_directory, "dataset_3.dat")
        temp_output4_path = os.path.join(temp_directory, "dataset_4.dat")
        temp_version_output_path = os.path.join(temp_directory, "GALAXY_VERSION_1234")
        temp_output_workdir_destination = os.path.join(temp_directory, "dataset_77.dat")
        temp_output_workdir = os.path.join(temp_work_dir, "env_test")
        temp_output_workdir_destination2 = os.path.join(temp_directory, "dataset_78.dat")
        temp_output_workdir2 = os.path.join(temp_work_dir, "rewrite_action_test")

        __write_to_file(temp_input_path, b"Hello world input!!@!")
        __write_to_file(temp_input_extra_path, b"INPUT_EXTRA_CONTENTS")
        __write_to_file(temp_config_path, EXPECTED_OUTPUT)
        __write_to_file(temp_tool_path, TEST_SCRIPT)
        __write_to_file(temp_index_path, b"AGTC")
        # Implicit files that should also get transferred since depth > 0.
        __write_to_file("%s.fai" % temp_index_path, b"AGTC")
        __write_to_file(os.path.join(temp_index_dir_sibbling, "human_full_seqs"), b"AGTC")

        empty_input = u"/foo/bar/x"

        test_unicode = getattr(options, "test_unicode", False)  # TODO: switch this in integration tests
        cmd_text = EXAMPLE_UNICODE_TEXT if test_unicode else "Hello World"
        command_line_params = (
            temp_tool_path,
            temp_config_path,
            temp_input_path,
            temp_output_path,
            empty_input,
            temp_output2_path,
            cmd_text,
            temp_output3_path,
            temp_input_extra_path,
            temp_version_output_path,
            temp_index_path,
            temp_output4_path,
            temp_shared_dir,
        )
        assert os.path.exists(temp_index_path)
        command_line = u'python %s "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s"' % command_line_params

        config_files = [temp_config_path]
        input_files = [temp_input_path, empty_input]
        output_files = [
            temp_output_path,
            temp_output2_path,
            temp_output3_path,
            temp_output4_path,
            temp_output_workdir_destination,
            temp_output_workdir_destination2,
        ]
        client, client_manager = __client(temp_directory, options)
        waiter = Waiter(client, client_manager)
        client_outputs = ClientOutputs(
            working_directory=temp_work_dir,
            work_dir_outputs=[
                (temp_output_workdir, temp_output_workdir_destination),
                (temp_output_workdir2, temp_output_workdir_destination2),
            ],
            output_files=output_files,
            version_file=temp_version_output_path,
        )
        job_description = ClientJobDescription(
            command_line=command_line,
            tool=MockTool(temp_tool_dir),
            config_files=config_files,
            input_files=input_files,
            client_outputs=client_outputs,
            working_directory=temp_work_dir,
            **__extra_job_description_kwargs(options)
        )
        submit_job(client, job_description)
        result_status = waiter.wait()

        __finish(options, client, client_outputs, result_status)

        __assert_contents(temp_output_path, EXPECTED_OUTPUT, result_status)
        __assert_contents(temp_output2_path, cmd_text, result_status)
        __assert_contents(os.path.join(temp_work_dir, "galaxy.json"), b"GALAXY_JSON", result_status)
        __assert_contents(os.path.join(temp_directory, "dataset_1_files", "extra"), b"EXTRA_OUTPUT_CONTENTS", result_status)
        if getattr(options, "test_rewrite_action", False):
            __assert_contents(temp_output_workdir_destination2, os.path.join(temp_directory, "shared2", "test1"), result_status)
        if job_description.env:
            __assert_contents(temp_output_workdir_destination, b"TEST_ENV_VALUE", result_status)
        __assert_contents(temp_version_output_path, b"1.0.1", result_status)
        if job_description.dependencies_description:
            __assert_contents(temp_output3_path, "moo_override", result_status)
        else:
            __assert_contents(temp_output3_path, "moo_default", result_status)
        if client.default_file_action != "none":
            with open(temp_output4_path, encoding="utf-8") as f:
                rewritten_index_path = f.read()
            # The path written to this file will differ between Windows and Linux.
            assert re.search(r"123456[/\\]unstructured[/\\]\w+[/\\]bwa[/\\]human.fa", rewritten_index_path) is not None
        __exercise_errors(options, client, temp_output_path, temp_directory)
        client_manager.shutdown()
    except BaseException:
        if not options.suppress_output:
            traceback.print_exc()
        raise
    finally:
        if waiter is not None:
            waiter.shutdown()
        shutil.rmtree(temp_directory)
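
# Sketch of the contract implied by the __assert_contents calls above (an
# assumption, not the source's implementation): compare a produced file
# against expected contents and surface the job's result status on mismatch.
# Some call sites pass str, others bytes, so both are normalized here.
def _example_assert_contents(path, expected_contents, result_status):
    if isinstance(expected_contents, str):
        expected_contents = expected_contents.encode("utf-8")
    with open(path, "rb") as f:
        contents = f.read()
    assert contents == expected_contents, \
        "%s contained %r, expected %r (status %s)" % (path, contents, expected_contents, result_status)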
def run(options):
    waiter = None
    try:
        temp_directory = tempfile.mkdtemp()
        temp_index_dir = os.path.join(temp_directory, "idx", "bwa")
        temp_index_dir_sibbling = os.path.join(temp_directory, "idx", "seq")
        temp_shared_dir = os.path.join(temp_directory, "shared", "test1")
        temp_work_dir = os.path.join(temp_directory, "w")
        temp_metadata_dir = os.path.join(temp_directory, "m")
        temp_false_working_dir = os.path.join(temp_metadata_dir, "working")
        temp_tool_dir = os.path.join(temp_directory, "t")
        __makedirs([
            temp_tool_dir,
            temp_work_dir,
            temp_index_dir,
            temp_index_dir_sibbling,
            temp_shared_dir,
            temp_metadata_dir,
            temp_false_working_dir,
        ])

        temp_input_path = os.path.join(temp_directory, "dataset_0.dat")
        temp_input_extra_path = os.path.join(temp_directory, "dataset_0_files", "input_subdir", "extra")
        temp_input_metadata_path = os.path.join(temp_directory, "metadata", "12312231231231.dat")
        temp_index_path = os.path.join(temp_index_dir, "human.fa")
        temp_config_path = os.path.join(temp_work_dir, "config.txt")
        temp_tool_path = os.path.join(temp_directory, "t", "script.py")
        temp_output_path = os.path.join(temp_directory, "dataset_1.dat")
        temp_output2_path = os.path.join(temp_directory, "dataset_2.dat")
        temp_output3_path = os.path.join(temp_directory, "dataset_3.dat")
        temp_output4_path = os.path.join(temp_directory, "dataset_4.dat")
        temp_version_output_path = os.path.join(temp_directory, "GALAXY_VERSION_1234")
        temp_output_workdir_destination = os.path.join(temp_directory, "dataset_77.dat")
        temp_output_workdir = os.path.join(temp_work_dir, "env_test")
        temp_metadata_path = os.path.join(temp_metadata_dir, "metadata_test123")
        temp_output_workdir_destination2 = os.path.join(temp_directory, "dataset_78.dat")
        temp_output_workdir2 = os.path.join(temp_work_dir, "rewrite_action_test")

        __write_to_file(temp_input_path, b"Hello world input!!@!")
        __write_to_file(temp_input_extra_path, b"INPUT_EXTRA_CONTENTS")
        __write_to_file(temp_input_metadata_path, b"INPUT METADATA CONTENTS...")
        __write_to_file(temp_config_path, EXPECTED_OUTPUT)
        __write_to_file(temp_metadata_path, "meta input")
        __write_to_file(temp_tool_path, TEST_SCRIPT)
        __write_to_file(temp_index_path, b"AGTC")
        # Implicit files that should also get transferred since depth > 0.
        __write_to_file("%s.fai" % temp_index_path, b"AGTC")
        __write_to_file(os.path.join(temp_index_dir_sibbling, "human_full_seqs"), b"AGTC")

        empty_input = u"/foo/bar/x"

        test_unicode = getattr(options, "test_unicode", False)  # TODO: switch this in integration tests
        cmd_text = EXAMPLE_UNICODE_TEXT if test_unicode else "Hello World"
        command_line_params = (
            temp_tool_path,
            temp_config_path,
            temp_input_path,
            temp_output_path,
            empty_input,
            temp_output2_path,
            cmd_text,
            temp_output3_path,
            temp_input_extra_path,
            temp_version_output_path,
            temp_index_path,
            temp_output4_path,
            temp_shared_dir,
            temp_metadata_path,
            temp_input_metadata_path,
        )
        assert os.path.exists(temp_index_path)
        command_line = u'python %s "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s"' % command_line_params

        config_files = [temp_config_path]
        client_inputs = []
        client_inputs.append(ClientInput(temp_input_path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH))
        # Reverting empty input handling added in:
        # https://github.com/galaxyproject/pulsar/commit/2fb36ba979cf047a595c53cdef833cae79cbb380
        # Seems like it really should cause a failure.
        # client_inputs.append(ClientInput(empty_input, CLIENT_INPUT_PATH_TYPES.INPUT_PATH))
        client_inputs.append(ClientInput(os.path.join(temp_directory, "dataset_0_files"), CLIENT_INPUT_PATH_TYPES.INPUT_EXTRA_FILES_PATH))
        client_inputs.append(ClientInput(temp_input_metadata_path, CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH))
        output_files = [
            temp_output_path,
            temp_output2_path,
            temp_output3_path,
            temp_output4_path,
            temp_output_workdir_destination,
            temp_output_workdir_destination2,
        ]
        client, client_manager = __client(temp_directory, options)
        waiter = Waiter(client, client_manager)
        client_outputs = ClientOutputs(
            working_directory=temp_work_dir,
            metadata_directory=temp_metadata_dir,
            work_dir_outputs=[
                (temp_output_workdir, temp_output_workdir_destination),
                (temp_output_workdir2, temp_output_workdir_destination2),
            ],
            output_files=output_files,
            version_file=temp_version_output_path,
        )
        job_description = ClientJobDescription(
            command_line=command_line,
            tool=MockTool(temp_tool_dir),
            config_files=config_files,
            client_inputs=ClientInputs(client_inputs),
            client_outputs=client_outputs,
            working_directory=temp_work_dir,
            metadata_directory=temp_metadata_dir,
            **__extra_job_description_kwargs(options)
        )
        submit_job(client, job_description)
        result_status = waiter.wait()
        stdout = result_status["stdout"].strip()
        assert "stdout output".startswith(stdout)
        if hasattr(options, "maximum_stream_size"):
            assert len(stdout) == options.maximum_stream_size

        __finish(options, client, client_outputs, result_status)

        __assert_contents(temp_output_path, EXPECTED_OUTPUT, result_status)
        __assert_contents(temp_output2_path, cmd_text, result_status)
        __assert_contents(os.path.join(temp_work_dir, "galaxy.json"), b"GALAXY_JSON", result_status)
        __assert_contents(os.path.join(temp_directory, "dataset_1_files", "extra"), b"EXTRA_OUTPUT_CONTENTS", result_status)
        __assert_contents(os.path.join(temp_metadata_dir, "metadata_output"), b"meta output", result_status)
        if getattr(options, "test_rewrite_action", False):
            __assert_contents(temp_output_workdir_destination2, os.path.join(temp_directory, "shared2", "test1"), result_status)
        if job_description.env:
            __assert_contents(temp_output_workdir_destination, b"TEST_ENV_VALUE", result_status)
        __assert_contents(temp_version_output_path, b"1.0.1", result_status)
        if job_description.dependencies_description:
            __assert_contents(temp_output3_path, "moo_override", result_status)
        else:
            __assert_contents(temp_output3_path, "moo_default", result_status)
        __assert_has_rewritten_bwa_path(client, temp_output4_path)
        __exercise_errors(options, client, temp_output_path, temp_directory)
        client_manager.shutdown()
    except BaseException:
        if not options.suppress_output:
            traceback.print_exc()
        raise
    finally:
        if waiter is not None:
            waiter.shutdown()
        shutil.rmtree(temp_directory)
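
# Note (an inference from the stdout assertions in run() above, not stated in
# the source): when options.maximum_stream_size is set, the returned stdout is
# expected to be a truncated prefix of the full "stdout output" payload; e.g.
# a limit of 6 would yield stdout == "stdout".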
def queue_job(self, job_wrapper):
    job_destination = job_wrapper.job_destination
    self._populate_parameter_defaults(job_destination)

    command_line, client, remote_job_config, compute_environment, remote_container = self.__prepare_job(job_wrapper, job_destination)

    if not command_line:
        return

    try:
        dependencies_description = PulsarJobRunner.__dependencies_description(client, job_wrapper)
        rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
        path_rewrites_unstructured = {}
        output_names = []
        if compute_environment:
            path_rewrites_unstructured = compute_environment.path_rewrites_unstructured
            output_names = compute_environment.output_names()

            client_inputs_list = []
            for input_dataset_wrapper in job_wrapper.get_input_paths():
                # str here to resolve false_path if set on a DatasetPath object.
                path = str(input_dataset_wrapper)
                object_store_ref = {
                    "dataset_id": input_dataset_wrapper.dataset_id,
                    "dataset_uuid": str(input_dataset_wrapper.dataset_uuid),
                    "object_store_id": input_dataset_wrapper.object_store_id,
                }
                client_inputs_list.append(ClientInput(path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH, object_store_ref=object_store_ref))

            for input_extra_path in compute_environment.path_rewrites_input_extra.keys():
                # TODO: track dataset for object_store_ref...
                client_inputs_list.append(ClientInput(input_extra_path, CLIENT_INPUT_PATH_TYPES.INPUT_EXTRA_FILES_PATH))

            for input_metadata_path in compute_environment.path_rewrites_input_metadata.keys():
                # TODO: track dataset for object_store_ref...
                client_inputs_list.append(ClientInput(input_metadata_path, CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH))

            input_files = None
            client_inputs = ClientInputs(client_inputs_list)
        else:
            input_files = self.get_input_files(job_wrapper)
            client_inputs = None

        if self.app.config.metadata_strategy == "legacy":
            # Drop this branch in 19.09.
            metadata_directory = job_wrapper.working_directory
        else:
            metadata_directory = os.path.join(job_wrapper.working_directory, "metadata")

        remote_pulsar_app_config = job_destination.params.get("pulsar_app_config", {})
        job_directory_files = []
        config_files = job_wrapper.extra_filenames
        tool_script = os.path.join(job_wrapper.working_directory, "tool_script.sh")
        if os.path.exists(tool_script):
            log.debug("Registering tool_script for Pulsar transfer [%s]" % tool_script)
            job_directory_files.append(tool_script)
        client_job_description = ClientJobDescription(
            command_line=command_line,
            input_files=input_files,
            client_inputs=client_inputs,  # Only one of these input defs should be non-None
            client_outputs=self.__client_outputs(client, job_wrapper),
            working_directory=job_wrapper.tool_working_directory,
            metadata_directory=metadata_directory,
            tool=job_wrapper.tool,
            config_files=config_files,
            dependencies_description=dependencies_description,
            env=client.env,
            rewrite_paths=rewrite_paths,
            arbitrary_files=path_rewrites_unstructured,
            touch_outputs=output_names,
            remote_pulsar_app_config=remote_pulsar_app_config,
            job_directory_files=job_directory_files,
            container=None if not remote_container else remote_container.container_id,
        )
        job_id = pulsar_submit_job(client, client_job_description, remote_job_config)
        log.info("Pulsar job submitted with job_id %s" % job_id)
        job_wrapper.set_job_destination(job_destination, job_id)
        job_wrapper.change_state(model.Job.states.QUEUED)
    except Exception:
        job_wrapper.fail("failure running job", exception=True)
        log.exception("failure running job %d", job_wrapper.job_id)
        return

    pulsar_job_state = AsynchronousJobState()
    pulsar_job_state.job_wrapper = job_wrapper
    pulsar_job_state.job_id = job_id
    pulsar_job_state.old_state = True
    pulsar_job_state.running = False
    pulsar_job_state.job_destination = job_destination
    self.monitor_job(pulsar_job_state)
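
# Sketch (an assumption, not from the source) of job destination params the
# runner consults above: the URL is a placeholder endpoint, and the
# "pulsar_app_config" dict is forwarded verbatim to the remote side as
# remote_pulsar_app_config; the key shown inside it is hypothetical.
example_destination_params = {
    "url": "https://pulsar.example.org:8913/",   # hypothetical Pulsar endpoint
    "pulsar_app_config": {
        "staging_directory": "/pulsar/staging",  # hypothetical remote setting
    },
}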