def _load_io_for_task(registered_tasks, entry_points_d, preset_xml,
                      rc_preset_or_none, force_distribute=None,
                      force_chunk_mode=None, debug_mode=None):
    """Grungy loading of the IO and resolving values

    Returns a tuple of (WorkflowLevelOptions, TaskOptions, ClusterRender)
    """
    slog.info("validating entry points. {e}".format(e=entry_points_d))
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if rc_preset:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    if preset_xml is not None:
        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)
    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    # Validate
    topts = IO.validate_raw_task_options(registered_tasks, topts)

    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    # pprint.pformat returns the formatted string; pprint.pprint would print to
    # stdout and log None
    log.debug(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
        # override distributed mode, but only when a cluster manager is configured
        if isinstance(force_distribute, bool):
            workflow_level_opts.distributed_mode = force_distribute
    else:
        cluster_render = None

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        slog.info("local-only mode detected. Setting total NPROC to {x}".format(x=multiprocessing.cpu_count()))
        workflow_level_opts.total_max_nproc = multiprocessing.cpu_count()

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    return workflow_level_opts, topts, cluster_render
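# A minimal usage sketch for _load_io_for_task, assuming the registered tasks
# come from L.load_all() as in validate_preset_xml below. The entry-point id and
# file paths here are hypothetical placeholders, not values from the source.
import pbsmrtpipe.loader as L

def _example_load_io_for_task():
    rtasks_d, _, _, _ = L.load_all()
    entry_points_d = {"e_01": "/path/to/input.txt"}  # hypothetical entry point
    # rc_preset_or_none=None falls back to IO.load_preset_from_env()
    wopts, topts, cluster_render = _load_io_for_task(
        rtasks_d, entry_points_d,
        preset_xml="/path/to/preset.xml",  # hypothetical preset path
        rc_preset_or_none=None,
        force_distribute=False)
    return wopts, topts, cluster_render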
def setUpClass(cls):
    pipeline = REGISTERED_PIPELINES[cls.PB_PIPELINE_ID]
    log.debug(pipeline)
    cls.bindings = pipeline.all_bindings
    cls.EPOINTS_D = {k: get_temp_file(v) for k, v in cls.EPOINTS_NAMES.iteritems()}
    log.debug(pprint.pformat(cls.bindings, indent=4))
    log.debug("Number of registered tasks {n}".format(n=len(REGISTERED_TASKS)))
    cls.bgraph = B.binding_strs_to_binding_graph(REGISTERED_TASKS, cls.bindings)

    d = os.path.expanduser('~/scratch/tmp_pbsmrtpipe') if getpass.getuser() == 'mkocher' else None
    cls.output_dir = tempfile.mkdtemp(prefix='job_test_', dir=d)

    preset_record = IO.parse_pipeline_preset_xml(os.path.join(TEST_DATA_DIR, cls.PRESET_XML))
    cls.workflow_options = preset_record.to_workflow_level_opt()

    # leave this for now
    cls.envs = []
    cls.cluster_engine = C.load_installed_cluster_templates_by_name("sge")
def get_task_and_workflow_options(testkit_cfg):
    parsed_cfg = config_parser_to_butler(testkit_cfg)
    workflow_options, task_options = [], []

    def __get_option_type(val):
        # bool must be checked before int: bool is a subclass of int
        option_type = "pbsmrtpipe.option_types.string"
        if isinstance(val, bool):
            option_type = "pbsmrtpipe.option_types.boolean"
        elif isinstance(val, int):
            option_type = "pbsmrtpipe.option_types.integer"
        elif isinstance(val, float):
            option_type = "pbsmrtpipe.option_types.float"
        return option_type

    if parsed_cfg.preset_xml not in [None, '']:
        presets = parse_pipeline_preset_xml(parsed_cfg.preset_xml)
        for option_id, option_value in presets.task_options:
            log.info("task_option: {i} = {v}".format(i=option_id, v=option_value))
            task_options.append(dict(optionId=option_id,
                                     value=option_value,
                                     optionTypeId=__get_option_type(option_value)))
        for option_id, option_value in presets.workflow_options:
            log.info("workflow_option: {i} = {v}".format(i=option_id, v=option_value))
            workflow_options.append(dict(optionId=option_id,
                                         value=option_value,
                                         optionTypeId=__get_option_type(option_value)))
    return task_options, workflow_options
def _load_env_preset(env_var):
    path = os.environ.get(env_var, None)
    if path is None:
        return None
    else:
        return IO.parse_pipeline_preset_xml(path)
def run_diagnostics(preset_xml, output_dir):
    """Run Hello World pipeline

    Submit to the cluster if configured
    """
    precord = parse_pipeline_preset_xml(preset_xml)
    wopts = precord.to_workflow_level_opt()

    to_p = _to_path(output_dir)

    input_txt = to_p("e-01_input.txt")
    with open(input_txt, 'w') as f:
        f.write("Mock data\n")

    job_preset_xml = to_p("preset.xml")
    shutil.copyfile(preset_xml, job_preset_xml)

    _d = dict(f=input_txt, p=job_preset_xml, d=output_dir)
    cmd_str = "pbsmrtpipe pipeline-id pbsmrtpipe.pipelines.dev_dist -e \"e_01:{f}\" --preset-xml={p} --output-dir={d}"
    cmd = cmd_str.format(**_d)
    print "Running command {c}".format(c=cmd)
    rcode, stdout, stderr, run_time = run_command(cmd, sys.stdout, sys.stderr)
    if rcode == 0:
        print "Successfully submitted cluster job using templates {p}".format(p=wopts.cluster_manager_path)
    return rcode
def run_simple_diagnostics(preset_xml, output_dir):
    """Setup simple job to run"""
    precord = parse_pipeline_preset_xml(preset_xml)
    wopts = precord.to_workflow_level_opt()

    to_p = _to_path(output_dir)
    ts = load_cluster_templates(wopts.cluster_manager_path)

    run_sh = to_p('run.sh')
    cluster_sh = to_p('cluster.sh')

    output_file = to_p('hello-world-output.txt')
    _write_echo_hello_world(output_file, run_sh)

    cluster_stderr = to_p("cluster.stderr")
    cluster_stdout = to_p("cluster.stdout")
    cluster_cmd = ts.render("start", run_sh, "job.dev-diagnostic-hello-world",
                            stdout=cluster_stdout, stderr=cluster_stderr)

    with open(cluster_sh, 'w') as f:
        f.write(cluster_cmd)

    print "Run.sh command {r}".format(r=run_sh)
    print "Exe'ing Cluster command {c}".format(c=cluster_cmd)
    rcode, stdout, stderr, run_time = run_command(cluster_cmd, sys.stdout, sys.stderr)
    if rcode == 0:
        print "Successfully submitted cluster job using templates {p}".format(p=wopts.cluster_manager_path)
    return rcode
def get_task_and_workflow_options(testkit_cfg):
    parsed_cfg = config_parser_to_butler(testkit_cfg)
    workflow_options, task_options = [], []

    def __get_option_type(val):
        # bool must be checked before int: bool is a subclass of int
        option_type = TaskOptionTypes.STR
        if isinstance(val, bool):
            option_type = TaskOptionTypes.BOOL
        elif isinstance(val, int):
            option_type = TaskOptionTypes.INT
        elif isinstance(val, float):
            option_type = TaskOptionTypes.FLOAT
        elif val is None:
            val = ""
        return option_type, val

    rtasks_d, _, _, _ = L.load_all()
    if parsed_cfg.preset_xml not in [None, '']:
        if parsed_cfg.preset_json not in [None, '']:
            raise ValueError("Please use either preset_json or preset_xml, not both")
        presets = parse_pipeline_preset_xml(parsed_cfg.preset_xml)
        task_opts_d = validate_raw_task_options(rtasks_d, dict(presets.task_options))
        for option_id, option_value in task_opts_d.iteritems():
            log.info("task_option: {i} = {v}".format(i=option_id, v=option_value))
            option_type, option_value = __get_option_type(option_value)
            task_options.append(dict(optionId=option_id,
                                     value=option_value,
                                     optionTypeId=option_type))
        for option_id, option_value in presets.workflow_options:
            log.info("workflow_option: {i} = {v}".format(i=option_id, v=option_value))
            workflow_options.append(dict(optionId=option_id,
                                         value=option_value,
                                         optionTypeId=__get_option_type(option_value)[0]))
    elif parsed_cfg.preset_json not in [None, '']:
        presets = parse_pipeline_preset_json(parsed_cfg.preset_json)
        for option_id, option_value in presets.task_options:
            log.info("task_option: {i} = {v}".format(i=option_id, v=option_value))
            option_type, option_value = __get_option_type(option_value)
            task_options.append(dict(optionId=option_id,
                                     value=option_value,
                                     optionTypeId=option_type))
        for option_id, option_value in presets.workflow_options:
            log.info("workflow_option: {i} = {v}".format(i=option_id, v=option_value))
            workflow_options.append(dict(optionId=option_id,
                                         value=option_value,
                                         optionTypeId=__get_option_type(option_value)[0]))
    return task_options, workflow_options
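# A note on the __get_option_type helpers used in the variants above: the bool
# check must precede the int check because bool is a subclass of int in Python.
# A self-contained sketch (example function names are mine) showing the pitfall
# the ordering avoids:
def _example_option_type_order():
    def correct(val):
        if isinstance(val, bool):
            return "boolean"
        elif isinstance(val, int):
            return "integer"
        return "string"

    def reversed_checks(val):
        if isinstance(val, int):  # True is an instance of int, so this matches first
            return "integer"
        elif isinstance(val, bool):
            return "boolean"
        return "string"

    assert correct(True) == "boolean"
    assert reversed_checks(True) == "integer"  # mis-typed: the pitfall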
def _load_presets(self):
    preset_xml = op.join(op.dirname(self.job_dir), "preset.xml")
    preset_json = op.join(op.dirname(self.job_dir), "preset.json")
    if not op.isfile(preset_xml) and not op.isfile(preset_json):
        raise SkipTest("No presets JSON or XML found")
    if op.isfile(preset_json):
        return parse_pipeline_preset_json(preset_json)
    elif op.isfile(preset_xml):
        return parse_pipeline_preset_xml(preset_xml)
def test_load_preset(self):
    xml = IO.schema_workflow_options_to_xml(self._to_opts())
    preset_xml = get_temp_file(suffix="_preset.xml")
    log.debug(preset_xml)
    with open(preset_xml, 'w') as w:
        w.write(str(xml))
    preset_record = IO.parse_pipeline_preset_xml(preset_xml)
    workflow_level_opts = preset_record.to_workflow_level_opt()
    # compare the number of parsed options to the number of options written
    self.assertEqual(len(workflow_level_opts), len(self._to_opts()))
def get_task_and_workflow_options(testkit_cfg):
    parsed_cfg = config_parser_to_butler(testkit_cfg)
    workflow_options, task_options = [], []

    def __get_option_type(val):
        # bool must be checked before int: bool is a subclass of int
        option_type = to_opt_type_ns("string")
        if isinstance(val, bool):
            option_type = to_opt_type_ns("boolean")
        elif isinstance(val, int):
            option_type = to_opt_type_ns("integer")
        elif isinstance(val, float):
            option_type = to_opt_type_ns("float")
        elif val is None:
            val = ""
        return option_type, val

    if parsed_cfg.preset_xml not in [None, '']:
        if parsed_cfg.preset_json not in [None, '']:
            raise ValueError("Please use either preset_json or preset_xml, not both")
        presets = parse_pipeline_preset_xml(parsed_cfg.preset_xml)
        for option_id, option_value in presets.task_options:
            log.info("task_option: {i} = {v}".format(i=option_id, v=option_value))
            option_type, option_value = __get_option_type(option_value)
            task_options.append(dict(optionId=option_id,
                                     value=option_value,
                                     optionTypeId=option_type))
        for option_id, option_value in presets.workflow_options:
            log.info("workflow_option: {i} = {v}".format(i=option_id, v=option_value))
            workflow_options.append(dict(optionId=option_id,
                                         value=option_value,
                                         optionTypeId=__get_option_type(option_value)[0]))
    elif parsed_cfg.preset_json not in [None, '']:
        presets = parse_pipeline_preset_json(parsed_cfg.preset_json)
        for option_id, option_value in presets.task_options:
            log.info("task_option: {i} = {v}".format(i=option_id, v=option_value))
            option_type, option_value = __get_option_type(option_value)
            task_options.append(dict(optionId=option_id,
                                     value=option_value,
                                     optionTypeId=option_type))
        for option_id, option_value in presets.workflow_options:
            log.info("workflow_option: {i} = {v}".format(i=option_id, v=option_value))
            workflow_options.append(dict(optionId=option_id,
                                         value=option_value,
                                         optionTypeId=__get_option_type(option_value)[0]))
    return task_options, workflow_options
def _args_run_diagnostics(args):
    f = run_diagnostics
    if args.simple:
        f = run_simple_diagnostics
    precord = IO.parse_pipeline_preset_xml(args.preset_xml)
    wopts = precord.to_workflow_level_opt()
    if wopts.cluster_manager_path is not None and wopts.distributed_mode is True:
        output_dir = os.path.abspath(args.output_dir)
        return f(args.preset_xml, output_dir)
    else:
        log.warning("Cluster mode not enabled. Skipping cluster submission tests")
        return 0
def validate_preset_xml(dir_name):
    from pbsmrtpipe.pb_io import parse_pipeline_preset_xml, validate_raw_task_options
    import pbsmrtpipe.loader as L
    rtasks_d, _, _, pts = L.load_all()
    for file_name in os.listdir(dir_name):
        if file_name.endswith(".xml"):
            p = parse_pipeline_preset_xml(op.join(dir_name, file_name))
            if p.pipeline_id is None:
                raise ValueError("{f} does not have pipeline-id set".format(f=file_name))
            elif p.pipeline_id not in pts:
                raise ValueError("pipeline-id {i} not recognized".format(i=p.pipeline_id))
            log.info("validating {f}...".format(f=file_name))
            validate_raw_task_options(rtasks_d, dict(p.task_options))
        else:
            log.warn("Skipping non-XML file {f}".format(f=file_name))
    return 0
def test_01(self):
    preset_record = IO.parse_pipeline_preset_xml(self.path)
    self.assertIsInstance(preset_record, IO.PresetRecord)
def _to_workflow_options_settings(path):
    preset_record = IO.parse_pipeline_preset_xml(path)
    d = dict(preset_record.workflow_options)
    wopts = IO.WorkflowLevelOptions.from_id_dict(d)
    return wopts
def test_to_dict(self):
    preset_record = IO.parse_pipeline_preset_xml(self.path)
    d = dict(preset_record.workflow_options)
    wopts = IO.WorkflowLevelOptions.from_id_dict(d)
    self.assertIsInstance(wopts.to_dict(), dict)
def _parse(self, file_name):
    return IO.parse_pipeline_preset_xml(file_name)
def test_preset_xml_sanity(self):
    path = os.path.join(TEST_DATA_DIR, self.FILE_NAME)
    p = IO.parse_pipeline_preset_xml(path)
    log.info("Parsed {}".format(p))
    self.assertTrue(True)
def _load_io_for_workflow(registered_tasks, registered_pipelines,
                          workflow_template_xml_or_pipeline, entry_points_d,
                          preset_xml, rc_preset_or_none, force_distribute=None,
                          force_chunk_mode=None, debug_mode=None):
    """
    Load and resolve the input IO layer.

    Presets and workflow options are resolved and merged. The order of
    loading is:

    - rc, workflow.xml, then preset.xml

    force_distribute will attempt to override ALL settings (if
    cluster_manager is defined)

    :returns: A tuple of (Workflow Bindings, Workflow Level Options, Task Opts, ClusterRenderer)
    :rtype: (List[(str, str)], WorkflowLevelOpts, {TaskId:value}, ClusterRenderer)
    """
    # A little sanity check: validate that entry points exist
    slog.info("validating entry points.")
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if isinstance(workflow_template_xml_or_pipeline, Pipeline):
        builder_record = IO.BuilderRecord(workflow_template_xml_or_pipeline.all_bindings, {}, {})
    else:
        slog.info("Loading workflow template.")
        builder_record = IO.parse_pipeline_template_xml(workflow_template_xml_or_pipeline, registered_pipelines)
        slog.info("successfully loaded workflow template.")

    if preset_xml is None:
        slog.info("No preset provided. Skipping preset.xml loading.")
        preset_record = None
    else:
        slog.info("Loading preset {p}".format(p=preset_xml))
        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        slog.info("successfully loaded preset.")

    if rc_preset is not None:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    wopts.update(dict(builder_record.workflow_options))
    topts.update(builder_record.task_options)

    if preset_record is not None:
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)

    # override distributed mode only if provided.
    if isinstance(force_distribute, bool):
        workflow_level_opts.distributed_mode = force_distribute

    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)
    slog.info("Successfully validated workflow options.")

    slog.info("validating supplied task options.")
    topts = IO.validate_raw_task_options(registered_tasks, topts)
    slog.info("successfully validated (pre DI) task options.")

    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    workflow_bindings = builder_record.bindings

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
    else:
        cluster_render = None

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        slog.info("local-only mode detected. Setting total NPROC to {x}".format(x=multiprocessing.cpu_count()))
        workflow_level_opts.total_max_nproc = multiprocessing.cpu_count()

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    return workflow_bindings, workflow_level_opts, topts, cluster_render
def _load_io_for_workflow(registered_tasks, registered_pipelines,
                          workflow_template_xml_or_pipeline, entry_points_d,
                          preset_xmls, rc_preset_or_none, force_distribute=None,
                          force_chunk_mode=None, debug_mode=None):
    """
    Load and resolve the input IO layer.

    Presets and workflow options are resolved and merged. The order of
    loading is:

    - rc, workflow.xml, then preset.xml

    force_distribute will attempt to override ALL settings (if
    cluster_manager is defined)

    :returns: A tuple of (Workflow Bindings, Workflow Level Options, Task Opts, ClusterRenderer)
    :rtype: (List[(str, str)], WorkflowLevelOpts, {TaskId:value}, ClusterRenderer)
    """
    # A little sanity check: validate that entry points exist
    slog.info("validating entry points.")
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if isinstance(workflow_template_xml_or_pipeline, Pipeline):
        # Use default values defined in the Pipeline
        builder_record = IO.BuilderRecord(workflow_template_xml_or_pipeline.all_bindings,
                                          workflow_template_xml_or_pipeline.task_options, {})
    else:
        slog.info("Loading workflow template.")
        builder_record = IO.parse_pipeline_template_xml(workflow_template_xml_or_pipeline, registered_pipelines)
        slog.info("successfully loaded workflow template.")

    if preset_xmls:
        slog.info("Loading preset(s) {p}".format(p=preset_xmls))
        preset_record = IO.parse_pipeline_preset_xmls(preset_xmls)
        slog.info("successfully loaded preset.")
    else:
        slog.info("No preset provided. Skipping preset.xml loading.")
        preset_record = None

    if rc_preset is not None:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    wopts.update(dict(builder_record.workflow_options))
    topts.update(builder_record.task_options)

    if preset_record is not None:
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)

    # override distributed mode only if provided.
    if isinstance(force_distribute, bool):
        workflow_level_opts.distributed_mode = force_distribute

    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)
    slog.info("Successfully validated workflow options.")

    slog.info("validating supplied task options.")
    topts = IO.validate_raw_task_options(registered_tasks, topts)
    slog.info("successfully validated (pre DI) task options.")

    workflow_bindings = builder_record.bindings

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
    else:
        cluster_render = None

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        slog.info("local-only mode detected. Setting total NPROC to {x}".format(x=multiprocessing.cpu_count()))
        workflow_level_opts.total_max_nproc = multiprocessing.cpu_count()
        workflow_level_opts.max_nproc = multiprocessing.cpu_count() - 1

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug("\n" + pprint.pformat(workflow_level_opts.to_dict(), indent=4))
    log.debug("Initial resolving of loaded preset.xml and pipeline.xml task options:")
    log.debug("\n" + pprint.pformat(topts))

    return workflow_bindings, workflow_level_opts, topts, cluster_render
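# The merge order in both _load_io_for_workflow variants above is just a chain
# of dict.update() calls, so later sources win. A minimal sketch of that
# precedence with hypothetical option ids and values:
def _example_merge_precedence():
    rc = {"pbsmrtpipe.options.max_nproc": 8}
    pipeline_defaults = {"pbsmrtpipe.options.max_nproc": 16,
                         "pbsmrtpipe.options.chunk_mode": True}
    preset = {"pbsmrtpipe.options.max_nproc": 4}

    wopts = {}
    wopts.update(rc)                 # 1. rc preset (env or explicit path)
    wopts.update(pipeline_defaults)  # 2. workflow.xml / Pipeline defaults
    wopts.update(preset)             # 3. preset.xml(s) override everything prior
    assert wopts["pbsmrtpipe.options.max_nproc"] == 4
    return wopts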