def run_simple_diagnostics(preset_xml, output_dir):
    """Submit a trivial 'hello world' job through the configured cluster templates.

    The preset XML is parsed to find the cluster manager path, the "start"
    template is rendered for a generated ``run.sh``, the rendered command is
    written to ``cluster.sh`` and then executed.

    :param preset_xml: path to the pipeline preset XML to read options from
    :param output_dir: directory in which run.sh, cluster.sh, the cluster
        stdout/stderr files and the job output file are placed
    :return: exit code of the submitted cluster command (0 on success)
    """
    precord = parse_pipeline_preset_xml(preset_xml)
    wopts = precord.to_workflow_level_opt()

    # _to_path returns a callable; presumably it resolves a file name
    # relative to output_dir -- confirm against its definition.
    to_p = _to_path(output_dir)

    ts = load_cluster_templates(wopts.cluster_manager_path)

    run_sh = to_p('run.sh')
    cluster_sh = to_p('cluster.sh')
    output_file = to_p('hello-world-output.txt')

    _write_echo_hello_world(output_file, run_sh)

    cluster_stderr = to_p("cluster.stderr")
    cluster_stdout = to_p("cluster.stdout")
    cluster_cmd = ts.render("start", run_sh, "job.dev-diagnostic-hello-world",
                            stdout=cluster_stdout, stderr=cluster_stderr)

    # Keep a copy of the exact command on disk for later inspection.
    with open(cluster_sh, 'w') as f:
        f.write(cluster_cmd)

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print("Run.sh command {r}".format(r=run_sh))
    print("Exe'ing Cluster command {c}".format(c=cluster_cmd))

    # Only the exit code is used; the remaining three values are ignored.
    rcode, _, _, _ = run_command(cluster_cmd, sys.stdout, sys.stderr)

    if rcode == 0:
        print("Successfully submitted cluster job using templates {p}".format(p=wopts.cluster_manager_path))
    else:
        # Previously a failed submission produced no message at all.
        sys.stderr.write("Failed to submit cluster job (exit code {r}) using templates {p}\n".format(
            r=rcode, p=wopts.cluster_manager_path))

    return rcode
def validate_or_modify_workflow_level_options(wopts):
    """
    Validate workflow level options and reconcile options that depend on
    each other. The given object is modified in place and returned.

    - distributed mode off: the cluster manager path is cleared
    - distributed mode on without a (str) cluster manager path: distributed
      mode is switched off
    - distributed mode on with a cluster manager path: the templates are
      loaded once to prove they are usable

    :type wopts: WorkflowLevelOptions
    :param wopts: options to validate/adjust
    :raises IOError: if ``wopts.tmp_dir`` is not an existing directory
    :return: the same ``wopts`` instance
    """
    tmp_dir = wopts.tmp_dir
    if not os.path.isdir(tmp_dir):
        raise IOError("Unable to find tmp dir '{t}'".format(t=tmp_dir))

    # Local-only: a cluster manager is meaningless, drop it.
    if not wopts.distributed_mode:
        slog.warn("distribute_mode is False, Disabling cluster manager, running in LOCAL ONLY mode.")
        wopts.cluster_manager_path = None
        return wopts

    # Distributed requested, but nothing to distribute with.
    if not isinstance(wopts.cluster_manager_path, str):
        slog.warn("cluster_manager not provided. Settings distribute mode to False")
        wopts.distributed_mode = False
        return wopts

    # Distributed with a cluster manager: make sure the templates load.
    try:
        C.load_cluster_templates(wopts.cluster_manager_path)
        slog.info("Successfully loaded cluster manager from {p}".format(p=wopts.cluster_manager_path))
    except Exception:
        # Log for the status log, then let the caller see the original error.
        slog.error("Failed to load cluster templates from '{x}'".format(x=wopts.cluster_manager_path))
        raise

    return wopts
def _test_can_load_cluster_templates(path):
    """Report whether cluster templates can be loaded from ``path``.

    Any exception raised by ``load_cluster_templates`` is treated as a
    failure; it is logged (including its cause) and not propagated.

    :param path: location handed to ``load_cluster_templates``
    :return: True if loading succeeded, False otherwise
    """
    try:
        load_cluster_templates(path)
    except Exception as e:
        # Deliberately broad: this is a yes/no probe. repr() is used so that
        # formatting the cause can never itself fail on odd message encodings.
        log.error("Failed to load cluster templates {t}. Cause: {e}".format(t=path, e=repr(e)))
        return False
    return True
def _load_io_for_task(registered_tasks, entry_points_d, preset_xmls, rc_preset_or_none,
                      force_distribute=None, force_chunk_mode=None, debug_mode=None):
    """Grungy loading of the IO and resolving values

    Options are merged in this order (later wins): rc preset, then the
    preset XML(s).

    :param registered_tasks: passed to ``IO.validate_raw_task_options``
    :param entry_points_d: entry points, validated before anything is loaded
    :param preset_xmls: preset XMLs handed to ``IO.parse_pipeline_preset_xmls``;
        skipped when empty/None
    :param rc_preset_or_none: rc preset XML, or None to load it from the env
    :param force_distribute: if a bool (and a cluster manager is configured),
        overrides distributed mode
    :param force_chunk_mode: if a bool, overrides chunk mode
    :param debug_mode: if True, forces debug mode on

    Returns a tuple of (WorkflowLevelOptions, TaskOptions, ClusterRender)
    """
    slog.info("validating entry points. {e}".format(e=entry_points_d))
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if rc_preset:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    if preset_xmls:
        preset_record = IO.parse_pipeline_preset_xmls(preset_xmls)
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)
    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    # Validate
    topts = IO.validate_raw_task_options(registered_tasks, topts)

    # pformat returns the text; pprint.pprint would write to stdout and hand
    # None to the logger.
    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
        # override distributed mode
        if isinstance(force_distribute, bool):
            workflow_level_opts.distributed_mode = force_distribute
    else:
        cluster_render = None

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        ncpu = multiprocessing.cpu_count()
        slog.info("local-only mode detected setting total NPROC to {x}".format(x=ncpu))
        workflow_level_opts.total_max_nproc = ncpu

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    return workflow_level_opts, topts, cluster_render
def _load_io_for_task(registered_tasks, entry_points_d, preset_xml, rc_preset_or_none,
                      force_distribute=None, force_chunk_mode=None, debug_mode=None):
    """Grungy loading of the IO and resolving values

    Options are merged in this order (later wins): rc preset, then the
    preset XML.

    :param registered_tasks: passed to ``IO.validate_raw_task_options``
    :param entry_points_d: entry points, validated before anything is loaded
    :param preset_xml: preset XML handed to ``IO.parse_pipeline_preset_xml``,
        or None to skip it
    :param rc_preset_or_none: rc preset XML, or None to load it from the env
    :param force_distribute: if a bool (and a cluster manager is configured),
        overrides distributed mode
    :param force_chunk_mode: if a bool, overrides chunk mode
    :param debug_mode: if True, forces debug mode on

    Returns a tuple of (WorkflowLevelOptions, TaskOptions, ClusterRender)
    """
    slog.info("validating entry points. {e}".format(e=entry_points_d))
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if rc_preset:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    if preset_xml is not None:
        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)
    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    # Validate
    topts = IO.validate_raw_task_options(registered_tasks, topts)

    # pformat returns the text; pprint.pprint would write to stdout and hand
    # None to the logger.
    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
        # override distributed mode
        if isinstance(force_distribute, bool):
            workflow_level_opts.distributed_mode = force_distribute
    else:
        cluster_render = None

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        ncpu = multiprocessing.cpu_count()
        slog.info("local-only mode detected setting total NPROC to {x}".format(x=ncpu))
        workflow_level_opts.total_max_nproc = ncpu

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    return workflow_level_opts, topts, cluster_render
def validate_or_modify_workflow_level_options(wopts):
    """
    Validate workflow level options and reconcile options that depend on
    each other. The given object is modified in place and returned.

    :type wopts: WorkflowLevelOptions
    :param wopts: options to validate/adjust
    :raises IOError: if ``wopts.tmp_dir`` is not an existing directory
    :raises ValueError: if ``max_nproc`` exceeds a configured ``total_max_nproc``
    :return: the same ``wopts`` instance
    """
    tmp_dir = wopts.tmp_dir
    if not os.path.isdir(tmp_dir):
        raise IOError("Unable to find tmp dir '{t}'".format(t=tmp_dir))

    # Reconcile distributed mode with the presence of a cluster manager.
    if not wopts.distributed_mode:
        slog.warn(
            "distribute_mode is False, Disabling cluster manager, running in LOCAL ONLY mode."
        )
        wopts.cluster_manager_path = None
    elif not isinstance(wopts.cluster_manager_path, str):
        slog.warn(
            "cluster_manager not provided. Settings distribute mode to False"
        )
        wopts.distributed_mode = False
    else:
        # Load the templates once purely to prove that they are usable.
        try:
            C.load_cluster_templates(wopts.cluster_manager_path)
            slog.info("Successfully loaded cluster manager from {p}".format(
                p=wopts.cluster_manager_path))
        except Exception:
            slog.error("Failed to load cluster templates from '{x}'".format(
                x=wopts.cluster_manager_path))
            raise

    total_nproc = wopts.total_max_nproc
    if total_nproc is not None:
        if wopts.max_nproc > total_nproc:
            raise ValueError(
                "Max nproc ({x}) must be <= Total Max nproc ({t})".format(
                    x=wopts.max_nproc, t=total_nproc))

        # total max nproc = 1 cpu/worker * max nworkers will be the upper
        # bound of total number of processors used
        if wopts.max_nworkers > total_nproc:
            log.warn("Max workers {w} used will be <= {t}".format(
                w=wopts.max_nworkers, t=total_nproc))

    return wopts
def validate_or_modify_workflow_level_options(wopts):
    """
    Check workflow level options for consistency, adjusting dependent
    options where needed. ``wopts`` is mutated in place and handed back.

    :type wopts: WorkflowLevelOptions
    :param wopts: options to validate/adjust
    :raises IOError: if ``wopts.tmp_dir`` is not an existing directory
    :raises ValueError: if ``max_nproc`` exceeds a configured ``total_max_nproc``
    :return: the same ``wopts`` instance
    """
    tmp_dir = wopts.tmp_dir
    if not os.path.isdir(tmp_dir):
        raise IOError("Unable to find tmp dir '{t}'".format(t=tmp_dir))

    if not wopts.distributed_mode:
        # Local-only run: a cluster manager must not be used.
        slog.warn("distribute_mode is False, Disabling cluster manager, running in LOCAL ONLY mode.")
        wopts.cluster_manager_path = None
    elif not isinstance(wopts.cluster_manager_path, str):
        # Distributed was requested but there is nothing to distribute with.
        slog.warn("cluster_manager not provided. Settings distribute mode to False")
        wopts.distributed_mode = False
    else:
        # Load the templates once purely to prove that they are usable.
        try:
            C.load_cluster_templates(wopts.cluster_manager_path)
            slog.info("Successfully loaded cluster manager from {p}".format(p=wopts.cluster_manager_path))
        except Exception:
            slog.error("Failed to load cluster templates from '{x}'".format(x=wopts.cluster_manager_path))
            raise

    nproc_ceiling = wopts.total_max_nproc
    if nproc_ceiling is not None:
        if wopts.max_nproc > nproc_ceiling:
            message = "Max nproc ({x}) must be <= Total Max nproc ({t})".format(x=wopts.max_nproc, t=nproc_ceiling)
            raise ValueError(message)

        # total max nproc = 1 cpu/worker * max nworkers will be the upper
        # bound of total number of processors used
        if wopts.max_nworkers > nproc_ceiling:
            log.warn("Max workers {w} used will be <= {t}".format(w=wopts.max_nworkers, t=nproc_ceiling))

    return wopts
def _load_io_for_workflow(registered_tasks, registered_pipelines, workflow_template_xml_or_pipeline,
                          entry_points_d, preset_xmls, rc_preset_or_none,
                          force_distribute=None, force_chunk_mode=None, debug_mode=None):
    """
    Load and resolve input IO layer

    Presets and workflow options are loaded, resolved and merged. The order
    of loading (later wins) is: rc, workflow.xml (or Pipeline defaults),
    then preset.xml.

    :param registered_tasks: passed to ``IO.validate_raw_task_options``
    :param registered_pipelines: passed to ``IO.parse_pipeline_template_xml``
    :param workflow_template_xml_or_pipeline: a ``Pipeline`` instance (its
        bindings and task options are used directly) or a workflow template XML
    :param entry_points_d: entry points, validated before anything is loaded
    :param preset_xmls: preset XMLs; skipped when empty/None
    :param rc_preset_or_none: rc preset XML, or None to load it from the env
    :param force_distribute: if a bool, overrides distributed mode *before*
        the workflow options are validated
    :param force_chunk_mode: if a bool, overrides chunk mode
    :param debug_mode: if True, forces debug mode on

    :returns: A tuple of (Workflow Bindings, Workflow Level Options, Task Opts, ClusterRenderer)
    :rtype: (List[(str, str)], WorkflowLevelOpts, {TaskId:value}, ClusterRenderer)
    """
    # A little sanity check
    # Validate that entry points exist
    slog.info("validating entry points.")
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if isinstance(workflow_template_xml_or_pipeline, Pipeline):
        # Use default values defined in the Pipeline
        builder_record = IO.BuilderRecord(workflow_template_xml_or_pipeline.all_bindings,
                                          workflow_template_xml_or_pipeline.task_options,
                                          {})
    else:
        slog.info("Loading workflow template.")
        builder_record = IO.parse_pipeline_template_xml(workflow_template_xml_or_pipeline, registered_pipelines)
        slog.info("successfully loaded workflow template.")

    if preset_xmls:
        slog.info("Loading preset(s) {p}".format(p=preset_xmls))
        preset_record = IO.parse_pipeline_preset_xmls(preset_xmls)
        slog.info("successfully loaded preset.")
    else:
        slog.info("No preset provided. Skipping preset.xml loading.")
        preset_record = None

    # Merge: rc -> workflow template -> preset (later updates win)
    if rc_preset is not None:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    wopts.update(dict(builder_record.workflow_options))
    topts.update(builder_record.task_options)

    if preset_record is not None:
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)

    # override distributed mode only if provided.
    if isinstance(force_distribute, bool):
        workflow_level_opts.distributed_mode = force_distribute

    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)

    slog.info("Successfully validated workflow options.")

    slog.info("validating supplied task options.")
    topts = IO.validate_raw_task_options(registered_tasks, topts)
    slog.info("successfully validated (pre DI) task options.")

    workflow_bindings = builder_record.bindings

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
    else:
        cluster_render = None

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        ncpu = multiprocessing.cpu_count()
        slog.info("local-only mode detected setting total NPROC to {x}".format(x=ncpu))
        workflow_level_opts.total_max_nproc = ncpu
        # Leave one core free, but never drop to 0 on a single-core host.
        workflow_level_opts.max_nproc = max(1, ncpu - 1)

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug("\n" + pprint.pformat(workflow_level_opts.to_dict(), indent=4))
    log.debug("Initial resolving of loaded preset.xml and pipeline.xml task options:")
    log.debug("\n" + pprint.pformat(topts))

    return workflow_bindings, workflow_level_opts, topts, cluster_render
def _load_io_for_workflow(registered_tasks, registered_pipelines, workflow_template_xml_or_pipeline,
                          entry_points_d, preset_xml, rc_preset_or_none,
                          force_distribute=None, force_chunk_mode=None, debug_mode=None):
    """
    Load and resolve input IO layer

    Presets and workflow options are loaded, resolved and merged. The order
    of loading (later wins) is: rc, workflow.xml, then preset.xml.

    :param registered_tasks: passed to ``IO.validate_raw_task_options``
    :param registered_pipelines: passed to ``IO.parse_pipeline_template_xml``
    :param workflow_template_xml_or_pipeline: a ``Pipeline`` instance (only
        its bindings are used) or a workflow template XML
    :param entry_points_d: entry points, validated before anything is loaded
    :param preset_xml: preset XML, or None to skip it
    :param rc_preset_or_none: rc preset XML, or None to load it from the env
    :param force_distribute: if a bool, overrides distributed mode *before*
        the workflow options are validated
    :param force_chunk_mode: if a bool, overrides chunk mode
    :param debug_mode: if True, forces debug mode on

    :returns: A tuple of (Workflow Bindings, Workflow Level Options, Task Opts, ClusterRenderer)
    :rtype: (List[(str, str)], WorkflowLevelOpts, {TaskId:value}, ClusterRenderer)
    """
    # A little sanity check
    # Validate that entry points exist
    slog.info("validating entry points.")
    _validate_entry_points_or_raise(entry_points_d)
    slog.info("successfully validated {n} entry points".format(n=len(entry_points_d)))

    wopts = {}
    topts = {}

    if rc_preset_or_none is None:
        rc_preset = IO.load_preset_from_env()
    else:
        rc_preset = IO.parse_pipeline_preset_xml(rc_preset_or_none)

    if isinstance(workflow_template_xml_or_pipeline, Pipeline):
        builder_record = IO.BuilderRecord(workflow_template_xml_or_pipeline.all_bindings, {}, {})
    else:
        slog.info("Loading workflow template.")
        builder_record = IO.parse_pipeline_template_xml(workflow_template_xml_or_pipeline, registered_pipelines)
        slog.info("successfully loaded workflow template.")

    if preset_xml is None:
        slog.info("No preset provided. Skipping preset.xml loading.")
        preset_record = None
    else:
        slog.info("Loading preset {p}".format(p=preset_xml))
        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        slog.info("successfully loaded preset.")

    # Merge: rc -> workflow template -> preset (later updates win)
    if rc_preset is not None:
        topts.update(dict(rc_preset.task_options))
        wopts.update(dict(rc_preset.workflow_options))

    wopts.update(dict(builder_record.workflow_options))
    topts.update(builder_record.task_options)

    if preset_record is not None:
        wopts.update(dict(preset_record.workflow_options))
        topts.update(dict(preset_record.task_options))

    workflow_level_opts = IO.WorkflowLevelOptions.from_id_dict(wopts)

    # override distributed mode only if provided.
    if isinstance(force_distribute, bool):
        workflow_level_opts.distributed_mode = force_distribute

    workflow_level_opts = IO.validate_or_modify_workflow_level_options(workflow_level_opts)

    slog.info("Successfully validated workflow options.")

    slog.info("validating supplied task options.")
    topts = IO.validate_raw_task_options(registered_tasks, topts)
    slog.info("successfully validated (pre DI) task options.")

    workflow_bindings = builder_record.bindings

    if isinstance(workflow_level_opts.cluster_manager_path, str):
        cluster_render = C.load_cluster_templates(workflow_level_opts.cluster_manager_path)
    else:
        cluster_render = None

    if isinstance(force_chunk_mode, bool):
        workflow_level_opts.chunk_mode = force_chunk_mode

    workflow_level_opts.max_nchunks = min(workflow_level_opts.max_nchunks, GlobalConstants.MAX_NCHUNKS)

    if workflow_level_opts.distributed_mode is False:
        ncpu = multiprocessing.cpu_count()
        slog.info("local-only mode detected setting total NPROC to {x}".format(x=ncpu))
        workflow_level_opts.total_max_nproc = ncpu

    if debug_mode is True:
        slog.info("overriding debug-mode to True")
        workflow_level_opts.debug_mode = debug_mode

    # Dump the options only after every override above has been applied, so
    # the log shows the values that are actually returned.
    log.debug("Resolved workflow level options to {d}".format(d=workflow_level_opts))
    log.debug(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    return workflow_bindings, workflow_level_opts, topts, cluster_render