def get_autofill_values(app_name, raise_err=True):
    """Return the parsed contents of "autofill_values" for given app_name.

    Each configured value is converted as follows:
      - a list (or cb.TasksConfigBaseSequence) becomes a plain list
      - anything else is treated as a string of 1 to 3 colon-separated
        integers and becomes the equivalent `range(...)`

    `app_name` - key of the task in the tasks configuration
    `raise_err` - If False, return {} if autofill_values does not exist

    Raises DAGMisconfigured when autofill_values is missing and `raise_err`
    is true.  Any error hit while parsing the values is logged and re-raised.
    """
    app_data = cb.get_tasks_config()[app_name]
    try:
        vals = app_data["autofill_values"]
    except KeyError:
        msg = (
            "Expected to find `autofill_values` defined in task"
            " configuration for given app_name. This is required when the"
            " task introduces a new job_id component in its job_id_template"
            " that does not exist on a parent node, and also when the app"
            ' depends_on "all" autofill_values from a parent.'
        )
        if raise_err:
            log.exception(msg, extra=dict(app_name=app_name))
            raise DAGMisconfigured("%s app_name: %s" % (msg, app_name))
        return {}
    assert isinstance(vals, cb.TasksConfigBaseMapping), "expected a mapping"
    try:
        # A conditional expression is used instead of `cond and a or b`:
        # an empty range (e.g. from "0:0") is falsy and would otherwise fall
        # through to list(v), returning the characters of the string.
        return {
            k: (
                list(v)
                if isinstance(v, (list, cb.TasksConfigBaseSequence))
                else range(*(int(x) for x in v.split(":", 2)))
            )
            for k, v in vals.items()
        }
    except Exception:
        log.error(
            "Failed to parse config data for app_name.autofill_values",
            extra=dict(app_name=app_name),
        )
        raise
def get_autofill_values(app_name, raise_err=True):
    """Return the parsed contents of "autofill_values" for given app_name.

    Values that are a list (or cb.TasksConfigBaseSequence) are returned as a
    plain list.  Any other value is split on ':' into 1 to 3 integers and
    returned as the corresponding `range(...)`.

    `app_name` - key of the task in the tasks configuration
    `raise_err` - If False, return {} if autofill_values does not exist

    Raises DAGMisconfigured when autofill_values is missing and `raise_err`
    is true.  Any error hit while parsing the values is logged and re-raised.
    """
    app_data = cb.get_tasks_config()[app_name]
    try:
        vals = app_data['autofill_values']
    except KeyError:
        msg = ('Expected to find `autofill_values` defined in task'
               ' configuration for given app_name. This is required when the'
               ' task introduces a new job_id component in its job_id_template'
               ' that does not exist on a parent node, and also when the app'
               ' depends_on "all" autofill_values from a parent.')
        if raise_err:
            log.exception(msg, extra=dict(app_name=app_name))
            raise DAGMisconfigured("%s app_name: %s" % (msg, app_name))
        return {}
    assert isinstance(vals, cb.TasksConfigBaseMapping), "expected a mapping"

    def _expand(v):
        # Explicit branch rather than `cond and a or b`: an empty range
        # (e.g. from "0:0") is falsy and previously fell through to list(v),
        # which returned the individual characters of the string.
        if isinstance(v, (list, cb.TasksConfigBaseSequence)):
            return list(v)
        return range(*(int(x) for x in v.split(':', 2)))

    try:
        return {k: _expand(v) for k, v in vals.items()}
    except Exception:
        log.error("Failed to parse config data for app_name.autofill_values",
                  extra=dict(app_name=app_name))
        raise
def get_job_id_template(app_name, template=None):
    """Return (template, field_names) describing the job_id of app_name.

    The task's own "job_id" setting wins; otherwise `template` is used, and
    if that is None the namespace's job_id_default_template is the fallback.
    field_names is the list of strings found between braces in the template.
    """
    fallback = template
    if fallback is None:
        fallback = get_NS().job_id_default_template
    task_conf = cb.get_tasks_config()[app_name]
    chosen = task_conf.get("job_id", fallback)
    return (chosen, re.findall(r"{(.*?)}", chosen))
def get_job_id_template(app_name, template=None):
    """Look up the job_id template for app_name and its brace fields.

    Returns a 2-tuple: the template string (the task's 'job_id' setting if
    present, else `template`, else the namespace default) and the list of
    names that appear between braces in it.
    """
    default = (
        get_NS().job_id_default_template if template is None else template)
    job_id_template = cb.get_tasks_config()[app_name].get('job_id', default)
    fields = re.findall(r'{(.*?)}', job_id_template)
    return (job_id_template, fields)
def build_dag(validate=False):
    """Build and return a MultiDiGraph from the tasks configuration.

    Nodes are added via _add_nodes, which yields (app_name, deps) pairs;
    each pair is then passed to _build_dict_deps.  When `validate` is true
    the finished graph is checked with validate_dag before being returned.
    """
    config = cb.get_tasks_config()
    graph = nx.MultiDiGraph()
    for name, dependencies in _add_nodes(config, graph):
        _build_dict_deps(dg=graph, app_name=name, deps=dependencies)
    if not validate:
        return graph
    validate_dag(graph, config)
    return graph
def build_dag(validate=False):
    """Construct the task graph (a MultiDiGraph) from the tasks config.

    `validate` - if true, run validate_dag on the graph before returning it
    """
    tasks = cb.get_tasks_config()
    dag = nx.MultiDiGraph()
    # _add_nodes yields one (app_name, deps) pair per task to wire up
    for node_name, node_deps in _add_nodes(tasks, dag):
        _build_dict_deps(dg=dag, app_name=node_name, deps=node_deps)
    if validate:
        validate_dag(dag, tasks)
    return dag
def get_job_type(app_name):
    """Return the configured job_type of app_name, defaulting to "bash".

    The default is used (and a debug message logged) whenever the lookup
    raises KeyError.
    """
    tasks = cb.get_tasks_config()
    try:
        job_type = tasks[app_name]["job_type"]
    except KeyError:
        log.debug(
            "No job_type specified for given app_name."
            " Assuming job_type='bash'",
            extra=dict(app_name=app_name),
        )
        job_type = "bash"
    return job_type
def get_job_type(app_name):
    """Fetch the job_type for app_name from the tasks configuration.

    Falls back to 'bash', logging at debug level, when the lookup raises
    KeyError.
    """
    config = cb.get_tasks_config()
    try:
        found = config[app_name]['job_type']
    except KeyError:
        found = None
    else:
        return found
    log.debug(
        "No job_type specified for given app_name."
        " Assuming job_type='bash'", extra=dict(app_name=app_name))
    return 'bash'
def inject_into_dag(func_name, inject_dct):
    """Update (add or replace) tasks in dag with new task config.

    Assumes that the config we're using is the JSONMapping

    This is a generator that yields exactly once, after the new tasks json
    file has been created, loaded and verified.  The file is removed when
    the generator resumes, is closed, or when an error occurs at any point
    after the file was created.
    NOTE(review): presumably wrapped as a context manager or test fixture by
    the caller - confirm.

    `func_name` - used to build the prefix that every injected app_name must
        start with
    `inject_dct` - mapping of app_name to task configuration to inject

    Raises UserWarning if any key of `inject_dct` lacks the expected prefix.
    """
    prefix = makepath(func_name)
    if not all(k.startswith(prefix) for k in inject_dct):
        raise UserWarning(
            "inject_into_dag can only inject app_names that have the"
            " correct prefix: %s" % makepath(func_name, '{app_name}'))
    f = _create_tasks_json(func_name, inject=inject_dct)[0]
    # try/finally guarantees the generated file is cleaned up even when
    # initialization, verification or the consumer's own code fails.
    try:
        _initialize([cb, dt, qb], args=['--tasks_json', f])
        # verify injection worked
        dg = cb.get_tasks_config().to_dict()
        for k, v in inject_dct.items():
            assert dg[k] == v, (
                "test code: inject_into_dag didn't insert the new tasks?")
        yield
    finally:
        os.remove(f)
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    The criteria come from the task's "valid_if_or" configuration:
      - if it is absent (or cannot be turned into a dict), every job is valid
      - an optional "_func" entry names an importable callable that is
        called as func(app_name, **parsed_job_id); a truthy result is valid
      - every other key must be a component of the parsed job_id, and the
        job is valid if that component equals any of the listed values
        (after each is passed through the namespace's job_id_validations)

    Returns True as soon as any criterion matches, otherwise False.

    A partially out of scope for dag stuff, but important detail:

        Jobs that don't match the criteria should immediately be marked
        as completed
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta["valid_if_or"])
    except (KeyError, TypeError):
        return True  # everything is valid

    if "_func" in dct:
        import_path = dct.pop("_func")  # only mutates our local copy
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # Format the exception itself: `err.message` does not exist on
            # Python 3 and would hide the real error behind AttributeError.
            raise err.__class__(
                "valid_if_or._func misconfigured: %s" % (err,))
        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise(
                "valid_if_or contains a key that's not in the job_id",
                extra=dict(valid_if_or_key=k, **ld),
                exception_kls=DAGMisconfigured,
            )
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    The task's 'valid_if_or' configuration drives the decision:
      - when it is missing (or cannot be turned into a dict) the job is valid
      - a '_func' entry, if present, is loaded from its import path and
        called as func(app_name, **parsed_job_id); a truthy result is valid
      - each remaining key must be a component of the parsed job_id; the job
        is valid if that component is among the configured values once each
        has been passed through the namespace's job_id_validations

    Returns True on the first matching criterion, otherwise False.

    A partially out of scope for dag stuff, but important detail:

        Jobs that don't match the criteria should immediately be marked
        as completed
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta['valid_if_or'])
    except (KeyError, TypeError):
        return True  # everything is valid

    if '_func' in dct:
        import_path = dct.pop('_func')  # only mutates our local copy
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # `err.message` was removed in Python 3; formatting the
            # exception itself works on both Python 2 and 3.
            raise err.__class__(
                "valid_if_or._func misconfigured: %s" % (err,))
        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise("valid_if_or contains a key that's not in the job_id",
                       extra=dict(valid_if_or_key=k, **ld),
                       exception_kls=DAGMisconfigured)
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
def _get_grps(app_name, filter_deps, ld):
    """
    Return an iterable of (dependency_group_name, group_metadata) pairs
    for the given app_name.

    - no 'depends_on' configured: an empty list
    - 'depends_on' directly contains "app_name": a single pair using the
      namespace's default dependency group name
    - `filter_deps` given: a generator over only the groups named in it
    - otherwise: all of depends_on.items()

    Group names are checked with _get_parents_validate_group_names in the
    two cases that involve `filter_deps` validation.
    """
    tasks = cb.get_tasks_config()
    try:
        depends_on = tasks[app_name]['depends_on']
    except KeyError:
        return []  # this task has no dependencies

    if "app_name" in depends_on:
        single_group = [(get_NS().dependency_group_default_name, depends_on)]
        _get_parents_validate_group_names(
            [get_NS().dependency_group_default_name], filter_deps, ld)
        return single_group

    if not filter_deps:
        return depends_on.items()

    _get_parents_validate_group_names(depends_on, filter_deps, ld)
    return (pair for pair in depends_on.items() if pair[0] in filter_deps)
def _get_grps(app_name, filter_deps, ld):
    """
    Return an iterable of (dependency_group_name, group_metadata) tuples

    An empty list is returned when the task defines no 'depends_on'.  When
    'depends_on' itself contains "app_name" it is treated as one group under
    the namespace's default group name.  Otherwise the groups are the items
    of 'depends_on', limited to those in `filter_deps` when it is given.
    """
    conf = cb.get_tasks_config()
    try:
        deps = conf[app_name]['depends_on']
    except KeyError:
        # this task has no dependencies
        return []

    if "app_name" in deps:
        result = [(get_NS().dependency_group_default_name, deps)]
        _get_parents_validate_group_names(
            [get_NS().dependency_group_default_name], filter_deps, ld)
    elif not filter_deps:
        result = deps.items()
    else:
        _get_parents_validate_group_names(deps, filter_deps, ld)
        result = (grp for grp in deps.items() if grp[0] in filter_deps)
    return result
def get_task_names():
    """Return the keys of the tasks configuration (one per task)."""
    return cb.get_tasks_config().keys()