def _get_parents_validate_group_names(dep_names, filter_deps, ld):
    """Raise DAGMisconfigured if any name in `filter_deps` is not one of
    the known dependency group names in `dep_names`."""
    unknown_names = set(filter_deps) - set(dep_names)
    _log_raise_if(
        bool(unknown_names),
        "You specified dependency group names that don't exist",
        extra=dict(filter_deps=filter_deps, **ld),
        exception_kls=DAGMisconfigured)
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Check that the task's optional `valid_if_or` section is well formed.

    Every key (except the reserved '_func') must map to a sequence, must name
    a component of the app's job_id template, and each listed value must pass
    the user-supplied job_id validation for that key, when one is registered.
    """
    # valid_if_or - are we specifying what makes a job valid correctly?
    if 'valid_if_or' not in metadata:
        return
    for key, candidates in metadata['valid_if_or'].items():
        if key == '_func':
            continue
        location = "%s.valid_if_or.%s" % (app_name1, key)
        _log_raise_if(
            not isinstance(candidates, cb.TasksConfigBaseSequence),
            "Task is misconfigured. Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(candidates), key=location, **ld),
            exception_kls=DAGMisconfigured)
        templ = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            key not in templ,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=key, job_id_template=templ, **ld),
            exception_kls=DAGMisconfigured)
        try:
            validation_func = get_NS().job_id_validations[key]
        except KeyError:
            # no user validation registered for this component
            continue
        for candidate in candidates:
            try:
                validation_func(candidate)
            except Exception as err:
                _log_raise(
                    ("valid_if_or contains a value that wasn't validated"
                     " by your job_id_validations. err: %s(%s)")
                    % (err.__class__, err),
                    extra=dict(key=location,
                               wrong_value_type=type(candidate), **ld),
                    exception_kls=DAGMisconfigured)
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the structure of this task's `depends_on` configuration and
    check that every parent edge in the graph refers to a known task."""
    if "depends_on" not in metadata:
        return
    dep_conf = metadata["depends_on"]
    _log_raise_if(
        not isinstance(dep_conf, cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(key="depends_on",
                   received_value_type=type(dep_conf), **ld),
        exception_kls=DAGMisconfigured)
    if "app_name" in dep_conf:
        # a single, unnamed dependency group given directly under depends_on
        _validate_dep_grp_metadata(
            dep_grp=dep_conf, ld=ld, tasks_conf=tasks_conf,
            dep_name=get_NS().dependency_group_default_name)
    else:
        # one or more named dependency groups
        _validate_dependency_groups(tasks_conf, metadata, ld)
    # depends_on - are dependent tasks listed properly?
    for parent in dg.pred[app_name1]:
        _log_raise_if(
            parent not in tasks_conf,
            "Task defines an unrecognized parent dependency",
            extra=dict(parent_app_name=parent, **ld),
            exception_kls=DAGMisconfigured)
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Verify the `depends_on` section of a task config.

    Checks the value's type, dispatches to the appropriate dependency-group
    validator, and then confirms that every parent in the dependency graph
    is a recognized task.
    """
    if "depends_on" not in metadata:
        return
    _log_raise_if(
        not isinstance(metadata["depends_on"], cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(
            key="depends_on",
            received_value_type=type(metadata["depends_on"]),
            **ld),
        exception_kls=DAGMisconfigured)
    # "app_name" directly under depends_on marks one unnamed dependency
    # group; otherwise the mapping holds named dependency groups.
    if "app_name" in metadata["depends_on"]:
        _validate_dep_grp_metadata(
            dep_grp=metadata['depends_on'],
            ld=ld,
            tasks_conf=tasks_conf,
            dep_name=get_NS().dependency_group_default_name)
    else:
        _validate_dependency_groups(tasks_conf, metadata, ld)
    # every predecessor edge must point at a task we know about
    for parent_name in dg.pred[app_name1]:
        _log_raise_if(
            parent_name not in tasks_conf,
            "Task defines an unrecognized parent dependency",
            extra=dict(parent_app_name=parent_name, **ld),
            exception_kls=DAGMisconfigured)
def validate_autofill_values(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the optional `autofill_values` section of a task config.

    `autofill_values` must be a mapping whose keys are a subset of the app's
    job_id template components; each value is either a sequence of allowed
    values or a string "min:max" denoting an integer range.
    """
    dct = metadata.get('autofill_values', {})
    _log_raise_if(
        dct and not isinstance(dct, cb.TasksConfigBaseMapping),
        "`autofill_values` must be a mapping of key:[value] pairs",
        extra=dict(type_autofill_values=type(dct), z=dct, **ld),
        exception_kls=DAGMisconfigured)
    for k, v in dct.items():
        msg = ("Value of `autofill_values.<key>` must be a sequence or"
               " a string denoting a number range of form: \"min:max\"")
        extra = dict(key='autofill_values.%s' % k, **ld)
        if isinstance(v, six.string_types):
            # BUGFIX: maxsplit was 2, which silently accepted a spurious
            # third field (eg "1:2:3").  The documented form "min:max" has
            # at most one colon, so split at most once.
            _log_raise_if(
                not all(x.isdigit() for x in v.split(':', 1)),
                msg, extra=extra, exception_kls=DAGMisconfigured)
        else:
            _log_raise_if(
                not isinstance(v, cb.TasksConfigBaseSequence),
                msg, extra=extra, exception_kls=DAGMisconfigured)
    # every autofill key must be a component of this app's job_id template
    extra_keys = set(dct).difference(node.get_job_id_template(app_name1)[1])
    _log_raise_if(
        extra_keys,
        ("The set of `autofill_values` keys must be a subset of app_name's"
         " given job_id components"),
        extra=dict(extra_keys=extra_keys, **ld),
        exception_kls=DAGMisconfigured)
def validate_autofill_values(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the optional `autofill_values` section of a task config.

    Each key must belong to the app's job_id template; each value is either
    a sequence of permitted values or a "min:max" integer-range string.
    """
    dct = metadata.get('autofill_values', {})
    _log_raise_if(
        dct and not isinstance(dct, cb.TasksConfigBaseMapping),
        "`autofill_values` must be a mapping of key:[value] pairs",
        extra=dict(type_autofill_values=type(dct), z=dct, **ld),
        exception_kls=DAGMisconfigured)
    for k, v in dct.items():
        msg = ("Value of `autofill_values.<key>` must be a sequence or"
               " a string denoting a number range of form: \"min:max\"")
        extra = dict(key='autofill_values.%s' % k, **ld)
        if isinstance(v, six.string_types):
            # BUGFIX: split(':', 2) allowed an extra colon field ("1:2:3")
            # to pass; "min:max" has at most one colon, so maxsplit is 1.
            _log_raise_if(
                not all(x.isdigit() for x in v.split(':', 1)),
                msg, extra=extra, exception_kls=DAGMisconfigured)
        else:
            _log_raise_if(
                not isinstance(v, cb.TasksConfigBaseSequence),
                msg, extra=extra, exception_kls=DAGMisconfigured)
    extra_keys = set(dct).difference(node.get_job_id_template(app_name1)[1])
    _log_raise_if(
        extra_keys,
        ("The set of `autofill_values` keys must be a subset of app_name's"
         " given job_id components"),
        extra=dict(extra_keys=extra_keys, **ld),
        exception_kls=DAGMisconfigured)
def _validate_dependency_groups_part2(dep_name, dep_grp, ld, tasks_conf):
    """Validate one dependency group: it must declare a sequence-valued
    "app_name" key, and its metadata and job_id validations must check out."""
    has_valid_app_names = (
        "app_name" in dep_grp and
        isinstance(dep_grp["app_name"], cb.TasksConfigBaseSequence))
    _log_raise_if(
        not has_valid_app_names,
        ("Each dependency group the task depends on must specify"
         " an app_name key whose value is a sequence of items"
         " (ie a cb.TasksConfigBaseSequence)"),
        extra=dict(key="depends_on", invalid_dependency_group=dep_name,
                   dep_grp=str(dict(dep_grp)), **ld),
        exception_kls=DAGMisconfigured)
    _validate_dep_grp_metadata(
        dep_grp, ld=ld, tasks_conf=tasks_conf, dep_name=dep_name)
    _validate_dep_grp_with_job_id_validations(
        dep_grp, ld=ld, tasks_conf=tasks_conf)
def __getitem__(self, key):
    """Return the parsed config for app `key`, fetching the redis hash at
    `self.redis_key_prefix + key` and memoizing the result on first access.

    Raises KeyError when the app does not exist in redis; re-raises (after
    logging) whatever the redis client raises on failure.
    """
    if key not in self.cache:
        # BUGFIX: the prefixed redis key was previously assigned back onto
        # `key`, so results were cached under the prefixed name while the
        # membership test above used the caller's name -- the cache never
        # hit.  Keep the two names distinct and cache under the caller's key.
        redis_key = "%s%s" % (self.redis_key_prefix, key)
        try:
            val = self.cli.hgetall(redis_key)
        except Exception:  # narrowed from bare `except:`; still log+re-raise
            log.error(("Redis failed to fetch app config data."
                       "Is the redis key you used an incorrect type?"
                       " It should be a hash."), extra=dict(key=redis_key))
            raise
        _log_raise_if(not val, "Given app_name does not exist in redis",
                      dict(app_name=redis_key), KeyError)
        # Convert redis values to python objects. Potentially dangerous.
        # SECURITY NOTE: eval of redis-stored strings executes arbitrary
        # expressions; safe only if the redis instance is fully trusted.
        val = {eval(k, {}, {}): eval(v, {}, {}) for k, v in val.items()}
        self.cache[key] = _ensure_type(val, JSONMapping, JSONSequence)
    return self.cache[key]
def _validate_dependency_groups_part2(dep_name, dep_grp, ld, tasks_conf):
    """Check a single dependency group.

    The group must define an "app_name" key holding a sequence; the group's
    metadata and its job_id validations are then verified by the helpers.
    """
    missing_or_wrong_type = (
        "app_name" not in dep_grp or
        not isinstance(dep_grp["app_name"], cb.TasksConfigBaseSequence))
    _log_raise_if(
        missing_or_wrong_type,
        ("Each dependency group the task depends on must specify"
         " an app_name key whose value is a sequence of items"
         " (ie a cb.TasksConfigBaseSequence)"),
        extra=dict(
            key="depends_on",
            invalid_dependency_group=dep_name,
            dep_grp=str(dict(dep_grp)),
            **ld),
        exception_kls=DAGMisconfigured)
    _validate_dep_grp_metadata(dep_grp, ld=ld, tasks_conf=tasks_conf,
                               dep_name=dep_name)
    _validate_dep_grp_with_job_id_validations(dep_grp, ld=ld,
                                              tasks_conf=tasks_conf)
def _validate_dependency_groups(tasks_conf, metadata, ld):
    """Validate every named dependency group under this task's `depends_on`.

    Group names must not collide with app_names.  A group given as a
    sequence of sub-groups is validated per sub-group, and all sub-groups
    must agree on the value of each job_id template identifier.
    """
    if "depends_on" not in metadata:
        return
    for dep_name, dep_grp in metadata["depends_on"].items():
        _log_raise_if(
            dep_name in tasks_conf,
            ("Task's depends_on value has a naming conflict. You cannot"
             " identify a dependency group with the same name as an"
             " app_name."),
            extra=dict(key="depends_on", invalid_dependency_group=dep_name,
                       **ld),
            exception_kls=DAGMisconfigured)
        # validate scenario where the dep_grp is made up of subgrpA AND subgrpB
        if isinstance(dep_grp, cb.TasksConfigBaseSequence):
            for _dep_grp in dep_grp:
                _validate_dependency_groups_part2(
                    dep_name, _dep_grp, ld, tasks_conf)
            # check job_id template identifiers are consistently defined
            # across the dependency group
            for identifier in node.get_job_id_template(ld['app_name'])[1]:
                values = [_dep_grp.get(identifier) for _dep_grp in dep_grp
                          if 'job_id' not in _dep_grp]
                # BUGFIX: the old check, reduce(lambda x, y: x == y, values),
                # compared the boolean result of the first comparison with
                # the remaining values (eg [2, 2, 2] -> (2 == 2) == 2 ->
                # False), crashed with TypeError on an empty list, and a
                # single falsy value (eg [None]) was itself returned and
                # treated as "inconsistent".  Compare each value against the
                # first instead.
                _log_raise_if(
                    any(v != values[0] for v in values[1:]),
                    ("You specified inconsistent values for job_id"
                     " metadata. Each sub-dependency in your dependency"
                     " group must specify the exact same metadata value"
                     " for each identifier in your app's job_id template."),
                    # because otherwise, users could easily create unexpected
                    # dependence relations
                    extra=dict(key="depends_on",
                               invalid_dependency_group=dep_name,
                               invalid_identifier=identifier, values=values,
                               **ld),
                    exception_kls=DAGMisconfigured)
        else:
            _validate_dependency_groups_part2(dep_name, dep_grp, ld,
                                              tasks_conf)
def __getitem__(self, key):
    """Fetch and memoize the config for app `key` from a redis hash stored
    at `self.redis_key_prefix + key`.

    Raises KeyError if the app is absent from redis; redis client errors
    are logged and re-raised.
    """
    if key not in self.cache:
        # BUGFIX: previously the prefixed name was assigned back onto `key`,
        # so the result was cached under the prefixed name while the lookup
        # above used the caller's name -- every call missed the cache.
        redis_key = "%s%s" % (self.redis_key_prefix, key)
        try:
            val = self.cli.hgetall(redis_key)
        except Exception:  # was a bare `except:`; still logs and re-raises
            log.error((
                "Redis failed to fetch app config data."
                "Is the redis key you used an incorrect type?"
                " It should be a hash."), extra=dict(key=redis_key))
            raise
        _log_raise_if(
            not val, "Given app_name does not exist in redis",
            dict(app_name=redis_key), KeyError)
        # Convert redis values to python objects. Potentially dangerous.
        # SECURITY NOTE: eval on redis-stored strings runs arbitrary
        # expressions -- acceptable only for a fully trusted redis instance.
        val = {eval(k, {}, {}): eval(v, {}, {}) for k, v in val.items()}
        self.cache[key] = _ensure_type(val, JSONMapping, JSONSequence)
    return self.cache[key]
def _validate_dependency_groups(tasks_conf, metadata, ld):
    """Check each named dependency group in this task's `depends_on`.

    Rejects group names that shadow an app_name.  For groups expressed as a
    sequence of sub-groups, each sub-group is validated and all sub-groups
    must define identical values for every job_id template identifier.
    """
    if "depends_on" not in metadata:
        return
    for dep_name, dep_grp in metadata["depends_on"].items():
        _log_raise_if(
            dep_name in tasks_conf,
            ("Task's depends_on value has a naming conflict. You cannot"
             " identify a dependency group with the same name as an"
             " app_name."),
            extra=dict(
                key="depends_on", invalid_dependency_group=dep_name, **ld),
            exception_kls=DAGMisconfigured)
        # validate scenario where the dep_grp is made up of subgrpA AND subgrpB
        if isinstance(dep_grp, cb.TasksConfigBaseSequence):
            for _dep_grp in dep_grp:
                _validate_dependency_groups_part2(
                    dep_name, _dep_grp, ld, tasks_conf)
            # check job_id template identifiers are consistently defined
            # across the dependency group
            for identifier in node.get_job_id_template(ld['app_name'])[1]:
                values = [_dep_grp.get(identifier) for _dep_grp in dep_grp
                          if 'job_id' not in _dep_grp]
                # BUGFIX: reduce(lambda x, y: x == y, values) was wrong for
                # three or more values (it compares a boolean against the
                # later values), raised TypeError on an empty list, and
                # false-positived on a single falsy value.  Pairwise-compare
                # against the first element instead.
                _log_raise_if(
                    any(v != values[0] for v in values[1:]),
                    ("You specified inconsistent values for job_id"
                     " metadata. Each sub-dependency in your dependency"
                     " group must specify the exact same metadata value"
                     " for each identifier in your app's job_id template."),
                    # because otherwise, users could easily create unexpected
                    # dependence relations
                    extra=dict(
                        key="depends_on", invalid_dependency_group=dep_name,
                        invalid_identifier=identifier, values=values, **ld),
                    exception_kls=DAGMisconfigured)
        else:
            _validate_dependency_groups_part2(
                dep_name, dep_grp, ld, tasks_conf)
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist, apply to each
    individual value of the relevant key in the dep group?"""
    for k, v in dep_grp.items():
        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue
        func = get_NS().job_id_validations.get(k)
        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func,
            "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component",
            extra=dict(key=k, value=v, **ld),
            exception_kls=DAGMisconfigured)
        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents
            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template. If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg,
                    extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue
        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: `err.message` was removed in Python 3; formatting
                # the exception itself yields the same text on Python 2.
                _log_raise(
                    ("Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                     " The job_id_validation function complained that the"
                     " value was invalid. Error details: %s") % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)
            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value. It should return input unmodified or fail."),
                extra=dict(key='%s.%s' % (k, v), value=vv,
                           job_id_validation=func, **ld),
                exception_kls=DAGMisconfigured)
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist, apply to each
    individual value of the relevant key in the dep group?"""
    for k, v in dep_grp.items():
        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue
        func = get_NS().job_id_validations.get(k)
        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func,
            "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component",
            extra=dict(
                key=k, value=v, **ld),
            exception_kls=DAGMisconfigured)
        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents
            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template. If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg,
                    extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue
        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: exceptions have no `.message` attribute on
                # Python 3; format the exception object (same text on py2).
                _log_raise((
                    "Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                    " The job_id_validation function complained that the"
                    " value was invalid. Error details: %s"
                ) % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)
            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value. It should return input unmodified or fail."),
                extra=dict(
                    key='%s.%s' % (k, v), value=vv, job_id_validation=func,
                    **ld),
                exception_kls=DAGMisconfigured)
def _validate_dep_grp_metadata(dep_grp, ld, tasks_conf, dep_name):
    """
    Test that a dependency group correctly defined.

    `dep_grp` (obj) - Configuration data for a dependency group.  It is an
        instance that inherits from cb.TasksConfigBaseMapping.
        Visualized as a dict or json, a dep_grp might look like:
            dep_grp = {"app_name": ["app2", "app3"]}
    `ld` (dict) - helpful info for error logs.  It also contains the app_name
        this dep_grp belongs to.
    `tasks_conf` (obj) - The configuration for all tasks.  It is an instance
        that inherits from cb.TasksConfigBaseMapping
    `dep_name` (str) - The name for this dependency group.

    Raises DAGMisconfigured (via _log_raise_if) on the first failed check.
    """
    # keep a handle on the caller's log data; `ld` is rebound per parent below
    ld1 = ld
    _template, child_template = node.get_job_id_template(ld['app_name'])
    for parent_app_name in dep_grp['app_name']:
        # per-parent log context (includes everything from the caller's ld)
        ld = dict(
            parent_app_name=parent_app_name,
            dependency_group="depends_on.%s" % dep_name, **ld1)
        _log_raise_if(
            parent_app_name not in tasks_conf,
            "Unrecognized parent_app_name in a `depends_on` dependency group",
            extra=ld, exception_kls=DAGMisconfigured)
        _, parent_template = node.get_job_id_template(parent_app_name)
        # a group containing only "app_name" names no job_id identifiers
        if len(dep_grp) == 1:
            _log_raise_if(
                not set(child_template).issuperset(parent_template),
                ("If you choose specify a dependency group with no job_id"
                 " identifiers, then the"
                 " child task's job_id identifiers must be a superset of those"
                 " in the parent's job_id. Otherwise, there"
                 " are cases where you cannot identify"
                 " a parent job_id given a child job_id."),
                extra=dict(
                    parent_job_id_template=parent_template,
                    child_job_id_template=child_template, **ld),
                exception_kls=DAGMisconfigured)
        # for every parent, does the dependency group define enough information
        # to support a bubble-up or bubble-down operation?
        required_keys = set(
            child_template).difference(parent_template).difference(
            tasks_conf[ld['app_name']].get('autofill_values', {}))
        missing_keys = required_keys.difference(dep_grp)
        _log_raise_if(
            missing_keys,
            ("This app's dependency group is missing some required"
             " job_id identifiers"),
            extra=dict(
                missing_job_id_identifiers=missing_keys,
                job_id_template=_template, **ld),
            exception_kls=DAGMisconfigured)
    # look at the key:value pairs in <app_name>.depends_on
    # NOTE(review): `ld` here still carries the last parent's context from
    # the loop above -- presumably acceptable for log extras; verify.
    for k, v in dep_grp.items():
        # dups in values?
        if isinstance(v, cb.TasksConfigBaseSequence):
            _log_raise_if(
                len(set(v)) != len(v),
                "You have duplicate metadata in dependency group metadata",
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
        else:
            # non-sequence values are only legal as the literal string 'all'
            _log_raise_if(
                v != 'all',
                ("The value of a depends_on.<key> must be a list of values"
                 " or the exact string 'all'"),
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
def _generate_job_ids(app_name, job_id, child, group_name, depends_on):
    """Return (child, child_job_id) pairs implied by one dependency group,
    or [] when the parent's `job_id` does not apply to this group.

    `app_name` (str) - the parent app being examined
    `job_id` (str) - a job_id of the parent app
    `child` (str) - the child app_name that declares this dependency group
    `group_name` - identifier of the dependency group; only forwarded to
        _generate_job_ids2
    `depends_on` (mapping) - this dependency group's configuration
    """
    # ignore dependency groups that have nothing to do with the parent app_name
    if app_name not in depends_on['app_name']:
        return []
    # if len(depends_on) == 1:
    #     # child depends only on one parent, so it must be the parent we've
    #     # called get_children on!
    #     return [(child, job_id)]
    # check that the job_id applies to this group
    pjob_id = parse_job_id(app_name, job_id)  # parent data
    ctemplate, cparsed_template = get_job_id_template(child)  # child data
    # check if parent job_ids are hardcoded into configuration
    if 'job_id' in depends_on:
        if job_id in depends_on['job_id']:
            # build the child job_id from parent components, overridden by
            # any single-valued keys in the dependency group
            kwargs = dict()
            kwargs.update(pjob_id)
            kwargs.update(
                {k: v[0] for k, v in depends_on.items() if len(v) == 1})
            cjob_id = ctemplate.format(**kwargs)
            return [(child, cjob_id)]
        return []
    # check if the parent job_id template is compatible with this dep_grp
    child_autofill_values = get_autofill_values(child, raise_err=False)
    for k, v in pjob_id.items():
        # is the parent's job_id identifier defined anywhere?
        if k not in depends_on and k not in cparsed_template:
            return []
        # is the identifier appropriately missing from the dep_grp?
        if k in depends_on and v not in depends_on[k]:
            return []
        # is parent identifier defined in child autofill_values different
        # than parent's given job id?
        if k in child_autofill_values and v not in child_autofill_values[k]:
            return []
    # check that child's autofill_values are defined if parent doesn't
    # completely define a child's job_id components.
    required_autofill_values = set(cparsed_template).difference(pjob_id)
    _log_raise_if(
        any(x not in child_autofill_values
            for x in required_autofill_values),
        "autofill_values must be defined on child app_name if you have a"
        " parent whose job_id template is not a superset of the child's",
        extra=dict(child_app_name=child, parent_app_name=app_name,
                   required_autofill_values=required_autofill_values),
        exception_kls=DAGMisconfigured)
    # check if the child's job_id template is compatible with this dep_grp
    for k in cparsed_template:
        # is child's job_id identifier appropriately missing from the dep_grp?
        if k in depends_on and k in pjob_id and \
                pjob_id[k] not in depends_on[k]:
            return []
        # is identifier defined anywhere?
        if (k not in depends_on and k not in pjob_id and
                k not in get_autofill_values(child, raise_err=False)):
            return []
    # all compatibility checks passed; enumerate the actual child job_ids
    return _generate_job_ids2(
        depends_on, pjob_id, cparsed_template, ctemplate, group_name, child)
def _validate_dep_grp_metadata(dep_grp, ld, tasks_conf, dep_name):
    """
    Test that a dependency group correctly defined.

    `dep_grp` (obj) - Configuration data for a dependency group.  It is an
        instance that inherits from cb.TasksConfigBaseMapping.
        Visualized as a dict or json, a dep_grp might look like:
            dep_grp = {"app_name": ["app2", "app3"]}
    `ld` (dict) - helpful info for error logs.  It also contains the app_name
        this dep_grp belongs to.
    `tasks_conf` (obj) - The configuration for all tasks.  It is an instance
        that inherits from cb.TasksConfigBaseMapping
    `dep_name` (str) - The name for this dependency group.

    Raises DAGMisconfigured (via _log_raise_if) on the first failed check.
    """
    # original log data; `ld` is rebound with parent context in the loop
    ld1 = ld
    _template, child_template = node.get_job_id_template(ld['app_name'])
    for parent_app_name in dep_grp['app_name']:
        ld = dict(parent_app_name=parent_app_name,
                  dependency_group="depends_on.%s" % dep_name, **ld1)
        _log_raise_if(
            parent_app_name not in tasks_conf,
            "Unrecognized parent_app_name in a `depends_on` dependency group",
            extra=ld,
            exception_kls=DAGMisconfigured)
        _, parent_template = node.get_job_id_template(parent_app_name)
        # len == 1 means the group defines only "app_name": no identifiers
        if len(dep_grp) == 1:
            _log_raise_if(
                not set(child_template).issuperset(parent_template),
                ("If you choose specify a dependency group with no job_id"
                 " identifiers, then the"
                 " child task's job_id identifiers must be a superset of those"
                 " in the parent's job_id. Otherwise, there"
                 " are cases where you cannot identify"
                 " a parent job_id given a child job_id."),
                extra=dict(parent_job_id_template=parent_template,
                           child_job_id_template=child_template, **ld),
                exception_kls=DAGMisconfigured)
        # for every parent, does the dependency group define enough information
        # to support a bubble-up or bubble-down operation?
        required_keys = set(child_template).difference(
            parent_template).difference(tasks_conf[ld['app_name']].get(
                'autofill_values', {}))
        missing_keys = required_keys.difference(dep_grp)
        _log_raise_if(missing_keys,
                      ("This app's dependency group is missing some required"
                       " job_id identifiers"),
                      extra=dict(missing_job_id_identifiers=missing_keys,
                                 job_id_template=_template, **ld),
                      exception_kls=DAGMisconfigured)
    # look at the key:value pairs in <app_name>.depends_on
    # NOTE(review): `ld` still holds the last parent's context here --
    # presumably fine for log extras; confirm.
    for k, v in dep_grp.items():
        # dups in values?
        if isinstance(v, cb.TasksConfigBaseSequence):
            _log_raise_if(
                len(set(v)) != len(v),
                "You have duplicate metadata in dependency group metadata",
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
        else:
            # non-sequence values must be exactly the string 'all'
            _log_raise_if(
                v != 'all',
                ("The value of a depends_on.<key> must be a list of values"
                 " or the exact string 'all'"),
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
def _generate_job_ids(app_name, job_id, child, group_name, depends_on):
    """Return (child, child_job_id) pairs that this dependency group implies
    for the given parent job_id, or [] if the job_id doesn't apply here.

    `app_name` (str) - the parent app under consideration
    `job_id` (str) - a job_id belonging to the parent app
    `child` (str) - the child app_name declaring this dependency group
    `group_name` - dependency-group identifier; forwarded unchanged to
        _generate_job_ids2
    `depends_on` (mapping) - the dependency group's configuration
    """
    # ignore dependency groups that have nothing to do with the parent app_name
    if app_name not in depends_on['app_name']:
        return []
    # if len(depends_on) == 1:
    #     # child depends only on one parent, so it must be the parent we've
    #     # called get_children on!
    #     return [(child, job_id)]
    # check that the job_id applies to this group
    pjob_id = parse_job_id(app_name, job_id)  # parent data
    ctemplate, cparsed_template = get_job_id_template(child)  # child data
    # check if parent job_ids are hardcoded into configuration
    if 'job_id' in depends_on:
        if job_id in depends_on['job_id']:
            # fill the child template with parent components, letting any
            # single-valued dep_grp keys override them
            kwargs = dict()
            kwargs.update(pjob_id)
            kwargs.update({k: v[0] for k, v in depends_on.items()
                           if len(v) == 1})
            cjob_id = ctemplate.format(**kwargs)
            return [(child, cjob_id)]
        return []
    # check if the parent job_id template is compatible with this dep_grp
    child_autofill_values = get_autofill_values(child, raise_err=False)
    for k, v in pjob_id.items():
        # is the parent's job_id identifier defined anywhere?
        if k not in depends_on and k not in cparsed_template:
            return []
        # is the identifier appropriately missing from the dep_grp?
        if k in depends_on and v not in depends_on[k]:
            return []
        # is parent identifier defined in child autofill_values different
        # than parent's given job id?
        if k in child_autofill_values and v not in child_autofill_values[k]:
            return []
    # check that child's autofill_values are defined if parent doesn't
    # completely define a child's job_id components.
    required_autofill_values = set(cparsed_template).difference(pjob_id)
    _log_raise_if(
        any(x not in child_autofill_values
            for x in required_autofill_values),
        "autofill_values must be defined on child app_name if you have a"
        " parent whose job_id template is not a superset of the child's",
        extra=dict(
            child_app_name=child, parent_app_name=app_name,
            required_autofill_values=required_autofill_values),
        exception_kls=DAGMisconfigured)
    # check if the child's job_id template is compatible with this dep_grp
    for k in cparsed_template:
        # is child's job_id identifier appropriately missing from the dep_grp?
        if k in depends_on and k in pjob_id and \
                pjob_id[k] not in depends_on[k]:
            return []
        # is identifier defined anywhere?
        if (
            k not in depends_on and
            k not in pjob_id and
            k not in get_autofill_values(child, raise_err=False)
        ):
            return []
    # compatibility confirmed; enumerate the concrete child job_ids
    return _generate_job_ids2(
        depends_on, pjob_id, cparsed_template, ctemplate, group_name, child)
def _get_parents_validate_group_names(dep_names, filter_deps, ld):
    """Ensure every name in `filter_deps` is a known dependency group name."""
    _log_raise_if(
        not set(filter_deps).issubset(dep_names),
        "You specified dependency group names that don't exist",
        extra=dict(filter_deps=filter_deps, **ld),
        exception_kls=DAGMisconfigured)