Example #1
0
def _get_parents_validate_group_names(
        dep_names, filter_deps, ld):
    """Raise DAGMisconfigured if any name in `filter_deps` is not one of
    the known dependency group names in `dep_names`."""
    unknown_groups = set(filter_deps).difference(dep_names)
    _log_raise_if(
        unknown_groups,
        "You specified dependency group names that don't exist",
        extra=dict(filter_deps=filter_deps, **ld),
        exception_kls=DAGMisconfigured)
Example #2
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the task's `valid_if_or` section, which declares which
    job_id component values make a job valid for this app.

    Each key (other than the special '_func' key) must name a component of
    the app's job_id template and map to a sequence of values; every value
    must pass the user-supplied job_id validation for that component."""
    if 'valid_if_or' not in metadata:
        return

    for key, seq in metadata['valid_if_or'].items():
        # '_func' is a special directive, not a job_id component
        if key == '_func':
            continue
        location = "%s.valid_if_or.%s" % (app_name1, key)
        _log_raise_if(
            not isinstance(seq, cb.TasksConfigBaseSequence),
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(seq), key=location, **ld),
            exception_kls=DAGMisconfigured)
        template_ids = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            key not in template_ids,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=key, job_id_template=template_ids, **ld),
            exception_kls=DAGMisconfigured)
        try:
            validation_func = get_NS().job_id_validations[key]
        except KeyError:
            # no user-defined validation for this component; nothing to check
            continue
        for value in seq:
            try:
                validation_func(value)
            except Exception as err:
                _log_raise(
                    ("valid_if_or contains a value that wasn't validated"
                     " by your job_id_validations. err: %s(%s)") %
                    (err.__class__, err),
                    extra=dict(key=location, wrong_value_type=type(value),
                               **ld),
                    exception_kls=DAGMisconfigured)
Example #3
0
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the task's `depends_on` section.

    `depends_on` must be a mapping.  It is either a single unnamed
    dependency group (identified by an "app_name" key at the top level) or
    a mapping of named dependency groups."""
    if "depends_on" not in metadata:
        return

    depends_on = metadata["depends_on"]
    _log_raise_if(
        not isinstance(depends_on, cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(key="depends_on",
                   received_value_type=type(depends_on),
                   **ld),
        exception_kls=DAGMisconfigured)
    if "app_name" in depends_on:
        # a single, unnamed dependency group; give it the default name
        _validate_dep_grp_metadata(
            dep_grp=depends_on,
            ld=ld, tasks_conf=tasks_conf,
            dep_name=get_NS().dependency_group_default_name)
    else:
        # one or more named dependency groups
        _validate_dependency_groups(tasks_conf, metadata, ld)
        # every parent listed in the graph must be a recognized task
        for parent_name in dg.pred[app_name1]:
            _log_raise_if(
                parent_name not in tasks_conf,
                "Task defines an unrecognized parent dependency",
                extra=dict(parent_app_name=parent_name, **ld),
                exception_kls=DAGMisconfigured)
Example #4
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Check that `valid_if_or`, when present, is well formed.

    Every key other than '_func' must be a job_id template component whose
    value is a sequence, and each value in the sequence must satisfy the
    corresponding user-defined job_id validation."""
    if 'valid_if_or' not in metadata:
        return

    for component, candidates in metadata['valid_if_or'].items():
        if component == '_func':
            # reserved directive, skipped by design
            continue
        where = "%s.valid_if_or.%s" % (app_name1, component)
        _log_raise_if(
            not isinstance(candidates, cb.TasksConfigBaseSequence),
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(candidates), key=where, **ld),
            exception_kls=DAGMisconfigured)
        identifiers = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            component not in identifiers,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=component, job_id_template=identifiers, **ld),
            exception_kls=DAGMisconfigured)
        try:
            check = get_NS().job_id_validations[component]
        except KeyError:
            continue
        for candidate in candidates:
            try:
                check(candidate)
            except Exception as err:
                _log_raise(
                    ("valid_if_or contains a value that wasn't validated"
                     " by your job_id_validations. err: %s(%s)")
                    % (err.__class__, err),
                    extra=dict(key=where, wrong_value_type=type(candidate),
                               **ld),
                    exception_kls=DAGMisconfigured)
Example #5
0
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Ensure `depends_on` (when present) is a proper mapping and that its
    dependency group(s) and parent tasks are all well defined."""
    if "depends_on" not in metadata:
        return

    _log_raise_if(
        not isinstance(metadata["depends_on"], cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(
            key="depends_on",
            received_value_type=type(metadata["depends_on"]),
            **ld),
        exception_kls=DAGMisconfigured)
    if "app_name" in metadata["depends_on"]:
        # "app_name" at the top level means one unnamed dependency group,
        # which gets the configured default group name
        default_name = get_NS().dependency_group_default_name
        _validate_dep_grp_metadata(
            dep_grp=metadata['depends_on'],
            ld=ld,
            tasks_conf=tasks_conf,
            dep_name=default_name)
        return
    # otherwise: one or more named dependency groups
    _validate_dependency_groups(tasks_conf, metadata, ld)
    # dependent tasks in the graph must all be recognized task names
    for ancestor in dg.pred[app_name1]:
        _log_raise_if(ancestor not in tasks_conf,
                      "Task defines an unrecognized parent dependency",
                      extra=dict(parent_app_name=ancestor, **ld),
                      exception_kls=DAGMisconfigured)
Example #6
0
def validate_autofill_values(app_name1, metadata, dg, tasks_conf, ld):
    """Validate the optional `autofill_values` section: it must be a mapping
    whose keys are job_id template components and whose values are either
    sequences of allowed values or "min:max" numeric range strings."""
    autofill = metadata.get('autofill_values', {})
    _log_raise_if(
        autofill and not isinstance(autofill, cb.TasksConfigBaseMapping),
        "`autofill_values` must be a mapping of key:[value] pairs",
        extra=dict(type_autofill_values=type(autofill), z=autofill, **ld),
        exception_kls=DAGMisconfigured)
    for component, allowed in autofill.items():
        msg = ("Value of `autofill_values.<key>` must be a sequence or"
               " a string denoting a number range of form:  \"min:max\"")
        extra = dict(key='autofill_values.%s' % component, **ld)
        if isinstance(allowed, six.string_types):
            # range string: every colon-separated piece must be an integer
            bad_range = not all(
                piece.isdigit() for piece in allowed.split(':', 2))
            _log_raise_if(
                bad_range, msg, extra=extra, exception_kls=DAGMisconfigured)
        else:
            _log_raise_if(
                not isinstance(allowed, cb.TasksConfigBaseSequence),
                msg, extra=extra, exception_kls=DAGMisconfigured)
    # keys must all belong to the app's job_id template components
    unknown = set(autofill).difference(
        node.get_job_id_template(app_name1)[1])
    _log_raise_if(
        unknown,
        ("The set of `autofill_values` keys must be a subset of app_name's"
         " given job_id components"),
        extra=dict(extra_keys=unknown, **ld),
        exception_kls=DAGMisconfigured)
Example #7
0
def validate_autofill_values(app_name1, metadata, dg, tasks_conf, ld):
    """Check `autofill_values`: a mapping from job_id component name to
    either a sequence of candidate values or a "min:max" digit-range
    string.  All keys must be components of the app's job_id template."""
    vals = metadata.get('autofill_values', {})
    _log_raise_if(
        vals and not isinstance(vals, cb.TasksConfigBaseMapping),
        "`autofill_values` must be a mapping of key:[value] pairs",
        extra=dict(type_autofill_values=type(vals), z=vals, **ld),
        exception_kls=DAGMisconfigured)
    err_msg = ("Value of `autofill_values.<key>` must be a sequence or"
               " a string denoting a number range of form:  \"min:max\"")
    for name, spec in vals.items():
        err_extra = dict(key='autofill_values.%s' % name, **ld)
        if isinstance(spec, six.string_types):
            _log_raise_if(
                not all(part.isdigit() for part in spec.split(':', 2)),
                err_msg, extra=err_extra, exception_kls=DAGMisconfigured)
            continue
        _log_raise_if(
            not isinstance(spec, cb.TasksConfigBaseSequence),
            err_msg, extra=err_extra, exception_kls=DAGMisconfigured)
    surplus = set(vals).difference(node.get_job_id_template(app_name1)[1])
    _log_raise_if(
        surplus,
        ("The set of `autofill_values` keys must be a subset of app_name's"
         " given job_id components"),
        extra=dict(extra_keys=surplus, **ld),
        exception_kls=DAGMisconfigured)
Example #8
0
def _validate_dependency_groups_part2(dep_name, dep_grp, ld, tasks_conf):
    """Validate one dependency group: it must declare an "app_name" key
    whose value is a sequence, and its metadata and job_id values must
    pass the group-level validations."""
    has_valid_app_names = (
        "app_name" in dep_grp and
        isinstance(dep_grp["app_name"], cb.TasksConfigBaseSequence))
    _log_raise_if(
        not has_valid_app_names,
        ("Each dependency group the task depends on must specify"
         " an app_name key whose value is a sequence of items"
         " (ie a cb.TasksConfigBaseSequence)"),
        extra=dict(
            key="depends_on", invalid_dependency_group=dep_name,
            dep_grp=str(dict(dep_grp)), **ld),
        exception_kls=DAGMisconfigured)
    _validate_dep_grp_metadata(
        dep_grp, ld=ld, tasks_conf=tasks_conf, dep_name=dep_name)
    _validate_dep_grp_with_job_id_validations(
        dep_grp, ld=ld, tasks_conf=tasks_conf)
Example #9
0
    def __getitem__(self, key):
        """Return the parsed config mapping for app_name `key`.

        On first access, fetch the redis hash stored under
        `self.redis_key_prefix + key`, eval its keys/values into python
        objects, and memoize the result in `self.cache`.

        Raises KeyError (via _log_raise_if) if redis has no data for `key`.
        """
        if key not in self.cache:
            # BUGFIX: keep the caller's `key` for the cache and use a
            # separate name for the prefixed redis key.  Previously `key`
            # was rebound to the prefixed value before caching, so
            # `key not in self.cache` was always True and every lookup
            # went back to redis.
            rkey = "%s%s" % (self.redis_key_prefix, key)
            try:
                val = self.cli.hgetall(rkey)
            except Exception:
                # narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are not logged as redis failures (still
                # re-raised either way)
                log.error(("Redis failed to fetch app config data."
                           "Is the redis key you used an incorrect type?"
                           "  It should be a hash."),
                          extra=dict(key=rkey))
                raise
            _log_raise_if(not val, "Given app_name does not exist in redis",
                          dict(app_name=rkey), KeyError)

            # Convert redis values to python objects.  Potentially dangerous.
            # SECURITY NOTE: eval() on data read from redis executes
            # arbitrary expressions; only trusted writers may populate redis.
            val = {eval(k, {}, {}): eval(v, {}, {}) for k, v in val.items()}
            self.cache[key] = _ensure_type(val, JSONMapping, JSONSequence)
        return self.cache[key]
Example #10
0
def _validate_dependency_groups_part2(dep_name, dep_grp, ld, tasks_conf):
    """Run the per-group checks for a single dependency group: presence of
    a sequence-valued "app_name" key, then the group metadata and job_id
    value validations."""
    extra_info = dict(
        key="depends_on", invalid_dependency_group=dep_name,
        dep_grp=str(dict(dep_grp)), **ld)
    _log_raise_if(
        "app_name" not in dep_grp or
        not isinstance(dep_grp["app_name"], cb.TasksConfigBaseSequence),
        ("Each dependency group the task depends on must specify"
         " an app_name key whose value is a sequence of items"
         " (ie a cb.TasksConfigBaseSequence)"),
        extra=extra_info,
        exception_kls=DAGMisconfigured)
    _validate_dep_grp_metadata(
        dep_grp, dep_name=dep_name, ld=ld, tasks_conf=tasks_conf)
    _validate_dep_grp_with_job_id_validations(
        dep_grp, tasks_conf=tasks_conf, ld=ld)
Example #11
0
def _validate_dependency_groups(tasks_conf, metadata, ld):
    """Validate each named dependency group in metadata["depends_on"].

    Group names must not collide with app_names.  Each (sub-)group is
    validated individually, and for groups composed of multiple sub-groups
    the job_id template identifiers must be defined consistently across
    sub-groups.  Raises DAGMisconfigured on the first violation.
    """
    if "depends_on" not in metadata:
        return

    for dep_name, dep_grp in metadata["depends_on"].items():
        _log_raise_if(
            dep_name in tasks_conf,
            ("Task's depends_on value has a naming conflict. You cannot"
             " identify a dependency group with the same name as an"
             " app_name."),
            extra=dict(key="depends_on",
                       invalid_dependency_group=dep_name,
                       **ld),
            exception_kls=DAGMisconfigured)
        # validate scenario where the dep_grp is made up of subgrpA AND subgrpB
        if isinstance(dep_grp, cb.TasksConfigBaseSequence):
            for _dep_grp in dep_grp:
                _validate_dependency_groups_part2(dep_name, _dep_grp, ld,
                                                  tasks_conf)
            # check job_id template identifiers are consistently defined
            # across the dependency group
            for identifier in node.get_job_id_template(ld['app_name'])[1]:
                values = [
                    _dep_grp.get(identifier) for _dep_grp in dep_grp
                    if 'job_id' not in _dep_grp
                ]
                # BUGFIX: the original `reduce(lambda x, y: x == y, values)`
                # is not an all-equal test: it folds the boolean result of
                # the first comparison into the next one (eg [2, 2, 2] ->
                # (2 == 2) == 2 -> False) and raises TypeError on an empty
                # list.  Compare every value to the first instead.
                inconsistent = bool(values) and any(
                    v != values[0] for v in values[1:])
                _log_raise_if(
                    inconsistent,
                    ("You specified inconsistent values for job_id"
                     " metadata.  Each sub-dependency in your dependency"
                     " group must specify the exact same metadata value"
                     " for each identifier in your app's job_id template."),
                    # because otherwise, users could easily create unexpected
                    # dependence relations
                    extra=dict(key="depends_on",
                               invalid_dependency_group=dep_name,
                               invalid_identifier=identifier,
                               values=values,
                               **ld),
                    exception_kls=DAGMisconfigured)
        else:
            _validate_dependency_groups_part2(dep_name, dep_grp, ld,
                                              tasks_conf)
Example #12
0
    def __getitem__(self, key):
        """Fetch-and-cache the config mapping for app_name `key`.

        The first lookup reads the redis hash at
        `self.redis_key_prefix + key`, converts its entries to python
        objects, and stores the result in `self.cache`; later lookups are
        served from the cache.  Raises KeyError when redis has no entry.
        """
        if key not in self.cache:
            # BUGFIX: do not rebind `key` to the prefixed redis key.  The
            # old code cached under the prefixed key while testing
            # membership with the unprefixed one, so the cache never hit.
            rkey = "%s%s" % (self.redis_key_prefix, key)
            try:
                val = self.cli.hgetall(rkey)
            except Exception:
                # was a bare `except:`; narrowed so that interpreter-level
                # exceptions aren't reported as redis errors (re-raise keeps
                # the original behavior for everything caught)
                log.error((
                    "Redis failed to fetch app config data."
                    "Is the redis key you used an incorrect type?"
                    "  It should be a hash."), extra=dict(key=rkey))
                raise
            _log_raise_if(
                not val,
                "Given app_name does not exist in redis",
                dict(app_name=rkey), KeyError)

            # Convert redis values to python objects.  Potentially dangerous.
            # SECURITY NOTE: eval() executes whatever is stored in redis;
            # the store must only ever be written by trusted code.
            val = {eval(k, {}, {}): eval(v, {}, {}) for k, v in val.items()}
            self.cache[key] = _ensure_type(val, JSONMapping, JSONSequence)
        return self.cache[key]
Example #13
0
def _validate_dependency_groups(tasks_conf, metadata, ld):
    """Validate the named dependency groups in metadata["depends_on"].

    A group name must not shadow an app_name.  Sequence-valued groups
    (multiple sub-groups) are validated per sub-group, and their job_id
    template identifiers must agree across sub-groups.
    """
    if "depends_on" not in metadata:
        return

    for dep_name, dep_grp in metadata["depends_on"].items():
        _log_raise_if(
            dep_name in tasks_conf,
            ("Task's depends_on value has a naming conflict. You cannot"
             " identify a dependency group with the same name as an"
             " app_name."),
            extra=dict(
                key="depends_on",
                invalid_dependency_group=dep_name,
                **ld),
            exception_kls=DAGMisconfigured)
        # validate scenario where the dep_grp is made up of subgrpA AND subgrpB
        if isinstance(dep_grp, cb.TasksConfigBaseSequence):
            for _dep_grp in dep_grp:
                _validate_dependency_groups_part2(
                    dep_name, _dep_grp, ld, tasks_conf)
            # check job_id template identifiers are consistently defined
            # across the dependency group
            for identifier in node.get_job_id_template(ld['app_name'])[1]:
                values = [_dep_grp.get(identifier) for _dep_grp in dep_grp
                          if 'job_id' not in _dep_grp]
                # BUGFIX: `reduce(lambda x, y: x == y, values)` did not test
                # that all values are equal (it compares a bool against the
                # third and later elements) and crashed on an empty list;
                # use a direct comparison against the first element.
                inconsistent = bool(values) and any(
                    v != values[0] for v in values[1:])
                _log_raise_if(
                    inconsistent,
                    ("You specified inconsistent values for job_id"
                     " metadata.  Each sub-dependency in your dependency"
                     " group must specify the exact same metadata value"
                     " for each identifier in your app's job_id template."),
                    # because otherwise, users could easily create unexpected
                    # dependence relations
                    extra=dict(
                        key="depends_on", invalid_dependency_group=dep_name,
                        invalid_identifier=identifier,
                        values=values, **ld),
                    exception_kls=DAGMisconfigured)
        else:
            _validate_dependency_groups_part2(
                dep_name, dep_grp, ld, tasks_conf)
Example #14
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?

    Raises DAGMisconfigured (via _log_raise / _log_raise_if) when a
    depends_on key has no registered validation, when "all" is used but a
    parent lacks the matching autofill_values entry, or when a value fails
    (or is mutated by) its validation function.
    """
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func, "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component",
            extra=dict(key=k, value=v, **ld),
            exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents

            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg,
                    extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: `err.message` was removed in Python 3 and raised
                # AttributeError inside this error path; format the
                # exception itself (same text for single-argument
                # exceptions on Python 2).
                _log_raise(
                    ("Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                     " The job_id_validation function complained that the"
                     " value was invalid. Error details: %s") % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(key='%s.%s' % (k, v),
                           value=vv,
                           job_id_validation=func,
                           **ld),
                exception_kls=DAGMisconfigured)
Example #15
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?

    Each depends_on key other than "app_name"/"job_id" is a job_id
    component; it must have a registered validation, and every value must
    pass that validation unchanged.  "all" requires the parent(s) to define
    the matching autofill_values entry.
    """
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func,
            "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component", extra=dict(
                key=k, value=v, **ld), exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents

            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg, extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: Python 3 exceptions have no `.message` attribute,
                # so the old `err.message` crashed while reporting; use the
                # exception itself for the message text.
                _log_raise((
                    "Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                    " The job_id_validation function complained that the"
                    " value was invalid. Error details: %s"
                ) % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(
                    key='%s.%s' % (k, v), value=vv, job_id_validation=func,
                    **ld),
                exception_kls=DAGMisconfigured)
Example #16
0
def _validate_dep_grp_metadata(dep_grp, ld, tasks_conf, dep_name):
    """
    Test that a dependency group is correctly defined.

    `dep_grp` (obj) - Configuration data for a dependency group.  It is an
        instance that inherits from cb.TasksConfigBaseMapping.
        Visualized as a dict or json, a dep_grp might look like:
            dep_grp = {"app_name": ["app2", "app3"]}
    `ld` (dict) - helpful info for error logs.  It also contains
        the app_name this dep_grp belongs to.
    `tasks_conf` (obj) - The configuration for all tasks. It is an instance
        that inherits from cb.TasksConfigBaseMapping
    `dep_name` (str) - The name for this dependency group.

    Raises DAGMisconfigured (via _log_raise_if) on the first violation.
    """
    # keep the caller's log dict; `ld` is rebound per-parent inside the loop
    ld1 = ld
    _template, child_template = node.get_job_id_template(ld['app_name'])
    for parent_app_name in dep_grp['app_name']:
        ld = dict(
            parent_app_name=parent_app_name,
            dependency_group="depends_on.%s" % dep_name, **ld1)

        _log_raise_if(
            parent_app_name not in tasks_conf,
            "Unrecognized parent_app_name in a `depends_on` dependency group",
            extra=ld,
            exception_kls=DAGMisconfigured)
        _, parent_template = node.get_job_id_template(parent_app_name)
        # a group containing only "app_name" supplies no job_id identifiers,
        # so the child's template must cover the parent's to allow mapping a
        # child job_id back to a parent job_id
        if len(dep_grp) == 1:
            _log_raise_if(
                not set(child_template).issuperset(parent_template),
                ("If you choose specify a dependency group with no job_id"
                 " identifiers, then the"
                 " child task's job_id identifiers must be a superset of those"
                 " in the parent's job_id. Otherwise, there"
                 " are cases where you cannot identify"
                 " a parent job_id given a child job_id."),
                extra=dict(
                    parent_job_id_template=parent_template,
                    child_job_id_template=child_template,
                    **ld),
                exception_kls=DAGMisconfigured)

        # for every parent, does the dependency group define enough information
        # to support a bubble-up or bubble-down operation?
        # required = child identifiers not supplied by the parent's template
        # nor by the child's autofill_values
        required_keys = set(
            child_template).difference(parent_template).difference(
                tasks_conf[ld['app_name']].get('autofill_values', {}))
        missing_keys = required_keys.difference(dep_grp)
        _log_raise_if(
            missing_keys,
            ("This app's dependency group is missing some required"
             " job_id identifiers"), extra=dict(
                 missing_job_id_identifiers=missing_keys,
                 job_id_template=_template, **ld),
            exception_kls=DAGMisconfigured)

    # look at the key:value pairs in <app_name>.depends_on
    # NOTE(review): `ld` here still carries the last parent's entries from
    # the loop above (or the caller's `ld` if dep_grp['app_name'] is empty)
    # — confirm that is intended for these error logs.
    for k, v in dep_grp.items():
        # dups in values?
        if isinstance(v, cb.TasksConfigBaseSequence):
            _log_raise_if(
                len(set(v)) != len(v),
                "You have duplicate metadata in dependency group metadata",
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
        else:
            # non-sequence values are only legal as the literal string 'all'
            _log_raise_if(
                v != 'all',
                ("The value of a depends_on.<key> must be a list of values"
                 " or the exact string 'all'"),
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
Example #17
0
def _generate_job_ids(app_name, job_id, child, group_name, depends_on):
    """Given a parent (`app_name`, `job_id`) and a `child` app, return a
    list of (child_app_name, child_job_id) pairs implied by the dependency
    group `depends_on` (named `group_name`), or [] when the group does not
    apply to this parent/job_id.
    """
    # ignore dependency groups that have nothing to do with the parent app_name
    if app_name not in depends_on['app_name']:
        return []

    # if len(depends_on) == 1:
    # # child depends only on one parent, so it must be the parent we've
    # # called get_children on!
    # return [(child, job_id)]

    # check that the job_id applies to this group
    pjob_id = parse_job_id(app_name, job_id)  # parent data
    ctemplate, cparsed_template = get_job_id_template(child)  # child data

    # check if parent job_ids are hardcoded into configuration
    if 'job_id' in depends_on:
        if job_id in depends_on['job_id']:
            # build the child job_id from the parent's components plus any
            # single-valued depends_on keys (which override parent values)
            kwargs = dict()
            kwargs.update(pjob_id)
            kwargs.update(
                {k: v[0]
                 for k, v in depends_on.items() if len(v) == 1})
            cjob_id = ctemplate.format(**kwargs)
            return [(child, cjob_id)]
        return []
    # check if the parent job_id template is compatible with this dep_grp
    child_autofill_values = get_autofill_values(child, raise_err=False)
    for k, v in pjob_id.items():
        # is the parent's job_id identifier defined anywhere?
        if k not in depends_on and k not in cparsed_template:
            return []
        # is the identifier appropriately missing from the dep_grp?
        if k in depends_on and v not in depends_on[k]:
            return []
        # is parent identifier defined in child autofill_values different
        # than parent's given job id?

        if k in child_autofill_values and v not in child_autofill_values[k]:
            return []

    # check that child's autofill_values are defined if parent doesn't
    # completely define a child's job_id components.
    required_autofill_values = set(cparsed_template).difference(pjob_id)
    _log_raise_if(
        any(x not in child_autofill_values for x in required_autofill_values),
        "autofill_values must be defined on child app_name if you have a"
        " parent whose job_id template is not a superset of the child's",
        extra=dict(child_app_name=child,
                   parent_app_name=app_name,
                   required_autofill_values=required_autofill_values),
        exception_kls=DAGMisconfigured)

    # check if the child's job_id template is compatible with this dep_grp
    for k in cparsed_template:
        # is child's job_id identifier appropriately missing from the dep_grp?
        if k in depends_on and k in pjob_id and \
                pjob_id[k] not in depends_on[k]:
            return []
        # is identifier defined anywhere?
        if (k not in depends_on and k not in pjob_id
                and k not in get_autofill_values(child, raise_err=False)):
            return []
    # all filters passed: delegate the actual enumeration of child job_ids
    return _generate_job_ids2(depends_on, pjob_id, cparsed_template, ctemplate,
                              group_name, child)
Example #18
0
def _validate_dep_grp_metadata(dep_grp, ld, tasks_conf, dep_name):
    """
    Test that a dependency group is correctly defined.

    `dep_grp` (obj) - Configuration data for a dependency group.  It is an
        instance that inherits from cb.TasksConfigBaseMapping.
        Visualized as a dict or json, a dep_grp might look like:
            dep_grp = {"app_name": ["app2", "app3"]}
    `ld` (dict) - helpful info for error logs.  It also contains
        the app_name this dep_grp belongs to.
    `tasks_conf` (obj) - The configuration for all tasks. It is an instance
        that inherits from cb.TasksConfigBaseMapping
    `dep_name` (str) - The name for this dependency group.

    Raises DAGMisconfigured (via _log_raise_if) on the first violation.
    """
    # preserve the caller's log dict; `ld` is rebound inside the loop below
    ld1 = ld
    _template, child_template = node.get_job_id_template(ld['app_name'])
    for parent_app_name in dep_grp['app_name']:
        ld = dict(parent_app_name=parent_app_name,
                  dependency_group="depends_on.%s" % dep_name,
                  **ld1)

        _log_raise_if(
            parent_app_name not in tasks_conf,
            "Unrecognized parent_app_name in a `depends_on` dependency group",
            extra=ld,
            exception_kls=DAGMisconfigured)
        _, parent_template = node.get_job_id_template(parent_app_name)
        # a group with only "app_name" contributes no job_id identifiers, so
        # the child template must cover the parent's to map job_ids both ways
        if len(dep_grp) == 1:
            _log_raise_if(
                not set(child_template).issuperset(parent_template),
                ("If you choose specify a dependency group with no job_id"
                 " identifiers, then the"
                 " child task's job_id identifiers must be a superset of those"
                 " in the parent's job_id. Otherwise, there"
                 " are cases where you cannot identify"
                 " a parent job_id given a child job_id."),
                extra=dict(parent_job_id_template=parent_template,
                           child_job_id_template=child_template,
                           **ld),
                exception_kls=DAGMisconfigured)

        # for every parent, does the dependency group define enough information
        # to support a bubble-up or bubble-down operation?
        # required = child identifiers the parent's template doesn't supply
        # and the child's autofill_values doesn't cover
        required_keys = set(child_template).difference(
            parent_template).difference(tasks_conf[ld['app_name']].get(
                'autofill_values', {}))
        missing_keys = required_keys.difference(dep_grp)
        _log_raise_if(missing_keys,
                      ("This app's dependency group is missing some required"
                       " job_id identifiers"),
                      extra=dict(missing_job_id_identifiers=missing_keys,
                                 job_id_template=_template,
                                 **ld),
                      exception_kls=DAGMisconfigured)

    # look at the key:value pairs in <app_name>.depends_on
    # NOTE(review): `ld` here still holds the last parent's entries from the
    # loop above (or the caller's `ld` if dep_grp['app_name'] is empty) —
    # confirm that is intended for these error logs.
    for k, v in dep_grp.items():
        # dups in values?
        if isinstance(v, cb.TasksConfigBaseSequence):
            _log_raise_if(
                len(set(v)) != len(v),
                "You have duplicate metadata in dependency group metadata",
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
        else:
            # a non-sequence value is only legal as the literal string 'all'
            _log_raise_if(
                v != 'all',
                ("The value of a depends_on.<key> must be a list of values"
                 " or the exact string 'all'"),
                extra=dict(key=k, value=v, **ld),
                exception_kls=DAGMisconfigured)
Example #19
0
def _generate_job_ids(app_name, job_id, child, group_name, depends_on):
    """Return the (child_app_name, child_job_id) pairs that a parent
    (`app_name`, `job_id`) implies for `child` under the dependency group
    `depends_on` (named `group_name`); [] when the group does not apply.
    """
    # ignore dependency groups that have nothing to do with the parent app_name
    if app_name not in depends_on['app_name']:
        return []

    # if len(depends_on) == 1:
        # # child depends only on one parent, so it must be the parent we've
        # # called get_children on!
        # return [(child, job_id)]

    # check that the job_id applies to this group
    pjob_id = parse_job_id(app_name, job_id)  # parent data
    ctemplate, cparsed_template = get_job_id_template(child)  # child data

    # check if parent job_ids are hardcoded into configuration
    if 'job_id' in depends_on:
        if job_id in depends_on['job_id']:
            # build the child job_id from parent components, overridden by
            # any single-valued depends_on keys
            kwargs = dict()
            kwargs.update(pjob_id)
            kwargs.update({k: v[0] for k, v in depends_on.items()
                           if len(v) == 1})
            cjob_id = ctemplate.format(**kwargs)
            return [(child, cjob_id)]
        return []
    # check if the parent job_id template is compatible with this dep_grp
    child_autofill_values = get_autofill_values(child, raise_err=False)
    for k, v in pjob_id.items():
        # is the parent's job_id identifier defined anywhere?
        if k not in depends_on and k not in cparsed_template:
            return []
        # is the identifier appropriately missing from the dep_grp?
        if k in depends_on and v not in depends_on[k]:
            return []
        # is parent identifier defined in child autofill_values different
        # than parent's given job id?

        if k in child_autofill_values and v not in child_autofill_values[k]:
            return []

    # check that child's autofill_values are defined if parent doesn't
    # completely define a child's job_id components.
    required_autofill_values = set(cparsed_template).difference(pjob_id)
    _log_raise_if(
        any(x not in child_autofill_values
            for x in required_autofill_values),
        "autofill_values must be defined on child app_name if you have a"
        " parent whose job_id template is not a superset of the child's",
        extra=dict(
            child_app_name=child, parent_app_name=app_name,
            required_autofill_values=required_autofill_values),
        exception_kls=DAGMisconfigured)

    # check if the child's job_id template is compatible with this dep_grp
    for k in cparsed_template:
        # is child's job_id identifier appropriately missing from the dep_grp?
        if k in depends_on and k in pjob_id and \
                pjob_id[k] not in depends_on[k]:
            return []
        # is identifier defined anywhere?
        if (
                k not in depends_on and
                k not in pjob_id and
                k not in get_autofill_values(child, raise_err=False)
        ):
            return []
    # all filters passed: enumerate the concrete child job_ids
    return _generate_job_ids2(
        depends_on, pjob_id, cparsed_template, ctemplate, group_name, child)
Example #20
0
def _get_parents_validate_group_names(dep_names, filter_deps, ld):
    """Ensure every requested dependency group name exists in `dep_names`;
    otherwise raise DAGMisconfigured."""
    known_names = set(dep_names)
    has_unknown = any(name not in known_names for name in filter_deps)
    _log_raise_if(has_unknown,
                  "You specified dependency group names that don't exist",
                  extra=dict(filter_deps=filter_deps, **ld),
                  exception_kls=DAGMisconfigured)