Example #1
0
def _build_dict_deps(dg, app_name, deps):
    """Add graph edges for every dependency listed under `app_name`.

    `dg` (nx.MultiDiGraph instance) - the Tasks configuration as a graph
    `app_name` (str) - the scheduled application whose "depends_on"
        section is being processed
    `deps` (obj) - the value of the app's "depends_on" field.  Either a
        single dependency group (a cb.TasksConfigBaseMapping that has an
        "app_name" key), or a mapping of group name to one dependency
        group or to a sequence of dependency groups.

    A group value that is neither a mapping nor a sequence is reported
    through `_log_raise` with `DAGMisconfigured`.
    """
    ld = dict(app_name=app_name, key='depends_on', deps=dict(deps))

    # A top-level "app_name" key means `deps` is itself one unnamed
    # dependency group, so it is filed under the default group name.
    if isinstance(deps, cb.TasksConfigBaseMapping) and "app_name" in deps:
        default_name = get_NS().dependency_group_default_name
        _add_edges(
            dg, app_name=app_name, dep_name=default_name, dep_grp=deps,
            log_details=ld)
        return

    for group_name, group_data in deps.items():
        # Normalise "one group" and "many groups" to a single iterable.
        if isinstance(group_data, cb.TasksConfigBaseMapping):
            groups = (group_data, )
        elif isinstance(group_data, cb.TasksConfigBaseSequence):
            groups = group_data
        else:
            _log_raise(
                "Unrecognized dependency.  Expected a list or dict",
                dict(dep_name=group_name, dep_data=group_data, **ld),
                exception_kls=DAGMisconfigured)
            continue
        for group in groups:
            _add_edges(
                dg=dg, app_name=app_name, dep_name=group_name,
                dep_grp=group, log_details=ld)
Example #2
0
def _inplace_modify_depends_on(dep_group, child_app_name, child_job_id,
                               parent_app_name, ld):
    """Fill in `dep_group` (in place) using the child's parsed job_id.

    If the dependency group holds exactly one key and its "app_name" value
    has length 1, the child is assumed to have inherited the parent's
    job_id: `dep_group['job_id']` is set to a one-element list built by
    formatting the parent's job_id template with the child's parsed
    job_id.

    Otherwise, every identifier of the child's parsed job_id that is not
    already a key of `dep_group` is added as a one-element list.

    A missing `child_job_id` is reported with `DAGMisconfigured`; a
    template formatting failure is reported with the class of the error
    that caused it.
    """
    if child_job_id is None:
        _log_raise(
            ("It's impossible to get all parent job_ids if the"
             " child expects to inherit the parent's job_id and you"
             " haven't specified the child's job_id"),
            extra=dict(parent_app_name=parent_app_name, **ld),
            exception_kls=DAGMisconfigured)
    child_ids = parse_job_id(child_app_name, child_job_id)

    only_app_name = (
        len(dep_group) == 1 and len(dep_group['app_name']) == 1)
    if not only_app_name:
        # Keep whatever the group already specifies; fill only the gaps.
        for field, value in child_ids.items():
            if field not in dep_group:
                dep_group[field] = [value]
        return

    template, _ = get_job_id_template(parent_app_name)
    try:
        dep_group['job_id'] = [template.format(**child_ids)]
    except Exception as err:
        _log_raise(
            ("The child job_id doesn't contain enough pjob_id data to"
             " create the parent job_id. Err details: %s") % err,
            extra=dict(
                job_id_template=template, pjob_iddata=str(child_ids), **ld),
            exception_kls=err.__class__)
Example #3
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Check that an app's "valid_if_or" section is well formed.

    Does nothing when `metadata` has no 'valid_if_or' key.  Otherwise, for
    every key other than '_func':
      - the value must be a cb.TasksConfigBaseSequence
      - the key must be contained in the second element returned by
        `node.get_job_id_template(app_name1)`
      - if a job_id validation function is registered for the key, every
        listed value must pass through it without raising

    Failures are reported through `_log_raise_if` / `_log_raise` with
    `DAGMisconfigured`.  `dg` and `tasks_conf` are accepted but not used
    by this function.
    """
    if 'valid_if_or' not in metadata:
        return

    for field, allowed in metadata['valid_if_or'].items():
        if field == '_func':
            continue
        where = "%s.valid_if_or.%s" % (app_name1, field)

        wrong_type = not isinstance(allowed, cb.TasksConfigBaseSequence)
        _log_raise_if(
            wrong_type,
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(allowed), key=where, **ld),
            exception_kls=DAGMisconfigured)

        parsed_template = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            field not in parsed_template,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=field, job_id_template=parsed_template, **ld),
            exception_kls=DAGMisconfigured)

        # Keys without a registered validation function are not checked
        # any further.
        try:
            validator = get_NS().job_id_validations[field]
        except KeyError:
            continue

        for candidate in allowed:
            try:
                validator(candidate)
            except Exception as err:
                reason = (
                    "valid_if_or contains a value that wasn't validated"
                    " by your job_id_validations. err: %s(%s)"
                ) % (err.__class__, err)
                _log_raise(
                    reason,
                    extra=dict(
                        key=where, wrong_value_type=type(candidate), **ld),
                    exception_kls=DAGMisconfigured)
Example #4
0
def _add_edges(dg, app_name, dep_name, dep_grp, log_details):
    """Connect each parent named in a dependency group to `app_name`.

    `dg` is an instance of a nx.MultiDiGraph, which means we can have
        multiple edges between two nodes
    `app_name` (str) - the child node that every edge points to
    `dep_name` (str) - the name of a dependency group; used as both the
        edge key and the edge label
    `dep_grp` (obj) - dependency group data.  Subclass of
        cb.TasksConfigBaseMapping.  An example of what this may look like is:
            dep_grp = {
                "app_name": ["test_app"],
                "date": [20140601],
                "client_id": [123, 140, 150],
                ...
            }
    `log_details` (dict) - extra context attached to a reported error

    Raises DAGMisconfigured when `dep_grp` has no usable "app_name" entry.
    An "app_name" value that is neither a string nor a
    cb.TasksConfigBaseSequence is reported through `_log_raise`.
    """
    try:
        parents = dep_grp['app_name']
    except (KeyError, TypeError):
        raise DAGMisconfigured(
            "You defined a dependency but forgot to include the app_name")

    if isinstance(parents, six.string_types):
        # a lone parent name: treat it as a one-element collection
        parents = (parents, )
    elif not isinstance(parents, cb.TasksConfigBaseSequence):
        _log_raise(
            ("Unrecognized type:"
             " I found a child that doesn't properly define parents."
             " Children should have the parent app_name"
             " define a string or sequence"
             " of strings that represent the child's parents."),
            dict(
                parent_app_name=parents,
                parent_app_name_type=type(parents),
                **log_details),
            exception_kls=DAGMisconfigured)
        return

    for parent_name in parents:
        dg.add_edge(parent_name, app_name, key=dep_name, label=dep_name)
Example #5
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Verify the "valid_if_or" section of one app's metadata.

    Returns immediately if `metadata` does not define 'valid_if_or'.  For
    each key in that section except '_func', this checks that:
      - its value is a cb.TasksConfigBaseSequence
      - the key is contained in the second element returned by
        `node.get_job_id_template(app_name1)`
      - when `get_NS().job_id_validations` has a function for the key,
        calling it on each listed value does not raise

    Any violation is reported with `DAGMisconfigured` through
    `_log_raise_if` / `_log_raise`.  The `dg` and `tasks_conf` parameters
    are not used here.
    """
    if 'valid_if_or' not in metadata:
        return

    for name, choices in metadata['valid_if_or'].items():
        if name == '_func':
            continue
        location = "%s.valid_if_or.%s" % (app_name1, name)

        not_a_sequence = not isinstance(choices, cb.TasksConfigBaseSequence)
        _log_raise_if(
            not_a_sequence,
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(choices), key=location, **ld),
            exception_kls=DAGMisconfigured)

        ptemplate = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            name not in ptemplate,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=name, job_id_template=ptemplate, **ld),
            exception_kls=DAGMisconfigured)

        # No registered validation function: nothing more to check for
        # this key.
        try:
            check = get_NS().job_id_validations[name]
        except KeyError:
            continue

        for choice in choices:
            try:
                check(choice)
            except Exception as err:
                failure = (
                    "valid_if_or contains a value that wasn't validated"
                    " by your job_id_validations. err: %s(%s)"
                ) % (err.__class__, err)
                _log_raise(
                    failure,
                    extra=dict(
                        key=location, wrong_value_type=type(choice), **ld),
                    exception_kls=DAGMisconfigured)
Example #6
0
def _add_edges(dg, app_name, dep_name, dep_grp, log_details):
    """Add one edge per parent app from a dependency group to `app_name`.

    `dg` is an instance of a nx.MultiDiGraph, which means we can have
        multiple edges between two nodes
    `app_name` (str) - the child node each new edge ends at
    `dep_name` (str) - the name of a dependency group; passed as both the
        `key` and the `label` of every edge added
    `dep_grp` (obj) - dependency group data.  Subclass of
        cb.TasksConfigBaseMapping.  An example of what this may look like is:
            dep_grp = {
                "app_name": ["test_app"],
                "date": [20140601],
                "client_id": [123, 140, 150],
                ...
            }
    `log_details` (dict) - merged into the details of a reported error

    Raises DAGMisconfigured if "app_name" cannot be read from `dep_grp`.
    If "app_name" is neither a string nor a cb.TasksConfigBaseSequence,
    the problem is reported through `_log_raise`.
    """
    try:
        declared = dep_grp['app_name']
    except (KeyError, TypeError):
        raise DAGMisconfigured(
            "You defined a dependency but forgot to include the app_name")

    is_single = isinstance(declared, six.string_types)
    if not is_single and not isinstance(
            declared, cb.TasksConfigBaseSequence):
        _log_raise(
            ("Unrecognized type:"
             " I found a child that doesn't properly define parents."
             " Children should have the parent app_name"
             " define a string or sequence"
             " of strings that represent the child's parents."),
            dict(
                parent_app_name=declared,
                parent_app_name_type=type(declared),
                **log_details),
            exception_kls=DAGMisconfigured)
        return

    # A bare string names exactly one parent; a sequence names several.
    for parent_name in ([declared] if is_single else declared):
        dg.add_edge(parent_name, app_name, key=dep_name, label=dep_name)
Example #7
0
def parse_job_id(app_name, job_id, delimiter=None):
    """Split `job_id` into its components and validate them.

    `app_name` (str) identifies a task; its job_id template decides how
        many components are expected
    `job_id` (str) identifies an instance of a task (ie a subtask)
    `delimiter` (str) value to split job_id into different components.
        Defaults to `get_NS().job_id_delimiter`

    ie, per the original documentation:
        20140506_876_profile -->

        {'date': 20140506, 'client_id': 876, 'collection_name': 'profile'}

    The result is whatever `_validate_job_id_identifiers` returns for the
    split values.  A job_id that does not split into as many pieces as the
    parsed template has entries is reported with `InvalidJobId`.
    """
    sep = get_NS().job_id_delimiter if delimiter is None else delimiter
    template, ptemplate = get_job_id_template(app_name)
    expected = len(ptemplate)

    # maxsplit keeps any extra delimiters inside the final component
    parts = job_id.split(sep, expected - 1)
    ld = dict(job_id=job_id, app_name=app_name, job_id_template=template)
    if len(parts) != expected:
        _log_raise(
            "Job_id isn't properly delimited.  You might have too few"
            " or too many underscores.",
            extra=ld,
            exception_kls=InvalidJobId)
    return _validate_job_id_identifiers(app_name, parts)
Example #8
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?"""
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func, "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component",
            extra=dict(key=k, value=v, **ld),
            exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents

            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(k not in tasks_conf[parent].get(
                    'autofill_values', {}),
                              msg,
                              extra=dict(parent_app_name=parent, key=k, **ld),
                              exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                _log_raise(
                    ("Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                     " The job_id_validation function complained that the"
                     " value was invalid. Error details: %s") % err.message,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(key='%s.%s' % (k, v),
                           value=vv,
                           job_id_validation=func,
                           **ld),
                exception_kls=DAGMisconfigured)
Example #9
0
def _inplace_modify_depends_on(dep_group, child_app_name, child_job_id,
                               parent_app_name, ld):
    """Update a dependency group, in place, from the child's job_id.

    Two cases, decided by the shape of `dep_group`:
      - it has a single key and its "app_name" value has length 1: the
        child is assumed to carry the parent's job_id, so
        `dep_group['job_id']` becomes a one-element list holding the
        parent's job_id template formatted with the child's parsed job_id
      - anything else: each identifier from the child's parsed job_id that
        `dep_group` lacks is added as a one-element list

    `child_job_id` being None is reported with `DAGMisconfigured`.  If the
    template cannot be formatted, the error is reported using the class of
    the exception that was caught.
    """
    if child_job_id is None:
        _log_raise(
            ("It's impossible to get all parent job_ids if the"
             " child expects to inherit the parent's job_id and you"
             " haven't specified the child's job_id"),
            extra=dict(parent_app_name=parent_app_name, **ld),
            exception_kls=DAGMisconfigured)
    parsed = parse_job_id(child_app_name, child_job_id)

    inherits_parent_job_id = (
        len(dep_group) == 1 and len(dep_group['app_name']) == 1)
    if inherits_parent_job_id:
        job_id_template, _ = get_job_id_template(parent_app_name)
        try:
            dep_group['job_id'] = [job_id_template.format(**parsed)]
        except Exception as err:
            _log_raise(
                ("The child job_id doesn't contain enough pjob_id data to"
                 " create the parent job_id. Err details: %s") % err,
                extra=dict(
                    job_id_template=job_id_template,
                    pjob_iddata=str(parsed),
                    **ld),
                exception_kls=err.__class__)
        return

    # Only fill identifiers the group has not specified itself.
    for identifier, value in parsed.items():
        if identifier not in dep_group:
            dep_group[identifier] = [value]
Example #10
0
File: node.py Project: xyuan/stolos
def parse_job_id(app_name, job_id, delimiter=None):
    """Break `job_id` apart on `delimiter` and validate the pieces.

    `app_name` (str) identifies a task; the number of entries in its
        parsed job_id template is the number of pieces expected
    `job_id` (str) identifies an instance of a task (ie a subtask)
    `delimiter` (str) value to split job_id into different components.
        When None, `get_NS().job_id_delimiter` is used

    ie, per the original documentation:
        20140506_876_profile -->

        {'date': 20140506, 'client_id': 876, 'collection_name': 'profile'}

    Returns the result of `_validate_job_id_identifiers` for the split
    values.  If the number of pieces does not match the parsed template,
    the problem is reported with `InvalidJobId`.
    """
    if delimiter is None:
        delimiter = get_NS().job_id_delimiter
    template, ptemplate = get_job_id_template(app_name)
    n_components = len(ptemplate)

    # Split at most n_components - 1 times so that surplus delimiters stay
    # in the last piece.
    pieces = job_id.split(delimiter, n_components - 1)
    log_details = dict(
        job_id=job_id, app_name=app_name, job_id_template=template)
    if n_components != len(pieces):
        _log_raise(
            ("Job_id isn't properly delimited.  You might have too few"
             " or too many underscores."),
            extra=log_details,
            exception_kls=InvalidJobId)
    return _validate_job_id_identifiers(app_name, pieces)
Example #11
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?"""
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func,
            "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component", extra=dict(
                key=k, value=v, **ld), exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents

            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg, extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                _log_raise((
                    "Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                    " The job_id_validation function complained that the"
                    " value was invalid. Error details: %s"
                ) % err.message,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(
                    key='%s.%s' % (k, v), value=vv, job_id_validation=func,
                    **ld),
                exception_kls=DAGMisconfigured)
Example #12
0
def _iter_job_ids(dep_group, group_name, parent_app_name, ld):
    """
    Assume there specific job_ids listed in dependency group metadata that
    the child would inherit from and yield those.
    """
    for jid in dep_group['job_id']:
        try:
            parse_job_id(parent_app_name, jid)
        except InvalidJobId:
            _ld = dict(**ld)
            _ld.update(dependency_group_name=group_name, job_id=jid)
            _log_raise(("There's no way parent could have the child's job_id"),
                       extra=_ld,
                       exception_kls=InvalidJobId)
        yield (parent_app_name, jid)
Example #13
0
def _iter_job_ids(dep_group, group_name, parent_app_name, ld):
    """
    Assume there specific job_ids listed in dependency group metadata that
    the child would inherit from and yield those.
    """
    for jid in dep_group['job_id']:
        try:
            parse_job_id(parent_app_name, jid)
        except InvalidJobId:
            _ld = dict(**ld)
            _ld.update(
                dependency_group_name=group_name,
                job_id=jid)
            _log_raise(
                ("There's no way parent could have the child's job_id"),
                extra=_ld,
                exception_kls=InvalidJobId)
        yield (parent_app_name, jid)
Example #14
0
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    `app_name` (str) identifies a task
    `job_id` (str) identifies an instance of that task

    Returns True when:
      - the app defines no "valid_if_or" section, or
      - the section has a "_func" whose loaded callable returns a truthy
        value for `(app_name, **parsed_job_id)`, or
      - for some other key in the section, the job_id's value for that key
        is among the listed values (after they pass through the key's
        job_id validation function)
    Returns False otherwise.

    A "valid_if_or" key that is absent from the parsed job_id is reported
    with `DAGMisconfigured`.  A failure to load "_func" is re-raised as the
    same exception class with a prefixed message.

    A partially out of scope for dag stuff, but important detail:
        Jobs that don't match the criteria should immediately be marked
        as completed
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta["valid_if_or"])
    except (KeyError, TypeError):
        return True  # everything is valid

    if "_func" in dct:
        # `dct` is a fresh local dict, so popping leaves the config alone
        import_path = dct.pop("_func")
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # `err.message` does not exist on Python 3 exceptions; reading
            # it unconditionally would raise AttributeError and mask the
            # real load failure.
            details = getattr(err, "message", None) or err
            raise err.__class__(
                "valid_if_or._func misconfigured: %s" % (details, ))

        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise(
                "valid_if_or contains a key that's not in the job_id",
                extra=dict(valid_if_or_key=k, **ld),
                exception_kls=DAGMisconfigured,
            )
        # listed values go through the same validation function as job_id
        # components before being compared
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
Example #15
0
File: node.py Project: xyuan/stolos
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    `app_name` (str) identifies a task
    `job_id` (str) identifies an instance of that task

    The job passes (True) if the app has no "valid_if_or" section, if the
    section's "_func" callable returns a truthy value for
    `(app_name, **parsed_job_id)`, or if for any remaining key the job_id's
    value is one of the listed values once those have been run through the
    key's job_id validation function.  Otherwise the result is False.

    A "valid_if_or" key missing from the parsed job_id is reported with
    `DAGMisconfigured`.  If "_func" cannot be loaded, the error is
    re-raised as the same exception class with a prefixed message.

    A partially out of scope for dag stuff, but important detail:
        Jobs that don't match the criteria should immediately be marked
        as completed
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta['valid_if_or'])
    except (KeyError, TypeError):
        return True  # everything is valid

    if '_func' in dct:
        # popping is safe: `dct` is a local copy made just above
        import_path = dct.pop('_func')
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # Python 3 exceptions have no `.message`; fall back to the
            # exception itself so the original failure is not replaced by
            # an AttributeError.
            details = getattr(err, 'message', None) or err
            raise err.__class__(
                "valid_if_or._func misconfigured: %s" % (details, ))

        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise("valid_if_or contains a key that's not in the job_id",
                       extra=dict(valid_if_or_key=k, **ld),
                       exception_kls=DAGMisconfigured)
        # compare against the listed values after validation
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
Example #16
0
def _build_dict_deps(dg, app_name, deps):
    """Create the graph edges implied by an app's "depends_on" section.

    `dg` (nx.MultiDiGraph instance) - the Tasks configuration as a graph
    `app_name` (str) - the name of a scheduled application
    `deps` (obj) - the value of given app_name's "depends_on" field.
        Two layouts are handled:
          - a cb.TasksConfigBaseMapping with an "app_name" key: a single
            dependency group, registered under the default group name
          - a mapping of group name to either one dependency group
            (mapping) or several (sequence of mappings)

    Group data of any other type is reported through `_log_raise` with
    `DAGMisconfigured`.
    """
    log_details = dict(app_name=app_name, key='depends_on', deps=dict(deps))

    is_single_group = (
        isinstance(deps, cb.TasksConfigBaseMapping) and "app_name" in deps)
    if is_single_group:
        _add_edges(
            dg, app_name=app_name,
            dep_name=get_NS().dependency_group_default_name,
            dep_grp=deps, log_details=log_details)
        return

    for name, data in deps.items():
        if isinstance(data, cb.TasksConfigBaseMapping):
            # one group: wrap it so the loop below handles both layouts
            dep_groups = [data]
        elif isinstance(data, cb.TasksConfigBaseSequence):
            dep_groups = data
        else:
            _log_raise(
                "Unrecognized dependency.  Expected a list or dict",
                dict(dep_name=name, dep_data=data, **log_details),
                exception_kls=DAGMisconfigured)
            continue
        for dep_group in dep_groups:
            _add_edges(
                dg=dg, app_name=app_name, dep_name=name,
                dep_grp=dep_group, log_details=log_details)