Example #1
def get_satisfiable_jobs(
    rules: Rules, resources_per_client: Dict[str, Dict[str, Union[float,
                                                                  int]]],
    pending_jobs: List[RuleExecution],
    executions: Union[List[Execution], List[DelegateExecution]]
) -> List[RuleExecution]:
    # print("get_satisfiable_jobs", len(pending_jobs), executions)
    ready = []

    # copy resources_per_client to a version we'll decrement as we consume
    resources_remaining_per_client = dict([
        (name, dict(resources))
        for name, resources in resources_per_client.items()
    ])

    # print("max resources", resources_remaining_per_client)

    def get_remaining(
        job
    ):  # returns the remaining resources for the client used by a given job
        rule = rules.get_rule(job.transform)
        return resources_remaining_per_client[rule.executor]

    for job in executions:
        rule = rules.get_rule(job.transform)
        resources = rule.resources
        # print("job.id={}, active_job_ids={}".format(repr(job.id), repr(active_job_ids)))
        resources_remaining = get_remaining(job)
        # print("decrementing ", job.transform, rules.get_rule(job.transform).executor, resources_remaining, " by ", resources)
        for resource, amount in resources.items():
            resources_remaining[resource] -= amount

    for job in pending_jobs:
        satisfiable = True
        rule = rules.get_rule(job.transform)
        resources = rule.resources
        resources_remaining = get_remaining(job)
        # print("for ", job.transform, rules.get_rule(job.transform).executor, resources_remaining)
        for resource, amount in resources.items():
            if resources_remaining[resource] < amount:
                satisfiable = False
                break

        if satisfiable:
            for resource, amount in resources.items():
                resources_remaining[resource] -= amount

            ready.append(job)

    return ready
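
The scheduling logic in get_satisfiable_jobs is a decrement-and-check over per-client resource pools: currently running executions are subtracted from a copy of each pool, and a pending job is admitted only if its rule's declared resources still fit (and is then subtracted as well). The following is a minimal, self-contained sketch of the same bookkeeping using a single pool of plain dicts; the function name and dict-based "jobs" are hypothetical stand-ins, not part of the conseq API.

from typing import Dict, List

# Hypothetical, simplified illustration of the decrement-and-check pattern used
# by get_satisfiable_jobs above; plain dicts stand in for conseq's Rules and
# RuleExecution objects and are not part of the real API.
def pick_runnable(pool: Dict[str, float],
                  running: List[Dict[str, float]],
                  pending: List[Dict[str, float]]) -> List[Dict[str, float]]:
    remaining = dict(pool)  # work on a copy so the caller's pool is untouched
    for job in running:     # running jobs have already claimed their share
        for resource, amount in job.items():
            remaining[resource] -= amount
    admitted = []
    for job in pending:     # admit a job only if every resource still fits
        if all(remaining.get(r, 0) >= amt for r, amt in job.items()):
            for r, amt in job.items():
                remaining[r] -= amt
            admitted.append(job)
    return admitted

# With 4 CPUs and one running job using 2, only the first pending job still fits:
print(pick_runnable({"cpu": 4}, [{"cpu": 2}], [{"cpu": 2}, {"cpu": 1}]))
# -> [{'cpu': 2}]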
Example #2
def reattach(j: Jobs, rules: Rules,
             pending_jobs: List[Execution]) -> List[DelegateExecution]:
    executing = []
    for e in pending_jobs:
        if e.exec_xref is not None:
            rule = rules.get_rule(e.transform)
            client = rules.get_client(rule.executor)
            ee = client.reattach(e.exec_xref)
            executing.append(ee)
            log.warning("Reattaching existing job {}: {}".format(
                e.transform, e.exec_xref))
        else:
            log.warning("Canceling {}".format(e.id))
            j.cancel_execution(e.id)
    return executing
Example #3
def test_relative_file_paths(tmpdir):
    sample_rel_path = os.path.relpath(__file__, os.path.abspath("."))
    assert sample_rel_path[0] != "/"

    statements = parser.parse_str("""
    rule a:
        inputs: x=filename("{}")
    """.format(sample_rel_path))

    rules = Rules()
    _eval_stmts(rules, statements, "none",
                HashCache(str(tmpdir.join("hashcache"))))
    a = rules.get_rule("a")
    assert a is not None
    print(a.inputs)
    assert a.inputs[0].json_obj["name"] == os.path.abspath(sample_rel_path)
Example #4
def test_file_ref_with_copy_to(tmpdir):
    rules = Rules()
    # rules.set_var(name, value)

    localfile = tmpdir.join("xyz")
    localfile.write("x")

    statements = parser.parse_str("""
    rule a:
        inputs: x=filename("{}", copy_to="z")
    """.format(localfile))
    _eval_stmts(rules, statements, "none",
                HashCache(str(tmpdir.join("hashcache"))))

    a = rules.get_rule("a")
    assert a is not None
    assert a.inputs[0].copy_to == "z"
Example #5
def test_file_ref(tmpdir):
    rules = Rules()
    # rules.set_var(name, value)

    localfile = tmpdir.join("xyz")
    localfile.write("x")

    statements = parser.parse_str("""
    rule a:
        inputs: x=filename("{}")
    """.format(localfile))
    _eval_stmts(rules, statements,
                str(tmpdir) + "/none",
                HashCache(str(tmpdir.join("hashcache"))))
    a = rules.get_rule("a")
    assert a is not None
    print(a.inputs)
    assert a.inputs[0].json_obj["name"] == os.path.relpath(
        str(localfile), str(tmpdir))
    assert a.inputs[0].json_obj["type"] == "$fileref"
    assert a.inputs[0].copy_to is None
    assert len(rules.objs) == 1
Example #6
def test_eval_if():
    from conseq.config import Rules, _eval_stmts
    rules = Rules()
    # rules.set_var(name, value)

    statements = parser.parse_str("""
    if "'x' == 'y'":
      let a='1'
    else:
      let a='2'
    endif
    """)
    _eval_stmts(rules, statements, "none", None)
    assert rules.vars["a"] == "2"
Example #7
def test_parse_if():
    from conseq.config import Rules, _eval_stmts
    rules = Rules()
    # from conseq.parser import IfStatement, LetStatement

    statements = parser.parse_str(
        """
    if "'x' == 'y'":
      let a='1'
    else:
      let a='2'
    endif
    """, "declarations")
    _eval_stmts(rules, statements, "none", None)
    assert rules.vars["a"] == "2"
Example #8
def test_file_refs_with_vars(tmpdir):
    # make sure variables can be used in filenames
    rules = Rules()
    rules.set_var("VARIABLE", str(tmpdir))
    rules.set_var("NUMBER", 2)

    localfile = tmpdir.join("xyz-2")
    localfile.write("x")

    statements = parser.parse_str("""
    rule a:
        inputs: x=filename("{{config.VARIABLE}}/xyz-{{config.NUMBER}}")
    """)
    _eval_stmts(rules, statements, "none",
                HashCache(str(tmpdir.join("hashcache"))))
    a = rules.get_rule("a")
    assert a is not None
    print(a.inputs)
    assert a.inputs[0].json_obj["name"] == str(localfile)
Example #9
def test_generic_eval():
    from conseq.config import Rules, _eval_stmts
    rules = Rules()
    # rules.set_var(name, value)

    statements = parser.parse_str("""
    eval \"\"\"
        print('here')
        rules.set_var('x', 'y')
        print(config['x'])
        print(rules.vars)
        print(config)
        \"\"\"

    if "config.x == 'y'":
      let a='1'
    else:
      let a='2'
    endif
    """)
    _eval_stmts(rules, statements, "none", None)
    assert rules.vars["a"] == "1"
Example #10
def main_loop(jinja2_env: Environment,
              j: Jobs,
              new_object_listener: Callable,
              rules: Rules,
              state_dir: str,
              executing: List[DelegateExecution],
              capture_output: bool,
              req_confirm: bool,
              maxfail: int,
              maxstart: Optional[int],
              properties_to_add=[]) -> None:
    from conseq.exec_client import create_publish_exec_client
    _client_for_publishing = Lazy(
        lambda: create_publish_exec_client(rules.get_vars()))

    resources_per_client = dict([
        (name, client.resources)
        for name, client in rules.exec_clients.items()
    ])
    timings = TimelineLog(state_dir + "/timeline.log")
    active_job_ids = set([e.id for e in executing])

    resolver = xref.Resolver(state_dir, rules.vars)

    prev_msg = None
    abort = False
    success_count = 0
    failures = []
    start_count = 0
    job_ids_to_ignore = set()
    skip_remaining = False

    def get_pending():
        pending_jobs = j.get_pending()
        if skip_remaining:
            # record the ids of the jobs being skipped, then clear the pending list
            job_ids_to_ignore.update([pj.id for pj in pending_jobs])
            pending_jobs = []
        else:
            pending_jobs = [
                pj for pj in pending_jobs if pj.id not in job_ids_to_ignore
            ]

        return pending_jobs

    with ui.capture_sigint() as was_interrupted_fn:
        while not abort:
            interrupted = was_interrupted_fn()
            if interrupted:
                break

            if len(failures) >= maxfail:
                we_should_stop = True
                if len(executing) > 0:
                    # if other tasks are still running, ask the user whether we really want to abort now.
                    we_should_stop, maxfail = ui.user_says_we_should_stop(
                        len(failures), executing)
                if we_should_stop:
                    break

            pending_jobs = get_pending()

            summary = get_execution_summary(executing)

            msg = "%d processes running (%s), %d executions pending, %d skipped" % (
                len(executing), summary, len(pending_jobs),
                len(job_ids_to_ignore))
            if prev_msg != msg:
                log.info(msg)
                if len(pending_jobs) + len(executing) > 0:
                    long_summary = get_long_execution_summary(
                        executing, pending_jobs)
                    log.info("Summary of queue:\n%s\n", long_summary)

            prev_msg = msg
            cannot_start_more = (maxstart is not None
                                 and start_count >= maxstart) or skip_remaining
            if len(executing) == 0 and (cannot_start_more
                                        or len(pending_jobs) == 0):
                # now that we've completed everything, check for deferred jobs by marking them as ready.  If we have any, loop again
                j.enable_deferred()
                deferred_jobs = len(get_pending())
                if deferred_jobs > 0 and not cannot_start_more:
                    log.info("Marked deferred %d executions as ready",
                             deferred_jobs)
                    continue
                break

            did_useful_work = False

            # might be worth checking whether the inputs are identical to the previous call
            # to avoid wasting CPU time trying to schedule over and over when resources are exhausted.

            # also, the current design has an issue when rerunning part of the execution tree.  Imagine
            # rule "A" produces "a1", "b1", and "c1", rule "T" transforms "a1" to "a2", "b1" to "b2", and "c1" to "c2".
            # Lastly, rule "F" takes in a2, b2, and c2 and produces "f".
            # Now, everything is great if starting from a clean slate.  But suppose we've run once; in the artifact db we have
            # a1, a2, b1, b2, c1, c2, f.   If we then rerun T, we'll get the following executions (new objects denoted with
            # "*", old objects from the previous run have no star):
            # T(a1) -> a2*
            # F(a2*, b2, c2) -> f*
            # T(b1) -> b2*
            # F(a2*, b2*, c2) -> f*
            # T(c1) -> c2*
            # F(a2*, b2*, c2*) -> f*
            #
            # So in the end the right thing would get done.  However, we've run F three times as often as necessary.  If we
            # had a priority queue for work, we could set each rule execution's priority to max(input.id).
            # That would force a breadth-first execution of the graph.  However, since jobs can execute in parallel,
            # prioritizing is not enough.  (And we can't block based on priority or there'd be no parallelism!)
            # (A hypothetical sketch of this prioritization idea follows after this example.)
            #
            # Ultimately, I don't think there's a shortcut; we may need to check the DAG from the previous execution to see
            # if an ancestor node is being re-executed and, if so, prune that pending rule execution from the pending list until
            # that task is done.
            ready_jobs = get_satisfiable_jobs(rules, resources_per_client,
                                              pending_jobs, executing)
            for job in ready_jobs:
                assert isinstance(job, dep.RuleExecution)

                if maxstart is not None and start_count >= maxstart:
                    break

                active_job_ids.add(job.id)
                did_useful_work = True

                rule = rules.get_rule(job.transform)

                timings.log(job.id, "preprocess_xrefs")
                # process xrefs which might require rewriting an artifact
                xrefs_resolved = exec_client.preprocess_xref_inputs(
                    j, resolver, job.inputs)
                if xrefs_resolved:
                    log.info(
                        "Resolved xrefs on rule, new version will be executed next pass"
                    )
                    timings.log(job.id, "resolved_xrefs")
                    continue

                timings.log(job.id, "preprocess_inputs")
                if rule.is_publish_rule:
                    client = _client_for_publishing()
                else:
                    # localize paths that will be used in scripts
                    client = rules.get_client(rule.executor)
                inputs, resolver_state = client.preprocess_inputs(
                    resolver, bind_inputs(rule, job.inputs))
                debug_log.log_input_preprocess(job.id, job.inputs, inputs)

                # if confirmation from the user is required, do it before we continue
                if req_confirm:
                    answer = ui.confirm_execution(job.transform, inputs)
                    if answer == "a":
                        req_confirm = False
                    elif answer == "q":
                        abort = True
                        break
                    elif answer == "s":
                        job_ids_to_ignore.add(job.id)
                        continue
                    elif answer == "S":
                        skip_remaining = True
                        break

                if rule.is_publish_rule:
                    publish(jinja2_env, rule.publish_location,
                            rules.get_vars(), inputs)

                # maybe record_started and update_exec_xref should be merged so anything started
                # always has an xref
                exec_id = j.record_started(job.id)
                timings.log(job.id, "start")

                job_dir = get_job_dir(state_dir, exec_id)
                if not os.path.exists(job_dir):
                    os.makedirs(job_dir)

                e = execute(job.transform, resolver,
                            jinja2_env, exec_id, job_dir, inputs, rule,
                            rules.get_vars(), capture_output, resolver_state,
                            client)
                executing.append(e)
                j.update_exec_xref(e.id, e.get_external_id(), job_dir)
                start_count += 1

            # now poll the jobs which are running and look for which have completed
            for i, e in reversed(list(enumerate(executing))):
                failure, completion = e.get_completion()

                if failure is None and completion is None:
                    continue

                del executing[i]
                timestamp = datetime.datetime.now().isoformat()

                if completion is not None:
                    rule = rules.get_rule(e.transform)
                    if not rule.has_for_all_input():
                        # only do this check if no inputs are marked as "for all"
                        # because we can have cases where a new artifact appears and we _do_ want
                        # to re-run the rule and clobber the output of the previous run.
                        # If we wanted to be very conservative, we could handle for-all by
                        # looking up which rule created the previous artifact and confirming that it was
                        # from a rule with the same inputs, only verifying that the "all" parameters have
                        # changed. However, just ignoring clobbers from rules with "for all" is a cheap
                        # approximation.
                        _failures = []
                        for artifact in completion:
                            if j.get_existing_id(None, artifact) is not None:
                                # j.gc()
                                _failure = f"Rule {e.transform} ({e.job_dir} generated an output which already exists: {artifact}"
                                _failures.append(_failure)
                                log.error(_failure)
                        if len(_failures) > 0:
                            failure = ", ".join(_failures)

                if failure is not None:
                    job_id = j.record_completed(timestamp, e.id,
                                                dep.STATUS_FAILED, {})
                    failures.append((e.transform, e.job_dir))
                    debug_log.log_completed(job_id, dep.STATUS_FAILED,
                                            completion)
                    timings.log(job_id, "fail")
                elif completion is not None:
                    amended_outputs = _amend_outputs(completion,
                                                     properties_to_add)

                    job_id = j.record_completed(timestamp, e.id,
                                                dep.STATUS_COMPLETED,
                                                amended_outputs)
                    debug_log.log_completed(job_id, dep.STATUS_COMPLETED,
                                            completion)
                    success_count += 1
                    timings.log(job_id, "complete")

                did_useful_work = True

            j.refresh_rules()

            if not did_useful_work:
                time.sleep(0.5)

    if len(executing) > 0:
        ui.ask_user_to_cancel(j, executing)

    log.info("%d jobs successfully executed", success_count)
    if len(failures) > 0:
        # maybe also show summary of which jobs failed?
        log.warning(
            "%d jobs failed: %s", len(failures), ", ".join([
                "{} ({})".format(job_dir, transform)
                for transform, job_dir in failures
            ]))
        return -1

    return 0
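
The comment block inside main_loop about forcing a breadth-first execution suggests giving each pending rule execution a priority of max(input.id). conseq does not implement this; the snippet below is only a hypothetical sketch of that ordering idea, assuming each pending job exposes an inputs list whose elements carry an integer id (as the comment itself implies).

# Hypothetical sketch of the "priority = max(input.id)" ordering discussed in
# the comment inside main_loop; none of this exists in conseq itself.
def breadth_first_order(pending_jobs):
    # Jobs whose inputs all come from earlier executions (smaller ids) sort
    # first, approximating a breadth-first walk of the dependency graph.
    def priority(job):
        return max((inp.id for inp in job.inputs), default=0)

    return sorted(pending_jobs, key=priority)

As the comment already notes, ordering alone is not sufficient when jobs run in parallel; at best it biases which of the satisfiable jobs is started first.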