Exemple #1
0
def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    # Get the ID of this experiment's first trial (i.e., the one with the lowest ID).
    q = api.GraphQLQuery(master_url)
    trials = q.op.trials(
        where=gql.trials_bool_exp(experiment_id=gql.Int_comparison_exp(
            _eq=exp_id)),
        order_by=[gql.trials_order_by(id=gql.order_by.asc)],
        limit=1,
    )
    trials.id()

    print("Waiting for first trial to begin...")
    while True:
        resp = q.send()
        if resp.trials:
            break
        else:
            time.sleep(0.1)

    first_trial_id = resp.trials[0].id
    print("Following first trial with ID {}".format(first_trial_id))

    # Call `logs --follow` on the new trial.
    logs_args = Namespace(trial_id=first_trial_id,
                          follow=True,
                          master=master_url,
                          tail=None)
    logs(logs_args)
Exemple #2
0
def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    # Get the ID of this experiment's first trial (i.e., the one with the lowest ID).
    print("Waiting for first trial to begin...")
    while True:
        r = api.get(master_url, "experiments/{}".format(exp_id))
        if len(r.json()["trials"]) > 0:
            break
        else:
            time.sleep(0.1)

    first_trial_id = sorted(t_id["id"] for t_id in r.json()["trials"])[0]
    print("Following first trial with ID {}".format(first_trial_id))

    # Call `logs --follow` on the new trial.
    logs_args = Namespace(trial_id=first_trial_id, follow=True, master=master_url, tail=None)
    logs(logs_args)
Exemple #3
0
def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    def print_progress(active_stage: int, ended: bool) -> None:
        # There are four sequential stages of verification. Track the
        # current stage with an index into this list.
        stages = [
            "Scheduling task",
            "Testing training",
            "Testing validation",
            "Testing checkpointing",
        ]

        for idx, stage in enumerate(stages):
            if active_stage > idx:
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                color = "white"
                checkbox = " "
            print(colored(stage + (25 - len(stage)) * ".", color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    while True:
        r = api.get(master_url, "experiments/{}".format(exp_id)).json()

        # Wait for experiment to start and initialize a trial and step.
        if len(r["trials"]) < 1 or len(r["trials"][0]["steps"]) < 1:
            step = {}  # type: Dict
        else:
            step = r["trials"][0]["steps"][0]

        # Update the active_stage by examining the result from master
        # /experiments/<experiment-id> endpoint.
        if r["state"] == constants.COMPLETED:
            active_stage = 4
        elif step.get("checkpoint"):
            active_stage = 3
        elif step.get("validation"):
            active_stage = 2
        elif step:
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if r["state"] == constants.COMPLETED:
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif r["state"] == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                ))
            sys.exit(1)
        elif r["state"] == constants.ERROR:
            print_progress(active_stage, ended=True)
            trial_id = r["trials"][0]["id"]
            logs_args = Namespace(trial_id=trial_id,
                                  master=master_url,
                                  tail=None,
                                  follow=False)
            logs(logs_args)
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)
Exemple #4
0
def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    def print_progress(active_stage: int, ended: bool) -> None:
        # There are four sequential stages of verification. Track the
        # current stage with an index into this list.
        stages = [
            "Scheduling task",
            "Testing training",
            "Testing validation",
            "Testing checkpointing",
        ]

        for idx, stage in enumerate(stages):
            if active_stage > idx:
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                color = "white"
                checkbox = " "
            print(colored(stage + (25 - len(stage)) * ".", color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    q = api.GraphQLQuery(master_url)
    exp = q.op.experiments_by_pk(id=exp_id)
    exp.state()
    steps = exp.trials.steps(
        order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.checkpoint().id()
    steps.validation().id()

    while True:
        exp = q.send().experiments_by_pk

        # Wait for experiment to start and initialize a trial and step.
        step = None
        if exp.trials and exp.trials[0].steps:
            step = exp.trials[0].steps[0]

        # Update the active stage by examining the status of the experiment. The way the GraphQL
        # library works is that the checkpoint and validation attributes of a step are always
        # present and non-None, but they don't have any attributes of their own when the
        # corresponding database object doesn't exist.
        if exp.state == constants.COMPLETED:
            active_stage = 4
        elif step and hasattr(step.checkpoint, "id"):
            active_stage = 3
        elif step and hasattr(step.validation, "id"):
            active_stage = 2
        elif step:
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if exp.state == "COMPLETED":
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif exp.state == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                ))
            sys.exit(1)
        elif exp.state == constants.ERROR:
            print_progress(active_stage, ended=True)
            trial_id = exp.trials[0].id
            logs_args = Namespace(trial_id=trial_id,
                                  master=master_url,
                                  tail=None,
                                  follow=False)
            logs(logs_args)
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)