def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    """Wait for an experiment's first trial to exist, then follow its logs.

    The master is polled through a GraphQL query every 0.1 seconds until the
    experiment has at least one trial. The trial with the lowest ID is then
    handed to ``logs`` with ``follow=True``.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the experiment whose first trial should be followed.
    """
    # Build a query that selects only the ID of this experiment's first
    # trial (i.e., the one with the lowest ID).
    query = api.GraphQLQuery(master_url)
    belongs_to_experiment = gql.trials_bool_exp(
        experiment_id=gql.Int_comparison_exp(_eq=exp_id)
    )
    lowest_id_first = [gql.trials_order_by(id=gql.order_by.asc)]
    first_trial = query.op.trials(
        where=belongs_to_experiment,
        order_by=lowest_id_first,
        limit=1,
    )
    first_trial.id()

    print("Waiting for first trial to begin...")
    response = query.send()
    while not response.trials:
        # No trial yet; pause briefly before asking the master again.
        time.sleep(0.1)
        response = query.send()

    first_trial_id = response.trials[0].id
    print("Following first trial with ID {}".format(first_trial_id))

    # Call `logs --follow` on the new trial.
    logs(
        Namespace(
            trial_id=first_trial_id,
            follow=True,
            master=master_url,
            tail=None,
        )
    )
def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    """Wait for an experiment's first trial to exist, then follow its logs.

    The master's ``experiments/<exp_id>`` endpoint is polled every 0.1
    seconds until the response lists at least one trial. The trial with the
    lowest ID is then handed to ``logs`` with ``follow=True``.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the experiment whose first trial should be followed.
    """
    print("Waiting for first trial to begin...")
    while True:
        # Decode the response body once per poll and keep only the trial
        # list, which is all this function needs.
        response = api.get(master_url, "experiments/{}".format(exp_id))
        trials = response.json()["trials"]
        if trials:
            break
        time.sleep(0.1)

    # The first trial is the one with the lowest ID; `min` finds it without
    # sorting the whole list.
    first_trial_id = min(trial["id"] for trial in trials)
    print("Following first trial with ID {}".format(first_trial_id))

    # Call `logs --follow` on the new trial.
    logs_args = Namespace(
        trial_id=first_trial_id,
        follow=True,
        master=master_url,
        tail=None,
    )
    logs(logs_args)
def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    """Show live progress of a model definition test until it terminates.

    The master's ``experiments/<exp_id>`` endpoint is polled every 0.2
    seconds and a one-line progress display is redrawn after each poll.

    Outcomes by final experiment state:
      * COMPLETED: print a success message and return.
      * CANCELED: print a warning and exit with status 1.
      * ERROR: print the first trial's logs if a trial exists, otherwise a
        message saying no logs are available; exit with status 1.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the test experiment to watch.

    Raises:
        SystemExit: With status 1 when the experiment is canceled or errors.
    """

    def print_progress(active_stage: int, ended: bool) -> None:
        # There are four sequential stages of verification. Track the
        # current stage with an index into this list.
        stages = [
            "Scheduling task",
            "Testing training",
            "Testing validation",
            "Testing checkpointing",
        ]

        for idx, stage in enumerate(stages):
            if active_stage > idx:
                # Stage already finished.
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                # Stage in progress, or the stage the test stopped on.
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                # Stage not reached yet.
                color = "white"
                checkbox = " "

            # Pad each stage name with dots to a fixed width of 25.
            print(colored(stage.ljust(25, "."), color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                # Finish the line once the test has ended; otherwise return
                # the cursor to the line start so the next redraw overwrites
                # this one.
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    while True:
        r = api.get(master_url, "experiments/{}".format(exp_id)).json()
        trials = r["trials"]
        state = r["state"]

        # Wait for experiment to start and initialize a trial and step.
        if not trials or not trials[0]["steps"]:
            step = {}  # type: Dict
        else:
            step = trials[0]["steps"][0]

        # Update the active_stage by examining the result from master
        # /experiments/<experiment-id> endpoint.
        if state == constants.COMPLETED:
            active_stage = 4
        elif step.get("checkpoint"):
            active_stage = 3
        elif step.get("validation"):
            active_stage = 2
        elif step:
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if state == constants.COMPLETED:
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif state == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                )
            )
            sys.exit(1)
        elif state == constants.ERROR:
            print_progress(active_stage, ended=True)
            if trials:
                logs_args = Namespace(
                    trial_id=trials[0]["id"],
                    master=master_url,
                    tail=None,
                    follow=False,
                )
                logs(logs_args)
            else:
                # The experiment can fail before any trial exists; indexing
                # the empty trial list would raise IndexError and hide the
                # real failure.
                print(
                    colored(
                        "Model definition test (ID: {}) failed before a "
                        "trial was created; no trial logs are "
                        "available.".format(exp_id),
                        "red",
                    )
                )
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)
def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    """Show live progress of a model definition test until it terminates.

    A GraphQL query for the experiment's state and its trials' steps is sent
    to the master every 0.2 seconds and a one-line progress display is
    redrawn after each response.

    Outcomes by final experiment state:
      * COMPLETED: print a success message and return.
      * CANCELED: print a warning and exit with status 1.
      * ERROR: print the first trial's logs if a trial exists, otherwise a
        message saying no logs are available; exit with status 1.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the test experiment to watch.

    Raises:
        SystemExit: With status 1 when the experiment is canceled or errors.
    """

    def print_progress(active_stage: int, ended: bool) -> None:
        # There are four sequential stages of verification. Track the
        # current stage with an index into this list.
        stages = [
            "Scheduling task",
            "Testing training",
            "Testing validation",
            "Testing checkpointing",
        ]

        for idx, stage in enumerate(stages):
            if active_stage > idx:
                # Stage already finished.
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                # Stage in progress, or the stage the test stopped on.
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                # Stage not reached yet.
                color = "white"
                checkbox = " "

            # Pad each stage name with dots to a fixed width of 25.
            print(colored(stage.ljust(25, "."), color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                # Finish the line once the test has ended; otherwise return
                # the cursor to the line start so the next redraw overwrites
                # this one.
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    q = api.GraphQLQuery(master_url)
    exp_selection = q.op.experiments_by_pk(id=exp_id)
    exp_selection.state()
    # The error path below reads the first trial's ID, so the ID has to be
    # part of the selection.
    # NOTE(review): assumes fields that are not selected are absent from the
    # response objects, as the hasattr() checks below suggest -- confirm.
    exp_selection.trials.id()
    steps = exp_selection.trials.steps(
        order_by=[gql.steps_order_by(id=gql.order_by.asc)]
    )
    steps.checkpoint().id()
    steps.validation().id()

    while True:
        exp = q.send().experiments_by_pk

        # Wait for experiment to start and initialize a trial and step.
        step = None
        if exp.trials and exp.trials[0].steps:
            step = exp.trials[0].steps[0]

        # Update the active stage by examining the status of the experiment.
        # The way the GraphQL library works is that the checkpoint and
        # validation attributes of a step are always present and non-None,
        # but they don't have any attributes of their own when the
        # corresponding database object doesn't exist.
        if exp.state == constants.COMPLETED:
            active_stage = 4
        elif step and hasattr(step.checkpoint, "id"):
            active_stage = 3
        elif step and hasattr(step.validation, "id"):
            active_stage = 2
        elif step:
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if exp.state == constants.COMPLETED:
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif exp.state == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                )
            )
            sys.exit(1)
        elif exp.state == constants.ERROR:
            print_progress(active_stage, ended=True)
            if exp.trials:
                logs_args = Namespace(
                    trial_id=exp.trials[0].id,
                    master=master_url,
                    tail=None,
                    follow=False,
                )
                logs(logs_args)
            else:
                # The experiment can fail before any trial exists; indexing
                # the empty trial list would raise IndexError and hide the
                # real failure.
                print(
                    colored(
                        "Model definition test (ID: {}) failed before a "
                        "trial was created; no trial logs are "
                        "available.".format(exp_id),
                        "red",
                    )
                )
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)