# Standard-library imports assumed by the CLI excerpts below; project-local
# modules (api, render, gql, Determined) and helpers such as local_id,
# format_checkpoint, format_validation, scalar_training_metrics_names, and
# scalar_validation_metrics_names come from the surrounding package.
import argparse
import json
from argparse import Namespace
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Set, Union


def list_tasks(args: Namespace) -> None:
    r = api.get(args.master, "tasks")

    def agent_info(t: Dict[str, Any]) -> Union[str, List[str]]:
        containers = t.get("containers", [])
        if not containers:
            return "unassigned"
        if len(containers) == 1:
            agent = containers[0]["agent"]  # type: str
            return agent
        return [c["agent"] for c in containers]

    tasks = r.json()

    headers = ["ID", "Name", "Slots Needed", "Registered Time", "Agent", "Priority"]
    values = [
        [
            task["id"],
            task["name"],
            task["slots_needed"],
            render.format_time(task["registered_time"]),
            agent_info(task),
            task["priority"] if task["scheduler_type"] == "priority" else "N/A",
        ]
        for _task_id, task in sorted(
            tasks.items(),
            key=lambda tup: (render.format_time(tup[1]["registered_time"]),),
        )
    ]

    render.tabulate_or_csv(headers, values, args.csv)
def download(args: Namespace) -> None:
    checkpoint = (
        Determined(args.master, None)
        .get_trial(args.trial_id)
        .select_checkpoint(
            latest=args.latest,
            best=args.best,
            uuid=args.uuid,
            sort_by=args.sort_by,
            smaller_is_better=args.smaller_is_better,
        )
    )

    path = checkpoint.download(path=args.output_dir)

    if args.quiet:
        print(path)
        return

    print("Local checkpoint path:")
    print(path, "\n")

    # Print information about the downloaded step/checkpoint.
    table = [
        ["Batch #", checkpoint.batch_number],
        ["Start Time", render.format_time(checkpoint.start_time)],
        ["End Time", render.format_time(checkpoint.end_time)],
        ["Checkpoint UUID", checkpoint.uuid],
        ["Validation Metrics", format_validation(checkpoint.validation)[1]],
    ]
    headers, values = zip(*table)  # type: ignore
    render.tabulate_or_csv(headers, [values], False)
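# `format_validation` is used above and in the trial views below but is not
# defined in this excerpt. A minimal sketch, inferred from how its result is
# consumed (indexed with [1] here, splatted into a "Validation" /
# "Validation Metrics" column pair in describe_trial); the dict keys and the
# COMPLETED state name are assumptions, not confirmed by this excerpt.
def format_validation(validation: Optional[Dict[str, Any]]) -> List[Any]:
    # Expand a validation record into a (state, rendered metrics) pair.
    if not validation:
        return [None, None]
    if validation.get("state") == "COMPLETED":
        return [validation["state"], json.dumps(validation.get("metrics"), indent=4)]
    # Incomplete validations have a state but no reportable metrics.
    return [validation["state"], None]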
def list_trials(args: Namespace) -> None:
    # GraphQL variant: query the experiment's trials, ordered by trial ID.
    q = api.GraphQLQuery(args.master)
    trials = q.op.trials(
        order_by=[gql.trials_order_by(id=gql.order_by.asc)],
        where=gql.trials_bool_exp(
            experiment_id=gql.Int_comparison_exp(_eq=args.experiment_id)
        ),
    )
    trials.id()
    trials.state()
    trials.hparams()
    trials.start_time()
    trials.end_time()
    trials.steps_aggregate().aggregate.count()

    resp = q.send()

    headers = ["Trial ID", "State", "H-Params", "Start Time", "End Time", "# of Steps"]
    values = [
        [
            t.id,
            t.state,
            json.dumps(t.hparams, indent=4),
            render.format_time(t.start_time),
            render.format_time(t.end_time),
            t.steps_aggregate.aggregate.count,
        ]
        for t in resp.trials
    ]

    render.tabulate_or_csv(headers, values, args.csv)
def describe_trial(args: Namespace) -> None:
    # REST variant: the metrics endpoint returns the same document plus
    # per-step metrics.
    if args.metrics:
        r = api.get(args.master, "trials/{}/metrics".format(args.trial_id))
    else:
        r = api.get(args.master, "trials/{}".format(args.trial_id))

    trial = r.json()

    if args.json:
        print(json.dumps(trial, indent=4))
        return

    # Print information about the trial itself.
    headers = ["Experiment ID", "State", "H-Params", "Start Time", "End Time"]
    values = [
        [
            trial["experiment_id"],
            trial["state"],
            json.dumps(trial["hparams"], indent=4),
            render.format_time(trial["start_time"]),
            render.format_time(trial["end_time"]),
        ]
    ]
    render.tabulate_or_csv(headers, values, args.csv)

    # Print information about individual steps.
    headers = [
        "Step #",
        "State",
        "Start Time",
        "End Time",
        "Checkpoint",
        "Checkpoint UUID",
        "Checkpoint Metadata",
        "Validation",
        "Validation Metrics",
    ]
    if args.metrics:
        headers.append("Step Metrics")

    values = [
        [
            s["id"],
            s["state"],
            render.format_time(s["start_time"]),
            render.format_time(s["end_time"]),
            *format_checkpoint(s["checkpoint"]),
            *format_validation(s["validation"]),
            *([json.dumps(s["metrics"], indent=4)] if args.metrics else []),
        ]
        for s in trial["steps"]
    ]

    print()
    print("Steps:")
    render.tabulate_or_csv(headers, values, args.csv)
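# `format_checkpoint` is likewise assumed from the surrounding package. In
# this REST variant its result fills three columns (state, UUID, metadata);
# the GraphQL describe_trial below splats it into only two, so the two code
# paths evidently carry different versions of the helper. A sketch matching
# the three-column use here, with the dict keys as assumptions:
def format_checkpoint(checkpoint: Optional[Dict[str, Any]]) -> List[Any]:
    # Expand a checkpoint record into (state, UUID, rendered metadata).
    if not checkpoint:
        return [None, None, None]
    return [
        checkpoint.get("state"),
        checkpoint.get("uuid"),
        json.dumps(checkpoint.get("metadata"), indent=4),
    ]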
def format_experiment(e: Any) -> List[Any]:
    # REST variant: reads dict fields. Relies on `args` from an enclosing
    # command function.
    result = [
        e["id"],
        e["owner"]["username"],
        e["config"]["description"],
        e["state"],
        render.format_percent(e["progress"]),
        render.format_time(e["start_time"]),
        render.format_time(e["end_time"]),
    ]
    if args.all:
        result.append(e["archived"])
    return result
def format_experiment(e: Any) -> List[Any]:
    # GraphQL variant: reads object attributes instead of dict keys.
    result = [
        e.id,
        e.owner.username,
        e.config["description"],
        e.state,
        render.format_percent(e.progress),
        render.format_time(e.start_time),
        render.format_time(e.end_time),
    ]
    if args.all:
        result.append(e.archived)
    return result
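# Both format_experiment variants read `args` from an enclosing scope, which
# suggests they are nested inside a list command. A hedged sketch of such an
# enclosing command for the dict-based variant: the "experiments" endpoint,
# the column names, and every args field except master/all/csv are
# assumptions made for illustration.
def list_experiments(args: Namespace) -> None:
    r = api.get(args.master, "experiments")

    def format_experiment(e: Any) -> List[Any]:
        # Same helper as above; nesting it here gives it access to `args`.
        result = [
            e["id"],
            e["owner"]["username"],
            e["config"]["description"],
            e["state"],
            render.format_percent(e["progress"]),
            render.format_time(e["start_time"]),
            render.format_time(e["end_time"]),
        ]
        if args.all:
            result.append(e["archived"])
        return result

    headers = ["ID", "Owner", "Description", "State", "Progress", "Start Time", "End Time"]
    if args.all:
        headers.append("Archived")

    values = [format_experiment(e) for e in r.json()]
    render.tabulate_or_csv(headers, values, args.csv)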
def list_agents(args: argparse.Namespace) -> None:
    r = api.get(args.master, "agents")

    agents = r.json()
    agents = [
        OrderedDict(
            [
                ("id", local_id(agent_id)),
                ("registered_time", render.format_time(agent["registered_time"])),
                ("num_slots", len(agent["slots"])),
                ("num_containers", agent["num_containers"]),
                ("label", agent["label"]),
            ]
        )
        for agent_id, agent in sorted(agents.items())
    ]

    if args.json:
        print(json.dumps(agents, indent=4))
        return

    headers = ["Agent ID", "Registered Time", "Slots", "Containers", "Label"]
    values = [a.values() for a in agents]

    render.tabulate_or_csv(headers, values, args.csv)
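# `local_id` is another helper assumed from the surrounding package. Given
# that it is applied to the keys of the agents map before display, a minimal
# sketch might reduce an address-style identifier to its final component;
# the slash-separated address format is an assumption, not confirmed here.
def local_id(address: str) -> str:
    # e.g. "/agents/agent-1" -> "agent-1" (hypothetical ID format)
    return address.rstrip("/").split("/")[-1]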
def list_tasks(args: Namespace) -> None:
    # Alternative variant of list_tasks (cf. the priority-scheduler version
    # above): adds State and Exit Status columns and sorts by state rank
    # before registration time.
    r = api.get(args.master, "tasks")

    def agent_info(t: Dict[str, Any]) -> Union[str, List[str]]:
        containers = t.get("containers", [])
        if not containers:
            return "unassigned"
        if len(containers) == 1:
            agent = containers[0]["agent"]  # type: str
            return agent
        return [c["agent"] for c in containers]

    def get_state_rank(state: str) -> int:
        # Sort PENDING before RUNNING before TERMINATING before TERMINATED;
        # unrecognized states sort last.
        ranks = {"PENDING": 0, "RUNNING": 1, "TERMINATING": 2, "TERMINATED": 3}
        return ranks.get(state, 4)

    tasks = r.json()

    headers = ["ID", "Name", "Slots Needed", "Registered Time", "State", "Agent", "Exit Status"]
    values = [
        [
            task_id,
            task["name"],
            task["slots_needed"],
            render.format_time(task["registered_time"]),
            task["state"],
            agent_info(task),
            task["exit_status"] if task.get("exit_status", None) else "N/A",
        ]
        for task_id, task in sorted(
            tasks.items(),
            key=lambda tup: (
                get_state_rank(tup[1]["state"]),
                render.format_time(tup[1]["registered_time"]),
            ),
        )
    ]

    render.tabulate_or_csv(headers, values, args.csv)
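# The sort key above pairs the state rank with the formatted registration
# time, so tasks group by lifecycle state and order chronologically within
# each state (this relies on render.format_time producing strings that sort
# lexicographically by time). A standalone demonstration with made-up data:
def _demo_task_sort() -> None:
    demo = {
        "task-a": {"state": "TERMINATED", "registered_time": "2020-01-01T00:00:00Z"},
        "task-b": {"state": "PENDING", "registered_time": "2020-01-02T00:00:00Z"},
        "task-c": {"state": "PENDING", "registered_time": "2020-01-01T00:00:00Z"},
    }
    ranks = {"PENDING": 0, "RUNNING": 1, "TERMINATING": 2, "TERMINATED": 3}
    ordered = sorted(
        demo.items(),
        key=lambda tup: (ranks.get(tup[1]["state"], 4), tup[1]["registered_time"]),
    )
    print([task_id for task_id, _ in ordered])  # ['task-c', 'task-b', 'task-a']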
def list_trials(args: Namespace) -> None:
    # REST variant: fetch trial summaries from the experiment summary endpoint.
    r = api.get(args.master, "experiments/{}/summary".format(args.experiment_id))
    experiment = r.json()

    headers = ["Trial ID", "State", "H-Params", "Start Time", "End Time", "# of Steps"]
    values = [
        [
            t["id"],
            t["state"],
            json.dumps(t["hparams"], indent=4),
            render.format_time(t["start_time"]),
            render.format_time(t["end_time"]),
            t["num_steps"],
        ]
        for t in experiment["trials"]
    ]

    render.tabulate_or_csv(headers, values, args.csv)
def describe_trial(args: Namespace) -> None:
    # GraphQL variant: build a single query covering the trial, its steps,
    # and each step's checkpoint and validation.
    q = api.GraphQLQuery(args.master)
    trial = q.op.trials_by_pk(id=args.trial_id)
    trial.end_time()
    trial.experiment_id()
    trial.hparams()
    trial.start_time()
    trial.state()

    steps = trial.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.metrics()
    steps.id()
    steps.state()
    steps.start_time()
    steps.end_time()

    checkpoint_gql = steps.checkpoint()
    checkpoint_gql.state()
    checkpoint_gql.uuid()

    validation = steps.validation()
    validation.state()
    validation.metrics()

    resp = q.send()

    if args.json:
        print(json.dumps(resp.trials_by_pk.__to_json_value__(), indent=4))
        return

    trial = resp.trials_by_pk

    # Print information about the trial itself.
    headers = ["Experiment ID", "State", "H-Params", "Start Time", "End Time"]
    values = [
        [
            trial.experiment_id,
            trial.state,
            json.dumps(trial.hparams, indent=4),
            render.format_time(trial.start_time),
            render.format_time(trial.end_time),
        ]
    ]
    render.tabulate_or_csv(headers, values, args.csv)

    # Print information about individual steps.
    headers = [
        "Step #",
        "State",
        "Start Time",
        "End Time",
        "Checkpoint",
        "Checkpoint UUID",
        "Validation",
        "Validation Metrics",
    ]
    if args.metrics:
        headers.append("Step Metrics")

    values = [
        [
            s.id,
            s.state,
            render.format_time(s.start_time),
            render.format_time(s.end_time),
            *format_checkpoint(s.checkpoint),
            *format_validation(s.validation),
            *([json.dumps(s.metrics, indent=4)] if args.metrics else []),
        ]
        for s in trial.steps
    ]

    print()
    print("Steps:")
    render.tabulate_or_csv(headers, values, args.csv)
def describe(args: Namespace) -> None:
    # REST variant of `describe`.
    docs = []
    for experiment_id in args.experiment_ids.split(","):
        if args.metrics:
            r = api.get(args.master, "experiments/{}/metrics/summary".format(experiment_id))
        else:
            r = api.get(args.master, "experiments/{}".format(experiment_id))
        docs.append(r.json())

    if args.json:
        print(json.dumps(docs, indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Description",
        "Archived",
        "Labels",
    ]
    values = [
        [
            doc["id"],
            doc["state"],
            render.format_percent(doc["progress"]),
            render.format_time(doc.get("start_time")),
            render.format_time(doc.get("end_time")),
            doc["config"].get("description"),
            doc["archived"],
            ", ".join(sorted(doc["config"].get("labels", []))),
        ]
        for doc in docs
    ]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    headers = ["Trial ID", "Experiment ID", "State", "Start Time", "End Time", "H-Params"]
    values = [
        [
            trial["id"],
            doc["id"],
            trial["state"],
            render.format_time(trial.get("start_time")),
            render.format_time(trial.get("end_time")),
            json.dumps(trial["hparams"], indent=4),
        ]
        for doc in docs
        for trial in doc["trials"]
    ]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all
        # provided experiments.
        t_metrics_names = sorted({n for doc in docs for n in scalar_training_metrics_names(doc)})
        t_metrics_headers = ["Training Metric: {}".format(name) for name in t_metrics_names]

        v_metrics_names = sorted({n for doc in docs for n in scalar_validation_metrics_names(doc)})
        v_metrics_headers = ["Validation Metric: {}".format(name) for name in v_metrics_names]
    else:
        # Without --metrics the names lists must still exist, since the loop
        # below iterates t_metrics_names whenever a step carries metrics.
        t_metrics_names = []  # type: List[str]
        t_metrics_headers = []
        v_metrics_names = []  # type: List[str]
        v_metrics_headers = []

    headers = (
        ["Trial ID", "Step ID", "State", "Start Time", "End Time"]
        + t_metrics_headers
        + [
            "Checkpoint State",
            "Checkpoint Start Time",
            "Checkpoint End Time",
            "Validation State",
            "Validation Start Time",
            "Validation End Time",
        ]
        + v_metrics_headers
    )

    values = []
    for doc in docs:
        for trial in doc["trials"]:
            for step in trial["steps"]:
                t_metrics_fields = []
                if step.get("metrics"):
                    avg_metrics = step["metrics"]["avg_metrics"]
                    for name in t_metrics_names:
                        if name in avg_metrics:
                            t_metrics_fields.append(avg_metrics[name])
                        else:
                            t_metrics_fields.append(None)

                checkpoint = step.get("checkpoint")
                if checkpoint:
                    checkpoint_state = checkpoint["state"]
                    checkpoint_start_time = checkpoint.get("start_time")
                    checkpoint_end_time = checkpoint.get("end_time")
                else:
                    checkpoint_state = None
                    checkpoint_start_time = None
                    checkpoint_end_time = None

                validation = step.get("validation")
                if validation:
                    validation_state = validation["state"]
                    validation_start_time = validation.get("start_time")
                    validation_end_time = validation.get("end_time")
                else:
                    validation_state = None
                    validation_start_time = None
                    validation_end_time = None

                if args.metrics:
                    v_metrics_fields = [
                        api.metric.get_validation_metric(name, validation)
                        for name in v_metrics_names
                    ]
                else:
                    v_metrics_fields = []

                row = (
                    [
                        step["trial_id"],
                        step["id"],
                        step["state"],
                        render.format_time(step.get("start_time")),
                        render.format_time(step.get("end_time")),
                    ]
                    + t_metrics_fields
                    + [
                        checkpoint_state,
                        render.format_time(checkpoint_start_time),
                        render.format_time(checkpoint_end_time),
                        validation_state,
                        render.format_time(validation_start_time),
                        render.format_time(validation_end_time),
                    ]
                    + v_metrics_fields
                )
                values.append(row)

    if not args.outdir:
        outfile = None
        print("\nSteps:")
    else:
        outfile = args.outdir.joinpath("steps.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)
def describe(args: Namespace) -> None:
    # GraphQL variant of `describe`: one query covering experiments, trials,
    # steps, checkpoints, and validations.
    ids = [int(x) for x in args.experiment_ids.split(",")]

    q = api.GraphQLQuery(args.master)
    exps = q.op.experiments(where=gql.experiments_bool_exp(id=gql.Int_comparison_exp(_in=ids)))
    exps.archived()
    exps.config()
    exps.end_time()
    exps.id()
    exps.progress()
    exps.start_time()
    exps.state()

    trials = exps.trials(order_by=[gql.trials_order_by(id=gql.order_by.asc)])
    trials.end_time()
    trials.hparams()
    trials.id()
    trials.start_time()
    trials.state()

    steps = trials.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.end_time()
    steps.id()
    steps.start_time()
    steps.state()
    steps.trial_id()

    steps.checkpoint.end_time()
    steps.checkpoint.start_time()
    steps.checkpoint.state()

    steps.validation.end_time()
    steps.validation.start_time()
    steps.validation.state()

    if args.metrics:
        steps.metrics(path="avg_metrics")
        steps.validation.metrics()

    resp = q.send()

    # Re-sort the experiment objects to match the original order.
    exps_by_id = {e.id: e for e in resp.experiments}
    experiments = [exps_by_id[id] for id in ids]

    if args.json:
        print(json.dumps(resp.__to_json_value__()["experiments"], indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Description",
        "Archived",
        "Labels",
    ]
    values = [
        [
            e.id,
            e.state,
            render.format_percent(e.progress),
            render.format_time(e.start_time),
            render.format_time(e.end_time),
            e.config.get("description"),
            e.archived,
            ", ".join(sorted(e.config.get("labels", []))),
        ]
        for e in experiments
    ]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    headers = ["Trial ID", "Experiment ID", "State", "Start Time", "End Time", "H-Params"]
    values = [
        [
            t.id,
            e.id,
            t.state,
            render.format_time(t.start_time),
            render.format_time(t.end_time),
            json.dumps(t.hparams, indent=4),
        ]
        for e in experiments
        for t in e.trials
    ]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all
        # provided experiments.
        t_metrics_names = sorted({n for e in experiments for n in scalar_training_metrics_names(e)})
        t_metrics_headers = ["Training Metric: {}".format(name) for name in t_metrics_names]

        v_metrics_names = sorted({n for e in experiments for n in scalar_validation_metrics_names(e)})
        v_metrics_headers = ["Validation Metric: {}".format(name) for name in v_metrics_names]
    else:
        # Without --metrics the names lists must still exist, since the loop
        # below iterates t_metrics_names whenever a step carries metrics.
        t_metrics_names = []  # type: List[str]
        t_metrics_headers = []
        v_metrics_names = []  # type: List[str]
        v_metrics_headers = []

    headers = (
        ["Trial ID", "Step ID", "State", "Start Time", "End Time"]
        + t_metrics_headers
        + [
            "Checkpoint State",
            "Checkpoint Start Time",
            "Checkpoint End Time",
            "Validation State",
            "Validation Start Time",
            "Validation End Time",
        ]
        + v_metrics_headers
    )

    values = []
    for e in experiments:
        for t in e.trials:
            for step in t.steps:
                t_metrics_fields = []
                if hasattr(step, "metrics"):
                    avg_metrics = step.metrics
                    for name in t_metrics_names:
                        if name in avg_metrics:
                            t_metrics_fields.append(avg_metrics[name])
                        else:
                            t_metrics_fields.append(None)

                checkpoint = step.checkpoint
                if checkpoint:
                    checkpoint_state = checkpoint.state
                    checkpoint_start_time = checkpoint.start_time
                    checkpoint_end_time = checkpoint.end_time
                else:
                    checkpoint_state = None
                    checkpoint_start_time = None
                    checkpoint_end_time = None

                validation = step.validation
                if validation:
                    validation_state = validation.state
                    validation_start_time = validation.start_time
                    validation_end_time = validation.end_time
                else:
                    validation_state = None
                    validation_start_time = None
                    validation_end_time = None

                if args.metrics:
                    v_metrics_fields = [
                        api.metric.get_validation_metric(name, validation)
                        for name in v_metrics_names
                    ]
                else:
                    v_metrics_fields = []

                row = (
                    [
                        step.trial_id,
                        step.id,
                        step.state,
                        render.format_time(step.start_time),
                        render.format_time(step.end_time),
                    ]
                    + t_metrics_fields
                    + [
                        checkpoint_state,
                        render.format_time(checkpoint_start_time),
                        render.format_time(checkpoint_end_time),
                        validation_state,
                        render.format_time(validation_start_time),
                        render.format_time(validation_end_time),
                    ]
                    + v_metrics_fields
                )
                values.append(row)

    if not args.outdir:
        outfile = None
        print("\nSteps:")
    else:
        outfile = args.outdir.joinpath("steps.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)
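# `scalar_training_metrics_names` and `scalar_validation_metrics_names` are
# assumed from the surrounding package: each returns the set of metric names
# whose values can be placed directly into table cells. A sketch for the REST
# variant, walking the same document structure the REST describe() reads;
# the dict keys are taken from that code and the scalar test is an assumption.
from numbers import Number


def scalar_training_metrics_names(doc: Dict[str, Any]) -> Set[str]:
    # Collect the names of numeric avg_metrics across all steps of all trials.
    names = set()
    for trial in doc.get("trials", []):
        for step in trial.get("steps", []):
            metrics = step.get("metrics") or {}
            for name, value in (metrics.get("avg_metrics") or {}).items():
                if isinstance(value, Number):
                    names.add(name)
    return names


def scalar_validation_metrics_names(doc: Dict[str, Any]) -> Set[str]:
    # Collect the names of numeric validation metrics across all steps.
    names = set()
    for trial in doc.get("trials", []):
        for step in trial.get("steps", []):
            validation = step.get("validation") or {}
            for name, value in (validation.get("metrics") or {}).items():
                if isinstance(value, Number):
                    names.add(name)
    return names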