Example no. 1
def download(args: Namespace) -> None:
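    """Download the selected checkpoint of a trial to a local path.

    The checkpoint is chosen by latest/best/uuid (with sort_by and
    smaller_is_better). When args.quiet is set only the path is printed;
    otherwise the path is followed by a table of batch number, start/end
    time, UUID, and validation metrics.
    """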
    checkpoint = (
        Determined(args.master, None)
        .get_trial(args.trial_id)
        .select_checkpoint(
            latest=args.latest,
            best=args.best,
            uuid=args.uuid,
            sort_by=args.sort_by,
            smaller_is_better=args.smaller_is_better,
        )
    )

    path = checkpoint.download(path=args.output_dir)

    if args.quiet:
        print(path)
        return

    print("Local checkpoint path:")
    print(path, "\n")

    # Print information about the downloaded step/checkpoint.
    table = [
        ["Batch #", checkpoint.batch_number],
        ["Start Time", render.format_time(checkpoint.start_time)],
        ["End Time", render.format_time(checkpoint.end_time)],
        ["Checkpoint UUID", checkpoint.uuid],
        ["Validation Metrics", format_validation(checkpoint.validation)[1]],
    ]

    headers, values = zip(*table)  # type: ignore

    render.tabulate_or_csv(headers, [values], False)
Example no. 2
def list_experiments(args: Namespace) -> None:
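    """List experiments via the REST API, either all of them (args.all) or
    only those owned by the current session user, and render the result as
    a table or CSV. An Archived column is added when args.all is set.
    """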
    params = {}
    if args.all:
        params["filter"] = "all"
    else:
        params["user"] = api.Authentication.instance().get_session_user()

    r = api.get(args.master, "experiments", params=params)

    def format_experiment(e: Any) -> List[Any]:
        result = [
            e["id"],
            e["owner"]["username"],
            e["config"]["description"],
            e["state"],
            render.format_percent(e["progress"]),
            render.format_time(e["start_time"]),
            render.format_time(e["end_time"]),
        ]
        if args.all:
            result.append(e["archived"])
        return result

    headers = [
        "ID", "Owner", "Description", "State", "Progress", "Start Time",
        "End Time"
    ]
    if args.all:
        headers.append("Archived")

    values = [format_experiment(e) for e in r.json()]
    render.tabulate_or_csv(headers, values, args.csv)
Example no. 3
def list_agents(args: argparse.Namespace) -> None:
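    """List the agents registered with the master, sorted by agent ID.

    Emits JSON when args.json is set; otherwise renders a table or CSV.
    """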
    r = api.get(args.master, "agents")

    agents = r.json()
    agents = [
        OrderedDict(
            [
                ("id", local_id(agent_id)),
                ("registered_time", render.format_time(agent["registered_time"])),
                ("num_slots", len(agent["slots"])),
                ("num_containers", agent["num_containers"]),
                ("label", agent["label"]),
            ]
        )
        for agent_id, agent in sorted(agents.items())
    ]

    if args.json:
        print(json.dumps(agents, indent=4))
        return

    headers = ["Agent ID", "Registered Time", "Slots", "Containers", "Label"]
    values = [a.values() for a in agents]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 4
def list_tasks(args: Namespace) -> None:
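    """List tasks sorted by registered time, showing which agent(s) each
    task is assigned to and its priority when the priority scheduler is in
    use (otherwise "N/A").
    """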
    r = api.get(args.master, "tasks")

    def agent_info(t: Dict[str, Any]) -> Union[str, List[str]]:
        containers = t.get("containers", [])
        if not containers:
            return "unassigned"
        if len(containers) == 1:
            agent = containers[0]["agent"]  # type: str
            return agent
        return [c["agent"] for c in containers]

    tasks = r.json()
    headers = [
        "ID", "Name", "Slots Needed", "Registered Time", "Agent", "Priority"
    ]
    values = [[
        task["id"],
        task["name"],
        task["slots_needed"],
        render.format_time(task["registered_time"]),
        agent_info(task),
        task["priority"] if task["scheduler_type"] == "priority" else "N/A",
    ] for task_id, task in sorted(
        tasks.items(),
        key=lambda tup: (render.format_time(tup[1]["registered_time"]), ),
    )]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 5
def list_trials(args: Namespace) -> None:
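    """List the trials of an experiment via a GraphQL query, ordered by
    trial ID, showing state, hyperparameters, start/end time, and the
    number of steps.
    """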
    q = api.GraphQLQuery(args.master)
    trials = q.op.trials(
        order_by=[gql.trials_order_by(id=gql.order_by.asc)],
        where=gql.trials_bool_exp(experiment_id=gql.Int_comparison_exp(
            _eq=args.experiment_id)),
    )
    trials.id()
    trials.state()
    trials.hparams()
    trials.start_time()
    trials.end_time()
    trials.steps_aggregate().aggregate.count()

    resp = q.send()

    headers = [
        "Trial ID", "State", "H-Params", "Start Time", "End Time", "# of Steps"
    ]
    values = [[
        t.id,
        t.state,
        json.dumps(t.hparams, indent=4),
        render.format_time(t.start_time),
        render.format_time(t.end_time),
        t.steps_aggregate.aggregate.count,
    ] for t in resp.trials]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 6
def describe_trial(args: Namespace) -> None:
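    """Describe a single trial fetched from the REST API.

    Prints raw JSON when args.json is set; otherwise prints a summary table
    for the trial followed by a per-step table, with step metrics included
    when args.metrics is set.
    """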
    if args.metrics:
        r = api.get(args.master, "trials/{}/metrics".format(args.trial_id))
    else:
        r = api.get(args.master, "trials/{}".format(args.trial_id))

    trial = r.json()

    if args.json:
        print(json.dumps(trial, indent=4))
        return

    # Print information about the trial itself.
    headers = [
        "Experiment ID",
        "State",
        "H-Params",
        "Start Time",
        "End Time",
    ]
    values = [[
        trial["experiment_id"],
        trial["state"],
        json.dumps(trial["hparams"], indent=4),
        render.format_time(trial["start_time"]),
        render.format_time(trial["end_time"]),
    ]]
    render.tabulate_or_csv(headers, values, args.csv)

    # Print information about individual steps.
    headers = [
        "Step #",
        "State",
        "Start Time",
        "End Time",
        "Checkpoint",
        "Checkpoint UUID",
        "Checkpoint Metadata",
        "Validation",
        "Validation Metrics",
    ]
    if args.metrics:
        headers.append("Step Metrics")

    values = [[
        s["id"],
        s["state"],
        render.format_time(s["start_time"]),
        render.format_time(s["end_time"]),
        *format_checkpoint(s["checkpoint"]),
        *format_validation(s["validation"]),
        *([json.dumps(s["metrics"], indent=4)] if args.metrics else []),
    ] for s in trial["steps"]]

    print()
    print("Steps:")
    render.tabulate_or_csv(headers, values, args.csv)
Example no. 7
def list_slots(args: argparse.Namespace) -> None:
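    """List every slot on every agent, joining slot containers with task
    names and IDs so that occupied slots show which task holds them.

    Emits JSON when args.json is set; otherwise renders a table or CSV.
    """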
    task_res = api.get(args.master, "tasks")
    agent_res = api.get(args.master, "agents")

    agents = agent_res.json()
    tasks = task_res.json()

    c_names = {}
    for task in tasks.values():
        for cont in task["containers"]:
            c_names[cont["id"]] = {"name": task["name"], "id": task["id"]}

    slots = [
        OrderedDict([
            ("agent_id", local_id(agent_id)),
            ("resource_pool", agent["resource_pool"]),
            ("slot_id", local_id(slot_id)),
            ("enabled", slot["enabled"]),
            (
                "task_id",
                c_names[slot["container"]["id"]]["id"]
                if slot["container"] else "FREE",
            ),
            (
                "task_name",
                c_names[slot["container"]["id"]]["name"]
                if slot["container"] else "None",
            ),
            ("type", slot["device"]["type"]),
            ("device", slot["device"]["brand"]),
        ]) for agent_id, agent in sorted(agents.items())
        for slot_id, slot in sorted(agent["slots"].items())
    ]

    if args.json:
        print(json.dumps(slots, indent=4))
        return

    headers = [
        "Agent ID",
        "Resource Pool",
        "Slot ID",
        "Enabled",
        "Task ID",
        "Task Name",
        "Type",
        "Device",
    ]
    values = [s.values() for s in slots]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 8
def list_experiments(args: Namespace) -> None:
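    """List experiments via a GraphQL query, newest first.

    Without args.all, only unarchived experiments owned by the current
    session user are returned; with args.all, an Archived column is added.
    """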
    where = None
    if not args.all:
        user = api.Authentication.instance().get_session_user()
        where = gql.experiments_bool_exp(
            archived=gql.Boolean_comparison_exp(_eq=False),
            owner=gql.users_bool_exp(username=gql.String_comparison_exp(
                _eq=user)),
        )

    q = api.GraphQLQuery(args.master)
    exps = q.op.experiments(
        order_by=[gql.experiments_order_by(id=gql.order_by.desc)], where=where)
    exps.archived()
    exps.config()
    exps.end_time()
    exps.id()
    exps.owner.username()
    exps.progress()
    exps.start_time()
    exps.state()

    resp = q.send()

    def format_experiment(e: Any) -> List[Any]:
        result = [
            e.id,
            e.owner.username,
            e.config["description"],
            e.state,
            render.format_percent(e.progress),
            render.format_time(e.start_time),
            render.format_time(e.end_time),
        ]
        if args.all:
            result.append(e.archived)
        return result

    headers = [
        "ID", "Owner", "Description", "State", "Progress", "Start Time",
        "End Time"
    ]
    if args.all:
        headers.append("Archived")

    values = [format_experiment(e) for e in resp.experiments]
    render.tabulate_or_csv(headers, values, args.csv)
Example no. 9
def list_tasks(args: Namespace) -> None:
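    """List tasks sorted by state (pending, running, terminating,
    terminated) and then by registered time, including the assigned
    agent(s) and the exit status when one is present.
    """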
    r = api.get(args.master, "tasks")

    def agent_info(t: Dict[str, Any]) -> Union[str, List[str]]:
        containers = t.get("containers", [])
        if not containers:
            return "unassigned"
        if len(containers) == 1:
            agent = containers[0]["agent"]  # type: str
            return agent
        return [c["agent"] for c in containers]

    def get_state_rank(state: str) -> int:
        if state == "PENDING":
            return 0
        if state == "RUNNING":
            return 1
        if state == "TERMINATING":
            return 2
        if state == "TERMINATED":
            return 3
        return 4

    tasks = r.json()
    headers = [
        "ID", "Name", "Slots Needed", "Registered Time", "State", "Agent",
        "Exit Status"
    ]
    values = [[
        task_id,
        task["name"],
        task["slots_needed"],
        render.format_time(task["registered_time"]),
        task["state"],
        agent_info(task),
        task["exit_status"] if task.get("exit_status", None) else "N/A",
    ] for task_id, task in sorted(
        tasks.items(),
        key=lambda tup: (
            get_state_rank(tup[1]["state"]),
            render.format_time(tup[1]["registered_time"]),
        ),
    )]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 10
def list_trials(args: Namespace) -> None:
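    """List the trials of an experiment using its summary endpoint, showing
    state, hyperparameters, start/end time, and the number of steps.
    """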
    r = api.get(args.master,
                "experiments/{}/summary".format(args.experiment_id))
    experiment = r.json()

    headers = [
        "Trial ID", "State", "H-Params", "Start Time", "End Time", "# of Steps"
    ]
    values = [[
        t["id"],
        t["state"],
        json.dumps(t["hparams"], indent=4),
        render.format_time(t["start_time"]),
        render.format_time(t["end_time"]),
        t["num_steps"],
    ] for t in experiment["trials"]]

    render.tabulate_or_csv(headers, values, args.csv)
Example no. 11
def describe_trial(args: Namespace) -> None:
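    """Describe a single trial via a GraphQL query.

    Prints raw JSON when args.json is set; otherwise prints a summary table
    for the trial followed by a per-step table of checkpoint and validation
    information, plus step metrics when args.metrics is set.
    """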
    q = api.GraphQLQuery(args.master)
    trial = q.op.trials_by_pk(id=args.trial_id)
    trial.end_time()
    trial.experiment_id()
    trial.hparams()
    trial.start_time()
    trial.state()

    steps = trial.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.metrics()
    steps.id()
    steps.state()
    steps.start_time()
    steps.end_time()

    checkpoint_gql = steps.checkpoint()
    checkpoint_gql.state()
    checkpoint_gql.uuid()

    validation = steps.validation()
    validation.state()
    validation.metrics()

    resp = q.send()

    if args.json:
        print(json.dumps(resp.trials_by_pk.__to_json_value__(), indent=4))
        return

    trial = resp.trials_by_pk

    # Print information about the trial itself.
    headers = ["Experiment ID", "State", "H-Params", "Start Time", "End Time"]
    values = [
        [
            trial.experiment_id,
            trial.state,
            json.dumps(trial.hparams, indent=4),
            render.format_time(trial.start_time),
            render.format_time(trial.end_time),
        ]
    ]
    render.tabulate_or_csv(headers, values, args.csv)

    # Print information about individual steps.
    headers = [
        "Step #",
        "State",
        "Start Time",
        "End Time",
        "Checkpoint",
        "Checkpoint UUID",
        "Validation",
        "Validation Metrics",
    ]
    if args.metrics:
        headers.append("Step Metrics")

    values = [
        [
            s.id,
            s.state,
            render.format_time(s.start_time),
            render.format_time(s.end_time),
            *format_checkpoint(s.checkpoint),
            *format_validation(s.validation),
            *([json.dumps(s.metrics, indent=4)] if args.metrics else []),
        ]
        for s in trial.steps
    ]

    print()
    print("Steps:")
    render.tabulate_or_csv(headers, values, args.csv)
Example no. 12
def describe(args: Namespace) -> None:
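    """Describe one or more experiments fetched from the REST API.

    Prints raw JSON when args.json is set; otherwise renders experiment,
    trial, and step tables (to stdout or, with args.outdir, to CSV files),
    optionally including per-step training and validation metrics.
    """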
    docs = []
    for experiment_id in args.experiment_ids.split(","):
        if args.metrics:
            r = api.get(args.master,
                        "experiments/{}/metrics/summary".format(experiment_id))
        else:
            r = api.get(args.master, "experiments/{}".format(experiment_id))
        docs.append(r.json())

    if args.json:
        print(json.dumps(docs, indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Description",
        "Archived",
        "Labels",
    ]
    values = [[
        doc["id"],
        doc["state"],
        render.format_percent(doc["progress"]),
        render.format_time(doc.get("start_time")),
        render.format_time(doc.get("end_time")),
        doc["config"].get("description"),
        doc["archived"],
        ", ".join(sorted(doc["config"].get("labels", []))),
    ] for doc in docs]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    headers = [
        "Trial ID", "Experiment ID", "State", "Start Time", "End Time",
        "H-Params"
    ]
    values = [[
        trial["id"],
        doc["id"],
        trial["state"],
        render.format_time(trial.get("start_time")),
        render.format_time(trial.get("end_time")),
        json.dumps(trial["hparams"], indent=4),
    ] for doc in docs for trial in doc["trials"]]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all provided experiments.
        t_metrics_names = sorted(
            {n
             for doc in docs for n in scalar_training_metrics_names(doc)})
        t_metrics_headers = [
            "Training Metric: {}".format(name) for name in t_metrics_names
        ]

        v_metrics_names = sorted(
            {n
             for doc in docs for n in scalar_validation_metrics_names(doc)})
        v_metrics_headers = [
            "Validation Metric: {}".format(name) for name in v_metrics_names
        ]
    else:
        t_metrics_headers = []
        v_metrics_headers = []

    headers = (["Trial ID", "Step ID", "State", "Start Time", "End Time"] +
               t_metrics_headers + [
                   "Checkpoint State",
                   "Checkpoint Start Time",
                   "Checkpoint End Time",
                   "Validation State",
                   "Validation Start Time",
                   "Validation End Time",
               ] + v_metrics_headers)

    values = []
    for doc in docs:
        for trial in doc["trials"]:
            for step in trial["steps"]:
                t_metrics_fields = []
                if step.get("metrics"):
                    avg_metrics = step["metrics"]["avg_metrics"]
                    for name in t_metrics_names:
                        if name in avg_metrics:
                            t_metrics_fields.append(avg_metrics[name])
                        else:
                            t_metrics_fields.append(None)

                checkpoint = step.get("checkpoint")
                if checkpoint:
                    checkpoint_state = checkpoint["state"]
                    checkpoint_start_time = checkpoint.get("start_time")
                    checkpoint_end_time = checkpoint.get("end_time")
                else:
                    checkpoint_state = None
                    checkpoint_start_time = None
                    checkpoint_end_time = None

                validation = step.get("validation")
                if validation:
                    validation_state = validation["state"]
                    validation_start_time = validation.get("start_time")
                    validation_end_time = validation.get("end_time")
                else:
                    validation_state = None
                    validation_start_time = None
                    validation_end_time = None

                if args.metrics:
                    v_metrics_fields = [
                        api.metric.get_validation_metric(name, validation)
                        for name in v_metrics_names
                    ]
                else:
                    v_metrics_fields = []

                row = ([
                    step["trial_id"],
                    step["id"],
                    step["state"],
                    render.format_time(step.get("start_time")),
                    render.format_time(step.get("end_time")),
                ] + t_metrics_fields + [
                    checkpoint_state,
                    render.format_time(checkpoint_start_time),
                    render.format_time(checkpoint_end_time),
                    validation_state,
                    render.format_time(validation_start_time),
                    render.format_time(validation_end_time),
                ] + v_metrics_fields)
                values.append(row)

    if not args.outdir:
        outfile = None
        print("\nSteps:")
    else:
        outfile = args.outdir.joinpath("steps.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)
Example no. 13
def describe(args: Namespace) -> None:
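    """Describe one or more experiments via a single GraphQL query.

    Prints raw JSON when args.json is set; otherwise renders experiment,
    trial, and step tables (to stdout or, with args.outdir, to CSV files),
    optionally including per-step training and validation metrics.
    """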
    ids = [int(x) for x in args.experiment_ids.split(",")]

    q = api.GraphQLQuery(args.master)
    exps = q.op.experiments(where=gql.experiments_bool_exp(
        id=gql.Int_comparison_exp(_in=ids)))
    exps.archived()
    exps.config()
    exps.end_time()
    exps.id()
    exps.progress()
    exps.start_time()
    exps.state()

    trials = exps.trials(order_by=[gql.trials_order_by(id=gql.order_by.asc)])
    trials.end_time()
    trials.hparams()
    trials.id()
    trials.start_time()
    trials.state()

    steps = trials.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.end_time()
    steps.id()
    steps.start_time()
    steps.state()
    steps.trial_id()

    steps.checkpoint.end_time()
    steps.checkpoint.start_time()
    steps.checkpoint.state()

    steps.validation.end_time()
    steps.validation.start_time()
    steps.validation.state()

    if args.metrics:
        steps.metrics(path="avg_metrics")
        steps.validation.metrics()

    resp = q.send()

    # Re-sort the experiment objects to match the original order.
    exps_by_id = {e.id: e for e in resp.experiments}
    experiments = [exps_by_id[id] for id in ids]

    if args.json:
        print(json.dumps(resp.__to_json_value__()["experiments"], indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Description",
        "Archived",
        "Labels",
    ]
    values = [[
        e.id,
        e.state,
        render.format_percent(e.progress),
        render.format_time(e.start_time),
        render.format_time(e.end_time),
        e.config.get("description"),
        e.archived,
        ", ".join(sorted(e.config.get("labels", []))),
    ] for e in experiments]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    headers = [
        "Trial ID", "Experiment ID", "State", "Start Time", "End Time",
        "H-Params"
    ]
    values = [[
        t.id,
        e.id,
        t.state,
        render.format_time(t.start_time),
        render.format_time(t.end_time),
        json.dumps(t.hparams, indent=4),
    ] for e in experiments for t in e.trials]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all provided experiments.
        t_metrics_names = sorted(
            {n
             for e in experiments for n in scalar_training_metrics_names(e)})
        t_metrics_headers = [
            "Training Metric: {}".format(name) for name in t_metrics_names
        ]

        v_metrics_names = sorted({
            n
            for e in experiments for n in scalar_validation_metrics_names(e)
        })
        v_metrics_headers = [
            "Validation Metric: {}".format(name) for name in v_metrics_names
        ]
    else:
        t_metrics_headers = []
        v_metrics_headers = []

    headers = (["Trial ID", "Step ID", "State", "Start Time", "End Time"] +
               t_metrics_headers + [
                   "Checkpoint State",
                   "Checkpoint Start Time",
                   "Checkpoint End Time",
                   "Validation State",
                   "Validation Start Time",
                   "Validation End Time",
               ] + v_metrics_headers)

    values = []
    for e in experiments:
        for t in e.trials:
            for step in t.steps:
                t_metrics_fields = []
                if hasattr(step, "metrics"):
                    avg_metrics = step.metrics
                    for name in t_metrics_names:
                        if name in avg_metrics:
                            t_metrics_fields.append(avg_metrics[name])
                        else:
                            t_metrics_fields.append(None)

                checkpoint = step.checkpoint
                if checkpoint:
                    checkpoint_state = checkpoint.state
                    checkpoint_start_time = checkpoint.start_time
                    checkpoint_end_time = checkpoint.end_time
                else:
                    checkpoint_state = None
                    checkpoint_start_time = None
                    checkpoint_end_time = None

                validation = step.validation
                if validation:
                    validation_state = validation.state
                    validation_start_time = validation.start_time
                    validation_end_time = validation.end_time

                else:
                    validation_state = None
                    validation_start_time = None
                    validation_end_time = None

                if args.metrics:
                    v_metrics_fields = [
                        api.metric.get_validation_metric(name, validation)
                        for name in v_metrics_names
                    ]
                else:
                    v_metrics_fields = []

                row = ([
                    step.trial_id,
                    step.id,
                    step.state,
                    render.format_time(step.start_time),
                    render.format_time(step.end_time),
                ] + t_metrics_fields + [
                    checkpoint_state,
                    render.format_time(checkpoint_start_time),
                    render.format_time(checkpoint_end_time),
                    validation_state,
                    render.format_time(validation_start_time),
                    render.format_time(validation_end_time),
                ] + v_metrics_fields)
                values.append(row)

    if not args.outdir:
        outfile = None
        print("\nSteps:")
    else:
        outfile = args.outdir.joinpath("steps.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)