Esempio n. 1
0
def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    # Get the ID of this experiment's first trial (i.e., the one with the lowest ID).
    q = api.GraphQLQuery(master_url)
    trials = q.op.trials(
        where=gql.trials_bool_exp(experiment_id=gql.Int_comparison_exp(
            _eq=exp_id)),
        order_by=[gql.trials_order_by(id=gql.order_by.asc)],
        limit=1,
    )
    trials.id()

    print("Waiting for first trial to begin...")
    while True:
        resp = q.send()
        if resp.trials:
            break
        else:
            time.sleep(0.1)

    first_trial_id = resp.trials[0].id
    print("Following first trial with ID {}".format(first_trial_id))

    # Call `logs --follow` on the new trial.
    logs_args = Namespace(trial_id=first_trial_id,
                          follow=True,
                          master=master_url,
                          tail=None)
    logs(logs_args)
Esempio n. 2
0
def list_trials(args: Namespace) -> None:
    q = api.GraphQLQuery(args.master)
    trials = q.op.trials(
        order_by=[gql.trials_order_by(id=gql.order_by.asc)],
        where=gql.trials_bool_exp(experiment_id=gql.Int_comparison_exp(
            _eq=args.experiment_id)),
    )
    trials.id()
    trials.state()
    trials.hparams()
    trials.start_time()
    trials.end_time()
    trials.steps_aggregate().aggregate.count()

    resp = q.send()

    headers = [
        "Trial ID", "State", "H-Params", "Start Time", "End Time", "# of Steps"
    ]
    values = [[
        t.id,
        t.state,
        json.dumps(t.hparams, indent=4),
        render.format_time(t.start_time),
        render.format_time(t.end_time),
        t.steps_aggregate.aggregate.count,
    ] for t in resp.trials]

    render.tabulate_or_csv(headers, values, args.csv)
Esempio n. 3
0
def experiment_trials(experiment_id: int) -> List[gql.trials]:
    q = query()
    trials = q.op.experiments_by_pk(id=experiment_id).trials(
        order_by=[gql.trials_order_by(id=gql.order_by.asc)])
    trials.id()
    trials.state()
    trials.warm_start_checkpoint_id()

    steps = trials.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.id()
    steps.state()
    steps.checkpoint.id()
    steps.checkpoint.state()
    steps.checkpoint.step_id()
    steps.checkpoint.uuid()
    steps.validation.metrics()
    steps.validation.state()
    r = q.send()
    return cast(List[gql.trials], r.experiments_by_pk.trials)
Esempio n. 4
0
def get_experiment_durations(experiment_id: int,
                             trial_idx: int) -> ExperimentDurations:
    q = query()
    exp = q.op.experiments_by_pk(id=experiment_id)
    exp.end_time()
    exp.start_time()

    steps = exp.trials(order_by=[gql.trials_order_by(
        id=gql.order_by.asc)]).steps()
    steps.end_time()
    steps.start_time()
    steps.checkpoint.end_time()
    steps.checkpoint.start_time()
    steps.validation.end_time()
    steps.validation.start_time()

    r = q.send()

    end_time = dateutil.parser.parse(r.end_time)
    start_time = dateutil.parser.parse(r.start_time)
    experiment_duration = end_time - start_time

    training_duration = datetime.timedelta(seconds=0)
    validation_duration = datetime.timedelta(seconds=0)
    checkpoint_duration = datetime.timedelta(seconds=0)
    for step in r.trials[trial_idx].steps:
        end_time = dateutil.parser.parse(step.end_time)
        start_time = dateutil.parser.parse(step.start_time)
        training_duration += end_time - start_time
        if step.validation:
            end_time = dateutil.parser.parse(step.validation.end_time)
            start_time = dateutil.parser.parse(step.validation.start_time)
            validation_duration += end_time - start_time
        if step.checkpoint:
            end_time = dateutil.parser.parse(step.checkpoint.end_time)
            start_time = dateutil.parser.parse(step.checkpoint.start_time)
            checkpoint_duration += end_time - start_time
    return ExperimentDurations(experiment_duration, training_duration,
                               validation_duration, checkpoint_duration)
Esempio n. 5
0
def describe(args: Namespace) -> None:
    ids = [int(x) for x in args.experiment_ids.split(",")]

    q = api.GraphQLQuery(args.master)
    exps = q.op.experiments(where=gql.experiments_bool_exp(
        id=gql.Int_comparison_exp(_in=ids)))
    exps.archived()
    exps.config()
    exps.end_time()
    exps.id()
    exps.progress()
    exps.start_time()
    exps.state()

    trials = exps.trials(order_by=[gql.trials_order_by(id=gql.order_by.asc)])
    trials.end_time()
    trials.hparams()
    trials.id()
    trials.start_time()
    trials.state()

    steps = trials.steps(order_by=[gql.steps_order_by(id=gql.order_by.asc)])
    steps.end_time()
    steps.id()
    steps.start_time()
    steps.state()
    steps.trial_id()

    steps.checkpoint.end_time()
    steps.checkpoint.start_time()
    steps.checkpoint.state()

    steps.validation.end_time()
    steps.validation.start_time()
    steps.validation.state()

    if args.metrics:
        steps.metrics(path="avg_metrics")
        steps.validation.metrics()

    resp = q.send()

    # Re-sort the experiment objects to match the original order.
    exps_by_id = {e.id: e for e in resp.experiments}
    experiments = [exps_by_id[id] for id in ids]

    if args.json:
        print(json.dumps(resp.__to_json_value__()["experiments"], indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Description",
        "Archived",
        "Labels",
    ]
    values = [[
        e.id,
        e.state,
        render.format_percent(e.progress),
        render.format_time(e.start_time),
        render.format_time(e.end_time),
        e.config.get("description"),
        e.archived,
        ", ".join(sorted(e.config.get("labels", []))),
    ] for e in experiments]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    headers = [
        "Trial ID", "Experiment ID", "State", "Start Time", "End Time",
        "H-Params"
    ]
    values = [[
        t.id,
        e.id,
        t.state,
        render.format_time(t.start_time),
        render.format_time(t.end_time),
        json.dumps(t.hparams, indent=4),
    ] for e in experiments for t in e.trials]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all provided experiments.
        t_metrics_names = sorted(
            {n
             for e in experiments for n in scalar_training_metrics_names(e)})
        t_metrics_headers = [
            "Training Metric: {}".format(name) for name in t_metrics_names
        ]

        v_metrics_names = sorted({
            n
            for e in experiments for n in scalar_validation_metrics_names(e)
        })
        v_metrics_headers = [
            "Validation Metric: {}".format(name) for name in v_metrics_names
        ]
    else:
        t_metrics_headers = []
        v_metrics_headers = []

    headers = (["Trial ID", "Step ID", "State", "Start Time", "End Time"] +
               t_metrics_headers + [
                   "Checkpoint State",
                   "Checkpoint Start Time",
                   "Checkpoint End Time",
                   "Validation State",
                   "Validation Start Time",
                   "Validation End Time",
               ] + v_metrics_headers)

    values = []
    for e in experiments:
        for t in e.trials:
            for step in t.steps:
                t_metrics_fields = []
                if hasattr(step, "metrics"):
                    avg_metrics = step.metrics
                    for name in t_metrics_names:
                        if name in avg_metrics:
                            t_metrics_fields.append(avg_metrics[name])
                        else:
                            t_metrics_fields.append(None)

                checkpoint = step.checkpoint
                if checkpoint:
                    checkpoint_state = checkpoint.state
                    checkpoint_start_time = checkpoint.start_time
                    checkpoint_end_time = checkpoint.end_time
                else:
                    checkpoint_state = None
                    checkpoint_start_time = None
                    checkpoint_end_time = None

                validation = step.validation
                if validation:
                    validation_state = validation.state
                    validation_start_time = validation.start_time
                    validation_end_time = validation.end_time

                else:
                    validation_state = None
                    validation_start_time = None
                    validation_end_time = None

                if args.metrics:
                    v_metrics_fields = [
                        api.metric.get_validation_metric(name, validation)
                        for name in v_metrics_names
                    ]
                else:
                    v_metrics_fields = []

                row = ([
                    step.trial_id,
                    step.id,
                    step.state,
                    render.format_time(step.start_time),
                    render.format_time(step.end_time),
                ] + t_metrics_fields + [
                    checkpoint_state,
                    render.format_time(checkpoint_start_time),
                    render.format_time(checkpoint_end_time),
                    validation_state,
                    render.format_time(validation_start_time),
                    render.format_time(validation_end_time),
                ] + v_metrics_fields)
                values.append(row)

    if not args.outdir:
        outfile = None
        print("\nSteps:")
    else:
        outfile = args.outdir.joinpath("steps.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)