def set_name(args: Namespace) -> None:
    """Rename an experiment and confirm the change on stdout.

    The experiment identified by ``args.experiment_id`` is fetched, a patch
    object is built from its JSON form, the patch's name is replaced with
    ``args.name`` and the patch is sent back.
    """
    sess = setup_session(args)
    current = bindings.get_GetExperiment(sess, experimentId=args.experiment_id).experiment

    # Seed the patch from the fetched experiment, then overwrite the name.
    patch = bindings.v1PatchExperiment.from_json(current.to_json())
    patch.name = args.name
    bindings.patch_PatchExperiment(sess, body=patch, experiment_id=args.experiment_id)

    print("Set name of experiment {} to '{}'".format(args.experiment_id, args.name))
def remove_label(args: Namespace) -> None:
    """Remove ``args.label`` from an experiment's labels and report on stdout.

    The patch request is only sent when the label is currently present on the
    experiment; the confirmation message is printed either way.
    """
    sess = setup_session(args)
    current = bindings.get_GetExperiment(sess, experimentId=args.experiment_id).experiment
    patch = bindings.v1PatchExperiment.from_json(current.to_json())

    existing = patch.labels
    if existing and args.label in existing:
        # Drop every occurrence of the label, keeping the others in order.
        patch.labels = [name for name in existing if name != args.label]
        bindings.patch_PatchExperiment(sess, body=patch, experiment_id=args.experiment_id)

    print("Removed label '{}' from experiment {}".format(args.label, args.experiment_id))
def add_label(args: Namespace) -> None:
    """Attach ``args.label`` to an experiment and report on stdout.

    The patch request is only sent when the label is not already present; the
    confirmation message is printed either way.
    """
    sess = setup_session(args)
    current = bindings.get_GetExperiment(sess, experimentId=args.experiment_id).experiment
    patch = bindings.v1PatchExperiment.from_json(current.to_json())

    # A missing label list is treated as empty.
    known = patch.labels if patch.labels is not None else []
    if args.label not in known:
        patch.labels = [*known, args.label]
        bindings.patch_PatchExperiment(sess, body=patch, experiment_id=args.experiment_id)

    print("Added label '{}' to experiment {}".format(args.label, args.experiment_id))
def write_api_call(args: Namespace, temp_dir: str) -> Tuple[str, str]:
    """Write the API data for a trial and its experiment as JSON files in ``temp_dir``.

    The trial is looked up by ``args.trial_id``; its experiment is then fetched
    by the trial's ``experimentId``. The experiment file receives the JSON form
    of the whole GetExperiment response, the trial file the JSON form of the
    trial object.

    Returns:
        ``(experiment_file_path, trial_file_path)``.
    """
    trial = bindings.get_GetTrial(setup_session(args), trialId=args.trial_id).trial
    experiment_response = bindings.get_GetExperiment(
        setup_session(args), experimentId=trial.experimentId
    )

    experiment_path, trial_path = (
        os.path.join(temp_dir, filename)
        for filename in ("api_experiment_call.json", "api_trial_call.json")
    )

    create_json_file_in_dir(experiment_response.to_json(), experiment_path)
    create_json_file_in_dir(trial.to_json(), trial_path)
    return experiment_path, trial_path
def wait(args: Namespace) -> None:
    """Poll an experiment until it reaches a terminal state, then exit the process.

    Exits with status 0 when the final state equals ``constants.COMPLETED`` and
    with status 1 for any other state in ``constants.TERMINAL_STATES``. Sleeps
    ``args.polling_interval`` between polls.
    """
    while True:
        experiment = bindings.get_GetExperiment(
            setup_session(args), experimentId=args.experiment_id
        ).experiment
        # Strip the "STATE_" prefix before comparing against the constants.
        state = experiment.state.value.replace("STATE_", "")

        if state not in constants.TERMINAL_STATES:
            time.sleep(args.polling_interval)
            continue

        print("Experiment {} terminated with state {}".format(args.experiment_id, state))
        sys.exit(0 if state == constants.COMPLETED else 1)
def test_workspace_org() -> None:
    """Exercise the workspace / project / experiment organization API end to end.

    Covers, in order: the pre-existing "Uncategorized" workspace and project,
    creating / patching / archiving / pinning / sorting workspaces, the same
    for projects, moving projects between workspaces, project notes, and moving
    an experiment between projects. Each step relies on the server state left
    by the previous ones, so statement order matters.

    Everything recorded in ``test_experiments``, ``test_projects`` and
    ``test_workspaces`` is deleted in the ``finally`` block.
    """
    master_url = conf.make_master_url()
    authentication.cli_auth = authentication.Authentication(master_url, try_reauth=True)
    sess = session.Session(master_url, None, None, None)

    # Resources created by this test, tracked for cleanup.
    test_experiments: List[bindings.v1Experiment] = []
    test_projects: List[bindings.v1Project] = []
    test_workspaces: List[bindings.v1Workspace] = []

    try:
        # Uncategorized workspace / project should exist already.
        r = bindings.get_GetWorkspaces(sess, name="Uncategorized")
        assert len(r.workspaces) == 1
        default_workspace = r.workspaces[0]
        assert default_workspace.immutable
        r2 = bindings.get_GetWorkspaceProjects(sess, id=default_workspace.id)
        assert len(r2.projects) == 1
        default_project = r2.projects[0]
        assert default_project.name == "Uncategorized"
        assert default_project.immutable

        # Add a test workspace.
        r3 = bindings.post_PostWorkspace(
            sess, body=bindings.v1PostWorkspaceRequest(name="_TestOnly"))
        made_workspace = r3.workspace
        test_workspaces.append(made_workspace)
        get_workspace = bindings.get_GetWorkspace(
            sess, id=made_workspace.id).workspace
        assert get_workspace.name == made_workspace.name
        assert not made_workspace.immutable and not get_workspace.immutable

        # Patch the workspace
        w_patch = bindings.v1PatchWorkspace.from_json(made_workspace.to_json())
        w_patch.name = "_TestPatched"
        bindings.patch_PatchWorkspace(sess, body=w_patch, id=made_workspace.id)
        get_workspace = bindings.get_GetWorkspace(
            sess, id=made_workspace.id).workspace
        assert get_workspace.name == "_TestPatched"

        # Archive the workspace
        assert not made_workspace.archived
        bindings.post_ArchiveWorkspace(sess, id=made_workspace.id)
        get_workspace_2 = bindings.get_GetWorkspace(
            sess, id=made_workspace.id).workspace
        assert get_workspace_2.archived
        with pytest.raises(errors.APIException):
            # Cannot patch archived workspace
            bindings.patch_PatchWorkspace(sess, body=w_patch, id=made_workspace.id)
        with pytest.raises(errors.APIException):
            # Cannot create project inside archived workspace
            bindings.post_PostProject(
                sess,
                body=bindings.v1PostProjectRequest(
                    name="Nope2", workspaceId=made_workspace.id),
                workspaceId=made_workspace.id,
            )
        bindings.post_UnarchiveWorkspace(sess, id=made_workspace.id)
        get_workspace_3 = bindings.get_GetWorkspace(
            sess, id=made_workspace.id).workspace
        assert not get_workspace_3.archived

        # Refuse to patch, archive, unarchive, or delete the default workspace
        with pytest.raises(errors.APIException):
            bindings.patch_PatchWorkspace(sess, body=w_patch, id=default_workspace.id)
        with pytest.raises(errors.APIException):
            bindings.post_ArchiveWorkspace(sess, id=default_workspace.id)
        with pytest.raises(errors.APIException):
            bindings.post_UnarchiveWorkspace(sess, id=default_workspace.id)
        with pytest.raises(errors.APIException):
            bindings.delete_DeleteWorkspace(sess, id=default_workspace.id)

        # Sort test and default workspaces.
        workspace2 = bindings.post_PostWorkspace(
            sess, body=bindings.v1PostWorkspaceRequest(name="_TestWS")).workspace
        test_workspaces.append(workspace2)
        # The expected lists below assume these three workspaces are the only ones on the master.
        list_test_1 = bindings.get_GetWorkspaces(sess).workspaces
        assert ["Uncategorized", "_TestPatched", "_TestWS"] == [w.name for w in list_test_1]
        list_test_2 = bindings.get_GetWorkspaces(
            sess, orderBy=bindings.v1OrderBy.ORDER_BY_DESC).workspaces
        assert ["_TestWS", "_TestPatched", "Uncategorized"] == [w.name for w in list_test_2]
        list_test_3 = bindings.get_GetWorkspaces(
            sess, sortBy=bindings.v1GetWorkspacesRequestSortBy.SORT_BY_NAME
        ).workspaces
        assert ["_TestPatched", "_TestWS", "Uncategorized"] == [w.name for w in list_test_3]

        # Test pinned workspaces.
        pinned = bindings.get_GetWorkspaces(
            sess,
            pinned=True,
        ).workspaces
        assert len(pinned) == 2
        bindings.post_UnpinWorkspace(sess, id=made_workspace.id)
        pinned = bindings.get_GetWorkspaces(
            sess,
            pinned=True,
        ).workspaces
        assert len(pinned) == 1
        bindings.post_PinWorkspace(sess, id=made_workspace.id)
        pinned = bindings.get_GetWorkspaces(
            sess,
            pinned=True,
        ).workspaces
        assert len(pinned) == 2

        # Add a test project to a workspace.
        r4 = bindings.post_PostProject(
            sess,
            body=bindings.v1PostProjectRequest(name="_TestOnly", workspaceId=made_workspace.id),
            workspaceId=made_workspace.id,
        )
        made_project = r4.project
        test_projects.append(made_project)
        get_project = bindings.get_GetProject(sess, id=made_project.id).project
        assert get_project.name == made_project.name
        assert not made_project.immutable and not get_project.immutable

        # Project cannot be created in the default workspace.
        with pytest.raises(errors.APIException):
            bindings.post_PostProject(
                sess,
                body=bindings.v1PostProjectRequest(
                    name="Nope", workspaceId=default_workspace.id),
                workspaceId=default_workspace.id,
            )

        # Patch the project
        p_patch = bindings.v1PatchProject.from_json(made_project.to_json())
        p_patch.name = "_TestPatchedProject"
        bindings.patch_PatchProject(sess, body=p_patch, id=made_project.id)
        get_project = bindings.get_GetProject(sess, id=made_project.id).project
        assert get_project.name == "_TestPatchedProject"

        # Archive the project
        assert not made_project.archived
        bindings.post_ArchiveProject(sess, id=made_project.id)
        get_project_2 = bindings.get_GetProject(sess, id=made_project.id).project
        assert get_project_2.archived

        # Cannot patch or move an archived project
        with pytest.raises(errors.APIException):
            bindings.patch_PatchProject(sess, body=p_patch, id=made_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_MoveProject(
                sess,
                projectId=made_project.id,
                body=bindings.v1MoveProjectRequest(
                    destinationWorkspaceId=workspace2.id,
                    projectId=made_project.id,
                ),
            )

        # Unarchive the project
        bindings.post_UnarchiveProject(sess, id=made_project.id)
        get_project_3 = bindings.get_GetProject(sess, id=made_project.id).project
        assert not get_project_3.archived

        # Can't archive, un-archive, or move while parent workspace is archived
        bindings.post_ArchiveWorkspace(sess, id=made_workspace.id)
        get_project_4 = bindings.get_GetProject(sess, id=made_project.id).project
        # The project reports as archived once its parent workspace is archived.
        assert get_project_4.archived
        with pytest.raises(errors.APIException):
            bindings.post_ArchiveProject(sess, id=made_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_UnarchiveProject(sess, id=made_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_MoveProject(
                sess,
                projectId=made_project.id,
                body=bindings.v1MoveProjectRequest(
                    destinationWorkspaceId=workspace2.id,
                    projectId=made_project.id,
                ),
            )
        bindings.post_UnarchiveWorkspace(sess, id=made_workspace.id)

        # Refuse to patch, archive, unarchive, or delete the default project
        with pytest.raises(errors.APIException):
            bindings.patch_PatchProject(sess, body=p_patch, id=default_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_ArchiveProject(sess, id=default_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_UnarchiveProject(sess, id=default_project.id)
        with pytest.raises(errors.APIException):
            bindings.delete_DeleteProject(sess, id=default_project.id)

        # Sort workspaces' projects.
        p1 = bindings.post_PostProject(
            sess,
            body=bindings.v1PostProjectRequest(name="_TestPRJ", workspaceId=made_workspace.id),
            workspaceId=made_workspace.id,
        ).project
        p2 = bindings.post_PostProject(
            sess,
            body=bindings.v1PostProjectRequest(name="_TestEarly", workspaceId=made_workspace.id),
            workspaceId=made_workspace.id,
        ).project
        test_projects += [p1, p2]
        list_test_4 = bindings.get_GetWorkspaceProjects(
            sess, id=made_workspace.id).projects
        assert ["_TestPatchedProject", "_TestPRJ", "_TestEarly"] == [p.name for p in list_test_4]
        list_test_5 = bindings.get_GetWorkspaceProjects(
            sess, id=made_workspace.id, orderBy=bindings.v1OrderBy.ORDER_BY_DESC).projects
        assert ["_TestEarly", "_TestPRJ", "_TestPatchedProject"] == [p.name for p in list_test_5]
        list_test_6 = bindings.get_GetWorkspaceProjects(
            sess,
            id=made_workspace.id,
            sortBy=bindings.v1GetWorkspaceProjectsRequestSortBy.SORT_BY_NAME,
        ).projects
        assert ["_TestEarly", "_TestPatchedProject", "_TestPRJ"] == [p.name for p in list_test_6]

        # Move a project to another workspace
        bindings.post_MoveProject(
            sess,
            projectId=made_project.id,
            body=bindings.v1MoveProjectRequest(
                destinationWorkspaceId=workspace2.id,
                projectId=made_project.id,
            ),
        )
        get_project = bindings.get_GetProject(sess, id=made_project.id).project
        assert get_project.workspaceId == workspace2.id

        # Default project cannot be moved.
        with pytest.raises(errors.APIException):
            bindings.post_MoveProject(
                sess,
                projectId=default_project.id,
                body=bindings.v1MoveProjectRequest(
                    destinationWorkspaceId=workspace2.id,
                    projectId=default_project.id,
                ),
            )

        # Project cannot be moved into the default workspace.
        with pytest.raises(errors.APIException):
            bindings.post_MoveProject(
                sess,
                projectId=made_project.id,
                body=bindings.v1MoveProjectRequest(
                    destinationWorkspaceId=default_workspace.id,
                    projectId=made_project.id,
                ),
            )

        # Project cannot be moved into an archived workspace.
        bindings.post_ArchiveWorkspace(sess, id=made_workspace.id)
        with pytest.raises(errors.APIException):
            bindings.post_MoveProject(
                sess,
                projectId=made_project.id,
                body=bindings.v1MoveProjectRequest(
                    destinationWorkspaceId=made_workspace.id,
                    projectId=made_project.id,
                ),
            )
        bindings.post_UnarchiveWorkspace(sess, id=made_workspace.id)

        # Add a test note to a project.
        note = bindings.v1Note(name="Hello", contents="Hello World")
        note2 = bindings.v1Note(name="Hello 2", contents="Hello World")
        bindings.post_AddProjectNote(
            sess,
            body=note,
            projectId=made_project.id,
        )
        r5 = bindings.post_AddProjectNote(
            sess,
            body=note2,
            projectId=made_project.id,
        )
        returned_notes = r5.notes
        assert len(returned_notes) == 2

        # Put notes
        r6 = bindings.put_PutProjectNotes(
            sess,
            body=bindings.v1PutProjectNotesRequest(notes=[note], projectId=made_project.id),
            projectId=made_project.id,
        )
        returned_notes = r6.notes
        # Putting a single note leaves exactly one note on the project.
        assert len(returned_notes) == 1

        # Create an experiment in the default project.
        test_exp_id = run_basic_test(conf.fixtures_path("no_op/single.yaml"),
                                     conf.fixtures_path("no_op"), 1)
        test_exp = bindings.get_GetExperiment(
            sess, experimentId=test_exp_id).experiment
        test_experiments.append(test_exp)
        wait_for_experiment_state(
            test_exp_id, bindings.determinedexperimentv1State.STATE_COMPLETED)
        assert test_exp.projectId == default_project.id

        # Move the test experiment into a user-made project
        # Snapshot the default project's experiments to compare against after the move.
        dproj_exp = bindings.get_GetProjectExperiments(
            sess, id=default_project.id).experiments
        exp_count = len(
            bindings.get_GetProjectExperiments(sess, id=made_project.id).experiments)
        assert exp_count == 0
        mbody = bindings.v1MoveExperimentRequest(
            destinationProjectId=made_project.id, experimentId=test_exp_id)
        bindings.post_MoveExperiment(sess, experimentId=test_exp_id, body=mbody)
        modified_exp = bindings.get_GetExperiment(
            sess, experimentId=test_exp_id).experiment
        assert modified_exp.projectId == made_project.id

        # Confirm the test experiment is in the new project, no longer in old project.
        exp_count = len(
            bindings.get_GetProjectExperiments(sess, id=made_project.id).experiments)
        assert exp_count == 1
        dproj_exp2 = bindings.get_GetProjectExperiments(
            sess, id=default_project.id).experiments
        assert len(dproj_exp2) == len(dproj_exp) - 1

        # Cannot move an experiment out of an archived project
        bindings.post_ArchiveProject(sess, id=made_project.id)
        mbody2 = bindings.v1MoveExperimentRequest(
            destinationProjectId=default_project.id, experimentId=test_exp_id)
        with pytest.raises(errors.APIException):
            bindings.post_MoveExperiment(sess, experimentId=test_exp_id, body=mbody2)
        bindings.post_UnarchiveProject(sess, id=made_project.id)

        # Moving an experiment into default project
        bindings.post_MoveExperiment(sess, experimentId=test_exp_id, body=mbody2)

        # Cannot move an experiment into an archived project
        bindings.post_ArchiveProject(sess, id=made_project.id)
        with pytest.raises(errors.APIException):
            bindings.post_MoveExperiment(sess, experimentId=test_exp_id, body=mbody)

    finally:
        # Clean out experiments, projects, workspaces.
        # In dependency order:
        for e in test_experiments:
            bindings.delete_DeleteExperiment(sess, experimentId=e.id)
        for p in test_projects:
            bindings.delete_DeleteProject(sess, id=p.id)
        for w in test_workspaces:
            bindings.delete_DeleteWorkspace(sess, id=w.id)
def config(args: Namespace) -> None:
    """Write the configuration of experiment ``args.experiment_id`` to stdout as YAML."""
    response = bindings.get_GetExperiment(
        setup_session(args), experimentId=args.experiment_id
    )
    # default_flow_style=False selects block-style YAML output.
    yaml.safe_dump(response.config, stream=sys.stdout, default_flow_style=False)
def describe(args: Namespace) -> None:
    """Print experiment, trial and workload information for one or more experiments.

    ``args.experiment_ids`` is a comma-separated list of experiment IDs.

    * With ``args.json`` the JSON form of each GetExperiment response is
      printed as one JSON list and the function returns.
    * Otherwise three tables (experiments, trials, workloads) are rendered via
      ``render.tabulate_or_csv`` with ``args.csv``. When ``args.outdir`` is set,
      ``experiments.csv`` / ``trials.csv`` / ``workloads.csv`` inside it are
      passed as the output file; when it is not, a heading is printed first and
      no output file is passed.
    * ``args.metrics`` adds one column per scalar training / validation metric
      name to the workloads table.
    """
    session = setup_session(args)
    exps = []
    for experiment_id in args.experiment_ids.split(","):
        r = bindings.get_GetExperiment(session, experimentId=experiment_id)
        if args.json:
            exps.append(r.to_json())
        else:
            exps.append(r.experiment)

    if args.json:
        print(json.dumps(exps, indent=4))
        return

    # Display overall experiment information.
    headers = [
        "Experiment ID",
        "State",
        "Progress",
        "Start Time",
        "End Time",
        "Name",
        "Description",
        "Archived",
        "Resource Pool",
        "Labels",
    ]
    values = [
        [
            exp.id,
            exp.state.value.replace("STATE_", ""),
            render.format_percent(exp.progress),
            render.format_time(exp.startTime),
            render.format_time(exp.endTime),
            exp.name,
            exp.description,
            exp.archived,
            exp.resourcePool,
            ", ".join(sorted(exp.labels or [])),
        ]
        for exp in exps
    ]
    if not args.outdir:
        outfile = None
        print("Experiment:")
    else:
        outfile = args.outdir.joinpath("experiments.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display trial-related information.
    trials_for_experiment: Dict[str, Sequence[bindings.trialv1Trial]] = {}
    for exp in exps:
        trials_for_experiment[exp.id] = bindings.get_GetExperimentTrials(
            session, experimentId=exp.id
        ).trials

    headers = ["Trial ID", "Experiment ID", "State", "Start Time", "End Time", "H-Params"]
    values = [
        [
            trial.id,
            exp.id,
            trial.state.value.replace("STATE_", ""),
            render.format_time(trial.startTime),
            render.format_time(trial.endTime),
            json.dumps(trial.hparams, indent=4),
        ]
        for exp in exps
        for trial in trials_for_experiment[exp.id]
    ]
    if not args.outdir:
        outfile = None
        print("\nTrials:")
    else:
        outfile = args.outdir.joinpath("trials.csv")
    render.tabulate_or_csv(headers, values, args.csv, outfile)

    # Display step-related information.
    t_metrics_headers: List[str] = []
    t_metrics_names: List[str] = []
    v_metrics_headers: List[str] = []
    v_metrics_names: List[str] = []
    if args.metrics:
        # Accumulate the scalar training and validation metric names from all provided experiments.
        for exp in exps:
            exp_trials = trials_for_experiment[exp.id]
            if not exp_trials:
                # An experiment with no trials has no workloads to sample;
                # indexing [0] here would raise IndexError.
                continue
            sample_trial = exp_trials[0]
            sample_workloads = bindings.get_GetTrial(session, trialId=sample_trial.id).workloads
            t_metrics_names += scalar_training_metrics_names(sample_workloads)
            v_metrics_names += scalar_validation_metrics_names(sample_workloads)
        t_metrics_names = sorted(set(t_metrics_names))
        t_metrics_headers = ["Training Metric: {}".format(name) for name in t_metrics_names]
        v_metrics_names = sorted(set(v_metrics_names))
        v_metrics_headers = ["Validation Metric: {}".format(name) for name in v_metrics_names]

    headers = (
        ["Trial ID", "# of Batches", "State", "Report Time"]
        + t_metrics_headers
        + [
            "Checkpoint State",
            "Checkpoint Report Time",
            "Validation State",
            "Validation Report Time",
        ]
        + v_metrics_headers
    )

    # Rows are keyed by totalBatches so that the training, checkpoint and
    # validation workloads reported at the same batch count share one row.
    # NOTE(review): the key does not include the trial ID, so workloads from
    # different trials with equal totalBatches also merge -- confirm intended.
    wl_output: Dict[int, List[Any]] = {}
    for exp in exps:
        for trial in trials_for_experiment[exp.id]:
            workloads = bindings.get_GetTrial(session, trialId=trial.id).workloads
            for workload in workloads:
                t_metrics_fields = []
                wl_detail: Optional[
                    Union[bindings.v1MetricsWorkload, bindings.v1CheckpointWorkload]
                ] = None
                if workload.training:
                    wl_detail = workload.training
                    for name in t_metrics_names:
                        if wl_detail.metrics and (name in wl_detail.metrics):
                            t_metrics_fields.append(wl_detail.metrics[name])
                        else:
                            t_metrics_fields.append(None)
                else:
                    t_metrics_fields = [None for name in t_metrics_names]

                if workload.checkpoint:
                    wl_detail = workload.checkpoint

                if workload.checkpoint and wl_detail:
                    checkpoint_state = wl_detail.state.value
                    checkpoint_end_time = wl_detail.endTime
                else:
                    checkpoint_state = ""
                    checkpoint_end_time = None

                v_metrics_fields = []
                if workload.validation:
                    wl_detail = workload.validation
                    validation_state = wl_detail.state.value
                    validation_end_time = wl_detail.endTime
                    for name in v_metrics_names:
                        if wl_detail.metrics and (name in wl_detail.metrics):
                            v_metrics_fields.append(wl_detail.metrics[name])
                        else:
                            v_metrics_fields.append(None)
                else:
                    validation_state = ""
                    validation_end_time = None
                    v_metrics_fields = [None for name in v_metrics_names]

                if wl_detail:
                    if wl_detail.totalBatches in wl_output:
                        # condense training, checkpoints, validation workloads into one step-like
                        # row for compatibility with previous versions of describe
                        merge_row = wl_output[wl_detail.totalBatches]
                        # NOTE(review): this compares formatted time values; it presumably
                        # relies on render.format_time never returning None here -- confirm.
                        merge_row[3] = max(merge_row[3], render.format_time(wl_detail.endTime))
                        # Row layout: 4 fixed columns, training metrics, 4 checkpoint /
                        # validation columns, then validation metrics.
                        for idx, tfield in enumerate(t_metrics_fields):
                            # "is not None" (rather than truthiness) so that a metric
                            # value of 0 is still merged into the row.
                            if tfield is not None and merge_row[4 + idx] is None:
                                merge_row[4 + idx] = tfield
                        start_checkpoint = 4 + len(t_metrics_fields)
                        if checkpoint_state:
                            merge_row[start_checkpoint] = checkpoint_state.replace("STATE_", "")
                            merge_row[start_checkpoint + 1] = render.format_time(
                                checkpoint_end_time
                            )
                        if validation_end_time:
                            merge_row[start_checkpoint + 3] = render.format_time(
                                validation_end_time
                            )
                        if validation_state:
                            merge_row[start_checkpoint + 2] = validation_state.replace(
                                "STATE_", ""
                            )

                        for idx, vfield in enumerate(v_metrics_fields):
                            if vfield is not None and merge_row[start_checkpoint + idx + 4] is None:
                                merge_row[start_checkpoint + idx + 4] = vfield
                    else:
                        row = (
                            [
                                trial.id,
                                wl_detail.totalBatches,
                                wl_detail.state.value.replace("STATE_", ""),
                                render.format_time(wl_detail.endTime),
                            ]
                            + t_metrics_fields
                            + [
                                checkpoint_state.replace("STATE_", ""),
                                render.format_time(checkpoint_end_time),
                                validation_state.replace("STATE_", ""),
                                render.format_time(validation_end_time),
                            ]
                            + v_metrics_fields
                        )
                        wl_output[wl_detail.totalBatches] = row

    if not args.outdir:
        outfile = None
        print("\nWorkloads:")
    else:
        outfile = args.outdir.joinpath("workloads.csv")
    # Order rows by batch count (column 1).
    values = sorted(wl_output.values(), key=lambda a: int(a[1]))
    render.tabulate_or_csv(headers, values, args.csv, outfile)
def experiment_state(experiment_id: int) -> determinedexperimentv1State:
    """Return the state of the given experiment, fetched through ``test_session()``."""
    return bindings.get_GetExperiment(
        test_session(), experimentId=experiment_id
    ).experiment.state
def experiment_config_json(experiment_id: int) -> Dict[str, Any]:
    """Return the ``config`` field of the GetExperiment response for ``experiment_id``."""
    return bindings.get_GetExperiment(test_session(), experimentId=experiment_id).config
def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    """Show live progress of a model-definition test experiment until it ends.

    Polls the master every 0.2 seconds and redraws a one-line, four-stage
    progress display.

    * Returns normally once the experiment state is ``constants.COMPLETED``.
    * Calls ``sys.exit(1)`` when the state is ``constants.CANCELED``.
    * Calls ``sys.exit(1)`` when the state is ``constants.ERROR``, after
      printing the first trial's logs if a trial exists.

    Args:
        master_url: URL of the master to query.
        exp_id: ID of the experiment to follow.
    """

    def print_progress(active_stage: int, ended: bool) -> None:
        # There are four sequential stages of verification. Track the
        # current stage with an index into this list.
        stages = [
            "Scheduling task",
            "Testing training",
            "Testing validation",
            "Testing checkpointing",
        ]

        for idx, stage in enumerate(stages):
            if active_stage > idx:
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                color = "white"
                checkbox = " "
            # Pad every stage name with dots to a fixed width of 25 characters.
            print(colored(stage + (25 - len(stage)) * ".", color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                # "\r" returns to the start of the line so the next poll
                # overwrites it; "\n" finishes the line once the run has ended.
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    sess = session.Session(master_url, None, None, None)
    while True:
        r = bindings.get_GetExperiment(sess, experimentId=exp_id).experiment
        trials = bindings.get_GetExperimentTrials(sess, experimentId=exp_id).trials

        # Wait for experiment to start and initialize a trial.
        if len(trials) < 1:
            t = {}
        else:
            trial_id = trials[0].id
            t = api.get(master_url, f"trials/{trial_id}").json()

        # Update the active_stage from the experiment state and the first
        # trial's "runner_state" field.
        exp_state = r.state.value.replace("STATE_", "")
        runner_state = t.get("runner_state")
        if exp_state == constants.COMPLETED:
            active_stage = 4
        elif runner_state == "checkpointing":
            active_stage = 3
        elif runner_state == "validating":
            active_stage = 2
        elif runner_state in ("UNSPECIFIED", "training"):
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if exp_state == constants.COMPLETED:
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif exp_state == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                )
            )
            sys.exit(1)
        elif exp_state == constants.ERROR:
            print_progress(active_stage, ended=True)
            if trials:
                trial_id = trials[0].id
                logs.pprint_trial_logs(master_url, trial_id)
            else:
                # The experiment can error before any trial exists; indexing
                # trials[0] would raise IndexError, so report that instead.
                print(
                    colored(
                        "Model definition test (ID: {}) failed before any "
                        "trial was created.".format(exp_id),
                        "red",
                    )
                )
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)