def collect_iteration_data(roller, iteration, train_envs, test_envs, latest_model_filename):
    setup = P.get_current_parameters()["Setup"]

    # Collect data with the current policy
    num_train_samples = PARAMS["train_envs_per_iteration"]
    train_envs_i = sample_n_from_list(train_envs, num_train_samples)
    if PARAMS["test_on_train"]:
        test_envs_i = train_envs_i
    else:
        test_envs_i = sample_n_from_list(test_envs, PARAMS["test_envs_per_iteration"])

    train_data_i = rollout_on_env_set(roller, train_envs_i, iteration, latest_model_filename, test=False)
    test_data_i = rollout_on_env_set(roller, test_envs_i, iteration, latest_model_filename, test=True)

    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], entire_trajectory=not PARAMS["segment_level"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

    # Kill the simulators after each rollout to save CPU cycles and avoid the slowdown
    os.system("killall -9 MyProject5-Linux-Shipping")
    os.system("killall -9 MyProject5")

    return train_data_i, test_data_i
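# sample_n_from_list is imported from elsewhere in the project and not shown here.
# A minimal sketch of the behavior assumed above: draw a uniform random subset of
# environments without replacement, returning the whole list when fewer than n remain.
# This is an illustrative guess, not the project's actual implementation.
import random

def sample_n_from_list(lst, n):
    # Clamp n so that a small environment pool does not raise a ValueError
    return random.sample(lst, min(n, len(lst)))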
def evaluate():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    # At this point, test and dev have been swapped.
    # Whatever we've been developing on, previously called "test", is hereafter called dev.
    # Test is the data that hasn't been touched at all.
    eval_envs = get_correct_eval_env_id_list()

    dataset = faux_dataset_random_pt(eval_envs)
    #dataset = faux_dataset_random_landmark(eval_envs)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
        results["all_dist"] = []

    print("Results:", results)
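# faux_dataset_random_pt builds a dataset of fake rollouts that serves as a chance
# baseline for the evaluators above. A minimal sketch under assumptions: one
# single-sample rollout per environment, with the agent "stopping" at a uniformly
# random point inside the environment. The field names ("state") and the world_size_m
# parameter are hypothetical and may not match the project's actual sample format.
import random

def faux_dataset_random_pt(eval_envs, world_size_m=1.0):
    dataset = []
    for env_id in eval_envs:
        # One fake sample per environment: a random (x, y) stop location
        stop_pos = (random.uniform(0, world_size_m), random.uniform(0, world_size_m))
        dataset.append([{"env_id": env_id, "state": stop_pos}])
    return dataset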
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    # import pdb;pdb.set_trace()
    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"], setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(400) \
        .setHorizon(100) \
        .setStepsToForceStop(10) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(True) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(True) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], save_images=True, entire_trajectory=False, custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
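# The custom-eval branch above expects params["Eval"]["examples"] to be a list of
# (env_id, eval_set, seg_idx, instruction) tuples, which zip(*examples) splits back
# into parallel lists. A hypothetical illustration of that parameter structure; the
# concrete env ids, segment indices and instruction strings below are made up:
example_eval_params = {
    "Eval": {
        "custom_eval": True,
        "examples": [
            # (env_id, eval_set, seg_idx, natural-language instruction)
            (6000, "dev", 2, "curve right around the tree and stop before the blue box"),
            (6001, "dev", 0, "fly past the house and land next to the fence"),
        ],
    }
}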
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(800) \
        .setHorizon(setup["trajectory_length"]) \
        .setStepsToForceStop(20) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(False) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(False) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"]) \
        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        # Skip environments whose rollouts have already been collected and saved
        collected_envs = set([rollout[0]["env_id"] for rollout in cumulative_dataset if len(rollout) > 0])
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this round of data
            for rollout in dataset:
                if len(rollout) == 0:
                    print("WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                # rollout is either a list of samples or a list of segments (each a list of
                # samples); in both cases the first element carries the env_id and the
                # rollout is saved whole under that id.
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=True, world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], save_images=True, entire_trajectory=False, custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)