def calc_rollout_metrics(rollouts):
    ev = DataEvalNL("", save_images=False, entire_trajectory=False)
    metrics = {}
    total_task_success = 0
    total_stop_success = 0
    coverages = []

    reward_keys = [k for k in rollouts[0][0].keys() if k.endswith("_reward")]
    rewards = {k: [] for k in reward_keys}

    for rollout in rollouts:
        success = ev.rollout_success(rollout[0]["env_id"], rollout[0]["set_idx"], rollout[0]["seg_idx"], rollout)
        # Take sum of rewards for each type of reward
        for k in reward_keys:
            v = sum([s[k] for s in rollout])
            rewards[k].append(v)
        visit_success = stop_success(rollout)
        total_stop_success += 1 if visit_success else 0
        total_task_success += 1 if success else 0

    task_success_rate = total_task_success / len(rollouts)
    visit_success_rate = total_stop_success / len(rollouts)
    metrics["task_success_rate"] = task_success_rate
    metrics["visit_success_rate"] = visit_success_rate

    rollout_lens = [len(rollout) for rollout in rollouts]
    metrics["mean_rollout_len"] = np.asarray(rollout_lens).mean()

    # Average each reward across rollouts
    rewards = {k: np.mean(np.asarray(l)) for k, l in rewards.items()}
    metrics = dict_merge(metrics, rewards)
    return metrics
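
# Illustrative sketch (not part of the original module): the reward aggregation that
# calc_rollout_metrics performs, reduced to plain Python on a hand-made rollout list.
# The dict keys ("*_reward") mirror what the function above reads; the values are made up.
def _example_reward_aggregation():
    rollouts = [
        [{"env_id": 0, "stop_reward": 1.0, "visit_reward": 0.2},
         {"env_id": 0, "stop_reward": 0.0, "visit_reward": 0.3}],
        [{"env_id": 1, "stop_reward": 0.0, "visit_reward": 0.1}],
    ]
    reward_keys = [k for k in rollouts[0][0].keys() if k.endswith("_reward")]
    # Sum each reward type within a rollout, then average the per-rollout sums across rollouts
    per_rollout_sums = {k: [sum(s[k] for s in r) for r in rollouts] for k in reward_keys}
    mean_rewards = {k: sum(v) / len(v) for k, v in per_rollout_sums.items()}
    print(mean_rewards)  # {'stop_reward': 0.5, 'visit_reward': 0.3}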
def collect_iteration_data(roller, iteration, train_envs, test_envs, latest_model_filename):
    setup = P.get_current_parameters()["Setup"]

    # Collect data with the current policy
    num_train_samples = PARAMS["train_envs_per_iteration"]
    train_envs_i = sample_n_from_list(train_envs, num_train_samples)
    if PARAMS["test_on_train"]:
        test_envs_i = train_envs_i
    else:
        test_envs_i = sample_n_from_list(test_envs, PARAMS["test_envs_per_iteration"])

    train_data_i = rollout_on_env_set(roller, train_envs_i, iteration, latest_model_filename, test=False)
    test_data_i = rollout_on_env_set(roller, test_envs_i, iteration, latest_model_filename, test=True)

    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], entire_trajectory=not PARAMS["segment_level"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

    # Kill the simulators after each rollout to save CPU cycles and avoid the slowdown
    os.system("killall -9 MyProject5-Linux-Shipping")
    os.system("killall -9 MyProject5")

    return train_data_i, test_data_i
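
# Hypothetical helper sketch: sample_n_from_list is called above but not defined in this
# file. A minimal version consistent with how it is used (pick up to n environments per
# iteration) might look like this; the real implementation may differ.
import random

def _example_sample_n_from_list(lst, n):
    # Sample without replacement, capped at the list length
    return random.sample(lst, min(n, len(lst)))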
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"], setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(400) \
        .setHorizon(100) \
        .setStepsToForceStop(10) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(True) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(True) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], save_images=True, entire_trajectory=False, custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
def evaluate_saved_rollouts():
    params = P.get_current_parameters()
    setup = params["Setup"]
    model_name = setup["model"]
    run_name = setup["run_name"]
    eval_dname = get_eval_tmp_dataset_name(model_name, run_name)
    eval_envs = set(list(sorted(get_correct_eval_env_id_list())))
    rollouts = load_multiple_env_data(eval_dname)
    present_envs = set([rollout[0]["env_id"] for rollout in rollouts if len(rollout) > 0])
    missing_envs = eval_envs - present_envs

    logdir = get_results_dir(run_name)

    if len(missing_envs) > 0:
        print(f"Warning! {len(missing_envs)} envs missing: {missing_envs}")
        #sys.exit(1)

    log("", logdir)
    log("--------------------------------------------------------------------------------------------", logdir)
    log(f"Evaluating rollouts for run {run_name}", logdir)
    log(f"  using dataset {eval_dname}", logdir)
    log(f"  missing envs {missing_envs}", logdir)
    log("--------------------------------------------------------------------------------------------", logdir)

    evaler1 = DataEvalNL(setup["run_name"] + "1-1", save_images=False, entire_trajectory=False, aug_len=1)
    evaler1.evaluate_dataset(rollouts)
    results1 = evaler1.get_results()

    evaler2 = DataEvalNL(setup["run_name"] + "2-2", save_images=False, entire_trajectory=False, aug_len=2)
    evaler2.evaluate_dataset(rollouts)
    results2 = evaler2.get_results()

    evalerf = DataEvalNL(setup["run_name"] + "1-2", save_images=True, entire_trajectory=False)
    evalerf.evaluate_dataset(rollouts)
    resultsf = evalerf.get_results()

    log(f"Results 1-1: {results1}", logdir)
    log(f"Results 2-2: {results2}", logdir)
    log(f"Results 1-2: {resultsf}", logdir)
    log(f" -- END EVALUATION FOR {run_name} -- ", logdir)
    log("--------------------------------------------------------------------------------------------", logdir)
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(800) \
        .setHorizon(setup["trajectory_length"]) \
        .setStepsToForceStop(20) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(False) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(False) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"]) \
        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        # If a partially collected evaluation dataset exists, ask whether to resume from it
        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        # Skip environments that already have saved rollouts
        collected_envs = set([rollout[0]["env_id"] for rollout in cumulative_dataset if len(rollout) > 0])
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        # Roll out and save the remaining environments in rounds of round_size
        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print("WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                # rollout is either a list of samples or a list of segments (each a list
                # of samples); it is saved to the same per-environment path in both cases
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=True, world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], save_images=True, entire_trajectory=False, custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
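
# Illustrative sketch (not part of the original module): the resume logic used above,
# reduced to plain data. Environments that already have saved rollouts are skipped and
# the remainder is processed in rounds of round_size.
def _example_resume_in_rounds(eval_envs, collected_envs, round_size):
    remaining = [e for e in eval_envs if e not in collected_envs]
    return [remaining[i:i + round_size] for i in range(0, len(remaining), round_size)]

# _example_resume_in_rounds([0, 1, 2, 3, 4, 5], {1, 3}, 3) -> [[0, 2, 4], [5]]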
def train_dagger_simple():
    # ----------------------------------------------------------------------------------------------------------------
    # Load params and configure stuff
    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again")
        else:
            os.makedirs(data_path, exist_ok=True)
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ----------------------------------------------------------------------------------------------------------------
    # Load / initialize model
    model = load_model(setup["model"], setup["model_file"], domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off - load the model and set the iteration/epoch number
    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}")
        load_pytorch_model(model, epoch_dag_filename(run_name, start_iteration - 1))

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize trainer
    trainer = Trainer(model, epoch=start_iteration, name=setup["model"], run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize policy roller
    roller = SimpleParallelPolicyRoller(num_workers=params["num_workers"],
                                        device=params["device"],
                                        policy_name=setup["model"],
                                        policy_file=setup["model_file"],
                                        oracle=oracle,
                                        dataset_save_name=dagger_dataset_name(run_name),
                                        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ----------------------------------------------------------------------------------------------------------------
    # Train DAgger - loop over iterations; in each one, prune the dataset, roll out, and train an epoch
    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within the DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Roll out and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))

        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill AirSim instances so that they don't take up GPU memory and in general slow things down during training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name, entire_trajectory=False, save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)
        #test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
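
# Hypothetical sketch: dagger_beta() is called above but not defined in this file. In the
# standard DAgger formulation the oracle is followed with probability beta_i at iteration i,
# with beta_i decaying over iterations (e.g. beta_i = p**i). One plausible, assumed
# implementation; the parameter name "oracle_decay" is an assumption, not from this file.
def _example_dagger_beta(params, iteration):
    decay = params.get("oracle_decay", 0.9)  # assumed parameter name
    return decay ** iteration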