def load_latest_model(latest_model_filename):
    """Load the model that the next training iteration starts from.

    If PARAMS["retrain_every_iteration"] is truthy, the override "reset" is
    passed to load_model instead of the latest checkpoint name, so that a
    previously trained model is not loaded.

    Args:
        latest_model_filename: Model file override used when retraining
            from scratch is not requested.

    Returns:
        The (model, model_loaded) pair produced by load_model.
    """
    # NOTE(review): the Setup parameters are looked up but not used below.
    setup = P.get_current_parameters()["Setup"]

    retrain_from_scratch = PARAMS["retrain_every_iteration"]
    override = "reset" if retrain_from_scratch else latest_model_filename
    model, model_loaded = load_model(model_file_override=override)
    return model, model_loaded
def load_dagger_model(latest_model_filename):
    """Load the model that DAgger training should continue from.

    The source of the model is chosen from the restore-related parameters:
    the latest model file, a default load_model() call, or the model file
    of the iteration before setup["dagger_restore"].

    Args:
        latest_model_filename: Model file override used when
            PARAMS["restore_latest"] is truthy.

    Returns:
        The loaded model (the "model_loaded" flag from load_model is
        discarded).

    Raises:
        ValueError: If none of the restore conditions applies, e.g. a
            negative "restore" value. Without this check the function
            would fail with an UnboundLocalError on return.
    """
    setup = P.get_current_parameters()["Setup"]
    # NOTE(review): "restore" is read from PARAMS in one branch and from
    # setup in another - confirm that both are meant to hold the same value.

    # Load and re-save the model to the continuously updated dagger filename
    if PARAMS["restore_latest"]:
        print("Loading latest model: ", latest_model_filename)
        model, _ = load_model(model_file_override=latest_model_filename)
    elif PARAMS["restore"] == 0 or setup["restore_data_only"]:
        model, _ = load_model()
    elif setup["restore"] > 0 and not setup["restore_data_only"]:
        model_name = get_model_filename_at_iteration(
            setup, setup["dagger_restore"] - 1)
        model, _ = load_model(model_file_override=model_name)
    else:
        raise ValueError(
            "Cannot decide which DAgger model to load: no restore "
            "condition matched the current parameters")
    return model
def train_supervised_bidomain():
    """Run one evaluation epoch of the bidomain trainer on a chosen env set.

    Loads the sim, real and critic models named in the "Setup" parameters,
    picks the train/dev/test environment list named by
    setup["eval_env_set"] (default "dev"), optionally restricts it to
    env ids in [env_range_start, env_range_end), and calls
    TrainerBidomain.train_epoch with eval=True. Metrics are printed for the
    domains that are not excluded by the restricted domain.

    Raises:
        ValueError: If setup["eval_env_set"] is not "dev", "train" or
            "test".
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    model_sim, _ = load_model(
        setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(
        setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(
        setup["critic_model"], setup["critic_model_file"])

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    env_list_name = setup.get("eval_env_set", "dev")
    if env_list_name == "dev":
        print("Using DEV envs")
        use_envs = dev_envs
    elif env_list_name == "train":
        print("Using TRAIN envs")
        use_envs = train_envs
    elif env_list_name == "test":
        print("Using TEST envs")
        use_envs = test_envs
    else:
        raise ValueError(f"Unknown env set {env_list_name}")

    # A missing (or None) bound means "no restriction". Comparing None with
    # an int raises TypeError, so fall back to 0 before comparing.
    env_range_start = setup.get("env_range_start") or 0
    if env_range_start > 0:
        use_envs = [e for e in use_envs if e >= env_range_start]
    env_range_end = setup.get("env_range_end") or 0
    if env_range_end > 0:
        use_envs = [e for e in use_envs if e < env_range_end]

    # Hard-coded switch: change by hand to evaluate another domain.
    restricted_domain = "simulator"
    if restricted_domain == "simulator":
        # Load dummy model for real domain
        model_real, _ = load_model(
            setup["model"], setup["sim_model_file"], domain="sim")
        model_sim.set_save_path_overlays(True)
    elif restricted_domain == "real":
        # Load dummy model for sim domain
        model_sim, _ = load_model(
            setup["model"], setup["real_model_file"], domain="real")
        model_real.set_save_path_overlays(True)
    else:
        model_real.set_save_path_overlays(True)
        model_sim.set_save_path_overlays(True)

    trainer = TrainerBidomain(model_real, model_sim, model_critic, epoch=0)
    trainer.train_epoch(env_list=use_envs, eval=True,
                        restricted_domain=restricted_domain)

    if restricted_domain != "simulator":
        model_real.print_metrics()
    if restricted_domain != "real":
        model_sim.print_metrics()
def test_rollout_sampler():
    """Manual smoke test: sample three batches of rollouts and show them.

    Loads the "pvn_full_bidomain" policy, samples 12 rollouts three times
    through a RolloutSampler backed by a 4-worker
    SimpleParallelPolicyRoller, then displays the RGB image of every
    sample, waiting for a key press on each.
    """
    policy, _ = load_model("pvn_full_bidomain")
    policy_state = policy.get_policy_state()
    from visualization import Presenter

    roller = SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4)
    rollout_sampler = RolloutSampler(roller)

    # TODO: Load some policy
    print("Sampling once")
    rollouts = rollout_sampler.sample_n_rollouts(12, policy_state)
    for ordinal in ("twice", "thrice"):
        print("Sampling " + ordinal)
        rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    for rollout in rollouts:
        print("Visualizing rollout")
        for sample in rollout:
            rgb = sample["state"].get_rgb_image()
            Presenter().show_image(rgb, "fpv", waitkey=True, scale=4)
        print("Done!")

    roller.__exit__()
    print("ding")
def train_supervised():
    """Train the configured model with supervised learning.

    Reads the "Setup" and "Supervised" parameter groups, optionally
    restricts the training environments to a named dataset split,
    optionally resumes from the checkpoint of
    supervised_params["start_epoch"], and then alternates one training
    epoch on the train envs with one evaluation epoch on the dev envs.
    After every epoch the model is checkpointed under "tmp/", and the model
    with the lowest dev loss so far is saved under the run filename.

    Exits the process with status -1 if a resume checkpoint is requested
    but the file does not exist.
    """
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=setup["max_envs"])

    if supervised_params.get("split_train_data"):
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) +
              " envs from dataset split: " + split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]
    start_filename = ("tmp/" + filename + "_epoch_" +
                      str(supervised_params["start_epoch"]))
    if supervised_params["start_epoch"] > 0:
        if file_exists(start_filename):
            load_pytorch_model(model, start_filename)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(start_filename)
            exit(-1)

    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"],
                                   setup["fix_restored_weights"])

    trainer = Trainer(model, epoch=supervised_params["start_epoch"],
                      name=setup["model"], run_name=setup["run_name"])

    print("Beginning training...")
    # Start from +inf so the first epoch always produces a "best" model,
    # whatever the scale of the loss.
    best_test_loss = float("inf")
    for epoch in range(num_epochs):
        train_loss = trainer.train_epoch(
            train_data=None, train_envs=train_envs, eval=False)

        # Reset the goal counters so that they only cover the dev pass.
        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(
            train_data=None, train_envs=dev_envs, eval=True)

        print("GOALS: ", trainer.model.correct_goals,
              trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)
        print("Epoch", epoch, "train_loss:", train_loss,
              "test_loss:", test_loss)
        save_pytorch_model(trainer.model,
                           "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
def loadPolicy(self):
    """Initialize the experiment and load the policy into this object.

    Stores the "Rollout" parameter group in self.params. When
    self.model_name is set, load_model is called with self.model_file as
    the file override, its result is stored in self.policy and
    self.policy_loaded, and self.use_policy is set to True. Otherwise only
    an error message is printed.

    Returns:
        self.
    """
    P.initialize_experiment(self.setup_name)
    self.params = P.get_current_parameters()["Rollout"]

    if self.model_name is None:
        print("Error! Requested loadPolicy, but model_name is None!")
        return self

    print("RollOutParams loading model")
    print("Use cuda: " + str(self.cuda))
    loaded = load_model(model_file_override=self.model_file)
    self.policy, self.policy_loaded = loaded
    self.use_policy = True

    if self.policy is None:
        print("Error loading policy: ", self.model_name)
    else:
        print("Loaded policy: ", self.model_name)
    return self
def evaluate_top_down_pred():
    """Evaluate the loaded model's supervised loss on the eval environments.

    Iterates the evaluation dataset one example at a time, averages
    model.sup_loss_on_batch over all batches, saves every image returned by
    model.get_viz() into the run's results directory, and writes the
    average loss to the run's results JSON file as {"loss": <avg>}.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    model, model_loaded = load_model()

    eval_envs = get_correct_eval_env_id_list()

    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # The keyword "dataset_prefix" used to be passed twice, which is a
    # SyntaxError. NOTE(review): "dataset_name" is assumed to be the keyword
    # that get_dataset expects for the first value - confirm against its
    # signature. "eval" used to receive the builtin eval function; pass an
    # explicit boolean instead.
    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=False)

    total_loss = 0
    count = 0
    num_batches = len(dataloader)
    for b, batch in enumerate(dataloader):
        loss_var = model.sup_loss_on_batch(batch, eval=True, viz=True)
        batch_loss = loss_var.data[0]
        total_loss += batch_loss
        count += 1
        print("batch: " + str(b) + " / " + str(num_batches) +
              " loss: " + str(batch_loss))
    avg_loss = total_loss / count

    results_dir = get_results_dir(setup["run_name"])
    results_json_path = get_results_path(setup["run_name"])
    os.makedirs(results_dir, exist_ok=True)

    viz = model.get_viz()
    for key, lst in viz.items():
        for i, img in enumerate(lst):
            img_path = os.path.join(
                results_dir, key + str(i) + "_" + setup["model"] + ".jpg")
            sp.misc.imsave(img_path, img)
            print("Saved image: " + img_path)

    with open(results_json_path, "w") as fp:
        json.dump({"loss": avg_loss}, fp)
def browse_pvn_dataset():
    """Step through the sim-domain supervised training dataset on screen.

    For every non-None example, prints its env/set/segment ids and its
    instruction, then shows each image of the example, with the entries of
    "lm_pos_fpv" drawn on top when they are not None. Each image waits for
    a key press.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(
        setup["model"], setup["sim_model_file"], domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # Switch to "real" by hand to browse the real-domain data instead.
    dom = "sim"
    dataset_kwargs = dict(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False,
    )
    dataset = model_sim.get_dataset(**dataset_kwargs)

    presenter = Presenter()
    for example in dataset:
        if example is None:
            continue
        meta = example["md"][0]
        print("Showing example: {}:{}:{}".format(
            meta["env_id"], meta["set_idx"], meta["seg_idx"]))
        print(" instruction: {}".format(meta["instruction"]))

        num_steps = len(example["images"])
        for step in range(num_steps):
            print(f" timestep: {step}")
            frame = example["images"][step]
            landmarks = example["lm_pos_fpv"][step]
            if landmarks is not None:
                frame = presenter.plot_pts_on_torch_image(
                    frame, landmarks.long())
            presenter.show_image(frame, "fpv_img_i", scale=4, waitkey=True)
def __init__(self, policy_name, policy_file, num_workers, oracle=None, device=None, dataset_save_name="", restart_every_n=1000, no_reward=False):
    """Load the shared policy and start one worker process per worker.

    Args:
        policy_name: Model name passed to load_model.
        policy_file: Model file passed to load_model.
        num_workers: Number of worker processes to launch.
        oracle: Forwarded unchanged to every worker process.
        device: Device the shared policy is moved to; also forwarded to
            every worker process.
        dataset_save_name: Stored on the instance and forwarded to every
            worker process.
        restart_every_n: Stored on the instance; not used in this method.
        no_reward: Stored on the instance and forwarded to every worker
            process.
    """
    self.num_workers = num_workers
    self.processes = []
    self.connections = []
    self.policy_name = policy_name

    self.shared_policy, _ = load_model(policy_name, policy_file)
    self.shared_policy.make_picklable()
    self.shared_policy = self.shared_policy.to(device)

    self.device = device
    self.dataset_save_name = dataset_save_name
    self.restart_every_n = restart_every_n
    self.rollout_num = 0
    self.no_reward = no_reward

    for worker_idx in range(self.num_workers):
        spawn_ctx = mp.get_context("spawn")
        parent_end, child_end = spawn_ctx.Pipe()
        print(f"LAUNCHING WORKER {worker_idx}")
        worker_args = (child_end, worker_idx, dataset_save_name,
                       self.shared_policy, oracle, device, no_reward)
        worker = spawn_ctx.Process(target=worker_process, args=worker_args)
        # Register the process and the parent end of its pipe before
        # starting it.
        self.processes.append(worker)
        self.connections.append(parent_end)
        worker.start()
def train_supervised_worker(rl_process_conn):
    """Supervised half of the RL + supervised training setup.

    Loads the sim, real and critic models, resumes from the last epoch for
    which a sim "stage1" checkpoint file exists, and then, for every epoch:
    sends the current sim model state dict over rl_process_conn, waits
    until at least 20 entries exist in the RL dataset directory (only when
    no sim seed dataset is configured), runs one training and one
    evaluation epoch, and saves all three models.

    Args:
        rl_process_conn: Connection object on which
            ["stage1_model_state_dict", state_dict] messages are sent.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    rlsup = P.get_current_parameters()["RLSUP"]
    setup["trajectory_length"] = setup["sup_trajectory_length"]
    run_name = setup["run_name"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]
    sup_device = rlsup.get("sup_device", "cuda:1")

    model_oracle_critic = None

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # Load the starter model and save it at epoch 0
    # Supervised worker to use GPU 1, RL will use GPU 0. Simulators run on GPU 2
    model_sim = load_model(
        setup["sup_model"], setup["sim_model_file"], domain="sim")[0]
    model_sim = model_sim.to(sup_device)
    model_real = load_model(
        setup["sup_model"], setup["real_model_file"], domain="real")[0]
    model_real = model_real.to(sup_device)
    model_critic = load_model(
        setup["sup_critic_model"], setup["critic_model_file"])[0]
    model_critic = model_critic.to(sup_device)

    # Each model paired with the keyword arguments that select its
    # checkpoint name; the order (real, sim, critic) is the load/save order.
    checkpointed = (
        (model_real, {"model": "stage1", "domain": "real"}),
        (model_sim, {"model": "stage1", "domain": "sim"}),
        (model_critic, {"model": "critic", "domain": "critic"}),
    )

    # ------------------------------------------------------------------
    print("SUPP: Initializing trainer")
    rlsup_params = P.get_current_parameters()["RLSUP"]
    sim_seed_dataset = rlsup_params.get("sim_seed_dataset")

    # TODO: Figure if 6000 or 7000 here
    trainer = TrainerBidomainBidata(
        model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
    train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
    train_envs_sim = [e for e in train_envs if e < 7000]
    dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
    dev_envs_sim = [e for e in dev_envs if e < 7000]
    trainer.set_dataset_names(sim_datasets=[rl_dataset_name(run_name)],
                              real_datasets=["real"])

    # ------------------------------------------------------------------
    # Find the first epoch that has no sim "stage1" checkpoint on disk.
    for start_sup_epoch in range(10000):
        epfname = epoch_sup_filename(
            run_name, start_sup_epoch, model="stage1", domain="sim")
        checkpoint_path = os.path.join(
            get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(checkpoint_path):
            break

    if start_sup_epoch > 0:
        print(f"SUPP: CONTINUING SUP TRAINING FROM EPOCH: {start_sup_epoch}")
        for net, name_kwargs in checkpointed:
            load_pytorch_model(
                net,
                epoch_sup_filename(
                    run_name, start_sup_epoch - 1, **name_kwargs))
        trainer.set_start_epoch(start_sup_epoch)

    # ------------------------------------------------------------------
    print("SUPP: Beginning training...")
    for epoch in range(start_sup_epoch, num_epochs):
        # Tell the RL process that a new Stage 1 model is ready for loading
        print("SUPP: Sending model to RL")
        model_sim.reset()
        message = ["stage1_model_state_dict", model_sim.state_dict()]
        rl_process_conn.send(message)

        if DEBUG_RL:
            # Debug mode: never train, just keep this process alive.
            while True:
                sleep(1)

        if not sim_seed_dataset:
            ddir = get_dataset_dir(rl_dataset_name(run_name))
            os.makedirs(ddir, exist_ok=True)
            while len(os.listdir(ddir)) < 20:
                print("SUPP: Waiting for rollouts to appear")
                sleep(3)

        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(env_list_common=train_envs_common,
                                         env_list_sim=train_envs_sim,
                                         eval=False)
        test_loss = trainer.train_epoch(env_list_common=dev_envs_common,
                                        env_list_sim=dev_envs_sim,
                                        eval=True)
        print("SUPP: Epoch", epoch, "train_loss:", train_loss,
              "test_loss:", test_loss)

        for net, name_kwargs in checkpointed:
            save_pytorch_model(
                net, epoch_sup_filename(run_name, epoch, **name_kwargs))
def multiple_eval_rollout():
    """Roll out several systems' policies on the evaluation environments.

    One policy is loaded per system namespace. For each evaluation
    environment that has segments, every policy whose data has not been
    collected yet is rolled out with a single shared SimplePolicyRoller,
    and the rollouts are saved under that system's temporary eval dataset
    name. With params["MultipleEval"]["one_at_a_time"] set, the function
    stops after the first environment on which at least one rollout was
    performed.
    """
    params, system_namespaces = setup_parameter_namespaces()
    setup_overlay = params["MultipleEval"]["SetupOverlay"]
    domain = "real" if setup_overlay["real_drone"] else "sim"
    one_at_a_time = params["MultipleEval"]["one_at_a_time"]
    check_and_prompt_if_data_exists(system_namespaces)

    # Load the systems
    # TODO: Check how many can fit in GPU memory. If not too many, perhaps
    # we can move them off-GPU between rounds
    policies = []
    for system_namespace in system_namespaces:
        P.switch_to_namespace(system_namespace)
        setup = P.get_current_parameters()["Setup"]
        policy, _ = load_model(setup["model"], setup["model_file"], domain)
        policies.append(policy)

    # ------------------------------------------------------------------
    # Initialize Roller
    # ------------------------------------------------------------------
    policy_roller = SimplePolicyRoller(instance_id=7,
                                       real_drone=setup_overlay["real_drone"],
                                       policy=None,
                                       oracle=None,
                                       no_reward=True)

    # ------------------------------------------------------------------
    # Collect rollouts
    # ------------------------------------------------------------------
    eval_envs = sorted(get_correct_eval_env_id_list())
    count = 0

    # Loop over environments
    for env_id in eval_envs:
        seg_ids = get_segs_available_for_env(env_id, 0)
        env_ids = [env_id] * len(seg_ids)
        # This message needs the f prefix; without it the literal text
        # "{env_id}" is printed instead of the id.
        print(f"Beginning rollouts for env: {env_id}")
        if len(seg_ids) == 0:
            print(" NO SEGMENTS! Next...")
            continue

        # Loop over systems and save data
        for i, (policy, system_namespace) in enumerate(
                zip(policies, system_namespaces)):
            print(
                f"Rolling policy in namespace {system_namespace} for env: {env_id}"
            )
            P.switch_to_namespace(system_namespace)
            setup = P.get_current_parameters()["Setup"]
            if env_data_already_collected(env_id, setup["model"],
                                          setup["run_name"]):
                print(f"Skipping env_id: {env_id}, policy: {setup['model']}")
                continue

            eval_dataset_name = get_eval_tmp_dataset_name(
                setup["model"], setup["run_name"])
            policy_roller.set_policy(policy)
            # when the last policy is done, we should land the drone
            policy_roller.rollout_segments(
                env_ids,
                seg_ids,
                None,
                False,
                0,
                save_dataset_name=eval_dataset_name,
                rl_rollout=False,
                land_afterwards=(i == len(policies) - 1))
            count += 1

        if one_at_a_time and count > 0:
            print("Stopping. Run again to roll-out on the next environment!")
            break

    print("Done")
def evaluate():
    """Measure how often the predicted goal location matches the label.

    For every segment of every batch, the arg-max location of the pooled
    label goal mask is compared with the arg-max location of channel 1 of
    the model's predicted mask. A prediction counts as a success when the
    Euclidean distance between the two locations is below OK_DIST.
    Segments whose label goal mask sums to less than 0.01 are skipped.
    Totals, the average distance and the success rate are printed.
    """

    def argmax_location(mask_np):
        # (row, col) of the largest entry of a 2-D array.
        return np.asarray(np.unravel_index(np.argmax(mask_np), mask_np.shape))

    P.initialize_experiment()

    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()

    model.eval()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # The keyword "dataset_prefix" used to be passed twice, which is a
    # SyntaxError. NOTE(review): "dataset_name" is assumed to be the keyword
    # that get_dataset expects for the first value - confirm against its
    # signature. "eval" used to receive the builtin eval function; pass an
    # explicit boolean instead.
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer
        # list is over the batch and the inner list is over the segments.
        # Loop through and accumulate loss for each batch sequentially, and
        # for each segment. Reset model state (embedding etc) between
        # batches, but not between segments.
        # We don't process each batch in batch-mode, because it's
        # complicated, with the varying number of segments and all.
        # TODO: This code is outdated and wrongly discretizes the goal
        # location. Grab the fixed version from the old branch.

        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)

        for i in range(batch_size):
            num_segments = len(instructions[i])

            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda,
                                       model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda,
                                 model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda,
                                      model.cuda_device)

                label_mask = model.label_pool(label_mask)

                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                argmax_loc_l = argmax_location(goal_mask_l_np)

                # Skip segments with an (almost) empty goal label.
                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(
                    image, instruction, instruction_mask)
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                argmax_loc = argmax_location(goal_mask_np)

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    if count > 0:
        print("avg dist: ", total_dist / float(count))
        print("success rate: ", success / float(count))
    else:
        # Avoid a ZeroDivisionError when every segment was skipped.
        print("No segments were evaluated; skipping averages")
def automatic_demo():
    """Endlessly replay the selected demo examples with the loaded model.

    Repeatedly walks over the dev-set instructions inside
    (env_range_start, env_range_end). For each instruction set whose env id
    matches an entry of "examples", the segment whose index equals that
    entry's third element is executed: the instruction is shown, the
    display is ticked for START_PAUSE seconds, and the model acts until the
    environment reports done, followed by END_PAUSE seconds of ticking.

    This function never returns; the outer loop runs forever.
    """
    P.initialize_experiment()
    instruction_display = InstructionDisplay()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = \
        get_all_instructions()
    # The test instructions used to be left out of this merge because
    # train_instructions was listed twice.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    # NOTE(review): "examples" is not defined in this function; it is
    # presumably a module-level sequence whose entries hold the env id at
    # index 0 and the segment index at index 2 - confirm.

    # Loop over the select few examples
    while True:
        for instruction_sets in interact_instructions.values():
            for set_idx, instruction_set in enumerate(instruction_sets):
                env_id = instruction_set['env']

                # The last example with a matching env id wins.
                found_example = None
                for example in examples:
                    if example[0] == env_id:
                        found_example = example
                if found_example is None:
                    continue

                env.set_environment(env_id, instruction_set["instructions"])

                presenter = Presenter()
                cumulative_reward = 0
                for seg_idx in range(len(instruction_set["instructions"])):
                    if seg_idx != found_example[2]:
                        continue

                    print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                    real_instruction_str = instruction_set["instructions"][
                        seg_idx]["instruction"]
                    instruction_display.show_instruction(real_instruction_str)
                    valid_segment = env.set_current_segment(seg_idx)
                    if not valid_segment:
                        continue
                    state = env.reset(seg_idx)

                    for i in range(START_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                        tok_instruction = tokenize_instruction(
                            real_instruction_str, word2token)

                    state = env.reset(seg_idx)
                    # The f prefix used to sit inside the literal, and the
                    # message named a variable that does not exist here.
                    print(f"Executing: {real_instruction_str}")
                    while True:
                        instruction_display.tick()
                        rate.sleep()
                        action, internals = model.get_action(
                            state, tok_instruction)
                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        if done:
                            break

                    for i in range(END_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)
                        print("Segment finished!")
                    instruction_display.show_instruction("...")

            print("Env finished!")
def train_top_down_pred(args, max_epoch=SUPERVISED_EPOCHS):
    """Train (or only evaluate) a top-down prediction model.

    Each epoch runs one training pass on the train envs (skipped when
    args.eval_pretrain is set) and one evaluation pass on the dev envs.
    When training, the model with the lowest dev loss so far is saved under
    the run filename, and a checkpoint and the pretrained weights are saved
    every epoch. With args.eval_pretrain set, only a single evaluation
    epoch is run. The per-epoch dev losses are written to a CSV file under
    the model directory.

    Args:
        args: Namespace-like object; this function reads run_name,
            setup_name, model, start_epoch, max_envs, restore_weights_name,
            fix_restored_weights and eval_pretrain.
        max_epoch: Training stops after an epoch whose index is greater
            than this value; None disables the check.
    """
    import os

    initialize_experiment(args.run_name, args.setup_name)

    model, model_loaded = load_model()

    # TODO: Get batch size from global parameter server when it exists
    single_sample_models = (
        "top_down",
        "top_down_prior",
        "top_down_sm",
        "top_down_pretrain",
        "top_down_goal_pretrain",
        "top_down_nav",
        "top_down_cond",
    )
    batch_size = 1 if args.model in single_sample_models else BATCH_SIZE

    lr = 0.001  # * batch_size
    trainer = Trainer(model,
                      epoch=args.start_epoch,
                      name=args.model,
                      run_name=args.run_name)

    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=args.max_envs)

    filename = "top_down_" + args.model + "_" + args.run_name

    if args.restore_weights_name is not None:
        restore_pretrained_weights(model, args.restore_weights_name,
                                   args.fix_restored_weights)

    print("Beginning training...")
    # Start from +inf so the first epoch always produces a "best" model,
    # whatever the scale of the loss.
    best_test_loss = float("inf")

    validation_loss = []

    for epoch in range(SUPERVISED_EPOCHS):
        train_loss = -1

        if not args.eval_pretrain:
            train_loss = trainer.train_epoch(train_envs=train_envs,
                                             eval=False)

        test_loss = trainer.train_epoch(train_envs=dev_envs, eval=True)
        validation_loss.append([epoch, test_loss])

        if not args.eval_pretrain:
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save_pytorch_model(trainer.model, filename)
                print("Saved model in:", filename)

            print("Epoch", epoch, "train_loss:", train_loss,
                  "test_loss:", test_loss)
            save_pytorch_model(trainer.model,
                               "tmp/" + filename + "_epoch_" + str(epoch))
            save_pretrained_weights(trainer.model, args.run_name)
        else:
            # Evaluation-only mode needs a single pass.
            break

        # NOTE(review): with "epoch > max_epoch" checked after the epoch
        # has run, max_epoch + 2 epochs are executed - confirm the intent.
        if max_epoch is not None and epoch > max_epoch:
            print("Reached epoch limit!")
            break

    test_loss_dir = get_model_dir(
    ) + "/test_loss/" + filename + "_test_loss.csv"
    # Make sure the target directory exists, so that the results of a whole
    # training run are not lost to a missing folder.
    os.makedirs(os.path.dirname(test_loss_dir), exist_ok=True)
    validation_loss = pd.DataFrame(validation_loss,
                                   columns=['epoch', "test_loss"])
    validation_loss.to_csv(test_loss_dir, index=False)
def interactive_demo():
    """Run the model on instructions typed by a user through the UI.

    Walks over the dev-set instructions inside
    (env_range_start, env_range_end). For every valid segment, the
    instruction file is polled every 200 ms: "CMD: Next" advances to the
    next segment, "CMD: Reset" resets the environment, and any text
    containing more than one space-separated word is executed as an
    instruction until the environment reports done.
    """
    P.initialize_experiment()
    InteractAPI.launch_ui()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = \
        get_all_instructions()
    # The test instructions used to be left out of this merge because
    # train_instructions was listed twice.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    count = 0
    stuck_count = 0

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    InteractAPI.write_empty_instruction()
    InteractAPI.write_real_instruction("None")
    instruction_str = InteractAPI.read_instruction_file()
    print("Initial instruction: ", instruction_str)

    for instruction_sets in interact_instructions.values():
        for set_idx, instruction_set in enumerate(instruction_sets):
            env_id = instruction_set['env']
            env.set_environment(env_id, instruction_set["instructions"])

            presenter = Presenter()
            cumulative_reward = 0
            for seg_idx in range(len(instruction_set["instructions"])):

                print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                real_instruction_str = instruction_set["instructions"][
                    seg_idx]["instruction"]
                InteractAPI.write_real_instruction(real_instruction_str)
                valid_segment = env.set_current_segment(seg_idx)
                if not valid_segment:
                    continue
                state = env.reset(seg_idx)

                keep_going = True
                while keep_going:
                    InteractAPI.write_real_instruction(real_instruction_str)

                    # Poll the instruction file until the user either
                    # issues a command or enters an instruction.
                    while True:
                        cv2.waitKey(200)
                        instruction = InteractAPI.read_instruction_file()
                        if instruction == "CMD: Next":
                            print("Advancing")
                            keep_going = False
                            InteractAPI.write_empty_instruction()
                            break
                        elif instruction == "CMD: Reset":
                            print("Resetting")
                            env.reset(seg_idx)
                            InteractAPI.write_empty_instruction()
                        elif len(instruction.split(" ")) > 1:
                            instruction_str = instruction
                            break

                    if not keep_going:
                        continue

                    env.override_instruction(instruction_str)
                    tok_instruction = tokenize_instruction(
                        instruction_str, word2token)

                    state = env.reset(seg_idx)
                    # The f prefix used to sit inside the string literal,
                    # so the instruction text was never interpolated.
                    print(f"Executing: {instruction_str}")
                    while True:
                        rate.sleep()
                        action, internals = model.get_action(
                            state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        presenter.show_sample(state, action, reward,
                                              cumulative_reward,
                                              instruction_str)
                        if done:
                            break
                    InteractAPI.write_empty_instruction()
                    print("Segment finished!")
        print("Env finished!")
def evaluate():
    """Roll out the configured policy on the eval envs and score the result.

    When the "Data" parameter "collect_n_at_a_time" is set, environments
    are rolled out in rounds of that size and every non-empty rollout is
    saved in the temporary eval dataset directory, so that an interrupted
    evaluation can be continued; an existing directory triggers a
    load/discard/cancel prompt. Otherwise all environments are rolled out
    in one call. The dataset is then evaluated with DataEvalLandmarkSide
    and/or DataEvalNL, depending on the setup flags, and the results are
    printed.

    Returns early, without evaluating, if the user cancels at the prompt.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = sorted(get_correct_eval_env_id_list())

    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(800) \
        .setHorizon(setup["trajectory_length"]) \
        .setStepsToForceStop(20) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(False) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(False) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"]) \
        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        examples = params["Eval"]["examples"]
        # Transpose the example tuples into four parallel lists.
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(list, zip(*examples)))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(
                    eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        # Skip environments for which rollouts were already loaded.
        collected_envs = {
            rollout[0]["env_id"]
            for rollout in cumulative_dataset
            if len(rollout) > 0
        }
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print(
                        "WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                # NOTE(review): assumes each rollout is a flat list of
                # sample dicts; a list-of-segments layout would need
                # rollout[0][0]["env_id"] - confirm.
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
def train_dagger_simple():
    """Train a model with a simple DAgger loop.

    Prepares the DAgger dataset (copied from a seed dataset, or an existing
    or new dataset directory), loads the model and the oracle, resumes from
    the last iteration for which a checkpoint file exists, and then in each
    iteration: prunes the dataset to the size limit, samples training
    rollouts and dev rollouts, optionally evaluates the dev rollouts with
    DataEvalNL, runs one supervised training epoch, and saves the model.

    Raises:
        ValueError: If the dataset directory already exists and the user
            does not answer "y" at the prompt.
    """
    # ------------------------------------------------------------------
    # Load params and configure stuff
    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and
    # processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(
                    f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again"
                )
        else:
            os.makedirs(data_path, exist_ok=True)
        # Number of entries still missing to reach the dataset size limit.
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ------------------------------------------------------------------
    # Load / initialize model
    model = load_model(setup["model"], setup["model_file"],
                       domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ------------------------------------------------------------------
    # Continue where we left off - load the model and set the
    # iteration/epoch number
    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(
            f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}"
        )
        load_pytorch_model(model,
                           epoch_dag_filename(run_name, start_iteration - 1))

    # ------------------------------------------------------------------
    # Initialize trainer
    trainer = Trainer(model,
                      epoch=start_iteration,
                      name=setup["model"],
                      run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ------------------------------------------------------------------
    # Initialize policy roller
    # Use the resolved device (with its "cuda:1" fallback) rather than
    # params["device"], which raises KeyError when the key is absent.
    roller = SimpleParallelPolicyRoller(
        num_workers=params["num_workers"],
        device=device,
        policy_name=setup["model"],
        policy_file=setup["model_file"],
        oracle=oracle,
        dataset_save_name=dagger_dataset_name(run_name),
        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ------------------------------------------------------------------
    # Train DAgger - loop over iterations, in each, prune, rollout and
    # train an epoch
    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Rollout and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))

        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill airsim instances so that they don't take up GPU memory and
        # in general slow things down during training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name,
                                entire_trajectory=False,
                                save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval",
                                   iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
def evaluate():
    """Roll out the configured policy on the evaluation environments and print metrics.

    Reads the "Setup" section (and the optional "Eval" section) of the current
    experiment parameters, rolls the policy out with either a parallel or a
    single-process roller depending on ``num_workers``, then runs whichever of
    the landmark-side / natural-language evaluators are enabled in the setup.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]
    worker_count = setup["num_workers"]

    # One model instance is loaded per worker; pretrained weights are restored
    # into each of them when a weights name is configured.
    models = []
    for _ in range(worker_count):
        model, _model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(
                model,
                setup["restore_weights_name"],
                setup["fix_restored_weights"],
            )
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    rollout_spec = (
        RollOutParams()
        .setModelName(setup["model"])
        .setModelFile(setup["model_file"])
        .setRunName(setup["run_name"])
        .setSetupName(P.get_setup_name())
        .setEnvList(eval_envs)
        .setMaxDeviation(400)
        .setHorizon(100)
        .setStepsToForceStop(10)
        .setPlot(False)
        .setShowAction(False)
        .setIgnorePolicyStop(False)
        .setPlotDir("evaluate/" + setup["run_name"])
        .setSavePlots(True)
        .setRealtimeFirstPerson(False)
        .setSaveSamples(False)
        .setBuildTrainData(False)
        .setSegmentReset("always")
        .setSegmentLevel(True)
        .setFirstSegmentOnly(False)
        .setDebug(setup["debug"])
        .setCuda(setup["cuda"])
    )

    # A custom evaluation overrides the environment list and supplies explicit
    # segments and instructions taken column-wise from the example tuples.
    instructions = None
    if "Eval" in params and params["Eval"]["custom_eval"]:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = (
            list(column) for column in zip(*examples)
        )
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        rollout_spec.setEnvList(eval_envs)
        rollout_spec.setSegList(eval_segs)
        rollout_spec.setCustomInstructions(instructions)

    roller = (
        ParallelPolicyRoller(num_workers=worker_count)
        if worker_count > 1
        else PolicyRoller()
    )
    dataset = roller.roll_out_policy(rollout_spec)

    # When both evaluators are enabled, the natural-language results replace
    # the landmark-side results in what gets printed.
    results = {}
    if setup["eval_landmark_side"]:
        side_evaler = DataEvalLandmarkSide(setup["run_name"])
        side_evaler.evaluate_dataset(dataset)
        results = side_evaler.get_results()
    if setup["eval_nl"]:
        nl_evaler = DataEvalNL(
            setup["run_name"],
            save_images=True,
            entire_trajectory=False,
            custom_instr=instructions,
        )
        nl_evaler.evaluate_dataset(dataset)
        results = nl_evaler.get_results()

    print("Results:", results)
def train_dagger():
    """Run bidomain DAgger: alternate simulator rollouts with supervised epochs.

    Each iteration trims the in-memory datasets, collects new simulator
    rollouts with the latest sim model, appends them to the simulator data,
    reloads the three "latest" models from file and trains them for
    ``PARAMS["epochs_per_iteration"]`` epochs. The "latest" checkpoints are
    rewritten after every epoch and a per-iteration snapshot is written at the
    end of every iteration.
    """
    P.initialize_experiment()
    global PARAMS
    PARAMS = P.get_current_parameters()["Dagger"]
    setup = P.get_current_parameters()["Setup"]
    roller = pick_policy_roller(setup)

    save_json(
        PARAMS,
        get_dagger_data_dir(setup, real_drone=False) + "run_params.json",
    )

    # Load less tf data, but sample dagger rollouts from more environments to
    # avoid overfitting.
    train_envs, dev_envs, test_envs = data_io.instructions.get_restricted_env_id_lists(
        max_envs=PARAMS["max_envs_dag"])

    all_train_data_real, all_dev_data_real = data_io.train_data.load_supervised_data(
        "real",
        max_envs=PARAMS["max_envs_sup"],
        split_segments=PARAMS["segment_level"],
    )
    all_train_data_sim, all_dev_data_sim = data_io.train_data.load_supervised_data(
        "simulator",
        max_envs=PARAMS["max_envs_sup"],
        split_segments=PARAMS["segment_level"],
    )

    print("Loaded data: ")
    print(
        f" Real train {len(all_train_data_real)}, dev {len(all_dev_data_real)}"
    )
    print(
        f" Sim train {len(all_train_data_sim)}, dev {len(all_dev_data_sim)}")

    # Load the models from the supervised stage and re-save them under the
    # "latest" filenames that the iterations below read from and write to.
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])
    for net, tag in ((model_sim, "sim"), (model_real, "real"), (model_critic, "critic")):
        data_io.model_io.save_pytorch_model(
            net, get_latest_model_filename(setup, tag))

    last_trainer_state = None
    separator = "-------------------------------"

    for iteration in range(0, PARAMS["max_iterations"]):
        gc.collect()
        print(separator)
        print("DAGGER ITERATION : ", iteration)
        print(separator)

        # Keep the in-memory training sets under a fixed bound.
        sample_cap = PARAMS["max_samples_in_memory"]
        all_train_data_real = discard_if_too_many(all_train_data_real, sample_cap)
        all_train_data_sim = discard_if_too_many(all_train_data_sim, sample_cap)

        # Roll out new data in simulation only.
        sim_checkpoint = get_latest_model_filename(setup, "sim")
        train_data_i_sim, dev_data_i_sim = collect_iteration_data(
            roller, iteration, train_envs, test_envs, sim_checkpoint)

        # Aggregate the dataset.
        all_train_data_sim += train_data_i_sim
        all_dev_data_sim += dev_data_i_sim
        print("Aggregated dataset!)")
        print("Total samples: ", len(all_train_data_sim))
        print("New samples: ", len(train_data_i_sim))

        for rollouts, suffix in ((all_train_data_sim, "train_latest"),
                                 (dev_data_i_sim, "test_latest")):
            data_io.train_data.save_dataset(
                "sim_dagger", rollouts, get_dagger_data_dir(setup, False) + suffix)

        # Reload all three models from the "latest" checkpoints.
        model_sim, _ = load_model(
            setup["model"], get_latest_model_filename(setup, "sim"), domain="sim")
        model_real, _ = load_model(
            setup["model"], get_latest_model_filename(setup, "real"), domain="real")
        model_critic, _ = load_model(
            setup["critic_model"], get_latest_model_filename(setup, "critic"))

        trainer = TrainerBidomain(
            model_real, model_sim, model_critic, state=last_trainer_state)

        # Hacky reset of the rollout flag after doing the rollouts.
        import rollout.run_metadata as run_md
        run_md.IS_ROLLOUT = False

        # Train on the newly aggregated dataset.
        for epoch in range(PARAMS["epochs_per_iteration"]):
            loss = trainer.train_epoch(
                data_list_real=all_train_data_real,
                data_list_sim=all_train_data_sim)
            dev_loss = trainer.train_epoch(
                data_list_real=all_dev_data_real,
                data_list_sim=dev_data_i_sim,
                eval=True)
            for net, tag in ((model_sim, "sim"),
                             (model_real, "real"),
                             (model_critic, "critic")):
                data_io.model_io.save_pytorch_model(
                    net, get_latest_model_filename(setup, tag))
            print("Epoch", epoch, "Loss: Train:", loss, "Test:", dev_loss)

        # Per-iteration snapshots, written in real / sim / critic order.
        for net, tag in ((model_real, "real"),
                         (model_sim, "sim"),
                         (model_critic, "critic")):
            data_io.model_io.save_pytorch_model(
                net, get_model_filename_at_iteration(setup, iteration, tag))

        last_trainer_state = trainer.get_state()
def train_supervised_bidomain():
    """Train the real-domain, sim-domain and critic models with supervision.

    Model names/files come from the "Setup" section, the epoch count from the
    "Supervised" section, and the ``use_oracle_critic`` / ``bidata`` switches
    from the "Training" section of the current experiment parameters.

    After every epoch the three models are saved under ``tmp/..._epoch_<n>``;
    whenever the dev loss improves they are additionally saved under the
    run-level ``supervised_...`` filenames.
    """
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    if P.get_current_parameters()["Training"].get("use_oracle_critic", False):
        model_oracle_critic, _ = load_model(setup["critic_model"],
                                            setup["critic_model_file"])
        # This changes the name in the summary writer to get a different color
        # plot; the model_name attribute itself is restored right afterwards.
        oname = model_oracle_critic.model_name
        model_oracle_critic.set_model_name(oname + "_oracle")
        model_oracle_critic.model_name = oname
    else:
        model_oracle_critic = None

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    real_filename = f"supervised_{setup['model']}_{setup['run_name']}_real"
    sim_filename = f"supervised_{setup['model']}_{setup['run_name']}_sim"
    critic_filename = f"supervised_{setup['critic_model']}_{setup['run_name']}_critic"

    # TODO: (Maybe) Implement continuing of training

    # Bidata means that we treat Lani++ and LaniOriginal examples differently,
    # only computing domain-adversarial stuff on Lani++.
    bidata = P.get_current_parameters()["Training"].get("bidata", False)
    if bidata:
        trainer = TrainerBidomainBidata(model_real, model_sim, model_critic,
                                        model_oracle_critic, epoch=0)
        # Environments with ids in [6000, 7000) form the "common" set.
        train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
        dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
        if bidata == "v2":
            # v2: the simulator side uses every environment.
            train_envs_sim = train_envs
            dev_envs_sim = dev_envs
        else:
            # v1: the simulator side only uses environments below 6000.
            train_envs_sim = [e for e in train_envs if e < 6000]
            dev_envs_sim = [e for e in dev_envs if e < 6000]
    else:
        trainer = TrainerBidomain(model_real, model_sim, model_critic,
                                  model_oracle_critic, epoch=0)

    print("Beginning training...")
    # Start from +inf so the first epoch always counts as the best so far,
    # whatever the scale of the loss; a finite sentinel would silently skip
    # the "best model" checkpoint if the loss never dropped below it.
    best_test_loss = float("inf")
    for epoch in range(num_epochs):
        if bidata:
            train_loss = trainer.train_epoch(env_list_common=train_envs_common,
                                             env_list_sim=train_envs_sim,
                                             eval=False)
            test_loss = trainer.train_epoch(env_list_common=dev_envs_common,
                                            env_list_sim=dev_envs_sim,
                                            eval=True)
        else:
            train_loss = trainer.train_epoch(env_list=train_envs, eval=False)
            test_loss = trainer.train_epoch(env_list=dev_envs, eval=True)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(model_real, real_filename)
            save_pytorch_model(model_sim, sim_filename)
            save_pytorch_model(model_critic, critic_filename)
            print(f"Saved models in: \n Real: {real_filename} \n Sim: {sim_filename} \n Critic: {critic_filename}")

        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(model_real, f"tmp/{real_filename}_epoch_{epoch}")
        save_pytorch_model(model_sim, f"tmp/{sim_filename}_epoch_{epoch}")
        save_pytorch_model(model_critic, f"tmp/{critic_filename}_epoch_{epoch}")
def train_supervised():
    """Train a single model with supervised learning and checkpoint every epoch.

    Reads the "Setup" and "Supervised" sections of the current experiment
    parameters. When ``start_epoch`` is positive, the weights saved for that
    epoch under ``tmp/`` are loaded first; if that file is missing the process
    exits with status -1. Each epoch runs one training pass on the training
    environments and one evaluation pass on the dev environments.

    Raises:
        SystemExit: if ``start_epoch > 0`` and the checkpoint file is missing.
    """
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("model:", model)
    print("model type:", type(model))
    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=setup["max_envs"])

    # Optionally restrict training to the environments of a named split.
    if "split_train_data" in supervised_params and supervised_params[
            "split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " +
              split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]

    # load_pytorch_model adds ".pytorch" to the end of the path but file_exists
    # doesn't, hence the two variants of the same path.
    model_path = "tmp/" + filename + "_epoch_" + str(
        supervised_params["start_epoch"])
    model_path_with_extension = model_path + ".pytorch"
    print("model path:", model_path_with_extension)
    if supervised_params["start_epoch"] > 0:
        if file_exists(model_path_with_extension):
            print("THE FILE EXISTS code1")
            load_pytorch_model(model, model_path)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(model_path_with_extension)
            # Raise SystemExit directly: the `exit` helper is injected by the
            # `site` module and is not guaranteed to exist in every runtime.
            raise SystemExit(-1)

    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"],
                                   setup["fix_restored_weights"])

    # Add a tensorboard logger to the model and trainer.
    tensorboard_dir = get_current_parameters()['Environment']['tensorboard_dir']
    logger = Logger(tensorboard_dir)
    model.logger = logger
    if hasattr(model, "goal_good_criterion"):
        print("gave logger to goal evaluator")
        model.goal_good_criterion.logger = logger

    trainer = Trainer(model,
                      epoch=supervised_params["start_epoch"],
                      name=setup["model"],
                      run_name=setup["run_name"])
    trainer.logger = logger

    print("Beginning training...")
    # Start from +inf so the first epoch always counts as the best so far,
    # whatever the scale of the loss.
    best_test_loss = float("inf")

    print("filename:", filename)

    # NOTE(review): the loop always counts epochs from 0, even when resuming
    # from start_epoch > 0, so the per-epoch checkpoints under tmp/ from the
    # earlier run are overwritten. Confirm whether resuming should instead
    # continue from start_epoch + 1.
    for epoch in range(0, num_epochs):
        train_loss = trainer.train_epoch(train_data=None,
                                         train_envs=train_envs,
                                         eval=False)

        # Reset the goal counters so the printed totals cover only the
        # evaluation pass that follows.
        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None,
                                        train_envs=dev_envs,
                                        eval=True)

        print("GOALS: ", trainer.model.correct_goals, trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)
        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(trainer.model,
                           "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
def train_top_down_pred():
    """Interactively run the top-down prediction model on evaluation environments.

    For every segment of every evaluation batch, the function waits for an
    instruction to appear in the instruction file, feeds it with the segment
    image to the top-down model, shows the predicted mask, hands the mask
    (warped with the segment's affine transform) to the wrapper model as its
    visitation distribution, and rolls the wrapper model out in the
    environment. The special instructions "CMD: Next" and "CMD: Reset"
    advance to the next segment and reset the current one, respectively.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    # Merge all three splits so that any evaluation environment can be looked
    # up, including test environments.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    # All batches are loaded up front, before the interactive loop starts.
    for batch in list(dataloader):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            # NOTE(review): called without token2term here but with it further
            # below; confirm that the second argument is optional.
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            # NOTE(review): hard-coded, machine-specific config location.
            with open(
                    "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                    + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)

            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file until a command or a multi-word
                # instruction shows up.
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str, word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]

                # Softmax over each of the two flattened rows, then restore
                # the original shape.
                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                # Scale up, apply the inverse of affine_g_to_s, scale back down.
                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                # Create a batch axis for pytorch
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Rescale both channels for display (channel 0 is additionally
                # doubled).
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                # The side-by-side image is built but then replaced: only the
                # prediction overlay is displayed.
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
def __init__(self, params, save_rollouts_to_dataset="", device=None):
    """Set up the rollout machinery, the model and the PPO optimiser.

    Args:
        params: mapping of hyper-parameters, read with ``params.get(...)``
            (``run_name`` is read by subscript and must be present).
        save_rollouts_to_dataset: passed to the policy roller as
            ``dataset_save_name``.
        device: device the full model is moved to and the roller is given.
    """
    # Schedule / rollout configuration.
    self.iterations_per_epoch = params.get("iterations_per_epoch", 1)
    self.test_iterations_per_epoch = params.get("test_iterations_per_epoch", 1)
    self.num_workers = params.get("num_workers")
    self.num_rollouts_per_iter = params.get("num_rollouts_per_iter")
    self.model_name = params.get("model") or params.get("rl_model")
    self.init_model_file = params.get("model_file")
    self.num_steps = params.get("trajectory_len")
    self.device = device
    self.summary_every_n = params.get("plot_every_n")

    # Rollout collection.
    self.roller = SimpleParallelPolicyRoller(
        num_workers=self.num_workers,
        device=self.device,
        policy_name=self.model_name,
        policy_file=self.init_model_file,
        dataset_save_name=save_rollouts_to_dataset,
    )
    self.rollout_sampler = RolloutSampler(self.roller)

    # Load the full model by name and move it to the target device.
    loaded_model, _ = load_model(self.model_name)
    self.full_model = loaded_model.to(self.device)
    self.actor_critic = self.full_model.stage2_action_generation
    # Only stage 1 is switched to eval mode; the actor-critic is left as is.
    self.full_model.stage1_visitation_prediction.eval()

    self.writer = LoggingSummaryWriter(
        log_dir=f"{get_logging_dir()}/runs/{params['run_name']}/ppo")

    self.global_step = 0
    self.stage1_updates = 0

    # Hyper-parameters kept on the instance.
    self.entropy_coef = params.get("entropy_coef")
    self.entropy_schedule_epochs = params.get("entropy_schedule_epochs", [])
    self.entropy_schedule_multipliers = params.get(
        "entropy_schedule_multipliers", [])
    self.minibatch_size = params.get("minibatch_size")
    self.use_gae = params.get("use_gae")
    self.gamma = params.get("gamma")
    self.gae_lambda = params.get("gae_lambda")
    self.intrinsic_reward_only = params.get("intrinsic_reward_only")

    self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)

    print(
        f"PPO trainable parameters: {get_n_trainable_params(self.actor_critic)}"
    )
    print(
        f"PPO actor-critic all parameters: {get_n_params(self.actor_critic)}"
    )

    # The remaining hyper-parameters are only needed by the optimiser.
    self.ppo = PPO(
        self.actor_critic,
        clip_param=params.get("clip"),
        ppo_epoch=1,
        num_mini_batch=params.get("num_mini_batch"),
        value_loss_coef=params.get("value_loss_coef"),
        entropy_coef=self.entropy_coef,
        lr=params.get("lr"),
        eps=params.get("eps"),
        max_grad_norm=params.get("max_grad_norm"),
        use_clipped_value_loss=params.get("use_clipped_value_loss"),
    )