def count_avg_num_steps():
    """Run ``count_avg_num_steps_on_data`` over the training environments.

    The env id lists are requested with the ``max_envs`` limit from the
    ``Setup`` section of the current experiment parameters.
    """
    P.initialize_experiment()
    max_envs = P.get_current_parameters()["Setup"]["max_envs"]
    # Only the training split is consumed; dev/test ids are ignored here.
    train_envs, _dev_envs, _test_envs = get_all_env_id_lists(max_envs)
    count_avg_num_steps_on_data(train_envs)
def train_sureal():
    """Start the RL and supervised training workers, linked by a pipe.

    Both pipe ends come from a "spawn" context obtained through ``mp``.
    Which worker runs where depends on the module-level debug flags:

    * ``DEBUG_SUP``: only the supervised worker runs, in this process.
    * ``DEBUG_RL``: the supervised worker runs in a child process and the
      RL worker runs in this process.
    * neither: both workers run in child processes.
    """
    P.initialize_experiment()
    spawn_ctx = mp.get_context("spawn")
    pipe_rl_end, pipe_sup_end = spawn_ctx.Pipe()

    sim_seed_dataset = P.get_current_parameters()["RLSUP"].get("sim_seed_dataset")
    run_name = P.get_current_parameters()["Setup"]["run_name"]

    # Trigger rebuild if necessary before going into all the threads and processes
    get_restricted_env_id_lists()
    get_restricted_env_id_lists(full=True)

    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=rl_dataset_name(run_name))

    def new_supervised_process():
        return spawn_ctx.Process(target=train_supervised_worker, args=[pipe_sup_end])

    if DEBUG_SUP:
        train_supervised_worker(pipe_sup_end)
        return

    if DEBUG_RL:
        # Start supervised learning in another process. Keep RL in main process.
        new_supervised_process().start()
        train_rl_worker(pipe_rl_end)
        return

    rl_process = spawn_ctx.Process(target=train_rl_worker, args=[pipe_rl_end])
    supervised_process = new_supervised_process()
    rl_process.start()
    supervised_process.start()
def generate_config_files(start_i, end_i):
    """Generate and save one environment config per index in [start_i, end_i).

    :param start_i: first config number to generate (inclusive)
    :param end_i: config number to stop at (exclusive)
    """
    P.initialize_experiment()

    for config_num in range(start_i, end_i):
        # It's easier to generate a config with less objects, so to have a truly
        # uniform distribution, the object count is sampled up front.
        if FORCE_LANDMARK_SELECTION:
            num_objects = len(FORCE_LANDMARK_SELECTION)
        else:
            num_objects = int(random.uniform(MIN_NUM_OBJECTS, MAX_NUM_OBJECTS))

        print("making config %d with %d objects" % (config_num, num_objects))

        # Keep retrying until try_make_config yields a config
        # (attempt to space landmarks).
        config = None
        attempts = 0
        while config is None:
            config = try_make_config(num_objects)
            attempts += 1
            sys.stdout.write("\r Attempts: " + str(attempts))
        print("")

        if MAKE_LAKES:
            config = add_lake_to_config(config, X_RANGE, Y_RANGE)

        out_path = paths.get_env_config_path(config_num)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, 'w') as out_file:
            json.dump(config, out_file)
def build_noisy_pose_data():
    """
    Pre-compute and save random pose noise for the RSS experiment with noisy poses.

    The noise is sampled once, before training, to simulate noise that was
    measured during trajectory collection. Re-randomizing poses during training
    would be akin to regularization, which could actually improve instead of
    hurt performance.
    :return:
    """
    initialize_experiment()
    params = get_current_parameters()
    setup_params = params["Setup"]
    data_params = params["Data"]
    max_envs = setup_params["max_envs"]

    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=max_envs,
        prune_ambiguous=setup_params["prune_ambiguous"])
    # The combined list is only used to report how many envs there are.
    envs = dev_envs + train_envs + test_envs
    print("Num envs:" + str(len(envs)))

    pos_noise = data_params["noisy_pos_variance"]
    rot_noise = data_params["noisy_rot_variance"]

    noisy_poses = get_pose_noise_np(
        max_envs, setup_params["trajectory_length"], pos_noise, rot_noise)
    save_noisy_poses(noisy_poses)
    print("saved noisy poses for " + str(max_envs) + " envs")
def collect_supervised_data():
    """Run ``collect_data_on_env_list`` on the dev environments.

    Env id lists are limited by ``Setup.max_envs``. Collection on the
    training split is currently disabled.
    """
    P.initialize_experiment()
    max_envs = P.get_current_parameters()["Setup"]["max_envs"]
    _train_envs, dev_envs, _test_envs = get_all_env_id_lists(max_envs)

    # collect_data_on_env_list(train_envs)
    collect_data_on_env_list(dev_envs)
def make_annotations(end_i):
    """Build the instruction annotation file for configs 0 .. end_i - 1.

    For every config id the config, curve and instruction files are read and
    one annotation record is appended to the split ("train", "dev" or "test")
    whose range from ``get_split_ranges(end_i)`` contains the id. The result
    is written as JSON to ``paths.get_instruction_annotations_path()``.

    :param end_i: number of configs to annotate (ids ``range(end_i)``)
    :raises ValueError: if a config id is not covered by any split range
    """
    P.initialize_experiment()

    annotations = {"train": [], "test": [], "dev": []}

    train_range, dev_range, test_range = get_split_ranges(end_i)
    assert (train_range[1] - train_range[0]) % NEW_CONFIG_EVERY_N == 0, \
        "training set size must be a multiple of NEW_CONFIG_EVERY_N"

    # Checked in this order; the first range containing the id wins.
    split_ranges = (("train", train_range), ("dev", dev_range), ("test", test_range))

    for config_id in range(end_i):
        config_path = paths.get_env_config_path(config_id)
        path_path = paths.get_curve_path(config_id)
        instruction_path = paths.get_instructions_path(config_id)

        # The config is still loaded (so a missing/corrupt file fails here),
        # but the start heading below is derived from the curve instead.
        with open(config_path) as fp:
            config = json.load(fp)

        with open(path_path) as fp:
            curve = json.load(fp)

        with open(instruction_path) as fp:
            instruction = fp.readline()

        token_list = clean_instruction(instruction)

        curve_np = np.asarray(list(zip(curve["x_array"], curve["z_array"])))

        split = next(
            (name for name, rng in split_ranges if rng[0] <= config_id < rng[1]),
            None)
        if split is None:
            # Previously this surfaced as an opaque ``KeyError: None`` below.
            raise ValueError(
                "config %d is not covered by any of the train/dev/test split ranges"
                % config_id)

        #start_dir = np.asarray(config["startHeading"]) - np.asarray(config["startPos"])
        # Initial heading is taken from the first two curve points.
        start_dir = curve_np[1] - curve_np[0]
        start_yaw = vec_to_yaw(start_dir)
        start_yaw_cfg = np.rad2deg(-start_yaw + np.pi / 2)

        dataset = {
            "id": str(config_id),
            "start_z": [curve["z_array"][0]],
            "start_x": [curve["x_array"][0]],
            "end_z": [curve["z_array"][-1]],
            "end_x": [curve["x_array"][-1]],
            "start_rot": [start_yaw_cfg],
            "config_file": "configs/random_config_%d.json" % config_id,
            "instructions_file": "instructions/instructions_%d.txt" % config_id,
            "path_file": "paths/random_curve_%d.json" % config_id,
            "moves": [],
            "valid": True,
            "num_tokens": [len(token_list)],
            "instructions": [instruction]
        }

        annotations[split].append(dataset)
        print("Added annotations for env: " + str(config_id))

    with open(paths.get_instruction_annotations_path(), "w") as fp:
        json.dump(annotations, fp)
def main():
    """Generate curves for every NEW_CONFIG_EVERY_N-th env in a process pool.

    Resets the module-level ``last_end_pos`` to ``None`` and maps
    ``make_curves_for_unique_config`` over
    ``range(START_I, END_I, NEW_CONFIG_EVERY_N)`` using 16 worker processes.
    """
    P.initialize_experiment()

    global last_end_pos
    last_end_pos = None

    pool = Pool(16)
    every_nth_env = range(START_I, END_I, NEW_CONFIG_EVERY_N)
    pool.map(make_curves_for_unique_config, every_nth_env)
    # close() must precede join(): joining a pool that is still accepting
    # work raises "Pool is still running".
    pool.close()
    pool.join()
def count_avg_num_steps():
    """Run ``count_avg_num_steps_on_data`` on training envs with id >= 6000.

    Sets ``Data.locking`` to ``False`` in the current parameters before the
    restricted env id lists are fetched.
    """
    P.initialize_experiment()
    current_params = P.get_current_parameters()
    setup = current_params["Setup"]
    P.get_current_parameters()["Data"]["locking"] = False

    all_train_envs, _dev_envs, _test_envs = get_restricted_env_id_lists()
    selected_envs = [env_id for env_id in all_train_envs if env_id >= 6000]
    count_avg_num_steps_on_data(selected_envs)
def train_supervised():
    """Train the configured model with supervised learning.

    Reads the ``Setup`` and ``Supervised`` parameter sections, optionally
    restricts the training envs to a named dataset split, optionally resumes
    from a ``tmp/..._epoch_<start_epoch>`` checkpoint and/or restores
    pretrained weights, then runs ``num_epochs`` train/dev epochs. The model
    is saved every epoch, and additionally under the plain run filename
    whenever the dev loss improves.
    """
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(max_envs=setup["max_envs"])

    if "split_train_data" in supervised_params and supervised_params["split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " + split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]

    def epoch_checkpoint(epoch_num):
        # Per-epoch checkpoint name under tmp/.
        return "tmp/" + filename + "_epoch_" + str(epoch_num)

    start_filename = epoch_checkpoint(supervised_params["start_epoch"])
    if supervised_params["start_epoch"] > 0:
        if not file_exists(start_filename):
            print("Couldn't continue training. Model file doesn't exist at:")
            print(start_filename)
            exit(-1)
        load_pytorch_model(model, start_filename)

    if setup["restore_weights_name"]:
        restore_pretrained_weights(
            model, setup["restore_weights_name"], setup["fix_restored_weights"])

    trainer = Trainer(
        model,
        epoch=supervised_params["start_epoch"],
        name=setup["model"],
        run_name=setup["run_name"])

    print("Beginning training...")
    best_test_loss = 1000
    for epoch in range(num_epochs):
        train_loss = trainer.train_epoch(train_data=None, train_envs=train_envs, eval=False)

        # Reset the goal counters so the printout covers the dev pass only.
        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None, train_envs=dev_envs, eval=True)
        print("GOALS: ", trainer.model.correct_goals, trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)

        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(trainer.model, epoch_checkpoint(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
def take_pics():
    """Save a flipped bird's-eye image for each selected env that lacks one.

    Env ids are distributed round-robin over ``NUM_WORKERS`` simulator
    interfaces. In every step each worker first gets its env set, then each
    worker snaps and saves the picture. Envs whose image file already exists
    are skipped.
    """
    P.initialize_experiment()
    train_i, dev_i, test_i, _ = get_all_instructions()
    all_instructions = {**train_i, **dev_i, **test_i}

    save_dir = paths.get_env_image_path(0)
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    keylist = list(all_instructions.keys())

    envs = [PomdpInterface(instance_id=worker) for worker in range(0, NUM_WORKERS)]
    env_id_splits = [[] for _ in range(NUM_WORKERS)]
    # The full key list above is overridden by this single hard-coded env id.
    keylist = [6825]

    for position, key in enumerate(keylist):
        env_id_splits[position % NUM_WORKERS].append(key)

    time.sleep(1.0)

    def pending_image(worker, step):
        # Returns (env_id, image path) if this worker still has to produce an
        # image at this step, otherwise None.
        assigned = env_id_splits[worker]
        if step >= len(assigned):
            return None
        env_id = assigned[step]
        fname = paths.get_env_image_path(env_id)
        if os.path.isfile(fname):
            print("Img exists: " + fname)
            return None
        return env_id, fname

    for step in range(len(keylist)):
        any_env_set = False

        # For each worker, start the correct env
        for worker in range(NUM_WORKERS):
            todo = pending_image(worker, step)
            if todo is None:
                continue
            env_id, _fname = todo
            any_env_set = True
            # FIXME: :This assumes that there is only 1 instruction set per env!
            instruction_set = all_instructions[env_id][0]
            envs[worker].set_environment(env_id, instruction_set["instructions"], fast=True)
            print("setting env on worker " + str(worker) + " iter " + str(step) + " env_id: " + str(env_id))

        # Then for each worker, take a picture and save it
        if any_env_set:
            time.sleep(0.1)
        for worker in range(NUM_WORKERS):
            todo = pending_image(worker, step)
            if todo is None:
                continue
            env_id, fname = todo
            # The first snapshot is discarded; only the second one is saved.
            envs[worker].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = envs[worker].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = np.flip(image, 0)
            imsave(fname, image)
            print("saving pic on worker " + str(worker) + " iter " + str(step) + " env_id: " + str(env_id))
def train_supervised_bidomain():
    """Run one evaluation epoch of the bidomain trainer and print metrics.

    Loads the sim, real and critic models named in ``Setup``, picks the env
    list selected by ``Setup.eval_env_set`` ("dev" by default), optionally
    narrows it to ``[env_range_start, env_range_end)``, and runs
    ``TrainerBidomain.train_epoch`` in eval mode. ``restricted_domain`` is
    hard-coded to "simulator" below, so the real-domain model is replaced by
    a dummy copy of the sim model.

    :raises ValueError: if ``Setup.eval_env_set`` is not dev/train/test
    """
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]

    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    env_list_name = setup.get("eval_env_set", "dev")
    if env_list_name == "dev":
        print("Using DEV envs")
        use_envs = dev_envs
    elif env_list_name == "train":
        print("Using TRAIN envs")
        use_envs = train_envs
    elif env_list_name == "test":
        print("Using TEST envs")
        use_envs = test_envs
    else:
        raise ValueError(f"Unknown env set {env_list_name}")

    # A missing (or null) bound means "no restriction". Without the fallback,
    # ``None > 0`` raised TypeError when the key was absent from the setup.
    env_range_start = setup.get("env_range_start") or 0
    if env_range_start > 0:
        use_envs = [e for e in use_envs if e >= env_range_start]
    env_range_end = setup.get("env_range_end") or 0
    if env_range_end > 0:
        use_envs = [e for e in use_envs if e < env_range_end]

    restricted_domain = "simulator"
    if restricted_domain == "simulator":
        # Load dummy model for real domain
        model_real, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
        model_sim.set_save_path_overlays(True)
    elif restricted_domain == "real":
        # Load dummy model for sim domain
        model_sim, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
        model_real.set_save_path_overlays(True)
    else:
        model_real.set_save_path_overlays(True)
        model_sim.set_save_path_overlays(True)

    trainer = TrainerBidomain(model_real, model_sim, model_critic, epoch=0)
    trainer.train_epoch(env_list=use_envs, eval=True, restricted_domain=restricted_domain)

    # Only print metrics for models that were actually evaluated.
    if restricted_domain != "simulator":
        model_real.print_metrics()
    if restricted_domain != "real":
        model_sim.print_metrics()
def make_annotations(start_i, end_i):
    """Write the instruction annotation file for configs in [start_i, end_i).

    Each config contributes one record, filed under the split returned by
    ``get_split`` for ``(config_id % 100) / 100.0``. The start rotation is
    derived from the config's ``startHeading`` and ``startPos``.

    :param start_i: first config id (inclusive)
    :param end_i: last config id (exclusive)
    """
    P.initialize_experiment()

    annotations = {"train": [], "test": [], "dev": []}

    for config_id in range(start_i, end_i):
        with open(paths.get_env_config_path(config_id)) as config_file:
            config = json.load(config_file)

        with open(paths.get_curve_path(config_id)) as curve_file:
            curve = json.load(curve_file)

        with open(paths.get_instructions_path(config_id)) as instruction_file:
            instruction = instruction_file.readline()

        token_list = clean_instruction(instruction)
        split = get_split((config_id % 100) / 100.0)

        start_pos = np.asarray(config["startPos"])
        start_heading = np.asarray(config["startHeading"])
        start_yaw = vec_to_yaw(start_heading - start_pos)
        start_yaw_cfg = np.rad2deg(-start_yaw + np.pi/2)

        x_array, z_array = curve["x_array"], curve["z_array"]
        record = {
            "id": str(config_id),
            "start_z": [z_array[0]],
            "start_x": [x_array[0]],
            "end_z": [z_array[-1]],
            "end_x": [x_array[-1]],
            "start_rot": [start_yaw_cfg],
            "config_file": "configs/random_config_%d.json" % config_id,
            "instructions_file": "instructions/instructions_%d.txt" % config_id,
            "path_file": "paths/random_curve_%d.json" % config_id,
            "moves": [],
            "valid": True,
            "num_tokens": [len(token_list)],
            "instructions": [instruction]
        }
        annotations[split].append(record)
        print("Added annotations for env: " + str(config_id))

    with open(paths.get_instruction_annotations_path(), "w") as out_file:
        json.dump(annotations, out_file)
def collect_supervised_data():
    """Collect data on the env set selected by ``Setup.env_set``.

    ``"train"`` collects on the training envs, ``"dev"`` on the dev envs, and
    any other value (including a missing key) collects on both, training
    first.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    train_envs, dev_envs, _test_envs = get_restricted_env_id_lists()

    env_set = P.get_current_parameters()["Setup"].get("env_set")
    if env_set == "train":
        print("Collecting data for training envs")
        selected_lists = [train_envs]
    elif env_set == "dev":
        print("Collecting data for dev envs")
        selected_lists = [dev_envs]
    else:
        print("Collecting data for both training and dev envs")
        selected_lists = [train_envs, dev_envs]

    for env_list in selected_lists:
        collect_data_on_env_list(env_list)
def loadPolicy(self):
    """Load the policy model into ``self.policy`` and return ``self``.

    Initializes the experiment named ``self.setup_name`` and stores the
    ``Rollout`` parameter section in ``self.params``. If ``self.model_name``
    is ``None`` nothing is loaded and an error is printed.

    :return: self, so calls can be chained
    """
    P.initialize_experiment(self.setup_name)
    self.params = P.get_current_parameters()["Rollout"]

    if self.model_name is None:
        print("Error! Requested loadPolicy, but model_name is None!")
        return self

    print("RollOutParams loading model")
    print("Use cuda: " + str(self.cuda))
    loaded = load_model(model_file_override=self.model_file)
    self.policy, self.policy_loaded = loaded
    # use_policy is set regardless of whether load_model produced a policy.
    self.use_policy = True

    status = "Loaded policy: " if self.policy is not None else "Error loading policy: "
    print(status, self.model_name)
    return self
def evaluate_top_down_pred():
    """Evaluate the supervised loss of the loaded model on the eval envs.

    Iterates the model's dataset one example at a time, accumulates
    ``sup_loss_on_batch`` (with visualization enabled), saves every image
    returned by ``model.get_viz()`` into the run's results directory and
    writes the average loss to the run's results JSON file.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    model, model_loaded = load_model()

    eval_envs = get_correct_eval_env_id_list()

    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # The original call passed ``dataset_prefix`` twice (a SyntaxError) and
    # handed the builtin ``eval`` function to the ``eval`` flag.
    # NOTE(review): ``dataset_name=`` is the presumed intended keyword for the
    # first argument -- confirm against the model's ``get_dataset`` signature.
    dataset = model.get_dataset(
        envs=eval_envs,
        dataset_name=dataset_name,
        dataset_prefix="supervised",
        eval=True)
    dataloader = DataLoader(
        dataset,
        collate_fn=dataset.collate_fn,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=False)

    total_loss = 0
    count = 0
    num_batches = len(dataloader)
    for b, batch in enumerate(dataloader):
        loss_var = model.sup_loss_on_batch(batch, eval=True, viz=True)
        total_loss += loss_var.data[0]
        count += 1
        print("batch: " + str(b) + " / " + str(num_batches) +
              " loss: " + str(loss_var.data[0]))
    avg_loss = total_loss / count

    results_dir = get_results_dir(setup["run_name"])
    results_json_path = get_results_path(setup["run_name"])
    os.makedirs(results_dir, exist_ok=True)

    # One image file per visualization entry: <key><index>_<model>.jpg
    viz = model.get_viz()
    for key, lst in viz.items():
        for i, img in enumerate(lst):
            img_path = os.path.join(
                results_dir, key + str(i) + "_" + setup["model"] + ".jpg")
            sp.misc.imsave(img_path, img)
            print("Saved image: " + img_path)

    with open(results_json_path, "w") as fp:
        json.dump({"loss": avg_loss}, fp)
def train_rl():
    """Train with ``TrainerRL``, resuming from the latest saved epoch.

    The start epoch is the first epoch (below 10000) that has no
    ``<epoch file>.pytorch`` in the model directory. When it is greater than
    zero, the previous epoch's weights are loaded before training continues.
    A checkpoint is saved after every epoch.
    """
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    params = get_current_parameters()["RL"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    filename = "rl_" + setup["model"] + "_" + setup["run_name"]

    trainer = TrainerRL(params=dict_merge(setup, params))

    def checkpoint_missing(epoch_num):
        checkpoint = str(epoch_filename(filename, epoch_num)) + ".pytorch"
        return not os.path.exists(os.path.join(get_model_dir(), checkpoint))

    # First epoch without a checkpoint; falls back to the last candidate
    # (9999) when every epoch already has one.
    start_epoch = next(
        (candidate for candidate in range(10000) if checkpoint_missing(candidate)),
        9999)

    if start_epoch > 0:
        print(f"CONTINUING RL TRAINING FROM EPOCH: {start_epoch}")
        load_pytorch_model(trainer.full_model, epoch_filename(filename, start_epoch - 1))
        trainer.set_start_epoch(start_epoch)

    print("Beginning training...")
    for epoch in range(start_epoch, 10000):
        train_reward, metrics = trainer.train_epoch(eval=False, envs="train")
        # TODO: Test on just a few dev environments
        # TODO: Take most likely or mean action when testing
        _, metrics = trainer.train_epoch(eval=True, envs="dev")
        # The dev reward from the pass above is discarded and reported as 0.
        dev_reward = 0
        #if dev_reward >= best_dev_reward:
        #    best_dev_reward = dev_reward
        #    save_pytorch_model(trainer.full_model, filename)
        #    print("Saved model in:", filename)
        print("Epoch", epoch, "train reward:", train_reward, "dev reward:", dev_reward)
        save_pytorch_model(trainer.full_model, epoch_filename(filename, epoch))
        if hasattr(trainer.full_model, "save"):
            trainer.full_model.save(epoch)
def generate_thesaurus():
    """Build and save a thesaurus from the identity term mapping.

    Terms come from ``get_identity_term_mapping`` and their landmark
    groundings from ``get_template_term_groundings``. No words are rejected.
    """
    P.initialize_experiment()
    _train_instr, _dev_instr, _test_instr, corpus = get_all_instructions()
    # Result is unused here; the call is kept as in the original flow.
    get_word_to_token_map(corpus, use_thesaurus=False)

    term2word, word2term = get_identity_term_mapping(corpus)
    term2landmark = get_template_term_groundings(corpus, word2term)

    save_landmark_alignments({
        "term2word": term2word,
        "word2term": word2term,
        "term_groundings": term2landmark,
        "rejected_words": []
    })
def analyze_lani():
    """Run ``analyze_instruction_set`` over the Lani, Real and Small subsets.

    Instruction sets are partitioned by env id: ids below 6000 are "Lani",
    ids from 6000 up are "Real". "Small" subsets are the real envs listed in
    ``test_small_envs`` / ``dev_small_envs``. Every subset is analyzed with
    ``merge_len`` 1 and 2.
    """
    P.initialize_experiment()
    train_i, dev_i, test_i, corpus = get_all_instructions()

    def lani_part(instructions):
        return {k: v for k, v in instructions.items() if int(k) < 6000}

    def real_part(instructions):
        return {k: v for k, v in instructions.items() if int(k) >= 6000}

    train_i_lani = lani_part(train_i)
    dev_i_lani = lani_part(dev_i)
    test_i_lani = lani_part(test_i)

    train_i_real = real_part(train_i)
    dev_i_real = real_part(dev_i)
    test_i_real = real_part(test_i)

    test_i_small = {k: v for k, v in test_i_real.items() if int(k) in test_small_envs}
    dev_i_small = {k: v for k, v in dev_i_real.items() if int(k) in dev_small_envs}

    all_i_real = {**train_i_real, **dev_i_real, **test_i_real}

    domains = (
        ("Lani", (("Train", train_i_lani), ("Dev", dev_i_lani), ("Test", test_i_lani))),
        ("Real", (("Train", train_i_real), ("Dev", dev_i_real), ("Test", test_i_real))),
    )
    for domain_name, splits in domains:
        for merge_len in (1, 2):
            for split_name, instructions in splits:
                analyze_instruction_set(
                    "%s %s %dSeg" % (domain_name, split_name, merge_len),
                    instructions, corpus, merge_len=merge_len)

    #analyze_instruction_set("Real Test 1Seg", test_i_real, corpus, merge_len=1)
    #analyze_instruction_set("Real Test 2Seg", test_i_real, corpus, merge_len=2)
    #analyze_instruction_set("Small Test 1Seg", test_i_small, corpus, merge_len=1)
    #analyze_instruction_set("Small Test 2Seg", test_i_small, corpus, merge_len=2)

    # The small dev subset is analyzed twice, exactly as in the original.
    for _repeat in range(2):
        for merge_len in (1, 2):
            analyze_instruction_set(
                "Small Dev %dSeg" % merge_len, dev_i_small, corpus, merge_len=merge_len)

    print("ding")
def main(start_i, end_i, config_type):
    """Generate and save env configs of the given type for [start_i, end_i).

    :param start_i: first config number (inclusive)
    :param end_i: last config number (exclusive)
    :param config_type: a ``ConfigType`` member choosing the generator; an
        unsupported value prints an error and quits with -1
    """
    P.initialize_experiment()

    random_types = (ConfigType.RANDOM, ConfigType.RANDOM_CORNER)

    for config_num in range(start_i, end_i):
        # It's easier to generate a config with less objects, so to have a truly
        # uniform distribution, we must sample it here.
        num_objects = int(random.uniform(MIN_NUM_OBJECTS, MAX_NUM_OBJECTS))
        if FORCE_LANDMARK_SELECTION:
            num_objects = len(FORCE_LANDMARK_SELECTION)

        print("making config %d with %d objects" % (config_num, num_objects))

        # attempt to space landmarks: retry until a generator returns a config
        config = None
        attempts = 0
        while config is None:
            if config_type in random_types:
                start_pos, start_heading = None, None
                if config_type == ConfigType.RANDOM_CORNER:
                    start_pos, start_heading = pick_drone_start_pos()
                config = try_make_config_random(num_objects, start_pos, start_heading)
                if config is not None and MAKE_LAKES:
                    config = add_lake_to_config(config, X_RANGE, Y_RANGE)
            elif config_type == ConfigType.CIRCLE_OF_LANDMARKS:
                config = try_make_config_circle_of_landmarks(num_objects)
            elif config_type == ConfigType.CIRCLE_PERMUTATIONS:
                config = try_make_config_circle_permutations(num_objects, config_num)
            else:
                print("Invalid config type!" + str(config_type))
                quit(-1)

            attempts += 1
            sys.stdout.write("\r Attemtps: " + str(attempts))
        print("")

        os.makedirs(paths.get_env_config_dir(), exist_ok=True)
        out_path = os.path.join(paths.get_env_config_path(config_num))

        with open(out_path, 'w') as out_file:
            json.dump(config, out_file)
def evaluate():
    """Evaluate a faux random-point dataset on the eval envs and print results.

    When ``Setup.eval_landmark_side`` is set, the dataset is scored with
    ``DataEvalLandmarkSide``. The ``all_dist`` entry is emptied before the
    results are printed.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    # At this point test and dev have been swapped.
    # Whatever we've been developing on called "test" is hereafter called dev
    # Test is the data that hasn't been touched at all
    eval_envs = get_correct_eval_env_id_list()

    dataset = faux_dataset_random_pt(eval_envs)
    #dataset = faux_dataset_random_landmark(eval_envs)

    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    else:
        results = {}

    results["all_dist"] = []
    print("Results:", results)
def generate_thesaurus():
    """Build and save a thesaurus from clustered corpus terms.

    Terms and rejected words come from ``cluster_corpus`` on the training
    instructions; ``ground_terms`` then produces the term groundings and the
    word-to-term map.
    """
    P.initialize_experiment()
    train_instr, _dev_instr, _test_instr, corpus = get_all_instructions()
    word2token = get_word_to_token_map(corpus, use_thesaurus=False)[1]

    terms, rejected_words = cluster_corpus(corpus, train_instr)
    landmark_names = get_landmark_names()
    term_groundings, word2term = ground_terms(
        word2token, terms, landmark_names, train_instr)

    save_landmark_alignments({
        "term2word": terms,
        "word2term": word2term,
        "term_groundings": term_groundings,
        "rejected_words": rejected_words
    })
def browse_pvn_dataset():
    """Step through the sim training dataset, showing each frame.

    For every non-empty example the metadata and instruction are printed and
    each image is displayed (waiting for a key), with the first-person
    landmark positions plotted on it when they are available.
    """
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, _dev_envs, _test_envs = get_restricted_env_id_lists()

    #dom="real"
    dom = "sim"
    dataset = model_sim.get_dataset(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False)

    presenter = Presenter()

    for example in dataset:
        if example is None:
            continue

        md = example["md"][0]
        print(
            f"Showing example: {md['env_id']}:{md['set_idx']}:{md['seg_idx']}")
        print(f" instruction: {md['instruction']}")

        frames = example["images"]
        landmarks_fpv = example["lm_pos_fpv"]
        for timestep in range(len(frames)):
            print(f" timestep: {timestep}")
            frame = frames[timestep]
            frame_landmarks = landmarks_fpv[timestep]
            if frame_landmarks is not None:
                frame = presenter.plot_pts_on_torch_image(frame, frame_landmarks.long())
            presenter.show_image(frame, "fpv_img_i", scale=4, waitkey=True)
def worker_process(conn, instance_id, save_dataset_name, policy, oracle, device, no_reward):
    """Serve rollout requests received over ``conn`` until told to stop.

    Messages are ``(msg, payload)`` pairs:

    * ``"Stop"``: leave the loop and close the connection.
    * ``"KillSim"``: drop the roller and kill the simulator processes.
    * ``"Restart"``: as KillSim, then wait 2 s and create a fresh roller.
    * ``"ReloadStaticState"``: install ``payload`` as the policy's
      ``stage1_visitation_prediction`` and hand the policy to the roller.
    * ``"Rollout"``: roll out the requested segments and send back the result.

    :raises ValueError: on any other message
    """
    # This doesn't carry over to subprocess, so have to re-load the params from json
    P.initialize_experiment()
    policy_roller = SimplePolicyRoller(
        instance_id=instance_id, policy=policy, oracle=oracle, no_reward=no_reward)
    current_policy = policy
    kill_simulator_cmd = "killall -9 MyProject5-Linux-Shipping"

    while True:
        msg, payload = conn.recv()

        if msg == "Stop":
            break

        if msg == "KillSim":
            print(f" RECV: {msg}")
            del policy_roller
            os.system(kill_simulator_cmd)
            policy_roller = None

        elif msg == "Restart":
            print(f" RECV: {msg}")
            del policy_roller
            os.system(kill_simulator_cmd)
            sleep(2)
            # NOTE(review): unlike the initial construction, oracle and
            # no_reward are not passed here -- confirm this is intended.
            policy_roller = SimplePolicyRoller(instance_id=instance_id, policy=current_policy)

        elif msg == "ReloadStaticState":
            print(f" RECV: {msg}")
            print(f"payload.device: {next(payload.parameters()).device}")
            # TODO: This should be general and not know anything about model details:
            current_policy.stage1_visitation_prediction = payload
            policy_roller.set_policy(current_policy)

        elif msg == "Rollout":
            # The roller is recreated lazily after a KillSim.
            if policy_roller is None:
                policy_roller = SimplePolicyRoller(
                    instance_id=instance_id, policy=current_policy, no_reward=no_reward)
            env_ids, seg_ids, policy_state, sample, dagger_beta = payload
            result = policy_roller.rollout_segments(
                env_ids, seg_ids, policy_state, sample, dagger_beta, save_dataset_name)
            conn.send(result)

        else:
            raise ValueError(f"Unrecognized worker task message: {msg}")

    conn.close()
def sample_real_data_subset():
    """Sample env ids group-wise and print them.

    If the module-level ``env_list`` is "DEV" or "TEST" it is replaced by the
    corresponding restricted env id list. ``num_env_groups`` groups are then
    sampled, and within each group ``envs_each_group`` distinct offsets in
    0..4 are drawn.
    """
    global env_list, num_env_groups
    P.initialize_experiment()

    if env_list == "DEV":
        _train_i, dev_i, _test_i = get_restricted_env_id_lists()
        env_list = dev_i
    elif env_list == "TEST":
        _train_i, _dev_i, test_i = get_restricted_env_id_lists()
        env_list = test_i

    # Each 5 subsequent environments are the same. First sample groups, then sample environments
    groups = list({int(env / 5) for env in env_list})

    group_envs_rel = {}
    for group in random.sample(groups, num_env_groups):
        offsets = []
        group_envs_rel[group] = offsets
        # Rejection-sample until enough distinct offsets were drawn.
        while len(offsets) < envs_each_group:
            candidate = random.randint(0, 4)
            if candidate not in offsets:
                offsets.append(candidate)

    env_ids_out = [
        group * 5 + offset
        for group, offsets in group_envs_rel.items()
        for offset in offsets
    ]

    print(f"Sampled {len(env_ids_out)} envs:")
    print(list(sorted(env_ids_out)))
def generate_config_files(start_i, end_i):
    """Write the config metadata file and one env config per index.

    A new config is generated only when the config number is a multiple of
    ``NEW_CONFIG_EVERY_N`` (or none exists yet); all other indices re-save
    the most recently generated config.

    :param start_i: first config number (inclusive)
    :param end_i: last config number (exclusive)
    """
    P.initialize_experiment()

    metadata_path = paths.get_config_metadata_path()
    os.makedirs(os.path.dirname(metadata_path), exist_ok=True)
    with open(paths.get_config_metadata_path(), "w") as metadata_file:
        json.dump({
            "all_landmark_names": list(PORTABLE_LANDMARK_RADII.keys()),
            "all_landmark_radii": PORTABLE_LANDMARK_RADII,
            "new_config_every_n": NEW_CONFIG_EVERY_N
        }, metadata_file)

    config = None
    for config_num in range(start_i, end_i):
        # It's easier to generate a config with less objects, so to have a truly
        # uniform distribution, we must sample it here.
        if FORCE_LANDMARK_SELECTION:
            num_objects = len(FORCE_LANDMARK_SELECTION)
        else:
            num_objects = int(random.uniform(MIN_NUM_OBJECTS, MAX_NUM_OBJECTS))

        needs_new_config = config_num % NEW_CONFIG_EVERY_N == 0 or config is None
        if needs_new_config:
            print("making config %d with %d objects" % (config_num, num_objects))
            # attempt to space landmarks: retry until a config is produced
            attempts = 0
            config = None
            while config is None:
                config = try_make_config(num_objects)
                attempts += 1
                sys.stdout.write("\r Attempts: " + str(attempts))
            print("")

            if MAKE_LAKES:
                config = add_lake_to_config(config, X_RANGE, Y_RANGE)

        out_path = paths.get_env_config_path(config_num)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, 'w') as out_file:
            json.dump(config, out_file)
def collect_fpv_images():
    """Save one labelled first-person image per landmark in ``LM_RADII``.

    For each landmark a default config is built, the landmark's position is
    set to (500, 500), the drone is teleported to a fixed pose and the
    captured image is annotated with the landmark's stage name and saved.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    flight_height = P.get_current_parameters()["PomdpInterface"]["flight_height"]

    controller_cls = drone_controller_factory(simulator=True)
    drone = controller_cls(instance=0, flight_height=flight_height)

    os.makedirs(get_landmark_images_dir(), exist_ok=True)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for landmark in LM_RADII.keys():
        print(f"Saving landmark: {landmark}")

        config, lm_name_to_idx = build_default_fpv_config(LM_RADII)
        idx = lm_name_to_idx[landmark]
        config["xPos"][idx] = 500
        config["zPos"][idx] = 500

        drone.set_current_env_from_config(config, instance_id=0)
        drone.reset_environment()
        time.sleep(0.5)
        drone.teleport_to([4.7 - 1.5, 4.7 - 1.5], 0.78 + 3.14159)
        drone.send_local_velocity_command([0.0, 0.0, 0.0])

        state, img = drone.get_state()
        Presenter().show_image(img, "img", waitkey=10)

        stage_name = get_landmark_stage_name(landmark)
        # Thick black text first, thinner white text on top: outlined label.
        for color, thickness in (((0, 0, 0), 5), ((255, 255, 255), 3)):
            cv2.putText(img, stage_name, (5, 40), font, 1.5, color, thickness)

        impath = get_landmark_image_path(stage_name)
        imageio.imsave(impath, img)
def generate_template_curves(start_i, end_i):
    """Run ``generate_template_curve`` for every index in [start_i, end_i).

    The work is distributed over a pool of 18 worker processes.

    :param start_i: first index (inclusive)
    :param end_i: last index (exclusive)
    """
    P.initialize_experiment()

    indices = range(start_i, end_i)
    workers = Pool(18)
    workers.map(generate_template_curve, indices)
    # Stop accepting work, then wait for the workers to exit.
    workers.close()
    workers.join()
def evaluate():
    """Roll out the configured policy on the eval envs and print the results.

    One model is loaded per worker (optionally restoring pretrained
    weights). If the ``Eval`` parameter section enables ``custom_eval``, the
    env list, segment list and instructions are taken from its ``examples``.
    The rollout dataset is scored by the landmark-side and/or NL evaluators
    selected in ``Setup``; when both are enabled the NL results are the ones
    printed.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]
    num_workers = setup["num_workers"]

    # import pdb;pdb.set_trace()
    models = []
    for _worker in range(num_workers):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(
                model, setup["restore_weights_name"], setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = (
        RollOutParams()
        .setModelName(setup["model"])
        .setModelFile(setup["model_file"])
        .setRunName(setup["run_name"])
        .setSetupName(P.get_setup_name())
        .setEnvList(eval_envs)
        .setMaxDeviation(400)
        .setHorizon(100)
        .setStepsToForceStop(10)
        .setPlot(False)
        .setShowAction(False)
        .setIgnorePolicyStop(False)
        .setPlotDir("evaluate/" + setup["run_name"])
        .setSavePlots(True)
        .setRealtimeFirstPerson(False)
        .setSaveSamples(False)
        .setBuildTrainData(False)
        .setSegmentReset("always")
        .setSegmentLevel(True)
        .setFirstSegmentOnly(False)
        .setDebug(setup["debug"])
        .setCuda(setup["cuda"])
    )

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        # Transpose the example rows into four parallel lists.
        eval_envs, eval_sets, eval_segs, instructions = [
            list(column) for column in zip(*examples)]
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if num_workers > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        side_evaler = DataEvalLandmarkSide(setup["run_name"])
        side_evaler.evaluate_dataset(dataset)
        results = side_evaler.get_results()
    if setup["eval_nl"]:
        nl_evaler = DataEvalNL(
            setup["run_name"],
            save_images=True,
            entire_trajectory=False,
            custom_instr=instructions)
        nl_evaler.evaluate_dataset(dataset)
        results = nl_evaler.get_results()

    print("Results:", results)
#roller = SimplePolicyRoller(policy_factory) roller = SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4) rollout_sampler = RolloutSampler(roller) # TODO: Load some policy print("Sampling once") rollouts = rollout_sampler.sample_n_rollouts(12, policy_state) print("Sampling twice") rollouts += rollout_sampler.sample_n_rollouts(12, policy_state) print("Sampling thrice") rollouts += rollout_sampler.sample_n_rollouts(12, policy_state) for rollout in rollouts: print("Visualizing rollout") for sample in rollout: state = sample["state"] image = state.get_rgb_image() Presenter().show_image(image, "fpv", waitkey=True, scale=4) print("Done!") roller.__exit__() print("ding") if __name__ == "__main__": P.initialize_experiment() mp.set_start_method('spawn') test_rollout_sampler()
class KeyTeleop(): def __init__(self): self.mon = MonitorSuper() self.thread = threading.Thread(target=self.run, args=()) self.thread.daemon = True self.thread.start() def run(self): self.mon.run() def get_command(self): return self.mon.current_vel initialize_experiment("nl_datacollect_cage") teleoper = KeyTeleop() rate = Rate(0.1) env = PomdpInterface() train_instructions, dev_instructions, test_instructions, _ = get_all_instructions( ) count = 0 stuck_count = 0 def show_depth(image): grayscale = np.mean(image[:, :, 0:3], axis=2)