def main():
    print("---------------------")
    print("Actions")
    print("STOP", HabitatSimActions.STOP)
    print("FORWARD", HabitatSimActions.MOVE_FORWARD)
    print("LEFT", HabitatSimActions.TURN_LEFT)
    print("RIGHT", HabitatSimActions.TURN_RIGHT)

    # Setup logging
    log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name)
    dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name)
    tb_dir = log_dir + "tensorboard"
    if not os.path.exists(tb_dir):
        os.makedirs(tb_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists("{}/images/".format(dump_dir)):
        os.makedirs("{}/images/".format(dump_dir))

    logging.basicConfig(filename=log_dir + 'train.log', level=logging.INFO)
    print("Dumping at {}".format(log_dir))
    print("Arguments: ", args)
    logging.info(args)

    device = args.device = torch.device("cuda:0" if args.cuda else "cpu")

    # Logging and loss variables
    num_scenes = args.num_processes
    num_episodes = int(args.num_episodes)

    # Setting up rewards and losses
    # policy_loss = 0
    best_cost = float('inf')
    costs = deque(maxlen=1000)
    exp_costs = deque(maxlen=1000)
    pose_costs = deque(maxlen=1000)
    l_masks = torch.zeros(num_scenes).float().to(device)
    # best_local_loss = np.inf
    # if args.eval:
    #     traj_lengths = args.max_episode_length // args.num_local_steps
    # l_action_losses = deque(maxlen=1000)
    print("Setup rewards")

    print("Starting environments ...")
    # Starting environments
    torch.set_num_threads(1)
    envs = make_vec_envs(args)
    obs, infos = envs.reset()
    print("Environments reset")
    # show_gpu_usage()

    # Initialize map variables
    ### Full map consists of 4 channels containing the following:
    ### 1. Obstacle Map
    ### 2. Explored Area
    ### 3. Current Agent Location
    ### 4. Past Agent Locations
    print("Creating maps and poses")
    torch.set_grad_enabled(False)

    # Calculating full and local map sizes
    map_size = args.map_size_cm // args.map_resolution
    full_w, full_h = map_size, map_size
    local_w, local_h = int(full_w / args.global_downscaling), \
        int(full_h / args.global_downscaling)

    # Initializing full and local map
    full_map = torch.zeros(num_scenes, 4, full_w, full_h).float().to(device)
    local_map = torch.zeros(num_scenes, 4, local_w, local_h).float().to(device)

    # Initial full and local pose
    full_pose = torch.zeros(num_scenes, 3).float().to(device)
    local_pose = torch.zeros(num_scenes, 3).float().to(device)

    # Origin of local map
    origins = np.zeros((num_scenes, 3))

    # Local Map Boundaries
    lmb = np.zeros((num_scenes, 4)).astype(int)

    ### Planner pose inputs have 7 dimensions
    ### 1-3 store continuous global agent location
    ### 4-7 store local map boundaries
    planner_pose_inputs = np.zeros((num_scenes, 7))
    # show_gpu_usage()

    start_full_pose = np.zeros(3)
    start_full_pose[:2] = args.map_size_cm / 100.0 / 2.0

    def init_map_and_pose():
        full_map.fill_(0.)
        full_pose.fill_(0.)
        full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0

        full_pose_np = full_pose.cpu().numpy()
        planner_pose_inputs[:, :3] = full_pose_np

        for e in range(num_scenes):
            r, c = full_pose_np[e, 1], full_pose_np[e, 0]
            loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
                            int(c * 100.0 / args.map_resolution)]

            full_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0

            lmb[e] = get_local_map_boundaries((loc_r, loc_c),
                                              (local_w, local_h),
                                              (full_w, full_h))

            planner_pose_inputs[e, 3:] = lmb[e]
            origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
                          lmb[e][0] * args.map_resolution / 100.0, 0.]
        for e in range(num_scenes):
            local_map[e] = full_map[e, :,
                                    lmb[e, 0]:lmb[e, 1],
                                    lmb[e, 2]:lmb[e, 3]]
            local_pose[e] = full_pose[e] - \
                torch.from_numpy(origins[e]).to(device).float()

    init_map_and_pose()
    print("Maps and poses initialized")

    print("Defining architecture")
    # SLAM module
    nslam_module = Neural_SLAM_Module(args).to(device)
    slam_optimizer = get_optimizer(nslam_module.parameters(),
                                   args.slam_optimizer)
    slam_memory = FIFOMemory(args.slam_memory_size)

    # # Local policy
    # print("policy observation space", envs.observation_space.spaces['rgb'])
    # print("policy action space ", envs.action_space)
    # l_observation_space = gym.spaces.Box(0, 255,
    #                                      (3, args.frame_width,
    #                                       args.frame_width), dtype='uint8')
    # # todo change this to use envs.observation_space.spaces['rgb'].shape later
    # l_policy = Local_IL_Policy(l_observation_space.shape, envs.action_space.n,
    #                            recurrent=args.use_recurrent_local,
    #                            hidden_size=args.local_hidden_size,
    #                            deterministic=args.use_deterministic_local).to(device)
    # local_optimizer = get_optimizer(l_policy.parameters(), args.local_optimizer)
    # show_gpu_usage()

    print("Loading model weights")
    # Loading model
    if args.load_slam != "0":
        print("Loading slam {}".format(args.load_slam))
        state_dict = torch.load(args.load_slam,
                                map_location=lambda storage, loc: storage)
        nslam_module.load_state_dict(state_dict)

    if not args.train_slam:
        nslam_module.eval()

    # if args.load_local != "0":
    #     print("Loading local {}".format(args.load_local))
    #     state_dict = torch.load(args.load_local,
    #                             map_location=lambda storage, loc: storage)
    #     l_policy.load_state_dict(state_dict)
    # if not args.train_local:
    #     l_policy.eval()

    print("Predicting first pose and initializing maps")
    # if not (args.use_gt_pose and args.use_gt_map):

    # delta_pose is the expected change in pose when an action is applied at
    # the current pose in the absence of noise; initially no action has been
    # applied, so this is zero.
    delta_poses = torch.from_numpy(np.zeros(local_pose.shape)).float().to(device)

    # Initial estimate of the local pose and local map from the first
    # observation and the initialized (zero) pose and map.
    _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \
        nslam_module(obs, obs, delta_poses,
                     local_map[:, 0, :, :],
                     local_map[:, 1, :, :],
                     local_pose)

    # if args.use_gt_pose:
    #     # todo update local_pose here
    #     full_pose = envs.get_gt_pose()
    #     for e in range(num_scenes):
    #         local_pose[e] = full_pose[e] - \
    #             torch.from_numpy(origins[e]).to(device).float()
    # if args.use_gt_map:
    #     full_map = envs.get_gt_map()
    #     for e in range(num_scenes):
    #         local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]]

    print("SLAM module returned pose and maps")

    # NOT NEEDED: 4/29
    local_pose_np = local_pose.cpu().numpy()

    # Update the agent-location channels of the local map for each scene
    # (input for the planner).
    for e in range(num_scenes):
        r, c = local_pose_np[e, 1], local_pose_np[e, 0]
        loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
                        int(c * 100.0 / args.map_resolution)]
        local_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.
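    # The conversion above (pose in meters -> map cell) recurs throughout this
    # file: x maps to the column and y to the row, and poses in meters are
    # converted to cells of args.map_resolution centimeters. A minimal helper
    # equivalent, shown only for clarity (hypothetical; the original code
    # inlines this computation and never calls such a helper):
    def pose_to_map_cell(x_m, y_m, resolution_cm):
        """Return (row, col) map indices for a pose given in meters."""
        return (int(y_m * 100.0 / resolution_cm),
                int(x_m * 100.0 / resolution_cm))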
    # # todo: get goal from env here
    global_goals = envs.get_goal_coords().int()

    # Compute planner inputs
    planner_inputs = [{} for e in range(num_scenes)]
    for e, p_input in enumerate(planner_inputs):
        p_input['goal'] = global_goals[e].detach().cpu().numpy()
        p_input['map_pred'] = local_map[e, 0, :, :].detach().cpu().numpy()
        p_input['exp_pred'] = local_map[e, 1, :, :].detach().cpu().numpy()
        p_input['pose_pred'] = planner_pose_inputs[e]

    # Output stores local goals as well as the ground-truth action
    planner_out = envs.get_short_term_goal(planner_inputs)
    # The planner output contains:
    #   - distance to the short-term goal: a positive, discretized number
    #   - angle to the short-term goal: -180 to 180 degrees in buckets of
    #     5 degrees, so multiply by 5 to get the true angle
    #   - GT action: the action to be taken according to the planner (int)

    # We are going to step through the episodes, so cache previous information
    last_obs = obs.detach()
    local_rec_states = torch.zeros(num_scenes, args.local_hidden_size).to(device)
    start = time.time()
    total_num_steps = -1
    torch.set_grad_enabled(False)

    print("Starting episodes")
    with TensorboardWriter(tb_dir, flush_secs=60) as writer:
        for itr_counter, ep_num in enumerate(range(num_episodes)):
            print("------------------------------------------------------")
            print("Episode", ep_num)
            # if itr_counter >= 20:
            #     print("DONE WE FIXED IT")
            #     die()

            # for step in range(args.max_episode_length):
            step_bar = tqdm(range(args.max_episode_length))
            for step in step_bar:
                # print("------------------------------------------------------")
                # print("episode ", ep_num, "step ", step)
                total_num_steps += 1
                l_step = step % args.num_local_steps

                # ------------------------------------------------------------------
                # Local Policy
                # Cache previous information
                del last_obs
                last_obs = obs.detach()

                # if not args.use_optimal_policy and not args.use_shortest_path_gt:
                #     local_masks = l_masks
                #     local_goals = planner_out[:, :-1].to(device).long()
                #     if args.train_local:
                #         torch.set_grad_enabled(True)
                #     # local policy "step"
                #     action, action_prob, local_rec_states = l_policy(
                #         obs,
                #         local_rec_states,
                #         local_masks,
                #         extras=local_goals,
                #     )
                #     if args.train_local:
                #         action_target = planner_out[:, -1].long().to(device)
                #         # doubt: this is probably wrong? one is action probability and the other is action
                #         policy_loss += nn.CrossEntropyLoss()(action_prob, action_target)
                #         torch.set_grad_enabled(False)
                #     l_action = action.cpu()
                # else:
                #     if args.use_optimal_policy:
                #         l_action = planner_out[:, -1]
                #     else:
                #         l_action = envs.get_optimal_gt_action()
                l_action = envs.get_optimal_action(start_full_pose, full_pose).cpu()
                # if step > 10:
                #     l_action = torch.tensor([HabitatSimActions.STOP])
                # ------------------------------------------------------------------

                # ------------------------------------------------------------------
                # Env step
                # print("stepping with action ", l_action)
                # try:
                obs, rew, done, infos = envs.step(l_action)
                # ------------------------------------------------------------------

                # Reinitialize variables when the episode ends.
                # doubt: what if the episode ends before max_episode_length?
                # maybe add (or done) here?
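                # As the comment above asks, the reinitialization check below
                # could also react to the env's `done` flag; a sketch of that
                # condition (an assumption, not enabled here):
                #
                #     if l_action == HabitatSimActions.STOP \
                #             or step == args.max_episode_length - 1 \
                #             or any(done):
                #         ...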
                if l_action == HabitatSimActions.STOP or step == args.max_episode_length - 1:
                    print("l_action", l_action)
                    init_map_and_pose()
                    del last_obs
                    last_obs = obs.detach()
                    print("Reinitialized since we are at the end of the episode")
                    obs, infos = envs.reset()
                # except:
                #     print("can't do that")
                #     print(l_action)
                #     init_map_and_pose()
                #     del last_obs
                #     last_obs = obs.detach()
                #     print("Reinitialize since at end of episode ")
                #     break

                # step_bar.set_description("rew, done, info-sensor_pose, pose_err (stepping) {}, {}, {}, {}".format(rew, done, infos[0]['sensor_pose'], infos[0]['pose_err']))
                if total_num_steps % args.log_interval == 0 and False:
                    print("rew, done, info-sensor_pose, pose_err after stepping ",
                          rew, done, infos[0]['sensor_pose'], infos[0]['pose_err'])

                # l_masks = torch.FloatTensor([0 if x else 1
                #                              for x in done]).to(device)
                # ------------------------------------------------------------------

                # # ------------------------------------------------------------------
                # # Reinitialize variables when episode ends
                # # doubt what if episode ends before max_episode_length?
                # # maybe add (or done ) here?
                # if step == args.max_episode_length - 1 or l_action == HabitatSimActions.STOP:  # Last episode step
                #     init_map_and_pose()
                #     del last_obs
                #     last_obs = obs.detach()
                #     print("Reinitialize since at end of episode ")
                #     break
                # ------------------------------------------------------------------

                # ------------------------------------------------------------------
                # Neural SLAM Module
                delta_poses_np = np.zeros(local_pose_np.shape)
                if args.train_slam:
                    # Add frames to memory
                    for env_idx in range(num_scenes):
                        env_obs = obs[env_idx].to("cpu")
                        env_poses = torch.from_numpy(np.asarray(
                            delta_poses_np[env_idx]
                        )).float().to("cpu")
                        env_gt_fp_projs = torch.from_numpy(np.asarray(
                            infos[env_idx]['fp_proj']
                        )).unsqueeze(0).float().to("cpu")
                        env_gt_fp_explored = torch.from_numpy(np.asarray(
                            infos[env_idx]['fp_explored']
                        )).unsqueeze(0).float().to("cpu")
                        # TODO change pose err here
                        env_gt_pose_err = torch.from_numpy(np.asarray(
                            infos[env_idx]['pose_err']
                        )).float().to("cpu")
                        slam_memory.push(
                            (last_obs[env_idx].cpu(), env_obs, env_poses),
                            (env_gt_fp_projs, env_gt_fp_explored, env_gt_pose_err))
                        # Expected (noise-free) pose change for the action just taken;
                        # see the hedged sketch of get_delta_pose after main() below.
                        delta_poses_np[env_idx] = get_delta_pose(local_pose_np[env_idx],
                                                                 l_action[env_idx])

                delta_poses = torch.from_numpy(delta_poses_np).float().to(device)
                # print("delta pose from SLAM ", delta_poses)
                _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \
                    nslam_module(last_obs, obs, delta_poses,
                                 local_map[:, 0, :, :],
                                 local_map[:, 1, :, :],
                                 local_pose, build_maps=True)
                # print("updated local pose from SLAM ", local_pose)

                # if args.use_gt_pose:
                #     # todo update local_pose here
                #     full_pose = envs.get_gt_pose()
                #     for e in range(num_scenes):
                #         local_pose[e] = full_pose[e] - \
                #             torch.from_numpy(origins[e]).to(device).float()
                #     print("updated local pose from gt ", local_pose)
                # if args.use_gt_map:
                #     full_map = envs.get_gt_map()
                #     for e in range(num_scenes):
                #         local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]]
                #     print("updated local map from gt")

                local_pose_np = local_pose.cpu().numpy()
                planner_pose_inputs[:, :3] = local_pose_np + origins
                local_map[:, 2, :, :].fill_(0.)  # Resetting current location channel
                for e in range(num_scenes):
                    r, c = local_pose_np[e, 1], local_pose_np[e, 0]
                    loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
                                    int(c * 100.0 / args.map_resolution)]
                    local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1.
                if l_step == args.num_local_steps - 1:
                    # For every global step, update the full and local maps
                    for e in range(num_scenes):
                        full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \
                            local_map[e]
                        full_pose[e] = local_pose[e] + \
                            torch.from_numpy(origins[e]).to(device).float()

                        full_pose_np = full_pose[e].cpu().numpy()
                        r, c = full_pose_np[1], full_pose_np[0]
                        loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
                                        int(c * 100.0 / args.map_resolution)]

                        lmb[e] = get_local_map_boundaries((loc_r, loc_c),
                                                          (local_w, local_h),
                                                          (full_w, full_h))

                        planner_pose_inputs[e, 3:] = lmb[e]
                        origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
                                      lmb[e][0] * args.map_resolution / 100.0, 0.]

                        local_map[e] = full_map[e, :,
                                                lmb[e, 0]:lmb[e, 1],
                                                lmb[e, 2]:lmb[e, 3]]
                        local_pose[e] = full_pose[e] - \
                            torch.from_numpy(origins[e]).to(device).float()

                    local_pose_np = local_pose.cpu().numpy()
                    planner_pose_inputs[:, :3] = local_pose_np + origins
                    local_map[:, 2, :, :].fill_(0.)  # Resetting current location channel
                    for e in range(num_scenes):
                        r, c = local_pose_np[e, 1], local_pose_np[e, 0]
                        loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
                                        int(c * 100.0 / args.map_resolution)]
                        local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1.

                planner_inputs = [{} for e in range(num_scenes)]
                for e, p_input in enumerate(planner_inputs):
                    p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy()
                    p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy()
                    p_input['pose_pred'] = planner_pose_inputs[e]
                    p_input['goal'] = global_goals[e].cpu().numpy()
                planner_out = envs.get_short_term_goal(planner_inputs)

                ### TRAINING
                torch.set_grad_enabled(True)
                # ------------------------------------------------------------------
                # Train Neural SLAM Module
                if args.train_slam and len(slam_memory) > args.slam_batch_size:
                    for _ in range(args.slam_iterations):
                        inputs, outputs = slam_memory.sample(args.slam_batch_size)
                        b_obs_last, b_obs, b_poses = inputs
                        gt_fp_projs, gt_fp_explored, gt_pose_err = outputs

                        b_obs = b_obs.to(device)
                        b_obs_last = b_obs_last.to(device)
                        b_poses = b_poses.to(device)
                        gt_fp_projs = gt_fp_projs.to(device)
                        gt_fp_explored = gt_fp_explored.to(device)
                        gt_pose_err = gt_pose_err.to(device)

                        b_proj_pred, b_fp_exp_pred, _, _, b_pose_err_pred, _ = \
                            nslam_module(b_obs_last, b_obs, b_poses,
                                         None, None, None, build_maps=False)

                        loss = 0
                        if args.proj_loss_coeff > 0:
                            proj_loss = F.binary_cross_entropy(b_proj_pred, gt_fp_projs)
                            costs.append(proj_loss.item())
                            loss += args.proj_loss_coeff * proj_loss

                        if args.exp_loss_coeff > 0:
                            exp_loss = F.binary_cross_entropy(b_fp_exp_pred, gt_fp_explored)
                            exp_costs.append(exp_loss.item())
                            loss += args.exp_loss_coeff * exp_loss

                        if args.pose_loss_coeff > 0:
                            pose_loss = torch.nn.MSELoss()(b_pose_err_pred, gt_pose_err)
                            pose_costs.append(args.pose_loss_coeff * pose_loss.item())
                            loss += args.pose_loss_coeff * pose_loss

                        if args.train_slam:
                            slam_optimizer.zero_grad()
                            loss.backward()
                            slam_optimizer.step()

                        del b_obs_last, b_obs, b_poses
                        del gt_fp_projs, gt_fp_explored, gt_pose_err
                        del b_proj_pred, b_fp_exp_pred, b_pose_err_pred
                # ------------------------------------------------------------------

                # ------------------------------------------------------------------
                # Train Local Policy
                # if (l_step + 1) % args.local_policy_update_freq == 0 \
                #         and args.train_local:
                #     local_optimizer.zero_grad()
                #     policy_loss.backward()
                #     local_optimizer.step()
                #     l_action_losses.append(policy_loss.item())
                #     policy_loss = 0
                #     local_rec_states = local_rec_states.detach_()
                # ------------------------------------------------------------------
                # Finish Training
                torch.set_grad_enabled(False)
                # ------------------------------------------------------------------

                # ------------------------------------------------------------------
                # Logging
                writer.add_scalar("SLAM_Loss_Proj", np.mean(costs), total_num_steps)
                writer.add_scalar("SLAM_Loss_Exp", np.mean(exp_costs), total_num_steps)
                writer.add_scalar("SLAM_Loss_Pose", np.mean(pose_costs), total_num_steps)

                gettime = lambda: str(datetime.now()).split('.')[0]
                if total_num_steps % args.log_interval == 0:
                    end = time.time()
                    time_elapsed = time.gmtime(end - start)
                    log = " ".join([
                        "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1),
                        "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)),
                        gettime(),
                        "num timesteps {},".format(total_num_steps * num_scenes),
                        "FPS {},".format(int(total_num_steps * num_scenes
                                             / (end - start)))
                    ])

                    log += "\n\tLosses:"
                    # if args.train_local and len(l_action_losses) > 0:
                    #     log += " ".join([
                    #         " Local Loss:",
                    #         "{:.3f},".format(np.mean(l_action_losses))
                    #     ])
                    if args.train_slam and len(costs) > 0:
                        log += " ".join([
                            " SLAM Loss proj/exp/pose:"
                            "{:.4f}/{:.4f}/{:.4f}".format(
                                np.mean(costs),
                                np.mean(exp_costs),
                                np.mean(pose_costs))
                        ])

                    print(log)
                    logging.info(log)
                # ------------------------------------------------------------------

                # ------------------------------------------------------------------
                # Save best models
                if (total_num_steps * num_scenes) % args.save_interval < num_scenes:
                    # Save Neural SLAM Model
                    if len(costs) >= 1000 and np.mean(costs) < best_cost \
                            and not args.eval:
                        print("Saved best model")
                        best_cost = np.mean(costs)
                        torch.save(nslam_module.state_dict(),
                                   os.path.join(log_dir, "model_best.slam"))

                    # Save Local Policy Model
                    # if len(l_action_losses) >= 100 and \
                    #         (np.mean(l_action_losses) <= best_local_loss) \
                    #         and not args.eval:
                    #     torch.save(l_policy.state_dict(),
                    #                os.path.join(log_dir, "model_best.local"))
                    #     best_local_loss = np.mean(l_action_losses)

                # Save periodic models
                if (total_num_steps * num_scenes) % args.save_periodic < num_scenes:
                    step = total_num_steps * num_scenes
                    if args.train_slam:
                        torch.save(nslam_module.state_dict(),
                                   os.path.join(dump_dir,
                                                "periodic_{}.slam".format(step)))
                    # if args.train_local:
                    #     torch.save(l_policy.state_dict(),
                    #                os.path.join(dump_dir,
                    #                             "periodic_{}.local".format(step)))
                # ------------------------------------------------------------------

                if l_action == HabitatSimActions.STOP:  # Last episode step
                    break

    # Print and save model performance numbers during evaluation
    if args.eval:
        logfile = open("{}/explored_area.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_area_log[e].shape[0]):
                logfile.write(str(explored_area_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        logfile = open("{}/explored_ratio.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_ratio_log[e].shape[0]):
                logfile.write(str(explored_ratio_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        log = "Final Exp Area: \n"
        for i in range(explored_area_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_area_log[:, :, i]))

        log += "\nFinal Exp Ratio: \n"
        for i in range(explored_ratio_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_ratio_log[:, :, i]))

        print(log)
        logging.info(log)
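# `get_delta_pose` (used in the step loop above) is imported from elsewhere in
# the repo and is not shown in this listing. A minimal sketch of what it is
# assumed to compute: the noise-free (dx, dy, dtheta) change produced by an
# action taken at `pose` = (x, y, theta). The step size, turn angle, and the
# use of degrees below are assumptions, not values from the original code.
def _get_delta_pose_sketch(pose, action, forward_step_m=0.25, turn_angle_deg=10.0):
    dx, dy, dtheta = 0.0, 0.0, 0.0
    if action == HabitatSimActions.MOVE_FORWARD:
        dx = forward_step_m * np.cos(np.deg2rad(pose[2]))
        dy = forward_step_m * np.sin(np.deg2rad(pose[2]))
    elif action == HabitatSimActions.TURN_LEFT:
        dtheta = turn_angle_deg
    elif action == HabitatSimActions.TURN_RIGHT:
        dtheta = -turn_angle_deg
    return np.array([dx, dy, dtheta])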
if __name__ == "__main__": parser = argparse.ArgumentParser(description="") parser.add_argument("--size", type=int, default=3) parser.add_argument("--add_ri", action="store_true") parser.add_argument("--random", action="store_true") p = parser.parse_args() cfg = load_cfg("default") cfg.update(vars(p)) cfg["env"] = "pol" wandb.init(project="lwm", config=cfg) num_env = 1 envs = make_vec_envs( num=num_env, size=cfg["size"], max_ep_len=cfg["train"]["max_ep_len"], ) buffer = Buffer( num_env=num_env, maxlen=int(cfg["buffer"]["size"] / num_env), obs_shape=(4, ), device=cfg["buffer"]["device"], ) model = DQN(cfg["agent"]["rnn_size"]).cuda() pred = Predictor(buffer, cfg) if cfg["random"]: warmup = 1e8 else: cp = torch.load("models/dqn.pt") model.load_state_dict(cp)
import torch

from common.load_cfg import load_cfg
from env import make_vec_envs
from dqn import actor_iter, DQN
from dqn.buffer import Buffer
from predictor import Predictor

if __name__ == "__main__":
    cfg = load_cfg("default")
    cfg["env"] = "pol"
    num_env = cfg["agent"]["actors"]

    env = make_vec_envs(
        num=1,
        size=3,
        max_ep_len=cfg["train"]["max_ep_len"],
        seed=10,
    )
    model = DQN(cfg["agent"]["rnn_size"], device="cpu")
    pred = Predictor(None, cfg, device="cpu")
    actor = actor_iter(env, model, pred, 0, eps=0)
    buffer = Buffer(num_env=1, maxlen=2, obs_shape=(4, ), device="cpu")

    cp = torch.load("models/dqn.pt", map_location="cpu")
    model.load_state_dict(cp)
    model.eval()
    pred.load()

    for n_iter in range(2000):
        full_step = buffer.get_recent(2, "cpu")
        step, hx, log_a = actor.send(full_step)
def main(): # Setup Logging log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) if not os.path.exists(log_dir): os.makedirs(log_dir) if not os.path.exists("{}/images/".format(dump_dir)): os.makedirs("{}/images/".format(dump_dir)) logging.basicConfig(filename=log_dir + 'train.log', level=logging.INFO) print("Dumping at {}".format(log_dir)) print(args) logging.info(args) # Logging and loss variables num_scenes = args.num_processes num_episodes = int(args.num_episodes) device = args.device = torch.device("cuda:0" if args.cuda else "cpu") policy_loss = 0 best_cost = 100000 costs = deque(maxlen=1000) exp_costs = deque(maxlen=1000) pose_costs = deque(maxlen=1000) g_masks = torch.ones(num_scenes).float().to(device) l_masks = torch.zeros(num_scenes).float().to(device) best_local_loss = np.inf best_g_reward = -np.inf if args.eval: traj_lengths = args.max_episode_length // args.num_local_steps explored_area_log = np.zeros((num_scenes, num_episodes, traj_lengths)) explored_ratio_log = np.zeros((num_scenes, num_episodes, traj_lengths)) g_episode_rewards = deque(maxlen=1000) l_action_losses = deque(maxlen=1000) g_value_losses = deque(maxlen=1000) g_action_losses = deque(maxlen=1000) g_dist_entropies = deque(maxlen=1000) per_step_g_rewards = deque(maxlen=1000) g_process_rewards = np.zeros((num_scenes)) # Starting environments torch.set_num_threads(1) envs = make_vec_envs(args) obs, infos = envs.reset() # Initialize map variables ### Full map consists of 4 channels containing the following: ### 1. Obstacle Map ### 2. Exploread Area ### 3. Current Agent Location ### 4. Past Agent Locations torch.set_grad_enabled(False) # Calculating full and local map sizes map_size = args.map_size_cm // args.map_resolution full_w, full_h = map_size, map_size local_w, local_h = int(full_w / args.global_downscaling), \ int(full_h / args.global_downscaling) # Initializing full and local map full_map = torch.zeros(num_scenes, 4, full_w, full_h).float().to(device) local_map = torch.zeros(num_scenes, 4, local_w, local_h).float().to(device) # Initial full and local pose full_pose = torch.zeros(num_scenes, 3).float().to(device) local_pose = torch.zeros(num_scenes, 3).float().to(device) # Origin of local map origins = np.zeros((num_scenes, 3)) # Local Map Boundaries lmb = np.zeros((num_scenes, 4)).astype(int) ### Planner pose inputs has 7 dimensions ### 1-3 store continuous global agent location ### 4-7 store local map boundaries planner_pose_inputs = np.zeros((num_scenes, 7)) def init_map_and_pose(): full_map.fill_(0.) full_pose.fill_(0.) full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0 locs = full_pose.cpu().numpy() planner_pose_inputs[:, :3] = locs for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] full_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 lmb[e] = get_local_map_boundaries( (loc_r, loc_c), (local_w, local_h), (full_w, full_h)) planner_pose_inputs[e, 3:] = lmb[e] origins[e] = [ lmb[e][2] * args.map_resolution / 100.0, lmb[e][0] * args.map_resolution / 100.0, 0. 
] for e in range(num_scenes): local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] local_pose[e] = full_pose[e] - \ torch.from_numpy(origins[e]).to(device).float() init_map_and_pose() # Global policy observation space g_observation_space = gym.spaces.Box(0, 1, (8, local_w, local_h), dtype='uint8') # Global policy action space g_action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(2, ), dtype=np.float32) # Local policy observation space l_observation_space = gym.spaces.Box( 0, 255, (3, args.frame_width, args.frame_width), dtype='uint8') # Local and Global policy recurrent layer sizes l_hidden_size = args.local_hidden_size g_hidden_size = args.global_hidden_size # slam nslam_module = Neural_SLAM_Module(args).to(device) slam_optimizer = get_optimizer(nslam_module.parameters(), args.slam_optimizer) # Global policy g_policy = RL_Policy(g_observation_space.shape, g_action_space, base_kwargs={ 'recurrent': args.use_recurrent_global, 'hidden_size': g_hidden_size, 'downscaling': args.global_downscaling }).to(device) g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, args.entropy_coef, lr=args.global_lr, eps=args.eps, max_grad_norm=args.max_grad_norm) # Local policy l_policy = Local_IL_Policy( l_observation_space.shape, envs.action_space.n, recurrent=args.use_recurrent_local, hidden_size=l_hidden_size, deterministic=args.use_deterministic_local).to(device) local_optimizer = get_optimizer(l_policy.parameters(), args.local_optimizer) # Storage g_rollouts = GlobalRolloutStorage(args.num_global_steps, num_scenes, g_observation_space.shape, g_action_space, g_policy.rec_state_size, 1).to(device) slam_memory = FIFOMemory(args.slam_memory_size) # Loading model if args.load_slam != "0": print("Loading slam {}".format(args.load_slam)) state_dict = torch.load(args.load_slam, map_location=lambda storage, loc: storage) nslam_module.load_state_dict(state_dict) if not args.train_slam: nslam_module.eval() if args.load_global != "0": print("Loading global {}".format(args.load_global)) state_dict = torch.load(args.load_global, map_location=lambda storage, loc: storage) g_policy.load_state_dict(state_dict) if not args.train_global: g_policy.eval() if args.load_local != "0": print("Loading local {}".format(args.load_local)) state_dict = torch.load(args.load_local, map_location=lambda storage, loc: storage) l_policy.load_state_dict(state_dict) if not args.train_local: l_policy.eval() # Predict map from frame 1: poses = torch.from_numpy( np.asarray([ infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes) ])).float().to(device) _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \ nslam_module(obs, obs, poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose) # Compute Global policy input locs = local_pose.cpu().numpy() global_input = torch.zeros(num_scenes, 8, local_w, local_h) global_orientation = torch.zeros(num_scenes, 1).long() for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] local_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1. global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 
global_input[:, 0:4, :, :] = local_map.detach() global_input[:, 4:, :, :] = nn.MaxPool2d(args.global_downscaling)(full_map) g_rollouts.obs[0].copy_(global_input) g_rollouts.extras[0].copy_(global_orientation) # Run Global Policy (global_goals = Long-Term Goal) g_value, g_action, g_action_log_prob, g_rec_states = \ g_policy.act( g_rollouts.obs[0], g_rollouts.rec_states[0], g_rollouts.masks[0], extras=g_rollouts.extras[0], deterministic=False ) cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() global_goals = [[int(action[0] * local_w), int(action[1] * local_h)] for action in cpu_actions] # Compute planner inputs planner_inputs = [{} for e in range(num_scenes)] for e, p_input in enumerate(planner_inputs): p_input['goal'] = global_goals[e] p_input['map_pred'] = global_input[e, 0, :, :].detach().cpu().numpy() p_input['exp_pred'] = global_input[e, 1, :, :].detach().cpu().numpy() p_input['pose_pred'] = planner_pose_inputs[e] # Output stores local goals as well as the the ground-truth action output = envs.get_short_term_goal(planner_inputs) last_obs = obs.detach() local_rec_states = torch.zeros(num_scenes, l_hidden_size).to(device) start = time.time() total_num_steps = -1 g_reward = 0 torch.set_grad_enabled(False) for ep_num in range(num_episodes): for step in range(args.max_episode_length): total_num_steps += 1 g_step = (step // args.num_local_steps) % args.num_global_steps eval_g_step = step // args.num_local_steps + 1 l_step = step % args.num_local_steps # ------------------------------------------------------------------ # Local Policy del last_obs last_obs = obs.detach() local_masks = l_masks local_goals = output[:, :-1].to(device).long() if args.train_local: torch.set_grad_enabled(True) action, action_prob, local_rec_states = l_policy( obs, local_rec_states, local_masks, extras=local_goals, ) if args.train_local: action_target = output[:, -1].long().to(device) policy_loss += nn.CrossEntropyLoss()(action_prob, action_target) torch.set_grad_enabled(False) l_action = action.cpu() # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Env step obs, rew, done, infos = envs.step(l_action) l_masks = torch.FloatTensor([0 if x else 1 for x in done]).to(device) g_masks *= l_masks # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Reinitialize variables when episode ends if step == args.max_episode_length - 1: # Last episode step init_map_and_pose() del last_obs last_obs = obs.detach() # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Neural SLAM Module if args.train_slam: # Add frames to memory for env_idx in range(num_scenes): env_obs = obs[env_idx].to("cpu") env_poses = torch.from_numpy( np.asarray( infos[env_idx]['sensor_pose'])).float().to("cpu") env_gt_fp_projs = torch.from_numpy( np.asarray(infos[env_idx]['fp_proj'])).unsqueeze( 0).float().to("cpu") env_gt_fp_explored = torch.from_numpy( np.asarray(infos[env_idx]['fp_explored'])).unsqueeze( 0).float().to("cpu") env_gt_pose_err = torch.from_numpy( np.asarray( infos[env_idx]['pose_err'])).float().to("cpu") slam_memory.push( (last_obs[env_idx].cpu(), env_obs, env_poses), (env_gt_fp_projs, env_gt_fp_explored, env_gt_pose_err)) poses = torch.from_numpy( np.asarray([ infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes) ])).float().to(device) _, _, local_map[:, 0, :, :], 
local_map[:, 1, :, :], _, local_pose = \ nslam_module(last_obs, obs, poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose, build_maps=True) locs = local_pose.cpu().numpy() planner_pose_inputs[:, :3] = locs + origins local_map[:, 2, :, :].fill_(0.) # Resetting current location channel for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Global Policy if l_step == args.num_local_steps - 1: # For every global step, update the full and local maps for e in range(num_scenes): full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ local_map[e] full_pose[e] = local_pose[e] + \ torch.from_numpy(origins[e]).to(device).float() locs = full_pose[e].cpu().numpy() r, c = locs[1], locs[0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] lmb[e] = get_local_map_boundaries( (loc_r, loc_c), (local_w, local_h), (full_w, full_h)) planner_pose_inputs[e, 3:] = lmb[e] origins[e] = [ lmb[e][2] * args.map_resolution / 100.0, lmb[e][0] * args.map_resolution / 100.0, 0. ] local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] local_pose[e] = full_pose[e] - \ torch.from_numpy(origins[e]).to(device).float() locs = local_pose.cpu().numpy() for e in range(num_scenes): global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) global_input[:, 0:4, :, :] = local_map global_input[:, 4:, :, :] = \ nn.MaxPool2d(args.global_downscaling)(full_map) if False: for i in range(4): ax[i].clear() ax[i].set_yticks([]) ax[i].set_xticks([]) ax[i].set_yticklabels([]) ax[i].set_xticklabels([]) ax[i].imshow(global_input.cpu().numpy()[0, 4 + i]) plt.gcf().canvas.flush_events() # plt.pause(0.1) fig.canvas.start_event_loop(0.001) plt.gcf().canvas.flush_events() # Get exploration reward and metrics g_reward = torch.from_numpy( np.asarray([ infos[env_idx]['exp_reward'] for env_idx in range(num_scenes) ])).float().to(device) if args.eval: g_reward = g_reward * 50.0 # Convert reward to area in m2 g_process_rewards += g_reward.cpu().numpy() g_total_rewards = g_process_rewards * \ (1 - g_masks.cpu().numpy()) g_process_rewards *= g_masks.cpu().numpy() per_step_g_rewards.append(np.mean(g_reward.cpu().numpy())) if np.sum(g_total_rewards) != 0: for tr in g_total_rewards: g_episode_rewards.append(tr) if tr != 0 else None if args.eval: exp_ratio = torch.from_numpy( np.asarray([ infos[env_idx]['exp_ratio'] for env_idx in range(num_scenes) ])).float() for e in range(num_scenes): explored_area_log[e, ep_num, eval_g_step - 1] = \ explored_area_log[e, ep_num, eval_g_step - 2] + \ g_reward[e].cpu().numpy() explored_ratio_log[e, ep_num, eval_g_step - 1] = \ explored_ratio_log[e, ep_num, eval_g_step - 2] + \ exp_ratio[e].cpu().numpy() # Add samples to global policy storage g_rollouts.insert(global_input, g_rec_states, g_action, g_action_log_prob, g_value, g_reward, g_masks, global_orientation) # Sample long-term goal from global policy g_value, g_action, g_action_log_prob, g_rec_states = \ g_policy.act( g_rollouts.obs[g_step + 1], g_rollouts.rec_states[g_step + 1], g_rollouts.masks[g_step + 1], extras=g_rollouts.extras[g_step + 1], deterministic=False ) cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() global_goals = [[ int(action[0] * local_w), int(action[1] * local_h) ] for action in 
cpu_actions] g_reward = 0 g_masks = torch.ones(num_scenes).float().to(device) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Get short term goal planner_inputs = [{} for e in range(num_scenes)] for e, p_input in enumerate(planner_inputs): p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() p_input['pose_pred'] = planner_pose_inputs[e] p_input['goal'] = global_goals[e] output = envs.get_short_term_goal(planner_inputs) # ------------------------------------------------------------------ ### TRAINING torch.set_grad_enabled(True) # ------------------------------------------------------------------ # Train Neural SLAM Module if args.train_slam and len(slam_memory) > args.slam_batch_size: for _ in range(args.slam_iterations): inputs, outputs = slam_memory.sample(args.slam_batch_size) b_obs_last, b_obs, b_poses = inputs gt_fp_projs, gt_fp_explored, gt_pose_err = outputs b_obs = b_obs.to(device) b_obs_last = b_obs_last.to(device) b_poses = b_poses.to(device) gt_fp_projs = gt_fp_projs.to(device) gt_fp_explored = gt_fp_explored.to(device) gt_pose_err = gt_pose_err.to(device) b_proj_pred, b_fp_exp_pred, _, _, b_pose_err_pred, _ = \ nslam_module(b_obs_last, b_obs, b_poses, None, None, None, build_maps=False) loss = 0 if args.proj_loss_coeff > 0: proj_loss = F.binary_cross_entropy( b_proj_pred, gt_fp_projs) costs.append(proj_loss.item()) loss += args.proj_loss_coeff * proj_loss if args.exp_loss_coeff > 0: exp_loss = F.binary_cross_entropy( b_fp_exp_pred, gt_fp_explored) exp_costs.append(exp_loss.item()) loss += args.exp_loss_coeff * exp_loss if args.pose_loss_coeff > 0: pose_loss = torch.nn.MSELoss()(b_pose_err_pred, gt_pose_err) pose_costs.append(args.pose_loss_coeff * pose_loss.item()) loss += args.pose_loss_coeff * pose_loss if args.train_slam: slam_optimizer.zero_grad() loss.backward() slam_optimizer.step() del b_obs_last, b_obs, b_poses del gt_fp_projs, gt_fp_explored, gt_pose_err del b_proj_pred, b_fp_exp_pred, b_pose_err_pred # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Train Local Policy if (l_step + 1) % args.local_policy_update_freq == 0 \ and args.train_local: local_optimizer.zero_grad() policy_loss.backward() local_optimizer.step() l_action_losses.append(policy_loss.item()) policy_loss = 0 local_rec_states = local_rec_states.detach_() # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Train Global Policy if g_step % args.num_global_steps == args.num_global_steps - 1 \ and l_step == args.num_local_steps - 1: if args.train_global: g_next_value = g_policy.get_value( g_rollouts.obs[-1], g_rollouts.rec_states[-1], g_rollouts.masks[-1], extras=g_rollouts.extras[-1]).detach() g_rollouts.compute_returns(g_next_value, args.use_gae, args.gamma, args.tau) g_value_loss, g_action_loss, g_dist_entropy = \ g_agent.update(g_rollouts) g_value_losses.append(g_value_loss) g_action_losses.append(g_action_loss) g_dist_entropies.append(g_dist_entropy) g_rollouts.after_update() # ------------------------------------------------------------------ # Finish Training torch.set_grad_enabled(False) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Logging if total_num_steps % args.log_interval == 
0: end = time.time() time_elapsed = time.gmtime(end - start) log = " ".join([ "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1), "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)), "num timesteps {},".format(total_num_steps * num_scenes), "FPS {},".format(int(total_num_steps * num_scenes \ / (end - start))) ]) log += "\n\tRewards:" if len(g_episode_rewards) > 0: log += " ".join([ " Global step mean/med rew:", "{:.4f}/{:.4f},".format(np.mean(per_step_g_rewards), np.median(per_step_g_rewards)), " Global eps mean/med/min/max eps rew:", "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format( np.mean(g_episode_rewards), np.median(g_episode_rewards), np.min(g_episode_rewards), np.max(g_episode_rewards)) ]) log += "\n\tLosses:" if args.train_local and len(l_action_losses) > 0: log += " ".join([ " Local Loss:", "{:.3f},".format(np.mean(l_action_losses)) ]) if args.train_global and len(g_value_losses) > 0: log += " ".join([ " Global Loss value/action/dist:", "{:.3f}/{:.3f}/{:.3f},".format( np.mean(g_value_losses), np.mean(g_action_losses), np.mean(g_dist_entropies)) ]) if args.train_slam and len(costs) > 0: log += " ".join([ " SLAM Loss proj/exp/pose:" "{:.4f}/{:.4f}/{:.4f}".format(np.mean(costs), np.mean(exp_costs), np.mean(pose_costs)) ]) print(log) logging.info(log) # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Save best models if (total_num_steps * num_scenes) % args.save_interval < \ num_scenes: # Save Neural SLAM Model if len(costs) >= 1000 and np.mean(costs) < best_cost \ and not args.eval: best_cost = np.mean(costs) torch.save(nslam_module.state_dict(), os.path.join(log_dir, "model_best.slam")) # Save Local Policy Model if len(l_action_losses) >= 100 and \ (np.mean(l_action_losses) <= best_local_loss) \ and not args.eval: torch.save(l_policy.state_dict(), os.path.join(log_dir, "model_best.local")) best_local_loss = np.mean(l_action_losses) # Save Global Policy Model if len(g_episode_rewards) >= 100 and \ (np.mean(g_episode_rewards) >= best_g_reward) \ and not args.eval: torch.save(g_policy.state_dict(), os.path.join(log_dir, "model_best.global")) best_g_reward = np.mean(g_episode_rewards) # Save periodic models if (total_num_steps * num_scenes) % args.save_periodic < \ num_scenes: step = total_num_steps * num_scenes if args.train_slam: torch.save( nslam_module.state_dict(), os.path.join(dump_dir, "periodic_{}.slam".format(step))) if args.train_local: torch.save( l_policy.state_dict(), os.path.join(dump_dir, "periodic_{}.local".format(step))) if args.train_global: torch.save( g_policy.state_dict(), os.path.join(dump_dir, "periodic_{}.global".format(step))) # ------------------------------------------------------------------ # Print and save model performance numbers during evaluation if args.eval: logfile = open("{}/explored_area.txt".format(dump_dir), "w+") for e in range(num_scenes): for i in range(explored_area_log[e].shape[0]): logfile.write(str(explored_area_log[e, i]) + "\n") logfile.flush() logfile.close() logfile = open("{}/explored_ratio.txt".format(dump_dir), "w+") for e in range(num_scenes): for i in range(explored_ratio_log[e].shape[0]): logfile.write(str(explored_ratio_log[e, i]) + "\n") logfile.flush() logfile.close() log = "Final Exp Area: \n" for i in range(explored_area_log.shape[2]): log += "{:.5f}, ".format(np.mean(explored_area_log[:, :, i])) log += "\nFinal Exp Ratio: \n" for i in range(explored_ratio_log.shape[2]): log += "{:.5f}, ".format(np.mean(explored_ratio_log[:, :, i])) print(log) 
logging.info(log)
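# `get_local_map_boundaries` (used by both main() functions above) is defined
# elsewhere in the repo. A minimal sketch of the assumed behaviour: centre a
# local_w x local_h window on the agent and clamp it to the full map; with no
# downscaling the local map is simply the full map. The exact clamping rules
# below are an assumption, not code from the original file.
def _get_local_map_boundaries_sketch(agent_loc, local_sizes, full_sizes,
                                     global_downscaling=1):
    loc_r, loc_c = agent_loc
    local_w, local_h = local_sizes
    full_w, full_h = full_sizes

    if global_downscaling > 1:
        gx1, gy1 = loc_r - local_w // 2, loc_c - local_h // 2
        gx2, gy2 = gx1 + local_w, gy1 + local_h
        if gx1 < 0:
            gx1, gx2 = 0, local_w
        if gx2 > full_w:
            gx1, gx2 = full_w - local_w, full_w
        if gy1 < 0:
            gy1, gy2 = 0, local_h
        if gy2 > full_h:
            gy1, gy2 = full_h - local_h, full_h
    else:
        gx1, gx2, gy1, gy2 = 0, full_w, 0, full_h

    return [gx1, gx2, gy1, gy2]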
    for f in files:
        os.remove(f)
    print_now('Reset log directory contents at: %s' % (args.log_dir))

eval_log_dir = args.log_dir + "_eval"
try:
    os.makedirs(eval_log_dir)
except OSError:
    files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)

# Env following https://github.com/ikostrikov/pytorch-a2c-ppo-acktr
print_now('Using device: {}'.format(device))
envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                     args.gamma, args.log_dir, args.add_timestep, device, False)
action_space = envs.action_space.n

if USE_IQN_C51:
    policy_net = IQN_C51(num_inputs=4, num_actions=action_space,
                         use_duel=USE_DUEL,
                         use_noisy_net=USE_NOISY_NET).to(device)
    target_net = IQN_C51(num_inputs=4, num_actions=action_space,
                         use_duel=USE_DUEL,
                         use_noisy_net=USE_NOISY_NET).to(device)
elif USE_C51:
    policy_net = C51(num_inputs=4, num_actions=action_space,
                     atoms=C51_atoms,
def test(): ########################################################## # # Realsense test # pipeline = rs.pipeline() # config = rs.config() # config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) # config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) # pipeline.start(config) # frames = pipeline.wait_for_frames() # color_frame = frames.get_color_frame() # img = np.asanyarray(color_frame.get_data()) # img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_CUBIC) # cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE) # cv2.imshow('RealSense', img) # cv2.waitKey(1) ########################################################## device = args.device = torch.device("cuda:0" if args.cuda else "cpu") # Setup Logging log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) if not os.path.exists(log_dir): os.makedirs(log_dir) if not os.path.exists("{}/images/".format(dump_dir)): os.makedirs("{}/images/".format(dump_dir)) logging.basicConfig(filename=log_dir + 'train.log', level=logging.INFO) print("Dumping at {}".format(log_dir)) logging.info(args) # Logging and loss variables num_scenes = args.num_processes num_episodes = int(args.num_episodes) device = args.device = torch.device("cuda:0" if args.cuda else "cpu") policy_loss = 0 best_cost = 100000 costs = deque(maxlen=1000) exp_costs = deque(maxlen=1000) pose_costs = deque(maxlen=1000) g_masks = torch.ones(num_scenes).float().to(device) l_masks = torch.zeros(num_scenes).float().to(device) best_local_loss = np.inf best_g_reward = -np.inf if args.eval: traj_lengths = args.max_episode_length // args.num_local_steps explored_area_log = np.zeros((num_scenes, num_episodes, traj_lengths)) explored_ratio_log = np.zeros((num_scenes, num_episodes, traj_lengths)) g_episode_rewards = deque(maxlen=1000) l_action_losses = deque(maxlen=1000) g_value_losses = deque(maxlen=1000) g_action_losses = deque(maxlen=1000) g_dist_entropies = deque(maxlen=1000) per_step_g_rewards = deque(maxlen=1000) g_process_rewards = np.zeros((num_scenes)) # Starting environments torch.set_num_threads(1) envs = make_vec_envs(args) obs, infos = envs.reset() # Initialize map variables ### Full map consists of 4 channels containing the following: ### 1. Obstacle Map ### 2. Exploread Area ### 3. Current Agent Location ### 4. Past Agent Locations torch.set_grad_enabled(False) # Calculating full and local map sizes map_size = args.map_size_cm // args.map_resolution full_w, full_h = map_size, map_size local_w, local_h = int(full_w / args.global_downscaling), \ int(full_h / args.global_downscaling) # Initializing full and local map full_map = torch.zeros(num_scenes, 4, full_w, full_h).float().to(device) local_map = torch.zeros(num_scenes, 4, local_w, local_h).float().to(device) # Initial full and local pose full_pose = torch.zeros(num_scenes, 3).float().to(device) local_pose = torch.zeros(num_scenes, 3).float().to(device) # Origin of local map origins = np.zeros((num_scenes, 3)) # Local Map Boundaries lmb = np.zeros((num_scenes, 4)).astype(int) ### Planner pose inputs The global agent location ### 4-7 store local map boundaries planner_pose_inputs = np.zeros((num_scenes, 7)) # Initialize full_map and full_pose def init_map_and_pose(): full_map.fill_(0.) full_pose.fill_(0.) 
full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0 locs = full_pose.cpu().numpy() planner_pose_inputs[:, :3] = locs for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] full_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 lmb[e] = get_local_map_boundaries( (loc_r, loc_c), (local_w, local_h), (full_w, full_h)) planner_pose_inputs[e, 3:] = lmb[e] origins[e] = [ lmb[e][2] * args.map_resolution / 100.0, lmb[e][0] * args.map_resolution / 100.0, 0. ] for e in range(num_scenes): local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] local_pose[e] = full_pose[e] - \ torch.from_numpy(origins[e]).to(device).float() init_map_and_pose() # Global policy observation space g_observation_space = gym.spaces.Box(0, 1, (8, local_w, local_h), dtype='uint8') # Global policy action space g_action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(2, ), dtype=np.float32) # Local policy observation space l_observation_space = gym.spaces.Box( 0, 255, (3, args.frame_width, args.frame_width), dtype='uint8') # Local and Global policy recurrent layer sizes l_hidden_size = args.local_hidden_size g_hidden_size = args.global_hidden_size # slam nslam_module = Neural_SLAM_Module(args).to(device) slam_optimizer = get_optimizer(nslam_module.parameters(), args.slam_optimizer) # Global policy # obse_space.shape= [8, 500, 500] # act_space= Box shape (2,) # g_hidden_size = 256 g_policy = RL_Policy(g_observation_space.shape, g_action_space, base_kwargs={ 'recurrent': args.use_recurrent_global, 'hidden_size': g_hidden_size, 'downscaling': args.global_downscaling }).to(device) g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, args.entropy_coef, lr=args.global_lr, eps=args.eps, max_grad_norm=args.max_grad_norm) # Local policy l_policy = Local_IL_Policy( l_observation_space.shape, envs.action_space.n, recurrent=args.use_recurrent_local, hidden_size=l_hidden_size, deterministic=args.use_deterministic_local).to(device) local_optimizer = get_optimizer(l_policy.parameters(), args.local_optimizer) # Storage g_rollouts = GlobalRolloutStorage(args.num_global_steps, num_scenes, g_observation_space.shape, g_action_space, g_policy.rec_state_size, 1).to(device) slam_memory = FIFOMemory(args.slam_memory_size) ''' ''' # Loading model if args.load_slam != "0": print("Loading slam {}".format(args.load_slam)) state_dict = torch.load(args.load_slam, map_location=lambda storage, loc: storage) nslam_module.load_state_dict(state_dict) if not args.train_slam: nslam_module.eval() if args.load_global != "0": print("Loading global {}".format(args.load_global)) state_dict = torch.load(args.load_global, map_location=lambda storage, loc: storage) g_policy.load_state_dict(state_dict) if not args.train_global: g_policy.eval() if args.load_local != "0": print("Loading local {}".format(args.load_local)) state_dict = torch.load(args.load_local, map_location=lambda storage, loc: storage) l_policy.load_state_dict(state_dict) if not args.train_local: l_policy.eval() # /////////////////////////////////////////////////////////////// TESTING from matplotlib import image if args.testing: test_images = {} for i in range(5): for j in range(12): img_pth = 'imgs/robots_rs/test_{}_{}.jpg'.format(i + 1, j) img = image.imread(img_pth) test_images[(i + 1, j)] = np.array(img) poses_array = [] for i in range(8): poses_array.append(np.array([[0.3, 0.0, 0.0], [0.3, 0.0, 0.0]])) for i in range(4): 
poses_array.append( np.array([[0.0, 0.0, -0.24587], [0.0, 0.0, -0.27587]])) # index from 1 to 5 test_1_idx = 3 test_2_idx = 1 # image1_1 = image.imread('imgs/robots_rs/img_128_6.jpg') # image1_2 = image.imread('imgs/robots_rs/img_128_7.jpg') # image1_3 = image.imread('imgs/robots_rs/img_128_8.jpg') # image2_1 = image.imread('imgs/robots_rs/img_128_30.jpg') # image2_2 = image.imread('imgs/robots_rs/img_128_31.jpg') # image2_3 = image.imread('imgs/robots_rs/img_128_32.jpg') # # image_data = np.asarray(image) # # plt.imshow(image) # # plt.show() # image_data_1_1 = np.array(image1_1) # image_data_1_2 = np.array(image1_2) # image_data_1_3 = np.array(image1_3) # image_data_2_1 = np.array(image2_1) # image_data_2_2 = np.array(image2_2) # image_data_2_3 = np.array(image2_3) # image_data_1_all = np.array([image_data_1_1, image_data_2_1]) # image_data_2_all = np.array([image_data_1_2, image_data_2_2]) # image_data_3_all = np.array([image_data_1_3, image_data_2_3]) image_data_all = np.array( [test_images[(test_1_idx, 0)], test_images[(test_2_idx, 0)]]) obs = torch.from_numpy(image_data_all).float().to(device) obs = obs.permute((0, 3, 1, 2)).contiguous() # print(f"New obs: {obs}") print(f"New obs size: {obs.size()}") # /////////////////////////////////////////////////////////////// TESTING # Predict map from frame 1: poses = torch.from_numpy( np.asarray([ infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes) ])).float().to(device) _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \ nslam_module(obs, obs, poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose) # print(f"\n\n local_map shape: {local_map.shape}") # print(f"\n obs shape: {obs.shape}") # print(f"\n poses shape: {poses.shape}") # Compute Global policy input locs = local_pose.cpu().numpy() global_input = torch.zeros(num_scenes, 8, local_w, local_h) global_orientation = torch.zeros(num_scenes, 1).long() for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] local_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1. global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 
global_input[:, 0:4, :, :] = local_map.detach() global_input[:, 4:, :, :] = nn.MaxPool2d(args.global_downscaling)(full_map) g_rollouts.obs[0].copy_(global_input) g_rollouts.extras[0].copy_(global_orientation) # Run Global Policy (global_goals = Long-Term Goal) g_value, g_action, g_action_log_prob, g_rec_states = \ g_policy.act( g_rollouts.obs[0], g_rollouts.rec_states[0], g_rollouts.masks[0], extras=g_rollouts.extras[0], deterministic=False ) cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() global_goals = [[int(action[0] * local_w), int(action[1] * local_h)] for action in cpu_actions] # Compute planner inputs planner_inputs = [{} for e in range(num_scenes)] for e, p_input in enumerate(planner_inputs): p_input['goal'] = global_goals[e] p_input['map_pred'] = global_input[e, 0, :, :].detach().cpu().numpy() p_input['exp_pred'] = global_input[e, 1, :, :].detach().cpu().numpy() p_input['pose_pred'] = planner_pose_inputs[e] # Output stores local goals as well as the the ground-truth action output = envs.get_short_term_goal(planner_inputs) last_obs = obs.detach() local_rec_states = torch.zeros(num_scenes, l_hidden_size).to(device) start = time.time() total_num_steps = -1 g_reward = 0 torch.set_grad_enabled(False) # fig, axis = plt.subplots(1,3) fig, axis = plt.subplots(2, 3) # a = [[1, 0, 1], [1, 0, 1], [1, 0, 1]] # plt.imshow(a) for ep_num in range(num_episodes): for step in range(args.max_episode_length): total_num_steps += 1 g_step = (step // args.num_local_steps) % args.num_global_steps eval_g_step = step // args.num_local_steps + 1 l_step = step % args.num_local_steps # ------------------------------------------------------------------ # Local Policy del last_obs last_obs = obs.detach() local_masks = l_masks local_goals = output[:, :-1].to(device).long() if args.train_local: torch.set_grad_enabled(True) action, action_prob, local_rec_states = l_policy( obs, local_rec_states, local_masks, extras=local_goals, ) if args.train_local: action_target = output[:, -1].long().to(device) policy_loss += nn.CrossEntropyLoss()(action_prob, action_target) torch.set_grad_enabled(False) l_action = action.cpu() # ------------------------------------------------------------------ # ------------------------------------------------------------------ print(f"l_action: {l_action}") print(f"l_action size: {l_action.size()}") # Env step obs, rew, done, infos = envs.step(l_action) # ////////////////////////////////////////////////////////////////// TESTING # obs_all = _process_obs_for_display(obs) # _ims = [transform_rgb_bgr(obs_all[0]), transform_rgb_bgr(obs_all[1])] # ax1.imshow(_ims[0]) # ax2.imshow(_ims[1]) # plt.savefig(f"imgs/img_0_{step}.png") # # plt.clf() # ////////////////////////////////////////////////////////////////// TESTING l_masks = torch.FloatTensor([0 if x else 1 for x in done]).to(device) g_masks *= l_masks # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Reinitialize variables when episode ends if step == args.max_episode_length - 1: # Last episode step print("Final step") init_map_and_pose() del last_obs last_obs = obs.detach() # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Neural SLAM Module if args.train_slam: # Add frames to memory for env_idx in range(num_scenes): env_obs = obs[env_idx].to("cpu") env_poses = torch.from_numpy( np.asarray( infos[env_idx]['sensor_pose'])).float().to("cpu") env_gt_fp_projs = 
torch.from_numpy( np.asarray(infos[env_idx]['fp_proj'])).unsqueeze( 0).float().to("cpu") env_gt_fp_explored = torch.from_numpy( np.asarray(infos[env_idx]['fp_explored'])).unsqueeze( 0).float().to("cpu") env_gt_pose_err = torch.from_numpy( np.asarray( infos[env_idx]['pose_err'])).float().to("cpu") slam_memory.push( (last_obs[env_idx].cpu(), env_obs, env_poses), (env_gt_fp_projs, env_gt_fp_explored, env_gt_pose_err)) poses = torch.from_numpy( np.asarray([ infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes) ])).float().to(device) # ///////////////////////////////////////////////////////////////// TESTING if args.testing: # obs = torch.from_numpy(obs_).float().to(self.device) # obs_cpu = obs.detach().cpu().numpy() # last_obs_cpu = last_obs.detach().cpu().numpy() # print(f"obs shape: {obs_cpu.shape}") # print(f"last_obs shape: {last_obs_cpu.shape}") original_obs = obs original_last_obs = last_obs original_poses = poses print(f"step: {step}") last_obs = torch.from_numpy(image_data_all).float().to(device) last_obs = last_obs.permute((0, 3, 1, 2)).contiguous() image_data_all = np.array([ test_images[(test_1_idx, step + 1)], test_images[(test_2_idx, step + 1)] ]) obs = torch.from_numpy(image_data_all).float().to(device) obs = obs.permute((0, 3, 1, 2)).contiguous() _poses = poses_array[step] poses = torch.from_numpy(_poses).float().to(device) # if step == 0: # print(f"step: {step}") # last_obs = torch.from_numpy(image_data_1_all).float().to(device) # last_obs = last_obs.permute((0, 3, 1, 2)).contiguous() # obs = torch.from_numpy(image_data_2_all).float().to(device) # obs = obs.permute((0, 3, 1, 2)).contiguous() # _poses = np.array([[0.2, 0.0, 0.0], [0.2, 0.0, 0.0]]) # poses = torch.from_numpy(_poses).float().to(device) # elif step == 1: # print(f"step: {step}") # last_obs = torch.from_numpy(image_data_2_all).float().to(device) # last_obs = last_obs.permute((0, 3, 1, 2)).contiguous() # obs = torch.from_numpy(image_data_3_all).float().to(device) # obs = obs.permute((0, 3, 1, 2)).contiguous() # _poses = np.array([[0.4, 0.0, 0.0], [0.2, 0.0, 0.17587]]) # poses = torch.from_numpy(_poses).float().to(device) # _poses = np.asarray([infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)]) # print(f"New poses: {_poses}") # last_obs = torch.from_numpy(image_data_1_1).float().to(device) # obs = torch.from_numpy(image_data_1_2).float().to(device) # print(f"Original obs: {original_obs}") # print(f"Original obs shape: {original_obs.size()}") # print(f"Obs: {obs}") # print(f"Obs shape: {obs.size()}") # print(f"Original last_obs: {original_last_obs}") # print(f"Original last_obs shape: {original_last_obs.size()}") # print(f"last_obs: {last_obs}") # print(f"Last_obs shape: {last_obs.size()}") # print(f"Original poses: {original_poses}") # print(f"Original poses shape: {original_poses.size()}") print(f"Local poses : {local_pose}") # ///////////////////////////////////////////////////////////////// TESTING _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \ nslam_module(last_obs, obs, poses, local_map[:, 0, :, :], local_map[:, 1, :, :], local_pose, build_maps=True) locs = local_pose.cpu().numpy() planner_pose_inputs[:, :3] = locs + origins local_map[:, 2, :, :].fill_(0.) # Resetting current location channel for e in range(num_scenes): r, c = locs[e, 1], locs[e, 0] loc_r, loc_c = [ int(r * 100.0 / args.map_resolution), int(c * 100.0 / args.map_resolution) ] local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. 
            # //////////////////////////////////////////////////////////////////
            if args.testing:
                local_map_draw = local_map
                if step % 1 == 0:
                    obs_all = _process_obs_for_display(obs)
                    _ims = [
                        transform_rgb_bgr(obs_all[0]),
                        transform_rgb_bgr(obs_all[1])
                    ]
                    imgs_1 = local_map_draw[0, :, :, :].cpu().numpy()
                    imgs_2 = local_map_draw[1, :, :, :].cpu().numpy()
                    # axis[1].imshow(imgs_1[0], cmap='gray')
                    # axis[2].imshow(imgs_1[1], cmap='gray')
                    # axis[0].imshow(_ims[0])
                    axis[0][1].imshow(imgs_1[0], cmap='gray')
                    axis[0][2].imshow(imgs_1[1], cmap='gray')
                    axis[0][0].imshow(_ims[0])
                    axis[1][1].imshow(imgs_2[0], cmap='gray')
                    axis[1][2].imshow(imgs_2[1], cmap='gray')
                    axis[1][0].imshow(_ims[1])
                    plt.savefig(f"imgs/test_{step}.png")

                obs = original_obs
                last_obs = original_last_obs
                poses = original_poses
            # //////////////////////////////////////////////////////////////////

            # ------------------------------------------------------------------
            # Global Policy
            if l_step == args.num_local_steps - 1:
                # For every global step, update the full and local maps
                for e in range(num_scenes):
                    full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \
                        local_map[e]
                    full_pose[e] = local_pose[e] + \
                        torch.from_numpy(origins[e]).to(device).float()

                    locs = full_pose[e].cpu().numpy()
                    r, c = locs[1], locs[0]
                    loc_r, loc_c = [
                        int(r * 100.0 / args.map_resolution),
                        int(c * 100.0 / args.map_resolution)
                    ]

                    lmb[e] = get_local_map_boundaries(
                        (loc_r, loc_c), (local_w, local_h), (full_w, full_h))

                    planner_pose_inputs[e, 3:] = lmb[e]
                    origins[e] = [
                        lmb[e][2] * args.map_resolution / 100.0,
                        lmb[e][0] * args.map_resolution / 100.0, 0.
                    ]

                    local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1],
                                            lmb[e, 2]:lmb[e, 3]]
                    local_pose[e] = full_pose[e] - \
                        torch.from_numpy(origins[e]).to(device).float()

                locs = local_pose.cpu().numpy()
                for e in range(num_scenes):
                    global_orientation[e] = int((locs[e, 2] + 180.0) / 5.)
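                # Worked example: headings are binned into 5-degree buckets
                # over [-180, 180), so a heading of -180.0 deg maps to bin 0,
                # 0.0 deg to bin 36, and 177.5 deg to bin 71 (72 buckets total).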
                global_input[:, 0:4, :, :] = local_map
                global_input[:, 4:, :, :] = \
                    nn.MaxPool2d(args.global_downscaling)(full_map)

                if False:
                    for i in range(4):
                        ax[i].clear()
                        ax[i].set_yticks([])
                        ax[i].set_xticks([])
                        ax[i].set_yticklabels([])
                        ax[i].set_xticklabels([])
                        ax[i].imshow(global_input.cpu().numpy()[0, 4 + i])
                    plt.gcf().canvas.flush_events()
                    # plt.pause(0.1)
                    fig.canvas.start_event_loop(0.001)
                    plt.gcf().canvas.flush_events()

                # Get exploration reward and metrics
                g_reward = torch.from_numpy(
                    np.asarray([
                        infos[env_idx]['exp_reward']
                        for env_idx in range(num_scenes)
                    ])).float().to(device)

                if args.eval:
                    g_reward = g_reward * 50.0  # Convert reward to area in m2

                g_process_rewards += g_reward.cpu().numpy()
                g_total_rewards = g_process_rewards * \
                    (1 - g_masks.cpu().numpy())
                g_process_rewards *= g_masks.cpu().numpy()
                per_step_g_rewards.append(np.mean(g_reward.cpu().numpy()))

                if np.sum(g_total_rewards) != 0:
                    for tr in g_total_rewards:
                        if tr != 0:
                            g_episode_rewards.append(tr)

                if args.eval:
                    exp_ratio = torch.from_numpy(
                        np.asarray([
                            infos[env_idx]['exp_ratio']
                            for env_idx in range(num_scenes)
                        ])).float()

                    for e in range(num_scenes):
                        explored_area_log[e, ep_num, eval_g_step - 1] = \
                            explored_area_log[e, ep_num, eval_g_step - 2] + \
                            g_reward[e].cpu().numpy()
                        explored_ratio_log[e, ep_num, eval_g_step - 1] = \
                            explored_ratio_log[e, ep_num, eval_g_step - 2] + \
                            exp_ratio[e].cpu().numpy()

                # Add samples to global policy storage
                g_rollouts.insert(global_input, g_rec_states, g_action,
                                  g_action_log_prob, g_value, g_reward,
                                  g_masks, global_orientation)

                # Sample long-term goal from global policy
                g_value, g_action, g_action_log_prob, g_rec_states = \
                    g_policy.act(
                        g_rollouts.obs[g_step + 1],
                        g_rollouts.rec_states[g_step + 1],
                        g_rollouts.masks[g_step + 1],
                        extras=g_rollouts.extras[g_step + 1],
                        deterministic=False
                    )
                cpu_actions = nn.Sigmoid()(g_action).cpu().numpy()
                global_goals = [[
                    int(action[0] * local_w),
                    int(action[1] * local_h)
                ] for action in cpu_actions]

                g_reward = 0
                g_masks = torch.ones(num_scenes).float().to(device)
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Get short term goal
            planner_inputs = [{} for e in range(num_scenes)]
            for e, p_input in enumerate(planner_inputs):
                p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy()
                p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy()
                p_input['pose_pred'] = planner_pose_inputs[e]
                p_input['goal'] = global_goals[e]

            output = envs.get_short_term_goal(planner_inputs)
            # print(f"\n output (short term goal): {output}\n")
            # ------------------------------------------------------------------
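            # The remainder of the step performs the gradient updates: the
            # Neural SLAM module is trained on supervised samples drawn from
            # slam_memory, the local policy by imitation of the planner's
            # ground-truth short-term action, and the global policy with an
            # on-policy actor-critic update on the exploration reward.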
            ### TRAINING
            torch.set_grad_enabled(True)

            # ------------------------------------------------------------------
            # Train Neural SLAM Module
            if args.train_slam and len(slam_memory) > args.slam_batch_size:
                for _ in range(args.slam_iterations):
                    inputs, outputs = slam_memory.sample(args.slam_batch_size)

                    b_obs_last, b_obs, b_poses = inputs
                    gt_fp_projs, gt_fp_explored, gt_pose_err = outputs

                    b_obs = b_obs.to(device)
                    b_obs_last = b_obs_last.to(device)
                    b_poses = b_poses.to(device)

                    gt_fp_projs = gt_fp_projs.to(device)
                    gt_fp_explored = gt_fp_explored.to(device)
                    gt_pose_err = gt_pose_err.to(device)

                    b_proj_pred, b_fp_exp_pred, _, _, b_pose_err_pred, _ = \
                        nslam_module(b_obs_last, b_obs, b_poses,
                                     None, None, None,
                                     build_maps=False)

                    loss = 0
                    if args.proj_loss_coeff > 0:
                        proj_loss = F.binary_cross_entropy(
                            b_proj_pred, gt_fp_projs)
                        costs.append(proj_loss.item())
                        loss += args.proj_loss_coeff * proj_loss

                    if args.exp_loss_coeff > 0:
                        exp_loss = F.binary_cross_entropy(
                            b_fp_exp_pred, gt_fp_explored)
                        exp_costs.append(exp_loss.item())
                        loss += args.exp_loss_coeff * exp_loss

                    if args.pose_loss_coeff > 0:
                        pose_loss = torch.nn.MSELoss()(b_pose_err_pred,
                                                       gt_pose_err)
                        pose_costs.append(
                            args.pose_loss_coeff * pose_loss.item())
                        loss += args.pose_loss_coeff * pose_loss

                    if args.train_slam:
                        slam_optimizer.zero_grad()
                        loss.backward()
                        slam_optimizer.step()

                    del b_obs_last, b_obs, b_poses
                    del gt_fp_projs, gt_fp_explored, gt_pose_err
                    del b_proj_pred, b_fp_exp_pred, b_pose_err_pred
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Train Local Policy
            if (l_step + 1) % args.local_policy_update_freq == 0 \
                    and args.train_local:
                local_optimizer.zero_grad()
                policy_loss.backward()
                local_optimizer.step()
                l_action_losses.append(policy_loss.item())
                policy_loss = 0
                local_rec_states = local_rec_states.detach_()
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Train Global Policy
            if g_step % args.num_global_steps == args.num_global_steps - 1 \
                    and l_step == args.num_local_steps - 1:
                if args.train_global:
                    g_next_value = g_policy.get_value(
                        g_rollouts.obs[-1],
                        g_rollouts.rec_states[-1],
                        g_rollouts.masks[-1],
                        extras=g_rollouts.extras[-1]).detach()

                    g_rollouts.compute_returns(g_next_value, args.use_gae,
                                               args.gamma, args.tau)
                    g_value_loss, g_action_loss, g_dist_entropy = \
                        g_agent.update(g_rollouts)
                    g_value_losses.append(g_value_loss)
                    g_action_losses.append(g_action_loss)
                    g_dist_entropies.append(g_dist_entropy)
                g_rollouts.after_update()
            # ------------------------------------------------------------------

            # Finish Training
            torch.set_grad_enabled(False)
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Logging
            if total_num_steps % args.log_interval == 0:
                end = time.time()
                time_elapsed = time.gmtime(end - start)
                log = " ".join([
                    "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1),
                    "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)),
                    "num timesteps {},".format(total_num_steps * num_scenes),
                    "FPS {},".format(
                        int(total_num_steps * num_scenes / (end - start)))
                ])

                log += "\n\tRewards:"
                if len(g_episode_rewards) > 0:
                    log += " ".join([
                        " Global step mean/med rew:",
                        "{:.4f}/{:.4f},".format(np.mean(per_step_g_rewards),
                                                np.median(per_step_g_rewards)),
                        " Global eps mean/med/min/max eps rew:",
                        "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format(
                            np.mean(g_episode_rewards),
                            np.median(g_episode_rewards),
                            np.min(g_episode_rewards),
                            np.max(g_episode_rewards))
                    ])

                log += "\n\tLosses:"
                if args.train_local and len(l_action_losses) > 0:
                    log += " ".join([
                        " Local Loss:",
                        "{:.3f},".format(np.mean(l_action_losses))
                    ])

                if args.train_global and len(g_value_losses) > 0:
                    log += " ".join([
                        " Global Loss value/action/dist:",
                        "{:.3f}/{:.3f}/{:.3f},".format(
                            np.mean(g_value_losses),
                            np.mean(g_action_losses),
                            np.mean(g_dist_entropies))
                    ])

                if args.train_slam and len(costs) > 0:
                    log += " ".join([
                        " SLAM Loss proj/exp/pose:",
                        "{:.4f}/{:.4f}/{:.4f}".format(np.mean(costs),
                                                      np.mean(exp_costs),
                                                      np.mean(pose_costs))
                    ])

                print(log)
                logging.info(log)
            # ------------------------------------------------------------------
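            # Checkpointing below is tied to environment steps:
            # (total_num_steps * num_scenes) advances by num_scenes each
            # iteration, so the modulo-< num_scenes tests fire approximately
            # once every args.save_interval (resp. args.save_periodic) steps.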
            # ------------------------------------------------------------------
            # Save best models
            if (total_num_steps * num_scenes) % args.save_interval < \
                    num_scenes:

                # Save Neural SLAM Model
                if len(costs) >= 1000 and np.mean(costs) < best_cost \
                        and not args.eval:
                    best_cost = np.mean(costs)
                    torch.save(nslam_module.state_dict(),
                               os.path.join(log_dir, "model_best.slam"))

                # Save Local Policy Model
                if len(l_action_losses) >= 100 and \
                        (np.mean(l_action_losses) <= best_local_loss) \
                        and not args.eval:
                    torch.save(l_policy.state_dict(),
                               os.path.join(log_dir, "model_best.local"))
                    best_local_loss = np.mean(l_action_losses)

                # Save Global Policy Model
                if len(g_episode_rewards) >= 100 and \
                        (np.mean(g_episode_rewards) >= best_g_reward) \
                        and not args.eval:
                    torch.save(g_policy.state_dict(),
                               os.path.join(log_dir, "model_best.global"))
                    best_g_reward = np.mean(g_episode_rewards)

            # Save periodic models
            if (total_num_steps * num_scenes) % args.save_periodic < \
                    num_scenes:
                step = total_num_steps * num_scenes
                if args.train_slam:
                    torch.save(
                        nslam_module.state_dict(),
                        os.path.join(dump_dir,
                                     "periodic_{}.slam".format(step)))
                if args.train_local:
                    torch.save(
                        l_policy.state_dict(),
                        os.path.join(dump_dir,
                                     "periodic_{}.local".format(step)))
                if args.train_global:
                    torch.save(
                        g_policy.state_dict(),
                        os.path.join(dump_dir,
                                     "periodic_{}.global".format(step)))
            # ------------------------------------------------------------------

    print("Finishing Episodes")

    # Print and save model performance numbers during evaluation
    if args.eval:
        logfile = open("{}/explored_area.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_area_log[e].shape[0]):
                logfile.write(str(explored_area_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        logfile = open("{}/explored_ratio.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_ratio_log[e].shape[0]):
                logfile.write(str(explored_ratio_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        log = "Final Exp Area: \n"
        for i in range(explored_area_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_area_log[:, :, i]))

        log += "\nFinal Exp Ratio: \n"
        for i in range(explored_ratio_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_ratio_log[:, :, i]))

        print(log)
        logging.info(log)

    imgs_1 = local_map[0, :, :, :].cpu().numpy()
    imgs_2 = local_map[1, :, :, :].cpu().numpy()
    obs_all = _process_obs_for_display(obs)
    # fig, axis = plt.subplots(1, 3)
    # axis[0].imshow(obs_all[0])
    # axis[1].imshow(imgs_1[0], cmap='gray')
    # axis[2].imshow(imgs_1[1], cmap='gray')

    return

    # The interactive viewer below is currently unreachable because of the
    # early return above; remove the return to step the two agents manually
    # from the keyboard (w/a/d send discrete actions, f exits).
    cv2.imshow("Camera", transform_rgb_bgr(obs_all[0]))
    cv2.imshow("Proj", imgs_1[0])
    cv2.imshow("Map", imgs_1[1])
    cv2.imshow("Camera2", transform_rgb_bgr(obs_all[1]))
    cv2.imshow("Proj2", imgs_2[0])
    cv2.imshow("Map2", imgs_2[1])

    action = 1
    while action != 4:
        k = cv2.waitKey(0)
        if k == 119:    # 'w' -> action 1 for both envs
            action = 1
            action_2 = 1
        elif k == 100:  # 'd' -> action 3 for env 0, action 1 for env 1
            action = 3
            action_2 = 1
        elif k == 97:   # 'a' -> action 2 for both envs
            action = 2
            action_2 = 2
        elif k == 102:  # 'f' -> exit the loop
            action = 4
            break
        else:           # any other key -> action 1 for both envs
            action = 1
            action_2 = 1

        last_obs = obs.detach()
        obs, rew, done, infos = envs.step(
            torch.from_numpy(np.array([action, action_2])))
        obs_all = _process_obs_for_display(obs)
        cv2.imshow("Camera", transform_rgb_bgr(obs_all[0]))
        cv2.imshow("Camera2", transform_rgb_bgr(obs_all[1]))

        poses = torch.from_numpy(
            np.asarray([
                infos[env_idx]['sensor_pose']
                for env_idx in range(num_scenes)
            ])).float().to(device)

        _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \
            nslam_module(last_obs, obs, poses,
                         local_map[:, 0, :, :],
                         local_map[:, 1, :, :],
                         local_pose,
                         build_maps=True)

        imgs_1 = local_map[0, :, :, :].cpu().numpy()
        imgs_2 = local_map[1, :, :, :].cpu().numpy()
        cv2.imshow("Proj", imgs_1[0])
        cv2.imshow("Map", imgs_1[1])
        cv2.imshow("Proj2", imgs_2[0])
        cv2.imshow("Map2", imgs_2[1])
        # plt.show()
print("\n\nDone\n\n")