# Externals assumed from the surrounding repo: Network (the Q-network) and ReplayBuffer.
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from unityagents import UnityEnvironment  # Udacity ML-Agents build exposing brains


class DQNAgent:
    def __init__(
        self,
        env: UnityEnvironment,
        memory_size: int,
        batch_size: int,
        target_update: int,
        epsilon_decay: float = 1 / 2000,
        max_epsilon: float = 1.0,
        min_epsilon: float = 0.1,
        gamma: float = 0.99,
    ):
        self.brain_name = env.brain_names[0]
        self.brain = env.brains[self.brain_name]
        env_info = env.reset(train_mode=True)[self.brain_name]
        self.env = env

        action_size = self.brain.vector_action_space_size
        state = env_info.vector_observations[0]
        state_size = len(state)

        self.obs_dim = state_size
        self.action_dim = action_size  # number of discrete actions
        # the buffer stores a single discrete action index per transition
        self.memory = ReplayBuffer(self.obs_dim, 1, memory_size, batch_size)
        self.batch_size = batch_size
        self.target_update = target_update
        self.epsilon_decay = epsilon_decay
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.gamma = gamma
        self.epsilon = max_epsilon

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # online and target Q-networks
        self.dqn = Network(self.obs_dim, self.action_dim).to(self.device)
        self.dqn_target = Network(self.obs_dim, self.action_dim).to(self.device)
        self.dqn_target.load_state_dict(self.dqn.state_dict())
        self.dqn_target.eval()

        self.optimizer = optim.Adam(self.dqn.parameters(), lr=5e-5)

        self.transition = list()
        self.is_test = False

    def select_action(self, state: np.ndarray) -> np.int64:
        """Select an action epsilon-greedily for the given state."""
        if self.epsilon > np.random.random():
            # explore: uniform random action index
            selected_action = np.random.randint(0, self.action_dim)
        else:
            # exploit: action with the highest predicted Q-value
            q_values = self.dqn(torch.FloatTensor(state).to(self.device))
            selected_action = np.argmax(q_values.detach().cpu().numpy())

        if not self.is_test:
            self.transition = [state, selected_action]

        return selected_action

    def step(self, action: np.int64) -> Tuple[np.ndarray, np.float64, bool]:
        """Take an action and return the environment's response."""
        env_info = self.env.step(action)[self.brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]

        if not self.is_test:
            self.transition += [reward, next_state, done]
            self.memory.store(*self.transition)

        return next_state, reward, done

    def update_model(self) -> float:
        """Update the online network by one gradient descent step."""
        samples = self.memory.sample_batch()
        loss = self._compute_dqn_loss(samples)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def train(self, num_episode: int, max_iteration: int = 1000, plotting_interval: int = 400):
        """Train the agent."""
        self.is_test = False

        update_cnt = 0
        epsilons = []
        losses = []
        avg_losses = []
        scores = []
        avg_scores = []

        for episode in range(num_episode):
            env_info = self.env.reset(train_mode=True)[self.brain_name]
            state = env_info.vector_observations[0]
            score = 0

            for _ in range(max_iteration):
                action = self.select_action(state)
                next_state, reward, done = self.step(action)

                state = next_state
                score += reward

                # learn once the replay buffer holds at least one batch
                if len(self.memory) > self.batch_size:
                    loss = self.update_model()
                    losses.append(loss)
                    update_cnt += 1

                    # linearly decay epsilon
                    self.epsilon = max(
                        self.min_epsilon,
                        self.epsilon - (self.max_epsilon - self.min_epsilon) * self.epsilon_decay,
                    )
                    epsilons.append(self.epsilon)

                    # periodically sync the target network
                    if update_cnt % self.target_update == 0:
                        self._target_hard_update()

                if done:
                    break

            scores.append(score)
            if losses:
                avg_losses.append(np.mean(losses))
                losses = []
            if episode >= 100:
                avg_scores.append(np.mean(scores[-100:]))

            if (episode + 1) % plotting_interval == 0:
                self._plot(episode, scores, avg_scores, avg_losses, epsilons)

        torch.save(self.dqn.state_dict(), "model_weight/dqn.pt")

    def test(self):
        """Run the trained agent for one episode."""
        self.is_test = True

        env_info = self.env.reset(train_mode=False)[self.brain_name]
        state = env_info.vector_observations[0]
        done = False
        score = 0

        while not done:
            action = self.select_action(state)
            next_state, reward, done = self.step(action)
            state = next_state
            score += reward

        print("score: ", score)
        self.env.close()

    def _compute_dqn_loss(self, samples: Dict[str, np.ndarray]) -> torch.Tensor:
        """Compute and return the DQN loss for a batch of transitions."""
        device = self.device
        state = torch.FloatTensor(samples["obs"]).to(device)
        next_state = torch.FloatTensor(samples["next_obs"]).to(device)
        action = torch.LongTensor(samples["acts"]).reshape(-1, 1).to(device)
        reward = torch.FloatTensor(samples["rews"]).reshape(-1, 1).to(device)
        done = torch.FloatTensor(samples["done"]).reshape(-1, 1).to(device)

        # Q(s, a) for the actions actually taken
        curr_q_value = self.dqn(state).gather(1, action)
        # max_a' Q_target(s', a'), detached so no gradient reaches the target net
        next_q_value = self.dqn_target(next_state).max(dim=1, keepdim=True)[0].detach()
        # zero out the bootstrap term for terminal transitions
        mask = 1 - done
        target = reward + self.gamma * next_q_value * mask

        return F.smooth_l1_loss(curr_q_value, target)

    def _target_hard_update(self):
        """Copy the online network's weights into the target network."""
        self.dqn_target.load_state_dict(self.dqn.state_dict())

    def _plot(
        self,
        episode: int,
        scores: List[float],
        avg_scores: List[float],
        losses: List[float],
        epsilons: List[float],
    ):
        """Plot the training process."""
        plt.figure(figsize=(20, 5))

        plt.subplot(141)
        if len(avg_scores) > 0:
            plt.title("Average reward over 100 episodes. Score: %s" % avg_scores[-1])
        else:
            plt.title("Average reward over 100 episodes.")
        plt.plot([100 + i for i in range(len(avg_scores))], avg_scores)

        plt.subplot(142)
        plt.title("episode %s. Score: %s" % (episode, np.mean(scores[-10:])))
        plt.plot(scores)

        plt.subplot(143)
        plt.title("Loss")
        plt.plot(losses)

        plt.subplot(144)
        plt.title("epsilons")
        plt.plot(epsilons)

        plt.savefig("plots/dqn_result.png")
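# A minimal usage sketch for the agent above. The environment build name
# ("Banana.x86_64") and all hyperparameter values here are illustrative
# assumptions, not values taken from the original code.
env = UnityEnvironment(file_name="Banana.x86_64")  # hypothetical build path
agent = DQNAgent(
    env,
    memory_size=100000,  # assumed replay capacity
    batch_size=64,       # assumed minibatch size
    target_update=100,   # assumed target-sync period (in updates)
)
agent.train(num_episode=600)
agent.test()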
# Externals assumed from the surrounding repo: ConverterTorch, load_yaml,
# Network, input_transform_cityscapes, Colorize, spline, plus the module-level
# segmentation `model` and the `Modular` flag.
from pathlib import Path

import numpy as np
import torch
import torchvision
from PIL import Image
from torchvision.transforms import ToPILImage


class Planner(object):
    def __init__(self, path_to_conf_file):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.transform = torchvision.transforms.ToTensor()
        self.converter = ConverterTorch().to(self.device)

        self.target_index = 65
        self.speed_mult = 2.5

        path_to_conf_file = Path(path_to_conf_file)
        config = load_yaml(path_to_conf_file.parent / 'config.yaml')

        self.net = Network(**config['model_args']).to(self.device)
        self.net.load_state_dict(torch.load(path_to_conf_file))
        self.net.eval()

    @torch.no_grad()
    def run_step(self, rgb, rgb_forward, viz=None):
        if Modular:  # modular pipeline: segment both frames before planning
            rgb = Image.fromarray(rgb).convert('RGB')
            img = input_transform_cityscapes(rgb)
            img = img.cuda().unsqueeze(0)

            rgb_forward = Image.fromarray(rgb_forward).convert('RGB')
            img_forward = input_transform_cityscapes(rgb_forward)
            img_forward = img_forward.cuda().unsqueeze(0)

            # segment the main camera frame and colorize the label map
            output = model(img)
            label = output[0].max(0)[1].byte().cpu().data
            label_color = Colorize()(label.unsqueeze(0))
            rgb = ToPILImage()(label_color)
            rgb.save('./seg.jpg')

            # segment the forward camera frame
            output = model(img_forward)
            label = output[0].max(0)[1].byte().cpu().data
            label_color = Colorize()(label.unsqueeze(0))
            rgb_forward = ToPILImage()(label_color)
            rgb_forward.save('./seg_2.jpg')

        img = self.transform(rgb).to(self.device).unsqueeze(0)
        img_forward = self.transform(rgb_forward).to(self.device).unsqueeze(0)
        model_input = torch.cat((img_forward, img), 1)

        cam_coords = self.net(model_input)
        # map the network's normalized outputs back to pixel coordinates
        cam_coords[..., 0] = (cam_coords[..., 0] + 1) / 2 * img.shape[-1]
        cam_coords[..., 1] = (cam_coords[..., 1] + 1) / 2 * img.shape[-2]

        map_coords = self.converter.cam_to_map(cam_coords).cpu().numpy().squeeze()
        world_coords = self.converter.cam_to_world(cam_coords).cpu().numpy().squeeze()

        # estimate target speed from the spacing of consecutive waypoints
        target_speed = np.sqrt(((world_coords[:2] - world_coords[1:3]) ** 2).sum(1).mean())
        target_speed *= self.speed_mult

        # boost speed on straights; ease off when the heading to the near and
        # far waypoints diverges (i.e. a turn is coming)
        theta1 = np.degrees(np.arctan2(world_coords[0][0], world_coords[0][1]))
        theta2 = np.degrees(np.arctan2(world_coords[4][0], world_coords[4][1]))
        if abs(theta2 - theta1) < 2:
            target_speed *= self.speed_mult
        else:
            target_speed *= 1.2

        # fit smooth splines through the waypoints (the tiny jitter breaks ties)
        curve = spline(map_coords + 1e-8 * np.random.rand(*map_coords.shape), 100)
        target = curve[self.target_index]

        curve_world = spline(world_coords + 1e-8 * np.random.rand(*world_coords.shape), 100)
        target_world = curve_world[self.target_index]

        if viz:
            viz.planner_draw(cam_coords.cpu().numpy().squeeze(), map_coords, curve, target)

        return target_world, target_speed
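# A minimal sketch of calling Planner.run_step, assuming both inputs are
# HxWx3 uint8 camera frames; the checkpoint path and frame size below are
# hypothetical placeholders, not values from the original code.
planner = Planner("checkpoints/planner/model.th")      # hypothetical checkpoint
rgb = np.zeros((240, 320, 3), dtype=np.uint8)          # placeholder main frame
rgb_forward = np.zeros((240, 320, 3), dtype=np.uint8)  # placeholder forward frame
target_world, target_speed = planner.run_step(rgb, rgb_forward)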
# Externals assumed from the surrounding repo: parse_args, cfg_from_file, and
# Network (the person search model).
import logging
import os.path as osp
from glob import glob

import coloredlogs
import cv2
import numpy as np
import torch


if __name__ == "__main__":
    args = parse_args()
    coloredlogs.install(
        level="INFO",
        fmt="%(asctime)s %(filename)s %(levelname)s %(message)s",
    )
    logging.info("Called with args: " + str(args))
    if args.cfg:
        cfg_from_file(args.cfg)

    net = Network()
    checkpoint = torch.load(osp.abspath(args.checkpoint))
    net.load_state_dict(checkpoint["model"])
    logging.info("Loaded checkpoint from: %s" % args.checkpoint)
    net.eval()

    device = torch.device("cuda:%s" % args.gpu if args.gpu != -1 else "cpu")
    net.to(device)

    # Extract the feature of the query person
    query_img = cv2.imread("imgs/query.jpg")
    query_roi = np.array([0, 0, 466, 943])  # [x1, y1, x2, y2]
    query_feat = net.inference(query_img, query_roi).view(-1, 1)

    # Detect people and extract their features in each gallery image
    gallery_imgs = sorted(glob("imgs/gallery*.jpg"))
    for gallery_img in gallery_imgs:
        logging.info("Detecting %s" % gallery_img)
        detections, features = net.inference(cv2.imread(gallery_img))
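        # A hedged sketch of the matching step that would typically follow:
        # score each detection against the query feature. This assumes
        # `features` is an (N, d) torch tensor of L2-normalized embeddings and
        # `detections` holds the N corresponding boxes; neither shape is
        # confirmed by the snippet above.
        similarities = features.mm(query_feat).squeeze(1)  # (N,) cosine similarities
        best = int(similarities.argmax())
        logging.info(
            "Best match in %s: box %s, similarity %.3f"
            % (gallery_img, detections[best], similarities[best].item())
        )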