def build_graph(params, env_id, max_num_episodes=1000):
    """Interactively build a topological map of an environment and checkpoint it.

    The environment is stepped with the most recent entry of the module-level
    ``current_actions`` (action 0 when it is empty). Whenever the module-level
    ``add_landmark`` flag is set, the current observation is added to the map
    as a new node, the flag is cleared, and the map is checkpointed and shown.
    Stepping stops when the module-level ``terminate`` flag becomes true or
    after ``max_num_episodes`` episodes.

    Args:
        params: experiment parameters; only ``experiment_dir()`` is used here.
        env_id: environment identifier forwarded to ``create_env``.
        max_num_episodes: upper bound on the number of episodes to play.

    Returns:
        Always 0 (presumably used as an exit code -- confirm against the caller).
    """
    # Declared once at function scope; the flag is cleared after a landmark is added.
    global add_landmark

    def make_env_func():
        e = create_env(env_id, mode='test', skip_frames=False)
        e.seed(0)
        return e

    checkpoint_dir = model_dir(params.experiment_dir())
    map_img, coord_limits = generate_env_map(make_env_func)
    env = make_env_func()

    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm the placement of the in-loop checkpoint/preview.
    try:
        m = None

        for _ in range(max_num_episodes):
            env_obs, info = reset_with_info(env)
            obs = main_observation(env_obs)
            done = False

            if m is None:
                # The map is created lazily because it needs an initial observation.
                m = TopologicalMap(obs, directed_graph=False, initial_info=info, verbose=True)
                m.maybe_load_checkpoint(checkpoint_dir)

            while not done and not terminate:
                env.render()

                action = current_actions[-1] if len(current_actions) > 0 else 0

                env_obs, _rew, done, info = env.step(action)
                obs = main_observation(env_obs)

                if add_landmark:
                    # noinspection PyProtectedMember
                    new_idx = m._add_new_node(obs=obs, pos=get_position(info), angle=get_angle(info))
                    log.info('Added landmark idx %d', new_idx)
                    add_landmark = False

                    # Refresh the on-disk graph image and preview it.
                    res = m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
                    cv2.imshow('map', cv2.imread(res.graph_filename))
                    cv2.waitKey(50)

            if terminate:
                break

            # Episode finished on its own: show the last frame briefly before resetting.
            env.render()
            time.sleep(0.2)

        m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
        log.debug('Set breakpoint here to edit graph edges before saving...')

        log.info('Saving to %s...', checkpoint_dir)
        m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
    finally:
        # Release the environment even if stepping or checkpointing fails.
        env.close()

    return 0
def record_trajectory(params, env_id):
    """Record one manually controlled episode as a trajectory plus a topological map.

    Each frame, the current observation, the chosen action and the info dict
    are appended to a ``Trajectory`` and passed to ``TopologicalMap.add_landmark``.
    The action is the most recent entry of the module-level ``current_actions``
    (0 when it is empty). The loop runs until the episode is done or the
    module-level ``terminate`` flag is set, and is paced to roughly 15 frames
    per second.

    Args:
        params: experiment parameters; only ``experiment_dir()`` is used here.
        env_id: environment identifier forwarded to ``create_env``.

    Returns:
        Always 0 (presumably used as an exit code -- confirm against the caller).
    """
    def make_env_func():
        e = create_env(env_id, skip_frames=True)
        e.seed(0)
        return e

    desired_fps = 15
    frame_budget_seconds = 1.0 / desired_fps

    env = make_env_func()
    try:
        map_img, coord_limits = generate_env_map(make_env_func)

        env_obs, info = reset_with_info(env)
        obs = main_observation(env_obs)
        done = False

        m = TopologicalMap(obs, directed_graph=False, initial_info=info, verbose=True)
        trajectory = Trajectory(env_idx=-1)
        t = Timing()

        while not done and not terminate:
            with t.timeit('one_frame'):
                env.render()

                action = current_actions[-1] if len(current_actions) > 0 else 0

                # Record the state the action was taken in, then advance the env.
                trajectory.add(obs, action, info)
                m.add_landmark(obs, info, update_curr_landmark=True)

                env_obs, _rew, done, info = env.step(action)
                obs = main_observation(env_obs)

            # Sleep away whatever is left of this frame's time budget.
            took_seconds = t.one_frame
            time.sleep(max(0.0, frame_budget_seconds - took_seconds))

        # Show the final frame briefly before saving.
        env.render()
        time.sleep(0.2)

        trajectory_dir = trajectory.save(params.experiment_dir())
        m.save_checkpoint(trajectory_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
    finally:
        # Release the environment even if recording or saving fails.
        env.close()

    return 0
def test_env_map(self):
    """Check the types returned by generate_env_map for the test environment.

    The map image must be a numpy array and the coordinate limits a tuple.
    Flip ``debug_show`` locally to view the generated map in an OpenCV window.
    """
    env_map = generate_env_map(self.make_env)
    map_img, coord_limits = env_map

    self.assertIsInstance(coord_limits, tuple)
    self.assertIsInstance(map_img, np.ndarray)

    debug_show = False  # set to True only for debug
    if not debug_show:
        return

    # Imported lazily so the test does not need OpenCV unless debugging.
    import cv2
    cv2.imshow('map', map_img)
    cv2.waitKey()
def __init__(self, make_env_func, params):
    """Set up the random agent: action space, bookkeeping fields and summary writer.

    Args:
        make_env_func: zero-argument callable that creates an environment.
        params: agent/experiment parameters forwarded to the base class.
    """
    super(AgentRandom, self).__init__(params)

    self.make_env_func = make_env_func

    # A throwaway environment is created only to read its action space.
    probe_env = make_env_func()
    self.action_space = probe_env.action_space
    probe_env.close()

    self.objectives = None
    self.last_action = None
    self.best_reward = None

    experiment_dir = self.params.experiment_dir()
    self.summary_writer = tf.summary.FileWriter(summaries_dir(experiment_dir))

    if not self.params.use_env_map:
        return

    # Map attributes are only set when use_env_map is enabled.
    self.map_img, self.coord_limits = generate_env_map(make_env_func)
def __init__(self, make_env_func, params):
    """Initialize PPO computation graph and some auxiliary tensors.

    Creates the step counters, input placeholders, the actor-critic model,
    PPO objectives, separate Adam optimizers for actor and critic, and the
    summary writer and merged summaries.

    Args:
        make_env_func: zero-argument callable that creates an environment.
        params: agent/experiment parameters forwarded to the base class.
    """
    super(AgentPPO, self).__init__(params)

    self.actor_step = tf.Variable(0, trainable=False, dtype=tf.int64, name='actor_step')
    self.critic_step = tf.Variable(0, trainable=False, dtype=tf.int64, name='critic_step')

    self.make_env_func = make_env_func
    env = make_env_func()  # we need the env to query observation shape, number of actions, etc.
    try:
        obs_space = main_observation_space(env)
        self.obs_shape = [-1] + list(obs_space.shape)
        self.ph_observations = placeholder_from_space(obs_space)
        self.ph_actions = placeholder_from_space(env.action_space)  # actions sampled from the policy
        self.ph_advantages, self.ph_returns, self.ph_old_action_probs = placeholders(None, None, None)

        self.actor_critic = ActorCritic(env, self.ph_observations, self.params)
    finally:
        # Close the temporary env even if graph construction fails.
        env.close()

    self.objectives = self.add_ppo_objectives(
        self.actor_critic,
        self.ph_actions, self.ph_old_action_probs, self.ph_advantages, self.ph_returns,
        self.params,
        self.actor_step,
    )

    # optimizers: actor and critic are trained separately, each with its own global step
    actor_opt = tf.train.AdamOptimizer(learning_rate=self.params.learning_rate, name='actor_opt')
    self.train_actor = actor_opt.minimize(self.objectives.actor_loss, global_step=self.actor_step)

    critic_opt = tf.train.AdamOptimizer(learning_rate=self.params.learning_rate, name='critic_opt')
    self.train_critic = critic_opt.minimize(self.objectives.critic_loss, global_step=self.critic_step)

    self.add_ppo_summaries()

    summary_dir = summaries_dir(self.params.experiment_dir())
    self.summary_writer = tf.summary.FileWriter(summary_dir)
    self.actor_summaries = merge_summaries(collections=['actor'])
    self.critic_summaries = merge_summaries(collections=['critic'])

    # Map attributes are only set when use_env_map is enabled.
    if self.params.use_env_map:
        self.map_img, self.coord_limits = generate_env_map(make_env_func)
def trajectory_to_map(params, env_id):
    """Build sparse and dense topological maps from recorded trajectories.

    Trajectories come from one of two sources:
      * ``params.persistent_map_checkpoint`` is None: every ``traj_*`` directory
        under ``<experiment_dir>/.trajectories`` is loaded from its
        ``trajectory.pickle`` file;
      * otherwise: trajectories are rebuilt from the nodes of the map loaded
        from that checkpoint.

    A sparse map is built with ``trajectories_to_sparse_map``, a dense map with
    ``MapBuilder.add_trajectory_to_dense_map``; the dense map is saved under
    ``<trajectories_dir>/dense_map`` and the observations of corresponding
    sparse/dense landmarks are asserted to be equal.

    Side effects: sets ``params.num_envs = 1`` and ``params.with_timer = False``.

    Args:
        params: experiment parameters (mutated as described above).
        env_id: environment identifier forwarded to ``create_env``.

    Returns:
        Always 0 (presumably used as an exit code -- confirm against the caller).
    """
    def make_env_func():
        e = create_env(env_id)
        e.seed(0)
        return e

    params.num_envs = 1
    params.with_timer = False
    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm loop boundaries against the original layout.
    try:
        map_img, coord_limits = generate_env_map(make_env_func)

        experiment_dir = params.experiment_dir()
        trajectories_dir = ensure_dir_exists(join(experiment_dir, '.trajectories'))

        if params.persistent_map_checkpoint is None:
            prefix = 'traj_'
            all_trajectories = glob.glob(f'{trajectories_dir}/{prefix}*')
            all_trajectories.sort()

            trajectories = []
            for i, trajectory_dir in enumerate(all_trajectories):
                with open(join(trajectory_dir, 'trajectory.pickle'), 'rb') as traj_file:
                    traj = Trajectory(i)
                    # NOTE: pickle.load can execute arbitrary code; only load
                    # trajectory files produced by a trusted source.
                    traj.__dict__.update(pickle.load(traj_file))
                    trajectories.append(traj)
        else:
            loaded_persistent_map = TopologicalMap.create_empty()
            loaded_persistent_map.maybe_load_checkpoint(params.persistent_map_checkpoint)

            num_trajectories = loaded_persistent_map.num_trajectories
            trajectories = [Trajectory(i) for i in range(num_trajectories)]

            # Node 0 is prepended to every trajectory except the first; trajectory 0
            # presumably receives it through its own traj_idx in the loop below -- confirm.
            zero_frame = loaded_persistent_map.graph.nodes[0]
            for i in range(1, num_trajectories):
                trajectories[i].add(zero_frame['obs'], -1, zero_frame['info'])

            for _, d in loaded_persistent_map.graph.nodes(data=True):
                trajectories[d['traj_idx']].add(d['obs'], -1, d['info'])

            log.info('Loaded %d trajectories from the map', num_trajectories)
            log.info('Trajectory lengths %r', [len(t) for t in trajectories])

        def init_map():
            # A fresh map rooted at the first frame of the first trajectory.
            return TopologicalMap(
                trajectories[0].obs[0],
                directed_graph=False,
                initial_info=trajectories[0].infos[0],
            )

        map_builder = MapBuilder(agent)
        # trajectories = [map_builder.sparsify_trajectory(t) for t in trajectories]  # TODO

        sparse_map = trajectories_to_sparse_map(
            init_map, trajectories, trajectories_dir, agent, map_img, coord_limits,
        )

        test_pick_best_trajectory = True
        if test_pick_best_trajectory:
            # A deep copy is passed so the trajectories used below are left untouched.
            pick_best_trajectory(init_map, agent, copy.deepcopy(trajectories))

        m = init_map()
        for t in trajectories:
            m = map_builder.add_trajectory_to_dense_map(m, t)

        map_builder.calc_distances_to_landmarks(sparse_map, m)
        map_builder.sieve_landmarks_by_distance(sparse_map)

        dense_map_dir = ensure_dir_exists(join(trajectories_dir, 'dense_map'))
        m.save_checkpoint(dense_map_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)

        show_landmarks = False  # set to True only for debug

        # check if landmark correspondence between dense and sparse map is correct
        for node, data in sparse_map.graph.nodes.data():
            traj_idx = data['traj_idx']
            frame_idx = data['frame_idx']

            dense_map_landmark = m.frame_to_node_idx[traj_idx][frame_idx]
            log.info('Sparse map node %d corresponds to dense map node %d', node, dense_map_landmark)
            log.info('Sparse map node %d distance %d', node, data['distance'])

            obs_sparse = sparse_map.get_observation(node)
            obs_dense = m.get_observation(dense_map_landmark)

            assert np.array_equal(obs_sparse, obs_dense)

            if show_landmarks:
                import cv2
                cv2.imshow('sparse', obs_sparse)
                cv2.imshow('dense', obs_dense)
                cv2.waitKey()
    finally:
        # Always finalize the agent, even if map building fails.
        agent.finalize()

    return 0