Code example #1
0
def build_graph(params, env_id, max_num_episodes=1000):
    """Interactively build a topological map by playing episodes of the given env.

    Relies on module-level globals (``terminate``, ``current_actions``,
    ``add_landmark``) — presumably toggled by a keyboard/input handler defined
    elsewhere in this file; TODO confirm.

    :param params: experiment parameters object providing experiment_dir()
    :param env_id: id of the environment to create
    :param max_num_episodes: upper bound on the number of episodes to play
    :return: 0 (shell-style success code)
    """
    def make_env_func():
        # fixed seed so interactive sessions are reproducible
        e = create_env(env_id, mode='test', skip_frames=False)
        e.seed(0)
        return e

    checkpoint_dir = model_dir(params.experiment_dir())
    map_img, coord_limits = generate_env_map(make_env_func)
    env = make_env_func()

    m = None  # TopologicalMap; created lazily from the first observation

    for _ in range(max_num_episodes):
        env_obs, info = reset_with_info(env)
        obs = main_observation(env_obs)
        done = False

        # create the map on the first episode; subsequent episodes reuse it
        if m is None:
            m = TopologicalMap(obs, directed_graph=False, initial_info=info, verbose=True)
            m.maybe_load_checkpoint(checkpoint_dir)

        while not done and not terminate:
            env.render()

            # replay the most recently pressed action, or no-op action 0
            if len(current_actions) > 0:
                action = current_actions[-1]
            else:
                action = 0

            env_obs, rew, done, info = env.step(action)
            obs = main_observation(env_obs)

            global add_landmark
            if add_landmark:
                # noinspection PyProtectedMember
                new_idx = m._add_new_node(obs=obs, pos=get_position(info), angle=get_angle(info))
                log.info('Added landmark idx %d', new_idx)
                add_landmark = False  # consume the flag: one landmark per key press

                # persist immediately and display the freshly rendered map image
                res = m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
                cv2.imshow('map', cv2.imread(res.graph_filename))
                cv2.waitKey(50)

        if terminate:
            break
        else:
            env.render()
            time.sleep(0.2)

    # NOTE(review): the checkpoint is saved twice below; the first save plus
    # the breakpoint hint appears to support editing graph edges in a debugger
    # before the final save — confirm this double save is intentional.
    m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)
    log.debug('Set breakpoint here to edit graph edges before saving...')

    log.info('Saving to %s...', checkpoint_dir)
    m.save_checkpoint(checkpoint_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)

    env.close()
    return 0
Code example #2
0
def record_trajectory(params, env_id):
    """Play one interactive episode and record it as a Trajectory plus a map checkpoint.

    Reads the module-level ``terminate`` flag and ``current_actions`` list
    (maintained by the input handler elsewhere in this file).

    :return: 0 (shell-style success code)
    """
    def make_env_func():
        instance = create_env(env_id, skip_frames=True)
        instance.seed(0)  # deterministic env for reproducible recordings
        return instance

    env = make_env_func()
    map_img, coord_limits = generate_env_map(make_env_func)

    env_obs, info = reset_with_info(env)
    obs = main_observation(env_obs)
    done = False

    m = TopologicalMap(obs, directed_graph=False, initial_info=info, verbose=True)

    trajectory = Trajectory(env_idx=-1)
    frame = 0

    t = Timing()

    desired_fps = 15  # cap the interactive loop at ~15 frames per second
    while not done and not terminate:
        with t.timeit('one_frame'):
            env.render()

            # the most recently pressed key wins; idle defaults to action 0
            action = current_actions[-1] if current_actions else 0

            # record the pre-step observation together with the chosen action
            trajectory.add(obs, action, info)
            m.add_landmark(obs, info, update_curr_landmark=True)

            env_obs, rew, done, info = env.step(action)
            obs = main_observation(env_obs)

        # sleep off whatever time is left in this frame's budget
        time.sleep(max(0.0, (1.0 / desired_fps) - t.one_frame))

        frame += 1

    env.render()
    time.sleep(0.2)

    trajectory_dir = trajectory.save(params.experiment_dir())
    m.save_checkpoint(trajectory_dir, map_img=map_img, coord_limits=coord_limits, verbose=True)

    env.close()
    return 0
Code example #3
0
    def test_env_map(self):
        """generate_env_map should yield an image array and a tuple of coordinate limits."""
        env_map, limits = generate_env_map(self.make_env)
        self.assertIsInstance(limits, tuple)
        self.assertIsInstance(env_map, np.ndarray)

        debug_view = False  # flip to True only to visually inspect the map
        if debug_view:
            import cv2
            cv2.imshow('map', env_map)
            cv2.waitKey()
Code example #4
0
    def __init__(self, make_env_func, params):
        """Set up the random agent: query the env's action space and prepare summaries."""
        super(AgentRandom, self).__init__(params)

        self.make_env_func = make_env_func

        # spin up a throwaway env only to read its action space
        probe_env = make_env_func()
        self.action_space = probe_env.action_space
        probe_env.close()

        self.objectives = None

        self.last_action = None
        self.best_reward = None

        self.summary_writer = tf.summary.FileWriter(summaries_dir(self.params.experiment_dir()))

        if self.params.use_env_map:
            self.map_img, self.coord_limits = generate_env_map(make_env_func)
Code example #5
0
    def __init__(self, make_env_func, params):
        """Build the PPO computation graph: placeholders, actor-critic net, objectives, optimizers, summaries."""
        super(AgentPPO, self).__init__(params)

        # separate global steps so actor and critic train independently
        self.actor_step = tf.Variable(0, trainable=False, dtype=tf.int64, name='actor_step')
        self.critic_step = tf.Variable(0, trainable=False, dtype=tf.int64, name='critic_step')

        self.make_env_func = make_env_func

        # temporary env instance, used only to query observation/action spaces
        tmp_env = make_env_func()

        self.obs_shape = [-1] + list(main_observation_space(tmp_env).shape)
        self.ph_observations = placeholder_from_space(main_observation_space(tmp_env))
        self.ph_actions = placeholder_from_space(tmp_env.action_space)  # actions sampled from the policy
        self.ph_advantages, self.ph_returns, self.ph_old_action_probs = placeholders(None, None, None)

        self.actor_critic = ActorCritic(tmp_env, self.ph_observations, self.params)

        tmp_env.close()

        self.objectives = self.add_ppo_objectives(
            self.actor_critic,
            self.ph_actions, self.ph_old_action_probs, self.ph_advantages, self.ph_returns,
            self.params,
            self.actor_step,
        )

        # one Adam optimizer per network, each advancing its own step counter
        actor_optimizer = tf.train.AdamOptimizer(learning_rate=self.params.learning_rate, name='actor_opt')
        self.train_actor = actor_optimizer.minimize(self.objectives.actor_loss, global_step=self.actor_step)

        critic_optimizer = tf.train.AdamOptimizer(learning_rate=self.params.learning_rate, name='critic_opt')
        self.train_critic = critic_optimizer.minimize(self.objectives.critic_loss, global_step=self.critic_step)

        self.add_ppo_summaries()

        self.summary_writer = tf.summary.FileWriter(summaries_dir(self.params.experiment_dir()))
        self.actor_summaries = merge_summaries(collections=['actor'])
        self.critic_summaries = merge_summaries(collections=['critic'])

        if self.params.use_env_map:
            self.map_img, self.coord_limits = generate_env_map(make_env_func)
Code example #6
0
def trajectory_to_map(params, env_id):
    """Convert recorded trajectories into sparse and dense topological maps.

    Trajectories are either loaded from pickled files under the experiment's
    ``.trajectories`` directory or reconstructed from the nodes of a persistent
    map checkpoint. Both a sparse and a dense map are then built and saved, and
    the landmark correspondence between the two maps is verified.

    :param params: experiment parameters (experiment_dir, num_envs, etc.)
    :param env_id: id of the environment to create
    :return: 0 (shell-style success code)
    """
    def make_env_func():
        e = create_env(env_id)
        e.seed(0)  # fixed seed for reproducibility
        return e

    params.num_envs = 1
    params.with_timer = False
    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    map_img, coord_limits = generate_env_map(make_env_func)

    experiment_dir = params.experiment_dir()
    trajectories_dir = ensure_dir_exists(join(experiment_dir, '.trajectories'))

    if params.persistent_map_checkpoint is None:
        # load every pickled trajectory stored under .trajectories/traj_*
        prefix = 'traj_'
        all_trajectories = glob.glob(f'{trajectories_dir}/{prefix}*')
        all_trajectories.sort()

        trajectories = []
        for i, trajectory_dir in enumerate(all_trajectories):
            with open(join(trajectory_dir, 'trajectory.pickle'),
                      'rb') as traj_file:
                traj = Trajectory(i)
                traj.__dict__.update(pickle.load(traj_file))
                trajectories.append(traj)
    else:
        # reconstruct trajectories from the nodes of a persistent map checkpoint
        loaded_persistent_map = TopologicalMap.create_empty()
        loaded_persistent_map.maybe_load_checkpoint(
            params.persistent_map_checkpoint)

        num_trajectories = loaded_persistent_map.num_trajectories
        trajectories = [Trajectory(i) for i in range(num_trajectories)]

        # every trajectory after the first is prefixed with the map's origin
        # frame (node 0) — presumably so all trajectories start at the same
        # location; trajectory 0 receives node 0 via the loop below. TODO confirm
        zero_frame = loaded_persistent_map.graph.nodes[0]
        for i in range(1, num_trajectories):
            trajectories[i].add(zero_frame['obs'], -1, zero_frame['info'])

        # distribute each map node's frame into its originating trajectory;
        # -1 marks frames with no recorded action
        for node in loaded_persistent_map.graph.nodes(data=True):
            node_idx, d = node
            trajectories[d['traj_idx']].add(d['obs'], -1, d['info'])

        log.info('Loaded %d trajectories from the map', num_trajectories)
        log.info('Trajectory lengths %r', [len(t) for t in trajectories])

    def init_map():
        # fresh map seeded with the first frame of the first trajectory
        return TopologicalMap(
            trajectories[0].obs[0],
            directed_graph=False,
            initial_info=trajectories[0].infos[0],
        )

    map_builder = MapBuilder(agent)
    # trajectories = [map_builder.sparsify_trajectory(t) for t in trajectories]  # TODO

    sparse_map = trajectories_to_sparse_map(
        init_map,
        trajectories,
        trajectories_dir,
        agent,
        map_img,
        coord_limits,
    )

    # debug path: exercise best-trajectory selection on a deep copy so the
    # original trajectories stay untouched
    test_pick_best_trajectory = True
    if test_pick_best_trajectory:
        pick_best_trajectory(init_map, agent, copy.deepcopy(trajectories))

    m = init_map()

    # accumulate all trajectories into a single dense map
    for i, t in enumerate(trajectories):
        m = map_builder.add_trajectory_to_dense_map(m, t)

    map_builder.calc_distances_to_landmarks(sparse_map, m)
    map_builder.sieve_landmarks_by_distance(sparse_map)

    dense_map_dir = ensure_dir_exists(join(trajectories_dir, 'dense_map'))
    m.save_checkpoint(dense_map_dir,
                      map_img=map_img,
                      coord_limits=coord_limits,
                      verbose=True)

    # check if landmark correspondence between dense and sparse map is correct
    for node, data in sparse_map.graph.nodes.data():
        traj_idx = data['traj_idx']
        frame_idx = data['frame_idx']

        dense_map_landmark = m.frame_to_node_idx[traj_idx][frame_idx]
        log.info('Sparse map node %d corresponds to dense map node %d', node,
                 dense_map_landmark)
        log.info('Sparse map node %d distance %d', node, data['distance'])

        # the same (trajectory, frame) pair must yield the same observation
        obs_sparse = sparse_map.get_observation(node)
        obs_dense = m.get_observation(dense_map_landmark)

        assert np.array_equal(obs_sparse, obs_dense)

        show_landmarks = False  # set to True only for manual debugging
        if show_landmarks:
            import cv2
            cv2.imshow('sparse', obs_sparse)
            cv2.imshow('dense', obs_dense)
            cv2.waitKey()

    agent.finalize()
    return 0