def train_distance(params, env_id):
    def make_env_func():
        e = create_env(env_id)
        return e

    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    multi_env = None
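    # the try/finally below guarantees the parallel envs are shut down even if training is interrupted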
    try:
        multi_env = MultiEnv(
            params.num_envs,
            params.num_workers,
            make_env_func=agent.make_env_func,
            stats_episodes=params.stats_episodes,
        )

        train_loop(agent, multi_env)
    except (Exception, KeyboardInterrupt, SystemExit):
        log.exception('Interrupt...')
    finally:
        log.info('Closing env...')
        if multi_env is not None:
            multi_env.close()

        agent.finalize()

    return 0
Example #2
def evaluate_experiment(env_id, experiment_name, num_envs=96):
    # fixed seeds
    random.seed(0)
    np.random.seed(0)
    tf.random.set_random_seed(0)

    params = AgentTMAX.Params(experiment_name)
    params = params.load()
    params.seed = 0

    # for faster evaluation
    params.num_envs = num_envs
    params.num_workers = 32 if num_envs >= 32 else num_envs

    def make_env_func():
        e = create_env(env_id, skip_frames=True)
        e.seed(0)
        return e

    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    rate, speed = 0, -1  # defaults reported if evaluation fails

    multi_env = None
    try:
        multi_env = MultiEnv(
            params.num_envs,
            params.num_workers,
            make_env_func=make_env_func,
            stats_episodes=params.stats_episodes,
        )

        success, avg_speed = evaluate_locomotion_agent(agent, multi_env)

        log.info('Finished evaluating experiment %s', experiment_name)
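        # aggregate results: mean success rate, and mean speed over the positive speed values only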
        rate = np.mean(success)
        speed = -1
        avg_speed = [s for s in avg_speed if s > 0]
        if len(avg_speed) > 0:
            speed = np.mean(avg_speed)

        log.info('Success rate %.1f%%, avg. speed %.2f edges/frame', rate * 100, speed)

    except (Exception, KeyboardInterrupt, SystemExit):
        log.exception('Interrupt...')
    finally:
        log.info('Closing env...')
        if multi_env is not None:
            multi_env.close()

    agent.finalize()
    return rate, speed
Example #3
    def test_dist_training(self):
        t = Timing()

        def make_env():
            return make_doom_env(doom_env_by_name(TEST_ENV_NAME))

        params = AgentTMAX.Params('__test_dist_train__')
        params.distance_target_buffer_size = 1000

        with t.timeit('generate_data'):
            # first: generate fake random data
            buffer = Buffer()

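            # two synthetic observations that differ only in which color channel is saturated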
            obs1 = np.full([84, 84, 3], 0, dtype=np.uint8)
            obs1[:, :, 1] = 255
            obs2 = np.full([84, 84, 3], 0, dtype=np.uint8)
            obs2[:, :, 2] = 255

            data_size = params.distance_target_buffer_size
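            # alternate between identical pairs (label 0) and different pairs (label 1)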
            for i in range(data_size):
                same = i % 2 == 0
                if same:
                    if random.random() < 0.5:
                        obs_first = obs_second = obs1
                    else:
                        obs_first = obs_second = obs2
                else:
                    obs_first, obs_second = obs1, obs2
                    if random.random() < 0.5:
                        obs_second, obs_first = obs_first, obs_second

                buffer.add(obs_first=obs_first,
                           obs_second=obs_second,
                           labels=0 if same else 1)

        with t.timeit('init'):
            agent = AgentTMAX(make_env, params)
            agent.initialize()

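            # short training pass counted towards the 'init' timing: one epoch with a larger batch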
            params.distance_train_epochs = 1
            params.distance_batch_size = 256
            agent.distance.train(buffer, 1, agent)

        with t.timeit('train'):
            params.distance_train_epochs = 2
            params.distance_batch_size = 64
            agent.distance.train(buffer, 1, agent, t)

        agent.finalize()

        log.info('Timing: %s', t)
        shutil.rmtree(params.experiment_dir())
Example #4
def train(params, env_id):
    agent = AgentTMAX(partial(create_env, env=env_id), params=params)
    agent.initialize()
    status = agent.learn()
    agent.finalize()
    return status
Example #5
def trajectory_to_map(params, env_id):
    def make_env_func():
        e = create_env(env_id)
        e.seed(0)
        return e

    params.num_envs = 1
    params.with_timer = False
    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    map_img, coord_limits = generate_env_map(make_env_func)

    experiment_dir = params.experiment_dir()
    trajectories_dir = ensure_dir_exists(join(experiment_dir, '.trajectories'))

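    # trajectories either come from pickled files on disk or are reconstructed from a saved persistent map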
    if params.persistent_map_checkpoint is None:
        prefix = 'traj_'
        all_trajectories = glob.glob(f'{trajectories_dir}/{prefix}*')
        all_trajectories.sort()

        trajectories = []
        for i, trajectory_dir in enumerate(all_trajectories):
            with open(join(trajectory_dir, 'trajectory.pickle'),
                      'rb') as traj_file:
                traj = Trajectory(i)
                traj.__dict__.update(pickle.load(traj_file))
                trajectories.append(traj)
    else:
        loaded_persistent_map = TopologicalMap.create_empty()
        loaded_persistent_map.maybe_load_checkpoint(
            params.persistent_map_checkpoint)

        num_trajectories = loaded_persistent_map.num_trajectories
        trajectories = [Trajectory(i) for i in range(num_trajectories)]

        zero_frame = loaded_persistent_map.graph.nodes[0]
        for i in range(1, num_trajectories):
            trajectories[i].add(zero_frame['obs'], -1, zero_frame['info'])

        for node in loaded_persistent_map.graph.nodes(data=True):
            node_idx, d = node
            trajectories[d['traj_idx']].add(d['obs'], -1, d['info'])

        log.info('Loaded %d trajectories from the map', num_trajectories)
        log.info('Trajectory lengths %r', [len(t) for t in trajectories])

    def init_map():
        return TopologicalMap(
            trajectories[0].obs[0],
            directed_graph=False,
            initial_info=trajectories[0].infos[0],
        )

    map_builder = MapBuilder(agent)
    # trajectories = [map_builder.sparsify_trajectory(t) for t in trajectories]  # TODO

    sparse_map = trajectories_to_sparse_map(
        init_map,
        trajectories,
        trajectories_dir,
        agent,
        map_img,
        coord_limits,
    )

    test_pick_best_trajectory = True
    if test_pick_best_trajectory:
        pick_best_trajectory(init_map, agent, copy.deepcopy(trajectories))

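    # build a dense map from all trajectories and relate its landmarks to the sparse map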
    m = init_map()

    for i, t in enumerate(trajectories):
        m = map_builder.add_trajectory_to_dense_map(m, t)

    map_builder.calc_distances_to_landmarks(sparse_map, m)
    map_builder.sieve_landmarks_by_distance(sparse_map)

    dense_map_dir = ensure_dir_exists(join(trajectories_dir, 'dense_map'))
    m.save_checkpoint(dense_map_dir,
                      map_img=map_img,
                      coord_limits=coord_limits,
                      verbose=True)

    # check if landmark correspondence between dense and sparse map is correct
    for node, data in sparse_map.graph.nodes.data():
        traj_idx = data['traj_idx']
        frame_idx = data['frame_idx']

        dense_map_landmark = m.frame_to_node_idx[traj_idx][frame_idx]
        log.info('Sparse map node %d corresponds to dense map node %d', node,
                 dense_map_landmark)
        log.info('Sparse map node %d distance %d', node, data['distance'])

        obs_sparse = sparse_map.get_observation(node)
        obs_dense = m.get_observation(dense_map_landmark)

        assert np.array_equal(obs_sparse, obs_dense)

        show_landmarks = False
        if show_landmarks:
            import cv2
            cv2.imshow('sparse', obs_sparse)
            cv2.imshow('dense', obs_dense)
            cv2.waitKey()

    agent.finalize()
    return 0
Example #6
def enjoy(params,
          env_id,
          max_num_episodes=1000,
          max_num_frames=None,
          show_automap=False):
    def make_env_func():
        e = create_env(env_id, mode='test', show_automap=show_automap)
        e.seed(0)
        return e

    params = params.load()
    params.num_envs = 1  # during execution we're only using one env
    agent = AgentTMAX(make_env_func, params)
    env = make_env_func()

    agent.initialize()

    global persistent_map
    if agent.params.persistent_map_checkpoint is not None:
        persistent_map = TopologicalMap.create_empty()
        persistent_map.maybe_load_checkpoint(
            agent.params.persistent_map_checkpoint)

    global current_landmark

    episode_rewards = []
    num_frames = 0

    def max_frames_reached(frames):
        return max_num_frames is not None and frames > max_num_frames

    for _ in range(max_num_episodes):
        env_obs, info = reset_with_info(env)
        done = False

        obs, goal_obs = main_observation(env_obs), goal_observation(env_obs)
        prev_obs = obs
        if current_landmark is None:
            current_landmark = obs

        if goal_obs is not None:
            goal_obs_rgb = cv2.cvtColor(goal_obs, cv2.COLOR_BGR2RGB)
            cv2.imshow('goal', cv2.resize(goal_obs_rgb, (500, 500)))
            cv2.waitKey(500)

        episode_reward, episode_frames = 0, 0

        if not agent.tmax_mgr.initialized:
            agent.tmax_mgr.initialize([obs], [info], env_steps=0)
            persistent_map = agent.tmax_mgr.dense_persistent_maps[-1]
            sparse_persistent_map = agent.tmax_mgr.sparse_persistent_maps[-1]
            log.debug('Num landmarks in sparse map: %d',
                      sparse_persistent_map.num_landmarks())

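        # force exploration mode with no locomotion targets for this episode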
        agent.curiosity.initialized = True
        agent.tmax_mgr.mode[0] = TmaxMode.EXPLORATION
        agent.tmax_mgr.locomotion_final_targets[0] = None
        agent.tmax_mgr.locomotion_targets[0] = None

        start_episode = time.time()
        t = Timing()

        while not done and not terminate and not max_frames_reached(num_frames):
            with t.timeit('one_frame'):
                env.render()
                cv2.waitKey(1)  # to prevent window from fading

                if pause:
                    time.sleep(0.01)
                    continue

                if len(current_actions) > 0:
                    # key combinations are not handled, but this is purely for testing
                    action = current_actions[-1]
                else:
                    action = 0

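                # choose the action source: keyboard input (PLAYER), random sampling, the exploration policy, or the locomotion policy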
                if policy_type == PolicyType.PLAYER:
                    pass
                elif policy_type == PolicyType.RANDOM:
                    action = env.action_space.sample()
                elif policy_type == PolicyType.AGENT:
                    agent.tmax_mgr.mode[0] = TmaxMode.EXPLORATION
                    action, *_ = agent.policy_step([prev_obs], [obs],
                                                   [goal_obs], None, None)
                    action = action[0]
                elif policy_type == PolicyType.LOCOMOTION:
                    agent.tmax_mgr.mode[0] = TmaxMode.LOCOMOTION
                    action, _, _ = agent.loco_actor_critic.invoke(
                        agent.session,
                        [obs],
                        [current_landmark],
                        None,
                        None,
                        [1.0],
                    )
                    action = action[0]

                env_obs, rew, done, info = env.step(action)
                next_obs = main_observation(env_obs)
                goal_obs = goal_observation(env_obs)

                _, _ = agent.tmax_mgr.update(
                    [obs],
                    [next_obs],
                    [rew],
                    [done],
                    [info],
                    num_frames,
                    t,
                    verbose=True,
                )

                prev_obs = obs
                obs = next_obs

                calc_distance_to_memory(agent, sparse_persistent_map, obs)
                calc_value_estimate(agent, obs)

                episode_reward += rew

                num_frames += 1
                episode_frames += 1

            took_seconds = t.one_frame
            desired_fps = 15  # actions are repeated 4 times here, so this corresponds to ~60 game FPS
            wait_seconds = (1.0 / desired_fps) - took_seconds
            wait_seconds = max(0.0, wait_seconds)
            if wait_seconds > EPS:
                time.sleep(wait_seconds)

        env.render()
        log.info('Actual fps: %.1f',
                 episode_frames / (time.time() - start_episode))
        time.sleep(0.2)

        episode_rewards.append(episode_reward)
        last_episodes = episode_rewards[-100:]
        avg_reward = sum(last_episodes) / len(last_episodes)
        log.info(
            'Episode reward: %f, avg reward for %d episodes: %f',
            episode_reward,
            len(last_episodes),
            avg_reward,
        )

        if max_frames_reached(num_frames) or terminate:
            break

    agent.finalize()
    env.close()
    cv2.destroyAllWindows()

    return 0
Example #7
def test_locomotion(params, env_id):
    def make_env_func():
        e = create_env(env_id, skip_frames=True)
        e.seed(0)
        return e

    # params = params.load()
    # params.ensure_serialized()

    params.num_envs = 1
    # params.naive_locomotion = True

    agent = AgentTMAX(make_env_func, params)

    agent.initialize()

    env = make_env_func()

    env_obs, info = reset_with_info(env)
    obs_prev = obs = main_observation(env_obs)
    done = False

    if params.persistent_map_checkpoint is not None:
        loaded_persistent_map = TopologicalMap.create_empty()
        loaded_persistent_map.maybe_load_checkpoint(
            params.persistent_map_checkpoint)
    else:
        agent.tmax_mgr.initialize([obs], [info], 1)
        loaded_persistent_map = agent.tmax_mgr.dense_persistent_maps[-1]

    m = loaded_persistent_map

    t = Timing()

    log.info('Num landmarks: %d', m.num_landmarks())
    final_goal_idx = 49

    log.info('Locomotion goal is %d', final_goal_idx)

    # localizer = Localizer(m, agent)

    final_goal_obs = m.get_observation(final_goal_idx)
    cv2.namedWindow('next_target')
    cv2.moveWindow('next_target', 800, 100)
    cv2.namedWindow('final_goal')
    cv2.moveWindow('final_goal', 1400, 100)
    display_obs('next_target', obs)
    display_obs('final_goal', final_goal_obs)
    cv2.waitKey(1)

    # localizer.current_landmark = 0
    # next_target = localizer.get_next_target(obs, final_goal_idx)
    # next_target_obs = m.get_observation(next_target)

    frame = 0

    if params.naive_locomotion:
        navigator = NavigatorNaive(agent)
    else:
        navigator = Navigator(agent)

    navigator.reset(0, m)

    next_target, next_target_d = navigator.get_next_target(
        [m],
        [obs],
        [final_goal_idx],
        [frame],
    )
    next_target, next_target_d = next_target[0], next_target_d[0]
    next_target_obs = m.get_observation(next_target)

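    # main loop: query the navigator for the next target landmark and step the environment toward it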
    while not done and not terminate:
        with t.timeit('one_frame'):
            env.render()
            if not pause:
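                # randomly flip between deterministic and non-deterministic action selection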
                deterministic = random.random() >= 0.5

                if params.naive_locomotion:
                    action = navigator.replay_action([0])[0]
                else:
                    action = agent.locomotion.navigate(
                        agent.session,
                        [obs_prev],
                        [obs],
                        [next_target_obs],
                        deterministic=deterministic,
                    )[0]

                env_obs, rew, done, info = env.step(action)

                log.info('Action is %d', action)
                obs_prev = obs
                obs = main_observation(env_obs)

                next_target, next_target_d = navigator.get_next_target(
                    [m],
                    [obs],
                    [final_goal_idx],
                    [frame],
                )
                next_target, next_target_d = next_target[0], next_target_d[0]
                if next_target is None:
                    log.error('We are lost!')
                else:
                    log.info('Next target is %d with distance %.3f!',
                             next_target, next_target_d)
                    display_obs('next_target', next_target_obs)
                    cv2.waitKey(1)

                if next_target is not None:
                    next_target_obs = m.get_observation(next_target)

                log.info('Frame %d...', frame)

        took_seconds = t.one_frame
        desired_fps = 10
        wait_seconds = (1.0 / desired_fps) - took_seconds
        wait_seconds = max(0.0, wait_seconds)
        if wait_seconds > EPS:
            time.sleep(wait_seconds)

        if not pause:
            frame += 1

    log.info('After loop')

    env.render()
    time.sleep(0.05)

    env.close()
    agent.finalize()
    return 0