예제 #1
0
def env_setup(name='MiniGrid-Empty-8x8-v0'):
    """Create a seeded environment that emits RGB image observations.

    Args:
        name: Gym environment id passed to `gym.make`.

    Returns:
        The environment wrapped in `RGBImgObsWrapper`, with its episode
        length capped at 200 steps, seeded with 12345 and reset once.
    """
    env = gym.make(name)
    env = RGBImgObsWrapper(env)
    # Apply the step cap to the innermost environment. Gym wrappers forward
    # attribute *reads* to the wrapped env but not *writes*, so assigning
    # `max_steps` on the wrapper would only create a wrapper attribute and
    # the base environment would keep its original limit.
    base_env = env.unwrapped
    base_env.max_steps = min(base_env.max_steps, 200)
    env.seed(12345)
    env.reset()
    return env
def main(argv):
    """Step an environment from keyboard input and save each frame as a PNG.

    Loads the gin file named after `FLAGS.env_name`, builds the environment,
    then repeatedly reads an action key from stdin until the episode ends or
    500 frames have been taken. Each observation image is written to
    `FLAGS.file_path` as `obs_<frame>.png`, and the undiscounted return is
    printed at the end.

    Args:
        argv: Command-line arguments left over after flag parsing.

    Raises:
        app.UsageError: If any positional argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin_file = os.path.join(mon_minigrid.GIN_FILES_PREFIX,
                            '{}.gin'.format(FLAGS.env_name))
    gin.parse_config_files_and_bindings([gin_file],
                                        bindings=FLAGS.gin_bindings,
                                        skip_unknown=False)
    env_id = mon_minigrid.register_environment()
    env = gym.make(env_id)
    env = RGBImgObsWrapper(env)  # Get pixel observations
    # Get tabular observation and drop the 'mission' field:
    env = tabular_wrapper.TabularWrapper(env, get_rgb=True)

    num_frames = 0
    max_num_frames = 500
    undisc_return = 0
    # The loop blocks on `input`, so Ctrl-C / EOF are normal ways out; the
    # `finally` guarantees the environment is closed on those paths too.
    try:
        env.reset()

        if not tf.io.gfile.exists(FLAGS.file_path):
            tf.io.gfile.makedirs(FLAGS.file_path)

        print('Available actions:')
        for key, action_name in ACTION_MAPPINGS.items():
            print('\t{}: "{}"'.format(action_name, key))
        print()

        while num_frames < max_num_frames:
            draw_ascii_view(env)
            a = input('action: ')
            if a not in ACTION_MAPPINGS:
                # An invalid key does not consume a frame; just ask again.
                print('Unrecognized action.')
                continue
            action = env.DirectionalActions[ACTION_MAPPINGS[a]].value
            obs, reward, done, _ = env.step(action)
            undisc_return += reward
            num_frames += 1

            print('t:', num_frames, '   s:', obs['state'])
            # Draw environment frame just for simple visualization
            plt.imshow(obs['image'])
            path = os.path.join(FLAGS.file_path,
                                'obs_{}.png'.format(num_frames))
            plt.savefig(path)
            plt.clf()

            if done:
                break

        print('Undiscounted return: %.2f' % undisc_return)
    finally:
        env.close()
예제 #3
0
def main(argv):
    """Run a random agent in the classic four-rooms task and save frames.

    Loads `classic_fourrooms.gin`, builds the environment, then samples
    random actions until the episode ends or 500 frames have been taken.
    Each observation image is written into `FLAGS.file_path` as
    `<frame>.png`, and the undiscounted return is printed at the end.

    Args:
        argv: Command-line arguments left over after flag parsing.

    Raises:
        app.UsageError: If any positional argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin.parse_config_files_and_bindings(
        [os.path.join(mon_minigrid.GIN_FILES_PREFIX, 'classic_fourrooms.gin')],
        bindings=FLAGS.gin_bindings,
        skip_unknown=False)
    env_id = mon_minigrid.register_environment()
    env = gym.make(env_id)
    env = RGBImgObsWrapper(env)  # Get pixel observations
    # Get tabular observation and drop the 'mission' field:
    env = tabular_wrapper.TabularWrapper(env, get_rgb=True)

    num_frames = 0
    max_num_frames = 500
    undisc_return = 0
    # Close the environment even if stepping or saving a frame fails.
    try:
        env.reset()

        if not tf.io.gfile.exists(FLAGS.file_path):
            tf.io.gfile.makedirs(FLAGS.file_path)

        while num_frames < max_num_frames:
            # Act randomly
            obs, reward, done, _ = env.step(env.action_space.sample())
            undisc_return += reward
            num_frames += 1

            print('t:', num_frames, '   s:', obs['state'])
            # Draw environment frame just for simple visualization
            plt.imshow(obs['image'])
            # Join rather than concatenate: without a trailing separator on
            # the flag, plain concatenation would write the frames next to
            # the directory created above instead of inside it.
            path = os.path.join(FLAGS.file_path,
                                '{}.png'.format(num_frames))
            plt.savefig(path)
            plt.clf()

            if done:
                break

        print('Undiscounted return: %.2f' % undisc_return)
    finally:
        env.close()
예제 #4
0
def main(argv):
    """Compute V* by value iteration and optionally save a value heat map.

    Loads the gin file named after `FLAGS.env`, builds the environment, and
    sweeps synchronous Bellman optimality backups over
    `env.rewards[s, a]` and `env.transition_probs[s, a, :]` with discount
    `FLAGS.gamma` until the largest per-state change is at most
    `FLAGS.tolerance`. The resulting values are printed and, when
    `FLAGS.values_image_file` is set, rendered to that file.

    Args:
        argv: Command-line arguments left over after flag parsing.

    Raises:
        app.UsageError: If any positional argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin_file = os.path.join(mon_minigrid.GIN_FILES_PREFIX,
                            '{}.gin'.format(FLAGS.env))
    gin.parse_config_files_and_bindings([gin_file],
                                        bindings=FLAGS.gin_bindings,
                                        skip_unknown=False)
    env_id = mon_minigrid.register_environment()
    env = gym.make(env_id)
    env = RGBImgObsWrapper(env)  # Get pixel observations
    env = mdp_wrapper.MDPWrapper(env)
    env = coloring_wrapper.ColoringWrapper(env)
    values = np.zeros(env.num_states)
    # Start above the tolerance so the loop body runs at least once.
    error = FLAGS.tolerance * 2
    i = 0
    while error > FLAGS.tolerance:
        new_values = np.copy(values)
        for s in range(env.num_states):
            # True maximum over actions. Seeding the running maximum with 0
            # would floor every state value at 0 and give a wrong V* whenever
            # all action values are negative. `default` only applies when
            # there are no actions at all.
            new_values[s] = max(
                (env.rewards[s, a] + FLAGS.gamma *
                 np.matmul(env.transition_probs[s, a, :], values)
                 for a in range(env.num_actions)),
                default=0.)
        error = np.max(np.abs(new_values - values))
        values = new_values
        i += 1
        if i % 1000 == 0:
            print('Error after {} iterations: {}'.format(i, error))
    print('Found V* in {} iterations'.format(i))
    print(values)
    if FLAGS.values_image_file is not None:
        # NOTE(review): matplotlib deprecated `cm.get_cmap` in 3.7 and
        # removed it in 3.9; `plt.get_cmap` takes the same arguments if the
        # dependency is upgraded.
        cmap = cm.get_cmap('plasma', 256)
        norm = colors.Normalize(vmin=np.min(values), vmax=np.max(values))
        obs_image = env.render_custom_observation(env.reset(),
                                                  values,
                                                  cmap,
                                                  boundary_values=[1.0, 4.5])
        # The mappable only exists to drive the colorbar scale.
        m = cm.ScalarMappable(cmap=cmap, norm=norm)
        m.set_array(obs_image)
        plt.imshow(obs_image)
        plt.colorbar(m)
        plt.savefig(FLAGS.values_image_file)
        plt.clf()
    env.close()