def env_setup(name='MiniGrid-Empty-8x8-v0', max_steps=200, seed=12345):
    """Build a seeded, step-capped environment wrapped in `RGBImgObsWrapper`.

    Args:
      name: Environment id handed to `gym.make`.
      max_steps: Upper bound for the environment's `max_steps`. The
        environment's own limit is kept when it is already smaller.
      seed: Value passed to `env.seed`.

    Returns:
      The wrapped environment, after `reset()` has been called once.
    """
    env = RGBImgObsWrapper(gym.make(name))

    # Gym wrappers forward attribute *reads* to the wrapped env but not
    # *writes*: assigning `env.max_steps` on the wrapper would only create a
    # new attribute on the wrapper and leave the limit used by the underlying
    # environment untouched. Write the cap on the unwrapped env instead.
    base_env = env.unwrapped
    base_env.max_steps = min(base_env.max_steps, max_steps)

    env.seed(seed)
    env.reset()
    return env
def main(argv):
    """Let a user drive the environment from the keyboard, saving each frame.

    Parses `<FLAGS.env_name>.gin` (plus `FLAGS.gin_bindings`), builds the
    registered environment, and then prompts for actions until the episode
    reports `done` or 500 steps have been taken. After every accepted action
    the observation image is written to `FLAGS.file_path/obs_<t>.png`.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin_file = os.path.join(mon_minigrid.GIN_FILES_PREFIX,
                            '{}.gin'.format(FLAGS.env_name))
    gin.parse_config_files_and_bindings(
        [gin_file], bindings=FLAGS.gin_bindings, skip_unknown=False)

    env = gym.make(mon_minigrid.register_environment())
    # Pixel observations first, then the tabular wrapper (which also drops the
    # 'mission' field) on top.
    env = RGBImgObsWrapper(env)
    env = tabular_wrapper.TabularWrapper(env, get_rgb=True)
    env.reset()

    if not tf.io.gfile.exists(FLAGS.file_path):
        tf.io.gfile.makedirs(FLAGS.file_path)

    # Show the user which keys map to which actions.
    print('Available actions:')
    for key in ACTION_MAPPINGS:
        print('\t{}: "{}"'.format(ACTION_MAPPINGS[key], key))
    print()

    step_limit = 500
    steps_taken = 0
    total_reward = 0
    while steps_taken < step_limit:
        draw_ascii_view(env)
        key = input('action: ')
        if key in ACTION_MAPPINGS:
            action = env.DirectionalActions[ACTION_MAPPINGS[key]].value
            obs, reward, done, _ = env.step(action)
            total_reward += reward
            steps_taken += 1
            print('t:', steps_taken, '   s:', obs['state'])

            # Dump the current frame to disk as a simple visualisation.
            plt.imshow(obs['image'])
            frame_path = os.path.join(FLAGS.file_path,
                                      'obs_{}.png'.format(steps_taken))
            plt.savefig(frame_path)
            plt.clf()

            if done:
                break
        else:
            # Invalid input does not consume a step; just ask again.
            print('Unrecognized action.')

    print('Undiscounted return: %.2f' % total_reward)
    env.close()
def main(argv):
    """Roll out a random policy in the classic four-rooms env, saving frames.

    Parses `classic_fourrooms.gin` (plus `FLAGS.gin_bindings`), builds the
    registered environment and samples actions from `env.action_space` until
    the episode reports `done` or 500 steps have been taken. The observation
    image of every step is written to `FLAGS.file_path/<t>.png`.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin.parse_config_files_and_bindings(
        [os.path.join(mon_minigrid.GIN_FILES_PREFIX, 'classic_fourrooms.gin')],
        bindings=FLAGS.gin_bindings,
        skip_unknown=False)

    env_id = mon_minigrid.register_environment()
    env = gym.make(env_id)
    env = RGBImgObsWrapper(env)  # Get pixel observations
    # Get tabular observation and drop the 'mission' field:
    env = tabular_wrapper.TabularWrapper(env, get_rgb=True)
    env.reset()

    if not tf.io.gfile.exists(FLAGS.file_path):
        tf.io.gfile.makedirs(FLAGS.file_path)

    num_frames = 0
    max_num_frames = 500
    undisc_return = 0
    while num_frames < max_num_frames:
        # Act randomly
        obs, reward, done, _ = env.step(env.action_space.sample())
        undisc_return += reward
        num_frames += 1
        print('t:', num_frames, '   s:', obs['state'])

        # Draw environment frame just for simple visualization
        plt.imshow(obs['image'])
        # FLAGS.file_path is created as a directory above, so join with a
        # path separator. Plain string concatenation would write the frames
        # *next to* that directory whenever the flag has no trailing slash.
        path = os.path.join(FLAGS.file_path, '{}.png'.format(num_frames))
        plt.savefig(path)
        plt.clf()

        if done:
            break

    print('Undiscounted return: %.2f' % undisc_return)
    env.close()
def _run_value_iteration(env, gamma, tolerance):
    """Iterate the Bellman optimality backup until the values stop changing.

    Args:
      env: Object exposing `num_states`, `rewards` and `transition_probs`.
        Assumes `rewards` is indexed as [state, action] and
        `transition_probs` as [state, action, next_state] with exactly
        `num_states` x `num_actions` entries, matching how the scalar loop
        this replaces indexed them.
      gamma: Discount factor.
      tolerance: Iteration stops once the largest absolute change of any
        state value in one sweep is not greater than this.

    Returns:
      A `(values, num_iterations)` tuple.
    """
    values = np.zeros(env.num_states)
    # Start above the threshold so at least one sweep runs (for tolerance > 0).
    error = tolerance * 2
    num_iterations = 0
    while error > tolerance:
        # Q[s, a] = r[s, a] + gamma * sum_s' P[s, a, s'] * V[s']
        q_values = env.rewards + gamma * np.matmul(env.transition_probs, values)
        # Take a true maximum over actions. Seeding the running maximum with
        # 0 would clamp every state whose action values are all negative.
        new_values = np.max(q_values, axis=1)
        error = np.max(np.abs(new_values - values))
        values = new_values
        num_iterations += 1
        if num_iterations % 1000 == 0:
            print('Error after {} iterations: {}'.format(num_iterations, error))
    return values, num_iterations


def _save_values_image(env, values, image_file):
    """Render `values` on top of the environment and save the figure."""
    cmap = cm.get_cmap('plasma', 256)
    norm = colors.Normalize(vmin=np.min(values), vmax=np.max(values))
    obs_image = env.render_custom_observation(
        env.reset(), values, cmap, boundary_values=[1.0, 4.5])
    # The mappable only exists so the colorbar uses the value range.
    mappable = cm.ScalarMappable(cmap=cmap, norm=norm)
    mappable.set_array(obs_image)
    plt.imshow(obs_image)
    plt.colorbar(mappable)
    plt.savefig(image_file)
    plt.clf()


def main(argv):
    """Compute V* for a gin-configured environment via value iteration.

    Parses `<FLAGS.env>.gin` (plus `FLAGS.gin_bindings`), builds the
    registered environment, runs value iteration with `FLAGS.gamma` and
    `FLAGS.tolerance`, prints the resulting values and, when
    `FLAGS.values_image_file` is set, saves a rendering of them.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    gin.parse_config_files_and_bindings(
        [os.path.join(mon_minigrid.GIN_FILES_PREFIX,
                      '{}.gin'.format(FLAGS.env))],
        bindings=FLAGS.gin_bindings,
        skip_unknown=False)

    env_id = mon_minigrid.register_environment()
    env = gym.make(env_id)
    env = RGBImgObsWrapper(env)  # Get pixel observations
    # The wrapped env is expected to expose the MDP quantities read below
    # (num_states, rewards, transition_probs) and render_custom_observation.
    env = mdp_wrapper.MDPWrapper(env)
    env = coloring_wrapper.ColoringWrapper(env)

    values, num_iterations = _run_value_iteration(
        env, FLAGS.gamma, FLAGS.tolerance)
    print('Found V* in {} iterations'.format(num_iterations))
    print(values)

    if FLAGS.values_image_file is not None:
        _save_values_image(env, values, FLAGS.values_image_file)

    env.close()