def make_env(config, writer, prefix, datadir, store):
  """Construct a fully wrapped environment from ``config.task``.

  The task string is ``'<suite>_<name>'``; the suite selects the base
  environment (kitchen / dmc / atari), after which a shared stack of
  time-limit, episode-collection, and reward-observation wrappers is applied.

  Args:
    config: Experiment configuration (task, action_repeat, time_limit, precision).
    writer: Summary writer passed through to episode summaries.
    prefix: Phase name; ``'test'`` episodes additionally get summarized.
    datadir: Directory episodes are saved to when ``store`` is true.
    store: Whether completed episodes are written to ``datadir``.

  Returns:
    The wrapped environment.

  Raises:
    NotImplementedError: If the suite is not one of the supported ones.
  """
  domain, name = config.task.split('_', 1)
  if domain == 'kitchen':
    env = wrappers.Kitchen2D(name)
    env = wrappers.NormalizeActions(
        wrappers.ActionRepeat(env, config.action_repeat))
  elif domain == 'dmc':
    env = wrappers.DeepMindControl(name)
    env = wrappers.NormalizeActions(
        wrappers.ActionRepeat(env, config.action_repeat))
  elif domain == 'atari':
    env = wrappers.Atari(
        name, config.action_repeat, (64, 64), grayscale=False,
        life_done=True, sticky_actions=True)
    env = wrappers.OneHotAction(env)
  else:
    raise NotImplementedError(domain)
  # Time limit is expressed in environment steps, so divide out the repeat.
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
  on_episode = []
  if store:
    on_episode.append(lambda ep: tools.save_episodes(datadir, [ep]))
  if prefix == 'test':
    on_episode.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
  env = wrappers.Collect(env, on_episode, config.precision)
  return wrappers.RewardObs(env)
def make_env(config, writer, prefix, datadir, store):
  """Construct a wrapped environment from ``config.task``.

  Dispatches first on the suite part of ``'<suite>_<task>'`` (dmc / gym) and
  then, for other suites, on the task name itself (door / drawer — presumably
  robot-manipulation tasks; confirm against the wrappers module). All base
  environments get action repeat and action normalization, then the shared
  time-limit / collection / reward-observation stack.

  Args:
    config: Experiment configuration (task, action_repeat, time_limit, precision).
    writer: Summary writer passed through to episode summaries.
    prefix: Phase name; ``'test'`` episodes additionally get summarized.
    datadir: Directory episodes are saved to when ``store`` is true.
    store: Whether completed episodes are written to ``datadir``.

  Returns:
    The wrapped environment.

  Raises:
    NotImplementedError: If no branch matches.
  """
  suite, task = config.task.split('_', 1)
  if suite == 'dmc':
    base = wrappers.DeepMindControl(task)
  elif suite == 'gym':
    base = wrappers.Gym(task, config, size=(128, 128))
  elif task == 'door':
    base = wrappers.DoorOpen(config, size=(128, 128))
  elif task == 'drawer':
    base = wrappers.DrawerOpen(config, size=(128, 128))
  else:
    raise NotImplementedError(suite)
  # Every supported base environment shares the same action handling.
  env = wrappers.ActionRepeat(base, config.action_repeat)
  env = wrappers.NormalizeActions(env)
  # Time limit is expressed in environment steps, so divide out the repeat.
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
  on_episode = []
  if store:
    on_episode.append(lambda ep: tools.save_episodes(datadir, [ep]))
  if prefix == 'test':
    on_episode.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
  env = wrappers.Collect(env, on_episode, config.precision)
  return wrappers.RewardObs(env)
def make_env(config, logger, mode, train_eps, eval_eps):
  """Construct a wrapped environment from ``config.task``.

  Supports the dmc and atari suites. Episodes are funneled through
  ``process_episode`` (bound to this run's logger, mode, and episode caches)
  via the dataset-collection wrapper.

  Args:
    config: Experiment configuration (task, action_repeat, size, grayscale,
      time_limit).
    logger: Logger forwarded to ``process_episode``.
    mode: Phase name (e.g. ``'train'``); forwarded to ``process_episode``.
    train_eps: Training-episode cache forwarded to ``process_episode``.
    eval_eps: Evaluation-episode cache forwarded to ``process_episode``.

  Returns:
    The wrapped environment.

  Raises:
    NotImplementedError: If the suite is not dmc or atari.
  """
  suite, task = config.task.split('_', 1)
  if suite == 'dmc':
    env = wrappers.DeepMindControl(task, config.action_repeat, config.size)
    env = wrappers.NormalizeActions(env)
  elif suite == 'atari':
    # NOTE(review): the original expression was
    # ``life_done=False and (mode == 'train')``, which is constant False —
    # the mode never mattered. Kept as an explicit False to preserve
    # behavior; re-enable with ``mode == 'train'`` if life-done termination
    # during training is actually wanted.
    env = wrappers.Atari(
        task, config.action_repeat, config.size,
        grayscale=config.grayscale,
        life_done=False,
        sticky_actions=True,
        all_actions=True)
    env = wrappers.OneHotAction(env)
  else:
    raise NotImplementedError(suite)
  env = wrappers.TimeLimit(env, config.time_limit)
  env = wrappers.SelectAction(env, key='action')
  # Bind the run-wide context now; the wrapper calls back with each episode.
  callbacks = [
      functools.partial(
          process_episode, config, logger, mode, train_eps, eval_eps)
  ]
  env = wrappers.CollectDataset(env, callbacks)
  env = wrappers.RewardObs(env)
  return env
def make_env(config, writer, prefix, datadir, store, index=None,
             real_world=False):
  """Construct a wrapped environment, optionally with domain randomization.

  Supports dmc, atari, and gym suites. For dmc and gym, domain randomization
  (``config.dr``) is applied only to simulated instances — the comments in
  the original note that the first index is always the real-world instance.

  Args:
    config: Experiment configuration (task, dr, use_state, action_repeat,
      time_limit, precision).
    writer: Summary writer passed through to episode summaries.
    prefix: Phase name passed through to episode summaries.
    datadir: Directory episodes are saved to when ``store`` is true.
    store: Whether completed episodes are written to ``datadir``.
    index: Environment index; ``0`` or ``None`` means the real-world
      (non-randomized) gym instance.
    real_world: If true, the dmc instance is built without randomization.

  Returns:
    The wrapped environment.

  Raises:
    NotImplementedError: If the suite is unsupported.
  """
  suite, task = config.task.split('_', 1)
  if suite == 'dmc':
    kwargs = dict(use_state=config.use_state, real_world=real_world)
    # Randomize only simulated instances that actually have a dr config.
    if config.dr is not None and not real_world:
      kwargs['dr'] = config.dr
    env = wrappers.DeepMindControl(task, **kwargs)
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)
  elif suite == 'atari':
    env = wrappers.Atari(
        task, config.action_repeat, (64, 64), grayscale=False,
        life_done=True, sticky_actions=True)
    env = wrappers.OneHotAction(env)
  elif suite == 'gym':
    # Index 0 (or no index) is the real-world instance — no randomization.
    if index in (0, None):
      env = wrappers.GymControl(task)
    else:
      env = wrappers.GymControl(task, dr=config.dr)
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)
  else:
    raise NotImplementedError(suite)
  # Time limit is expressed in environment steps, so divide out the repeat.
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
  on_episode = []
  if store:
    on_episode.append(lambda ep: tools.save_episodes(datadir, [ep]))
  on_episode.append(
      lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
  env = wrappers.Collect(env, on_episode, config.precision)
  return wrappers.RewardObs(env)
def make_env(config, writer, prefix, datadir, store):
  """Construct a wrapped environment from ``config.task``.

  Supports dmc, atari, and Google Research Football (the football branch is
  hard-wired to the ``academy_empty_goal_close`` scenario with pixel
  observations and video/dump logging enabled).

  Args:
    config: Experiment configuration (task, action_repeat, time_limit,
      precision).
    writer: Summary writer passed through to episode summaries.
    prefix: Phase name passed through to episode summaries.
    datadir: Directory episodes are saved to when ``store`` is true.
    store: Whether completed episodes are written to ``datadir``.

  Returns:
    The wrapped environment.

  Raises:
    NotImplementedError: If the suite is unsupported.
  """
  suite, task = config.task.split('_', 1)
  if suite == 'dmc':
    env = wrappers.DeepMindControl(task)
    env = wrappers.NormalizeActions(
        wrappers.ActionRepeat(env, config.action_repeat))
  elif suite == 'atari':
    env = wrappers.Atari(
        task, config.action_repeat, (64, 64), grayscale=False,
        life_done=True, sticky_actions=True)
    env = wrappers.OneHotAction(env)
  elif suite == 'football':
    # NOTE(review): scenario and logdir are hard-coded; ``task`` is ignored
    # on this branch.
    env = football_env.create_environment(
        representation='pixels',
        env_name='academy_empty_goal_close',
        stacked=False,
        logdir='./football/empty_goal_close2',
        write_goal_dumps=True,
        write_full_episode_dumps=True,
        render=True,
        write_video=True)
    env = wrappers.OneHotAction(wrappers.Football(env))
  else:
    raise NotImplementedError(suite)
  # Time limit is expressed in environment steps, so divide out the repeat.
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
  on_episode = []
  if store:
    on_episode.append(lambda ep: tools.save_episodes(datadir, [ep]))
  on_episode.append(
      lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
  env = wrappers.Collect(env, on_episode, config.precision)
  return wrappers.RewardObs(env)
from environments.pendulum import UnderactuatedPendulum # INITIALIZE COLORS FOR PROMPT ON WIN10 MACHINES colorama.init() # DATASET PARAMETERS max_num_episodes = 120 len_time = 100 render_shape = (500, 500) image_res = (28, 28) # INITIALIZE PENDULUM ENVIRONMENT env = gym.make('pendulum-underactuated-v0', render_shape=render_shape, model=None) env = wrappers.NormalizeActions(env) env = wrappers.MinimumDuration(env, len_time) env = wrappers.MaximumDuration(env, len_time) env = wrappers.ObservationDict(env, key='observation') env = wrappers.PixelObservations(env, image_res, np.uint8, 'image') env = wrappers.ConvertRewardToCost(env) env = wrappers.ConvertTo32Bit(env) # SEED EXPERIMENT TO CREATE REPRODUCIBLE RESULTS seed_value = 0 seed_experiment(seed=seed_value) env.seed(seed=seed_value) # GET ENVIRONMENT DATA SHAPES observation_shape = env.observation_space['image'].shape action_shape = env.action_space.shape