def start_game(game, state, directory='tmp', steps=1000000, discrete_actions=False, bk2dir=None):
    """Build the contest environment for a game/state pair and serve it.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Save state forwarded to ``retro.make``.
        directory: Second argument handed to ``RemoteEnvWrapper``.
        steps: Argument handed to the wrapper's ``serve`` call.
        discrete_actions: When truthy, use ``retro.ACTIONS_DISCRETE``;
            otherwise ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
    """
    action_mode = retro.ACTIONS_DISCRETE if discrete_actions else retro.ACTIONS_FILTERED

    try:
        emulator = retro.make(game, state, scenario='contest', use_restricted_actions=action_mode)
    except Exception:
        # Best-effort fallback: retry without naming a scenario.
        emulator = retro.make(game, state, use_restricted_actions=action_mode)

    if bk2dir:
        emulator.auto_record(bk2dir)

    frame_skipped = retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.25)
    time_limited = gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)
    RemoteEnvWrapper(time_limited, directory).serve(steps)
def make_rand_env(game_states, stack=2, scale_rew=True, color=False, exp_type=('x',), exp_const=(0.002,), max_episode_steps=4500, maml=False, small_size=False):
    """
    Create an environment with some standard wrappers.

    Args:
        game_states: Indexable collection of ``(game, state)`` pairs. The
            first pair builds the initial environment; the whole collection
            is handed to ``RandomEnvironmen`` / ``RandomEnvironmen2``.
        stack: Number of frames for ``FrameStack``; stacking is skipped
            unless this is greater than 1.
        scale_rew: When truthy, wrap with ``RewardScaler``.
        color: Forwarded to ``WarpFrame``.
        exp_type: Sequence of exploration kinds, each ``'obs'`` or ``'x'``.
        exp_const: Sequence of constants, paired element-wise with
            ``exp_type``. Entries that are not greater than 0 are skipped.
        max_episode_steps: Forwarded to ``gym.wrappers.TimeLimit``.
        maml: When truthy, use ``RandomEnvironmen2`` and expose its
            ``sample`` attribute on the returned environment.
        small_size: Forwarded to ``WarpFrame``.

    Returns:
        The fully wrapped environment.

    Raises:
        AssertionError: If ``exp_type`` and ``exp_const`` differ in length.
        ValueError: If an entry with a positive constant names an unknown
            exploration kind.
    """
    # Check the exploration configuration first so that a bad argument fails
    # before any environment has been constructed.
    assert len(exp_type) == len(exp_const), 'exp_type and exp_const must have the same length'
    active_exploration = [(kind, const) for kind, const in zip(exp_type, exp_const) if const > 0]
    for kind, _ in active_exploration:
        if kind not in ('obs', 'x'):
            raise ValueError('unknown exploration {}'.format(kind))

    game, state = game_states[0]
    env = make(game, state)
    if maml:
        env_rand = RandomEnvironmen2(env, game_states)
    else:
        env_rand = RandomEnvironmen(env, game_states)

    env = retro_contest.StochasticFrameSkip(env_rand, n=4, stickprob=0.25)
    env = BackupOriginalData(env)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env, color, small_size)

    # Kinds were validated above, so anything that is not 'obs' is 'x'.
    for kind, const in active_exploration:
        if kind == 'obs':
            env = ObsExplorationReward(env, const, game_specific=True)
        else:
            env = XExplorationReward(env, const, game_specific=True)

    if stack > 1:
        env = FrameStack(env, stack)
    env = EpisodeInfo(env)
    if maml:
        # Re-export the sampler of the inner random environment on the
        # outermost wrapper.
        env.sample = env_rand.sample
    return env
def make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None):
    """Create a retro environment wrapped with frame skip and a time limit.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Save state forwarded to ``retro.make``.
        discrete_actions: When truthy, use ``retro.Actions.DISCRETE``;
            otherwise ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.25) and ``TimeLimit`` (4500 steps).
    """
    make_kwargs = {
        'use_restricted_actions': (
            retro.Actions.DISCRETE if discrete_actions else retro.Actions.FILTERED
        ),
    }

    try:
        base = retro.make(game, state, scenario='contest', **make_kwargs)
    except Exception:
        # Best-effort fallback: retry without naming a scenario.
        base = retro.make(game, state, **make_kwargs)

    if bk2dir:
        base.auto_record(bk2dir)

    frame_skipped = retro_contest.StochasticFrameSkip(base, n=4, stickprob=0.25)
    return gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)
def make(game, state, discrete_actions=False, bk2dir=None, max_episode_steps=4000):
    """Make the competition environment.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Save state forwarded to ``retro.make``.
        discrete_actions: When truthy, use ``retro.ACTIONS_DISCRETE``;
            otherwise ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        max_episode_steps: Forwarded to ``gym.wrappers.TimeLimit``.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.25) and ``TimeLimit``.
    """
    print('game:', game, 'state:', state)

    action_set = retro.ACTIONS_DISCRETE if discrete_actions else retro.ACTIONS_FILTERED

    def _build(**extra):
        # Shared retro.make call; ``extra`` carries the optional scenario.
        return retro.make(game, state, use_restricted_actions=action_set, **extra)

    try:
        emulator = _build(scenario='contest')
    except Exception:
        # Best-effort fallback: retry without naming a scenario.
        emulator = _build()

    if bk2dir:
        emulator.auto_record(bk2dir)

    emulator = retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.25)
    return gym.wrappers.TimeLimit(emulator, max_episode_steps=max_episode_steps)
def make_env(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.

    Args:
        game: Game identifier forwarded to ``make``.
        state: Save state forwarded to ``make``.
        stack: When truthy, wrap with ``FrameStack`` using 4 frames.
        scale_rew: When truthy, wrap with ``RewardScaler``.

    Returns:
        The wrapped environment; ``EpisodeInfo`` is the outermost wrapper.
    """
    wrapped = make(game, state)
    wrapped = retro_contest.StochasticFrameSkip(wrapped, n=4, stickprob=0.25)
    wrapped = gym.wrappers.TimeLimit(wrapped, max_episode_steps=4500)
    wrapped = AllowBacktracking(SonicDiscretizer(wrapped))

    if scale_rew:
        wrapped = RewardScaler(wrapped)

    wrapped = WarpFrame(wrapped)

    if stack:
        wrapped = FrameStack(wrapped, 4)

    return EpisodeInfo(wrapped)
def make(game, state=retro.STATE_DEFAULT, discrete_actions=False, bk2dir=None, monitordir=None, scenario='contest'):
    """Create a retro environment with optional recording and monitoring.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Save state forwarded to ``retro.make``.
        discrete_actions: When truthy, use ``retro.ACTIONS_DISCRETE``;
            otherwise ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        monitordir: When truthy, wrap with ``retro_contest.Monitor`` writing
            ``monitor.csv`` and ``log.csv`` paths under this directory.
        scenario: Scenario name tried first in ``retro.make``.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.25) and ``TimeLimit`` (4500 steps).
    """
    action_set = retro.ACTIONS_DISCRETE if discrete_actions else retro.ACTIONS_FILTERED

    try:
        emulator = retro.make(game, state, scenario=scenario, use_restricted_actions=action_set)
    except Exception:
        # Best-effort fallback: retry without naming a scenario.
        emulator = retro.make(game, state, use_restricted_actions=action_set)

    if bk2dir:
        emulator.auto_record(bk2dir)

    if monitordir:
        monitor_path = os.path.join(monitordir, 'monitor.csv')
        log_path = os.path.join(monitordir, 'log.csv')
        emulator = retro_contest.Monitor(emulator, monitor_path, log_path)

    emulator = retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.25)
    return gym.wrappers.TimeLimit(emulator, max_episode_steps=4500)
def retro_make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None, record='.'):
    """Create a retro environment using the ``deep_thought`` scenario.

    Args:
        game: Game identifier forwarded to ``retro.make``; also used to look
            up a game-specific wrapper in ``game_wrappers``.
        state: Save state forwarded to ``retro.make``.
        discrete_actions: When truthy, use ``retro.Actions.DISCRETE``;
            otherwise ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        record: Forwarded as ``record`` on the first ``retro.make`` attempt.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` and then either
        the game-specific wrapper or a 4500-step ``TimeLimit``.
    """
    action_set = retro.Actions.DISCRETE if discrete_actions else retro.Actions.FILTERED

    try:
        emulator = retro.make(
            game,
            state,
            scenario='deep_thought',
            record=record,
            use_restricted_actions=action_set,
        )
    except Exception:
        # Report the failure, then retry without scenario or record.
        print('EXCEPTION in retro_make')
        print(traceback.format_exc())
        emulator = retro.make(game, state, use_restricted_actions=action_set)

    if bk2dir:
        emulator.auto_record(bk2dir)

    emulator = retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.25)

    # Games without a registered wrapper get the default time limit.
    if game not in game_wrappers:
        return gym.wrappers.TimeLimit(emulator, max_episode_steps=4500)
    return game_wrappers[game](emulator)
def make_maml_env(game_states, stack=2, scale_rew=True, color=False, exp_type='x', exp_const=0.002, max_episode_steps=4500):
    """
    Create an environment with some standard wrappers.

    Args:
        game_states: Indexable collection of ``(game, state)`` pairs. The
            first pair builds the initial environment; the whole collection
            is handed to ``RandomEnvironmen2``.
        stack: Number of frames for ``FrameStack``; stacking is skipped
            unless this is greater than 1.
        scale_rew: When truthy, wrap with ``RewardScaler``.
        color: Forwarded to ``WarpFrame``.
        exp_type: ``'obs'`` or ``'x'``; any other value adds no exploration
            wrapper.
        exp_const: Exploration constant; no exploration wrapper is added
            unless it is greater than 0.
        max_episode_steps: Forwarded to ``gym.wrappers.TimeLimit``.

    Returns:
        The wrapped environment, with ``sample`` taken from the inner
        ``RandomEnvironmen2`` instance.
    """
    first_game, first_state = game_states[0]
    random_env = RandomEnvironmen2(make(first_game, first_state), game_states)

    wrapped = retro_contest.StochasticFrameSkip(random_env, n=4, stickprob=0.25)
    wrapped = BackupOriginalData(wrapped)
    wrapped = gym.wrappers.TimeLimit(wrapped, max_episode_steps=max_episode_steps)
    wrapped = AllowBacktracking(SonicDiscretizer(wrapped))

    if scale_rew:
        wrapped = RewardScaler(wrapped)

    wrapped = WarpFrame(wrapped, color)

    if exp_const > 0:
        if exp_type == 'obs':
            wrapped = ObsExplorationReward(wrapped, exp_const, game_specific=True)
        elif exp_type == 'x':
            wrapped = XExplorationReward(wrapped, exp_const, game_specific=True)

    if stack > 1:
        wrapped = FrameStack(wrapped, stack)

    # NOTE(review): applied regardless of ``stack`` here; confirm that the
    # float scaling was not meant to be tied to frame stacking.
    wrapped = ScaledFloatFrame(wrapped)
    wrapped = EpisodeInfo(wrapped)

    # Re-export the sampler of the inner random environment on the
    # outermost wrapper.
    wrapped.sample = random_env.sample
    return wrapped
def make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None, monitordir=None, scenario='scenario'):
    """Create a retro environment with optional recording and monitoring.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Save state forwarded to ``retro.make``.
        discrete_actions: When truthy, use ``retro.Actions.DISCRETE``;
            otherwise ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        monitordir: When truthy, wrap with ``retro_contest.Monitor`` writing
            timestamp-suffixed monitor and log CSV paths under this directory.
        scenario: Scenario name tried first in ``retro.make``.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.0) and ``TimeLimit`` (8000 steps).
    """
    action_set = retro.Actions.DISCRETE if discrete_actions else retro.Actions.FILTERED

    try:
        emulator = retro.make(game, state, scenario=scenario, use_restricted_actions=action_set)
    except Exception:
        # Best-effort fallback: retry without naming a scenario.
        emulator = retro.make(game, state, use_restricted_actions=action_set)

    if bk2dir:
        emulator.auto_record(bk2dir)

    if monitordir:
        # Both file names share one timestamp so they can be matched up.
        stamp = int(time.time())
        monitor_path = os.path.join(monitordir, 'monitor_{}.csv'.format(stamp))
        log_path = os.path.join(monitordir, 'log_{}.csv'.format(stamp))
        emulator = retro_contest.Monitor(emulator, monitor_path, log_path)

    # Frame-skip settings noted per game by the original author:
    #   bust a move: n=6, stickprob=0.0
    #   contra:      n=4, stickprob=0.0   (the one in effect)
    #   sonic:       n=4, stickprob=0.25
    emulator = retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.0)
    return gym.wrappers.TimeLimit(emulator, max_episode_steps=8000)