def make_atari_env(args, frame_stack=True):
    """Build a wrapped Atari environment from the settings in ``args``.

    Args:
        args: Mapping read for the keys ``'env'`` (id handed to
            ``gym.make``), ``'noop_max'`` and ``'max_episode_steps'``.
        frame_stack: When true (the default) ``FrameStack(env, 4)`` is part
            of the wrapper chain; when false that wrapper is skipped.

    Returns:
        The wrapped environment.

    Raises:
        AssertionError: If the spec id of the created environment does not
            contain ``'NoFrameskip'``.
    """
    import gym
    from baselines.common.atari_wrappers import FrameStack, NoopResetEnv

    env = gym.make(args['env'])
    # Presumably required because MaxAndSkipEnv(skip=4) below does the
    # skipping itself -- confirm against the wrapper implementation.
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=args['noop_max'])
    env = MaxAndSkipEnv(env, skip=4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)
    env = ExtraTimeLimit(env, args['max_episode_steps'])
    if 'Montezuma' in args['env']:
        env = MontezumaInfoWrapper(env)
    env = AddRandomStateToInfo(env)
    return env
def make_env(stack=True, scale_rew=True):
    """
    Build the 'LabyrinthZone.Act1' Sonic environment with the usual wrappers.

    Wrappers are applied in this order: SonicDiscretizer, RewardScaler (only
    when ``scale_rew`` is true), WarpFrame, and FrameStack over 4 frames
    (only when ``stack`` is true).
    """
    # Alternative source kept for reference: grc.RemoteEnv('tmp/sock')
    sonic = SonicDiscretizer(
        make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    )
    if scale_rew:
        sonic = RewardScaler(sonic)
    sonic = WarpFrame(sonic)
    return FrameStack(sonic, 4) if stack else sonic
def make_sonic_train(env_id=None, stack=True, scale_rew=True):
    """
    Create a training environment with some standard wrappers.

    Args:
        env_id: String of the form ``"<game>,<state>"``. It is required even
            though the signature defaults it to ``None``.
        stack: When true, wrap with ``FrameStack(env, 4)``.
        scale_rew: When true, wrap with ``RewardScaler``.

    Returns:
        The wrapped environment.

    Raises:
        ValueError: If ``env_id`` is ``None`` or does not split into exactly
            two comma-separated parts.
    """
    if env_id is None:
        # Fail with a clear message instead of AttributeError on None.split.
        raise ValueError("env_id must be a '<game>,<state>' string, got None")
    game, state = env_id.split(',')
    env = make(game=game, state=state)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def make_test():
    """Custom make function for the contest environment.

    Creates 'GreenHillZone.Act2' via ``make_retro`` with
    ``record="./records"`` and wraps it, in order, with ActionsDiscretizer,
    RewardScaler, PreprocessFrame, FrameStack (4 frames) and
    AllowBacktracking.
    """
    # Per the original author's note, a time limit / max number of steps is
    # imposed in this environment.
    base = make_retro(
        game='SonicTheHedgehog-Genesis',
        state='GreenHillZone.Act2',
        record="./records",
    )
    framed = PreprocessFrame(RewardScaler(ActionsDiscretizer(base)))
    return AllowBacktracking(FrameStack(framed, 4))
def make_env(stack=True, scale_rew=True):
    """
    Build the 'GreenHillZone.Act1' Sonic environment through ``retro.make``.

    Wrappers, in order: SonicDiscretizer, RewardScaler (if ``scale_rew``),
    WarpFrame, FrameStack over 4 frames (if ``stack``).
    """
    import retro

    # Alternatives noted by the original author: grc.RemoteEnv('tmp/sock'),
    # or passing scenario='scenario.json' to retro.make.
    game_env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    game_env = SonicDiscretizer(game_env)
    game_env = RewardScaler(game_env) if scale_rew else game_env
    game_env = WarpFrame(game_env)
    if not stack:
        return game_env
    return FrameStack(game_env, 4)
def _thunk():
    """Create and wrap one environment.

    Reads ``env_id``, ``seed``, ``rank`` and ``eval`` from the enclosing
    scope (not visible in this block).
    """
    instance = gym.make(env_id)
    instance.seed(seed + rank)
    instance = WarpFrame(MaxAndSkipEnv(NoopResetEnv(instance, noop_max=30), skip=2))
    # Janky fix to resize environments to be 50x50.
    instance.width, instance.height = 50, 50
    instance = ScaledFloatFrame(instance)
    # NOTE(review): only reward clipping is treated as training-only here;
    # confirm whether EpisodicLifeEnv should be skipped during eval as well.
    if not eval:
        instance = ClipRewardEnv(instance)
    instance = EpisodicLifeEnv(instance)
    return TransposeOb(FrameStack(instance, 3))
def _wrap_unity_env(env_path, seed, base_port, unity_arguments, rank, reward_range=(-np.inf, np.inf)):
    """Start a Unity environment and wrap it for Gym-style use.

    ``rank`` is passed to ``UnityEnvironment`` as ``worker_id``. The result
    is wrapped with MLToGymEnv (``train_mode=True``), FloatToUInt8Frame,
    WarpFrame and FrameStack with ``k=4``.
    """
    unity = UnityEnvironment(
        file_name=env_path,
        seed=seed,
        worker_id=rank,
        base_port=base_port,
        arguments=unity_arguments,
    )
    gym_env = MLToGymEnv(unity, train_mode=True, reward_range=reward_range)
    return FrameStack(WarpFrame(FloatToUInt8Frame(gym_env)), k=4)
def main():
    """Load "breakout_model.pkl" and play Breakout episodes forever.

    Each episode is rendered step by step and its total reward is printed
    when it ends.
    """
    env = FrameStack(WarpFrame(make_atari("BreakoutNoFrameskip-v0")), k=4)
    act = deepq.load("breakout_model.pkl")
    while True:
        obs = env.reset()
        done = False
        episode_rew = 0
        while not done:
            env.render()
            # obs[None] adds a leading axis; the first returned action is used.
            chosen = act(obs[None])[0]
            obs, rew, done, _ = env.step(chosen)
            episode_rew += rew
        print("Episode reward", episode_rew)
def make_mario_env(crop=True, frame_stack=True, clip_rewards=False):
    """Build the 'SuperMarioBros-Nes' 'Level1-1' environment.

    Args:
        crop: Forwarded to ``ProcessFrame84``.
        frame_stack: When true, wrap with ``FrameStack(env, 4)``.
        clip_rewards: Must be ``False``; anything else fails the assertion.

    Returns:
        The wrapped environment, with ``LimitedDiscreteActions`` outermost.
    """
    assert clip_rewards is False
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack

    gym.undo_logger_setup()
    mario = retro.make('SuperMarioBros-Nes', 'Level1-1')
    # Capture the button list from the raw env before any wrapper is applied.
    buttons = mario.BUTTONS
    mario = ProcessFrame84(FrameSkip(MarioXReward(mario), 4), crop=crop)
    if frame_stack:
        mario = FrameStack(mario, 4)
    return LimitedDiscreteActions(mario, buttons)
def make_sonic_test(stack=True, scale_rew=True):
    """
    Build a test environment on top of ``grc.RemoteEnv('tmp/sock')``.

    Wrappers, in order: SonicDiscretizer, RewardScaler (if ``scale_rew``),
    WarpFrame, FrameStack over 4 frames (if ``stack``).
    """
    # A local alternative noted by the original author is
    # make(game=..., state=...) with
    # env_id "SonicTheHedgehog-Genesis,GreenHillZone.Act1".
    remote = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    if scale_rew:
        remote = RewardScaler(remote)
    remote = WarpFrame(remote)
    return FrameStack(remote, 4) if stack else remote
def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False, width=84, height=84):
    """Configure an environment for DeepMind-style Atari.

    Wrappers, in order: EpisodicLifeEnv (if ``episode_life``), FireResetEnv
    (if the unwrapped env lists a 'FIRE' action), WarpFrame with the given
    ``width``/``height``, ScaledFloatFrame (if ``scale``), ClipRewardEnv (if
    ``clip_rewards``) and FrameStack over 4 frames (if ``frame_stack``).
    """
    wrapped = EpisodicLifeEnv(env) if episode_life else env
    if 'FIRE' in wrapped.unwrapped.get_action_meanings():
        wrapped = FireResetEnv(wrapped)
    wrapped = WarpFrame(wrapped, width=width, height=height)
    if scale:
        wrapped = ScaledFloatFrame(wrapped)
    if clip_rewards:
        wrapped = ClipRewardEnv(wrapped)
    if frame_stack:
        wrapped = FrameStack(wrapped, 4)
    return wrapped
def make_env(env_idx):
    """
    Build the Sonic environment selected by ``env_idx``.

    ``env_idx`` indexes a fixed list of 13 'SonicTheHedgehog-Genesis'
    levels. The chosen game and state are printed, and recordings go to
    "./records" through ``bk2dir``.
    """
    states = [
        'SpringYardZone.Act3',
        'SpringYardZone.Act2',
        'GreenHillZone.Act3',
        'GreenHillZone.Act1',
        'StarLightZone.Act2',
        'StarLightZone.Act1',
        'MarbleZone.Act2',
        'MarbleZone.Act1',
        'MarbleZone.Act3',
        'ScrapBrainZone.Act2',
        'LabyrinthZone.Act2',
        'LabyrinthZone.Act1',
        'LabyrinthZone.Act3',
    ]
    levels = [{'game': 'SonicTheHedgehog-Genesis', 'state': name} for name in states]
    level = levels[env_idx]

    print(level['game'], level['state'], flush=True)
    env = make(game=level['game'], state=level['state'], bk2dir="./records")

    # Discretize the actions, scale rewards, preprocess frames, stack 4 frames.
    env = FrameStack(PreprocessFrame(RewardScaler(ActionsDiscretizer(env))), 4)

    # Backtracking is allowed so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on.
    return AllowBacktracking(env)
def main():
    """Run PPO until the environment throws an exception.

    Environment variables read:
        RETRO_DISCOUNT: Optional discount factor (gamma); defaults to 0.99.
        RETRO_ENCODER_DIR: If set, a ``StateEncoder`` is built from it and
            passed to ``ExplorationVecEnv``.
        RETRO_INIT_DIR: If set, the latest checkpoint found there is
            restored into the trainable 'ppo2_model' variables by the init
            callback handed to ``ppo2.learn``.
    """
    raw_discount = os.environ.get('RETRO_DISCOUNT')
    discount = float(raw_discount) if raw_discount is not None else 0.99
    print("DISCOUNT: %s" % (discount,))

    vec_env = make_vec_env(extra_wrap_fn=lambda env: FrameStack(env, 4))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config) as sess:
        if 'RETRO_ENCODER_DIR' in os.environ:
            state_encoder = StateEncoder(sess, encoder_dir=os.environ['RETRO_ENCODER_DIR'])
        else:
            state_encoder = None

        def init_fun():
            """Initialise the encoder and optionally restore PPO weights."""
            if state_encoder is not None:
                state_encoder.initialize()
            if "RETRO_INIT_DIR" in os.environ:
                saver = tf.train.Saver(var_list=tf.trainable_variables('ppo2_model'))
                latest_checkpoint = tf.train.latest_checkpoint(os.environ['RETRO_INIT_DIR'])
                print("PPO2_LOAD_INIT_CHECKPOINT: %s" % (latest_checkpoint,))
                saver.restore(sess, latest_checkpoint)

        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=RewardScalingVecEnv(
                       ExplorationVecEnv(vec_env, Exploration, state_encoder=state_encoder),
                       reward_scale=0.01),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=discount,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1.5e6 * vec_env.num_envs),
                   save_interval=1,
                   init_fun=init_fun)
def make_env(game, state, stack=True, scale_rew=True):
    """
    Build the environment for ``game`` / ``state`` with the usual wrappers.

    Wrappers, in order: StochasticFrameSkip (n=4, stickprob=0.25), TimeLimit
    (4500 steps), SonicDiscretizer, AllowBacktracking, RewardScaler (if
    ``scale_rew``), WarpFrame, FrameStack over 4 frames (if ``stack``) and
    EpisodeInfo.
    """
    core = make(game, state)
    core = retro_contest.StochasticFrameSkip(core, n=4, stickprob=0.25)
    core = gym.wrappers.TimeLimit(core, max_episode_steps=4500)
    core = AllowBacktracking(SonicDiscretizer(core))
    if scale_rew:
        core = RewardScaler(core)
    core = WarpFrame(core)
    if stack:
        core = FrameStack(core, 4)
    return EpisodeInfo(core)
def make_env(stack=True, scale_rew=True):
    """
    Build the 'SonicTheHedgehog2-Genesis' 'MetropolisZone.Act3' environment.

    Movies go to 'movies_tuned/' via ``bk2dir``; ``retro_contest.Monitor``
    is given two CSV paths under 'results'. Further wrappers, in order:
    SonicDiscretizer (``noop=True``), RewardScaler (if ``scale_rew``),
    WarpFrame, FrameStack over 4 frames (if ``stack``).
    """
    level = make(game='SonicTheHedgehog2-Genesis', state='MetropolisZone.Act3', bk2dir='movies_tuned/')
    monitor_csv = os.path.join('results', 'monitor_tuned.csv')
    log_csv = os.path.join('results', 'log_tuned.csv')
    level = retro_contest.Monitor(level, monitor_csv, log_csv)
    level = SonicDiscretizer(level, noop=True)
    if scale_rew:
        level = RewardScaler(level)
    level = WarpFrame(level)
    return FrameStack(level, 4) if stack else level
def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
    """Configure an environment for DeepMind-style Atari.

    Environments whose spec id contains "videopinball" or 'tennis'
    (case-insensitive) are first wrapped with a 160x210 non-grayscale
    WarpFrame. Then, in order: EpisodicLifeEnv (if ``episode_life``),
    FireResetEnv (if the unwrapped env lists a 'FIRE' action),
    GrayscaleWrapper, ScaledFloatFrame (if ``scale``), ClipRewardEnv (if
    ``clip_rewards``) and FrameStack over 4 frames (if ``frame_stack``).
    """
    spec_name = str(env.spec.id).lower()
    if "videopinball" in spec_name or 'tennis' in spec_name:
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    has_fire = 'FIRE' in env.unwrapped.get_action_meanings()
    if has_fire:
        env = FireResetEnv(env)
    env = GrayscaleWrapper(env)
    env = ScaledFloatFrame(env) if scale else env
    env = ClipRewardEnv(env) if clip_rewards else env
    env = FrameStack(env, 4) if frame_stack else env
    return env
def make_unity_maze(env_id, seed=0, rank=0, expID=0, frame_stack=True, logdir=None, ext_coeff=1.0, recordUnityVid=False, **kwargs):
    """Launch a Unity maze environment from 'envs/<env_id>' and wrap it.

    Args:
        env_id: File name under 'envs/'; ids containing 'big' get
            ``maxsteps=3000`` (otherwise 500) and a UnityRoomCounterWrapper.
        seed, rank, expID: Forwarded to ``GymWrapper``; ``rank`` and
            ``expID`` also determine the Unity ``worker_id``.
        frame_stack: When true, wrap with ``FrameStack(env, 4)``.
        logdir: Directory handed to ``RecordBestScores``.
        ext_coeff: A non-zero value enables ``use_ext_reward`` on the room
            counter wrapper.
        recordUnityVid: With ``rank == 1``, enables ``RecordBestScores``.
        **kwargs: Forwarded to ``GymWrapper``.

    Returns:
        The wrapped environment.
    """
    import os
    import sys
    import time
    try:
        sys.path.insert(0, os.path.abspath("ml-agents/python/"))
        from unityagents import UnityEnvironment
        from unity_wrapper import GymWrapper
    except ImportError:
        print("Import error in unity environment. Ignore if not using unity.")
    from baselines.common.atari_wrappers import FrameStack
    # gym.undo_logger_setup() is deprecated in new versions of gym.

    # Original note: max 20 workers per expID, max 30 experiments per machine.
    # NOTE(review): the constants below (200 and 60) do not match that note.
    if 0 <= rank <= 200:
        # Presumably staggers worker start-up by rank -- confirm.
        time.sleep(rank * 2)
    worker = (expID % 60) * 200 + rank
    is_big = 'big' in env_id
    env = UnityEnvironment(file_name='envs/' + env_id, worker_id=worker)
    maxsteps = 3000 if is_big else 500
    env = GymWrapper(env, seed=seed, rank=rank, expID=expID, maxsteps=maxsteps, **kwargs)
    if is_big:
        env = UnityRoomCounterWrapper(env, use_ext_reward=(ext_coeff != 0.0))
    if rank == 1 and recordUnityVid:
        env = RecordBestScores(env, directory=logdir, freq=1)
    print('Loaded environment %s with rank %d\n\n' % (env_id, rank))

    env = ProcessFrame84(env, crop=False)
    return FrameStack(env, 4) if frame_stack else env
def make_retro(env_name="Breakout", naudio_samples=None, sticky_env=False, make_video=False, is_baseline=False):
    """Build an Atari 2600 environment through ``retro.make``.

    ``env_name`` gets the suffix '-Atari2600'. Wrappers, in order:
    MaxAndSkipEnv (skip=4), ProcessFrame84 (``crop=False``), FrameStack
    over 4 frames, then either ExtraTimeLimit (4500 steps) or, when
    ``sticky_env`` is true, StickyActionEnv after setting
    ``_max_episode_steps`` to 4500 * 4. RetroALEActions is outermost.
    """
    import retro
    from baselines.common.atari_wrappers import FrameStack

    atari = retro.make(
        env_name + '-Atari2600',
        naudio_samples=naudio_samples,
        make_video=make_video,
        is_baseline=is_baseline,
    )
    max_episode_steps = 4500
    atari = FrameStack(ProcessFrame84(MaxAndSkipEnv(atari, skip=4), crop=False), 4)
    if sticky_env:
        atari._max_episode_steps = max_episode_steps * 4
        atari = StickyActionEnv(atari)
    else:
        atari = ExtraTimeLimit(atari, max_episode_steps)
    return RetroALEActions(atari, atari.buttons)
def make_custom(stack=True, scale_rew=True):
    """
    Build a local (non-remote) 'GreenHillZone.Act1' Sonic environment.

    Wrappers, in order: SonicDiscretizer, RewardScaler (if ``scale_rew``),
    WarpFrame96, FrameStack over 4 frames (if ``stack``), AllowBacktracking.
    """
    # Simplified, not remote; the remote variant is grc.RemoteEnv('tmp/sock').
    local_env = make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    local_env = SonicDiscretizer(local_env)
    local_env = RewardScaler(local_env) if scale_rew else local_env
    local_env = WarpFrame96(local_env)
    if stack:
        local_env = FrameStack(local_env, 4)
    return AllowBacktracking(local_env)
def make_env(stack=True, scale_rew=True):
    """
    Build 'GreenHillZone.Act1' using the scenario file "./scenario.json".

    Wrappers, in order: SonicDiscretizer, RewardScaler (if ``scale_rew``),
    WarpFrame, FrameStack over 4 frames (if ``stack``).
    """
    scenario_env = retro.make(
        game='SonicTheHedgehog-Genesis',
        state='GreenHillZone.Act1',
        scenario="./scenario.json",
    )
    scenario_env = SonicDiscretizer(scenario_env)
    if scale_rew:
        scenario_env = RewardScaler(scenario_env)
    scenario_env = WarpFrame(scenario_env)
    if not stack:
        return scenario_env
    return FrameStack(scenario_env, 4)
def make_env(stack=True, scale_rew=True, scenario='trajectory_max', state='LabyrinthZone.Act1'):
    """
    Create an environment with some standard wrappers.

    Args:
        stack: When true, wrap with ``FrameStack(env, 4)``.
        scale_rew: When true, wrap with ``RewardScaler``.
        scenario: Scenario name forwarded to ``make``. The original author's
            note mentions 'contest' as an alternative value.
        state: Level to load. Defaults to 'LabyrinthZone.Act1', the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The wrapped environment. Movies are written to 'videos' and monitor
        output to 'logs' through the ``bk2dir`` / ``monitordir`` arguments.
    """
    # Levels previously selected by editing this function (now reachable via
    # ``state``): GreenHillZone.Act1-3, ScrapBrainZone.Act1-2,
    # SpringYardZone.Act1-3, StarLightZone.Act1-3, LabyrinthZone.Act1-2,
    # MarbleZone.Act1-3. A remote alternative is grc.RemoteEnv('tmp/sock').
    env = make(game='SonicTheHedgehog-Genesis', state=state,
               bk2dir='videos', monitordir='logs', scenario=scenario)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def make_env(game=None, state=None, stack=True, scale_rew=True, allowbacktrace=False):
    """
    Build the environment for ``game`` / ``state`` with the usual wrappers.

    Wrappers, in order: SonicDiscretizer, RewardScaler (if ``scale_rew``),
    WarpFrame, AllowBacktracking (if ``allowbacktrace``), FrameStack over 4
    frames (if ``stack``).
    """
    # Alternative source kept for reference: grc.RemoteEnv('tmp/sock')
    built = SonicDiscretizer(make(game=game, state=state))
    if scale_rew:
        built = RewardScaler(built)
    built = WarpFrame(built)
    if allowbacktrace:
        built = AllowBacktracking(built)
    return FrameStack(built, 4) if stack else built
def deepmind_wrap(atari_env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
    """Wrap ``atari_env`` in the style of the DeepMind papers.

    Args:
        atari_env: The environment to wrap.
        episode_life: When true, wrap with ``EpisodicLifeEnv``.
        clip_rewards: When true, wrap with ``ClipRewardEnv``.
        frame_stack: When true, wrap with ``FrameStack(env, 4)``.
        scale: When true, wrap with ``ScaledFloatFrame``.

    Returns:
        The wrapped environment. ``FireResetEnv`` is added when the
        unwrapped env lists a 'FIRE' action; ``WarpFrame`` is always applied.
    """
    # Start from the input so ``env`` is bound even when episode_life is
    # False (previously that path hit an unbound local).
    env = atari_env
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_local_env(game="SonicTheHedgehog-Genesis", state="GreenHillZone.Act1", stack=True, scale_rew=True):
    """
    Create a local Gym environment via ``retro.make`` with standard wrappers.

    Progress messages are printed at several points. Wrappers, in order:
    SonicDiscretizer, RewardScaler (if ``scale_rew``), WarpFrame, FrameStack
    over 4 frames (if ``stack``).
    """
    print("Entering make_local_env")
    local = retro.make(game=game, state=state)
    print("Got env")
    local = SonicDiscretizer(local)
    print("discretized env")
    local = RewardScaler(local) if scale_rew else local
    local = WarpFrame(local)
    local = FrameStack(local, 4) if stack else local
    print("About to return")
    return local
def make_env(env_idx):
    """
    Build the environment selected by ``env_idx``.

    ``env_idx`` indexes a fixed list of 13 levels drawn from
    'SonicTheHedgehog2-Genesis' and 'SonicAndKnuckles3-Genesis'. The chosen
    game and state are printed before the environment is created.
    """
    sonic2 = 'SonicTheHedgehog2-Genesis'
    sonic3k = 'SonicAndKnuckles3-Genesis'
    pairs = [
        (sonic2, 'EmeraldHillZone.Act1'),
        (sonic2, 'ChemicalPlantZone.Act2'),
        (sonic2, 'ChemicalPlantZone.Act1'),
        (sonic2, 'MetropolisZone.Act1'),
        (sonic2, 'MetropolisZone.Act2'),
        (sonic2, 'OilOceanZone.Act1'),
        (sonic2, 'OilOceanZone.Act2'),
        (sonic3k, 'LavaReefZone.Act2'),
        (sonic3k, 'CarnivalNightZone.Act2'),
        (sonic3k, 'CarnivalNightZone.Act1'),
        (sonic3k, 'MushroomHillZone.Act2'),
        (sonic3k, 'MushroomHillZone.Act1'),
        (sonic3k, 'AngelIslandZone.Act1'),
    ]
    levels = [{'game': title, 'state': zone} for title, zone in pairs]
    level = levels[env_idx]

    print(level['game'], level['state'], flush=True)
    # Recording (bk2dir="./records") is disabled in this variant.
    env = make(game=level['game'], state=level['state'])

    # Discretize the actions, scale rewards, preprocess frames, stack 4 frames.
    env = FrameStack(PreprocessFrame(RewardScaler(ActionsDiscretizer(env))), 4)

    # Backtracking is allowed so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on.
    return AllowBacktracking(env)
def make_env(stack=True, scale_rew=True, record=False):
    """
    Build the 'LabyrinthZone.Act1' Sonic environment for local training.

    ``record`` is forwarded to ``retro.make``. Wrappers, in order:
    SonicDiscretizer, RewardScaler (if ``scale_rew``), WarpFrame, FrameStack
    over 4 frames (if ``stack``).
    """
    # Only a single, fixed level is trained here; per the original note, a
    # method to load different levels automatically could be added. The
    # remote alternative is grc.RemoteEnv('tmp/sock').
    trainer_env = retro.make(
        game='SonicTheHedgehog-Genesis',
        state='LabyrinthZone.Act1',
        record=record,
    )
    trainer_env = SonicDiscretizer(trainer_env)
    if scale_rew:
        trainer_env = RewardScaler(trainer_env)
    trainer_env = WarpFrame(trainer_env)
    return FrameStack(trainer_env, 4) if stack else trainer_env
def main():
    """Train a DQN agent on ``Env(64, 64)`` and then play 100 episodes.

    Command-line flags: --seed, --prioritized, --dueling, --num-timesteps.
    After ``deepq.learn`` returns, each of the 100 episodes is rendered with
    a 0.01 s pause per step and its total reward is printed.
    """
    env = FrameStack(ScaledFloatFrame(WarpFrame(Env(64, 64))), 1)

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--seed', help='RNG seed', type=int, default=0)
    cli.add_argument('--prioritized', type=int, default=1)
    cli.add_argument('--dueling', type=int, default=0)
    cli.add_argument('--num-timesteps', type=int, default=int(10e6))
    options = cli.parse_args()

    logger.configure()
    q_network = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (32, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(options.dueling),
    )
    act = deepq.learn(
        env,
        q_func=q_network,
        lr=1e-4,
        max_timesteps=options.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.25,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(options.prioritized),
        restore=True,
    )

    for _ in range(100):
        obs = env.reset()
        done = False
        episode_rew = 0
        while not done:
            sleep(0.01)
            env.render()
            # np.array(obs)[None] adds a leading axis before calling act.
            action = act(np.array(obs)[None])[0]
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
def make_env_all_params(rank, add_monitor, args):
    """Build the environment described by ``args``.

    Args:
        rank: Worker index; used to name the monitor output ('%.2i' % rank).
        add_monitor: When true, wrap the result in ``Monitor`` writing under
            ``logger.get_dir()``.
        args: Mapping with key ``'env_kind'`` ('atari' or 'mario'). For
            'atari' the keys ``'env'``, ``'noop_max'``,
            ``'max_episode_steps'`` and ``'dyn_env'`` are also read.

    Returns:
        The wrapped environment.

    Raises:
        ValueError: If ``args['env_kind']`` is not a supported kind.
        AssertionError: For 'atari', if the spec id of the created
            environment does not contain 'NoFrameskip'.
    """
    env_kind = args["env_kind"]
    if env_kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if args["dyn_env"]:
            env = MakeEnvDynamic(env)
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif env_kind == 'mario':
        env = make_mario_env()
    else:
        # Previously an unknown kind fell through and hit an unbound ``env``.
        raise ValueError("Unsupported env_kind: %r" % (env_kind,))
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_custom_env(disc_acts=True):
    """
    Build the 'SuperMarioBros3-Nes' environment with standard wrappers.

    Uses state "1Player.World1.Level1.state", scenario
    "./data/scenario.json" and records to "./recordings/". Wrappers, in
    order: MarioDiscretizer (if ``disc_acts``), PreprocessFrames, and
    FrameStack over ``N_FRAMES`` frames.
    """
    mario = retro.make(
        game='SuperMarioBros3-Nes',
        state="1Player.World1.Level1.state",
        scenario="./data/scenario.json",
        record="./recordings/",
    )
    if disc_acts:
        # Build the actions array.
        mario = MarioDiscretizer(mario)
    # Preprocess frames, then stack N_FRAMES of them.
    return FrameStack(PreprocessFrames(mario), N_FRAMES)
def make_env(index=0, stack=False, scale_rew=True):
    """
    Build the environment for entry ``index`` of ``getListOfGames("train")``.

    Each entry is a "<game>,<state>" string. Wrappers, in order:
    SonicDiscretizer, RewardScaler (if ``scale_rew``), WarpFrame, FrameStack
    over 4 frames (if ``stack``).
    """
    # Example entry: 'SonicTheHedgehog-Genesis,SpringYardZone.Act3'.
    # The remote alternative is grc.RemoteEnv('tmp/sock').
    training_games = getListOfGames("train")
    game, state = training_games[index].split(',')
    picked = SonicDiscretizer(make(game=game, state=state))
    if scale_rew:
        picked = RewardScaler(picked)
    picked = WarpFrame(picked)
    return FrameStack(picked, 4) if stack else picked