def make_env_all_params(rank, add_monitor, args):
    """Build the environment for worker `rank`, wrapped per args['env_kind']."""
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        # Standard pipeline: noop starts, 4-step skip, 84x84 frames,
        # 4-frame stack, then an episode-length cap.
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
    elif kind == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
class Evaluator(object):
    """Roll out a trained policy for inspection.

    Writes per-step pose/velocity lines to ``trajectories/`` and, for the
    first episode of each evaluation, raw frames to ``images/``.
    """

    def __init__(self, env_name, num_episodes, exp_name, policy):
        """Wrap `env_name` with the 84x84 / 4-frame-stack pipeline.

        BUG FIX: `num_episodes` was previously accepted but ignored
        (self.num_episodes was hard-coded to 1); it now honours the argument.
        """
        self.exp_name = exp_name
        self.env = gym.make(env_name)
        self.env = ProcessFrame84(self.env, crop=False)
        self.env = FrameStack(self.env, 4)
        self.num_episodes = num_episodes
        self.ep_len = 4500  # max steps per evaluation episode
        self.policy = policy
        if not os.path.exists('images'):
            os.mkdir('images')
        self.image_folder = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'images')

    def format_obs(self, obs_name, obs):
        """Return one observation as the text line used in trajectory files."""
        nums = ",".join(map(str, obs))
        dict_format = "{" + nums + "}"
        final_str = "observation \"{}\" - {}\n".format(obs_name, dict_format)
        return final_str

    def eval_model(self, ep_num):
        """Run `self.num_episodes` episodes; log pose/velocity per step and
        dump frames for the first episode only."""
        for i in range(self.num_episodes):
            trajectory_file = self.exp_name + "_ep" + str(
                ep_num) + "_itr" + str(i) + "_trajectory.txt"
            if not os.path.exists("trajectories"):
                os.makedirs("trajectories")
            trajectory_path = os.path.join("trajectories", trajectory_file)
            ep_images = []
            ob = self.env.reset()
            ob = np.array(ob)
            eprews = []
            if i == 0:
                # _last_observation is the raw (un-preprocessed) frame.
                ep_images.append(self.env.unwrapped._last_observation)
            for step in range(self.ep_len):
                action, vpred, nlp = self.policy.get_ac_value_nlp_eval(ob)
                ob, rew, done, info = self.env.step(action[0])
                if i == 0:
                    ep_images.append(self.env.unwrapped._last_observation)
                # Some envs report None for "no reward"; count it as 0.
                if rew is None:
                    eprews.append(0)
                else:
                    eprews.append(rew)
                if step > 0:
                    pos_trans, pos_rot, vel_trans, vel_rot = \
                        self.env.unwrapped.get_pos_and_vel()
                    with open(trajectory_path, 'a') as f:
                        f.write(self.format_obs("DEBUG.POS.TRANS", pos_trans))
                        f.write(self.format_obs("DEBUG.POS.ROT", pos_rot))
                        f.write(self.format_obs("VEL.TRANS", vel_trans))
                        f.write(self.format_obs("VEL.ROT", vel_rot))
            for j in range(len(ep_images)):
                image_file = os.path.join(
                    self.image_folder,
                    self.exp_name + "_{}_{}_{}_".format(ep_num, i, j) + ".png")
                cv2.imwrite(image_file, ep_images[j])
            print("Episode {} cumulative reward: {}".format(i, sum(eprews)))
def train():
    """Train a deepq agent on the SC2 environment selected by FLAGS.env,
    then save the model and a game replay."""
    # Fetch the requested environment set in flags.
    env_class = attrgetter(FLAGS.env)(sc2g.env)
    env = env_class.make_env(
        map_name=FLAGS.map_name,
        feature_screen_size=FLAGS.screen_size,
        feature_minimap_size=FLAGS.minimap_size,
        visualize=FLAGS.visualize,
        save_replay_episodes=FLAGS.save_replay_episodes,
        replay_dir=FLAGS.replay_dir,
    )
    # Stack frames (memory optimisation)
    if FLAGS.num_stack_frames > 0:
        print("Stack frames enabled: n=%d" % FLAGS.num_stack_frames)
        env = FrameStack(env, FLAGS.num_stack_frames)
    # Dueling CNN-to-MLP Q-network: (filters, kernel, stride) per conv layer,
    # followed by one 256-unit hidden layer.
    model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                    hiddens=[256],
                                    dueling=True)
    act = deepq.learn(
        env,
        q_func=model,
        lr=FLAGS.learning_rate,  # Learning rate for adam optimizer
        max_timesteps=FLAGS.max_timesteps,  # Max timesteps
        buffer_size=FLAGS.buffer_size,  # Size of replay buffer
        # Fraction of max_timesteps over which exploration rate is annealed
        exploration_fraction=FLAGS.exploration_fraction,
        # Final value of random action probability
        exploration_final_eps=FLAGS.exploration_final_eps,
        # How often the model is updated, in steps
        train_freq=FLAGS.train_freq,
        # How often training progress is printed, in episodes
        print_freq=FLAGS.print_freq,
        # How often to save the model, in steps
        checkpoint_freq=FLAGS.checkpoint_freq,
        # How many steps before learning starts
        learning_starts=FLAGS.learning_starts,
        gamma=FLAGS.gamma,  # Discount factor
        # How often the target network is updated
        target_network_update_freq=FLAGS.target_network_update_freq,
        prioritized_replay=FLAGS.prioritized_replay,
        callback=deepq_callback,
    )
    print("Saving model...")
    save_model(act)
    print("Saving replay...")
    env.unwrapped.sc2_env.save_replay(FLAGS.map_name)
    print("Closing environment...")
    env.close()
def __init__(self, env_name, num_episodes, exp_name, policy):
    """Build the evaluation env (84x84 frames, 4-frame stack) and the
    images/ output folder.

    BUG FIX: `num_episodes` was previously accepted but ignored
    (self.num_episodes was hard-coded to 1); it now honours the argument.
    """
    self.exp_name = exp_name
    self.env = gym.make(env_name)
    self.env = ProcessFrame84(self.env, crop=False)
    self.env = FrameStack(self.env, 4)
    self.num_episodes = num_episodes
    self.policy = policy
    if not os.path.exists('images'):
        os.mkdir('images')
    self.image_folder = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), 'images')
    print('Image folder', self.image_folder)
def main():
    """Train a small DQN on Breakout and save the resulting policy."""
    # Baselines' make_atari requires a "NoFrameskip" variant of the game.
    game = make_atari("BreakoutNoFrameskip-v0")
    # Grayscale + resize to 84x84, then keep a history of the last 4 frames.
    game = WarpFrame(game)
    game = FrameStack(game, k=4)
    # CNN Q-network: each conv layer is (n_outputs, kernel_size, stride).
    q_network = deepq.models.cnn_to_mlp(convs=[(32, 3, 1), (32, 3, 1)],
                                        hiddens=[256])
    act = deepq.learn(
        game,
        q_func=q_network,
        lr=1e-2,
        max_timesteps=10000,  # number of iterations to optimize for
        buffer_size=1000,
        # fraction of the training period over which exploration is annealed
        exploration_fraction=0.1,
        # final value of the random-action probability
        exploration_final_eps=0.01,
        print_freq=10)
    print("Saving model to breakout_model.pkl")
    act.save("breakout_model.pkl")
def make_env_all_params(rank, add_monitor, args, logdir):
    """Construct one worker environment according to args['env_kind']."""
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        # Noop starts, 4-step skip, 84x84 frames, 4-frame stack, episode cap.
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    elif kind == "dm_suite":
        env = make_dm_suite(task=args["env"],
                            logdir=logdir,
                            to_record=args["to_record"])
    if add_monitor:
        env = TempMonitor(env)
    return env
def make_env(env_idx):
    """
    Create the idx-th Super Mario Bros level with the standard wrappers.
    """
    levels = [
        'SuperMarioBros-1-1-v0', 'SuperMarioBros-1-2-v0',
        'SuperMarioBros-1-3-v0', 'SuperMarioBros-1-4-v0',
        'SuperMarioBros-2-1-v0', 'SuperMarioBros-2-2-v0',
        'SuperMarioBros-2-3-v0', 'SuperMarioBros-2-4-v0',
    ]
    env = gym_super_mario_bros.make(levels[env_idx])
    env = JoypadSpace(env, SIMPLE_MOVEMENT)  # reduced joypad action set
    env = PreprocessFrame(env)               # preprocess the observation
    env = FrameStack(env, 4)                 # history of the last 4 frames
    # Allow back tracking so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on.
    env = AllowBacktracking(env)
    return env
def make_test():
    """
    Create the held-out test level (1-4) with the standard wrappers.
    """
    test_env = gym_super_mario_bros.make('SuperMarioBros-1-4-v0')
    test_env = JoypadSpace(test_env, SIMPLE_MOVEMENT)  # reduced action set
    test_env = PreprocessFrame(test_env)               # preprocess frames
    test_env = FrameStack(test_env, 4)                 # last 4 frames as input
    # Don't punish exploring backwards when the agent cannot advance head-on.
    test_env = AllowBacktracking(test_env)
    return test_env
def make_mario_env(crop=True, frame_stack=True, clip_rewards=False):
    """Build the retro SuperMarioBros env; MPI rank 0 records .bk2 replays."""
    assert clip_rewards is False
    import gym
    import retro
    import os
    from baselines.common.atari_wrappers import FrameStack
    if MPI.COMM_WORLD.Get_rank() == 0:
        # Only the first worker records replays (and creates the folder).
        if not os.path.isdir('./mario_bk2'):
            os.mkdir('./mario_bk2')
        env = retro.make('SuperMarioBros-Nes', 'Level1-1', record='./mario_bk2')
    else:
        env = retro.make('SuperMarioBros-Nes', 'Level1-1')
    # Remember the raw button layout before wrapping changes the action space.
    buttons = env.buttons
    env = MarioXReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=crop)
    if frame_stack:
        env = FrameStack(env, 4)
    return LimitedDiscreteActions(env, buttons)
def make_env(game=None, state=None, stack=False, scale_rew=True, allowbacktrace=False, custom=True):
    """
    Create a retro environment for (game, state) with some standard wrappers.
    """
    env = retro.make(game=game, state=state)
    env.seed(0)  # fixed seed for reproducibility
    env = SonicDiscretizerV3(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrameRGB(env)
    if custom:
        env = CustomGym(env)
    if allowbacktrace:
        env = AllowBacktracking(env)
    if stack:
        env = FrameStack(env, 4)
    return Controller_Gym(env)
def make_env(stack=True, scale_rew=True, local=False, level_choice=None):
    """
    Create an environment with some standard wrappers.

    Fixes:
      * `level_choice=0` previously fell into the random branch because the
        check used truthiness (`if not level_choice`); it now tests for None.
      * the random index no longer hard-codes the list length (was 13).
    """
    print(stack, scale_rew, local)
    if local:
        # Select Random Level if local
        from retro_contest.local import make
        levels = [
            'SpringYardZone.Act3', 'SpringYardZone.Act2', 'GreenHillZone.Act3',
            'GreenHillZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act1',
            'MarbleZone.Act2', 'MarbleZone.Act1', 'MarbleZone.Act3',
            'ScrapBrainZone.Act2', 'LabyrinthZone.Act2', 'LabyrinthZone.Act1',
            'LabyrinthZone.Act3'
        ]
        if level_choice is None:
            level_choice = levels[random.randrange(len(levels))]
        else:
            level_choice = levels[level_choice]
        env = make(game='SonicTheHedgehog-Genesis', state=level_choice)
    else:
        print('connecting to remote environment')
        env = grc.RemoteEnv('tmp/sock')
        print('starting episode')
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def wrap_env_dqn(env):
    """Wrap `env` with the preprocessing stack used for DQN training."""
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    # Record episode stats into the logger directory.
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return FrameStack(env, 4)
def make_test():
    """
    Create the Mario test environment (right-only actions, 6-frame stack).
    """
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    # Restrict the joypad to the RIGHT_ONLY action set.
    env = BinarySpaceToDiscreteSpaceEnv(env, RIGHT_ONLY)
    print(env.action_space)
    env = PreprocessFrame(env)  # preprocess the observation
    env = FrameStack(env, 6)    # keep a history of the last 6 frames
    return env
def wrap_environment(environment, n_frames=4):
    """Chain the standard wrappers around `environment`, stacking n_frames."""
    for wrapper in (ActionsDiscretizer, RewardScaler, PreprocessFrame):
        environment = wrapper(environment)
    environment = FrameStack(environment, n_frames)
    return AllowBacktracking(environment)
def make_env_all_params(rank, add_monitor, args):
    """Build the environment for worker `rank` according to `args`;
    optionally adds a noisy-TV distractor and a Monitor wrapper."""
    if args["env_kind"] == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        # from self-supervised exploration via disagreement
        # NOTE(review): flattened source is ambiguous about whether the
        # _max_episode_steps line is inside this if — reconstructed as
        # sticky-only (matching the sticky branch at the other
        # make_env_all_params in this file); confirm against upstream.
        if args["stickyAtari"] == "true":
            env = StickyActionEnv(env)
            env._max_episode_steps = args["max_episode_steps"] * 4
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args["max_episode_steps"])
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapper(env)
        # assert env.action_space == spaces.Discrete(7)
    elif args["env_kind"] == "mario":
        env = make_mario_env()
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapperMario(env)
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == "robopong":
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), "%.2i" % rank))
    return env
def wrap_modified_rr(env, episode_life=True, episode_reward=False,
                     episode_frame=False, norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari modified as described in
    RUDDER paper;
    """
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    # Games whose raw reward scale is kept untouched.
    keep_raw_reward = ('DoubleDunk', 'Boxing', 'Freeway', 'Pong', 'Bowling',
                       'Skiing', 'IceHockey', 'Enduro')
    original_reward = any(name in env.spec.id for name in keep_raw_reward)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_env_all_params(rank, add_monitor, args):
    """Build one worker environment according to args['env_kind']."""
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        # Noop starts, 4-step skip, 84x84 frames, 4-frame stack, episode cap.
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
    """Preprocessing stack for N64-style environments.

    Args:
        env: the raw environment to wrap.
        reward_scale: factor applied to every reward (default 1/100).
        frame_stack: number of consecutive frames per observation.
        grayscale: convert frames to grayscale if True.

    BUG FIX: `reward_scale` was accepted but ignored — the scaler always
    used a hard-coded 1/100. It now honours the parameter (default value
    unchanged, so existing callers see identical behavior).
    """
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    env = RewardScaler(env, scale=reward_scale)
    return env
def wrap_deepmind(env, downsample=True, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False, color=False):
    """Configure environment for DeepMind-style Atari.
    """
    env_id = str(env.spec.id).lower()
    # These games need the full 160x210 colour frame before anything else.
    if ("videopinball" in env_id) or ('tennis' in env_id):
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_env(env_idx):
    """Create the idx-th Sonic training environment, recording to ./records."""
    sonic = 'SonicTheHedgehog-Genesis'
    states = [
        'SpringYardZone.Act3', 'SpringYardZone.Act2', 'GreenHillZone.Act3',
        'GreenHillZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act1',
        'MarbleZone.Act2', 'MarbleZone.Act1', 'MarbleZone.Act3',
        'ScrapBrainZone.Act2', 'LabyrinthZone.Act2', 'LabyrinthZone.Act1',
        'LabyrinthZone.Act3',
    ]
    print(sonic, states[env_idx], flush=True)
    env = make(game=sonic, state=states[env_idx], bk2dir="./records")
    env = ActionsDiscretizer(env)  # discrete action set
    env = RewardScaler(env)        # scale the rewards
    env = PreprocessFrame(env)     # preprocess the observation
    env = FrameStack(env, 4)       # last 4 frames as input
    env = AllowBackTracking(env)   # don't punish exploring backwards
    return env
def make_unity_maze(env_id, seed=0, rank=0, expID=0, frame_stack=True,
                    logdir=None, ext_coeff=1.0, recordUnityVid=False, **kwargs):
    """Create a Unity maze environment wrapped for pixel-based RL.

    Extra kwargs are forwarded to GymWrapper. Workers are started staggered
    (sleep below) and each gets a distinct Unity worker_id derived from
    (expID, rank) so parallel instances don't collide.
    """
    import os
    import sys
    import time
    try:
        sys.path.insert(0, os.path.abspath("ml-agents/python/"))
        from unityagents import UnityEnvironment
        from unity_wrapper import GymWrapper
    except ImportError:
        print("Import error in unity environment. Ignore if not using unity.")
        pass
    from baselines.common.atari_wrappers import FrameStack
    # gym.undo_logger_setup()  # deprecated in new version of gym
    # max 20 workers per expID, max 30 experiments per machine
    # Stagger worker start-up by 2s per rank to avoid simultaneous launches.
    if rank >= 0 and rank <= 200:
        time.sleep(rank * 2)
    env = UnityEnvironment(file_name='envs/' + env_id,
                           worker_id=(expID % 60) * 200 + rank)
    # "big" maze variants get a longer episode cap.
    maxsteps = 3000 if 'big' in env_id else 500
    env = GymWrapper(env, seed=seed, rank=rank, expID=expID,
                     maxsteps=maxsteps, **kwargs)
    if "big" in env_id:
        env = UnityRoomCounterWrapper(env, use_ext_reward=(ext_coeff != 0.0))
    # Only rank 1 records videos, and only when explicitly requested.
    if rank == 1 and recordUnityVid:
        env = RecordBestScores(env, directory=logdir, freq=1)
    print('Loaded environment %s with rank %d\n\n' % (env_id, rank))
    # env = NoReward(env)
    # env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_test():
    """
    Create the GreenHillZone.Act2 test environment, recording to ./records.
    """
    env = make_retro(game='SonicTheHedgehog-Genesis',
                     state='GreenHillZone.Act2',
                     record="./records")
    env = ActionsDiscretizer(env)  # discrete action set
    env = RewardScaler(env)        # scale the rewards
    env = PreprocessFrame(env)     # preprocess the observation
    env = FrameStack(env, 4)       # last 4 frames as input
    # Allow back tracking so the agent is not discouraged too heavily from
    # exploring backwards when it cannot advance head-on in the level.
    return AllowBacktracking(env)
def make_env_all_params(rank, add_monitor, args):
    """Build the environment for worker `rank` according to `args`.

    BUG FIX: when stickyAtari was false the env was wrapped in
    ExtraTimeLimit twice (once unconditionally and once inside
    `if not args["stickyAtari"]`); it is now applied exactly once.
    Comments translated from Chinese.
    """
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:
            # Add randomness to the agent's action execution (sticky actions).
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)       # repeat each action for 4 frames
        env = ProcessFrame84(env, crop=False)  # preprocess the observation
        env = FrameStack(env, 4)               # stack the last 4 frames as input
        # Limit the maximum number of steps per episode.
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            # Track the agent's position, current room and visited rooms.
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # Super Mario
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # multi-agent game, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_val(env_idx, stack=True, scale_rew=True):
    """
    Create the idx-th validation environment with some standard wrappers.
    """
    val_levels = [
        ('SonicTheHedgehog-Genesis', 'SpringYardZone.Act1'),
        ('SonicTheHedgehog-Genesis', 'GreenHillZone.Act2'),
        ('SonicTheHedgehog-Genesis', 'StarLightZone.Act3'),
        ('SonicTheHedgehog-Genesis', 'ScrapBrainZone.Act1'),
        ('SonicTheHedgehog2-Genesis', 'MetropolisZone.Act3'),
        ('SonicTheHedgehog2-Genesis', 'HillTopZone.Act2'),
        ('SonicTheHedgehog2-Genesis', 'CasinoNightZone.Act2'),
        ('SonicAndKnuckles3-Genesis', 'LavaReefZone.Act1'),
        ('SonicAndKnuckles3-Genesis', 'FlyingBatteryZone.Act2'),
        ('SonicAndKnuckles3-Genesis', 'HydrocityZone.Act1'),
        ('SonicAndKnuckles3-Genesis', 'AngelIslandZone.Act2'),
    ]
    game, state = val_levels[env_idx]
    print(game, state, flush=True)
    env = make(game=game, state=state)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame96(env)
    if stack:
        env = FrameStack(env, 4)
    return AllowBacktracking(env)
def make_test():
    """
    Create the test environment (level index 0) with some standard wrappers.
    """
    env_index = 0
    # Here we add record because we want to output a video
    env = make(game=dicts[env_index]['game'], state=dicts[env_index]['state'])
    env = ActionsDiscretizer(env, dicts[env_index]['game'])  # discrete actions
    env = PreprocessFrame(env)  # preprocess the observation
    env = FrameStack(env, 4)    # last 4 frames as input
    # Allow back tracking so the agent is not discouraged too heavily from
    # exploring backwards when it cannot advance head-on in the level.
    env = AllowBacktracking(env)
    return env
def make_env(env_idx):
    """
    Create the idx-th training environment with some standard wrappers.
    """
    game = dicts[env_idx]['game']
    state = dicts[env_idx]['state']
    print(game, state, flush=True)
    env = make(game=game, state=state)
    env = ActionsDiscretizer(env, game)  # discrete action set
    env = PreprocessFrame(env)           # preprocess the observation
    env = FrameStack(env, 4)             # last 4 frames as input
    # Allow back tracking so the agent is not discouraged too heavily from
    # exploring backwards when it cannot advance head-on in the level.
    env = AllowBacktracking(env)
    return env
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0, render=False):
    """
    Create an environment with some standard wrappers.
    """
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    # One monitor file per seed under the logger dir (skipped when no dir).
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(seed))
    env = Monitor(env, monitor_path, allow_early_resets=True)
    env = SonicDiscretizer(env, render)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
class AllowBacktracking(gym.Wrapper):
    """Reward deltas in max(X) rather than deltas in X, so the agent is not
    discouraged from briefly moving backwards to find a way forward."""

    def __init__(self, env):
        super(AllowBacktracking, self).__init__(env)
        # BUG FIX: the original __init__ ended with
        # `return self.env.reset(**kwargs)` — `kwargs` is undefined here,
        # so constructing the wrapper raised NameError. __init__ now only
        # initialises the progress counters.
        self._cur_x = 0
        self._max_x = 0

    def reset(self, **kwargs):  # pylint: disable=E0202
        self._cur_x = 0
        self._max_x = 0
        return self.env.reset(**kwargs)

    def step(self, action):  # pylint: disable=E0202
        obs, rew, done, info = self.env.step(action)
        self._cur_x += rew
        # Only pay out progress beyond the best position reached so far.
        rew = max(0, self._cur_x - self._max_x)
        self._max_x = max(self._max_x, self._cur_x)
        return obs, rew, done, info


def make_env(env_idx):
    """Create the idx-th Sonic training environment, recording to ./records."""
    dicts = [
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'ScrapBrainZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act3'}
    ]
    # Make the environment
    print(dicts[env_idx]['game'], dicts[env_idx]['state'], flush=True)
    env = make(game=dicts[env_idx]['game'], state=dicts[env_idx]['state'],
               bk2dir="./records")
    env = ActionsDiscretizer(env)  # Build the actions array
    env = RewardScaler(env)        # Scale the rewards
    env = PreprocessFrame(env)     # Preprocess the observation
    env = FrameStack(env, 4)       # Stack 4 frames
    # Allow back tracking so agents are not discouraged too heavily from
    # exploring backwards when there is no way to advance head-on.
    env = AllowBacktracking(env)
    return env
def _make_dqn(unity_env, train_mode, reward_range=(-np.inf, np.inf)):
    """Adapt a Unity environment to the gym interface expected by DQN."""
    env = MLToGymEnv(unity_env, train_mode=train_mode,
                     reward_range=reward_range)
    env = FloatToUInt8Frame(env)
    env = WarpFrame(env)       # makes sure we have 84 x 84 b&w frames
    return FrameStack(env, 4)  # stack the last 4 frames
def __init__(self, level=2, frame_size=64, mode=CPU, *args, **kwargs):
    # Build the underlying game env; extra args are forwarded to Cls.
    self.env = Cls(level=level, *args, **kwargs)
    # scale=1 leaves rewards unchanged (scaler kept for interface parity).
    self.env = GRewardScaler(self.env, scale=1)
    if mode == CPU:
        # Agent (CPU) mode: resize frames to frame_size and stack the last 4.
        self.env = GPreprocessFrame(self.env, size=frame_size)
        self.env = FrameStack(self.env, 4)
    else:
        # Non-CPU mode: switch the env into human play mode.
        # NOTE(review): SetPlayingMode appears to return a wrapper factory
        # that is then applied to the env — confirm against its definition.
        self.env = SetPlayingMode(target_mode=HUMAN)(self.env)