def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_deepmind(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=10000000,
        buffer_size=500000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        train_freq=4,
        print_freq=1,
        learning_starts=10000,
        target_network_update_freq=10000,
        gamma=0.99,
        prioritized_replay=True,
    )
    act.save("pong_model.pkl")
    env.close()

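# Playback sketch for the model saved above. This is an assumption-laden example:
# it relies on the older baselines deepq API, in which deepq.load("...") returns an
# act(obs_batch) -> actions callable, as in the baselines enjoy scripts.
def enjoy_pong():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_deepmind(env))
    act = deepq.load("pong_model.pkl")
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        obs, rew, done, _ = env.step(act(np.array(obs)[None])[0])
        episode_rew += rew
    print("Episode reward", episode_rew)
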
def main():
    # env = gym.make("CleanerNoFrameskipSingleObjectiveRandomized-v1")
    env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[128],
        dueling=True,
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=1000000,
        buffer_size=10000,
        exploration_fraction=0.2,
        exploration_final_eps=0.1,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=False,
        print_freq=1,
    )
    act.save("original_dqn_model.pkl")
    env.close()

def make_remote_env(stack=2, scale_rew=True, color=False, exp_type='obs',
                    exp_const=0.002, socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv(socket_dir)
    env = BackupOriginalData(env)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env, color)
    if exp_const > 0:
        if exp_type == 'obs':
            env = ObsExplorationReward(env, exp_const, game_specific=False)
        elif exp_type == 'x':
            env = XExplorationReward(env, exp_const, game_specific=False)
    if stack > 1:
        env = FrameStack(env, stack)
    env = ScaledFloatFrame(env)
    env = EpisodeInfo(env)
    return env

def wrap_deepmind(env, downsample=True, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False, color=False):
    """Configure environment for DeepMind-style Atari."""
    if ("videopinball" in str(env.spec.id).lower()
            or 'tennis' in str(env.spec.id).lower()):
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env

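# Usage sketch for the wrapper above. Assumes the baselines make_atari helper and a
# NoFrameskip Atari id, neither of which appears in the snippet itself.
def demo_wrap_deepmind():
    import numpy as np
    from baselines.common.atari_wrappers import make_atari

    env = make_atari("PongNoFrameskip-v4")
    env = wrap_deepmind(env, downsample=True, scale=True,
                        frame_stack=True, color=False)
    obs = env.reset()
    print(np.array(obs).shape)  # warped grayscale frames stacked on the last axis
    env.close()
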
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1,
                      grayscale=False):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    env = RewardScaler(env, scale=reward_scale)
    return env

def wrap_modified_rr(env, episode_life=True, episode_reward=False,
                     episode_frame=False, norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari, modified as described in
    the RUDDER paper."""
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    _ori_r_games = ['DoubleDunk', 'Boxing', 'Freeway', 'Pong',
                    'Bowling', 'Skiing', 'IceHockey', 'Enduro']
    original_reward = any([game in env.spec.id for game in _ori_r_games])
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env

def wrap_custom(env, clip_rewards=False, scale=True):
    """Configure environment for OpenAI Procgen."""
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    return env

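# Usage sketch; the Procgen env id below is illustrative and assumes the procgen
# package's gym registration.
def demo_wrap_custom():
    import gym

    env = gym.make("procgen:procgen-coinrun-v0")
    env = wrap_custom(env, clip_rewards=False, scale=True)
    obs = env.reset()
    print(obs.dtype, obs.shape)  # float observations after ScaledFloatFrame
    env.close()
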
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1,
                      normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=450, height=300, grayscale=False)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env

def main():
    # env = gym.make("CleanerNoFrameskipMultiObjective-v1")
    # env = gym.make("CleanerNoFrameskipMultiObjectiveRandomized-v1")
    env = gym.make(
        "CleanerNoFrameskipMultiObjectiveRandomizedDeterministic-v1")
    # env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)
    models = {}
    objectives = env.env.get_objectives()
    print(objectives)
    num_objectives = len(objectives)
    for o in objectives:
        model = dqn_dv_models.cnn_to_mlp_with_dv(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[128],
            dueling=True,
            num_dvs=1,
            reuse_conv=None,
        )
        models[o] = model
    act = dqn_dv.learn(
        env,
        q_func_dict=models,
        priorities=PRIORITIES,
        lr=1e-4,
        max_timesteps=1000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        print_freq=1,
        flat_decision_values=True,
        disable_dv=False,
    )
    act.save(RUN_NAME)
    env.close()

def wrap_deepmind_retro(env, scale=True, frame_stack=0):
    """
    Configure environment for retro games, using config similar to DeepMind-style
    Atari in wrap_deepmind.
    """
    env = WarpFrame(env, grayscale=False)
    env = ClipRewardEnv(env)
    if frame_stack > 1:
        env = FrameStack(env, frame_stack)
    if scale:
        env = ScaledFloatFrame(env)
    return env

def make_env_ice(game_name):
    from baselines.common.atari_wrappers import (FrameStack, WarpFrame,
                                                 MaxAndSkipEnv, ScaledFloatFrame)
    import gym
    import cvar.dqn.ice_lake

    env = gym.make(game_name)
    # env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = FrameStack(env, 4)
    return env

def wrap_env(env, episode_life=False):
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    if env.unwrapped.get_action_meanings()[1] == 'FIRE':
        env = FireResetEnv(env)
    env = WarpFrame(env)  # , width=84, height=84
    env = FrameStack(env, 4)
    env = ScaledFloatFrame(env)
    return env

def _thunk():
    env = gym.make(**env_base)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = RewardCollector(env)
    env = EpisodicLifeEnv(env)
    env = ClipRewardEnv(env)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = TransposeImage(env)
    env = UnrealEnvBaseWrapper(env)
    return env

def _thunk():
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)

    env.seed(seed + rank)

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    # minigrid
    keep_classes = ['agent', 'goal', 'wall', 'empty']
    if 'key' in env_id.lower():
        keep_classes.extend(['door', 'key'])
    if env_id.startswith('MiniGrid'):
        env = mgwr.FullyObsWrapper(env)
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=1,
                                         keep_classes=keep_classes,
                                         flatten=False)

    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                            allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3:
        if env_id.startswith('CarRacing'):
            env = WarpFrame(env, width=96, height=96, grayscale=True)
            env = ScaledFloatFrame(env)
        else:
            raise NotImplementedError

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3:
        env = TransposeImage(env, op=[2, 0, 1])

    return env

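# Context sketch: thunks like the two above are normally produced by a factory and
# handed to a vectorized environment. The factory name and env id here are
# illustrative; SubprocVecEnv is the baselines helper
# (baselines.common.vec_env.subproc_vec_env).
def make_env_fn(env_id, seed, rank):
    def _init():
        import gym
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env
    return _init

# envs = SubprocVecEnv([make_env_fn("PongNoFrameskip-v4", 0, i) for i in range(8)])
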
def wrap_n64(env, reward_scale=1 / 100.0, frame_skip=4, width=150, height=100,
             grayscale=True, normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env

def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False,
                  scale=False):
    """Configure environment for DeepMind-style Atari."""
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env

def _thunk():
    env = gym.make(env_id)
    env.seed(seed + rank)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=2)
    env = WarpFrame(env)
    # Janky Fix to Resize Environments to be 50x50
    env.width = 50
    env.height = 50
    env = ScaledFloatFrame(env)
    if not eval:
        env = ClipRewardEnv(env)
        env = EpisodicLifeEnv(env)
    env = FrameStack(env, 3)
    env = TransposeOb(env)
    return env

def deepmind_wrap(atari_env, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False):
    """ matching deepmind papers """
    env = atari_env
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env

def wrap_deepmind_custom(env, episode_life=True, clip_rewards=True,
                         frame_stack=4, scale=False, size=84, augment=False):
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, size=size)
    if augment:
        env = AugmentColor(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, frame_stack)
    return env

def main():
    env = Env(64, 64)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = FrameStack(env, 1)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()
    logger.configure()

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (32, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=1e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=10000,
                      exploration_fraction=0.25,
                      exploration_final_eps=0.01,
                      train_freq=4,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      prioritized_replay=bool(args.prioritized),
                      restore=True)

    for _ in range(100):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            sleep(0.01)
            env.render()
            action = act(np.array(obs)[None])[0]
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
            # print(action, rew)
        print("Episode reward", episode_rew)

def make_maml_env(game_states, stack=2, scale_rew=True, color=False,
                  exp_type='x', exp_const=0.002, max_episode_steps=4500):
    """
    Create an environment with some standard wrappers.
    """
    game, state = game_states[0]
    env = make(game, state)
    env_rand = RandomEnvironmen2(env, game_states)
    env = retro_contest.StochasticFrameSkip(env_rand, n=4, stickprob=0.25)
    env = BackupOriginalData(env)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env, color)
    if exp_const > 0:
        if exp_type == 'obs':
            env = ObsExplorationReward(env, exp_const, game_specific=True)
        elif exp_type == 'x':
            env = XExplorationReward(env, exp_const, game_specific=True)
    if stack > 1:
        env = FrameStack(env, stack)
    env = ScaledFloatFrame(env)
    env = EpisodeInfo(env)
    env.sample = env_rand.sample
    return env

def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(3 * 10e6))
    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)

    import time
    current_milli_time = lambda: int(round(time.time() * 1000))

    env = Env(64, 44)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(16, 8, 4), (16, 4, 2), (32, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=5e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=100000,
                      exploration_fraction=0.05,
                      exploration_final_eps=0.01,
                      train_freq=2,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      print_freq=30,
                      checkpoint_freq=200000,
                      prioritized_replay=bool(args.prioritized))
    act.save("draw_model.pkl")
    env.close()

def run_dqn(model, priorities, weights, disable_dvs, episodes_count):
    # env = gym.make("PongNoFrameskip-v4")
    # env = gym.make("CleanerNoFrameskipMultiObjective-v1")
    env = gym.make("CleanerNoFrameskipMultiObjectiveRandomizedDeterministic-v1")
    # env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)
    print("WEIGHTS: ", weights)
    objectives = env.env.get_objectives()
    print(objectives)

    act = subsumption.load(model)
    act.flat_dvs = True
    act.priorities = priorities
    act.weights = weights
    act.disable_dvs = disable_dvs
    print("setting priorities to: ", act.priorities)

    all_rews = []
    episodes = 0
    while episodes < episodes_count:
        obs, done = env.reset(), False
        episode_rew = np.array([0.0, 0.0, 0.0])
        while not done:
            action, q_vals_sum, dvs, selected_dvs, extra_indicators = act(obs[None])
            env.env.set_extra_indicators(extra_indicators)
            obs, rew, done, _ = env.step(action)
            r = np.array([rew['collision'], rew['clean'], rew['charge']])
            episode_rew += r
        print("[" + str(episodes) + "]Episode reward", episode_rew)
        all_rews.append(episode_rew)
        episodes += 1

    score = np.mean(np.array(all_rews), axis=0)
    print("TOTAL SCORE:")
    print("collision, clean, charge")
    print(score[0], score[1], score[2])
    env.close()
    return score