Example #1
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_deepmind(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=10000000,
        buffer_size=500000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        train_freq=4,
        print_freq=1,
        learning_starts=10000,
        target_network_update_freq=10000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close()
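
All of the examples on this page pass the environment through ScaledFloatFrame. For reference, here is a minimal sketch of what such a wrapper typically looks like in baselines-style code (an assumption for illustration; each project may ship its own variant): it converts uint8 frames to float32 and rescales pixel values to [0, 1].

import gym
import numpy as np

class ScaledFloatFrame(gym.ObservationWrapper):
    """Rescale uint8 pixel observations to float32 values in [0, 1]."""

    def __init__(self, env):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0,
            shape=env.observation_space.shape,
            dtype=np.float32)

    def observation(self, observation):
        # Note: storing float frames in a replay buffer takes several times
        # more memory than keeping the raw uint8 frames.
        return np.array(observation).astype(np.float32) / 255.0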
Example #2
def main():
    # env = gym.make("CleanerNoFrameskipSingleObjectiveRandomized-v1")
    env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[128],
        dueling=True
    )


    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=1000000,
        buffer_size=10000,
        exploration_fraction=0.2,
        exploration_final_eps=0.1,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=False,
        print_freq=1,
    )
    act.save("original_dqn_model.pkl")
    env.close()
Example #3
def make_remote_env(stack=2,
                    scale_rew=True,
                    color=False,
                    exp_type='obs',
                    exp_const=0.002,
                    socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv(socket_dir)
    env = BackupOriginalData(env)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)

    if scale_rew:
        env = RewardScaler(env)

    env = WarpFrame(env, color)

    if exp_const > 0:
        if exp_type == 'obs':
            env = ObsExplorationReward(env, exp_const, game_specific=False)
        elif exp_type == 'x':
            env = XExplorationReward(env, exp_const, game_specific=False)

    if stack > 1:
        env = FrameStack(env, stack)

    env = ScaledFloatFrame(env)
    env = EpisodeInfo(env)

    return env
Example #4
def wrap_deepmind(env,
                  downsample=True,
                  episode_life=True,
                  clip_rewards=True,
                  frame_stack=False,
                  scale=False,
                  color=False):
    """Configure environment for DeepMind-style Atari.
    """
    if ("videopinball" in str(env.spec.id).lower()) or ('tennis' in str(
            env.spec.id).lower()):
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #5
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    env = RewardScaler(env, scale=reward_scale)  # scale rewards by the reward_scale argument
    return env
Example #6
def wrap_modified_rr(env, episode_life=True, episode_reward=False, episode_frame=False,
                     norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari modified as described in RUDDER paper;
    """
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    _ori_r_games = ['DoubleDunk', 'Boxing', 'Freeway', 'Pong',
                    'Bowling', 'Skiing', 'IceHockey', 'Enduro']
    original_reward = any(game in env.spec.id for game in _ori_r_games)

    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #7
def wrap_custom(env, clip_rewards=False, scale=True):
    """Configure environment for Openai procgen.
    """
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    return env
Example #8
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=450, height=300, grayscale=False)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env
Example #9
def main():

    # env = gym.make("CleanerNoFrameskipMultiObjective-v1")
    #env = gym.make("CleanerNoFrameskipMultiObjectiveRandomized-v1")
    env = gym.make(
        "CleanerNoFrameskipMultiObjectiveRandomizedDeterministic-v1")
    # env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)

    models = {}

    objectives = env.env.get_objectives()
    print(objectives)
    num_objectives = len(objectives)

    for o in objectives:
        model = dqn_dv_models.cnn_to_mlp_with_dv(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[128],
            dueling=True,
            num_dvs=1,
            reuse_conv=None,
        )
        models[o] = model

    act = dqn_dv.learn(
        env,
        q_func_dict=models,
        priorities=PRIORITIES,
        lr=1e-4,
        max_timesteps=1000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        print_freq=1,
        flat_decision_values=True,
        disable_dv=False,
    )
    act.save(RUN_NAME)
    env.close()
Example #10
File: envs.py Project: dhruvsreenivas/dril
def wrap_deepmind_retro(env, scale=True, frame_stack=0):
    """
    Configure environment for retro games, using config similar to DeepMind-style Atari in wrap_deepmind
    """
    env = WarpFrame(env, grayscale=False)
    env = ClipRewardEnv(env)
    if frame_stack > 1:
        env = FrameStack(env, frame_stack)
    if scale:
        env = ScaledFloatFrame(env)
    return env
Example #11
def make_env_ice(game_name):
    from baselines.common.atari_wrappers import FrameStack, WarpFrame, MaxAndSkipEnv, ScaledFloatFrame
    import gym
    import cvar.dqn.ice_lake

    env = gym.make(game_name)
    # env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = FrameStack(env, 4)
    return env
Example #12
def wrap_env(env, episode_life=False):
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    if env.unwrapped.get_action_meanings()[1] == 'FIRE':
        env = FireResetEnv(env)
    env = WarpFrame(env)  # defaults to 84x84
    env = FrameStack(env, 4)
    env = ScaledFloatFrame(env)
    return env
Example #13
def _thunk():
    # env_base is captured from the enclosing environment-factory function.
    env = gym.make(**env_base)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = RewardCollector(env)
    env = EpisodicLifeEnv(env)
    env = ClipRewardEnv(env)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = TransposeImage(env)
    env = UnrealEnvBaseWrapper(env)
    return env
Example #14
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        # minigrid
        keep_classes = ['agent', 'goal', 'wall', 'empty']
        if 'key' in env_id.lower():
            keep_classes.extend(['door', 'key'])

        if env_id.startswith('MiniGrid'):
            env = mgwr.FullyObsWrapper(env)
            env = mgwr.ImgObsWrapper(env)
            env = mgwr.FullyObsOneHotWrapper(env,
                                             drop_color=1,
                                             keep_classes=keep_classes,
                                             flatten=False)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            if env_id.startswith('CarRacing'):
                env = WarpFrame(env, width=96, height=96, grayscale=True)
                env = ScaledFloatFrame(env)
            else:
                raise NotImplementedError

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
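
The final comment in Example #14 mentions transposing (H, W, C) observations for PyTorch convolutions, which expect channel-first input. A minimal sketch of such a TransposeImage wrapper (an assumption for illustration; the project referenced above defines its own) is:

import gym
import numpy as np

class TransposeImage(gym.ObservationWrapper):
    """Reorder observation axes, e.g. HWC -> CHW for PyTorch conv nets."""

    def __init__(self, env, op=(2, 0, 1)):
        super().__init__(env)
        self.op = op
        shape = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=self.observation_space.low.min(),
            high=self.observation_space.high.max(),
            shape=(shape[op[0]], shape[op[1]], shape[op[2]]),
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return np.transpose(observation, self.op)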
Example #15
def wrap_n64(env,
             reward_scale=1 / 100.0,
             frame_skip=4,
             width=150,
             height=100,
             grayscale=True,
             normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env
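
Examples #5, #8 and #15 additionally scale rewards with a RewardScaler. A minimal sketch, assuming a gym.RewardWrapper with a configurable factor (the retro Sonic baselines hard-code 0.01), is:

import gym

class RewardScaler(gym.RewardWrapper):
    """Multiply every reward by a constant factor; bringing rewards to
    roughly unit scale tends to stabilize training on emulator games."""

    def __init__(self, env, scale=0.01):
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        return reward * self.scale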
Example #16
def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #17
File: envs.py Project: biggzlar/i2a
def _thunk():
    # env_id, seed, rank and the eval flag are captured from the enclosing scope.
    env = gym.make(env_id)
    env.seed(seed + rank)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=2)
    env = WarpFrame(env)
    # Janky fix to resize environments to 50x50
    env.width = 50
    env.height = 50
    env = ScaledFloatFrame(env)
    if not eval:
        env = ClipRewardEnv(env)
        env = EpisodicLifeEnv(env)
    env = FrameStack(env, 3)
    env = TransposeOb(env)
    return env
Example #18
def deepmind_wrap(atari_env,
                  episode_life=True,
                  clip_rewards=True,
                  frame_stack=False,
                  scale=False):
    """ matching deepmind papers
    """
    if episode_life:
        env = EpisodicLifeEnv(atari_env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #19
def wrap_deepmind_custom(env,
                         episode_life=True,
                         clip_rewards=True,
                         frame_stack=frame_stack,
                         scale=False):
    # frame_stack, size and augment are captured from the enclosing scope
    # in which this helper is defined.
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, size=size)
    if augment:
        env = AugmentColor(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, frame_stack)
    return env
Example #20
File: test.py Project: sajadn/draw-rl
def main():
    env = Env(64, 64)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)
    env = FrameStack(env, 1)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()
    logger.configure()
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (32, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=1e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=10000,
                      exploration_fraction=0.25,
                      exploration_final_eps=0.01,
                      train_freq=4,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      prioritized_replay=bool(args.prioritized),
                      restore=True)
    for _ in range(100):
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            sleep(0.01)
            env.render()
            action = act(np.array(obs)[None])[0]
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
            # print(action, rew)
        print("Episode reward", episode_rew)
Example #21
def make_maml_env(game_states,
                  stack=2,
                  scale_rew=True,
                  color=False,
                  exp_type='x',
                  exp_const=0.002,
                  max_episode_steps=4500):
    """
    Create an environment with some standard wrappers.
    """
    game, state = game_states[0]
    env = make(game, state)

    env_rand = RandomEnvironmen2(env, game_states)
    env = retro_contest.StochasticFrameSkip(env_rand, n=4, stickprob=0.25)

    env = BackupOriginalData(env)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)

    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)

    if scale_rew:
        env = RewardScaler(env)

    env = WarpFrame(env, color)

    if exp_const > 0:
        if exp_type == 'obs':
            env = ObsExplorationReward(env, exp_const, game_specific=True)
        elif exp_type == 'x':
            env = XExplorationReward(env, exp_const, game_specific=True)

    if stack > 1:
        env = FrameStack(env, stack)

    env = ScaledFloatFrame(env)
    env = EpisodeInfo(env)

    env.sample = env_rand.sample

    return env
Example #22
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(3 * 10e6))
    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    import time

    current_milli_time = lambda: int(round(time.time() * 1000))

    env = Env(64, 44)
    env = WarpFrame(env)
    env = ScaledFloatFrame(env)

    model = deepq.models.cnn_to_mlp(
        convs=[(16, 8, 4), (16, 4, 2), (32, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=5e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=100000,
                      exploration_fraction=0.05,
                      exploration_final_eps=0.01,
                      train_freq=2,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      print_freq=30,
                      checkpoint_freq=200000,
                      prioritized_replay=bool(args.prioritized))
    act.save("draw_model.pkl")
    env.close()
Example #23
def run_dqn(model, priorities, weights, disable_dvs, episodes_count):
    # env = gym.make("PongNoFrameskip-v4")
    # env = gym.make("CleanerNoFrameskipMultiObjective-v1")
    env = gym.make("CleanerNoFrameskipMultiObjectiveRandomizedDeterministic-v1")
    # env = gym.make("CleanerNoFrameskipSingleObjective-v1")
    env = ScaledFloatFrame(env)

    print("WEIGHTS: ", weights)

    objectives = env.env.get_objectives()
    print(objectives)

    act = subsumption.load(model)
    act.flat_dvs = True
    act.priorities = priorities
    act.weights = weights
    act.disable_dvs = disable_dvs
    print("setting priorities to: ", act.priorities)

    all_rews = []
    episodes = 0
    while episodes < episodes_count:
        obs, done = env.reset(), False
        episode_rew = np.array([0.0, 0.0, 0.0])
        while not done:
            action, q_vals_sum, dvs, selected_dvs, extra_indicators = act(obs[None])
            env.env.set_extra_indicators(extra_indicators)
            obs, rew, done, _ = env.step(action)
            r = np.array([rew['collision'], rew['clean'], rew['charge']])
            episode_rew += r
        print("[" + str(episodes) + "]Episode reward", episode_rew)
        all_rews.append(episode_rew)
        episodes += 1

    score = np.mean(np.array(all_rews), axis=0)
    print("TOTAL SCORE:")
    print("collision, clean, charge")
    print(score[0], score[1], score[2])

    env.close()

    return score