# Example #1
 def make_vec_env(self, dataset, env_args):
     """Build a NaN-checked, observation-normalized vectorized trading env.

     Parameters
     ----------
     dataset :
         Market data handed to the ``crypt-v001`` env via its ``df``
         kwarg (presumably a pandas DataFrame — TODO confirm against
         the env's expectations).
     env_args : dict
         Extra keyword arguments for the environment.  A copy is taken,
         so the caller's dict is no longer mutated (the original code
         wrote the ``"df"`` key into the caller's dict as a side effect).

     Returns
     -------
     VecNormalize
         The wrapped vectorized environment.
     """
     # Copy first: do not clobber the caller's dict with the "df" entry.
     kwargs = dict(env_args)
     kwargs["df"] = dataset
     # NOTE: this resolves to the module-level stable-baselines3
     # `make_vec_env` helper, not recursively to this method.
     env = make_vec_env('crypt-v001', env_kwargs=kwargs)
     # Fail fast on NaN/inf in observations, rewards, or actions.
     env = VecCheckNan(env, raise_exception=True)
     # Normalize observations only; rewards are left raw.
     env = VecNormalize(
         env, norm_obs=True, norm_reward=False, clip_obs=10.0, gamma=0.95
     )
     return env
# Example #2
 def make_dummy_env(self, dataset, env_args):
     """Create a single-instance, validated, NaN-checked, normalized env.

     Unlike the multi-env variant, this builds exactly one
     ``crypt-v001`` environment, validates it with ``check_env``,
     then wraps it in the usual vec-env stack.
     """
     base_env = gym.make("crypt-v001", df=dataset, **env_args)
     # Validate against the gym API before wrapping.
     check_env(base_env)
     wrapped = DummyVecEnv([lambda: base_env])
     # Raise immediately on NaN/inf instead of training on garbage.
     wrapped = VecCheckNan(wrapped, raise_exception=True)
     return VecNormalize(
         wrapped, norm_obs=True, norm_reward=False, clip_obs=10.0, gamma=0.95
     )
def test_check_nan():
    """Test VecCheckNan Object"""

    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)

    # A benign action passes straight through.
    env.step([[0]])

    # Each of these actions makes NanAndInfEnv emit NaN/inf, which the
    # wrapper must surface as a ValueError.
    for bad_action in ([[float('NaN')]], [[float('inf')]], [[-1]], [[1]]):
        with pytest.raises(ValueError):
            env.step(bad_action)

    # A well-formed batched numpy action is accepted again afterwards.
    env.step(np.array([[0, 1], [0, 1]]))

    env.reset()
def test_check_nan():
    """Test VecCheckNan Object"""

    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)

    # A benign action passes straight through.
    env.step([[0]])

    # Each action below makes the env emit NaN/inf; VecCheckNan must
    # raise ValueError.  `raise AssertionError` replaces the original
    # `assert False` because asserts are stripped under `python -O`,
    # which would make these checks silently pass.
    for bad_action in ([[float('NaN')]], [[float('inf')]], [[-1]], [[1]]):
        try:
            env.step(bad_action)
        except ValueError:
            pass
        else:
            raise AssertionError(
                "VecCheckNan did not raise ValueError for action "
                "%r" % (bad_action,)
            )

    # A well-formed batched numpy action is accepted again afterwards.
    env.step(np.array([[0, 1], [0, 1]]))
# Example #5
def _check_nan(env: gym.Env) -> None:
    """Check for Inf and NaN using the VecWrapper."""
    vec_env = VecCheckNan(DummyVecEnv([lambda: env]))
    for _ in range(10):
        # Batch a single random action for the one-env vec wrapper;
        # the step return value is irrelevant — we only want VecCheckNan
        # to raise if NaN/inf shows up.
        sampled_action = np.array([env.action_space.sample()])
        vec_env.step(sampled_action)
# Example #6
def make_env(
    args,
    num_envs=None,
    include_norm=False,
    norm_reward=True,
    **kwargs,
):
    """Return a vectorized environment containing `num_envs` or `args.num_envs`
    environments (depending on whether `num_envs is None`).

    `args`, the command line arguments, specify several values. See `kwargs`
    for a more detailed explanation on their interaction.
    `include_norm` specifies whether the environment is wrapped in a
    normalizing environment.
    `norm_reward` indicates whether the rewards are normalized (only
    relevant if `include_norm is True`).
    `kwargs` are passed directly to the environment creation function. Any
    value given via `kwargs` has priority over the one given by `args`.
    """
    if num_envs is None:
        num_envs = args.num_envs

    # Env-construction options that may come from either `kwargs` or
    # `args`; an explicit `kwargs` entry wins over the `args` attribute.
    forwarded_names = (
        'M',
        'dt',
        'restol',
        'lambda_real_interval',
        'lambda_imag_interval',
        'lambda_real_interpolation_interval',
        'norm_factor',
        'residual_weight',
        'step_penalty',
        'reward_iteration_only',
        'reward_strategy',
        'collect_states',
        'example',
    )
    all_kwargs = {
        name: kwargs.pop(name, getattr(args, name))
        for name in forwarded_names
    }
    # Whatever remains in `kwargs` was not mirrored by `args`; forward as-is.
    all_kwargs.update(kwargs)

    # SAC does not support float64
    if args.model_class == 'SAC':
        all_kwargs['use_doubles'] = False

    seed = all_kwargs.pop('seed', args.seed)

    def build_env_fn(offset):
        # Bind `offset` here to sidestep the late-binding closure pitfall.
        def _thunk():
            env_seed = None if seed is None else seed + offset
            return gym.make(args.envname, seed=env_seed, **all_kwargs)
        return _thunk

    env = DummyVecEnv([build_env_fn(i) for i in range(num_envs)])

    if include_norm:
        if getattr(args, 'env_path', None) is not None:
            # Reuse previously saved normalization statistics.
            env = VecNormalize.load(str(Path(args.env_path)), env)
        else:
            # When training, set `norm_reward = True`, I hear...
            norm_kwargs = {
                'norm_obs': args.norm_obs,
                'norm_reward': norm_reward,
            }
            # Only forward gamma when the model config provides one.
            if 'gamma' in args.model_kwargs:
                norm_kwargs['gamma'] = args.model_kwargs['gamma']
            env = VecNormalize(env, **norm_kwargs)
    if debug_nans:
        env = VecCheckNan(env, raise_exception=True)
    return env
    game = "SuperMarioKart-Snes"
    scenario = "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\scenarios\\SuperMarioKart-Snes\\custom_rewards.json"
    state = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\SuperMarioKart-Snes\\MarioCircuit1.GP.50cc.1P.Luigi.Start.state"

    # game = "Fzero-Snes"
    # scenario = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\Fzero-Snes\\scenario.json"
    # state = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\Fzero-Snes\\practice.mutecity.bluefalcon.norival.start.state"

    experiment_id = str(uuid4())

    n_cpus = 8
    env = SubprocVecEnv(
        [lambda: get_env(game, state, scenario) for i in range(n_cpus)])
    # env = DummyVecEnv([lambda: get_env(game, state, scenario)])
    # env = VecNormalize(env, norm_obs=True, norm_reward=False)
    env = VecCheckNan(env, raise_exception=True)

    # Create a callback to save every n timesteps
    prefix = "ppo_" + game + "_" + experiment_id
    checkpoint_callback = CheckpointCallback(
        save_freq=100000,
        save_path="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models",
        name_prefix=prefix)

    savefile_name = prefix + "_final"

    savefile_name = os.path.join(
        "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models", savefile_name)

    model = PPO(
        CnnPolicy,