def make_vec_env(self, dataset, env_args):
    """Build a NaN-checked, observation-normalized vectorized 'crypt-v001' env.

    The dataset is injected into ``env_args`` under the ``"df"`` key before
    construction. Observations are normalized and clipped at 10.0; rewards
    are left unnormalized.
    """
    # NOTE(review): this method shares its name with the module-level
    # `make_vec_env` helper it calls below; inside the body the global
    # resolves (method names live in the class namespace), but the
    # collision is confusing — consider renaming the method.
    env_args["df"] = dataset
    wrapped = make_vec_env('crypt-v001', env_kwargs=env_args)
    wrapped = VecCheckNan(wrapped, raise_exception=True)
    return VecNormalize(
        wrapped,
        norm_obs=True,
        norm_reward=False,
        clip_obs=10.0,
        gamma=0.95,
    )
def make_dummy_env(self, dataset, env_args):
    """Build a single-process 'crypt-v001' env, API-checked and normalized.

    Mirrors `make_vec_env` but constructs one environment directly via
    `gym.make`, validates it with `check_env`, then wraps it in a
    DummyVecEnv before applying the NaN check and normalization.
    """
    raw_env = gym.make("crypt-v001", df=dataset, **env_args)
    # Sanity-check gym API compliance before any wrapping.
    check_env(raw_env)
    vec = DummyVecEnv([lambda: raw_env])
    vec = VecCheckNan(vec, raise_exception=True)
    return VecNormalize(
        vec,
        norm_obs=True,
        norm_reward=False,
        clip_obs=10.0,
        gamma=0.95,
    )
def test_check_nan():
    """Test VecCheckNan Object"""
    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)
    env.step([[0]])
    # Each of these actions must make the wrapper raise ValueError.
    for bad_action in ([[float('NaN')]], [[float('inf')]], [[-1]], [[1]]):
        with pytest.raises(ValueError):
            env.step(bad_action)
    env.step(np.array([[0, 1], [0, 1]]))
    env.reset()
def test_check_nan():
    """Test VecCheckNan Object"""
    # NOTE(review): this redefines the `test_check_nan` above — only this
    # version is collected by pytest. Rename or delete one of the two.
    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)
    env.step([[0]])
    # `pytest.raises` replaces the manual try/except/else `assert False`
    # pattern: it fails the test if no ValueError is raised, and `assert`
    # is not silently stripped under `python -O`.
    for bad_action in ([[float('NaN')]], [[float('inf')]], [[-1]], [[1]]):
        with pytest.raises(ValueError):
            env.step(bad_action)
    env.step(np.array([[0, 1], [0, 1]]))
def _check_nan(env: gym.Env) -> None:
    """Check for Inf and NaN using the VecWrapper."""
    vec_env = VecCheckNan(DummyVecEnv([lambda: env]))
    for _ in range(10):
        # Sample a random action, batched for the vectorized interface;
        # the wrapper raises if NaN/Inf shows up in the transition.
        sampled = np.array([env.action_space.sample()])
        vec_env.step(sampled)
def make_env(
    args,
    num_envs=None,
    include_norm=False,
    norm_reward=True,
    **kwargs,
):
    """Return a vectorized environment containing `num_envs` or
    `args.num_envs` environments (depending on whether `num_envs is None`).

    `args`, the command line arguments, specify several values. See
    `kwargs` for a more detailed explanation on their interaction.
    `include_norm` specifies whether the environment is wrapped in a
    normalizing environment.
    `norm_reward` indicates whether the rewards are normalized (only
    relevant if `include_norm is True`).
    `kwargs` are passed directly to the environment creation function. Any
    value given via `kwargs` has priority over the one given by `args`.
    """
    if num_envs is None:
        num_envs = args.num_envs

    # `kwargs` given via `args`: for each known env option, prefer an
    # explicit value in `kwargs` (popped so it is not passed twice below)
    # and fall back to the attribute of the same name on `args`.
    args_kwargs = {}
    for arg in [
        'M',
        'dt',
        'restol',
        'lambda_real_interval',
        'lambda_imag_interval',
        'lambda_real_interpolation_interval',
        'norm_factor',
        'residual_weight',
        'step_penalty',
        'reward_iteration_only',
        'reward_strategy',
        'collect_states',
        'example',
    ]:
        args_kwargs[arg] = kwargs.pop(arg, getattr(args, arg))
    all_kwargs = {**kwargs, **args_kwargs}

    # SAC does not support float64
    if args.model_class == 'SAC':
        all_kwargs['use_doubles'] = False

    # Base seed; each sub-environment below gets `seed + i` so the copies
    # do not run identical trajectories.
    seed = all_kwargs.pop('seed', args.seed)

    def gym_make(i):
        # Factory binds `i` at call time, avoiding the late-binding-closure
        # pitfall in the list comprehension below.
        return lambda: gym.make(
            args.envname,
            seed=seed + i if seed is not None else None,
            **all_kwargs,
        )

    env = DummyVecEnv([gym_make(i) for i in range(num_envs)])
    if include_norm:
        if hasattr(args, 'env_path') and args.env_path is not None:
            # Reuse previously saved normalization statistics.
            env = VecNormalize.load(str(Path(args.env_path)), env)
        else:
            # When training, set `norm_reward = True`, I hear...
            if 'gamma' in args.model_kwargs:
                env = VecNormalize(
                    env,
                    norm_obs=args.norm_obs,
                    norm_reward=norm_reward,
                    gamma=args.model_kwargs['gamma'],
                )
            else:
                env = VecNormalize(
                    env,
                    norm_obs=args.norm_obs,
                    norm_reward=norm_reward,
                )
    # `debug_nans` is a module-level flag — presumably defined elsewhere
    # in this file; TODO confirm.
    if debug_nans:
        env = VecCheckNan(env, raise_exception=True)
    return env
game = "SuperMarioKart-Snes" scenario = "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\scenarios\\SuperMarioKart-Snes\\custom_rewards.json" state = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\SuperMarioKart-Snes\\MarioCircuit1.GP.50cc.1P.Luigi.Start.state" # game = "Fzero-Snes" # scenario = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\Fzero-Snes\\scenario.json" # state = "C:\\Users\\joncocks\\anaconda3\\envs\\retro_ai_3\\Lib\\site-packages\\retro\\data\\contrib\\Fzero-Snes\\practice.mutecity.bluefalcon.norival.start.state" experiment_id = str(uuid4()) n_cpus = 8 env = SubprocVecEnv( [lambda: get_env(game, state, scenario) for i in range(n_cpus)]) # env = DummyVecEnv([lambda: get_env(game, state, scenario)]) # env = VecNormalize(env, norm_obs=True, norm_reward=False) env = VecCheckNan(env, raise_exception=True) # Create a callback to save every n timesteps prefix = "ppo_" + game + "_" + experiment_id checkpoint_callback = CheckpointCallback( save_freq=100000, save_path="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models", name_prefix=prefix) savefile_name = prefix + "_final" savefile_name = os.path.join( "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models", savefile_name) model = PPO( CnnPolicy,