def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    base = raw_env(**kwargs)
    captured = wrappers.CaptureStdoutWrapper(base)
    # Illegal moves are handed a reward of -1.
    guarded = wrappers.TerminateIllegalWrapper(captured, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NaNRandomWrapper(bounded)
    return wrappers.OrderEnforcingWrapper(nan_handled)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    # Action index and description passed to NanNoOpWrapper.
    no_op_action = 26**2 * 2
    no_op_message = "executing the 'do nothing' action."

    base = raw_env(**kwargs)
    guarded = wrappers.TerminateIllegalWrapper(base, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NanNoOpWrapper(bounded, no_op_action, no_op_message)
    return wrappers.OrderEnforcingWrapper(nan_handled)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    # Action index handed to NanNoOpWrapper, plus the text describing it.
    pass_action = 3
    pass_message = "'checked' with action {}".format(pass_action)

    base = raw_env(**kwargs)
    guarded = wrappers.TerminateIllegalWrapper(base, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NanNoOpWrapper(bounded, pass_action, pass_message)
    return wrappers.OrderEnforcingWrapper(nan_handled)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    base = raw_env(**kwargs)
    captured = wrappers.CaptureStdoutWrapper(base)
    # The illegal-move reward is a HanabiScorePenalty built from the raw
    # (unwrapped) environment rather than a fixed number.
    penalty = HanabiScorePenalty(base)
    guarded = wrappers.TerminateIllegalWrapper(captured, illegal_reward=penalty)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    return wrappers.OrderEnforcingWrapper(bounded)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    base = raw_env(**kwargs)
    # Read ``_N`` from the raw environment before any wrapper is applied; the
    # no-op action index is ``_N`` multiplied by itself.
    pass_move = base._N * base._N
    pass_message = "passing turn with action {}".format(pass_move)

    guarded = wrappers.TerminateIllegalWrapper(base, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NanNoOpWrapper(bounded, pass_move, pass_message)
    return wrappers.OrderEnforcingWrapper(nan_handled)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    # Reward passed to TerminateIllegalWrapper as ``illegal_reward``.
    player_losing_reward = -3

    base = raw_env(**kwargs)
    guarded = wrappers.TerminateIllegalWrapper(
        base, illegal_reward=player_losing_reward
    )
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NaNRandomWrapper(bounded)
    return wrappers.OrderEnforcingWrapper(nan_handled)
def env(**kwargs):
    """Build the raw environment and return it inside the standard wrapper stack.

    Every keyword argument is forwarded unchanged to ``raw_env``. Wrappers are
    applied innermost-first in the order written below, so
    ``OrderEnforcingWrapper`` is the outermost layer of the returned object.
    """
    # Action index handed to NanNoOpWrapper, plus the text describing it.
    pass_move = 308
    pass_message = "passing turn with action number {}".format(pass_move)

    base = raw_env(**kwargs)
    captured = wrappers.CaptureStdoutWrapper(base)
    guarded = wrappers.TerminateIllegalWrapper(captured, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    nan_handled = wrappers.NanNoOpWrapper(bounded, pass_move, pass_message)
    return wrappers.OrderEnforcingWrapper(nan_handled)
# test_unwrapped: check that ``.unwrapped`` still compares equal to the base
# environment after extra wrappers and AEC/parallel conversions are stacked on
# top of ``env_module.env()``.
#
# Parameters:
#   name       -- not referenced anywhere in the visible body.
#   env_module -- object exposing an ``env()`` factory.
#
# Flow: store ``env.unwrapped`` as ``base_env``, reset the environment, read
# ``env.agents``, apply the ``wrappers.*`` classes (two of them behind
# observation/action-space checks) and the ``conversions.*`` helpers, then
# assert ``env.unwrapped == base_env`` with the message
# "Unwrapped Test: unequal envs".
#
# NOTE(review): this definition is collapsed onto a single line, so the extent
# of each ``if`` body cannot be determined from here. Presumably each ``if``
# guards only the single assignment that follows it -- confirm against the
# original file before reformatting.
def test_unwrapped(name, env_module): env = env_module.env() base_env = env.unwrapped env.reset() agents = env.agents if discrete_observation(env, agents): env = wrappers.AssertOutOfBoundsWrapper(env) env = wrappers.BaseWrapper(env) env = wrappers.CaptureStdoutWrapper(env) if box_observation(env, agents) and box_action(env, agents): env = wrappers.ClipOutOfBoundsWrapper(env) env = wrappers.OrderEnforcingWrapper(env) env = wrappers.TerminateIllegalWrapper(env, 1.0) if env.metadata["is_parallelizable"]: env = conversions.aec_to_parallel(env) env = conversions.parallel_to_aec(env) env = conversions.turn_based_aec_to_parallel(env) assert env.unwrapped == base_env, "Unwrapped Test: unequal envs"
def env():
    """Build the raw environment and return it inside the standard wrapper stack.

    Takes no arguments; ``raw_env`` is called with none. Wrappers are applied
    innermost-first in the order written below, so ``OrderEnforcingWrapper``
    is the outermost layer of the returned object.
    """
    base = raw_env()
    # Illegal moves are handed a reward of -1.
    guarded = wrappers.TerminateIllegalWrapper(base, illegal_reward=-1)
    bounded = wrappers.AssertOutOfBoundsWrapper(guarded)
    return wrappers.OrderEnforcingWrapper(bounded)