def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    RewardShapingInterface.__init__(self)

    self.num_agents = env.num_agents
    self.is_multiagent = env.is_multiagent

    # save a reference to this wrapper in the actual env class, for other wrappers and for outside access
    self.env.unwrapped.reward_shaping_interface = self

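# --- Illustrative sketch (not from the source): a plausible shape for the
# RewardShapingInterface these wrappers implement. The real class is defined
# elsewhere in the codebase; the method names below are assumptions.
class RewardShapingInterfaceSketch:
    def get_default_reward_shaping(self):
        """Return the default, unmodified shaping scheme."""
        raise NotImplementedError

    def get_current_reward_shaping(self, agent_idx):
        """Return the shaping scheme currently applied to one agent."""
        raise NotImplementedError

    def set_reward_shaping(self, reward_shaping, agent_idx):
        """Replace one agent's shaping scheme (e.g. from a PBT controller)."""
        raise NotImplementedError
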
def __init__(self, env, reward_shaping_scheme=None, is_multiagent=False):
    gym.Wrapper.__init__(self, env)
    RewardShapingInterface.__init__(self)

    self.reward_shaping_scheme = reward_shaping_scheme
    self.cumulative_rewards = None
    self.episode_actions = None

    # fall back to a single agent if the wrapped env does not declare a count
    self.num_agents = env.num_agents if hasattr(env, 'num_agents') else 1
    self.is_multiagent = is_multiagent

    # save a reference to this wrapper in the actual env class, for other wrappers and for outside access
    self.env.unwrapped.reward_shaping_interface = self

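# --- Usage sketch (assumptions: the hypothetical interface methods above, and
# a flat dict of per-event coefficients as the shaping scheme). Because the
# wrapper registers itself on env.unwrapped, outside code can adjust shaping
# without knowing the wrapper stack:
def scale_reward_shaping(env, factor, agent_idx=0):
    iface = env.unwrapped.reward_shaping_interface
    scheme = iface.get_current_reward_shaping(agent_idx)
    scaled = {event: coeff * factor for event, coeff in scheme.items()}
    iface.set_reward_shaping(scaled, agent_idx)
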
def __init__(self, env, increase_team_spirit, max_team_spirit_steps):
    gym.Wrapper.__init__(self, env)
    RewardShapingInterface.__init__(self)
    TrainingInfoInterface.__init__(self)

    self.num_agents = env.unwrapped.num_agents
    self.is_multiagent = env.unwrapped.is_multiagent
    self.episode_rewards = [0] * self.num_agents

    self.increase_team_spirit = increase_team_spirit
    self.max_team_spirit_steps = max_team_spirit_steps

    self.approx_total_training_steps = 0

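# --- Minimal sketch (an assumption, not the source's code) of how
# increase_team_spirit / max_team_spirit_steps could drive a schedule that
# anneals rewards from individual to shared as training progresses:
def team_spirit(approx_total_training_steps, max_team_spirit_steps):
    # linearly anneal tau from 0 (fully selfish) to 1 (fully shared)
    return min(1.0, approx_total_training_steps / max_team_spirit_steps)

def blend_reward(own_reward, mean_team_reward, tau):
    # standard team-spirit mix of individual and team-average reward
    return (1.0 - tau) * own_reward + tau * mean_team_reward
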
def __init__(self, env, reward_shaping_scheme=None, annealing=None):
    gym.Wrapper.__init__(self, env)
    RewardShapingInterface.__init__(self)
    TrainingInfoInterface.__init__(self)

    self.reward_shaping_scheme = reward_shaping_scheme
    self.cumulative_rewards = None
    self.episode_actions = None

    # fall back to a single agent if the wrapped env does not declare a count
    self.num_agents = env.num_agents if hasattr(env, 'num_agents') else 1

    self.reward_shaping_updated = True
    self.annealing = annealing

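# --- Hypothetical annealing helper (the actual `annealing` object is not
# shown in the source; a linear schedule is just one plausible choice). The
# idea: shaping coefficients decay toward a target as training progresses,
# driven by the step count from TrainingInfoInterface:
class LinearAnnealing:
    def __init__(self, start_value, end_value, total_steps):
        self.start_value = start_value
        self.end_value = end_value
        self.total_steps = total_steps

    def value_at(self, step):
        frac = min(1.0, step / self.total_steps)
        return self.start_value + frac * (self.end_value - self.start_value)
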
def __init__(self, num_agents, make_env_func, env_config, skip_frames):
    gym.Env.__init__(self)
    RewardShapingInterface.__init__(self)

    self.num_agents = num_agents
    log.debug('Multi agent env, num agents: %d', self.num_agents)
    self.skip_frames = skip_frames  # number of frames to skip (1 = no skip)

    # temporary env just to query observation_space and stuff
    env = make_env_func(player_id=-1)
    self.action_space = env.action_space
    self.observation_space = env.observation_space
    self.default_reward_shaping = get_default_reward_shaping(env)
    env.close()

    # one shaping scheme per agent; note range() here -- iterating over the int itself would raise TypeError
    self.current_reward_shaping = [self.default_reward_shaping for _ in range(self.num_agents)]

    self.make_env_func = make_env_func

    # optionally stagger env creation across workers to avoid creating all envs at once
    self.safe_init = env_config is not None and env_config.get('safe_init', False)
    if self.safe_init:
        sleep_seconds = env_config.worker_index * 1.0
        log.info('Sleeping %.3f seconds to avoid creating all envs at once', sleep_seconds)
        time.sleep(sleep_seconds)
        log.info('Done sleeping at %d', env_config.worker_index)

    self.env_config = env_config
    self.workers = None

    # only needed when rendering
    self.enable_rendering = False
    self.last_obs = None

    self.reset_on_init = True
    self.initialized = False

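# --- Design note as a sketch: current_reward_shaping holds one scheme per
# agent so each agent can be tuned independently (e.g. by PBT). The event
# names and values below are illustrative only. Using dict() copies, unlike
# the shared references in the comprehension above, keeps one agent's
# mutations from leaking to the others:
default_shaping = {'DAMAGECOUNT': 0.01, 'HITCOUNT': 0.1}  # illustrative values
per_agent = [dict(default_shaping) for _ in range(4)]
per_agent[2]['DAMAGECOUNT'] = 0.02  # tweak a single agent's scheme
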
def __init__(self, env, reward_shaping_scheme=None, true_reward_func=None):
    gym.Wrapper.__init__(self, env)
    RewardShapingInterface.__init__(self)

    self.reward_shaping_scheme = reward_shaping_scheme
    self.true_reward_func = true_reward_func

    # without this we reward using BFG and shotguns too much
    self.reward_delta_limits = dict(DAMAGECOUNT=200, HITCOUNT=5)

    self.prev_vars = dict()
    self.prev_dead = True

    self.orig_env_reward = self.total_shaping_reward = 0.0

    # a few most recent weapon selections
    self.selected_weapon = deque([], maxlen=5)

    self.reward_structure = {}

    self.verbose = False
    self.print_once = False

    # save a reference to this wrapper in the actual env class, for other wrappers
    self.env.unwrapped.reward_shaping_interface = self

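# --- Hedged sketch (this helper is illustrative, not the source's actual
# code) of how reward_delta_limits can cap per-step variable deltas so a
# single BFG blast or shotgun volley does not dominate the shaping reward:
def clamped_delta(var_name, new_value, prev_value, limits):
    delta = new_value - prev_value
    limit = limits.get(var_name)
    if limit is not None:
        delta = max(-limit, min(limit, delta))
    return delta

# e.g. a single hit adding 300 damage is credited as at most 200:
# clamped_delta('DAMAGECOUNT', 300, 0, dict(DAMAGECOUNT=200, HITCOUNT=5)) -> 200
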