Example #1
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)
        RewardShapingInterface.__init__(self)

        self.num_agents = env.num_agents
        self.is_multiagent = env.is_multiagent

        # save a reference to this wrapper in the actual env class, for other wrappers and for outside access
        self.env.unwrapped.reward_shaping_interface = self
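Every example on this page initializes RewardShapingInterface explicitly next to gym.Wrapper (or gym.Env), i.e. cooperative multiple inheritance without super(). The interface definition itself is not shown here; the following is a minimal sketch of what it plausibly looks like, assuming it exposes per-agent getters and setters for the shaping scheme. All method names below are illustrative assumptions, not taken from the snippets.

    from typing import Any, Dict, Optional

    class RewardShapingInterface:
        """Mixin that lets other wrappers and outside code read/modify reward shaping."""

        def get_default_reward_shaping(self) -> Optional[Dict[str, Any]]:
            # Initial shaping scheme shared by every agent (assumed name).
            raise NotImplementedError

        def get_current_reward_shaping(self, agent_idx: int) -> Optional[Dict[str, Any]]:
            # Scheme currently in effect for a particular agent (assumed name).
            raise NotImplementedError

        def set_reward_shaping(self, reward_shaping: Dict[str, Any], agent_idx: int) -> None:
            # Replace the scheme for a particular agent (assumed name).
            raise NotImplementedError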
Example #2
    def __init__(self, env, reward_shaping_scheme=None, is_multiagent=False):
        gym.Wrapper.__init__(self, env)
        RewardShapingInterface.__init__(self)

        self.reward_shaping_scheme = reward_shaping_scheme
        self.cumulative_rewards = None
        self.episode_actions = None

        self.num_agents = env.num_agents if hasattr(env, 'num_agents') else 1
        self.is_multiagent = is_multiagent

        # save a reference to this wrapper in the actual env class, for other wrappers and for outside access
        self.env.unwrapped.reward_shaping_interface = self
Example #3
    def __init__(self, env, increase_team_spirit, max_team_spirit_steps):
        gym.Wrapper.__init__(self, env)
        RewardShapingInterface.__init__(self)
        TrainingInfoInterface.__init__(self)

        self.num_agents = env.unwrapped.num_agents
        self.is_multiagent = env.unwrapped.is_multiagent

        self.episode_rewards = [0] * self.num_agents

        self.increase_team_spirit = increase_team_spirit
        self.max_team_spirit_steps = max_team_spirit_steps

        self.approx_total_training_steps = 0
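Example #3 mixes in TrainingInfoInterface and tracks approx_total_training_steps together with increase_team_spirit and max_team_spirit_steps, which suggests the team-spirit coefficient is annealed as training progresses. A hypothetical sketch of such a schedule follows; the linear ramp and the function itself are assumptions, not part of the snippet.

    def annealed_team_spirit(approx_total_training_steps, max_team_spirit_steps,
                             initial_spirit=0.0, final_spirit=1.0):
        # Linearly ramp from selfish reward (0.0) to fully shared team reward (1.0)
        # over the first max_team_spirit_steps training steps (assumed schedule).
        progress = min(1.0, approx_total_training_steps / max_team_spirit_steps)
        return initial_spirit + progress * (final_spirit - initial_spirit)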
Example #4
    def __init__(self, env, reward_shaping_scheme=None, annealing=None):
        gym.Wrapper.__init__(self, env)
        RewardShapingInterface.__init__(self)
        TrainingInfoInterface.__init__(self)

        self.reward_shaping_scheme = reward_shaping_scheme
        self.cumulative_rewards = None
        self.episode_actions = None

        self.num_agents = env.num_agents if hasattr(env, 'num_agents') else 1

        self.reward_shaping_updated = True

        self.annealing = annealing
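Example #4 accepts an annealing argument and sets a reward_shaping_updated flag, hinting that shaping coefficients are rescheduled during training. The structure of the annealing spec is not visible in the snippet; the sketch below assumes a simple (start, end, steps) triple per reward component.

    def annealed_scheme(annealing, shaping_scheme, training_steps):
        # Hypothetical: annealing maps a reward component name to
        # (start_value, end_value, anneal_steps).
        scheme = dict(shaping_scheme)
        for key, (start, end, anneal_steps) in annealing.items():
            progress = min(1.0, training_steps / anneal_steps)
            scheme[key] = start + progress * (end - start)
        return scheme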
Example #5
    def __init__(self, num_agents, make_env_func, env_config, skip_frames):
        gym.Env.__init__(self)
        RewardShapingInterface.__init__(self)

        self.num_agents = num_agents
        log.debug('Multi agent env, num agents: %d', self.num_agents)
        self.skip_frames = skip_frames  # number of frames to skip (1 = no skip)

        # temporary env, used only to query the observation/action spaces
        env = make_env_func(player_id=-1)
        self.action_space = env.action_space
        self.observation_space = env.observation_space

        self.default_reward_shaping = get_default_reward_shaping(env)
        env.close()

        # one shaping scheme per agent, each starting from the default
        self.current_reward_shaping = [self.default_reward_shaping for _ in range(self.num_agents)]

        self.make_env_func = make_env_func

        self.safe_init = env_config is not None and env_config.get('safe_init', False)

        if self.safe_init:
            sleep_seconds = env_config.worker_index * 1.0
            log.info(
                'Sleeping %.3f seconds to avoid creating all envs at once',
                sleep_seconds)
            time.sleep(sleep_seconds)
            log.info('Done sleeping at %d', env_config.worker_index)

        self.env_config = env_config
        self.workers = None

        # only needed when rendering
        self.enable_rendering = False
        self.last_obs = None

        self.reset_on_init = True

        self.initialized = False
Example #6
    def __init__(self, env, reward_shaping_scheme=None, true_reward_func=None):
        gym.Wrapper.__init__(self, env)
        RewardShapingInterface.__init__(self)

        self.reward_shaping_scheme = reward_shaping_scheme
        self.true_reward_func = true_reward_func

        # without this we reward using BFG and shotguns too much
        self.reward_delta_limits = dict(DAMAGECOUNT=200, HITCOUNT=5)

        self.prev_vars = dict()
        self.prev_dead = True

        self.orig_env_reward = self.total_shaping_reward = 0.0

        self.selected_weapon = deque([], maxlen=5)

        self.reward_structure = {}

        self.verbose = False
        self.print_once = False

        # save a reference to this wrapper in the actual env class, for other wrappers
        self.env.unwrapped.reward_shaping_interface = self
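The comment repeated in examples #1, #2 and #6 is the point of the whole pattern: because the wrapper registers itself on env.unwrapped, other wrappers and outside code (e.g. a population-based training loop mutating reward schemes) can reach it without knowing where it sits in the wrapper stack. A hedged usage sketch, reusing the assumed set_reward_shaping signature from the interface sketch under Example #1:

    def apply_new_reward_shaping(env, new_scheme, num_agents):
        # Reach through the wrapper stack to the registered shaping interface.
        shaping = getattr(env.unwrapped, 'reward_shaping_interface', None)
        if shaping is None:
            return  # this env does not expose runtime reward shaping
        for agent_idx in range(num_agents):
            shaping.set_reward_shaping(new_scheme, agent_idx)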