Example #1
import random
import time

from vizdoom import DoomGame


def test_environment():
    game = DoomGame()
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game.load_config('basic.cfg')
    game.set_doom_scenario_path('basic.wad')
    game.init()
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 10
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            print('Action', action)
            reward = game.make_action(action)
            print('Reward', reward)
            time.sleep(0.02)
        print('Result', game.get_total_reward())
        time.sleep(2)
    game.close()
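
A minimal sketch of how this function might be invoked, assuming `basic.cfg` and `basic.wad` (from the linked repository) are present in the working directory:

if __name__ == '__main__':
    random.seed(0)  # fix the RNG so the random action sequence is reproducible
    test_environment()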
Example #2
class Game(object):
    def __init__(self,
                 scenario,
                 action_builder,
                 reward_values=None,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='PLAYER',
                 player_rank=0,
                 players_per_game=1,
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='Arnold',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        score_variable: indicates in which game variable the user score is
            stored. By default it's in FRAGCOUNT, but the score in ACS against
            built-in AI bots can be stored in USER1, USER2, etc.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
               5 - light brown, 6 - light red, 7 - light blue
        """
        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, 'scenarios',
                                          '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        self.player_rank = player_rank
        self.players_per_game = players_per_game

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # actor reward
        self.reward_builder = RewardBuilder(self, reward_values)

        # game statistics
        self.stat_keys = [
            'kills', 'deaths', 'suicides', 'frags', 'k/d', 'medikits',
            'armors', 'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
            'plasmarifle', 'bfg9000', 'bullets', 'shells', 'rockets', 'cells'
        ]
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        if sel_weapon == -1:
            logger.warning("SELECTED WEAPON is -1!")
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            logger.warning("SELECTED AMMO is -1!")
            new_v['sel_ammo'] = 0
            sel_ammo = 0
        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics_and_reward(self, action):
        """
        Update statistics of the current game based on the previous
        and the current properties, and create a reward.
        """
        stats = self.statistics[self.map_id]

        # reset reward
        self.reward_builder.reset()

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            self.reward_builder.distance(distance)

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.reward_builder.kill(d)
            stats['kills'] += d
            for _ in range(int(d)):
                self.log('Kill')

        # death
        if self.game.is_player_dead():
            self.reward_builder.death()
            stats['deaths'] += 1
            self.log('Dead')

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            self.reward_builder.suicide()
            stats['suicides'] += 1
            self.log('Suicide')

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                self.reward_builder.medikit(d)
                stats['medikits'] += 1
            else:
                self.reward_builder.injured(d)
            self.log('%s health (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['health'],
                self.properties['health'],
            ))

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                self.reward_builder.armor()
                stats['armors'] += 1
            self.log('%s armor (%i -> %i)' % (
                'Found' if d > 0 else 'Lost',
                self.prev_properties['armor'],
                self.properties['armor'],
            ))

        # change weapon
        if self.properties['sel_weapon'] != self.prev_properties['sel_weapon']:
            self.log('Switched weapon: %s -> %s' % (
                WEAPON_NAMES[self.prev_properties['sel_weapon']],
                WEAPON_NAMES[self.properties['sel_weapon']],
            ))

        # found weapon
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            # assert(self.prev_properties[weapon] == 0 and  # TODO check
            #        self.properties[weapon] == 1), (weapon, self.prev_properties[weapon], self.properties[weapon])
            self.reward_builder.weapon()
            stats[weapon] += 1
            self.log('Found weapon: %s' % WEAPON_NAMES[i + 1])

        # found / lost ammo
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    self.reward_builder.ammo()
                    stats[ammo] += 1
                else:
                    self.reward_builder.use_ammo()
                self.log('%s ammo: %s (%i -> %i)' %
                         ('Found' if d > 0 else 'Lost', ammo,
                          self.prev_properties[ammo], self.properties[ammo]))

    def log(self, message):
        """
        Log the game event.
        During training, we don't want to display events.
        """
        if self.log_events:
            logger.info(message)

    def start(self,
              map_id,
              episode_time=None,
              manual_control=False,
              log_events=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit
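        # (ViZDoom time is measured in tics, at 35 tics per second,
        # so `episode_time` is expressed in seconds)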
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # log events that happen during the game (useful for testing)
        self.log_events = log_events

        # game parameters
        args = []

        # host / server
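        # players are grouped into games of `players_per_game`; each group
        # gets its own port, and the first player of each group acts as host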
        if self.players_per_game > 1:
            port = 5092 + self.player_rank // self.players_per_game
            if self.player_rank % self.players_per_game == 0:
                args.append('-host %i -port %i' %
                            (self.players_per_game, port))
            else:
                args.append('-join 127.0.0.1:%i' % port)
        else:
            args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two seconds after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill (https://zdoom.org/wiki/GameSkill)
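        # (doom_skill is 0-4 here, while ViZDoom expects a skill level in 1-5)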
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def reset(self):
        """
        Reset the game if necessary. This can happen because:
            - we reached the end of an episode (we restart the game)
            - the agent is dead (we make it respawn)
        """
        self.count_non_forward_actions = 0
        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

        # deal with a ViZDoom issue
        while self.is_player_dead():
            logger.warning('Player %i is still dead after respawn.' %
                           self.player_rank)
            self.respawn_player()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.
        """
        # only the host takes care of the bots
        if self.player_rank % self.players_per_game != 0:
            return
        if self.use_scripted_marines:
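            # `pukename` runs a named ACS script; here the scenario's
            # `set_value` script presumably controls the number of marines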
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.log('New episode')
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.log('Respawn player')
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Make sure that properties from the previous
        life are not carried over into this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due
        # to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map.
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Set the initial health of the bots.
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.
        If `sleep` is given, the game will wait
        `sleep` seconds between each frame.
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
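            # the BFG9000 consumes 40 cells per shot, so it is only selected
            # when it has at least a full charge of ammo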
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    # action = ([False] * self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    switch_action = (
                        [False] *
                        self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    action = action + switch_action[len(action):]
                    self.log("Manual weapon change: %s -> %s" %
                             (WEAPON_NAMES[self.properties['sel_weapon']],
                              weapon_name))
                break

        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1

        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                logger.warning('Activated manual control')
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                logger.warning('Activated manual control')
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics_and_reward(action)

    @property
    def reward(self):
        """
        Return the reward value.
        """
        return self.reward_builder.reward

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics.
        Statistics are displayed for each map the agent played on,
        with a global summary when more than one map was played.
        """
        if 'all' in self.statistics:
            del self.statistics['all']
        map_ids = sorted(self.statistics.keys())
        if len(map_ids) == 0:
            logger.info("No statistics to show!")
            return
        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # sum the results on all maps for global statistics
        self.statistics['all'] = {
            k: sum(v[k] for v in self.statistics.values())
            for k in self.stat_keys
        }

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # number of frags per minutes (with and without respawn time)
        if eval_time is not None:
            assert eval_time % 60 == 0
            for k, v in self.statistics.items():
                eval_minutes = eval_time / 60
                if k == 'all':
                    eval_minutes *= (len(self.statistics) - 1)
                respawn_time = (v['deaths'] * RESPAWN_SECONDS * 1.0 / 60)
                v['frags_pm'] = v['frags'] * 1.0 / eval_minutes
                v['frags_pm_r'] = v['frags'] * 1.0 / (eval_minutes +
                                                      respawn_time)

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        # statistics to log
        log_lines = [
            [''] + ['Map%02i' % i for i in map_ids] + ['All'],
            ('Kills', 'kills'),
            ('Deaths', 'deaths'),
            ('Suicides', 'suicides'),
            ('Frags', 'frags'),
            ('Frags/m', 'frags_pm'),
            ('Frags/m (r)', 'frags_pm_r'),
            ('K/D', 'k/d'),
            None,
            ('Medikits', 'medikits'),
            ('Armors', 'armors'),
            ('SuperShotgun', 'shotgun'),
            ('Chaingun', 'chaingun'),
            ('RocketLauncher', 'rocketlauncher'),
            ('PlasmaRifle', 'plasmarifle'),
            ('BFG9000', 'bfg9000'),
            ('Bullets', 'bullets'),
            ('Shells', 'shells'),
            ('Rockets', 'rockets'),
            ('Cells', 'cells'),
        ]

        # only show statistics on all maps if there is more than one map
        if len(map_ids) > 1:
            map_ids.append('all')

        logger.info('*************** Game statistics summary ***************')
        log_pattern = '{: >15}' + ('{: >8}' * len(map_ids))
        for line in log_lines:
            if line is None:
                logger.info('')
            else:
                if type(line) is tuple:
                    assert len(line) == 2
                    name, k = line
                    if k in ['frags_pm', 'frags_pm_r'] and eval_time is None:
                        continue
                    line = ['%s:' % name]
                    line += [self.statistics[map_id][k] for map_id in map_ids]
                else:
                    assert type(line) is list
                    line = line[:len(map_ids) + 1]
                line = ['%.3f' % x if type(x) is float else x for x in line]
                logger.info(log_pattern.format(*line))

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
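
What a driver loop for this class might look like. This is only a sketch: `agent` and `params` are hypothetical stand-ins for objects the surrounding project provides, and the scenario name is an assumption.

def run_sketch(action_builder, agent, params, n_steps=1000):
    # hypothetical driver: action_builder / agent / params come from the project
    game = Game(scenario='deathmatch', action_builder=action_builder,
                n_bots=8, visible=False)
    game.start(map_id=2, episode_time=600)
    last_states = []
    for _ in range(n_steps):
        if game.is_final():  # agent dead, or episode timed out
            game.reset()
        game.observe_state(params, last_states)
        action = agent.next_action(last_states)  # hypothetical agent API
        game.make_action(action, frame_skip=4)
        _ = game.reward  # reward accumulated by the RewardBuilder for this step
    game.print_statistics(eval_time=600)  # eval_time must be a multiple of 60
    game.close()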
Example #3
class Experiment(object):
    """
    Used to run an experiment in combination with an Agent.
    Main methods:
        - start / new_episode / reset / make_action / observe_state / close
    """
    def __init__(self,
                 scenario,
                 action_builder,
                 reward_builder,
                 logger,
                 living_reward=0,
                 custom_reward=False,
                 score_variable='FRAGCOUNT',
                 game_features=[],
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 use_labels_buffer=True,
                 mode='PLAYER',
                 player_rank=0,
                 players_per_game=1,
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 name='Hubert_Bonnisseur_de_la_Bate',
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        """
        # game resources
        game_filename = 'freedoom2.wad'
        self.scenario = scenario
        self.scenario_path = os.path.join(PATH,
                                          'scenarios/{}.wad'.format(scenario))
        self.game_path = os.path.join(PATH, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.game_features = parse_game_features(game_features, logger)
        self.use_labels_buffer = use_labels_buffer
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # window visibility
        self.visible = visible

        # actor reward, used for reward shaping (LSTM & Curiosity A3C)
        self.reward_builder = reward_builder
        self.living_reward = living_reward
        self.custom_reward = custom_reward

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill (i.e. the difficulty of the game)
        self.doom_skill = doom_skill

        # bot name
        self.name = name

        # action builder
        self.action_builder = action_builder

        # save game statistics for each episode (used for model comparison and reward shaping)
        self.stats = {}

        # use logging for debugging purposes
        self.logger = logger

#==============================================================================
# Game start
#==============================================================================

    def start(self, map_id, episode_time=None, log_events=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.
        """

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map('map{:02d}'.format(map_id))

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(episode_time)

        # Save statistics for this map
        self.stats[self.map_id] = []

        # log events that happen during the game (useful for testing)
        #        self.log_events = log_events

        # game parameters
        args = []

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        #        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # agent name
        args.append('+name %s' % self.name)

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # define available buttons
        self.action_builder.set_buttons(self.game)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # define basic rewards
        self.game.set_living_reward(self.living_reward)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()
        self.logger.info('start_game')

#==============================================================================
# Game statistics
#==============================================================================

    def update_game_properties(self):
        """
        Update game properties.
        """
        # read game variables
        new_v = {
            k: self.game.get_game_variable(v)
            for k, v in GAME_FEATURES.items()
        }
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }

        # update game properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_game_statistics(self):
        """
        Calculate game statistics and store them in the running stats dict.
        """
        stats = self.run_stats

        # list of reward events (consumed by the reward builder when custom
        # rewards are enabled)
        r = []

        # calculate stats
        # kill
        d = self.properties['kill_count'] - self.prev_properties['kill_count']
        if d > 0:
            r.extend(d * ['kill_count'])
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            r.append('dead')
            stats['deaths'] += 1

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            r.append('suicide')
            stats['suicides'] += 1

        # found health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                r.append('medikit')
                stats['medikit'] += 1
        stats['health'] = self.properties['health']

        # health lost
        d = self.properties['damage_count'] - self.prev_properties[
            'damage_count']
        if d > 0:
            r.append('health_lost')

        # found armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                r.append('armor')
                stats['armor'] += 1

        # found weapon
        if self.prev_properties['sel_weapon'] != self.properties['sel_weapon']:
            r.append('weapon')
            stats['found_weapon'] += 1

        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if self.prev_properties['sel_weapon'] == self.properties['sel_weapon']:
            if d != 0:
                if d > 0:
                    r.append('ammo')
                    stats['ammo'] += 1
                else:
                    r.append('use_ammo')

        # auxiliary stats not used for rewards
        stats['frag_count'] = self.properties['frag_count']

        return r

    def calculate_final_stats(self):
        """
        Calculate the final stats from the running stats.
        """
        self.run_stats['k/d'] = self.run_stats['kills'] * 1.0 / max(
            1, self.run_stats['deaths'])

#==============================================================================
# Game handling
#==============================================================================

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def reset(self):
        """
        Reset the game if necessary. This can happen because:
            - we reached the end of an episode (we restart the game)
            - the agent is dead (we make it respawn)
        """
        self.stats[self.map_id].append(self.run_stats)
        # if the player is dead
        if self.is_player_dead():
            # respawn it (deathmatch mode)
            if self.episode_time is None:
                self.respawn_player()
            # or reset the episode (episode ends when the agent dies)
            else:
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

        # deal with a ViZDoom issue
#        while self.is_player_dead():
#            logger.warning('Player %i is still dead after respawn.' %
#                           self.params.player_rank)
#            self.respawn_player()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        #        self.log('Respawn player')
        self.initialize_game()

    def new_episode(self):
        """
        Start a new episode.
        """
        # init new stats for the episode
        self.run_stats = {k: 0 for k in STAT_KEYS}
        # init new game
        self.game.new_episode()

        # init episode properties
        self.initialize_game()

#        self.log('New episode')

    def initialize_game(self):
        """
        Reset game properties.
        """
        new_v = {
            k: self.game.get_game_variable(v)
            for k, v in GAME_FEATURES.items()
        }
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }

        self.prev_properties = None
        self.properties = new_v

    def close(self):
        """
        Close the current experiment.
        """
        self.game.close()

    def observe_state(self, variable_names, feature_names):
        """
        Observe the current state of the game.
        """
        # read game state
        screen, variables, game_features = process_game_info(
            self.game, variable_names, feature_names)
        #        last_states.append(GameState(screen, variables, game_features))

        # return the screen and the game features
        return screen, variables, game_features

    def make_action(self,
                    action,
                    variable_names,
                    feature_names,
                    frame_skip=1,
                    sleep=None):
        """
        Process the action and return the next state from the game engine.
        Inputs:
            action: the action to perform
            frame_skip: number of frames during which the same action is repeated
            sleep: pause the game for `sleep` seconds to smooth visualization
        Output:
            the reward defined by the game engine (plus the custom reward,
            if enabled), and the screen / variables / game_features of the
            next state (all None if the state is final)
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # if visualizing, show the frames one by one for smooth rendering
        if self.visible:
            r = 0
            for _ in range(frame_skip):
                r += self.game.make_action(action)
                # death or episode finished
                if self.is_player_dead() or self.is_episode_finished():
                    break
                # sleep for smooth visualization
                if sleep is not None:
                    time.sleep(sleep)
        else:
            r = self.game.make_action(action, frame_skip)

        # observe resulting state
        if not self.is_final():
            screen, variables, game_features = self.observe_state(
                variable_names, feature_names)
        else:
            screen = None
            variables = None
            game_features = None
        # update game statistics and return custom rewards
        self.update_game_properties()
        list_r = self.update_game_statistics()
        r_bis = 0
        if self.custom_reward and self.reward_builder:
            r_bis = self.reward_builder.get_reward(list_r)

        return r + r_bis, screen, variables, game_features
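
A sketch of one interaction step with this class. `action_builder` and `reward_builder` are placeholders for the project's own objects, and the class also relies on module-level globals (PATH, GAME_FEATURES, STAT_KEYS):

import logging

exp = Experiment(scenario='basic', action_builder=action_builder,
                 reward_builder=reward_builder, logger=logging.getLogger(),
                 living_reward=-1, custom_reward=True)
exp.start(map_id=1, episode_time=2100)  # here the timeout is given in tics
exp.new_episode()  # creates run_stats before the first make_action call
reward, screen, variables, game_features = exp.make_action(
    action=0, variable_names=['health'], feature_names=[], frame_skip=4)
exp.close()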
Example #4
class Game(object):
    def __init__(self,
                 scenario,
                 action_builder,
                 score_variable='FRAGCOUNT',
                 freedoom=True,
                 screen_resolution='RES_400X225',
                 screen_format='CRCGCB',
                 use_screen_buffer=True,
                 use_depth_buffer=False,
                 labels_mapping='',
                 game_features='',
                 mode='ASYNC_PLAYER',
                 render_hud=False,
                 render_minimal_hud=False,
                 render_crosshair=True,
                 render_weapon=True,
                 render_decals=False,
                 render_particles=False,
                 render_effects_sprites=False,
                 respawn_protect=True,
                 spawn_farthest=True,
                 freelook=False,
                 name='LUBAN',
                 color=0,
                 visible=False,
                 n_bots=0,
                 use_scripted_marines=None,
                 doom_skill=2):
        """
        Create a new game.
        score_variable: indicates in which game variable the user score is
            stored. By default it's in FRAGCOUNT, but the score in ACS against
            built-in AI bots can be stored in USER1, USER2, etc.
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        color: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray,
               5 - light brown, 6 - light red, 7 - light blue
        """

        # game resources
        game_filename = '%s.wad' % ('freedoom2' if freedoom else 'Doom2')
        self.scenario_path = os.path.join(RESOURCES_DIR, '%s.wad' % scenario)
        self.game_path = os.path.join(RESOURCES_DIR, game_filename)
        print(self.scenario_path)
        print(self.game_path)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert len(name.strip()) > 0 and color in range(8)
        assert n_bots >= 0
        assert (type(use_scripted_marines) is bool
                or use_scripted_marines is None and n_bots == 0)
        assert 0 <= doom_skill <= 4

        # action builder
        self.action_builder = action_builder

        # add the score variable to the game variables list
        self.score_variable = score_variable
        game_variables.append(('score', getattr(GameVariable, score_variable)))

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.labels_mapping = parse_labels_mapping(labels_mapping)
        self.game_features = parse_game_features(game_features)
        self.use_labels_buffer = self.labels_mapping is not None
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # respawn invincibility / distance
        self.respawn_protect = respawn_protect
        self.spawn_farthest = spawn_farthest

        # freelook / agent name / agent color
        self.freelook = freelook
        self.name = name.strip()
        self.color = color

        # window visibility
        self.visible = visible

        # game statistics
        self.stat_keys = [
            'distance', 'kills', 'deaths', 'suicides', 'frags', 'k/d',
            'medikits', 'armors', 'pistol', 'shotgun', 'chaingun',
            'rocketlauncher', 'plasmarifle', 'bfg9000', 'bullets', 'shells',
            'rockets', 'cells'
        ]
        self.statistics = {}

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill
        self.doom_skill = doom_skill

        # manual control
        self.count_non_forward_actions = 0
        self.count_non_turn_actions = 0

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # read game variables
        new_v = {k: self.game.get_game_variable(v) for k, v in game_variables}
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']
        sel_weapon = new_v['sel_weapon']
        sel_ammo = new_v['sel_ammo']
        bullets = new_v['bullets']
        shells = new_v['shells']
        rockets = new_v['rockets']
        cells = new_v['cells']
        fist = new_v['fist']
        pistol = new_v['pistol']
        shotgun = new_v['shotgun']
        chaingun = new_v['chaingun']
        rocketlauncher = new_v['rocketlauncher']
        plasmarifle = new_v['plasmarifle']
        bfg9000 = new_v['bfg9000']

        # check game variables
        if sel_weapon == -1:
            new_v['sel_weapon'] = 1
            sel_weapon = 1
        if sel_ammo == -1:
            new_v['sel_ammo'] = 0
            sel_ammo = 0
        assert sel_weapon in range(1, 8), sel_weapon
        assert sel_ammo >= 0, sel_ammo
        assert all(x in [0, 1] for x in [
            fist, pistol, shotgun, chaingun, rocketlauncher, plasmarifle,
            bfg9000
        ])
        assert 0 <= health <= 200 or health < 0 and self.game.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)
        assert 0 <= bullets <= 200 and 0 <= shells <= 50
        assert 0 <= rockets <= 50 and 0 <= cells <= 300

        # fist
        if sel_weapon == 1:
            assert sel_ammo == 0
        # pistol
        elif sel_weapon == 2:
            assert pistol and sel_ammo == bullets
        # shotgun
        elif sel_weapon == 3:
            assert shotgun and sel_ammo == shells
        # chaingun
        elif sel_weapon == 4:
            assert chaingun and sel_ammo == bullets
        # rocket launcher
        elif sel_weapon == 5:
            assert rocketlauncher and sel_ammo == rockets
        # plasma rifle
        elif sel_weapon == 6:
            assert plasmarifle and sel_ammo == cells
        # BFG9000
        elif sel_weapon == 7:
            assert bfg9000 and sel_ammo == cells

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_statistics(self, action):
        """
        Update statistics of the current game based on the previous
        and the current properties, to evaluate the agent's performance.
        """
        stats = self.statistics[self.map_id]

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        # distance
        moving_forward = action[self.mapping['MOVE_FORWARD']]
        turn_left = action[self.mapping['TURN_LEFT']]
        turn_right = action[self.mapping['TURN_RIGHT']]
        if moving_forward and not (turn_left or turn_right):
            diff_x = self.properties['position_x'] - self.prev_properties[
                'position_x']
            diff_y = self.properties['position_y'] - self.prev_properties[
                'position_y']
            distance = math.sqrt(diff_x**2 + diff_y**2)
            stats['distance'] += distance

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            stats['deaths'] += 1

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            stats['suicides'] += 1

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                stats['medikits'] += 1

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                stats['armors'] += 1

        # found weapon
        for i, weapon in enumerate([
                'pistol', 'shotgun', 'chaingun', 'rocketlauncher',
                'plasmarifle', 'bfg9000'
        ]):
            if self.prev_properties[weapon] == self.properties[weapon]:
                continue
            stats[weapon] += 1

        # found / lost ammo
        for ammo in ['bullets', 'shells', 'rockets', 'cells']:
            d = self.properties[ammo] - self.prev_properties[ammo]
            if d != 0:
                if d > 0:
                    stats[ammo] += 1

    def start(self, map_id, episode_time=None, manual_control=False):
        """
        Start the game.
        If `episode_time` is given, the game will end after the specified time.
        """
        assert type(manual_control) is bool
        self.manual_control = manual_control

        # Save statistics for this map
        self.statistics[map_id] = {k: 0 for k in self.stat_keys}

        # Episode time
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map("map%02i" % map_id)

        # time limit
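        # (ViZDoom time is measured in tics, at 35 tics per second,
        # so `episode_time` is expressed in seconds)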
        if episode_time is not None:
            self.game.set_episode_timeout(int(35 * episode_time))

        # game parameters
        args = []

        # host / server
        args.append('-host 1')

        # screen buffer / depth buffer / labels buffer / mode
        screen_resolution = getattr(ScreenResolution, self.screen_resolution)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer
                                            or self.use_game_features)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # deathmatch mode
        # players will respawn automatically after they die
        # autoaim is disabled for all players
        args.append('-deathmatch')
        args.append('+sv_forcerespawn 1')
        args.append('+sv_noautoaim 1')

        # respawn invincibility / distance
        # players will be invulnerable for two seconds after spawning
        # players will be spawned as far as possible from any other players
        args.append('+sv_respawnprotect %i' % self.respawn_protect)
        args.append('+sv_spawnfarthest %i' % self.spawn_farthest)

        # freelook / agent name / agent color
        args.append('+freelook %i' % (1 if self.freelook else 0))
        args.append('+name %s' % self.name)
        args.append('+colorset %i' % self.color)

        # enable the cheat system (so that we can still
        # send commands to the game in self-play mode)
        args.append('+sv_cheats 1')

        # load parameters
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # available buttons
        self.mapping = add_buttons(self.game,
                                   self.action_builder.available_buttons)

        # doom skill
        self.game.set_doom_skill(self.doom_skill + 1)

        # start the game
        self.game.init()

        # initialize the game after player spawns
        self.initialize_game()

    def update_bots(self):
        """
        Add built-in AI bots.
        There are two types of AI: built-in AI and ScriptedMarines.
        """
        # only the host takes care of the bots
        if self.use_scripted_marines:
            command = "pukename set_value always 2 %i" % self.n_bots
            self.game.send_game_command(command)
        else:
            self.game.send_game_command("removebots")
            for _ in range(self.n_bots):
                self.game.send_game_command("addbot")

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state.
        """
        return self.is_player_dead() or self.is_episode_finished()

    def new_episode(self):
        """
        Start a new episode.
        """
        assert self.is_episode_finished() or self.is_player_dead()
        self.game.new_episode()
        self.initialize_game()

    def respawn_player(self):
        """
        Respawn the player on death.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.initialize_game()

    def initialize_game(self):
        """
        Initialize the game after the player spawns / respawns.
        Make sure that properties from the previous life do not carry over into this one.
        """
        # generate buffers
        game_state = self.game.get_state()
        self._screen_buffer = game_state.screen_buffer
        self._depth_buffer = game_state.depth_buffer
        self._labels_buffer = game_state.labels_buffer
        self._labels = game_state.labels

        # actor properties
        self.prev_properties = None
        self.properties = None

        # advance a few steps to avoid bugs due to initial weapon changes in ACS
        self.game.advance_action(SKIP_INITIAL_ACTIONS)
        self.update_game_variables()

        # if there are bots in the game, and if this is a new game
        self.update_bots()

    def randomize_textures(self, randomize):
        """
        Randomize the textures of the map.
        """
        assert type(randomize) is bool
        randomize = 1 if randomize else 0
        self.game.send_game_command("pukename set_value always 4 %i" %
                                    randomize)

    def init_bots_health(self, health):
        """
        Set the initial health of the bots.
        """
        assert self.use_scripted_marines or health == 100
        assert 0 < health <= 100
        self.game.send_game_command("pukename set_value always 5 %i" % health)

    def make_action(self, action, frame_skip=1, sleep=None):
        """
        Make an action.
        If `sleep` is given, the game will sleep `sleep` seconds between frames
        (only used when the window is visible).
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        # select agent favorite weapon
        for weapon_name, weapon_ammo, weapon_id in WEAPONS_PREFERENCES:
            min_ammo = 40 if weapon_name == 'bfg9000' else 1
            if self.properties[weapon_name] > 0 and self.properties[
                    weapon_ammo] >= min_ammo:
                if self.properties['sel_weapon'] != weapon_id:
                    switch_action = (
                        [False] *
                        self.mapping['SELECT_WEAPON%i' % weapon_id]) + [True]
                    action = action + switch_action[len(action):]
                break

        if action[self.mapping['MOVE_FORWARD']]:
            self.count_non_forward_actions = 0
        else:
            self.count_non_forward_actions += 1

        if action[self.mapping['TURN_LEFT']] or action[
                self.mapping['TURN_RIGHT']]:
            self.count_non_turn_actions = 0
        else:
            self.count_non_turn_actions += 1

        if self.manual_control and (self.count_non_forward_actions >= 30
                                    or self.count_non_turn_actions >= 60):
            manual_action = [False] * len(action)
            manual_action[self.mapping['TURN_RIGHT']] = True
            manual_action[self.mapping['SPEED']] = True
            if self.count_non_forward_actions >= 30:
                manual_action[self.mapping['MOVE_FORWARD']] = True
            manual_repeat = 40
            self.count_non_forward_actions = 0
            self.count_non_turn_actions = 0
        else:
            manual_action = None

        # if we are visualizing the experiment, show all the frames one by one
        if self.visible:
            if manual_action is not None:
                for _ in range(manual_repeat):
                    self.game.make_action(manual_action)
            else:
                for _ in range(frame_skip):
                    self.game.make_action(action)
                    # death or episode finished
                    if self.is_player_dead() or self.is_episode_finished():
                        break
                    # sleep for smooth visualization
                    if sleep is not None:
                        time.sleep(sleep)
        else:
            if manual_action is not None:
                self.game.make_action(manual_action, manual_repeat)
            else:
                self.game.make_action(action, frame_skip)

        # generate buffers
        game_state = self.game.get_state()
        if game_state is not None:
            self._screen_buffer = game_state.screen_buffer
            self._depth_buffer = game_state.depth_buffer
            self._labels_buffer = game_state.labels_buffer
            self._labels = game_state.labels

        # update game variables / statistics rewards
        self.update_game_variables()
        self.update_statistics(action)

    def close(self):
        """
        Close the current game.
        """
        self.game.close()

    def print_statistics(self, eval_time=None):
        """
        Print agent statistics for the current map.
        """
        map_ids = self.statistics.keys()
        assert len(map_ids) == 1

        for v in self.statistics.values():
            assert set(self.stat_keys) == set(v.keys())

        # number of frags (kills - suicides)
        # 100% accurate if the number of frags is given by 'FRAGCOUNT'
        # almost 100% accurate if it is based on an internal ACS variable
        for v in self.statistics.values():
            v['frags'] = v['kills'] - v['suicides']

        # Kills / Deaths
        # 100% accurate if the number of kills is given by an ACS variable
        # almost 100% accurate if it is based on 'FRAGCOUNT'
        for v in self.statistics.values():
            v['k/d'] = v['kills'] * 1.0 / max(1, v['deaths'])

        print("******************Game statistics summary********************")
        print("Map%02d" % self.map_id)
        for item in self.stat_keys:
            print(item + ":\t%d" % self.statistics[self.map_id][item])

    def observe_state(self, params, last_states):
        """
        Observe the current state of the game.
        """
        # read game state
        screen, game_features = process_buffers(self, params)
        variables = [self.properties[x[0]] for x in params.game_variables]
        last_states.append(GameState(screen, variables, game_features))

        # update most recent states
        if len(last_states) == 1:
            last_states.extend([last_states[0]] * (params.hist_size - 1))
        else:
            assert len(last_states) == params.hist_size + 1
            del last_states[0]

        # return the screen and the game features
        return screen, game_features
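
# A minimal usage sketch for the Game wrapper above, kept as comments since it
# depends on an `action_builder` and on scenario resources that are not part of
# this excerpt (the scenario name and `agent_action` are assumptions):
#
#     game = Game(scenario='deathmatch', action_builder=action_builder,
#                 n_bots=8, visible=False)
#     game.start(map_id=2, episode_time=600)
#     while not game.is_episode_finished():
#         if game.is_player_dead():
#             game.respawn_player()
#         game.make_action(agent_action, frame_skip=4)
#     game.print_statistics()
#     game.close()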
Exemple #5
1
class DoomEnv(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4

        self._mode = 'algo'

        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False

        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)
        print()

    # todo: add frame skip option by using tick
    def step(self, action):
        reward = 0.0
        # self.tick = 4
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)

            # self.game.set_action(action)
            # self.game.advance_action(4)
            # reward = self.game.get_last_reward()

        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()

        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()

        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS SETTERS ===============================================================
    def set_game(self, level, resolution, render):
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False

        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()

        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(
            Config.VIZDOOM_SCENARIO_PATH +
            Config.DOOM_SETTINGS[self.level][Config.SCENARIO])

        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(
                Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(
            Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])

        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = Config.DOOM_SETTINGS[self.level][
            Config.GAME_VARIABLES]

        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by ViZDoom.\n'
                'The list of valid resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))

        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            # self.observation_space = self.screen_resized
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.scr_height,
                                                       self.scr_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self.obs_type))

        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height,
                                                self.scr_width),
                                         dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height,
                                                self.scr_width, 3),
                                         dtype=np.uint8)

        self.dummy_ram = [0] * len(self.available_game_variables)

        self.available_action_codes = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }

        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]

        return info

    def get_ram(self):
        if not self.is_game_initialized:
            raise NotImplementedError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            ram = self.dummy_ram
        return ram

    def get_image(self):
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished()
                or self.game.is_player_dead()
                or self.getState() is None)

    # ===========================================  ==============================================================

    def invert_screen(self, img):
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()

        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)

            if all(elem in ac_names
                   for elem in ['MOVE_LEFT', 'MOVE_RIGHT']) or all(
                       elem in ac_names
                       for elem in ['MOVE_BACKWARD', 'MOVE_FORWARD']) or all(
                           elem in ac_names
                           for elem in ['TURN_RIGHT', 'TURN_LEFT']) or all(
                               elem in ac_names for elem in
                               ['SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON']):
                action_codes_copy.remove(action)

        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        self._mode = 'human'
        self.__load_level()

        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False

        self.__init_game()

    def advanceAction(self, tick=0):
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        self.__initHumanPlayer()

        while (not self.game.is_episode_finished()
               and not self.game.is_player_dead()):
            self.advanceAction()

            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()

            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return
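
# Usage sketch for the DoomEnv above; a random policy over the precomputed
# button combinations. The level name and the availability of the Config
# resources are assumptions for illustration.
import random

env = DoomEnv(level='deathmatch', obs_type='image')
obs = env.reset()
done = False
while not done:
    action = random.choice(env.available_action_codes)  # one button combination
    obs, reward, done, info = env.step(action)
env.close()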
Exemple #6
1
class DoomEnvironment:
    def __init__(self, scenario, path_to_config="doom/config"):
        self.game = DoomGame()
        self.game.load_config(path_to_config + "/" + scenario + ".cfg")
        self.game.set_doom_scenario_path(path_to_config + "/" + scenario +
                                         ".wad")
        self.game.set_window_visible(False)
        self.game.init()
        self.num_actions = len(self.game.get_available_buttons())

    def reset(self):
        self.game.new_episode()
        game_state = self.game.get_state()
        obs = game_state.screen_buffer
        self.h, self.w = obs.shape[1:3]
        self.current_obs = self.preprocess_obs(obs)
        if self.game.get_available_game_variables_size() == 2:
            self.ammo, self.health = game_state.game_variables
        return self.get_obs()

    def get_obs(self):
        return self.current_obs[:, :, None]

    def get_obs_rgb(self):
        img = self.game.get_state().screen_buffer
        img = np.rollaxis(img, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3])
        return img.astype(np.uint8)

    def preprocess_obs(self, obs):
        img = np.rollaxis(obs, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3]).astype(np.float32)
        img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 2] * 0.114
        img = Image.fromarray(img)
        img = img.resize((84, 84), Image.BILINEAR)
        img = np.array(img)
        return img.astype(np.uint8)

    def action_to_doom(self, a):
        action = [0 for i in range(self.num_actions)]
        action[int(a)] = 1
        return action

    def step(self, a):
        action = self.action_to_doom(a)
        reward = self.game.make_action(action)

        done = self.game.is_episode_finished()

        if done:
            new_obs = np.zeros_like(self.current_obs, dtype=np.uint8)
        else:
            game_state = self.game.get_state()
            new_obs = game_state.screen_buffer
            new_obs = self.preprocess_obs(new_obs)

        self.current_obs = new_obs

        return self.get_obs(), reward, done

    def watch_random_play(self, max_ep_length=1000, frame_skip=4):
        self.reset()
        for i in range(max_ep_length):
            a = np.random.randint(self.num_actions)
            obs, reward, done = self.step(a)
            if done: break

            img = self.get_obs_rgb()
            if i % frame_skip == 0:
                plt.imshow(img)
                display.clear_output(wait=True)
                display.display(plt.gcf())
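
# Usage sketch for the DoomEnvironment above, assuming the matching
# basic.cfg / basic.wad pair exists under doom/config:
import numpy as np

env = DoomEnvironment('basic')
obs = env.reset()
done = False
while not done:
    obs, reward, done = env.step(np.random.randint(env.num_actions))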
Exemple #7
1
class DoomEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level):
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        # self.action_space = spaces.Discrete(43)   # used to be in the old code
        self.action_space = spaces.MultiBinary(NUM_ACTIONS)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 120
        self.screen_width = 160
        self.screen_resolution = ScreenResolution.RES_160X120
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_height,
                                                   self.screen_width, 3),
                                            dtype=np.uint8)
        self.seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        if 'screen_resolution' in kwargs:
            logger.warn(
                'Deprecated - Screen resolution must now be set using a wrapper. See documentation for details.'
            )
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _load_level(self):
        # Closing if is_initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(
                self._customize_game):
            self.level = -1
            self._customize_game()

        else:
            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings
            self.game.load_config(
                os.path.join(self.doom_dir,
                             'assets/%s' % DOOM_SETTINGS[self.level][CONFIG]))
            self.game.set_doom_scenario_path(
                self.loader.get_scenario_path(
                    DOOM_SETTINGS[self.level][SCENARIO]))
            if DOOM_SETTINGS[self.level][MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in DOOM_SETTINGS[
                        self.level][CONFIG].lower():
                    if 'fix' in DOOM_SETTINGS[self.level][SCENARIO].lower():
                        # mapId = 'map%02d'%np.random.randint(1, 23)
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(
                            1, RANDOMIZE_MAPS + 1)
                    print(
                        '\t=> Special Config: Randomly Loading Maps. MapID = '
                        + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' +
                          DOOM_SETTINGS[self.level][MAP])
                    self.game.set_doom_map(DOOM_SETTINGS[self.level][MAP])
            self.game.set_doom_skill(DOOM_SETTINGS[self.level][DIFFICULTY])
            self.allowed_actions = DOOM_SETTINGS[self.level][ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Algo mode
        if 'human' != self._mode:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. '
                    +
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    +
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    +
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    + 'singleton lock in memory.')
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().screen_buffer.copy()

        # Human mode
        else:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def _start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def old_step(self, action):
        """
        action: a number in the range 0..42

        Inherited from simontudo and his predecessors; it transforms a
        numeric action from the Discrete(43) space into an indicator
        (one-hot) action. As a result, only one button can be pressed
        at a time.
        """
        # Convert to array
        action_arr = np.zeros(NUM_ACTIONS, dtype=int)
        action_arr[action] = 1
        action = action_arr
        assert self.is_initialized, "Doom env not reset, call .reset()"
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [
                int(action[action_idx]) for action_idx in self.allowed_actions
            ]
        else:
            list_action = [int(x) for x in action]
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()

            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape,
                                dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info

        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8), 0, True, {}

    def step(self, action):
        """
        action: iterable of length 43, containing indicators of whether each button is pressed.

        Written by me.
        """
        list_action = [int(x) for x in action]

        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()

            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape,
                                dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info

        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8), 0, True, {}

    def reset(self):
        if self.is_initialized and not self._closed:
            self._start_episode()
            screen_buffer = self.game.get_state().screen_buffer
            if screen_buffer is None:
                raise error.Error(
                    'VizDoom incorrectly initiated. This is likely caused by a missing multiprocessing lock. '
                    +
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    +
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    +
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    + 'singleton lock in memory.')
            return screen_buffer.copy()
        else:
            return self._load_level()

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.screen_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape,
                               dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        except AttributeError:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def close(self):
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        info = {"LEVEL": self.level}
        if state_variables is None:
            return info
        info['KILLCOUNT'] = state_variables[0]
        info['ITEMCOUNT'] = state_variables[1]
        info['SECRETCOUNT'] = state_variables[2]
        info['FRAGCOUNT'] = state_variables[3]
        info['HEALTH'] = state_variables[4]
        info['ARMOR'] = state_variables[5]
        info['DEAD'] = state_variables[6]
        info['ON_GROUND'] = state_variables[7]
        info['ATTACK_READY'] = state_variables[8]
        info['ALTATTACK_READY'] = state_variables[9]
        info['SELECTED_WEAPON'] = state_variables[10]
        info['SELECTED_WEAPON_AMMO'] = state_variables[11]
        info['AMMO1'] = state_variables[12]
        info['AMMO2'] = state_variables[13]
        info['AMMO3'] = state_variables[14]
        info['AMMO4'] = state_variables[15]
        info['AMMO5'] = state_variables[16]
        info['AMMO6'] = state_variables[17]
        info['AMMO7'] = state_variables[18]
        info['AMMO8'] = state_variables[19]
        info['AMMO9'] = state_variables[20]
        info['AMMO0'] = state_variables[21]
        return info
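
# Usage sketch for the gym DoomEnv above; the level index and the random
# one-hot policy are assumptions for illustration:
import numpy as np

env = DoomEnv(level=0)
obs = env.reset()
done = False
while not done:
    action = np.zeros(NUM_ACTIONS, dtype=int)
    action[np.random.randint(NUM_ACTIONS)] = 1  # press a single random button
    obs, reward, done, info = env.step(action)
env.close()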
Exemple #8
0
class DoomEnvironment:
    def __init__(self, config, visible, skiprate):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()

        n_actions = self._game.get_available_buttons_size()
        self._actions = [list(a) for a in it.product([0, 1], repeat=n_actions)]
        self._skiprate = skiprate

    def make_visible(self):
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        return self._game.get_available_buttons_size()

    def observe(self):
        observation = self._game.get_state()
        screen = observation.screen_buffer
        game_variables = observation.game_variables
        return screen, game_variables

    def step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        reward = self._game.make_action(self._actions[action_id],
                                        self._skiprate)
        return reward, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames
        and renders every frame

        :param action_id: index of action to perform
        :return: is_done
        """
        reward = 0.0
        for _ in range(self._skiprate):
            reward += self._game.make_action(self._actions[action_id])
            # it is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return reward, self._game.is_episode_finished()

    def reset(self):
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns ___non-shaped___ episode reward"""
        return self._game.get_total_reward()
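
# Usage sketch for the DoomEnvironment above (the config path and skiprate
# are assumptions for illustration):
import numpy as np

env = DoomEnvironment('basic.cfg', visible=False, skiprate=4)
env.reset()
done = False
while not done:
    action_id = np.random.randint(2 ** env.get_n_buttons())
    reward, done = env.step(action_id)
print('Episode reward:', env.get_episode_reward())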
Exemple #9
0
class VizDoomEnv(gym.Env):

  def __init__(self, config='my_way_home.cfg', repeat_action=1, render=False):
    self._game = DoomGame()
    self._game.load_config(config)
    self._game.set_mode(Mode.PLAYER)
    self._game.set_screen_format(ScreenFormat.GRAY8)
    self._game.set_screen_resolution(ScreenResolution.RES_640X480)
    self._game.set_window_visible(render)
    self._game.init()
    self._actions = self._get_actions()
    self._repeat_action = repeat_action
    self._is_rendered = False

  def _get_actions(self):
    num_actions = self._game.get_available_buttons_size()
    actions = []
    for perm in itertools.product([False, True], repeat=num_actions):
      actions.append(list(perm))
    return actions

  def _get_observation(self):
    state = self._game.get_state()
    if state is not None:
      return state.screen_buffer
    return None

  def _get_terminal(self):
    return self._game.is_episode_finished()

  def reset(self):
    self._game.new_episode()
    return self._get_observation()

  def step(self, action):
    action_ = self._actions[action]
    reward = self._game.make_action(action_, self._repeat_action)
    return self._get_observation(), reward, self._get_terminal(), []

  def render(self, mode='human'):
    self._game.set_window_visible(True)

  def close(self):
    self._game.close()
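
# Usage sketch for the VizDoomEnv above; the config file is the constructor
# default and the random index policy is an assumption for illustration:
import random

env = VizDoomEnv(config='my_way_home.cfg', repeat_action=4)
obs = env.reset()
done = False
while not done:
    action = random.randrange(len(env._actions))
    obs, reward, done, _ = env.step(action)
env.close()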
    print("Episode #" + str(i + True))

    # Starts a new episode. It is not needed right after init() but it doesn't cost much. At least the loop is nicer.
    game.new_episode()

    while not game.is_episode_finished():

        # Gets the state
        s = game.get_state()
        state = s.number
        img = s.image_buffer
        game_variables = s.game_variables

        # Makes a random action and get remember reward.
        action = choice(actions)
        r = game.make_action(action)

        # Prints state's game variables. Printing the image is quite pointless.
        print("State #" + str(state))
        print("Game variables:", game_variables)
        print("action:", action)
        print("Reward:", r)
        print("=====================")

        if sleep_time > 0:
            sleep(sleep_time)

    # Check how the episode went.
    print("Episode finished.")
    print("total reward:", game.get_total_reward())
    print("************************")
Exemple #11
0
class VizDoomEnv(gym.Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, params):
        super(VizDoomEnv, self).__init__()
        self.params = params
        self.game = DoomGame()
        self.game.load_config(params.scenarioPath)
        self._viewer = None
        self.frameskip = params.frameskip
        self.inputShape = params.inputShape
        self.sequenceLength = params.sequenceLength
        self.seqInputShape = (self.inputShape[0] * self.sequenceLength,
                              self.inputShape[1], self.inputShape[2])
        self.gameVariables = params.gameVariables
        self.numGameVariables = len(self.gameVariables)
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_channels(),
                        self.game.get_screen_height(),
                        self.game.get_screen_width())
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=output_shape,
                                            dtype='uint8')
        self.game.init()

        # Maintain a buffer of last seq len frames.
        self.frameBuffer = [np.zeros(self.inputShape)] * self.sequenceLength

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        self.game.set_seed(seed)

    def step(self, action):
        reward = self.game.make_action(list(action), self.frameskip)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        if state is not None:
            observation = state.screen_buffer
            info = state.game_variables  # Return the chosen game variables in info
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
            info = None
        processedObservation = self._preProcessImage(observation)
        del self.frameBuffer[0]
        self.frameBuffer.append(processedObservation)
        return self.frameBuffer, reward, done, info

    # Preprocess image for use in network
    def _preProcessImage(self, image):
        if image.shape != self.inputShape:
            image = cv2.resize(image.transpose(1, 2, 0),
                               (self.inputShape[2], self.inputShape[1]),
                               interpolation=cv2.INTER_AREA).transpose(
                                   2, 0, 1)
        return image

    def reset(self):
        self.game.new_episode()
        state = self._preProcessImage(self.game.get_state().screen_buffer)
        self.frameBuffer = [state] * self.sequenceLength
        return self.frameBuffer

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))
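
# Usage sketch for the VizDoomEnv above; `params` is a hypothetical
# configuration object carrying the fields the constructor reads:
from types import SimpleNamespace

params = SimpleNamespace(scenarioPath='my_way_home.cfg', frameskip=4,
                         inputShape=(3, 84, 84), sequenceLength=4,
                         gameVariables=[])
env = VizDoomEnv(params)
frames = env.reset()
frames, reward, done, info = env.step(env.action_space.sample())
env.close()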
Exemple #12
0
    episodes = 10
    sleep_time = 0.028

    for i in range(episodes):
        print("Episode #" + str(i + 1))

        # Not needed for the first episode but the loop is nicer.
        game.new_episode()
        while not game.is_episode_finished():

            # Gets the state and possibly to something with it
            state = game.get_state()

            # Makes a random action and save the reward.
            reward = game.make_action(actions[np.random.randint(len(actions))])

            print("State #" + str(state.number))
            print("Game Variables:", state.game_variables)
            print("Performed action:", game.get_last_action())
            print("Last Reward:", reward)
            print("=====================")

            # Sleep some time because processing is too fast to watch.
            if sleep_time > 0:
                sleep(sleep_time)

        print("Episode finished!")
        print("total reward:", game.get_total_reward())
        print("************************")
    train.learn()
Exemple #13
0
        # Which consists of:
        screen_buf = preprocess(state.screen_buffer)

        # Guess Q
        qvals = nn.model.predict([screen_buf], batch_size=1)
        if np.random.rand() < epsilon:
            action_ndx = np.random.randint(0, 3)
            print("%.3f:RANDOM:" % epsilon, action_ndx)
        else:
            action_ndx = (np.argmax(qvals))
            print("%.3f:ARGMAX:" % epsilon, action_ndx)

        # Perform Action
        try:
            game.make_action(actions[action_ndx])
        except Exception as e:
            nn.save()
            sys.exit(0)

        if not game.is_episode_finished():

            new_state = game.get_state()
            new_screen_buf = preprocess(new_state.screen_buffer)
            reward = calc.calc_reward(game)

            new_qvals = nn.model.predict([new_screen_buf], batch_size=1)
            max_q = np.max(new_qvals)
            y = np.zeros((1, 3))
            y[:] = qvals[:]
Exemple #14
0
class ViZDoom(Environment):
    """
    ViZDoom environment (https://github.com/mwydmuch/ViZDoom).
    """
    def __init__(self, config_file):
        """
        Initialize ViZDoom environment.

        Args:
            config_file: .cfg file path, which defines how a world works and look like (maps)
        """
        self.game = DoomGame()

        # load configurations from file
        self.game.load_config(config_file)
        self.game.init()

        self.state_shape = self.featurize(self.game.get_state()).shape
        self.num_actions = len(self.game.get_available_buttons())

    def __str__(self):
        return 'ViZDoom'

    def states(self):
        return dict(type='float', shape=self.state_shape)

    def actions(self):
        return dict(type='int', shape=(), num_values=self.num_actions)

    def close(self):
        self.game.close()

    def reset(self):
        self.game.new_episode()
        return self.featurize(self.game.get_state())

    def seed(self, seed):
        self.game.set_seed(seed)
        return seed

    def featurize(self, state):
        H = state.screen_buffer.shape[0]
        W = state.screen_buffer.shape[1]
        _vars = state.game_variables.reshape(-1).astype(np.float32)
        _screen_buf = state.screen_buffer.reshape(-1).astype(np.float32)

        if state.depth_buffer is None:
            _depth_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _depth_buf = state.depth_buffer.reshape(-1).astype(np.float32)

        if state.labels_buffer is None:
            _labels_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _labels_buf = state.labels_buffer.reshape(-1).astype(np.float32)

        if state.automap_buffer is None:
            _automap_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _automap_buf = state.automap_buffer.reshape(-1).astype(np.float32)

        return np.concatenate(
            (_vars, _screen_buf, _depth_buf, _labels_buf, _automap_buf))

    def execute(self, action):
        one_hot_enc = [0] * self.num_actions
        one_hot_enc[action] = 1
        reward = self.game.make_action(one_hot_enc)
        terminal = self.game.is_episode_finished()
        # get_state() returns None once the episode is finished
        if terminal:
            states = np.zeros(self.state_shape, dtype=np.float32)
        else:
            states = self.featurize(self.game.get_state())
        return states, terminal, reward
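
# Usage sketch for the ViZDoom environment above (the config file name and
# the random policy are assumptions for illustration):
import random

env = ViZDoom('basic.cfg')
state = env.reset()
terminal = False
while not terminal:
    state, terminal, reward = env.execute(random.randrange(env.num_actions))
env.close()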
Exemple #15
0
    def play(self):

        # Create DoomGame instance. It will run the game and communicate with you.
        print("Initializing doom...")
        game = DoomGame()

        game.load_config("./examples/config/deepdoomplayer.cfg")
        game.init()
        print("Doom initialized.")

        episodes = 1
        training_steps_per_epoch = 100

        sleep_time = 0.100

        train_episodes_finished = 0
        train_rewards = []

        for epoch in range(episodes):

            train_loss = []

            game.new_episode()

            while (train_episodes_finished < 20):

                sleep(sleep_time)

                if game.is_episode_finished():

                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None
                    self.last_action[1] = 1

                # first frame must be handled differently
                if self.last_state is None:
                    # the _last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(tuple(
                        self.convert_image(game.get_state().image_buffer)
                        for _ in range(self.state_frames)),
                                               axis=2)
                    continue

                reward = game.make_action(
                    DeepDoomPlayer.define_keys_to_action_pressed(
                        self.last_action), 7)

                reward *= 0.01

                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))

                imagebufferlast = imagebuffer

                if imagebuffer is not None:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)

                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary,
                                                       axis=2)

                current_state = np.append(self.last_state[:, :, 1:],
                                          screen_resized_binary,
                                          axis=2)

                self.last_state = current_state

                self.last_action = self.choose_next_action_only_on_q()

            print(train_episodes_finished, "training episodes played.")
            print("Training results:")

            train_rewards = np.array(train_rewards)

            print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
                  "max:", train_rewards.max(), "min:", train_rewards.min())

        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
Exemple #16
0
class VizDoomEnv(Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, cfg_name, repeat=1):
        super(VizDoomEnv, self).__init__()
        self.game = DoomGame()
        self.game.load_config('./slm_lab/env/vizdoom/cfgs/' + cfg_name +
                              '.cfg')
        self._viewer = None
        self.repeat = repeat
        # TODO In future, need to update action to handle (continuous) DELTA buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_height(),
                        self.game.get_screen_width(),
                        self.game.get_screen_channels())
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=output_shape,
                                            dtype='uint8')
        self.game.init()

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        self.game.set_seed(seed)

    def step(self, action):
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            observation = state.screen_buffer.transpose(1, 2, 0)
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        # self.seed(seed)
        self.game.new_episode()
        return self.game.get_state().screen_buffer.transpose(1, 2, 0)

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))

    def _get_game_variables(self, state_variables):
        info = {}
        if state_variables is not None:
            info['KILLCOUNT'] = state_variables[0]
            info['ITEMCOUNT'] = state_variables[1]
            info['SECRETCOUNT'] = state_variables[2]
            info['FRAGCOUNT'] = state_variables[3]
            info['HEALTH'] = state_variables[4]
            info['ARMOR'] = state_variables[5]
            info['DEAD'] = state_variables[6]
            info['ON_GROUND'] = state_variables[7]
            info['ATTACK_READY'] = state_variables[8]
            info['ALTATTACK_READY'] = state_variables[9]
            info['SELECTED_WEAPON'] = state_variables[10]
            info['SELECTED_WEAPON_AMMO'] = state_variables[11]
            info['AMMO1'] = state_variables[12]
            info['AMMO2'] = state_variables[13]
            info['AMMO3'] = state_variables[14]
            info['AMMO4'] = state_variables[15]
            info['AMMO5'] = state_variables[16]
            info['AMMO6'] = state_variables[17]
            info['AMMO7'] = state_variables[18]
            info['AMMO8'] = state_variables[19]
            info['AMMO9'] = state_variables[20]
            info['AMMO0'] = state_variables[21]
        return info
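
# Usage sketch for the slm_lab VizDoomEnv above (the cfg name under
# slm_lab/env/vizdoom/cfgs is an assumption for illustration):
env = VizDoomEnv('basic')
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()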
Exemple #17
0
        s = game.get_state()
        img = s.image_buffer
        img = scipy.misc.imresize(img, (84, 84, 3))
        scipy.misc.imsave('../games/current' + str(port) + '.png', img)

        socket.send(
            get_state('../games/current' + str(port) + '.png', r,
                      game.is_episode_finished()))

    elif msg == "step":
        socket.send("action")
        a_indx = socket.recv()
        a_indx = int(a_indx) - 1
        if a_indx == 1 or a_indx == 2:
            for ii in range(5):
                r = game.make_action(actions[a_indx])
        else:
            r = game.make_action(actions[a_indx])
        # for our toy world, ammo is reward

        r = s.game_variables[0] - 50  #50 is baseline
        if r == 0:
            r = -0.01
        terminal = game.is_episode_finished()
        if r >= 0:
            terminal = True
        if not terminal:
            s = game.get_state()
            img = s.image_buffer
Exemple #18
0
class VizdoomEnv(gym.Env):
    def __init__(self, level):

        # init game
        self.game = DoomGame()
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.init()
        self.state = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])
        self.observation_space = spaces.Box(
            0,
            255, (self.game.get_screen_height(), self.game.get_screen_width(),
                  self.game.get_screen_channels()),
            dtype=np.uint8)
        self.viewer = None

    def step(self, action):
        # convert action to vizdoom action space (one hot)
        act = np.zeros(self.action_space.n)
        act[action] = 1
        act = np.uint8(act)
        act = act.tolist()

        reward = self.game.make_action(act)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        info = {}
        if not done:
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
        else:
            observation = np.uint8(np.zeros(self.observation_space.shape))
            info = {"episode": {"r": self.game.get_total_reward()}}

        return observation, reward, done, info

    def seed(self, seed):
        self.game.set_seed(seed)

    def close(self):
        self.game.close()

    def reset(self):
        self.game.new_episode()
        self.state = self.game.get_state()
        img = self.state.screen_buffer
        return np.transpose(img, (1, 2, 0))

    def render(self, mode='human'):
        try:
            img = self.game.get_state().screen_buffer
            img = np.transpose(img, [1, 2, 0])

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
        except AttributeError:
            pass

    @staticmethod
    def get_keys_to_action():
        # you can press only one key at a time!
        keys = {
            (): 2,
            (ord('a'), ): 0,
            (ord('d'), ): 1,
            (ord('w'), ): 3,
            (ord('s'), ): 4,
            (ord('q'), ): 5,
            (ord('e'), ): 6
        }
        return keys
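
A minimal rollout sketch for the wrapper above; the level index 0 and the CONFIGS table it selects from are assumptions carried over from the constructor:

env = VizdoomEnv(level=0)
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()  # random policy, for illustration only
    obs, reward, done, info = env.step(action)
    total_reward += reward
env.close()
print('episode return:', total_reward)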
def train(conf):
    # used to compute the total training time
    start_time = time.time()

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    agent = RandomAgent(action_size, conf)


    # Start training
    GAME = 0
    t = 0
    max_life = 0 # Maximum episode life (Proxy for agent performance)
    life = 0

    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0

    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])
        action_idx = agent.select_action()

        a_t[action_idx] = 1
        a_t = a_t.astype(int)

        r_t = game.make_action(a_t.tolist(), 4)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        score += r_t
        step += 1

        if is_terminated:
            if life > max_life:
                max_life = life
            GAME += 1
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode finished:", misc)
            # print(scores)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer

            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1

        misc = game_state.game_variables
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if is_terminated:
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        t += 1

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos

	print("Episode #" + str(i + 1))

	# Starts a new episode. It is not needed right after init() but it doesn't cost much. At least the loop is nicer.
	game.new_episode()

	while not game.is_episode_finished():
		
		# Gets the state
		s = game.get_state()
		state = s.number
		img = s.image_buffer
		game_variables = s.game_variables

		# Makes a random action and get remember reward.
		action = choice(actions)
		r = game.make_action(action)

		# Prints state's game variables. Printing the image is quite pointless.
		print("State #" + str(state))
		print("Game variables:", game_variables)
		print("action:", action)
		print("Reward:", r)
		print("=====================")

		if sleep_time > 0:
			sleep(sleep_time)

	# Check how the episode went.
	print("Episode finished.")
	print("total reward:", game.get_total_reward())
	print("************************")
    def start(self):
        """
         this will get passed hier
        """
        
        
        # Create DoomGame instance. It will run the game and communicate with you.
        print ("Initializing doom...")
        game = DoomGame()
        game.load_config("./examples/config/learningtensorflow.cfg")
        game.init()
        print ("Doom initialized.")
        train_rewards = []
        
        
        for epoch in range(DeepDoom.episodes):
            print ("\nEpoch", epoch)
            train_time = 0
            train_episodes_finished = 0
            train_loss = []
            
            
                        
            # start saving checkpoints after 20 epochs
            if epoch > 20:
                if not os.path.exists(DeepDoom.checkpoint_path):
                    os.mkdir(DeepDoom.checkpoint_path)
                self.saver.save(self.session, DeepDoom.checkpoint_path, global_step=epoch)
   
                 

            train_start = time()

            game.new_episode()
        
            for learning_step in tqdm(range(DeepDoom.training_steps_per_epoch)):


                if game.is_episode_finished():
                    #print("game is finished")
                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None
                    #sleep(sleep_time)

                
                # first frame must be handled differently
                if self.last_state is None:
                    #print ("ich bin hier")
                    # the last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(tuple(self.convert_image(game.get_state().image_buffer) for _ in range(self.state_frames)), axis=2)
                    continue
 
                reward = game.make_action(DeepDoom.define_keys_to_action_pressed(self.last_action), 7)
 
                reward *= 0.01
         
                #if screen_array is not None:   
                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))
                else:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)
                imagebufferlast = imagebuffer
                
                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)

                current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)
        
                self.observations.append((self.last_state, self.last_action, reward, current_state, terminal))



                if len(self.observations) > self.memory_size:
                    self.observations.popleft()
                    #sleep(sleep_time)

                # only train if done observing
                if len(self.observations) > self.observation_steps:
                    #print("train")
                    self.train()
                    self.time += 1
                
                self.last_state = current_state

                self.last_action = self.choose_next_action()
                
                
                if self.probability_of_random_action > self.final_random_action_prob \
                        and len(self.observations) > self.observation_steps:
                    self.probability_of_random_action -= \
                        (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps
                        

            print (train_episodes_finished, "training episodes played.")
            print ("Training results:")

            train_rewards = np.array(train_rewards)
            
            train_end = time()
            train_time = train_end - train_start
            mean_loss = np.mean(train_loss) if train_loss else 0.0  # train_loss is never filled in this snippet


            print ("mean:", train_rewards.mean(), "std:", train_rewards.std(), "max:", train_rewards.max(), "min:", train_rewards.min(),  "epsilon:", self.probability_of_random_action)
            print ("t:", str(round(train_time, 2)) + "s")
            train_rewards = []
            
            
        
        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
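
The exploration schedule in the loop above anneals the random-action probability linearly from an initial to a final value over explore_steps updates. A standalone sketch of the same rule; the constants are placeholders, not the values used by this agent:

initial_eps, final_eps, explore_steps = 1.0, 0.05, 100000
eps = initial_eps
for step in range(explore_steps):
    if eps > final_eps:
        eps -= (initial_eps - final_eps) / explore_steps
# eps has now decayed to (approximately) final_eps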
    # Starts a new episode. It is not needed right after init() but it doesn't cost much. At least the loop is nicer.
    game.new_episode()
    t = 1

    action_index = 1

    while not game.is_episode_finished():

        # Gets the state

        t += 1
        game_observation = game.get_state()
        observation_2 = game_observation.image_buffer

        # Makes an action and remembers the reward.
        reward = game.make_action(actions[action_index - 1])
        #reward = game.make_action(choice(actions))

        game_state_after_action = game.get_state()
        observation = game_state_after_action.image_buffer


        observation = np.ascontiguousarray(observation)



        # Prints state's game variables. Printing the image is quite pointless.
        print("State #" + str(game_observation.number))
        print("Game variables:", game_observation.game_variables[0])
        print("Reward:", reward)
        print("=====================")
        print("Episode #" + str(i + 1))

        # Not needed for the first episode but the loop is nicer.
        game.new_episode()
        while not game.is_episode_finished():
            # Gets the state; possibly do something with it

            for i in range(episodes):
                # Not needed for the first episode but the loop is nicer.
                game.new_episode()
                while not game.is_episode_finished():
                    state = game.get_state()
                    s = sp.parse_state(state, game)[0]
                    print(s)
                    current_count = len(s.split(" "))
                    game.make_action(actions[randint(0, n_actions - 1)], 4)
            print(sp.parse_state(state, game)[0])
            # print("State:")
            # print(sp.parse_state(state, game)[0])
            #print(len(sp.parse_state(state, game)[0].split(" ")))

            # if flag == False:
            # fig = plt.figure()
            # ax = fig.add_subplot(111)
            # fig,ax = plt.subplots(nrows=1,ncols=2)
            #ax.set_aspect('auto')
            # plt.title("game state GloVe embedding")
            # ax.imshow(we.game_state_to_image(state,game))
            # ax.set_aspect('auto')
            # plt.xlabel("word")
            # plt.ylabel("embedding")
Example #24
class VizDoomGym(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video frames
                                  to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)",
                          self.mission_file)

        self.deathmatch = True
        # distance we need the agent to travel per time-step, otherwise we penalise
        self.distance_threshold = 15

        self.prev_properties = None
        self.properties = None

        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game and initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")

        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        self.metadata['render_modes'] = ['rgb_array']

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows, self.columns,
                                                       3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # read game variables
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']

        # check game variables
        assert 0 <= health <= 200 or (health < 0 and self.env.is_player_dead())
        assert 0 <= armor <= 200, (health, armor)

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Update reward.
        """

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        reward = 0

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                reward += default_reward_values['ARMOR']

        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']

        # distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = self.properties['position_x'] - self.prev_properties[
            'position_x']
        diff_y = self.properties['position_y'] - self.prev_properties[
            'position_y']
        distance = np.sqrt(diff_x**2 + diff_y**2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # living
        reward += default_reward_values['LIVING']

        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step"""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action),
                                 self._action_frame_repeat)

        self.update_game_variables()

        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed"""
        if seed is not None:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError
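
Since __init__ raises NotImplementedError, VizDoomGym is meant to be subclassed, with the subclass forwarding a mission file to _init. A minimal sketch; the mission file name below is a placeholder:

class BasicVizDoomGym(VizDoomGym):
    def __init__(self):
        # 'basic.cfg' is a hypothetical mission file, used here for illustration
        self._init(mission_file='basic.cfg', scaled_resolution=[84, 84])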
def train(conf):

    # used to compute the total training time
    start_time = time.time()

    # set the seeds for reproducibility
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)

    # Prevent TensorFlow from grabbing all GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows, img_cols = 64, 64
    # frames are converted to black and white during preprocessing
    img_channels = 4  # we stack 4 frames

    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)

    agent.model = Networks.dqn(state_size, action_size, agent.learning_rate)
    agent.target_model = Networks.dqn(state_size, action_size, agent.learning_rate)

    x_t = game_state.screen_buffer # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    s_t = np.stack(([x_t]*4), axis=2) # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0) # 1x64x64x4

    is_terminated = game.is_episode_finished()

    # Start training
    epsilon = agent.initial_epsilon
    GAME = 0
    t = 0
    max_life = 0 # Maximum episode life (Proxy for agent performance)
    life = 0

    # Buffer to compute rolling statistics 
    life_buffer, ammo_buffer, kills_buffer = [], [], [] 

    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0
    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])

        # Epsilon Greedy
        action_idx  = agent.get_action(s_t)
        a_t[action_idx] = 1
        a_t = a_t.astype(int)

        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        # print(r_t)
        score += r_t
        step += 1

        if is_terminated:
            if life > max_life:
                max_life = life
            GAME += 1
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode finished:", misc)
            # print(scores)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer

            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1
            

        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables

        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if is_terminated:
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        # save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)

        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()
            
        s_t = s_t1
        t += 1

        # print info
        state = ""
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"

        if is_terminated:
            print("TIME", t, "/ GAME", GAME, "/ STATE", state, \
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx, "/ REWARD", score, \
                  "/ Q_MAX %e" % np.max(Q_max), "/ LIFE", max_life, "/ LOSS", loss)

            # Save Agent's Performance Statistics
            if GAME % agent.stats_window_size == 0 and t > agent.observe: 
                print("Update Rolling Statistics")
                agent.mavg_score.append(np.mean(np.array(life_buffer)))
                agent.var_score.append(np.var(np.array(life_buffer)))
                agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
                agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))

                # Reset rolling stats buffer
                life_buffer, ammo_buffer, kills_buffer = [], [], [] 

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos
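
The update s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) above maintains a rolling window of the four most recent frames: the newest frame is prepended and the oldest dropped. A self-contained sketch of the same mechanism with dummy frames:

import numpy as np

s_t = np.zeros((1, 64, 64, 4), dtype=np.float32)
for t in range(10):
    x_t1 = np.full((1, 64, 64, 1), t, dtype=np.float32)  # stand-in frame
    s_t = np.append(x_t1, s_t[:, :, :, :3], axis=3)      # newest frame first
assert s_t.shape == (1, 64, 64, 4)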
Example #26
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter (specification key:
    `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables to state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """
    def __init__(self,
                 level,
                 visualize=False,
                 include_variables=False,
                 factored_action=False,
                 frame_skip=12,
                 seed=None):
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            self.environment.set_seed(seed)
        self.environment.init()

        self.state_shape = (480, 640, 3)  # (height, width, channels) for RES_640X480
        self.num_variables = self.environment.get_available_game_variables_size()
        self.num_buttons = self.environment.get_available_buttons_size()
        # all button combinations; stored as available_actions so the list does
        # not shadow the actions() specification method below
        self.available_actions = [
            tuple(a)
            for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        if self.include_variables:
            return OrderedDict(screen=dict(type='float',
                                           shape=self.state_shape),
                               variables=dict(type='float',
                                              shape=self.num_variables))
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(), num_values=len(self.available_actions))

    def close(self):
        self.environment.close()
        self.environment = None

    def get_states(self):
        state = self.environment.get_state()
        screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
        if self.include_variables:
            return OrderedDict(screen=screen, variables=state.game_variables)
        else:
            return screen

    def reset(self):
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self.available_actions[actions]
        if self.visualize:
            self.environment.set_action(action)
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(action, self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward
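A short usage sketch for this adapter; the scenario file name is a placeholder:

env = ViZDoom(level='my_scenario.cfg', visualize=False, frame_skip=4)
states = env.reset()
# execute() takes an index into the precomputed button combinations and
# returns (states, terminal, reward)
states, terminal, reward = env.execute(actions=0)
env.close()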
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation to be returned with each step
        :param action_frame_repeat: how many game tics should an action be active
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the order in which the doom maps are shown.
        :param data_augmentation: bool to determine whether or not to use data augmentation
            (adding randomly colored, randomly sized boxes to observation)
        """

        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)

        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game and initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        # scaled_resolution is (height, width), i.e. (rows, columns)
        rows = scaled_resolution[0]
        columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(rows, columns, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy array into the desired resolution and shape
        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y
        where N is randomly sampled between 0 and 5
        and x and y are randomly sampled from between 0.1 and 0.35
        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')

        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)

        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the format (rows, columns, channels)
        """
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = self.env.get_state().screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable Baselines

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
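
As the create_env docstring suggests, the returned callable is meant for vectorisation. A sketch assuming Stable Baselines is installed; the cfg path is a placeholder:

from stable_baselines.common.vec_env import DummyVecEnv

env = VizDoom('maze.cfg', number_maps=1)
vec_env = DummyVecEnv([env.create_env()])
obs = vec_env.reset()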
    def play(self):
        
        # Create DoomGame instance. It will run the game and communicate with you.
        print ("Initializing doom...")
        game = DoomGame()

        game.load_config("./examples/config/deepdoomplayer.cfg")
        game.init()
        print ("Doom initialized.")
 
        episodes = 1
        training_steps_per_epoch = 100

        sleep_time = 0.100

        train_episodes_finished = 0
        train_rewards = []
        
        for epoch in range(episodes):
           
            train_loss = []
            
            game.new_episode()
        
            while train_episodes_finished < 20:

                sleep(sleep_time)

                if game.is_episode_finished():
                    
                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None
                    self.last_action[1] = 1

                # first frame must be handled differently
                if self.last_state is None:
                    # last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(tuple(self.convert_image(game.get_state().image_buffer) for _ in range(self.state_frames)), axis=2)
                    continue

                
                reward = game.make_action(DeepDoomPlayer.define_keys_to_action_pressed(self.last_action), 7)
           
                reward *= 0.01

                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))
                else:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)
                imagebufferlast = imagebuffer
                
                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)

                current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)

                self.last_state = current_state

                self.last_action = self.choose_next_action_only_on_q()

            print (train_episodes_finished, "training episodes played.")
            print ("Training results:")
            
            train_rewards = np.array(train_rewards)
  
            print ("mean:", train_rewards.mean(), 
                   "std:", train_rewards.std(), 
                   "max:", train_rewards.max(), 
                   "min:", train_rewards.min())
           
            
        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
Example #29
sleep_time = 0.028

for i in range(episodes):
    print("Episode #" + str(i + 1))

    # Not needed for the first episode but the loop is nicer.
    game.new_episode()
    while not game.is_episode_finished():

        # Gets the state; possibly do something with it
        s = game.get_state()
        img = s.image_buffer
        misc = s.game_variables

        # Makes a random action and save the reward.
        r = game.make_action(choice(actions))

        # Makes a "prolonged" action and skip frames:
        # skiprate = 3
        # r = game.make_action(choice(actions), skiprate)

        # The same could be achieved with:
        # game.set_action(choice(actions))
        # skiprate = 3
        # game.advance_action(skiprate)
        # r = game.get_last_reward()

        print("State #" + str(s.number))
        print("Game Variables:", misc)
        print("Performed action:", game.get_last_action())
        print("Last Reward:", r)
Example #30
class Vizdoom_env(object):
    def __init__(self, config='vizdoom_env/asset/default.cfg', verbose=False,
                 perception_type='more_simple'):
        self.verbose = verbose
        self.game = DoomGame()
        self.game.load_config(config)
        if self.verbose:
            self.game.set_window_visible(True)
            self.game.set_screen_resolution(ScreenResolution.RES_1280X960)

        self.game_variables = self.game.get_available_game_variables()
        self.buttons = self.game.get_available_buttons()
        self.action_strings = [b.__str__().replace('Button.', '')
                               for b in self.buttons]
        self.game_variable_strings = [v.__str__().replace('GameVariable.', '')
                                      for v in self.game_variables]
        self.perception_type = perception_type
        if perception_type == 'clear':
            self.distance_dict = CLEAR_DISTANCE_DICT
            self.horizontal_dict = CLEAR_HORIZONTAL_DICT
        elif perception_type == 'simple':
            pass
        elif perception_type == 'more_simple':
            pass
        else:
            self.distance_dict = DISTANCE_DICT
            self.horizontal_dict = HORIZONTAL_DICT

    def init_game(self):
        self.game.init()
        self.new_episode()

    def new_episode(self, init_state=None):
        self.game.new_episode()
        if init_state is not None:
            self.initialize_state(init_state)
        self.take_action('NONE')
        state = self.game.get_state()
        if state is None:
            raise RuntimeError('Cannot get initial states')
        img_arr = np.transpose(state.screen_buffer.copy(), [1, 2, 0])
        self.x_size = img_arr.shape[1]
        self.y_size = img_arr.shape[0]
        self.channel = img_arr.shape[2]
        self.get_state()
        if self.verbose:
            self.call_all_perception_primitives()
        p_v = self.get_perception_vector()
        self.s_h = [img_arr.copy()]
        self.a_h = []
        self.p_v_h = [p_v.copy()]  # perception vector

    def end_game(self):
        self.game.close()

    def state_transition(self, action_string):
        if action_string == 'NONE' or action_string in self.action_strings:
            self.take_action(action_string)
            self.a_h.append(action_string)
            if self.verbose:
                self.print_state()
            if FRAME_SKIP[action_string][2] == 0:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            self.post_none(action_string)
            if FRAME_SKIP[action_string][2] == 1:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            if self.verbose:
                self.call_all_perception_primitives()
        else:
            raise ValueError('Unknown action')

    def call_all_perception_primitives(self):
        for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
            self.in_target(actor)
            for dist in self.distance_dict.keys():
                for horz in self.horizontal_dict.keys():
                    self.exist_actor_in_distance_horizontal(actor, dist, horz)
        for weapon_slot in range(1, 10):
            self.have_weapon(weapon_slot)
            self.have_ammo(weapon_slot)
            self.selected_weapon(weapon_slot)
        for actor in MONSTER_LIST:
            self.is_there(actor)
        self.no_selected_weapon_ammo()

    def take_action(self, action):
        action_vector = [a == action for a in self.action_strings]
        frame_skip = FRAME_SKIP[action][0]
        if action == 'ATTACK':
            state = self.game.get_state()
            gv_values = dict(zip(self.game_variable_strings,
                                 state.game_variables))
            weapon_num = int(gv_values['SELECTED_WEAPON'])
            frame_skip = ATTACK_FRAME_SKIP[weapon_num]
        self.game.make_action(action_vector, frame_skip)

    def post_none(self, action):
        none_vector = [a == 'NONE' for a in self.action_strings]
        self.game.make_action(none_vector, FRAME_SKIP[action][1])

    def get_action_list(self):
        return self.action_strings

    def init_actors(self):
        self.actors = {}

    def check_and_add_to_actors(self, actor_name, label):
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        self.actors[actor_name].append(label)

    def get_actor_by_name(self, actor_name):
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        return self.actors[actor_name]

    def get_state(self):
        state = self.game.get_state()
        if state is None:
            self.game_variable_values = dict()
            self.player = None
            self.monsters = []
            self.ammo = []
            self.init_actors()
            return
        self.game_variable_values = dict(zip(self.game_variable_strings, state.game_variables))
        self.monsters = []
        self.ammo = []
        self.weapons = []
        self.actors = {}
        for l in state.labels:
            if l.object_name in PLAYER_NAME:
                self.player = l
            elif l.object_name in MONSTER_LIST:
                self.monsters.append(l)
                self.check_and_add_to_actors(l.object_name, l)
            else:
                self.check_and_add_to_actors(l.object_name, l)

        self.labels = state.labels
        self.screen = np.transpose(state.screen_buffer, [1, 2, 0]).copy()

    def get_perception_vector_cond(self):
        if self.perception_type == 'simple' or \
                self.perception_type == 'more_simple':
            return self.get_perception_vector_cond_simple()
        else:
            return self.get_perception_vector_cond_basic()

    def get_perception_vector_cond_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append('EXIST {} IN {} {}'.format(actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector_cond_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append('ISTHERE {}'.format(actor))
        if self.perception_type == 'more_simple':
            return vec
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector(self):
        if self.perception_type == 'simple' or\
                self.perception_type == 'more_simple':
            return self.get_perception_vector_simple()
        else: return self.get_perception_vector_basic()

    def get_perception_vector_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append(self.exist_actor_in_distance_horizontal(actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def get_perception_vector_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append(self.is_there(actor))
        if self.perception_type == 'more_simple':
            return np.array(vec)
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def print_state(self):
        state = self.game.get_state()
        if state is None:
            print('No state')
            return
        game_variables = dict(zip(self.game_variable_strings, state.game_variables))
        game_variable_print = ''
        for key in sorted(game_variables.keys()):
            game_variable_print += '{}: {}, '.format(key, game_variables[key])
        game_variable_print += '\n'
        print(game_variable_print)
        for l in state.labels:
            print("id: {id}, name: {name}, position: [{pos_x},{pos_y},{pos_z}], "
                  "velocity: [{vel_x},{vel_y},{vel_z}], "
                  "angle: [{angle},{pitch},{roll}], "
                  "box: [{x},{y},{width},{height}]\n".format(
                      id=l.object_id, name=l.object_name,
                      pos_x=l.object_position_x, pos_y=l.object_position_y,
                      pos_z=l.object_position_z,
                      vel_x=l.object_velocity_x, vel_y=l.object_velocity_y,
                      vel_z=l.object_velocity_z,
                      angle=l.object_angle, pitch=l.object_pitch,
                      roll=l.object_roll,
                      x=l.x, y=l.y, width=l.width, height=l.height))

    def is_there(self, actor):
        if len(self.get_actor_by_name(actor)) > 0:
            if self.verbose:
                print('ISTHERE {}'.format(actor))
            return True
        return False

    def in_target(self, actor):
        center_x = self.x_size / 2
        center_y = self.y_size / 2
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            a_y_min, a_y_max = a.y, a.y + a.height
            if a_x_min < center_x < a_x_max and a_y_min < center_y < a_y_max:
                if self.verbose:
                    print('INTARGET {}'.format(actor))
                return True
        return False

    def exist_actor_in_distance_horizontal(self, actor, dist, horz):
        cen_x = self.x_size / 2
        p = self.player
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            d_x = a.object_position_x - p.object_position_x
            d_y = a.object_position_y - p.object_position_y
            d = math.sqrt(d_x**2 + d_y**2)
            if self.distance_dict[dist](d) and self.horizontal_dict[horz](a_x_min, a_x_max, cen_x):
                if self.verbose:
                    print('EXIST {} in {} {}'.format(actor, dist, horz))
                return True
        return False

    # Weapons
    # 1: Fist, chainsaw, 2: pistol, 3: shotgun, 4: chaingun, 5: rocket launcher, 6: plasma rifle
    # SELECT_WEAPON_1 switch between fist and chainsaw
    def have_weapon(self, weapon_slot):
        if self.game_variable_values['WEAPON{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have weapon {}'.format(weapon_slot))
            return True
        return False

    def have_ammo(self, weapon_slot):
        if weapon_slot == 1:  # Fist or Chainsaw
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        if self.game_variable_values['AMMO{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        return False

    def selected_weapon(self, weapon_slot):
        if self.game_variable_values['SELECTED_WEAPON'] == weapon_slot:
            if self.verbose:
                print('Weapon {} is selected'.format(weapon_slot))
            return True
        return False

    def no_selected_weapon_ammo(self):
        if self.game_variable_values['SELECTED_WEAPON_AMMO'] == 0:
            if self.verbose:
                print('no selected weapon ammo is left')
            return True
        return False

    def initialize_state(self, init_state):
        """ Takes random arguments and initialies the state

        Assumes that the max number of monster and ammo spawns is 5

        Params:
            init_state  [{"player_pos": [x, y], "monster_pos": [[x1, y1], [x2, y2]]}]
        """
        if 'player_pos' in init_state:
            x, y = init_state['player_pos']
            self.game.send_game_command('puke 20 {} {}'.format(x, y))
        if 'demon_pos' in init_state:
            for i, (x, y) in enumerate(init_state['demon_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(21 + i, x, y))
        if 'revenant_pos' in init_state:
            for i, (x, y) in enumerate(init_state['revenant_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(5 + i, x, y))
        if 'hellknight_pos' in init_state:
            for i, (x, y) in enumerate(init_state['hellknight_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(15 + i, x, y))
        if 'ammo_pos' in init_state:
            for i, (x, y) in enumerate(init_state['ammo_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(10 + i, x, y))
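
Based on the initialize_state docstring above, a sketch of starting an episode with fixed spawn positions; the coordinates and the action name are placeholders whose validity depends on the loaded scenario:

env = Vizdoom_env(config='vizdoom_env/asset/default.cfg')
env.init_game()
env.new_episode(init_state={
    'player_pos': [0, 0],
    'demon_pos': [[100, 50], [150, -30]],
})
env.state_transition('MOVE_FORWARD')
env.end_game()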