Beispiel #1
0
class AgentA3C(Agent):
    '''Agent which leverages Actor Critic Learning'''

    def __init__(self,
                 model_path,
                 dtype,
                 seed=451):
        self._seed = seed
        self._idx = 0
        self._dtype = dtype
        self.env = LoveLetterEnv(AgentRandom(seed), seed)
        state = self.env.reset()

        self._model = ActorCritic(
            state.shape[0], self.env.action_space).type(dtype)
        self._model.load_state_dict(torch.load(model_path))

    def _move(self, game):
        '''Return move which ends in score hole'''
        assert game.active()
        self._idx += 1

        state = self.env.force(game)
        state = torch.from_numpy(state).type(self._dtype)
        cx = Variable(torch.zeros(1, 256).type(self._dtype), volatile=True)
        hx = Variable(torch.zeros(1, 256).type(self._dtype), volatile=True)

        _, logit, (hx, cx) = self._model(
            (Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
        prob = F.softmax(logit)
        action_idx = prob.max(1)[1].data.cpu().numpy()[0, 0]

        player_action = self.env.action_from_index(action_idx, game)
        if player_action is None:
            # print("ouch")
            options = Agent.valid_actions(game, self._seed + self._idx)
            if len(options) < 1:
                raise Exception("Unable to play without actions")

            random.seed(self._seed + self._idx)
            return random.choice(options)

        # print("playing ", self._idx, player_action)
        return player_action
Beispiel #2
0
class TFAgent(Agent):
    '''Agent which leverages tensorflow'''
    def __init__(self, model_path, seed=451):
        self._seed = seed
        self._idx = 0
        self.env = LoveLetterEnv(AgentRandom(seed), seed)
        self.vec_env = DummyVecEnv([
            lambda: self.env
        ])  # The algorithms require a vectorized environment to run

        state = self.env.reset()

        self._model = PPO2(MlpPolicy,
                           self.vec_env,
                           verbose=0,
                           tensorboard_log="./tensorboard/")
        self._model.load(model_path)

    def _move(self, game):
        '''Return move which ends in score hole'''
        assert game.active()
        self._idx += 1

        state = self.env.force(game)
        action_idx = self._model.predict(state, deterministic=True)[0]

        player_action = self.env.action_from_index(action_idx, game)
        if player_action is None:
            # print("ouch")
            options = Agent.valid_actions(game, self._seed + self._idx)
            if len(options) < 1:
                raise Exception("Unable to play without actions")

            random.seed(self._seed + self._idx)
            return random.choice(options)

        # print("playing ", self._idx, player_action)
        return player_action