Exemple #1
0
class Bot:
    """Player that chooses its moves greedily from a supplied Q-table.

    The bot keeps its own ``Environment`` (used to hash states into table
    indices) and a ``Player`` created with ``exploratory=0.0`` (used to
    enumerate the actions available in a state).
    """

    def __init__(self, name, q_table):
        self.name = name
        self.q_table = q_table
        self.env = Environment()
        self.player = Player(
            name=name,
            exploratory=0.0,
            environment=self.env,
        )

    def take_turn(self, state):
        """Return the action to play in ``state``.

        The state is hashed into a Q-table index. When the table entry is
        ``None`` a ``'!'`` is printed and a random action is returned;
        otherwise the action at the position of the highest Q-value is
        returned (the first position wins ties).
        """
        table_key = self.env.hasher(state)
        candidates = self.player.actions(state)
        q_values = self.q_table[table_key]

        if q_values is not None:
            # Follow the learned policy: index of the largest Q-value.
            chosen, _ = max(enumerate(q_values), key=operator.itemgetter(1))
            return candidates[chosen]

        # The table has no entry for this state, so fall back to a random
        # action and leave a visible marker on stdout.
        print('!', end='')
        return candidates[random.randrange(len(candidates))]
Exemple #2
0
 def do_invert():
     """Return a copy of ``q_table`` re-indexed by the 1<->2 swapped state.

     For every index in the outer-scope ``q_table`` the state is rebuilt
     with ``Environment.dehasher``, every cell equal to 1 becomes 2 and
     vice versa, and the original Q-row is stored at the hash of that
     swapped state. Slots that no swapped state maps to remain ``None``.

     NOTE(review): 1 and 2 presumably mark the two players' pieces, and
     ``dehasher`` presumably returns a fresh mutable 2-D structure; confirm
     against ``Environment``.
     """
     env = Environment()
     result = [None] * len(q_table)
     for state_idx, q_state in enumerate(q_table):
         state = env.dehasher(state_idx)
         for i, row in enumerate(state):
             for j, col in enumerate(row):
                 # Compare by value, not identity: `is` on ints only works
                 # through CPython's small-int cache and is always False
                 # for e.g. NumPy integer scalars.
                 if col == 1:
                     state[i][j] = 2
                 elif col == 2:
                     state[i][j] = 1
         inverted_state_idx = env.hasher(state)
         result[inverted_state_idx] = q_state
     return result