class Bot:
    """Agent that chooses its moves from a previously built Q-table.

    The bot owns its own ``Environment`` (used to hash states into table
    indices) and a ``Player`` created with ``exploratory=0.0`` (used to list
    the actions available in a state).
    """

    def __init__(self, name, q_table):
        """Store the bot's name and Q-table and build its helper objects.

        Args:
            name: Identifier of the bot; also forwarded to ``Player``.
            q_table: Indexable collection addressed by hashed state index.
                An entry is either ``None`` or an iterable of values, one
                per action index.
        """
        self.name = name
        self.q_table = q_table
        self.env = Environment()
        self.player = Player(
            name=name,
            exploratory=0.0,
            environment=self.env,
        )

    def take_turn(self, state):
        """Return the action the bot plays in ``state``.

        When the Q-table has no entry for the state (``None``), a ``'!'``
        marker is printed and a uniformly random action is returned.
        Otherwise the action at the index of the largest Q-value is
        returned (the first such index on ties).
        """
        state_idx = self.env.hasher(state)
        actions = self.player.actions(state)
        q_values = self.q_table[state_idx]

        if q_values is None:
            # The table holds nothing for this state, so fall back to a
            # random move and flag the event on stdout.
            print('!', end='')
            return actions[random.randrange(len(actions))]

        # Follow the policy: take the index carrying the highest value.
        # NOTE(review): assumes that index is valid for ``actions`` -- confirm
        # against Player.actions.
        greedy_idx = max(enumerate(q_values), key=operator.itemgetter(1))[0]
        return actions[greedy_idx]
def do_invert():
    """Build a copy of ``q_table`` re-indexed by states with marks 1 and 2 swapped.

    ``q_table`` is not a parameter; it is read from the surrounding scope.
    For every state index the state is decoded with ``Environment.dehasher``,
    each cell equal to 1 becomes 2 and each cell equal to 2 becomes 1, and
    the original row is stored at the index ``Environment.hasher`` gives for
    the swapped state.

    Returns:
        A list of the same length as ``q_table``. Slots that no swapped
        state maps to stay ``None``.
    """
    env = Environment()
    result = [None for _ in q_table]

    for state_idx, q_state in enumerate(q_table):
        # NOTE(review): assumes dehasher returns a fresh, mutable 2-D
        # structure on each call, since it is edited in place -- confirm.
        state = env.dehasher(state_idx)
        for i, row in enumerate(state):
            for j, cell in enumerate(row):
                # Compare by value: ``is`` on int literals only works through
                # CPython's small-int cache and fails for other integer types.
                if cell == 1:
                    state[i][j] = 2
                elif cell == 2:
                    state[i][j] = 1
        result[env.hasher(state)] = q_state

    return result