def get_reward(state, stateprime, player):
    """Return the change in heuristic score between two board states.

    Each state is an iterable of column values. Every column is passed
    through ``ternery`` and left-padded with '0' to six characters, and the
    resulting grid is handed to ``findInARow`` once per player digit
    ('1' and '2').  Each run returned is scored by its length:

        length 2 -> 10
        length 3 -> 100
        length 4 -> 99999999

    Runs found for '1' and runs found for '2' are weighted with opposite
    signs, and the difference between the two state scores is returned.

    NOTE(review): the per-state score is negated when ``player`` is not
    '1', and the final difference is negated again in that case.  The two
    flips cancel, so the value returned is the same for either player and
    is always measured from player '1's side.  This reproduces the original
    arithmetic exactly; confirm whether that is intended.

    Args:
        state: column values of the board before the move.
        stateprime: column values of the board after the move.
        player: compared against the string '1' to choose the sign.

    Returns:
        The integer score difference described above.

    Raises:
        KeyError: if ``findInARow`` yields a run whose length is not
            2, 3 or 4 (there is no scorecard entry for it).
    """
    # TODO (carried over from the original; the outstanding item was not specified)
    scorecard = {2: 10, 3: 100, 4: 99999999}
    sign = 1 if player == '1' else -1

    def score(columns):
        # Score one board: build the padded grid, then add/subtract run values.
        # rjust pads exactly like repeated "'0' + tern" but cannot loop
        # forever if ternery ever returns more than six characters.
        grid = [list(ternery(column).rjust(6, '0')) for column in columns]
        p1_rows = findInARow(grid, '1')
        p2_rows = findInARow(grid, '2')
        total = 0
        for run in p1_rows:
            total += sign * scorecard[len(run)]
        for run in p2_rows:
            total -= sign * scorecard[len(run)]
        return total

    before = score(state)
    after = score(stateprime)
    return sign * (after - before)
def transition(self, column_choice, player):
    """Place ``player``'s piece in the column at index ``column_choice``.

    The column value is rendered with ``ternery``.  If that string already
    has six characters the move is rejected and nothing is modified.
    Otherwise ``str(player)`` is prepended to the string (or replaces it
    when the string is exactly '0'), and the result is parsed as a base-3
    integer and stored back into ``self.state``.

    Returns:
        False if the column already holds six digits, True after a
        successful update.
    """
    digits = ternery(self.state[column_choice])
    if len(digits) == 6:
        return False

    piece = str(player)
    if digits == '0':
        # A '0' column gets just the new digit, with no leading-zero tail.
        encoded = piece
    else:
        encoded = piece + digits

    self.state[column_choice] = int(encoded, 3)
    return True
def pBoard(b):
    """Print the board held in ``b.state`` as six lines of text.

    For each of the six row indices, every column value is converted with
    ``ternery``, left-padded with '0' to at least six characters, and the
    character at that row index is printed.  A newline ends each row.
    """
    for row in range(6):
        for column in b.state:
            cells = ternery(column).rjust(6, '0')
            print(cells[row], end='')
        print()
def isFull(self):
    """Return True when all seven columns are filled, otherwise False.

    A column counts as filled when ``ternery`` of its stored value is
    exactly six characters long.  All seven columns (indices 0-6 of
    ``self.state``) are always inspected.
    """
    filled = sum(
        1 for index in range(7)
        if len(ternery(self.state[index])) == 6
    )
    return filled == 7
def neural_network_choose(board, player, model):
    """Let ``model`` pick a column and play it for ``player`` on ``board``.

    Every column in ``board.state`` is converted with ``ternery`` (a column
    equal to ' ' is treated as an empty string), each digit is turned into
    an int with 2 remapped to -1, and the list is right-padded with zeros
    to six entries.  The concatenated values form a single-sample batch
    passed to ``model.predict``.  The index of the highest predicted value
    is handed to ``board.transition``.

    Fix over the original: the prediction for a one-sample batch is a
    nested sequence, so ``t.index(max(t))`` on the outer list always
    returned 0 and the chooser always played column 0.  The prediction is
    now flattened before the arg-max is taken.
    NOTE(review): this assumes ``model.predict`` returns one score per
    column for the single sample (Keras-style, batch axis first) - confirm
    against the model definition.

    The result of ``board.transition`` is not checked, so a prediction that
    points at a full column leaves the board unchanged (as before).

    Args:
        board: object exposing ``state`` and ``transition(column, player)``.
        player: forwarded unchanged to ``board.transition``.
        model: object exposing ``predict(array, batch_size=...)``.

    Returns:
        The same ``board`` object that was passed in.
    """
    inputs = []
    for col in board.state:
        tern = ternery(col) if col != ' ' else ''
        # Digit 2 is fed to the network as -1; other digits keep their value.
        cells = [-1 if int(digit) == 2 else int(digit) for digit in tern]
        # Bounded padding: never loops forever if a column is over-long.
        cells += [0] * (6 - len(cells))
        inputs += cells

    prediction = model.predict(np.array([inputs]), batch_size=10)
    # Drop the batch axis so the arg-max ranges over the per-column scores.
    scores = np.asarray(prediction).ravel()
    board.transition(int(np.argmax(scores)), player)
    return board
def gameOver(self):
    """Return True when the game has ended, recording a winner if any.

    The board in ``self.state`` is expanded into a grid of six-character
    columns (each ``ternery`` string left-padded with '0') and passed to
    ``findInARow`` for '1' and for '2'.  If any run returned for '1' has
    length 4, ``self.winner`` is set to '1'; otherwise the same check is
    made for '2'.  With no length-4 run, the result is whether
    ``self.isFull()`` reports a full board (``self.winner`` is not touched).
    """
    grid = []
    for column in self.state:
        digits = ternery(column)
        while len(digits) != 6:
            digits = '0' + digits
        grid.append(list(digits))

    runs_one = findInARow(grid, '1')
    runs_two = findInARow(grid, '2')

    if 4 in list(map(len, runs_one)):
        self.winner = '1'
        return True

    if 4 in list(map(len, runs_two)):
        self.winner = '2'
        return True

    # No winner: the game is over only if no moves remain.
    return bool(self.isFull())