def evaluate_model_on_testdata(model, filepath, extended_model=True, threshold=0.7):
    """Measure how often above-threshold predictions hit a correct index.

    Every game stored in ``filepath`` is unpickled, 26 samples per game are
    built with ``prepare_extended_data_inference`` and fed to
    ``model.predict``. Each output whose score exceeds ``threshold`` counts as
    a positive prediction; it is a correct positive when its index is one of
    the positions where the target vector equals 1.

    Args:
        model: Object exposing ``predict``; called with a two-element input
            list (card sequence batch, auxiliary hand batch).
        filepath: Path of the pickle file holding the test games.
        extended_model: Only the extended model is evaluated here. When this
            is False the games are still read but no prediction is made.
        threshold: Minimum score for an output to count as a positive.

    Returns:
        Tuple ``(precision, num_pred)`` where ``precision`` is
        correct positives / all positives and ``num_pred`` is the number of
        positives. ``precision`` is 0.0 when there were no positives at all
        (previously this raised ZeroDivisionError).
    """
    num_games = num_games_in_file(filepath)
    count_correct_positives = 0
    count_false_positives = 0
    num_pred = 0

    with open(filepath, 'rb') as f:
        for _ in range(num_games):
            # NOTE: pickle.load can execute arbitrary code; only use it on
            # trusted data files.
            data_dic = pickle.load(f)
            if not extended_model:
                continue

            card_sequences, aux_input_hands, hands_to_predict = \
                prepare_extended_data_inference(data_dic, num_samples=26)

            for i in range(26):
                x = [
                    np.array([card_sequences[i]]),
                    np.array([aux_input_hands[i]]),
                ]
                y = hands_to_predict[i]
                predictions = model.predict(x)[0]
                # k=96 requests the 96 best-ranked outputs; presumably that
                # is the whole output vector - TODO confirm.
                top_indices = top_k_indices(predictions, k=96)
                correct_indices = np.where(y == 1)[0]

                for index in top_indices:
                    # `index` already is a position inside `predictions`.
                    # The former `top_indices[index]` looked it up a second
                    # time and therefore inspected an unrelated entry.
                    if predictions[index] > threshold:
                        num_pred += 1
                        if index in correct_indices:
                            count_correct_positives += 1
                        else:
                            count_false_positives += 1

    num_positives = count_correct_positives + count_false_positives
    precision = count_correct_positives / num_positives if num_positives else 0.0
    return precision, num_pred
import matplotlib.pyplot as plt
from schafkopf.players.data.load_data import num_games_in_file

# Score thresholds (26 values). Each accuracies_* list below also holds 26
# values, presumably one result per threshold - verify against the script
# that produced them.
thresholds = [
    0.001, 0.005, 0.01, 0.015, 0.02, 0.03, 0.05,
    0.1, 0.15000000000000002, 0.2, 0.25, 0.30000000000000004,
    0.35000000000000003, 0.4, 0.45, 0.5, 0.55, 0.6000000000000001,
    0.65, 0.7000000000000001, 0.75, 0.8, 0.8500000000000001,
    0.9, 0.9500000000000001, 0.99,
]

# Number of games in each test file, read in the order solo, wenz, partner.
num_games_solo, num_games_wenz, num_games_partner = (
    num_games_in_file(test_file)
    for test_file in (
        '../data/test_data_solo.p',
        '../data/test_data_wenz.p',
        '../data/test_data_partner.p',
    )
)

# Recorded results for the solo test data.
accuracies_solo = [
    0.18656352516150465,
    0.25040410362252025,
    0.2952363783856866,
    0.3143047401174231,
    0.3244600869148532,
    0.3359061365963591,
    0.3504456352045063,
    0.375468472858878,
    0.39238518241046705,
    0.4063424391525894,
    0.42030425231959606,
    0.4363004822990868,
    0.4558156091483492,
    0.47930930568948893,
    0.5070730125368429,
    0.5417167220220008,
    0.5866302864938608,
    0.6415736420119106,
    0.7004273819173237,
    0.7555690256099409,
    0.8052702702702703,
    0.8479860217031451,
    0.8885108610311436,
    0.9334669338677355,
    0.9767972613160898,
    0.9912854030501089,
]

# Recorded results for the wenz test data.
accuracies_wenz = [
    0.16940401467243643,
    0.207739178145485,
    0.24553515034192674,
    0.2694933363324523,
    0.28522364058684563,
    0.3043627750611247,
    0.3236259769039694,
    0.3460311710787125,
    0.36146232493233627,
    0.3755506536687604,
    0.38847746090156393,
    0.4014680704144083,
    0.414751717657396,
    0.43055170953397076,
    0.4526612539467749,
    0.480916274754455,
    0.516091741422362,
    0.558817615121657,
    0.6099387338325392,
    0.6611188811188812,
    0.7297242083758938,
    0.7859462651313847,
    0.8403470715835141,
    0.9000684462696783,
    0.9368556701030928,
    0.9929577464788732,
]

# Recorded results for the partner-mode test data.
accuracies_partner = [
    0.23075312909419135,
    0.3208712139203886,
    0.33223830569140617,
    0.3356925860699358,
    0.3377281598216617,
    0.34054074826180786,
    0.34466917116694057,
    0.35394023859309653,
    0.3647043769973656,
    0.37847533936115074,
    0.3948749422599224,
    0.4148928856200654,
    0.44062376657746344,
    0.47494868126399603,
    0.5174121932266268,
    0.5655079030091944,
    0.6138481588317729,
    0.661810043975649,
    0.7089967537486473,
    0.757091379469648,
    0.8033961376419513,
    0.8474087721383573,
    0.8903728670739415,
    0.9322002007024586,
    0.9696173254835997,
    0.9892715979672502,
]
from schafkopf.suits import SUITS
from schafkopf.ranks import OBER, UNTER
from schafkopf.game_modes import SOLO
from schafkopf.players.data.load_data import num_games_in_file
import pickle
import random

# Synthetic data from SOLO games: the expected value is always the best
# OBER/UNTER that is still in the game.
filename = 'train_data.p'
# Number of pickled game records to read back from the file.
num = num_games_in_file(filename)

with open(filename, 'rb') as f:
    for game_num in range(num):
        # One pickled game dictionary per iteration.
        # NOTE: pickle.load can execute arbitrary code; only use trusted files.
        data_dic = pickle.load(f)
        # Only games whose game mode starts with SOLO are processed.
        if data_dic['game_mode'][0] == SOLO:
            off_player = data_dic['declaring_player']
            # Keep only the first element of every played-card entry
            # (presumably the card itself, without the player - TODO confirm).
            played_cards = [data[0] for data in data_dic['played_cards']]
            # All OBER cards followed by all UNTER cards, one per suit.
            high_trumpcards = [(OBER, suit) for suit in SUITS] + [(UNTER, suit) for suit in SUITS]
            # NOTE(review): no break/return is visible for this loop and
            # best_trumpcard is never used; the script looks unfinished.
            while True:
                # NOTE(review): random.choice expects a non-empty sequence, so
                # passing the int 27 raises TypeError. random.randrange(27) or
                # random.choice(range(27)) was presumably intended - confirm
                # the wanted range before changing it.
                seq_len = random.choice(27)
                # Prefix of the played cards with the sampled length.
                card_sequence = played_cards[:seq_len]
                best_trumpcard = None
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 for a zero denominator."""
    return numerator / denominator if denominator else 0.0


def _new_top5_tally():
    """Create an empty tally for the five ranked predictions of each sample."""
    return {'rank_hits': [0] * 5, 'true_positives': 0, 'false_positives': 0}


def _update_top5_tally(tally, predictions, top_indices, correct_indices,
                       threshold, excluded_indices=()):
    """Add the five top-ranked predictions of one sample to ``tally``.

    A rank is a hit when its index is in ``correct_indices`` and not in
    ``excluded_indices``. Independently of that, a prediction above
    ``threshold`` counts as a true positive when it is a hit and as a false
    positive otherwise.
    """
    for rank in range(5):
        index = top_indices[rank]
        is_hit = index in correct_indices and index not in excluded_indices
        if is_hit:
            tally['rank_hits'][rank] += 1
        if predictions[index] > threshold:
            tally['true_positives' if is_hit else 'false_positives'] += 1


def _print_top5_report(title, tally, num_samples, threshold):
    """Print the per-rank hit rates and the above-threshold precision."""
    print(title)
    rank_labels = ('best', 'sec', 'third', 'fourth', 'fifth')
    for label, hits in zip(rank_labels, tally['rank_hits']):
        print(hits, ' / ', num_samples,
              ' Predicted {} card in : '.format(label),
              _safe_ratio(hits, num_samples))
    # Every hit on any of the five ranks is one correct prediction, so the
    # total equals the sum of the per-rank counters.
    print('Average number of correct predictions: ',
          _safe_ratio(sum(tally['rank_hits']), num_samples))
    num_positives = tally['true_positives'] + tally['false_positives']
    print('Bigger then threshold {} : {} / {} correct, {}'.format(
        threshold, tally['true_positives'], num_positives,
        _safe_ratio(tally['true_positives'], num_positives)))


def evaluate_model_on_testdata(model, filepath, extended_model=True, threshold=0.7):
    """Evaluate the top-5 predictions of a model on pickled test games.

    For each game in ``filepath`` 26 samples are built and predicted. For
    every sample the five highest-ranked outputs are compared with the
    positions where the target vector equals 1, and a report is printed.

    With ``extended_model=True`` inputs come from
    ``prepare_extended_data_inference`` and only one analysis is made. With
    ``extended_model=False`` inputs come from ``prepare_data_inference`` and
    two analyses are made: one over the raw output ("all hands") and one
    where the 32 outputs belonging to the current player (as given by
    ``find_curr_player``) are zeroed first ("only opponent hands").

    Args:
        model: Object exposing ``predict``.
        filepath: Path of the pickle file holding the test games.
        extended_model: Selects the input preparation described above.
        threshold: Minimum score for a prediction to count as a positive.

    Returns:
        Correct positives / all positives of the opponent-hand analysis.
        All ratios, including this one, are 0.0 when their denominator is
        zero (previously this raised ZeroDivisionError).
    """
    samples_per_game = 26
    num_games = num_games_in_file(filepath)
    opponent_tally = _new_top5_tally()
    all_hands_tally = _new_top5_tally()

    with open(filepath, 'rb') as f:
        for _ in range(num_games):
            # NOTE: pickle.load can execute arbitrary code; only use it on
            # trusted data files.
            data_dic = pickle.load(f)

            if extended_model:
                card_sequences, aux_input_hands, hands_to_predict = \
                    prepare_extended_data_inference(
                        data_dic, num_samples=samples_per_game)
                for i in range(samples_per_game):
                    x = [
                        np.array([card_sequences[i]]),
                        np.array([aux_input_hands[i]]),
                    ]
                    predictions = model.predict(x)[0]
                    correct_indices = np.where(hands_to_predict[i] == 1)[0]
                    _update_top5_tally(
                        opponent_tally, predictions,
                        top_k_indices(predictions, k=5),
                        correct_indices, threshold)
            else:
                x_list, y_list = prepare_data_inference(
                    data_dic, num_samples=samples_per_game)
                for i in range(samples_per_game):
                    x = x_list[i]
                    curr_player = find_curr_player(x)
                    curr_player_hand_indices = range(
                        curr_player * 32, curr_player * 32 + 32)
                    predictions = model.predict(np.array([x]))[0]
                    correct_indices = np.where(y_list[i] == 1)[0]

                    # General prediction rate over all hands.
                    _update_top5_tally(
                        all_hands_tally, predictions,
                        top_k_indices(predictions, k=5),
                        correct_indices, threshold)

                    # Opponent hands only: work on a copy of the scores
                    # instead of running the model a second time on the
                    # same input, and blank out the current player's cards.
                    opponent_predictions = np.array(predictions)
                    opponent_predictions[curr_player_hand_indices] = 0
                    _update_top5_tally(
                        opponent_tally, opponent_predictions,
                        top_k_indices(opponent_predictions, k=5),
                        correct_indices, threshold,
                        excluded_indices=curr_player_hand_indices)

    num_samples = num_games * samples_per_game
    if not extended_model:
        _print_top5_report('Analysis all hands : ', all_hands_tally,
                           num_samples, threshold)
    _print_top5_report('Analysis only opponent hands : ', opponent_tally,
                       num_samples, threshold)

    return _safe_ratio(
        opponent_tally['true_positives'],
        opponent_tally['true_positives'] + opponent_tally['false_positives'])
def test_num_games_in_file():
    """Check that ``ld.num_games_in_file`` reports 2751 games for ``filepath``."""
    counted_games = ld.num_games_in_file(filepath)
    expected_games = 2751
    assert counted_games == expected_games
def evaluate_model_on_testdata(modelpath, filepath, extended_model):
    """Print how often the model's best legal card equals the card played.

    The model is loaded from ``modelpath``. For each game in ``filepath`` 27
    samples are built (``prepare_data_trickplay`` or
    ``prepare_extended_data_trickplay``) and predicted. The scores of all
    cards that ``get_possible_cards`` does not allow are set to zero, and the
    sample counts as correct when the highest remaining score is at the
    position of the target card.

    Args:
        modelpath: Path passed to ``keras.models.load_model``.
        filepath: Path of the pickle file holding the test games. The game
            mode is derived from it: a path containing ``'solo'`` is treated
            as (SOLO, HEARTS), one containing ``'wenz'`` as (WENZ, None),
            anything else as (PARTNER_MODE, ACORNS).
        extended_model: Selects the extended (two-input) data preparation.

    Raises:
        ValueError: If the card to predict does not occur in the game's
            played cards. Previously this gave a NameError on the first
            sample and silently reused the previous player afterwards.
        AssertionError: If the card to predict is not in the hand of the
            player who played it.
    """
    samples_per_game = 27
    model = keras.models.load_model(modelpath)
    num_games = num_games_in_file(filepath)

    # The game mode depends only on the file name, so determine it once.
    if 'solo' in filepath:
        game_mode = (SOLO, HEARTS)
    elif 'wenz' in filepath:
        game_mode = (WENZ, None)
    else:
        game_mode = (PARTNER_MODE, ACORNS)

    # Card (a, b) sits at output position a * 4 + b; the list index of each
    # card therefore equals its output position.
    deck = [(position // 4, position % 4) for position in range(32)]

    count = 0
    with open(filepath, 'rb') as f:
        for _ in range(num_games):
            # NOTE: pickle.load can execute arbitrary code; only use it on
            # trusted data files.
            data_dic = pickle.load(f)
            if not extended_model:
                x_list, y_list = prepare_data_trickplay(
                    data_dic, num_samples=samples_per_game)
            else:
                card_seq, aux_hands, y_list = prepare_extended_data_trickplay(
                    data_dic, num_samples=samples_per_game)

            for i in range(samples_per_game):
                y = y_list[i]
                if not extended_model:
                    predictions = model.predict(np.array([x_list[i]]))[0]
                else:
                    x = [np.array([card_seq[i]]), np.array([aux_hands[i]])]
                    predictions = model.predict(x)[0]

                card_to_predict = enc.decode_one_hot_card(y)

                # Collect everything played before the target card and find
                # out who played the target card.
                card_sequence = []
                for card, pl in data_dic['played_cards']:
                    if card == card_to_predict:
                        player = pl
                        break
                    card_sequence.append((card, pl))
                else:
                    raise ValueError(
                        'Card to predict was not found in played cards')

                full_hand = data_dic['player_hands'][player]
                assert card_to_predict in full_hand, \
                    'Card to predict was not in player hand'

                # Remove the cards this player had already played.
                already_played = [
                    card for card, pl in card_sequence if pl == player
                ]
                player_hand = [
                    card for card in full_hand if card not in already_played
                ]

                options = get_possible_cards(
                    game_mode, card_sequence, player_hand)

                # Mask on a real copy: slicing a NumPy array with [:] only
                # creates a view and would modify `predictions` itself.
                pred_actual = np.array(predictions)
                for position, card in enumerate(deck):
                    if card not in options:
                        pred_actual[position] = 0

                if np.argmax(y) == np.argmax(pred_actual):
                    count += 1

    num_samples = num_games * samples_per_game
    # Report 0.0 instead of raising ZeroDivisionError for an empty file.
    accuracy = count / num_samples if num_samples else 0.0
    print(count, ' / ', num_samples, ' Accuracy : ', accuracy)