def validate(players, n_games):
    """Play ``n_games`` evaluation rounds with training off and average the rewards.

    A fresh ``Game`` against a ``PerfectDealer`` is created with
    ``collect_stats=True``. Every player's ``training`` attribute is set to
    ``False`` while the rounds run and set back to ``True`` afterwards, even
    if a round raises.

    Args:
        players: Sized iterable of player objects. Each one needs a writable
            ``training`` attribute and a ``name`` that is a key in
            ``game.collector.rewards``.
        n_games: Number of rounds to play; also the divisor for the averages.

    Returns:
        A ``(player_reward, dealer_reward)`` tuple. ``dealer_reward`` is the
        sum of the collected ``'dealer'`` rewards divided by ``n_games``;
        ``player_reward`` is the same per-player figure averaged over all
        players.

    Raises:
        ZeroDivisionError: If ``n_games`` is 0 or ``players`` is empty.
    """
    game = Game(players=players, dealer_type=PerfectDealer,
                verbose=False, collect_stats=True)

    try:
        # turn training off
        for player in players:
            player.training = False

        for _ in range(n_games):
            game.start_round()
    finally:
        # turn training back on, even when a round fails, so the caller's
        # players are never left stuck in evaluation mode
        for player in players:
            player.training = True

    rewards = game.collector.rewards

    # average the dealer reward
    dealer_reward = sum(rewards['dealer']) / n_games

    # average player rewards: per-player mean over the games, then the mean
    # of those values over the players
    player_reward = 0
    for player in players:
        player_reward += sum(rewards[player.name]) / n_games
    player_reward /= len(players)

    return player_reward, dealer_reward
# Load the validation results stored in 'validation.pickle', if the file
# exists; otherwise start from an empty list.
# NOTE(review): pickle.load can execute arbitrary code from the file. This is
# presumably fine because the file is written by this same script -- confirm.
try:
    with open('validation.pickle', 'rb') as f:
        validation = pickle.load(f)
except FileNotFoundError:
    validation = []

game = Game(players=players, dealer_type=PerfectDealer,
            verbose=False, collect_stats=False)

# collect validation results here
# NOTE(review): `stats` starts empty and is dumped to the same
# 'validation.pickle' that was read above, so the first validation pass
# replaces whatever that file held. `validation` is not used in this section;
# confirm whether `stats` was meant to be seeded from it.
stats = []
start_time = time.time()
for i in range(TRAINING_ROUNDS):
    game.start_round()

    # validate on the last round of every VALIDATE_EVERY-sized window;
    # a non-positive VALIDATE_EVERY disables validation
    if VALIDATE_EVERY > 0 and i % VALIDATE_EVERY == VALIDATE_EVERY - 1:
        print('VALIDATING...', end='', flush=True)
        stats.append(validate(players, VALIDATION_ROUNDS))
        print('PLAYER:', stats[-1][0], 'DEALER:', stats[-1][1])
        # also save stats
        with open('validation.pickle', 'wb') as f:
            pickle.dump(stats, f)

    # save on the last round of every SAVE_EVERY-sized window; guarded like
    # the validation check so SAVE_EVERY == 0 disables saving instead of
    # raising ZeroDivisionError
    if SAVE_EVERY > 0 and i % SAVE_EVERY == SAVE_EVERY - 1:
        print('SAVING --', i + 1)
        save_values(players, SAVE_DIR)