Ejemplo n.º 1
0
def validate(players, n_games):
    """Play evaluation rounds with training disabled and average the rewards.

    A new ``Game`` is built around ``players`` with a ``PerfectDealer``,
    ``verbose=False`` and ``collect_stats=True``. Each player's ``training``
    attribute is set to ``False`` while the rounds are played and set to
    ``True`` again afterwards, including when a round raises.

    Args:
        players: Player objects. Each one gets its ``training`` attribute
            assigned, and its ``name`` is used as the key into
            ``game.collector.rewards``.
        n_games: Number of rounds to play; also the divisor used for the
            per-game averages.

    Returns:
        A ``(player_reward, dealer_reward)`` tuple: the per-game reward
        averaged over all players, and the per-game reward stored under
        the ``'dealer'`` key.

    Raises:
        ZeroDivisionError: If ``n_games`` is 0 or ``players`` is empty.
    """
    game = Game(players=players,
                dealer_type=PerfectDealer,
                verbose=False,
                collect_stats=True)

    try:
        # Turn training off for the duration of the evaluation.
        for player in players:
            player.training = False

        for _ in range(n_games):
            game.start_round()
    finally:
        # Turn training back on even if a round failed, so a caller that
        # handles the error does not continue with training disabled.
        for player in players:
            player.training = True

    rewards = game.collector.rewards

    # Average the dealer reward per game.
    dealer_reward = sum(rewards['dealer']) / n_games

    # Average each player's reward per game, then average across players.
    player_reward = 0
    for player in players:
        player_reward += sum(rewards[player.name]) / n_games
    player_reward /= len(players)

    return player_reward, dealer_reward
Ejemplo n.º 2
0
            with open('validation.pickle', 'rb') as f:
                validation = pickle.load(f)
        except FileNotFoundError:
            validation = []

    game = Game(players=players,
                dealer_type=PerfectDealer,
                verbose=False,
                collect_stats=False)

    # collect validation results here
    stats = []

    start_time = time.time()
    for i in range(TRAINING_ROUNDS):
        game.start_round()

        # validate
        if VALIDATE_EVERY > 0 and i % VALIDATE_EVERY == VALIDATE_EVERY - 1:
            print('VALIDATING...', end='', flush=True)
            stats.append(validate(players, VALIDATION_ROUNDS))
            print('PLAYER:', stats[-1][0], 'DEALER:', stats[-1][1])

            # also save stats
            with open('validation.pickle', 'wb') as f:
                pickle.dump(stats, f)

        # save
        if i % SAVE_EVERY == SAVE_EVERY - 1:
            print('SAVING --', i + 1)
            save_values(players, SAVE_DIR)