Example #1
        # Save the hands as training data for the betting NN
        init_hands[p].sort()
        x_train.append(init_hands[p].get_cards_as_matrix())
        y_train.append(game.tricks[p])

    # Save the data from the game
    Hands.append(game.initialHands)
    History.append(game.h)
    Bets.append(game.bets)
    Scores.append(scores)
    Tricks.append(game.tricks)

    # Save the game state as training data for the playing NN
    for rd in range(n):
        # Get the game state up until this round
        round_state = game.action_state(rd)

        # Save data for training
        for p in range(4):
            # Make the state relative to player p
            state = game.relativize_state(round_state, p, rd)
            for key in state:
                x_train_RL[key].append(state[key])

            # Target Q value (Bellman backup)
            if rd == n - 1:  # Final round of the episode; reward is the team's final trick total
                y_hat = game.tricks[p] + game.tricks[(p + 2) % 4]
                # y_hat = scores[p] + scores[(p + 2) % 4]
            else:  # More rounds left; bootstrap from the target network's best next action
                y_hat = gamma * np.max(
                    target_action_model.predict(
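
Example #1 computes a DQN-style Bellman target for each saved state: on the final round the label is the team's observed trick count, while on earlier rounds it is the discounted maximum Q value of the next state under a frozen target network. Below is a minimal, self-contained sketch of that computation; the function name, its arguments, and the gamma default are illustrative assumptions, not the project's actual API.

# Sketch only: the Bellman target from Example #1, with hypothetical names.
import numpy as np

def q_target(next_state, final_team_tricks, target_model,
             gamma=0.95, is_last_round=False):
    # Terminal round: the target is the observed team reward.
    if is_last_round:
        return final_team_tricks
    # Intermediate round: no immediate reward in this scheme, so
    # bootstrap from the frozen target network's best next action.
    return gamma * np.max(target_model.predict(next_state))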
Example #2
    History.append(game.h)
    Bets.append(game.bets)
    Scores.append(scores)
    Tricks.append(game.tricks)

    # Save data for training, using every player's perspective
    for p in range(4):  # alternative: rnd.sample(range(4), 1) to train on one random player
        # Save the hands as training data for the betting NN
        init_hands[p].sort()
        x_train.append(init_hands[p].get_cards_as_matrix())
        y_train.append(game.tricks[p])

        # Save the game state as training data for the playing NN
        for rd in range(n):
            # Get and update the game state based on the action taken
            state = game.action_state(p, rd)
            for key in state:
                x_train_RL[key].append(state[key])
            # Reward: +1 if the player's team took this trick, plus the
            # team's final score discounted back from the last round
            my_team_score = int(game.T[rd] == p or game.T[rd] == (p + 2) % 4)
            discounted_reward = gamma ** (n - 1 - rd) * (
                scores[p] + scores[(p + 2) % 4])
            y_train_RL.append(discounted_reward + my_team_score)

    # Train the betting NN
    if t % train_interval == 0:
        print('Training betting...')
        hist = batch_loss_history()
        bet_model.fit(np.asarray(x_train),
                      np.asarray(y_train),
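
Example #2 swaps the bootstrapped target for a Monte-Carlo-style one: each round's label is the team's final score discounted back from the last round, plus 1 if the player's team took that trick. A compact sketch under the same caveat (all names and the gamma default are hypothetical):

# Sketch only: the per-round regression target from Example #2.
def play_target(rd, n_rounds, team_final_score, team_took_trick, gamma=0.95):
    # Team's final score, discounted back from the last round (rd = n_rounds - 1).
    discounted_final = gamma ** (n_rounds - 1 - rd) * team_final_score
    # +1 immediate reward if the player's team won this trick.
    return discounted_final + int(team_took_trick)

# e.g. play_target(rd=0, n_rounds=13, team_final_score=7, team_took_trick=True)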