Example #1
            if next_move_val < min_val:
                min_val = next_move_val
                best_move = m
                # early cut-off: a negative value is assumed to be the lowest
                # possible score, so the minimizer stops searching here
                if next_move_val < 0:
                    break
    if i == 0:
        # at the root call, return the chosen move rather than its value
        return best_move

    return max_val if i % 2 == 0 else min_val


if x_first:
    print("valid moves are ", game.valid_moves)
    move = int(input("player one(X), make first move: "))
    game.make_move(move)

while not game.is_over():
    if game.player == X:
        print("valid moves are ", game.valid_moves)
        move = int(input("please make move: "))
    else:
        move = minimax(game, 0)
        print("computer is now making move ", move)
    game.make_move(move)
    print(game)

if game.winner_exists():
    print("You win!" if game.get_winner() == X else "You lose!")
else:
    print("player one and two ties")
Example #2
                    game.board.reshape(-1))]

            # the training target for the chosen move is the observed reward
            y[0][move] = reward
            loss = criterion(q, y)
            losses += loss.item()
            counter += 1
            if counter % 1000 == 0:
                # record the average loss over the last 1000 updates
                avg_losses.append(str(losses / 1000))
                losses = 0

            # standard PyTorch update: clear old gradients, backpropagate, step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            y_turn = False
        else:
            # the opponent simply plays a uniformly random valid move
            game.make_move(get_random_valid_move(game))
            y_turn = True
        # re-read the board as the state for the next iteration
        state = t.Tensor(game.board.reshape(1, -1))
        if game.is_over():
            break
    # linearly decay the exploration rate, keeping a floor of 0.05
    if epsilon > 0.05:
        epsilon -= 2 / epochs

# persist the trained weights and the recorded loss history
t.save(model.state_dict(), "q_learning_meta/model.pth")
with open("q_learning_meta/losses_over_time", "w") as f:
    f.write("\n".join(avg_losses))

end = time.time()
print(end - start)  # total training time in seconds
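
Example #2 likewise starts midway through the training step: the forward pass, the move selection, and the reward computation sit just above the first visible line. Below is a minimal sketch of that missing part, assuming model is a small PyTorch network mapping the flattened board to one Q-value per cell; get_reward is an assumed helper, while get_random_valid_move is the helper already used in the excerpt.

# Sketch only, under the assumptions stated above; not the original code.
import random
import torch as t

q = model(state)                   # predicted Q-values, one per board cell
y = q.detach().clone()             # targets start as a copy of the prediction

if random.random() < epsilon:      # epsilon-greedy exploration
    move = get_random_valid_move(game)
else:
    # exploitation: take the valid cell with the highest predicted Q-value
    move = max(game.valid_moves, key=lambda m: q[0][m].item())

game.make_move(move)
reward = get_reward(game)          # e.g. +1 for a win, -1 for a loss, 0 otherwise
# ...the excerpt then continues with y[0][move] = reward and the gradient step

The saved weights can later be restored with model.load_state_dict(t.load("q_learning_meta/model.pth")), provided the same architecture is instantiated first.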