예제 #1
0
def reinforceGame():
    """Play a series of games with the reinforcement player and report results.

    Builds a ``nets.NeuralNet``, restores its weights from ``reinforce.pt``
    when that file exists, then plays ``trials`` games (taken from
    ``sys.argv[2]``, default 10) through ``GameManager.start_reinforce()``.
    The value returned by each game is printed and collected; afterwards the
    mean base-2 logarithm of those values, the sorted values, the largest
    ``playerAI.max_heur`` seen, and the share of games ending on
    1024/2048/4096/8192 are printed. Finally the model's state dict is
    written back to ``reinforce.pt``.

    If ``trials`` is not positive, a message is printed and the function
    returns without playing or saving, instead of dividing by zero.
    """

    # Hyperparameters
    input_size = 21
    num_layers = 3
    num_hidden_units = 10
    output_size = 1
    learning_rate = 0.005
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = nets.NeuralNet(input_size, num_hidden_units, num_layers,
                           output_size).to(device)

    if os.path.exists("reinforce.pt"):
        # map_location lets a checkpoint saved on a GPU machine be restored
        # on a CPU-only machine (and vice versa).
        model.load_state_dict(torch.load('reinforce.pt', map_location=device))
        print("Loaded reinforce.pt")

    trials = int(sys.argv[2]) if len(sys.argv) > 2 else 10
    if trials <= 0:
        # The summary below divides by `trials`; bail out early instead.
        print("No games requested; nothing to do.")
        return

    tot = 0
    scores = []
    max_heur = -float('inf')

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # A single player instance is reused across all games.
    playerAI = PlayerAIReinforce(model, criterion, optimizer, device)

    for _ in range(trials):
        computerAI = ComputerAI()
        displayer = Displayer()
        gameManager = GameManager(4, playerAI, computerAI, displayer)
        maxTile = gameManager.start_reinforce()
        scores.append(maxTile)
        print(maxTile)
        # log2 is exact for powers of two, unlike math.log(x, 2).
        tot += math.log2(maxTile)
        max_heur = max(max_heur, playerAI.max_heur)

    print(tot / trials)
    print(sorted(scores, reverse=True))
    print("Max value of heuristic: ", max_heur)
    for tile in (1024, 2048, 4096, 8192):
        print(f"percentage of {tile}s: ",
              round(100 * scores.count(tile) / trials, 2), "%")

    torch.save(playerAI.model.state_dict(), 'reinforce.pt')
 def __init__(self, weights = (1, 1, 2, 5), memo_dic = None):
     """Set up the player's search state, heuristic weights and network.

     Args:
         weights: Weights for the heuristic; copied into a new list so the
             caller's sequence is never modified.
         memo_dic: Optional dict used as the memo of previously computed
             states. When omitted, each instance gets its own fresh dict
             (a shared ``{}`` default would leak cached entries between
             instances).

     The network is built from ``input_size``, ``num_hidden_units``,
     ``num_layers`` and ``output_size``, which are not defined in this
     method and are expected to exist in an enclosing scope. Its weights
     are loaded from ``0.pt`` onto the CPU.
     """
     #store previously computed states to reduce redundant computation
     self.memo = {} if memo_dic is None else memo_dic
     self.timed_out = False
     #self.MemoCalls = 0
     #self.totalCalls = 0
     #weights for the heuristic
     self.weights = list(weights)
     #time limit to make sure we don't use too much time
     #(presumably seconds -- TODO confirm against the search loop)
     self.time_limit = .2
     #upper bound on heuristic function for alpha-beta pruning (only for expectimax)
     self.UPPER_BOUND = 50 #18 #10972520
     self.max_heur = -float('inf')
     self.outstr = []
     self.model = nets.NeuralNet(input_size, num_hidden_units, num_layers, output_size)
     # map_location forces CPU tensors regardless of where 0.pt was saved.
     self.model.load_state_dict(torch.load('0.pt', map_location=torch.device('cpu')))
# Training configuration for this script.
batch_size, threshold_index, threshold = 1000, -1, 41
# NOTE(review): this flag is set to True while both datasets below are built
# with classification=True -- confirm which mode is intended.
regression = True


# Build the training and test datasets from their CSV files with the same
# threshold settings.
train_set = dataloader.Dataset2048(
    path="2048_training_data.csv",
    threshold_index=threshold_index,
    threshold=threshold,
    classification=True,
)
test_set = dataloader.Dataset2048(
    path="2048_testing_data.csv",
    threshold_index=threshold_index,
    threshold=threshold,
    classification=True,
)

print(f"Size of training set: {len(train_set)}")
print(f"Size of test set: {len(test_set)}")

# Wrap the training set so it is served in shuffled mini-batches.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
)

model = nets.NeuralNet(
    input_size, num_hidden_units, num_layers, output_size
).to(device)

# Cross-entropy loss, optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of mini-batches produced by the loader per pass.
total_step = len(train_loader)

# Train the model
for epoch in range(num_epochs):

    for i, (instances, targets) in enumerate(train_loader):
    
        instances = instances.to(device)
        targets = targets.to(device)