def reinforceGame():
    # Hyperparameters
    input_size = 21
    num_layers = 3
    num_hidden_units = 10
    output_size = 1
    learning_rate = 0.005

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = nets.NeuralNet(input_size, num_hidden_units, num_layers, output_size).to(device)
    if os.path.exists("reinforce.pt"):
        # map_location keeps the checkpoint loadable on CPU-only machines
        model.load_state_dict(torch.load('reinforce.pt', map_location=device))
        print("Loaded reinforce.pt")

    tot = 0
    trials = int(sys.argv[2]) if len(sys.argv) > 2 else 10
    scores = []
    max_heur = -float('inf')

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    playerAI = PlayerAIReinforce(model, criterion, optimizer, device)

    # Play the requested number of games, reusing the same player (and model) throughout
    for _ in range(trials):
        computerAI = ComputerAI()
        displayer = Displayer()
        gameManager = GameManager(4, playerAI, computerAI, displayer)
        maxTile = gameManager.start_reinforce()
        scores.append(maxTile)
        print(maxTile)
        tot += math.log(maxTile, 2)
        max_heur = max(max_heur, playerAI.max_heur)

    # Summary statistics over all trials
    print(tot / trials)  # mean log2 of the max tile reached
    print(sorted(scores, reverse=True))
    print("Max value of heuristic: ", max_heur)
    print("percentage of 1024s: ", round(100 * scores.count(1024) / trials, 2), "%")
    print("percentage of 2048s: ", round(100 * scores.count(2048) / trials, 2), "%")
    print("percentage of 4096s: ", round(100 * scores.count(4096) / trials, 2), "%")
    print("percentage of 8192s: ", round(100 * scores.count(8192) / trials, 2), "%")

    torch.save(playerAI.model.state_dict(), 'reinforce.pt')
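The function reads the trial count from sys.argv[2], so it is meant to be launched from a command-line entry point. A minimal sketch of such an entry point follows; the "reinforce" mode name and the dispatch on sys.argv[1] are assumptions, since the excerpt only guarantees that the trial count comes from sys.argv[2].

if __name__ == '__main__':
    # Hypothetical invocation: python <script>.py reinforce 100
    if len(sys.argv) > 1 and sys.argv[1] == 'reinforce':
        reinforceGame()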
def __init__(self, weights=[1, 1, 2, 5], memo_dic={}):
    # Store previously computed states to reduce redundant computation
    self.memo = memo_dic
    self.timed_out = False
    #self.MemoCalls = 0
    #self.totalCalls = 0
    # Weights for the heuristic
    self.weights = list(weights)
    # Time limit to make sure we don't use too much time
    self.time_limit = .2
    # Upper bound on the heuristic function for alpha-beta pruning (only for expectimax)
    self.UPPER_BOUND = 50  #18 #10972520
    self.max_heur = -float('inf')
    self.outstr = []
    self.model = nets.NeuralNet(input_size, num_hidden_units, num_layers, output_size)
    self.model.load_state_dict(torch.load('0.pt', map_location=torch.device('cpu')))
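For reference, a minimal sketch of how a heuristic score could be read out of the network loaded above. The method name nn_heuristic is illustrative, and it assumes the caller supplies a ready-made feature vector of length input_size and that output_size is 1 (a single scalar score); the actual board encoding lives elsewhere in the project.

def nn_heuristic(self, features):
    # features: a length-input_size list/array encoding the board state
    self.model.eval()
    with torch.no_grad():
        x = torch.tensor(features, dtype=torch.float32).unsqueeze(0)  # shape (1, input_size)
        return self.model(x).item()  # assumes a single-output (scalar) network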
batch_size = 1000
threshold_index = -1
threshold = 41
regression = True

train_set = dataloader.Dataset2048(path="2048_training_data.csv",
                                   threshold_index=threshold_index,
                                   threshold=threshold,
                                   classification=True)
test_set = dataloader.Dataset2048(path="2048_testing_data.csv",
                                  threshold_index=threshold_index,
                                  threshold=threshold,
                                  classification=True)
print("Size of training set:", len(train_set))
print("Size of test set:", len(test_set))

# Load training data into a PyTorch DataLoader to allow batch training
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           batch_size=batch_size,
                                           shuffle=True)

model = nets.NeuralNet(input_size, num_hidden_units, num_layers, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_step = len(train_loader)

# Train the model
for epoch in range(num_epochs):
    for i, (instances, targets) in enumerate(train_loader):
        instances = instances.to(device)
        targets = targets.to(device)
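        # The excerpt ends here; the following is a minimal sketch of how the
        # inner loop typically continues for this cross-entropy setup, reusing
        # the model, criterion, optimizer, and total_step defined above. The
        # logging interval of 100 steps is an arbitrary choice, not from the source.
        outputs = model(instances)           # forward pass
        loss = criterion(outputs, targets)   # cross-entropy classification loss

        optimizer.zero_grad()                # backward pass and parameter update
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))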