import os
import pickle
import re

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Repo-local helpers used throughout this file; the module paths below are
# assumptions -- adjust them to this repository's actual layout:
# from utils.global_utils import save_pickle, Logger
# from utils.pairwise_utils import (create_new_data, create_new_dataset,
#                                   create_sets_of_20_from_x_for_pairwise_comparisions,
#                                   find_which_schedule_this_belongs_to)
# from base_testing_environment.prolonet import ProLoNet


class ProLoTrain:
    """
    Trains a BDT with a fixed alpha on the pointwise scheduling task.
    Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_pairwise.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=16,
                              output_dim=1,
                              bayesian_embedding_dim=None,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        self.opt = torch.optim.RMSprop(self.model.parameters())

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains the BDT. Randomly samples a schedule and a timestep within that
        schedule, builds one pointwise example per task in the corresponding
        set of twenty, and trains on each.
        """
        training_done = False
        criterion = torch.nn.BCELoss()
        sig = torch.nn.Sigmoid()

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            set_of_twenty = np.random.choice(self.start_of_each_set_twenty)
            truth = self.Y[set_of_twenty]

            # index of the feature vector of the action actually taken
            phi_i_num = truth + set_of_twenty

            running_loss_predict_tasks = 0
            num_iterations_predict_task = 0

            # iterate over the set of twenty tasks
            for counter in range(set_of_twenty, set_of_twenty + 20):
                # the chosen task is the single positive example
                if counter == phi_i_num:
                    label = torch.ones((1, 1))
                else:
                    label = torch.zeros((1, 1))
                feature_input = np.asarray(self.X[counter])

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(label.cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(label)

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                if counter == phi_i_num:
                    # upweight the lone positive example in each set of twenty
                    loss *= 25
                if torch.isnan(loss):
                    print('nan occurred at iteration ', self.total_iterations,
                          ' at', num_iterations_predict_task)

                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # record the average loss over this set of twenty
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            self.total_iterations += 1

            if self.total_iterations % 500 == 499:
                print('total loss (average for each 40, averaged) at iteration ',
                      self.total_iterations, ' is ', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 10000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, model, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules,
        roughly 20% of the data) and saves the results to disk. Called after
        training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_pairwise.pkl'
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_data(num_schedules, data)

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []
        sig = torch.nn.Sigmoid()

        if load_in_model:
            model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar')['nn_state_dict'])

        for j in range(0, num_schedules):
            schedule_bounds = schedule_array[j]
            step = schedule_bounds[0]
            while step < schedule_bounds[1]:
                probability_vector = np.zeros((1, 20))

                # score each task in the set of twenty
                for m, counter in enumerate(range(step, step + 20)):
                    feature_input = np.asarray(X[counter])

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through the net
                    preference_prob = sig(model.forward(feature_input))
                    probability_vector[0][m] = preference_prob[0].data.detach()[0].item()

                print(probability_vector)

                # top 1: break ties between equally likely tasks at random
                highest_val = max(probability_vector[0])
                all_indexes_that_have_highest_val = [
                    i for i, e in enumerate(list(probability_vector[0])) if e == highest_val]
                choice = np.random.choice(all_indexes_that_have_highest_val)

                # top 3
                _, top_three = torch.topk(torch.Tensor(probability_vector), 3)

                truth = Y[step]
                if choice == truth:
                    prediction_accuracy[0] += 1
                if truth in top_three:
                    prediction_accuracy[1] += 1

                step += 20

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'DDT_pointwise' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=special_string)
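# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): one way to
# drive this trainer end to end. The schedule count of 150 is an illustrative
# assumption, not a value taken from this file.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    trainer = ProLoTrain(num_schedules=150)
    trainer.train()
    trainer.save_trained_nets('DDT_pointwise' + str(trainer.num_schedules))
    # the 100-schedule test split is loaded inside evaluate_on_test_data
    trainer.evaluate_on_test_data(trainer.model)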
class ProLoTrain:
    """
    Trains a Bayesian DDT (PDDT) with a fixed alpha on the pairwise scheduling
    task. A per-schedule Bayesian embedding is learned jointly with the tree
    parameters. Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_pairwise.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=32,
                              output_dim=1,
                              bayesian_embedding_dim=8,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        # separate learning rate for the Bayesian embedding parameters
        self.opt = torch.optim.RMSprop(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}],
            lr=.01)

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.embedding_list = [torch.ones(8) * 1 / 3 for _ in range(self.num_schedules)]

    def train(self):
        """
        Trains the PDDT. Randomly samples a schedule and a timestep within that
        schedule, produces pairwise training data using x_i - x_j, and trains
        on it.
        """
        sig = torch.nn.Sigmoid()
        criterion = torch.nn.BCELoss()
        training_done = False

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            set_of_twenty = np.random.choice(self.start_of_each_set_twenty)
            truth = self.Y[set_of_twenty]
            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, set_of_twenty)
            # load this schedule's embedding into the network
            self.model.set_bayesian_embedding(self.embedding_list[which_schedule])

            # feature vector of the action actually taken
            phi_i_num = truth + set_of_twenty
            phi_i_numpy = np.asarray(self.X[phi_i_num])

            running_loss_predict_tasks = 0
            num_iterations_predict_task = 0

            # positive examples: phi_i - phi_j should map to 1
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j_numpy = np.asarray(self.X[counter])
                feature_input = phi_i_numpy - phi_j_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.ones((1, 1)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.ones((1, 1)))

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # negative examples: phi_j - phi_i should map to 0
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j_numpy = np.asarray(self.X[counter])
                feature_input = phi_j_numpy - phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.zeros((1, 1)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.zeros((1, 1)))

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # record the average loss over this set of twenty
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            # copy the updated embedding back out of the network
            self.embedding_list[which_schedule] = torch.Tensor(
                self.model.get_bayesian_embedding().detach().cpu().numpy())

            self.total_iterations += 1

            if self.total_iterations % 500 == 499:
                print('total loss (average for each 40, averaged) at iteration ',
                      self.total_iterations, ' is ', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 10000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, model, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules,
        roughly 20% of the data) and saves the results to disk. Only the
        Bayesian embedding is updated at test time; the tree parameters stay
        fixed. Called after training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_pairwise.pkl'
        sig = torch.nn.Sigmoid()
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_data(num_schedules, data)

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        # optimizer over the embedding parameters only
        embedding_optimizer = torch.optim.SGD(
            [{'params': self.model.bayesian_embedding.parameters()}], lr=.01)
        criterion = torch.nn.BCELoss()
        # one fresh embedding per test schedule; must match bayesian_embedding_dim=8
        embedding_list = [torch.ones(8) * 1 / 3 for _ in range(num_schedules)]

        for j in range(0, num_schedules):
            schedule_bounds = schedule_array[j]
            step = schedule_bounds[0]
            model.set_bayesian_embedding(embedding_list[j])
            while step < schedule_bounds[1]:
                probability_matrix = np.zeros((20, 20))

                for m, counter in enumerate(range(step, step + 20)):
                    phi_i_numpy = np.asarray(X[counter])
                    # compare task m against every other task in the set of
                    # twenty; the diagonal (m == n) stays zero
                    for n, second_counter in enumerate(range(step, step + 20)):
                        if second_counter == counter:
                            continue
                        phi_j_numpy = np.asarray(X[second_counter])
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                        # push through the net
                        preference_prob = sig(model.forward(feature_input))
                        probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()

                # set of twenty completed: rank tasks by summed win probability
                column_vec = np.sum(probability_matrix, axis=1)
                embedding_list[j] = torch.Tensor(
                    self.model.get_bayesian_embedding().detach().cpu().numpy())

                # top 1: break ties between equally likely tasks at random
                highest_val = max(column_vec)
                all_indexes_that_have_highest_val = [
                    i for i, e in enumerate(list(column_vec)) if e == highest_val]
                if len(all_indexes_that_have_highest_val) > 1:
                    print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
                choice = np.random.choice(all_indexes_that_have_highest_val)

                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                truth = Y[step]
                if choice == truth:
                    prediction_accuracy[0] += 1
                if truth in top_three:
                    prediction_accuracy[1] += 1

                # online update of this schedule's embedding, mirroring training
                phi_i_num = truth + step
                phi_i_numpy = np.asarray(X[phi_i_num])

                # positive examples: phi_i - phi_j should map to 1
                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(X[counter])
                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        label = Variable(torch.ones((1, 1)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        label = Variable(torch.ones((1, 1)))

                    output = sig(model(feature_input))
                    loss = criterion(output, label)

                    embedding_optimizer.zero_grad()
                    loss.backward()
                    embedding_optimizer.step()

                # negative examples: phi_j - phi_i should map to 0
                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(X[counter])
                    feature_input = phi_j_numpy - phi_i_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        label = Variable(torch.zeros((1, 1)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        label = Variable(torch.zeros((1, 1)))

                    output = sig(model.forward(feature_input))

                    embedding_optimizer.zero_grad()
                    loss = criterion(output, label)
                    loss.backward()
                    embedding_optimizer.step()

                step += 20

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'PDDT_pairwise' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=special_string)
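# ---------------------------------------------------------------------------
# Illustration (not part of the original source): the evaluation above turns
# pairwise win probabilities into a single choice by row-summing the
# preference matrix and taking the argmax -- a Copeland-style score. A
# self-contained sketch of that reduction on a toy 4-task matrix:
# ---------------------------------------------------------------------------
def _copeland_ranking_demo():
    # entry [i][j]: predicted probability that task i precedes task j
    # (diagonal left at zero, as in the evaluation loop above)
    probability_matrix = np.array([[0.0, 0.8, 0.6, 0.9],
                                   [0.2, 0.0, 0.3, 0.7],
                                   [0.4, 0.7, 0.0, 0.5],
                                   [0.1, 0.3, 0.5, 0.0]])
    # each task's score is its summed probability of "winning" its comparisons
    scores = probability_matrix.sum(axis=1)
    top1 = int(np.argmax(scores))        # -> 0
    top3 = np.argsort(scores)[::-1][:3]  # -> [0, 2, 1]
    return top1, top3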
class Trainer:
    """
    Trains and evaluates a pairwise PDDT on the StarCraft II action-prediction
    task, with a per-player Bayesian embedding.
    """

    def __init__(self):
        self.loss_array = []
        checkpoint = torch.load(
            '/home/ghost/PycharmProjects/scheduling_environment/learn_action_embeddings.pkl')
        self.action_embedding_list = checkpoint['embedding_list']

        self.all_data_train_dir = '/home/ghost/PycharmProjects/scheduling_environment' + '/training_encoding_states_starcraft'
        self.mmr = '/home/ghost/PycharmProjects/scheduling_environment' + '/games_that_have_an_win_loss.pkl'
        self.list_of_games_mmr_train = pickle.load(open(self.mmr, "rb"))
        self.size_of_training_set = len(self.list_of_games_mmr_train)
        self.all_data_test_dir = '/home/ghost/PycharmProjects/scheduling_environment/testing_encoding_states_starcraft'

        self.gamma = .9
        self.criterion = torch.nn.BCELoss()
        self.not_converged = True
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.embedding_list_Bayesian_neur_net = [torch.ones(12) * 1 / 2 for _ in range(4000)]

        # input: 256 state features + 36 action-embedding features
        self.model = ProLoNet(input_dim=256 + 36,
                              weights=None,
                              comparators=None,
                              leaves=64,
                              output_dim=1,
                              bayesian_embedding_dim=12,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        # separate learning rate for the Bayesian embedding parameters
        self.optimizer_main_net = torch.optim.Adam(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}],
            lr=.001)
        # test-time optimizer that updates only the embedding
        self.embedding_optimizer_bnn = torch.optim.SGD(
            self.model.bayesian_embedding.parameters(), lr=.1)

    # noinspection PyArgumentList
    def train(self, num_steps_for_batch):
        sig = torch.nn.Sigmoid()
        iteration = 0
        while self.not_converged:
            # choose a batch file from the training directory and open it
            files_in_training_directory = os.listdir(self.all_data_train_dir)
            batch = np.random.choice(files_in_training_directory)
            set_of_games = pickle.load(open(os.path.join(self.all_data_train_dir, batch), 'rb'))
            games = set_of_games['state_embeddings']  # state embedding dict
            actions = set_of_games['actions_at_each_frame']
            players = set_of_games['player_per_game']
            big_loss = set_of_games['big_loss']  # games to skip

            for _ in range(num_steps_for_batch):
                # loop to find a game with low loss (fairly inefficient)
                file_with_low_loss_found = False
                filename = None
                while not file_with_low_loss_found:
                    filename = np.random.choice(list(games.keys()))
                    if len(big_loss[filename]) < 50:
                        file_with_low_loss_found = True

                states_filename = games[filename]
                actions_filename = actions[filename]
                player_filename = players[filename]
                length_of_game = len(states_filename)

                # choose a random frame and load this player's embedding
                frame = np.random.randint(1, length_of_game)
                player_id = int(re.findall(r'\d+', batch)[0]) + list(games.keys()).index(filename)
                self.model.set_bayesian_embedding(self.embedding_list_Bayesian_neur_net[player_id])

                X = states_filename[frame]  # state input
                actions_taken_at_frame = actions_filename[frame]  # multi-hot set of actions taken
                actions_taken_list = [i for i, e in enumerate(actions_taken_at_frame) if e != 0]
                set_of_non_actions = self.compute_set_of_non_actions(actions_taken_list)

                # positive examples: taken action minus non-action maps to 1
                Y = torch.ones((1, 1))
                if torch.cuda.is_available():
                    X = Variable(torch.Tensor(X).cuda())
                    Y = Variable(Y.cuda())
                else:
                    X = Variable(torch.Tensor(X))
                    Y = Variable(Y)

                running_loss = []
                for a in actions_taken_list:
                    action_embedding_a = self.action_embedding_list[int(a)]
                    for non_action in set_of_non_actions:
                        action_embedding_a_prime = self.action_embedding_list[non_action]
                        subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()

                        prediction = self.model(
                            torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                        prediction = sig(prediction)

                        prediction_loss = self.criterion(prediction, Y)
                        self.optimizer_main_net.zero_grad()
                        prediction_loss.backward()
                        self.optimizer_main_net.step()
                        running_loss.append(prediction_loss.item())

                # negative examples: non-action minus taken action maps to 0
                Y = torch.zeros((1, 1))
                if torch.cuda.is_available():
                    Y = Variable(Y.cuda())
                else:
                    Y = Variable(Y)

                for a in actions_taken_list:
                    action_embedding_a = self.action_embedding_list[int(a)]
                    for non_action in set_of_non_actions:
                        action_embedding_a_prime = self.action_embedding_list[non_action]
                        subtracted_input = action_embedding_a_prime.cuda() - action_embedding_a.cuda()

                        prediction = self.model(
                            torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                        prediction = sig(prediction)

                        prediction_loss = self.criterion(prediction, Y)
                        self.optimizer_main_net.zero_grad()
                        prediction_loss.backward()
                        self.optimizer_main_net.step()
                        running_loss.append(prediction_loss.item())

                if iteration > 55 and iteration % 50 == 49:
                    print('Mean loss for iteration: ', iteration, ' is ', np.mean(running_loss[-50:]))

                self.loss_array.append(np.mean(running_loss))
                # copy the updated embedding back out of the network
                self.embedding_list_Bayesian_neur_net[player_id] = torch.Tensor(
                    self.model.get_bayesian_embedding().detach().cpu().numpy())
                iteration += 1

                if iteration % 1000 == 999:
                    self.save()

                if iteration > 6000 and np.mean(self.loss_array[-500:]) - np.mean(
                        self.loss_array[-1000:]) < .001:
                    self.not_converged = False

    # noinspection PyArgumentList
    def test(self, load_in_model=False):
        iteration = 0
        tot_test_games = 0
        max_frames = 0
        loss_lists_per_timestep_nn = []
        sig = torch.nn.Sigmoid()
        game_accuracies = []
        embedding_list = [torch.ones(12) * 1 / 2 for _ in range(4000)]

        if load_in_model:
            checkpoint = torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT.pkl')
            self.model.load_state_dict(checkpoint['state_dict'])

        files_in_testing_directory = os.listdir(self.all_data_test_dir)
        for each_batch in files_in_testing_directory:
            print(each_batch)
            set_of_games = pickle.load(open(os.path.join(self.all_data_test_dir, each_batch), 'rb'))
            games = set_of_games['state_embeddings']  # state embedding dict
            actions = set_of_games['actions_at_each_frame']
            players = set_of_games['player_per_game']
            big_loss = set_of_games['big_loss']  # games to skip
            tot_test_games += len(games.keys())

            for filename in games.keys():
                if len(big_loss[filename]) > 50:
                    continue
                states_filename = games[filename]
                actions_filename = actions[filename]
                player_filename = players[filename]
                player_id = int(re.findall(r'\d+', each_batch)[0]) + list(games.keys()).index(filename)
                self.model.set_bayesian_embedding(embedding_list[player_id])

                print("iteration ", iteration)
                frame = 0
                iteration += 1
                length_of_game = len(states_filename)

                while frame < length_of_game - 2:
                    self.probability_matrix_nn = np.zeros((40, 40))
                    print('reached frame ', frame)
                    X = states_filename[frame]
                    actions_taken_at_frame = actions_filename[frame]
                    actions_taken_list = [i for i, e in enumerate(actions_taken_at_frame) if e != 0]
                    set_of_non_actions = self.compute_set_of_non_actions(actions_taken_list)

                    if torch.cuda.is_available():
                        X = Variable(torch.Tensor(X).cuda())
                    else:
                        X = Variable(torch.Tensor(X))

                    # fill the 40x40 pairwise preference matrix (diagonal stays zero)
                    for i in range(40):
                        action_embedding_a = self.action_embedding_list[int(i)]
                        for j in range(40):
                            if i == j:
                                continue
                            action_embedding_a_prime = self.action_embedding_list[int(j)]
                            subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()
                            prediction = self.model.forward(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            self.probability_matrix_nn[i][j] = prediction.item()

                    # rank actions by summed win probability and score against the truth
                    column_vec_nn = np.sum(self.probability_matrix_nn, axis=1)
                    bce = torch.nn.BCELoss()
                    soft = nn.Softmax(dim=0)
                    column_vec_nn = soft(torch.Tensor(column_vec_nn))
                    loss_nn = bce(column_vec_nn, Variable(torch.Tensor(actions_taken_at_frame)))
                    loss_lists_per_timestep_nn.append(loss_nn.item())

                    # online update of this player's embedding, mirroring training
                    # positive examples
                    Y = torch.ones((1, 1))
                    if torch.cuda.is_available():
                        Y = Variable(Y.cuda())
                    else:
                        Y = Variable(Y)

                    for a in actions_taken_list:
                        action_embedding_a = self.action_embedding_list[int(a)]
                        for non_action in set_of_non_actions:
                            action_embedding_a_prime = self.action_embedding_list[non_action]
                            subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()
                            prediction = self.model(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            prediction_loss = self.criterion(prediction, Y)
                            self.embedding_optimizer_bnn.zero_grad()
                            prediction_loss.backward()
                            self.embedding_optimizer_bnn.step()

                    # negative examples
                    Y = torch.zeros((1, 1))
                    if torch.cuda.is_available():
                        Y = Variable(Y.cuda())
                    else:
                        Y = Variable(Y)

                    for a in actions_taken_list:
                        action_embedding_a = self.action_embedding_list[int(a)]
                        for non_action in set_of_non_actions:
                            action_embedding_a_prime = self.action_embedding_list[non_action]
                            subtracted_input = action_embedding_a_prime.cuda() - action_embedding_a.cuda()
                            prediction = self.model(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            prediction_loss = self.criterion(prediction, Y)
                            self.embedding_optimizer_bnn.zero_grad()
                            prediction_loss.backward()
                            self.embedding_optimizer_bnn.step()

                    frame += 1

                # game has ended
                game_accuracies.append(np.mean(loss_lists_per_timestep_nn))
                loss_lists_per_timestep_nn = []
                self.print_and_store_accs(game_accuracies)
                if len(game_accuracies) == 5:
                    self.save_embeddings(embedding_list)
                if len(game_accuracies) == 15:
                    self.print_and_store_accs(game_accuracies)
                    self.save_embeddings(embedding_list)
                    exit()
            # batch has ended

        # all batches finished
        print(tot_test_games)
        print(max_frames)

    def print_and_store_accs(self, game_accs):
        print('Loss: {}'.format(np.mean(game_accs)))
        file = open('starcraft_learning_results.txt', 'a')
        file.write('PDDT_pairwise: mean: ' + str(np.mean(game_accs)) +
                   ', std: ' + str(np.std(game_accs)) + '\n')
        file.close()

    def save(self):
        torch.save({'state_dict': self.model.state_dict()},
                   '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT.pkl')

    def save_embeddings(self, embeddings):
        torch.save({'state_dict': self.model.state_dict(),
                    'embeddings': embeddings},
                   '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT_embeddings.pkl')

    def compute_set_of_non_actions(self, actions):
        set_of_non_actions = list(range(40))
        for each_action in actions:
            set_of_non_actions.remove(each_action)
        return set_of_non_actions
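# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): one way to
# drive the StarCraft trainer. num_steps_for_batch=50 is an illustrative
# assumption, and the checkpoint/dataset paths in __init__ must already exist.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    sc2_trainer = Trainer()
    sc2_trainer.train(num_steps_for_batch=50)
    sc2_trainer.save()
    sc2_trainer.test(load_in_model=True)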
class ProLoTrain:
    """
    Trains a Bayesian DDT (PDDT) with a fixed alpha on the naive scheduling
    task. A per-schedule Bayesian embedding is learned jointly with the tree
    parameters. Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_naive.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(num_schedules=self.num_schedules, data=self.data)
        # append the 20 task indices to each state, giving input_dim = 242
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=64,
                              output_dim=20,
                              bayesian_embedding_dim=8,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        # separate learning rate for the Bayesian embedding parameters
        self.opt = torch.optim.RMSprop(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}])

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.max_depth = 10
        self.embedding_list = [torch.ones(8) * 1 / 3 for _ in range(self.num_schedules)]

    def train(self):
        """
        Trains the PDDT. Randomly samples a timestep, loads the corresponding
        schedule's embedding, and trains on the 20-way classification loss.
        """
        training_done = False

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            rand_timestep_within_sched = np.random.randint(len(self.X))
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, rand_timestep_within_sched)
            self.model.set_bayesian_embedding(self.embedding_list[which_schedule])

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn)
            loss = F.cross_entropy(output, truth)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            self.opt.step()

            # copy the updated embedding back out of the network
            self.embedding_list[which_schedule] = torch.Tensor(
                self.model.get_bayesian_embedding().detach().cpu().numpy())

            self.total_loss_array.append(loss.item())
            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('PDDT' + str(self.num_schedules))

            if self.total_iterations > 100000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules)
        and saves the results to disk. Only the Bayesian embedding is updated
        at test time. Called after training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_naive.pkl'
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_dataset(num_schedules=num_schedules, data=data)
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        # optimizer over the embedding parameters only
        embedding_optimizer = torch.optim.SGD([{'params': self.model.bayesian_embedding.parameters()}], lr=.1)
        # one fresh embedding per test schedule
        embedding_list = [torch.ones(8) * 1 / 3 for _ in range(num_schedules)]

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            self.model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar')['nn_state_dict'])

        for i, schedule in enumerate(schedule_array):
            # start each test schedule from its own fresh embedding
            self.model.set_bayesian_embedding(embedding_list[i])

            for count in range(schedule[0], schedule[1] + 1):
                net_input = X[count]
                truth = Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).long())

                # forward pass, then embedding-only update
                output = self.model.forward(input_nn)
                embedding_optimizer.zero_grad()
                loss = F.cross_entropy(output, truth)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                embedding_optimizer.step()

                index = torch.argmax(output).item()
                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1
                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            # copy the updated embedding back out of the network
            embedding_list[i] = torch.Tensor(self.model.get_bayesian_embedding().detach().cpu().numpy())

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'DDT_w_embedding')

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/naive_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/naive_saved_models/',
                    special_string=special_string)
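# ---------------------------------------------------------------------------
# Illustration (not part of the original source): the embedding-based trainers
# above all share one pattern -- train the tree parameters and a per-instance
# Bayesian embedding jointly, then freeze the tree at test time and fit only a
# fresh embedding. A minimal self-contained sketch of that pattern; the tiny
# MLP stands in for ProLoNet, whose internals are not shown in this file.
# ---------------------------------------------------------------------------
class _TinyEmbeddingNet(torch.nn.Module):
    """Stand-in for ProLoNet: a learned per-instance embedding is
    concatenated to the input features before the forward pass."""

    def __init__(self, input_dim=13, embedding_dim=8):
        super().__init__()
        self.embedding = torch.nn.Parameter(torch.ones(embedding_dim) / 3)
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(input_dim + embedding_dim, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1))

    def set_embedding(self, vec):
        # analogous to model.set_bayesian_embedding(...) above
        with torch.no_grad():
            self.embedding.copy_(vec)

    def forward(self, x):
        emb = self.embedding.expand(x.shape[0], -1)
        return self.layers(torch.cat([x, emb], dim=1))


def _embedding_adaptation_demo():
    net = _TinyEmbeddingNet()
    # training: update everything, with a separate lr for the embedding,
    # mirroring the two-group optimizers used by the trainers above
    joint_opt = torch.optim.RMSprop(
        [{'params': net.layers.parameters()},
         {'params': [net.embedding], 'lr': .01}], lr=.01)
    # test time: leave the main parameters out of the optimizer entirely,
    # so gradient steps adapt only the embedding
    embedding_opt = torch.optim.SGD([net.embedding], lr=.01)
    return net, joint_opt, embedding_opt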