def train(self):
    """
    Trains LSTM.
    Randomly samples a schedule and a timestep within that schedule,
    produces training data using x_i - x_j, and trains upon that.
    :return:
    """
    timesteps = None
    training_done = False
    loss_func = AlphaLoss()
    which_schedule = None

    while not training_done:
        # sample a timestep before the cutoff for cross-validation
        # quick fix: resample until a full 3-step window fits inside the schedule
        found_a_suitable_candidate = False
        while not found_a_suitable_candidate:
            rand_timestep_within_sched = np.random.randint(len(self.X))
            which_schedule = find_which_schedule_this_belongs_to(
                self.schedule_array, rand_timestep_within_sched)
            if rand_timestep_within_sched + 2 > self.schedule_array[which_schedule][1]:
                pass
            else:
                found_a_suitable_candidate = True
                timesteps = [
                    rand_timestep_within_sched,
                    rand_timestep_within_sched + 1,
                    rand_timestep_within_sched + 2
                ]

        self.model.reinitialize_hidden_to_random()
        load_in_embedding_bnn(self.model, self.embedding_list, which_schedule)

        for timestep in timesteps:
            previous_hidden_state = tuple(
                [t.detach().cuda() for t in self.model.hidden])
            input_nn = self.X[timestep]
            truth_nn = self.Y[timestep]

            if torch.cuda.is_available():
                input_nn = Variable(
                    torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
                # label-smoothed target distribution over the 20 tasks
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(
                    torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(
                    torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                P = Variable(
                    torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(
                    torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn, previous_hidden_state)
            loss = loss_func.forward(P, output, self.alpha)

            # skip degenerate updates (near-zero or exploding loss)
            if loss.item() < .05 or loss.item() > 5:
                pass
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

            self.total_loss_array.append(loss.item())

        self.embedding_list = store_embedding_back_bnn(
            self.model, self.embedding_list, which_schedule)
        self.total_iterations += 1

        if self.total_iterations > 25 and self.total_iterations % 50 == 1:
            print('total iterations is', self.total_iterations)
            print('total loss (average for each 40, averaged)',
                  np.mean(self.total_loss_array[-40:]))

        if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
            self.save_trained_nets('lstm_small' + str(self.num_schedules))

        if self.total_iterations > 11000 and np.mean(
                self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
            training_done = True

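# The training loop above relies on find_which_schedule_this_belongs_to to map a
# sampled timestep to its schedule. Below is a minimal standalone sketch of the
# assumed behaviour (the name _find_which_schedule_sketch is hypothetical and
# used only for illustration; the real helper is imported from the repo's
# utilities): schedule_array holds inclusive (start, end) timestep bounds per
# schedule, and the lookup returns the index of the interval containing the
# timestep.
def _find_which_schedule_sketch(schedule_array, timestep):
    for idx, (start, end) in enumerate(schedule_array):
        if start <= timestep <= end:
            return idx
    raise ValueError('timestep %d is not covered by any schedule' % timestep)
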
def evaluate_on_test_data(self, load_in_model=False):
    """
    Evaluate performance of a trained network tuned upon the alpha-divergence loss.
    Note: this function is called after training convergence.
    :return:
    """
    num_schedules = 75
    loss_func = AlphaLoss()

    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
        num_schedules) + '_inf_hetero_deadline_naive.pkl'
    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_dataset(num_schedules=num_schedules, data=data)
    for i, each_element in enumerate(X):
        X[i] = each_element + list(range(20))

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    embedding_optimizer = torch.optim.Adam(
        self.model.EmbeddingList.parameters(), lr=.001)
    embedding_list = [torch.ones(1, 8) * 1 / 3 for i in range(num_schedules)]

    if load_in_model:
        # TODO: somehow get the string when the update_model flag is true
        self.model.load_state_dict(
            torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/NN_homog.tar'
            )['nn_state_dict'])

    for i, schedule in enumerate(schedule_array):
        self.model.reinitialize_hidden_to_random()
        load_in_embedding_bnn(self.model, embedding_list, i)

        for count in range(schedule[0], schedule[1] + 1):
            previous_hidden_state = tuple(
                [t.detach().cuda() for t in self.model.hidden])
            net_input = X[count]
            truth = Y[count]

            if torch.cuda.is_available():
                input_nn = Variable(
                    torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                truth = Variable(
                    torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth] = 1 - 19 * self.distribution_epsilon
            else:
                input_nn = Variable(
                    torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                truth = Variable(
                    torch.Tensor(np.asarray(truth).reshape(1)).long())
                P = Variable(torch.Tensor(np.ones((1, 20))))
                P *= self.distribution_epsilon
                P[0][truth] = 1 - 19 * self.distribution_epsilon

            ##### forward #####
            output = self.model.forward(input_nn, previous_hidden_state)

            # at test time only the per-schedule embedding is adapted
            loss = loss_func.forward(P, output, self.alpha)
            if loss.item() < .05 or loss.item() > 5:
                pass
            else:
                embedding_optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                embedding_optimizer.step()

            index = torch.argmax(output).item()
            # top 3
            _, top_three = torch.topk(output, 3)

            if index == truth.item():
                prediction_accuracy[0] += 1
            if truth.item() in top_three.detach().cpu().tolist()[0]:
                prediction_accuracy[1] += 1

        store_embedding_back_bnn(self.model, embedding_list, i)

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
              ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', i)

        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    self.save_performance_results(
        percentage_accuracy_top1, percentage_accuracy_top3,
        'inf_blstm_small_' + str(self.num_schedules))

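# The evaluation routines swap a per-schedule latent embedding in and out of the
# model around each schedule. The pair of standalone sketches below illustrates
# the assumed contract of load_in_embedding_bnn / store_embedding_back_bnn
# (the _sketch_ names are hypothetical and for illustration only; the real
# helpers are imported from the repo's utilities and may differ in detail):
# copy the stored embedding into the model's EmbeddingList parameter before the
# schedule, and read the adapted value back out afterwards so it persists.
def _sketch_load_in_embedding(model, embedding_list, schedule_index):
    with torch.no_grad():
        next(model.EmbeddingList.parameters()).data.copy_(
            embedding_list[schedule_index])


def _sketch_store_embedding_back(model, embedding_list, schedule_index):
    embedding_list[schedule_index] = next(
        model.EmbeddingList.parameters()).data.clone().cpu()
    return embedding_list
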
def evaluate_on_test_data(self, load_in_model=False):
    """
    Evaluate performance of a trained network tuned upon the alpha-divergence loss.
    Note: this function is called after training convergence.
    :return:
    """
    num_schedules = 75
    loss_func = AlphaLoss()

    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
        num_schedules) + '_inf_hetero_deadline_pairwise.pkl'
    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)
    start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(X)

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    embedding_optimizer = torch.optim.Adam(
        self.model.EmbeddingList.parameters(), lr=.001)
    embedding_list = [torch.ones(1, 8) * 1 / 3 for i in range(num_schedules)]

    if load_in_model:
        # TODO: somehow get the string when the update_model flag is true
        self.model.load_state_dict(
            torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/NN_homog.tar'
            )['nn_state_dict'])

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        load_in_embedding_bnn(self.model, embedding_list, j)
        self.model.reinitialize_hidden_to_random()

        while step < schedule_bounds[1]:
            probability_matrix = np.zeros((20, 20))
            previous_hidden_state = tuple(
                [t.detach().cuda() for t in self.model.hidden])

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)

                # for each set of twenty: fill the entire matrix, diagonal stays zero
                for n, second_counter in enumerate(range(step, step + 20)):
                    if second_counter == counter:  # same as m == n
                        continue
                    phi_j = X[second_counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(
                            torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(
                            torch.Tensor(feature_input.reshape(1, 13)))

                    # push through nets
                    preference_prob = self.model.forward(
                        feature_input, previous_hidden_state)
                    probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                    # TODO: check whether the line above alone gives the same result as the line below
                    # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # set of twenty is completed
            column_vec = np.sum(probability_matrix, axis=1)

            # top 1
            choice = np.argmax(column_vec)
            # top 3
            _, top_three = torch.topk(torch.Tensor(column_vec), 3)

            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1
            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # then do the training update loop
            phi_i_num = truth + step  # old method: set_of_twenty[0] + truth
            phi_i = X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # iterate over pairwise comparisons
            for counter in range(step, step + 20):
                if counter == phi_i_num:
                    continue
                phi_j = X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_i_numpy - phi_j_numpy
                # label = add_noise_pairwise(label, self.noise_percentage)

                if torch.cuda.is_available():
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    P = Variable(
                        torch.Tensor([
                            1 - self.distribution_epsilon,
                            self.distribution_epsilon
                        ]).cuda())
                else:
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)))
                    P = Variable(
                        torch.Tensor([
                            1 - self.distribution_epsilon,
                            self.distribution_epsilon
                        ]))

                output = self.model(feature_input, previous_hidden_state)
                loss = loss_func.forward(P, output, self.alpha)

                # prepare optimizer, compute gradient, update params
                if loss.item() < .001 or loss.item() > 50:
                    pass
                else:
                    embedding_optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    embedding_optimizer.step()

            # reversed pairs
            for counter in range(step, step + 20):
                if counter == phi_i_num:
                    continue
                phi_j = X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_j_numpy - phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    P = Variable(
                        torch.Tensor([
                            self.distribution_epsilon,
                            1 - self.distribution_epsilon
                        ]).cuda())
                else:
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)))
                    P = Variable(
                        torch.Tensor([
                            self.distribution_epsilon,
                            1 - self.distribution_epsilon
                        ]))

                output = self.model(feature_input, previous_hidden_state)
                loss = loss_func.forward(P, output, self.alpha)
                # print('loss is :', loss.item())

                # skip very low or very high losses; clip any very high gradients
                # prepare optimizer, compute gradient, update params
                if loss.item() < .001 or loss.item() > 50:
                    pass
                else:
                    embedding_optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    embedding_optimizer.step()

            store_embedding_back_bnn(self.model, embedding_list, j)
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
              ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)

        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    self.save_performance_results(
        percentage_accuracy_top1, percentage_accuracy_top3,
        'inf_blstm_small_' + str(self.num_schedules))

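# The pairwise evaluation above turns the 20 x 20 matrix of preference
# probabilities into a single task choice by row-summing the matrix and taking
# the argmax. The self-contained sketch below reproduces only that aggregation
# step on random numbers, purely to illustrate the scoring (no model involved;
# _sketch_pairwise_aggregation is a hypothetical name):
def _sketch_pairwise_aggregation():
    rng = np.random.RandomState(0)
    probability_matrix = rng.rand(20, 20)
    np.fill_diagonal(probability_matrix, 0)      # a task is never compared with itself
    scores = np.sum(probability_matrix, axis=1)  # row sums, as in column_vec above
    top1 = int(np.argmax(scores))                # predicted task
    top3 = np.argsort(scores)[-3:][::-1]         # top-3 candidates
    return top1, top3
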
def train(self):
    """
    Trains NN.
    Randomly samples a schedule and a timestep within that schedule,
    produces training data using x_i - x_j, and trains upon that.
    :return:
    """
    sets_of_twenty = None
    which_schedule = None
    training_done = False
    loss_func = AlphaLoss()

    # variables to keep track of loss and number of tasks trained over
    running_loss_predict_tasks = 0
    num_iterations_predict_task = 0

    while not training_done:
        # sample a timestep before the cutoff for cross-validation
        # quick fix: resample until three full sets of twenty fit inside the schedule
        found_a_suitable_candidate = False
        while not found_a_suitable_candidate:
            rand_timestep_within_sched = np.random.randint(
                len(self.start_of_each_set_twenty))
            set_of_twenty = self.start_of_each_set_twenty[rand_timestep_within_sched]
            which_schedule = find_which_schedule_this_belongs_to(
                self.schedule_array, set_of_twenty)
            if set_of_twenty + 59 > self.schedule_array[which_schedule][1]:
                pass
            else:
                found_a_suitable_candidate = True
                sets_of_twenty = [set_of_twenty, set_of_twenty + 20, set_of_twenty + 40]

        self.model.reinitialize_hidden_to_random()
        load_in_embedding_bnn(self.model, self.embedding_list, which_schedule)

        # three consecutive sets of twenty from the same schedule
        for set_of_twenty in sets_of_twenty:
            truth = self.Y[set_of_twenty]

            # find feature vector of true action taken
            phi_i_num = truth + set_of_twenty
            phi_i = self.X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # changes each timestep
            previous_hidden_state = tuple(
                [t.detach().cuda() for t in self.model.hidden])

            # iterate over pairwise comparisons
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j = self.X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_i_numpy - phi_j_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    P = Variable(
                        torch.Tensor([
                            1 - self.distribution_epsilon,
                            self.distribution_epsilon
                        ]).cuda())
                else:
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)))
                    P = Variable(
                        torch.Tensor([
                            1 - self.distribution_epsilon,
                            self.distribution_epsilon
                        ]))

                output = self.model.forward(feature_input, previous_hidden_state)
                self.opt.zero_grad()
                self.embedding_optimizer.zero_grad()
                loss = loss_func.forward(P, output, self.alpha)

                if torch.isnan(loss):
                    print(self.alpha, ' :nan occurred at iteration ',
                          self.total_iterations)

                if loss.item() < .001 or loss.item() > 55:
                    pass
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    self.opt.step()
                    self.embedding_optimizer.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # second loop: reversed pairs
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j = self.X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_j_numpy - phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    P = Variable(
                        torch.Tensor([
                            self.distribution_epsilon,
                            1 - self.distribution_epsilon
                        ]).cuda())
                else:
                    feature_input = Variable(
                        torch.Tensor(feature_input.reshape(1, 13)))
                    P = Variable(
                        torch.Tensor([
                            self.distribution_epsilon,
                            1 - self.distribution_epsilon
                        ]))

                output = self.model.forward(feature_input, previous_hidden_state)
                self.opt.zero_grad()
                self.embedding_optimizer.zero_grad()
                loss = loss_func.forward(P, output, self.alpha)

                if torch.isnan(loss):
                    print(self.alpha, ' :nan occurred at iteration ',
                          self.total_iterations, ' at',
                          num_iterations_predict_task)

                if loss.item() < .001 or loss.item() > 55:
                    pass
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    self.opt.step()
                    self.embedding_optimizer.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

        self.embedding_list = store_embedding_back_bnn(
            self.model, self.embedding_list, which_schedule)

        # add average loss to array
        self.total_loss_array.append(
            running_loss_predict_tasks / num_iterations_predict_task)
        num_iterations_predict_task = 0
        running_loss_predict_tasks = 0

        self.total_iterations += 1

        if self.total_iterations > 25 and self.total_iterations % 50 == 1:
            print('total iterations is', self.total_iterations)
            print('total loss (average for each 40, averaged)',
                  np.mean(self.total_loss_array[-40:]))

        if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
            self.save_trained_nets('bnn_small' + str(self.num_schedules))

        if self.total_iterations > 2000 and np.mean(
                self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
            training_done = True

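# Both training loops above stop once the running loss has flattened out: the
# mean of the last 100 recorded losses must be within convergence_epsilon of the
# mean of the last 500, after a minimum number of iterations. The small helper
# below restates that check in isolation (_sketch_has_converged is a
# hypothetical name used only for illustration):
def _sketch_has_converged(total_loss_array, total_iterations, min_iterations,
                          convergence_epsilon):
    if total_iterations <= min_iterations:
        return False
    recent = np.mean(total_loss_array[-100:])
    longer_window = np.mean(total_loss_array[-500:])
    return (recent - longer_window) < convergence_epsilon
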
def evaluate_on_test_data(self):
    """
    Evaluate performance of a trained network.
    This is tested on 20% of the data and will be stored in a text file.
    :return:
    """
    loss_func = AlphaLoss()
    num_schedules = 75

    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
        num_schedules) + '_inf_hetero_deadline_naive.pkl'
    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_dataset(data, num_schedules)
    for i, each_element in enumerate(X):
        X[i] = each_element + list(range(20))

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    embedding_optimizer = torch.optim.SGD(
        self.model.EmbeddingList.parameters(), lr=.001)
    embedding_list = [torch.ones(1, 8) * 1 / 3 for i in range(num_schedules)]

    for i, schedule in enumerate(schedule_array):
        load_in_embedding_bnn(self.model, embedding_list, i)

        for count in range(schedule[0], schedule[1] + 1):
            net_input = X[count]
            truth = Y[count]

            if torch.cuda.is_available():
                input_nn = Variable(
                    torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                truth = Variable(
                    torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth] = 1 - 19 * self.distribution_epsilon
            else:
                input_nn = Variable(
                    torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                truth = Variable(
                    torch.Tensor(np.asarray(truth).reshape(1)).long())
                P = Variable(torch.Tensor(np.ones((1, 20))))
                P *= self.distribution_epsilon
                P[0][truth] = 1 - 19 * self.distribution_epsilon

            ##### forward #####
            output = self.model.forward(input_nn)

            # at test time only the per-schedule embedding is adapted
            loss = loss_func.forward(P, output, self.alpha)
            if loss.item() < .05 or loss.item() > 5:
                pass
            else:
                embedding_optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                embedding_optimizer.step()

            index = torch.argmax(output).item()
            # top 3
            _, top_three = torch.topk(output, 3)

            if index == truth.item():
                prediction_accuracy[0] += 1
            if truth.item() in top_three.detach().cpu().tolist()[0]:
                prediction_accuracy[1] += 1

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
              ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', i)

        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

        store_embedding_back_bnn(self.model, embedding_list, i)

    print(np.mean(percentage_accuracy_top1))
    self.save_performance_results(
        percentage_accuracy_top1, percentage_accuracy_top3,
        'inf_bnn_small_' + str(self.num_schedules))

def train(self):
    """
    Trains BDT.
    Randomly samples a schedule and a timestep within that schedule, and passes in
    the corresponding data in an attempt to classify which task was scheduled.
    :return:
    """
    total_iterations = 0
    loss_func = AlphaLoss()
    training_done = False

    while not training_done:
        # sample a timestep before the cutoff for cross-validation
        rand_timestep_within_sched = np.random.randint(len(self.X))
        input_nn = self.X[rand_timestep_within_sched]
        truth_nn = self.Y[rand_timestep_within_sched]

        which_schedule = find_which_schedule_this_belongs_to(
            self.schedule_array, rand_timestep_within_sched)
        load_in_embedding_bnn(self.model, self.embedding_list, which_schedule)

        if torch.cuda.is_available():
            input_nn = Variable(
                torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
            # label-smoothed target distribution over the 20 tasks
            P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
            P *= self.distribution_epsilon
            P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
            truth = Variable(
                torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
        else:
            input_nn = Variable(
                torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
            P = Variable(
                torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
            P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
            truth = Variable(
                torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

        self.opt.zero_grad()
        self.embedding_optimizer.zero_grad()
        output = self.model.forward(input_nn)
        loss = loss_func.forward(P, output, self.alpha)

        # skip degenerate updates (near-zero or exploding loss)
        if loss.item() < .05 or loss.item() > 5:
            pass
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            self.opt.step()
            self.embedding_optimizer.step()

        self.total_loss_array.append(loss.item())
        total_iterations = len(self.total_loss_array)

        self.embedding_list = store_embedding_back_bnn(
            self.model, self.embedding_list, which_schedule)

        if total_iterations % 50 == 49:
            print('loss at', total_iterations,
                  ', total loss (average for each 40, averaged)',
                  np.mean(self.total_loss_array[-40:]))

        if total_iterations > 20000:
            training_done = True

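# The 20-way target built in the training routines above is a label-smoothed
# one-hot distribution: every wrong task gets distribution_epsilon and the true
# task gets 1 - 19 * distribution_epsilon, so the row still sums to one. The
# short standalone check below illustrates that construction with an arbitrary
# epsilon of 0.01 (illustration only; _sketch_smoothed_target is a hypothetical
# name and is not part of the training code):
def _sketch_smoothed_target(truth_index, distribution_epsilon=0.01):
    P = np.ones((1, 20)) * distribution_epsilon
    P[0][truth_index] = 1 - 19 * distribution_epsilon
    assert abs(P.sum() - 1.0) < 1e-9
    return P
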
def tune_both():
    # now we let the network tune both the weights and the embedding
    bnn = NeuronBNN()
    data, labels = create_dual_neuron_dataset(100)

    # exclude the embedding parameter from the main optimizer
    params = list(bnn.parameters())
    del params[6]
    opt = torch.optim.SGD(params, lr=.0001)
    embedding_opt = torch.optim.SGD(bnn.EmbeddingList[0].parameters(), lr=.01)
    embedding_list = [torch.ones(1, 1) * 1 / 2 for _ in range(100)]

    loss = nn.L1Loss()  # can use L1 as well, shouldn't matter too much
    epochs = 1000

    # even sets of twenty are lam = 1, odd sets of twenty are lam = 0
    even_lambdas = np.linspace(0, 1960, num=50)

    for epoch in range(epochs):
        for j in range(5):
            # choose an even (lam = 1) schedule
            even = int(np.random.choice(even_lambdas))
            load_in_embedding_bnn(bnn, embedding_list, int(even / 20))
            for i in range(even, even + 20):
                x = data[i][0:2]
                label = labels[i]
                x = torch.Tensor([x]).reshape((2))
                label = torch.Tensor([label]).reshape((1, 1))
                output = bnn.forward(x)
                if j % 2 == 0:
                    # update the network weights
                    opt.zero_grad()
                    error = loss(output, label)
                    error.backward()
                    opt.step()
                else:
                    # update only the per-schedule embedding
                    embedding_opt.zero_grad()
                    error = loss(output, label)
                    error.backward()
                    embedding_opt.step()
            embedding_list = store_embedding_back_bnn(bnn, embedding_list, int(even / 20))

        for j in range(5):
            # choose an odd (lam = 0) schedule
            odd = int(np.random.choice(even_lambdas)) + 20
            load_in_embedding_bnn(bnn, embedding_list, int(odd / 20))
            for i in range(odd, odd + 20):
                x = data[i][0:2]
                label = labels[i]
                x = torch.Tensor([x]).reshape((2))
                label = torch.Tensor([label]).reshape((1, 1))
                output = bnn.forward(x)
                if j % 2 == 0:
                    # update the network weights
                    opt.zero_grad()
                    error = loss(output, label)
                    error.backward()
                    opt.step()
                else:
                    # update only the per-schedule embedding
                    embedding_opt.zero_grad()
                    error = loss(output, label)
                    error.backward()
                    embedding_opt.step()
            embedding_list = store_embedding_back_bnn(bnn, embedding_list, int(odd / 20))

    # evaluation: adapt a fresh embedding per test schedule
    test_data, test_labels = create_dual_neuron_dataset(20)
    print(bnn.state_dict())
    avg_loss = 0
    test_embedding_list = [torch.ones(1, 1) * 1 / 2 for _ in range(20)]
    embedding_opt = torch.optim.SGD(bnn.EmbeddingList[0].parameters(), lr=.1)
    counter = 0

    for i in range(20 * 20):
        load_in_embedding_bnn(bnn, test_embedding_list, int(i / 20))
        x = test_data[i][0:2]
        label = test_labels[i]
        x = torch.Tensor([x]).reshape((2))
        label = torch.Tensor([label]).reshape((1, 1))
        output = bnn.forward(x)
        error = loss(output, label)
        print('output is ', output)
        print('label is ', label)
        print('error is ', error.item())
        avg_loss += error.item()

        if error.item() < .05:
            counter += 1
        if error.item() > .05:
            # tune the embedding until the error drops or we give up
            flag = False
            tracker = 0
            while not flag:
                embedding_opt.zero_grad()
                error.backward()
                embedding_opt.step()
                output = bnn.forward(x)
                error = loss(output, label)
                tracker += 1
                if tracker > 100:
                    flag = True
                if error.item() < .1:
                    flag = True

        test_embedding_list = store_embedding_back_bnn(bnn, test_embedding_list, int(i / 20))

    print(test_embedding_list)
    print(embedding_list)
    avg_loss /= 400
    print('avg loss is', avg_loss)
    print('accuracy', counter / 400)