def evaluate(self, save_dir='data/', prefix=''):
    """
    Run the loaded model's inference over the test loader and append the
    ground truth and the predictions to csv files inside save_dir.

    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        self.model.cuda()
    # Evaluation mode matters for batch_norm layers
    self.model.eval()

    model_tag = self.saved_model.replace('/','_') + prefix
    Ypred_file = os.path.join(save_dir, 'test_Ypred_{}.csv'.format(model_tag))
    Xtruth_file = os.path.join(save_dir, 'test_Xtruth_{}.csv'.format(model_tag))
    Ytruth_file = os.path.join(save_dir, 'test_Ytruth_{}.csv'.format(model_tag))
    Xpred_file = os.path.join(save_dir, 'test_Xpred_{}.csv'.format(model_tag))
    timer = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    # All four files are opened in append mode, so repeated runs accumulate rows
    with open(Xtruth_file, 'a') as x_truth_out, \
            open(Ytruth_file, 'a') as y_truth_out, \
            open(Ypred_file, 'a') as y_pred_out, \
            open(Xpred_file, 'a') as x_pred_out:
        for geometry, spectra in self.test_loader:
            if on_gpu:
                geometry, spectra = geometry.cuda(), spectra.cuda()
            Xpred = self.model.inference(spectra).cpu().data.numpy()
            np.savetxt(x_truth_out, geometry.cpu().data.numpy())
            np.savetxt(y_truth_out, spectra.cpu().data.numpy())
            np.savetxt(x_pred_out, Xpred)
            if self.flags.data_set == 'meta_material':
                # No simulator call for this data set; Ypred file stays untouched
                continue
            np.savetxt(y_pred_out, simulator(self.flags.data_set, Xpred))
        timer.record(1)  # Record the total time of the eval period
    return Ypred_file, Ytruth_file
def predict(self, Ytruth_file, save_dir='data/', prefix=''):
    """
    Predict X for every target row stored in a csv file using the backward
    model (self.model_b) and append the results to csv files in save_dir.

    :param Ytruth_file: Path of the input targets (comma or space delimited)
    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) of the *output* paths in save_dir
    """
    self.load()  # load the model as constructed
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        self.model_b.cuda()
    self.model_b.eval()
    model_tag = self.saved_model.replace('/', '_') + prefix

    # Try ',' first; a single resulting column means the file is ' ' delimited
    targets = pd.read_csv(Ytruth_file, header=None, delimiter=',')
    if targets.shape[1] == 1:
        targets = pd.read_csv(Ytruth_file, header=None, delimiter=' ')
    Ytruth_tensor = torch.from_numpy(targets.values).to(torch.float)
    print('shape of Ytruth tensor :', Ytruth_tensor.shape)

    Ypred_file = os.path.join(save_dir, 'test_Ypred_{}.csv'.format(model_tag))
    truth_out_file = os.path.join(save_dir, 'test_Ytruth_{}.csv'.format(model_tag))
    Xpred_file = os.path.join(save_dir, 'test_Xpred_{}.csv'.format(model_tag))
    timer = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    if on_gpu:
        Ytruth_tensor = Ytruth_tensor.cuda()
    print('model in eval:', self.model_b)
    Xpred = self.model_b(Ytruth_tensor).detach().cpu().numpy()

    with open(truth_out_file, 'a') as y_truth_out, \
            open(Ypred_file, 'a') as y_pred_out, \
            open(Xpred_file, 'a') as x_pred_out:
        np.savetxt(y_truth_out, Ytruth_tensor.cpu().data.numpy())
        np.savetxt(x_pred_out, Xpred)
        if not self.flags.data_set == 'Yang_sim':
            np.savetxt(y_pred_out, simulator(self.flags.data_set, Xpred))
        timer.record(1)
    return Ypred_file, truth_out_file
def eval_from_simulator(Xpred_file, flags):
    """
    Push the predicted X values stored in a file through the simulator, save
    the simulated Y next to it and plot the MSE distribution against the
    matching Ytruth file.

    :param Xpred_file: Space-delimited prediction file with 'Xpred' in its name
    :param flags: Object whose data_set attribute selects the simulator; it is
                  also forwarded to plotMSELossDistrib
    :return: None (the simulated values are written to the file obtained by
             replacing 'Xpred' with 'Ypred_Simulated' in Xpred_file)
    """
    predicted_x = np.loadtxt(Xpred_file, delimiter=' ')
    simulated_y = simulator(flags.data_set, predicted_x)

    # Companion file names are derived from the Xpred file name
    simulated_path = Xpred_file.replace('Xpred', 'Ypred_Simulated')
    truth_path = Xpred_file.replace('Xpred','Ytruth')

    np.savetxt(simulated_path, simulated_y)
    plotMSELossDistrib(simulated_path, truth_path, flags)
def predict(self, Ytruth_file, save_dir='data/', prefix=''):
    """
    Predict X for every target row stored in a csv file by running the
    model in reverse on [z, padding, y], and append the results to csv files
    in save_dir.

    :param Ytruth_file: Path of the input targets (comma or space delimited)
    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) of the *output* paths in save_dir
    """
    self.load()  # load the model as constructed
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()
    self.model.eval()
    saved_model_str = self.saved_model.replace('/', '_') + prefix

    # Try ',' first; a single resulting column means the file is ' ' delimited
    Ytruth = pd.read_csv(Ytruth_file, header=None, delimiter=',')
    if len(Ytruth.columns) == 1:
        Ytruth = pd.read_csv(Ytruth_file, header=None, delimiter=' ')
    Ytruth_tensor = torch.from_numpy(Ytruth.values).to(torch.float)
    print('shape of Ytruth tensor :', Ytruth_tensor.shape)

    # Get the file names
    Ypred_file = os.path.join(save_dir,
                              'test_Ypred_{}.csv'.format(saved_model_str))
    Ytruth_file = os.path.join(save_dir,
                               'test_Ytruth_{}.csv'.format(saved_model_str))
    Xpred_file = os.path.join(save_dir,
                              'test_Xpred_{}.csv'.format(saved_model_str))
    # keep time
    tk = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    dim_x = self.flags.dim_x
    dim_y = self.flags.dim_y
    dim_z = self.flags.dim_z
    dim_tot = self.flags.dim_tot
    batch_size = len(Ytruth_tensor)

    if cuda:
        Ytruth_tensor = Ytruth_tensor.cuda()
    # Build the noise on the same device as the targets so torch.cat below can
    # never see tensors living on different devices.
    noise_device = Ytruth_tensor.device
    # Create random value for the padding for yz
    pad_yz = self.flags.zeros_noise_scale * torch.randn(
        batch_size, dim_tot - dim_y - dim_z, device=noise_device)
    # Create a noisy z vector with noise level same as y
    z = torch.randn(batch_size, dim_z, device=noise_device)
    y_cat = torch.cat((z, pad_yz, Ytruth_tensor), dim=1)

    # Reverse pass; only the first dim_x columns are kept as the prediction
    Xpred = self.model(y_cat, rev=True)
    Xpred = Xpred[:, :dim_x].cpu().data.numpy()

    # Open those files to append
    with open(Ytruth_file, 'a') as fyt, open(Ypred_file, 'a') as fyp, \
            open(Xpred_file, 'a') as fxp:
        np.savetxt(fyt, Ytruth_tensor.cpu().data.numpy())
        np.savetxt(fxp, Xpred)
        if self.flags.data_set != 'Yang_sim':
            Ypred = simulator(self.flags.data_set, Xpred)
            np.savetxt(fyp, Ypred)
        tk.record(1)
    return Ypred_file, Ytruth_file
def evaluate(self, save_dir='data/', prefix=''):
    """
    Run the model in reverse on [z, padding, y] for every test batch and
    append the simulator output of the predicted X to the Ypred csv file.

    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()
    # Set to evaluation mode for batch_norm layers
    self.model.eval()
    # Set the dimensions
    dim_x = self.flags.dim_x
    dim_y = self.flags.dim_y
    dim_z = self.flags.dim_z
    dim_tot = self.flags.dim_tot
    saved_model_str = self.saved_model.replace('/', '_') + prefix
    # Get the file names
    Ypred_file = os.path.join(save_dir,
                              'test_Ypred_{}.csv'.format(saved_model_str))
    Xtruth_file = os.path.join(save_dir,
                               'test_Xtruth_{}.csv'.format(saved_model_str))
    Ytruth_file = os.path.join(save_dir,
                               'test_Ytruth_{}.csv'.format(saved_model_str))
    Xpred_file = os.path.join(save_dir,
                              'test_Xpred_{}.csv'.format(saved_model_str))
    tk = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))
    # Open those files to append
    # NOTE(review): in this version the writes to the Xtruth, Ytruth and Xpred
    # files are disabled, so those three files are opened (and created) but
    # nothing is appended to them here -- confirm this is intended, since
    # Ytruth_file is still returned to the caller.
    with open(Xtruth_file, 'a') as fxt, open(Ytruth_file, 'a') as fyt, \
            open(Ypred_file, 'a') as fyp, open(Xpred_file, 'a') as fxp:
        # Loop through the eval data and evaluate
        for x, y in self.test_loader:
            if cuda:
                x = x.cuda()  # Put data onto GPU
                y = y.cuda()  # Put data onto GPU
            batch_size = len(x)
            # Build the noise on the same device as y so torch.cat below can
            # never see tensors living on different devices.
            noise_device = y.device
            # Create random value for the padding for yz
            pad_yz = self.flags.zeros_noise_scale * torch.randn(
                batch_size, dim_tot - dim_y - dim_z, device=noise_device)
            # Create a noisy z vector with noise level same as y
            z = torch.randn(batch_size, dim_z, device=noise_device)
            print("shape of z:", np.shape(z))
            print("shape of pad_yz:", np.shape(pad_yz))
            print("shape of y:", np.shape(y))
            y_cat = torch.cat((z, pad_yz, y), dim=1)
            # Reverse pass; only the first dim_x columns are the prediction
            Xpred = self.model(y_cat, rev=True)
            Xpred = Xpred[:, :dim_x].cpu().data.numpy()
            if self.flags.data_set != 'meta_material':
                Ypred = simulator(self.flags.data_set, Xpred)
                np.savetxt(fyp, Ypred)
        tk.record(1)
    return Ypred_file, Ytruth_file
def evaluate_forward_model(dirx, n_samples, invs=False):
    """
    Print the average MSE, MRE and RSE of a saved model over (at most) the
    first n_samples test samples, evaluated one sample per batch.

    :param dirx: Saved-model directory; used to load the flags and the model
    :param n_samples: Number of test samples to evaluate. If the test loader
                      holds fewer samples (or n_samples is not positive) every
                      available sample is used and the averages are taken over
                      the number actually processed.
    :param invs: When True the model is treated as an inverse model: it is fed
                 the second element of each pair, and its output is pushed
                 through the simulator before being compared with that element
    :return: None (results are printed)
    """
    print("DIRECTORY: ", dirx)
    flags = load_flags(dirx)
    flags.batch_size = 1
    train_loader, test_loader = data_reader.read_data(flags)
    GEN = GA(flags, train_loader, test_loader, inference_mode=True,
             saved_model=dirx)
    GEN.model.eval()

    use_cuda = torch.cuda.is_available()
    avg_mse, avg_mre, avg_rse = 0, 0, 0
    n_done = 0  # samples actually accumulated, used as the divisor below
    for i, (g, s) in enumerate(test_loader):
        if invs:
            g, s = s, g  # feed the spectra to the (inverse) model
        if use_cuda:
            g = g.cuda()
            s = s.cuda()
        ps = GEN.model(g)
        if invs:
            pg = ps
            g, s = s, g  # restore the original roles of the pair
            ps = simulator(flags.data_set, pg.cpu().data.numpy())
            ps = torch.from_numpy(ps).to(s.device)

        mse = torch.nn.functional.mse_loss(s, ps)
        rse = torch.sqrt(torch.sum(torch.pow(s - ps, 2))) / torch.sqrt(
            torch.sum(torch.pow(s, 2)))
        mre = torch.mean(torch.abs(torch.div(s - ps, s)))
        avg_mse += mse.item()
        avg_rse += rse.item()
        avg_mre += mre.item()
        n_done += 1

        if i == (n_samples - 1):
            print('BROKE at sample {}'.format(i))
            break

    # Divide by what was really summed; max() keeps an empty loader from
    # raising and leaves the reported averages at 0 in that case.
    divisor = max(n_done, 1)
    avg_mse /= divisor
    avg_mre /= divisor
    avg_rse /= divisor
    print("\nMSE:\t{}\nMRE:\t{}\nRSE:\t{}".format(avg_mse, avg_mre, avg_rse))
def evaluate(self, save_dir='data/', prefix=''):
    """
    Run the conditional model in reverse (z, y -> x) for every test batch and
    append the ground truth and the predictions to csv files in save_dir.

    For the 'gaussian_mixture' data set the labels are turned into 4-class
    one-hot vectors before being fed to the model, while the original labels
    are what gets written to the Ytruth file.

    :param save_dir: Directory receiving the csv files
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()
    # Set to evaluation mode for batch_norm layers
    self.model.eval()
    dim_z = self.flags.dim_z
    is_gaussian = self.flags.data_set == 'gaussian_mixture'
    saved_model_str = self.saved_model.replace('/', '_') + prefix
    # Get the file names
    Ypred_file = os.path.join(save_dir,
                              'test_Ypred_{}.csv'.format(saved_model_str))
    Xtruth_file = os.path.join(save_dir,
                               'test_Xtruth_{}.csv'.format(saved_model_str))
    Ytruth_file = os.path.join(save_dir,
                               'test_Ytruth_{}.csv'.format(saved_model_str))
    Xpred_file = os.path.join(save_dir,
                              'test_Xpred_{}.csv'.format(saved_model_str))
    # Open those files to append
    with open(Xtruth_file, 'a') as fxt, open(Ytruth_file, 'a') as fyt, \
            open(Ypred_file, 'a') as fyp, open(Xpred_file, 'a') as fxp:
        # Loop through the eval data and evaluate
        for x, y in self.test_loader:
            batch_size = len(x)
            if is_gaussian:
                # Keep the raw labels for the truth file (taken while y is
                # still on the CPU), then change them into one-hot
                y_prev = np.copy(y.data.numpy())
                y = torch.nn.functional.one_hot(y.to(torch.int64),
                                                4).to(torch.float)
            if cuda:
                x = x.cuda()
                y = y.cuda()
            # Create the z noise on the same device as y so the model never
            # receives inputs living on different devices.
            z = torch.randn(batch_size, dim_z, device=y.device)
            Xpred = self.model(z, y, rev=True).cpu().data.numpy()
            np.savetxt(fxt, x.cpu().data.numpy())
            np.savetxt(fxp, Xpred)
            if is_gaussian:
                np.savetxt(fyt, y_prev)
            else:
                np.savetxt(fyt, y.cpu().data.numpy())
            if self.flags.data_set != 'meta_material':
                Ypred = simulator(self.flags.data_set, Xpred)
                np.savetxt(fyp, Ypred)
    return Ypred_file, Ytruth_file
def evaluate(self, save_dir='data/', prefix=''):
    """
    Run the conditional model in reverse (z, y -> x) for every test batch and
    append the ground truth and the predictions to csv files in save_dir.

    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()
    # Set to evaluation mode for batch_norm layers
    self.model.eval()
    dim_z = self.flags.dim_z
    saved_model_str = self.saved_model.replace('/', '_') + prefix
    # Get the file names
    Ypred_file = os.path.join(save_dir,
                              'test_Ypred_{}.csv'.format(saved_model_str))
    Xtruth_file = os.path.join(save_dir,
                               'test_Xtruth_{}.csv'.format(saved_model_str))
    Ytruth_file = os.path.join(save_dir,
                               'test_Ytruth_{}.csv'.format(saved_model_str))
    Xpred_file = os.path.join(save_dir,
                              'test_Xpred_{}.csv'.format(saved_model_str))
    tk = time_keeper(
        time_keeping_file=os.path.join(save_dir, 'evaluation time.txt'))
    # Open those files to append
    with open(Xtruth_file, 'a') as fxt, open(Ytruth_file, 'a') as fyt, \
            open(Ypred_file, 'a') as fyp, open(Xpred_file, 'a') as fxp:
        # Loop through the eval data and evaluate
        for x, y in self.test_loader:
            batch_size = len(x)
            if cuda:
                x = x.cuda()
                y = y.cuda()
            # Create the z noise on the same device as y so the model never
            # receives inputs living on different devices.
            z = torch.randn(batch_size, dim_z, device=y.device)
            Xpred = self.model(z, y, rev=True).cpu().data.numpy()
            np.savetxt(fxt, x.cpu().data.numpy())
            np.savetxt(fxp, Xpred)
            np.savetxt(fyt, y.cpu().data.numpy())
            if self.flags.data_set != 'Yang_sim':
                Ypred = simulator(self.flags.data_set, Xpred)
                np.savetxt(fyp, Ypred)
        tk.record(1)
    return Ypred_file, Ytruth_file
def sim_one(dirx, dset, plot=False):
    """
    Simulate Y for a stored Xpred file and write it to the matching Ypred
    file, optionally plotting against the matching Ytruth file.

    :param dirx: Either the path of an Xpred csv file (detected by '.csv'
                 appearing in the string) or a directory prefix to which the
                 default '<kind>_<data_set>_best_model.csv' names are appended
    :param dset: Data set name; stored on the flags and passed to the simulator
    :param plot: When True, call pl() on the Ypred/Ytruth pair
    :return: None
    """
    flags = flag_reader.read_flag()
    flags.data_set = dset
    flags.model_name = flags.data_set.lower()
    flags.eval_model = flags.model_name

    given_a_file = '.csv' in dirx
    if not given_a_file:
        tail = flags.data_set + '_best_model.csv'
        fxp = dirx + 'test_Xpred_' + tail
        fyp = dirx + 'test_Ypred_' + tail
        fyt = dirx + 'test_Ytruth_' + tail
    else:
        fxp = dirx
        fyp, fyt = (fxp.replace('Xpred', 'Ypred'),
                    fxp.replace('Xpred', 'Ytruth'))

    predicted_x = np.genfromtxt(fxp, delimiter=' ')
    np.savetxt(fyp, simulator(flags.data_set, predicted_x), delimiter=' ')

    if not plot:
        return
    pl(fyp, fyt, flags, save_dir=dirx)
def evaluate(self, save_dir='data/', prefix=''):
    """
    For every test batch, get the mixture parameters from the model, draw a
    sample from them as the predicted X, and append the ground truth and the
    predictions to csv files in save_dir.

    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        self.model.cuda()
    self.model.eval()

    model_tag = self.saved_model.replace('/', '_') + prefix
    Ypred_file = os.path.join(save_dir, 'test_Ypred_{}.csv'.format(model_tag))
    Xtruth_file = os.path.join(save_dir, 'test_Xtruth_{}.csv'.format(model_tag))
    Ytruth_file = os.path.join(save_dir, 'test_Ytruth_{}.csv'.format(model_tag))
    Xpred_file = os.path.join(save_dir, 'test_Xpred_{}.csv'.format(model_tag))
    timer = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    # Files are opened in append mode, so repeated runs accumulate rows
    with open(Xtruth_file, 'a') as x_truth_out, \
            open(Ytruth_file, 'a') as y_truth_out, \
            open(Ypred_file, 'a') as y_pred_out, \
            open(Xpred_file, 'a') as x_pred_out:
        for geometry, spectra in self.test_loader:
            if on_gpu:
                geometry, spectra = geometry.cuda(), spectra.cuda()
            print('model in eval:', self.model)
            pi, sigma, mu = self.model(spectra)
            Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()
            np.savetxt(x_truth_out, geometry.cpu().data.numpy())
            np.savetxt(y_truth_out, spectra.cpu().data.numpy())
            np.savetxt(x_pred_out, Xpred)
            if self.flags.data_set == 'meta_material':
                # No simulator call for this data set
                continue
            np.savetxt(y_pred_out, simulator(self.flags.data_set, Xpred))
        timer.record(1)
    return Ypred_file, Ytruth_file
def predict_inverse(self, Ytruth_file, multi_flag, save_dir='data/',
                    prefix=''):
    """
    Solve the inverse problem for every target row stored in a csv file by
    calling self.evaluate_one on each row, appending the results to csv files
    in save_dir.

    :param Ytruth_file: Path of the input targets (comma or space delimited)
    :param multi_flag: Forwarded to evaluate_one as its save_all argument
    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) of the *output* paths in save_dir
    """
    self.load()  # load the model as constructed
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        self.model.cuda()
    self.model.eval()
    model_tag = self.saved_model.replace('/', '_') + prefix

    # Try ',' first; a single resulting column means the file is ' ' delimited
    targets = pd.read_csv(Ytruth_file, header=None, delimiter=',')
    if targets.shape[1] == 1:
        targets = pd.read_csv(Ytruth_file, header=None, delimiter=' ')
    Ytruth_tensor = torch.from_numpy(targets.values).to(torch.float)
    print('shape of Ytruth tensor :', Ytruth_tensor.shape)

    Ypred_file = os.path.join(save_dir, 'test_Ypred_{}.csv'.format(model_tag))
    truth_out_file = os.path.join(save_dir,
                                  'test_Ytruth_{}.csv'.format(model_tag))
    Xpred_file = os.path.join(save_dir, 'test_Xpred_{}.csv'.format(model_tag))
    timer = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    # evaluate_one is asked for simulator output unless this is a 'Yang' set
    save_Simulator_Ypred = 'Yang' not in self.flags.data_set

    if on_gpu:
        Ytruth_tensor = Ytruth_tensor.cuda()
    print('model in eval:', self.model)

    with open(truth_out_file, 'a') as y_truth_out, \
            open(Ypred_file, 'a') as y_pred_out, \
            open(Xpred_file, 'a') as x_pred_out:
        np.savetxt(y_truth_out, Ytruth_tensor.cpu().data.numpy())
        for ind, spectra in enumerate(Ytruth_tensor):
            # Only the best X is used; Y is re-simulated from it below
            Xpred, _, _ = self.evaluate_one(
                spectra,
                save_dir=save_dir,
                save_all=multi_flag,
                ind=ind,
                MSE_Simulator=False,
                save_misc=False,
                save_Simulator_Ypred=save_Simulator_Ypred)
            np.savetxt(x_pred_out, Xpred)
            if not self.flags.data_set == 'Yang_sim':
                np.savetxt(y_pred_out, simulator(self.flags.data_set, Xpred))
        timer.record(1)
    return Ypred_file, truth_out_file
def sim(self, X):
    """
    Evaluate the simulator for self.data_set on a tensor of inputs.

    :param X: Input tensor; it is copied to the CPU and handed to the
              simulator as a numpy array
    :return: The simulator output as a float tensor on the default CUDA device
    """
    inputs_np = X.cpu().data.numpy()
    outputs_np = simulator(self.data_set, inputs_np)
    outputs = torch.from_numpy(outputs_np).float()
    return outputs.cuda()
def evaluate_one(self,
                 target_spectra,
                 save_dir='data/',
                 MSE_Simulator=False,
                 save_all=False,
                 ind=None,
                 save_misc=False,
                 save_Simulator_Ypred=False,
                 init_from_Xpred=None,
                 FF=True):
    """
    Evolve a population towards one target y and return the best candidate.

    :param target_spectra: The target spectra/y to optimise towards
    :param save_dir: The directory to save to when save_all is true
    :param MSE_Simulator: Not used by this implementation
    :param save_all: When true, additionally append the candidates with the
                     lowest squared error (at most 2048 of them) to per-target
                     'test_Xpred_point...' / 'test_Ypred_point...' files
    :param ind: The index of this target_spectra in the batch (used in the
                per-target file names)
    :param save_misc: Not used by this implementation
    :param save_Simulator_Ypred: When true (and the data set is not
                                 'Yang_sim'), Ypred_best is taken from the
                                 simulator instead of the model output
    :param init_from_Xpred: Not used by this implementation
    :param FF: Not used by this implementation
    :return: Xpred_best: The single best Xpred, shaped [1, -1]
    :return: Ypred_best: The Ypred belonging to Xpred_best, shaped [1, -1]
    :return: MSE_list: Per-candidate MSE between the model output of the last
                       generation and the target
    """
    # expand the target spectra to the population size
    target_spectra_expand = target_spectra.expand(
        [self.flags.population, -1])
    self.algorithm.set_pop_and_target(target_spectra_expand)

    begin = time.time()
    for _ in range(self.flags.generations):
        logit = self.algorithm.evolve()
        # The loss is still evaluated every generation as before; its value
        # is not consumed in this function.
        self.make_loss(logit, target_spectra_expand)
    print("Evolution: {}".format(time.time() - begin))

    # Convert once and reuse below
    geometry_eval_input = self.algorithm.old_gen.cpu().data.numpy()
    logit_np = logit.cpu().data.numpy()
    target_np = target_spectra_expand.cpu().data.numpy()

    if save_all:  # If saving all the results together instead of the first one
        mse_loss = np.reshape(
            np.sum(np.square(logit_np - target_np), axis=1), [-1, 1])
        # Pair every loss with its row index. The index column is sized from
        # the losses themselves (the batch was expanded to flags.population),
        # so it always lines up with mse_loss.
        row_index = np.reshape(np.arange(len(mse_loss)), [-1, 1])
        mse_loss = np.concatenate((mse_loss, row_index), axis=1)
        loss_sort = mse_loss[mse_loss[:, 0].argsort(
            kind='mergesort')]  # Sort the loss list (stable)
        exclude_top = 0
        trail_nums = 2048
        good_index = loss_sort[exclude_top:trail_nums + exclude_top,
                               1].astype('int')  # Get the indexs
        saved_model_str = self.saved_model.replace('/', '_')
        Ypred_file = os.path.join(
            save_dir,
            'test_Ypred_point{}{}{}.csv'.format(saved_model_str, 'inference',
                                                ind))
        Xpred_file = os.path.join(
            save_dir,
            'test_Xpred_point{}{}{}.csv'.format(saved_model_str, 'inference',
                                                ind))
        print("HERE:\t", Ypred_file)
        selected_geometry = geometry_eval_input[good_index, :]
        if self.flags.data_set != 'Yang_sim':
            Ypred = simulator(self.flags.data_set, selected_geometry)
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, Ypred)
                np.savetxt(fxp, selected_geometry)
        else:
            # No simulator output is saved for 'Yang_sim'
            with open(Xpred_file, 'a') as fxp:
                np.savetxt(fxp, selected_geometry)

    ###################################
    # From candidates choose the best #
    ###################################
    Ypred = logit_np
    # calculate the MSE list and get the best one
    MSE_list = np.mean(np.square(Ypred - target_np), axis=1)
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(
        np.copy(geometry_eval_input[best_estimate_index, :]), [1, -1])

    if save_Simulator_Ypred and self.flags.data_set != 'Yang_sim':
        begin = time.time()
        Ypred = simulator(self.flags.data_set, geometry_eval_input)
        print("Simulation: ", time.time() - begin)
        if len(np.shape(Ypred)) == 1:
            # A 1d simulator output (e.g. ballistics) becomes a column
            Ypred = np.reshape(Ypred, [-1, 1])
    Ypred_best = np.reshape(np.copy(Ypred[best_estimate_index, :]), [1, -1])
    return Xpred_best, Ypred_best, MSE_list
def evaluate(self, save_dir='data/', prefix=''):
    """
    Run the backward model (self.model_b) over the test loader and append
    the ground truth and the predictions to csv files in save_dir.

    :param save_dir: Directory receiving the csv files and the timing file
    :param prefix: Text appended to the saved-model name used in file names
    :return: Tuple (Ypred_file, Ytruth_file) holding the output paths
    """
    self.load()  # load the model as constructed
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        self.model_b.cuda()
        self.model_f.cuda()
    # Evaluation mode matters for batch_norm layers
    self.model_f.eval()
    self.model_b.eval()
    print('using data set simulator: ', self.flags.data_set)

    model_tag = self.saved_model.replace('/', '_') + prefix
    Ypred_file = os.path.join(save_dir, 'test_Ypred_{}.csv'.format(model_tag))
    Xtruth_file = os.path.join(save_dir, 'test_Xtruth_{}.csv'.format(model_tag))
    Ytruth_file = os.path.join(save_dir, 'test_Ytruth_{}.csv'.format(model_tag))
    Xpred_file = os.path.join(save_dir, 'test_Xpred_{}.csv'.format(model_tag))
    timer = time_keeper(os.path.join(save_dir, 'evaluation_time.txt'))

    # Files are opened in append mode, so repeated runs accumulate rows
    with open(Xtruth_file, 'a') as x_truth_out, \
            open(Ytruth_file, 'a') as y_truth_out, \
            open(Ypred_file, 'a') as y_pred_out, \
            open(Xpred_file, 'a') as x_pred_out:
        for geometry, spectra in self.test_loader:
            if on_gpu:
                geometry, spectra = geometry.cuda(), spectra.cuda()
            Xpred = self.model_b(spectra)
            np.savetxt(y_truth_out, spectra.cpu().data.numpy())
            np.savetxt(x_truth_out, geometry.cpu().data.numpy())
            if not self.flags.data_set == 'meta_material':
                # Simulate from the prediction *before* the rescaling below
                Ypred = simulator(self.flags.data_set,
                                  Xpred.cpu().data.numpy())
                np.savetxt(y_pred_out, Ypred)
            if self.flags.data_set == 'ballistics':
                # The 4th column is multiplied by 15 in place before saving
                Xpred[:, 3] *= 15
            np.savetxt(x_pred_out, Xpred.cpu().data.numpy())
        timer.record(1)
    return Ypred_file, Ytruth_file
def train(self):
    """
    The major training function. This would start the training using
    information given in the flags.

    Every epoch the model is fitted on the train loader with the loss from
    self.make_loss and value-clipped gradients. Every flags.eval_step epochs
    a validation loss is computed, logged, and the model is saved when it
    improves. Training stops early once the best validation loss drops below
    flags.stop_threshold; the log is closed and the time recorded in that
    case as well.

    :return: None
    """
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()

    # Construct optimizer after the model moved to GPU
    self.optm = self.make_optimizer()
    self.lr_scheduler = self.make_lr_scheduler(self.optm)

    # Time keeping
    tk = time_keeper(
        time_keeping_file=os.path.join(self.ckpt_dir, 'training time.txt'))

    for epoch in range(self.flags.train_step):
        # Set to Training Mode
        train_loss = 0.0
        self.model.train()
        for j, (geometry, spectra) in enumerate(self.train_loader):
            if cuda:
                geometry = geometry.cuda()  # Put data onto GPU
                spectra = spectra.cuda()  # Put data onto GPU
            self.optm.zero_grad()  # Zero the gradient first
            pi, sigma, mu = self.model(spectra)  # Get the output
            loss = self.make_loss(pi, sigma, mu, geometry)  # Get the loss
            loss.backward()  # Calculate the backward gradients
            # gradient clipping
            torch.nn.utils.clip_grad_value_(self.model.parameters(), 1)
            self.optm.step()  # Move one step the optimizer
            # Accumulate a plain number so no autograd history is kept alive
            train_loss += loss.item()

        # Calculate the avg loss of training
        train_avg_loss = train_loss / (j + 1)

        if epoch % self.flags.eval_step == 0:
            # Record the training loss to the tensorboard
            self.log.add_scalar('Loss/train', train_avg_loss, epoch)

            # Set to Evaluation Mode
            self.model.eval()
            print("Doing Evaluation on the model now")
            test_loss = 0
            for j, (geometry, spectra) in enumerate(self.test_loader):
                if cuda:
                    geometry = geometry.cuda()
                    spectra = spectra.cuda()
                pi, sigma, mu = self.model(spectra)  # Get the output
                if self.flags.data_set == 'meta_material':
                    # No simulator is called for this set: validate on the
                    # training loss instead
                    loss = self.make_loss(pi, sigma, mu, geometry)
                    test_loss += loss.detach().cpu().numpy()
                else:
                    # detach + cpu so the conversion also works for CUDA
                    # tensors and tensors that track gradients
                    Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()
                    Ypred_np = simulator(self.flags.data_set, Xpred)
                    mae, mse = compare_truth_pred(Ypred_np,
                                                  spectra.cpu().numpy(),
                                                  cut_off_outlier_thres=10,
                                                  quiet_mode=True)
                    test_loss += np.mean(mse)  # Aggregate the loss
                    # NOTE(review): the source formatting does not show which
                    # level this break belongs to; it is placed in the
                    # simulator branch -- confirm.
                    break  # only get the first batch that is enough

            # Record the testing loss to the tensorboard
            test_avg_loss = test_loss / (j + 1)
            self.log.add_scalar('Loss/test', test_avg_loss, epoch)

            print("This is Epoch %d, training loss %.5f, validation loss %.5f"\
                  % (epoch, train_avg_loss, test_avg_loss ))

            # Model improving, save the model down
            if test_avg_loss < self.best_validation_loss:
                self.best_validation_loss = test_avg_loss
                self.save()
                print("Saving the model down...")

                if self.best_validation_loss < self.flags.stop_threshold:
                    print("Training finished EARLIER at epoch %d, reaching loss of %.5f" %\
                          (epoch, self.best_validation_loss))
                    # Leave the loop instead of returning so the log is
                    # closed and the training time is still recorded
                    break

        # Learning rate decay upon plateau
        self.lr_scheduler.step(train_avg_loss)

    self.log.close()
    tk.record(1)  # Record the total time of the training period
def evaluate_one(self,
                 target_spectra,
                 save_dir='data/',
                 MSE_Simulator=False,
                 save_all=False,
                 ind=None,
                 save_misc=False,
                 save_Simulator_Ypred=False):
    """
    The function which being called during evaluation and evaluates one
    target y by optimising a batch of candidate inputs through the model.

    :param target_spectra: The target spectra/y to backprop to
    :param save_dir: The directory to save to when save_all flag is true
    :param MSE_Simulator: Use the simulator output (instead of the default
                          model output) to compute MSE_list and pick the best
    :param save_all: When true, additionally append the candidates with the
                     lowest squared error (at most 1000 of them) to per-target
                     'test_Xpred_point...' / 'test_Ypred_point...' files
    :param ind: The index of this target_spectra in the batch
    :param save_misc: When true, the very first candidate batch of the first
                      target is written to 'geometry_initialization.csv'
    :param save_Simulator_Ypred: When true, saved/returned Ypred values come
                                 from the simulator instead of the model
    :return: Xpred_best: The single best Xpred, shaped [1, -1]
    :return: Ypred_best: The Ypred belonging to Xpred_best, shaped [1, -1]
    :return: MSE_list: The list of MSE at the last stage
    """

    def as_2d(values):
        # A 1d output (e.g. ballistics) is turned into a single column
        if len(np.shape(values)) == 1:
            return np.reshape(values, [-1, 1])
        return values

    # Initialize the geometry_eval or the initial guess xs
    geometry_eval = self.initialize_geometry_eval()
    # Set up the learning schedule and optimizer
    self.optm_eval = self.make_optimizer_eval(geometry_eval)
    self.lr_scheduler = self.make_lr_scheduler(self.optm_eval)

    # expand the target spectra to eval batch size
    target_spectra_expand = target_spectra.expand(
        [self.flags.eval_batch_size, -1])

    # Begin NA
    last_step = self.flags.backprop_step - 1
    for i in range(self.flags.backprop_step):
        # Make the initialization from [-1, 1], can only be in loop due to
        # gradient calculator constraint
        geometry_eval_input = self.initialize_from_uniform_to_dataset_distrib(
            geometry_eval)
        if save_misc and ind == 0 and i == 0:
            # save the modified initial guess to verify distribution
            np.savetxt('geometry_initialization.csv',
                       geometry_eval_input.cpu().data.numpy())
        self.optm_eval.zero_grad()  # Zero the gradient first
        logit = self.model(geometry_eval_input)  # Get the output
        # Boundary loss is controlled here through the G argument
        loss = self.make_loss(logit, target_spectra_expand,
                              G=geometry_eval_input)
        loss.backward()  # Calculate the Gradient
        # No update after the final forward pass, so logit and
        # geometry_eval_input keep describing the same candidates
        if i != last_step:
            self.optm_eval.step()  # Move one step the optimizer
            self.lr_scheduler.step(loss.data)

    # Convert once and reuse below
    geometry_np = geometry_eval_input.cpu().data.numpy()
    logit_np = logit.cpu().data.numpy()
    target_np = target_spectra_expand.cpu().data.numpy()

    if save_all:  # If saving all the results together instead of the first one
        ##############################################################
        # Choose the top "trail_nums" points from NA solutions       #
        ##############################################################
        mse_loss = np.reshape(
            np.sum(np.square(logit_np - target_np), axis=1), [-1, 1])
        mse_loss = np.concatenate(
            (mse_loss,
             np.reshape(np.arange(self.flags.eval_batch_size), [-1, 1])),
            axis=1)
        loss_sort = mse_loss[mse_loss[:, 0].argsort(
            kind='mergesort')]  # Sort the loss list (stable)
        exclude_top = 0
        trail_nums = 1000
        good_index = loss_sort[exclude_top:trail_nums + exclude_top,
                               1].astype('int')  # Get the indexs
        saved_model_str = self.saved_model.replace(
            '/', '_') + 'inference' + str(ind)
        Ypred_file = os.path.join(
            save_dir, 'test_Ypred_point{}.csv'.format(saved_model_str))
        Xpred_file = os.path.join(
            save_dir, 'test_Xpred_point{}.csv'.format(saved_model_str))
        if self.flags.data_set != 'meta_material':
            # 2 options: simulator/logit. The simulator is only run when its
            # output is the one being saved.
            if save_Simulator_Ypred:
                Ypred = simulator(self.flags.data_set, geometry_np)
            else:
                Ypred = logit_np
            Ypred = as_2d(Ypred)
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, Ypred[good_index, :])
                np.savetxt(fxp, geometry_np[good_index, :])
        else:
            # Only the candidates are saved for the meta_material data set
            with open(Xpred_file, 'a') as fxp:
                np.savetxt(fxp, geometry_np[good_index, :])

    ###################################
    # From candidates choose the best #
    ###################################
    print("Your MSE_Simulator status is :", MSE_Simulator)
    if MSE_Simulator:  # If we are using Simulator as Ypred standard
        Ypred = simulator(self.flags.data_set, geometry_np)
    else:
        Ypred = logit_np
    Ypred = as_2d(Ypred)

    # calculate the MSE list and get the best one
    MSE_list = np.mean(np.square(Ypred - target_np), axis=1)
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(np.copy(geometry_np[best_estimate_index, :]),
                            [1, -1])
    if save_Simulator_Ypred:
        Ypred = as_2d(simulator(self.flags.data_set, geometry_np))
    Ypred_best = np.reshape(np.copy(Ypred[best_estimate_index, :]), [1, -1])

    return Xpred_best, Ypred_best, MSE_list
def evaluate_one(self, target_spectra, save_dir='data/', MSE_Simulator=False,
                 save_all=False, ind=None, save_misc=False,
                 save_Simulator_Ypred=True, init_from_Xpred=None, FF=True):
    """
    Backpropagate a batch of candidate inputs ("trails") towards one target y.

    The candidates are optimized for `self.flags.backprop_step` iterations
    with an SGD optimizer, then ranked by (forward-model MSE + boundary loss).

    :param target_spectra: The target spectra/y to backprop to
    :param save_dir: The directory to save to when save_all flag is true. If
                     it contains 'BP_on_FF_on', a second, unsorted copy of the
                     results is written to the matching 'BP_on_FF_off' path
    :param MSE_Simulator: Currently unused in this function
    :param save_all: Save the (sorted) trails of this target to csv files
                     instead of only returning the best one
    :param ind: The index of this target_spectra in the batch, used in the
                output file names
    :param save_misc: Currently unused in this function
    :param save_Simulator_Ypred: Use the simulator output (instead of the
                     network output) as the saved / returned Ypred. Ignored
                     for the 'Yang' data set, which has no simulator here
    :param init_from_Xpred: Optional initial guess; when given it is handed
                     to `initialize_geometry_eval` and optimized directly
    :param FF: (forward_filtering) Currently unused in this function
    :return: Xpred_best: The 1 single best Xpred, shape [1, -1]
    :return: Ypred_best: The Ypred that corresponds to Xpred_best, shape [1, -1]
    :return: MSE_list: Per-trail MSE (plus boundary loss) at the last step
    """
    # Initialize the geometry_eval or the initial guess xs
    geometry_eval = self.initialize_geometry_eval(init_from_Xpred)

    # Set up the learning schedule and optimizer
    # (02.02: SGD is forced here for the empirical proof of the NA bound)
    self.optm_eval = self.make_optimizer_eval(geometry_eval,
                                              optimizer_type='SGD')
    self.lr_scheduler = self.make_lr_scheduler(self.optm_eval)

    # Expand the target spectra to eval batch size
    target_spectra_expand = target_spectra.expand(
        [self.flags.eval_batch_size, -1])

    # Begin NA
    last_step = self.flags.backprop_step - 1
    for i in range(self.flags.backprop_step):
        # The mapping to the data set distribution has to happen inside the
        # loop due to the gradient calculator constraint
        if init_from_Xpred is None:
            geometry_eval_input = \
                self.initialize_from_uniform_to_dataset_distrib(geometry_eval)
        else:
            geometry_eval_input = geometry_eval
        self.optm_eval.zero_grad()                  # Zero the gradient first
        logit = self.model(geometry_eval_input)     # Get the output
        # Boundary loss is switched on by passing G
        loss = self.make_loss(logit, target_spectra_expand,
                              G=geometry_eval_input)
        loss.backward()                             # Calculate the gradient
        # Update weights and learning rate scheduler; the last iteration only
        # evaluates so that logit matches the returned geometry
        if i != last_step:
            self.optm_eval.step()
            self.lr_scheduler.step(loss.data)

    # Nothing below changes the tensors any more, so convert them only once
    Xpred_np = geometry_eval_input.cpu().data.numpy()
    logit_np = logit.cpu().data.numpy()
    target_np = target_spectra_expand.cpu().data.numpy()

    if save_all:
        ##############################################################
        # Choose the top "trail_nums" points from NA solutions       #
        ##############################################################
        # The filtering takes the (weighted) boundary loss into account
        mse_loss = np.reshape(
            np.sum(np.square(logit_np - target_np), axis=1), [-1, 1])
        BDY_loss = self.get_boundary_loss_list_np(Xpred_np)
        BDY_strength = {
            'ballistics': 0.5,
            'sine_wave': 0.1,
            'robotic_arm': 0.01 / 2048,
        }.get(self.flags.data_set, 10 / 2048)
        mse_loss += BDY_strength * np.reshape(BDY_loss, [-1, 1])

        # Second column keeps the original trail index through the sort
        trail_index = np.reshape(
            np.arange(self.flags.eval_batch_size), [-1, 1])
        mse_loss = np.concatenate((mse_loss, trail_index), axis=1)
        loss_sort = mse_loss[mse_loss[:, 0].argsort(kind='mergesort')]

        exclude_top = 0
        trail_nums = 2048
        keep = slice(exclude_top, trail_nums + exclude_top)
        good_index = loss_sort[keep, 1].astype('int')       # sorted (FF on)
        good_index_FF_off = mse_loss[keep, 1].astype('int')  # original order

        if init_from_Xpred is None:
            suffix = 'inference'
        else:
            suffix = 'modulized_inference'
        saved_model_str = self.saved_model.replace('/', '_') + suffix + str(ind)
        Ypred_file = os.path.join(
            save_dir, 'test_Ypred_point{}.csv'.format(saved_model_str))
        Xpred_file = os.path.join(
            save_dir, 'test_Xpred_point{}.csv'.format(saved_model_str))

        # The strategy of re-using the BPed result: save two versions of the
        # files, one with FF and one without
        also_FF_off = 'BP_on_FF_on' in save_dir
        if also_FF_off:
            Ypred_file_FF_off = Ypred_file.replace('BP_on_FF_on',
                                                   'BP_on_FF_off')
            Xpred_file_FF_off = Xpred_file.replace('BP_on_FF_on',
                                                   'BP_on_FF_off')

        if self.flags.data_set != 'Yang':
            # 2 options: simulator (default) / logit. The simulator is only
            # run when its output is actually the one being saved
            if save_Simulator_Ypred:
                Ypred = simulator(self.flags.data_set, Xpred_np)
            else:
                Ypred = logit_np
            if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
                Ypred = np.reshape(Ypred, [-1, 1])
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, Ypred[good_index, :])
                np.savetxt(fxp, Xpred_np[good_index, :])
            if also_FF_off:
                print("outputting files for FF_off as well")
                with open(Xpred_file_FF_off, 'a') as fxp, \
                        open(Ypred_file_FF_off, 'a') as fyp:
                    np.savetxt(fyp, Ypred[good_index_FF_off, :])
                    np.savetxt(fxp, Xpred_np[good_index_FF_off, :])
        else:
            # No simple simulator for this data set: only Xpred is saved
            with open(Xpred_file, 'a') as fxp:
                np.savetxt(fxp, Xpred_np[good_index, :])
            if also_FF_off:
                with open(Xpred_file_FF_off, 'a') as fxp:
                    np.savetxt(fxp, Xpred_np[good_index_FF_off, :])

    ###################################
    # From candidates choose the best #
    ###################################
    Ypred = logit_np
    if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
        Ypred = np.reshape(Ypred, [-1, 1])
    # Calculate the MSE list (plus boundary loss) and get the best one
    MSE_list = np.mean(np.square(Ypred - target_np), axis=1)
    BDY_list = self.get_boundary_loss_list_np(Xpred_np)
    MSE_list += BDY_list
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(np.copy(Xpred_np[best_estimate_index, :]), [1, -1])

    if save_Simulator_Ypred and self.flags.data_set != 'Yang':
        Ypred = simulator(self.flags.data_set, Xpred_np)
        if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
            Ypred = np.reshape(Ypred, [-1, 1])
    Ypred_best = np.reshape(np.copy(Ypred[best_estimate_index, :]), [1, -1])

    return Xpred_best, Ypred_best, MSE_list
def evaluate_one(self, target_spectra, save_dir='data/', MSE_Simulator=False,
                 save_all=False, ind=None, save_misc=False,
                 save_Simulator_Ypred=False):
    """
    Backpropagate a batch of candidate inputs ("trails") towards one target y.

    :param target_spectra: The target spectra/y to backprop to
    :param save_dir: The directory to save to when save_all flag is true
    :param MSE_Simulator: Rank the trails by the simulator output instead of
                          the default NN output logit
    :param save_all: Save the best (up to 1,000) trails of this target to csv
                     files instead of only returning the best one
    :param ind: The index of this target_spectra in the batch, used in the
                output file names
    :param save_misc: Dump extra debugging files: the initial guess (first
                      target only) and the final Xpred / Ypred /
                      simulator Ypred under 'visualize_final/'
    :param save_Simulator_Ypred: Use the simulator output (instead of the
                      network output) as the saved / returned Ypred
    :return: Xpred_best: The 1 single best Xpred, shape [1, -1]
    :return: Ypred_best: The Ypred that corresponds to Xpred_best, shape [1, -1]
    :return: MSE_list: The list of MSE at the last stage
    """
    # Initialize the geometry_eval or the initial guess xs
    geometry_eval = self.initialize_geometry_eval()
    # Set up the learning schedule and optimizer
    self.optm_eval = self.make_optimizer_eval(geometry_eval)
    self.lr_scheduler = self.make_lr_scheduler(self.optm_eval)

    # Expand the target spectra to eval batch size
    target_spectra_expand = target_spectra.expand(
        [self.flags.eval_batch_size, -1])

    # NOTE: the per-step simulator bookkeeping that used to live in this
    # function was already disabled (kept as string literals); it and its
    # placeholder variables are dropped here

    # Begin Backprop
    last_step = self.flags.backprop_step - 1
    for i in range(self.flags.backprop_step):
        # The mapping to the data set distribution has to happen inside the
        # loop due to the gradient calculator constraint
        geometry_eval_input = \
            self.initialize_from_uniform_to_dataset_distrib(geometry_eval)
        if save_misc and ind == 0 and i == 0:
            # Save the modified initial guess
            np.savetxt('geometry_initialization.csv',
                       geometry_eval_input.cpu().data.numpy())
        self.optm_eval.zero_grad()                  # Zero the gradient first
        logit = self.model(geometry_eval_input)     # Get the output
        # Boundary loss is switched on by passing G
        loss = self.make_loss(logit, target_spectra_expand,
                              G=geometry_eval_input)
        loss.backward()                             # Calculate the gradient
        # Update weights and learning rate scheduler; the last iteration only
        # evaluates so that logit matches the returned geometry
        if i != last_step:
            self.optm_eval.step()
            self.lr_scheduler.step(loss.data)

    # Nothing below changes the tensors any more, so convert them only once
    Xpred_np = geometry_eval_input.cpu().data.numpy()
    logit_np = logit.cpu().data.numpy()
    target_np = target_spectra_expand.cpu().data.numpy()

    if save_all:
        #######################################################
        # Choose the top 1,000 points from Backprop solutions #
        #######################################################
        mse_loss = np.reshape(
            np.sum(np.square(logit_np - target_np), axis=1), [-1, 1])
        # Second column keeps the original trail index through the sort
        trail_index = np.reshape(
            np.arange(self.flags.eval_batch_size), [-1, 1])
        mse_loss = np.concatenate((mse_loss, trail_index), axis=1)
        loss_sort = mse_loss[mse_loss[:, 0].argsort(kind='mergesort')]
        exclude_top = 0
        trail_nums = 1000
        good_index = loss_sort[exclude_top:trail_nums + exclude_top,
                               1].astype('int')

        saved_model_str = self.saved_model.replace('/', '_') \
            + 'inference' + str(ind)
        Ypred_file = os.path.join(
            save_dir, 'test_Ypred_point{}.csv'.format(saved_model_str))
        Xpred_file = os.path.join(
            save_dir, 'test_Xpred_point{}.csv'.format(saved_model_str))

        if self.flags.data_set != 'meta_material':
            # 2 options: simulator / logit. The simulator is only run when
            # its output is actually the one being saved
            if save_Simulator_Ypred:
                Ypred = simulator(self.flags.data_set, Xpred_np)
            else:
                Ypred = logit_np
            if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
                Ypred = np.reshape(Ypred, [-1, 1])
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, Ypred[good_index, :])
                np.savetxt(fxp, Xpred_np[good_index, :])
        else:
            # meta-material data set is handled specially: only Xpred is saved
            with open(Xpred_file, 'a') as fxp:
                np.savetxt(fxp, Xpred_np[good_index, :])

    #############################
    # After BP, choose the best #
    #############################
    print("Your MSE_Simulator status is :", MSE_Simulator)
    if MSE_Simulator:   # Use the simulator as the Ypred standard
        Ypred = simulator(self.flags.data_set, Xpred_np)
    else:
        Ypred = logit_np
    if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
        Ypred = np.reshape(Ypred, [-1, 1])

    # Calculate the MSE list and get the best one
    MSE_list = np.mean(np.square(Ypred - target_np), axis=1)
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(np.copy(Xpred_np[best_estimate_index, :]), [1, -1])
    if save_Simulator_Ypred:
        Ypred = simulator(self.flags.data_set, Xpred_np)
        if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
            Ypred = np.reshape(Ypred, [-1, 1])
    Ypred_best = np.reshape(np.copy(Ypred[best_estimate_index, :]), [1, -1])

    ######################
    # Test code on 04.23 #
    ######################
    # As one of the attempts to make Backprop better, this saves all the
    # Xpred and Ypred made after the backpropagation so that they can be
    # visualized afterwards
    if save_misc:
        np.savetxt('visualize_final/point{}_Xpred.csv'.format(ind), Xpred_np)
        Ypred = logit_np
        if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
            Ypred = np.reshape(Ypred, [-1, 1])
        np.savetxt('visualize_final/point{}_Ypred.csv'.format(ind), Ypred)
        Ypred_Simulator = simulator(self.flags.data_set, Xpred_np)
        if len(np.shape(Ypred_Simulator)) == 1:
            # The reshaped array used to be assigned to Ypred by mistake
            Ypred_Simulator = np.reshape(Ypred_Simulator, [-1, 1])
        np.savetxt('visualize_final/point{}_Ypred_Simulator.csv'.format(ind),
                   Ypred_Simulator)

    return Xpred_best, Ypred_best, MSE_list
def evaluate(self, save_dir='data/', save_all=False, MSE_Simulator=False,
             save_misc=False, save_Simulator_Ypred=True):
    """
    Evaluate the model on the first batch of the test loader and write the
    truth / prediction csv files (outputs the mean MSE to stdout).

    Note that Ypred and Ytruth still refer to spectra, while Xpred and Xtruth
    still refer to geometries.

    :param save_dir: The directory the csv files and the timing file go to
    :param save_all: Currently unused in this function
    :param MSE_Simulator: Currently unused in this function
    :param save_misc: Currently unused in this function
    :param save_Simulator_Ypred: Run the predicted geometry through the
                simulator to get Ypred. For 'Yang' / 'Yang_sim' (or when this
                flag is off) Ypred is just a copy of the truth spectra
    :return: Ypred_file, Ytruth_file: The paths of the written spectra files
    """
    self.load()                             # load the model as constructed
    cuda = torch.cuda.is_available()
    if cuda:
        self.model.cuda()
    self.model.eval()
    saved_model_str = self.saved_model.replace('/', '_')

    # Get the file names
    Ypred_file = os.path.join(
        save_dir, 'test_Ypred_{}.csv'.format(saved_model_str))
    Xtruth_file = os.path.join(
        save_dir, 'test_Xtruth_{}.csv'.format(saved_model_str))
    Ytruth_file = os.path.join(
        save_dir, 'test_Ytruth_{}.csv'.format(saved_model_str))
    Xpred_file = os.path.join(
        save_dir, 'test_Xpred_{}.csv'.format(saved_model_str))
    print("evalution output pattern:", Ypred_file)

    # Time keeping
    tk = time_keeper(
        time_keeping_file=os.path.join(save_dir, 'evaluation_time.txt'))

    # Open those files for writing (existing content is overwritten)
    with open(Xtruth_file, 'w') as fxt, open(Ytruth_file, 'w') as fyt, \
            open(Ypred_file, 'w') as fyp, open(Xpred_file, 'w') as fxp:
        # Only the first batch of the test loader is evaluated
        geometry, spectra = next(iter(self.test_loader))
        if cuda:
            geometry = geometry.cuda()
            spectra = spectra.cuda()

        # The model maps spectra to the predicted geometry
        Xpred = self.model(spectra).cpu().data.numpy()
        Ytruth = spectra.cpu().data.numpy()

        has_simulator = self.flags.data_set not in ('Yang', 'Yang_sim')
        if save_Simulator_Ypred and has_simulator:
            Ypred = simulator(self.flags.data_set, Xpred)
            if len(np.shape(Ypred)) == 1:
                # ballistics: y is only 1d; without the column shape the
                # subtraction below would broadcast to an (N, N) matrix
                Ypred = np.reshape(Ypred, [-1, 1])
        else:
            Ypred = spectra.cpu().data.numpy()

        MSE_List = np.mean(np.power(Ypred - Ytruth, 2), axis=1)
        mse = np.mean(MSE_List)
        print(mse)

        np.savetxt(fxt, geometry.cpu().data.numpy())
        np.savetxt(fyt, Ytruth)
        if self.flags.data_set != 'Yang':
            np.savetxt(fyp, Ypred)
        np.savetxt(fxp, Xpred)
    # Record the time of the eval period, as the sibling evaluation routines
    # of this file do (the keeper used to be created but never recorded)
    tk.record(1)
    return Ypred_file, Ytruth_file
# Generate random-guess "predictions": for every file, draw Xpred uniformly
# from [-1, 1] and store it next to the matching Ypred (or Ytruth) file.
x_dim = 8                   # number of columns of each Xpred file
num_of_files = 1000         # number of files to generate
num_of_points = 420         # number of rows (random guesses) per file
data_dir = '/work/sr365/multi_eval/Random/meta_material/'
Xpred_file_prefix = 'test_Xpred_random_guess_answers_inference'
data_set = 'meta_material'
# data_set = 'robotic_arm'
# data_set = 'sine_wave'
# data_set = 'gaussian_mixture'

if __name__ == '__main__':
    Ytruth = None
    if data_set == 'meta_material':
        # meta_material does not have a simulator, it needs forward model.
        # The truth file is the same for every generated file, so it is read
        # once here instead of once per iteration
        Ytruth = pd.read_csv(os.path.join(data_dir, 'yt.csv'),
                             header=None, delimiter=' ').values

    for i in range(num_of_files):
        print(
            "Generating random uniform distribution fake Xpred data_set for ",
            data_set, "file", i)
        Xpred = np.random.uniform(-1, 1, size=(num_of_points, x_dim))
        Xpred_file = data_dir + data_set + Xpred_file_prefix + str(i) + '.csv'
        Ypred_file = Xpred_file.replace('Xpred', 'Ypred')
        np.savetxt(Xpred_file, Xpred, fmt='%.3f')
        if data_set == 'meta_material':
            # Only the truth is stored; no Ypred is produced for this data set
            Ytruth_file = Ypred_file.replace('Ypred', 'Ytruth')
            np.savetxt(Ytruth_file, Ytruth, fmt='%.3f')
            continue
        Ypred = simulator(data_set, Xpred)
        np.savetxt(Ypred_file, Ypred, fmt='%.3f')
def evaluate_one(self, target_spectra, save_dir='data/', save_all=False,
                 ind=None):
    """
    Backpropagate a batch of uniformly initialized candidates ("trails")
    towards one target y and return the best one by network (logit) MSE.

    The full per-step path of the candidates and of the network output is
    recorded and, for the first 40 targets, saved under 'best_mse/'. The path
    matrices have one row per trail, so this assumes that the flattened
    geometry and the flattened logit each have exactly eval_batch_size
    entries (i.e. 1-d x and 1-d y) -- TODO confirm against the caller.

    :param target_spectra: The target spectra/y to backprop to
    :param save_dir: The directory to save to when save_all flag is true
    :param save_all: Track, through the simulator, the best point every trail
                     reached during backprop and append it to per-trail files
    :param ind: The index of this target_spectra in the batch; with the
                default None the 'best_mse/' path files are not written
    :return: Xpred_best: The 1 single best Xpred at the last step, shape [1, -1]
    :return: Ypred_best: The network output for Xpred_best, shape [1, -1]
    :return: MSE_list: The per-trail network MSE at the last step
    """
    print("evaluate_one gets save_dir:", save_dir)
    batch_size = self.flags.eval_batch_size
    n_steps = self.flags.backprop_step

    # Initialize UNIFORM RANDOM NUMBER in [0, 1)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    geometry_eval = torch.rand([batch_size, self.flags.linear[0]],
                               requires_grad=True, device=device)
    self.optm_eval = self.make_optimizer_eval(geometry_eval)
    self.lr_scheduler = self.make_lr_scheduler(self.optm_eval)

    # Expand the target spectra to eval batch size
    target_spectra_expand = target_spectra.expand([batch_size, -1])
    target_np = target_spectra_expand.cpu().data.numpy()

    # Best (simulator) MSE reached by every trail and where it was reached
    save_all_Best_MSE_list = np.ones([batch_size, 1]) * 999
    save_all_Xpred_best = np.zeros_like(geometry_eval.cpu().data.numpy())
    save_all_Ypred_best = None
    # Path of every trail (rows) over the backprop steps (columns)
    Full_Xpred_path = np.zeros([batch_size, n_steps])
    Full_Ypred_path = np.zeros([batch_size, n_steps])

    for i in range(n_steps):
        # Make the initialization from [-1, 1]
        geometry_eval_input = geometry_eval * 2 - 1
        self.optm_eval.zero_grad()                  # Zero the gradient first
        logit = self.model(geometry_eval_input)     # Get the output
        loss = self.make_loss(logit, target_spectra_expand)
        loss.backward()                             # Calculate the gradient

        Xpred_np = geometry_eval_input.cpu().data.numpy()
        logit_np = logit.cpu().data.numpy()

        if save_all:
            ###################################
            # evaluate through simulator part #
            ###################################
            # The simulator output only feeds the save_all bookkeeping, so
            # it is not computed otherwise
            Ypred = simulator(self.flags.data_set, Xpred_np)
            if len(np.shape(Ypred)) == 1:   # ballistics: y is only 1d
                Ypred = np.reshape(Ypred, [-1, 1])
            MSE_list = np.reshape(
                np.mean(np.square(Ypred - target_np), axis=1), [-1, 1])
            # In the first step this is None, assign value to this
            if save_all_Ypred_best is None:
                save_all_Ypred_best = Ypred
            # Update those trails that got better in this step
            better_index = save_all_Best_MSE_list > MSE_list
            save_all_Best_MSE_list = np.where(better_index, MSE_list,
                                              save_all_Best_MSE_list)
            save_all_Xpred_best = np.where(better_index, Xpred_np,
                                           save_all_Xpred_best)
            save_all_Ypred_best = np.where(better_index, Ypred,
                                           save_all_Ypred_best)

        # Record the path of this step
        Full_Xpred_path[:, i] = np.reshape(Xpred_np, [-1])
        Full_Ypred_path[:, i] = np.reshape(logit_np, [-1])

        # Learning rate decay upon plateau; the last iteration only evaluates
        # so that logit matches the returned geometry
        if i != n_steps - 1:
            self.optm_eval.step()
            self.lr_scheduler.step(loss.data)

    # Save the paths for the first few targets. `ind` defaults to None,
    # which cannot be compared with an int
    if ind is not None and ind < 40:
        np.savetxt('best_mse/full_Xpred_path{}.csv'.format(ind),
                   Full_Xpred_path)
        np.savetxt('best_mse/full_Ypred_path{}.csv'.format(ind),
                   Full_Ypred_path)

    if save_all:
        # One pair of files per trail, appended to for every target
        for trail in range(len(Xpred_np)):
            saved_model_str = self.saved_model.replace('/', '_') \
                + 'inference' + str(trail)
            Ypred_file = os.path.join(
                save_dir, 'test_Ypred_{}.csv'.format(saved_model_str))
            Xpred_file = os.path.join(
                save_dir, 'test_Xpred_{}.csv'.format(saved_model_str))
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, save_all_Ypred_best[trail, :])
                np.savetxt(fxp, save_all_Xpred_best[trail, :])

    # Get the best performing one at the last step, judged by the logit
    MSE_list = np.mean(np.square(logit_np - target_np), axis=1)
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(np.copy(Xpred_np[best_estimate_index, :]), [1, -1])
    Ypred_best = np.reshape(np.copy(logit_np[best_estimate_index, :]), [1, -1])
    return Xpred_best, Ypred_best, MSE_list
def evaluate_one(self, target_spectra, save_dir='data/', MSE_Simulator=False,
                 save_all=False, ind=None, save_misc=False,
                 save_Simulator_Ypred=True, init_from_Xpred=None, FF=True,
                 save_MSE_each_epoch=False, noise_level=0):
    """
    Evaluate one target y by backpropagating a batch of candidate geometries
    (the different trials) through the model and picking the best one.

    :param target_spectra: The target spectra/y to backprop to; it is expanded
        to ``self.flags.eval_batch_size`` rows
    :param save_dir: The directory the Xpred/Ypred files are written to when
        the save_all flag is true
    :param MSE_Simulator: Use Simulator Loss to get the best instead of the
        default NN output logit (not read by this method body)
    :param save_all: The multi_evaluation where the lowest-loss trials
        (instead of only the best) are appended to files in save_dir
    :param ind: The index of this target_spectra in the batch; only used to
        build output file names
    :param save_misc: The flag to print misc information for debugging
        purposes (not read by this method body)
    :param save_Simulator_Ypred: If true and the data set is not 'Yang', the
        returned Ypred_best comes from the simulator instead of the model
    :param init_from_Xpred: Passed to ``self.initialize_geometry_eval``; when
        it is None the geometry is re-mapped by
        ``self.initialize_from_uniform_to_dataset_distrib`` on every step,
        otherwise the initialized geometry is optimized directly
    :param FF(forward_filtering): [default to be true for historical reason]
        The flag to control whether use forward filtering or not (not read by
        this method body)
    :param save_MSE_each_epoch: If true, the loss of every backprop step is
        appended to 'data/<data_set>_MSE_progress_point_<ind>.txt'
    :param noise_level: For datasets that need extra level of exploration, we
        add some gaussian noise to the resulting geometry (only applied when
        save_all is true)
    :return: Xpred_best: The 1 single best Xpred, shape [1, -1]
    :return: Ypred_best: The Ypred that corresponds to Xpred_best, shape
        [1, -1]
    :return: MSE_list: Per-trial mean squared error of the model output at
        the last step plus the boundary loss
    """

    def as_numpy(tensor):
        """Return the values of ``tensor`` as a numpy array on the host."""
        return tensor.cpu().data.numpy()

    # Initialize the geometry_eval or the initial guess xs
    geometry_eval = self.initialize_geometry_eval(init_from_Xpred)
    # Set up the learning schedule and optimizer
    self.optm_eval = self.make_optimizer_eval(geometry_eval)
    self.lr_scheduler = self.make_lr_scheduler(self.optm_eval)

    # Expand the target spectra to eval batch size
    target_spectra_expand = target_spectra.expand(
        [self.flags.eval_batch_size, -1])
    # The target is never modified below, so convert it only once
    target_np = as_numpy(target_spectra_expand)

    # Per-step loss values, only collected when save_MSE_each_epoch is set
    loss_list = []

    # Begin NA. No early stopping: every call runs the full backprop_step
    # iterations.
    for i in range(self.flags.backprop_step):
        # Make the initialization from [-1, 1], can only be in loop due to
        # gradient calculator constraint
        if init_from_Xpred is None:
            geometry_eval_input = \
                self.initialize_from_uniform_to_dataset_distrib(geometry_eval)
        else:
            geometry_eval_input = geometry_eval

        self.optm_eval.zero_grad()                  # Zero the gradient first
        logit = self.model(geometry_eval_input)     # Get the output
        # The boundary loss is controlled inside make_loss (it receives G)
        loss = self.make_loss(logit, target_spectra_expand,
                              G=geometry_eval_input, epoch=i)
        loss.backward()                             # Calculate the gradient
        # Update weights and learning rate scheduler
        self.optm_eval.step()
        loss_np = loss.data
        self.lr_scheduler.step(loss_np)
        if save_MSE_each_epoch:
            # Without this the progress file written below is always empty
            loss_list.append(loss_np.item())

    if save_MSE_each_epoch:
        # NOTE(review): this path is hard-coded under 'data/' and does not
        # follow save_dir -- confirm whether that is intended
        with open('data/{}_MSE_progress_point_{}.txt'.format(
                self.flags.data_set, ind), 'a') as epoch_file:
            np.savetxt(epoch_file, loss_list)

    if save_all:    # Saving the best trials together instead of the first one
        # Rank the trials by summed squared error plus weighted boundary loss
        trial_loss = np.sum(np.square(as_numpy(logit) - target_np), axis=1)
        BDY_loss = self.get_boundary_loss_list_np(
            as_numpy(geometry_eval_input))
        BDY_strength = 0.5
        trial_loss += BDY_strength * np.reshape(BDY_loss, [-1])
        # Stable sort, so trials with equal loss keep their original order
        loss_order = trial_loss.argsort(kind='mergesort')
        exclude_top = 0
        trail_nums = 200
        good_index = loss_order[exclude_top:trail_nums + exclude_top]

        if init_from_Xpred is None:
            saved_model_str = self.saved_model.replace(
                '/', '_') + 'inference' + str(ind)
        else:
            saved_model_str = self.saved_model.replace(
                '/', '_') + 'modulized_inference' + str(ind)

        # Adding some random noise to the result to increase the diversity.
        # This is in place, so everything below sees the noised geometry,
        # while logit still belongs to the geometry before the noise.
        geometry_eval_input += torch.randn_like(
            geometry_eval_input) * noise_level

        Ypred_file = os.path.join(
            save_dir, 'test_Ypred_point{}.csv'.format(saved_model_str))
        Xpred_file = os.path.join(
            save_dir, 'test_Xpred_point{}.csv'.format(saved_model_str))
        if 'Yang' not in self.flags.data_set:
            # Data sets with a simulator: save the simulated Ypred as well
            Ypred = simulator(
                self.flags.data_set,
                as_numpy(geometry_eval_input)[good_index, :])
            with open(Xpred_file, 'a') as fxp, open(Ypred_file, 'a') as fyp:
                np.savetxt(fyp, Ypred)
                np.savetxt(fxp, as_numpy(geometry_eval_input)[good_index, :])
        else:
            # This is for meta-material dataset, since it does not have a
            # simple simulator: only the geometry is saved
            with open(Xpred_file, 'a') as fxp:
                np.savetxt(fxp, as_numpy(geometry_eval_input)[good_index, :])

    ###################################
    # From candidates choose the best #
    ###################################
    Ypred = as_numpy(logit)
    # Calculate the MSE list and get the best one
    MSE_list = np.mean(np.square(Ypred - target_np), axis=1)
    BDY_list = self.get_boundary_loss_list_np(as_numpy(geometry_eval_input))
    MSE_list += BDY_list
    best_estimate_index = np.argmin(MSE_list)
    Xpred_best = np.reshape(
        np.copy(as_numpy(geometry_eval_input)[best_estimate_index, :]),
        [1, -1])

    # NOTE(review): this compares with exactly 'Yang' while the save_all
    # branch above uses a substring check -- confirm which data_set names are
    # supposed to skip the simulator
    if save_Simulator_Ypred and self.flags.data_set != 'Yang':
        Ypred = simulator(self.flags.data_set,
                          as_numpy(geometry_eval_input))
        if len(np.shape(Ypred)) == 1:
            # If this is the ballistics dataset where it only has 1d y'
            Ypred = np.reshape(Ypred, [-1, 1])
    Ypred_best = np.reshape(np.copy(Ypred[best_estimate_index, :]), [1, -1])
    return Xpred_best, Ypred_best, MSE_list