def main():
    # --------------------------------------- generate data ---------------------------------------
    t_generate_start = time()

    # simulated-data parameters
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    optical_model = 'km'  # the optical model to use
    ydim = 31  # number of data samples
    bound = [0.1, 0.9, 0.1, 0.9]
    seed = 1  # seed for generating data

    # generate training data
    # concentrations, reflectance, x, info = data.generate(
    #     model=optical_model,
    #     total_dataset_size=2**20 * 20,
    #     ydim=ydim,
    #     prior_bound=bound,
    #     seed=seed)
    concentrations, reflectance, x, info = data.math_optimized_generate()

    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))

    colors = np.arange(0, concentrations.shape[-1], 1)

    # hold out a few samples that are excluded from training and used as the final test set
    c_test = concentrations[-test_split:]
    r_test = reflectance[-test_split:]

    # spectral reflectance plots of the test samples, for inspection only (independent of the model)
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(r_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([400, 700, 0, 1])
    plt.savefig('test_target_reflectance.png', dpi=360)
    plt.close()

    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))

    # --------------------------------------- build the network ---------------------------------------
    # model dimensions
    ndim_x = concentrations.shape[-1]  # recipe dimension, i.e. the number of candidate colorants
    ndim_y = ydim  # reflectance dimension (31)
    ndim_z = 13  # latent space dimension
    ndim_tot = max(ndim_x, ndim_y + ndim_z)

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    p1 = Node([t1.out0], permute_layer, {'seed': 1})
    t2 = Node([p1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    p2 = Node([t2.out0], permute_layer, {'seed': 2})
    t3 = Node([p2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    p3 = Node([t3.out0], permute_layer, {'seed': 1})
    t4 = Node([p3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    p4 = Node([t4.out0], permute_layer, {'seed': 2})
    t5 = Node([p4.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    # define output layer node
    outp = OutputNode([t5.out0], name='output')

    # build the network
    nodes = [inp, t1, p1, t2, p2, t3, p3, t4, p4, t5, outp]
    model = ReversibleGraphNet(nodes)

    # --------------------------------------- train the network ---------------------------------------
    # hyperparameters
    # n_epochs = 3000             # number of training epochs
    n_epochs = 0                  # number of training epochs
    plot_cadence = 100            # plot the loss curves every 100 epochs
    meta_epoch = 12               # step size for adjusting the learning rate
    n_its_per_epoch = 12          # 12 batches per epoch
    batch_size = 1600             # 1600 samples per batch
    lr = 1.5e-3                   # initial learning rate
    gamma = 0.004**(1. / 1333)    # multiplicative factor of learning-rate decay
    l2_reg = 2e-5                 # weight decay (L2 penalty)

    # to make the input and output dimensions equal, pad with small values rather than exact zeros
    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass

    # define the optimizer
    # params: parameters to optimize, lr: learning rate,
    # betas: coefficients used for computing running averages of the gradient and its square,
    # eps: term added to the denominator to improve numerical stability,
    # weight_decay: weight decay (L2 penalty)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)

    # learning-rate schedule
    # optimizer: the wrapped optimizer
    # step_size: period of learning-rate decay
    # gamma: multiplicative factor of learning-rate decay
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # loss functions
    # x and z are unsupervised (MMD), y is supervised (squared error)
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(concentrations[test_split:],
                                       reflectance[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # initialize the figure for the test results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of test samples
    N_samp = 256

    # --------------------------------------- start training ---------------------------------------
    try:
        t_start = time()    # training start time
        loss_for_list = []  # record the forward-pass loss
        loss_rev_list = []  # record the backward-pass loss
        tsne = TSNE(n_components=2, init='pca')

        # colorant codes
        color_names = [
            '07H', '08', '08S', '09', '09B', '09S', '10B', '12', '13', '14',
            '15', '16', '17A', '18A', '19A', '20A-2', '23A', '2704', '2803',
            '2804', '2807'
        ]

        # loop over n_epochs epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # TODO: this branch is never entered
            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            avg_loss, loss_for, loss_rev = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, i_epoch)

            # append the forward and backward losses
            loss_for_list.append(loss_for.item())
            loss_rev_list.append(loss_rev.item())
            inn_losses = [loss_for_list, loss_rev_list]

            if ((i_epoch + 1) % plot_cadence == 0) & (i_epoch > 0):
                plot_losses(inn_losses, legend=['PE-GEN'],
                            lossNo=int((i_epoch + 1) / plot_cadence))

        # TODO
        model = torch.load('model_dir/km_impl_model')
        # torch.save(model, 'model_dir/km_impl_model')

        fig, axes = plt.subplots(1, 1, figsize=(2, 2))

        # reflectance of the real (measured) sample
        test_samps = np.array([[
            0.2673378, 0.3132285, 0.3183329, 0.3234908, 0.3318701, 0.3409707,
            0.3604081, 0.4168356, 0.5351773, 0.6202191, 0.6618687, 0.6919741,
            0.7136238, 0.7292901, 0.7314631, 0.7131701, 0.6773048, 0.6302681,
            0.5738088, 0.5133060, 0.4535525, 0.4108878, 0.3908512, 0.3808001,
            0.3752591, 0.3727644, 0.3801365, 0.3976869, 0.4237110, 0.4332685,
            0.4433292
        ]])
        # recipe of the real sample
        test_cons = np.array([[
            0, 0.8014, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1491, 0, 0, 0,
            0.2241, 0
        ]])

        for cnt in range(test_samps.shape[0]):
            print('before:', cnt, test_samps[cnt, :])
            test_samp = np.tile(np.array(test_samps[cnt, :]),
                                N_samp).reshape(N_samp, ydim)
            test_samp = torch.tensor(test_samp, dtype=torch.float)
            test_samp += y_noise_scale * torch.randn(N_samp, ydim)
            test_samp = torch.cat([torch.randn(N_samp, ndim_z),
                                   # zeros_noise_scale *
                                   torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                                   test_samp], dim=1)
            test_samp = test_samp.to(device)
            print('after:', cnt, test_samp)

            # use the network to predict parameters
            test_rev = model(test_samp, rev=True)[:, :colors.size]
            test_rev = test_rev.cpu().data.numpy()

            # assume a colorant is not needed if its predicted concentration is below a threshold
            test_rev = np.where(test_rev < 0.1, 0, test_rev)

            # compute the reflectance of the predicted recipes
            # recipe_ref = data.recipe_reflectance(test_rev, optical_model)
            # use the corrected model to compute the reflectance of the recipes
            recipe_ref = data.correct_recipe_reflectance(test_rev)

            print("######## Test Sample %d ########" % cnt)

            # record the three recipes with the smallest colour difference
            top3 = [[100, 0], [100, 0], [100, 0]]
            for n in range(test_rev.shape[0]):
                # print(test_rev[n, :])
                diff = data.color_diff(test_samps[cnt, :], recipe_ref[n, :])
                if diff < top3[2][0]:
                    top3[2][0] = diff
                    top3[2][1] = n
                    top3.sort()

            # print the three recipes with the smallest colour difference
            for n in range(3):
                print(test_rev[top3[n][1], :])
                print("color diff: %.2f \n" % top3[n][0])
            print("\n\n")

            # draw
            # feature scaling
            test_x = test_cons[cnt, :].reshape(1, test_cons[cnt, :].shape[-1])
            plot_x = np.concatenate((test_rev, test_x), axis=0)

            # use tsne to decrease dimensionality
            x_norm = pd.DataFrame(plot_x, columns=color_names)

            # classify the recipes by which colorants they need (1 if needed, 0 if not)
            classes = np.zeros(N_samp).reshape(N_samp, 1)
            paint_needed = np.where(test_rev == 0, 0, 1)
            for paint_no in colors:
                classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
            class_norm = pd.DataFrame(
                np.concatenate((classes, np.zeros(1).reshape(1, 1)), axis=0),
                columns=['class'])

            data_plot = pd.concat(
                [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm], axis=1)
            class_data = data_plot['class']

            axes.clear()
            recipe_classes = np.array(
                class_norm[:-1].drop_duplicates()).reshape(1, -1).tolist()[0]
            for recipe_class in recipe_classes:
                axes.scatter(data_plot[class_data == recipe_class][0],
                             data_plot[class_data == recipe_class][1],
                             s=2, alpha=0.5)
            axes.scatter(data_plot[class_data == 0][0],
                         data_plot[class_data == 0][1], marker='+', s=10)
            fig.canvas.draw()
            plt.savefig('test_result%d.png' % cnt, dpi=360)

        # loop over a few cases and plot results in a grid
        cnt = 0
        for i in range(r):
            for j in range(r):

                # convert data into correct format
                y_samps = np.tile(np.array(r_test[cnt, :]),
                                  N_samp).reshape(N_samp, ydim)
                y_samps = torch.tensor(y_samps, dtype=torch.float)
                y_samps += y_noise_scale * torch.randn(N_samp, ydim)
                y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                                     # zeros_noise_scale *
                                     torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                                     y_samps], dim=1)
                y_samps = y_samps.to(device)

                # use the network to predict parameters
                rev_x = model(y_samps, rev=True)[:, :colors.size]
                rev_x = rev_x.cpu().data.numpy()

                # assume a colorant is not needed if its predicted concentration is below a threshold
                rev_x = np.where(rev_x < 0.1, 0, rev_x)

                # feature scaling
                test_x = c_test[cnt, :].reshape(1, c_test[cnt, :].shape[-1])
                plot_x = np.concatenate((rev_x, test_x), axis=0)

                # use tsne to decrease dimensionality
                x_norm = pd.DataFrame(plot_x, columns=color_names)

                # classify the recipes by which colorants they need (1 if needed, 0 if not)
                classes = np.zeros(N_samp).reshape(N_samp, 1)
                paint_needed = np.where(rev_x == 0, 0, 1)
                for paint_no in colors:
                    classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
                class_norm = pd.DataFrame(
                    np.concatenate((classes, np.zeros(1).reshape(1, 1)), axis=0),
                    columns=['class'])

                data_plot = pd.concat(
                    [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm],
                    axis=1)
                class_data = data_plot['class']

                # plot the predicted and the true recipe
                axes.clear()
                recipe_classes = np.array(
                    class_norm[:-1].drop_duplicates()).reshape(1, -1).tolist()[0]
                for recipe_class in recipe_classes:
                    axes.scatter(data_plot[class_data == recipe_class][0],
                                 data_plot[class_data == recipe_class][1],
                                 s=2, alpha=0.5)
                axes.scatter(data_plot[class_data == 0][0],
                             data_plot[class_data == 0][1], marker='+', s=10)
                fig.canvas.draw()
                plt.savefig('training_result%d.png' % cnt, dpi=360)

                # recipe_ref = data.recipe_reflectance(rev_x, optical_model)
                # use the corrected model to compute the reflectance of the recipes
                recipe_ref = data.correct_recipe_reflectance(rev_x)

                print("######## Test %d ########" % cnt)
                print(c_test[cnt])
                print("################")

                # record the three recipes with the smallest colour difference
                top3 = [[100, 0], [100, 0], [100, 0]]
                for n in range(rev_x.shape[0]):
                    # print(rev_x[n, :])
                    diff = data.color_diff(r_test[cnt].numpy(), recipe_ref[n, :])
                    if diff < top3[2][0]:
                        top3[2][0] = diff
                        top3[2][1] = n
                        top3.sort()

                # print the three recipes with the smallest colour difference
                for n in range(3):
                    print(rev_x[top3[n][1], :])
                    print("color diff: %.2f \n" % top3[n][0])
                print("\n\n")

                cnt += 1

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took %.2f minutes\n" % ((time() - t_start) / 60))
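# ---------------------------------------------------------------------------
# NOTE (added sketch): the loss callables assigned above (loss_backward,
# loss_latent, loss_fit) are imported from elsewhere in this repo and are not
# shown in this file.  The definitions below are an assumption, following the
# reference "Analyzing Inverse Problems with Invertible Neural Networks"
# implementation that this training code mirrors: a multi-scale inverse
# multiquadratic kernel MMD for the unsupervised x/z terms and a plain mean
# squared error for the supervised y term.
def MMD_multiscale(x, y):
    # pairwise squared distances within and between the two sample sets
    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())
    rx = xx.diag().unsqueeze(0).expand_as(xx)
    ry = yy.diag().unsqueeze(0).expand_as(yy)
    dxx = rx.t() + rx - 2. * xx
    dyy = ry.t() + ry - 2. * yy
    dxy = rx.t() + ry - 2. * zz

    XX, YY, XY = (torch.zeros_like(xx), torch.zeros_like(xx),
                  torch.zeros_like(xx))
    # sum inverse multiquadratic kernels over several bandwidths
    for a in [0.2, 0.5, 0.9, 1.3]:
        XX += a**2 * (a**2 + dxx)**-1
        YY += a**2 * (a**2 + dyy)**-1
        XY += a**2 * (a**2 + dxy)**-1
    return torch.mean(XX + YY - 2. * XY)


def fit(input, target):
    # supervised forward-pass loss: mean squared error
    return torch.mean((input - target)**2)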
def main():
    # Set up data

    # make training signals
    signal_train_pars = []
    signal_train_images = []
    for i in range(total_temp_num):
        signal_train_pars.append(
            [np.random.uniform(-1.0, 1.0),
             np.random.uniform(0.5, 1.5)])
        signal_train_images.append(
            np.random.normal(loc=signal_train_pars[i][0],
                             scale=signal_train_pars[i][1],
                             size=(1, n_pix)))
    signal_train_pars = np.array(signal_train_pars)
    signal_train_images = np.array(signal_train_images).reshape(
        total_temp_num, n_pix)

    # make random 1D gaussian signal
    noise_signal = np.random.normal(loc=0.0, scale=1.0, size=(1, n_pix))
    #noise_signal = norm.rvs(0,1.0,(1,n_pix))
    signal_pars = [0.0, 1.0]

    # load in lalinference samples
    #with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f:
    #    lalinf_post = pickle.load(f)
    #lalinf_mc = lalinf_post[0]
    #lalinf_q = lalinf_post[1]

    # declare gw variants of positions and labels
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_tot = n_pix + n_neurons  # two times the number of data dimensions?
    ndim_x = 2                    # number of parameter dimensions
    ndim_y = n_pix                # number of data dimensions
    ndim_z = 10                   # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    """

    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    lr = 1e-2
    decayEpochs = (n_epochs * n_its_per_epoch) // meta_epoch
    gamma = 0.004**(1.0 / decayEpochs)
    l2_reg = 2e-5
    #gamma = 0.01**(1./120)

    y_noise_scale = 3e-2      # amount of noise to add to y parameter?
    zeros_noise_scale = 3e-2  # what is this??

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:], labels[:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # what is happening here? More set up of network?
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # number of test samples to use after training
    N_samp = 4000

    # choose test samples to use after training
    # 1000 iterations of test signal buried in noise. Only need to change z parameter.
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps_nparray = np.repeat(noise_signal, N_samp, axis=0)
    y_samps = torch.tensor(y_samps_nparray, dtype=torch.float)

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                         zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                         # zeros_noise_scale *
                         y_samps], dim=1)
    # what we should have now are 1000 copies of the event buried in noise with zero padding up to 2048
    y_samps = y_samps.to(device)

    # get control contour values
    cont_mu, cont_sig, prob, levels = compute_like(
        noise_signal.reshape(n_pix, ), N=n_pix)
    #lalinf_post_blah = np.array([np.random.normal(loc=0,scale=1.0,size=(N_samp)), np.random.normal(loc=1.0,scale=1.0,size=(N_samp))])

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []
    try:
        # print('#Epoch \tIt/s \tl_total')
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch,
                                      zeros_noise_scale, batch_size, ndim_tot,
                                      ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit,
                                      lambd_latent, loss_latent, lambd_rev,
                                      loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist, lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # plot pe results and loss
            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):
                #pe_std = [0.005, 0.01] # this will need to be removed
                #beta_score_hist.append([plot_pe_samples(rev_x,signal_pars,out_path,i_epoch,lalinf_post,pe_std)])
                #plt.plot(np.linspace(plot_cadence,i_epoch,len(beta_score_hist)),beta_score_hist)
                #plt.savefig('%s/latest/beta_hist.png' % out_path)
                #plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,
                            '%s/latest/pe_losses.png' % out_path,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%s/latest/pe_losses_logscale.png' % out_path,
                            logscale=True, legend=['PE-GEN'])

                # make PE scatter plots with contours and beta score
                mu0 = 0.0
                sig0 = 1.0
                plt.scatter(rev_x[:, 0], rev_x[:, 1], s=1., c='red',
                            label='INN Results')
                plt.contour(cont_mu, cont_sig, prob,
                            levels=[0.68, 0.9, 0.95, 0.99])
                plt.plot(mu0, sig0, '+', label='Truth')
                plt.xlabel('mean')
                plt.ylabel('standard deviation')
                plt.legend()
                plt.savefig('%s/latest/predicted_pe.png' % out_path)
                plt.close()

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
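# ---------------------------------------------------------------------------
# NOTE (added sketch): plot_losses() is called above and below but defined
# elsewhere in the repo.  This is only a hypothetical minimal version matching
# the call sites (loss histories, optional filename, optional log scale,
# legend, and the lossNo counter used in the paint-recipe script); the real
# helper may differ.
def plot_losses(losses, filename='pe_losses.png', logscale=False,
                legend=None, lossNo=None):
    fig, ax = plt.subplots(figsize=(6, 4))
    for loss_hist in losses:
        ax.plot(loss_hist)
    if logscale:
        ax.set_yscale('log')
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    if legend is not None:
        ax.legend(legend)
    if lossNo is not None:
        # tag the output file with the plot counter (assumed behaviour)
        filename = filename.replace('.png', '_%d.png' % lossNo)
    fig.savefig(filename, dpi=360)
    plt.close(fig)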
def main():
    # Set up data
    batch_size = 1600    # set batch size
    test_split = 10000   # number of testing samples to use

    # generate data
    # makes a torch.tensor() with arrays of (n_samples X parameters) and (n_samples X data)
    # labels are the colours and pos are the x,y coords
    # however, labels are 1-hot encoded
    pos, labels = data.generate(labels='all', tot_dataset_size=2**20)

    # just simply renaming the colors properly.
    #c = np.where(labels[:test_split])[1]
    c = labels[:test_split, :]

    plt.figure(figsize=(6, 6))
    plt.scatter(pos[:test_split, 0], pos[:test_split, 1], c=c, cmap='Set1',
                s=0.25)
    plt.xticks([])
    plt.yticks([])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png')
    plt.close()

    # setting up the model
    ndim_tot = 16  # ?
    ndim_x = 2     # number of parameter dimensions (x,y)
    ndim_y = 3     # number of label dimensions (colours for 1-hot encoding)
    ndim_z = 2     # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 3000
    meta_epoch = 12  # what is this???
    n_its_per_epoch = 4
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)
    print(pad_x.shape, pad_yz.shape)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # initialize gif for showing training procedure
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    axes[0].set_title('Predicted labels (Forwards Process)')
    axes[1].set_xticks([])
    axes[1].set_yticks([])
    axes[1].set_title('Generated Samples (Backwards Process)')
    #fig.show()
    #fig.canvas.draw()

    # number of test samples to use after training
    N_samp = 4096

    # choose test samples to use after training
    x_samps = torch.cat([x for x, y in test_loader], dim=0)[:N_samp]
    y_samps = torch.cat([y for x, y in test_loader], dim=0)[:N_samp]
    #c = np.where(y_samps)[1]
    #c = y_samps[:,0]
    c = np.array(y_samps).reshape(N_samp, ndim_y)
    y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
    y_samps = torch.cat([
        torch.randn(N_samp, ndim_z),
        zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
        y_samps
    ], dim=1)
    y_samps = y_samps.to(device)

    # start training loop
    try:
        # print('#Epoch \tIt/s \tl_total')
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # print(i_epoch, end='\t ')
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # predict the locations of test labels
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # predict the label given a location
            #pred_c = model(torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
            #                         dim=1).to(device)).data[:, -8:].argmax(dim=1)
            pred_c = model(
                torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
                          dim=1).to(device)).data[:, -1:].argmax(dim=1)

            axes[0].clear()
            #axes[0].scatter(tmp_x_samps[:,0], tmp_x_samps[:,1], c=pred_c, cmap='Set1', s=1., vmin=0, vmax=9)
            axes[0].axis('equal')
            axes[0].axis([-3, 3, -3, 3])
            axes[0].set_xticks([])
            axes[0].set_yticks([])

            axes[1].clear()
            axes[1].scatter(rev_x[:, 0], rev_x[:, 1], c=c, cmap='Set1', s=1.,
                            vmin=0, vmax=9)
            axes[1].axis('equal')
            axes[1].axis([-3, 3, -3, 3])
            axes[1].set_xticks([])
            axes[1].set_yticks([])

            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/training_pred.png')

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
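# ---------------------------------------------------------------------------
# NOTE (added sketch): train() is imported from elsewhere in the repo.  The
# function below is an assumed single-epoch implementation following the
# reference INN training loop (supervised forward fit + latent MMD, then a
# backward MMD on the recovered parameters), matching the
# `_, lossf, lossrev = train(...)` call sites above; the variant used by the
# conv_nn script below also takes a conv_nn flag and returns five values, so
# treat this purely as an illustration of the three-loss scheme.
def train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size,
          ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer,
          lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev,
          loss_backward, i_epoch=0):
    model.train()
    l_tot = 0
    for it, (x, y) in enumerate(train_loader):
        if it >= n_its_per_epoch:
            break
        x, y = x.to(device), y.to(device)
        y_clean = y.clone()

        # pad x and (z, y) with small noise so both sides have ndim_tot dims
        pad_x = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_x,
                                                device=device)
        pad_yz = zeros_noise_scale * torch.randn(
            batch_size, ndim_tot - ndim_y - ndim_z, device=device)
        y += y_noise_scale * torch.randn(batch_size, ndim_y, device=device)
        x = torch.cat((x, pad_x), dim=1)
        z = torch.randn(batch_size, ndim_z, device=device)
        y = torch.cat((z, pad_yz, y), dim=1)

        optimizer.zero_grad()

        # forward pass x -> [z, pad, y]: supervised fit on y, MMD on the latent block
        output = model(x)
        y_short = torch.cat((y[:, :ndim_z], y[:, -ndim_y:]), dim=1)
        l = lambd_predict * loss_fit(output[:, ndim_z:], y[:, ndim_z:])
        output_block_grad = torch.cat(
            (output[:, :ndim_z], output[:, -ndim_y:].data), dim=1)
        l += lambd_latent * loss_latent(output_block_grad, y_short)
        l_tot += l.data.item()
        l.backward()

        # backward pass [z, pad, y] -> x: MMD between generated and true x
        pad_yz = zeros_noise_scale * torch.randn(
            batch_size, ndim_tot - ndim_y - ndim_z, device=device)
        y = y_clean + y_noise_scale * torch.randn(batch_size, ndim_y,
                                                  device=device)
        orig_z_perturbed = output.data[:, :ndim_z] + y_noise_scale * torch.randn(
            batch_size, ndim_z, device=device)
        y_rev = torch.cat((orig_z_perturbed, pad_yz, y), dim=1)
        y_rev_rand = torch.cat(
            (torch.randn(batch_size, ndim_z, device=device), pad_yz, y), dim=1)

        output_rev = model(y_rev, rev=True)
        output_rev_rand = model(y_rev_rand, rev=True)
        l_rev = lambd_rev * loss_backward(output_rev_rand[:, :ndim_x],
                                          x[:, :ndim_x])
        l_rev += lambd_predict * loss_fit(output_rev, x)
        l_tot += l_rev.data.item()
        l_rev.backward()

        # clamp gradients and take the optimizer step
        for p in model.parameters():
            p.grad.data.clamp_(-15.0, 15.0)
        optimizer.step()

    return l_tot / n_its_per_epoch, l, l_rev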
def main():
    # Set up simulation parameters
    batch_size = 128     # set batch size
    r = 3                # the grid dimension for the output tests
    test_split = r * r   # number of testing samples to use
    sig_model = 'sg'     # the signal model to use
    sigma = 0.2          # the noise std
    ndata = 128          #32 number of data samples in time series
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1             # seed for generating data
    out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/"
    n_neurons = 0
    do_contours = True   # if True, plot contours of predictions by INN
    plot_cadence = 50
    do_latent_struc = False  # if True, plot latent space 2D structure
    conv_nn = False          # if True, use convolutional nn structure

    # setup output directory - if it does not exist
    os.system('mkdir -p %s' % out_dir)

    # generate data
    pos, labels, x, sig = data.generate(
        model=sig_model,
        tot_dataset_size=int(1e6),  # 1e6
        ndata=ndata,
        sigma=sigma,
        prior_bound=bound,
        seed=seed)

    if do_latent_struc:
        # calculate mode of x-space for both pars
        mode_1 = stats.mode(np.array(pos[:, 0]))
        mode_2 = stats.mode(np.array(pos[:, 1]))

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig_post, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig("%stest_distribution.png" % out_dir, dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2       # number of posterior parameter dimensions (x,y)
    ndim_y = ndata   # number of label dimensions (noisy data samples)
    ndim_z = 200     # number of latent space dimensions?
    ndim_tot = max(ndim_x,
                   ndim_y + ndim_z) + n_neurons  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    filtsize = 3
    dropout = 0.0
    clamp = 1.0

    if conv_nn == True:
        t1 = Node([inp.out0], rev_multiplicative_layer,
                  {'F_class': F_conv, 'clamp': clamp,
                   'F_args': {'kernel_size': filtsize, 'leaky_slope': 0.1,
                              'batch_norm': False}})
        t2 = Node([t1.out0], rev_multiplicative_layer,
                  {'F_class': F_conv, 'clamp': clamp,
                   'F_args': {'kernel_size': filtsize, 'leaky_slope': 0.1,
                              'batch_norm': False}})
        t3 = Node([t2.out0], rev_multiplicative_layer,
                  {'F_class': F_conv, 'clamp': clamp,
                   'F_args': {'kernel_size': filtsize, 'leaky_slope': 0.1,
                              'batch_norm': False}})
        #t4 = Node([t1.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #                     'batch_norm':False}})
        #t5 = Node([t2.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #                     'batch_norm':False}})
    else:
        t1 = Node([inp.out0], rev_multiplicative_layer,
                  {'F_class': F_fully_connected, 'clamp': clamp,
                   'F_args': {'dropout': dropout}})
        t2 = Node([t1.out0], rev_multiplicative_layer,
                  {'F_class': F_fully_connected, 'clamp': clamp,
                   'F_args': {'dropout': dropout}})
        t3 = Node([t2.out0], rev_multiplicative_layer,
                  {'F_class': F_fully_connected, 'clamp': clamp,
                   'F_args': {'dropout': dropout}})

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 12000
    meta_epoch = 12  # what is this???
    n_its_per_epoch = 12

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 4000.  #300 forward pass
    lambd_latent = 900.    #300 latent space
    lambd_rev = 1000.      #400 backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                if conv_nn == True:
                    coeff.conv3.weight.data = 0.01 * torch.randn(
                        coeff.conv3.weight.shape)

    model.to(device)

    # number of test samples to use after training
    N_samp = 2500

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    true_post = np.zeros((r, r, N_samp, 2))
    lossf_hist = []
    lossrev_hist = []
    losstot_hist = []
    losslatent_hist = []
    beta_score_hist = []

    for i in range(r):
        for j in range(r):
            mvec, cvec, temp, post_points = data.get_lik(
                np.array(labels_test[cnt, :]).flatten(),
                n_grid=Ngrid, sig_model=sig_model, sigma=sigma, xvec=x,
                bound=bound)
            lik[i, j, :] = temp.flatten()
            true_post[i, j, :] = post_points[:N_samp]
            cnt += 1

    # start training loop
    try:
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            losstot, losslatent, lossrev, lossf, lambd_latent = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, conv_nn, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.data.item())
            lossrev_hist.append(lossrev.data.item())
            losstot_hist.append(losstot)
            losslatent_hist.append(losslatent.data.item())
            pe_losses = [
                losstot_hist, losslatent_hist, lossrev_hist, lossf_hist
            ]

            # loop over a few cases and plot results in a grid
            cnt = 0
            beta_max = 0

            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):

                # use the network to predict parameters
                if do_latent_struc:
                    # do latent space structure plotting
                    y_samps_latent = np.tile(np.array(labels_test[0, :]),
                                             1).reshape(1, ndim_y)
                    y_samps_latent = torch.tensor(y_samps_latent,
                                                  dtype=torch.float)

                    x1_i_dist = []
                    x2_i_dist = []
                    x1_i_par = np.array([])
                    x2_i_par = np.array([])

                    # define latent space mesh grid
                    z_mesh = np.mgrid[-0.99:-0.01:100j, -0.99:-0.01:100j]
                    z_mesh = np.vstack([z_mesh, np.zeros((2, 100, 100))])

                    #for z_i in range(10000):
                    for i in range(z_mesh.shape[1]):
                        for j in range(z_mesh.shape[2]):
                            a = torch.randn(1, ndim_z)
                            a[0, 0] = z_mesh[0, i, j]
                            a[0, 1] = z_mesh[1, i, j]

                            x_i = model(torch.cat(
                                [a,
                                 torch.zeros(1, ndim_tot - ndim_y - ndim_z),
                                 y_samps_latent],
                                dim=1).to(device), rev=True)
                            x_i = x_i.cpu().data.numpy()

                            # calculate hue and intensity
                            if np.abs(mode_1[0][0] - x_i[0][0]) < np.abs(
                                    mode_2[0][0] - x_i[0][1]):
                                z_mesh[2, i, j] = np.abs(mode_1[0][0] - x_i[0][0])
                                z_mesh[3, i, j] = 0
                            else:
                                z_mesh[2, i, j] = np.abs(mode_2[0][0] - x_i[0][1])
                                z_mesh[3, i, j] = 1

                    z_mesh[2, :, :][z_mesh[3, :, :] == 0] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 0] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 0])
                    z_mesh[2, :, :][z_mesh[3, :, :] == 1] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 1] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 1])

                    bg_color = 'black'
                    fg_color = 'red'
                    fig = plt.figure(facecolor=bg_color, edgecolor=fg_color)
                    axes = fig.add_subplot(111)
                    axes.patch.set_facecolor(bg_color)
                    axes.xaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    axes.yaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    for spine in axes.spines.values():
                        spine.set_color(fg_color)

                    plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 0],
                                z_mesh[1, :, :][z_mesh[3, :, :] == 0],
                                s=1,
                                c=z_mesh[2, :, :][z_mesh[3, :, :] == 0],
                                cmap='Greens', axes=axes)
                    plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 1],
                                z_mesh[1, :, :][z_mesh[3, :, :] == 1],
                                s=1,
                                c=z_mesh[2, :, :][z_mesh[3, :, :] == 1],
                                cmap='Purples', axes=axes)
                    plt.xlabel('z-space', color=fg_color)
                    plt.ylabel('z-space', color=fg_color)
                    plt.savefig('%sstruct_z.png' % out_dir, dpi=360)
                    plt.close()
                    # end of latent space structure plotting

                # initialize plot for showing testing results
                fig, axes = plt.subplots(r, r, figsize=(6, 6))

                for i in range(r):
                    for j in range(r):

                        # convert data into correct format
                        y_samps = np.tile(np.array(labels_test[cnt, :]),
                                          N_samp).reshape(N_samp, ndim_y)
                        y_samps = torch.tensor(y_samps, dtype=torch.float)
                        #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                        y_samps = torch.cat(
                            [torch.randn(N_samp, ndim_z),
                             #zeros_noise_scale *
                             torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                             y_samps], dim=1)
                        y_samps = y_samps.to(device)

                        if conv_nn == True:
                            y_samps = y_samps.reshape(y_samps.shape[0],
                                                      y_samps.shape[1], 1, 1)
                        rev_x = model(y_samps, rev=True)
                        rev_x = rev_x.cpu().data.numpy()

                        if conv_nn == True:
                            rev_x = rev_x.reshape(rev_x.shape[0],
                                                  rev_x.shape[1])

                        # plot the samples and the true contours
                        axes[i, j].clear()
                        axes[i, j].contour(mvec, cvec,
                                           lik[i, j, :].reshape(Ngrid, Ngrid),
                                           levels=[0.68, 0.9, 0.99])
                        axes[i, j].scatter(rev_x[:, 0], rev_x[:, 1], s=0.5,
                                           alpha=0.5, color='red')
                        axes[i, j].scatter(true_post[i, j, :, 1],
                                           true_post[i, j, :, 0], s=0.5,
                                           alpha=0.5, color='blue')
                        axes[i, j].plot(pos_test[cnt, 0], pos_test[cnt, 1],
                                        '+r', markersize=8)
                        axes[i, j].axis(bound)

                        # add contours to results
                        try:
                            if do_contours:
                                contour_y = np.reshape(rev_x[:, 1],
                                                       (rev_x[:, 1].shape[0]))
                                contour_x = np.reshape(rev_x[:, 0],
                                                       (rev_x[:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                kernel_cnn = make_contour_plot(
                                    axes[i, j], contour_x, contour_y,
                                    contour_dataset, 'red', flip=False,
                                    kernel_cnn=False)

                                # run overlap tests on results
                                contour_x = np.reshape(
                                    true_post[i, j][:, 1],
                                    (true_post[i, j][:, 1].shape[0]))
                                contour_y = np.reshape(
                                    true_post[i, j][:, 0],
                                    (true_post[i, j][:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                ks_score, ad_score, beta_score = overlap_tests(
                                    rev_x, true_post[i, j], pos_test[cnt],
                                    kernel_cnn, gaussian_kde(contour_dataset))
                                axes[i, j].legend([
                                    'Overlap: %s' %
                                    str(np.round(beta_score, 3))
                                ])

                                beta_score_hist.append([beta_score])
                        except ValueError as e:
                            pass

                        cnt += 1

                # save the results to file
                fig_post.canvas.draw()
                plt.savefig('%sposteriors_%s.png' % (out_dir, i_epoch),
                            dpi=360)
                plt.savefig('%slatest.png' % out_dir, dpi=360)

                plot_losses(pe_losses, '%spe_losses.png' % out_dir,
                            legend=['PE-GEN'])
                plot_losses(pe_losses, '%spe_losses_logscale.png' % out_dir,
                            logscale=True, legend=['PE-GEN'])

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
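# ---------------------------------------------------------------------------
# NOTE (added sketch): make_contour_plot() and overlap_tests() above come from
# a plotting/statistics module elsewhere in the repo.  The function below is a
# hypothetical stand-in that only illustrates what the call site expects: fit
# a Gaussian KDE to the (2, N) sample array, draw approximate 68/90/99 per
# cent credible contours on the supplied axis, and return the fitted kernel
# for the later overlap tests (flip and kernel_cnn are accepted purely for
# signature compatibility; gaussian_kde is assumed imported from scipy.stats
# as it is already used above).
def make_contour_plot(ax, x, y, dataset, color='red', flip=False,
                      kernel_cnn=False):
    kernel = kernel_cnn if kernel_cnn else gaussian_kde(dataset)
    xv, yv = np.meshgrid(np.linspace(x.min(), x.max(), 100),
                         np.linspace(y.min(), y.max(), 100))
    grid = np.vstack([xv.ravel(), yv.ravel()])
    z = kernel(grid).reshape(xv.shape)
    # convert the density map into levels enclosing ~68/90/99% of the mass
    z_sorted = np.sort(z.ravel())[::-1]
    cdf = np.cumsum(z_sorted) / np.sum(z_sorted)
    levels = sorted(set(z_sorted[np.searchsorted(cdf, [0.99, 0.9, 0.68])]))
    ax.contour(xv, yv, z, levels=levels, colors=color)
    return kernel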
def main():
    # Set up data
    test_split = 1  # number of testing samples to use

    # load in gw templates and signals
    signal_train_images, signal_train_pars, signal_image, noise_signal, signal_pars = load_gw_data()

    if add_noise_real:
        train_array = []
        train_pe_array = []
        for i in range(len(signal_train_images)):
            for j in range(n_real):
                train_array.append([
                    signal_train_images[i] +
                    np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23
                ])
                train_pe_array.append([signal_train_pars[i]])
        train_array = np.array(train_array)
        train_pe_array = np.array(train_pe_array)
        train_array = train_array.reshape(train_array.shape[0],
                                          train_array.shape[2])
        train_pe_array = train_pe_array.reshape(train_pe_array.shape[0],
                                                train_pe_array.shape[2])
    else:
        for i in range(len(signal_train_images)):
            signal_train_images[i] += np.random.normal(
                loc=0.0, scale=n_sig) / 817.98 * 1079.23

    # load in lalinference noise signal
    noise_signal = h5py.File("gw_data/data/%s0%s.hdf5" % (event_name, tag),
                             "r")
    noise_signal = np.reshape(noise_signal['wht_wvf'][:] * 1079.23,
                              (n_pix, 1))  # 817.98 need to not have this hardcoded
    #noise_signal *= 1079.23 / 817.98
    #noise_signal = noise_signal.reshape(noise_signal.shape[0],1)
    plt.plot(noise_signal)
    plt.savefig('%s/test.png' % out_path)
    plt.close()

    # load in lalinference samples
    with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav',
              'rb') as f:
        lalinf_post = pickle.load(f)
    lalinf_mc = lalinf_post[0]
    lalinf_q = lalinf_post[1]
    kernel_lalinf = gaussian_kde(lalinf_post)

    # declare gw variants of positions and labels
    mc_max = np.max(signal_train_pars[:, 0])
    #signal_train_pars /= mc_max
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_x = 2      # number of parameter dimensions
    ndim_y = n_pix  # number of data dimensions
    ndim_z = 100    # number of latent space dimensions?
    ndim_tot = n_pix + ndim_z + ndim_x + n_neurons  # two times the number of data dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    # number of nodes equal to number of parameters?
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t5 = Node([t4.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t6 = Node([t5.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t7 = Node([t6.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t8 = Node([t7.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t9 = Node([t8.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    t10 = Node([t9.out0], rev_multiplicative_layer,
               {'F_class': F_fully_connected, 'clamp': 2.0,
                'F_args': {'dropout': 0.0}})
    """

    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    lr = 1e-4
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 1         # amount of noise to add to y parameter?
    zeros_noise_scale = 3e-2  # what is this??

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg, amsgrad=True)
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:], labels[:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # what is happening here? More set up of network?
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # number of test samples to use after training
    N_samp = 4000

    # choose test samples to use after training
    # 1000 iterations of test signal buried in noise. Only need to change z parameter.
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps = y_noise_scale * np.transpose(
        torch.tensor(np.repeat(noise_signal, N_samp, axis=1),
                     dtype=torch.float))

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                         zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                         # zeros_noise_scale *
                         y_samps], dim=1)
    # what we should have now are 1000 copies of the event buried in noise with zero padding up to 2048
    y_samps = y_samps.to(device)

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []
    kernel_cnn = False
    try:
        # print('#Epoch \tIt/s \tl_total')
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch,
                                      zeros_noise_scale, batch_size, ndim_tot,
                                      ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit,
                                      lambd_latent, loss_latent, lambd_rev,
                                      loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist, lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()
            #rev_x[:,0] = mc_max * rev_x[:,0]

            # plot pe results and loss
            beta_max = 0
            """
            if i_epoch>0:
                kernel_cnn = gaussian_kde(rev_x)
                #overlap_y = np.reshape(rev_x[:,1], (rev_x[:,1].shape[0]))
                #overlap_x = np.reshape(rev_x[:,0], (rev_x[:,0].shape[0]))
                #overlap_dataset = np.array([overlap_x,overlap_y]).transpose()
                ks_score, ad_score, beta_score = overlap_tests(rev_x,lalinf_post,signal_pars,kernel_cnn,kernel_lalinf)
                beta_score_hist.append([beta_score])
                plt.plot(np.linspace(1,i_epoch,len(beta_score_hist)),beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()
            """
            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):
                pe_std = [0.02185649964844209, 0.005701401364171313]  # this will need to be removed
                beta_score_hist.append([
                    plot_pe_samples(rev_x, signal_pars, out_path, i_epoch,
                                    lalinf_post, pe_std,
                                    kernel_lalinf=kernel_lalinf,
                                    kernel_cnn=kernel_cnn)
                ])
                plt.plot(
                    np.linspace(plot_cadence, i_epoch, len(beta_score_hist)),
                    beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,
                            '%s/latest/pe_losses.png' % out_path,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%s/latest/pe_losses_logscale.png' % out_path,
                            logscale=True, legend=['PE-GEN'])

                # save model
                #if beta_score_hist[:-1] > beta_max: beta_max = beta_score_hist[:-1]
                #if beta_score_hist[:-1] > beta_max or i_epoch==plot_cadence:
                torch.save(model.state_dict(), 'mytraining.pt')

                # make PE scatter plots with contours and beta score
                #plt.scatter(rev_x[:,0], rev_x[:,1], s=1., c='red')
                #plt.scatter(lalinf_mc, lalinf_q, s=1., c='blue')

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
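# ---------------------------------------------------------------------------
# NOTE (added sketch): the checkpoint written above with
# torch.save(model.state_dict(), 'mytraining.pt') can be restored as shown
# below.  This helper is hypothetical (not part of the original script); it
# assumes the INN is rebuilt from the same node list that was used for
# training before the weights are loaded.
def load_trained_model(nodes, path='mytraining.pt'):
    model = ReversibleGraphNet(nodes)
    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)
    model.eval()
    return model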
def main():
    # Set up simulation parameters
    batch_size = 1600   # set batch size
    r = 3               # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'    # the signal model to use
    sigma = 0.2         # the noise std
    ndata = 32          # number of data samples
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1            # seed for generating data

    # generate data
    pos, labels, x, sig = data.generate(model=sig_model,
                                        tot_dataset_size=2**20,
                                        ndata=ndata,
                                        sigma=sigma,
                                        prior_bound=bound,
                                        seed=seed)

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png',
                dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2      # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 8      # number of latent space dimensions?
    ndim_tot = max(ndim_x,
                   ndim_y + ndim_z)  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 1000
    meta_epoch = 12  # what is this???
    n_its_per_epoch = 12
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # initialize plot for showing testing results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of test samples to use after training
    N_samp = 256

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    for i in range(r):
        for j in range(r):
            mvec, cvec, temp = data.get_lik(
                np.array(labels_test[cnt, :]).flatten(),
                n_grid=Ngrid, sig_model=sig_model, sigma=sigma, xvec=x,
                bound=bound)
            lik[i, j, :] = temp.flatten()
            cnt += 1

    # start training loop
    try:
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # loop over a few cases and plot results in a grid
            cnt = 0
            for i in range(r):
                for j in range(r):

                    # convert data into correct format
                    y_samps = np.tile(np.array(labels_test[cnt, :]),
                                      N_samp).reshape(N_samp, ndim_y)
                    y_samps = torch.tensor(y_samps, dtype=torch.float)
                    #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                    y_samps = torch.cat(
                        [torch.randn(N_samp, ndim_z),
                         #zeros_noise_scale *
                         torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                         y_samps], dim=1)
                    y_samps = y_samps.to(device)

                    # use the network to predict parameters
                    rev_x = model(y_samps, rev=True)
                    rev_x = rev_x.cpu().data.numpy()

                    # plot the samples and the true contours
                    axes[i, j].clear()
                    axes[i, j].contour(mvec, cvec,
                                       lik[i, j, :].reshape(Ngrid, Ngrid),
                                       levels=[0.68, 0.9, 0.99])
                    axes[i, j].scatter(rev_x[:, 0], rev_x[:, 1], s=0.5,
                                       alpha=0.5)
                    axes[i, j].plot(pos_test[cnt, 0], pos_test[cnt, 1], '+r',
                                    markersize=8)
                    axes[i, j].axis(bound)
                    cnt += 1

            # save the results to file
            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/posteriors_%s.png' %
                        i_epoch, dpi=360)
            plt.savefig('/data/public_html/chrism/FrEIA/latest.png', dpi=360)

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")