def __init__(self, ndim_total, dim_x, dim_y, dim_z, hidden_dim=128):
    super(INN, self).__init__()
    nodes = [InputNode(ndim_total, name='input')]
    self.hidden_dim = hidden_dim
    self.ndim_total = ndim_total
    self.dim_x = dim_x
    self.dim_y = dim_y
    self.dim_z = dim_z
    for k in range(4):
        nodes.append(
            Node(nodes[-1], GLOWCouplingBlock,
                 {'subnet_constructor': self.subnet_fc, 'clamp': 2.0},
                 name=f'coupling_{k}'))
        nodes.append(
            Node(nodes[-1], PermuteRandom, {'seed': k}, name=f'permute_{k}'))
    nodes.append(OutputNode(nodes[-1], name='output'))
    self.model = ReversibleGraphNet(nodes, verbose=False)
    self.zeros_noise_scale = 5e-2
    self.y_noise_scale = 1e-1
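# `self.subnet_fc` is referenced above but not defined in this snippet. Below is
# a minimal sketch of what such a subnet constructor for GLOWCouplingBlock
# typically looks like (hidden width taken from `self.hidden_dim`, `torch.nn`
# assumed to be imported as `nn`); it is an assumption, not the original
# implementation.
def subnet_fc(self, dims_in, dims_out):
    # small fully connected subnet mapping dims_in features to dims_out features
    return nn.Sequential(nn.Linear(dims_in, self.hidden_dim),
                         nn.ReLU(),
                         nn.Linear(self.hidden_dim, dims_out))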
def model(dim_x, dim_y, dim_z, dim_total, lr, l2_reg, meta_epoch, gamma,
          hidden_depth=8):
    nodes = []
    # define the input node
    nodes.append(InputNode(dim_total, name='input'))
    # define the hidden (coupling / permutation) nodes
    for k in range(hidden_depth):
        nodes.append(
            Node(nodes[-1], GLOWCouplingBlock, {
                'subnet_constructor': F_fully_connected,
                'clamp': 2.0,
            }, name=f'coupling_{k}'))
        nodes.append(
            Node(nodes[-1], PermuteRandom, {'seed': 1}, name=f'permute_{k}'))
    # final coupling block after the last permutation
    nodes.append(
        Node(nodes[-1], GLOWCouplingBlock, {
            'subnet_constructor': F_fully_connected,
            'clamp': 2.0,
        }, name=f'coupling_{hidden_depth}'))
    # define the output node
    nodes.append(OutputNode(nodes[-1], name='output'))
    # build the invertible network
    inn = ReversibleGraphNet(nodes)
    # define the optimizer
    # TODO: tune these parameters
    optimizer = torch.optim.Adam(inn.parameters(), lr=lr, betas=(0.9, 0.999),
                                 eps=1e-04, weight_decay=l2_reg)
    # learning-rate schedule
    # TODO: tune these parameters
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)
    # loss functions:
    # x and z are unsupervised (MMD), y is supervised (squared error)
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit
    return inn, optimizer, scheduler, loss_backward, loss_latent, loss_fit
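# `MMD_multiscale` and `fit` are referenced above but are only defined inside
# the INNModel class later in this section. A standalone sketch matching that
# definition (sum of inverse multiquadratic kernels at several bandwidths, and
# a plain squared-error fit term) is reproduced here for reference.
def MMD_multiscale(x, y):
    # pairwise Gram matrices within and between the two sample sets
    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())
    rx = xx.diag().unsqueeze(0).expand_as(xx)
    ry = yy.diag().unsqueeze(0).expand_as(yy)
    # squared distances
    dxx = rx.t() + rx - 2. * xx
    dyy = ry.t() + ry - 2. * yy
    dxy = rx.t() + ry - 2. * zz
    XX = torch.zeros_like(xx)
    YY = torch.zeros_like(xx)
    XY = torch.zeros_like(xx)
    # accumulate inverse multiquadratic kernels at several bandwidths
    for a in [0.2, 0.5, 0.9, 1.3]:
        XX += a**2 * (a**2 + dxx)**-1
        YY += a**2 * (a**2 + dyy)**-1
        XY += a**2 * (a**2 + dxy)**-1
    return torch.mean(XX + YY - 2. * XY)

def fit(input, target):
    # mean squared error for the supervised y-fit term
    return torch.mean((input - target)**2)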
def cINN(flags):
    """
    The constructor for the conditional INN network
    :param flags: input flags from configuration
    :return: The INN network
    """
    # Set up the conditional node (y)
    cond_node = ConditionNode(flags.dim_y)
    # Start from the input layer
    nodes = [InputNode(flags.dim_x, name='input')]
    # Repeatedly add a coupling layer followed by a random permutation layer
    for i in range(flags.couple_layer_num):
        nodes.append(
            Node(nodes[-1], GLOWCouplingBlock,
                 {'subnet_constructor': subnet_fc, 'clamp': 2.0},
                 conditions=cond_node, name='coupling_{}'.format(i)))
        nodes.append(
            Node(nodes[-1], PermuteRandom, {'seed': i},
                 name='permute_{}'.format(i)))
    # Attach the output node and the condition node
    nodes.append(OutputNode(nodes[-1], name='output'))
    nodes.append(cond_node)
    print("The nodes are:", nodes)
    # Return the assembled reversible network
    return ReversibleGraphNet(nodes, verbose=True)
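# A minimal usage sketch for the conditional network above (not from the
# original source). The batch size is a placeholder, and it assumes the FrEIA
# version in use passes conditions through the `c` keyword of
# ReversibleGraphNet.
def _cinn_usage_example(flags, batch=16):
    model = cINN(flags)
    x = torch.randn(batch, flags.dim_x)   # parameters
    y = torch.randn(batch, flags.dim_y)   # conditioning observation
    z = model(x, c=y)                     # forward pass: x -> z, given y
    x_rec = model(z, c=y, rev=True)       # inverse pass: z -> x, given y
    return z, x_rec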
def main():
    # --------------------------------- Generate data ---------------------------------
    t_generate_start = time()
    # set the simulated-data parameters
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    optical_model = 'km'  # the optical model to use
    ydim = 31  # number of data samples
    bound = [0.1, 0.9, 0.1, 0.9]
    seed = 1  # seed for generating data

    # generate the training data
    # concentrations, reflectance, x, info = data.generate(
    #     model=optical_model,
    #     total_dataset_size=2 ** 20 * 20,
    #     ydim=ydim,
    #     prior_bound=bound,
    #     seed=seed
    # )
    concentrations, reflectance, x, info = data.math_optimized_generate()
    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))
    colors = np.arange(0, concentrations.shape[-1], 1)

    # hold a few samples out of training to use as the final test samples
    c_test = concentrations[-test_split:]
    r_test = reflectance[-test_split:]

    # plot the spectral reflectance of the test samples (for inspection only,
    # independent of the model)
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(r_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([400, 700, 0, 1])
    plt.savefig('test_target_reflectance.png', dpi=360)
    plt.close()
    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))

    # --------------------------------- Build the network ---------------------------------
    # set the model dimensions
    ndim_x = concentrations.shape[-1]  # recipe dimension, i.e. the number of candidate colorants
    ndim_y = ydim  # reflectance dimension (31)
    ndim_z = 13  # latent-space dimension
    ndim_tot = max(ndim_x, ndim_y + ndim_z)

    # define the different parts of the network
    # define the input node
    inp = InputNode(ndim_tot, name='input')
    # define the hidden-layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {'dropout': 0.2}
    })
    p1 = Node([t1.out0], permute_layer, {'seed': 1})
    t2 = Node([p1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {'dropout': 0.2}
    })
    p2 = Node([t2.out0], permute_layer, {'seed': 2})
    t3 = Node([p2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {'dropout': 0.2}
    })
    p3 = Node([t3.out0], permute_layer, {'seed': 1})
    t4 = Node([p3.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {'dropout': 0.2}
    })
    p4 = Node([t4.out0], permute_layer, {'seed': 2})
    t5 = Node([p4.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {'dropout': 0.2}
    })
    # define the output node
    outp = OutputNode([t5.out0], name='output')
    # build the network
    nodes = [inp, t1, p1, t2, p2, t3, p3, t4, p4, t5, outp]
    model = ReversibleGraphNet(nodes)

    # --------------------------------- Train the network ---------------------------------
    # hyperparameters
    # n_epochs = 3000  # number of training epochs
    n_epochs = 0  # number of training epochs
    plot_cadence = 100  # plot the loss curves every 100 epochs
    meta_epoch = 12  # step size for the learning-rate schedule
    n_its_per_epoch = 12  # 12 batches per epoch
    batch_size = 1600  # 1600 samples per batch
    lr = 1.5e-3  # initial learning rate
    gamma = 0.004**(1. / 1333)  # multiplicative learning-rate decay factor
    l2_reg = 2e-5  # weight decay (L2 penalty)
    # to give input and output equal dimension, the padding dimensions are
    # filled not with zeros but with small random values
    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2
    # relative weighting of the losses
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # define the optimizer
    # params: parameters to optimise, lr: learning rate,
    # betas: coefficients for the running averages of the gradient and its square,
    # eps: term added to the denominator for numerical stability,
    # weight_decay: weight decay
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    # learning-rate schedule
    # optimizer: the optimizer, step_size: period of the decay,
    # gamma: multiplicative decay factor
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)
    # loss functions:
    # x and z are unsupervised (MMD), y is supervised (squared error)
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # training-set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        concentrations[test_split:], reflectance[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialise the network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)
    model.to(device)

    # initialise the figure for the test results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    # number of test samples
    N_samp = 256

    # --------------------------------- Start training ---------------------------------
    try:
        t_start = time()  # training start time
        loss_for_list = []  # history of the forward-training loss
        loss_rev_list = []  # history of the backward-training loss
        tsne = TSNE(n_components=2, init='pca')
        # colorant codes
        color_names = [
            '07H', '08', '08S', '09', '09B', '09S', '10B', '12', '13', '14',
            '15', '16', '17A', '18A', '19A', '20A-2', '23A', '2704', '2803',
            '2804', '2807'
        ]
        # iterate for n_epochs epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):
            scheduler.step()

            # TODO: this branch is never entered
            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            avg_loss, loss_for, loss_rev = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, i_epoch)

            # append the forward and backward losses
            loss_for_list.append(loss_for.item())
            loss_rev_list.append(loss_rev.item())
            inn_losses = [loss_for_list, loss_rev_list]
            if ((i_epoch + 1) % plot_cadence == 0) & (i_epoch > 0):
                plot_losses(inn_losses,
                            legend=['PE-GEN'],
                            lossNo=int((i_epoch + 1) / plot_cadence))

        # TODO
        model = torch.load('model_dir/km_impl_model')
        # torch.save(model, 'model_dir/km_impl_model')

        fig, axes = plt.subplots(1, 1, figsize=(2, 2))
        # reflectance of a real (measured) sample
        test_samps = np.array([[
            0.2673378, 0.3132285, 0.3183329, 0.3234908, 0.3318701, 0.3409707,
            0.3604081, 0.4168356, 0.5351773, 0.6202191, 0.6618687, 0.6919741,
            0.7136238, 0.7292901, 0.7314631, 0.7131701, 0.6773048, 0.6302681,
            0.5738088, 0.5133060, 0.4535525, 0.4108878, 0.3908512, 0.3808001,
            0.3752591, 0.3727644, 0.3801365, 0.3976869, 0.4237110, 0.4332685,
            0.4433292
        ]])
        # the recipe of the real sample
        test_cons = np.array([[
            0, 0.8014, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1491, 0, 0, 0,
            0.2241, 0
        ]])

        for cnt in range(test_samps.shape[0]):
            print('before:', cnt, test_samps[cnt, :])
            test_samp = np.tile(np.array(test_samps[cnt, :]),
                                N_samp).reshape(N_samp, ydim)
            test_samp = torch.tensor(test_samp, dtype=torch.float)
            test_samp += y_noise_scale * torch.randn(N_samp, ydim)
            test_samp = torch.cat(
                [
                    torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                    torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                    test_samp
                ],
                dim=1)
            test_samp = test_samp.to(device)
            print('after:', cnt, test_samp)

            # use the network to predict parameters
            test_rev = model(test_samp, rev=True)[:, :colors.size]
            test_rev = test_rev.cpu().data.numpy()
            # assume a colorant is not needed if its concentration is below a threshold
            test_rev = np.where(test_rev < 0.1, 0, test_rev)

            # compute the reflectance of the predicted recipes
            # recipe_ref = data.recipe_reflectance(test_rev, optical_model)
            # use the corrected model to compute the reflectance of the recipes
            recipe_ref = data.correct_recipe_reflectance(test_rev)

            print("######## Test Sample %d ########" % cnt)
            # track the three recipes with the smallest color difference
            top3 = [[100, 0], [100, 0], [100, 0]]
            for n in range(test_rev.shape[0]):
                # print(test_rev[n, :])
                diff = data.color_diff(test_samps[cnt, :], recipe_ref[n, :])
                if diff < top3[2][0]:
                    top3[2][0] = diff
                    top3[2][1] = n
                    top3.sort()
            # print the three recipes with the smallest color difference
            for n in range(3):
                print(test_rev[top3[n][1], :])
                print("color diff: %.2f \n" % top3[n][0])
            print("\n\n")

            # draw
            # feature scaling
            test_x = test_cons[cnt, :].reshape(1, test_cons[cnt, :].shape[-1])
            plot_x = np.concatenate((test_rev, test_x), axis=0)
            # use tsne to decrease dimensionality
            x_norm = pd.DataFrame(plot_x, columns=color_names)
            # classify the recipes by which colorants they need (1 = needed, 0 = not needed)
            classes = np.zeros(N_samp).reshape(N_samp, 1)
            paint_needed = np.where(test_rev == 0, 0, 1)
            for paint_no in colors:
                classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
            class_norm = pd.DataFrame(np.concatenate(
                (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                      columns=['class'])
            data_plot = pd.concat(
                [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm], axis=1)
            class_data = data_plot['class']
            axes.clear()
            recipe_classes = np.array(
                class_norm[:-1].drop_duplicates()).reshape(1, -1).tolist()[0]
            for recipe_class in recipe_classes:
                axes.scatter(data_plot[class_data == recipe_class][0],
                             data_plot[class_data == recipe_class][1],
                             s=2,
                             alpha=0.5)
            axes.scatter(data_plot[class_data == 0][0],
                         data_plot[class_data == 0][1],
                         marker='+',
                         s=10)
            fig.canvas.draw()
            plt.savefig('test_result%d.png' % cnt, dpi=360)

        # loop over a few cases and plot results in a grid
        cnt = 0
        for i in range(r):
            for j in range(r):
                # convert data into correct format
                y_samps = np.tile(np.array(r_test[cnt, :]),
                                  N_samp).reshape(N_samp, ydim)
                y_samps = torch.tensor(y_samps, dtype=torch.float)
                y_samps += y_noise_scale * torch.randn(N_samp, ydim)
                y_samps = torch.cat(
                    [
                        torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                        y_samps
                    ],
                    dim=1)
                y_samps = y_samps.to(device)

                # use the network to predict parameters
                rev_x = model(y_samps, rev=True)[:, :colors.size]
                rev_x = rev_x.cpu().data.numpy()
                # assume a colorant is not needed if its concentration is below a threshold
                rev_x = np.where(rev_x < 0.1, 0, rev_x)

                # feature scaling
                test_x = c_test[cnt, :].reshape(1, c_test[cnt, :].shape[-1])
                plot_x = np.concatenate((rev_x, test_x), axis=0)
                # use tsne to decrease dimensionality
                x_norm = pd.DataFrame(plot_x, columns=color_names)
                # classify the recipes by which colorants they need (1 = needed, 0 = not needed)
                classes = np.zeros(N_samp).reshape(N_samp, 1)
                paint_needed = np.where(rev_x == 0, 0, 1)
                for paint_no in colors:
                    classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
                class_norm = pd.DataFrame(np.concatenate(
                    (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                          columns=['class'])
                data_plot = pd.concat(
                    [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm],
                    axis=1)
                class_data = data_plot['class']

                # plot the predicted and the true recipe
                axes.clear()
                recipe_classes = np.array(
                    class_norm[:-1].drop_duplicates()).reshape(
                        1, -1).tolist()[0]
                for recipe_class in recipe_classes:
                    axes.scatter(data_plot[class_data == recipe_class][0],
                                 data_plot[class_data == recipe_class][1],
                                 s=2,
                                 alpha=0.5)
                axes.scatter(data_plot[class_data == 0][0],
                             data_plot[class_data == 0][1],
                             marker='+',
                             s=10)
                fig.canvas.draw()
                plt.savefig('training_result%d.png' % cnt, dpi=360)

                # recipe_ref = data.recipe_reflectance(rev_x, optical_model)
                # use the corrected model to compute the reflectance of the recipes
                recipe_ref = data.correct_recipe_reflectance(rev_x)

                print("######## Test %d ########" % cnt)
                print(c_test[cnt])
                print("################")
                # track the three recipes with the smallest color difference
                top3 = [[100, 0], [100, 0], [100, 0]]
                for n in range(rev_x.shape[0]):
                    # print(rev_x[n, :])
                    diff = data.color_diff(r_test[cnt].numpy(),
                                           recipe_ref[n, :])
                    if diff < top3[2][0]:
                        top3[2][0] = diff
                        top3[2][1] = n
                        top3.sort()
                # print the three recipes with the smallest color difference
                # (was `test_rev` in the original, which belongs to the previous
                # section; `rev_x` is the array scored in this loop)
                for n in range(3):
                    print(rev_x[top3[n][1], :])
                    print("color diff: %.2f \n" % top3[n][0])
                print("\n\n")
                cnt += 1

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took %.2f minutes\n" % ((time() - t_start) / 60))
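# The `train()` function called above is not included in this section. Below is
# a condensed sketch of one epoch of such a bidirectional training step,
# modelled on the INNModel.process_batch() logic that appears later in this
# section. The variable names, the gradient clamp, and the exact loss
# bookkeeping are assumptions, not the original implementation; `device` is
# assumed to be the global torch device used elsewhere in the script.
def train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size,
          ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer,
          lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev,
          loss_backward, i_epoch=0):
    model.train()
    l_tot = 0
    for it, (x, y) in enumerate(train_loader):
        if it >= n_its_per_epoch:
            break
        x, y = x.to(device), y.to(device)
        # pad x with small noise, and build [z, pad, y] so that both sides of
        # the network have ndim_tot dimensions
        pad_x = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_x,
                                                device=device)
        pad_yz = zeros_noise_scale * torch.randn(
            batch_size, ndim_tot - ndim_y - ndim_z, device=device)
        y += y_noise_scale * torch.randn(batch_size, ndim_y, device=device)
        x_in = torch.cat((x, pad_x), dim=1)
        y_in = torch.cat(
            (torch.randn(batch_size, ndim_z, device=device), pad_yz, y), dim=1)

        optimizer.zero_grad()

        # forward pass: fit y and match the latent distribution with MMD
        output = model(x_in)
        l_forward = lambd_predict * loss_fit(output[:, ndim_z:],
                                             y_in[:, ndim_z:])
        output_block_grad = torch.cat(
            (output[:, :ndim_z], output[:, -ndim_y:].data), dim=1)
        y_short = torch.cat((y_in[:, :ndim_z], y_in[:, -ndim_y:]), dim=1)
        l_forward += lambd_latent * loss_latent(output_block_grad, y_short)
        l_forward.backward()

        # backward pass: samples generated from random (z, y) should match p(x),
        # and inverting the network's own (z, y) output should reproduce x
        y_rev_rand = torch.cat(
            (torch.randn(batch_size, ndim_z, device=device), pad_yz, y), dim=1)
        output_rev_rand = model(y_rev_rand, rev=True)
        l_backward = lambd_rev * loss_backward(output_rev_rand[:, :ndim_x],
                                               x_in[:, :ndim_x])
        z_perturbed = output.data[:, :ndim_z] + y_noise_scale * torch.randn(
            batch_size, ndim_z, device=device)
        y_rev = torch.cat((z_perturbed, pad_yz, y), dim=1)
        output_rev = model(y_rev, rev=True)
        l_backward += lambd_predict * loss_fit(output_rev[:, :ndim_x],
                                               x_in[:, :ndim_x])
        l_backward.backward()

        for p in model.parameters():
            p.grad.data.clamp_(-50, 50)
        optimizer.step()
        l_tot += l_forward.item() + l_backward.item()

    return l_tot / n_its_per_epoch, l_forward, l_backward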
def main(): # Set up data # make training signals signal_train_pars = [] signal_train_images = [] for i in range(total_temp_num): signal_train_pars.append( [np.random.uniform(-1.0, 1.0), np.random.uniform(0.5, 1.5)]) signal_train_images.append( np.random.normal(loc=signal_train_pars[i][0], scale=signal_train_pars[i][1], size=(1, n_pix))) signal_train_pars = np.array(signal_train_pars) signal_train_images = np.array(signal_train_images).reshape( total_temp_num, n_pix) # make random 1D gaussian signal noise_signal = np.random.normal(loc=0.0, scale=1.0, size=(1, n_pix)) #noise_signal = norm.rvs(0,1.0,(1,n_pix)) signal_pars = [0.0, 1.0] # load in lalinference samples #with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f: # lalinf_post = pickle.load(f) #lalinf_mc = lalinf_post[0] #lalinf_q = lalinf_post[1] # declare gw variants of positions and labels labels = torch.tensor(signal_train_images, dtype=torch.float) pos = torch.tensor(signal_train_pars, dtype=torch.float) # setting up the model ndim_tot = n_pix + n_neurons # two times the number data dimensions? ndim_x = 2 # number of parameter dimensions ndim_y = n_pix # number of data dimensions ndim_z = 10 # number of latent space dimensions? # define different parts of the network # define input node inp = InputNode(ndim_tot, name='input') # define hidden layer nodes t1 = Node([inp.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': { 'dropout': 0.0 } }) t2 = Node([t1.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': { 'dropout': 0.0 } }) """ t3 = Node([t2.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.2}}) t4 = Node([t3.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) """ # define output layer node outp = OutputNode([t2.out0], name='output') nodes = [inp, t1, t2, outp] model = ReversibleGraphNet(nodes) # Train model lr = 1e-2 decayEpochs = (n_epochs * n_its_per_epoch) // meta_epoch gamma = 0.004**(1.0 / decayEpochs) l2_reg = 2e-5 #gamma = 0.01**(1./120) y_noise_scale = 3e-2 # amount of noise to add to y parameter? zeros_noise_scale = 3e-2 # what is this?? # relative weighting of losses: lambd_predict = 300. # forward pass lambd_latent = 300. # laten space lambd_rev = 400. # backwards pass # padding both the data and the latent space # such that they have equal dimension to the parameter space pad_x = torch.zeros(batch_size, ndim_tot - ndim_x) pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z) # define optimizer optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8), eps=1e-04, weight_decay=l2_reg) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma) # define the three loss functions loss_backward = MMD_multiscale loss_latent = MMD_multiscale loss_fit = fit # set up test set data loader test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset( pos[:test_split], labels[:test_split]), batch_size=batch_size, shuffle=True, drop_last=True) # set up training set data loader train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset( pos[:], labels[:]), batch_size=batch_size, shuffle=True, drop_last=True) # what is happening here? More set up of network? 
for mod_list in model.children(): for block in mod_list.children(): for coeff in block.children(): coeff.fc3.weight.data = 0.01 * torch.randn( coeff.fc3.weight.shape) model.to(device) # number of test samples to use after training N_samp = 4000 # choose test samples to use after training # 1000 iterations of test signal burried in noise. Only need to change z parameter. #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp] #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp] #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale y_samps_nparray = np.repeat(noise_signal, N_samp, axis=0) y_samps = torch.tensor(y_samps_nparray, dtype=torch.float) # make test samples. First element is the latent space dimension # second element is the extra zeros needed to pad the input. # the third element is the time series y_samps = torch.cat( [ torch.randn(N_samp, ndim_z), zeros_noise_scale * torch.zeros( N_samp, ndim_tot - ndim_y - ndim_z), # zeros_noise_scale * y_samps ], dim=1) # what we should have now are 1000 copies of the event burried in noise with zero padding up to 2048 y_samps = y_samps.to(device) # get control contour values cont_mu, cont_sig, prob, levels = compute_like(noise_signal.reshape( n_pix, ), N=n_pix) #lalinf_post_blah = np.array([np.random.normal(loc=0,scale=1.0,size=(N_samp)), np.random.normal(loc=1.0,scale=1.0,size=(N_samp))]) # start training loop lossf_hist = [] lossrev_hist = [] beta_score_hist = [] try: # print('#Epoch \tIt/s \tl_total') t_start = time() # loop over number of epochs for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80): scheduler.step() # Initially, the l2 reg. on x and z can give huge gradients, set # the lr lower for this if i_epoch < 0: for param_group in optimizer.param_groups: param_group['lr'] = lr * 1e-2 #print(i_epoch, end='\t ') _, lossf, lossrev = train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev, loss_backward, i_epoch) # append current loss value to loss histories lossf_hist.append(lossf.item()) lossrev_hist.append(lossrev.item()) pe_losses = [lossf_hist, lossrev_hist] # predict parameters of signal rev_x = model(y_samps, rev=True) rev_x = rev_x.cpu().data.numpy() # plot pe results and loss if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)): #pe_std = [0.005, 0.01] # this will need to be removed #beta_score_hist.append([plot_pe_samples(rev_x,signal_pars,out_path,i_epoch,lalinf_post,pe_std)]) #plt.plot(np.linspace(plot_cadence,i_epoch,len(beta_score_hist)),beta_score_hist) #plt.savefig('%s/latest/beta_hist.png' % out_path) #plt.close() # plot loss curves - non-log and log plot_losses(pe_losses, '%s/latest/pe_losses.png' % out_path, legend=['PE-GEN']) plot_losses(pe_losses, '%s/latest/pe_losses_logscale.png' % out_path, logscale=True, legend=['PE-GEN']) # make PE scatter plots with contours and beta score mu0 = 0.0 sig0 = 1.0 plt.scatter(rev_x[:, 0], rev_x[:, 1], s=1., c='red', label='INN Results') plt.contour(cont_mu, cont_sig, prob, levels=[0.68, 0.9, 0.95, 0.99]) plt.plot(mu0, sig0, '+', label='Truth') plt.xlabel('mean') plt.ylabel('standard deviation') plt.legend() plt.savefig('%s/latest/predicted_pe.png' % out_path) plt.close() except KeyboardInterrupt: pass finally: print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
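# `plot_losses()` is used above but not defined in this section. A minimal
# sketch consistent with how it is called here (a list of loss histories, an
# output path, and optional log scale and legend) might look as follows; it is
# an assumption, not the original helper.
def plot_losses(losses, filename='losses.png', logscale=False, legend=None):
    plt.figure(figsize=(6, 4))
    # one curve per loss history
    for loss_hist in losses:
        plt.plot(loss_hist, alpha=0.8)
    if logscale:
        plt.yscale('log')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    if legend is not None:
        plt.legend(legend)
    plt.savefig(filename, dpi=360)
    plt.close()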
class INNModel(BaseModel): def __init__(self, config, *args): super(INNModel, self).__init__(config) self.config.update({ 'batch_size': int(2**12), 'lr': 0.001, 'n_epochs': 50, 'loss_epochs': 100, 'test_percent': 20, 'data_size': 8_000_000 }) self.config.update(config) # self.embedder = torch.load( # './runs/network_Sep19_23-35-50/checkpoints/model_999.pt' # ).feature_creator self.ndim_tot = 10 self.ndim_x = 1 self.ndim_y = 9 self.ndim_z = 1 inp = InputNode(self.ndim_tot, name='input') t1 = Node([inp.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'F_args': { 'batch_norm': True, 'internal_size': 2, # 'dropout': 0.3 }}) # t2 = Node([t1.out0], rev_multiplicative_layer, # {'F_class': F_fully_connected, 'clamp': 2.0, # 'F_args': {'dropout': 0.5}}) # t3 = Node([t2.out0], rev_multiplicative_layer, # {'F_class': F_fully_connected, 'clamp': 2.0, # 'F_args': {'dropout': 0.5}}) outp = OutputNode([t1.out0], name='output') nodes = [inp, t1, outp] self.model = ReversibleGraphNet(nodes) self.model.to(self.device) self.loss = F.mse_loss self.x_noise_scale = 3e-2 self.y_noise_scale = 3e-2 self.zeros_noise_scale = 3e-2 # relative weighting of losses: self.lambd_predict = 3. self.lambd_latent = 2. self.lambd_rev = 10. self.pad_x = torch.zeros(self.config['batch_size'], self.ndim_tot - self.ndim_x) # self.pad_yz = torch.zeros(self.config['batch_size'], self.ndim_tot - # self.ndim_y - self.ndim_z) def MMD_multiscale(x, y): xx, yy, zz = torch.mm(x,x.t()), torch.mm(y,y.t()), torch.mm(x,y.t()) rx = (xx.diag().unsqueeze(0).expand_as(xx)) ry = (yy.diag().unsqueeze(0).expand_as(yy)) dxx = rx.t() + rx - 2.*xx dyy = ry.t() + ry - 2.*yy dxy = rx.t() + ry - 2.*zz XX, YY, XY = (torch.zeros(xx.shape).to(self.device), torch.zeros(xx.shape).to(self.device), torch.zeros(xx.shape).to(self.device)) for a in [0.2, 0.5, 0.9, 1.3]: XX += a**2 * (a**2 + dxx)**-1 YY += a**2 * (a**2 + dyy)**-1 XY += a**2 * (a**2 + dxy)**-1 return torch.mean(XX + YY - 2.*XY) def fit(input, target): return torch.mean((input - target)**2) self.loss_backward = MMD_multiscale self.loss_latent = MMD_multiscale self.loss_fit = F.l1_loss self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.config['lr'], weight_decay=1e2, # momentum=0.9 ) self.scheduler = torch.optim.lr_scheduler.StepLR( self.optimizer, 1758 * 5, 0.1 ) def process_batch(self, x, y): self.loss_factor = 1 y, x = x.to(self.device), y.to(self.device).reshape(-1, 1) # y = self.embedder(y[:, :4], y[:, 4:].long()).detach() x_clean = x.clone() y_clean = y.clone() pad_x = self.zeros_noise_scale * torch.randn( self.config['batch_size'], self.ndim_tot - self.ndim_x, device=self.device ) # pad_yz = self.zeros_noise_scale * torch.randn( # self.config['batch_size'], self.ndim_tot - self.ndim_y - # self.ndim_z, device=self.device # ) x += self.x_noise_scale * torch.randn( self.config['batch_size'], self.ndim_x, dtype=torch.float, device=self.device ) y += self.y_noise_scale * torch.randn( self.config['batch_size'], self.ndim_y, dtype=torch.float, device=self.device ) x, y = (torch.cat((x, pad_x), dim=1), torch.cat( (torch.randn(self.config['batch_size'], self.ndim_z, device=self.device), y), dim=1)) self.optimizer.zero_grad() # Forward step: output = self.model(x) # Shorten output, and remove gradients wrt y, for latent loss y_short = torch.cat((y[:, :self.ndim_z], y[:, -self.ndim_y:]), dim=1) l = 0.5 * self.lambd_predict * self.loss_fit(output[:, self.ndim_z:], y[:, self.ndim_z:]) output_block_grad = torch.cat((output[:, :self.ndim_z], output[:, -self.ndim_y:].data), dim=1) 
l += self.lambd_latent * self.loss_latent(output_block_grad, y_short) l_tot = l.data.item() l.backward() # Backward step: # pad_yz = self.zeros_noise_scale * torch.randn( # self.config['batch_size'], self.ndim_tot - self.ndim_y - # self.ndim_z, device=self.device # ) x = x_clean + self.y_noise_scale * torch.randn( self.config['batch_size'], self.ndim_x, device=self.device) y = y_clean + self.y_noise_scale * torch.randn( self.config['batch_size'], self.ndim_y, device=self.device) orig_z_perturbed = (output.data[:, :self.ndim_z] + self.y_noise_scale * torch.randn( self.config['batch_size'], self.ndim_z, device=self.device)) y_rev = torch.cat((orig_z_perturbed, # pad_yz, y), dim=1) y_rev_rand = torch.cat((torch.randn( self.config['batch_size'], self.ndim_z, device=self.device ), y), dim=1) output_rev = self.model(y_rev, rev=True) output_rev_rand = self.model(y_rev_rand, rev=True) l_rev = ( self.lambd_rev * self.loss_factor * self.loss_backward(output_rev_rand[:, :self.ndim_x], x[:, :self.ndim_x]) ) mse = torch.mean((output_rev[:, :self.ndim_x] - x[:, :self.ndim_x])**2) # l_rev += self.lambd_predict * self.loss_fit(output_rev, x) l_rev += self.lambd_predict * self.loss(output_rev[:, :self.ndim_x], x[:, :self.ndim_x]) l_tot += l_rev.data.item() l_rev.backward() for p in self.model.parameters(): p.grad.data.clamp_(-50, 50) self.optimizer.step() self.scheduler.step() return mse.item() @torch.no_grad() def eval(self, val_loader): self.model.eval() mse = 0 for x, y in tqdm(val_loader): x = x.to(self.device, non_blocking=True) y = y.to(self.device, non_blocking=True) # x = self.embedder(x[:, :4], x[:, 4:].long()) z = self.model(torch.cat( (x, torch.zeros(x.shape[0], self.ndim_tot-x.shape[1], device=self.device)), dim=1 ), rev=True)[:, 0].squeeze() mse += torch.sum((y - z)**2).item() return np.sqrt(mse/len(val_loader.dataset))
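# A minimal sketch (not from the original source) of how INNModel might be
# driven. The loaders and config dictionary are placeholders, and it assumes
# the training loader uses batch_size equal to config['batch_size'] with
# drop_last=True, since process_batch() uses that value for the noise shapes.
def _innmodel_usage_example(config, train_loader, val_loader):
    model = INNModel(config)
    for epoch in range(model.config['n_epochs']):
        # one pass over the training loader; process_batch runs the
        # bidirectional update and returns the batch MSE of the inverse pass
        for x, y in train_loader:
            batch_mse = model.process_batch(x, y)
        # RMSE on the validation set via the overridden eval()
        rmse = model.eval(val_loader)
        print('epoch %d: last-batch MSE %.4f, validation RMSE %.4f'
              % (epoch, batch_mse, rmse))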
def main(): # Set up data batch_size = 1600 # set batch size test_split = 10000 # number of testing samples to use # generate data # makes a torch.tensor() with arrays of (n_samples X parameters) and (n_samples X data) # labels are the colours and pos are the x,y coords # however, labels are 1-hot encoded pos, labels = data.generate(labels='all', tot_dataset_size=2**20) # just simply renaming the colors properly. #c = np.where(labels[:test_split])[1] c = labels[:test_split, :] plt.figure(figsize=(6, 6)) plt.scatter(pos[:test_split, 0], pos[:test_split, 1], c=c, cmap='Set1', s=0.25) plt.xticks([]) plt.yticks([]) plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png') plt.close() # setting up the model ndim_tot = 16 # ? ndim_x = 2 # number of parameter dimensions (x,y) ndim_y = 3 # number of label dimensions (colours for 1-hot encoding) ndim_z = 2 # number of latent space dimensions? # define different parts of the network # define input node inp = InputNode(ndim_tot, name='input') # define hidden layer nodes t1 = Node([inp.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': { 'dropout': 0.0 } }) t2 = Node([t1.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': { 'dropout': 0.0 } }) t3 = Node([t2.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': { 'dropout': 0.0 } }) # define output layer node outp = OutputNode([t3.out0], name='output') nodes = [inp, t1, t2, t3, outp] model = ReversibleGraphNet(nodes) # Train model # Training parameters n_epochs = 3000 meta_epoch = 12 # what is this??? n_its_per_epoch = 4 batch_size = 1600 lr = 1e-2 gamma = 0.01**(1. / 120) l2_reg = 2e-5 y_noise_scale = 3e-2 zeros_noise_scale = 3e-2 # relative weighting of losses: lambd_predict = 300. # forward pass lambd_latent = 300. # laten space lambd_rev = 400. 
# backwards pass # padding both the data and the latent space # such that they have equal dimension to the parameter space pad_x = torch.zeros(batch_size, ndim_tot - ndim_x) pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z) print(pad_x.shape, pad_yz.shape) # define optimizer optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8), eps=1e-04, weight_decay=l2_reg) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma) # define the three loss functions loss_backward = MMD_multiscale loss_latent = MMD_multiscale loss_fit = fit # set up test set data loader test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset( pos[:test_split], labels[:test_split]), batch_size=batch_size, shuffle=True, drop_last=True) # set up training set data loader train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset( pos[test_split:], labels[test_split:]), batch_size=batch_size, shuffle=True, drop_last=True) # initialisation of network weights for mod_list in model.children(): for block in mod_list.children(): for coeff in block.children(): coeff.fc3.weight.data = 0.01 * torch.randn( coeff.fc3.weight.shape) model.to(device) # initialize gif for showing training procedure fig, axes = plt.subplots(1, 2, figsize=(8, 4)) axes[0].set_xticks([]) axes[0].set_yticks([]) axes[0].set_title('Predicted labels (Forwards Process)') axes[1].set_xticks([]) axes[1].set_yticks([]) axes[1].set_title('Generated Samples (Backwards Process)') #fig.show() #fig.canvas.draw() # number of test samples to use after training N_samp = 4096 # choose test samples to use after training x_samps = torch.cat([x for x, y in test_loader], dim=0)[:N_samp] y_samps = torch.cat([y for x, y in test_loader], dim=0)[:N_samp] #c = np.where(y_samps)[1] #c = y_samps[:,0] c = np.array(y_samps).reshape(N_samp, ndim_y) y_samps += y_noise_scale * torch.randn(N_samp, ndim_y) y_samps = torch.cat([ torch.randn(N_samp, ndim_z), zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), y_samps ], dim=1) y_samps = y_samps.to(device) # start training loop try: # print('#Epoch \tIt/s \tl_total') t_start = time() # loop over number of epochs for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80): scheduler.step() # Initially, the l2 reg. 
on x and z can give huge gradients, set # the lr lower for this if i_epoch < 0: for param_group in optimizer.param_groups: param_group['lr'] = lr * 1e-2 # print(i_epoch, end='\t ') train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev, loss_backward, i_epoch) # predict the locations of test labels rev_x = model(y_samps, rev=True) rev_x = rev_x.cpu().data.numpy() # predict the label given a location #pred_c = model(torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)), # dim=1).to(device)).data[:, -8:].argmax(dim=1) pred_c = model( torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)), dim=1).to(device)).data[:, -1:].argmax(dim=1) axes[0].clear() #axes[0].scatter(tmp_x_samps[:,0], tmp_x_samps[:,1], c=pred_c, cmap='Set1', s=1., vmin=0, vmax=9) axes[0].axis('equal') axes[0].axis([-3, 3, -3, 3]) axes[0].set_xticks([]) axes[0].set_yticks([]) axes[1].clear() axes[1].scatter(rev_x[:, 0], rev_x[:, 1], c=c, cmap='Set1', s=1., vmin=0, vmax=9) axes[1].axis('equal') axes[1].axis([-3, 3, -3, 3]) axes[1].set_xticks([]) axes[1].set_yticks([]) fig.canvas.draw() plt.savefig('/data/public_html/chrism/FrEIA/training_pred.png') except KeyboardInterrupt: pass finally: print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
def main(): # Set up simulation parameters batch_size = 1600 # set batch size r = 4 # the grid dimension for the output tests test_split = r*r # number of testing samples to use sigma = 0.2 # the noise std ndata = 64 # number of data samples usepars = [0,1,2,3] # parameter indices to use seed = 1 # seed for generating data run_label='gpu0' out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/multipar/%s/" % run_label # generate data pos, labels, x, sig, parnames = data.generate( tot_dataset_size=2**20, ndata=ndata, usepars=usepars, sigma=sigma, seed=seed ) print('generated data') # seperate the test data for plotting pos_test = pos[-test_split:] labels_test = labels[-test_split:] sig_test = sig[-test_split:] # plot the test data examples plt.figure(figsize=(6,6)) fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row') cnt = 0 for i in range(r): for j in range(r): axes[i,j].plot(x,np.array(labels_test[cnt,:]),'.') axes[i,j].plot(x,np.array(sig_test[cnt,:]),'-') cnt += 1 axes[i,j].axis([0,1,-1.5,1.5]) axes[i,j].set_xlabel('time') if i==r-1 else axes[i,j].set_xlabel('') axes[i,j].set_ylabel('h(t)') if j==0 else axes[i,j].set_ylabel('') plt.savefig('%stest_distribution.png' % out_dir,dpi=360) plt.close() # precompute true posterior samples on the test data cnt = 0 N_samp = 1000 ndim_x = len(usepars) samples = np.zeros((r*r,N_samp,ndim_x)) for i in range(r): for j in range(r): samples[cnt,:,:] = data.get_lik(np.array(labels_test[cnt,:]).flatten(),sigma=sigma,usepars=usepars,Nsamp=N_samp) print(samples[cnt,:10,:]) cnt += 1 # initialize plot for showing testing results fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row') for k in range(ndim_x): parname1 = parnames[k] for nextk in range(ndim_x): parname2 = parnames[nextk] if nextk>k: cnt = 0 for i in range(r): for j in range(r): # plot the samples and the true contours axes[i,j].clear() axes[i,j].scatter(samples[cnt,:,k], samples[cnt,:,nextk],c='b',s=0.5,alpha=0.5) axes[i,j].plot(pos_test[cnt,k],pos_test[cnt,nextk],'+c',markersize=8) axes[i,j].set_xlim([0,1]) axes[i,j].set_ylim([0,1]) axes[i,j].set_xlabel(parname1) if i==r-1 else axes[i,j].set_xlabel('') axes[i,j].set_ylabel(parname2) if j==0 else axes[i,j].set_ylabel('') cnt += 1 # save the results to file fig.canvas.draw() plt.savefig('%strue_samples_%d%d.png' % (out_dir,k,nextk),dpi=360) # setting up the model ndim_x = len(usepars) # number of posterior parameter dimensions (x,y) ndim_y = ndata # number of label dimensions (noisy data samples) ndim_z = 4 # number of latent space dimensions? 
ndim_tot = max(ndim_x,ndim_y+ndim_z) # must be > ndim_x and > ndim_y + ndim_z # define different parts of the network # define input node inp = InputNode(ndim_tot, name='input') # define hidden layer nodes t1 = Node([inp.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.2}}) #t1 = Node([inp.out0], rev_multiplicative_layer, # {'F_class': F_conv, 'clamp': 2.0, # 'F_args': {'kernel_size': 3,'leaky_slope': 0.1}}) #def __init__(self, dims_in, F_class=F_fully_connected, F_args={}, # clamp=5.): # super(rev_multiplicative_layer, self).__init__() # channels = dims_in[0][0] # # self.split_len1 = channels // 2 # self.split_len2 = channels - channels // 2 # self.ndims = len(dims_in[0]) # # self.clamp = clamp # self.max_s = exp(clamp) # self.min_s = exp(-clamp) # # self.s1 = F_class(self.split_len1, self.split_len2, **F_args) # self.t1 = F_class(self.split_len1, self.split_len2, **F_args) # self.s2 = F_class(self.split_len2, self.split_len1, **F_args) # self.t2 = F_class(self.split_len2, self.split_len1, **F_args) t2 = Node([t1.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.2}}) t3 = Node([t2.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.2}}) t4 = Node([t3.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) # define output layer node outp = OutputNode([t4.out0], name='output') nodes = [inp, t1, t2, t3, t4, outp] model = ReversibleGraphNet(nodes) # Train model # Training parameters n_epochs = 10000 meta_epoch = 12 # what is this??? n_its_per_epoch = 12 batch_size = 1600 lr = 1e-2 gamma = 0.01**(1./120) l2_reg = 2e-5 y_noise_scale = 3e-2 zeros_noise_scale = 3e-2 # relative weighting of losses: lambd_predict = 300. # forward pass lambd_latent = 300. # laten space lambd_rev = 400. # backwards pass # padding both the data and the latent space # such that they have equal dimension to the parameter space #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x) #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z) # define optimizer optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8), eps=1e-04, weight_decay=l2_reg) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma) # define the three loss functions loss_backward = MMD_multiscale loss_latent = MMD_multiscale loss_fit = fit # set up training set data loader train_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]), batch_size=batch_size, shuffle=True, drop_last=True) # initialisation of network weights #for mod_list in model.children(): # for block in mod_list.children(): # for coeff in block.children(): # coeff.fc3.weight.data = 0.01*torch.randn(coeff.fc3.weight.shape) #model.to(device) # start training loop try: t_start = time() olvec = np.zeros((r,r,int(n_epochs/10))) s = 0 # loop over number of epochs for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80): scheduler.step() # Initially, the l2 reg. 
on x and z can give huge gradients, set # the lr lower for this if i_epoch < 0: print('inside this iepoch<0 thing') for param_group in optimizer.param_groups: param_group['lr'] = lr * 1e-2 # train the model train(model,train_loader,n_its_per_epoch,zeros_noise_scale,batch_size, ndim_tot,ndim_x,ndim_y,ndim_z,y_noise_scale,optimizer,lambd_predict, loss_fit,lambd_latent,loss_latent,lambd_rev,loss_backward,i_epoch) # loop over a few cases and plot results in a grid if np.remainder(i_epoch,10)==0: for k in range(ndim_x): parname1 = parnames[k] for nextk in range(ndim_x): parname2 = parnames[nextk] if nextk>k: cnt = 0 # initialize plot for showing testing results fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row') for i in range(r): for j in range(r): # convert data into correct format y_samps = np.tile(np.array(labels_test[cnt,:]),N_samp).reshape(N_samp,ndim_y) y_samps = torch.tensor(y_samps, dtype=torch.float) y_samps += y_noise_scale * torch.randn(N_samp, ndim_y) y_samps = torch.cat([torch.randn(N_samp, ndim_z), zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), y_samps], dim=1) y_samps = y_samps.to(device) # use the network to predict parameters rev_x = model(y_samps, rev=True) rev_x = rev_x.cpu().data.numpy() # compute the n-d overlap if k==0 and nextk==1: ol = data.overlap(samples[cnt,:,:ndim_x],rev_x[:,:ndim_x]) olvec[i,j,s] = ol # plot the samples and the true contours axes[i,j].clear() axes[i,j].scatter(samples[cnt,:,k], samples[cnt,:,nextk],c='b',s=0.2,alpha=0.5) axes[i,j].scatter(rev_x[:,k], rev_x[:,nextk],c='r',s=0.2,alpha=0.5) axes[i,j].plot(pos_test[cnt,k],pos_test[cnt,nextk],'+c',markersize=8) axes[i,j].set_xlim([0,1]) axes[i,j].set_ylim([0,1]) oltxt = '%.2f' % olvec[i,j,s] axes[i,j].text(0.90, 0.95, oltxt, horizontalalignment='right', verticalalignment='top', transform=axes[i,j].transAxes) matplotlib.rc('xtick', labelsize=8) matplotlib.rc('ytick', labelsize=8) axes[i,j].set_xlabel(parname1) if i==r-1 else axes[i,j].set_xlabel('') axes[i,j].set_ylabel(parname2) if j==0 else axes[i,j].set_ylabel('') cnt += 1 # save the results to file fig.canvas.draw() plt.savefig('%sposteriors_%d%d_%04d.png' % (out_dir,k,nextk,i_epoch),dpi=360) plt.savefig('%slatest_%d%d.png' % (out_dir,k,nextk),dpi=360) plt.close() s += 1 # plot overlap results if np.remainder(i_epoch,10)==0: fig, axes = plt.subplots(1,figsize=(6,6)) for i in range(r): for j in range(r): axes.semilogx(10*np.arange(olvec.shape[2]),olvec[i,j,:],alpha=0.5) axes.grid() axes.set_ylabel('overlap') axes.set_xlabel('epoch') axes.set_ylim([0,1]) plt.savefig('%soverlap.png' % out_dir,dpi=360) plt.close() except KeyboardInterrupt: pass finally: print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
def main(): # Set up simulation parameters batch_size = 128 # set batch size r = 3 # the grid dimension for the output tests test_split = r * r # number of testing samples to use sig_model = 'sg' # the signal model to use sigma = 0.2 # the noise std ndata = 128 #32 number of data samples in time series bound = [0.0, 1.0, 0.0, 1.0] # effective bound for likelihood seed = 1 # seed for generating data out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/" n_neurons = 0 do_contours = True # if True, plot contours of predictions by INN plot_cadence = 50 do_latent_struc = False # if True, plot latent space 2D structure conv_nn = False # if True, use convolutional nn structure # setup output directory - if it does not exist os.system('mkdir -p %s' % out_dir) # generate data pos, labels, x, sig = data.generate( model=sig_model, tot_dataset_size=int(1e6), # 1e6 ndata=ndata, sigma=sigma, prior_bound=bound, seed=seed) if do_latent_struc: # calculate mode of x-space for both pars mode_1 = stats.mode(np.array(pos[:, 0])) mode_2 = stats.mode(np.array(pos[:, 1])) # seperate the test data for plotting pos_test = pos[-test_split:] labels_test = labels[-test_split:] sig_test = sig[-test_split:] # plot the test data examples plt.figure(figsize=(6, 6)) fig_post, axes = plt.subplots(r, r, figsize=(6, 6)) cnt = 0 for i in range(r): for j in range(r): axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.') axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-') cnt += 1 axes[i, j].axis([0, 1, -1.5, 1.5]) plt.savefig("%stest_distribution.png" % out_dir, dpi=360) plt.close() # setting up the model ndim_x = 2 # number of posterior parameter dimensions (x,y) ndim_y = ndata # number of label dimensions (noisy data samples) ndim_z = 200 # number of latent space dimensions? ndim_tot = max( ndim_x, ndim_y + ndim_z) + n_neurons # must be > ndim_x and > ndim_y + ndim_z # define different parts of the network # define input node inp = InputNode(ndim_tot, name='input') # define hidden layer nodes filtsize = 3 dropout = 0.0 clamp = 1.0 if conv_nn == True: t1 = Node( [inp.out0], rev_multiplicative_layer, { 'F_class': F_conv, 'clamp': clamp, 'F_args': { 'kernel_size': filtsize, 'leaky_slope': 0.1, 'batch_norm': False } }) t2 = Node( [t1.out0], rev_multiplicative_layer, { 'F_class': F_conv, 'clamp': clamp, 'F_args': { 'kernel_size': filtsize, 'leaky_slope': 0.1, 'batch_norm': False } }) t3 = Node( [t2.out0], rev_multiplicative_layer, { 'F_class': F_conv, 'clamp': clamp, 'F_args': { 'kernel_size': filtsize, 'leaky_slope': 0.1, 'batch_norm': False } }) #t4 = Node([t1.out0], rev_multiplicative_layer, # {'F_class': F_conv, 'clamp': 2.0, # 'F_args':{'kernel_size': filtsize,'leaky_slope':0.1, # 'batch_norm':False}}) #t5 = Node([t2.out0], rev_multiplicative_layer, # {'F_class': F_conv, 'clamp': 2.0, # 'F_args':{'kernel_size': filtsize,'leaky_slope':0.1, # 'batch_norm':False}}) else: t1 = Node( [inp.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': clamp, 'F_args': { 'dropout': dropout } }) t2 = Node( [t1.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': clamp, 'F_args': { 'dropout': dropout } }) t3 = Node( [t2.out0], rev_multiplicative_layer, { 'F_class': F_fully_connected, 'clamp': clamp, 'F_args': { 'dropout': dropout } }) # define output layer node outp = OutputNode([t3.out0], name='output') nodes = [inp, t1, t2, t3, outp] model = ReversibleGraphNet(nodes) # Train model # Training parameters n_epochs = 12000 meta_epoch = 12 # what is this??? 
n_its_per_epoch = 12 lr = 1e-2 gamma = 0.01**(1. / 120) l2_reg = 2e-5 y_noise_scale = 3e-2 zeros_noise_scale = 3e-2 # relative weighting of losses: lambd_predict = 4000. #300 forward pass lambd_latent = 900. #300 laten space lambd_rev = 1000. #400 backwards pass # padding both the data and the latent space # such that they have equal dimension to the parameter space #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x) #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z) # define optimizer optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8), eps=1e-04, weight_decay=l2_reg) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma) # define the three loss functions loss_backward = MMD_multiscale loss_latent = MMD_multiscale loss_fit = fit # set up training set data loader train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset( pos[test_split:], labels[test_split:]), batch_size=batch_size, shuffle=True, drop_last=True) # initialisation of network weights for mod_list in model.children(): for block in mod_list.children(): for coeff in block.children(): if conv_nn == True: coeff.conv3.weight.data = 0.01 * torch.randn( coeff.conv3.weight.shape) model.to(device) # number of test samples to use after training N_samp = 2500 # precompute true likelihood on the test data Ngrid = 64 cnt = 0 lik = np.zeros((r, r, Ngrid * Ngrid)) true_post = np.zeros((r, r, N_samp, 2)) lossf_hist = [] lossrev_hist = [] losstot_hist = [] losslatent_hist = [] beta_score_hist = [] for i in range(r): for j in range(r): mvec, cvec, temp, post_points = data.get_lik(np.array( labels_test[cnt, :]).flatten(), n_grid=Ngrid, sig_model=sig_model, sigma=sigma, xvec=x, bound=bound) lik[i, j, :] = temp.flatten() true_post[i, j, :] = post_points[:N_samp] cnt += 1 # start training loop try: t_start = time() # loop over number of epochs for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80): scheduler.step() # Initially, the l2 reg. 
on x and z can give huge gradients, set # the lr lower for this if i_epoch < 0: print('inside this iepoch<0 thing') for param_group in optimizer.param_groups: param_group['lr'] = lr * 1e-2 # train the model losstot, losslatent, lossrev, lossf, lambd_latent = train( model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev, loss_backward, conv_nn, i_epoch) # append current loss value to loss histories lossf_hist.append(lossf.data.item()) lossrev_hist.append(lossrev.data.item()) losstot_hist.append(losstot) losslatent_hist.append(losslatent.data.item()) pe_losses = [ losstot_hist, losslatent_hist, lossrev_hist, lossf_hist ] # loop over a few cases and plot results in a grid cnt = 0 beta_max = 0 if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)): # use the network to predict parameters\ if do_latent_struc: # do latent space structure plotting y_samps_latent = np.tile(np.array(labels_test[0, :]), 1).reshape(1, ndim_y) y_samps_latent = torch.tensor(y_samps_latent, dtype=torch.float) x1_i_dist = [] x2_i_dist = [] x1_i_par = np.array([]) x2_i_par = np.array([]) # define latent space mesh grid z_mesh = np.mgrid[-0.99:-0.01:100j, -0.99:-0.01:100j] z_mesh = np.vstack([z_mesh, np.zeros((2, 100, 100))]) #for z_i in range(10000): for i in range(z_mesh.shape[1]): for j in range(z_mesh.shape[2]): a = torch.randn(1, ndim_z) a[0, 0] = z_mesh[0, i, j] a[0, 1] = z_mesh[1, i, j] x_i = model(torch.cat([ a, torch.zeros(1, ndim_tot - ndim_y - ndim_z), y_samps_latent ], dim=1).to(device), rev=True) x_i = x_i.cpu().data.numpy() # calculate hue and intensity if np.abs(mode_1[0][0] - x_i[0][0]) < np.abs(mode_2[0][0] - x_i[0][1]): z_mesh[2, i, j] = np.abs(mode_1[0][0] - x_i[0][0]) z_mesh[3, i, j] = 0 else: z_mesh[2, i, j] = np.abs(mode_2[0][0] - x_i[0][1]) z_mesh[3, i, j] = 1 z_mesh[2, :, :][z_mesh[3, :, :] == 0] = z_mesh[2, :, :][ z_mesh[3, :, :] == 0] / np.max( z_mesh[2, :, :][z_mesh[3, :, :] == 0]) z_mesh[2, :, :][z_mesh[3, :, :] == 1] = z_mesh[2, :, :][ z_mesh[3, :, :] == 1] / np.max( z_mesh[2, :, :][z_mesh[3, :, :] == 1]) bg_color = 'black' fg_color = 'red' fig = plt.figure(facecolor=bg_color, edgecolor=fg_color) axes = fig.add_subplot(111) axes.patch.set_facecolor(bg_color) axes.xaxis.set_tick_params(color=fg_color, labelcolor=fg_color) axes.yaxis.set_tick_params(color=fg_color, labelcolor=fg_color) for spine in axes.spines.values(): spine.set_color(fg_color) plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 0], z_mesh[1, :, :][z_mesh[3, :, :] == 0], s=1, c=z_mesh[2, :, :][z_mesh[3, :, :] == 0], cmap='Greens', axes=axes) plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 1], z_mesh[1, :, :][z_mesh[3, :, :] == 1], s=1, c=z_mesh[2, :, :][z_mesh[3, :, :] == 1], cmap='Purples', axes=axes) plt.xlabel('z-space', color=fg_color) plt.ylabel('z-space', color=fg_color) plt.savefig('%sstruct_z.png' % out_dir, dpi=360) plt.close() # end of latent space structure plotting # initialize plot for showing testing results fig, axes = plt.subplots(r, r, figsize=(6, 6)) for i in range(r): for j in range(r): # convert data into correct format y_samps = np.tile(np.array(labels_test[cnt, :]), N_samp).reshape(N_samp, ndim_y) y_samps = torch.tensor(y_samps, dtype=torch.float) #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y) y_samps = torch.cat( [ torch.randn(N_samp, ndim_z), #zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), y_samps ], dim=1) y_samps = y_samps.to(device) if conv_nn == 
True: y_samps = y_samps.reshape(y_samps.shape[0], y_samps.shape[1], 1, 1) rev_x = model(y_samps, rev=True) rev_x = rev_x.cpu().data.numpy() if conv_nn == True: rev_x = rev_x.reshape(rev_x.shape[0], rev_x.shape[1]) # plot the samples and the true contours axes[i, j].clear() axes[i, j].contour(mvec, cvec, lik[i, j, :].reshape(Ngrid, Ngrid), levels=[0.68, 0.9, 0.99]) axes[i, j].scatter(rev_x[:, 0], rev_x[:, 1], s=0.5, alpha=0.5, color='red') axes[i, j].scatter(true_post[i, j, :, 1], true_post[i, j, :, 0], s=0.5, alpha=0.5, color='blue') axes[i, j].plot(pos_test[cnt, 0], pos_test[cnt, 1], '+r', markersize=8) axes[i, j].axis(bound) # add contours to results try: if do_contours: contour_y = np.reshape(rev_x[:, 1], (rev_x[:, 1].shape[0])) contour_x = np.reshape(rev_x[:, 0], (rev_x[:, 0].shape[0])) contour_dataset = np.array( [contour_x, contour_y]) kernel_cnn = make_contour_plot( axes[i, j], contour_x, contour_y, contour_dataset, 'red', flip=False, kernel_cnn=False) # run overlap tests on results contour_x = np.reshape( true_post[i, j][:, 1], (true_post[i, j][:, 1].shape[0])) contour_y = np.reshape( true_post[i, j][:, 0], (true_post[i, j][:, 0].shape[0])) contour_dataset = np.array( [contour_x, contour_y]) ks_score, ad_score, beta_score = overlap_tests( rev_x, true_post[i, j], pos_test[cnt], kernel_cnn, gaussian_kde(contour_dataset)) axes[i, j].legend([ 'Overlap: %s' % str(np.round(beta_score, 3)) ]) beta_score_hist.append([beta_score]) except ValueError as e: pass cnt += 1 # sve the results to file fig_post.canvas.draw() plt.savefig('%sposteriors_%s.png' % (out_dir, i_epoch), dpi=360) plt.savefig('%slatest.png' % out_dir, dpi=360) plot_losses(pe_losses, '%spe_losses.png' % out_dir, legend=['PE-GEN']) plot_losses(pe_losses, '%spe_losses_logscale.png' % out_dir, logscale=True, legend=['PE-GEN']) except KeyboardInterrupt: pass finally: print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
def main(): # Set up data test_split = 1 # number of testing samples to use # load in gw templates and signals signal_train_images, signal_train_pars, signal_image, noise_signal, signal_pars = load_gw_data() if add_noise_real: train_array = [] train_pe_array = [] for i in range(len(signal_train_images)): for j in range(n_real): train_array.append([signal_train_images[i] + np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23]) train_pe_array.append([signal_train_pars[i]]) train_array = np.array(train_array) train_pe_array = np.array(train_pe_array) train_array = train_array.reshape(train_array.shape[0],train_array.shape[2]) train_pe_array = train_pe_array.reshape(train_pe_array.shape[0],train_pe_array.shape[2]) else: for i in range(len(signal_train_images)): signal_train_images[i] += np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23 # load in lalinference noise signal noise_signal = h5py.File("gw_data/data/%s0%s.hdf5" % (event_name,tag),"r") noise_signal = np.reshape(noise_signal['wht_wvf'][:] * 1079.23,(n_pix,1)) # 817.98 need to not have this hardcoded #noise_signal *= 1079.23 / 817.98 #noise_signal = noise_signal.reshape(noise_signal.shape[0],1) plt.plot(noise_signal) plt.savefig('%s/test.png' % out_path) plt.close() # load in lalinference samples with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f: lalinf_post = pickle.load(f) lalinf_mc = lalinf_post[0] lalinf_q = lalinf_post[1] kernel_lalinf = gaussian_kde(lalinf_post) # declare gw variants of positions and labels mc_max = np.max(signal_train_pars[:,0]) #signal_train_pars /= mc_max labels = torch.tensor(signal_train_images, dtype=torch.float) pos = torch.tensor(signal_train_pars, dtype=torch.float) # setting up the model ndim_x = 2 # number of parameter dimensions ndim_y = n_pix # number of data dimensions ndim_z = 100 # number of latent space dimensions? ndim_tot = n_pix+ndim_z+ndim_x+n_neurons # two times the number data dimensions? # define different parts of the network # define input node inp = InputNode(ndim_tot, name='input') # define hidden layer nodes # number of nodes equal to number of parameters? 
t1 = Node([inp.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}, 'F_args': {'batch_norm': False}}) t2 = Node([t1.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}, 'F_args': {'batch_norm': False}}) """ t3 = Node([t2.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}, 'F_args': {'batch_norm': False}}) t4 = Node([t3.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t5 = Node([t4.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t6 = Node([t5.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t7 = Node([t6.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t8 = Node([t7.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t9 = Node([t8.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) t10 = Node([t9.out0], rev_multiplicative_layer, {'F_class': F_fully_connected, 'clamp': 2.0, 'F_args': {'dropout': 0.0}}) """ # define output layer node outp = OutputNode([t2.out0], name='output') nodes = [inp, t1, t2, outp] model = ReversibleGraphNet(nodes) # Train model lr = 1e-4 gamma = 0.01**(1./120) l2_reg = 2e-5 y_noise_scale = 1 # amount of noise to add to y parameter? zeros_noise_scale = 3e-2 # what is this?? # relative weighting of losses: lambd_predict = 300. # forward pass lambd_latent = 300. # laten space lambd_rev = 400. # backwards pass # padding both the data and the latent space # such that they have equal dimension to the parameter space pad_x = torch.zeros(batch_size, ndim_tot - ndim_x) pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z) # define optimizer optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8), eps=1e-04, weight_decay=l2_reg, amsgrad=True) #optimizer = torch.optim.SGD(model.parameters(), lr=lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma) # define the three loss functions loss_backward = MMD_multiscale loss_latent = MMD_multiscale loss_fit = fit # set up test set data loader test_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]), batch_size=batch_size, shuffle=True, drop_last=True) # set up training set data loader train_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(pos[:], labels[:]), batch_size=batch_size, shuffle=True, drop_last=True) # what is happening here? More set up of network? for mod_list in model.children(): for block in mod_list.children(): for coeff in block.children(): coeff.fc3.weight.data = 0.01*torch.randn(coeff.fc3.weight.shape) model.to(device) # number of test samples to use after training N_samp = 4000 # choose test samples to use after training # 1000 iterations of test signal burried in noise. Only need to change z parameter. #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp] #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp] #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale y_samps = y_noise_scale * np.transpose(torch.tensor(np.repeat(noise_signal, N_samp, axis=1), dtype=torch.float)) # make test samples. 
    # first element is the latent space dimension,
    # second element is the extra zeros needed to pad the input,
    # the third element is the time series
    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                         zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                         # zeros_noise_scale *
                         y_samps], dim=1)
    # we should now have N_samp copies of the event buried in noise, zero padded up to ndim_tot
    y_samps = y_samps.to(device)

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []
    kernel_cnn = False
    try:
        #print('#Epoch \tIt/s \tl_total')
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):
            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                                      batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                                      lambd_rev, loss_backward, i_epoch)

            # append current loss values to the loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist, lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()
            #rev_x[:,0] = mc_max * rev_x[:,0]

            # plot pe results and loss
            beta_max = 0
            """
            if i_epoch > 0:
                kernel_cnn = gaussian_kde(rev_x)
                #overlap_y = np.reshape(rev_x[:,1], (rev_x[:,1].shape[0]))
                #overlap_x = np.reshape(rev_x[:,0], (rev_x[:,0].shape[0]))
                #overlap_dataset = np.array([overlap_x,overlap_y]).transpose()
                ks_score, ad_score, beta_score = overlap_tests(rev_x, lalinf_post, signal_pars,
                                                               kernel_cnn, kernel_lalinf)
                beta_score_hist.append([beta_score])
                plt.plot(np.linspace(1, i_epoch, len(beta_score_hist)), beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()
            """

            if (i_epoch % plot_cadence == 0) and (i_epoch > 0):
                pe_std = [0.02185649964844209, 0.005701401364171313]  # TODO: remove this hardcoded value
                beta_score_hist.append([plot_pe_samples(rev_x, signal_pars, out_path, i_epoch,
                                                        lalinf_post, pe_std,
                                                        kernel_lalinf=kernel_lalinf,
                                                        kernel_cnn=kernel_cnn)])
                plt.plot(np.linspace(plot_cadence, i_epoch, len(beta_score_hist)), beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses, '%s/latest/pe_losses.png' % out_path, legend=['PE-GEN'])
                plot_losses(pe_losses, '%s/latest/pe_losses_logscale.png' % out_path,
                            logscale=True, legend=['PE-GEN'])

                # save model
                #if beta_score_hist[:-1] > beta_max: beta_max = beta_score_hist[:-1]
                #if beta_score_hist[:-1] > beta_max or i_epoch==plot_cadence:
                torch.save(model.state_dict(), 'mytraining.pt')

                # make PE scatter plots with contours and beta score
                #plt.scatter(rev_x[:,0], rev_x[:,1], s=1., c='red')
                #plt.scatter(lalinf_mc, lalinf_q, s=1., c='blue')

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
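# NOTE: both training loops in this file call a train() helper that is not shown in
# this section. The sketch below is NOT the codebase's implementation; it is a
# minimal, assumed version of the standard INN training step (supervised fit loss on
# the y block, MMD on the latent z block, MMD on the back-projected x), wired to
# match the call signature and the three return values used above. The global
# `device` and the padding/noise conventions are taken from the surrounding code.
def train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size,
          ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer,
          lambd_predict, loss_fit, lambd_latent, loss_latent, lambd_rev,
          loss_backward, i_epoch=0):
    model.train()
    l_tot = 0
    batch_idx = 0
    l, l_rev = None, None

    for batch_idx, (x, y) in enumerate(train_loader):
        if batch_idx > n_its_per_epoch:
            break

        x, y = x.to(device), y.to(device)
        y_clean = y.clone()

        # pad x up to ndim_tot and build [z, padding, y] with small-amplitude noise
        pad_x = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_x, device=device)
        pad_yz = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_y - ndim_z, device=device)
        y += y_noise_scale * torch.randn(batch_size, ndim_y, device=device)

        x = torch.cat((x, pad_x), dim=1)
        y = torch.cat((torch.randn(batch_size, ndim_z, device=device), pad_yz, y), dim=1)

        optimizer.zero_grad()

        # forward pass: fit loss on the y block plus MMD on the latent block
        output = model(x)
        y_short = torch.cat((y[:, :ndim_z], y[:, -ndim_y:]), dim=1)
        l = lambd_predict * loss_fit(output[:, ndim_z:], y[:, ndim_z:])
        output_block_grad = torch.cat((output[:, :ndim_z], output[:, -ndim_y:].data), dim=1)
        l += lambd_latent * loss_latent(output_block_grad, y_short)
        l.backward()

        # backward pass: MMD between inverse samples and the true x distribution
        pad_yz = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_y - ndim_z, device=device)
        y = y_clean + y_noise_scale * torch.randn(batch_size, ndim_y, device=device)
        orig_z_perturbed = output.data[:, :ndim_z] + y_noise_scale * torch.randn(batch_size, ndim_z, device=device)
        y_rev = torch.cat((orig_z_perturbed, pad_yz, y), dim=1)
        y_rev_rand = torch.cat((torch.randn(batch_size, ndim_z, device=device), pad_yz, y), dim=1)

        output_rev = model(y_rev, rev=True)
        output_rev_rand = model(y_rev_rand, rev=True)

        l_rev = lambd_rev * loss_backward(output_rev_rand[:, :ndim_x], x[:, :ndim_x])
        l_rev += 0.5 * lambd_predict * loss_fit(output_rev, x)
        l_rev.backward()

        l_tot += l.data.item() + l_rev.data.item()
        optimizer.step()

    return l_tot / max(batch_idx, 1), l, l_rev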
def main():
    # Set up simulation parameters
    batch_size = 1600   # set batch size
    r = 3               # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'    # the signal model to use
    sigma = 0.2         # the noise std
    ndata = 32          # number of data samples
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for the likelihood
    seed = 1            # seed for generating data

    # generate data
    pos, labels, x, sig = data.generate(model=sig_model,
                                        tot_dataset_size=2**20,
                                        ndata=ndata,
                                        sigma=sigma,
                                        prior_bound=bound,
                                        seed=seed)

    # separate out the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png', dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2      # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 8      # number of latent space dimensions?
    ndim_tot = max(ndim_x, ndim_y + ndim_z)  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
               {'F_class': F_fully_connected, 'clamp': 2.0,
                'F_args': {'dropout': 0.2}})

    t2 = Node([t1.out0], rev_multiplicative_layer,
               {'F_class': F_fully_connected, 'clamp': 2.0,
                'F_args': {'dropout': 0.2}})

    t3 = Node([t2.out0], rev_multiplicative_layer,
               {'F_class': F_fully_connected, 'clamp': 2.0,
                'F_args': {'dropout': 0.2}})

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 1000
    meta_epoch = 12  # number of epochs between learning-rate scheduler steps
    n_its_per_epoch = 12
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.   # latent space
    lambd_rev = 400.      # backwards pass
    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                                 eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=meta_epoch, gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(coeff.fc3.weight.shape)

    model.to(device)

    # initialize plot for showing testing results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of test samples to use after training
    N_samp = 256

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    for i in range(r):
        for j in range(r):
            mvec, cvec, temp = data.get_lik(np.array(labels_test[cnt, :]).flatten(),
                                            n_grid=Ngrid,
                                            sig_model=sig_model,
                                            sigma=sigma,
                                            xvec=x,
                                            bound=bound)
            lik[i, j, :] = temp.flatten()
            cnt += 1

    # start training loop
    try:
        t_start = time()

        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):
            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('lowering learning rate for the initial epochs')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale, batch_size,
                  ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale, optimizer, lambd_predict,
                  loss_fit, lambd_latent, loss_latent, lambd_rev, loss_backward, i_epoch)

            # loop over a few cases and plot results in a grid
            cnt = 0
            for i in range(r):
                for j in range(r):

                    # convert data into correct format
                    y_samps = np.tile(np.array(labels_test[cnt, :]), N_samp).reshape(N_samp, ndim_y)
                    y_samps = torch.tensor(y_samps, dtype=torch.float)
                    #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                                         #zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                                         y_samps], dim=1)
                    y_samps = y_samps.to(device)

                    # use the network to predict parameters
                    rev_x = model(y_samps, rev=True)
                    rev_x = rev_x.cpu().data.numpy()

                    # plot the samples and the true contours
                    axes[i, j].clear()
                    axes[i, j].contour(mvec, cvec, lik[i, j, :].reshape(Ngrid, Ngrid),
                                       levels=[0.68, 0.9, 0.99])
                    axes[i, j].scatter(rev_x[:, 0], rev_x[:, 1], s=0.5, alpha=0.5)
                    axes[i, j].plot(pos_test[cnt, 0], pos_test[cnt, 1], '+r', markersize=8)
                    axes[i, j].axis(bound)
                    cnt += 1

            # save the results to file
            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/posteriors_%s.png' % i_epoch, dpi=360)
            plt.savefig('/data/public_html/chrism/FrEIA/latest.png', dpi=360)

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
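# NOTE: MMD_multiscale and fit are assigned as loss functions above but are not
# defined in this section. The definitions below are an assumed, typical
# implementation (a multi-scale inverse multiquadratic kernel MMD and a plain
# mean-squared-error fit loss); the kernel widths are illustrative choices, not
# values taken from this codebase.
def MMD_multiscale(x, y):
    # pairwise squared distances within and between the two sample sets
    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())
    rx = xx.diag().unsqueeze(0).expand_as(xx)
    ry = yy.diag().unsqueeze(0).expand_as(yy)

    dxx = rx.t() + rx - 2. * xx
    dyy = ry.t() + ry - 2. * yy
    dxy = rx.t() + ry - 2. * zz

    XX = torch.zeros(xx.shape, device=x.device)
    YY = torch.zeros(xx.shape, device=x.device)
    XY = torch.zeros(xx.shape, device=x.device)

    # sum inverse multiquadratic kernels over several length scales
    for a in [0.05, 0.2, 0.9]:
        XX += a**2 * (a**2 + dxx)**-1
        YY += a**2 * (a**2 + dyy)**-1
        XY += a**2 * (a**2 + dxy)**-1

    return torch.mean(XX + YY - 2. * XY)


def fit(input, target):
    # supervised forward loss: simple mean squared error on the y block
    return torch.mean((input - target)**2)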