Example #1
def main():
    # --------------------------------------- Generate data ------------------------------------------
    t_generate_start = time()
    # set up the simulated-data parameters
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    optical_model = 'km'  # the optical model to use
    ydim = 31  # dimension of each reflectance curve (number of wavelength samples)
    bound = [0.1, 0.9, 0.1, 0.9]
    seed = 1  # seed for generating data

    # generate the training data
    # concentrations, reflectance, x, info = data.generate(
    #     model=optical_model,
    #     total_dataset_size=2 ** 20 * 20,
    #     ydim=ydim,
    #     prior_bound=bound,
    #     seed=seed
    # )
    concentrations, reflectance, x, info = data.math_optimized_generate()

    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))
    colors = np.arange(0, concentrations.shape[-1], 1)

    # hold a few samples out of training to use as final test samples
    c_test = concentrations[-test_split:]
    r_test = reflectance[-test_split:]

    # plot the spectral reflectance of the test samples for inspection (independent of the model)
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(r_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([400, 700, 0, 1])
    plt.savefig('test_target_reflectance.png', dpi=360)
    plt.close()
    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))

    # --------------------------------------- Build the network ------------------------------------------
    # set the model parameter values
    ndim_x = concentrations.shape[-1]  # recipe dimension, i.e. the number of candidate colorants
    ndim_y = ydim  # reflectance dimension (31)
    ndim_z = 13  # latent-space dimension
    ndim_tot = max(ndim_x, ndim_y + ndim_z)
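    # ndim_tot is the shared width of the invertible network's input and output:
    # the reverse-pass input assembled further below is [latent z | zero padding | reflectance y],
    # and the forward pass presumably pads the recipe x up to the same width inside train().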

    # define the different parts of the network
    # define the input node
    inp = InputNode(ndim_tot, name='input')

    # define the hidden-layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p1 = Node([t1.out0], permute_layer, {'seed': 1})

    t2 = Node([p1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p2 = Node([t2.out0], permute_layer, {'seed': 2})

    t3 = Node([p2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p3 = Node([t3.out0], permute_layer, {'seed': 1})

    t4 = Node([p3.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p4 = Node([t4.out0], permute_layer, {'seed': 2})

    t5 = Node([p4.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    # define the output node
    outp = OutputNode([t5.out0], name='output')

    # assemble the network
    nodes = [inp, t1, p1, t2, p2, t3, p3, t4, p4, t5, outp]
    model = ReversibleGraphNet(nodes)
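    # The graph alternates five reversible coupling blocks (rev_multiplicative_layer with
    # fully connected subnetworks) with four fixed permutation layers, which shuffle the
    # dimensions so that successive coupling blocks mix different variables.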

    # --------------------------------------- Train the network ------------------------------------------
    # hyperparameters
    # n_epochs = 3000  # number of training epochs
    n_epochs = 0  # number of training epochs (0 here: a previously saved model is loaded below instead of training)
    plot_cadence = 100  # plot the loss curves every 100 epochs
    meta_epoch = 12  # step size (in epochs) of the learning-rate scheduler
    n_its_per_epoch = 12  # 12 batches per epoch
    batch_size = 1600  # 1600 samples per batch
    lr = 1.5e-3  # initial learning rate
    gamma = 0.004**(1. / 1333)  # multiplicative learning-rate decay factor
    l2_reg = 2e-5  # weight decay (L2 penalty)
    # to give the input and output the same dimension, the vectors are padded;
    # instead of exact zeros, small random values of this scale are used
    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # loss weights
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # define the optimizer
    # params: parameters to optimize, lr: learning rate, betas: coefficients for the running
    # averages of the gradient and its square, eps: term added to the denominator for
    # numerical stability, weight_decay: weight decay
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    # learning-rate scheduling
    # optimizer: the optimizer being scheduled
    # step_size: period (in epochs) of the learning-rate decay
    # gamma: multiplicative factor of the learning-rate decay
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)
    # loss function setup
    # x and z are unsupervised: MMD; y is supervised: squared error
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit
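    # Three-term INN loss, weighted by the lambd_* factors above: a supervised squared-error
    # fit on the predicted reflectance y, an MMD term pulling the latent z towards its prior,
    # and an MMD term on the backward-predicted recipe x.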

    # training-set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        concentrations[test_split:], reflectance[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialize the network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)
    model.to(device)

    # initialize the figure for the test results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of samples drawn per test case
    N_samp = 256

    # --------------------------------------- Start training ------------------------------------------
    try:
        t_start = time()  # training start time
        loss_for_list = []  # record the forward-pass losses
        loss_rev_list = []  # record the backward-pass losses

        tsne = TSNE(n_components=2, init='pca')
        # colorant codes
        color_names = [
            '07H', '08', '08S', '09', '09B', '09S', '10B', '12', '13', '14',
            '15', '16', '17A', '18A', '19A', '20A-2', '23A', '2704', '2803',
            '2804', '2807'
        ]

        # iterate for n_epochs epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):
            scheduler.step()
            # TODO: this branch is never entered (i_epoch is never < 0)
            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            avg_loss, loss_for, loss_rev = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, i_epoch)
            # append the forward and backward losses
            loss_for_list.append(loss_for.item())
            loss_rev_list.append(loss_rev.item())
            inn_losses = [loss_for_list, loss_rev_list]

            if ((i_epoch + 1) % plot_cadence == 0) & (i_epoch > 0):
                plot_losses(inn_losses,
                            legend=['PE-GEN'],
                            lossNo=int((i_epoch + 1) / plot_cadence))

        # TODO: a previously trained model is loaded here; uncomment the save to store a newly trained one
        model = torch.load('model_dir/km_impl_model')
        # torch.save(model, 'model_dir/km_impl_model')

        fig, axes = plt.subplots(1, 1, figsize=(2, 2))

        # reflectance of a real measured sample
        test_samps = np.array([[
            0.2673378, 0.3132285, 0.3183329, 0.3234908, 0.3318701, 0.3409707,
            0.3604081, 0.4168356, 0.5351773, 0.6202191, 0.6618687, 0.6919741,
            0.7136238, 0.7292901, 0.7314631, 0.7131701, 0.6773048, 0.6302681,
            0.5738088, 0.5133060, 0.4535525, 0.4108878, 0.3908512, 0.3808001,
            0.3752591, 0.3727644, 0.3801365, 0.3976869, 0.4237110, 0.4332685,
            0.4433292
        ]])
        # recipe corresponding to the real sample
        test_cons = np.array([[
            0, 0.8014, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1491, 0, 0, 0,
            0.2241, 0
        ]])
        for cnt in range(test_samps.shape[0]):
            print('before:', cnt, test_samps[cnt, :])
            test_samp = np.tile(np.array(test_samps[cnt, :]),
                                N_samp).reshape(N_samp, ydim)
            test_samp = torch.tensor(test_samp, dtype=torch.float)
            test_samp += y_noise_scale * torch.randn(N_samp, ydim)

            test_samp = torch.cat(
                [
                    torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                    torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                    test_samp
                ],
                dim=1)
            test_samp = test_samp.to(device)
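            # Reverse-pass input for this target spectrum: fresh latent draws z, zero padding
            # up to ndim_tot, and N_samp noisy copies of the measured reflectance y.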

            print('after:', cnt, test_samp)
            # use the network to predict parameters
            test_rev = model(test_samp, rev=True)[:, :colors.size]
            test_rev = test_rev.cpu().data.numpy()
            # if a colorant's concentration is below a threshold, assume it is not needed
            test_rev = np.where(test_rev < 0.1, 0, test_rev)

            # compute the reflectance of the predicted recipes
            # recipe_ref = data.recipe_reflectance(test_rev, optical_model)
            # use the corrected model to compute the reflectance of the recipes
            recipe_ref = data.correct_recipe_reflectance(test_rev)
            print("######## Test Sample %d ########" % cnt)
            # track the three recipes with the smallest color difference
            top3 = [[100, 0], [100, 0], [100, 0]]
            for n in range(test_rev.shape[0]):
                # print(test_rev[n, :])
                diff = data.color_diff(test_samps[cnt, :], recipe_ref[n, :])
                if diff < top3[2][0]:
                    top3[2][0] = diff
                    top3[2][1] = n
                    top3.sort()
            # print the three recipes with the smallest color difference
            for n in range(3):
                print(test_rev[top3[n][1], :])
                print("color diff: %.2f \n" % top3[n][0])
            print("\n\n")

            # draw
            # feature scaling
            test_x = test_cons[cnt, :].reshape(1, test_cons[cnt, :].shape[-1])
            plot_x = np.concatenate((test_rev, test_x), axis=0)

            # use tsne to decrease dimensionality
            x_norm = pd.DataFrame(plot_x, columns=color_names)

            # classify each recipe by which colorants it needs (1 = needed, 0 = not needed)
            classes = np.zeros(N_samp).reshape(N_samp, 1)
            paint_needed = np.where(test_rev == 0, 0, 1)
            for paint_no in colors:
                classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
            class_norm = pd.DataFrame(np.concatenate(
                (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                      columns=['class'])

            data_plot = pd.concat(
                [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm], axis=1)
            class_data = data_plot['class']

            axes.clear()
            recipe_classes = np.array(
                class_norm[:-1].drop_duplicates()).reshape(1, -1).tolist()[0]
            for recipe_class in recipe_classes:
                axes.scatter(data_plot[class_data == recipe_class][0],
                             data_plot[class_data == recipe_class][1],
                             s=2,
                             alpha=0.5)
            axes.scatter(data_plot[class_data == 0][0],
                         data_plot[class_data == 0][1],
                         marker='+',
                         s=10)
            fig.canvas.draw()
            plt.savefig('test_result%d.png' % cnt, dpi=360)

        # loop over a few cases and plot results in a grid
        cnt = 0
        for i in range(r):
            for j in range(r):
                # convert data into correct format
                y_samps = np.tile(np.array(r_test[cnt, :]),
                                  N_samp).reshape(N_samp, ydim)
                y_samps = torch.tensor(y_samps, dtype=torch.float)
                y_samps += y_noise_scale * torch.randn(N_samp, ydim)

                y_samps = torch.cat(
                    [
                        torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                        y_samps
                    ],
                    dim=1)
                y_samps = y_samps.to(device)

                # use the network to predict parameters
                rev_x = model(y_samps, rev=True)[:, :colors.size]
                rev_x = rev_x.cpu().data.numpy()

                # if a colorant's concentration is below a threshold, assume it is not needed
                rev_x = np.where(rev_x < 0.1, 0, rev_x)

                # feature scaling
                test_x = c_test[cnt, :].reshape(1, c_test[cnt, :].shape[-1])
                plot_x = np.concatenate((rev_x, test_x), axis=0)

                # use tsne to decrease dimensionality
                x_norm = pd.DataFrame(plot_x, columns=color_names)

                # classify each recipe by which colorants it needs (1 = needed, 0 = not needed)
                classes = np.zeros(N_samp).reshape(N_samp, 1)
                paint_needed = np.where(rev_x == 0, 0, 1)
                for paint_no in colors:
                    classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
                class_norm = pd.DataFrame(np.concatenate(
                    (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                          columns=['class'])

                data_plot = pd.concat(
                    [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm],
                    axis=1)

                class_data = data_plot['class']

                # plot the predicted and the true recipe
                axes.clear()
                recipe_classes = np.array(
                    class_norm[:-1].drop_duplicates()).reshape(1,
                                                               -1).tolist()[0]
                for recipe_class in recipe_classes:
                    axes.scatter(data_plot[class_data == recipe_class][0],
                                 data_plot[class_data == recipe_class][1],
                                 s=2,
                                 alpha=0.5)
                axes.scatter(data_plot[class_data == 0][0],
                             data_plot[class_data == 0][1],
                             marker='+',
                             s=10)

                fig.canvas.draw()
                plt.savefig('training_result%d.png' % cnt, dpi=360)

                # recipe_ref = data.recipe_reflectance(rev_x, optical_model)
                # use the corrected model to compute the reflectance of the recipes
                recipe_ref = data.correct_recipe_reflectance(rev_x)
                print("######## Test %d ########" % cnt)
                print(c_test[cnt])
                print("################")
                # track the three recipes with the smallest color difference
                top3 = [[100, 0], [100, 0], [100, 0]]
                for n in range(rev_x.shape[0]):
                    # print(rev_x[n, :])
                    diff = data.color_diff(r_test[cnt].numpy(),
                                           recipe_ref[n, :])
                    if diff < top3[2][0]:
                        top3[2][0] = diff
                        top3[2][1] = n
                        top3.sort()
                # print the three recipes with the smallest color difference
                for n in range(3):
                    print(rev_x[top3[n][1], :])
                    print("color diff: %.2f \n" % top3[n][0])
                print("\n\n")

                cnt += 1

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took %.2f minutes\n" % ((time() - t_start) / 60))
Example #2
def main():
    # Set up data

    # make training signals
    signal_train_pars = []
    signal_train_images = []
    for i in range(total_temp_num):
        signal_train_pars.append(
            [np.random.uniform(-1.0, 1.0),
             np.random.uniform(0.5, 1.5)])
        signal_train_images.append(
            np.random.normal(loc=signal_train_pars[i][0],
                             scale=signal_train_pars[i][1],
                             size=(1, n_pix)))
    signal_train_pars = np.array(signal_train_pars)
    signal_train_images = np.array(signal_train_images).reshape(
        total_temp_num, n_pix)

    # make random 1D gaussian signal
    noise_signal = np.random.normal(loc=0.0, scale=1.0, size=(1, n_pix))
    #noise_signal = norm.rvs(0,1.0,(1,n_pix))
    signal_pars = [0.0, 1.0]

    # load in lalinference samples
    #with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f:
    #    lalinf_post = pickle.load(f)
    #lalinf_mc = lalinf_post[0]
    #lalinf_q = lalinf_post[1]

    # declare gw variants of positions and labels
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_tot = n_pix + n_neurons  # total input/output width: data dimension plus extra padding neurons
    ndim_x = 2  # number of parameter dimensions
    ndim_y = n_pix  # number of data dimensions
    ndim_z = 10  # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    """
    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    lr = 1e-2
    decayEpochs = (n_epochs * n_its_per_epoch) // meta_epoch
    gamma = 0.004**(1.0 / decayEpochs)
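    # gamma satisfies gamma**decayEpochs == 0.004, i.e. the learning rate would shrink by an
    # overall factor of 0.004 after decayEpochs scheduler steps.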
    l2_reg = 2e-5

    #gamma = 0.01**(1./120)

    y_noise_scale = 3e-2  # amount of noise to add to the y data
    zeros_noise_scale = 3e-2  # scale of the small random values used in place of exact zero padding

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:test_split], labels[:test_split]),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:], labels[:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialise the network weights: set the final fc3 layer of each subnetwork to small random values
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # number of test samples to use after training
    N_samp = 4000

    # choose test samples to use after training
    # N_samp copies of the test signal buried in noise; only the latent z draw differs between copies
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps_nparray = np.repeat(noise_signal, N_samp, axis=0)
    y_samps = torch.tensor(y_samps_nparray, dtype=torch.float)

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat(
        [
            torch.randn(N_samp, ndim_z),
            zeros_noise_scale * torch.zeros(
                N_samp, ndim_tot - ndim_y - ndim_z),  # zeros_noise_scale * 
            y_samps
        ],
        dim=1)
    # what we should have now are N_samp copies of the event buried in noise, zero-padded up to ndim_tot
    y_samps = y_samps.to(device)

    # get control contour values
    cont_mu, cont_sig, prob, levels = compute_like(noise_signal.reshape(
        n_pix, ),
                                                   N=n_pix)

    #lalinf_post_blah = np.array([np.random.normal(loc=0,scale=1.0,size=(N_samp)), np.random.normal(loc=1.0,scale=1.0,size=(N_samp))])

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []
    try:
        #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch,
                                      zeros_noise_scale, batch_size, ndim_tot,
                                      ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit,
                                      lambd_latent, loss_latent, lambd_rev,
                                      loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist, lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # plot pe results and loss
            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):
                #pe_std = [0.005, 0.01] # this will need to be removed
                #beta_score_hist.append([plot_pe_samples(rev_x,signal_pars,out_path,i_epoch,lalinf_post,pe_std)])
                #plt.plot(np.linspace(plot_cadence,i_epoch,len(beta_score_hist)),beta_score_hist)
                #plt.savefig('%s/latest/beta_hist.png' % out_path)
                #plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,
                            '%s/latest/pe_losses.png' % out_path,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%s/latest/pe_losses_logscale.png' % out_path,
                            logscale=True,
                            legend=['PE-GEN'])

                # make PE scatter plots with contours and beta score
                mu0 = 0.0
                sig0 = 1.0
                plt.scatter(rev_x[:, 0],
                            rev_x[:, 1],
                            s=1.,
                            c='red',
                            label='INN Results')
                plt.contour(cont_mu,
                            cont_sig,
                            prob,
                            levels=[0.68, 0.9, 0.95, 0.99])
                plt.plot(mu0, sig0, '+', label='Truth')
                plt.xlabel('mean')
                plt.ylabel('standard deviation')
                plt.legend()
                plt.savefig('%s/latest/predicted_pe.png' % out_path)
                plt.close()

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #3
def main():
    # Set up data
    batch_size = 1600  # set batch size
    test_split = 10000  # number of testing samples to use

    # generate data
    # makes a torch.tensor() with arrays of (n_samples X parameters) and (n_samples X data)
    # labels are the colours and pos are the x,y coords
    # however, labels are 1-hot encoded
    pos, labels = data.generate(labels='all', tot_dataset_size=2**20)

    # pull out the one-hot colour labels for the test split
    #c = np.where(labels[:test_split])[1]
    c = labels[:test_split, :]
    plt.figure(figsize=(6, 6))
    plt.scatter(pos[:test_split, 0],
                pos[:test_split, 1],
                c=c,
                cmap='Set1',
                s=0.25)
    plt.xticks([])
    plt.yticks([])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png')
    plt.close()

    # setting up the model
    ndim_tot = 16  # total input/output width (must exceed ndim_x and ndim_y + ndim_z; the rest is zero padding)
    ndim_x = 2  # number of parameter dimensions (x,y)
    ndim_y = 3  # number of label dimensions (colours for 1-hot encoding)
    ndim_z = 2  # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t3 = Node([t2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)
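    # Three reversible coupling blocks chained directly; unlike Example #1, no explicit
    # permutation layers are placed between them.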

    # Train model
    # Training parameters
    n_epochs = 3000
    meta_epoch = 12  # step size (in epochs) of the StepLR learning-rate scheduler
    n_its_per_epoch = 4
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    print(pad_x.shape, pad_yz.shape)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:test_split], labels[:test_split]),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # initialize gif for showing training procedure
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    axes[0].set_title('Predicted labels (Forwards Process)')
    axes[1].set_xticks([])
    axes[1].set_yticks([])
    axes[1].set_title('Generated Samples (Backwards Process)')
    #fig.show()
    #fig.canvas.draw()

    # number of test samples to use after training
    N_samp = 4096

    # choose test samples to use after training
    x_samps = torch.cat([x for x, y in test_loader], dim=0)[:N_samp]
    y_samps = torch.cat([y for x, y in test_loader], dim=0)[:N_samp]
    #c = np.where(y_samps)[1]
    #c = y_samps[:,0]
    c = np.array(y_samps).reshape(N_samp, ndim_y)
    y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
    y_samps = torch.cat([
        torch.randn(N_samp, ndim_z), zeros_noise_scale *
        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), y_samps
    ],
                        dim=1)
    y_samps = y_samps.to(device)

    # start training loop
    try:
        #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

    #         print(i_epoch, end='\t ')
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # predict the locations of test labels
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # predict the label given a location
            #pred_c = model(torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
            #                         dim=1).to(device)).data[:, -8:].argmax(dim=1)
            pred_c = model(
                torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
                          dim=1).to(device)).data[:, -1:].argmax(dim=1)

            axes[0].clear()
            #axes[0].scatter(tmp_x_samps[:,0], tmp_x_samps[:,1], c=pred_c, cmap='Set1', s=1., vmin=0, vmax=9)
            axes[0].axis('equal')
            axes[0].axis([-3, 3, -3, 3])
            axes[0].set_xticks([])
            axes[0].set_yticks([])

            axes[1].clear()
            axes[1].scatter(rev_x[:, 0],
                            rev_x[:, 1],
                            c=c,
                            cmap='Set1',
                            s=1.,
                            vmin=0,
                            vmax=9)
            axes[1].axis('equal')
            axes[1].axis([-3, 3, -3, 3])
            axes[1].set_xticks([])
            axes[1].set_yticks([])

            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/training_pred.png')

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #4
def main():

    # Set up simulation parameters
    batch_size = 128  # set batch size
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'  # the signal model to use
    sigma = 0.2  # the noise std
    ndata = 128  # number of data samples in the time series (previously 32)
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1  # seed for generating data
    out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/"
    n_neurons = 0
    do_contours = True  # if True, plot contours of predictions by INN
    plot_cadence = 50
    do_latent_struc = False  # if True, plot latent space 2D structure
    conv_nn = False  # if True, use convolutional nn structure

    # setup output directory - if it does not exist
    os.system('mkdir -p %s' % out_dir)

    # generate data
    pos, labels, x, sig = data.generate(
        model=sig_model,
        tot_dataset_size=int(1e6),  # 1e6
        ndata=ndata,
        sigma=sigma,
        prior_bound=bound,
        seed=seed)

    if do_latent_struc:
        # calculate mode of x-space for both pars
        mode_1 = stats.mode(np.array(pos[:, 0]))
        mode_2 = stats.mode(np.array(pos[:, 1]))

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig_post, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig("%stest_distribution.png" % out_dir, dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2  # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 200  # number of latent space dimensions?
    ndim_tot = max(
        ndim_x,
        ndim_y + ndim_z) + n_neurons  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    filtsize = 3
    dropout = 0.0
    clamp = 1.0
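    # 'clamp' soft-limits the multiplicative scaling inside each coupling block (roughly to
    # exp(+/-clamp) in FrEIA), which helps keep training numerically stable; 1.0 here is
    # tighter than the 2.0 used in the other examples.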
    if conv_nn == True:
        t1 = Node(
            [inp.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })

        t2 = Node(
            [t1.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })

        t3 = Node(
            [t2.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })
        #t4 = Node([t1.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #           'batch_norm':False}})

        #t5 = Node([t2.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #           'batch_norm':False}})

    else:
        t1 = Node(
            [inp.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

        t2 = Node(
            [t1.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

        t3 = Node(
            [t2.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 12000
    meta_epoch = 12  # step size (in epochs) of the StepLR learning-rate scheduler
    n_its_per_epoch = 12

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 4000.  # forward pass (was 300)
    lambd_latent = 900.  # latent space (was 300)
    lambd_rev = 1000.  # backwards pass (was 400)

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                if conv_nn == True:
                    coeff.conv3.weight.data = 0.01 * torch.randn(
                        coeff.conv3.weight.shape)
    model.to(device)

    # number of test samples to use after training
    N_samp = 2500

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    true_post = np.zeros((r, r, N_samp, 2))
    lossf_hist = []
    lossrev_hist = []
    losstot_hist = []
    losslatent_hist = []
    beta_score_hist = []

    for i in range(r):
        for j in range(r):
            mvec, cvec, temp, post_points = data.get_lik(np.array(
                labels_test[cnt, :]).flatten(),
                                                         n_grid=Ngrid,
                                                         sig_model=sig_model,
                                                         sigma=sigma,
                                                         xvec=x,
                                                         bound=bound)
            lik[i, j, :] = temp.flatten()
            true_post[i, j, :] = post_points[:N_samp]
            cnt += 1
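    # lik[i, j] now holds the gridded true likelihood for test case (i, j) and
    # true_post[i, j] its posterior samples; both are overlaid on the INN samples
    # in the plotting block below.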

    # start training loop
    try:
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            losstot, losslatent, lossrev, lossf, lambd_latent = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, conv_nn, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.data.item())
            lossrev_hist.append(lossrev.data.item())
            losstot_hist.append(losstot)
            losslatent_hist.append(losslatent.data.item())
            pe_losses = [
                losstot_hist, losslatent_hist, lossrev_hist, lossf_hist
            ]

            # loop over a few cases and plot results in a grid
            cnt = 0
            beta_max = 0
            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):
                # use the network to predict parameters

                if do_latent_struc:
                    # do latent space structure plotting
                    y_samps_latent = np.tile(np.array(labels_test[0, :]),
                                             1).reshape(1, ndim_y)
                    y_samps_latent = torch.tensor(y_samps_latent,
                                                  dtype=torch.float)
                    x1_i_dist = []
                    x2_i_dist = []
                    x1_i_par = np.array([])
                    x2_i_par = np.array([])

                    # define latent space mesh grid
                    z_mesh = np.mgrid[-0.99:-0.01:100j, -0.99:-0.01:100j]
                    z_mesh = np.vstack([z_mesh, np.zeros((2, 100, 100))])

                    #for z_i in range(10000):
                    for i in range(z_mesh.shape[1]):
                        for j in range(z_mesh.shape[2]):
                            a = torch.randn(1, ndim_z)
                            a[0, 0] = z_mesh[0, i, j]
                            a[0, 1] = z_mesh[1, i, j]
                            x_i = model(torch.cat([
                                a,
                                torch.zeros(1, ndim_tot - ndim_y - ndim_z),
                                y_samps_latent
                            ],
                                                  dim=1).to(device),
                                        rev=True)
                            x_i = x_i.cpu().data.numpy()

                            # calculate hue and intensity
                            if np.abs(mode_1[0][0] -
                                      x_i[0][0]) < np.abs(mode_2[0][0] -
                                                          x_i[0][1]):
                                z_mesh[2, i,
                                       j] = np.abs(mode_1[0][0] - x_i[0][0])
                                z_mesh[3, i, j] = 0

                            else:
                                z_mesh[2, i,
                                       j] = np.abs(mode_2[0][0] - x_i[0][1])
                                z_mesh[3, i, j] = 1

                    z_mesh[2, :, :][z_mesh[3, :, :] == 0] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 0] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 0])
                    z_mesh[2, :, :][z_mesh[3, :, :] == 1] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 1] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 1])

                    bg_color = 'black'
                    fg_color = 'red'

                    fig = plt.figure(facecolor=bg_color, edgecolor=fg_color)
                    axes = fig.add_subplot(111)
                    axes.patch.set_facecolor(bg_color)
                    axes.xaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    axes.yaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    for spine in axes.spines.values():
                        spine.set_color(fg_color)
                    plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 0],
                                z_mesh[1, :, :][z_mesh[3, :, :] == 0],
                                s=1,
                                c=z_mesh[2, :, :][z_mesh[3, :, :] == 0],
                                cmap='Greens',
                                axes=axes)
                    plt.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 1],
                                z_mesh[1, :, :][z_mesh[3, :, :] == 1],
                                s=1,
                                c=z_mesh[2, :, :][z_mesh[3, :, :] == 1],
                                cmap='Purples',
                                axes=axes)
                    plt.xlabel('z-space', color=fg_color)
                    plt.ylabel('z-space', color=fg_color)
                    plt.savefig('%sstruct_z.png' % out_dir, dpi=360)
                    plt.close()

                # end of latent space structure plotting

                # initialize plot for showing testing results
                fig, axes = plt.subplots(r, r, figsize=(6, 6))
                for i in range(r):
                    for j in range(r):

                        # convert data into correct format
                        y_samps = np.tile(np.array(labels_test[cnt, :]),
                                          N_samp).reshape(N_samp, ndim_y)
                        y_samps = torch.tensor(y_samps, dtype=torch.float)
                        #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                        y_samps = torch.cat(
                            [
                                torch.randn(N_samp,
                                            ndim_z),  #zeros_noise_scale * 
                                torch.zeros(N_samp,
                                            ndim_tot - ndim_y - ndim_z),
                                y_samps
                            ],
                            dim=1)
                        y_samps = y_samps.to(device)

                        if conv_nn == True:
                            y_samps = y_samps.reshape(y_samps.shape[0],
                                                      y_samps.shape[1], 1, 1)
                        rev_x = model(y_samps, rev=True)
                        rev_x = rev_x.cpu().data.numpy()

                        if conv_nn == True:
                            rev_x = rev_x.reshape(rev_x.shape[0],
                                                  rev_x.shape[1])

                        # plot the samples and the true contours
                        axes[i, j].clear()
                        axes[i, j].contour(mvec,
                                           cvec,
                                           lik[i, j, :].reshape(Ngrid, Ngrid),
                                           levels=[0.68, 0.9, 0.99])
                        axes[i, j].scatter(rev_x[:, 0],
                                           rev_x[:, 1],
                                           s=0.5,
                                           alpha=0.5,
                                           color='red')
                        axes[i, j].scatter(true_post[i, j, :, 1],
                                           true_post[i, j, :, 0],
                                           s=0.5,
                                           alpha=0.5,
                                           color='blue')
                        axes[i, j].plot(pos_test[cnt, 0],
                                        pos_test[cnt, 1],
                                        '+r',
                                        markersize=8)
                        axes[i, j].axis(bound)

                        # add contours to results
                        try:
                            if do_contours:
                                contour_y = np.reshape(rev_x[:, 1],
                                                       (rev_x[:, 1].shape[0]))
                                contour_x = np.reshape(rev_x[:, 0],
                                                       (rev_x[:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                kernel_cnn = make_contour_plot(
                                    axes[i, j],
                                    contour_x,
                                    contour_y,
                                    contour_dataset,
                                    'red',
                                    flip=False,
                                    kernel_cnn=False)

                                # run overlap tests on results
                                contour_x = np.reshape(
                                    true_post[i, j][:, 1],
                                    (true_post[i, j][:, 1].shape[0]))
                                contour_y = np.reshape(
                                    true_post[i, j][:, 0],
                                    (true_post[i, j][:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                ks_score, ad_score, beta_score = overlap_tests(
                                    rev_x, true_post[i, j], pos_test[cnt],
                                    kernel_cnn, gaussian_kde(contour_dataset))
                                axes[i, j].legend([
                                    'Overlap: %s' %
                                    str(np.round(beta_score, 3))
                                ])

                                beta_score_hist.append([beta_score])
                        except ValueError as e:
                            pass

                        cnt += 1

                # save the results to file
                fig_post.canvas.draw()
                plt.savefig('%sposteriors_%s.png' % (out_dir, i_epoch),
                            dpi=360)
                plt.savefig('%slatest.png' % out_dir, dpi=360)

                plot_losses(pe_losses,
                            '%spe_losses.png' % out_dir,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%spe_losses_logscale.png' % out_dir,
                            logscale=True,
                            legend=['PE-GEN'])

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #5
def main():
    # Set up data
    test_split = 1 # number of testing samples to use

    # load in gw templates and signals
    signal_train_images, signal_train_pars, signal_image, noise_signal, signal_pars = load_gw_data()
    
    if add_noise_real:
        train_array = []
        train_pe_array = []
        for i in range(len(signal_train_images)):
            for j in range(n_real):
                train_array.append([signal_train_images[i] + np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23])
                train_pe_array.append([signal_train_pars[i]])
        train_array = np.array(train_array)
        train_pe_array = np.array(train_pe_array)
        train_array = train_array.reshape(train_array.shape[0],train_array.shape[2])
        train_pe_array = train_pe_array.reshape(train_pe_array.shape[0],train_pe_array.shape[2])
    else:
        for i in range(len(signal_train_images)):
            signal_train_images[i] += np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23
    
    # load in lalinference noise signal
    noise_signal = h5py.File("gw_data/data/%s0%s.hdf5" % (event_name,tag),"r")
    noise_signal = np.reshape(noise_signal['wht_wvf'][:] * 1079.23, (n_pix, 1))  # TODO: the 1079.23 / 817.98 scaling should not be hardcoded
    #noise_signal *= 1079.23 / 817.98
    #noise_signal = noise_signal.reshape(noise_signal.shape[0],1)

    plt.plot(noise_signal)
    plt.savefig('%s/test.png' % out_path)
    plt.close()

    # load in lalinference samples
    with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f:
        lalinf_post = pickle.load(f) 
    lalinf_mc = lalinf_post[0]
    lalinf_q = lalinf_post[1]
    kernel_lalinf = gaussian_kde(lalinf_post)
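    # KDE of the LALInference posterior samples; used later as the reference density
    # when scoring the INN samples (e.g. in plot_pe_samples / the overlap tests).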

    # declare gw variants of positions and labels
    mc_max = np.max(signal_train_pars[:,0])
    #signal_train_pars /= mc_max
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_x = 2    # number of parameter dimensions
    ndim_y = n_pix    # number of data dimensions
    ndim_z = 100    # number of latent space dimensions?
    ndim_tot = n_pix + ndim_z + ndim_x + n_neurons  # total input/output width: data + latent + parameters + extra padding neurons

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    # number of nodes equal to number of parameters?
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
    
    
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
     
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}, 'F_args': {'batch_norm': False}})
     
    
    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t5 = Node([t4.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t6 = Node([t5.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t7 = Node([t6.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    t8 = Node([t7.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    t9 = Node([t8.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t10 = Node([t9.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    """
    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model

    lr = 1e-4
    gamma = 0.01**(1./120)
    l2_reg = 2e-5

    y_noise_scale = 1     # amount of noise to add to y parameter?
    zeros_noise_scale = 3e-2 # scale of the small random values used in place of exact zero padding

    # relative weighting of losses:
    lambd_predict = 300. # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.     # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)


    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                             eps=1e-04, weight_decay=l2_reg, amsgrad=True)
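    # Note: unlike the other examples, this optimizer enables the AMSGrad variant of Adam
    # (amsgrad=True) and starts from a smaller initial learning rate (1e-4).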
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)


    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:], labels[:]),
        batch_size=batch_size, shuffle=True, drop_last=True)


    # initialise the network weights: set the final fc3 layer of each subnetwork to small random values
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01*torch.randn(coeff.fc3.weight.shape)
            
    model.to(device)

    # number of test samples to use after training 
    N_samp = 4000

    # choose test samples to use after training
    # N_samp copies of the test signal buried in noise; only the z samples need to change.
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps = y_noise_scale * np.transpose(torch.tensor(np.repeat(noise_signal, N_samp, axis=1), dtype=torch.float))

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                         zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), # zeros_noise_scale * 
                         y_samps], dim=1)
    # what we should have now are N_samp copies of the event buried in noise, zero-padded up to ndim_tot
    y_samps = y_samps.to(device)

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []      
    kernel_cnn = False      
    try:
    #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch,
                                      zeros_noise_scale, batch_size, ndim_tot,
                                      ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit,
                                      lambd_latent, loss_latent, lambd_rev,
                                      loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist,lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()
            #rev_x[:,0] = mc_max * rev_x[:,0]
        
            # plot pe results and loss
            beta_max = 0
            """
            if i_epoch>0:
                kernel_cnn = gaussian_kde(rev_x)
                #overlap_y = np.reshape(rev_x[:,1], (rev_x[:,1].shape[0]))
                #overlap_x = np.reshape(rev_x[:,0], (rev_x[:,0].shape[0]))
                #overlap_dataset = np.array([overlap_x,overlap_y]).transpose()
                ks_score, ad_score, beta_score = overlap_tests(rev_x,lalinf_post,signal_pars,kernel_cnn,kernel_lalinf)
                beta_score_hist.append([beta_score])    
                plt.plot(np.linspace(1,i_epoch,len(beta_score_hist)),beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()            
            """
            if ((i_epoch % plot_cadence == 0) & (i_epoch>0)):
                pe_std = [0.02185649964844209, 0.005701401364171313] # this will need to be removed
                beta_score_hist.append([plot_pe_samples(rev_x, signal_pars, out_path,
                                                        i_epoch, lalinf_post, pe_std,
                                                        kernel_lalinf=kernel_lalinf,
                                                        kernel_cnn=kernel_cnn)])
                plt.plot(np.linspace(plot_cadence, i_epoch, len(beta_score_hist)),
                         beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,'%s/latest/pe_losses.png' % out_path,legend=['PE-GEN'])
                plot_losses(pe_losses,'%s/latest/pe_losses_logscale.png' % out_path,logscale=True,legend=['PE-GEN'])

                # save model
                #if beta_score_hist[:-1] > beta_max: beta_max = beta_score_hist[:-1]
                #if beta_score_hist[:-1] > beta_max or i_epoch==plot_cadence: model.save_state_dict('mytraining.pt')
            # make PE scatter plots with contours and beta score 
            #plt.scatter(rev_x[:,0], rev_x[:,1], s=1., c='red')
            #plt.scatter(lalinf_mc, lalinf_q, s=1., c='blue')
        

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Exemple #6
0
def main():

    # Set up simulation parameters
    batch_size = 1600  # set batch size
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'  # the signal model to use
    sigma = 0.2  # the noise std
    ndata = 32  # number of data samples
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1  # seed for generating data

    # generate data
    pos, labels, x, sig = data.generate(model=sig_model,
                                        tot_dataset_size=2**20,
                                        ndata=ndata,
                                        sigma=sigma,
                                        prior_bound=bound,
                                        seed=seed)

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png',
                dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2  # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 8  # number of latent space dimensions
    ndim_tot = max(ndim_x,
                   ndim_y + ndim_z)  # must be > ndim_x and > ndim_y + ndim_z
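    # with ndim_x = 2, ndim_y = 32 and ndim_z = 8 this gives ndim_tot = max(2, 40) = 40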

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    t3 = Node([t2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)
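    # Optional sanity check (a sketch, not part of the original script): the
    # ReversibleGraphNet should map a forward pass back to its input when run
    # with rev=True (call model.eval() first so dropout is disabled).
    #   x_check = torch.randn(4, ndim_tot)
    #   y_check = model(x_check)            # forward pass
    #   x_back  = model(y_check, rev=True)  # inverse pass, approximately x_check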

    # Train model
    # Training parameters
    n_epochs = 1000
    meta_epoch = 12  # number of epochs between learning-rate scheduler steps
    n_its_per_epoch = 12
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)
    model.to(device)

    # initialize plot for showing testing results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of test samples to use after training
    N_samp = 256

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    for i in range(r):
        for j in range(r):
            mvec, cvec, temp = data.get_lik(np.array(
                labels_test[cnt, :]).flatten(),
                                            n_grid=Ngrid,
                                            sig_model=sig_model,
                                            sigma=sigma,
                                            xvec=x,
                                            bound=bound)
            lik[i, j, :] = temp.flatten()
            cnt += 1
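    # lik now holds, for each of the r*r test cases, the true likelihood
    # evaluated on an Ngrid x Ngrid grid (flattened); it is reshaped below for
    # the contour plots that the network samples are compared against.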

    # start training loop
    try:
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('temporarily lowering the learning rate for early epochs')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # loop over a few cases and plot results in a grid
            cnt = 0
            for i in range(r):
                for j in range(r):

                    # convert data into correct format
                    y_samps = np.tile(np.array(labels_test[cnt, :]),
                                      N_samp).reshape(N_samp, ndim_y)
                    y_samps = torch.tensor(y_samps, dtype=torch.float)
                    #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                    y_samps = torch.cat(
                        [
                            torch.randn(N_samp, ndim_z),  #zeros_noise_scale * 
                            torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                            y_samps
                        ],
                        dim=1)
                    y_samps = y_samps.to(device)

                    # use the network to predict parameters
                    rev_x = model(y_samps, rev=True)
                    rev_x = rev_x.cpu().data.numpy()

                    # plot the samples and the true contours
                    axes[i, j].clear()
                    axes[i, j].contour(mvec,
                                       cvec,
                                       lik[i, j, :].reshape(Ngrid, Ngrid),
                                       levels=[0.68, 0.9, 0.99])
                    axes[i, j].scatter(rev_x[:, 0],
                                       rev_x[:, 1],
                                       s=0.5,
                                       alpha=0.5)
                    axes[i, j].plot(pos_test[cnt, 0],
                                    pos_test[cnt, 1],
                                    '+r',
                                    markersize=8)
                    axes[i, j].axis(bound)

                    cnt += 1

            # save the results to file
            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/posteriors_%s.png' %
                        i_epoch,
                        dpi=360)
            plt.savefig('/data/public_html/chrism/FrEIA/latest.png', dpi=360)

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")