Example #1
    def __init__(self, ndim_total, dim_x, dim_y, dim_z, hidden_dim=128):
        super(INN, self).__init__()

        nodes = [InputNode(ndim_total, name='input')]
        self.hidden_dim = hidden_dim
        self.ndim_total = ndim_total
        self.dim_x = dim_x
        self.dim_y = dim_y
        self.dim_z = dim_z

        for k in range(4):
            nodes.append(
                Node(nodes[-1],
                     GLOWCouplingBlock, {
                         'subnet_constructor': self.subnet_fc,
                         'clamp': 2.0
                     },
                     name=F'coupling_{k}'))

            nodes.append(
                Node(nodes[-1],
                     PermuteRandom, {'seed': k},
                     name=F'permute_{k}'))

        nodes.append(OutputNode(nodes[-1], name='output'))

        self.model = ReversibleGraphNet(nodes, verbose=False)
        self.zeros_noise_scale = 5e-2
        self.y_noise_scale = 1e-1
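Example #1 passes self.subnet_fc as the subnet_constructor of each GLOWCouplingBlock, but the method itself is not shown. A minimal sketch of such a subnet constructor, assuming torch.nn is imported as nn and following the usual FrEIA convention that the constructor receives input and output widths and returns an nn.Module, could be:

    def subnet_fc(self, dims_in, dims_out):
        # Hypothetical fully connected subnet used inside each coupling block;
        # hidden_dim is the width stored in __init__ above.
        return nn.Sequential(nn.Linear(dims_in, self.hidden_dim), nn.ReLU(),
                             nn.Linear(self.hidden_dim, dims_out))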
Example #2
def cINN(flags):
    """
    The constructor for the conditional INN (cINN) network
    :param flags: input flags from configuration
    :return: The INN network
    """
    # Set up the conditional node (y)
    cond_node = ConditionNode(flags.dim_y)
    # Start from input layer
    nodes = [InputNode(flags.dim_x, name='input')]
    # Add the coupling layers and random permutation layers
    for i in range(flags.couple_layer_num):
        nodes.append(
            Node(nodes[-1],
                 GLOWCouplingBlock, {
                     'subnet_constructor': subnet_fc,
                     'clamp': 2.0
                 },
                 conditions=cond_node,
                 name='coupling_{}'.format(i)))
        nodes.append(
            Node(nodes[-1],
                 PermuteRandom, {'seed': i},
                 name='permute_{}'.format(i)))
    # Attach the output Node
    nodes.append(OutputNode(nodes[-1], name='output'))
    nodes.append(cond_node)
    print("The nodes are:", nodes)
    # Return the assembled reversible network
    return ReversibleGraphNet(nodes, verbose=True)
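Example #2 assumes a module-level subnet_fc (analogous to the method sketched after Example #1) and returns the network without showing how it is called. A usage sketch, assuming the ReversibleGraphNet forward pass accepts the condition through its c argument (the exact call signature varies between FrEIA releases), might look like:

from types import SimpleNamespace
import torch

# Hypothetical flags object carrying only the fields cINN reads.
flags = SimpleNamespace(dim_x=8, dim_y=4, couple_layer_num=6)
model = cINN(flags)

x = torch.randn(16, flags.dim_x)
y = torch.randn(16, flags.dim_y)
z = model(x, c=[y])                 # forward: x -> z given condition y
x_rec = model(z, c=[y], rev=True)   # inverse: z -> x under the same condition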
Example #3
    def __init__(self, inputs, outputs, zeroPadding=0, numInvLayers=5, dropout=0.00, minSize=None):
        # Determine dimensions and construct DataSchema
        inMinLength = schema_min_len(inputs, zeroPadding)
        outMinLength = schema_min_len(outputs, zeroPadding)
        minLength = max(inMinLength, outMinLength)
        if minSize is not None:
            minLength = max(minLength, minSize)
        self.inSchema = DataSchema1D(inputs, minLength, zeroPadding)
        self.outSchema = DataSchema1D(outputs, minLength, zeroPadding)
        if len(self.inSchema) != len(self.outSchema):
            raise ValueError('Input and output schemas do not have the same dimension.')

        # Build net graph
        inp = InputNode(len(self.inSchema), name='Input (0-pad extra channels)')
        nodes = [inp]

        for i in range(numInvLayers):
            nodes.append(Node([nodes[-1].out0], rev_multiplicative_layer,
                         {'F_class': F_fully_connected_leaky, 'clamp': 2.0,
                          'F_args': {'dropout': 0.0}}, name='Inv%d' % i))
            if (i != numInvLayers - 1):
                nodes.append(Node([nodes[-1].out0], permute_layer, {'seed': i}, name='Permute%d' % i))

        nodes.append(OutputNode([nodes[-1].out0], name='Output'))
        # Build net
        super().__init__(nodes)
Example #4
def model(dim_x,
          dim_y,
          dim_z,
          dim_total,
          lr,
          l2_reg,
          meta_epoch,
          gamma,
          hidden_depth=8):
    nodes = []
    # Define the input node
    nodes.append(InputNode(dim_total, name='input'))

    # Define the hidden-layer nodes
    for k in range(hidden_depth):
        nodes.append(
            Node(nodes[-1],
                 GLOWCouplingBlock, {
                     'subnet_constructor': F_fully_connected,
                     'clamp': 2.0,
                 },
                 name=f'coupling_{k}'))
        nodes.append(
            Node(nodes[-1], PermuteRandom, {'seed': 1}, name=f'permute_{k}'))
    nodes.append(
        Node(nodes[-1],
             GLOWCouplingBlock, {
                 'subnet_constructor': F_fully_connected,
                 'clamp': 2.0,
             },
             name=f'coupling_{hidden_depth}'))
    # Define the output node
    nodes.append(OutputNode(nodes[-1], name='output'))

    # Build the reversible network
    inn = ReversibleGraphNet(nodes)

    # Define the optimizer
    # TODO: tune these parameters
    optimizer = torch.optim.Adam(inn.parameters(),
                                 lr=lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-04,
                                 weight_decay=l2_reg)

    # Learning-rate schedule
    # TODO: tune these parameters
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # Loss functions
    # x and z are unsupervised: MMD; y is supervised: squared error
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    return inn, optimizer, scheduler, loss_backward, loss_latent, loss_fit
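The loss handles MMD_multiscale and fit returned above are not defined in this snippet; Example #7 below contains an inline MMD_multiscale. A module-level sketch of both losses in the same style could be:

import torch

def MMD_multiscale(x, y):
    # Multiscale inverse-multiquadric MMD between two sample batches,
    # mirroring the inline definition in Example #7.
    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())

    rx = xx.diag().unsqueeze(0).expand_as(xx)
    ry = yy.diag().unsqueeze(0).expand_as(yy)

    dxx = rx.t() + rx - 2. * xx
    dyy = ry.t() + ry - 2. * yy
    dxy = rx.t() + ry - 2. * zz

    XX = torch.zeros_like(xx)
    YY = torch.zeros_like(xx)
    XY = torch.zeros_like(xx)

    for a in [0.2, 0.5, 0.9, 1.3]:
        XX += a**2 * (a**2 + dxx)**-1
        YY += a**2 * (a**2 + dyy)**-1
        XY += a**2 * (a**2 + dxy)**-1

    return torch.mean(XX + YY - 2. * XY)

def fit(input, target):
    # Supervised squared-error loss on the predicted y.
    return torch.mean((input - target)**2)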
Example #5
def main():
    # --------------------------------------- Generate data ------------------------------------------
    t_generate_start = time()
    # Set up the parameters for the simulated data
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    optical_model = 'km'  # the optical model to use
    ydim = 31  # dimensionality of each reflectance spectrum
    bound = [0.1, 0.9, 0.1, 0.9]
    seed = 1  # seed for generating data

    # Generate the training data
    # concentrations, reflectance, x, info = data.generate(
    #     model=optical_model,
    #     total_dataset_size=2 ** 20 * 20,
    #     ydim=ydim,
    #     prior_bound=bound,
    #     seed=seed
    # )
    concentrations, reflectance, x, info = data.math_optimized_generate()

    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))
    colors = np.arange(0, concentrations.shape[-1], 1)

    # Hold out a few samples from training to use as the final test set
    c_test = concentrations[-test_split:]
    r_test = reflectance[-test_split:]

    # Plot the spectral reflectance of the test samples for inspection; unrelated to the model
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(r_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([400, 700, 0, 1])
    plt.savefig('test_target_reflectance.png', dpi=360)
    plt.close()
    print("\n\nGenerating data took %.2f minutes\n" %
          ((time() - t_generate_start) / 60))

    # --------------------------------------- Build the network ------------------------------------------
    # Set the model hyperparameters
    ndim_x = concentrations.shape[-1]  # dimensionality of the recipe, i.e. the number of candidate colorants
    ndim_y = ydim  # dimensionality of the reflectance (31)
    ndim_z = 13  # dimensionality of the latent space
    ndim_tot = max(ndim_x, ndim_y + ndim_z)

    # Define the different parts of the network
    # Define the input node
    inp = InputNode(ndim_tot, name='input')

    # Define the hidden-layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p1 = Node([t1.out0], permute_layer, {'seed': 1})

    t2 = Node([p1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p2 = Node([t2.out0], permute_layer, {'seed': 2})

    t3 = Node([p2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p3 = Node([t3.out0], permute_layer, {'seed': 1})

    t4 = Node([p3.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    p4 = Node([t4.out0], permute_layer, {'seed': 2})

    t5 = Node([p4.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    # Define the output node
    outp = OutputNode([t5.out0], name='output')

    # Build the network
    nodes = [inp, t1, p1, t2, p2, t3, p3, t4, p4, t5, outp]
    model = ReversibleGraphNet(nodes)

    # --------------------------------------- Train the network ------------------------------------------
    # Hyperparameters
    # n_epochs = 3000  # number of training epochs
    n_epochs = 0  # number of training epochs
    plot_cadence = 100  # plot the loss curves every 100 epochs
    meta_epoch = 12  # step size for the learning-rate schedule
    n_its_per_epoch = 12  # 12 batches per epoch
    batch_size = 1600  # 1600 samples per batch
    lr = 1.5e-3  # initial learning rate
    gamma = 0.004**(1. / 1333)  # multiplicative learning-rate decay factor
    l2_reg = 2e-5  # weight decay (L2 penalty)
    # To make the input and output dimensions equal, pad with small values rather than exact zeros
    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # Relative weighting of the losses
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # Define the optimizer
    # params: parameters to optimize, lr: learning rate, betas: coefficients for the running averages of the gradient and its square
    # eps: term added to the denominator for numerical stability
    # weight_decay: weight decay
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    # Learning-rate schedule
    # optimizer: the optimizer to schedule
    # step_size: period of the learning-rate decay
    # gamma: multiplicative factor of the decay
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)
    # Loss functions
    # x and z are unsupervised: MMD; y is supervised: squared error
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # Training-set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        concentrations[test_split:], reflectance[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # Initialize the network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)
    model.to(device)

    # Initialize the figure for the test results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # Number of test samples
    N_samp = 256

    # --------------------------------------- Start training ------------------------------------------
    try:
        t_start = time()  # training start time
        loss_for_list = []  # record the forward-pass losses
        loss_rev_list = []  # record the backward-pass losses

        tsne = TSNE(n_components=2, init='pca')
        # Colorant identifiers
        color_names = [
            '07H', '08', '08S', '09', '09B', '09S', '10B', '12', '13', '14',
            '15', '16', '17A', '18A', '19A', '20A-2', '23A', '2704', '2803',
            '2804', '2807'
        ]

        # Iterate for n_epochs epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):
            scheduler.step()
            # TODO: this branch is never entered
            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # Train the model
            avg_loss, loss_for, loss_rev = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, i_epoch)
            # Append the forward and backward losses
            loss_for_list.append(loss_for.item())
            loss_rev_list.append(loss_rev.item())
            inn_losses = [loss_for_list, loss_rev_list]

            if ((i_epoch + 1) % plot_cadence == 0) & (i_epoch > 0):
                plot_losses(inn_losses,
                            legend=['PE-GEN'],
                            lossNo=int((i_epoch + 1) / plot_cadence))

        # TODO
        model = torch.load('model_dir/km_impl_model')
        # torch.save(model, 'model_dir/km_impl_model')

        fig, axes = plt.subplots(1, 1, figsize=(2, 2))

        # Reflectance of a real measured sample
        test_samps = np.array([[
            0.2673378, 0.3132285, 0.3183329, 0.3234908, 0.3318701, 0.3409707,
            0.3604081, 0.4168356, 0.5351773, 0.6202191, 0.6618687, 0.6919741,
            0.7136238, 0.7292901, 0.7314631, 0.7131701, 0.6773048, 0.6302681,
            0.5738088, 0.5133060, 0.4535525, 0.4108878, 0.3908512, 0.3808001,
            0.3752591, 0.3727644, 0.3801365, 0.3976869, 0.4237110, 0.4332685,
            0.4433292
        ]])
        # Recipe corresponding to the real sample
        test_cons = np.array([[
            0, 0.8014, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1491, 0, 0, 0,
            0.2241, 0
        ]])
        for cnt in range(test_samps.shape[0]):
            print('before:', cnt, test_samps[cnt, :])
            test_samp = np.tile(np.array(test_samps[cnt, :]),
                                N_samp).reshape(N_samp, ydim)
            test_samp = torch.tensor(test_samp, dtype=torch.float)
            test_samp += y_noise_scale * torch.randn(N_samp, ydim)

            test_samp = torch.cat(
                [
                    torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                    torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                    test_samp
                ],
                dim=1)
            test_samp = test_samp.to(device)

            print('after:', cnt, test_samp)
            # use the network to predict parameters
            test_rev = model(test_samp, rev=True)[:, :colors.size]
            test_rev = test_rev.cpu().data.numpy()
            # Assume that a colorant whose predicted concentration is below a threshold is not needed
            test_rev = np.where(test_rev < 0.1, 0, test_rev)

            # Compute the reflectance of the predicted recipes
            # recipe_ref = data.recipe_reflectance(test_rev, optical_model)
            # Use the corrected model to compute the recipe reflectance
            recipe_ref = data.correct_recipe_reflectance(test_rev)
            print("######## Test Sample %d ########" % cnt)
            # Track the three recipes with the smallest color difference
            top3 = [[100, 0], [100, 0], [100, 0]]
            for n in range(test_rev.shape[0]):
                # print(test_rev[n, :])
                diff = data.color_diff(test_samps[cnt, :], recipe_ref[n, :])
                if diff < top3[2][0]:
                    top3[2][0] = diff
                    top3[2][1] = n
                    top3.sort()
            # Print the three recipes with the smallest color difference
            for n in range(3):
                print(test_rev[top3[n][1], :])
                print("color diff: %.2f \n" % top3[n][0])
            print("\n\n")

            # draw
            # feature scaling
            test_x = test_cons[cnt, :].reshape(1, test_cons[cnt, :].shape[-1])
            plot_x = np.concatenate((test_rev, test_x), axis=0)

            # use tsne to decrease dimensionality
            x_norm = pd.DataFrame(plot_x, columns=color_names)

            # Classify the recipes by which colorants they need (1 = needed, 0 = not needed)
            classes = np.zeros(N_samp).reshape(N_samp, 1)
            paint_needed = np.where(test_rev == 0, 0, 1)
            for paint_no in colors:
                classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
            class_norm = pd.DataFrame(np.concatenate(
                (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                      columns=['class'])

            data_plot = pd.concat(
                [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm], axis=1)
            class_data = data_plot['class']

            axes.clear()
            recipe_classes = np.array(
                class_norm[:-1].drop_duplicates()).reshape(1, -1).tolist()[0]
            for recipe_class in recipe_classes:
                axes.scatter(data_plot[class_data == recipe_class][0],
                             data_plot[class_data == recipe_class][1],
                             s=2,
                             alpha=0.5)
            axes.scatter(data_plot[class_data == 0][0],
                         data_plot[class_data == 0][1],
                         marker='+',
                         s=10)
            fig.canvas.draw()
            plt.savefig('test_result%d.png' % cnt, dpi=360)

        # loop over a few cases and plot results in a grid
        cnt = 0
        for i in range(r):
            for j in range(r):
                # convert data into correct format
                y_samps = np.tile(np.array(r_test[cnt, :]),
                                  N_samp).reshape(N_samp, ydim)
                y_samps = torch.tensor(y_samps, dtype=torch.float)
                y_samps += y_noise_scale * torch.randn(N_samp, ydim)

                y_samps = torch.cat(
                    [
                        torch.randn(N_samp, ndim_z),  # zeros_noise_scale *
                        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                        y_samps
                    ],
                    dim=1)
                y_samps = y_samps.to(device)

                # use the network to predict parameters
                rev_x = model(y_samps, rev=True)[:, :colors.size]
                rev_x = rev_x.cpu().data.numpy()

                # Assume that a colorant whose predicted concentration is below a threshold is not needed
                rev_x = np.where(rev_x < 0.1, 0, rev_x)

                # feature scaling
                test_x = c_test[cnt, :].reshape(1, c_test[cnt, :].shape[-1])
                plot_x = np.concatenate((rev_x, test_x), axis=0)

                # use tsne to decrease dimensionality
                x_norm = pd.DataFrame(plot_x, columns=color_names)

                # Classify the recipes by which colorants they need (1 = needed, 0 = not needed)
                classes = np.zeros(N_samp).reshape(N_samp, 1)
                paint_needed = np.where(rev_x == 0, 0, 1)
                for paint_no in colors:
                    classes[:, 0] += paint_needed[:, paint_no] * 2**paint_no
                class_norm = pd.DataFrame(np.concatenate(
                    (classes, np.zeros(1).reshape(1, 1)), axis=0),
                                          columns=['class'])

                data_plot = pd.concat(
                    [pd.DataFrame(tsne.fit_transform(x_norm)), class_norm],
                    axis=1)

                class_data = data_plot['class']

                # plot the predicted and the true recipe
                axes.clear()
                recipe_classes = np.array(
                    class_norm[:-1].drop_duplicates()).reshape(1,
                                                               -1).tolist()[0]
                for recipe_class in recipe_classes:
                    axes.scatter(data_plot[class_data == recipe_class][0],
                                 data_plot[class_data == recipe_class][1],
                                 s=2,
                                 alpha=0.5)
                axes.scatter(data_plot[class_data == 0][0],
                             data_plot[class_data == 0][1],
                             marker='+',
                             s=10)

                fig.canvas.draw()
                plt.savefig('training_result%d.png' % cnt, dpi=360)

                # recipe_ref = data.recipe_reflectance(rev_x, optical_model)
                # Use the corrected model to compute the recipe reflectance
                recipe_ref = data.correct_recipe_reflectance(rev_x)
                print("######## Test %d ########" % cnt)
                print(c_test[cnt])
                print("################")
                # Track the three recipes with the smallest color difference
                top3 = [[100, 0], [100, 0], [100, 0]]
                for n in range(rev_x.shape[0]):
                    # print(rev_x[n, :])
                    diff = data.color_diff(r_test[cnt].numpy(),
                                           recipe_ref[n, :])
                    if diff < top3[2][0]:
                        top3[2][0] = diff
                        top3[2][1] = n
                        top3.sort()
                # Print the three recipes with the smallest color difference
                for n in range(3):
                    print(rev_x[top3[n][1], :])
                    print("color diff: %.2f \n" % top3[n][0])
                print("\n\n")

                cnt += 1

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took %.2f minutes\n" % ((time() - t_start) / 60))
Example #6
def main():
    # Set up data

    # make training signals
    signal_train_pars = []
    signal_train_images = []
    for i in range(total_temp_num):
        signal_train_pars.append(
            [np.random.uniform(-1.0, 1.0),
             np.random.uniform(0.5, 1.5)])
        signal_train_images.append(
            np.random.normal(loc=signal_train_pars[i][0],
                             scale=signal_train_pars[i][1],
                             size=(1, n_pix)))
    signal_train_pars = np.array(signal_train_pars)
    signal_train_images = np.array(signal_train_images).reshape(
        total_temp_num, n_pix)

    # make random 1D gaussian signal
    noise_signal = np.random.normal(loc=0.0, scale=1.0, size=(1, n_pix))
    #noise_signal = norm.rvs(0,1.0,(1,n_pix))
    signal_pars = [0.0, 1.0]

    # load in lalinference samples
    #with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f:
    #    lalinf_post = pickle.load(f)
    #lalinf_mc = lalinf_post[0]
    #lalinf_q = lalinf_post[1]

    # declare gw variants of positions and labels
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_tot = n_pix + n_neurons  # two times the number of data dimensions?
    ndim_x = 2  # number of parameter dimensions
    ndim_y = n_pix  # number of data dimensions
    ndim_z = 10  # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    """
    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    lr = 1e-2
    decayEpochs = (n_epochs * n_its_per_epoch) // meta_epoch
    gamma = 0.004**(1.0 / decayEpochs)
    l2_reg = 2e-5

    #gamma = 0.01**(1./120)

    y_noise_scale = 3e-2  # amount of noise to add to y parameter?
    zeros_noise_scale = 3e-2  # what is this??

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:test_split], labels[:test_split]),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:], labels[:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # what is happening here? More set up of network?
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # number of test samples to use after training
    N_samp = 4000

    # choose test samples to use after training
    # 1000 iterations of test signal buried in noise. Only need to change z parameter.
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps_nparray = np.repeat(noise_signal, N_samp, axis=0)
    y_samps = torch.tensor(y_samps_nparray, dtype=torch.float)

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat(
        [
            torch.randn(N_samp, ndim_z),
            zeros_noise_scale * torch.zeros(
                N_samp, ndim_tot - ndim_y - ndim_z),  # zeros_noise_scale * 
            y_samps
        ],
        dim=1)
    # what we should have now are 1000 copies of the event buried in noise with zero padding up to 2048
    y_samps = y_samps.to(device)

    # get control contour values
    cont_mu, cont_sig, prob, levels = compute_like(noise_signal.reshape(
        n_pix, ),
                                                   N=n_pix)

    #lalinf_post_blah = np.array([np.random.normal(loc=0,scale=1.0,size=(N_samp)), np.random.normal(loc=1.0,scale=1.0,size=(N_samp))])

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []
    try:
        #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(model, train_loader, n_its_per_epoch,
                                      zeros_noise_scale, batch_size, ndim_tot,
                                      ndim_x, ndim_y, ndim_z, y_noise_scale,
                                      optimizer, lambd_predict, loss_fit,
                                      lambd_latent, loss_latent, lambd_rev,
                                      loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist, lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # plot pe results and loss
            if ((i_epoch % plot_cadence == 0) & (i_epoch > 0)):
                #pe_std = [0.005, 0.01] # this will need to be removed
                #beta_score_hist.append([plot_pe_samples(rev_x,signal_pars,out_path,i_epoch,lalinf_post,pe_std)])
                #plt.plot(np.linspace(plot_cadence,i_epoch,len(beta_score_hist)),beta_score_hist)
                #plt.savefig('%s/latest/beta_hist.png' % out_path)
                #plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,
                            '%s/latest/pe_losses.png' % out_path,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%s/latest/pe_losses_logscale.png' % out_path,
                            logscale=True,
                            legend=['PE-GEN'])

                # make PE scatter plots with contours and beta score
                mu0 = 0.0
                sig0 = 1.0
                plt.scatter(rev_x[:, 0],
                            rev_x[:, 1],
                            s=1.,
                            c='red',
                            label='INN Results')
                plt.contour(cont_mu,
                            cont_sig,
                            prob,
                            levels=[0.68, 0.9, 0.95, 0.99])
                plt.plot(mu0, sig0, '+', label='Truth')
                plt.xlabel('mean')
                plt.ylabel('standard deviation')
                plt.legend()
                plt.savefig('%s/latest/predicted_pe.png' % out_path)
                plt.close()

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #7
    def __init__(self, config, *args):
        super(INNModel, self).__init__(config)
        self.config.update({
            'batch_size': int(2**12),
            'lr': 0.001,
            'n_epochs': 50,
            'loss_epochs': 100,
            'test_percent': 20,
            'data_size': 8_000_000
        })
        self.config.update(config)

        # self.embedder = torch.load(
        #     './runs/network_Sep19_23-35-50/checkpoints/model_999.pt'
        # ).feature_creator

        self.ndim_tot = 10
        self.ndim_x = 1
        self.ndim_y = 9
        self.ndim_z = 1

        inp = InputNode(self.ndim_tot, name='input')

        t1 = Node([inp.out0], rev_multiplicative_layer,
                  {'F_class': F_fully_connected,
                   'F_args': {
                       'batch_norm': True, 'internal_size': 2,
                       # 'dropout': 0.3
                   }})

        # t2 = Node([t1.out0], rev_multiplicative_layer,
        #           {'F_class': F_fully_connected, 'clamp': 2.0,
        #            'F_args': {'dropout': 0.5}})

        # t3 = Node([t2.out0], rev_multiplicative_layer,
        #           {'F_class': F_fully_connected, 'clamp': 2.0,
        #            'F_args': {'dropout': 0.5}})

        outp = OutputNode([t1.out0], name='output')

        nodes = [inp, t1, outp]
        self.model = ReversibleGraphNet(nodes)
        self.model.to(self.device)

        self.loss = F.mse_loss
        self.x_noise_scale = 3e-2
        self.y_noise_scale = 3e-2
        self.zeros_noise_scale = 3e-2

        # relative weighting of losses:
        self.lambd_predict = 3.
        self.lambd_latent = 2.
        self.lambd_rev = 10.

        self.pad_x = torch.zeros(self.config['batch_size'], self.ndim_tot -
                                 self.ndim_x)
        # self.pad_yz = torch.zeros(self.config['batch_size'], self.ndim_tot -
        #                           self.ndim_y - self.ndim_z)

        def MMD_multiscale(x, y):
            xx, yy, zz = torch.mm(x,x.t()), torch.mm(y,y.t()), torch.mm(x,y.t())

            rx = (xx.diag().unsqueeze(0).expand_as(xx))
            ry = (yy.diag().unsqueeze(0).expand_as(yy))

            dxx = rx.t() + rx - 2.*xx
            dyy = ry.t() + ry - 2.*yy
            dxy = rx.t() + ry - 2.*zz

            XX, YY, XY = (torch.zeros(xx.shape).to(self.device),
                          torch.zeros(xx.shape).to(self.device),
                          torch.zeros(xx.shape).to(self.device))

            for a in [0.2, 0.5, 0.9, 1.3]:
                XX += a**2 * (a**2 + dxx)**-1
                YY += a**2 * (a**2 + dyy)**-1
                XY += a**2 * (a**2 + dxy)**-1

            return torch.mean(XX + YY - 2.*XY)

        def fit(input, target):
            return torch.mean((input - target)**2)

        self.loss_backward = MMD_multiscale
        self.loss_latent = MMD_multiscale
        self.loss_fit = F.l1_loss

        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.config['lr'],
            weight_decay=1e2,
            # momentum=0.9
        )
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, 1758 * 5,
            0.1
        )
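Example #7 wires up loss_fit, loss_latent and loss_backward but leaves the training step to another part of the class. A minimal sketch of one bidirectional optimization step, assuming a hypothetical train_step method, the padding scheme used throughout these examples, and the older model(x, rev=...) call convention, could be:

    def train_step(self, x, y):
        # Hypothetical single training step combining the three INN losses.
        self.optimizer.zero_grad()

        # Pad x up to ndim_tot; build (z, padding, y) on the output side.
        x_pad = torch.cat([x, self.x_noise_scale *
                           torch.randn(x.shape[0], self.ndim_tot - self.ndim_x, device=x.device)], dim=1)
        z = torch.randn(x.shape[0], self.ndim_z, device=x.device)
        y_pad = torch.cat([z,
                           self.zeros_noise_scale *
                           torch.randn(x.shape[0], self.ndim_tot - self.ndim_y - self.ndim_z, device=x.device),
                           y + self.y_noise_scale * torch.randn_like(y)], dim=1)

        # Forward pass: supervised loss on the predicted y, MMD on the latent z.
        out = self.model(x_pad)
        loss = self.lambd_predict * self.loss_fit(out[:, -self.ndim_y:], y_pad[:, -self.ndim_y:])
        loss += self.lambd_latent * self.loss_latent(out[:, :self.ndim_z], z)

        # Backward pass: MMD between reconstructed and true x.
        x_rec = self.model(y_pad, rev=True)
        loss += self.lambd_rev * self.loss_backward(x_rec[:, :self.ndim_x], x_pad[:, :self.ndim_x])

        loss.backward()
        self.optimizer.step()
        return loss.item()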
Example #8
def main():
    # Set up data
    batch_size = 1600  # set batch size
    test_split = 10000  # number of testing samples to use

    # generate data
    # makes a torch.tensor() with arrays of (n_samples X parameters) and (n_samples X data)
    # labels are the colours and pos are the x,y coords
    # however, labels are 1-hot encoded
    pos, labels = data.generate(labels='all', tot_dataset_size=2**20)

    # just simply renaming the colors properly.
    #c = np.where(labels[:test_split])[1]
    c = labels[:test_split, :]
    plt.figure(figsize=(6, 6))
    plt.scatter(pos[:test_split, 0],
                pos[:test_split, 1],
                c=c,
                cmap='Set1',
                s=0.25)
    plt.xticks([])
    plt.yticks([])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png')
    plt.close()

    # setting up the model
    ndim_tot = 16  # ?
    ndim_x = 2  # number of parameter dimensions (x,y)
    ndim_y = 3  # number of label dimensions (colours for 1-hot encoding)
    ndim_z = 2  # number of latent space dimensions?

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    t3 = Node([t2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.0
        }
    })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 3000
    meta_epoch = 12  # what is this???
    n_its_per_epoch = 4
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    print(pad_x.shape, pad_yz.shape)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[:test_split], labels[:test_split]),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)

    model.to(device)

    # initialize gif for showing training procedure
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    axes[0].set_title('Predicted labels (Forwards Process)')
    axes[1].set_xticks([])
    axes[1].set_yticks([])
    axes[1].set_title('Generated Samples (Backwards Process)')
    #fig.show()
    #fig.canvas.draw()

    # number of test samples to use after training
    N_samp = 4096

    # choose test samples to use after training
    x_samps = torch.cat([x for x, y in test_loader], dim=0)[:N_samp]
    y_samps = torch.cat([y for x, y in test_loader], dim=0)[:N_samp]
    #c = np.where(y_samps)[1]
    #c = y_samps[:,0]
    c = np.array(y_samps).reshape(N_samp, ndim_y)
    y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
    y_samps = torch.cat([
        torch.randn(N_samp, ndim_z), zeros_noise_scale *
        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), y_samps
    ],
                        dim=1)
    y_samps = y_samps.to(device)

    # start training loop
    try:
        #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

    #         print(i_epoch, end='\t ')
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # predict the locations of test labels
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()

            # predict the label given a location
            #pred_c = model(torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
            #                         dim=1).to(device)).data[:, -8:].argmax(dim=1)
            pred_c = model(
                torch.cat((x_samps, torch.zeros(N_samp, ndim_tot - ndim_x)),
                          dim=1).to(device)).data[:, -1:].argmax(dim=1)

            axes[0].clear()
            #axes[0].scatter(tmp_x_samps[:,0], tmp_x_samps[:,1], c=pred_c, cmap='Set1', s=1., vmin=0, vmax=9)
            axes[0].axis('equal')
            axes[0].axis([-3, 3, -3, 3])
            axes[0].set_xticks([])
            axes[0].set_yticks([])

            axes[1].clear()
            axes[1].scatter(rev_x[:, 0],
                            rev_x[:, 1],
                            c=c,
                            cmap='Set1',
                            s=1.,
                            vmin=0,
                            vmax=9)
            axes[1].axis('equal')
            axes[1].axis([-3, 3, -3, 3])
            axes[1].set_xticks([])
            axes[1].set_yticks([])

            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/training_pred.png')

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #9
def main():

    # Set up simulation parameters
    batch_size = 1600  # set batch size
    r = 4              # the grid dimension for the output tests
    test_split = r*r   # number of testing samples to use
    sigma = 0.2        # the noise std
    ndata = 64         # number of data samples
    usepars = [0,1,2,3]    # parameter indices to use
    seed = 1           # seed for generating data
    run_label='gpu0'
    out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/multipar/%s/" % run_label

    # generate data
    pos, labels, x, sig, parnames = data.generate(
        tot_dataset_size=2**20,
        ndata=ndata,
        usepars=usepars,
        sigma=sigma,
        seed=seed
    )
    print('generated data')

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6,6))
    fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row')
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i,j].plot(x,np.array(labels_test[cnt,:]),'.')
            axes[i,j].plot(x,np.array(sig_test[cnt,:]),'-')
            cnt += 1
            axes[i,j].axis([0,1,-1.5,1.5])
            axes[i,j].set_xlabel('time') if i==r-1 else axes[i,j].set_xlabel('')
            axes[i,j].set_ylabel('h(t)') if j==0 else axes[i,j].set_ylabel('')
    plt.savefig('%stest_distribution.png' % out_dir,dpi=360)
    plt.close()

    # precompute true posterior samples on the test data
    cnt = 0
    N_samp = 1000
    ndim_x = len(usepars)
    samples = np.zeros((r*r,N_samp,ndim_x))
    for i in range(r):
        for j in range(r):
            samples[cnt,:,:] = data.get_lik(np.array(labels_test[cnt,:]).flatten(),sigma=sigma,usepars=usepars,Nsamp=N_samp)
            print(samples[cnt,:10,:])
            cnt += 1

    # initialize plot for showing testing results
    fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row')

    for k in range(ndim_x):
        parname1 = parnames[k]
        for nextk in range(ndim_x):
            parname2 = parnames[nextk]
            if nextk>k:
                cnt = 0
                for i in range(r):
                    for j in range(r):

                        # plot the samples and the true contours
                        axes[i,j].clear()
                        axes[i,j].scatter(samples[cnt,:,k], samples[cnt,:,nextk],c='b',s=0.5,alpha=0.5)
                        axes[i,j].plot(pos_test[cnt,k],pos_test[cnt,nextk],'+c',markersize=8)
                        axes[i,j].set_xlim([0,1])
                        axes[i,j].set_ylim([0,1])
                        axes[i,j].set_xlabel(parname1) if i==r-1 else axes[i,j].set_xlabel('')
                        axes[i,j].set_ylabel(parname2) if j==0 else axes[i,j].set_ylabel('')

                        cnt += 1

                # save the results to file
                fig.canvas.draw()
                plt.savefig('%strue_samples_%d%d.png' % (out_dir,k,nextk),dpi=360)

    # setting up the model 
    ndim_x = len(usepars)        # number of posterior parameter dimensions (x,y)
    ndim_y = ndata    # number of label dimensions (noisy data samples)
    ndim_z = 4        # number of latent space dimensions?
    ndim_tot = max(ndim_x,ndim_y+ndim_z)     # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    #t1 = Node([inp.out0], rev_multiplicative_layer,
    #          {'F_class': F_conv, 'clamp': 2.0,
    #           'F_args': {'kernel_size': 3,'leaky_slope': 0.1}})

    #def __init__(self, dims_in, F_class=F_fully_connected, F_args={},
    #             clamp=5.):
    #    super(rev_multiplicative_layer, self).__init__()
    #    channels = dims_in[0][0]
    #
    #    self.split_len1 = channels // 2
    #    self.split_len2 = channels - channels // 2
    #    self.ndims = len(dims_in[0])
    #
    #    self.clamp = clamp
    #    self.max_s = exp(clamp)
    #    self.min_s = exp(-clamp)
    #
    #    self.s1 = F_class(self.split_len1, self.split_len2, **F_args)
    #    self.t1 = F_class(self.split_len1, self.split_len2, **F_args)
    #    self.s2 = F_class(self.split_len2, self.split_len1, **F_args)
    #    self.t2 = F_class(self.split_len2, self.split_len1, **F_args)

    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.2}})

    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    # define output layer node
    outp = OutputNode([t4.out0], name='output')

    nodes = [inp, t1, t2, t3, t4, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 10000
    meta_epoch = 12 # what is this???
    n_its_per_epoch = 12
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1./120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300. # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.     # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                             eps=1e-04, weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[test_split:], labels[test_split:]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # initialisation of network weights
    #for mod_list in model.children():
    #    for block in mod_list.children():
    #        for coeff in block.children():
    #            coeff.fc3.weight.data = 0.01*torch.randn(coeff.fc3.weight.shape)
    #model.to(device)

    # start training loop            
    try:
        t_start = time()
        olvec = np.zeros((r,r,int(n_epochs/10)))
        s = 0
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            train(model,train_loader,n_its_per_epoch,zeros_noise_scale,batch_size,
                ndim_tot,ndim_x,ndim_y,ndim_z,y_noise_scale,optimizer,lambd_predict,
                loss_fit,lambd_latent,loss_latent,lambd_rev,loss_backward,i_epoch)

            # loop over a few cases and plot results in a grid
            if np.remainder(i_epoch,10)==0:
                for k in range(ndim_x):
                    parname1 = parnames[k]
                    for nextk in range(ndim_x):
                        parname2 = parnames[nextk]
                        if nextk>k:
                            cnt = 0

                            # initialize plot for showing testing results
                            fig, axes = plt.subplots(r,r,figsize=(6,6),sharex='col',sharey='row')

                            for i in range(r):
                                for j in range(r):

                                    # convert data into correct format
                                    y_samps = np.tile(np.array(labels_test[cnt,:]),N_samp).reshape(N_samp,ndim_y)
                                    y_samps = torch.tensor(y_samps, dtype=torch.float)
                                    y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                                    y_samps = torch.cat([torch.randn(N_samp, ndim_z), zeros_noise_scale * 
                                        torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                                        y_samps], dim=1)
                                    y_samps = y_samps.to(device)

                                    # use the network to predict parameters
                                    rev_x = model(y_samps, rev=True)
                                    rev_x = rev_x.cpu().data.numpy()

                                    # compute the n-d overlap
                                    if k==0 and nextk==1:
                                        ol = data.overlap(samples[cnt,:,:ndim_x],rev_x[:,:ndim_x])
                                        olvec[i,j,s] = ol                                     

                                    # plot the samples and the true contours
                                    axes[i,j].clear()
                                    axes[i,j].scatter(samples[cnt,:,k], samples[cnt,:,nextk],c='b',s=0.2,alpha=0.5)
                                    axes[i,j].scatter(rev_x[:,k], rev_x[:,nextk],c='r',s=0.2,alpha=0.5)
                                    axes[i,j].plot(pos_test[cnt,k],pos_test[cnt,nextk],'+c',markersize=8)
                                    axes[i,j].set_xlim([0,1])
                                    axes[i,j].set_ylim([0,1])
                                    oltxt = '%.2f' % olvec[i,j,s]
                                    axes[i,j].text(0.90, 0.95, oltxt,
                                        horizontalalignment='right',
                                        verticalalignment='top',
                                        transform=axes[i,j].transAxes)
                                    matplotlib.rc('xtick', labelsize=8)     
                                    matplotlib.rc('ytick', labelsize=8) 
                                    axes[i,j].set_xlabel(parname1) if i==r-1 else axes[i,j].set_xlabel('')
                                    axes[i,j].set_ylabel(parname2) if j==0 else axes[i,j].set_ylabel('')
                                    cnt += 1

                            # save the results to file
                            fig.canvas.draw()
                            plt.savefig('%sposteriors_%d%d_%04d.png' % (out_dir,k,nextk,i_epoch),dpi=360)
                            plt.savefig('%slatest_%d%d.png' % (out_dir,k,nextk),dpi=360)
                            plt.close()
                s += 1

            # plot overlap results
            if np.remainder(i_epoch,10)==0:                
                fig, axes = plt.subplots(1,figsize=(6,6))
                for i in range(r):
                    for j in range(r):
                        axes.semilogx(10*np.arange(olvec.shape[2]),olvec[i,j,:],alpha=0.5)
                axes.grid()
                axes.set_ylabel('overlap')
                axes.set_xlabel('epoch')
                axes.set_ylim([0,1])
                plt.savefig('%soverlap.png' % out_dir,dpi=360)
                plt.close()

    except KeyboardInterrupt:
        pass
    finally:
        print("\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
Example #10
def main():

    # Set up simulation parameters
    batch_size = 128  # set batch size
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'  # the signal model to use
    sigma = 0.2  # the noise std
    ndata = 128  #32 number of data samples in time series
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1  # seed for generating data
    out_dir = "/home/hunter.gabbard/public_html/CBC/cINNamon/gausian_results/"
    n_neurons = 0
    do_contours = True  # if True, plot contours of predictions by INN
    plot_cadence = 50
    do_latent_struc = False  # if True, plot latent space 2D structure
    conv_nn = False  # if True, use convolutional nn structure

    # setup output directory - if it does not exist
    os.system('mkdir -p %s' % out_dir)

    # generate data
    pos, labels, x, sig = data.generate(
        model=sig_model,
        tot_dataset_size=int(1e6),  # 1e6
        ndata=ndata,
        sigma=sigma,
        prior_bound=bound,
        seed=seed)

    if do_latent_struc:
        # calculate mode of x-space for both pars
        mode_1 = stats.mode(np.array(pos[:, 0]))
        mode_2 = stats.mode(np.array(pos[:, 1]))

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig_post, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig("%stest_distribution.png" % out_dir, dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2  # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 200  # number of latent space dimensions?
    ndim_tot = max(
        ndim_x,
        ndim_y + ndim_z) + n_neurons  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    filtsize = 3
    dropout = 0.0
    clamp = 1.0
    if conv_nn == True:
        t1 = Node(
            [inp.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })

        t2 = Node(
            [t1.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })

        t3 = Node(
            [t2.out0], rev_multiplicative_layer, {
                'F_class': F_conv,
                'clamp': clamp,
                'F_args': {
                    'kernel_size': filtsize,
                    'leaky_slope': 0.1,
                    'batch_norm': False
                }
            })
        #t4 = Node([t1.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #           'batch_norm':False}})

        #t5 = Node([t2.out0], rev_multiplicative_layer,
        #          {'F_class': F_conv, 'clamp': 2.0,
        #           'F_args':{'kernel_size': filtsize,'leaky_slope':0.1,
        #           'batch_norm':False}})

    else:
        t1 = Node(
            [inp.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

        t2 = Node(
            [t1.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

        t3 = Node(
            [t2.out0], rev_multiplicative_layer, {
                'F_class': F_fully_connected,
                'clamp': clamp,
                'F_args': {
                    'dropout': dropout
                }
            })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 12000
    meta_epoch = 12  # step size (in epochs) of the lr scheduler below
    n_its_per_epoch = 12

    lr = 1e-2
    gamma = 0.01**(1. / 120)
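    # gamma**120 = 0.01, i.e. the learning rate decays by a factor of 100
    # over 120 scheduler steps (one step every meta_epoch epochs)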
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 4000.  # forward pass (y fit); previously 300
    lambd_latent = 900.  # latent space; previously 300
    lambd_rev = 1000.  # backwards pass; previously 400

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                if conv_nn:
                    coeff.conv3.weight.data = 0.01 * torch.randn(
                        coeff.conv3.weight.shape)
    model.to(device)

    # number of test samples to use after training
    N_samp = 2500

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    true_post = np.zeros((r, r, N_samp, 2))
    lossf_hist = []
    lossrev_hist = []
    losstot_hist = []
    losslatent_hist = []
    beta_score_hist = []

    for i in range(r):
        for j in range(r):
            mvec, cvec, temp, post_points = data.get_lik(np.array(
                labels_test[cnt, :]).flatten(),
                                                         n_grid=Ngrid,
                                                         sig_model=sig_model,
                                                         sigma=sigma,
                                                         xvec=x,
                                                         bound=bound)
            lik[i, j, :] = temp.flatten()
            true_post[i, j, :] = post_points[:N_samp]
            cnt += 1

    # start training loop
    try:
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            losstot, losslatent, lossrev, lossf, lambd_latent = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, conv_nn, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.data.item())
            lossrev_hist.append(lossrev.data.item())
            losstot_hist.append(losstot)
            losslatent_hist.append(losslatent.data.item())
            pe_losses = [
                losstot_hist, losslatent_hist, lossrev_hist, lossf_hist
            ]

            # loop over a few cases and plot results in a grid
            cnt = 0
            beta_max = 0
            if (i_epoch % plot_cadence == 0) and (i_epoch > 0):
                # use the network to predict parameters

                if do_latent_struc:
                    # do latent space structure plotting
                    y_samps_latent = np.tile(np.array(labels_test[0, :]),
                                             1).reshape(1, ndim_y)
                    y_samps_latent = torch.tensor(y_samps_latent,
                                                  dtype=torch.float)
                    x1_i_dist = []
                    x2_i_dist = []
                    x1_i_par = np.array([])
                    x2_i_par = np.array([])

                    # define latent space mesh grid
                    z_mesh = np.mgrid[-0.99:-0.01:100j, -0.99:-0.01:100j]
                    z_mesh = np.vstack([z_mesh, np.zeros((2, 100, 100))])

                    #for z_i in range(10000):
                    for i in range(z_mesh.shape[1]):
                        for j in range(z_mesh.shape[2]):
                            a = torch.randn(1, ndim_z)
                            a[0, 0] = z_mesh[0, i, j]
                            a[0, 1] = z_mesh[1, i, j]
                            x_i = model(torch.cat([
                                a,
                                torch.zeros(1, ndim_tot - ndim_y - ndim_z),
                                y_samps_latent
                            ],
                                                  dim=1).to(device),
                                        rev=True)
                            x_i = x_i.cpu().data.numpy()

                            # calculate hue and intensity
                            if np.abs(mode_1[0][0] -
                                      x_i[0][0]) < np.abs(mode_2[0][0] -
                                                          x_i[0][1]):
                                z_mesh[2, i,
                                       j] = np.abs(mode_1[0][0] - x_i[0][0])
                                z_mesh[3, i, j] = 0

                            else:
                                z_mesh[2, i,
                                       j] = np.abs(mode_2[0][0] - x_i[0][1])
                                z_mesh[3, i, j] = 1

                    z_mesh[2, :, :][z_mesh[3, :, :] == 0] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 0] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 0])
                    z_mesh[2, :, :][z_mesh[3, :, :] == 1] = z_mesh[2, :, :][
                        z_mesh[3, :, :] == 1] / np.max(
                            z_mesh[2, :, :][z_mesh[3, :, :] == 1])

                    bg_color = 'black'
                    fg_color = 'red'

                    fig = plt.figure(facecolor=bg_color, edgecolor=fg_color)
                    axes = fig.add_subplot(111)
                    axes.patch.set_facecolor(bg_color)
                    axes.xaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    axes.yaxis.set_tick_params(color=fg_color,
                                               labelcolor=fg_color)
                    for spine in axes.spines.values():
                        spine.set_color(fg_color)
                    axes.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 0],
                                 z_mesh[1, :, :][z_mesh[3, :, :] == 0],
                                 s=1,
                                 c=z_mesh[2, :, :][z_mesh[3, :, :] == 0],
                                 cmap='Greens')
                    axes.scatter(z_mesh[0, :, :][z_mesh[3, :, :] == 1],
                                 z_mesh[1, :, :][z_mesh[3, :, :] == 1],
                                 s=1,
                                 c=z_mesh[2, :, :][z_mesh[3, :, :] == 1],
                                 cmap='Purples')
                    plt.xlabel('z-space', color=fg_color)
                    plt.ylabel('z-space', color=fg_color)
                    plt.savefig('%sstruct_z.png' % out_dir, dpi=360)
                    plt.close()

                # end of latent space structure plotting

                # initialize plot for showing testing results
                fig, axes = plt.subplots(r, r, figsize=(6, 6))
                for i in range(r):
                    for j in range(r):

                        # convert data into correct format
                        y_samps = np.tile(np.array(labels_test[cnt, :]),
                                          N_samp).reshape(N_samp, ndim_y)
                        y_samps = torch.tensor(y_samps, dtype=torch.float)
                        #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                        y_samps = torch.cat(
                            [
                                torch.randn(N_samp,
                                            ndim_z),  #zeros_noise_scale * 
                                torch.zeros(N_samp,
                                            ndim_tot - ndim_y - ndim_z),
                                y_samps
                            ],
                            dim=1)
                        y_samps = y_samps.to(device)

                        if conv_nn:
                            y_samps = y_samps.reshape(y_samps.shape[0],
                                                      y_samps.shape[1], 1, 1)
                        rev_x = model(y_samps, rev=True)
                        rev_x = rev_x.cpu().data.numpy()

                        if conv_nn:
                            rev_x = rev_x.reshape(rev_x.shape[0],
                                                  rev_x.shape[1])

                        # plot the samples and the true contours
                        axes[i, j].clear()
                        axes[i, j].contour(mvec,
                                           cvec,
                                           lik[i, j, :].reshape(Ngrid, Ngrid),
                                           levels=[0.68, 0.9, 0.99])
                        axes[i, j].scatter(rev_x[:, 0],
                                           rev_x[:, 1],
                                           s=0.5,
                                           alpha=0.5,
                                           color='red')
                        axes[i, j].scatter(true_post[i, j, :, 1],
                                           true_post[i, j, :, 0],
                                           s=0.5,
                                           alpha=0.5,
                                           color='blue')
                        axes[i, j].plot(pos_test[cnt, 0],
                                        pos_test[cnt, 1],
                                        '+r',
                                        markersize=8)
                        axes[i, j].axis(bound)

                        # add contours to results
                        try:
                            if do_contours:
                                contour_y = np.reshape(rev_x[:, 1],
                                                       (rev_x[:, 1].shape[0]))
                                contour_x = np.reshape(rev_x[:, 0],
                                                       (rev_x[:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                kernel_cnn = make_contour_plot(
                                    axes[i, j],
                                    contour_x,
                                    contour_y,
                                    contour_dataset,
                                    'red',
                                    flip=False,
                                    kernel_cnn=False)

                                # run overlap tests on results
                                contour_x = np.reshape(
                                    true_post[i, j][:, 1],
                                    (true_post[i, j][:, 1].shape[0]))
                                contour_y = np.reshape(
                                    true_post[i, j][:, 0],
                                    (true_post[i, j][:, 0].shape[0]))
                                contour_dataset = np.array(
                                    [contour_x, contour_y])
                                ks_score, ad_score, beta_score = overlap_tests(
                                    rev_x, true_post[i, j], pos_test[cnt],
                                    kernel_cnn, gaussian_kde(contour_dataset))
                                axes[i, j].legend([
                                    'Overlap: %s' %
                                    str(np.round(beta_score, 3))
                                ])

                                beta_score_hist.append([beta_score])
                        except ValueError:
                            pass

                        cnt += 1

                # save the results to file
                fig.canvas.draw()
                plt.savefig('%sposteriors_%s.png' % (out_dir, i_epoch),
                            dpi=360)
                plt.savefig('%slatest.png' % out_dir, dpi=360)

                plot_losses(pe_losses,
                            '%spe_losses.png' % out_dir,
                            legend=['PE-GEN'])
                plot_losses(pe_losses,
                            '%spe_losses_logscale.png' % out_dir,
                            logscale=True,
                            legend=['PE-GEN'])

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
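The three loss handles above (loss_fit, loss_latent, loss_backward) are bound to fit and MMD_multiscale, which are defined elsewhere in this codebase. As a rough orientation only, here is a minimal sketch of the kind of functions these names usually refer to in this INN training recipe: a plain mean-squared error and a multiscale inverse-multiquadratic MMD. The kernel widths below are illustrative assumptions, not the author's values.

import torch

def fit(input, target):
    # plain mean-squared error between network output and target
    return torch.mean((input - target)**2)

def MMD_multiscale(x, y):
    # maximum mean discrepancy between two sample sets, summed over
    # several inverse-multiquadratic kernel widths
    xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t())
    rx = xx.diag().unsqueeze(0).expand_as(xx)
    ry = yy.diag().unsqueeze(0).expand_as(yy)
    dxx = rx.t() + rx - 2. * xx
    dyy = ry.t() + ry - 2. * yy
    dxy = rx.t() + ry - 2. * zz
    XX = torch.zeros(xx.shape, device=x.device)
    YY = torch.zeros(xx.shape, device=x.device)
    XY = torch.zeros(xx.shape, device=x.device)
    for a in [0.05, 0.2, 0.9]:  # illustrative kernel widths
        XX += a**2 * (a**2 + dxx)**-1
        YY += a**2 * (a**2 + dyy)**-1
        XY += a**2 * (a**2 + dxy)**-1
    return torch.mean(XX + YY - 2. * XY)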
Example #11
0
def main():
    # Set up data
    test_split = 1 # number of testing samples to use

    # load in gw templates and signals
    signal_train_images, signal_train_pars, signal_image, noise_signal, signal_pars = load_gw_data()
    
    if add_noise_real:
        train_array = []
        train_pe_array = []
        for i in range(len(signal_train_images)):
            for j in range(n_real):
                train_array.append([signal_train_images[i] + np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23])
                train_pe_array.append([signal_train_pars[i]])
        train_array = np.array(train_array)
        train_pe_array = np.array(train_pe_array)
        train_array = train_array.reshape(train_array.shape[0],train_array.shape[2])
        train_pe_array = train_pe_array.reshape(train_pe_array.shape[0],train_pe_array.shape[2])
    else:
        for i in range(len(signal_train_images)):
            signal_train_images[i] += np.random.normal(loc=0.0, scale=n_sig) / 817.98 * 1079.23
    
    # load in lalinference noise signal
    noise_signal = h5py.File("gw_data/data/%s0%s.hdf5" % (event_name,tag),"r")
    noise_signal = np.reshape(noise_signal['wht_wvf'][:] * 1079.23,(n_pix,1)) # TODO: the 1079.23 / 817.98 scaling should not be hard-coded
    #noise_signal *= 1079.23 / 817.98
    #noise_signal = noise_signal.reshape(noise_signal.shape[0],1)

    plt.plot(noise_signal)
    plt.savefig('%s/test.png' % out_path)
    plt.close()

    # load in lalinference samples
    with open('gw_data/data/gw150914_mc_q_lalinf_post_srate-1024_python3.sav','rb' ) as f:
        lalinf_post = pickle.load(f) 
    lalinf_mc = lalinf_post[0]
    lalinf_q = lalinf_post[1]
    kernel_lalinf = gaussian_kde(lalinf_post)

    # declare gw variants of positions and labels
    mc_max = np.max(signal_train_pars[:,0])
    #signal_train_pars /= mc_max
    labels = torch.tensor(signal_train_images, dtype=torch.float)
    pos = torch.tensor(signal_train_pars, dtype=torch.float)

    # setting up the model
    ndim_x = 2    # number of parameter dimensions
    ndim_y = n_pix    # number of data dimensions
    ndim_z = 100    # number of latent space dimensions
    ndim_tot = n_pix + ndim_z + ndim_x + n_neurons  # total network width: data + latent + parameters + extra padding neurons

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    # each hidden node is a reversible multiplicative coupling block acting on the full ndim_tot vector
    t1 = Node([inp.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
    
    
    t2 = Node([t1.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0, 'batch_norm': False}})
     
    """
    t3 = Node([t2.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}, 'F_args': {'batch_norm': False}})
     
    
    t4 = Node([t3.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t5 = Node([t4.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t6 = Node([t5.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t7 = Node([t6.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    t8 = Node([t7.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})

    t9 = Node([t8.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    
    t10 = Node([t9.out0], rev_multiplicative_layer,
              {'F_class': F_fully_connected, 'clamp': 2.0,
               'F_args': {'dropout': 0.0}})
    """
    # define output layer node
    outp = OutputNode([t2.out0], name='output')

    nodes = [inp, t1, t2, outp]
    model = ReversibleGraphNet(nodes)

    # Train model

    lr = 1e-4
    gamma = 0.01**(1./120)
    l2_reg = 2e-5

    y_noise_scale = 1     # scale of the noise added to the data (y) during training
    zeros_noise_scale = 3e-2 # scale of the noise added to the zero padding during training

    # relative weighting of losses:
    lambd_predict = 300. # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.     # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)


    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.8, 0.8),
                             eps=1e-04, weight_decay=l2_reg, amsgrad=True)
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)


    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up test set data loader
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:test_split], labels[:test_split]),
        batch_size=batch_size, shuffle=True, drop_last=True)

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pos[:], labels[:]),
        batch_size=batch_size, shuffle=True, drop_last=True)


    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01*torch.randn(coeff.fc3.weight.shape)
            
    model.to(device)

    # number of test samples to use after training 
    N_samp = 4000

    # choose test samples to use after training
    # N_samp copies of the test signal buried in noise; only the latent z part changes per sample.
    #x_samps = torch.cat([x for x,y in test_loader], dim=0)[:N_samp]
    #y_samps = torch.cat([y for x,y in test_loader], dim=0)[:N_samp]
    #y_samps += torch.randn(N_samp, ndim_y) #* y_noise_scale
    y_samps = y_noise_scale * torch.tensor(np.repeat(noise_signal, N_samp, axis=1).T, dtype=torch.float)

    # make test samples. First element is the latent space dimension
    # second element is the extra zeros needed to pad the input.
    # the third element is the time series
    y_samps = torch.cat([torch.randn(N_samp, ndim_z),
                         zeros_noise_scale * torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z), # zeros_noise_scale * 
                         y_samps], dim=1)
    # we now have N_samp copies of the event buried in noise, zero-padded up to ndim_tot
    y_samps = y_samps.to(device)

    # start training loop
    lossf_hist = []
    lossrev_hist = []
    beta_score_hist = []      
    kernel_cnn = False      
    try:
    #     print('#Epoch \tIt/s \tl_total')
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            #print(i_epoch, end='\t ')
            _, lossf, lossrev = train(
                model, train_loader, n_its_per_epoch, zeros_noise_scale,
                batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                optimizer, lambd_predict, loss_fit, lambd_latent, loss_latent,
                lambd_rev, loss_backward, i_epoch)

            # append current loss value to loss histories
            lossf_hist.append(lossf.item())
            lossrev_hist.append(lossrev.item())
            pe_losses = [lossf_hist,lossrev_hist]

            # predict parameters of signal
            rev_x = model(y_samps, rev=True)
            rev_x = rev_x.cpu().data.numpy()
            #rev_x[:,0] = mc_max * rev_x[:,0]
        
            # plot pe results and loss
            beta_max = 0
            """
            if i_epoch>0:
                kernel_cnn = gaussian_kde(rev_x)
                #overlap_y = np.reshape(rev_x[:,1], (rev_x[:,1].shape[0]))
                #overlap_x = np.reshape(rev_x[:,0], (rev_x[:,0].shape[0]))
                #overlap_dataset = np.array([overlap_x,overlap_y]).transpose()
                ks_score, ad_score, beta_score = overlap_tests(rev_x,lalinf_post,signal_pars,kernel_cnn,kernel_lalinf)
                beta_score_hist.append([beta_score])    
                plt.plot(np.linspace(1,i_epoch,len(beta_score_hist)),beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()            
            """
            if (i_epoch % plot_cadence == 0) and (i_epoch > 0):
                pe_std = [0.02185649964844209, 0.005701401364171313] # this will need to be removed
                beta_score_hist.append([plot_pe_samples(rev_x,signal_pars,out_path,i_epoch,lalinf_post,pe_std,kernel_lalinf=kernel_lalinf,kernel_cnn=kernel_cnn)])
                plt.plot(np.linspace(plot_cadence,i_epoch,len(beta_score_hist)),beta_score_hist)
                plt.savefig('%s/latest/beta_hist.png' % out_path)
                plt.close()

                # plot loss curves - non-log and log
                plot_losses(pe_losses,'%s/latest/pe_losses.png' % out_path,legend=['PE-GEN'])
                plot_losses(pe_losses,'%s/latest/pe_losses_logscale.png' % out_path,logscale=True,legend=['PE-GEN'])

                # save model
                #if beta_score_hist[:-1] > beta_max: beta_max = beta_score_hist[:-1]
                #if beta_score_hist[:-1] > beta_max or i_epoch==plot_cadence: model.save_state_dict('mytraining.pt')
            # make PE scatter plots with contours and beta score 
            #plt.scatter(rev_x[:,0], rev_x[:,1], s=1., c='red')
            #plt.scatter(lalinf_mc, lalinf_q, s=1., c='blue')
        

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
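Note that the commented-out checkpointing call in the listing above, model.save_state_dict('mytraining.pt'), is not an existing torch.nn.Module method. If checkpointing on an improved beta score were wanted, a standard PyTorch pattern would be the sketch below; maybe_checkpoint and the file name are illustrative, not part of the original code.

import torch

def maybe_checkpoint(model, beta_score, best_beta, path='mytraining.pt'):
    # save the network weights whenever the overlap (beta) score improves
    if beta_score > best_beta:
        torch.save(model.state_dict(), path)
        return beta_score
    return best_beta

# restoring later:
# model.load_state_dict(torch.load('mytraining.pt'))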
Example #12
0
def main():

    # Set up simulation parameters
    batch_size = 1600  # set batch size
    r = 3  # the grid dimension for the output tests
    test_split = r * r  # number of testing samples to use
    sig_model = 'sg'  # the signal model to use
    sigma = 0.2  # the noise std
    ndata = 32  # number of data samples
    bound = [0.0, 1.0, 0.0, 1.0]  # effective bound for likelihood
    seed = 1  # seed for generating data

    # generate data
    pos, labels, x, sig = data.generate(model=sig_model,
                                        tot_dataset_size=2**20,
                                        ndata=ndata,
                                        sigma=sigma,
                                        prior_bound=bound,
                                        seed=seed)

    # separate the test data for plotting
    pos_test = pos[-test_split:]
    labels_test = labels[-test_split:]
    sig_test = sig[-test_split:]

    # plot the test data examples
    plt.figure(figsize=(6, 6))
    fig, axes = plt.subplots(r, r, figsize=(6, 6))
    cnt = 0
    for i in range(r):
        for j in range(r):
            axes[i, j].plot(x, np.array(labels_test[cnt, :]), '.')
            axes[i, j].plot(x, np.array(sig_test[cnt, :]), '-')
            cnt += 1
            axes[i, j].axis([0, 1, -1.5, 1.5])
    plt.savefig('/data/public_html/chrism/FrEIA/test_distribution.png',
                dpi=360)
    plt.close()

    # setting up the model
    ndim_x = 2  # number of posterior parameter dimensions (x,y)
    ndim_y = ndata  # number of label dimensions (noisy data samples)
    ndim_z = 8  # number of latent space dimensions
    ndim_tot = max(ndim_x,
                   ndim_y + ndim_z)  # must be > ndim_x and > ndim_y + ndim_z

    # define different parts of the network
    # define input node
    inp = InputNode(ndim_tot, name='input')

    # define hidden layer nodes
    t1 = Node([inp.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    t2 = Node([t1.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    t3 = Node([t2.out0], rev_multiplicative_layer, {
        'F_class': F_fully_connected,
        'clamp': 2.0,
        'F_args': {
            'dropout': 0.2
        }
    })

    # define output layer node
    outp = OutputNode([t3.out0], name='output')

    nodes = [inp, t1, t2, t3, outp]
    model = ReversibleGraphNet(nodes)

    # Train model
    # Training parameters
    n_epochs = 1000
    meta_epoch = 12  # step size (in epochs) of the lr scheduler below
    n_its_per_epoch = 12
    batch_size = 1600

    lr = 1e-2
    gamma = 0.01**(1. / 120)
    l2_reg = 2e-5

    y_noise_scale = 3e-2
    zeros_noise_scale = 3e-2

    # relative weighting of losses:
    lambd_predict = 300.  # forward pass
    lambd_latent = 300.  # latent space
    lambd_rev = 400.  # backwards pass

    # padding both the data and the latent space
    # such that they have equal dimension to the parameter space
    #pad_x = torch.zeros(batch_size, ndim_tot - ndim_x)
    #pad_yz = torch.zeros(batch_size, ndim_tot - ndim_y - ndim_z)

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(0.8, 0.8),
                                 eps=1e-04,
                                 weight_decay=l2_reg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=meta_epoch,
                                                gamma=gamma)

    # define the three loss functions
    loss_backward = MMD_multiscale
    loss_latent = MMD_multiscale
    loss_fit = fit

    # set up training set data loader
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
        pos[test_split:], labels[test_split:]),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=True)

    # initialisation of network weights
    for mod_list in model.children():
        for block in mod_list.children():
            for coeff in block.children():
                coeff.fc3.weight.data = 0.01 * torch.randn(
                    coeff.fc3.weight.shape)
    model.to(device)

    # initialize plot for showing testing results
    fig, axes = plt.subplots(r, r, figsize=(6, 6))

    # number of test samples to use after training
    N_samp = 256

    # precompute true likelihood on the test data
    Ngrid = 64
    cnt = 0
    lik = np.zeros((r, r, Ngrid * Ngrid))
    for i in range(r):
        for j in range(r):
            mvec, cvec, temp = data.get_lik(np.array(
                labels_test[cnt, :]).flatten(),
                                            n_grid=Ngrid,
                                            sig_model=sig_model,
                                            sigma=sigma,
                                            xvec=x,
                                            bound=bound)
            lik[i, j, :] = temp.flatten()
            cnt += 1

    # start training loop
    try:
        t_start = time()
        # loop over number of epochs
        for i_epoch in tqdm(range(n_epochs), ascii=True, ncols=80):

            scheduler.step()

            # Initially, the l2 reg. on x and z can give huge gradients, set
            # the lr lower for this
            if i_epoch < 0:
                print('inside this iepoch<0 thing')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr * 1e-2

            # train the model
            train(model, train_loader, n_its_per_epoch, zeros_noise_scale,
                  batch_size, ndim_tot, ndim_x, ndim_y, ndim_z, y_noise_scale,
                  optimizer, lambd_predict, loss_fit, lambd_latent,
                  loss_latent, lambd_rev, loss_backward, i_epoch)

            # loop over a few cases and plot results in a grid
            cnt = 0
            for i in range(r):
                for j in range(r):

                    # convert data into correct format
                    y_samps = np.tile(np.array(labels_test[cnt, :]),
                                      N_samp).reshape(N_samp, ndim_y)
                    y_samps = torch.tensor(y_samps, dtype=torch.float)
                    #y_samps += y_noise_scale * torch.randn(N_samp, ndim_y)
                    y_samps = torch.cat(
                        [
                            torch.randn(N_samp, ndim_z),  #zeros_noise_scale * 
                            torch.zeros(N_samp, ndim_tot - ndim_y - ndim_z),
                            y_samps
                        ],
                        dim=1)
                    y_samps = y_samps.to(device)

                    # use the network to predict parameters
                    rev_x = model(y_samps, rev=True)
                    rev_x = rev_x.cpu().data.numpy()

                    # plot the samples and the true contours
                    axes[i, j].clear()
                    axes[i, j].contour(mvec,
                                       cvec,
                                       lik[i, j, :].reshape(Ngrid, Ngrid),
                                       levels=[0.68, 0.9, 0.99])
                    axes[i, j].scatter(rev_x[:, 0],
                                       rev_x[:, 1],
                                       s=0.5,
                                       alpha=0.5)
                    axes[i, j].plot(pos_test[cnt, 0],
                                    pos_test[cnt, 1],
                                    '+r',
                                    markersize=8)
                    axes[i, j].axis(bound)

                    cnt += 1

            # save the results to file
            fig.canvas.draw()
            plt.savefig('/data/public_html/chrism/FrEIA/posteriors_%s.png' %
                        i_epoch,
                        dpi=360)
            plt.savefig('/data/public_html/chrism/FrEIA/latest.png', dpi=360)

    except KeyboardInterrupt:
        pass
    finally:
        print(f"\n\nTraining took {(time()-t_start)/60:.2f} minutes\n")
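Each of the examples above delegates the actual optimisation step to a train(...) helper that is not shown. A minimal sketch of the kind of step those arguments imply — a forward fit loss on the data part, an MMD loss pushing the latent part towards a standard normal, and an MMD loss on the backward-reconstructed parameters, weighted by lambd_predict, lambd_latent and lambd_rev — is given below. The padding and noise handling here are a simplified assumption, not the author's exact implementation (which also loops over n_its_per_epoch iterations and returns per-epoch loss values).

import torch

def train_step(model, x, y, optimizer,
               ndim_tot, ndim_x, ndim_y, ndim_z,
               y_noise_scale, zeros_noise_scale,
               lambd_predict, loss_fit,
               lambd_latent, loss_latent,
               lambd_rev, loss_backward,
               device='cpu'):
    # one optimisation step of the three weighted INN losses
    batch_size = x.shape[0]
    x, y = x.to(device), y.to(device)

    # pad both sides of the invertible map up to ndim_tot
    pad_x = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_x, device=device)
    pad_yz = zeros_noise_scale * torch.randn(batch_size, ndim_tot - ndim_y - ndim_z, device=device)
    z = torch.randn(batch_size, ndim_z, device=device)
    y_noisy = y + y_noise_scale * torch.randn_like(y)

    x_padded = torch.cat([x, pad_x], dim=1)
    yz_padded = torch.cat([z, pad_yz, y_noisy], dim=1)

    optimizer.zero_grad()

    # forward pass: predict y from x and push the latent part towards N(0, 1)
    output = model(x_padded)
    l_fit = lambd_predict * loss_fit(output[:, -ndim_y:], y_noisy)
    l_latent = lambd_latent * loss_latent(output[:, :ndim_z], z)

    # backward pass: samples mapped back from [z, padding, y] should match the x prior
    x_rec = model(yz_padded, rev=True)
    l_rev = lambd_rev * loss_backward(x_rec[:, :ndim_x], x)

    loss = l_fit + l_latent + l_rev
    loss.backward()
    optimizer.step()
    return loss.item(), l_latent.item(), l_rev.item(), l_fit.item()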