def rbfnn(args, X, y, Xval, yval):
    from sklearn.cluster import KMeans
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
    args.n_clusters = min(args.n_clusters, X.shape[0])

    y, y_mean, y_std = normalize_y(y)

    # fit k-means; the cluster centres serve as RBF centres for the variance head
    cluster_alg = KMeans(args.n_clusters)
    cluster_alg.fit(X)
    c = cluster_alg.cluster_centers_
    if torch.cuda.is_available() and args.cuda:
        c = torch.tensor(c).to(torch.float32).to('cuda')
    else:
        c = torch.tensor(c).to(torch.float32)

    mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                               torch.nn.ReLU(), torch.nn.Linear(n_neurons, 1))
    # variance head built from custom modules in this repo (RBF features,
    # positivity-constrained linear layers, and a reciprocal transform)
    var = torch.nn.Sequential(RBF(None, None, c, 1.0),
                              PosLinear(args.n_clusters, 1, bias=False),
                              Reciprocal(0.1), PosLinear(1, 1, bias=False))

    if torch.cuda.is_available() and args.cuda:
        mean.cuda()
        var.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(chain(mean.parameters(), var.parameters()),
                                 lr=args.lr)
    it = 0
    progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')
    batches = batchify(X, y, batch_size=args.batch_size, shuffel=args.shuffel)
    while it < args.iters:
        switch = 1.0 if it > args.iters / 2 else 0.0  # fixed small variance for the first half of training, learned variance afterwards
        optimizer.zero_grad()
        data, label = next(batches)
        data = torch.tensor(data).to(torch.float32).to(device)
        label = torch.tensor(label).to(torch.float32).to(device)
        m, v = mean(data), var(data)
        v = switch * v + (1 - switch) * torch.tensor([0.02**2], device=device)
        loss = normal_log_prob(label, m, v).sum()
        (-loss).backward()
        optimizer.step()
        it += 1
        progressBar.update()
        progressBar.set_postfix({'loss': loss.item()})
    progressBar.close()

    data = torch.tensor(Xval).to(torch.float32).to(device)
    label = torch.tensor(yval).to(torch.float32).to(device)
    with torch.no_grad():
        m, v = mean(data), var(data)
    m = m * y_std + y_mean
    v = v * y_std**2
    log_px = normal_log_prob(label, m, v)
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()
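# Note: these examples rely on helpers defined elsewhere in the repository
# (normalize_y, normal_log_prob, batchify, and the custom modules RBF, PosLinear,
# Reciprocal). They are not shown here; the sketch below only illustrates the
# interfaces the code above assumes, not the actual implementations.
import math
import numpy as np
import torch

def normalize_y(y):
    # standardize the targets; callers undo this with m * y_std + y_mean
    y_mean, y_std = y.mean(), y.std()
    return (y - y_mean) / y_std, y_mean, y_std

def normal_log_prob(y, m, v):
    # elementwise Gaussian log-density log N(y | m, v); m and v may be [N, 1]
    m, v = m.flatten(), v.flatten()
    return -0.5 * (torch.log(2 * math.pi * v) + (y - m)**2 / v)

def batchify(X, y, batch_size=256, shuffel=True):
    # endless generator of (data, label) minibatches, consumed with next()
    n = X.shape[0]
    while True:
        idx = np.random.permutation(n) if shuffel else np.arange(n)
        for i in range(0, n, batch_size):
            b = idx[i:i + batch_size]
            yield X[b], y[b]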
Example #2
def nn(args, X, y, Xpool, ypool, Xtest, ytest):
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50

    y, y_mean, y_std = normalize_y(y)

    mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                               torch.nn.ReLU(), torch.nn.Linear(n_neurons, 1))
    var = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                              torch.nn.ReLU(), torch.nn.Linear(n_neurons, 1),
                              torch.nn.Softplus())
    if torch.cuda.is_available() and args.cuda:
        mean.cuda()
        var.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(chain(mean.parameters(), var.parameters()),
                                 lr=args.lr)
    it = 0
    progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')
    batches = batchify(X, y, batch_size=args.batch_size, shuffel=args.shuffel)
    while it < args.iters:
        switch = 1.0 if it > args.iters / 2 else 0.0
        optimizer.zero_grad()
        data, label = next(batches)
        data = torch.tensor(data).to(torch.float32).to(device)
        label = torch.tensor(label).to(torch.float32).to(device)
        m, v = mean(data), var(data)
        v = switch * v + (1 - switch) * torch.tensor([0.02**2], device=device)
        loss = normal_log_prob(label, m, v).sum()
        (-loss).backward()
        optimizer.step()
        it += 1
        progressBar.update()
        progressBar.set_postfix({'loss': loss.item()})
    progressBar.close()

    with torch.no_grad():
        data = torch.tensor(Xpool).to(torch.float32).to(device)
        label = torch.tensor(ypool).to(torch.float32).to(device)
        m, v = mean(data), var(data)
        pool_m = m * y_std + y_mean
        pool_v = v * y_std**2

        data = torch.tensor(Xtest).to(torch.float32).to(device)
        label = torch.tensor(ytest).to(torch.float32).to(device)
        m, v = mean(data), var(data)
        m = m * y_std + y_mean
        v = v * y_std**2
        test_log_px = normal_log_prob(label, m, v)
        test_rmse = ((label - m.flatten())**2).mean().sqrt()

    return test_log_px.mean().item(), \
            test_rmse.item(), \
            pool_m.cpu().flatten().numpy(), \
            pool_v.cpu().flatten().numpy()
Example #3
def john(args, X, y, Xval, yval):
    from sklearn.cluster import KMeans
    from utils import dist
    from itertools import chain
    from torch import distributions as D
    from locality_sampler import gen_Qw, locality_sampler2
    from sklearn.decomposition import PCA
    
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
    args.n_clusters = min(args.n_clusters, X.shape[0])
    
    y, y_mean, y_std = normalize_y(y)
    
    # locality-sampler settings: primary/secondary sampling unit counts and neighbourhood size
    mean_psu = 1
    mean_ssu = 40
    mean_M = 50

    var_psu = 2
    var_ssu = 10
    var_M = 10
    
    num_draws_train = 20
    kmeans = KMeans(n_clusters=args.n_clusters)
    if args.dataset != 'year_prediction':
        kmeans.fit(X)
    else:
        kmeans.fit(X[np.random.randint(0, X.shape[0], size=(10000))])
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32)
    if torch.cuda.is_available() and args.cuda:
        c = c.to('cuda')
        
    class translatedSigmoid(torch.nn.Module):
        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = torch.nn.Parameter(torch.tensor([1.5]))
            
        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta * (6.9077542789816375)  # constant ≈ log(1e3): the sigmoid is ≈ 1e-3 at x = 0
            return torch.sigmoid((x+alpha)/beta)
    
    class GPNNModel(torch.nn.Module):
        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                      torch.nn.ReLU(),
                                      torch.nn.Linear(n_neurons, y.shape[1]))
            self.alph = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                      torch.nn.ReLU(),
                                      torch.nn.Linear(n_neurons, y.shape[1]),
                                      torch.nn.Softplus())
            self.bet = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                     torch.nn.ReLU(),
                                     torch.nn.Linear(n_neurons, y.shape[1]),
                                     torch.nn.Softplus())
            self.trans = translatedSigmoid()
            
        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a+1e-8, 1.0/(b+1e-8))  # sample Gamma variates and invert below, i.e. an inverse-Gamma variance
                if self.training:
                    samples_var = gamma_dist.rsample(torch.Size([num_draws_train]))
                    x_var = (1.0/(samples_var+1e-8))
                else:
                    samples_var = gamma_dist.rsample(torch.Size([2000]))
                    x_var = (1.0/(samples_var+1e-8))
                var = (1-s) * x_var + s * y_std ** 2  # fall back to the prior variance y_std**2 far from the training data

            else:
                var = 0.05*torch.ones_like(mean)
            return mean, var
    
    model = GPNNModel()
    if torch.cuda.is_available() and args.cuda:
        model.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
    optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                        model.bet.parameters(),
                                        model.trans.parameters()), lr=1e-4)
    mean_Q, mean_w = gen_Qw(X, mean_psu, mean_ssu, mean_M)
    
    if X.shape[0] > 100000 and X.shape[1] > 10:
        pca = PCA(n_components=0.5)
        temp = pca.fit_transform(X)
        var_Q, var_w = gen_Qw(temp, var_psu, var_ssu, var_M)
    else:    
        var_Q, var_w = gen_Qw(X, var_psu, var_ssu, var_M)
    
    #mean_pseupoch = get_pseupoch(mean_w,0.5)
    #var_pseupoch = get_pseupoch(var_w,0.5)
    opt_switch = 1
    mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
    var_w = torch.tensor(var_w).to(torch.float32).to(device)
    model.train()
    
    X = torch.tensor(X).to(torch.float32).to(device)
    y = torch.tensor(y).to(torch.float32).to(device)
    batches = batchify(X, y, batch_size = args.batch_size, shuffel=args.shuffel)

    # validation data and performance measures
    ll_list = []
    mae_list = []
    rmse_list = []
    x_eval = torch.tensor(Xval).to(torch.float32).to(device)
    y_eval = torch.tensor(yval).to(torch.float32).to(device)
    y_mean = torch.tensor(y_mean).to(torch.float32).to(device)
    y_std = torch.tensor(y_std).to(torch.float32).to(device)

    it = 0
    its_per_epoch = int(np.ceil(X.shape[0] / args.batch_size))
    epochs = round(args.iters / its_per_epoch)
    while it < args.iters:
        switch = 1.0 if it > args.iters/2.0 else 0.0
        
        if it % 11: opt_switch = opt_switch + 1 # change between var and mean optimizer
        with torch.autograd.detect_anomaly():
            data, label = next(batches)
            if not switch:
                optimizer.zero_grad()
                m, v = model(data, switch)
                loss = -t_likelihood(label, m, v.unsqueeze(0))
                loss.backward()
                optimizer.step()
            else:
                if opt_switch % 2 == 0:    
                    #for b in range(mean_pseupoch):
                    optimizer.zero_grad()
                    batch = locality_sampler2(mean_psu,mean_ssu,mean_Q,mean_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch], m, v, mean_w[batch])
                    loss.backward()
                    optimizer.step()
                else:
                    #for b in range(var_pseupoch):
                    optimizer2.zero_grad()
                    batch = locality_sampler2(var_psu,var_ssu,var_Q,var_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch], m, v, var_w[batch])
                    loss.backward()
                    optimizer2.step()

        # test on validation set once per epoch
        if it % its_per_epoch == 0:
            model.eval()
            with torch.no_grad():
                m, v = model(x_eval, switch)
            m = m * y_std + y_mean
            v = v * y_std ** 2
            if switch == 0:
                ll = t_likelihood(y_eval, m, v.unsqueeze(0)).item()
            else:
                ll = t_likelihood(y_eval, m, v).item()
            # if it % (500 * its_per_epoch) == 0:
            #     print('Epoch {:d}/{:d},'.format(it // its_per_epoch, epochs), 'Loss {:.4f},'.format(ll))

            # log validation performance after we are stable in the second optimization regime
            if it > args.iters * 0.60:
                ll_list.append(ll)
                error = torch.norm(y_eval - m, p=2, dim=1)
                mae_list.append(error.abs().mean().item())
                rmse_list.append((error ** 2).mean().sqrt().item())

                # early stop check
                if len(ll_list) - np.argmax(ll_list) > 50:
                    it = args.iters
                    print('Early Stop!')
            # switch back to training mode after every validation pass
            model.train()

        it+=1

    # get best LL
    i_best = np.argmax(ll_list)

    # evaluate model moments
    with torch.no_grad():
        model.eval()  # use the evaluation sampling path for the variance
        m, v = model(x_eval, 1.0)
        m = m * y_std + y_mean
        v = v * y_std ** 2

    return ll_list[i_best], rmse_list[i_best], m.cpu().numpy(), v.cpu().numpy()
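# t_likelihood is imported from elsewhere in the repository and not shown.
# Judging from how it is called above (variance of shape [num_samples, N, 1],
# optional per-point sampling weights), it evaluates a Gaussian log-likelihood
# averaged over the Monte Carlo variance samples, with inverse weighting of the
# locality-sampled points. A purely hypothetical sketch of that interface:
import math
import torch

def t_likelihood(y, m, v, w=None):
    # y, m: [N, D]; v: [S, N, D] Monte Carlo variance samples; w: [N] weights
    log_p = -0.5 * (torch.log(2 * math.pi * v) + (y - m)**2 / v)  # [S, N, D]
    log_p = log_p.mean(dim=0)                                     # average over the S variance samples
    if w is not None:
        log_p = log_p / w.reshape(-1, 1)                          # hypothetical inverse-probability weighting
    return log_p.sum()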
Example #4
def bnn(args, X, y, Xval, yval):
    import tensorflow as tf
    import tensorflow_probability as tfp
    from tensorflow_probability import distributions as tfd
    tf.reset_default_graph()
    
    y, y_mean, y_std = normalize_y(y)
    
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
        
    def VariationalNormal(name, shape, constraint=None):
        # mean-field Normal posterior with learnable location and softplus-transformed scale
        means = tf.get_variable(name+'_mean',
                                initializer=tf.ones(shape),
                                constraint=constraint)
        stds = tf.get_variable(name+'_std',
                               initializer=-1.0*tf.ones(shape))
        return tfd.Normal(loc=means, scale=tf.nn.softplus(stds))
    
    x_p = tf.placeholder(tf.float32, shape=(None, X.shape[1]))
    y_p = tf.placeholder(tf.float32, shape=(None, 1))
    
    with tf.name_scope('model', values=[x_p]):
        layer1 = tfp.layers.DenseFlipout(
                units=n_neurons,
                activation='relu',
                kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn(),
                bias_posterior_fn = tfp.layers.default_mean_field_normal_fn()
                )
        layer2 = tfp.layers.DenseFlipout(
                units=1,
                activation='linear',
                kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn(),
                bias_posterior_fn = tfp.layers.default_mean_field_normal_fn()
                )
        predictions = layer2(layer1(x_p))
        noise = VariationalNormal('noise', [1], constraint=tf.keras.constraints.NonNeg())
        pred_distribution = tfd.Normal(loc=predictions,
                                       scale=noise.sample())
        
    neg_log_prob = -tf.reduce_mean(pred_distribution.log_prob(y_p))
    kl_div = sum(layer1.losses + layer2.losses) / X.shape[0]
    elbo_loss = neg_log_prob + kl_div
    
    with tf.name_scope("train"):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
        train_op = optimizer.minimize(elbo_loss)
        
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        it = 0
        progressBar = tqdm(desc='Training BNN', total=args.iters, unit='iter')
        batches = batchify(X, y, batch_size = args.batch_size, shuffel=args.shuffel)
        while it < args.iters:
            data, label = next(batches)
            _, l = sess.run([train_op, elbo_loss], feed_dict={x_p: data, y_p: label.reshape(-1, 1)})
            progressBar.update()
            progressBar.set_postfix({'loss': l})
            it+=1
        progressBar.close()
    
        # draw joint posterior samples of the weights and the noise scale for Monte Carlo prediction
        W0_samples = layer1.kernel_posterior.sample(1000)
        b0_samples = layer1.bias_posterior.sample(1000)
        W1_samples = layer2.kernel_posterior.sample(1000)
        b1_samples = layer2.bias_posterior.sample(1000)
        noise_samples = noise.sample(1000)
    
        W0, b0, W1, b1, n = sess.run([W0_samples,
                                      b0_samples,
                                      W1_samples,
                                      b1_samples,
                                      noise_samples])
    
    def sample_net(x, W0, b0, W1, b1, n):
        # push x through each sampled two-layer ReLU network ...
        h = np.maximum(np.matmul(x[np.newaxis], W0) + b0[:, np.newaxis, :], 0.0)
        # ... and add observation noise scaled by the sampled noise std
        return np.matmul(h, W1) + b1[:, np.newaxis, :] + n[:, np.newaxis, :] * np.random.randn()
        
    samples = sample_net(Xval, W0, b0, W1, b1, n)

    m = samples.mean(axis=0)
    v = samples.var(axis=0)
    
    m = m * y_std + y_mean
    v = v * y_std**2
    
    log_probs = normal_log_prob(yval, m, v)
    rmse = math.sqrt(((m.flatten() - yval)**2).mean())
    
    return log_probs.mean(), rmse
Example #5
def ensnn(args, X, y, Xval, yval):
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
    
    y, y_mean, y_std = normalize_y(y)
    
    ms, vs = [], []
    for _ in range(args.n_models):  # each ensemble member starts from a different random initialization
        mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                   torch.nn.ReLU(),
                                   torch.nn.Linear(n_neurons, 1))
        var = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                  torch.nn.ReLU(),
                                  torch.nn.Linear(n_neurons, 1),
                                  torch.nn.Softplus())
        if torch.cuda.is_available() and args.cuda:
            mean.cuda()
            var.cuda()
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        
        optimizer = torch.optim.Adam(chain(mean.parameters(),
                                           var.parameters()), lr=args.lr)
        it = 0
        progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')
        batches = batchify(X, y, batch_size = args.batch_size, shuffel=args.shuffel)
        while it < args.iters:
            switch = 0.0  # 1.0 if it > args.iters/2 else 0.0
            optimizer.zero_grad()
            data, label = next(batches)
            data = torch.tensor(data).to(torch.float32).to(device)
            label = torch.tensor(label).to(torch.float32).to(device)
            m, v = mean(data), var(data)
            v = switch*v + (1-switch)*torch.tensor([0.02**2], device=device)
            loss = normal_log_prob(label, m, v).sum()
            (-loss).backward()
            optimizer.step()
            it+=1
            progressBar.update()
            progressBar.set_postfix({'loss': loss.item()})
        progressBar.close()
        
        data = torch.tensor(Xval).to(torch.float32).to(device)
        label = torch.tensor(yval).to(torch.float32).to(device)
        with torch.no_grad():
            m, v = mean(data), var(data)
        m = m * y_std + y_mean
        v = v * y_std**2
        
        ms.append(m)
        vs.append(v)
    
    ms = torch.stack(ms)
    vs = torch.stack(vs)
        
    m = ms.mean(dim=0)                    # mixture mean over ensemble members
    v = (vs + ms**2).mean(dim=0) - m**2   # mixture variance (law of total variance)
    
    log_px = normal_log_prob(label, m, v)
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()
def john(args, X, y, Xval, yval):
    from sklearn.cluster import KMeans
    from utils import dist
    from itertools import chain
    from torch import distributions as D
    from locality_sampler import gen_Qw, locality_sampler2
    from sklearn.decomposition import PCA

    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
    args.n_clusters = min(args.n_clusters, X.shape[0])

    y, y_mean, y_std = normalize_y(y)

    mean_psu = 1
    mean_ssu = 40
    mean_M = 50

    var_psu = 2
    var_ssu = 10
    var_M = 10

    num_draws_train = 20
    kmeans = KMeans(n_clusters=args.n_clusters)
    if args.dataset != 'year_prediction':
        kmeans.fit(X)
    else:
        kmeans.fit(X[np.random.randint(0, X.shape[0], size=(10000))])
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32)
    if torch.cuda.is_available() and args.cuda:
        c = c.to('cuda')

    class translatedSigmoid(torch.nn.Module):
        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = torch.nn.Parameter(torch.tensor([1.5]))

        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta * (6.9077542789816375)
            return torch.sigmoid((x + alpha) / beta)

    class GPNNModel(torch.nn.Module):
        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1))
            self.alph = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.bet = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.trans = translatedSigmoid()

        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a + 1e-8, 1.0 / (b + 1e-8))
                if self.training:
                    samples_var = gamma_dist.rsample(
                        torch.Size([num_draws_train]))
                    x_var = (1.0 / (samples_var + 1e-8))
                else:
                    samples_var = gamma_dist.rsample(torch.Size([1000]))
                    x_var = (1.0 / (samples_var + 1e-8))
                var = (1 - s) * x_var + s * torch.tensor(
                    [y_std**2], device=x.device)  # HYPERPARAMETER

            else:
                var = 0.05 * torch.ones_like(mean)
            return mean, var

    model = GPNNModel()
    if torch.cuda.is_available() and args.cuda:
        model.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
    optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                        model.bet.parameters(),
                                        model.trans.parameters()),
                                  lr=1e-4)
    mean_Q, mean_w = gen_Qw(X, mean_psu, mean_ssu, mean_M)

    if X.shape[0] > 100000 and X.shape[1] > 10:
        pca = PCA(n_components=0.5)
        temp = pca.fit_transform(X)
        var_Q, var_w = gen_Qw(temp, var_psu, var_ssu, var_M)
    else:
        var_Q, var_w = gen_Qw(X, var_psu, var_ssu, var_M)

    #mean_pseupoch = get_pseupoch(mean_w,0.5)
    #var_pseupoch = get_pseupoch(var_w,0.5)
    opt_switch = 1
    mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
    var_w = torch.tensor(var_w).to(torch.float32).to(device)
    model.train()

    X = torch.tensor(X).to(torch.float32).to(device)
    y = torch.tensor(y).to(torch.float32).to(device)
    batches = batchify(X, y, batch_size=args.batch_size, shuffel=args.shuffel)

    it = 0
    while it < args.iters:
        switch = 1.0 if it > args.iters / 2.0 else 0.0

        if it % 11:
            opt_switch = opt_switch + 1  # change between var and mean optimizer
        with torch.autograd.detect_anomaly():
            data, label = next(batches)
            if not switch:
                optimizer.zero_grad()
                m, v = model(data, switch)
                loss = -t_likelihood(label.reshape(-1, 1), m,
                                     v.reshape(1, -1, 1)) / X.shape[0]
                loss.backward()
                optimizer.step()
            else:
                if opt_switch % 2 == 0:
                    #for b in range(mean_pseupoch):
                    optimizer.zero_grad()
                    batch = locality_sampler2(mean_psu, mean_ssu, mean_Q,
                                              mean_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch].reshape(-1, 1), m, v,
                                         mean_w[batch]) / X.shape[0]
                    loss.backward()
                    optimizer.step()
                else:
                    #for b in range(var_pseupoch):
                    optimizer2.zero_grad()
                    batch = locality_sampler2(var_psu, var_ssu, var_Q, var_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch].reshape(-1, 1), m, v,
                                         var_w[batch]) / X.shape[0]
                    loss.backward()
                    optimizer2.step()

        if it % 500 == 0:
            m, v = model(data, switch)
            loss = -(-v.log() / 2 -
                     ((m.flatten() - label)**2).reshape(1, -1, 1) /
                     (2 * v)).mean()
            print('Iter {0}/{1}, Loss {2}'.format(it, args.iters, loss.item()))
        it += 1

    model.eval()

    data = torch.tensor(Xval).to(torch.float32).to(device)
    label = torch.tensor(yval).to(torch.float32).to(device)
    with torch.no_grad():
        m, v = model(data, switch)
    m = m * y_std + y_mean
    v = v * y_std**2
    #log_px = normal_log_prob(label, m, v).mean(dim=0) # check for correctness
    log_px = t_likelihood(label.reshape(-1, 1), m, v) / Xval.shape[0]  # check
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()
Example #7
def jnlsmv(args, X, y, Xval, yval):
    from sklearn.cluster import KMeans
    from utils import dist
    from torch import distributions as D

    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50
    args.n_clusters = min(args.n_clusters, X.shape[0])

    y, y_mean, y_std = normalize_y(y)

    kmeans = KMeans(n_clusters=args.n_clusters)
    kmeans.fit(X)
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32)
    if torch.cuda.is_available() and args.cuda:
        c = c.to('cuda')

    class translatedSigmoid(torch.nn.Module):
        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = torch.nn.Parameter(torch.tensor([1.5]))

        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta * (6.9077542789816375)
            return torch.sigmoid((x + alpha) / beta)

    class GPNNModel(torch.nn.Module):
        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.Sigmoid(),
                torch.nn.Linear(n_neurons, 1))
            self.alph = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.bet = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.trans = translatedSigmoid()

        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a + 1e-8, 1.0 / (b + 1e-8))
                if self.training:
                    samples_var = gamma_dist.rsample(torch.Size([20]))
                    x_var = (1.0 / (samples_var + 1e-8))
                else:
                    samples_var = gamma_dist.rsample(torch.Size([1000]))
                    x_var = (1.0 / (samples_var + 1e-8)).mean(dim=0)
                var = (1 - s) * x_var + s * torch.tensor(
                    [3.5**2], device=x.device)  # HYPERPARAMETER

            else:
                var = torch.tensor([0.05], device=x.device)
            return mean, var

    model = GPNNModel()
    if torch.cuda.is_available() and args.cuda:
        model.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
    optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                        model.bet.parameters(),
                                        model.trans.parameters()),
                                  lr=1e-4)

    it = 0
    opt_switch = 0
    progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')
    batches = local_batchify(X,
                             y,
                             batch_size=args.batch_size,
                             shuffel=args.shuffel)

    while it < args.iters:
        switch = 1.0 if it > args.iters / 2 else 0.0
        if it % 11 == 0 and switch:
            opt_switch = opt_switch + 1  # change between var and mean optimizer

        data, label, mean_w, var_w = next(batches)
        data = torch.tensor(data).to(torch.float32).to(device)
        label = torch.tensor(label).to(torch.float32).to(device)
        mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
        var_w = torch.tensor(var_w).to(torch.float32).to(device)

        if opt_switch % 2 == 0:
            #for b in range(mean_pseupoch):
            optimizer.zero_grad()
            #batch = locality_sampler2(mean_psu,mean_ssu,mean_Q,mean_w)
            m, v = model(data, switch)
            loss = -t_likelihood(label.reshape(-1, 1), m, v,
                                 mean_w) / X.shape[0]
            loss.backward()
            optimizer.step()
        else:
            #for b in range(var_pseupoch):
            optimizer2.zero_grad()
            #batch = locality_sampler2(var_psu,var_ssu,var_Q,var_w)
            m, v = model(data, switch)
            loss = -t_likelihood(label.reshape(-1, 1), m, v,
                                 var_w) / X.shape[0]
            loss.backward()
            optimizer2.step()

        it += 1
        progressBar.update()
        progressBar.set_postfix({'loss': loss.item()})
    progressBar.close()

    data = torch.tensor(Xval).to(torch.float32).to(device)
    label = torch.tensor(yval).to(torch.float32).to(device)
    with torch.no_grad():
        m, v = model(data, switch)
    m = m * y_std + y_mean
    v = v * y_std**2
    log_px = t_likelihood(label.reshape(-1, 1), m, v)
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()
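# local_batchify comes from the locality-sampling utilities and is not shown. It
# is expected to yield (data, label, mean_w, var_w) minibatches, where the weight
# vectors reflect each point's inclusion probability under the locality sampler.
# A uniform-weight placeholder with the same interface (hypothetical, for
# illustration only):
import numpy as np

def local_batchify(X, y, batch_size=256, shuffel=True):
    n = X.shape[0]
    while True:
        idx = np.random.permutation(n) if shuffel else np.arange(n)
        for i in range(0, n, batch_size):
            b = idx[i:i + batch_size]
            yield X[b], y[b], np.ones(len(b)), np.ones(len(b))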
Example #8
def nnlsmv(args, X, y, Xval, yval):
    if args.dataset == 'protein' or args.dataset == 'year_prediction':
        n_neurons = 100
    else:
        n_neurons = 50

    y, y_mean, y_std = normalize_y(y)

    mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                               torch.nn.ReLU(), torch.nn.Linear(n_neurons, 1))
    var = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                              torch.nn.ReLU(), torch.nn.Linear(n_neurons, 1),
                              torch.nn.Softplus())
    if torch.cuda.is_available() and args.cuda:
        mean.cuda()
        var.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    optimizer = torch.optim.Adam(mean.parameters(), lr=args.lr)
    optimizer2 = torch.optim.Adam(var.parameters(), lr=args.lr)

    it = 0
    opt_switch = 0
    progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')
    batches = local_batchify(X,
                             y,
                             batch_size=args.batch_size,
                             shuffel=args.shuffel)

    while it < args.iters:
        switch = 1.0 if it > args.iters / 2 else 0.0
        if it % 11 == 0 and switch:
            opt_switch = opt_switch + 1  # change between var and mean optimizer

        data, label, mean_w, var_w = next(batches)
        data = torch.tensor(data).to(torch.float32).to(device)
        label = torch.tensor(label).to(torch.float32).to(device)
        mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
        var_w = torch.tensor(var_w).to(torch.float32).to(device)

        if opt_switch % 2 == 0:
            optimizer.zero_grad()
            m, v = mean(data), var(data)
            v = switch * v + (1 - switch) * torch.tensor([0.02**2],
                                                         device=device)
            loss = -(-v.log() - (m.flatten() - label)**2 / (2 * v)) / mean_w
            loss = loss.sum()
            loss.backward()
            optimizer.step()
        else:
            optimizer2.zero_grad()
            m, v = mean(data), var(data)
            loss = -(-v.log() - (m.flatten() - label)**2 / (2 * v)) / var_w
            loss = loss.sum()
            loss.backward()
            optimizer2.step()

        it += 1
        progressBar.update()
        progressBar.set_postfix({'loss': loss.item()})
    progressBar.close()

    data = torch.tensor(Xval).to(torch.float32).to(device)
    label = torch.tensor(yval).to(torch.float32).to(device)
    with torch.no_grad():
        m, v = mean(data), var(data)
    m = m * y_std + y_mean
    v = v * y_std**2
    log_px = normal_log_prob(label, m, v)
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()
Example #9
def model(args, X, y, local=False, mean_var=False):

    y, y_mean, y_std = normalize_y(y)

    mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], args.n_neurons),
                               torch.nn.ReLU(),
                               torch.nn.Linear(args.n_neurons, 1))
    var = torch.nn.Sequential(torch.nn.Linear(X.shape[1], args.n_neurons),
                              torch.nn.ReLU(),
                              torch.nn.Linear(args.n_neurons, 1),
                              torch.nn.Softplus())
    if torch.cuda.is_available() and args.cuda:
        mean.cuda()
        var.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    if mean_var:
        optimizer = torch.optim.Adam(mean.parameters(), lr=args.lr)
        optimizer2 = torch.optim.Adam(var.parameters(), lr=args.lr)
    else:
        optimizer = torch.optim.Adam(chain(mean.parameters(),
                                           var.parameters()),
                                     lr=args.lr)

    it = 0
    opt_switch = 0
    progressBar = tqdm(desc='Training nn', total=args.iters, unit='iter')

    # Batching scheme
    if local:
        batches = local_batchify(X,
                                 y,
                                 batch_size=args.batch_size,
                                 shuffel=args.shuffel)
    else:
        batches = batchify(X,
                           y,
                           batch_size=args.batch_size,
                           shuffel=args.shuffel)

    # store values
    loglike = []
    rmse = []
    params_m = []
    params_v = []
    gradlist_m = []
    gradlist_v = []

    while it < args.iters:
        switch = 1.0 if it > args.iters / 2 else 0.0
        if it % 11 == 0 and switch: opt_switch = opt_switch + 1

        if local:
            data, label, mean_w, var_w = next(batches)
            mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
            var_w = torch.tensor(var_w).to(torch.float32).to(device)
        else:
            data, label = next(batches)
            mean_w = 1
            var_w = 1

        data = torch.tensor(data).to(torch.float32).to(device)
        label = torch.tensor(label).to(torch.float32).to(device)
        m, v = mean(data), var(data)
        v = switch * (v + 1e-5) + (1 - switch) * torch.tensor([0.02],
                                                              device=device)

        if mean_var:
            if opt_switch % 2 == 0:
                optimizer.zero_grad()
                loss = -(-v.log() - (m.flatten() - label)**2 /
                         (2 * v)) / mean_w
                loss = loss.mean()
                loss.backward()
                optimizer.step()
            else:
                optimizer2.zero_grad()
                m, v = mean(data), var(data)
                d = (m.flatten() - label)**2
                loss = -(-(d.log() / 2 + d / v + v.log() / 2)) / var_w
                loss = loss.mean()
                loss.backward()
                optimizer2.step()
        else:
            optimizer.zero_grad()
            loss = -(-v.log() - (m.flatten() - label)**2 / (2 * v))
            loss = loss.mean()
            loss.backward()
            optimizer.step()

        it += 1
        progressBar.update()
        progressBar.set_postfix({'loss': loss.item()})

        with torch.no_grad():
            loglike.append(loss.item())  # per-batch training loss
            rmse.append((m - label).pow(2.0).mean().item())  # note: squared error (no sqrt applied)
            params_m.append(
                [copy.deepcopy(p) for p in list(mean.parameters())])
            params_v.append([copy.deepcopy(p) for p in list(var.parameters())])
            gradlist_m.append(
                [copy.deepcopy(p.grad) for p in list(mean.parameters())])
            gradlist_v.append(
                [copy.deepcopy(p.grad) for p in list(var.parameters())])

    progressBar.close()

    return loglike, rmse, params_m, params_v, gradlist_m, gradlist_v
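# For reference, the args namespace accessed throughout these examples carries at
# least the fields below; the dataset name and the values shown are placeholders,
# not the settings used in the original experiments.
from argparse import Namespace
import numpy as np

args = Namespace(dataset='toy', cuda=False, lr=1e-3, iters=1000, batch_size=256,
                 shuffel=True, n_neurons=50, n_clusters=10, n_models=5)
X = np.random.randn(500, 8).astype(np.float32)
y = np.random.randn(500).astype(np.float32)
loglike, rmse, params_m, params_v, grads_m, grads_v = model(args, X, y)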