Example #1
def main(args):

    train_loader, test_loader = create_torch_dataloader(batch=64)
    vae = VAE_x(z_dim=10).cuda()  # keep the VAE on the same device as the discriminator
    F = Discriminator_depth(z_dim=10, latent_size=args.latent_size,
                            relu=False).cuda()
    #F = Discriminator(z_dim=10, latent_size=args.latent_size, relu=True).cuda()
    train_vae(vae, F, train_loader, test_loader, args, args.latent_size)
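These snippets lean on helpers defined elsewhere in the repository (`loss_BCE`, `create_torch_dataloader`). As a point of reference only, here is a minimal sketch of what they might look like; the `adult_*.npy` files and the 80/20 split are hypothetical stand-ins, not the project's actual loader.

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Assumed global: a plain mean-reduced binary cross-entropy loss, which is
# consistent with the "loss.item() * x.size(0)" accumulation in the examples.
loss_BCE = nn.BCELoss()

def create_torch_dataloader(batch=64):
    # Hypothetical stand-in: yields (x, u, y) batches, where x are the features,
    # u is the binary sensitive attribute, and y is the task label.
    x = np.load('adult_x.npy').astype(np.float32)               # hypothetical files
    u = np.load('adult_u.npy').astype(np.float32).reshape(-1, 1)
    y = np.load('adult_y.npy').astype(np.float32)
    n_train = int(0.8 * len(x))
    train = TensorDataset(torch.from_numpy(x[:n_train]),
                          torch.from_numpy(u[:n_train]),
                          torch.from_numpy(y[:n_train]))
    test = TensorDataset(torch.from_numpy(x[n_train:]),
                         torch.from_numpy(u[n_train:]),
                         torch.from_numpy(y[n_train:]))
    return (DataLoader(train, batch_size=batch, shuffle=True),
            DataLoader(test, batch_size=batch, shuffle=False))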
Example #2
def F_information_calculation_kernel(args, latent_size, model_latent_size):
    # Since H(u) is a constant, we can calculate it at the end of all experiments
    best_F = 100
    vae = VAE_x(z_dim=10)
    #model_path = 'vae_model_adult_latent_'+str(model_latent_size)+'.pth.tar'
    model_path = 'vaex_model_adult_kernel.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    vae.cuda()  # the batches below are moved to the GPU, so the model must be too
    train_loader, test_loader = create_torch_dataloader(batch=64)
    best_auc = 0.0
    for epoch in range(args.epochs):
        bw = epoch + 1  # sweep the kernel bandwidth across epochs
        test_loss_F = 0.0
        correct = 0.0
        u_collect = []
        z_collect = []

        for iteration, (x, u, y) in enumerate(train_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae(x, u)
            z_collect.append(mean.detach())  # detach so the autograd graph is not kept alive
            u_collect.append(u)

        z = torch.cat(z_collect, dim=0)
        u = torch.cat(u_collect, dim=0)
        K = KernelRegression(bandwidth=bw, X=z, Y=u)

        u_collect = []
        recon_u_collect = []
        for iteration, (x, u, y) in enumerate(test_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae(x, u)

            recon_u = K.predict_u(mean)
            loss_F = loss_BCE(recon_u, u)
            test_loss_F += loss_F.item() * x.size(0)
            pred = (recon_u > 0.5).float()
            correct += (pred == u).float().sum()
            u_collect.append(u.detach().cpu())
            recon_u_collect.append(recon_u.detach().cpu())

        u = torch.cat(u_collect, dim=0).numpy()
        recon_u = torch.cat(recon_u_collect, dim=0).numpy()
        test_auc = roc_auc_score(u, recon_u)
        if test_auc > best_auc:
            best_auc = test_auc
        # 0.631475 is the precomputed constant H(u); H(u) - H(u|z) is the F-information
        print("epoch: {}, F loss : {}, acc: {}, auc: {}".format(
            epoch, 0.631475 - test_loss_F / len(test_loader.dataset),
            correct / len(test_loader.dataset), test_auc))
        if test_loss_F / len(test_loader.dataset) < best_F:
            best_F = test_loss_F / len(test_loader.dataset)

    print("Model F={}, Latent size :{}, F information (best) :{}".format(
        model_latent_size, latent_size, best_F))
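`KernelRegression` itself is not shown in these examples. A minimal Nadaraya-Watson sketch that matches the `bandwidth=` / `X=` / `Y=` constructor and the `predict_u` call above (an assumption, not the repository's actual class):

import torch

class KernelRegression:
    # Minimal Nadaraya-Watson regressor with a Gaussian kernel; a sketch that
    # only mirrors the constructor and predict_u signatures used above.
    def __init__(self, bandwidth, X, Y):
        self.h = bandwidth
        self.X = X.detach()   # (n, d) latent codes
        self.Y = Y.detach()   # (n, 1) binary sensitive attribute

    def predict_u(self, z):
        # squared distances between queries z (m, d) and training points X (n, d)
        d2 = torch.cdist(z, self.X) ** 2
        # normalized Gaussian kernel weights, one row per query
        w = torch.softmax(-d2 / (2 * self.h ** 2), dim=1)
        return w @ self.Y  # weighted average of u, guaranteed to lie in [0, 1]

Because the weights are normalized, `predict_u` returns values in [0, 1], which is what `loss_BCE` and `roc_auc_score` expect in the loop above.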
Example #3
def train_regression(model=None):
    def accuracy(y, y_logits):
        y_ = (y_logits > 0.0).astype(np.float32)
        return np.mean((y_ == y).astype(np.float32))

    train_loader, test_loader = create_torch_dataloader(batch=64)
    vae = VAE(z_dim=10)
    model_path = 'vae_model_adult_latent_100.pth.tar'
    #vae.load_state_dict(torch.load(model_path))
    if model is not None:
        vae = model
    vae.cuda()  # the batches below are moved to the GPU
    #F = Discriminator(z_dim=10).cuda()
    zs = []
    ys = []

    for iteration, (x, u, y) in enumerate(train_loader):
        x, u = x.cuda(), u.cuda()
        mean = vae(x, u, classifier=True)
        zs.append(mean.detach().cpu().numpy())
        ys.append(y.numpy())

    zs = np.concatenate(zs, axis=0)
    #print("Feature shape:",zs.shape)
    zsm = np.mean(zs, axis=0)
    zss = np.std(zs, axis=0)
    ys = np.concatenate(ys, axis=0)
    #print("Label shape:",ys.shape)
    from sklearn.linear_model import LogisticRegression

    lr = LogisticRegression()
    lr.fit((zs - zsm) / zss, ys)
    ys_ = lr.predict((zs - zsm) / zss)
    from sklearn.metrics import roc_auc_score

    train_auc = roc_auc_score(ys, ys_)
    train_acc = accuracy(ys, ys_)

    zs = []
    ys = []
    for iteration, (x, u, y) in enumerate(test_loader):
        x, u = x.cuda(), u.cuda()
        mean = vae(x, u, classifier=True)
        zs.append(mean.detach().cpu().numpy())
        ys.append(y.numpy())

    zs = np.concatenate(zs, axis=0)
    #print("test Feature shape:", zs.shape)
    ys = np.concatenate(ys, axis=0)
    #print("test Label shape:", ys.shape)
    ys_ = lr.predict((zs - zsm) / zss)
    test_auc = roc_auc_score(ys, ys_)
    test_acc = accuracy(ys, ys_)

    print(
        "train acc : {}, train auc : {}, test acc : {}, test auc : {}".format(
            train_acc, train_auc, test_acc, test_auc))
    return test_auc
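One caveat in the snippet above: `roc_auc_score` is given the hard 0/1 labels from `lr.predict`, which collapses the ROC curve to a single operating point. If a ranking-based AUC is wanted instead, the class-1 probabilities can be scored; a small sketch (the helper name is ours):

from sklearn.metrics import roc_auc_score

def auc_from_probabilities(lr, zs, zsm, zss, ys):
    # score with P(y=1) rather than thresholded labels
    scores = lr.predict_proba((zs - zsm) / zss)[:, 1]
    return roc_auc_score(ys, scores)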
Example #4
def mutual_information_q_u(args):

    z_collect = [[], []]  # latent means split by u = 0 / u = 1
    z_all = []
    vae = VAE(z_dim=10)
    model_path = 'vae_model_adult_latent_' + str(args.latent_size) + '.pth.tar'
    #model_path = 'vae_model_adult_kernel.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    vae.cuda()  # the batches below are moved to the GPU


    train_loader, test_loader = create_torch_dataloader(batch=64)
    for iteration, (x, u, y) in enumerate(train_loader):
        x, u, y = x.cuda(), u.cuda(), y.cuda()
        recon_x, mean, log_var, z = vae(x, u)
        u = u.detach().cpu().numpy()
        mean = mean.detach().cpu().numpy()
        z_collect[0].append(mean[np.nonzero(1 - u)[0]])
        z_collect[1].append(mean[np.nonzero(u)[0]])
        z_all.append(mean)


    z_collect[0] = np.concatenate(z_collect[0], axis=0)
    z_collect[1] = np.concatenate(z_collect[1], axis=0)
    z_all = np.concatenate(z_all, axis=0)
    print(z_collect[0].shape, z_collect[1].shape, z_all.shape)
    # KDE estimates of p(z|u=0), p(z|u=1), and the marginal p(z)
    kde = [gaussian_kde(z_collect[0].transpose()),
           gaussian_kde(z_collect[1].transpose())]
    kde[0].set_bandwidth('silverman')
    kde[1].set_bandwidth('silverman')
    kde_all = gaussian_kde(z_all.transpose())
    kde_all.set_bandwidth('silverman')
    mi_zu = 0.0
    mi_z = 0.0
    cnt = 0.0
    for iteration, (x, u, y) in enumerate(test_loader):
        x, u, y = x.cuda(), u.cuda(), y.cuda()
        recon_x, mean, log_var, z = vae(x, u)
        u = u.detach().cpu().numpy()
        idx = [np.nonzero(1.0 - u)[0], np.nonzero(u)[0]]
        mean = mean.detach().cpu().numpy()
        mi_zu += kde[0].logpdf(mean[idx[0]].transpose()).sum()
        mi_zu += kde[1].logpdf(mean[idx[1]].transpose()).sum()
        mi_z += kde_all.logpdf(mean.transpose()).sum()
        cnt += x.size(0)

    print(mi_z / cnt, mi_zu / cnt)

    # Monte-Carlo estimate of I(z;u) = E[log p(z|u)] - E[log p(z)]
    mi = (mi_zu - mi_z) / cnt
    print("I:", mi)
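The estimator above is the Monte-Carlo form of I(z;u) = E[log p(z|u)] - E[log p(z)], with each density replaced by a KDE. A self-contained toy check of the same recipe on synthetic 1-D data, where z visibly depends on a binary u:

import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)
u = rng.integers(0, 2, size=4000)        # binary attribute
z = rng.normal(loc=2.0 * u, scale=1.0)   # z shifts with u

kde_cond = [gaussian_kde(z[u == 0]), gaussian_kde(z[u == 1])]
kde_all = gaussian_kde(z)

# E[log p(z|u)] - E[log p(z)], averaged over the sample
mi = (kde_cond[0].logpdf(z[u == 0]).sum() +
      kde_cond[1].logpdf(z[u == 1]).sum() -
      kde_all.logpdf(z).sum()) / len(z)
print("estimated I(z;u):", mi)  # positive, and at most H(u) = ln 2 ~ 0.693 nats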
Example #5
def Hz():

    train_loader, test_loader = create_torch_dataloader(batch=64)
    cnt = np.zeros((2))

    for iteration, (x, u, y) in enumerate(train_loader):

        cnt[0] += (u == 0).float().numpy().sum()
        cnt[1] += (u == 1).float().numpy().sum()

    cnt /= cnt.sum()
    # binary entropy of the sensitive attribute u, in bits
    hz = -(cnt[0] * np.log2(cnt[0]) + cnt[1] * np.log2(cnt[1]))
    # compare against the natural-log BCE of a constant predictor
    cnt = torch.from_numpy(cnt[0].reshape(1, 1))
    label = np.array([0]).reshape(1, 1)
    label = torch.from_numpy(label).double()
    print(loss_BCE(cnt, label))
    print("H(u) = ", hz)
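As a quick sanity check on the formula: for class frequencies p and 1 - p, H(u) = -p*log2(p) - (1-p)*log2(1-p). The BCE loss printed above uses natural logs, so its values are in nats (a factor of ln 2 larger per bit):

import numpy as np

def binary_entropy(p, base=2.0):
    # entropy of a Bernoulli(p) variable; base=2 gives bits, base=np.e gives nats
    return -(p * np.log(p) + (1 - p) * np.log(1 - p)) / np.log(base)

print(binary_entropy(0.5))        # 1.0 bit
print(binary_entropy(0.5, np.e))  # 0.6931... nats = ln 2
print(binary_entropy(0.25))       # 0.8113 bits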
Example #6
def train_classifier(args):
    train_loader, test_loader = create_torch_dataloader(batch=64)
    vae = VAE(z_dim=10)
    model_path = 'vae_model_adult.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    vae.cuda()  # the batches below are moved to the GPU
    #F = Discriminator(z_dim=10).cuda()
    classifier = Classifier(z_dim=10).cuda()
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.5, 0.999))

    for epoch in range(args.epochs):
        train_loss = 0.0
        tcorrect = 0.0
        correct = 0.0
        for iteration, (x, u, y) in enumerate(train_loader):

            x, u, y = x.cuda(), u.cuda(), y.cuda().long()
            mean = vae(x, u, classifier=True).detach()  # freeze the VAE; only the classifier is trained
            output = classifier(mean)
            pre = (output > 0.5).detach().long()
            tcorrect += pre.eq(y).sum().item()
            loss = loss_BCE(output, y.float())
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        for iteration, (x, u, y) in enumerate(test_loader):

            x, u, y = x.cuda(), u.cuda(), y.cuda().long()
            mean = vae(x, u, classifier=True)
            output = classifier(mean)

            pre = (output > 0.5).detach().long()
            correct += pre.eq(y).sum().item()

        print("Epoch:{}, train acc : {}, test acc : {}".format(
            epoch, tcorrect / len(train_loader.dataset),
            correct / len(test_loader.dataset)))
    torch.save(classifier.state_dict(), 'classifier_adult.pth.tar')
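`Classifier` is defined elsewhere in the repository. Since its output is fed to `loss_BCE` and thresholded at 0.5, it presumably ends in a sigmoid; a hypothetical minimal version consistent with `Classifier(z_dim=10)` (the hidden width is our guess):

import torch.nn as nn

class Classifier(nn.Module):
    # hypothetical sketch: a small MLP head over the z_dim-dimensional latent code
    def __init__(self, z_dim=10, hidden=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(z_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
            nn.Sigmoid(),  # probabilities, as BCELoss requires
        )

    def forward(self, z):
        return self.net(z).squeeze(1)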
Example #7
def test(args, latent_size):
    # Since H(u) is a constant, we can calculate it at the end of all experiments
    vae = VAE(z_dim=10)
    model_path = 'vae_model_adult_latent_50.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    vae.cuda()  # the batches below are moved to the GPU
    F = Discriminator(z_dim=10, latent_size=latent_size).cuda()
    model_path = 'F_adult_latent_50.pth.tar'
    F.load_state_dict(torch.load(model_path))

    train_loader, test_loader = create_torch_dataloader(batch=64)
    for epoch in range(1):
        test_loss_F = 0.0
        for iteration, (x, u, y) in enumerate(test_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae(x, u)
            recon_u = F(z)

            loss_F = loss_BCE(recon_u, u)
            test_loss_F += loss_F.item() * x.size(0)

        print("epoch: {}, H(u|z) loss : {}".format(
            epoch, test_loss_F / len(test_loader.dataset)))
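The `Discriminator` probe is likewise defined elsewhere. Judging only from the call `Discriminator(z_dim=10, latent_size=latent_size)` and the `relu=` flag seen in the other examples, a plausible (hypothetical) shape is a one-hidden-layer MLP of width `latent_size`:

import torch.nn as nn

class Discriminator(nn.Module):
    # hypothetical sketch: an MLP probe that predicts u from the latent code z
    def __init__(self, z_dim=10, latent_size=50, relu=True):
        super().__init__()
        act = nn.ReLU() if relu else nn.Tanh()  # assumption: 'relu' toggles the activation
        self.net = nn.Sequential(
            nn.Linear(z_dim, latent_size),
            act,
            nn.Linear(latent_size, 1),
            nn.Sigmoid(),
        )

    def forward(self, z):
        return self.net(z)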
Example #8
def F_information_calculation(args, latent_size, model_latent_size):
    # Since H(u) is a constant, we can calculate it at the end of all experiments
    best_F = 100
    vae = VAE_x(z_dim=10)
    #model_path = 'vae_model_adult_latent_'+str(model_latent_size)+'.pth.tar'
    model_path = 'vaex_model_adult_kernel.pth.tar'
    #model_path = 'vaex_model_relu_adult_latent_50050.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    vae.cuda()  # the batches below are moved to the GPU
    #F = Discriminator(z_dim=10, latent_size=latent_size, relu=True).cuda()
    F = Logistic().cuda()  # logistic-regression probe: the weakest model family F
    #F = Discriminator_depth(z_dim=10, latent_size=latent_size, relu=False).cuda()
    optimizer_F = torch.optim.Adam(F.parameters(),
                                   lr=args.learning_rate,
                                   betas=(0.5, 0.999))
    train_loader, test_loader = create_torch_dataloader(batch=64)
    best_auc = 0.0
    bw = 10  # bandwidth for the commented-out kernel-regression alternative below
    for epoch in range(args.epochs):
        test_loss_F = 0.0
        correct = 0.0
        u_collect = []
        recon_u_collect = []
        for iteration, (x, u, y) in enumerate(train_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae(x, u)
            recon_u = F(z.detach())  # train only the probe F, not the VAE

            loss_F = loss_BCE(recon_u, u)
            optimizer_F.zero_grad()
            loss_F.backward()
            optimizer_F.step()

        for iteration, (x, u, y) in enumerate(test_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae(x, u)
            recon_u = F(mean)
            #K = KernelRegression(bandwidth=bw, X=z, Y=u)
            #recon_u = K.predict_u(z)
            loss_F = loss_BCE(recon_u, u)
            test_loss_F += loss_F.item() * x.size(0)
            pred = (recon_u > 0.5).float()
            correct += (pred == u).float().sum()
            u_collect.append(u.detach().cpu())
            recon_u_collect.append(recon_u.detach().cpu())

        u = torch.cat(u_collect, dim=0).numpy()
        recon_u = torch.cat(recon_u_collect, dim=0).numpy()
        test_auc = roc_auc_score(u, recon_u)
        if test_auc > best_auc:
            best_auc = test_auc
        # 0.631475 is the precomputed constant H(u); H(u) - H(u|z) is the F-information
        print("epoch: {}, F loss : {}, acc: {}, auc: {}".format(
            epoch, 0.631475 - test_loss_F / len(test_loader.dataset),
            correct / len(test_loader.dataset), test_auc))
        if test_loss_F / len(test_loader.dataset) < best_F:
            best_F = test_loss_F / len(test_loader.dataset)

    print("Model F={}, Latent size :{}, F information (best) :{}".format(
        model_latent_size, latent_size, best_F))
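`Logistic` is the weakest probe family used here: plain logistic regression on z. Under a fixed probe family F, H(u) minus the best achievable BCE loss is the F-information of z about u, which matches the per-epoch "F loss" computed above. A hypothetical minimal `Logistic` consistent with the `Logistic()` call:

import torch
import torch.nn as nn

class Logistic(nn.Module):
    # hypothetical sketch: logistic regression from the latent code to u
    def __init__(self, z_dim=10):
        super().__init__()
        self.linear = nn.Linear(z_dim, 1)

    def forward(self, z):
        return torch.sigmoid(self.linear(z))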