Example #1
 def test_DGCCA_methods_cpu(self):
     latent_dims = 2
     device = 'cpu'
     encoder_1 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     encoder_2 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     encoder_3 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     # DTCCA
     dtcca_model = DTCCA(latent_dims=latent_dims,
                         encoders=[encoder_1, encoder_2, encoder_3])
     # DeepWrapper handles the training loop on the specified device
     dtcca_model = DeepWrapper(dtcca_model, device=device)
     dtcca_model.fit((self.X, self.Y, self.Z))
     # DGCCA
     dgcca_model = DCCA(latent_dims=latent_dims,
                        encoders=[encoder_1, encoder_2, encoder_3],
                        objective=objectives.GCCA)
     # DeepWrapper handles the training loop on the specified device
     dgcca_model = DeepWrapper(dgcca_model, device=device)
     dgcca_model.fit((self.X, self.Y, self.Z))
     # DMCCA
     dmcca_model = DCCA(latent_dims=latent_dims,
                        encoders=[encoder_1, encoder_2, encoder_3],
                        objective=objectives.MCCA)
     # DeepWrapper handles the training loop on the specified device
     dmcca_model = DeepWrapper(dmcca_model, device=device)
     dmcca_model.fit((self.X, self.Y, self.Z))
Example #2
def test_linear():
    encoder_1 = architectures.LinearEncoder(latent_dims=1, feature_size=10)
    encoder_2 = architectures.LinearEncoder(latent_dims=1, feature_size=12)
    dcca = DCCA(latent_dims=1, encoders=[encoder_1, encoder_2])
    optimizer = optim.Adam(dcca.parameters(), lr=1e-1)
    dcca = CCALightning(dcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=50, enable_checkpointing=False)
    trainer.fit(dcca, loader)
    cca = CCA().fit((X, Y))
    # check linear encoder with SGD matches vanilla linear CCA
    assert (np.testing.assert_array_almost_equal(
        cca.score((X, Y)), trainer.model.score(loader), decimal=2) is None)
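This test relies on module-level fixtures (X, Y, and loader) that are defined outside the snippet. A minimal sketch of what such fixtures might look like, assuming two random numpy views whose feature sizes match the encoders above; the loader would wrap the same arrays with the library's multiview dataset utilities and get_dataloaders (exact helper names vary between cca-zoo versions):

# Hypothetical fixtures for the test above (illustrative only, not the original test's)
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((256, 10))  # view 1: 256 samples, 10 features
Y = rng.standard_normal((256, 12))  # view 2: 256 samples, 12 features
# `loader` is assumed to be built from (X, Y) via the library's dataset helpers
# and get_dataloaders, batching both views together for the Lightning trainer.
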
Example #3
 def test_input_types(self):
     latent_dims = 2
     device = 'cpu'
     encoder_1 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     encoder_2 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     # DCCA
     dcca_model = DCCA(latent_dims=latent_dims,
                       encoders=[encoder_1, encoder_2],
                       objective=objectives.CCA)
     # DeepWrapper handles the training loop on the specified device
     dcca_model = DeepWrapper(dcca_model, device=device)
     dcca_model.fit(self.train_dataset, epochs=3)
     dcca_model.fit(self.train_dataset,
                    val_dataset=self.train_dataset,
                    epochs=3)
     dcca_model.fit((self.X, self.Y),
                    val_dataset=(self.X, self.Y),
                    epochs=3)
Example #4
 def test_schedulers(self):
     latent_dims = 2
     device = 'cpu'
     encoder_1 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     encoder_2 = architectures.Encoder(latent_dims=latent_dims,
                                       feature_size=10)
     # DCCA
     optimizers = [
         optim.Adam(encoder_1.parameters(), lr=1e-4),
         optim.Adam(encoder_2.parameters(), lr=1e-4)
     ]
     schedulers = [
         optim.lr_scheduler.CosineAnnealingLR(optimizers[0], 1),
         optim.lr_scheduler.ReduceLROnPlateau(optimizers[1])
     ]
     dcca_model = DCCA(latent_dims=latent_dims,
                       encoders=[encoder_1, encoder_2],
                       objective=objectives.CCA,
                       optimizers=optimizers,
                       schedulers=schedulers)
     # DeepWrapper handles the training loop on the specified device
     dcca_model = DeepWrapper(dcca_model, device=device)
     dcca_model.fit((self.X, self.Y), epochs=20)
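The test above passes per-encoder optimizers and schedulers directly to DCCA and trains through DeepWrapper. The CCALightning-based examples later in this document attach a single optimizer and scheduler at the wrapper level instead; a minimal sketch of that alternative, reusing the encoders above and assuming a train_loader built from the same two views:

# Alternative wiring used by the CCALightning examples in this document
# (train_loader is an assumed fixture holding the same two views).
dcca_model = DCCA(latent_dims=latent_dims,
                  encoders=[encoder_1, encoder_2],
                  objective=objectives.CCA)
optimizer = optim.Adam(dcca_model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1)
dcca_model = CCALightning(dcca_model, optimizer=optimizer, lr_scheduler=scheduler)
trainer = pl.Trainer(max_epochs=20, enable_checkpointing=False)
trainer.fit(dcca_model, train_loader)
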
Example #5
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset,
                                           val_dataset,
                                           batch_size=128)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 50

encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# %%
# Deep CCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
dcca = CCALightning(dcca)
trainer = pl.Trainer(
    max_epochs=epochs,
    enable_checkpointing=False,
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
)
trainer.fit(dcca, train_loader, val_loader)
plot_latent_label(dcca.model, train_loader)
plt.suptitle("DCCA")
plt.show()

# %%
# Deep CCA by Non-Linear Orthogonal Iterations
dcca_noi = DCCA_NOI(latent_dims=latent_dims,
                    N=n_train,  # assumed: the original snippet is cut off after the first argument
                    encoders=[encoder_1, encoder_2])
Example #6
from multiviewdata.torchdatasets import SplitMNISTDataset
from cca_zoo.deepmodels import DCCA, CCALightning, get_dataloaders, architectures

n_train = 500
n_val = 100
train_dataset = SplitMNISTDataset(root="",
                                  mnist_type="MNIST",
                                  train=True,
                                  download=True)
val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val))
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 10

# TODO add in custom architecture and schedulers and stuff to show it off
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# Deep CCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
optimizer = optim.Adam(dcca.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1)
dcca = CCALightning(dcca, optimizer=optimizer, lr_scheduler=scheduler)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)
Example #7
val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val))
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 10

encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# %%
# Deep MCCA
dcca = DCCA(latent_dims=latent_dims,
            encoders=[encoder_1, encoder_2],
            objective=objectives.MCCA)
dcca = CCALightning(dcca)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)

# %%
# Deep GCCA
dcca = DCCA(latent_dims=latent_dims,
            encoders=[encoder_1, encoder_2],
            objective=objectives.GCCA)
dcca = CCALightning(dcca)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)
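
Neither the Deep MCCA nor the Deep GCCA cell above inspects the fitted model. A short follow-up sketch, using the same score method that the test examples in this document call on the wrapped model:

# Report the validation correlations of the most recently fitted model.
print(trainer.model.score(val_loader))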

Example #8
def test_DCCA_methods():
    N = len(train_dataset)
    latent_dims = 2
    epochs = 100
    cca = CCA(latent_dims=latent_dims).fit((X, Y))
    # DCCA_NOI
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dcca_noi = DCCA_NOI(latent_dims, N, encoders=[encoder_1, encoder_2], rho=0)
    optimizer = optim.Adam(dcca_noi.parameters(), lr=1e-2)
    dcca_noi = CCALightning(dcca_noi, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs,
                         log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(dcca_noi, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
    # Soft Decorrelation (Stochastic Decorrelation Loss)
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    sdl = DCCA_SDL(latent_dims, N, encoders=[encoder_1, encoder_2], lam=1e-3)
    optimizer = optim.SGD(sdl.parameters(), lr=1e-1)
    sdl = CCALightning(sdl, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10)
    trainer.fit(sdl, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
    # DCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.CCA,
    )
    optimizer = optim.SGD(dcca.parameters(), lr=1e-1)
    dcca = CCALightning(dcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs,
                         log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(dcca, train_loader, val_dataloaders=val_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
    # DGCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dgcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.GCCA,
    )
    optimizer = optim.SGD(dgcca.parameters(), lr=1e-2)
    dgcca = CCALightning(dgcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs,
                         log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(dgcca, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
    # DMCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dmcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.MCCA,
    )
    optimizer = optim.SGD(dmcca.parameters(), lr=1e-2)
    dmcca = CCALightning(dmcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs,
                         log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(dmcca, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
    # Barlow Twins
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    barlowtwins = BarlowTwins(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
    )
    optimizer = optim.SGD(barlowtwins.parameters(), lr=1e-2)
    barlowtwins = CCALightning(barlowtwins, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs,
                         log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(barlowtwins, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(),
        trainer.model.score(train_loader).sum()) is None)
Example #9
### Deep Learning

We also have deep CCA methods (and autoencoder variants):
- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE) (a minimal sketch appears after the DCCA example below)

We introduce a Config class from configuration.py, which contains a number of default settings for running DCCA.

"""
from cca_zoo.deepmodels import DCCA, DCCAE, DVCCA, DCCA_NOI, DeepWrapper, architectures, objectives
# %%
# DCCA
print('DCCA')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])

# DeepWrapper handles the training loop
dcca_model = DeepWrapper(dcca_model)

dcca_model.fit(train_view_1, train_view_2, epochs=epochs)

dcca_results = np.stack((dcca_model.train_correlations[0, 1],
                         dcca_model.predict_corr(test_view_1, test_view_2)[0, 1]))
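
The prose above also lists DCCAE, but no autoencoder example follows the DCCA block. A minimal sketch, assuming the Decoder architecture from cca_zoo.deepmodels.architectures and a DCCAE constructor that takes encoders, decoders and a reconstruction weight lam (parameter values are illustrative):

# DCCAE (sketch; the decoders and lam value are assumptions, see the note above)
print('DCCAE')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784)
decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784)
dccae_model = DCCAE(latent_dims=latent_dims,
                    encoders=[encoder_1, encoder_2],
                    decoders=[decoder_1, decoder_2],
                    lam=0.5)  # lam weights the reconstruction loss (illustrative value)
dccae_model = DeepWrapper(dccae_model)
dccae_model.fit(train_view_1, train_view_2, epochs=epochs)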

# DGCCA
print('DGCCA')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dgcca_model = DCCA(latent_dims=latent_dims,
                   encoders=[encoder_1, encoder_2],
                   objective=objectives.GCCA)  # GCCA objective, matching the DGCCA construction in Example #1