def test_DGCCA_methods_cpu(self):
    latent_dims = 2
    device = 'cpu'
    # hidden_layer_sizes can be passed to the encoders; the defaults are used here
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_3 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    # DTCCA (two views to match the two encoders)
    dtcca_model = DTCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
    dtcca_model = DeepWrapper(dtcca_model, device=device)
    dtcca_model.fit((self.X, self.Y))
    # DGCCA
    dgcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2, encoder_3],
                       objective=objectives.GCCA)
    dgcca_model = DeepWrapper(dgcca_model, device=device)
    dgcca_model.fit((self.X, self.Y, self.Z))
    # DMCCA
    dmcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2, encoder_3],
                       objective=objectives.MCCA)
    dmcca_model = DeepWrapper(dmcca_model, device=device)
    dmcca_model.fit((self.X, self.Y, self.Z))
def test_linear():
    encoder_1 = architectures.LinearEncoder(latent_dims=1, feature_size=10)
    encoder_2 = architectures.LinearEncoder(latent_dims=1, feature_size=12)
    dcca = DCCA(latent_dims=1, encoders=[encoder_1, encoder_2])
    optimizer = optim.Adam(dcca.parameters(), lr=1e-1)
    dcca = CCALightning(dcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=50, enable_checkpointing=False)
    trainer.fit(dcca, loader)
    cca = CCA().fit((X, Y))
    # check that linear encoders trained by gradient descent match vanilla linear CCA
    assert (np.testing.assert_array_almost_equal(
        cca.score((X, Y)), trainer.model.score(loader), decimal=2) is None)
def test_input_types(self):
    latent_dims = 2
    device = 'cpu'
    # hidden_layer_sizes can be passed to the encoders; the defaults are used here
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    # DCCA
    dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2],
                      objective=objectives.CCA)
    dcca_model = DeepWrapper(dcca_model, device=device)
    dcca_model.fit(self.train_dataset, epochs=3)
    dcca_model.fit(self.train_dataset, val_dataset=self.train_dataset, epochs=3)
    dcca_model.fit((self.X, self.Y), val_dataset=(self.X, self.Y), epochs=3)
def test_schedulers(self):
    latent_dims = 2
    device = 'cpu'
    # hidden_layer_sizes can be passed to the encoders; the defaults are used here
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    # DCCA with per-view optimizers and schedulers
    optimizers = [
        optim.Adam(encoder_1.parameters(), lr=1e-4),
        optim.Adam(encoder_2.parameters(), lr=1e-4),
    ]
    schedulers = [
        optim.lr_scheduler.CosineAnnealingLR(optimizers[0], 1),
        optim.lr_scheduler.ReduceLROnPlateau(optimizers[1]),
    ]
    dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2],
                      objective=objectives.CCA, optimizers=optimizers, schedulers=schedulers)
    dcca_model = DeepWrapper(dcca_model, device=device)
    dcca_model.fit((self.X, self.Y), epochs=20)
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset, batch_size=128)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 50

encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# %%
# Deep CCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
dcca = CCALightning(dcca)
trainer = pl.Trainer(
    max_epochs=epochs,
    enable_checkpointing=False,
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
)
trainer.fit(dcca, train_loader, val_loader)
plot_latent_label(dcca.model, train_loader)
plt.suptitle("DCCA")
plt.show()

# %%
# Deep CCA by Non-Linear Orthogonal Iterations
dcca_noi = DCCA_NOI(latent_dims=latent_dims,
from multiviewdata.torchdatasets import SplitMNISTDataset

from cca_zoo.deepmodels import DCCA, CCALightning, get_dataloaders, architectures

n_train = 500
n_val = 100
train_dataset = SplitMNISTDataset(root="", mnist_type="MNIST", train=True, download=True)
val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val))
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 10

# TODO add in custom architecture and schedulers and stuff to show it off
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# Deep CCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
optimizer = optim.Adam(dcca.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1)
dcca = CCALightning(dcca, optimizer=optimizer, lr_scheduler=scheduler)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)
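
# Sanity-check sketch: trainer.model is the CCALightning-wrapped model, and its
# score(loader) method (used in this repo's tests) returns the canonical
# correlations of the fitted model; comparing the train and validation loaders
# gives a rough check for overfitting.
print("train correlations:", trainer.model.score(train_loader))
print("val correlations:", trainer.model.score(val_loader))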
val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val))
train_dataset = Subset(train_dataset, np.arange(n_train))
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)

# The number of latent dimensions across models
latent_dims = 2
# number of epochs for deep models
epochs = 10

encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)

# %%
# Deep MCCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.MCCA)
dcca = CCALightning(dcca)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)

# %%
# Deep GCCA
dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.GCCA)
dcca = CCALightning(dcca)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dcca, train_loader, val_loader)

# %%
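# Deep TCCA (sketch). Assumption: DTCCA, exercised in this repo's tests, can be
# wrapped by CCALightning and trained with the same Trainer loop as the DCCA
# variants above; fresh encoders are built so earlier training does not carry over.
from cca_zoo.deepmodels import DTCCA

encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392)
dtcca = DTCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
dtcca = CCALightning(dtcca)
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
trainer.fit(dtcca, train_loader, val_loader)

# %%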
def test_DCCA_methods():
    N = len(train_dataset)
    latent_dims = 2
    epochs = 100
    cca = CCA(latent_dims=latent_dims).fit((X, Y))
    # DCCA_NOI
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dcca_noi = DCCA_NOI(latent_dims, N, encoders=[encoder_1, encoder_2], rho=0)
    optimizer = optim.Adam(dcca_noi.parameters(), lr=1e-2)
    dcca_noi = CCALightning(dcca_noi, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10, enable_checkpointing=False)
    trainer.fit(dcca_noi, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
    # Soft Decorrelation (Stochastic Decorrelation Loss)
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    sdl = DCCA_SDL(latent_dims, N, encoders=[encoder_1, encoder_2], lam=1e-3)
    optimizer = optim.SGD(sdl.parameters(), lr=1e-1)
    sdl = CCALightning(sdl, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10)
    trainer.fit(sdl, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
    # DCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.CCA,
    )
    optimizer = optim.SGD(dcca.parameters(), lr=1e-1)
    dcca = CCALightning(dcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10, enable_checkpointing=False)
    trainer.fit(dcca, train_loader, val_dataloaders=val_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
    # DGCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dgcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.GCCA,
    )
    optimizer = optim.SGD(dgcca.parameters(), lr=1e-2)
    dgcca = CCALightning(dgcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10, enable_checkpointing=False)
    trainer.fit(dgcca, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
    # DMCCA
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    dmcca = DCCA(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
        objective=objectives.MCCA,
    )
    optimizer = optim.SGD(dmcca.parameters(), lr=1e-2)
    dmcca = CCALightning(dmcca, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10, enable_checkpointing=False)
    trainer.fit(dmcca, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
    # Barlow Twins
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    barlowtwins = BarlowTwins(
        latent_dims=latent_dims,
        encoders=[encoder_1, encoder_2],
    )
    optimizer = optim.SGD(barlowtwins.parameters(), lr=1e-2)
    barlowtwins = CCALightning(barlowtwins, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(barlowtwins, train_loader)
    assert (np.testing.assert_array_less(
        cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum()) is None)
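
def test_DCCAE_sketch():
    # Sketch only, following the pattern above. Assumptions not confirmed in this
    # file: DCCAE takes decoders built with architectures.Decoder (mirroring the
    # Encoder signature) plus a reconstruction weight lam, and trains under the
    # same CCALightning/Trainer loop as the models above.
    from cca_zoo.deepmodels import DCCAE

    latent_dims = 2
    epochs = 100
    encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10)
    encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12)
    decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=10)
    decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=12)
    dccae = DCCAE(latent_dims=latent_dims, encoders=[encoder_1, encoder_2],
                  decoders=[decoder_1, decoder_2], lam=0.5)
    optimizer = optim.Adam(dccae.parameters(), lr=1e-3)
    dccae = CCALightning(dccae, optimizer=optimizer)
    trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10,
                         enable_checkpointing=False)
    trainer.fit(dccae, train_loader)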
### Deep Learning

We also have deep CCA methods (and autoencoder variants):

- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE)

We introduce a Config class from configuration.py. This contains a number of
default settings for running DCCA.
"""

from cca_zoo.deepmodels import DCCA, DCCAE, DVCCA, DCCA_NOI, DeepWrapper, architectures, objectives

# %%
# DCCA
print('DCCA')
# hidden_layer_sizes can be passed to the encoders; the defaults are used here
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])
dcca_model = DeepWrapper(dcca_model)
dcca_model.fit(train_view_1, train_view_2, epochs=epochs)
dcca_results = np.stack((dcca_model.train_correlations[0, 1],
                         dcca_model.predict_corr(test_view_1, test_view_2)[0, 1]))

# DGCCA
print('DGCCA')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dgcca_model = DCCA(latent_dims=latent_dims,