def run(batch_size, loss, optimizer, base_lr, n_epochs, schedule, dataset, model):
    # load the dataset
    dataset_train_meta, dataset_test_meta = load_dataset(dataset)

    exporter = Exporter(depth=-1, module_filter=[torch.nn.Conv2d, torch.nn.Linear])

    # instantiate model
    from ikkuna.models import get_model
    model = get_model(model, dataset_train_meta.shape[1:],
                      num_classes=dataset_train_meta.num_classes, exporter=exporter)

    loss_fn = getattr(torch.nn, loss)()

    # set up the trainer
    trainer = Trainer(dataset_train_meta, batch_size=batch_size, loss=loss_fn, exporter=exporter)
    trainer.set_model(model)
    trainer.optimize(name=optimizer, lr=base_lr)

    trainer.add_subscriber(RatioSubscriber(['weight_updates', 'weights']))
    trainer.add_subscriber(NormSubscriber('layer_gradients'))
    trainer.add_subscriber(NormSubscriber('weight_gradients'))
    trainer.add_subscriber(VarianceSubscriber('weight_updates'))
    trainer.add_subscriber(VarianceSubscriber('layer_gradients'))
    trainer.add_subscriber(VarianceSubscriber('weight_gradients'))
    trainer.add_subscriber(TrainAccuracySubscriber())
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test_meta, trainer.model.forward,
                                                  frequency=trainer.batches_per_epoch,
                                                  batch_size=batch_size))
    trainer.add_subscriber(MessageMeanSubscriber('weight_updates_weights_ratio'))

    logged_metrics = ['weight_updates_weights_ratio',
                      'loss',
                      'test_accuracy',
                      'train_accuracy',
                      'layer_gradients_norm2',
                      'weight_gradients_norm2',
                      'weight_updates_variance',
                      'layer_gradients_variance',
                      'weight_gradients_variance']

    if schedule == 'ratio_adaptive_schedule_fn':
        from experiments.subscribers import RatioLRSubscriber
        lr_sub = RatioLRSubscriber(base_lr)
        trainer.add_subscriber(lr_sub)
        trainer.set_schedule(torch.optim.lr_scheduler.LambdaLR, lr_sub)
        logged_metrics.append('learning_rate')

    trainer.add_subscriber(SacredLoggingSubscriber(ex, logged_metrics))

    # do n epochs of training
    batches_per_epoch = trainer.batches_per_epoch
    epochs = n_epochs
    for i in range(epochs):
        for b in range(batches_per_epoch):
            trainer.train_batch()

    # we return a result so we can use it for filtering aborted experiments in mongodb
    return 0
def run(batch_size, loss, optimizer, base_lr, n_epochs, dataset, model, freeze_at):
    # load the dataset
    transforms = [ToTensor()]
    dataset_train_meta, dataset_test_meta = load_dataset(dataset,
                                                         train_transforms=transforms,
                                                         test_transforms=transforms)

    exporter = Exporter(depth=-1, module_filter=[torch.nn.Conv2d, torch.nn.Linear])

    # instantiate model
    model = get_model(model, dataset_train_meta.shape[1:],
                      num_classes=dataset_train_meta.num_classes, exporter=exporter)
    loss_fn = getattr(torch.nn, loss)()

    backend = None

    # set up the trainer
    trainer = Trainer(dataset_train_meta, batch_size=batch_size, loss=loss_fn, exporter=exporter)
    trainer.set_model(model)
    trainer.optimize(name=optimizer, lr=base_lr)

    svcca = SVCCASubscriber(dataset_test_meta, 500, trainer.model.forward, freeze_at=freeze_at,
                            subsample=trainer.batches_per_epoch, backend=backend)
    trainer.add_subscriber(svcca)
    trainer.add_subscriber(RatioSubscriber(['weight_updates', 'weights'], backend=backend))
    trainer.add_subscriber(NormSubscriber('weight_gradients', backend=backend))
    trainer.add_subscriber(TrainAccuracySubscriber(backend=backend))
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test_meta, trainer.model.forward,
                                                  frequency=trainer.batches_per_epoch,
                                                  batch_size=batch_size, backend=backend))

    logged_metrics = ['test_accuracy',
                      'train_accuracy',
                      'weight_gradients_norm2',
                      'weight_updates_weights_ratio',
                      'self_similarity',
                      ]
    trainer.add_subscriber(SacredLoggingSubscriber(ex, logged_metrics))

    if freeze_at == 'percentage':
        modules = exporter.modules
        n_modules = len(modules)
        step = n_epochs // n_modules

    # do n epochs of training
    batches_per_epoch = trainer.batches_per_epoch
    for i in range(n_epochs):
        for b in range(batches_per_epoch):
            trainer.train_batch()

        if freeze_at == 'percentage':
            freeze_idx = i // step - 1
            if freeze_idx >= 0:
                svcca._freeze_module(modules[freeze_idx])

    # we return a result so we can use it for filtering aborted experiments in mongodb
    return 0
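# Illustration of the 'percentage' freezing schedule above, with hypothetical numbers (not part
# of the experiment code): for n_epochs=10 and 5 filtered modules, step = 10 // 5 = 2, so
# freeze_idx = i // step - 1 stays negative during epochs 0-1 and then selects one additional
# module every `step` epochs -- module 0 is frozen after epoch 2, module 1 after epoch 4, and so
# on; with these numbers the last module is never frozen.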
def main():
    backend = 'tb'
    dataset_train, dataset_test = load_dataset(train_config['dataset'])
    ikkuna.visualization.set_run_info('\n'.join(f'{k}: {v}' for k, v in train_config.items()))

    exporter = Exporter(depth=-1)
    loss_fn = getattr(torch.nn, train_config['loss'])()
    trainer = Trainer(dataset_train, batch_size=train_config['batch_size'],
                      loss=loss_fn, exporter=exporter)
    model = AlexNetMini(dataset_train.shape[1:], num_classes=dataset_train.num_classes,
                        exporter=exporter)
    trainer.set_model(model)
    trainer.optimize(name=train_config['optimizer'], lr=train_config['base_lr'])

    schedule_fn = globals()[train_config['schedule']]
    trainer.set_schedule(torch.optim.lr_scheduler.LambdaLR, schedule_fn)

    # add all the ordinary subscribers
    trainer.add_subscriber(TrainAccuracySubscriber(backend=backend))
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test, trainer.model.forward,
                                                  frequency=trainer.batches_per_epoch,
                                                  batch_size=train_config['batch_size'],
                                                  backend=backend))
    trainer.add_subscriber(SpectralNormSubscriber('weights', backend=backend))
    trainer.add_subscriber(RatioSubscriber(['weight_updates', 'weights'], backend=backend))
    trainer.add_subscriber(VarianceSubscriber('activations', backend=backend))
    trainer.add_subscriber(NormSubscriber('weights', backend=backend))
    trainer.add_subscriber(NormSubscriber('layer_gradients', backend=backend))
    trainer.add_subscriber(NormSubscriber('weight_gradients', backend=backend))

    batches_per_epoch = trainer.batches_per_epoch
    epochs = train_config['n_epochs']
    for i in tqdm(range(epochs), desc='Epoch'):
        for b in tqdm(range(batches_per_epoch), desc='Batch'):
            trainer.train_batch()
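# A minimal sketch of a schedule function that ``globals()[train_config['schedule']]`` could
# resolve to. ``torch.optim.lr_scheduler.LambdaLR`` calls such a function with the epoch index and
# multiplies the base learning rate by the returned factor. The name and the decay constants here
# are illustrative assumptions, not taken from the original experiment code.
def exponential_decay_schedule_fn(epoch):
    '''Halve the learning rate every 10 epochs (hypothetical example schedule).'''
    return 0.5 ** (epoch // 10)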
def run(batch_size, loss, optimizer, base_lr, n_epochs, dataset, model):
    # load the dataset
    transforms = [ToTensor()] if dataset != 'WhitenedCIFAR10' else None
    dataset_train_meta, dataset_test_meta = load_dataset(dataset,
                                                         train_transforms=transforms,
                                                         test_transforms=transforms)

    exporter = Exporter(depth=-1, module_filter=[torch.nn.Conv2d, torch.nn.Linear])

    # instantiate model
    if model == 'AdamModel':
        model = AdamModel(dataset_test_meta.shape[1:],
                          num_classes=dataset_train_meta.num_classes, exporter=exporter)
    else:
        model = get_model(model, dataset_train_meta.shape[1:],
                          num_classes=dataset_train_meta.num_classes, exporter=exporter)

    loss_fn = getattr(torch.nn, loss)()
    backend = None

    # set up the trainer
    trainer = Trainer(dataset_train_meta, batch_size=batch_size, loss=loss_fn, exporter=exporter)
    trainer.set_model(model)
    trainer.optimize(name=optimizer, lr=base_lr)

    trainer.add_subscriber(BiasCorrectedMomentsSubscriber(base_lr, 0.9, 0.999, 1e-8,
                                                          backend=backend))
    trainer.add_subscriber(LossSubscriber(backend=backend))
    # trainer.add_subscriber(RatioSubscriber(['weight_updates', 'weights'], backend=backend))
    # trainer.add_subscriber(NormSubscriber('weight_gradients', backend=backend))
    # trainer.add_subscriber(SpectralNormSubscriber('weights', backend=backend))
    # trainer.add_subscriber(VarianceSubscriber('weight_gradients', backend=backend))
    # trainer.add_subscriber(MeanSubscriber('weight_gradients', backend=backend))
    trainer.add_subscriber(TrainAccuracySubscriber(backend=backend))
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test_meta, trainer.model.forward,
                                                  frequency=trainer.batches_per_epoch,
                                                  batch_size=batch_size, backend=backend))

    logged_metrics = ['loss',
                      'test_accuracy',
                      'train_accuracy',
                      # 'weight_gradients_mean',
                      # 'weight_gradients_variance',
                      # 'weights_spectral_norm',
                      # 'weight_updates_weights_ratio',
                      'biased_grad_mean_estimate_mean',
                      'biased_grad_mean_estimate_median',
                      'biased_grad_mean_estimate_var',
                      'biased_grad_var_estimate_mean',
                      'biased_grad_var_estimate_median',
                      'biased_grad_var_estimate_var',
                      'biased_grad_mean_estimate_norm',
                      'biased_grad_var_estimate_norm',
                      'grad_mean_estimate_mean',
                      'grad_mean_estimate_median',
                      'grad_mean_estimate_var',
                      'grad_var_estimate_mean',
                      'grad_var_estimate_median',
                      'grad_var_estimate_var',
                      'grad_mean_estimate_norm',
                      'grad_var_estimate_norm',
                      'effective_lr_mean',
                      'effective_lr_median',
                      'effective_lr_var',
                      'effective_lr_norm',
                      ]

    trainer.add_subscriber(SacredLoggingSubscriber(ex, logged_metrics))

    # do n epochs of training
    batches_per_epoch = trainer.batches_per_epoch
    epochs = n_epochs
    for i in range(epochs):
        for b in range(batches_per_epoch):
            trainer.train_batch()

    # we return a result so we can use it for filtering aborted experiments in mongodb
    return 0
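# The arguments passed to BiasCorrectedMomentsSubscriber above mirror Adam's defaults
# (beta1=0.9, beta2=0.999, eps=1e-8). A minimal sketch of the quantities the logged metrics refer
# to, assuming the subscriber tracks Adam-style running moments -- this is standard Adam math,
# not the library's actual implementation:
import torch


def bias_corrected_moments(grad, m, v, t, beta1=0.9, beta2=0.999, eps=1e-8, lr=0.001):
    '''Update the biased running moments for one step and return the bias-corrected estimates
    together with the per-parameter effective learning rate.'''
    m = beta1 * m + (1 - beta1) * grad        # biased gradient mean estimate
    v = beta2 * v + (1 - beta2) * grad ** 2   # biased gradient variance (second moment) estimate
    m_hat = m / (1 - beta1 ** t)              # bias-corrected mean estimate
    v_hat = v / (1 - beta2 ** t)              # bias-corrected variance estimate
    effective_lr = lr / (v_hat.sqrt() + eps)  # step size actually applied to each parameter
    return m, v, m_hat, v_hat, effective_lr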
import os

from tqdm import tqdm
import torch
import numpy as np

from ikkuna.utils import load_dataset

dataset_train, dataset_test = load_dataset('CIFAR10')


def zca_whitening_matrix(X):
    '''Compute the ZCA whitening transform.

    Parameters
    ----------
    X : np.ndarray
        Data matrix of shape (n_samples, n_features)

    Returns
    -------
    np.ndarray
        (n_features, n_features) whitening matrix. Use it for whitening by ``X.dot(ZCAMatrix)``
        to obtain data with unit variance and zero covariance between features. (Not exactly,
        but more or less)
    '''
    # compute the covariance of the centred data
    N = X.shape[0]
    mean = X.mean(axis=0)
    sigma = (X - mean).T.dot((X - mean)) / N

    # decompose
    U, S, V = np.linalg.svd(sigma)
    epsilon = 1e-5
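# zca_whitening_matrix() above is cut off after the SVD. A minimal self-contained sketch of the
# standard remainder, assuming the usual construction U diag(1/sqrt(S + eps)) U^T that the
# docstring describes -- not necessarily the original author's exact code:
def zca_whitening_matrix_sketch(X, epsilon=1e-5):
    N = X.shape[0]
    mean = X.mean(axis=0)
    sigma = (X - mean).T.dot(X - mean) / N                         # feature covariance
    U, S, _ = np.linalg.svd(sigma)                                 # eigendecomposition via SVD
    return U.dot(np.diag(1.0 / np.sqrt(S + epsilon))).dot(U.T)    # ZCA whitening matrix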
train_transforms = [Lambda(whiten),
                    Lambda(random_flip),
                    Lambda(pad4),
                    Lambda(random_crop_32x32_from_40x40),
                    ToTensor()]
test_transforms = [Lambda(whiten), ToTensor()]


def initialize(m):
    # in PyTorch, all modules have a default initialization method, so you only need to override
    # specifics
    if isinstance(m, torch.nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, torch.nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)


dataset_train, dataset_test = load_dataset('CIFAR10', train_transforms=train_transforms,
                                           test_transforms=test_transforms)


def main():
    exporter = Exporter(depth=-1, module_filter=[torch.nn.Conv2d, torch.nn.Linear])
    trainer = Trainer(dataset_train, batch_size=train_config['batch_size'],
                      loss=train_config['loss'], exporter=exporter)
    model = resnet18(exporter=exporter, num_classes=dataset_train.num_classes)
    trainer.set_model(model)
    trainer.optimize(name=train_config['optimizer'],
                     weight_decay=train_config['weight_decay'],
                     momentum=train_config['momentum'],
                     lr=train_config['base_lr'])
    trainer.initialize(initialize)
    trainer.set_schedule(FunctionScheduler, train_config['schedule'])

    trainer.add_subscriber(TrainAccuracySubscriber())
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test, trainer.model.forward,
                                                  frequency=2000,
def run(batch_size, loss, optimizer, base_lr, n_epochs, schedule, dataset, model):
    global LR_SUBSCRIBER
    LR_SUBSCRIBER = RatioLRSubscriber(base_lr)

    # load the dataset
    dataset_train_meta, dataset_test_meta = load_dataset(dataset)

    exporter = Exporter(depth=-1, module_filter=[torch.nn.Conv2d, torch.nn.Linear])

    # instantiate model
    from ikkuna import models
    try:
        if model.startswith('ResNet'):
            model_fn = getattr(models, model.lower())
            model = model_fn(exporter=exporter)
        else:
            Model = getattr(models, model)
            model = Model(dataset_train_meta.shape[1:],
                          num_classes=dataset_train_meta.num_classes,
                          exporter=exporter)
    except AttributeError:
        raise ValueError(f'Unknown model {model}')

    # get loss and scheduling function since sacred can only log strings
    loss_fn = getattr(torch.nn, loss)()
    schedule_fn = globals()[schedule]

    # set up the trainer
    trainer = Trainer(dataset_train_meta, batch_size=batch_size, loss=loss_fn, exporter=exporter)
    trainer.set_model(model)
    trainer.optimize(name=optimizer, lr=base_lr)
    trainer.add_subscriber(RatioSubscriber(['weight_updates', 'weights']))
    trainer.add_subscriber(LR_SUBSCRIBER)
    trainer.add_subscriber(TrainAccuracySubscriber())
    trainer.add_subscriber(TestAccuracySubscriber(dataset_test_meta, trainer.model.forward,
                                                  frequency=trainer.batches_per_epoch,
                                                  batch_size=batch_size))
    trainer.add_subscriber(SacredLoggingSubscriber(ex, ['test_accuracy', 'learning_rate']))
    trainer.set_schedule(torch.optim.lr_scheduler.LambdaLR, schedule_fn)

    # do n epochs of training
    batches_per_epoch = trainer.batches_per_epoch
    epochs = n_epochs
    for i in range(epochs):
        for b in range(batches_per_epoch):
            trainer.train_batch()

    # do testing batchwise to avoid memory errors
    n_batches = 0
    accuracy = 0
    loader = iter(DataLoader(dataset_test_meta.dataset, batch_size=batch_size, shuffle=False,
                             pin_memory=True))
    try:
        model.train(False)
        while True:
            X, labels = next(loader)
            outputs = model(X.cuda())
            predictions = outputs.argmax(1)
            n_correct = (predictions.cpu() == labels).sum().item()
            accuracy += n_correct / X.shape[0]   # accumulate per-batch accuracy
            n_batches += 1
    except StopIteration:
        accuracy /= n_batches                    # mean of the per-batch accuracies

    return accuracy
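# The evaluation loop above runs in eval mode but still builds the autograd graph. A sketch of an
# equivalent batchwise evaluation helper with gradient tracking disabled via torch.no_grad();
# the helper name is hypothetical, and like the loop above it assumes the model lives on a CUDA
# device:
def evaluate_sketch(model, dataset, batch_size):
    import torch
    from torch.utils.data import DataLoader

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
    model.train(False)
    n_correct, n_samples = 0, 0
    with torch.no_grad():                                   # no autograd graph during testing
        for X, labels in loader:
            predictions = model(X.cuda()).argmax(1)
            n_correct += (predictions.cpu() == labels).sum().item()
            n_samples += X.shape[0]
    return n_correct / n_samples                            # per-sample accuracy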
def _main(dataset_str, model_str, batch_size, epochs, optimizer, **kwargs):
    '''Run the training procedure.

    Parameters
    ----------
    dataset_str : str
        Name of the dataset to use
    model_str : str
        Unqualified name of the model class to use
    batch_size : int
    epochs : int
    optimizer : str
        Name of the optimizer to use
    '''
    dataset_train, dataset_test = load_dataset(dataset_str, train_transforms=[ToTensor()],
                                               test_transforms=[ToTensor()])

    # for some strange reason, python claims 'torch referenced before assignment' when importing
    # at the top. hahaaaaa
    import torch

    bus = MessageBus('main')
    trainer = Trainer(dataset_train, batch_size=batch_size,
                      exporter=Exporter(depth=kwargs['depth'],
                                        module_filter=[torch.nn.Conv2d],
                                        message_bus=bus))
    trainer.set_model(model_str)
    trainer.optimize(name=optimizer, lr=kwargs.get('learning_rate', 0.01))

    if 'exponential_decay' in kwargs:
        decay = kwargs['exponential_decay']
        if decay is not None:
            trainer.set_schedule(torch.optim.lr_scheduler.ExponentialLR, decay)

    subsample = kwargs['subsample']
    backend = kwargs['visualisation']

    subscriber_added = False

    if kwargs['hessian']:
        from torch.utils.data import DataLoader
        from ikkuna.export.subscriber import HessianEigenSubscriber
        loader = DataLoader(dataset_train.dataset, batch_size=batch_size, shuffle=True)
        trainer.add_subscriber(HessianEigenSubscriber(trainer.model.forward, trainer.loss, loader,
                                                      batch_size,
                                                      frequency=trainer.batches_per_epoch,
                                                      num_eig=1, power_steps=25,
                                                      backend=backend))
        trainer.create_graph = True
        subscriber_added = True

    if kwargs['spectral_norm']:
        for kind in kwargs['spectral_norm']:
            spectral_norm_subscriber = SpectralNormSubscriber(kind, backend=backend)
            trainer.add_subscriber(spectral_norm_subscriber)
        subscriber_added = True

    if kwargs['variance']:
        for kind in kwargs['variance']:
            var_sub = VarianceSubscriber(kind, backend=backend)
            trainer.add_subscriber(var_sub)
        subscriber_added = True

    if kwargs['test_accuracy']:
        test_accuracy_subscriber = TestAccuracySubscriber(dataset_test, trainer.model.forward,
                                                          frequency=trainer.batches_per_epoch,
                                                          batch_size=batch_size,
                                                          backend=backend)
        trainer.add_subscriber(test_accuracy_subscriber)
        subscriber_added = True

    if kwargs['train_accuracy']:
        train_accuracy_subscriber = TrainAccuracySubscriber(subsample=subsample, backend=backend)
        trainer.add_subscriber(train_accuracy_subscriber)
        subscriber_added = True

    if kwargs['ratio']:
        for kind1, kind2 in kwargs['ratio']:
            ratio_subscriber = RatioSubscriber([kind1, kind2], subsample=subsample,
                                               backend=backend)
            trainer.add_subscriber(ratio_subscriber)
            pubs = ratio_subscriber.publications
            type, topics = pubs.popitem()
            # there can be multiple publications per type, but we know the RatioSubscriber only
            # publishes one
            trainer.add_subscriber(MessageMeanSubscriber(topics[0]))
        subscriber_added = True

    if kwargs['histogram']:
        for kind in kwargs['histogram']:
            histogram_subscriber = HistogramSubscriber(kind, backend=backend)
            trainer.add_subscriber(histogram_subscriber)
        subscriber_added = True

    if kwargs['norm']:
        for kind in kwargs['norm']:
            norm_subscriber = NormSubscriber(kind, backend=backend)
            trainer.add_subscriber(norm_subscriber)
        subscriber_added = True

    if kwargs['svcca']:
        svcca_subscriber = SVCCASubscriber(dataset_test, 500, trainer.model.forward,
                                           subsample=trainer.batches_per_epoch, backend=backend)
        trainer.add_subscriber(svcca_subscriber)
        subscriber_added = True

    if not subscriber_added:
        warnings.warn('No subscriber was added, so there will be no visualisation.')

    batches_per_epoch = trainer.batches_per_epoch
    print(f'Batches per epoch: {batches_per_epoch}')
    # exporter = trainer.exporter
    # modules = exporter.modules
    # n_modules = len(modules)

    epoch_range = range(epochs)
    batch_range = range(batches_per_epoch)
    if kwargs['verbose']:
        epoch_range = tqdm(epoch_range, desc='Epoch')
        batch_range = tqdm(batch_range, desc='Batch')

    for e in epoch_range:
        # freeze_idx = int(e/epochs * n_modules) - 1
        # if freeze_idx >= 0:
        #     exporter.freeze_module(modules[freeze_idx])
        for batch_idx in batch_range:
            trainer.train_batch()