def test_model_without_set_get_weights(model: nn.Module, testset: Dataset, metric: TrainingMetrics,
                                       config: TrainingConfig, rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Test whether the model stays functionally equivalent before and after model.random_teleport."""
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model, input_shape=(config.batch_size, 3, 32, 32)).to(device)
        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']
        m.random_teleport()
        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']
        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))
        print("==========================================")
        print("Loss and accuracy diff without set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")
    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
def test_model_with_set_get_weights(model: nn.Module, testset: Dataset, metric: TrainingMetrics,
                                    config: TrainingConfig, rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Test whether the model stays functionally equivalent when the original and teleported
    parameters are saved and restored through get_params/set_params."""
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model, input_shape=(config.batch_size, 3, 32, 32)).to(device)
        w_o, cob_o = m.get_params()
        m.random_teleport()
        w_t, cob_t = m.get_params()

        m.set_params(weights=w_o, cob=cob_o)
        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']

        m.set_params(weights=w_t, cob=cob_t)
        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']

        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))
        print("==========================================")
        print("Loss and accuracy diff with set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")
    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
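# A minimal sketch of how the two consistency checks above might be driven together.
# This helper is not part of the original experiment code; the arguments are assumed
# to come from the calling script. Since teleportation preserves the network function,
# both averaged differences are expected to be near numerical precision of zero.
def sketch_consistency_checks(model: nn.Module, testset: Dataset, metric: TrainingMetrics,
                              config: TrainingConfig, rept: int = 5) -> None:
    loss_diff, acc_diff = test_model_without_set_get_weights(model, testset, metric, config, rept)
    loss_diff_sg, acc_diff_sg = test_model_with_set_get_weights(model, testset, metric, config, rept)
    print("avg diff without set/get: loss={:.6e}, acc={:.6e}".format(loss_diff, acc_diff))
    print("avg diff with set/get:    loss={:.6e}, acc={:.6e}".format(loss_diff_sg, acc_diff_sg))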
def generate_1D_linear_interp(model: NeuralTeleportationModel,
                              param_o: Tuple[torch.Tensor, torch.Tensor],
                              param_t: Tuple[torch.Tensor, torch.Tensor],
                              a: torch.Tensor,
                              trainset: Dataset, valset: Dataset,
                              metric: TrainingMetrics, config: TrainingConfig,
                              checkpoint: dict = None) -> Tuple[list, list, list, list]:
    """1-dimensional linear interpolation between two sets of parameters: θ(α) = (1−α)θ + αθ′."""
    loss = []
    loss_v = []
    acc_t = []
    acc_v = []
    w_o, cob_o = param_o
    w_t, cob_t = param_t
    start_at = checkpoint["step"] if checkpoint else 0
    step = start_at  # Keep `step` defined even if the loop is interrupted before its first iteration.
    try:
        for step, coord in enumerate(a, start_at):
            # Interpolate the weights from W to T(W), then interpolate the cob
            # for the activation and batchNorm layers only.
            print("step {} of {} - alpha={}".format(step + 1, len(a), coord))
            w = (1 - coord) * w_o + coord * w_t
            cob = (1 - coord) * cob_o + coord * cob_t
            model.set_params(w, cob)
            res = test(model, trainset, metric, config)
            loss.append(res['loss'])
            acc_t.append(res['accuracy'])
            res = test(model, valset, metric, config)
            acc_v.append(res['accuracy'])
            loss_v.append(res['loss'])
    except:
        # No matter what interrupted the run, checkpoint the progress made so far.
        if not checkpoint:
            checkpoint = {
                'step': step,
                'alpha': a,
                'original_model': param_o,
                'teleported_model': param_t,
                'losses': loss,
                'acc_t': acc_t,
                'acc_v': acc_v,
            }
        else:
            # Use extend rather than `x = x.append(...)`: list.append returns None
            # and would wipe the saved lists.
            checkpoint['step'] = step
            checkpoint['losses'].extend(loss)
            checkpoint['acc_t'].extend(acc_t)
            checkpoint['acc_v'].extend(acc_v)
        torch.save(checkpoint, linterp_checkpoint_file)
        print("A checkpoint was made on step {} of {}".format(step, len(a)))

        # Re-raise to notify the caller's try/except, since there is no way to know
        # here whether the interruption happened before or after teleportation.
        raise

    return loss, acc_t, loss_v, acc_v
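# A minimal usage sketch for the interpolation above, assuming a trained
# NeuralTeleportationModel and the datasets/config supplied by the calling script.
# The alpha range extending slightly past [0, 1] is an assumption, chosen to show
# the landscape just beyond both endpoints; it is not prescribed by the function.
def sketch_linear_interp(model: NeuralTeleportationModel, trainset: Dataset, valset: Dataset,
                         metric: TrainingMetrics, config: TrainingConfig):
    param_o = model.get_params()   # (weights, cob) before teleportation
    model.random_teleport()
    param_t = model.get_params()   # (weights, cob) after teleportation
    alpha = torch.linspace(-0.25, 1.25, 50)
    return generate_1D_linear_interp(model, param_o, param_t, alpha,
                                     trainset, valset, metric, config)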
def generate_contour_loss_values(model: NeuralTeleportationModel,
                                 directions: Tuple[torch.Tensor, torch.Tensor],
                                 weights: torch.Tensor,
                                 surface: torch.Tensor,
                                 trainset: Dataset,
                                 metric: TrainingMetrics,
                                 config: TrainingConfig,
                                 checkpoint: dict = None) -> Tuple[np.ndarray, np.ndarray]:
    """Generate arrays containing the loss and accuracy values of a given model over a surface."""
    loss = []
    acc = []
    delta, eta = directions
    start_at = checkpoint['step'] if checkpoint else 0
    step = start_at  # Keep `step` defined even if the loop is interrupted before its first iteration.
    try:
        for step, (x, y) in enumerate(surface, start_at):
            print("Evaluating step {}: [{:.3f}, {:.3f}]".format(step, x, y))
            x, y = x.to(config.device), y.to(config.device)

            # L(w + alpha * delta + beta * eta)
            changes = (delta * x + eta * y).to(config.device)
            w = torch.add(weights, changes)
            model.set_weights(w)
            results = test(model, trainset, metric, config)
            loss.append(results['loss'])
            acc.append(results['accuracy'])
    except:
        # No matter what interrupted the run, checkpoint the surface generated so far.
        if not checkpoint:
            checkpoint = {'step': step, 'surface': surface, 'loss': loss}
        else:
            checkpoint['step'] = step
            checkpoint['loss'].extend(loss)
        torch.save(checkpoint, contour_checkpoint_file)
        print("A checkpoint was made at coord [{:.3f}, {:.3f}]".format(x, y))

        # Re-raise to notify the caller's try/except, since there is no way to know
        # here whether the interruption happened before or after teleportation.
        raise

    return np.array(loss), np.array(acc)
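# A minimal sketch of building the inputs for the contour evaluation above.
# The random directions and the flat grid layout are assumptions about how the
# calling script prepares `directions` and `surface`; the original experiment
# may normalize the directions differently (e.g. filter-wise).
def sketch_contour_inputs(model: NeuralTeleportationModel, grid_size: int = 25):
    weights = model.get_weights()
    delta = torch.randn_like(weights)  # random direction along the x axis
    eta = torch.randn_like(weights)    # random direction along the y axis
    coords = torch.linspace(-1.0, 1.0, grid_size)
    xx, yy = torch.meshgrid(coords, coords)
    # Flatten the grid into the sequence of (x, y) pairs the function iterates over.
    surface = torch.stack((xx.flatten(), yy.flatten()), dim=-1)
    return (delta, eta), weights, surface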
def train(model: Union[NeuralTeleportationModel, Tuple[str, NeuralTeleportationModel]],
          train_dataset: Dataset, metrics: TrainingMetrics, config: BreadthTeleportationTrainingConfig,
          val_dataset: Dataset = None, optimizer: Optimizer = None) -> Dict[str, NeuralTeleportationModel]:
    # If the model is not named (i.e. on the first iteration), initialize its name based on its class
    if isinstance(model, tuple):
        model_name, model = model
    else:
        model_name = model.__class__.__name__

    # Initialize an optimizer if there isn't already one
    if optimizer is None:
        optimizer = get_optimizer_from_model_and_config(model, config)

    train_loader = DataLoader(train_dataset, batch_size=config.batch_size)

    # Always move the model to the GPU before training
    model.cuda()

    stopping_epoch = min(config.starting_epoch + config.every_n_epochs, config.epochs + 1)
    for epoch in range(config.starting_epoch, stopping_epoch):
        print(f'Training epoch {epoch} for {model_name} ...')
        train_epoch(model, metrics, optimizer, train_loader, epoch, device=config.device)
        if val_dataset:
            val_res = test(model, val_dataset, metrics, config)
            print("Validation: {}".format(val_res))

    # Always move the model off the GPU after training
    model.cpu()

    # Update the starting epoch for the next iteration of model training
    config.starting_epoch += config.every_n_epochs

    # Determine if training has reached its end
    # TODO Add test for convergence
    is_train_end = config.starting_epoch >= config.epochs + 1

    if is_train_end:
        trained_models = {f'{model_name}_0': model}
    else:
        # Teleport the model and train each teleportation recursively
        trained_models = _teleport_and_train((model_name, model), train_dataset, metrics, config, optimizer,
                                             val_dataset=val_dataset)

    return trained_models
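# A minimal sketch of launching the recursive breadth-first training above.
# The exact key format of the returned dict is an assumption inferred from the
# `f'{model_name}_0'` base case; `_teleport_and_train` presumably extends it
# for each teleported branch.
def sketch_breadth_training(model: NeuralTeleportationModel, train_dataset: Dataset,
                            val_dataset: Dataset, metrics: TrainingMetrics,
                            config: BreadthTeleportationTrainingConfig):
    trained_models = train(model, train_dataset, metrics, config, val_dataset=val_dataset)
    # Each entry is one fully trained model at the end of a teleportation branch.
    for name, trained in trained_models.items():
        print(name, type(trained).__name__)
    return trained_models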
def run_model(model: nn.Module, config: TrainingConfig, metrics: TrainingMetrics,
              train_set: VisionDataset, test_set: VisionDataset, val_set: VisionDataset = None,
              optimizer: Optimizer = None, lr_scheduler=None) -> None:
    if isinstance(model, NeuralTeleportationModel):
        model_cls = model.network.__class__
    else:
        model_cls = model.__class__
    print(f"Training {model_cls.__name__}")

    # Always log parameters (to enable useful filtering options in the web interface)
    assert config.logger is not None
    hparams = config_to_dict(config)
    hparams.update({
        "model_name": model_cls.__name__.lower(),
        "dataset_name": train_set.__class__.__name__.lower()
    })
    config.logger.log_parameters(hparams)

    with config.logger.train():
        trained_model = train(model, train_set, metrics, config, val_dataset=val_set, optimizer=optimizer,
                              lr_scheduler=lr_scheduler)

    # Ensure the model is on the correct device before testing.
    # This avoids problems in case models are shuffled between CPU and GPU during training.
    trained_model.to(config.device)

    with config.logger.test():
        print("Testing {}: {} \n".format(
            model_cls.__name__,
            test(trained_model, test_set, metrics, config)))
    print()

    config.logger.flush()
def run_multi_output_training(train_fct: Callable, models: Sequence[nn.Module],
                              config: TrainingConfig, metrics: TrainingMetrics,
                              train_set: VisionDataset, test_set: VisionDataset,
                              val_set: VisionDataset = None) -> None:
    for model in models:
        print(f"Training {model.__class__.__name__}")
        trained_models = train_fct(model, train_dataset=train_set, metrics=metrics, config=deepcopy(config),
                                   val_dataset=val_set)

        for model_id, trained_model in trained_models.items():
            # Ensure the model is on the correct device before testing.
            # This avoids problems in case models are shuffled between CPU and GPU during training.
            trained_model.to(config.device)

            print("Testing {}: {} \n".format(
                model_id,
                test(trained_model, test_set, metrics, config)))
        print()
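# A minimal sketch of wiring `run_multi_output_training` to the recursive `train`
# function above; any training function with the same signature could be passed
# instead. Passing `deepcopy(config)` inside the loop (done by the runner itself)
# keeps per-model mutable state such as `starting_epoch` independent across models.
def sketch_multi_output_run(models: Sequence[nn.Module], config: TrainingConfig,
                            metrics: TrainingMetrics, train_set: VisionDataset,
                            test_set: VisionDataset, val_set: VisionDataset = None) -> None:
    run_multi_output_training(train, models, config, metrics, train_set, test_set, val_set=val_set)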
def start_training(model: NeuralTeleportationModel,
                   trainloader: DataLoader,
                   valset: VisionDataset,
                   metric: TrainingMetrics,
                   config: CompareTrainingConfig,
                   teleport_chance: float) -> np.ndarray:
    """Train a model under a specific scenario configuration.

    Scenario 1: train the model without using teleportation (teleport_chance = 0.0)
    Scenario 2: train the model with a probability of teleporting every n epochs (0 < teleport_chance < 1.0)
    Scenario 3: train the model with a teleportation every n epochs (teleport_chance = 1.0)

    Returns:
        np.ndarray containing the validation accuracy of every epoch.
    """
    model.to(config.device)
    optimizer = get_optimizer_from_model_and_config(model, config)

    results = []
    for e in np.arange(1, config.epochs + 1):
        train_epoch(model=model, metrics=metric, optimizer=optimizer, train_loader=trainloader,
                    epoch=e, device=config.device)
        results.append(test(model=model, dataset=valset, metrics=metric, config=config)['accuracy'])
        model.train()
        if e % config.every_n_epochs == 0 and random.random() <= teleport_chance:
            print("teleported model")
            if config.targeted_teleportation:
                # TODO: use the targeted teleportation function here once it is available.
                raise NotImplementedError
            else:
                model.random_teleport(cob_range=config.cob_range, sampling_type=config.cob_sampling)
                optimizer = get_optimizer_from_model_and_config(model, config)

    model.cpu()  # Force the network out of CUDA memory.
    return np.array(results)
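# A minimal sketch of running the three scenarios described in the docstring above
# on a single model, resetting it to its initial weights between scenarios so the
# curves stay comparable. The full experiment further below does the same over
# several runs and separate networks; this condensed form is an assumption.
def sketch_three_scenarios(model: NeuralTeleportationModel, trainloader: DataLoader,
                           valset: VisionDataset, metric: TrainingMetrics,
                           config: CompareTrainingConfig, teleport_chance: float = 0.5) -> dict:
    init_weights = model.get_weights()
    curves = {}
    for name, p in [("vanilla", 0.0), ("mixed", teleport_chance), ("always", 1.0)]:
        model.set_weights(init_weights)
        curves[name] = start_training(model, trainloader, valset, metric, config, teleport_chance=p)
    return curves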
net2 = MLPCOB(input_shape=(1, 28, 28), num_classes=10, hidden_layers=hidden_layers).to(device)

model1 = NeuralTeleportationModel(network=net1, input_shape=sample_input_shape)
if args.weights1 is not None:
    model1.load_state_dict(torch.load(args.weights1))
config.batch_size = 8  # Change batch size to train to different minima
train(model1, train_dataset=mnist_train, metrics=metrics, config=config, val_dataset=mnist_test)
torch.save(model1.state_dict(), pjoin(save_path, 'model1.pt'))
print("Model 1 test results: ", test(model1, mnist_test, metrics, config))

model2 = NeuralTeleportationModel(network=net2, input_shape=sample_input_shape)
if args.weights2 is not None:
    model2.load_state_dict(torch.load(args.weights2))
config.batch_size = 512  # Change batch size to train to different minima
train(model2, train_dataset=mnist_train, metrics=metrics, config=config, val_dataset=mnist_test)
torch.save(model2.state_dict(), pjoin(save_path, 'model2.pt'))
print("Model 2 test results: ", test(model2, mnist_test, metrics, config))

# Compare the output of the two models for a given input.
                                          device=device, batch_size=args.batch_size)

net = get_model(args.dataset, args.model, device=device)
if args.load_model_path:
    load_dict = torch.load(args.load_model_path)
    if net.state_dict().keys() != load_dict.keys():
        raise Exception("Model that was loaded does not match the model type used in the experiment.")
    net.load_state_dict(load_dict)
else:
    if args.train:
        train(net, train_dataset=trainset, metrics=metric, config=config)
        test(net, dataset=trainset, metrics=metric, config=config)
    if args.save_model:
        torch.save(net.state_dict(), get_nonexistent_path(args.save_path))

checkpoint = None
if checkpoint_exist:
    print("A checkpoint exists and its use was requested; overriding the experiment configuration!")
    checkpoint = torch.load(checkpoint_file)
    step = checkpoint['step']
    surface = checkpoint['surface'][step:]
    section = checkpoint['section']

plot_before = args.plot_before if not checkpoint else section == "before"
if plot_before:
                              batch_size=args.batch_size,
                              device=device,
                              cob_range=args.cob_range,
                              cob_sampling=args.cob_sampling,
                              targeted_teleportation=args.targeted_teleportation,
                              every_n_epochs=args.teleport_every)

res_vanilla = np.empty((args.run, args.epochs + 1))
res_5050 = np.empty((args.run, args.epochs + 1))
res_teleport = np.empty((args.run, args.epochs + 1))
results = [res_vanilla, res_5050, res_teleport]

# No need to run the test for each network, since they all start from the same weights.
init_val_res = test(model=nets[0], dataset=valset, metrics=metric, config=config)['accuracy']

teleport_probs = [0.0, args.teleport_chance, 1.0]
for scenario_num, net in enumerate(nets):
    print("Starting scenario {}".format(scenario_num + 1))
    for n in range(args.run):
        print("run no {}".format(n + 1))
        net.set_weights(init_weights)
        results[scenario_num][n] = np.concatenate(([init_val_res],
                                                   start_training(net, trainloader, valset, metric, config,
                                                                  teleport_chance=teleport_probs[scenario_num])))

mean_vanilla = res_vanilla.mean(axis=0)
std_vanilla = res_vanilla.std(axis=0) if args.run > 1 else 0
mean_5050 = res_5050.mean(axis=0)
    modelA.load_state_dict(torch.load(args.weightsA))
    # modelA = torch.load(args.weightsA)
if args.weightsB is not None:
    modelB.load_state_dict(torch.load(args.weightsB))
    # modelB = torch.load(args.weightsB)

if args.train:
    print("Train model A")
    train(modelA, trainset, metric, configA, val_dataset=valset)
    print("Train model B")
    train(modelB, trainset, metric, configB, val_dataset=valset)
    torch.save(modelA.state_dict(), pjoin(save_path, 'modelA.pt'))
    torch.save(modelB.state_dict(), pjoin(save_path, 'modelB.pt'))

res = test(modelA, valset, metric, configA)
print("Model A scored {} acc on valset".format(res['accuracy']))
res = test(modelB, valset, metric, configB)
print("Model B scored {} acc on valset".format(res['accuracy']))

a = torch.linspace(args.x[0], args.x[1], int(args.x[2]))
teleportation_model = get_model(args.dataset, args.model, device=device)
interpolation_config = LandscapeConfig(batch_size=1000, device=device)

# Interpolate between the two original models
print("Interpolating between original models...")
param_o = modelA.get_params()
param_t = modelB.get_params()