def generate_teleportation_training_weights(model: NeuralTeleportationModel,
                                            trainset: Dataset,
                                            metric: TrainingMetrics,
                                            config: LandscapeConfig) -> Tuple[List[torch.Tensor], torch.Tensor]:
    """
    Generate a list of weight checkpoints recorded while training the given model.

    If `config.teleport_at` contains epoch indices, the model is teleported at the start of
    each of those epochs and the optimizer is re-created for the teleported weights.
    """
    w = [model.get_weights().clone().detach().cpu()]
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=config.batch_size, drop_last=True)
    optim = get_optimizer_from_model_and_config(model, config)

    for e in range(config.epochs):
        if config.teleport_at and e in config.teleport_at:
            print("Teleporting Model...")
            model.random_teleport(cob_range=config.cob_range, sampling_type=config.cob_sampling)
            w.append(model.get_weights().clone().detach().cpu())
            optim = get_optimizer_from_model_and_config(model, config)

        train_epoch(model, metrics=metric, config=config, train_loader=trainloader,
                    optimizer=optim, epoch=e, device=config.device)
        w.append(model.get_weights().clone().detach().cpu())

    final = w[-1]
    return w, final
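# Hypothetical usage sketch for `generate_teleportation_training_weights`. The
# `LandscapeConfig` field names (epochs, batch_size, teleport_at, cob_range,
# cob_sampling, device) are inferred from how `config` is accessed above; the
# constructor call itself is an assumption, not confirmed by this section.
def _example_generate_training_weights(model, trainset, metrics):
    config = LandscapeConfig(epochs=5, batch_size=32, teleport_at=[2],
                             cob_range=0.5, cob_sampling='intra_landscape', device='cpu')
    # One checkpoint is recorded before training, one after each epoch,
    # and an extra one right after each teleportation.
    w_checkpoints, final_w = generate_teleportation_training_weights(
        model, trainset, metric=metrics, config=config)
    return w_checkpoints, final_w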
def test_calculate_cob_weights(network, model_name=None, input_shape=(1, 1, 28, 28),
                               noise=False, verbose=True):
    """
    Test if a change of basis (cob) can be calculated and applied to a network to teleport
    it from the initial weights to the target weights.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        noise (bool): Whether to add noise to the target weights before optimisation
        verbose (bool): Whether to display sample outputs during the test
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)

    initial_weights = model.get_weights()
    w1 = model.get_weights(concat=False, flatten=False, bias=False)

    model.random_teleport()
    c1 = model.get_cob()
    model.random_teleport()
    c2 = model.get_cob()

    target_weights = model.get_weights()
    w2 = model.get_weights(concat=False, flatten=False, bias=False)

    if noise:
        for w in w2:
            w += torch.rand(w.shape) * 0.001

    calculated_cob = model.calculate_cob(w1, w2)

    model.initialize_cob()
    model.set_weights(initial_weights)
    model.teleport(calculated_cob, reset_teleportation=True)

    calculated_weights = model.get_weights()

    # Compare against the target weights, which the assertion below also checks
    error = (calculated_weights - target_weights).abs().mean()

    if verbose:
        print("Target weights: ", target_weights.flatten())
        print("Calculated cob weights: ", calculated_weights.flatten())
        print("Weight error ", error)
        print("C1: ", c1.flatten()[:10])
        print("C2: ", c2.flatten()[:10])
        print("C1 * C2: ", (c1 * c2).flatten()[:10])
        print("Calculated cob: ", calculated_cob.flatten()[:10])

    assert np.allclose(calculated_weights.detach().numpy(), target_weights.detach().numpy()), \
        "Calculate cob and weights FAILED for " + model_name + " model with error: " + str(error.item())

    print("Calculate cob and weights successful for " + model_name + " model.")
def simulate_teleportation_sphere(model: NeuralTeleportationModel,
                                  config: "PseudoTeleportationTrainingConfig",
                                  **kwargs) -> NeuralTeleportationModel:
    print(f"Shifting weights on a sphere similar to a {config.cob_sampling} teleportation "
          f"w/ {config.cob_range} COB range.")

    model.cpu()  # Move model to CPU to avoid having 2 models on the GPU (to avoid possible CUDA OOM error)

    teleported_model = deepcopy(model).random_teleport(cob_range=config.cob_range,
                                                       sampling_type=config.cob_sampling)

    init_layers = model.get_weights(concat=False)
    teleported_layers = teleported_model.get_weights(concat=False)

    pseudo_teleported_layers = []
    for init_layer, teleported_layer in zip(init_layers, teleported_layers):
        layer_shift = torch.randn_like(init_layer)
        layer_shift = normalize(layer_shift, p=1, dim=0) * torch.norm(teleported_layer - init_layer, 1)
        pseudo_teleported_layer = init_layer + layer_shift
        pseudo_teleported_layers.append(pseudo_teleported_layer)

    pseudo_teleported_weights = torch.cat(pseudo_teleported_layers)
    model.set_weights(pseudo_teleported_weights)
    return model.to(config.device)
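# Why `normalize(..., p=1, dim=0) * torch.norm(..., 1)` above matches the real
# teleportation's displacement: dividing a random vector by its L1 norm and
# multiplying by the reference displacement's L1 norm yields a random direction
# whose L1 norm equals that reference norm. A self-contained check (a sketch
# independent of the model classes used in this repository):
def _example_l1_matched_shift():
    import torch
    from torch.nn.functional import normalize

    d = torch.randn(10)  # stand-in for (teleported_layer - init_layer)
    shift = normalize(torch.randn(10), p=1, dim=0) * torch.norm(d, 1)
    assert torch.isclose(torch.norm(shift, 1), torch.norm(d, 1))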
def test_set_cob(network, model_name, input_shape=(1, 1, 28, 28), verbose=False):
    """
    Test if the set_change_of_basis method works.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        verbose (bool): Flag to print comparison between network and a teleportation
    """
    x = torch.rand(input_shape)
    model = NeuralTeleportationModel(network, input_shape=input_shape)
    model.random_teleport()
    w1 = model.get_weights()
    t1 = model.get_cob()
    pred1 = model(x)

    model.reset_weights()
    pred2 = model(x)

    model.set_weights(w1)
    model.teleport_activations(t1)
    pred3 = model(x)

    if verbose:
        print("Diff prediction average: ", (pred1 - pred3).mean())
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred3.flatten()[:10])

    assert not np.allclose(pred1.detach().numpy(), pred2.detach().numpy(), atol=1e-5)
    assert np.allclose(pred1.detach().numpy(), pred3.detach().numpy(), atol=1e-5), \
        "Set cob/weights did not work."

    print("Set cob successful for " + model_name + " model.")
def test_reset_weights(network: nn.Module, input_shape: Tuple = (1, 1, 28, 28),
                       model_name: str = None):
    """
    Checks that the reset_weights() method of NeuralTeleportationModel works.

    Args:
        network (nn.Module): Network to be tested
        input_shape (tuple): Input shape of network
        model_name (str): The name or label assigned to differentiate the model
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network, input_shape=input_shape)

    w1 = model.get_weights().detach().numpy()
    model.reset_weights()
    w2 = model.get_weights().detach().numpy()

    assert not np.allclose(w1, w2)
    print("Reset weights successful for " + model_name + " model.")
def test_calculate_ones(network, model_name=None, input_shape=(1, 1, 28, 28),
                        noise=False, verbose=False):
    """
    Test if the correct change of basis can be calculated for a cob of ones.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        noise (bool): Whether to add noise to the target weights before optimisation
        verbose (bool): Whether to display sample outputs during the test
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)
    model.initialize_cob()

    w1 = model.get_weights(concat=False, flatten=False, bias=False)
    _w1 = model.get_weights(concat=False, flatten=False, bias=False)

    if noise:
        for w in _w1:
            w += torch.rand(w.shape) * 0.001

    cob = model.get_cob()
    calculated_cob = model.calculate_cob(w1, _w1)

    error = (cob - calculated_cob).abs().mean()

    if verbose:
        print("Cob: ", cob.flatten()[:10])
        print("Calculated cob: ", calculated_cob.flatten()[:10])
        print("cob error ", (calculated_cob - cob).flatten()[:10])
        print("cob error : ", error)

    assert np.allclose(cob, calculated_cob), \
        "Calculate cob (ones) FAILED for " + model_name + " model."

    print("Calculate cob (ones) successful for " + model_name + " model.")
def test_teleport(network: nn.Module, input_shape: Tuple = (1, 1, 28, 28),
                  verbose: bool = False, atol: float = 1e-5, model_name: str = None):
    """
    Teleports the network randomly, checks that the teleportation preserves the network function,
    and returns the mean difference between the predictions before and after teleportation.

    Args:
        network (nn.Module): Network to be tested
        input_shape (tuple): Input shape of network
        verbose (bool): Flag to print comparison between network and a teleportation
        atol (float): Absolute tolerance allowed between outputs to pass the test
        model_name (str): The name or label assigned to differentiate the model

    Returns:
        float, the average difference between the predictions of the network and its teleportation
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)
    model.eval()  # model must be set to eval because of dropout

    x = torch.rand(input_shape)
    pred1 = model(x).detach().numpy()
    w1 = model.get_weights().detach().numpy()

    model.random_teleport()

    pred2 = model(x).detach().numpy()
    w2 = model.get_weights().detach().numpy()

    diff_average = np.mean(np.abs(pred1 - pred2))

    if verbose:
        print("Sample outputs: ")
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred2.flatten()[:10])
        print("Diff weight average: ", np.mean(np.abs(w1 - w2)))
        print("Diff prediction average: ", diff_average)

    assert not np.allclose(w1, w2)
    assert np.allclose(pred1, pred2, atol=atol), \
        "Teleportation did not work for model {}. Average difference: {}".format(model_name, diff_average)

    print("Teleportation successful for " + model_name + " model.")

    return diff_average
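# The network-function invariance that `test_teleport` verifies can be seen in a
# minimal two-layer ReLU example: scaling a hidden unit's incoming weights by a
# positive factor t and its outgoing weights by 1/t leaves the output unchanged,
# because relu(t * x) = t * relu(x) for t > 0. This is a standalone sketch of the
# underlying idea, not this repository's own teleportation code:
def _example_scaling_invariance():
    import torch

    torch.manual_seed(0)
    W1 = torch.randn(5, 4)   # hidden x input
    W2 = torch.randn(3, 5)   # output x hidden
    t = torch.rand(5) + 0.5  # positive change of basis for the hidden layer
    x = torch.randn(4)

    y = W2 @ torch.relu(W1 @ x)
    # Teleported weights: scale rows of W1 by t, columns of W2 by 1/t
    y_tel = (W2 / t[None, :]) @ torch.relu((t[:, None] * W1) @ x)
    assert torch.allclose(y, y_tel, atol=1e-6)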
def test_cuda_teleport(network, input_shape=(1, 1, 28, 28), verbose=False):
    """
    Test if a model can be teleported successfully on CUDA.

    Args:
        network (nn.Module): Model to test
        input_shape (tuple): Input shape for the model
        verbose (bool): If True, samples of predictions are printed

    Returns:
        Average difference between elements of the prediction before and after teleportation.
    """
    network = network.cuda()
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)

    x = torch.rand(input_shape).cuda()
    pred1 = model(x).cpu().detach().numpy()
    w1 = model.get_weights().cpu().detach().numpy()

    model.random_teleport()

    pred2 = model(x).cpu().detach().numpy()
    w2 = model.get_weights().cpu().detach().numpy()

    # Average prediction difference, as described in the docstring and assert message
    diff_average = np.mean(np.abs(pred1 - pred2))

    if verbose:
        print("Model on device: {}".format(next(network.parameters()).device))
        print("Sample outputs: ")
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred2.flatten()[:10])

    assert not np.allclose(w1, w2)
    assert np.allclose(pred1, pred2), \
        "Teleportation did not work. Average difference: {}".format(diff_average)

    print("Teleportation successful.")

    return diff_average
def weighted_grad_norm(model: NeuralTeleportationModel, data: Tensor, target: Tensor,
                       metrics: TrainingMetrics, order: Union[str, number] = 'fro',
                       **kwargs) -> Number:
    weights = model.get_weights()
    gradients = torch.stack([model.get_grad(data_batch, target_batch, metrics.criterion)
                             for data_batch, target_batch in zip(data, target)]).mean(dim=0)

    # Compute the gradient/weight ratio where possible
    ratio = gradients / weights

    # Identify where the ratio is numerically unstable (division by 0-valued weights)
    nan_ratio_mask = torch.isnan(ratio)

    # Replace unstable values by statistically representative measures
    ratio[nan_ratio_mask] = ratio[~nan_ratio_mask].mean()

    # Compute the norm of the ratio and move result to CPU (to avoid cluttering GPU if fct is called repeatedly)
    return torch.norm(ratio, p=order).item()
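# The helper `normalized_dot_product`, used throughout the functions below, is not
# defined in this section. A minimal implementation consistent with its usage here
# (a cosine similarity whose arccos gives the angle between two flattened vectors)
# might look like the following sketch; the real repository helper may differ:
def normalized_dot_product(u: Tensor, v: Tensor) -> Tensor:
    """Cosine of the angle between u and v: (u . v) / (||u|| ||v||)."""
    return torch.dot(u.flatten(), v.flatten()) / (torch.norm(u) * torch.norm(v))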
def micro_teleportation_dot_product(network, dataset, nb_teleport=100, network_descriptor='',
                                    sampling_types=['intra_landscape'], batch_sizes=[8, 64],
                                    criterion=None, device='cpu', verbose=False,
                                    random_data=False, number_classes=10) -> None:
    """
    This method tests the scalar product between the teleportation line and the gradient, as well as
    between a random vector and the gradient, for nullity. It then displays the histograms of the
    calculated scalar products. The method also aggregates all relevant micro-teleportation data
    in a dataframe.

    Args:
        network: The model used to compute the micro-teleportations
        dataset: The dataset used to calculate the gradient and to size the neural teleportation model
        nb_teleport: Number of times the network is teleported and the scalar product calculated;
            an average is then computed
        network_descriptor: String describing the content of the network
        sampling_types: Teleportation sampling types; governs how the change of basis is computed
        batch_sizes: Sizes of the minibatches used to perform the gradient calculation
        criterion: The loss function used to compute the gradient
        device: Device used to compute the network operations ('cpu' or 'cuda')
        verbose: If True, output extensive details about the calculated vectors and aggregated data
            (mainly for debugging purposes)
        random_data: If True, random data with random labels is used for computing the gradient;
            if False, the dataset is used
        number_classes: Number of classes of the classification problem
    """
    # Arbitrary precision threshold for nullity comparison
    torch.set_printoptions(precision=10, sci_mode=True)
    tol = 1e-2
    cobs = [0.001]
    hist_dir = f'images/histograms/{network_descriptor}'

    if torch.cuda.is_available():
        print(f'{green}Using CUDA{reset}')
        network = network.cuda()

    if criterion is None:
        loss_func = torch.nn.CrossEntropyLoss()
    else:
        loss_func = criterion

    # Initialize the dataframe for data aggregation
    aggregator = pd.DataFrame(columns=['model name',
                                       'sampling type',
                                       'batch size',
                                       'COB range',
                                       'weights vector length',
                                       'Micro-teleportation vs Gradient',
                                       'Micro-teleportation vs Gradient std',
                                       'Gradient vs Random Vector',
                                       'Gradient vs Random Vector std',
                                       'Random Vector vs Random Vector',
                                       'Random Vector vs Random Vector std',
                                       'Micro-teleportation vs Random Vector',
                                       'Micro-teleportation vs Random Vector std'])

    for sampling_type in sampling_types:
        for batch_size in batch_sizes:
            dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
            data, target = next(iter(dataloader))

            # Save the initial weights for further resets
            model = NeuralTeleportationModel(network=network, input_shape=data.shape)

            if torch.cuda.is_available():
                model = model.cuda()
            else:
                model = model.cpu()

            if torch.cuda.is_available():
                w1 = model.get_weights().detach()
            else:
                w1 = model.get_weights().detach().numpy()

            for cob in cobs:
                angle_results = []
                rand_angle_results = []
                rand_rand_angle_results = []
                rand_micro_angle_results = []

                iterations = min(int(len(dataloader.dataset) / dataloader.batch_size), nb_teleport)

                for _ in tqdm(range(0, iterations)):
                    # Get next data batch
                    data, target = next(iter(dataloader))
                    if random_data:
                        data, target = torch.rand(data.shape), torch.randint(0, number_classes, target.shape)
                    data, target = data.to(device), target.to(device)
                    grad = model.get_grad(data, target, loss_func, zero_grad=False)

                    # Reset the weights for the next teleportation
                    model.set_weights(torch.as_tensor(w1))

                    # Teleport and get the new weights
                    model = model.random_teleport(cob_range=cob, sampling_type=sampling_type)

                    if torch.cuda.is_available():
                        w2 = model.get_weights().detach()
                    else:
                        w2 = model.get_weights().detach().numpy()

                    # Get the teleportation vector
                    micro_teleport_vec = (w2 - w1)

                    random_vector = torch.rand(grad.shape, dtype=torch.float) - 0.5
                    random_vector2 = torch.rand(grad.shape, dtype=torch.float) - 0.5

                    random_vector = random_vector.to(device)
                    random_vector2 = random_vector2.to(device)

                    # Normalized scalar products & angle calculations
                    dot_prod = normalized_dot_product(grad, micro_teleport_vec)
                    angle = np.degrees(torch.acos(dot_prod).cpu())

                    rand_dot_prod = normalized_dot_product(grad, random_vector)
                    rand_angle = np.degrees(torch.acos(rand_dot_prod).cpu())

                    rand_rand_dot_prod = normalized_dot_product(random_vector2, random_vector)
                    rand_rand_angle = np.degrees(torch.acos(rand_rand_dot_prod).cpu())

                    rand_micro_dot_prod = normalized_dot_product(random_vector2, micro_teleport_vec)
                    rand_micro_angle = np.degrees(torch.acos(rand_micro_dot_prod).cpu())

                    # Perpendicularity assertion
                    failed = (not torch.allclose(dot_prod, torch.tensor([0.0]).to(device), atol=tol))
                    rand_failed = (not torch.allclose(rand_dot_prod, torch.tensor([0.0]).to(device), atol=tol))
                    target_angle = 90.0

                    angle_results.append(angle)
                    rand_angle_results.append(rand_angle)
                    rand_rand_angle_results.append(rand_rand_angle)
                    rand_micro_angle_results.append(rand_micro_angle)

                angle_results = np.array(angle_results)
                rand_angle_results = np.array(rand_angle_results)
                rand_rand_angle_results = np.array(rand_rand_angle_results)
                rand_micro_angle_results = np.array(rand_micro_angle_results)

                # Append results to the dataframe for further plotting
                aggregator = aggregator.append({'model name': network_descriptor,
                                                'sampling type': sampling_type,
                                                'batch size': batch_size,
                                                'COB range': cob,
                                                'weights vector length': len(w1),
                                                'Micro-teleportation vs Gradient': angle_results.mean(),
                                                'Micro-teleportation vs Gradient std': angle_results.std(),
                                                'Gradient vs Random Vector': rand_angle_results.mean(),
                                                'Gradient vs Random Vector std': rand_angle_results.std(),
                                                'Random Vector vs Random Vector': rand_rand_angle_results.mean(),
                                                'Random Vector vs Random Vector std': rand_rand_angle_results.std(),
                                                'Micro-teleportation vs Random Vector': rand_micro_angle_results.mean(),
                                                'Micro-teleportation vs Random Vector std': rand_micro_angle_results.std()},
                                               ignore_index=True)

                print(f'The angle between the gradient and a micro-teleportation vector is: '
                      f'{red * failed}'
                      f'{np.round(angle_results.mean(), abs(int(np.log10(tol))))}',
                      f' (!=0 => FAILED!)' * failed,
                      f'{reset}',
                      f' using {sampling_type} sampling type',
                      f', the delta in angle is {angle - target_angle}°\n',
                      f'The angle between the gradient and a random vector is: ',
                      f'{red * rand_failed}{rand_angle_results.mean()}',
                      f' (FAILED!)' * rand_failed,
                      f'{reset}',
                      f', the delta in angle is {rand_angle - target_angle}°\n',
                      sep='')

                if verbose:
                    print(aggregator.iloc[aggregator.last_valid_index()])
                    if torch.cuda.is_available():
                        print(f'w1: {w1}',
                              f'nans: {torch.sum(torch.isnan(w1))}',
                              f'max: {torch.max(w1)}',
                              f'min: {torch.min(w1)}',
                              sep='\n')
                        print(f'w2: {w2}',
                              f'nans: {torch.sum(torch.isnan(w2))}',
                              f'max: {torch.max(w2)}',
                              f'min: {torch.min(w2)}',
                              sep='\n')
                    else:
                        print(f'w1: {w1}',
                              f'nans: {np.sum(np.isnan(w1))}',
                              f'max: {np.max(w1)}',
                              f'min: {np.min(w1)}',
                              sep='\n')
                        print(f'w2: {w2}',
                              f'nans: {np.sum(np.isnan(w2))}',
                              f'max: {np.max(w2)}',
                              f'min: {np.min(w2)}',
                              sep='\n')

                if not np.isnan(aggregator.loc[aggregator.last_valid_index(),
                                               'Micro-teleportation vs Gradient']):
                    delta = 0.25
                    x_min = 90 - delta
                    x_max = 90 + delta
                    figsize = (10.0, 10.0)

                    fig, (ax0, ax1, ax2, ax3) = plt.subplots(4, 1, figsize=figsize)

                    if random_data:
                        fig.suptitle(f'{network_descriptor} on Random Data and batch size of {batch_size}')
                    else:
                        fig.suptitle(f'{network_descriptor} on CIFAR-10 and batch size of {batch_size}')

                    bin_height, bin_boundary = np.histogram(np.array(angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax0.bar(bin_boundary[:-1], bin_height, width=np.maximum(width, 0.01))
                    ax0.legend(['Micro-teleportation\n vs \n Gradient'])
                    ax0.set_xlim(x_min, x_max)
                    ax0.set_yticks([])

                    bin_height, bin_boundary = np.histogram(np.array(rand_micro_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax1.bar(bin_boundary[:-1], bin_height, width=np.maximum(width, 0.1), color='g')
                    ax1.set_xlim(x_min, x_max)
                    ax1.legend(['Micro-teleportation\n vs \n Random Vector'])
                    ax1.set_yticks([])

                    bin_height, bin_boundary = np.histogram(np.array(rand_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax2.bar(bin_boundary[:-1], bin_height, width=np.maximum(width, 0.1), color='g')
                    ax2.set_xlim(x_min, x_max)
                    ax2.legend(['Gradient\n vs \n Random Vector'])
                    ax2.set_yticks([])

                    bin_height, bin_boundary = np.histogram(np.array(rand_rand_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax3.bar(bin_boundary[:-1], bin_height, width=np.maximum(width, 0.1), color='g')
                    ax3.set_xlim(x_min, x_max)
                    ax3.legend(['Random Vector\n vs \n Random Vector'])
                    ax3.set_yticks([])

                    plt.xlabel('Angle in degrees')

                    Path(hist_dir).mkdir(parents=True, exist_ok=True)
                    plt.savefig(f'{hist_dir}/{network_descriptor}_'
                                f'_cob_{cob}_iter_{iterations}_batch_size_{batch_size}.png')
                    plt.show()

                    if random_data:
                        fig.savefig(f"{network_descriptor}-RandomData-batchsize_{batch_size}.pdf",
                                    bbox_inches='tight')
                    else:
                        fig.savefig(f"{network_descriptor}-cifar10-batchsize_{batch_size}.pdf",
                                    bbox_inches='tight')
                else:
                    print(red)
                    print(aggregator.iloc[aggregator.last_valid_index()])
                    print(reset)
def dot_product_between_teleportation(network, dataset, network_descriptor=None,
                                      nb_teleport=100, device='cpu') -> None:
    """
    This method tests the scalar product between the initial and teleported sets of weights and plots
    the results with respect to the order of magnitude of the change of basis of the teleportation.

    Args:
        network: The model used to compute the teleportations
        dataset: The dataset used to size the neural teleportation model
        network_descriptor: String describing the content of the network
        nb_teleport: Number of teleportations used for the statistical calculations (mean, variance, etc.)
        device: Device used to compute the network operations ('cpu' or 'cuda')
    """
    if network_descriptor is None:
        network_descriptor = network.__class__.__name__

    series_dir = f'images/series_dot_prod_vs_cob/{network_descriptor}'

    if torch.cuda.is_available():
        print(f'{green}Using CUDA{reset}')
        network = network.cuda()

    # Prepare the range of COB to test
    cobs = np.linspace(0.00001, 0.999, 40)

    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16)
    data, target = next(iter(dataloader))
    model = NeuralTeleportationModel(network=network, input_shape=data.shape)

    if torch.cuda.is_available():
        model = model.cuda()
    else:
        model = model.cpu()

    w1 = model.get_weights().detach().to(device)

    dot_product_results = []
    angles = []

    for cob in cobs:
        dot_product_result = 0
        angle = 0

        for _ in tqdm(range(0, nb_teleport)):
            # Reset the weights
            model.set_weights(w1)

            # Teleport and get the new weights
            model.random_teleport(cob_range=cob, sampling_type='intra_landscape')
            w2 = model.get_weights().detach().to(device)

            # cos(theta) = (w1 . w2) / (||w1|| ||w2||)
            dot_product_result += normalized_dot_product(w1, w2)
            angle += np.degrees(torch.acos(normalized_dot_product(w1, w2)).cpu())

        dot_product_result /= nb_teleport
        angle /= nb_teleport

        dot_product_results.append(dot_product_result.item())
        angles.append(angle.item())

    plt.plot(cobs, dot_product_results)
    plt.title(f'Scalar product between original and \nteleported weights with '
              f'respect to COB\'s order of magnitude\n{network_descriptor}')
    plt.ylabel('Scalar product')
    plt.xlabel('change of basis')
    Path(series_dir).mkdir(parents=True, exist_ok=True)
    plt.savefig(f'{series_dir}/dot_product_vs_cob_{network_descriptor}_Samp_type_intra_landscape')
    plt.show()

    plt.plot(cobs, angles)
    plt.title(f'Angle between original and \nteleported weights with '
              f'respect to COB\'s order of magnitude\n{network_descriptor}')
    plt.ylabel('Theta')
    plt.xlabel('change of basis')
    Path(series_dir).mkdir(parents=True, exist_ok=True)
    plt.savefig(f'{series_dir}/angle_vs_cob_{network_descriptor}_Samp_type_intra_landscape')
    plt.show()
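# Hypothetical invocation of `dot_product_between_teleportation`. The torchvision
# dataset and the MLPCOB constructor arguments mirror usage elsewhere in this
# section, but this exact call is an illustration, not a test from the repository.
def _example_dot_product_vs_cob():
    from torchvision.datasets import MNIST
    from torchvision.transforms import ToTensor

    mnist = MNIST('/tmp/mnist', train=True, download=True, transform=ToTensor())
    net = MLPCOB(input_shape=(1, 28, 28), num_classes=10)
    dot_product_between_teleportation(net, mnist,
                                      network_descriptor='MLPCOB',
                                      nb_teleport=20)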
                   config=config, val_dataset=mnist_test)
torch.save(model2.state_dict(), pjoin(save_path, 'model2.pt'))
print("Model 2 test results: ", test(model2, mnist_test, metrics, config))

# Compare the output of the two models for a given input.
x, y = mnist_train[0]
pred1 = model1(x.to(device))
pred2 = model2(x.to(device))
print("Model 1 prediction: ", pred1)
print("Model 2 prediction: ", pred2)
print("Pred diff", (pred1 - pred2).abs())
print("Pred diff mean", (pred1 - pred2).abs().mean())

print("Initial: w1 - w2 ([:100]): ", (model1.get_weights() - model2.get_weights()).abs()[:100])
print("Initial: w1 - w2 ([-100:]): ", (model1.get_weights() - model2.get_weights()).abs()[-100:])

w1 = model1.get_weights()
w2 = model2.get_weights()
diff = (w1.detach().cpu() - w2.detach().cpu()).abs().mean()
print("Initial weight difference: ", diff)

w1 = model1.get_weights(concat=False, flatten=False, bias=False)
w2 = model2.get_weights(concat=False, flatten=False, bias=False)

calculated_cob = model1.calculate_cob(w1, w2, concat=True, eta=0.00001, steps=6000)
                          teleport_at=args.teleport_at, device=device)

trainset, valset, testset = get_dataset_subsets("cifar10")
model = get_model("cifar10", args.model)

x = torch.linspace(-1, 1, args.x)
y = torch.linspace(-1, 1, args.y)
shape = x.shape if y is None else (len(x), len(y))
surface = torch.stack((x, y))

model.to(device)
model = NeuralTeleportationModel(model, input_shape=(config.batch_size, 3, 32, 32)).to(device)
original_w = model.get_weights()

w_checkpoints, final_w = losslandscape.generate_teleportation_training_weights(model, trainset,
                                                                               metric=metric, config=config)
delta = losslandscape.generate_random_2d_vector(final_w, seed=1)
eta = losslandscape.generate_random_2d_vector(final_w, seed=2)

# Calculate the angle between the two direction vectors.
print("angle between direction is {} rad".format(losslandscape.compute_angle(delta, eta)))

loss, _ = losslandscape.generate_contour_loss_values(model, (delta, eta), surface, trainset, metric, config)
loss = np.array(loss)
loss = np.resize(loss, shape)

w_diff = [(w - final_w) for w in w_checkpoints]
                        help='Range for the teleportation to create target weights')
    return parser.parse_args()


if __name__ == '__main__':
    args = argument_parser()

    torch.manual_seed(args.seed)

    model = NeuralTeleportationModel(network=MLPCOB(input_shape=(1, 28, 28), num_classes=10),
                                     input_shape=(1, 1, 28, 28))

    # Get the initial set of weights and teleport.
    initial_weights = model.get_weights()
    model.random_teleport(cob_range=args.cob_range)

    # Get the second set of weights (target weights).
    target_weights = model.get_weights()

    # Get the change of basis that created this set of weights.
    target_cob = model.get_cob(concat=True)

    # Generate a new random cob.
    cob = model.generate_random_cob(cob_range=args.cob_range, requires_grad=True)

    history = []
    cob_error_history = []

    print("Initial error: ", (cob - target_cob).abs().mean().item())
def plot_model_weights_histogram(model: NeuralTeleportationModel, mode: str, title: str,
                                 output_dir: Path = None, save_format: str = None,
                                 xlim: float = None, ylim_max: float = None,
                                 zoom_plot: bool = False) -> None:
    def _format_ticklabels(ticklabels) -> List[str]:
        return [f"{ticklabel:.1f}" for ticklabel in ticklabels]

    def _zoom_plot(ax, data, inset, lims):
        with sns.axes_style({'axes.linewidth': 1, 'axes.edgecolor': 'black'}):
            axins = ax.inset_axes(inset)
            sns.kdeplot(data, fill=True, ax=axins)
            axins.set_xlim(lims[:2])
            axins.set_ylim(lims[2:])
            axins.set(xticklabels=[], ylabel=None, yticklabels=[])
            rectangle_patch, connector_lines = ax.indicate_inset_zoom(axins)

            # Make the indicator and connectors more easily visible
            rectangle_patch.set_linewidth(2)
            for connector_line in connector_lines:
                connector_line.set_linewidth(2)

            # Manually set the visibility of the appropriate connector lines
            connector_lines[0].set_visible(True)   # Lower left
            connector_lines[1].set_visible(False)  # Upper left
            connector_lines[2].set_visible(True)   # Lower right
            connector_lines[3].set_visible(False)  # Upper right

        return axins

    def _plot_1d_array_histogram(array: np.ndarray, title: str):
        with sns.axes_style("darkgrid"):
            axes = sns.kdeplot(array, fill=True)
            if xlim:
                axes.set(xlim=(-xlim, xlim))
            if ylim_max:
                axes.set(ylim=(0, ylim_max))
            axes.set(ylabel=None, yticklabels=[])
            axes.set_xticklabels(_format_ticklabels(axes.get_xticks()), size=20)

            if zoom_plot:
                _zoom_plot(axes, array, inset=[0.05, 0.6, 0.35, 0.35], lims=[-0.13, -0.12, 0, 0.5])
                _zoom_plot(axes, array, inset=[0.6, 0.6, 0.35, 0.35], lims=[0.12, 0.13, 0, 0.5])

        if save_format:
            plt.savefig(output_dir / f"{title}.{save_format}", bbox_inches='tight')
            plt.close()
        else:
            plt.show()

    print(f"Plotting {title} histogram ...")

    if mode == "modelwise":
        _plot_1d_array_histogram(model.get_weights().cpu().detach().numpy(), title)
    elif mode == "layerwise":
        # Get the weights of each layer, without the bias weights
        layers = model.get_weights(concat=False, bias=False)
        # Discard the layers connected to the input and output nodes, to keep only hidden layers
        layers = layers[1:-1]
        for idx, layer_weights in enumerate(layers):
            _plot_1d_array_histogram(layer_weights.cpu().detach().numpy(), title + f"_layer{idx}")
    else:
        raise ValueError(f"Mode {mode} is not a valid option. Choose one of: {{modelwise,layerwise}}")
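# Hypothetical usage of `plot_model_weights_histogram`, reusing the MLPCOB
# construction shown earlier in this section; the title and cob_range values
# are illustrative only.
def _example_plot_histograms():
    model = NeuralTeleportationModel(network=MLPCOB(input_shape=(1, 28, 28), num_classes=10),
                                     input_shape=(1, 1, 28, 28))
    model.random_teleport(cob_range=0.9)
    # With save_format=None the histograms are shown instead of saved to disk.
    plot_model_weights_histogram(model, mode="layerwise", title="mlp_teleported")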