Example #1
def loss_lookahead_diff(model: NeuralTeleportationModel, data: Tensor, target: Tensor,
                        metrics: TrainingMetrics, config: OptimalTeleportationTrainingConfig, **kwargs) -> Number:
    # Save the state of the model prior to performing the lookahead
    # (clone the tensors so the upcoming optimizer step does not overwrite the saved values)
    state_dict = {k: v.clone() for k, v in model.state_dict().items()}

    # Initialize a new optimizer to perform lookahead
    optimizer = get_optimizer_from_model_and_config(model, config)
    optimizer.zero_grad()

    # Compute loss at the teleported point
    loss = torch.stack([metrics.criterion(model(data_batch), target_batch)
                        for data_batch, target_batch in zip(data, target)]).mean(dim=0)

    # Take a step using the gradient at the teleported point
    loss.backward()
    optimizer.step()

    # Compute loss after the optimizer step
    lookahead_loss = torch.stack([metrics.criterion(model(data_batch), target_batch)
                                  for data_batch, target_batch in zip(data, target)]).mean(dim=0)

    # Restore the state of the model prior to the lookahead
    model.load_state_dict(state_dict)

    # Compute the difference between the lookahead loss and the original loss
    return (loss - lookahead_loss).item()
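The save, step, measure, restore pattern above can be reproduced with plain PyTorch. The following is a minimal, self-contained sketch of that pattern; the toy model, optimizer and data are stand-ins and are not part of the examples in this listing.

import torch
import torch.nn as nn

toy_model = nn.Linear(4, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
data, target = torch.randn(8, 4), torch.randn(8, 1)

# Save a copy of the weights (cloned so the optimizer step does not overwrite it)
saved_state = {k: v.clone() for k, v in toy_model.state_dict().items()}

loss = criterion(toy_model(data), target)             # loss at the current point
loss.backward()
optimizer.step()                                      # one lookahead step
lookahead_loss = criterion(toy_model(data), target)   # loss after the step

toy_model.load_state_dict(saved_state)                # restore the original weights
print((loss - lookahead_loss).item())                 # > 0 when the step decreased the loss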
Example #2
def teleport_model_to_optimize_metric(model: NeuralTeleportationModel, train_dataset: Dataset, metrics: TrainingMetrics,
                                      config: "OptimalTeleportationTrainingConfig", **kwargs) \
        -> NeuralTeleportationModel:
    print(f"Selecting best of {config.num_teleportations} random COBs "
          f"w.r.t. {config.optim_metric.__name__}")

    # Extract a single batch on which to compute gradients for each model to be compared
    dataloader = DataLoader(train_dataset, batch_size=config.batch_size)
    data, target = [], []
    for (data_batch, target_batch), _ in zip(dataloader, range(config.num_batches)):
        data.append(data_batch)
        target.append(target_batch)
    data = torch.stack(data).to(device=config.device)
    target = torch.stack(target).to(device=config.device)

    optimal_metric = config.optim_metric(model=model, data=data, target=target, metrics=metrics, config=config)
    model.cpu()  # Move model to CPU to avoid having 2 models on the GPU (to avoid possible CUDA OOM error)
    optimal_model = model

    for _ in range(config.num_teleportations):
        teleported_model = deepcopy(model).random_teleport(cob_range=config.cob_range,
                                                           sampling_type=config.cob_sampling)
        teleported_model.to(config.device)  # Move model back to chosen device before computing gradients
        metric = config.optim_metric(model=teleported_model, data=data, target=target, metrics=metrics, config=config)
        teleported_model.cpu()  # Move model back to CPU after computation is done (to avoid possible CUDA OOM error)
        if metric > optimal_metric:
            optimal_model = teleported_model
            optimal_metric = metric

    return optimal_model.to(config.device)
Example #3
def generate_teleportation_training_weights(
        model: NeuralTeleportationModel, trainset: Dataset,
        metric: TrainingMetrics,
        config: LandscapeConfig) -> Tuple[List[torch.Tensor], torch.Tensor]:
    """
        This will generate a list of weights at a given epoch while training the passed model.
        If teleport_every is different than 0, the model will teleport every time.
    """
    w = [model.get_weights().clone().detach().cpu()]
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=config.batch_size,
                                              drop_last=True)
    optim = get_optimizer_from_model_and_config(model, config)

    for e in range(config.epochs):
        if e in config.teleport_at and config.teleport_at != 0:
            print("Teleporting Model...")
            model.random_teleport(cob_range=config.cob_range,
                                  sampling_type=config.cob_sampling)
            w.append(model.get_weights().clone().detach().cpu())
            optim = get_optimizer_from_model_and_config(model, config)

        train_epoch(model,
                    metrics=metric,
                    config=config,
                    train_loader=trainloader,
                    optimizer=optim,
                    epoch=e,
                    device=config.device)
        w.append(model.get_weights().clone().detach().cpu())

    final = w[-1]
    return w, final
Example #4
def test_model_without_set_get_weights(
        model: nn.Module,
        testset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """
        Test if the model is coequal before and after using model.teleport
    """
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model,
                                     input_shape=(config.batch_size, 3, 32,
                                                  32)).to(device)

        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']

        m.random_teleport()

        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']

        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))

        print("==========================================")
        print("Loss and accuracy diff without set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")

    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
Example #5
def test_model_with_set_get_weights(
        model: nn.Module,
        testset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model,
                                     input_shape=(config.batch_size, 3, 32,
                                                  32)).to(device)
        w_o, cob_o = m.get_params()
        m.random_teleport()
        w_t, cob_t = m.get_params()

        m.set_params(weights=w_o, cob=cob_o)
        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']

        m.set_params(weights=w_t, cob=cob_t)
        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']

        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))

        print("==========================================")
        print("Loss and accuracy diff with set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")

    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
Example #6
def generate_1D_linear_interp(
        model: NeuralTeleportationModel,
        param_o: Tuple[torch.Tensor, torch.Tensor],
        param_t: Tuple[torch.Tensor, torch.Tensor],
        a: torch.Tensor,
        trainset: Dataset,
        valset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        checkpoint: dict = None) -> Tuple[list, list, list, list]:
    """
        This is 1-Dimensional Linear Interpolation
        θ(α) = (1−α)θ + αθ′
    """
    loss = []
    loss_v = []
    acc_t = []
    acc_v = []
    w_o, cob_o = param_o
    w_t, cob_t = param_t
    start_at = checkpoint["step"] if checkpoint else 0
    try:
        for step, coord in enumerate(a, start_at):
            # Interpolate the weight from W to T(W),
            # then interpolate the cob for the activation
            # and batchNorm layers only.
            print("step {} of {} - alpha={}".format(step + 1, len(a), coord))
            w = (1 - coord) * w_o + coord * w_t
            cob = (1 - coord) * cob_o + coord * cob_t
            model.set_params(w, cob)
            res = test(model, trainset, metric, config)
            loss.append(res['loss'])
            acc_t.append(res['accuracy'])
            res = test(model, valset, metric, config)
            acc_v.append(res['accuracy'])
            loss_v.append(res['loss'])
    except:
        if not checkpoint:
            checkpoint = {
                'step': step,
                'alpha': a,
                'original_model': param_o,
                'teleported_model': param_t,
                'losses': loss,
                'acc_t': acc_t,
                'acc_v': acc_v,
            }
        else:
            checkpoint['step'] = step
            checkpoint['losses'].extend(loss)
            checkpoint['acc_t'].extend(acc_t)
            checkpoint['acc_v'].extend(acc_v)
        torch.save(checkpoint, linterp_checkpoint_file)
        print("A checkpoint was made on step {} of {}".format(step, len(a)))
        # Re-raise so the caller is notified, since there is no way to know here
        # whether the failure happened before or after the teleportation.
        raise

    return loss, acc_t, loss_v, acc_v
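A standalone sketch of the interpolation formula θ(α) = (1−α)θ + αθ′ on plain tensors; the alpha grid and the parameter values below are illustrative only and mirror how `a` would typically be built for the function above.

import torch

theta = torch.zeros(5)        # stands in for the original parameters (w_o, cob_o)
theta_prime = torch.ones(5)   # stands in for the teleported parameters (w_t, cob_t)
a = torch.linspace(0, 1, steps=11)

for coord in a:
    w = (1 - coord) * theta + coord * theta_prime
    # with the real model this is where model.set_params(w, cob) and test(...) would be called
    print("alpha={:.1f} -> w[0]={:.1f}".format(coord.item(), w[0].item()))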
Example #7
def teleport_model_randomly(model: NeuralTeleportationModel, config: "RandomTeleportationTrainingConfig", **kwargs) \
        -> NeuralTeleportationModel:
    if random.random() < config.teleport_prob:
        print("Applying random COB to model in training")
        model.random_teleport(cob_range=config.cob_range,
                              sampling_type=config.cob_sampling)
    else:
        print("Skipping COB")

    return model
Example #8
def simulate_teleportation_distribution(
        model: NeuralTeleportationModel,
        config: "DistributionTeleportationTrainingConfig",
        **kwargs) -> NeuralTeleportationModel:
    print(
        f"Shifting weights to a distribution similar to a {config.cob_sampling} teleportation "
        f"w/ {config.cob_range} COB range.")

    model.cpu()
    teleported_model = deepcopy(model).random_teleport(
        cob_range=config.cob_range, sampling_type=config.cob_sampling)
    teleported_weights = teleported_model.get_weights(
        concat=True).cpu().detach().numpy()
    hist, bin_edges = np.histogram(teleported_weights, bins=1000)
    hist = hist / hist.sum()
    model.init_like_histogram(hist, bin_edges)
    return model.to(config.device)
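The `init_like_histogram` call above is not shown in this listing; as an illustration of the underlying idea, the following NumPy sketch draws samples that follow a normalized (hist, bin_edges) pair by picking bins proportionally to the histogram and sampling uniformly inside each bin.

import numpy as np

rng = np.random.default_rng(0)
reference = rng.normal(size=10_000)                           # stand-in for the teleported weights
hist, bin_edges = np.histogram(reference, bins=1000)
hist = hist / hist.sum()                                      # normalize to a probability mass

bins = rng.choice(len(hist), size=5_000, p=hist)              # pick bins proportionally to the histogram
samples = rng.uniform(bin_edges[bins], bin_edges[bins + 1])   # sample uniformly inside each bin
print(samples.mean(), samples.std())                          # roughly matches the reference distribution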
Example #9
def weighted_grad_norm(model: NeuralTeleportationModel, data: Tensor, target: Tensor,
                       metrics: TrainingMetrics, order: Union[str, number] = 'fro', **kwargs) -> Number:
    weights = model.get_weights()
    gradients = torch.stack([model.get_grad(data_batch, target_batch, metrics.criterion)
                             for data_batch, target_batch in zip(data, target)]).mean(dim=0)

    # Compute the gradient/weight ratio where possible
    ratio = gradients / weights

    # Identify where the ratio is numerically unstable (division by 0-valued weights)
    nan_ratio_mask = torch.isnan(ratio)

    # Replace unstable values by statistically representative measures
    ratio[nan_ratio_mask] = ratio[~nan_ratio_mask].mean()

    # Compute the norm of the ratio and move result to CPU (to avoid cluttering GPU if fct is called repeatedly)
    return torch.norm(ratio, p=order).item()
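A self-contained toy example of the gradient/weight ratio with the NaN masking used above; the tensors are illustrative only (in the function they come from the model's weights and batch-averaged gradients).

import torch

weights = torch.tensor([1.0, 0.0, 2.0, 4.0])      # one zero-valued weight
gradients = torch.tensor([0.5, 0.0, 1.0, 2.0])    # its gradient happens to be zero as well

ratio = gradients / weights                        # 0 / 0 produces NaN
nan_ratio_mask = torch.isnan(ratio)
ratio[nan_ratio_mask] = ratio[~nan_ratio_mask].mean()  # replace unstable entries by the mean

print(torch.norm(ratio, p='fro').item())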
Example #10
def generate_contour_loss_values(
        model: NeuralTeleportationModel,
        directions: Tuple[torch.Tensor, torch.Tensor],
        weights: torch.Tensor,
        surface: torch.Tensor,
        trainset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        checkpoint: dict = None) -> Tuple[np.ndarray, np.ndarray]:
    """
        Generate a tensor containing the loss values from a given model.
    """
    loss = []
    acc = []
    delta, eta = directions
    start_at = 0
    if checkpoint:
        start_at = checkpoint['step']
    try:
        for step, (x, y) in enumerate(surface, start_at):
            print("Evaluating step {}: [{:.3f}, {:.3f}]".format(step, x, y))
            x, y = x.to(config.device), y.to(config.device)

            # L (w + alpha*delta + beta*eta)
            changes = (delta * x + eta * y).to(config.device)
            w = torch.add(weights, changes)
            model.set_weights(w)
            results = test(model, trainset, metric, config)

            loss.append(results['loss'])
            acc.append(results['accuracy'])
    except:
        # No matter what caused the failure, make a checkpoint of the current surface generation.
        if not checkpoint:
            checkpoint = {'step': step, 'surface': surface, 'loss': loss}
        else:
            checkpoint['step'] = step
            checkpoint['loss'].extend(loss)
        torch.save(checkpoint, contour_checkpoint_file)
        print("A checkpoint was made at coord {} of {}".format(x, y))

        # Re-raise so the caller is notified, since there is no way to know here
        # whether the failure happened before or after the teleportation.
        raise

    return np.array(loss), np.array(acc)
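A standalone sketch of evaluating a loss over the plane w + x·delta + y·eta, which is what the loop above does with test(); a toy quadratic loss and random (not filter-normalized) directions are used here purely for illustration.

import torch

w = torch.tensor([1.0, -2.0, 0.5])               # stand-in for the trained weights
delta, eta = torch.randn(3), torch.randn(3)      # two direction vectors

def toy_loss(v):
    return (v ** 2).sum()

grid = torch.linspace(-1, 1, steps=5)
surface = [[toy_loss(w + x * delta + y * eta).item() for y in grid] for x in grid]
print(torch.tensor(surface).shape)               # a 5 x 5 grid of loss values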
Example #11
def get_model(dataset_name: str,
              model_name: str,
              device: str = 'cpu',
              initializer: Dict[str, Union[str, float]] = None,
              **model_kwargs) -> NeuralTeleportationModel:
    # Look up if the requested model is available in the model zoo
    model_factories = _get_model_factories()
    if model_name not in model_factories:
        raise KeyError(f"{model_name} was not found in the model zoo")

    # Dynamically determine the parameters for initializing the model based on the dataset
    model_kwargs.update(get_dataset_info(dataset_name, "num_classes"))
    if "mlp" in model_name.lower():
        input_channels, image_size = get_dataset_info(dataset_name,
                                                      "input_channels",
                                                      "image_size").values()
        model_kwargs.update(input_shape=(input_channels, *image_size))
    else:
        model_kwargs.update(get_dataset_info(dataset_name, "input_channels"))

    if "cifar" in dataset_name and ("resnet" in model_name):
        model_kwargs.update({"for_dataset": "cifar"})
    # Instantiate the model
    model_factory = model_factories[model_name]
    model = model_factory(**model_kwargs)
    # Initialize the model
    if initializer is not None:
        init_gain = None if "gain" not in initializer and initializer["type"] == "none" \
            else initializer["gain"]
        init_non_linearity = None if "non_linearity" not in initializer \
            else initializer["non_linearity"]
        model = initialize_model(model,
                                 init_type=initializer["type"],
                                 init_gain=init_gain,
                                 non_linearity=init_non_linearity)

    # Transform the base ``nn.Module`` to a ``NeuralTeleportationModel``
    input_channels, image_size = get_dataset_info(dataset_name,
                                                  "input_channels",
                                                  "image_size").values()
    model = NeuralTeleportationModel(network=model,
                                     input_shape=(2, input_channels,
                                                  *image_size))

    return model.to(device)
Example #12
def simulate_teleportation_sphere(model: NeuralTeleportationModel,
                                  config: "PseudoTeleportationTrainingConfig",
                                  **kwargs) -> NeuralTeleportationModel:
    print(
        f"Shifting weights on a sphere similar to a {config.cob_sampling} teleportation w/ {config.cob_range} "
        f"COB range.")

    model.cpu()  # Move model to CPU to avoid having 2 models on the GPU (to avoid possible CUDA OOM error)

    teleported_model = deepcopy(model).random_teleport(
        cob_range=config.cob_range, sampling_type=config.cob_sampling)

    init_layers = model.get_weights(concat=False)
    teleported_layers = teleported_model.get_weights(concat=False)

    pseudo_teleported_layers = []
    for init_layer, teleported_layer in zip(init_layers, teleported_layers):
        layer_shift = torch.randn_like(init_layer)
        layer_shift = normalize(layer_shift, p=1, dim=0) * torch.norm(
            teleported_layer - init_layer, 1)
        pseudo_teleported_layer = init_layer + layer_shift
        pseudo_teleported_layers.append(pseudo_teleported_layer)

    pseudo_teleported_weights = torch.cat(pseudo_teleported_layers)
    model.set_weights(pseudo_teleported_weights)
    return model.to(config.device)
def test_set_weights(network: nn.Module, input_shape: Tuple = (1, 1, 28, 28), model_name: str = None):
    """
        test_set_weights checks if method set_weights() in NeuralTeleportationModel works
    Args:
        network (nn.Module): Network to test
        input_shape (tuple): Input shape of network
        model_name (str): The name or label assigned to differentiate the model
    """
    model_name = model_name or network.__class__.__name__

    model = NeuralTeleportationModel(network, input_shape)
    w1 = model.get_weights()

    model.reset_weights()
    model.set_weights(w1)
    w2 = model.get_weights()

    assert np.allclose(w1.detach().numpy(), w2.detach().numpy())
    print("Weights set successfully for " + model_name + " model.")
def test_teleport(network: nn.Module, input_shape: Tuple = (1, 1, 28, 28), verbose: bool = False,
                  atol: float = 1e-5, model_name: str = None):
    """
        Return mean of the difference between the weights of network and a random teleportation, and checks if
        teleportation has the same network function

    Args:
        network (nn.Module): Network to be tested
        input_shape (tuple): Input shape of network
        verbose (bool): Flag to print comparision between network and a teleportation
        atol (float): Absolute tolerance allowed between outputs to pass the test
        model_name (str): The name or label assigned to differentiate the model

    Returns:
        float with the average of the difference between the weights of the network and a teleportation
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)
    model.eval()  # model must be set to eval because of dropout
    x = torch.rand(input_shape)
    pred1 = model(x).detach().numpy()
    w1 = model.get_weights().detach().numpy()

    model.random_teleport()

    pred2 = model(x).detach().numpy()
    w2 = model.get_weights().detach().numpy()

    diff_average = np.mean(np.abs((pred1 - pred2)))

    if verbose:
        print("Sample outputs: ")
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred2.flatten()[:10])
        print("Diff weight average: ", np.mean(np.abs((w1 - w2))))
        print("Diff prediction average: ", diff_average)

    assert not np.allclose(w1, w2)
    assert np.allclose(pred1, pred2, atol=atol), "Teleportation did not work for model {}. Average difference: {}". \
        format(model_name, diff_average)

    print("Teleportation successful for " + model_name + " model.")
    return diff_average
def test_calculate_ones(network,
                        model_name=None,
                        input_shape=(1, 1, 28, 28),
                        noise=False,
                        verbose=False):
    """
        Test if the correct change of basis can be calculated for a cob of ones.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        noise (bool): whether to add noise to the target weights before optimisation.
        verbose (bool): whether to display sample outputs during the test
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)

    model.initialize_cob()

    w1 = model.get_weights(concat=False, flatten=False, bias=False)
    _w1 = model.get_weights(concat=False, flatten=False, bias=False)

    if noise:
        for w in _w1:
            w += torch.rand(w.shape) * 0.001

    cob = model.get_cob()
    calculated_cob = model.calculate_cob(w1, _w1)

    error = (cob - calculated_cob).abs().mean()

    if verbose:
        print("Cob: ", cob.flatten()[:10])
        print("Calculated cob: ", calculated_cob.flatten()[:10])
        print("cob error ", (calculated_cob - cob).flatten()[:10])
        print("cob error : ", error)

    assert np.allclose(
        cob, calculated_cob
    ), "Calculate cob (ones) FAILED for " + model_name + " model."

    print("Calculate cob (ones) successful for " + model_name + " model.")
def test_multiple_teleport(network: nn.Module, input_shape: Tuple = (1, 1, 28, 28), verbose: bool = False,
                           atol: float = 1e-5, model_name: str = None):
    """
        Test multiple successive teleportations.

    Args:
        network (nn.Module): Network to be tested
        input_shape (tuple): Input shape of network
        verbose (bool): Flag to print comparison between network and a teleportation
        atol (float): Absolute tolerance allowed between outputs to pass the test
        model_name (str): The name or label assigned to differentiate the model

    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)
    model.eval()  # model must be set to eval because of dropout
    x = torch.rand(input_shape)
    pred1 = model(x).detach().numpy()

    for _ in range(10):
        model.random_teleport(cob_range=10, sampling_type='inter_landscape')

        pred2 = model(x).detach().numpy()

        diff_average = np.mean(np.abs((pred1 - pred2)))

        assert np.allclose(pred1, pred2,
                           atol=atol), "Multiple Teleportation did not work for model {}. Average difference: {}".format(
            model_name, diff_average)

    for _ in range(10):
        model.random_teleport(cob_range=10, sampling_type='inter_landscape', reset_teleportation=False)

        pred2 = model(x).detach().numpy()

        diff_average = np.mean(np.abs((pred1 - pred2)))

        assert np.allclose(pred1, pred2,
                           atol=atol), "Multiple Teleportation did not work for model {}. Average difference: {}".format(
            model_name, diff_average)

    print("Multiple Teleportations successful for " + model_name + " model.")
def start_training(model: NeuralTeleportationModel,
                   trainloader: DataLoader,
                   valset: VisionDataset,
                   metric: TrainingMetrics,
                   config: CompareTrainingConfig,
                   teleport_chance: float) -> np.ndarray:
    """
        This function starts a model training with a specific Scenario configuration.

        Scenario 1: train the model without using teleportation (teleportation_chance = 0.0)
        Scenario 2: train the model using a probability of teleporting every Xth epochs
        (0 < teleportation_chance < 1.0)
        Scenario 3: train the model using teleportation every Xth epochs (teleportation_chance = 1.0)

        returns:
            np.array containing the validation accuracy results of every epochs.
    """
    model.to(config.device)
    optimizer = get_optimizer_from_model_and_config(model, config)

    results = []
    for e in np.arange(1, args.epochs + 1):
        train_epoch(model=model, metrics=metric, optimizer=optimizer, train_loader=trainloader, epoch=e,
                    device=config.device)
        results.append(test(model=model, dataset=valset, metrics=metric, config=config)['accuracy'])
        model.train()

        if e % config.every_n_epochs == 0 and random.random() <= teleport_chance:
            print("teleported model")
            if config.targeted_teleportation:
                # TODO: use teleportation function here when they are available.
                raise NotImplementedError
            else:
                model.random_teleport(cob_range=config.cob_range, sampling_type=config.cob_sampling)
                optimizer = get_optimizer_from_model_and_config(model, config)

    model.cpu()  # Force the network to go out of the cuda mem.

    return np.array(results)
def test_cuda_teleport(network, input_shape=(1, 1, 28, 28), verbose=False):
    """
        Test if a model can be teleported successfully on cuda.
    Args:
        network (nn.Module): Model to test
        input_shape (tuple): Input shape for the model
        verbose (bool): if True samples of predictions are printed

    Returns:
        Average difference between elements of prediction before and after teleportation.
    """

    network = network.cuda()

    model = NeuralTeleportationModel(network=network, input_shape=input_shape)

    x = torch.rand(input_shape).cuda()
    pred1 = model(x).cpu().detach().numpy()
    w1 = model.get_weights().cpu().detach().numpy()

    model.random_teleport()

    pred2 = model(x).cpu().detach().numpy()
    w2 = model.get_weights().cpu().detach().numpy()

    diff_average = np.abs(pred1 - pred2).mean()

    if verbose:
        print("Model on device: {}".format(next(network.parameters()).device))
        print("Sample outputs: ")
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred2.flatten()[:10])

    assert not np.allclose(w1, w2)
    assert np.allclose(
        pred1,
        pred2), "Teleportation did not work. Average difference: {}".format(
            diff_average)
    print("Teleportation successful.")
    return diff_average
    from neuralteleportation.models.model_zoo.vggcob import vgg16_bnCOB
    from neuralteleportation.models.model_zoo.resnetcob import resnet18COB

    from torch.utils.data import DataLoader
    from tqdm import tqdm

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    input_shape = (32, 3, 32, 32)
    trainset, valset, testset = experiment_setup.get_dataset_subsets("cifar10")
    train_loader = DataLoader(trainset, batch_size=input_shape[0], shuffle=True)

    # MLP
    pbar = tqdm(enumerate(train_loader))
    mlp = MLPCOB(input_shape=(3, 32, 32), num_classes=10).to(device=device)
    mlp = NeuralTeleportationModel(network=mlp, input_shape=input_shape)
    plot_difference_teleported_gradients(network=mlp, pbar=pbar, network_descriptor='MLP on CIFAR-10', device=device)
    pbar.close()

    # VGG
    pbar = tqdm(enumerate(train_loader))
    vgg = vgg16_bnCOB(num_classes=10).to(device=device)
    vgg = NeuralTeleportationModel(network=vgg, input_shape=input_shape)
    plot_difference_teleported_gradients(network=vgg, pbar=pbar, network_descriptor='VGG on CIFAR-10', device=device)
    pbar.close()

    # ResNet
    pbar = tqdm(enumerate(train_loader))
    resnet = resnet18COB(num_classes=10).to(device=device)
    resnet = NeuralTeleportationModel(network=resnet, input_shape=input_shape)
    plot_difference_teleported_gradients(network=resnet, pbar=pbar, network_descriptor='ResNet on CIFAR-10',
                                         device=device)
    pbar.close()
def plot_model_weights_histogram(model: NeuralTeleportationModel,
                                 mode: str,
                                 title: str,
                                 output_dir: Path = None,
                                 save_format: str = None,
                                 xlim: float = None,
                                 ylim_max: float = None,
                                 zoom_plot: bool = False) -> None:
    def _format_ticklabels(ticklabels) -> List[str]:
        return [f"{ticklabel:.1f}" for ticklabel in ticklabels]

    def _zoom_plot(ax, data, inset, lims):
        with sns.axes_style({'axes.linewidth': 1, 'axes.edgecolor': 'black'}):
            axins = ax.inset_axes(inset)
            sns.kdeplot(data, fill=True, ax=axins)
            axins.set_xlim(lims[:2])
            axins.set_ylim(lims[2:])
            axins.set(xticklabels=[], ylabel=None, yticklabels=[])
            rectangle_patch, connector_lines = ax.indicate_inset_zoom(axins)
            # Make the indicator and connectors more easily visible
            rectangle_patch.set_linewidth(2)
            for connector_line in connector_lines:
                connector_line.set_linewidth(2)
            # Manually set the visibility of the appropriate connector lines
            connector_lines[0].set_visible(True)  # Lower left
            connector_lines[1].set_visible(False)  # Upper left
            connector_lines[2].set_visible(True)  # Lower right
            connector_lines[3].set_visible(False)  # Upper right
            return axins

    def _plot_1d_array_histogram(array: np.ndarray, title: str):
        with sns.axes_style("darkgrid"):
            axes = sns.kdeplot(array, fill=True)
            if xlim:
                axes.set(xlim=(-xlim, xlim))
            if ylim_max:
                axes.set(ylim=(0, ylim_max))
            axes.set(ylabel=None, yticklabels=[])
            axes.set_xticklabels(_format_ticklabels(axes.get_xticks()),
                                 size=20)
            if zoom_plot:
                _zoom_plot(axes,
                           array,
                           inset=[0.05, 0.6, 0.35, 0.35],
                           lims=[-0.13, -0.12, 0, 0.5])
                _zoom_plot(axes,
                           array,
                           inset=[0.6, 0.6, 0.35, 0.35],
                           lims=[0.12, 0.13, 0, 0.5])
            if save_format:
                plt.savefig(output_dir / f"{title}.{save_format}",
                            bbox_inches='tight')
                plt.close()
            else:
                plt.show()

    print(f"Plotting {title} histogram ...")
    if mode == "modelwise":
        _plot_1d_array_histogram(model.get_weights().cpu().detach().numpy(),
                                 title)
    elif mode == "layerwise":
        # Get weights of each layer, without the bias weights
        layers = model.get_weights(concat=False, bias=False)

        # Get rid of layers connected to input and output nodes, to keep only hidden layers
        layers = layers[1:-1]

        for idx, layer_weights in enumerate(layers):
            _plot_1d_array_histogram(layer_weights.cpu().detach().numpy(),
                                     title + f"_layer{idx}")
    else:
        raise ValueError(
            f"Mode {mode} is not a valid option. Choose one of: {{modelwise,layerwise}}"
        )
Example #21
                             cob_range=args.cob_range,
                             cob_sampling=args.cob_sampling,
                             teleport_at=args.teleport_at,
                             device=device)

    trainset, valset, testset = get_dataset_subsets("cifar10")
    model = get_model("cifar10", args.model)

    x = torch.linspace(-1, 1, args.x)
    y = torch.linspace(-1, 1, args.y)
    shape = x.shape if y is None else (len(x), len(y))
    surface = torch.stack((x, y))

    model.to(device)
    model = NeuralTeleportationModel(model,
                                     input_shape=(config.batch_size, 3, 32,
                                                  32)).to(device)
    original_w = model.get_weights()
    w_checkpoints, final_w = losslandscape.generate_teleportation_training_weights(
        model, trainset, metric=metric, config=config)
    delta = losslandscape.generate_random_2d_vector(final_w, seed=1)
    eta = losslandscape.generate_random_2d_vector(final_w, seed=2)

    # Calculate angle between the two direction vectors.
    print("angle between direction is {} rad".format(
        losslandscape.compute_angle(delta, eta)))

    loss, _ = losslandscape.generate_contour_loss_values(
        model, (delta, eta), final_w, surface, trainset, metric, config)
    loss = np.array(loss)
    loss = np.resize(loss, shape)
    parser.add_argument(
        "--cob_range",
        type=float,
        default=1,
        help='Range for the teleportation to create target weights')

    return parser.parse_args()


if __name__ == '__main__':
    args = argument_parser()

    torch.manual_seed(args.seed)

    model = NeuralTeleportationModel(network=MLPCOB(input_shape=(1, 28, 28),
                                                    num_classes=10),
                                     input_shape=(1, 1, 28, 28))

    # Get the initial set of weights and teleport.
    initial_weights = model.get_weights()
    model.random_teleport(cob_range=args.cob_range)

    # Get second set of weights (target weights)
    target_weights = model.get_weights()
    # Get the change of basis that created this set of weights.
    target_cob = model.get_cob(concat=True)

    # Generate a new random cob
    cob = model.generate_random_cob(cob_range=args.cob_range,
                                    requires_grad=True)
def test_set_cob(network, model_name, input_shape=(1, 1, 28, 28), verbose=False):
    """
        Test if the set_change_of_basis method works.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        verbose (bool): Flag to print comparision between network and a teleportation

    """
    x = torch.rand(input_shape)
    model = NeuralTeleportationModel(network, input_shape=input_shape)
    model.random_teleport()
    w1 = model.get_weights()
    t1 = model.get_cob()
    pred1 = model(x)

    model.reset_weights()
    pred2 = model(x)

    model.set_weights(w1)
    model.teleport_activations(t1)

    pred3 = model(x)

    if verbose:
        print("Diff prediction average: ", (pred1 - pred3).mean())
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred3.flatten()[:10])

    assert not np.allclose(pred1.detach().numpy(), pred2.detach().numpy(), atol=1e-5)
    assert np.allclose(pred1.detach().numpy(), pred3.detach().numpy(), atol=1e-5), "Set cob/weights did not work."

    print("Set cob successful for " + model_name + " model.")
Example #24
    mnist_train, mnist_val, mnist_test = get_dataset_subsets("mnist")

    sample_input_shape = (1, 1, 28, 28)
    hidden_layers = (128, 10)

    net1 = MLPCOB(input_shape=(1, 28, 28),
                  num_classes=10,
                  hidden_layers=hidden_layers).to(device)
    if args.same_init:
        net2 = deepcopy(net1)
    else:
        net2 = MLPCOB(input_shape=(1, 28, 28),
                      num_classes=10,
                      hidden_layers=hidden_layers).to(device)

    model1 = NeuralTeleportationModel(network=net1,
                                      input_shape=sample_input_shape)
    if args.weights1 is not None:
        model1.load_state_dict(torch.load(args.weights1))
    config.batch_size = 8  # Change batch size to train to different minima
    train(model1,
          train_dataset=mnist_train,
          metrics=metrics,
          config=config,
          val_dataset=mnist_test)
    torch.save(model1.state_dict(), pjoin(save_path, 'model1.pt'))
    print("Model 1 test results: ", test(model1, mnist_test, metrics, config))

    model2 = NeuralTeleportationModel(network=net2,
                                      input_shape=sample_input_shape)
    if args.weights2 is not None:
        model2.load_state_dict(torch.load(args.weights2))
def test_calculate_cob_weights(network,
                               model_name=None,
                               input_shape=(1, 1, 28, 28),
                               noise=False,
                               verbose=True):
    """
        Test if a cob can be calculated and applied to a network to teleport the network from the initial weights to
        the target weights.

    Args:
        network (nn.Module): Network to be tested
        model_name (str): The name or label assigned to differentiate the model
        input_shape (tuple): Input shape of network
        noise (bool): whether to add noise to the target weights before optimisation.
        verbose (bool): whether to display sample outputs during the test
    """
    model_name = model_name or network.__class__.__name__
    model = NeuralTeleportationModel(network=network, input_shape=input_shape)

    initial_weights = model.get_weights()
    w1 = model.get_weights(concat=False, flatten=False, bias=False)

    model.random_teleport()
    c1 = model.get_cob()
    model.random_teleport()
    c2 = model.get_cob()

    target_weights = model.get_weights()
    w2 = model.get_weights(concat=False, flatten=False, bias=False)

    if noise:
        for w in w2:
            w += torch.rand(w.shape) * 0.001

    calculated_cob = model.calculate_cob(w1, w2)

    model.initialize_cob()
    model.set_weights(initial_weights)
    model.teleport(calculated_cob, reset_teleportation=True)

    calculated_weights = model.get_weights()

    error = (calculated_weights - target_weights).abs().mean()

    if verbose:
        print("weights: ", target_weights.flatten())
        print("Calculated cob weights: ", calculated_weights.flatten())
        print("Weight error ", error)
        print("C1: ", c1.flatten()[:10])
        print("C2: ", c2.flatten()[:10])
        print("C1 * C2: ", (c1 * c2).flatten()[:10])
        print("Calculated cob: ", calculated_cob.flatten()[:10])

    assert np.allclose(calculated_weights.detach().numpy(), target_weights.detach().numpy()), \
        "Calculate cob and weights FAILED for " + model_name + " model with error: " + str(error.item())

    print("Calculate cob and weights successful for " + model_name + " model.")
Example #26
def micro_teleportation_dot_product(network,
                                    dataset,
                                    nb_teleport=100,
                                    network_descriptor='',
                                    sampling_types=['intra_landscape'],
                                    batch_sizes=[8, 64],
                                    criterion=None,
                                    device='cpu',
                                    verbose=False,
                                    random_data=False,
                                    number_classes=10) -> None:
    """
    This method tests the scalar product between the teleportation line and the gradient, as well as between a random
    vector and the gradient, for nullity. It then displays the histograms of the calculated scalar products. The
    method also aggregates all relevant micro-teleportation data in a dataframe.

    Args:
        network :               the model which we wish to use to compute the micro-teleporations

        dataset:                 the dataset that will be used to calculate the gradient and get dimensions for the
                                neural teleportation model

        nb_teleport:            The number of times the network is teleported and the scalar product calculated. An
                                average is then calculated.

        network_descriptor:     String describing the content of the network

        sampling_types :        Teleportation sampling types, governs how the change of basis is computed

        batch_sizes:            Sizes of the minibatches used to perform gradient calculation

        criterion:              the loss function used to compute the gradient

        device:                 Device used to compute the network operations ('cpu' or 'cuda')

        verbose:                If true, the method will output extensive details about the calculated vectors and
                                aggregated data (mainly for debugging purposes)

        random_data:            If True, random data with random labels is used for computing the gradient.
                                If False, the dataset is used for computing the gradient.

        number_classes:         Number of classes of the classification problem.

    """

    # Arbitrary precision threshold for nullity comparison
    torch.set_printoptions(precision=10, sci_mode=True)
    tol = 1e-2
    cobs = [0.001]
    hist_dir = f'images/histograms/{network_descriptor}'

    if torch.cuda.is_available():
        print(f'{green}Using CUDA{reset}')
        network = network.cuda()

    if criterion is None:
        loss_func = torch.nn.CrossEntropyLoss()
    else:
        loss_func = criterion

    # Initialize the dataframe for data aggregation
    aggregator = pd.DataFrame(columns=[
        'model name', 'sampling type', 'batch size', 'COB range',
        'weights vector length', 'Micro-teleportation vs Gradient',
        'Micro-teleportation vs Gradient std', 'Gradient vs Random Vector',
        'Gradient vs Random Vector std', 'Random Vector vs  Random Vector',
        'Random Vector vs  Random Vector std',
        'Micro-teleportation vs Random Vector',
        'Micro-teleportation vs Random Vector std'
    ])

    for sampling_type in sampling_types:
        for batch_size in batch_sizes:
            dataloader = torch.utils.data.DataLoader(dataset,
                                                     batch_size=batch_size)
            data, target = next(iter(dataloader))

            # save the initial weights for further reset
            model = NeuralTeleportationModel(network=network,
                                             input_shape=data.shape)

            if torch.cuda.is_available():
                model = model.cuda()
            else:
                model = model.cpu()

            if torch.cuda.is_available():
                w1 = model.get_weights().detach()
            else:
                w1 = model.get_weights().detach().numpy()

            for cob in cobs:
                angle_results = []
                rand_angle_results = []
                rand_rand_angle_results = []
                rand_micro_angle_results = []

                iterations = min(
                    int(len(dataloader.dataset) / dataloader.batch_size),
                    nb_teleport)

                for _ in tqdm(range(0, iterations)):

                    # Get next data batch
                    data, target = next(iter(dataloader))

                    if random_data:
                        data, target = torch.rand(data.shape), torch.randint(
                            0, number_classes, target.shape)

                    data, target = data.to(device), target.to(device)
                    grad = model.get_grad(data,
                                          target,
                                          loss_func,
                                          zero_grad=False)

                    # reset the weights for next teleportation
                    model.set_weights(torch.tensor(w1))

                    # teleport and get the new weights
                    model = model.random_teleport(cob_range=cob,
                                                  sampling_type=sampling_type)

                    if torch.cuda.is_available():
                        w2 = model.get_weights().detach()
                    else:
                        w2 = model.get_weights().detach().numpy()

                    # get teleportation vector
                    micro_teleport_vec = (w2 - w1)

                    random_vector = torch.rand(grad.shape,
                                               dtype=torch.float) - 0.5
                    random_vector2 = torch.rand(grad.shape,
                                                dtype=torch.float) - 0.5

                    random_vector = random_vector.to(device)
                    random_vector2 = random_vector2.to(device)

                    # Normalized scalar products & angles calculations
                    dot_prod = normalized_dot_product(grad, micro_teleport_vec)
                    angle = np.degrees(torch.acos(dot_prod).cpu())

                    rand_dot_prod = normalized_dot_product(grad, random_vector)
                    rand_angle = np.degrees(torch.acos(rand_dot_prod).cpu())

                    rand_rand_dot_prod = normalized_dot_product(
                        random_vector2, random_vector)
                    rand_rand_angle = np.degrees(
                        torch.acos(rand_rand_dot_prod).cpu())

                    rand_micro_dot_prod = normalized_dot_product(
                        random_vector2, micro_teleport_vec)
                    rand_micro_angle = np.degrees(
                        torch.acos(rand_micro_dot_prod).cpu())

                    # Perpendicularity assertion
                    failed = (not torch.allclose(
                        dot_prod, torch.tensor([0.0]).to(device), atol=tol))
                    rand_failed = (not torch.allclose(rand_dot_prod,
                                                      torch.tensor(
                                                          [0.0]).to(device),
                                                      atol=tol))
                    target_angle = 90.0

                    angle_results.append(angle)
                    rand_angle_results.append(rand_angle)
                    rand_rand_angle_results.append(rand_rand_angle)
                    rand_micro_angle_results.append(rand_micro_angle)

                angle_results = np.array(angle_results)
                rand_angle_results = np.array(rand_angle_results)
                rand_rand_angle_results = np.array(rand_rand_angle_results)
                rand_micro_angle_results = np.array(rand_micro_angle_results)

                # Append results to dataframe for further plotting
                aggregator = aggregator.append(
                    {
                        'model name':
                        network_descriptor,
                        'sampling type':
                        sampling_type,
                        'batch size':
                        batch_size,
                        'COB range':
                        cob,
                        'weights vector length':
                        len(w1),
                        'Micro-teleportation vs Gradient':
                        angle_results.mean(),
                        'Micro-teleportation vs Gradient std':
                        angle_results.std(),
                        'Gradient vs Random Vector':
                        rand_angle_results.mean(),
                        'Gradient vs Random Vector std':
                        rand_angle_results.std(),
                        'Random Vector vs  Random Vector':
                        rand_rand_angle_results.mean(),
                        'Random Vector vs  Random Vector std':
                        rand_rand_angle_results.std(),
                        'Micro-teleportation vs Random Vector':
                        rand_micro_angle_results.mean(),
                        'Micro-teleportation vs Random Vector std':
                        rand_micro_angle_results.std()
                    },
                    ignore_index=True)

                print(
                    f'The angle between the gradient and a micro-teleportation vector is: '
                    f'{red * failed}'
                    f'{np.round(angle_results.mean(), abs(int(np.log10(tol))))}',
                    f' (!=0 => FAILED!)' * failed,
                    f'{reset}',
                    f' using {sampling_type} sampling type',
                    f', the delta in angle is {angle - target_angle}°\n',
                    f'The angle between the gradient and a random vector is: ',
                    f'{red * rand_failed}{rand_angle_results.mean()}',
                    f' (FAILED!)' * rand_failed,
                    f'{reset}',
                    f', the delta in angle is {rand_angle - target_angle}°\n',
                    sep='')

                if verbose:
                    print(aggregator.iloc[aggregator.last_valid_index()])
                    if torch.cuda.is_available():
                        print(f'w1: {w1}',
                              f'nans: {torch.sum(torch.isnan(w1))}',
                              f'max: {torch.max(w1)}',
                              f'min: {torch.min(w1)}',
                              sep='\n')
                        print(f'w2: {w2}',
                              f' nans: {torch.sum(torch.isnan(w2))}',
                              f'max: {torch.max(w2)}',
                              f'min: {torch.min(w2)}',
                              sep='\n')
                    else:
                        print(f'w1: {w1}',
                              f'nans: {np.sum(np.isnan(w1))}',
                              f'max: {np.max(w1)}',
                              f'min: {np.min(w1)}',
                              sep='\n')
                        print(f'w2: {w2}',
                              f' nans: {np.sum(np.isnan(w2))}',
                              f'max: {np.max(w2)}',
                              f'min: {np.min(w2)}',
                              sep='\n')

                if not np.isnan(
                        aggregator.loc[aggregator.last_valid_index(),
                                       'Micro-teleportation vs Gradient']):
                    delta = 0.25
                    x_min = 90 - delta
                    x_max = 90 + delta
                    figsize = (10.0, 10.0)

                    fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,
                                                             1,
                                                             figsize=figsize)

                    if random_data:
                        fig.suptitle(
                            f'{network_descriptor} on Random Data and batch size of {batch_size}'
                        )

                    else:
                        fig.suptitle(
                            f'{network_descriptor} on CIFAR-10 and batch size of {batch_size}'
                        )

                    bin_height, bin_boundary = np.histogram(
                        np.array(angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax0.bar(bin_boundary[:-1],
                            bin_height,
                            width=np.maximum(width, 0.01))
                    ax0.legend(['Micro-teleportation\n vs \n Gradient'])
                    ax0.set_xlim(x_min, x_max)
                    ax0.set_yticks([])

                    bin_height, bin_boundary = np.histogram(
                        np.array(rand_micro_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax1.bar(bin_boundary[:-1],
                            bin_height,
                            width=np.maximum(width, 0.1),
                            color='g')
                    ax1.set_xlim(x_min, x_max)
                    ax1.legend(['Micro-teleportation\n vs \n Random Vector'])
                    ax1.set_yticks([])

                    bin_height, bin_boundary = np.histogram(
                        np.array(rand_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax2.bar(bin_boundary[:-1],
                            bin_height,
                            width=np.maximum(width, 0.1),
                            color='g')
                    ax2.set_xlim(x_min, x_max)
                    ax2.legend(['Gradient\n vs \n Random Vector'])
                    ax2.set_yticks([])

                    bin_height, bin_boundary = np.histogram(
                        np.array(rand_rand_angle_results))
                    width = bin_boundary[1] - bin_boundary[0]
                    bin_height = bin_height / float(max(bin_height))
                    ax3.bar(bin_boundary[:-1],
                            bin_height,
                            width=np.maximum(width, 0.1),
                            color='g')
                    ax3.set_xlim(x_min, x_max)
                    ax3.legend(['Random Vector\n vs \n Random Vector'])
                    ax3.set_yticks([])

                    plt.xlabel('Angle in degrees')

                    Path(hist_dir).mkdir(parents=True, exist_ok=True)
                    plt.savefig(
                        f'{hist_dir}/{network_descriptor}_'
                        f'_cob_{cob}_iter_{iterations}_batch_size_{batch_size}.png'
                    )
                    plt.show()

                    if random_data:
                        fig.savefig(
                            f"{network_descriptor}-RandomData-batchsize_{batch_size}.pdf",
                            bbox_inches='tight')

                    else:
                        fig.savefig(
                            f"{network_descriptor}-cifar10-batchsize_{batch_size}.pdf",
                            bbox_inches='tight')
                else:
                    print(red)
                    print(aggregator.iloc[aggregator.last_valid_index()])
                    print(reset)
Example #27
def dot_product_between_teleportation(network,
                                      dataset,
                                      network_descriptor=None,
                                      nb_teleport=100,
                                      device='cpu') -> None:
    """
    This method tests the scalar product between the initial and teleported set of weights and plots the results with
    respect to the order of magnitude of the change of basis of the teleportation

    Args:
        network :               the model which we want to use to compute the teleportations

        dataset :               the dataset used to size the teleportation model

        network_descriptor:     String describing the content of the network

        nb_teleport:             Number of teleportations performed for the statistical (mean, variance, etc.)
                                calculations

        device:                 Device used to compute the network operations ('cpu' or 'cuda')
    """
    if network_descriptor is None:
        network_descriptor = network.__class__.__name__

    series_dir = f'images/series_dot_prod_vs_cob/{network_descriptor}'

    if torch.cuda.is_available():
        print(f'{green}Using CUDA{reset}')
        network = network.cuda()

    # Prepare the range of COB to test
    cobs = np.linspace(0.00001, 0.999, 40)

    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16)
    data, target = next(iter(dataloader))
    model = NeuralTeleportationModel(network=network, input_shape=data.shape)

    if torch.cuda.is_available():
        model = model.cuda()
    else:
        model = model.cpu()

    w1 = model.get_weights().detach().to(device)

    dot_product_results = []
    angles = []

    for cob in cobs:

        dot_product_result = 0
        angle = 0

        for _ in tqdm(range(0, nb_teleport)):
            # reset the weights
            model.set_weights(w1)

            # teleport and get the new weights
            model.random_teleport(cob_range=cob,
                                  sampling_type='intra_landscape')
            w2 = model.get_weights().detach().to(device)

            # cos(theta) = (w1 w2)/(||w1|| ||w2||)
            dot_product_result += normalized_dot_product(w1, w2)
            angle += np.degrees(
                torch.acos(normalized_dot_product(w1, w2)).cpu())

        dot_product_result /= nb_teleport
        angle /= nb_teleport

        dot_product_results.append(dot_product_result.item())
        angles.append(angle.item())

    plt.plot(cobs, dot_product_results)
    plt.title(f'Scalar product between original and \nteleported weights with '
              f'respect to COB\'s order of magnitude\n{network_descriptor}')

    plt.ylabel('Scalar product')
    plt.xlabel('change of basis')

    Path(series_dir).mkdir(parents=True, exist_ok=True)
    plt.savefig(
        f'{series_dir}/dot_product_vs_cob_{network_descriptor}_Samp_type_intra_landscape'
    )
    plt.show()

    plt.plot(cobs, angles)
    plt.title(f'Angle between original and \nteleported weights with '
              f'respect to COB\'s order of magnitude\n{network_descriptor}')

    plt.ylabel('Theta')
    plt.xlabel('change of basis')

    Path(series_dir).mkdir(parents=True, exist_ok=True)
    plt.savefig(
        f'{series_dir}/angle_vs_cob_{network_descriptor}_Samp_type_intra_landscape'
    )
    plt.show()
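Both functions above rely on a `normalized_dot_product` helper that is not shown in this listing; assuming it computes cos(θ) = ⟨w1, w2⟩ / (‖w1‖ ‖w2‖), as the inline comment in Example #27 suggests, a standalone sketch of the quantity and the derived angle looks like this.

import numpy as np
import torch

w1, w2 = torch.randn(10_000), torch.randn(10_000)
cos_theta = torch.dot(w1, w2) / (torch.norm(w1) * torch.norm(w2))
angle = np.degrees(torch.acos(cos_theta).item())
print(cos_theta.item(), angle)   # high-dimensional random vectors are close to orthogonal (~90°)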