def __init__(
        self,
        input_shape=[1],
        layer_units=[200, 100, 1],
        layer_activations=["relu", "relu", "linear"],
        initial_unconstrained_scale=None,
        transform_unconstrained_scale_factor=0.05,  # factor to be used in the calculation of the actual noise std.
        l2_weight_lambda=None,  # float or list of floats
        l2_bias_lambda=None,
        preprocess_x=False,
        preprocess_y=False,
        learning_rate=0.01,  # can be float or an instance of tf.keras.optimizers.schedules
        last_layer_prior="non-informative",
        last_layer_prior_params=None,
        seed=0,
    ):
        self.input_shape = input_shape
        self.layer_units = layer_units
        self.layer_activations = layer_activations
        self.initial_unconstrained_scale = initial_unconstrained_scale
        self.transform_unconstrained_scale_factor = transform_unconstrained_scale_factor
        self.l2_weight_lambda = l2_weight_lambda
        self.l2_bias_lambda = l2_bias_lambda
        self.preprocess_x = preprocess_x
        self.preprocess_y = preprocess_y
        self.learning_rate = learning_rate
        self.last_layer_prior = last_layer_prior
        self.last_layer_prior_params = last_layer_prior_params
        self.seed = seed

        if self.preprocess_y:
            self.y_preprocessor = StandardizePreprocessor()
        names = [None] * (len(self.layer_units) - 2) + ["feature_extractor", "output"]
        tf.random.set_seed(self.seed)
        # if self.initial_unconstrained_scale is None:
        #     self.network = MapNetwork(
        #         self.input_shape,
        #         self.layer_units,
        #         self.layer_activations,
        #         self.l2_weight_lambda,
        #         self.l2_bias_lambda,
        #         preprocess_x=self.preprocess_x,
        #         learning_rate=self.learning_rate,
        #         names=names,
        #         seed=self.seed,
        #     )
        # else:
        self.network = MapDensityNetwork(
            self.input_shape,
            self.layer_units,
            self.layer_activations,
            self.initial_unconstrained_scale,
            self.transform_unconstrained_scale_factor,
            self.l2_weight_lambda,
            self.l2_bias_lambda,
            preprocess_x=self.preprocess_x,
            learning_rate=self.learning_rate,
            names=names,
            seed=self.seed,
        )
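
A minimal usage sketch for this class (toy data and layer sizes made up for illustration; the full class definition appears in the last example below):

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.uniform(-3, 3, size=(20, 1)).astype("float32")
y_train = (np.sin(x_train) + 0.1 * rng.normal(size=(20, 1))).astype("float32")

llb_net = PostHocLastLayerBayesianNetwork(
    input_shape=[1],
    layer_units=[50, 20, 1],
    layer_activations=["relu", "relu", "linear"],
    initial_unconstrained_scale=0.0,
    seed=0,
)
llb_net.fit(x_train=x_train, y_train=y_train, batch_size=20, epochs=200, verbose=0)
predictive_distribution = llb_net.predict(x_train)  # a tfd.StudentT over y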
Example #2
# %%
# General training
epochs = 100
batch_size = n_train

# %% markdown
# # MAP Density Model

# %%
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=n_train, decay_rate=0.9, staircase=True)
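
# With staircase=True the schedule is piecewise constant:
#   lr(step) = initial_learning_rate * decay_rate ** floor(step / decay_steps)
# Since batch_size == n_train above, one optimizer step corresponds to one
# epoch, so the learning rate shrinks by 10% every epoch. Sanity check:
assert np.isclose(lr_schedule(n_train).numpy(), initial_learning_rate * 0.9)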

net = MapDensityNetwork(
    input_shape=[1],
    layer_units=layer_units,
    layer_activations=layer_activations,
    learning_rate=lr_schedule,
)

net.fit(x_train=x_train,
        y_train=y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0)

# %%
prediction = net.predict(x_plot)  # mixture-of-Gaussians prediction
fig, ax = plt.subplots(figsize=figsize)
plot_moment_matched_predictive_normal_distribution(
    x_plot=x_plot,
    predictive_distribution=prediction,
    x_train=x_train,
    y_train=y_train,
)
Example #3
y_lim = [-5, 7]
fig, ax = plt.subplots(figsize=(8, 8))
plot_training_data(x_train, y_train, fig=fig, ax=ax, y_lim=y_lim)
plot_ground_truth(x_plot, y_ground_truth, fig=fig, ax=ax)
ax.legend()

# %%
initial_learning_rate = 0.05
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=20, decay_rate=0.9, staircase=True)

net = MapDensityNetwork(
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    weight_prior=weight_prior,
    bias_prior=bias_prior,
    n_train=n_train,
    learning_rate=0.01,
)

prior_predictive_distributions = net.predict_with_prior_samples(x_plot,
                                                                n_samples=4)

plot_distribution_samples(
    x_plot=x_plot,
    distribution_samples=prior_predictive_distributions,
    x_train=x_train,
    y_train=y_train,
    y_ground_truth=y_ground_truth,
    # y_lim=[-30, 30],
)
Example #4
bias_priors = [tfd.Normal(0, bias_prior_scale)] * len(layer_units)
l2_weight_lambda = prior_scale_to_regularization_lambda(
    weight_prior_scale, n_train)
l2_bias_lambda = prior_scale_to_regularization_lambda(bias_prior_scale,
                                                      n_train)
assert np.isclose(
    weight_prior_scale,
    regularization_lambda_to_prior_scale(l2_weight_lambda, n_train))
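
# The exact conversion lives in prior_scale_to_regularization_lambda /
# regularization_lambda_to_prior_scale; a common MAP convention (an assumption
# here, not taken from their source) is lambda = 1 / (2 * n_train * sigma^2)
# when the data term is the mean negative log-likelihood. Sketch of that
# round trip with hypothetical helpers:
def _prior_scale_to_lambda_sketch(scale, n_train):
    return 1.0 / (2.0 * n_train * scale ** 2)

def _lambda_to_prior_scale_sketch(lam, n_train):
    return 1.0 / np.sqrt(2.0 * n_train * lam)

assert np.isclose(
    weight_prior_scale,
    _lambda_to_prior_scale_sketch(
        _prior_scale_to_lambda_sketch(weight_prior_scale, n_train), n_train))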

# %%
seed = 0
model = MapDensityNetwork(
    input_shape=[1],
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=0.0,
    l2_weight_lambda=l2_weight_lambda,
    l2_bias_lambda=l2_bias_lambda,
    seed=seed,
)

models = []
n_models = 4
seeds = np.arange(n_models)
initial_unconstrained_scales = seeds + 0.1
for seed, initial_unconstrained_scale in zip(seeds,
                                             initial_unconstrained_scales):

    m = MapDensityNetwork(
        input_shape=[1],
        layer_units=layer_units,
        layer_activations=layer_activations,
        initial_unconstrained_scale=initial_unconstrained_scale,
        l2_weight_lambda=l2_weight_lambda,
        l2_bias_lambda=l2_bias_lambda,
        seed=seed,
    )
    models.append(m)
Example #5
    # save_path=figure_dir.joinpath(f"llb_moment_matched_{experiment_name}.pdf")
)

# %% markdown
# # Using pretrained network

# %%
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=n_train, decay_rate=0.9, staircase=True)
net = MapDensityNetwork(
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=initial_unconstrained_scale,
    transform_unconstrained_scale_factor=transform_unconstrained_scale_factor,
    preprocess_x=preprocess_x,
    preprocess_y=preprocess_y,
    learning_rate=lr_schedule,
    names=[None, "feature_extractor", "output"],
    seed=0,
)
net.fit(x_train=x_train,
        y_train=y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0)
prediction = net.predict(x_plot)
plot_moment_matched_predictive_normal_distribution(
    x_plot=x_plot,
    predictive_distribution=prediction,
    x_train=x_train,
    y_train=y_train,
)
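
# %% markdown
# The pretrained_network argument of PostHocLastLayerBayesianNetwork.fit (see
# the full class in the last example) lets this trained MAP network be reused
# as a fixed feature extractor. A sketch, reusing the names defined above:

# %%
llb_net = PostHocLastLayerBayesianNetwork(
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=initial_unconstrained_scale,
    transform_unconstrained_scale_factor=transform_unconstrained_scale_factor,
    preprocess_x=preprocess_x,
    preprocess_y=preprocess_y,
)
# epochs/batch_size are not needed here; no gradient training happens when a
# pretrained network is supplied.
llb_net.fit(x_train=x_train, y_train=y_train, pretrained_network=net)
llb_prediction = llb_net.predict(x_plot)  # Student-t predictive distribution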
Example #6
bias_prior = weight_prior
network_prior = make_independent_gaussian_network_prior(
    input_shape=input_shape, layer_units=layer_units, loc=0.0, scale=1.0
)


# %% markdown
# ### Let's first train a map network

# %%
net = MapDensityNetwork(
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    transform_unconstrained_scale_factor=transform_unconstrained_scale_factor,
    weight_prior=weight_prior,
    bias_prior=bias_prior,
    n_train=n_train,
    preprocess_y=False,
    learning_rate=0.01,
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor="loss", patience=20, verbose=1, restore_best_weights=False
)

net.fit(
    x_train=x_train,
    y_train=y_train,
    batch_size=10,
    epochs=10000,
    early_stop_callback=early_stop_callback,
)
Example #7
def map_network_likelihood_loss(net, x_train, y_train):
    loss = tf.reduce_mean(
        MapDensityNetwork.negative_log_likelihood(y_train,
                                                  net.network(x_train)))
    return loss
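
# Equivalent formulation, assuming negative_log_likelihood(y, dist) is the
# usual -dist.log_prob(y) (net.network(x_train) returns a distribution object):
def map_network_likelihood_loss_via_log_prob(net, x_train, y_train):
    return -tf.reduce_mean(net.network(x_train).log_prob(y_train))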
Example #8
batch_size = n_train
weight_prior_scale = 2
bias_prior_scale = weight_prior_scale
weight_prior = tfd.Normal(0, weight_prior_scale)
bias_prior = tfd.Normal(0, bias_prior_scale)
weight_priors = [weight_prior] * len(layer_units)
bias_priors = [bias_prior] * len(layer_units)

# %%
seed = 0
model = MapDensityNetwork(
    input_shape=[1],
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=0.0,
    weight_prior=weight_prior,
    bias_prior=bias_prior,
    scale_prior=tfd.InverseGamma(0.1, 0.1),
    n_train=n_train,
    seed=seed,
)

models = []
n_models = 4
seeds = np.arange(n_models)
initial_unconstrained_scales = seeds + 0.1
for seed, initial_unconstrained_scale in zip(seeds,
                                             initial_unconstrained_scales):

    m = MapDensityNetwork(
        input_shape=[1],
        layer_units=layer_units,
        layer_activations=layer_activations,
        initial_unconstrained_scale=initial_unconstrained_scale,
        weight_prior=weight_prior,
        bias_prior=bias_prior,
        scale_prior=tfd.InverseGamma(0.1, 0.1),
        n_train=n_train,
        seed=seed,
    )
    models.append(m)
Example #9
class PostHocLastLayerBayesianNetwork:
    def __init__(
        self,
        input_shape=[1],
        layer_units=[200, 100, 1],
        layer_activations=["relu", "relu", "linear"],
        initial_unconstrained_scale=None,
        transform_unconstrained_scale_factor=0.05,  # factor to be used in the calculation of the actual noise std.
        l2_weight_lambda=None,  # float or list of floats
        l2_bias_lambda=None,
        preprocess_x=False,
        preprocess_y=False,
        learning_rate=0.01,  # can be float or an instance of tf.keras.optimizers.schedules
        last_layer_prior="non-informative",
        last_layer_prior_params=None,
        seed=0,
    ):
        self.input_shape = input_shape
        self.layer_units = layer_units
        self.layer_activations = layer_activations
        self.initial_unconstrained_scale = initial_unconstrained_scale
        self.transform_unconstrained_scale_factor = transform_unconstrained_scale_factor
        self.l2_weight_lambda = l2_weight_lambda
        self.l2_bias_lambda = l2_bias_lambda
        self.preprocess_x = preprocess_x
        self.preprocess_y = preprocess_y
        self.learning_rate = learning_rate
        self.last_layer_prior = last_layer_prior
        self.last_layer_prior_params = last_layer_prior_params
        self.seed = seed

        if self.preprocess_y:
            self.y_preprocessor = StandardizePreprocessor()
        names = [None] * (len(self.layer_units) - 2) + ["feature_extractor", "output"]
        tf.random.set_seed(self.seed)
        # if self.initial_unconstrained_scale is None:
        #     self.network = MapNetwork(
        #         self.input_shape,
        #         self.layer_units,
        #         self.layer_activations,
        #         self.l2_weight_lambda,
        #         self.l2_bias_lambda,
        #         preprocess_x=self.preprocess_x,
        #         learning_rate=self.learning_rate,
        #         names=names,
        #         seed=self.seed,
        #     )
        # else:
        self.network = MapDensityNetwork(
            self.input_shape,
            self.layer_units,
            self.layer_activations,
            self.initial_unconstrained_scale,
            self.transform_unconstrained_scale_factor,
            self.l2_weight_lambda,
            self.l2_bias_lambda,
            preprocess_x=self.preprocess_x,
            learning_rate=self.learning_rate,
            names=names,
            seed=self.seed,
        )

    @property
    def total_epochs(self):
        return self.network.total_epochs

    def fit_preprocessing(self, y_train):
        if self.preprocess_y:
            self.y_preprocessor.fit(y_train)

    def fit(
        self,
        x_train,
        y_train,
        batch_size=1,
        epochs=1,
        early_stop_callback=None,
        validation_split=0.0,
        validation_data=None,
        verbose=1,
        pretrained_network=None,
    ):
        tf.random.set_seed(self.seed)
        self.fit_preprocessing(y_train)
        if self.preprocess_y:
            y_train = self.y_preprocessor.transform(y_train)
        if pretrained_network is None:
            self.network.fit(
                x_train,
                y_train,
                batch_size=batch_size,
                epochs=epochs,
                early_stop_callback=early_stop_callback,
                validation_split=validation_split,
                validation_data=validation_data,
                verbose=verbose,
            )
        else:
            self.network = pretrained_network

        self.feature_extractor = tf.keras.Model(
            self.network.network.inputs,
            self.network.network.get_layer("feature_extractor").output,
        )
        features_train = self.feature_extractor(x_train).numpy()
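        # Append a constant-one column so the last-layer bias is handled as an
        # extra regression weight.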
        features_train = np.hstack((features_train, np.ones((x_train.shape[0], 1))))

        # "fit" bayesian linear regression
        n_features = features_train.shape[1]
        if self.last_layer_prior_params is None:
            if self.last_layer_prior == "non-informative":
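                # mu_0 / V_0: Gaussian prior over the last-layer weights;
                # a_0 / b_0: inverse-gamma prior over the noise variance. This
                # choice is the usual improper, non-informative limit of the
                # normal-inverse-gamma prior.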
                self.last_layer_prior_params = {
                    "mu_0": np.zeros((n_features, 1)),
                    "V_0": 1e3 * np.eye(n_features),
                    "a_0": -n_features / 2,
                    "b_0": 0,
                }
            elif (
                self.last_layer_prior == "standard-normal-weights-non-informative-scale"
            ):
                ml_noise_sigma = self.network.noise_sigma
                self.last_layer_prior_params = {
                    "mu_0": np.zeros((n_features, 1)),
                    "V_0": (1 / ml_noise_sigma ** 2) * np.eye(n_features),
                    "a_0": -n_features / 2,
                    "b_0": 0,
                }
            elif self.last_layer_prior == "weakly-informative":
                a = 0.5
                b = 0.01
                self.last_layer_prior_params = {
                    "mu_0": np.zeros((n_features, 1)),
                    "V_0": (a / b) * np.eye(n_features),
                    "a_0": a,
                    "b_0": b,
                }
            else:
                raise ValueError(
                    'When not specifying last_layer_prior_params, you can pass '
                    '"non-informative", "standard-normal-weights-non-informative-scale", '
                    'or "weakly-informative" as last_layer_prior. You instead passed '
                    f'"{self.last_layer_prior}".'
                )

        self.blr_model = BayesianLinearRegression(**self.last_layer_prior_params)
        self.blr_model.fit(features_train, y_train)
        return self

    def predict(self, x):
        features_test = self.feature_extractor(x).numpy().astype("float32")
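        # Same constant-one bias column as in fit(), so train and test
        # features line up.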
        features_test = np.hstack((features_test, np.ones((x.shape[0], 1))))
        df, loc, scale = self.blr_model.predict(features_test)
        df = np.float32(df)
        loc = loc.astype("float32")
        scale = scale.astype("float32")
        scale = np.expand_dims(scale, axis=scale.ndim)
        if self.preprocess_y:
            loc = self.y_preprocessor.inverse_transform(loc)
            if self.y_preprocessor.std is not None:
                scale *= self.y_preprocessor.std
        return tfd.StudentT(df=df, loc=loc, scale=scale)

    def __call__(self, x):
        return self.predict(x)

    def get_weights(self):
        """
        Returns the weights of all layers including the one that is discarded and the marginal t distribution
        of the last layer weights.
        """
        df, loc, dispersion = self.blr_model.unconditional_w_t()
        last_layer_weight_distribution = tfd.StudentT(
            df=df, loc=loc, scale=tf.linalg.tensor_diag_part(dispersion) ** 0.5
        )
        return self.network.get_weights(), last_layer_weight_distribution
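
The predict method returns a tfd.StudentT, so downstream code can read predictive moments straight off the distribution object. A small sketch, assuming a fitted instance llb_net and query points x_plot:

prediction = llb_net.predict(x_plot)  # tfd.StudentT, one component per input point
mean = prediction.mean().numpy()      # predictive mean
stddev = prediction.stddev().numpy()  # finite only for df > 2
lower, upper = mean - 2 * stddev, mean + 2 * stddev  # rough 95% credible band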