Ejemplo n.º 1
0
 def __init__(self,
              data: Tuple[tf.Tensor, tf.Tensor],
              m: int = 20,
              alpha: float = 1./np.sqrt(2.),
              eps_sq: float = 1,
              sigma_n_sq: float = 1,
              sigma_f_sq: float = 1):
     """
     Precompute constants and trainable hyperparameters for a GP model built
     from a Hermite-coefficient expansion (coefficients loaded from
     ``hermite_coeff.npy`` next to this module).

     :param data: tuple (X, y) of training inputs and targets.
     :param m: number of basis terms per input dimension.
     :param alpha: scaling factor used throughout the expansion.
     :param eps_sq: initial squared-lengthscale-type parameter; broadcast to
         one value per input dimension when D > 1.
     :param sigma_n_sq: initial noise variance.
     :param sigma_f_sq: initial signal variance.
     """
     # NOTE: the annotations use plain `float` because the `np.float` alias
     # was removed in NumPy 1.24.
     self.num_data = tf.cast(data[1].shape[0], default_float())
     self.data = (tf.cast(tf.squeeze(data[0]), default_float()), tf.cast(data[1], default_float()))
     self.const = tf.cast(0.5*data[1].size*np.log(2*np.pi), default_float())

     D = data[0].shape[1]
     self.flag_1d = D == 1
     self.alpha = tf.cast(alpha, default_float())
     self.alpha_sq = tf.square(self.alpha)
     self.m = tf.cast(m, default_float())
     # All D-dimensional index tuples (1..m per dimension) of the expansion.
     self.this_range = tf.constant(np.asarray(list(product(range(1, m + 1), repeat=D))).squeeze(), dtype=default_float())

     self.this_range_1 = self.this_range - 1.
     self.this_range_1_2 = self.this_range_1 if self.flag_1d else tf.range(m, dtype=default_float())
     self.this_range_1_int = tf.cast(self.this_range_1, tf.int32)
     self.tf_range_dnn_out = tf.range(D)
     self.this_range_1_ln2 = np.log(2.)*self.this_range_1

     self.vander_range = tf.range(m+1, dtype=default_float())
     self.eye_k = tf.eye(m**D, dtype=default_float())
     self.yTy = tf.reduce_sum(tf.math.square(self.data[1]))
     # Hermite polynomial coefficient table shipped alongside this module.
     self.coeff_n_tf = tf.constant(np.load(os.path.dirname(os.path.realpath(__file__)) + '/hermite_coeff.npy')[:m, :m], dtype=default_float())

     # One eps_sq per dimension when multi-dimensional, scalar otherwise.
     eps_sq = eps_sq*np.ones(D) if D > 1 else eps_sq
     self.eps_sq = Parameter(eps_sq, transform=positive(), dtype=default_float())
     self.sigma_f_sq = Parameter(sigma_f_sq, transform=positive(), dtype=default_float())
     self.sigma_n_sq = Parameter(sigma_n_sq, transform=positive(), dtype=default_float())
Ejemplo n.º 2
0
    def __init__(self,
                 kernel,
                 inducing_variables,
                 q_mu_initial,
                 q_sqrt_initial,
                 mean_function,
                 white=False,
                 **kwargs):
        """
        Variational GP layer with user-supplied initial variational mean.

        :param kernel: covariance function of the layer.
        :param inducing_variables: inducing inputs, shape [M, D].
        :param q_mu_initial: initial values for the variational mean, shape [M].
        :param q_sqrt_initial: unused here (kept for interface compatibility).
        :param mean_function: mean function of the layer.
        :param white: whether the whitened representation is used.
        """
        super().__init__(**kwargs)

        self.inducing_points = inducing_variables
        self.num_inducing = inducing_variables.shape[0]

        # Variational mean initialised from the supplied values, as a column.
        self.q_mu = Parameter(q_mu_initial[:, None], dtype=default_float())

        # Near-deterministic variational covariance; stored as the lower
        # triangular square root L.
        scaled_eye = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
        self.q_sqrt = Parameter(scaled_eye, transform=triangular())

        self.kernel = kernel
        self.mean_function = mean_function
        self.white = white
Ejemplo n.º 3
0
    def __init__(self,
                 kernel,
                 inducing_variables,
                 mean_function,
                 white=False,
                 **kwargs):
        """
        Variational GP layer.

        :param kernel: covariance function evaluated at the inducing points.
        :param inducing_variables: inducing inputs, shape [M, D].
        :param mean_function: mean function of the layer.
        :param white: if True use the whitened representation (q_sqrt starts
            near-deterministic); otherwise q_sqrt is initialised to the
            Cholesky of the jittered prior covariance Ku.
        """
        super().__init__(**kwargs)

        self.inducing_points = inducing_variables
        self.num_inducing = inducing_variables.shape[0]

        self.kernel = kernel
        self.mean_function = mean_function
        self.white = white

        # Initialise q_mu to zeros, shape [M, 1].
        q_mu = np.zeros((self.num_inducing, 1))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt once (the original built a throwaway identity
        # Parameter before overwriting it in the non-white case). Stored as a
        # lower triangular matrix L.
        if self.white:
            q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
        else:
            # Initialise to prior (Ku) + jitter.
            Ku = self.kernel(self.inducing_points)
            Ku += default_jitter() * tf.eye(self.num_inducing, dtype=Ku.dtype)
            q_sqrt = tf.linalg.cholesky(Ku)
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())
Ejemplo n.º 4
0
    def _create_network(self):
        """Build per-layer weight and bias Parameters from ``self.dims``."""
        self.Ws, self.bs = [], []

        for fan_in, fan_out in zip(self.dims[:-1], self.dims[1:]):
            # Xavier/Glorot initialisation scale for this layer pair.
            xavier_std = (2.0 / (fan_in + fan_out))**0.5
            weight = np.random.randn(fan_in, fan_out) * xavier_std
            self.Ws.append(Parameter(weight))
            self.bs.append(Parameter(np.zeros(fan_out)))
Ejemplo n.º 5
0
    def _init_variational_parameters(self, num_inducing, q_mu, q_sqrt, q_diag):
        """
        Constructs the mean and cholesky of the covariance of the variational Gaussian posterior.
        If a user passes values for `q_mu` and `q_sqrt` the routine checks if they have consistent
        and correct shapes. If a user does not specify any values for `q_mu` and `q_sqrt`, the routine
        initializes them, their shape depends on `num_inducing` and `q_diag`.

        Note: most often the comments refer to the number of observations (=output dimensions) with P,
        number of latent GPs with L, and number of inducing points M. Typically P equals L,
        but when certain multioutput kernels are used, this can change.

        Parameters
        ----------
        :param num_inducing: int
            Number of inducing variables, typically refered to as M.
        :param q_mu: np.array or None
            Mean of the variational Gaussian posterior. If None the function will initialise
            the mean with zeros. If not None, the shape of `q_mu` is checked.
        :param q_sqrt: np.array or None
            Cholesky of the covariance of the variational Gaussian posterior.
            If None the function will initialise `q_sqrt` with identity matrix.
            If not None, the shape of `q_sqrt` is checked, depending on `q_diag`.
        :param q_diag: bool
            Used to check if `q_mu` and `q_sqrt` have the correct shape or to
            construct them with the correct shape. If `q_diag` is true,
            `q_sqrt` is two dimensional and only holds the square root of the
            covariance diagonal elements. If False, `q_sqrt` is three dimensional.
        """
        # Default mean: zeros, one column per latent GP.
        q_mu = np.zeros(
            (num_inducing, self.num_latent_gps)) if q_mu is None else q_mu
        self.q_mu = Parameter(q_mu, dtype=default_float())  # [M, P]

        if q_sqrt is None:
            # NOTE(review): this branch tests `self.q_diag` while the explicit
            # `q_sqrt` branch below tests the `q_diag` argument — presumably
            # they agree at call time; confirm against the caller.
            if self.q_diag:
                # Diagonal approximation: store only positive diagonal entries.
                ones = np.ones((num_inducing, self.num_latent_gps),
                               dtype=default_float())
                self.q_sqrt = Parameter(ones, transform=positive())  # [M, P]
            else:
                # Full covariance: one identity Cholesky factor per latent GP.
                q_sqrt = [
                    np.eye(num_inducing, dtype=default_float())
                    for _ in range(self.num_latent_gps)
                ]
                q_sqrt = np.array(q_sqrt)
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=triangular())  # [P, M, M]
        else:
            if q_diag:
                # User-supplied diagonal square root; infer L from its width.
                assert q_sqrt.ndim == 2
                self.num_latent_gps = q_sqrt.shape[1]
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=positive())  # [M, L|P]
            else:
                # User-supplied full square root; infer L and M from its shape.
                assert q_sqrt.ndim == 3
                self.num_latent_gps = q_sqrt.shape[0]
                num_inducing = q_sqrt.shape[1]
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=triangular())  # [L|P, M, M]
Ejemplo n.º 6
0
    def __init__(self,
                 kern,
                 Z,
                 num_outputs,
                 mean_function,
                 white=False,
                 input_prop_dim=None,
                 **kwargs):
        r"""
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.
        (Raw docstring so that ``\sim`` below is not treated as an escape sequence.)

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v \sim N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
        :param mean_function: The mean function
        :param white: if True, keep the identity q_sqrt; otherwise q_sqrt is
            re-initialised to the prior Cholesky below.
        :param input_prop_dim: forwarded to the parent layer.
        :return:
        """
        super().__init__(input_prop_dim=input_prop_dim, **kwargs)
        self.num_inducing = Z.shape[0]

        # Variational mean over the inducing outputs, zero-initialised.
        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu, name="q_mu")
        # Variational square-root: identity factor tiled once per output.
        q_sqrt = np.tile(
            np.eye(self.num_inducing)[None, :, :], [num_outputs, 1, 1])
        self.q_sqrt = Parameter(q_sqrt, transform=triangular(), name="q_sqrt")

        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function

        self.num_outputs = num_outputs
        self.white = white

        if not self.white:  # initialize to prior
            # Replace q_sqrt with the Cholesky of the jittered prior
            # covariance Ku, tiled once per output.
            Ku = self.kern.K(Z)
            Lu = np.linalg.cholesky(Ku + np.eye(Z.shape[0]) *
                                    gpflow.default_jitter())
            self.q_sqrt = Parameter(np.tile(Lu[None, :, :],
                                            [num_outputs, 1, 1]),
                                    transform=triangular(),
                                    name="q_sqrt")

        # Kernel matrices are computed lazily; the flag marks them as stale.
        self.Ku, self.Lu, self.Ku_tiled, self.Lu_tiled = None, None, None, None
        self.needs_build_cholesky = True
Ejemplo n.º 7
0
    def __init__(self, variance=1.0, lengthscale=1.0, f_list=None):
        """
        :param variance: the (initial) value for the variance parameter
        :param lengthscale: the (initial) value for the lengthscale parameter(s);
            for ARD behaviour pass an array with one entry per active
            dimension, e.g. [1., 1., 1.]
        :param f_list: list with information of the functional inputs
        """
        # Positive-constrained trainable hyperparameters.
        self.variance = Parameter(variance, transform=positive())
        self.lengthscale = Parameter(lengthscale, transform=positive())
        # Metadata describing the functional inputs (stored as-is).
        self.f_list = f_list
Ejemplo n.º 8
0
    def __init__(self,
                 kernel,
                 inducing_variables,
                 num_outputs,
                 mean_function,
                 input_prop_dim=None,
                 white=False,
                 **kwargs):
        """
        Multi-output variational GP layer holding one independent (deep-copied)
        kernel per output.

        :param kernel: template kernel; each output gets its own deep copy.
        :param inducing_variables: inducing inputs, shape [M, D].
        :param num_outputs: number of independent GP outputs.
        :param mean_function: mean function of the layer.
        :param input_prop_dim: forwarded to the parent layer.
        :param white: if True use the whitened representation (identity
            q_sqrt); otherwise q_sqrt is initialised to the prior Cholesky.
        """
        super().__init__(input_prop_dim, **kwargs)

        self.num_inducing = inducing_variables.shape[0]
        self.mean_function = mean_function
        self.num_outputs = num_outputs
        self.white = white

        # One independent kernel per output, deep-copied from the template.
        self.kernels = [copy.deepcopy(kernel) for _ in range(self.num_outputs)]

        # Initialise q_mu to all zeros, shape [M, num_outputs].
        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt once (the original built a throwaway identity
        # Parameter before overwriting it in the non-white case). Stored as
        # lower triangular matrices L, shape [num_outputs, M, M].
        if self.white:
            q_sqrt = np.array([
                np.eye(self.num_inducing, dtype=default_float())
                for _ in range(num_outputs)
            ])
        else:
            # Initialise to prior (Ku) + jitter, per output kernel.
            jitter_eye = np.eye(self.num_inducing) * default_jitter()
            q_sqrt = np.array([
                np.linalg.cholesky(k.K(inducing_variables) + jitter_eye)
                for k in self.kernels
            ])
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        # One inducing-point wrapper per output.
        self.inducing_points = [
            inducingpoint_wrapper(inducing_variables)
            for _ in range(self.num_outputs)
        ]
Ejemplo n.º 9
0
    def __init__(self,
                 data: Tuple[tf.Tensor, tf.Tensor],
                 m: int = 20,
                 d: int = 1,
                 alpha: float = 1./np.sqrt(2.),
                 eps_sq: float = 1,
                 sigma_n_sq: float = 1,
                 sigma_f_sq: float = 1,
                 dir_weights: str = None):
        """
        Set up model constants, trainable hyperparameters, and the
        feature-extracting neural network.

        :param data: tuple (X, y) of training inputs and targets.
        :param m: number of basis terms per latent dimension.
        :param d: dimensionality of the neural-net output (latent space).
        :param alpha: scaling factor used throughout the expansion.
        :param eps_sq: initial lengthscale-type parameter (per dim when d > 1).
        :param sigma_n_sq: initial noise variance.
        :param sigma_f_sq: initial signal variance.
        :param dir_weights: optional path of pre-trained net weights to load.
        """
        # NOTE: the annotations use plain `float` because the `np.float` alias
        # was removed in NumPy 1.24.
        # Match Keras' float precision to the precision of the targets.
        if data[1].dtype == np.float64:
            K_bd.set_floatx('float64')
        else:
            set_default_float(np.float32)

        self.num_data = tf.cast(data[1].shape[0], default_float())
        self.data = (tf.cast(data[0], default_float()), tf.cast(data[1], default_float()))
        self.const = tf.cast(0.5*data[1].size*np.log(2*np.pi), default_float())

        self.flag_1d = d == 1
        self.alpha = tf.cast(alpha, default_float())
        self.alpha_sq = tf.square(self.alpha)
        self.m = tf.cast(m, default_float())
        # All d-dimensional index tuples (1..m per dimension) of the expansion.
        self.this_range = tf.constant(np.asarray(list(product(range(1, m + 1), repeat=d))).squeeze(), dtype=default_float())
        self.this_range_1 = self.this_range - 1.
        self.this_range_1_2 = self.this_range_1 if self.flag_1d else tf.range(m, dtype=default_float())
        self.this_range_1_int = tf.cast(self.this_range_1, tf.int32)
        self.tf_range_dnn_out = tf.range(d)
        self.this_range_1_ln2 = np.log(2.)*self.this_range_1

        self.vander_range = tf.range(m+1, dtype=default_float())
        self.eye_k = tf.eye(m**d, dtype=default_float())
        self.yTy = tf.reduce_sum(tf.math.square(self.data[1]))
        # Hermite polynomial coefficient table shipped alongside this module.
        self.coeff_n_tf = tf.constant(np.load(os.path.dirname(os.path.realpath(__file__)) + '/hermite_coeff.npy')[:m, :m], dtype=default_float())

        eps_sq = eps_sq*np.ones(d) if d > 1 else eps_sq
        self.eps_sq = Parameter(eps_sq, transform=positive(), dtype=default_float())
        self.sigma_f_sq = Parameter(sigma_f_sq, transform=positive(), dtype=default_float())
        self.sigma_n_sq = Parameter(sigma_n_sq, transform=positive(), dtype=default_float())

        # Feature extractor mapping inputs to the d-dimensional latent space.
        model = models.Sequential()
        model.add(layers.Dense(512, activation='tanh', input_dim=data[0].shape[1]))
        model.add(layers.Dense(256, activation='tanh'))
        model.add(layers.Dense(64, activation='tanh'))
        model.add(layers.Dense(d))

        if dir_weights is not None:
            model.load_weights(dir_weights)
        self.neural_net = model
Ejemplo n.º 10
0
    def __init__(
        self,
        data: OutputData,
        Xp_mean: tf.Tensor,
        Xp_var: tf.Tensor,
        pi: tf.Tensor,
        kernel_K: List[Kernel],
        Zp: tf.Tensor,
        Xs_mean=None,
        Xs_var=None,
        kernel_s=None,
        Zs=None,
        Xs_prior_mean=None,
        Xs_prior_var=None,
        Xp_prior_mean=None,
        Xp_prior_var=None,
        pi_prior=None
    ):
        """Initialise the split-space parent model, then add q(Us | Ms, Ss)."""
        super().__init__(
            data=data,
            split_space=True,
            Xp_mean=Xp_mean,
            Xp_var=Xp_var,
            pi=pi,
            kernel_K=kernel_K,
            Zp=Zp,
            Xs_mean=Xs_mean,
            Xs_var=Xs_var,
            kernel_s=kernel_s,
            Zs=Zs,
            Xs_prior_mean=Xs_prior_mean,
            Xs_prior_var=Xs_prior_var,
            Xp_prior_mean=Xp_prior_mean,
            Xp_prior_var=Xp_prior_var,
            pi_prior=pi_prior
        )
        # Variational mean of Us: zeros, shape [M, D].
        self.q_mu_s = Parameter(np.zeros((self.M, self.D)), dtype=default_float())

        # Variational square-root of Us: identity factor per latent dimension,
        # stored as lower triangular matrices, shape [D, M, M].
        identity_factors = np.array([
            np.eye(self.M, dtype=default_float()) for _ in range(self.D)
        ])
        self.q_sqrt_s = Parameter(identity_factors, transform=triangular())
Ejemplo n.º 11
0
 def __init__(self,
              variance=1.0,
              lengthscale=1.0,
              alpha=1.0,
              active_dims=None):
     """Delegate variance/lengthscale to the parent kernel and add alpha."""
     super().__init__(variance=variance,
                      lengthscale=lengthscale,
                      active_dims=active_dims)
     # Extra positive-constrained parameter specific to this kernel.
     self.alpha = Parameter(alpha, transform=positive())
Ejemplo n.º 12
0
 def __init__(self,
              data: Tuple[tf.Tensor, tf.Tensor],
              m: int = 100,
              lengthscales = None,
              sigma_n_sq: float = 1,
              sigma_f_sq: float = 1,
              randn = None):
     """
     Set up a GP approximation built from m random frequency draws.

     :param data: tuple (X, y) of training inputs and targets.
     :param m: number of random frequencies (feature space is 2*m-dimensional).
     :param lengthscales: initial lengthscales; defaults to ones, one per
         input dimension.
     :param sigma_n_sq: initial noise variance.
     :param sigma_f_sq: initial signal variance.
     :param randn: optional fixed frequency draws; sampled standard-normal
         when None.
     """
     # NOTE: the annotations use plain `float` because the `np.float` alias
     # was removed in NumPy 1.24.
     self.num_data = tf.cast(data[1].size, default_float())
     self.data = (tf.cast(data[0], default_float()), tf.cast(data[1], default_float()))
     self.const = tf.cast(0.5*data[1].size*np.log(2*np.pi), default_float())

     self.eye_2m = tf.eye(2*m, dtype=default_float())
     self.yTy = tf.reduce_sum(tf.math.square(self.data[1]))
     self.m_float = tf.cast(m, default_float())
     # Frequencies: fresh standard-normal draws, or the user-supplied ones.
     self.randn = tf.random.normal(shape=[m, data[0].shape[1]], dtype=default_float()) if randn is None else tf.cast(randn[:, None], default_float())

     lengthscales0 = np.ones(data[0].shape[1]) if lengthscales is None else lengthscales
     self.lengthscales = Parameter(lengthscales0, transform=positive(), dtype=default_float())
     self.sigma_f_sq = Parameter(sigma_f_sq, transform=positive(), dtype=default_float())
     self.sigma_n_sq = Parameter(sigma_n_sq, transform=positive(), dtype=default_float())
Ejemplo n.º 13
0
 def __init__(self,
              data: Tuple[tf.Tensor, tf.Tensor],
              m: int = 100,
              d: int = 4,
              lengthscales = None,
              sigma_n_sq: np.float = 1,
              sigma_f_sq: np.float = 1,
              dir_weights: str = None):
     """
     Set up a GP approximation with m random frequency draws over a
     d-dimensional latent space produced by a neural network.

     :param data: tuple (X, y) of training inputs and targets.
     :param m: number of random frequencies (feature space is 2*m-dimensional).
     :param d: dimensionality of the neural-net output (latent space).
     :param lengthscales: initial lengthscales; defaults to ones per latent dim.
     :param sigma_n_sq: initial noise variance.
     :param sigma_f_sq: initial signal variance.
     :param dir_weights: optional path of pre-trained net weights to load.
     """
     # Match Keras' float precision to the precision of the targets.
     if data[1].dtype == np.float64:
         K_bd.set_floatx('float64')
     else:
         set_default_float(np.float32)

     self.num_data = tf.cast(data[1].shape[0], default_float())
     self.data = (tf.cast(data[0], default_float()), tf.cast(data[1], default_float()))
     self.const = tf.cast(0.5*data[1].size*np.log(2*np.pi), default_float())

     self.eye_2m = tf.eye(2*m, dtype=default_float())
     self.yTy = tf.reduce_sum(tf.math.square(self.data[1]))
     self.m_float = tf.cast(m, default_float())
     # Standard-normal frequency draws over the d-dimensional latent space.
     self.randn = tf.random.normal(shape=[m, d], dtype=default_float())

     lengthscales0 = np.ones(d) if lengthscales is None else lengthscales
     self.lengthscales = Parameter(lengthscales0, transform=positive(), dtype=default_float())
     self.sigma_f_sq = Parameter(sigma_f_sq, transform=positive(), dtype=default_float())
     self.sigma_n_sq = Parameter(sigma_n_sq, transform=positive(), dtype=default_float())

     # Feature extractor mapping inputs to the d-dimensional latent space.
     model = models.Sequential()
     model.add(layers.Dense(512, activation='tanh', input_dim=data[0].shape[1]))
     model.add(layers.Dense(256, activation='tanh'))
     model.add(layers.Dense(64, activation='tanh'))
     model.add(layers.Dense(d))

     if dir_weights is not None:
         model.load_weights(dir_weights)
     self.neural_net = model
    def __init__(self, images: TensorData, name: Optional[str] = None):
        """
        :param images: initial values of inducing locations in image form.
        :param name: optional module name, forwarded to the parent class.

        The shape of the inducing variables varies by representation:
          - as Z: [M, height * width * channels_in]
          - as images: [M, height, width, channels_in]
          - as patches: [M, height * width * channels_in]
          - as filters: [height, width, channels_in, M]

        TODO:
          - Generalize to allow for inducing image with multiple patches?
          - Work on naming convention? The term 'image' is a bit too general.
            Patch works, however this term usually refers to a vectorized form
            and (for now) overlaps with GPflow's own inducing class. Alternatives
            include: filter, window, glimpse
        """
        super().__init__(name=name)
        # Trainable inducing images, cast to the default float dtype.
        self._images = Parameter(images, dtype=default_float())
Ejemplo n.º 15
0
  def __init__(self,
               kernel: kernels.Kernel,
               image_shape: List,
               patch_shape: List,
               channels_in: int = 1,
               channels_out: int = 1,
               weights: TensorType = "default",
               strides: List = None,
               padding: str = "VALID",
               dilations: List = None,
               data_format: str = "NHWC"):
    """
    Store convolution geometry, the base kernel, and optional patch weights.

    Passing weights="default" builds a vector of ones with one entry per
    spatial output location; passing None disables the weight parameter.
    """
    strides = list((1, 1) if strides is None else strides)
    dilations = list((1, 1) if dilations is None else dilations)

    # Validate the convolution geometry before storing anything.
    assert len(strides) == 2
    assert len(dilations) == 2
    assert padding in ("VALID", "SAME")
    assert data_format in ("NHWC", "NCHW")

    if isinstance(weights, str) and weights == "default":  # TODO: improve me
      # One unit weight per spatial output location.
      out_shape = self.get_spatial_out(spatial_in=image_shape,
                                       filter_shape=patch_shape,
                                       strides=strides,
                                       padding=padding,
                                       dilations=dilations)
      weights = tf.ones([tf.reduce_prod(out_shape)], dtype=default_float())

    super().__init__()
    self.kernel = kernel
    self.image_shape = image_shape
    self.patch_shape = patch_shape
    self.channels_in = channels_in
    self.channels_out = channels_out
    self.strides = strides
    self.padding = padding
    self.dilations = dilations
    self.data_format = data_format
    self._weights = None if (weights is None) else Parameter(weights)
Ejemplo n.º 16
0
class RobustObjectiveMixin:
    """Mixin that retries an objective computation with escalating jitter."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Non-trainable jitter variance, floored at 1e-20 so the power-of-ten
        # scaling in the retry loop below always starts from a positive value.
        self.jitter_variance = Parameter(
            max(default_jitter(), 1e-20), transform=positive(0.0), trainable=False, name="jitter"
        )

    def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor:
        # Subclasses implement the actual objective computation here.
        raise NotImplementedError

    def robust_maximum_log_likelihood_objective(self, restore_jitter=True) -> tf.Tensor:
        """
        Evaluate the objective, retrying with 10x larger jitter after each
        Cholesky/invertibility failure, for up to 20 orders of magnitude.

        :param restore_jitter: if True, reset the jitter to its initial value
            after the loop finishes.
        :raises: re-raises the error when it is not a recognised numerical
            failure or when the final attempt still fails.
        """
        initial_jitter = self.jitter_variance.numpy()
        N_orders = 20
        for i in range(N_orders):
            # Attempt i uses jitter scaled by 10**i (i == 0: original value).
            self.jitter_variance.assign(10 ** i * initial_jitter)
            logjitter = np.log10(self.jitter_variance.numpy())
            if i > 0:
                if i == 1:
                    print(
                        f"{type(self).__name__}: Failed first computation. " f"Now attempting computation with jitter ",
                        end="",
                    )
                print(f"10**{logjitter:.2f} ", end="", flush=True)
            try:
                val = self._compute_robust_maximum_log_likelihood_objective()
                break
            except tf.errors.InvalidArgumentError as e_inner:
                e_msg = e_inner.message
                # Only swallow Cholesky / invertibility failures; any other
                # error (or the final attempt) is printed and re-raised.
                if (("Cholesky" not in e_msg) and ("not invertible" not in e_msg)) or i == (N_orders - 1):
                    print(e_msg)
                    raise e_inner
            except AssertionError as e_inner:
                e_msg = e_inner.args
                if i == (N_orders - 1):
                    print(e_msg)
                    raise e_inner
        if restore_jitter:
            self.jitter_variance.assign(initial_jitter)
        # `i` and `val` deliberately leak from the loop: the loop either breaks
        # with `val` set or re-raises on the last iteration.
        if i > 0:
            # Terminate the progress line started above.
            print("")
        return val
Ejemplo n.º 17
0
    def __init__(
        self,
        data: OutputData,
        kernel: Optional[Kernel] = None,
        latent_dimensions: Optional[int] = 2,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        *,
        mean_function=None,
        q_diag: bool = False,
        q_mu=None,
        q_sqrt=None,
        whiten: bool = False,
    ):
        """
        - kernel, likelihood, inducing_variables, mean_function are appropriate
          GPflow objects; kernel defaults to SquaredExponential when None
        - latent_dimensions is the dimensionality of the latent space,
          defaults to 2
        - q_diag is a boolean. If True, the covariance is approximated by a
          diagonal matrix.
        - whiten is a boolean. If True, we use the whitened representation of
          the inducing points.
        - q_mu / q_sqrt optionally initialise the variational posterior; their
          shapes are validated in _init_variational_parameters
        - exactly one of inducing_variable / num_inducing_variables must be
          supplied
        """

        self.latent_dimensions = latent_dimensions

        # Store the observed data as a tensor.
        self.data = data_input_to_tensor(data)

        # Initialise the latent means by PCA-reducing the observed data.
        X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = data.shape

        self.num_data = num_data

        # Unit initial variance for every latent coordinate.
        X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                             dtype=default_float())

        assert X_data_var.ndim == 2

        # Default kernel.
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        # Trainable variational parameters of the latent positions.
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        # Exactly one of the two inducing specifications must be given.
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize by subset of initial latent points.
            # Note that tf.random.shuffle returns a copy, it does not shuffle
            # in-place. (k-means clustering could be used here instead.)
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # Loss placeholders for analysis purposes.
        self.loss_placeholder = defaultdict(
            list, {k: []
                   for k in ("KL_x", "ELBO", "KL_u")})

        # Prior over the latent positions: zero mean, unit variance.
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                dtype=default_float())
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                              dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        # Sanity checks.
        # NOTE(review): `assert` is stripped under `python -O`; consider
        # raising ValueError for genuine input validation.
        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions

        # Initialise the parent model; num_latent_gps comes from the data's
        # output dimensionality.
        super().__init__(kernel, likelihoods.Gaussian(variance=0.1),
                         mean_function, num_latent_gps)
        self.q_diag = q_diag
        self.whiten = whiten
        #self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # Build q_mu / q_sqrt with shapes depending on num_inducing & q_diag.
        num_inducing = self.inducing_variable.num_inducing
        self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
Ejemplo n.º 18
0
def main(args):
    """Run cross-validated training and evaluation of a DGP regression model.

    For each of ``args.splits`` dataset splits, a deep GP is fitted on the
    training data and evaluated on the held-out test set.  Per-split RMSE,
    test log-likelihood and training wall-clock time are appended to
    ``<results_dir><dataset>_<num_layers>_<num_inducing>.{rmse,nll,time}``,
    followed by the mean and standard deviation over all splits.

    :param args: parsed command-line namespace; fields used here:
        data_path, results_dir, dataset, splits, normalize_data,
        num_inducing, M, max_dim, ard, num_layers, num_samples,
        learning_rate, log_dir, iterations, logging_iter_freq,
        test_batch_size, test_samples.
    """
    datasets = Datasets(data_path=args.data_path)

    # prepare output files -- all three share the same name stem
    stem = args.results_dir + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing)
    outname1 = stem + '.rmse'
    if not os.path.exists(os.path.dirname(outname1)):
        os.makedirs(os.path.dirname(outname1))
    outname2 = stem + '.nll'
    outname3 = stem + '.time'

    # context managers guarantee the result files are closed even if a
    # split raises part-way through training (previously leaked on error)
    with open(outname1, 'w') as outfile1, \
         open(outname2, 'w') as outfile2, \
         open(outname3, 'w') as outfile3:

        # =====================================================================
        # CROSS-VALIDATION LOOP
        # =====================================================================
        running_err = 0
        running_loss = 0
        running_time = 0
        test_errs = np.zeros(args.splits)
        test_nlls = np.zeros(args.splits)
        test_times = np.zeros(args.splits)
        for i in range(args.splits):
            # =================================================================
            # MODEL CONSTRUCTION
            # =================================================================
            print('Split: {}'.format(i))
            print('Getting dataset...')
            # get dataset
            data = datasets.all_datasets[args.dataset].get_data(
                i, normalize=args.normalize_data)
            X, Y, Xs, Ys, Y_std = [
                data[_] for _ in ['X', 'Y', 'Xs', 'Ys', 'Y_std']
            ]

            # inducing points via k-means
            Z = kmeans2(X, args.num_inducing, minit='points')[0]

            # set up batches
            batch_size = args.M if args.M < X.shape[0] else X.shape[0]
            train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat()\
                .prefetch(X.shape[0]//2)\
                .shuffle(buffer_size=(X.shape[0]//2))\
                .batch(batch_size)

            print('Setting up DGP model...')
            kernels = []
            dims = []

            # hidden layers never exceed args.max_dim dimensions
            hidden_dim = X.shape[1] if X.shape[1] < args.max_dim else args.max_dim
            for layer_idx in range(args.num_layers):
                if layer_idx == 0:
                    dim = X.shape[1]
                    dims.append(dim)
                else:
                    dim = hidden_dim
                    dims.append(dim)

                if args.ard:
                    # SE kernel with lengthscale per dimension
                    kernels.append(
                        SquaredExponential(lengthscale=[1.] * dim) +
                        White(variance=1e-5))
                else:
                    # SE kernel with single lengthscale
                    kernels.append(
                        SquaredExponential(lengthscale=1.) + White(variance=1e-5))

            # output dim
            dims.append(Y.shape[1])

            dgp_model = DGP(X,
                            Y,
                            Z,
                            dims,
                            kernels,
                            Gaussian(variance=0.05),
                            num_samples=args.num_samples,
                            num_data=X.shape[0])

            # initialise inner layers almost deterministically
            for layer in dgp_model.layers[:-1]:
                layer.q_sqrt = Parameter(layer.q_sqrt.value() * 1e-5,
                                         transform=triangular())

            # =================================================================
            # TRAINING
            # =================================================================
            optimiser = tf.optimizers.Adam(args.learning_rate)

            print('Training DGP model...')
            t0 = time.time()
            # training loop
            monitored_training_loop(dgp_model,
                                    train_dataset,
                                    optimiser=optimiser,
                                    logdir=args.log_dir,
                                    iterations=args.iterations,
                                    logging_iter_freq=args.logging_iter_freq)
            t1 = time.time()

            # =================================================================
            # TESTING
            # =================================================================
            test_times[i] = t1 - t0
            print('Time taken to train: {}'.format(t1 - t0))
            outfile3.write('Split {}: {}\n'.format(i + 1, t1 - t0))
            outfile3.flush()
            os.fsync(outfile3.fileno())
            running_time += t1 - t0

            # minibatch test predictions ("variances" avoids shadowing the
            # `vars` builtin, which the original code clobbered)
            means, variances = [], []
            test_batch_size = args.test_batch_size
            if len(Xs) > test_batch_size:
                # ceil-divide to cover the final partial batch
                for mb in range(-(-len(Xs) // test_batch_size)):
                    m, v = dgp_model.predict_y(Xs[mb * test_batch_size:(mb + 1) *
                                                  test_batch_size, :],
                                               num_samples=args.test_samples)
                    means.append(m)
                    variances.append(v)
            else:
                m, v = dgp_model.predict_y(Xs, num_samples=args.test_samples)
                means.append(m)
                variances.append(v)

            # concatenate batches back along the data axis
            mean_SND = np.concatenate(means, 1)  # [S, N, D]
            var_SND = np.concatenate(variances, 1)  # [S, N, D]
            mean_ND = np.mean(mean_SND, 0)  # [N, D]

            # rmse (rescaled back to the original output units via Y_std)
            test_err = np.mean(Y_std * np.mean((Ys - mean_ND)**2.0)**0.5)
            test_errs[i] = test_err
            print('Average RMSE: {}'.format(test_err))
            outfile1.write('Split {}: {}\n'.format(i + 1, test_err))
            outfile1.flush()
            os.fsync(outfile1.fileno())
            running_err += test_err

            # nll: Monte-Carlo average over samples via logsumexp with
            # uniform weights b = 1/S
            test_nll = np.mean(
                logsumexp(norm.logpdf(Ys * Y_std, mean_SND * Y_std,
                                      var_SND**0.5 * Y_std),
                          0,
                          b=1 / float(args.test_samples)))
            test_nlls[i] = test_nll
            print('Average test log likelihood: {}'.format(test_nll))
            outfile2.write('Split {}: {}\n'.format(i + 1, test_nll))
            outfile2.flush()
            os.fsync(outfile2.fileno())
            running_loss += test_nll

        outfile1.write('Average: {}\n'.format(running_err / args.splits))
        outfile1.write('Standard deviation: {}\n'.format(np.std(test_errs)))
        outfile2.write('Average: {}\n'.format(running_loss / args.splits))
        outfile2.write('Standard deviation: {}\n'.format(np.std(test_nlls)))
        outfile3.write('Average: {}\n'.format(running_time / args.splits))
        outfile3.write('Standard deviation: {}\n'.format(np.std(test_times)))
Ejemplo n.º 19
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent model and attach a fixed, non-trainable jitter parameter."""
     super().__init__(*args, **kwargs)
     # floor the configured jitter so the parameter never starts at exactly zero
     initial_jitter = max(default_jitter(), 1e-20)
     self.jitter_variance = Parameter(
         initial_jitter,
         transform=positive(0.0),
         trainable=False,
         name="jitter",
     )
Ejemplo n.º 20
0
 def __init__(self, c=None):
     """Wrap the constant value *c* in a trainable Parameter.

     :param c: initial constant; defaults to a single zero when omitted.
     """
     super().__init__()
     if c is None:
         c = np.zeros(1)
     self.c = Parameter(c)
Ejemplo n.º 21
0
    def __init__(
        self,
        data: OutputData,
        split_space: bool,
        Xp_mean: tf.Tensor,
        Xp_var: tf.Tensor,
        pi: tf.Tensor,
        kernel_K: List[Kernel],
        Zp: tf.Tensor,
        Xs_mean=None,
        Xs_var=None,
        kernel_s=None,
        Zs=None,
        Xs_prior_mean=None,
        Xs_prior_var=None,
        Xp_prior_mean=None,
        Xp_prior_var=None,
        pi_prior=None
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param: split_space, if true, have both shared and private space;
            if false, only have private spaces (note: to recover GPLVM, set split_space=False and let K=1)
        :param Xp_mean: mean latent positions in the private space [N, Qp] (Qp is the dimension of the private space)
        :param Xp_var: variance of the latent positions in the private space [N, Qp]
        :param pi: mixture responsibility of each category to each point [N, K] (K is the number of categories), i.e. q(c)
        :param kernel_K: private space kernel, one for each category
        :param Zp: inducing inputs of the private space [M, Qp]
        :param num_inducing_variables: number of inducing points, M
        :param Xs_mean: mean latent positions in the shared space [N, Qs] (Qs is the dimension of the shared space). i.e. mus in q(Xs) ~ N(Xs | mus, Ss)
        :param Xs_var: variance of latent positions in shared space [N, Qs], i.e. Ss, assumed diagonal
        :param kernel_s: shared space kernel
        :param Zs: inducing inputs of the shared space [M, Qs] (M is the number of inducing points)
        :param Xs_prior_mean: prior mean used in KL term of bound, [N, Qs]. By default 0. mean in p(Xs)
        :param Xs_prior_var: prior variance used in KL term of bound, [N, Qs]. By default 1. variance in p(Xs)
        :param Xp_prior_mean: prior mean used in KL term of bound, [N, Qp]. By default 0. mean in p(Xp)
        :param Xp_prior_var: prior variance used in KL term of bound, [N, Qp]. By default 1. variance in p(Xp)
        :param pi_prior: prior mixture weights used in KL term of the bound, [N, K]. By default uniform. p(c)
        """

        # if don't want shared space, set shared space to none --> get a mixture of GPLVM
        # if don't want private space, set shared space to none, set K = 1 and only include 1 kernel in `kernel_K` --> recover the original GPLVM

        # TODO: think about how to do this with minibatch
        # it's awkward since w/ minibatch the model usually doesn't store the data internally
        # but for gplvm, you need to keep the q(xn) for all the n's
        # so you need to know which ones to update for each minibatch, probably can be solved but not pretty
        # using inference network / back constraints will solve this, since we will be keeping a global set of parameters
        # rather than a set for each q(xn)
        self.N, self.D = data.shape
        self.Qp = Xp_mean.shape[1]
        self.K = pi.shape[1]
        self.split_space = split_space

        assert Xp_var.ndim == 2
        assert len(kernel_K) == self.K
        assert np.all(Xp_mean.shape == Xp_var.shape)
        assert Xp_mean.shape[0] == self.N, "Xp_mean and Y must be of same size"
        assert pi.shape[0] == self.N, "pi and Y must be of the same size"

        super().__init__()
        self.likelihood = likelihoods.Gaussian()
        self.kernel_K = kernel_K
        self.data = data_input_to_tensor(data)
        # the covariance of q(X) as a [N, Q] matrix, the assumption is that Sn's are diagonal
        # i.e. the latent dimensions are uncorrelated
        # otherwise would require a [N, Q, Q] matrix
        self.Xp_mean = Parameter(Xp_mean)
        self.Xp_var = Parameter(Xp_var, transform=positive())
        self.pi = Parameter(pi, transform=tfp.bijectors.SoftmaxCentered())
        self.Zp = inducingpoint_wrapper(Zp)
        self.M = len(self.Zp)

        # initialize the variational parameters for q(U), same way as in SVGP
        # q_mu: List[K], mean of the inducing variables U [M, D], i.e m in q(U) ~ N(U | m, S),
        #   initialized as zeros
        # q_sqrt: List[K], cholesky of the covariance matrix of the inducing variables [D, M, M]
        #   q_diag is false because natural gradient only works for full covariance
        #   initialized as all identities
        # we need K sets of q(Uk), each approximating fs+fk
        self.q_mu = []
        self.q_sqrt = []
        for k in range(self.K):
            q_mu = np.zeros((self.M, self.D))
            q_mu = Parameter(q_mu, dtype=default_float())  # [M, D]
            self.q_mu.append(q_mu)

            q_sqrt = [
                np.eye(self.M, dtype=default_float()) for _ in range(self.D)
            ]
            q_sqrt = np.array(q_sqrt)
            q_sqrt = Parameter(q_sqrt, transform=triangular())  # [D, M, M]
            self.q_sqrt.append(q_sqrt)

        # deal with parameters for the prior
        if Xp_prior_mean is None:
            Xp_prior_mean = tf.zeros((self.N, self.Qp), dtype=default_float())
        if Xp_prior_var is None:
            # dtype must match default_float() like the other defaults,
            # otherwise this tensor is created float32 and only recast later
            Xp_prior_var = tf.ones((self.N, self.Qp), dtype=default_float())
        if pi_prior is None:
            pi_prior = tf.ones((self.N, self.K), dtype=default_float()) * 1/self.K

        self.Xp_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xp_prior_mean), dtype=default_float())
        self.Xp_prior_var = tf.convert_to_tensor(np.atleast_1d(Xp_prior_var), dtype=default_float())
        self.pi_prior = tf.convert_to_tensor(np.atleast_1d(pi_prior), dtype=default_float())


        # if we have both shared space and private space, need to initialize the parameters for the shared space
        if split_space:
            assert Xs_mean is not None and Xs_var is not None and kernel_s is not None and Zs is not None, 'Xs_mean, Xs_var, kernel_s, Zs need to be initialize if `split_space=True`'
            assert Xs_var.ndim == 2
            assert np.all(Xs_mean.shape == Xs_var.shape)
            assert Xs_mean.shape[0] == self.N, "Xs_mean and Y must be of same size"
            self.Qs = Xs_mean.shape[1]
            self.kernel_s = kernel_s
            self.Xs_mean = Parameter(Xs_mean)
            self.Xs_var = Parameter(Xs_var, transform=positive())
            self.Zs = inducingpoint_wrapper(Zs)

            if len(Zs) != len(Zp):
                raise ValueError(
                    '`Zs` and `Zp` should have the same length'
                )

            if Xs_prior_mean is None:
                Xs_prior_mean = tf.zeros((self.N, self.Qs), dtype=default_float())
            if Xs_prior_var is None:
                # keep dtype consistent with default_float(), as above
                Xs_prior_var = tf.ones((self.N, self.Qs), dtype=default_float())
            self.Xs_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xs_prior_mean), dtype=default_float())
            self.Xs_prior_var = tf.convert_to_tensor(np.atleast_1d(Xs_prior_var), dtype=default_float())

        # per-point, per-category contribution to the bound, filled in later
        self.Fq = tf.zeros((self.N, self.K), dtype=default_float())
Ejemplo n.º 22
0
def main(args):
    """Run cross-validated training and evaluation of a DGP regression model.

    For each of ``args.splits`` dataset splits, a deep GP is fitted on the
    training data; per-split test log-likelihood and training wall-clock
    time are appended to
    ``../tmp/<dataset>_<num_layers>_<num_inducing>.{nll,time}``, followed
    by the average over all splits.

    :param args: parsed command-line namespace; fields used here:
        data_path, dataset, splits, num_inducing, M, num_layers,
        num_samples, learning_rate, log_dir, iterations,
        logging_iter_freq, test_samples.
    """
    datasets = Datasets(data_path=args.data_path)

    # Prepare output files
    outname1 = '../tmp/' + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.nll'
    if not os.path.exists(os.path.dirname(outname1)):
        os.makedirs(os.path.dirname(outname1))
    outname2 = '../tmp/' + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.time'

    # context managers guarantee the result files are closed even if a
    # split raises part-way through training (previously leaked on error)
    with open(outname1, 'w') as outfile1, open(outname2, 'w') as outfile2:
        running_loss = 0
        running_time = 0
        for i in range(args.splits):
            print('Split: {}'.format(i))
            print('Getting dataset...')
            data = datasets.all_datasets[args.dataset].get_data(i)
            X, Y, Xs, Ys, Y_std = [
                data[_] for _ in ['X', 'Y', 'Xs', 'Ys', 'Y_std']
            ]
            # inducing points via k-means
            Z = kmeans2(X, args.num_inducing, minit='points')[0]

            # set up batches
            batch_size = args.M if args.M < X.shape[0] else X.shape[0]
            train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat()\
                    .prefetch(X.shape[0]//2)\
                    .shuffle(buffer_size=(X.shape[0]//2))\
                    .batch(batch_size)

            print('Setting up DGP model...')
            kernels = []
            for _ in range(args.num_layers):
                kernels.append(SquaredExponential() + White(variance=1e-5))

            dgp_model = DGP(X.shape[1],
                            kernels,
                            Gaussian(variance=0.05),
                            Z,
                            num_outputs=Y.shape[1],
                            num_samples=args.num_samples,
                            num_data=X.shape[0])

            # initialise inner layers almost deterministically
            for layer in dgp_model.layers[:-1]:
                layer.q_sqrt = Parameter(layer.q_sqrt.value() * 1e-5,
                                         transform=triangular())

            optimiser = tf.optimizers.Adam(args.learning_rate)

            def optimisation_step(model, X, Y):
                # single Adam step on the negative ELBO
                with tf.GradientTape() as tape:
                    tape.watch(model.trainable_variables)
                    obj = -model.elbo(X, Y, full_cov=False)
                    grad = tape.gradient(obj, model.trainable_variables)
                optimiser.apply_gradients(zip(grad, model.trainable_variables))

            def monitored_training_loop(model, train_dataset, logdir, iterations,
                                        logging_iter_freq):
                # TODO: use tensorboard to log trainables and performance
                tf_optimisation_step = tf.function(optimisation_step)
                batches = iter(train_dataset)

                for it in range(iterations):
                    X, Y = next(batches)
                    tf_optimisation_step(model, X, Y)

                    iter_id = it + 1
                    if iter_id % logging_iter_freq == 0:
                        tf.print(
                            f'Epoch {iter_id}: ELBO (batch) {model.elbo(X, Y)}')

            print('Training DGP model...')
            t0 = time.time()
            monitored_training_loop(dgp_model,
                                    train_dataset,
                                    logdir=args.log_dir,
                                    iterations=args.iterations,
                                    logging_iter_freq=args.logging_iter_freq)
            t1 = time.time()
            print('Time taken to train: {}'.format(t1 - t0))
            outfile2.write('Split {}: {}\n'.format(i + 1, t1 - t0))
            outfile2.flush()
            os.fsync(outfile2.fileno())
            running_time += t1 - t0

            # Monte-Carlo test log-likelihood: logsumexp over samples with
            # uniform weights b = 1/S
            m, v = dgp_model.predict_y(Xs, num_samples=args.test_samples)
            test_nll = np.mean(
                logsumexp(norm.logpdf(Ys * Y_std, m * Y_std, v**0.5 * Y_std),
                          0,
                          b=1 / float(args.test_samples)))
            print('Average test log likelihood: {}'.format(test_nll))
            outfile1.write('Split {}: {}\n'.format(i + 1, test_nll))
            outfile1.flush()
            os.fsync(outfile1.fileno())
            # BUG FIX: previously accumulated training time (t1 - t0) here,
            # so the .nll file's "Average" was the mean training time
            running_loss += test_nll

        outfile1.write('Average: {}\n'.format(running_loss / args.splits))
        outfile2.write('Average: {}\n'.format(running_time / args.splits))
Ejemplo n.º 23
0
    def __init__(
        self,
        data: OutputData,
        X_data_mean: Optional[tf.Tensor] = None,
        X_data_var: Optional[tf.Tensor] = None,
        kernel: Optional[Kernel] = None,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        X_prior_mean=None,
        X_prior_var=None,
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param X_data_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
        :param X_data_var: variance of latent positions ([N, Q]), for the initialisation of the latent space.
        :param kernel: kernel specification, by default Squared Exponential
        :param num_inducing_variables: number of inducing points, M
        :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions). By default
            random permutation of X_data_mean.
        :param X_prior_mean: prior mean used in KL term of bound. By default 0. Same size as X_data_mean.
        :param X_prior_var: prior variance used in KL term of bound. By default 1.
        """

        self.latent_dimensions = 2
        #grab data
        self.data = data_input_to_tensor(data)

        #define lat-space initialization
        if X_data_mean is None:
            X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = X_data_mean.shape

        self.num_data = num_data

        if X_data_var is None:
            X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                                 dtype=default_float())

        assert X_data_var.ndim == 2

        self.output_dim = self.data.shape[-1]  #num_latent maybe

        #def kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        #init GPMODEL
        super().__init__(kernel,
                         likelihoods.Gaussian(variance=0.1),
                         num_latent_gps=num_latent_gps)

        #init Parameters latent
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        #init parameter inducing point
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize by subset of initial latent points
            # Note that tf.random.shuffle returns a copy, it does not shuffle in-place
            #maybe use k-means clustering
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        #loss placeholder for analysis purpuse
        self.loss_placeholder = defaultdict(list,
                                            {k: []
                                             for k in ("KL_x", "ELBO")})

        # deal with parameters for the prior mean variance of X
        if X_prior_mean is None:
            X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                    dtype=default_float())
        if X_prior_var is None:
            X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                                  dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        #sanity check

        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions