Example #1
def variational_model():
    qpi = ed.Dirichlet(concentration=qpi_alpha, name='qpi')
    # This parameterization works well; an elementwise Normal alternative
    # is kept below for reference.
    qmu = ed.MultivariateNormalDiag(
        loc=qmu_loc, scale_diag=qmu_scale, name='qmu')
    # qmu = ed.Normal(loc=qmu_loc, scale=qmu_scale, name='qmu')
    qsigma = ed.InverseGamma(concentration=qsigma_alpha, rate=qsigma_beta,
                             name='qsigma')
    qz = ed.MultivariateNormalDiag(loc=qz_loc, scale_diag=qz_scale, name='qz')
    qw = ed.MultivariateNormalDiag(loc=qw_loc, scale_diag=qw_scale, name='qw')
    return qpi, qmu, qsigma, qz, qw
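The snippet above comes from a larger program: the variational parameters (qpi_alpha, qmu_loc, qmu_scale, qsigma_alpha, qsigma_beta, qz_loc, qz_scale, qw_loc, qw_scale) are defined elsewhere. A minimal sketch of that missing setup, using the TF1-era Edward2 import all of these examples rely on; the sizes N, K, D and the softplus constraints are illustrative assumptions, not from the source:

import numpy as np
import tensorflow as tf
from tensorflow_probability import edward2 as ed  # TF1-era Edward2

# Hypothetical sizes: N observations, K mixture components, D latent dims.
N, K, D = 100, 3, 2

# Unconstrained variables are passed through softplus where positivity
# is required (concentrations, rates, scales).
qpi_alpha = tf.nn.softplus(tf.get_variable("qpi_alpha", shape=[K]))
qmu_loc = tf.get_variable("qmu_loc", shape=[K, D])
qmu_scale = tf.nn.softplus(tf.get_variable("qmu_scale", shape=[K, D]))
qsigma_alpha = tf.nn.softplus(tf.get_variable("qsigma_alpha", shape=[K, D]))
qsigma_beta = tf.nn.softplus(tf.get_variable("qsigma_beta", shape=[K, D]))
qz_loc = tf.get_variable("qz_loc", shape=[N, D])
qz_scale = tf.nn.softplus(tf.get_variable("qz_scale", shape=[N, D]))
qw_loc = tf.get_variable("qw_loc", shape=[D])
qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", shape=[D]))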
Example #2
def Q(D0, D, n_classes, sample_shape):
    # Mean-field variational family mirroring the EpiAnno model (cf.
    # Example #15). `dtype` is assumed defined at module level
    # (e.g. np.float32).
    qmu_loc = [tf.Variable(np.zeros([D0]), dtype=dtype)
               for _ in range(n_classes)]
    qmu_scale = [tf.nn.softplus(0.1 * tf.Variable(np.ones([D0]), dtype=dtype))
                 for _ in range(n_classes)]
    qmu = [ed.Normal(loc=qmu_loc[i], scale=qmu_scale[i], name='qmu%d' % i)
           for i in range(n_classes)]

    qsigma_alpha = tf.nn.softplus(0.5 * tf.Variable(np.ones([D0]), dtype=dtype))
    qsigma_beta = tf.nn.softplus(0.5 * tf.Variable(np.ones([D0]), dtype=dtype))
    qsigma = ed.InverseGamma(concentration=qsigma_alpha, rate=qsigma_beta,
                             name='qsigma')

    qz = [ed.MultivariateNormalDiag(loc=qmu[i], sample_shape=sample_shape[i],
                                    scale_diag=qsigma, name='qz%d' % i)
          for i in range(n_classes)]

    qbeta_loc = tf.Variable(tf.zeros([D0, D], dtype=dtype), name="qbeta_loc")
    qbeta_scale = tf.nn.softplus(tf.Variable(tf.ones([D0, D], dtype=dtype),
                                             name="qbeta_scale"))
    qbeta = ed.Normal(qbeta_loc, qbeta_scale, name="qbeta")

    qalpha_loc = tf.Variable(tf.zeros([D], dtype=dtype), name="qalpha_loc")
    qalpha_scale = tf.nn.softplus(tf.Variable(tf.ones([D], dtype=dtype),
                                              name="qalpha_scale"))
    qalpha = ed.Normal(qalpha_loc, qalpha_scale, name="qalpha")

    qnoise_alpha = tf.nn.softplus(0.5 * tf.Variable(np.ones([1]), dtype=dtype))
    qnoise_beta = tf.nn.softplus(0.5 * tf.Variable(np.ones([1]), dtype=dtype))
    qnoise = ed.InverseGamma(concentration=qnoise_alpha, rate=qnoise_beta,
                             name='qnoise')

    return qmu, qsigma, qz, (qbeta, qalpha), qnoise
Example #3
    def definition(self, ridge_factor=1e-3, name="gp", gp_only=False):
        """Defines Gaussian Process prior with kernel_func.

        Args:
            ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
            name: (str) name of the random variable
            gp_only: (bool) Whether to return only the GP random variable.

        Returns:
            (ed.RandomVariable) A random variable representing the Gaussian Process,
                dimension (N,)

        """
        X = tf.convert_to_tensor(self.X, dtype=dtype_util.TF_DTYPE)
        ls = tf.convert_to_tensor(self.ls, dtype=dtype_util.TF_DTYPE)

        gp_mean = tf.zeros(self.n_obs, dtype=dtype_util.TF_DTYPE)

        # covariance
        K_mat = self.kern_func(X, ls=ls, ridge_factor=ridge_factor)

        gp = ed.MultivariateNormalTriL(loc=gp_mean,
                                       scale_tril=tf.cholesky(K_mat),
                                       name=name)

        if gp_only:
            return gp

        y = ed.MultivariateNormalDiag(loc=gp,
                                      scale_identity_multiplier=.01,
                                      name="y")
        return y
Example #4
def model(X, log_ls=0., ridge_factor=1e-4, sample_ls=False):
    """Defines the Gaussian Process Model.

    Args:
        X: (np.ndarray of float32) input training features,
            with dimension (N, D).
        log_ls: (float32) length-scale parameter in log scale.
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
        sample_ls: (bool) Whether to sample the length-scale parameter.

    Returns:
         (tf.Tensors of float32) model parameters.
    """
    X = tf.convert_to_tensor(X)

    N = X.shape.as_list()[0]

    # specify kernel matrix
    if sample_ls:
        log_ls = ed.Normal(loc=-5., scale=1., name='ls')

    K_mat = rbf(X, ls=tf.exp(log_ls), ridge_factor=ridge_factor)

    # specify model parameters
    gp_f = ed.MultivariateNormalTriL(loc=tf.zeros(N),
                                     scale_tril=tf.cholesky(K_mat),
                                     name="gp_f")
    sigma = ed.Normal(loc=-5., scale=1., name='sigma')

    y = ed.MultivariateNormalDiag(loc=gp_f,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")

    return y, gp_f, sigma, log_ls
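A hedged usage sketch for the model above: ed.make_log_joint_fn (a standard Edward2 utility) turns the generative function into a joint log-density, which can then be conditioned on observed data to obtain an unnormalized posterior over gp_f and sigma, e.g. as a target for an MCMC sampler. X_train and y_train are hypothetical arrays; rbf is assumed importable as in the source:

log_joint = ed.make_log_joint_fn(model)

def target_log_prob_fn(gp_f, sigma):
    # Unnormalized log posterior of (gp_f, sigma) given observed y_train.
    return log_joint(X_train, gp_f=gp_f, sigma=sigma, y=y_train)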
Example #5
    def definition(self, **resid_kwargs):
        """Defines Gaussian process with identity mean function.

        Args:
            **resid_kwargs: Keyword arguments for GaussianProcess model
                definition.

        Returns:
            (ed.RandomVariable) outcome random variable.
        """
        # specify identity mean function
        mean_func = self.model_cdf

        # specify residual function
        gp = self.gp_model.definition(gp_only=True,
                                      name="gp",
                                      **resid_kwargs)

        # specify observational noise
        sigma = ed.Normal(loc=_LOG_NOISE_PRIOR_MEAN,
                          scale=_LOG_NOISE_PRIOR_SDEV, name="log_sigma")

        # specify outcome
        cdf_mean = mean_func + gp

        if self.activation:
            cdf_mean = self.activation(cdf_mean)

        cdf = ed.MultivariateNormalDiag(loc=cdf_mean,
                                        scale_identity_multiplier=tf.exp(sigma),
                                        name="cdf")

        return cdf
Example #6
def mfvi_variational_family(X, name="", **kwargs):
    """Defines the mean-field variational family for Gaussian Process.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (N, D).
        name: (str) name for variational parameters.
        **kwargs: Additional keyword arguments, accepted for compatibility
            with other variational families.

    Returns:
        q_f: (ed.RandomVariable) variational family.
        qf_mean, qf_sdev: (tf.Tensor) variational parameters for q_f.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    N, D = X.shape.as_list()

    # define variational parameters
    qf_mean = tf.get_variable(shape=[N], name='{}_mean'.format(name))
    qf_sdev = tf.exp(tf.get_variable(shape=[N], name='{}_sdev'.format(name)))

    # define variational family
    q_f = ed.MultivariateNormalDiag(loc=qf_mean, scale_diag=qf_sdev, name=name)

    return q_f, qf_mean, qf_sdev
Example #7
    def call(self, inputs):
        """Runs the model forward to generate a sequence of productions.

        Args:
          inputs: Unused.

        Returns:
          productions: Tensor of shape [1, num_productions, num_production_rules].
            Slices along the `num_productions` dimension represent one-hot vectors.
        """
        del inputs  # unused
        latent_code = ed.MultivariateNormalDiag(loc=tf.zeros(self.latent_size),
                                                sample_shape=1,
                                                name="latent_code")
        state = self.lstm.zero_state(1, dtype=tf.float32)
        t = 0
        productions = []
        stack = [self.grammar.start_symbol]
        while stack:
            symbol = stack.pop()
            net, state = self.lstm(latent_code, state)
            logits = (self.output_layer(net) +
                      self.grammar.mask(symbol, on_value=0., off_value=-1e9))
            production = ed.OneHotCategorical(logits=logits,
                                              name="production_" + str(t))
            _, rhs = self.grammar.production_rules[tf.argmax(production,
                                                             axis=-1)]
            for symbol in rhs:
                if symbol in self.grammar.nonterminal_symbols:
                    stack.append(symbol)
            productions.append(production)
            t += 1
        return tf.stack(productions, axis=1)
Example #8
    def latent_encoder(self, x, y):
        """Encodes the inputs into one representation.

        Args:
          x: Tensor of shape [batch_size, observations, d_x]. For the prior, these
            are context x-values. For the posterior, these are target x-values.
          y: Tensor of shape [batch_size, observations, d_y]. For the prior, these
            are context y-values. For the posterior, these are target y-values.

        Returns:
          A normal distribution over tensors of shape [batch_size, num_latents].
        """
        encoder_input = tf.concat([x, y], axis=-1)
        per_example_embedding = batch_mlp(encoder_input,
                                          self._latent_encoder_sizes)
        dataset_embedding = tf.reduce_mean(per_example_embedding, axis=1)
        hidden = tf.keras.layers.Dense(
            (self._latent_encoder_sizes[-1] + self._num_latents) // 2,
            activation=tf.nn.relu)(dataset_embedding)
        loc = tf.keras.layers.Dense(self._num_latents, activation=None)(hidden)
        untransformed_scale = tf.keras.layers.Dense(self._num_latents,
                                                    activation=None)(hidden)
        # Constrain the scale to (0.1, 1.0), following Garnelo et al. (2018).
        scale_diag = 0.1 + 0.9 * tf.sigmoid(untransformed_scale)
        return ed.MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
Example #9
def logistic_regression(features):
    """Bayesian logistic regression, which returns labels given features."""
    coeffs = ed.MultivariateNormalDiag(loc=tf.zeros(features.shape[1]),
                                       name="coeffs")
    labels = ed.Bernoulli(logits=tf.tensordot(features, coeffs, [[1], [0]]),
                          name="labels")
    return labels
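Since labels is an ed.RandomVariable, calling logistic_regression builds a graph that can be sampled directly, and ed.make_log_joint_fn recovers the joint density. A minimal TF1 sketch with hypothetical shapes (100 examples, 5 features):

features = tf.random_normal([100, 5])  # hypothetical design matrix
labels = logistic_regression(features)

log_joint = ed.make_log_joint_fn(logistic_regression)
log_prob = log_joint(features,
                     coeffs=tf.zeros([5]),
                     labels=tf.zeros([100], dtype=tf.int32))

with tf.Session() as sess:
    labels_draw, lp = sess.run([labels, log_prob])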
Example #10
def model_mixture_adaptive(X, ls=1., n_mix=2, ridge_factor=1e-3):
    """Defines the Adaptive Mixture of Gaussian Process Model.

    Note: This method is currently untested and is likely to work poorly
        due to the explicit sampling of membership variables
        (i.e. mix_member). More work is needed to support integrated
        sampling.

    Args:
        X: (np.ndarray of float32) input training features,
            with dimension (N, D).
        ls: (float32) length scale parameter.
        n_mix: (int8) Number of mixture components.
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

    Returns:
         (tf.Tensors of float32) model parameters.
    """
    # TODO(jereliu): find a way to integrate over adaptive mixture.
    raise Warning(
        "This method is currently untested and is likely to work poorly "
        "due to the explicit sampling of membership variables "
        "(i.e. mix_member). More work is needed to support integrated "
        "sampling.")

    N = X.shape[0]
    K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

    gp_weight = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix),
    ),
                               reinterpreted_batch_ndims=1,
                               name="gp_w")
    mix_member = ed.Multinomial(total_count=[1.],
                                logits=tf.transpose(gp_weight),
                                name="mix_prob")

    gp_comp = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix),
    ),
                             reinterpreted_batch_ndims=1,
                             name="gp_f")

    gp_f = tf.reduce_sum(tf.transpose(gp_comp) * mix_member, axis=-1)

    sigma = ed.Normal(loc=-5., scale=1., name='sigma')

    y = ed.MultivariateNormalDiag(loc=gp_f,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    # y = ed.MixtureSameFamily(
    #     components_distribution=tfd.MultivariateNormalDiag(
    #         loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
    #     mixture_distribution=tfd.Categorical(logits=gp_weight),
    #     name="y")

    return gp_weight, mix_member, gp_comp, sigma, y
Example #11
def predict_target(posterior_mu, posterior_sigma, test_data):
    # Score test_data under each class-conditional Gaussian, then pick the
    # most likely class per example.
    K = len(posterior_mu)
    model = [ed.MultivariateNormalDiag(posterior_mu[i], posterior_sigma)
             for i in range(K)]
    prob = []
    with tf.Session() as sess:
        for i in range(K):
            label_prob = model[i].distribution.log_prob(test_data).eval()
            prob.append(label_prob)
    pred_target = np.argmax(prob, 0)
    return pred_target
Example #12
def normal_variational_family(shape,
                              name=None,
                              init_loc=None,
                              init_scale=None,
                              trainable=True):
    """Defines mean-field variational family.

    Args:
        shape: (tuple of int) Defines shape of variational random variable.
        name: (str) Name of the random variable.
        init_loc: (float32) Initial values for q_mean.
        init_scale: (float32) Initial values for q_log_sd.
        trainable: (bool) Whether the variational family is trainable.

    Returns:
        q_rv (ed.RandomVariable) RandomVariable representing the variational family.
        q_mean, q_log_sd (tf.Variable) Variational parameters.

    Raises:
        (ValueError) If init_loc or init_scale is None when trainable=False.
    """
    if not trainable:
        if init_loc is None or init_scale is None:
            raise ValueError(
                "Initial values cannot be None if trainable=False.")

    with tf.variable_scope("q_{}_scope".format(name),
                           default_name="normal_variational"):
        # TODO(jereliu): manipulate such that we can initiate these values.
        #  allow user to set trainable=False and give value through initializer.
        q_mean = tf.get_variable("q_mean",
                                 shape=shape,
                                 dtype=dtype_util.TF_DTYPE,
                                 initializer=init_loc,
                                 trainable=trainable)
        q_log_sd = tf.get_variable("q_log_sd",
                                   shape=shape,
                                   dtype=dtype_util.TF_DTYPE,
                                   initializer=init_scale,
                                   trainable=trainable)

        rv_shape = shape if shape is not None else init_loc.shape

        if len(rv_shape) == 0:
            q_rv = ed.Normal(loc=q_mean,
                             scale=tf.exp(q_log_sd),
                             name="q_{}".format(name))
        else:
            q_rv = ed.MultivariateNormalDiag(loc=q_mean,
                                             scale_diag=tf.exp(q_log_sd),
                                             name="q_{}".format(name))

        return q_rv, q_mean, q_log_sd
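A short usage sketch with a hypothetical shape and name; q_w comes back as an ed.MultivariateNormalDiag because the requested shape is non-scalar:

q_w, q_w_mean, q_w_log_sd = normal_variational_family(shape=(10,),
                                                      name="weight")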
Example #13
    def definition(self, **resid_kwargs):
        """Sets up model definition and parameters.

        Args:
            **resid_kwargs: Keyword arguments for GaussianProcess model
                definition.

        Returns:
            (ed.RandomVariable) outcome random variable.
        """
        # convert data type
        F = tf.convert_to_tensor(self.base_pred_array,
                                 dtype=dtype_util.TF_DTYPE)

        # specify mean function
        W = ed.MultivariateNormalDiag(loc=tf.zeros(shape=(self.n_model,)),
                                      scale_identity_multiplier=_WEIGHT_PRIOR_SDEV,
                                      name="mean_weight")

        FW = tf.matmul(F, tf.expand_dims(W, -1))
        mean_func = tf.reduce_sum(FW, axis=1, name="mean_func")

        # specify residual function
        resid_func = 0.
        if self.add_resid:
            resid_func = self.resid_model.definition(gp_only=True,
                                                     name="resid_func",
                                                     **resid_kwargs)

        # specify observational noise
        sigma = ed.Normal(loc=_LOG_NOISE_PRIOR_MEAN,
                          scale=_LOG_NOISE_PRIOR_SDEV, name="log_sigma")

        # specify outcome
        y = ed.MultivariateNormalDiag(loc=mean_func + resid_func,
                                      scale_identity_multiplier=tf.exp(sigma),
                                      name="y")

        return y
Example #14
def model(features):
    # Set up fixed effects and other parameters.
    intercept = tf.get_variable("intercept", [])
    service_effects = tf.get_variable("service_effects", [])
    student_stddev_unconstrained = tf.get_variable("student_stddev_pre", [])
    instructor_stddev_unconstrained = tf.get_variable(
        "instructor_stddev_pre", [])

    # Set up random effects. (num_students and num_instructors are assumed
    # to be defined in the enclosing scope.)
    student_effects = ed.MultivariateNormalDiag(
        loc=tf.zeros(num_students),
        scale_identity_multiplier=tf.exp(student_stddev_unconstrained),
        name="student_effects")
    instructor_effects = ed.MultivariateNormalDiag(
        loc=tf.zeros(num_instructors),
        scale_identity_multiplier=tf.exp(instructor_stddev_unconstrained),
        name="instructor_effects")

    # Set up likelihood given fixed and random effects.
    ratings = ed.Normal(
        loc=(service_effects * features["service"] +
             tf.gather(student_effects, features["students"]) +
             tf.gather(instructor_effects, features["instructors"]) +
             intercept),
        scale=1.,
        name="ratings")
    return ratings
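A hedged sketch of inspecting this mixed-effects model with ed.tape(), Edward2's context manager that records every named random variable created during the call. features is a hypothetical dict of tensors, and num_students / num_instructors must already be defined in the enclosing scope:

with ed.tape() as model_tape:
    ratings = model(features)

# model_tape is an OrderedDict keyed by random-variable names:
# ['student_effects', 'instructor_effects', 'ratings']
print(list(model_tape.keys()))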
Example #15
def EpiAnno(D0, D, n_classes, sample_shape):
    # Class-conditional latent Gaussians. `dtype` is assumed defined at
    # module level (e.g. np.float32).
    mu = [ed.Normal(loc=tf.zeros([D0], dtype), scale=tf.ones([D0], dtype),
                    name='mu%d' % i) for i in range(n_classes)]
    sigma = ed.InverseGamma(concentration=tf.ones([D0], dtype=dtype),
                            rate=tf.ones([D0], dtype=dtype), name='sigma')
    z = [ed.MultivariateNormalDiag(loc=mu[i], sample_shape=sample_shape[i],
                                   scale_diag=sigma, name='z%d' % i)
         for i in range(n_classes)]

    # Linear decoder with leaky-ReLU activation.
    h = tf.concat(z, axis=0)
    beta = ed.Normal(tf.zeros([D0, D], dtype=dtype), 1., name="beta")
    alpha = ed.Normal(tf.zeros([D], dtype=dtype), 1., name="alpha")

    output = tf.nn.leaky_relu(h @ beta + alpha, alpha=0.5)
    noise = ed.InverseGamma(concentration=tf.ones([1], dtype=dtype),
                            rate=tf.ones([1], dtype=dtype), name='noise')
    x = ed.Normal(loc=output, scale=noise, name='x')
    return mu, sigma, z, (beta, alpha), noise, x
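A minimal forward-sampling sketch for EpiAnno with hypothetical sizes, assuming dtype = np.float32 at module level (as the free dtype name in Examples #2 and #15 suggests):

dtype = np.float32  # assumed module-level definition

mu, sigma, z, (beta, alpha), noise, x = EpiAnno(
    D0=16, D=100, n_classes=3, sample_shape=[20, 30, 25])

with tf.Session() as sess:
    x_draw = sess.run(x)  # shape (20 + 30 + 25, 100) = (75, 100)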
Example #16
    def call(self, inputs):
        """Runs the model forward to return a stochastic encoding.

        Args:
          inputs: Tensor of shape [1, num_productions, num_production_rules]. It is
            a sequence of productions of length `num_productions`. Each production
            is a one-hot vector of length `num_production_rules`: it determines
            which production rule the production corresponds to.

        Returns:
          latent_code_posterior: A random variable capturing a sample from the
            variational distribution, of shape [1, self.latent_size].
        """
        net = self.encoder_net(tf.cast(inputs, tf.float32))
        return ed.MultivariateNormalDiag(loc=net[..., :self.latent_size],
                                         scale_diag=tf.nn.softplus(
                                             net[..., self.latent_size:]),
                                         name="latent_code_posterior")
Example #17
def variational_mfvi(X, mfvi_mixture=False, n_mixture=1, name="", **kwargs):
    """Defines the mean-field variational family for Gaussian Process.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (N, D).
        mfvi_mixture: (float32) Whether to output variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture component to add.
        name: (str) name for variational parameters.
        **kwargs: Additional keyword arguments, accepted for compatibility
            with other variational families.

    Returns:
        q_f: (ed.RandomVariable) variational family.
        qf_mean, qf_sdev: (tf.Tensor) variational parameters for q_f.
        mixture_par_list: (list) Mixture parameters; empty unless
            mfvi_mixture=True.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    N, D = X.shape.as_list()

    # define variational parameters
    qf_mean = tf.get_variable(shape=[N], name='{}_mean'.format(name))
    qf_sdev = tf.exp(tf.get_variable(shape=[N], name='{}_sdev'.format(name)))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                             scale_diag=qf_sdev,
                                             name=name)
        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=N, gp_dist=gp_dist, name=name)
    else:
        q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                        scale_diag=qf_sdev,
                                        name=name)

    return q_f, qf_mean, qf_sdev, mixture_par_list
Example #18
def variational_mfvi(X, mfvi_mixture=False, n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, shape (N, D).
        mfvi_mixture: (float32) Whether to output variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture component to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        qf_mean, qf_sdev: (tf.Tensor) variational parameters for q_f.
        mixture_par_list: (list) Mixture parameters; empty unless
            mfvi_mixture=True.
    """
    N, D = X.shape

    # define variational parameters
    qf_mean = tf.get_variable(shape=[N], name='qf_mean')
    qf_sdev = tf.exp(tf.get_variable(shape=[N], name='qf_sdev'))
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                             scale_diag=qf_sdev,
                                             name='q_f')
        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=N, gp_dist=gp_dist, name='q_f')
    else:
        q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                        scale_diag=qf_sdev,
                                        name='q_f')

    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_sdev, mixture_par_list
Example #19
def model_flat(X,
               base_pred,
               family_tree=None,
               ls_weight=1.,
               ls_resid=1.,
               **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        ls_weight: (float32) lengthscale for the kernel of ensemble weight GPs.
        ls_resid: (float32) lengthscale for the kernel of residual process GP.
        family_tree: (dict of list or None) A dictionary of list of strings to
            specify the family tree between models, if None then assume there's
            no structure (i.e. flat).
        **kwargs: Additional parameters to pass to sparse_ensemble_weight.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but "
                "observed {} for '{}'.".format(N, value.shape, key))

    # specify tail-free priors for ensemble weight
    if not family_tree:
        temp = ed.Normal(loc=tail_free._TEMP_PRIOR_MEAN,
                         scale=tail_free._TEMP_PRIOR_SDEV,
                         name='temp')
    else:
        # specify a list of temp parameters for each node in the tree
        temp = ed.Normal(loc=[tail_free._TEMP_PRIOR_MEAN] * len(family_tree),
                         scale=tail_free._TEMP_PRIOR_SDEV,
                         name='temp')

    # specify ensemble weight
    W = sparse_conditional_weight(X,
                                  base_pred,
                                  temp,
                                  family_tree=family_tree,
                                  ls=ls_weight,
                                  name="ensemble_weight",
                                  **kwargs)

    # specify ensemble prediction
    F = np.asarray(list(base_pred.values())).T
    FW = tf.multiply(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X,
                              ls_resid,
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV,
                      name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
Example #20
def model_tailfree(X,
                   base_pred,
                   family_tree=None,
                   log_ls_weight=None,
                   log_ls_resid=None,
                   **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        family_tree: (dict of list or None) A dictionary of list of strings to
            specify the family tree between models, if None then assume there's
            no structure (i.e. flat).
        log_ls_weight: (float32) length-scale parameter for weight GP.
            If None then will estimate with normal prior.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.
        **kwargs: Additional parameters to pass to tail_free.prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but "
                "observed {} for '{}'.".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_weight is None:
        log_ls_weight = ed.Normal(loc=_LS_PRIOR_MEAN,
                                  scale=_LS_PRIOR_SDEV,
                                  name="ls_weight")
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV,
                                 name="ls_resid")

    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV,
                      name="sigma")

    # specify tail-free priors for ensemble weight
    ensemble_weights, model_names = tail_free.prior(X,
                                                    base_pred,
                                                    family_tree=family_tree,
                                                    ls=tf.exp(log_ls_weight),
                                                    name="ensemble_weight",
                                                    **kwargs)

    # specify ensemble prediction
    base_models = np.asarray([base_pred[name] for name in model_names]).T
    FW = tf.multiply(base_models, ensemble_weights)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X,
                              ls=tf.exp(log_ls_resid),
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
Example #21
def model(X, base_pred,
          add_resid=True, log_ls_resid=None):
    r"""Defines the sparse adaptive ensemble model.

    y           ~   sum{ f_k(x) * w_k } + delta(x) + epsilon
    w_k         ~   LogisticNormal  ( 0, sigma_k )
    delta(x)    ~   GaussianProcess ( 0, k(x) )
    epsilon     ~   Normal          ( 0, sigma_e )

    where the LogisticNormal is sparse_softmax transformed Normals.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        add_resid: (bool) Whether to add residual process to model.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # convert data type
    F = np.asarray(list(base_pred.values())).T
    F = tf.convert_to_tensor(F, dtype=tf.float32)
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but"
                "observed {} for '{}'".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV, name="ls_resid")

    # specify logistic normal priors for ensemble weight
    temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                     scale=_TEMP_PRIOR_SDEV, name='temp')
    W = sparse_logistic_weight(base_pred, temp,
                               name="ensemble_weight")

    # specify ensemble prediction
    FW = tf.matmul(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    if add_resid:
        ensemble_resid = gp.prior(X,
                                  ls=tf.exp(log_ls_resid),
                                  kernel_func=gp.rbf,
                                  name="ensemble_resid")
    else:
        ensemble_resid = 0.

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV, name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y