Example 1
def model(X, base_pred,
          add_resid=True, log_ls_resid=None):
    r"""Defines the sparse adaptive ensemble model.

    y           ~   sum{ f_k(x) * w_k } + delta(x) + epsilon
    w_k         ~   LogisticNormal  ( 0, sigma_k )
    delta(x)    ~   GaussianProcess ( 0, k(x) )
    epsilon     ~   Normal          ( 0, sigma_e )

    where the LogisticNormal is obtained by applying sparse_softmax to Normal random variables.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        add_resid: (bool) Whether to add residual process to model.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # convert data type
    F = np.asarray(list(base_pred.values())).T
    F = tf.convert_to_tensor(F, dtype=tf.float32)
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but"
                "observed {} for '{}'".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV, name="ls_resid")

    # specify logistic normal priors for ensemble weight
    temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                     scale=_TEMP_PRIOR_SDEV, name='temp')
    W = sparse_logistic_weight(base_pred, temp,
                               name="ensemble_weight")

    # specify ensemble prediction
    FW = tf.matmul(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    if add_resid:
        ensemble_resid = gp.prior(X,
                                  ls=tf.exp(log_ls_resid),
                                  kernel_func=gp.rbf,
                                  name="ensemble_resid")
    else:
        ensemble_resid = 0.

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV, name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
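A minimal, self-contained sketch of the logistic-normal weighting idea described in the docstring above (sparse_logistic_weight and the module prior constants are not shown, so a plain softmax and hard-coded prior values stand in for them):

import numpy as np
import tensorflow as tf
from tensorflow_probability import edward2 as ed

N, K = 50, 3                                               # observations, base models
F = tf.constant(np.random.randn(N, K), dtype=tf.float32)  # stand-in base-model predictions

temp = ed.Normal(loc=0., scale=1., name="temp")            # temperature prior
w_raw = ed.Normal(loc=tf.zeros(K), scale=tf.ones(K), name="w_raw")
W = tf.nn.softmax(w_raw / tf.exp(temp))                    # softmax-transformed Normals on the simplex
ensemble_mean = tf.reduce_sum(F * W, axis=1)               # sum_k f_k(x) * w_k

sigma = ed.Normal(loc=-1., scale=1., name="sigma")         # prior on the log observation noise
y = ed.Normal(loc=ensemble_mean, scale=tf.exp(sigma), name="y")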
Example 2
 def model(cfg):
     ed.Normal(0., 1., name='normal', sample_shape=cfg.shape_for_normal)
Example 3
 def __call__(self, x):
   """Computes regularization given an ed.Normal random variable as input."""
   if not isinstance(x, ed.RandomVariable):
     raise ValueError('Input must be an ed.RandomVariable.')
   random_variable = ed.Normal(loc=self.mean, scale=self.stddev)
   return random_variable.distribution.kl_divergence(x.distribution)
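A short usage sketch of the regularizer pattern above; the concrete mean and stddev values are illustrative, not taken from the source:

import tensorflow as tf
from tensorflow_probability import edward2 as ed

q = ed.Normal(loc=tf.Variable(0.5), scale=1.0, name="q")  # e.g. a variational posterior
prior = ed.Normal(loc=0., scale=1.)                        # plays the role of self.mean / self.stddev
kl = prior.distribution.kl_divergence(q.distribution)     # analytic KL(prior || q), a scalar tensor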
Example 4
def variational_model(qw_mean, qw_stddv, qz_mean, qz_stddv):
    qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
    qz = ed.Normal(loc=qz_mean, scale=qz_stddv, name="qz")
    return qw, qz
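Because Edward2 random variables behave as tensors (their sampled value) while also exposing the underlying distribution, the density of a variational family like this can be evaluated directly. A small sketch with made-up parameter values:

qw, qz = variational_model(qw_mean=0., qw_stddv=1., qz_mean=0., qz_stddv=1.)
log_q = qw.distribution.log_prob(qw) + qz.distribution.log_prob(qz)  # log q(w, z) at the sampled values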
Example 5
 def variational():
     loc = tf1.get_variable("loc", [])
     qz = ed.Normal(loc=loc, scale=0.5, name="qz")
     return qz
Example 6
 def model():
     x = ed.Normal(loc=0., scale=1., name="x")
     y = ed.Normal(loc=x, scale=1., name="y")
     return x + y
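A sketch of how such a model function is typically inspected: the tape context manager from tfp.edward2 (an assumption about intended usage, not part of the snippet) records every named random variable created during the call:

with ed.tape() as model_tape:
    total = model()
# model_tape is an OrderedDict mapping names to random variables, here "x" and "y"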
Example 7
 def model():
   loc = ed.Normal(loc=0., scale=1., name="loc")
   x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
   return x
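A model function like this can be turned into a joint log-density over its named variables; a sketch using make_log_joint_fn from tfp.edward2 (an assumption about intended usage, not shown in the snippet):

log_joint = ed.make_log_joint_fn(model)
lp = log_joint(loc=0.3, x=tf.zeros(5))  # log p(loc=0.3, x=0), with x's sample_shape of 5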
Example 8
def model_flat(X,
               base_pred,
               family_tree=None,
               ls_weight=1.,
               ls_resid=1.,
               **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        ls_weight: (float32) lengthscale for the kernel of ensemble weight GPs.
        ls_resid: (float32) lengthscale for the kernel of residual process GP.
        family_tree: (dict of list or None) A dictionary of list of strings to
            specify the family tree between models, if None then assume there's
            no structure (i.e. flat).
        **kwargs: Additional parameters to pass to sparse_ensemble_weight.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N, ):
            raise ValueError(
                "All base-model predictions should have shape ({},), but"
                "observed {} for '{}'".format(N, value.shape, key))

    # specify tail-free priors for ensemble weight
    if not family_tree:
        temp = ed.Normal(loc=tail_free._TEMP_PRIOR_MEAN,
                         scale=tail_free._TEMP_PRIOR_SDEV,
                         name='temp')
    else:
        # specify a list of temp parameters for each node in the tree
        temp = ed.Normal(loc=[tail_free._TEMP_PRIOR_MEAN] * len(family_tree),
                         scale=tail_free._TEMP_PRIOR_SDEV,
                         name='temp')

    # specify ensemble weight
    W = sparse_conditional_weight(X,
                                  base_pred,
                                  temp,
                                  family_tree=family_tree,
                                  ls=ls_weight,
                                  name="ensemble_weight",
                                  **kwargs)

    # specify ensemble prediction
    F = np.asarray(list(base_pred.values())).T
    FW = tf.multiply(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X,
                              ls_resid,
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV,
                      name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
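The observation model in these ensemble examples is an isotropic multivariate normal whose common standard deviation is the exponentiated noise parameter; a self-contained sketch of just that piece (prior values illustrative):

sigma = ed.Normal(loc=-1., scale=1., name="sigma")  # prior on the log noise
mean = tf.zeros(10)                                  # stands in for ensemble_mean + ensemble_resid
y = ed.MultivariateNormalDiag(loc=mean,
                              scale_identity_multiplier=tf.exp(sigma),
                              name="y")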
Example 9
 def sample(cfg):
     mu = ed.Normal(0., 1., name="mu")
Example 10
def variational_dgpr(X,
                     Zm,
                     Zs,
                     ls=1.,
                     kern_func=rbf,
                     ridge_factor=1e-3,
                     mfvi_mixture=False,
                     n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (Nx, D).
        Zm: (np.ndarray of float32) inducing points for mean, shape (Nm, D).
        Zs: (np.ndarray of float32) inducing points for covar, shape (Ns, D).
        ls: (float32) length scale parameter.
        kern_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky decomposition
        mfvi_mixture: (bool) Whether to output a variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture components to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        qf_mean, qf_cov: (tf.Tensor) variational mean and covariance for q_f.
        mixture_par_list: (list of tf.Variable) variational parameters for the
            MFVI mixture if mfvi_mixture=True, else [].
    """
    X = tf.convert_to_tensor(X)
    Zm = tf.convert_to_tensor(Zm)
    Zs = tf.convert_to_tensor(Zs)

    Nx = X.shape.as_list()[0]
    Nm = Zm.shape.as_list()[0]
    Ns = Zs.shape.as_list()[0]

    # 1. Prepare constants
    # compute matrix constants
    Kxx = kern_func(X, ls=ls)
    Kmm = kern_func(Zm, ls=ls)
    Kxm = kern_func(X, Zm, ls=ls)
    Kxs = kern_func(X, Zs, ls=ls)
    Kss = kern_func(Zs, ls=ls, ridge_factor=ridge_factor)

    # 2. Define variational parameters
    # define mean and variance for sigma
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define free parameters (i.e. mean and full covariance of f_latent)
    m = tf.get_variable(shape=[Nm, 1], name='qf_m')
    s = tf.get_variable(shape=[Ns * (Ns + 1) // 2], name='qf_s')
    L = fill_triangular(s, name='qf_chol')

    # components for KL objective
    H = tf.eye(Ns) + tf.matmul(L, tf.matmul(Kss, L), transpose_a=True)
    cond_cov_inv = tf.matmul(L, tf.matrix_solve(H, tf.transpose(L)))

    func_norm_mm = tf.matmul(m, tf.matmul(Kmm, m), transpose_a=True)
    log_det_ss = tf.log(tf.matrix_determinant(H))
    cond_norm_ss = tf.reduce_sum(tf.multiply(Kss, cond_cov_inv))

    # compute sparse gp variational parameter (i.e. mean and covariance of P(f_obs | f_latent))
    qf_mean = tf.squeeze(tf.tensordot(Kxm, m, [[1], [0]]), name='qf_mean')
    qf_cov = (Kxx -
              tf.matmul(Kxs, tf.matmul(cond_cov_inv, Kxs, transpose_b=True)) +
              ridge_factor * tf.eye(Nx, dtype=tf.float32))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = dist_util.VariationalGaussianProcessDecoupledDistribution(
            loc=qf_mean,
            covariance_matrix=qf_cov,
            func_norm_mm=func_norm_mm,
            log_det_ss=log_det_ss,
            cond_norm_ss=cond_norm_ss)

        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=Nx, gp_dist=gp_dist, name='q_f')
    else:
        q_f = dist_util.VariationalGaussianProcessDecoupled(
            loc=qf_mean,
            covariance_matrix=qf_cov,
            func_norm_mm=func_norm_mm,
            log_det_ss=log_det_ss,
            cond_norm_ss=cond_norm_ss,
            name='q_f')
    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_cov, mixture_par_list
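A small sketch of the fill_triangular parameterization used for the free Cholesky factor above, here via tfp.math.fill_triangular (the snippet imports its own fill_triangular, which is assumed to behave the same way):

import tensorflow as tf
import tensorflow_probability as tfp

s = tf.Variable(tf.ones(6))               # 6 = 3 * (3 + 1) // 2 free parameters
L = tfp.math.fill_triangular(s)           # (3, 3) lower-triangular factor
S = tf.matmul(L, L, transpose_b=True)     # positive semi-definite covariance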
Example 11
def model_tailfree(X,
                   base_pred,
                   family_tree=None,
                   log_ls_weight=None,
                   log_ls_resid=None,
                   **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample prediction
            from base models. For each item in the dictionary,
            key is the model name, and value is the model prediction with
            dimension (N, ).
        family_tree: (dict of list or None) A dictionary of list of strings to
            specify the family tree between models, if None then assume there's
            no structure (i.e. flat).
        log_ls_weight: (float32) length-scale parameter for weight GP.
            If None then will estimate with normal prior.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.
        **kwargs: Additional parameters to pass to tail_free.prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N, ):
            raise ValueError(
                "All base-model predictions should have shape ({},), but"
                "observed {} for '{}'".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_weight is None:
        log_ls_weight = ed.Normal(loc=_LS_PRIOR_MEAN,
                                  scale=_LS_PRIOR_SDEV,
                                  name="ls_weight")
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV,
                                 name="ls_resid")

    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV,
                      name="sigma")

    # specify tail-free priors for ensemble weight
    ensemble_weights, model_names = tail_free.prior(X,
                                                    base_pred,
                                                    family_tree=family_tree,
                                                    ls=tf.exp(log_ls_weight),
                                                    name="ensemble_weight",
                                                    **kwargs)

    # specify ensemble prediction
    base_models = np.asarray([base_pred[name] for name in model_names]).T
    FW = tf.multiply(base_models, ensemble_weights)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X,
                              ls=tf.exp(log_ls_resid),
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
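A sketch of the length-scale parameterization used above: a Normal prior is placed on the log of the length-scale, and the strictly positive value fed to the GP kernel is recovered with tf.exp (prior values illustrative):

log_ls = ed.Normal(loc=-1., scale=1., name="ls_resid")  # prior on the log length-scale
ls = tf.exp(log_ls)                                      # positive length-scale passed to the kernel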
Example 12
def variational_sgpr(X,
                     Z,
                     ls=1.,
                     kern_func=rbf,
                     ridge_factor=1e-3,
                     mfvi_mixture=False,
                     n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (Nx, D).
        Z: (np.ndarray of float32) inducing points, with dimension (Nz, D).
        ls: (float32) length scale parameter.
        kern_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky decomposition
        mfvi_mixture: (bool) Whether to output a variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture components to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        qf_mean, qf_cov: (tf.Tensor) variational mean and covariance for q_f.
        mixture_par_list: (list of tf.Variable) variational parameters for
            MFVI mixture ('mixture_logits', 'mixture_logits_mfvi_mix',
            'mean_mfvi', 'sdev_mfvi') if mfvi_mixture=True, else [].
    """
    X = tf.convert_to_tensor(X)
    Z = tf.convert_to_tensor(Z)

    Nx, Nz = X.shape.as_list()[0], Z.shape.as_list()[0]

    # 1. Prepare constants
    # compute matrix constants
    Kxx = kern_func(X, ls=ls)
    Kxz = kern_func(X, Z, ls=ls)
    Kzz = kern_func(Z, ls=ls, ridge_factor=ridge_factor)

    # compute null covariance matrix using Cholesky decomposition
    Kzz_chol_inv = tf.matrix_inverse(tf.cholesky(Kzz))
    Kzz_inv = tf.matmul(Kzz_chol_inv, Kzz_chol_inv, transpose_a=True)

    Kxz_Kzz_chol_inv = tf.matmul(Kxz, Kzz_chol_inv, transpose_b=True)
    Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
    Sigma_pre = Kxx - tf.matmul(
        Kxz_Kzz_chol_inv, Kxz_Kzz_chol_inv, transpose_b=True)

    # 2. Define variational parameters
    # define mean and variance for sigma
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define free parameters (i.e. mean and full covariance of f_latent)
    m = tf.get_variable(shape=[Nz], name='qf_m')
    s = tf.get_variable(
        shape=[Nz * (Nz + 1) // 2],
        # initializer=tf.zeros_initializer(),
        name='qf_s')
    L = fill_triangular(s, name='qf_chol')
    S = tf.matmul(L, L, transpose_b=True)

    # compute sparse gp variational parameter (i.e. mean and covariance of P(f_obs | f_latent))
    qf_mean = tf.tensordot(Kxz_Kzz_inv, m, [[1], [0]], name='qf_mean')
    qf_cov = (
        Sigma_pre +
        tf.matmul(Kxz_Kzz_inv, tf.matmul(S, Kxz_Kzz_inv, transpose_b=True)) +
        ridge_factor * tf.eye(Nx, dtype=tf.float32))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = tfd.MultivariateNormalFullCovariance(
            loc=qf_mean, covariance_matrix=qf_cov)
        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=Nx, gp_dist=gp_dist, name='q_f')
    else:
        q_f = ed.MultivariateNormalFullCovariance(loc=qf_mean,
                                                  covariance_matrix=qf_cov,
                                                  name='q_f')
    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_cov, mixture_par_list
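The kernel-matrix inverse above is computed through a Cholesky factor rather than a direct inverse; a short self-contained sketch of that identity (matrix and ridge values illustrative):

ridge = 1e-3
K = tf.constant([[2.0, 0.5], [0.5, 1.0]]) + ridge * tf.eye(2)  # stands in for a PSD kernel matrix
chol_inv = tf.matrix_inverse(tf.cholesky(K))                   # inv(L), where K = L L^T
K_inv = tf.matmul(chol_inv, chol_inv, transpose_a=True)        # inv(K) = inv(L)^T inv(L)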
Example 13
 def simple(cfg):
     ed.Normal(0., 1., name='normal')
Example 14
 def model(cfg):
     ed.Normal(0., 1., name='normal')
Example 15
def estimate_gmm_precision(qx_loc,
                           qx_scale,
                           fixed_expression=False,
                           profile_trace=False,
                           tensorboard_summaries=False,
                           batch_size=100,
                           err_scale=0.2,
                           edge_cutoff=0.7):
    num_samples = qx_loc.shape[0]
    n = qx_loc.shape[1]

    batch_size = min(batch_size, n)

    # [num_samples, n]
    if fixed_expression:
        qx = qx_loc
    else:
        qx = ed.Normal(loc=qx_loc, scale=qx_scale, name="qx")

    b = np.mean(qx_loc, axis=0)

    # variational estimate of w
    # -------------------------
    qw_loc_init = tf.placeholder(tf.float32, (batch_size, n),
                                 name="qw_loc_init")
    qw_loc_init_value = np.zeros((batch_size, n), dtype=np.float32)
    qw_loc = tf.Variable(qw_loc_init, name="qw_loc")
    qw = qw_loc

    # variational estimate of w_scale
    # -------------------------------

    qw_scale_loc_init_value = np.full((batch_size, n), -3.0, dtype=np.float32)
    qw_scale_loc_init = tf.placeholder(tf.float32, (batch_size, n),
                                       name="qw_scale_loc_init")
    qw_scale_loc = tf.Variable(qw_scale_loc_init, name="qw_scale_loc")
    qw_scale = tf.nn.softplus(qw_scale_loc)

    # estimate of b
    # -------------

    by_init_value = np.zeros((batch_size, ), dtype=np.float32)
    by_init = tf.placeholder(tf.float32, (batch_size, ), name="by_init")
    by = tf.Variable(by_init, name="by", trainable=False)  # [batch_size]

    # w
    # -

    w_scale_prior = tfd.HalfCauchy(loc=0.0, scale=1.0, name="w_scale_prior")

    # qw_scale can be shrunk all the way to zero, producing NaNs
    qw_scale = tf.clip_by_value(qw_scale, 1e-4, 10000.0)

    scale_tau = 0.1
    w_prior = tfd.Normal(loc=0.0, scale=qw_scale * scale_tau, name="w_prior")

    # [batch_size, n]
    mask_init = tf.placeholder(tf.float32, (batch_size, n), name="mask_init")
    mask_init_value = np.empty([batch_size, n], dtype=np.float32)
    mask = tf.Variable(mask_init, name="mask", trainable=False)

    qw_masked = qw * mask  # [batch_size, n]

    qx_std = qx - b  # [num_samples, n]

    # CONDITIONAL CORRELATION
    # qxqw = tf.matmul(qx_std, qw_masked, transpose_b=True) # [num_samples, batch_size]
    # y_dist_loc = qxqw + by

    # UNCONDITIONAL CORRELATION
    qxqw = tf.expand_dims(qx_std, 1) * tf.expand_dims(
        qw_masked, 0)  # [num_samples, num_batches, n]
    y_dist_loc = tf.expand_dims(tf.expand_dims(by, 0),
                                -1) + qxqw  # [num_samples, num_batches, n]

    y_dist = tfd.StudentT(loc=y_dist_loc, scale=err_scale, df=10.0)

    y_slice_start_init = tf.placeholder(
        tf.int32, 2, name="y_slice_start_init")  # set to [0, j]
    y_slice_start = tf.Variable(y_slice_start_init,
                                name="y_slice_start",
                                trainable=False)
    y = tf.slice(qx, y_slice_start,
                 [num_samples, batch_size])  # [num_samples, batch_size]

    # y = tf.Print(y, [tf.square(y_dist_loc - tf.expand_dims(y, -1))], "y", summarize=16)

    # objective function
    # ------------------

    y = tf.expand_dims(y, -1)
    y_log_prob = tf.reduce_sum(y_dist.log_prob(y))
    w_log_prob = tf.reduce_sum(w_prior.log_prob(qw_masked))
    w_scale_log_prob = tf.reduce_sum(w_scale_prior.log_prob(qw_scale))
    log_posterior = y_log_prob + w_log_prob + w_scale_log_prob
    elbo = log_posterior

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-2)
    train = optimizer.minimize(-elbo)

    sess = tf.Session()

    niter = 1000
    feed_dict = dict()
    feed_dict[qw_scale_loc_init] = qw_scale_loc_init_value
    feed_dict[qw_loc_init] = qw_loc_init_value
    feed_dict[mask_init] = mask_init_value
    feed_dict[by_init] = by_init_value

    qx_loc_means = np.mean(qx_loc, axis=0)

    # check_ops = tf.add_check_numerics_ops()
    if tensorboard_summaries:
        # tf.summary.histogram("qw_loc_param", qw_loc)
        # tf.summary.histogram("qw_scale_param", qw_scale_param)
        tf.summary.scalar("y_log_prob", y_log_prob)
        tf.summary.scalar("w_log_prob", w_log_prob)
        tf.summary.scalar("w_scale_log_prob", w_scale_log_prob)

        tf.summary.scalar("qw min", tf.reduce_min(qw))
        tf.summary.scalar("qw max", tf.reduce_max(qw))
        tf.summary.scalar("qw_scale min", tf.reduce_min(qw_scale))
        tf.summary.scalar("qw_scale max", tf.reduce_max(qw_scale))

        # tf.summary.histogram("qw_scale_loc_param", qw_scale_loc)
        # tf.summary.histogram("qw_scale_scale_param", qw_scale_scale)

    edges = dict()

    count = 0
    num_batches = math.ceil(n / batch_size)
    for batch_num in range(num_batches):
        # deal with n not necessarily being divisible by batch_size
        if batch_num == num_batches - 1:
            start_j = n - batch_size
        else:
            start_j = batch_num * batch_size

        fillmask(mask_init_value, start_j, batch_size)
        feed_dict[y_slice_start_init] = np.array([0, start_j], dtype=np.int32)

        for k in range(batch_size):
            by_init_value[k] = b[start_j + k]

        sess.run(tf.global_variables_initializer(), feed_dict=feed_dict)

        # if requested, just benchmark one run of the training operation and return
        if profile_trace:
            print("WRITING PROFILING DATA")
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            sess.run(train, options=options, run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('log/timeline.json', 'w') as f:
                f.write(chrome_trace)
            break

        if tensorboard_summaries:
            train_writer = tf.summary.FileWriter(
                "log/" + "batch-" + str(batch_num), sess.graph)
            tf.summary.scalar("elbo", elbo)
            merged_summary = tf.summary.merge_all()

        for t in range(niter):
            # _, elbo_val = sess.run([train, elbo])
            # _, entropy_val, log_posterior_val, elbo_val = sess.run([train, entropy, log_posterior, elbo])
            _, y_log_prob_value, w_log_prob_value, w_scale_log_prob_value = sess.run(
                [train, y_log_prob, w_log_prob, w_scale_log_prob])
            if t % 100 == 0:
                # print((t, elbo_val, log_posterior_val, entropy_val))
                print((y_log_prob_value, w_log_prob_value,
                       w_scale_log_prob_value))
                # print((t, elbo_val))
            if tensorboard_summaries:
                train_writer.add_summary(sess.run(merged_summary), t)

        print("")
        print("batch")
        print(start_j)

        # qw_scale_min, qw_scale_mean, qw_scale_max = sess.run(
        #     [tf.reduce_min(qw_scale), tf.reduce_mean(qw_scale), tf.reduce_max(qw_scale)])
        # print(("qw_scale span", qw_scale_min, qw_scale_mean, qw_scale_max))

        # lower_credible = sess.run(qw.distribution.quantile(0.01))
        # upper_credible = sess.run(qw.distribution.quantile(0.99))
        lower_credible = upper_credible = sess.run(qw)

        print("credible span")
        print(np.max(lower_credible))
        print(np.min(upper_credible))

        print("nonzeros")
        print(np.sum((lower_credible > edge_cutoff)))
        print(np.sum((upper_credible < -edge_cutoff)))

        for k in range(batch_size):
            neighbors = []
            for j in range(n):
                if lower_credible[k, j] > edge_cutoff or upper_credible[
                        k, j] < -edge_cutoff:
                    neighbors.append(
                        (j, lower_credible[k, j], upper_credible[k, j]))
            edges[start_j + k] = neighbors

        count += 1
        if count > 4:
            break

    return edges
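fillmask is not shown in this snippet; judging from how mask is used, it presumably zeroes each regression target's self-edge so a feature cannot explain itself. A hypothetical sketch of that behaviour:

def fillmask(mask_value, start_j, batch_size):
    # hypothetical reconstruction: allow all edges except each target's self-edge
    mask_value[:] = 1.0
    for k in range(batch_size):
        mask_value[k, start_j + k] = 0.0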
Example 16
def estimate_splicing_code(
        qx_feature_loc, qx_feature_scale,
        donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs,
        donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons,
        tissues):

    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j-1] = 1

    seqs = np.hstack(
        [donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs])
        # [ num_features, seq_length, 4 ]

    cons = np.hstack(
        [donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons])

    seqs = np.concatenate((seqs, np.expand_dims(cons, 2)), axis=2)
    print(seqs.shape)

    # sys.exit()

    num_features = seqs.shape[0]

    # split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)


    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len  = num_features - seqs_train_len

    print(num_features)
    print(seqs_train_len)
    print(seqs_test_len)
    print(qx_feature_loc.shape)
    print(qx_feature_scale.shape)

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]

    seqs_train = seqs[train_idxs]
    seqs_test = seqs[test_idxs]

    qx_feature_loc_train = qx_feature_loc[:,train_idxs]
    qx_feature_scale_train = qx_feature_scale[:,train_idxs]

    qx_feature_loc_test = qx_feature_loc[:,test_idxs]
    qx_feature_scale_test = qx_feature_scale[:,test_idxs]





    # invented data to test my intuition

    # seqs_train = np.array(
    #     [[[1.0, 0.0],
    #       [1.0, 0.0],
    #       [1.0, 0.0],
    #       [1.0, 0.0]],
    #      [[0.0, 1.0],
    #       [0.0, 1.0],
    #       [0.0, 1.0],
    #       [0.0, 1.0]]],
    #     dtype=np.float32)

    # seqs_test = np.copy(seqs_train)

    # tissue_matrix = np.array(
    #     [[1],
    #      [1],
    #      [1]],
    #     dtype=np.float32)

    # qx_feature_loc_train = np.array(
    #     [[-1.0, 1.0],
    #      [-1.1, 1.1],
    #     #  [-0.5, 0.5]],
    #      [0.9, -0.9]],
    #     dtype=np.float32)

    # qx_feature_scale_train = np.array(
    #     [[0.1, 0.1],
    #      [0.1, 0.1],
    #     #  [0.1, 0.1]],
    #      [1.0, 1.0]],
    #     dtype=np.float32)

    # qx_feature_loc_test = np.copy(qx_feature_loc_train)
    # qx_feature_scale_test = np.copy(qx_feature_scale_train)

    # num_tissues = 1
    # num_samples = qx_feature_loc_train.shape[0]
    # seqs_train_len = 2




    # print(qx_feature_loc_train)
    # print(qx_feature_scale_train)
    # sys.exit()

    keep_prob = tf.placeholder(tf.float32)

    # model
    lyr0_input = tf.placeholder(tf.float32, (None, seqs_train.shape[1], seqs_train.shape[2]))
    # lyr0 = tf.layers.flatten(lyr0_input)
    lyr0 = lyr0_input

    print(lyr0)

    training_flag = tf.placeholder(tf.bool)

    conv1 = tf.layers.conv1d(
        inputs=lyr0,
        filters=32,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv1")

    conv1_dropout = tf.layers.dropout(
        inputs=conv1,
        rate=0.5,
        training=training_flag,
        name="conv1_dropout")

    pool1 = tf.layers.max_pooling1d(
        inputs=conv1_dropout,
        pool_size=2,
        strides=2,
        name="pool1")

    conv2 = tf.layers.conv1d(
        inputs=pool1,
        filters=64,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv2")

    pool2 = tf.layers.max_pooling1d(
        inputs=conv2,
        pool_size=2,
        strides=2,
        name="pool2")

    pool2_flat = tf.layers.flatten(
        pool2, name="pool2_flat")

    # pool2_flat = tf.layers.flatten(conv1_dropout)

    dense1 = tf.layers.dense(
        inputs=pool2_flat,
        units=256,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="dense1")

    # dropout1 = tf.layers.dropout(
    #     inputs=dense1,
    #     rate=0.5,
    #     training=training_flag,
    #     name="dropout1")

    prediction_layer = tf.layers.dense(
        # inputs=dropout1,
        inputs=dense1,
        units=num_tissues,
        activation=tf.identity)
        # [num_features, num_tissues]

    # TODO: eventually this should be a latent variable
    # x_scale = 0.2
    x_scale_prior = tfd.InverseGamma(
        concentration=0.001,
        rate=0.001,
        name="x_scale_prior")

    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))
    # x_scale = tf.constant(0.1)

    print(tissue_matrix.shape)

    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(prediction_layer))
        # [num_samples, num_features]

    x_prior = tfd.Normal(
        loc=x_mu,
        # loc=0.0,
        scale=x_scale,
        name="x_prior")

    # x_prior = tfd.StudentT(
    #     loc=x_mu,
    #     scale=x_scale,
    #     df=2.0,
    #     name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc,
        scale=x_likelihood_scale,
        name="x_likelihood")

    # x = x_likelihood

    x = tf.Variable(
        qx_feature_loc_train,
        # tf.random_normal(qx_feature_loc_train.shape),
        # tf.zeros(qx_feature_loc_train.shape),
        # qx_feature_loc_train + qx_feature_scale_train * np.float32(np.random.randn(*qx_feature_loc_train.shape)),
        # trainable=False,
        name="x")

    print("X")
    print(x)

    # x_delta = tf.Variable(
    #     # qx_feature_loc_train,
    #     # tf.random_normal(qx_feature_loc_train.shape),
    #     tf.zeros(qx_feature_loc_train.shape),
    #     # trainable=False,
    #     name="x")

    # x_delta = tf.Print(x_delta,
    #     [tf.reduce_min(x_delta), tf.reduce_max(x_delta)], "x_delta span")

    # x = tf.Print(x,
    #     [tf.reduce_min(x - qx_feature_loc_train), tf.reduce_max(x - qx_feature_loc_train)],
    #     "x deviance from init")

    # print(x_prior.log_prob(x))
    # print(x_likelihood.log_prob(x))
    # sys.exit()

    # log_prior = tf.reduce_sum(x_prior.log_prob(x_delta))
    # log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu + x_delta))


    log_prior = tf.reduce_sum(x_prior.log_prob(x)) + tf.reduce_sum(x_scale_prior.log_prob(x_scale))
    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))

    log_posterior = log_prior + log_likelihood


    # log_posterior = x_likelihood.distribution.log_prob(x_mu)

    sess = tf.Session()

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)

    sess.run(tf.global_variables_initializer())

    # dropout doesn't seem to do much....
    train_feed_dict = {
        training_flag: True,
        # training_flag: False,
        lyr0_input: seqs_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train }

    test_feed_dict = {
        training_flag: False,
        lyr0_input: seqs_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        # _, log_prior_value, log_likelihood_value = sess.run(
        #     [train, log_prior, log_likelihood],
        #     feed_dict=train_feed_dict)

        sess.run(
            [train],
            feed_dict=train_feed_dict)


        # print((log_prior_value, log_likelihood_value))

        if iter % 100 == 0:
            # print(iter)
            # print("x")
            # print(sess.run(x))
            # print("x likelihood")
            # print(sess.run(x_likelihood.distribution.log_prob(x), feed_dict=train_feed_dict))
            # print("x_mu")
            # print(sess.run(x_mu, feed_dict=train_feed_dict))
            # print(sess.run(x_mu, feed_dict=test_feed_dict))
            # print("x_mu likelihood")
            # print(sess.run(x_likelihood.distribution.log_prob(x_mu), feed_dict=train_feed_dict))
            # print(sess.run(x_likelihood.distribution.log_prob(x_mu), feed_dict=test_feed_dict))

            print(sess.run(tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)), feed_dict=train_feed_dict))
            print(sess.run(tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)), feed_dict=test_feed_dict))
            print(sess.run(tfp.distributions.percentile(x_likelihood.distribution.log_prob(x_mu), 50.0), feed_dict=train_feed_dict))
            print(sess.run(tfp.distributions.percentile(x_likelihood.distribution.log_prob(x_mu), 50.0), feed_dict=test_feed_dict))

            print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))


    print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
    print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))
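A small sketch of the softplus trick used for x_scale above: an unconstrained variable is mapped through softplus so the scale stays positive (initializing the raw value at -3 gives softplus(-3) ≈ 0.049):

raw_scale = tf.Variable(tf.fill([10], -3.0))  # unconstrained
x_scale = tf.nn.softplus(raw_scale)           # strictly positive scale parameter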
Example 17
 def model():
     return ed.Normal(0., 1., name="x")
Example 18
def estimate_splicing_code_from_kmers(
        qx_feature_loc, qx_feature_scale, kmer_usage_matrix, tissues):

    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j-1] = 1

    num_features = kmer_usage_matrix.shape[0]
    num_kmers = kmer_usage_matrix.shape[1]

    # split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)

    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len  = num_features - seqs_train_len

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]

    kmer_usage_matrix_train = kmer_usage_matrix[train_idxs]
    kmer_usage_matrix_test = kmer_usage_matrix[test_idxs]

    qx_feature_loc_train = qx_feature_loc[:,train_idxs]
    qx_feature_scale_train = qx_feature_scale[:,train_idxs]

    qx_feature_loc_test = qx_feature_loc[:,test_idxs]
    qx_feature_scale_test = qx_feature_scale[:,test_idxs]

    W0 = tf.Variable(
        tf.random_normal([num_kmers, 1], mean=0.0, stddev=0.01),
        name="W0")

    # B = tf.Variable(
    #     tf.random_normal([1, num_tissues], mean=0.0, stddev=0.01),
    #     name="B")

    W_prior = tfd.Normal(
        loc=0.0,
        scale=0.1,
        name="W_prior")

    W = tf.Variable(
        tf.random_normal([num_kmers, num_tissues], mean=0.0, stddev=0.01),
        name="W")

    X = tf.placeholder(tf.float32, shape=(None, num_kmers), name="X")

    # Y = B + tf.matmul(X, W0 + W)
    Y = tf.matmul(X, W0 + W)

    print(Y)

    x_scale_prior = tfd.InverseGamma(
        concentration=0.001,
        rate=0.001,
        name="x_scale_prior")

    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))

    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(Y))
        # [num_samples, num_features]

    print(x_mu)

    x_prior = tfd.Normal(
        loc=x_mu,
        scale=x_scale,
        name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc,
        scale=x_likelihood_scale,
        name="x_likelihood")



    # Using likelihood

    x = tf.Variable(
        qx_feature_loc_train,
        name="x")

    # x = x_likelihood_loc

    # x = x_mu

    log_prior = \
        tf.reduce_sum(x_prior.log_prob(x)) + \
        tf.reduce_sum(x_scale_prior.log_prob(x_scale)) + \
        tf.reduce_sum(W_prior.log_prob(W))

    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))

    log_posterior = log_prior + log_likelihood



    # Using point estimates

    # x = qx_feature_loc_train

    # log_prior = \
    #     tf.reduce_sum(x_prior.log_prob(x)) + \
    #     tf.reduce_sum(x_scale_prior.log_prob(x_scale)) + \
    #     tf.reduce_sum(W_prior.log_prob(W))

    # log_posterior = log_prior


    sess = tf.Session()

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)

    sess.run(tf.global_variables_initializer())

    train_feed_dict = {
        X: kmer_usage_matrix_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train }

    test_feed_dict = {
        X: kmer_usage_matrix_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        # _, log_prior_value, log_likelihood_value = sess.run(
        #     [train, log_prior, log_likelihood],
        #     feed_dict=train_feed_dict)

        sess.run(
            [train],
            feed_dict=train_feed_dict)

        # print((log_prior_value, log_likelihood_value))

        if iter % 100 == 0:
            print(iter)
            print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))
            print(sess.run(tf.reduce_min(x_scale)))
            print(sess.run(tf.reduce_max(x_scale)))
            # print(sess.run(log_prior, feed_dict=train_feed_dict))
            # print(sess.run(log_likelihood, feed_dict=train_feed_dict))

    return sess.run(W0), sess.run(W)
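The regression above decomposes the k-mer effects into a column W0 shared by all tissues plus tissue-specific deviations W; a self-contained sketch of just that decomposition (dimensions illustrative):

num_kmers, num_tissues = 8, 3
W0 = tf.Variable(tf.random_normal([num_kmers, 1], stddev=0.01))           # shared effect
W = tf.Variable(tf.random_normal([num_kmers, num_tissues], stddev=0.01))  # per-tissue deviations
X = tf.placeholder(tf.float32, shape=(None, num_kmers))
Y = tf.matmul(X, W0 + W)  # W0 broadcasts across the tissue dimension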
Example 19
 def model_builtin():
     return ed.Normal(1., 0.1, name="x")
Example 20
 def neals_funnel():
     x1 = ed.Normal(loc=0., scale=3., name='x1')
     x2 = ed.Normal(loc=0., scale=tf.exp(x1 / 2.), name='x2')
     return x1, x2
Example 21
 def normal_with_unknown_mean():
     loc = ed.Normal(loc=0., scale=1., name="loc")
     x = ed.Normal(loc=loc, scale=0.5, name="x")
     return x
Example 22
def latent_normal(shape, mean, stdev):
    if not isinstance(mean, list):
        mean = tf.constant(mean, shape=shape)
        stdev = tf.constant(stdev, shape=shape)
    prior = ed.Normal(loc=mean, scale=stdev)
    return prior
Example 23
 def variational_model(qw_mean, qw_stddv, qb_mean, qb_stddv):
     qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
     qb = ed.Normal(loc=qb_mean, scale=qb_stddv, name="qb")
     return qw, qb
Example 24
def sparse_conditional_weight(X,
                              parent_name,
                              child_names,
                              base_weights=None,
                              temp=None,
                              kernel_func=gp.rbf,
                              link_func=sparse_softmax,
                              ridge_factor=1e-3,
                              **kernel_kwargs):
    """Defines the conditional distribution of model given parent in the tail-free tree.

    Defines the feature-dependent conditional distribution of model as:

        w(model | x ) = link_func( w_model(x) )
        w_model(x) ~ gaussian_process[0, k_w(x)]


    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        parent_name: (str) The name of the mother node.
        child_names: (list of str) A list of model names for each child in the family.
        base_weights: (tf.Tensor of float32 or None) base logits to be passed to
            link_func corresponding to each child. It has dimension
            (batch_size, num_obs, num_model).
        temp: (tf.Tensor of float32 or None) temperature parameter corresponding
            to the parent node to be passed to link_func, it has dimension
            (batch_size, ).
        kernel_func: (function) kernel function for base ensemble,
            with args (X, **kwargs).
        link_func: (function) a link function that transforms the unnormalized
            base ensemble weights to a K-dimension simplex.
            This function has args (logits, temp)
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
        **kernel_kwargs: Additional parameters to pass to kernel_func through gp.prior.

    Returns:
        (list of tf.Tensor) List of normalized ensemble weights, one tensor of
            dimension (N,) per child model, with dtype float32.
    """
    num_model = len(child_names)

    # define random variables: temperature and raw GP weights
    if not isinstance(temp, tf.Tensor):
        temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                         scale=_TEMP_PRIOR_SDEV,
                         name='{}_{}'.format(TEMP_NAME_PREFIX, parent_name))

    if not isinstance(base_weights, tf.Tensor):
        base_weights = tf.stack([
            gp.prior(X,
                     kernel_func=kernel_func,
                     ridge_factor=ridge_factor,
                     name='{}_{}'.format(BASE_WEIGHT_NAME_PREFIX, model_name),
                     **kernel_kwargs) for model_name in child_names
        ],
                                axis=-1)

    # define transformed random variables
    weight_transformed = link_func(base_weights,
                                   tf.exp(temp),
                                   name='{}_{}'.format(COND_WEIGHT_NAME_PREFIX,
                                                       parent_name))

    # split into list then return
    # TODO(jereliu): Ugly code.
    weight_transformed = tf.split(weight_transformed, num_model, axis=-1)
    weight_transformed = [
        tf.squeeze(weight, axis=-1) for weight in weight_transformed
    ]
    return weight_transformed
Example 25
 def normal_with_unknown_mean():
     loc = ed.Normal(loc=0., scale=1., name="loc")
     x = ed.Normal(loc=loc, scale=0.5, sample_shape=5)
     return x
Example 26
"""

import numpy as np

x_train = np.linspace(-3, 3, num=50)
y_train = np.cos(x_train) + np.random.normal(0, 0.1, size=50)
x_train = x_train.astype(np.float32).reshape((50, 1))
y_train = y_train.astype(np.float32).reshape((50, 1))

import tensorflow as tf
import tensorflow_probability as tfp
#from edward.models import Normal

from tensorflow_probability import edward2 as ed

W_0 = ed.Normal(loc=tf.zeros([1, 2]), scale=tf.ones([1, 2]))
W_1 = ed.Normal(loc=tf.zeros([2, 1]), scale=tf.ones([2, 1]))
b_0 = ed.Normal(loc=tf.zeros(2), scale=tf.ones(2))
b_1 = ed.Normal(loc=tf.zeros(1), scale=tf.ones(1))

x = x_train
y = ed.Normal(loc=tf.matmul(tf.tanh(tf.matmul(x, W_0) + b_0), W_1) + b_1,
              scale=0.1)

#
#qW_0 = ed.Normal(loc=tf.get_variable("qW_0/loc", [1, 2]),
#              scale=tf.nn.softplus(tf.get_variable("qW_0/scale", [1, 2])))
#qW_1 = ed.Normal(loc=tf.get_variable("qW_1/loc", [2, 1]),
#              scale=tf.nn.softplus(tf.get_variable("qW_1/scale", [2, 1])))
#qb_0 = ed.Normal(loc=tf.get_variable("qb_0/loc", [2]),
#              scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [2])))