def logistic_regression(features):
  """Bayesian logistic regression: Normal priors on coefficients and intercept."""
  coeffs = ed.Normal(loc=tf.zeros(features.shape[1]), scale=1., name="coeffs")
  intercept = ed.Normal(loc=0., scale=1., name="intercept")
  outcomes = ed.Bernoulli(
      logits=tf.tensordot(features, coeffs, [[1], [0]]) + intercept,
      name="outcomes")
  return outcomes
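# Hedged usage sketch for `logistic_regression` above: the snippets in this
# file use TF1-style graph execution (see the compat shim in
# `tfp_schools_model` below), with `tf` and `ed` assumed imported at module
# level. The feature matrix here is made up for illustration.
import numpy as np

features = tf.cast(np.random.uniform(size=(50, 3)), tf.float32)
outcomes = logistic_regression(features)  # an ed.RandomVariable
with tf.Session() as sess:
  print(sess.run(outcomes.value))  # one forward draw of 50 binary outcomes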
def vae(k, d0, dx, N):
  """Generative (decoder) side of a VAE with a k-dimensional Normal latent z."""
  z = ed.Normal(loc=tf.ones(k), scale=1., sample_shape=N, name="z")
  decoder = inf.layers.Sequential([
      tf.keras.layers.Dense(d0, activation=tf.nn.relu, name="h0"),
      tf.keras.layers.Dense(dx, name="h1")
  ], name="decoder")
  x = ed.Normal(loc=decoder(z), scale=1., name="x")
  return z, x
def schools_model(num_schools, treatment_stddevs):
  avg_effect = ed.Normal(loc=0.0, scale=10.0, name="avg_effect")  # `mu`
  avg_stddev = ed.Normal(loc=5.0, scale=1.0, name="avg_stddev")  # `log(tau)`
  school_effects_standard = ed.Normal(
      loc=tf.zeros(num_schools),
      scale=tf.ones(num_schools),
      name="school_effects_standard")  # `eta`
  school_effects = avg_effect + tf.exp(avg_stddev) * school_effects_standard  # `theta`
  treatment_effects = ed.Normal(
      loc=school_effects, scale=treatment_stddevs,
      name="treatment_effects")  # `y`
  return treatment_effects
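# Hedged usage sketch: `ed.make_log_joint_fn` turns the generative program
# above into a log-joint density that MCMC or VI can target. The data below
# are the classic eight-schools values, shown only for illustration.
import numpy as np

num_schools = 8
treatment_stddevs = np.array(
    [15., 10., 16., 11., 9., 11., 10., 18.], dtype=np.float32)
treatment_effects = np.array(
    [28., 8., -3., 7., -1., 1., 18., 12.], dtype=np.float32)

log_joint = ed.make_log_joint_fn(schools_model)

def target_log_prob_fn(avg_effect, avg_stddev, school_effects_standard):
  # Condition the model on the observed treatment effects.
  return log_joint(
      num_schools, treatment_stddevs,
      avg_effect=avg_effect,
      avg_stddev=avg_stddev,
      school_effects_standard=school_effects_standard,
      treatment_effects=treatment_effects)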
def probabilistic_pca(data_dim, latent_dim, num_datapoints, stddv_datapoints):  # (unmodeled) data
  w = ed.Normal(loc=tf.zeros([data_dim, latent_dim]),
                scale=2.0 * tf.ones([data_dim, latent_dim]),
                name="w")  # parameter
  z = ed.Normal(loc=tf.zeros([latent_dim, num_datapoints]),
                scale=tf.ones([latent_dim, num_datapoints]),
                name="z")  # parameter
  x = ed.Normal(loc=tf.matmul(w, z),
                scale=stddv_datapoints * tf.ones([data_dim, num_datapoints]),
                name="x")  # (modeled) data
  return x, (w, z)
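# Hedged sketch: generating a synthetic dataset by forward-sampling the PPCA
# prior above (TF1-style session execution; the dimensions are illustrative).
x_train, (w_actual, z_actual) = probabilistic_pca(
    data_dim=2, latent_dim=1, num_datapoints=5000, stddv_datapoints=0.5)
with tf.Session() as sess:
  x_train_, w_, z_ = sess.run([x_train.value, w_actual.value, z_actual.value])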
def tfp_schools_model(num_schools, treatment_stddevs):
  """Non-centered eight schools model for tfp."""
  import tensorflow_probability.python.edward2 as ed
  import tensorflow as tf

  avg_effect = ed.Normal(loc=0.0, scale=10.0, name="avg_effect")  # `mu`
  avg_stddev = ed.Normal(loc=5.0, scale=1.0, name="avg_stddev")  # `log(tau)`
  school_effects_standard = ed.Normal(
      loc=tf.zeros(num_schools),
      scale=tf.ones(num_schools),
      name="school_effects_standard")  # `eta`
  school_effects = avg_effect + tf.exp(avg_stddev) * school_effects_standard  # `theta`
  treatment_effects = ed.Normal(
      loc=school_effects, scale=treatment_stddevs,
      name="treatment_effects")  # `y`
  return treatment_effects
def test_dependencies(dependencies1, dependencies2):
  # register and check dependencies between each group of dependent vars
  for dependent_vars in (dependencies1, dependencies2):
    last = ed.Normal(0, 1, name=dependent_vars[0])
    for i in range(1, len(dependent_vars)):
      parentname = dependent_vars[i - 1]
      name = dependent_vars[i]
      x = ed.Normal(last, 1, name=name)
      g = tf_graph.get_graph(set([parentname, name]))
      assert parentname in g.predecessors(name)
      last = x
  # all variables registered. Now check independencies between independent groups
  g = tf_graph.get_graph(set(dependencies1 + dependencies2))
  for v1 in dependencies1:
    for v2 in dependencies2:
      assert v1 not in g.predecessors(v2)
      assert v2 not in g.predecessors(v1)
def tfp_schools_model(num_schools, treatment_stddevs):
  """Non-centered eight schools model for tfp."""
  import tensorflow_probability.python.edward2 as ed
  import tensorflow as tf
  if int(tf.__version__[0]) > 1:
    import tensorflow.compat.v1 as tf  # pylint: disable=import-error
    tf.disable_v2_behavior()

  avg_effect = ed.Normal(loc=0.0, scale=10.0, name="avg_effect")  # `mu`
  avg_stddev = ed.Normal(loc=5.0, scale=1.0, name="avg_stddev")  # `log(tau)`
  school_effects_standard = ed.Normal(
      loc=tf.zeros(num_schools),
      scale=tf.ones(num_schools),
      name="school_effects_standard")  # `eta`
  school_effects = avg_effect + tf.exp(avg_stddev) * school_effects_standard  # `theta`
  treatment_effects = ed.Normal(
      loc=school_effects, scale=treatment_stddevs,
      name="treatment_effects")  # `y`
  return treatment_effects
def qmodel(k, d0, x):
  """Amortized Gaussian posterior q(z|x), parameterized by an encoder network."""
  encoder = tf.keras.Sequential([
      tf.keras.layers.Dense(d0, activation=tf.nn.relu, name="h0"),
      tf.keras.layers.Dense(2 * k, name="h1")
  ], name="encoder")
  output = encoder(x)
  qz_loc = output[:, :k]
  qz_scale = tf.nn.softplus(output[:, k:]) + scale_epsilon
  qz = ed.Normal(loc=qz_loc, scale=qz_scale, name="qz")
  return qz
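# Note on `qmodel` above: `scale_epsilon` is a module-level constant that the
# snippet does not define; a small positive value (hypothetical choice below)
# keeps the softplus-transformed posterior scale strictly positive.
scale_epsilon = 0.01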
def model(X, sample_bias=False, bias_sample_range=None):
  """Defines a Bayesian Neural Network for regression.

  Note: Output weight prior variance is set to: 10/K.

  Args:
    X: (np.ndarray of NP_DTYPE) A matrix of input features between (0, 1),
      shape (n, d).
    sample_bias: (bool) Whether to also estimate the bias of the
      variable-importance estimate.
    bias_sample_range: (list of int or None) The indices of covariates to
      sample bias for. If `None` then sample only the first.

  Returns:
    y: (ed.RandomVariable) The output distribution.
    y_mean: (tf.Tensor) The predictive mean, shape (n,).
    var_imp: (tf.Tensor) Variable importance per feature, shape (d,).
    var_imp_bias: (tf.Tensor or None) Bias estimate for `var_imp`.
    weight_list: (list of ed.RandomVariable) Hidden weight variables; shape is
      (n_feature, n_node) for the input weight, (n_node, n_node) for hidden
      weights, and (n_node, 1) for the output weight.
    bias_list: (list of ed.RandomVariable) Scalar bias variable per layer.
  """
  # define architecture
  X = tf.convert_to_tensor(X, dtype=dtype_util.TF_DTYPE)
  n_sample, n_feature = X.shape.as_list()
  layer_size = [n_feature] + [n_node] * n_layer + [1]

  # initialize model building
  weight_list = []
  bias_list = []
  net = X  # input layer

  # hidden and output layers
  for layer_id in range(len(layer_size) - 1):
    with tf.variable_scope("layer_{}".format(layer_id), reuse=True):
      # configure weight shape and prior scale
      weight_shape = (layer_size[layer_id], layer_size[layer_id + 1])
      weight_scale = hidden_weight_sd if layer_id < n_layer else output_weight_sd

      # define random variables
      bias_rv = ed.Normal(loc=0., scale=hidden_weight_sd,
                          name="bias_{}".format(layer_id))
      weight_rv = ed.Normal(loc=0.,
                            scale=tf.ones(shape=weight_shape) * weight_scale,
                            name="weight_{}".format(layer_id))

      # add to list for easy access
      bias_list += [bias_rv]
      weight_list += [weight_rv]

      # optionally store the output-layer hidden nodes
      if sample_bias and layer_id == n_layer:
        phi = net  # shape (n_sample, n_node)

      net = net_util.Dense(
          net, weight_rv, bias_rv,
          activation=None if layer_id == n_layer else activation)

  # final output layer
  with tf.variable_scope("output"):
    # produce output prediction
    y_mean = net[:, 0]  # shape (n,) (i.e., the number of data samples)
    std_devs = 1.

    # estimate variable importance (i.e. squared gradient norm)
    y_mean_grad = tf.gradients(y_mean, X)[0]
    var_imp = tf.reduce_mean(y_mean_grad ** 2, axis=0)

    # estimate variable importance bias
    if sample_bias:
      phi_grad = tf.vectorized_map(  # shape (n_node, n_sample, n_feature)
          lambda phi_k: tf.gradients(phi_k, X)[0],
          tf.transpose(phi))
      phi_grad2 = tf.vectorized_map(  # shape (n_feature, n_node, n_node)
          lambda dphi: tf.matmul(dphi, dphi, transpose_b=True),
          tf.transpose(phi_grad, [2, 0, 1]))
      phi2_inv = tfp.math.pinv(  # shape (n_node, n_node)
          tf.matmul(phi, phi, transpose_a=True))
      var_imp_mat = tf.tensordot(  # shape (n_feature, n_node, n_node)
          phi_grad2, phi2_inv, axes=[2, 0])
      var_imp_bias = tf.vectorized_map(tf.linalg.trace, var_imp_mat)
    else:
      var_imp_bias = None

    y = ed.Normal(loc=y_mean, scale=std_devs, name="y")

  return y, y_mean, var_imp, var_imp_bias, weight_list, bias_list
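# Hedged configuration sketch for `model` above: the function reads several
# module-level names that the snippet does not define (n_node, n_layer,
# hidden_weight_sd, output_weight_sd, activation, plus the net_util and
# dtype_util helper modules). Hypothetical values, consistent with the
# docstring's 10/K output-weight prior variance:
import numpy as np

n_layer = 1
n_node = 50
hidden_weight_sd = 1.0
output_weight_sd = np.sqrt(10.0 / n_node)
activation = tf.nn.relu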
def qmodel(k, d0, x, encoder):
  output = encoder(x, d0, k)
  qz_loc = output[:, :k]
  qz_scale = tf.nn.softplus(output[:, k:]) + scale_epsilon
  qz = ed.Normal(loc=qz_loc, scale=qz_scale, name="qz")
  return qz
def vae(k, d0, dx, N, decoder):
  z = ed.Normal(loc=tf.ones(k), scale=1., sample_shape=N, name="z")
  x = ed.Normal(loc=decoder(z, d0, dx), scale=1., name="x")
  return z, x
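# Hedged usage sketch for the parameterized `vae` / `qmodel` variants above:
# hypothetical network builders matching the (z, d0, dx) and (x, d0, k) call
# signatures. Note these rebuild fresh layers on every call, which is fine
# for a single model/qmodel trace.
def decoder(z, d0, dx):
  h0 = tf.keras.layers.Dense(d0, activation=tf.nn.relu, name="h0")
  h1 = tf.keras.layers.Dense(dx, name="h1")
  return h1(h0(z))

def encoder(x, d0, k):
  h0 = tf.keras.layers.Dense(d0, activation=tf.nn.relu, name="h0")
  h1 = tf.keras.layers.Dense(2 * k, name="h1")  # loc and (pre-softplus) scale
  return h1(h0(x))

z, x = vae(k=2, d0=100, dx=784, N=1000, decoder=decoder)
qz = qmodel(k=2, d0=100, x=x, encoder=encoder)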