def top1(self, yhat):
    yhatT = tf.transpose(yhat)
    term1 = tf.reduce_mean(tf.nn.sigmoid(-tf.diag_part(yhat) + yhatT) +
                           tf.nn.sigmoid(yhatT**2), axis=0)
    term2 = tf.nn.sigmoid(tf.diag_part(yhat)**2) / self.batch_size
    return tf.reduce_mean(term1 - term2)

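# A minimal NumPy sketch of the same TOP1 loss (assumption: `yhat` is a
# [batch, batch] score matrix whose diagonal holds the positive-item scores and
# whose off-diagonal entries are in-batch negatives, as in GRU4Rec-style session
# losses). The names `top1_np` and `sigmoid` are illustrative, not from the source.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def top1_np(yhat):
    batch_size = yhat.shape[0]
    diag = np.diag(yhat)                                     # positive scores, shape [batch]
    term1 = np.mean(sigmoid(yhat.T - diag) + sigmoid(yhat.T ** 2), axis=0)
    term2 = sigmoid(diag ** 2) / batch_size                  # correction for the positive item
    return np.mean(term1 - term2)

# Example: 4 sessions scored against 4 in-batch items.
print(top1_np(np.random.RandomState(0).randn(4, 4).astype(np.float32)))
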
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """Compute a weighted mixture of RBF kernels K(X, X), K(X, Y), K(Y, Y) for the bandwidths in `sigmas`."""
    if wts is None:
        wts = [1.0] * sigmas.get_shape()[0]  # debug!

    if len(X.shape) == 2:  # matrix
        XX = tf.matmul(X, X, transpose_b=True)
        XY = tf.matmul(X, Y, transpose_b=True)
        YY = tf.matmul(Y, Y, transpose_b=True)
    elif len(X.shape) == 3:  # tensor -- this is computing the Frobenius norm
        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])
        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])
        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])
    else:
        raise ValueError(X)

    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)

    r = lambda x: tf.expand_dims(x, 0)
    c = lambda x: tf.expand_dims(x, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts):
        gamma = 1 / (2 * sigma**2)
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)

def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
                + (Kt_YY_sum + sum_diag_Y) / (m * m)
                - 2 * K_XY_sum / (m * m))
    else:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m - 1))
                + (Kt_YY_sum + sum_diag_Y) / (m * (m - 1))
                - 2 * K_XY_sum / (m * m))

    var_est = (
        2 / (m**2 * (m - 1)**2)
        * (2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
           + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4 * m - 6) / (m**3 * (m - 1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4 * (m - 2) / (m**3 * (m - 1)**2)
        * (sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m - 3) / (m**3 * (m - 1)**2) * K_XY_2_sum
        - (8 * m - 12) / (m**5 * (m - 1)) * K_XY_sum**2
        + 8 / (m**3 * (m - 1))
        * (1 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
           - dot(Kt_XX_sums, K_XY_sums_1)
           - dot(Kt_YY_sums, K_XY_sums_0)))

    return mmd2, var_est

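# The two MMD functions above assume `sq_sum` and `dot` helpers that are not defined
# in this snippet. Below is a minimal sketch of plausible definitions, consistent with
# how they are used (sum of squared entries and an inner product); the exact reference
# implementation may differ. The example wiring is hypothetical and left commented out.
def sq_sum(t):
    # Sum of squared entries of a tensor.
    return tf.reduce_sum(tf.square(t))

def dot(a, b):
    # Inner product of two equally shaped tensors.
    return tf.reduce_sum(a * b)

# Example usage (hypothetical shapes): X, Y are [batch, dim] samples from two distributions.
# X = tf.random_normal([64, 10])
# Y = tf.random_normal([64, 10]) + 0.5
# K_XX, K_XY, K_YY, wt_sum = _mix_rbf_kernel(X, Y, sigmas=tf.constant([1.0, 5.0]), wts=[1.0, 1.0])
# mmd2, var_est = _mmd2_and_variance(K_XX, K_XY, K_YY)
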
def zero_diag(input):
    """Helper function that zeros matrix diagonal.

    Args:
        input: 2-D float32 `Tensor`.

    Returns:
        2-D float32 `Tensor` with diagonal zeroed.
    """
    return input - tf.diag(tf.diag_part(input))

def decov_loss(xs):
    """DeCov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representations'.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss

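# For reference, a minimal NumPy restatement of the same quantity: DeCov penalises the
# off-diagonal entries of the batch covariance C, i.e. loss = 0.5 * (||C||_F^2 - ||diag(C)||_2^2).
# The name `decov_loss_np` is illustrative only.
import numpy as np

def decov_loss_np(xs):
    x = xs.reshape(xs.shape[0], -1)
    z = x - x.mean(axis=0, keepdims=True)
    corr = (z[:, :, None] @ z[:, None, :]).mean(axis=0)   # batch covariance, [dim, dim]
    return 0.5 * (np.sum(corr ** 2) - np.sum(np.diag(corr) ** 2))
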
def rank_loss(sentence_emb, image_emb, margin=0.2):
    """Experimental rank loss, thanks to kkurach@ for the code."""
    with tf.name_scope("rank_loss"):
        # Normalize first as this is assumed in cosine similarity later.
        sentence_emb = tf.nn.l2_normalize(sentence_emb, 1)
        image_emb = tf.nn.l2_normalize(image_emb, 1)

        # Both sentence_emb and image_emb have size [batch, depth].
        scores = tf.matmul(image_emb, tf.transpose(sentence_emb))  # [batch, batch]
        diagonal = tf.diag_part(scores)  # [batch]

        cost_s = tf.maximum(0.0, margin - diagonal + scores)  # [batch, batch]
        cost_im = tf.maximum(
            0.0, margin - tf.reshape(diagonal, [-1, 1]) + scores)  # [batch, batch]

        # Clear diagonals.
        batch_size = tf.shape(sentence_emb)[0]
        empty_diagonal_mat = tf.ones_like(cost_s) - tf.eye(batch_size)
        cost_s *= empty_diagonal_mat
        cost_im *= empty_diagonal_mat

        return tf.reduce_mean(cost_s) + tf.reduce_mean(cost_im)

def posterior_pred(self, x):
    self.Kinv_Y = tf.cholesky_solve(self.L_xx, self.t_Y)
    self.K_xX = self.create_kernel(x, self.t_X)
    self.K_xx = self.create_kernel(x, x)
    self.y_mu = tf.matmul(self.K_xX, self.Kinv_Y)
    self.K_xx_d = tf.diag_part(self.K_xx) + self.noise_var * tf.ones(
        [tf.shape(x)[0]], dtype=self.dtype)
    self.y_var = self.K_xx_d - tf.reduce_sum(
        tf.square(tf.matrix_triangular_solve(self.L_xx, tf.transpose(self.K_xX))),
        axis=0)
    self.y_var = self.y_var[:, tf.newaxis]
    return self.y_mu, self.y_var

def pr_re_fbeta(cm, pos_indices, beta=1):
    """Uses a confusion matrix to compute precision, recall and fbeta."""
    num_classes = cm.shape[0]
    neg_indices = [i for i in range(num_classes) if i not in pos_indices]

    cm_mask = np.ones([num_classes, num_classes])
    cm_mask[neg_indices, neg_indices] = 0
    diag_sum = tf.reduce_sum(tf.diag_part(cm * cm_mask))

    cm_mask = np.ones([num_classes, num_classes])
    cm_mask[:, neg_indices] = 0
    tot_pred = tf.reduce_sum(cm * cm_mask)

    cm_mask = np.ones([num_classes, num_classes])
    cm_mask[neg_indices, :] = 0
    tot_gold = tf.reduce_sum(cm * cm_mask)

    pr = safe_div(diag_sum, tot_pred)
    re = safe_div(diag_sum, tot_gold)
    fbeta_score = safe_div((1. + beta**2) * pr * re, beta**2 * pr + re)

    return pr, re, fbeta_score

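# `pr_re_fbeta` relies on a `safe_div` helper that is not shown here. Below is a
# plausible sketch (an assumption: element-wise division that returns 0 where the
# denominator is 0) and a hypothetical example call; `cm` is expected to be a
# [num_classes, num_classes] confusion matrix with gold labels on rows and
# predictions on columns.
def safe_div(numerator, denominator):
    # Division that yields 0 instead of NaN/Inf when the denominator is 0.
    return tf.where(tf.equal(denominator, 0),
                    tf.zeros_like(numerator),
                    numerator / denominator)

# Example (hypothetical): micro-averaged precision/recall/F1 over classes 1 and 2.
# cm = tf.confusion_matrix(labels, predictions, num_classes=3, dtype=tf.float32)
# pr, re, f1 = pr_re_fbeta(cm, pos_indices=[1, 2], beta=1)
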
def regularize_diag_off_diag_dip(covariance_matrix, lambda_od, lambda_d):
    """Compute on and off diagonal regularizers for DIP-VAE models.

    Penalize deviations of covariance_matrix from the identity matrix. Uses
    different weights for the deviations of the diagonal and off diagonal entries.

    Args:
        covariance_matrix: Tensor of size [num_latent, num_latent] to regularize.
        lambda_od: Weight of penalty for off diagonal elements.
        lambda_d: Weight of penalty for diagonal elements.

    Returns:
        dip_regularizer: Regularized deviation from diagonal of covariance_matrix.
    """
    covariance_matrix_diagonal = tf.diag_part(covariance_matrix)
    covariance_matrix_off_diagonal = covariance_matrix - tf.diag(
        covariance_matrix_diagonal)
    dip_regularizer = tf.add(
        lambda_od * tf.reduce_sum(covariance_matrix_off_diagonal**2),
        lambda_d * tf.reduce_sum((covariance_matrix_diagonal - 1)**2))
    return dip_regularizer

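# A minimal usage sketch (assumptions: `z_mean` stands in for a [batch, num_latent]
# tensor of encoder means, as in DIP-VAE-I, and the covariance below is a standard
# batch estimate; the weights 10/100 are illustrative, not reference values).
import tensorflow as tf

z_mean = tf.random_normal([64, 10])                                  # hypothetical encoder means
centered = z_mean - tf.reduce_mean(z_mean, axis=0, keep_dims=True)
cov_z_mean = tf.matmul(centered, centered, transpose_a=True) / 64.   # [10, 10] batch covariance
dip_penalty = regularize_diag_off_diag_dip(cov_z_mean, lambda_od=10., lambda_d=100.)
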
def _pairwise_distances(embeddings, squared=False):
    """Compute the 2D matrix of distances between all the embeddings.

    Args:
        embeddings: tensor of shape (batch_size, embed_dim)
        squared: Boolean. If true, output is the pairwise squared euclidean
            distance matrix. If false, output is the pairwise euclidean
            distance matrix.

    Returns:
        pairwise_distances: tensor of shape (batch_size, batch_size)
    """
    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    dot_product = tf.matmul(embeddings, tf.transpose(embeddings))

    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
    # shape (batch_size,)
    square_norm = tf.diag_part(dot_product)

    # Compute the pairwise distance matrix as we have:
    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)
    distances = tf.expand_dims(square_norm, 1) - 2.0 * dot_product + tf.expand_dims(square_norm, 0)

    # Because of computation errors, some distances might be negative so we put everything >= 0.0
    distances = tf.maximum(distances, 0.0)

    if not squared:
        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
        # we need to add a small epsilon where distances == 0.0
        mask = tf.to_float(tf.equal(distances, 0.0))
        distances = distances + mask * 1e-16

        distances = tf.sqrt(distances)

        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
        distances = distances * (1.0 - mask)

    return distances

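# Quick sanity-check sketch (assumption: plain TF1 graph mode, as used by the function
# above): compare the result against a direct NumPy computation on a small batch.
import numpy as np
import tensorflow as tf

emb_np = np.random.RandomState(0).randn(5, 3).astype(np.float32)
dist_tf = _pairwise_distances(tf.constant(emb_np), squared=False)
with tf.Session() as sess:
    dist = sess.run(dist_tf)

# Direct O(n^2) reference: ||a - b|| for every pair of rows.
ref = np.sqrt(((emb_np[:, None, :] - emb_np[None, :, :]) ** 2).sum(-1))
print(np.allclose(dist, ref, atol=1e-5))  # expected: True
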
def bpr(self, yhat):
    yhatT = tf.transpose(yhat)
    return tf.reduce_mean(
        -tf.log(tf.nn.sigmoid(tf.diag_part(yhat) - yhatT)))

def add_contrastive_loss(hidden,
                         hidden_norm=True,
                         temperature=1.0,
                         tpu_context=None,
                         weights=1.0):
    """Compute loss for model.

    Args:
        hidden: hidden vector (`Tensor`) of shape (bsz, dim).
        hidden_norm: whether or not to use normalization on the hidden vector.
        temperature: a `floating` number for temperature scaling.
        tpu_context: context information for tpu.
        weights: a weighting number or vector.

    Returns:
        A loss scalar.
        The logits for contrastive prediction task.
        The labels for contrastive prediction task.
    """
    # Get (normalized) hidden1 and hidden2.
    if hidden_norm:
        hidden = tf.math.l2_normalize(hidden, -1)
    hidden1, hidden2 = tf.split(hidden, 2, 0)
    batch_size = tf.shape(hidden1)[0]

    # Gather hidden1/hidden2 across replicas and create local labels.
    if tpu_context is not None:
        hidden1_large = tpu_cross_replica_concat(hidden1, tpu_context)
        hidden2_large = tpu_cross_replica_concat(hidden2, tpu_context)
        enlarged_batch_size = tf.shape(hidden1_large)[0]
        # TODO(iamtingchen): more elegant way to convert u32 to s32 for replica_id.
        replica_id = tf.cast(tf.cast(xla.replica_id(), tf.uint32), tf.int32)
        labels_idx = tf.range(batch_size) + replica_id * batch_size
        labels = tf.one_hot(labels_idx, enlarged_batch_size * 2)
        masks = tf.one_hot(labels_idx, enlarged_batch_size)
    else:
        hidden1_large = hidden1
        hidden2_large = hidden2
        labels = tf.one_hot(tf.range(batch_size), batch_size * 2)
        masks = tf.one_hot(tf.range(batch_size), batch_size)

    logits_aa = tf.matmul(hidden1, hidden1_large, transpose_b=True) / temperature
    logits_aa = logits_aa - masks * LARGE_NUM
    logits_bb = tf.matmul(hidden2, hidden2_large, transpose_b=True) / temperature
    logits_bb = logits_bb - masks * LARGE_NUM
    logits_ab = tf.matmul(hidden1, hidden2_large, transpose_b=True) / temperature
    logits_ba = tf.matmul(hidden2, hidden1_large, transpose_b=True) / temperature

    logits_a = tf.concat([logits_ab, logits_aa], 1)
    logits_b = tf.concat([logits_ba, logits_bb], 1)

    if FLAGS.loss_func != 'NT-Xent':
        logits_positive = tf.diag_part(logits_ab)
        temp_positive = tf.tile(tf.expand_dims(logits_positive, -1),
                                [1, logits_a.shape[1]])
        masks_a = tf.cast(tf.greater_equal(logits_a, temp_positive - 1e-5), tf.float32)
        masks_b = tf.cast(tf.greater_equal(logits_b, temp_positive - 1e-5), tf.float32)
        logits_a = logits_a - masks_a * LARGE_NUM
        logits_b = logits_b - masks_b * LARGE_NUM
        logits_negative_a = tf.reduce_max(logits_a, axis=1)
        logits_negative_b = tf.reduce_max(logits_b, axis=1)
        #print(logits_negative_a, logits_negative_b)
        if FLAGS.loss_func == 'NT-Logistic':
            loss_a = tf.reduce_mean(
                tf.log(1 + tf.exp(-logits_positive)) +
                tf.log(1 + tf.exp(logits_negative_a)))
            loss_b = tf.reduce_mean(
                tf.log(1 + tf.exp(-logits_positive)) +
                tf.log(1 + tf.exp(logits_negative_b)))
            tf.losses.add_loss(loss_a + loss_b)
            #print(loss_a, loss_b)
            return loss_a + loss_b, logits_ab, labels
        else:
            loss_a = tf.reduce_mean(
                tf.maximum(logits_negative_a - logits_positive + MARGIN, 0))
            loss_b = tf.reduce_mean(
                tf.maximum(logits_negative_b - logits_positive + MARGIN, 0))
            tf.losses.add_loss(loss_a + loss_b)
            return loss_a + loss_b, logits_ab, labels

    loss_a = tf.losses.softmax_cross_entropy(labels, logits_a, weights=weights)
    loss_b = tf.losses.softmax_cross_entropy(labels, logits_b, weights=weights)
    #print(loss_a, loss_b)
    loss = loss_a + loss_b

    return loss, logits_ab, labels

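# The function above assumes several module-level names not shown here: `LARGE_NUM`
# (a large constant used to mask out self-similarities), `MARGIN` (margin for the
# hinge-style variant), `FLAGS.loss_func` (one of 'NT-Xent', 'NT-Logistic', or a
# margin loss), plus `tpu_cross_replica_concat` and `xla` for the TPU path. A minimal
# sketch of plausible constant values follows; these are assumptions, not the
# reference settings.
LARGE_NUM = 1e9   # subtracting this makes masked logits effectively -inf
MARGIN = 1.0      # hypothetical margin for the hinge variant
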
def __init__(self, embeddings, latent_inters, latent_varies, degrees,
             edge_types, edge_type2dim, placeholders, margin=0.1,
             neg_sample_weights=1., batch_size=100):
    self.embeddings = embeddings
    self.latent_inters = latent_inters  # the model's intermediate layers
    self.latent_varies = latent_varies  # the model's variations
    self.edge_types = edge_types  # edge types
    self.degrees = degrees
    self.edge_type2dim = edge_type2dim
    self.obj_type2n = {
        i: self.edge_type2dim[i, j][0][0]
        for i, j in self.edge_types
    }  # 0: 500, 1: 400
    self.margin = margin
    self.neg_sample_weights = neg_sample_weights
    self.batch_size = batch_size

    self.inputs = placeholders['batch']  # 0
    self.batch_edge_type_idx = placeholders['batch_edge_type_idx']  # 0
    self.batch_row_edge_type = placeholders['batch_row_edge_type']  # 0
    self.batch_col_edge_type = placeholders['batch_col_edge_type']  # 0

    self.row_inputs = tf.squeeze(gather_cols(self.inputs, [0]))  # -> labels
    self.col_inputs = tf.squeeze(gather_cols(self.inputs, [1]))

    obj_type_n = [self.obj_type2n[i] for i in range(len(self.embeddings))]
    self.obj_type_lookup_start = tf.cumsum([0] + obj_type_n[:-1])
    self.obj_type_lookup_end = tf.cumsum(obj_type_n)

    labels = tf.reshape(tf.cast(self.row_inputs, dtype=tf.int64),
                        [self.batch_size, 1])

    # This block implements a method from the paper (it can be skipped at first):
    # estimate the model through negative sampling. For each drug-drug edge in the
    # graph (vi, r, vj), we sample a random edge (vi, r, vn), where vn is chosen
    # according to the sampling distribution Pr.
    neg_samples_list = []
    for i, j in self.edge_types:
        for k in range(self.edge_types[i, j]):
            neg_samples, _, _ = tf.nn.fixed_unigram_candidate_sampler(
                true_classes=labels,
                num_true=1,
                num_sampled=self.batch_size,
                unique=False,
                range_max=len(self.degrees[i][k]),
                distortion=0.75,
                unigrams=self.degrees[i][k].tolist())
            neg_samples_list.append(neg_samples)
    self.neg_samples = tf.cast(
        tf.gather(neg_samples_list, self.batch_edge_type_idx),
        dtype=tf.int64)  # tf.int32

    self.preds = self.batch_predict(self.row_inputs, self.col_inputs)
    self.outputs = tf.diag_part(self.preds)  # take the diagonal of the prediction matrix
    self.outputs = tf.reshape(self.outputs, [-1])  # outputs feed into the cross-entropy loss

    self.neg_preds = self.batch_predict(self.neg_samples, self.col_inputs)
    self.neg_outputs = tf.diag_part(self.neg_preds)
    self.neg_outputs = tf.reshape(self.neg_outputs, [-1])

    self.predict()
    self.build()

def _build_graph(
    Npartitions,
    voc_size,
    batch_size,
    gamma_regularizer,
    reg2,
    optimizer_param,
    optimizer_type,
    init_std_dev=.05,
):
    graph = tf.Graph()
    with graph.as_default():
        chosen_index_1 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
        chosen_index_2 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
        is_corrections_pl = tf.placeholder_with_default(
            tf.ones(batch_size, dtype=tf.float32), shape=(batch_size))
        learning_rate_pl = tf.placeholder(dtype=tf.float32)

        t_weights_free = tf.Variable(
            tf.truncated_normal([Npartitions, Npartitions], mean=0.,
                                stddev=init_std_dev),
            dtype=tf.float32)
        t_weights_free_sym = t_weights_free + tf.transpose(t_weights_free)
        t_weights = tf.reshape(
            tf.nn.softmax(
                tf.reshape(t_weights_free_sym, [Npartitions * Npartitions])),
            [Npartitions, Npartitions])

        t_topics_free = tf.Variable(
            tf.truncated_normal([Npartitions, voc_size], mean=0.,
                                stddev=init_std_dev),
            dtype=tf.float32)
        t_topics = tf.nn.softmax(t_topics_free)  # default axis is (-1)

        t_topics_free_pl = tf.placeholder(tf.float32, shape=[Npartitions, voc_size])
        t_weights_free_pl = tf.placeholder(tf.float32, shape=[Npartitions, Npartitions])
        t_weights_free_assign_op = tf.assign(t_weights_free, t_weights_free_pl)
        t_topics_free_assign_op = tf.assign(t_topics_free, t_topics_free_pl)

        t_gamma = gamma_regularizer
        t_gamma2 = reg2

        pre_target = tf.log(
            tf.reduce_sum(
                (tf.matmul(
                    tf.expand_dims(
                        tf.transpose(tf.gather(t_topics, chosen_index_1, axis=1)), -1),
                    tf.expand_dims(
                        tf.transpose(tf.gather(t_topics, chosen_index_2, axis=1)), 1))
                 * t_weights),
                axis=[1, 2]))

        target = tf.reduce_mean(
            is_corrections_pl * tf.where(
                tf.is_nan(pre_target), tf.zeros_like(pre_target), pre_target)
        ) + t_gamma * tf.reduce_sum(
            tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))

        # now the optimizer
        t_loss = -target
        #t_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #t_optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=.9)
        #t_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        if optimizer_type == 'adam':
            t_optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate_pl, **optimizer_param)
        elif optimizer_type == 'rmsprop':
            t_optimizer = tf.train.RMSPropOptimizer(
                learning_rate=learning_rate_pl, **optimizer_param)
        else:
            raise ValueError('Unknown optimizer')

        opt_vars = t_optimizer.variables()
        opt_vars_pls = [
            tf.placeholder(dtype=v.dtype, shape=v.shape) for v in opt_vars
        ]
        opt_vars_assigns = [
            tf.assign(v, pl) for v, pl in zip(opt_vars, opt_vars_pls)
        ]

        t_train_op = t_optimizer.minimize(t_loss)
        t_tfinit = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=2)

        t_loss_to_display = -(target - (
            t_gamma * tf.reduce_sum(tf.diag_part(t_weights))
            + t_gamma2 * tf.reduce_sum(
                tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))))

    return (graph, t_tfinit, t_loss_to_display, t_topics, t_train_op,
            t_weights, chosen_index_1, chosen_index_2, is_corrections_pl,
            saver, t_topics_free_pl, t_weights_free_pl,
            t_weights_free_assign_op, t_topics_free_assign_op, pre_target,
            learning_rate_pl, t_weights_free, t_topics_free, opt_vars,
            opt_vars_pls, opt_vars_assigns)

def invert(
    settings,
    samples,
    para_path,
    g_tolerance=None,
    e_tolerance=0.1,
    n_iter=None,
    max_iter=10000,
    heuristic_sigma=None,
):
    """
    Return the latent space points corresponding to a set of samples
    (via gradient descent).
    Note: this function is designed for ONE sample generation.
    """
    # num_samples = samples.shape[0]
    # cast samples to float32
    samples = np.float32(samples)

    # get the model
    # if settings is a string, assume it's an identifier and load
    if type(settings) == str:
        settings = json.load(open("./experiments/settings/" + settings + ".txt", "r"))

    # print('Inverting', 1, 'samples using model', settings['identifier'], 'at epoch', epoch,)
    # if not g_tolerance is None:
    #     print('until gradient norm is below', g_tolerance)
    # else:
    #     print('until error is below', e_tolerance)

    # get parameters
    parameters = model.load_parameters(para_path)
    # # assertions
    # assert samples.shape[2] == settings['num_generated_features']

    # create VARIABLE Z
    Z = tf.get_variable(
        name="Z",
        shape=[1, settings["seq_length"], settings["latent_dim"]],
        initializer=tf.random_normal_initializer(),
    )
    # create outputs
    G_samples = generator_o(
        Z,
        settings["hidden_units_g"],
        settings["seq_length"],
        1,
        settings["num_generated_features"],
        reuse=False,
        parameters=parameters,
    )
    # generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'num_generated_features', 'cond_dim', 'learn_scale']
    # generator_settings = dict((k, settings[k]) for k in generator_vars)
    # G_samples = model.generator(Z, **generator_settings, reuse=True)
    fd = None

    # define the MMD-based loss
    if heuristic_sigma is None:
        heuristic_sigma = mmd.median_pairwise_distance_o(samples)  # this is noisy
        print("heuristic_sigma:", heuristic_sigma)
    samples = tf.reshape(
        samples, [1, settings["seq_length"], settings["num_generated_features"]]
    )
    Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(
        G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1))
    )
    similarity_per_sample = tf.diag_part(Kxy)
    reconstruction_error_per_sample = 1 - similarity_per_sample
    # reconstruction_error_per_sample = tf.reduce_sum((tf.nn.l2_normalize(G_samples, dim=1) - tf.nn.l2_normalize(samples, dim=1))**2, axis=[1,2])
    similarity = tf.reduce_mean(similarity_per_sample)
    reconstruction_error = 1 - similarity

    # updater
    # solver = tf.train.AdamOptimizer().minimize(reconstruction_error_per_sample, var_list=[Z])
    # solver = tf.train.RMSPropOptimizer(learning_rate=500).minimize(reconstruction_error, var_list=[Z])
    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(
        reconstruction_error_per_sample, var_list=[Z]
    )
    # solver = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9).minimize(reconstruction_error_per_sample, var_list=[Z])

    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]
    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))
    grad_norm = tf.reduce_mean(grad_per_Z)
    # solver = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(reconstruction_error, var_list=[Z])

    print("Finding latent state corresponding to samples...")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        error = sess.run(reconstruction_error, feed_dict=fd)
        g_n = sess.run(grad_norm, feed_dict=fd)
        # print(g_n)
        i = 0
        if not n_iter is None:
            while i < n_iter:
                _ = sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
        else:
            if not g_tolerance is None:
                while g_n > g_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error, g_n = sess.run(
                        [reconstruction_error, grad_norm], feed_dict=fd
                    )
                    i += 1
                    print(error, g_n)
                    if i > max_iter:
                        break
            else:
                while np.abs(error) > e_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error = sess.run(reconstruction_error, feed_dict=fd)
                    i += 1
                    # print(error)
                    if i > max_iter:
                        break
        Zs = sess.run(Z, feed_dict=fd)
        Gs = sess.run(G_samples, feed_dict={Z: Zs})
        error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd)
        print("Z found in", i, "iterations with final reconstruction error of", error)

    tf.reset_default_graph()
    return Gs, Zs, error_per_sample, heuristic_sigma

def predict(self, k_test_test, sess, get_var=False):
    self.k_test_test = k_test_test
    if self.l_np is None:
        self._build_cholesky()
        start_time = time.time()
        while self.current_stability_eps < 10:
            try:
                start_time = time.time()
                self.l_np, self.v_np = sess.run(
                    [self.l, self.v],
                    feed_dict={
                        self.y_pl: self.output_y,
                        self.K_data_data_pl: self.k_data_data,
                        self.stability_eps: self.current_stability_eps
                    })
                tf.logging.info("Computed L_DD in %.3f secs" %
                                (time.time() - start_time))
                break
            except tf.errors.InvalidArgumentError:
                if self.current_stability_eps < 1:
                    self.current_stability_eps *= 10
                else:
                    self.current_stability_eps += 1
                tf.logging.info(
                    "Cholesky decomposition failed, trying larger epsilon"
                    ": {}".format(self.current_stability_eps))

    if self.current_stability_eps > 8:
        raise ArithmeticError("Could not compute Cholesky decomposition.")

    self.K_data_test_pl = tf.placeholder(tf.float64, [1291, 327], name="K_data_test")
    self.K_test_test_pl = tf.placeholder(tf.float64, [327, 327], name="K_test_test")

    a = tf.matrix_triangular_solve(self.l, self.K_data_test_pl)
    fmean = tf.matmul(a, self.v, transpose_a=True)
    fvar = tf.diag_part(self.K_test_test_pl) - tf.reduce_sum(tf.square(a), 0)
    fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.output_y.shape[1]])
    self.fmean = fmean
    self.fvar = fvar

    start_time = time.time()
    mean_pred, var_pred = sess.run(
        [self.fmean, self.fvar],
        feed_dict={
            self.K_data_test_pl: self.k_data_test,
            # self.K_data_data_pl: self.k_data_data,
            self.l: self.l_np,
            self.v: self.v_np,
            self.K_test_test_pl: self.k_test_test
        })
    tf.logging.info("Did regression in %.3f secs" % (time.time() - start_time))
    return mean_pred, var_pred

def cross_entropy(self, yhat):
    return tf.reduce_mean(-tf.log(tf.diag_part(yhat) + 1e-24))

def create_model(self, x, y, *args):
    if self.process_y:
        self.f_mu = Regression().fit(x, y)
        self.Ymu = self.f_mu(x)
        self.Ys2 = np.std((y - self.Ymu))
        y = (y - self.Ymu) / self.Ys2

    self.t_X = tf.constant(x, dtype=self.dtype)
    self.t_Y = tf.constant(y, dtype=self.dtype)
    self.t_N = tf.shape(self.t_Y)[0]
    self.t_D = tf.shape(self.t_Y)[1]
    self.t_Q = tf.shape(self.t_X)[0]
    self.t_M = tf.shape(self.t_X)[1]
    self.M = x.shape[1]

    if self.kernel == 'Squared Exponential':
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], positive=True)
        self.lengthscale = self.init_variable([args[0][1]] * self.M,
                                              positive=True,
                                              multi=self.variable_l)
        self.noise_var = self.init_variable(args[0][2], positive=True)
        self.hparamd = ['Signal Variance', 'Lengthscale']
        self.hparams = [self.signal_var, self.lengthscale]

    if self.kernel == 'Periodic':
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], True)
        self.gamma = self.init_variable(args[0][0], True)
        self.period = self.init_variable(args[0][0], True)
        self.noise_var = self.init_variable(args[0][0], True)
        self.p_mu = self.init_variable(tf.log(self.t_Y), False)
        self.p_s2 = self.init_variable(1.0, True)
        self.hparamd = ['Signal Variance', 'Gamma', 'Period']
        self.hparams = [self.signal_var, self.gamma, self.period]

    self.create_kernel = lambda t_x1, t_x2: self.kernel_function(
        t_x1, t_x2, self.hparams)

    ### CREATING THE TRAINING MATRICES ###
    self.K_xx = self.create_kernel(self.t_X, self.t_X) + (
        self.noise_var + self.jitter) * tf.eye(self.t_N, dtype=self.dtype)
    self.L_xx = tf.cholesky(self.K_xx)
    self.logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.L_xx)))
    self.Kinv_YYt = 0.5 * tf.reduce_sum(
        tf.square(
            tf.matrix_triangular_solve(self.L_xx, self.t_Y, lower=True)))

    ### Initialising loose priors ###
    self.hprior = 0
    if self.variable_l:
        self.hprior += 0.5 * tf.square(tf.log(self.hparams[0]))
        self.hprior += tf.reduce_sum(0.5 * tf.square(tf.log(self.hparams[1])))
    else:
        for i in self.hparams:
            self.hprior += 0.5 * tf.square(tf.log(i))
    self.noise_prior = 0.5 * tf.square(tf.log(self.noise_var))

    ### Negative marginal log likelihood under Gaussian assumption ###
    if self.distribution == 'Gaussian':
        pi_term = tf.constant(0.5 * np.log(2.0 * np.pi), dtype=self.dtype)
        self.term1 = pi_term * tf.cast(self.t_D, dtype=self.dtype) * tf.cast(self.t_N, dtype=self.dtype) \
            + 0.5 * tf.cast(self.t_D, dtype=self.dtype) * self.logdet \
            + self.Kinv_YYt

    if self.distribution == 'Poisson' and self.kernel == 'Periodic':
        self.Kinv = tf.cholesky_solve(self.L_xx, tf.eye(self.t_N, dtype=self.dtype))
        self.term1 = -tf.reduce_sum(self.t_Y * self.p_mu - tf.exp(self.p_mu + self.p_s2 / 2)) \
            + (1 / 2) * (tf.trace(self.Kinv @ (self.p_s2 * tf.eye(self.t_N, dtype=self.dtype)
                                               + self.p_mu @ tf.transpose(self.p_mu)))
                         - tf.cast(self.t_N, dtype=self.dtype) + self.logdet
                         - tf.cast(self.t_N, dtype=self.dtype) * tf.log(self.p_s2))

    self.objective = self.term1 + self.hprior + self.noise_prior

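# The `logdet` term above uses the identity log|K| = 2 * sum(log(diag(L))) for the
# Cholesky factor K = L L^T. A small, illustrative NumPy check of that identity:
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(5, 5)
K = A @ A.T + 5 * np.eye(5)                       # a symmetric positive-definite matrix
L = np.linalg.cholesky(K)
logdet_chol = 2.0 * np.sum(np.log(np.diag(L)))
sign, logdet_direct = np.linalg.slogdet(K)
print(np.isclose(logdet_chol, logdet_direct))     # expected: True
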