def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None): """ `sample_conditional` will return a sample from the conditional distribution. In most cases this means calculating the conditional mean m and variance v and then returning m + sqrt(v) * eps, with eps ~ N(0, 1). However, for some combinations of Mok and Mof more efficient sampling routines exists. The dispatcher will make sure that we use the most efficient one. :return: samples, mean, cov samples has shape [num_samples, N, P] or [N, P] if num_samples is None mean and cov as for conditional() """ if full_cov and full_output_cov: raise NotImplementedError("The combination of both full_cov and full_output_cov is not " "implemented for sample_conditional.") logger.debug("sample conditional: InducingFeature Kernel") mean, cov = conditional(Xnew, feat, kern, f, q_sqrt=q_sqrt, white=white, full_cov=full_cov, full_output_cov=full_output_cov) if full_cov: # mean: N x P # cov: P x N x N mean = tf.matrix_transpose(mean) # now P x N samples = _sample_mvn(mean, cov, 'full', num_samples=num_samples) # (S x) P x N samples = tf.matrix_transpose(samples) # now (S x) N x P else: cov_structure = "full" if full_output_cov else "diag" samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples) # [(S,), N, P] return samples, mean, cov
def testNonBatchMatrixDynamicallyDefined(self):
    """Transpose a single matrix that is fed through a placeholder created without a static shape."""
    source = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
    want = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
    with self.test_session():
        feed_ph = tf.placeholder(tf.int32)
        result_op = tf.matrix_transpose(feed_ph)
        got = result_op.eval(feed_dict={feed_ph: source})
        self.assertAllEqual(want, got)
def _quadrature_expectation(p, obj1, feature1, obj2, feature2, num_gauss_hermite_points):
    """
    Quadrature fallback for expectations under Markov Gaussians (useful for time series),
    used when no analytic expectation is available.

    Nota Bene: obj1 is always associated with x_n, whereas obj2 always with x_{n+1}.
    If one requires e.g. <x_{n+1} K_{x_n, Z}>_p(x_{n:n+1}), compute the transpose and then
    transpose the result of the expectation.

    :param num_gauss_hermite_points: number of quadrature points; 40 is used when None.
    :return: the result of `mvnquad` for the selected integrand, mean and covariance.
    """
    if num_gauss_hermite_points is None:
        num_gauss_hermite_points = 40

    warnings.warn("Quadrature is used to calculate the expectation. This means that "
                  "an analytical implementations is not available for the given combination.")

    if obj2 is None:
        # Only x_n is involved; cross covariances are not needed.
        def eval_func(x):
            return get_eval_func(obj1, feature1)(x)

        mu = p.mu[:-1]
        cov = p.cov[0, :-1]
    elif obj1 is None:
        # Only x_{n+1} is involved; cross covariances are not needed.
        def eval_func(x):
            return get_eval_func(obj2, feature2)(x)

        mu = p.mu[1:]
        cov = p.cov[0, 1:]
    else:
        # Joint over (x_n, x_{n+1}): the integrand splits its input in two halves
        # along axis 1 and multiplies the two evaluated objects.
        def eval_func(x):
            first = get_eval_func(obj1, feature1, np.s_[:, :, None])(tf.split(x, 2, 1)[0])
            second = get_eval_func(obj2, feature2, np.s_[:, None, :])(tf.split(x, 2, 1)[1])
            return first * second

        mu = tf.concat((p.mu[:-1, :], p.mu[1:, :]), 1)  # Nx2D
        upper_rows = tf.concat((p.cov[0, :-1, :, :], p.cov[1, :-1, :, :]), 2)  # NxDx2D
        lower_rows = tf.concat((tf.matrix_transpose(p.cov[1, :-1, :, :]), p.cov[0, 1:, :, :]), 2)
        cov = tf.concat((upper_rows, lower_rows), 1)  # Nx2Dx2D

    return mvnquad(eval_func, mu, cov, num_gauss_hermite_points)
def testNonBatchMatrix(self):
    """Transpose a single statically defined matrix and check both its static shape and its values."""
    source = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
    want = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
    with self.test_session():
        result_op = tf.matrix_transpose(source)
        self.assertEqual((3, 2), result_op.get_shape())
        self.assertAllEqual(want, result_op.eval())
def _sample_conditional(Xnew, X, kern, f, *, q_sqrt=None, white=False, full_cov=False, full_output_cov=False, num_samples=None): if full_cov and full_output_cov: raise NotImplementedError("The combination of both full_cov and full_output_cov is not " "implemented for sample_conditional.") logger.debug("sample conditional: Kernel") if full_output_cov: raise NotImplementedError("full_output_cov is not implemented") mean, cov = conditional(Xnew, X, kern, f, q_sqrt=q_sqrt, white=white, full_cov=full_cov) if full_cov: mean = tf.matrix_transpose(mean) cov_structure = "full" if full_cov else "diag" samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples) if full_cov: samples = tf.matrix_transpose(samples) return samples, mean, cov
def _dot(self, slist1, slist2, tf_embs):
    """
    Dot products between two sets of embeddings gathered from `tf_embs`.

    Rows of `tf_embs` are selected with `slist1` and `slist2`, and the first
    selection is multiplied by the transpose of the second.
    Note that the entries are plain dot products and can be negative.

    :param slist1: indices into `tf_embs` for the left operand.
    :param slist2: indices into `tf_embs` for the right operand.
    :param tf_embs: embedding tensor to gather from.
    :return: tensor of pairwise dot products.
    """
    matlist1 = tf.gather(tf_embs, slist1, name='matlist1')
    matlist2 = tf.gather(tf_embs, slist2, name='matlist2')
    # tf.batch_matmul was removed in TensorFlow 1.0; tf.matmul handles batched
    # operands and transposes the last two axes of the right operand itself.
    return tf.matmul(matlist1, matlist2, transpose_b=True)
def _expectation(p, kern, feat, mean, none, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <K_{Z, x_n} m(x_n)>_p(x_n)
    or the equivalent for MarkovGaussian

    Obtained by evaluating the expectation with the two objects swapped and
    transposing the last two axes of the result.

    :return: NxMxQ
    """
    swapped = expectation(p, mean, (kern, feat), nghp=nghp)
    return tf.matrix_transpose(swapped)
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <m(x_n) K_{x_n, Z}>_p(x_n)
    or the equivalent for MarkovGaussian

    Obtained by evaluating the expectation with the two objects swapped and
    transposing the last two axes of the result.

    :param nghp: forwarded unchanged to `expectation`.
    :return: NxDxM
    """
    # Forward nghp so a caller-supplied setting is not silently dropped.
    swapped = expectation(p, (kern, feat), mean, nghp=nghp)
    return tf.matrix_transpose(swapped)
def _conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False):
    """
    Most efficient routine to project L independent latent gps through a mixing matrix W.
    The mixing matrix is a member of the `SeparateMixedMok` and has shape P x L.

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: L x M x M
    - Kuf: L x M x N
    - Kff: L x N or L x N x N

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` for a detailed explanation of
      conditional in the single-output case.
    - See the multiouput notebook for more information about the multiouput framework.

    :return: the mixed mean and the mixed (co)variance.
    """
    logger.debug("conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")

    # Delegate to the independent-latent-GP conditional, always without output covariance.
    latent_conditional = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
    latent_mean, latent_var = latent_conditional(
        Xnew, feat, kern, f,
        full_cov=full_cov, q_sqrt=q_sqrt, full_output_cov=False, white=white,
    )  # N x L, L x N x N or N x L

    latent_mean = tf.matrix_transpose(latent_mean)  # L x N
    if not full_cov:
        latent_var = tf.matrix_transpose(latent_var)  # L x N (x N)

    mixed_mean = tf.tensordot(latent_mean, kern.W, [[0], [1]])  # N x P

    if full_output_cov:
        w_t = tf.matrix_transpose(kern.W)[:, None, :]  # L x 1 x P
        if full_cov:
            w_t = tf.expand_dims(w_t, axis=-1)  # L x 1 x P x 1
        var_times_w = tf.expand_dims(latent_var, axis=2) * w_t  # L x N x P (x N)
        mixed_var = tf.tensordot(var_times_w, kern.W, [[0], [1]])  # N x P (x N) x P
    elif full_cov:
        mixed_var = tf.tensordot(kern.W ** 2, latent_var, [[1], [0]])  # P x N (x N)
    else:
        mixed_var = tf.tensordot(latent_var, kern.W ** 2, [[0], [1]])  # N x P

    return mixed_mean, mixed_var
def _inverse(self, y, z, reuse):
    """
    Apply the 1x1 convolution built from the transposed matrix exponential of a
    skew-symmetrised weight to `y`.

    `z` and `reuse` are accepted but not used in this body.

    :return: the result of `self.conv(y, kernel)`.
    """
    # Only the last of the three trailing dimensions reported by gs() is needed.
    _, _, nc = gs(y)[1:]

    nptype = np.float32 if y.dtype == tf.float32 else np.float64
    initial_value = random_rotation_matrix(nc).astype(nptype)
    weight = tf.get_variable(
        "1x1_conv_weight",
        dtype=y.dtype,
        initializer=initial_value,
        trainable=True)

    # make skew symmetric
    skew = (weight - tf.matrix_transpose(weight)) / 2.0
    exp_skew = matrix_exponential(skew, name="MatrixExpFor1x1Convolution")

    transposed = tf.cast(tf.matrix_transpose(exp_skew), y.dtype)
    kernel = tf.reshape(transposed, shape=[1, 1, nc, nc])
    return self.conv(y, kernel)
def testBatchMatrixDynamicallyDefined(self):
    """Transpose a batch of two matrices fed through a placeholder created without a static shape."""
    pairs = [
        ([[1, 2, 3], [4, 5, 6]], [[1, 4], [2, 5], [3, 6]]),
        ([[11, 22, 33], [44, 55, 66]], [[11, 44], [22, 55], [33, 66]]),
    ]
    batch = [matrix for matrix, _ in pairs]  # Shape (2, 2, 3)
    want = [matrix_t for _, matrix_t in pairs]  # Shape (2, 3, 2)
    with self.test_session():
        batch_ph = tf.placeholder(tf.int32)
        result_op = tf.matrix_transpose(batch_ph)
        got = result_op.eval(feed_dict={batch_ph: batch})
        self.assertAllEqual(want, got)
def left_grad(U, S, V, dU, dV):
    """
    Build a gradient expression from the factors (U, S, V) and the incoming
    gradients (dU, dV), after exchanging the roles of U/V and dU/dV.

    NOTE(review): `s`, `KT` and `msym` are not defined in this block; they are
    presumably provided by an enclosing scope -- confirm against the caller.

    :return: the transposed sum of the three gradient terms.
    """
    # Exchange the roles of the left and right factors and of their gradients.
    U, V = V, U
    dU, dV = dV, dU

    inv_s = tf.matrix_diag(1 / (s + 1e-8))  # small constant guards the division
    D = tf.matmul(dU, inv_s)
    US = tf.matmul(U, S)

    term_direct = tf.matmul(D, V, transpose_b=True)

    neg_utd_diag = tf.matrix_diag(tf.matrix_diag_part(-tf.matmul(U, D, transpose_a=True)))
    term_diag = tf.matmul(tf.matmul(U, neg_utd_diag), V, transpose_b=True)

    dt_us = tf.matmul(D, US, transpose_a=True)
    projected = tf.matmul(V, -tf.matmul(V, dt_us), transpose_a=True)
    term_sym = tf.matmul(2 * tf.matmul(US, msym(KT * projected)), V, transpose_b=True)

    total = term_direct + term_diag + term_sym
    return tf.matrix_transpose(total)
def testBatchMatrix(self):
    """Transpose a statically defined batch of two matrices and check static shape and values."""
    pairs = [
        ([[1, 2, 3], [4, 5, 6]], [[1, 4], [2, 5], [3, 6]]),
        ([[11, 22, 33], [44, 55, 66]], [[11, 44], [22, 55], [33, 66]]),
    ]
    batch = [matrix for matrix, _ in pairs]  # Shape (2, 2, 3)
    want = [matrix_t for _, matrix_t in pairs]  # Shape (2, 3, 2)
    with self.test_session():
        result_op = tf.matrix_transpose(batch)
        self.assertEqual((2, 3, 2), result_op.get_shape())
        self.assertAllEqual(want, result_op.eval())
def make_attention_mat(self, x1, x2):
    """
    Attention matrix 1 / (1 + euclidean distance) between the positions of x1 and x2.

    Shapes, as given by the original comments:
      x1                          [batch_size, vec_dim, sentence_length, 1]
      tf.matrix_transpose(x2)     [batch_size, vec_dim, 1, sentence_length]
      x1 - transpose(x2)          [batch_size, vec_dim, sentence_length, sentence_length]
                                  (broadcasting yields one entry per pair of positions)
      result                      [batch_size, sentence_length, sentence_length]
    """
    pairwise_diff = x1 - tf.matrix_transpose(x2)
    squared_dist = tf.reduce_sum(tf.square(pairwise_diff), axis=1)
    # self.eclipse is added under the square root before taking it.
    euclidean = tf.sqrt(squared_dist + self.eclipse)
    return 1 / (1 + euclidean)
def _scaled_square_dist(self, X, X2):
    """
    Returns ((X - X2ᵀ)/lengthscales)².

    When X2 is None the distances are computed between X and itself.
    Due to the implementation and floating-point imprecision, the
    result may actually be very slightly negative for entries very
    close to each other.
    """
    X = X / self.lengthscales
    Xs = tf.reduce_sum(tf.square(X), axis=-1, keepdims=True)

    if X2 is None:
        other, other_sq = X, Xs
    else:
        other = X2 / self.lengthscales
        other_sq = tf.reduce_sum(tf.square(other), axis=-1, keepdims=True)

    # |x|^2 + |x'|^2 - 2 x.x'
    cross = -2 * tf.matmul(X, other, transpose_b=True)
    return cross + (Xs + tf.matrix_transpose(other_sq))
def recursive_kernel(self, points1, points2, depth):
    """
    Evaluate the kernel between `points1` and `points2` recursively over `depth`.

    At depth 1 the squared magnitudes and the inner products come directly from
    the inputs; at greater depth they come from `diag_recursive_kernel` and from
    this function evaluated at `depth - 1`.
    """
    if depth == 1:
        sq_norm1 = tf.reduce_sum(points1**2, 1, keep_dims=True)
        sq_norm2 = tf.reduce_sum(points2**2, 1, keep_dims=True)
        inner = tf.matmul(points1, points2, transpose_b=True)  # points1 @ points2.T
    else:
        previous = depth - 1
        sq_norm1 = tf.expand_dims(self.diag_recursive_kernel(points1, previous), 1)
        sq_norm2 = tf.expand_dims(self.diag_recursive_kernel(points2, previous), 1)
        inner = self.recursive_kernel(points1, points2, previous)

    mag_prod = tf.sqrt(sq_norm1) * tf.matrix_transpose(tf.sqrt(sq_norm2))

    numerator = 2 * inner
    denominator = tf.sqrt(1 + 2 * sq_norm1) * tf.matrix_transpose(tf.sqrt(1 + 2 * sq_norm2))
    cos_angles = numerator / denominator

    scale = (mag_prod**self.degree) / np.pi
    return scale * self.angular_func(cos_angles)
def forward_pass(self, embed, weights_input, biases_input, weights_output):
    """
    Forward pass described in "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014): one hidden layer with a cubic activation followed
    by an output projection.

    :param embed: input embeddings; transposed before the first matmul.
    :param weights_input: weights of the hidden layer.
    :param biases_input: biases of the hidden layer.
    :param weights_output: weights of the output projection.
    :return: the transposed output projection.
    """
    print('forwadpass inputs')
    print(embed, weights_input, biases_input, weights_output)

    pre_activation = tf.add(tf.matmul(weights_input, tf.matrix_transpose(embed)), biases_input)
    # Cubic activation, written as an element-wise power against a tensor of 3.0s.
    exponent = tf.fill(tf.shape(pre_activation), 3.0)
    layer1 = tf.math.pow(pre_activation, exponent)
    print('layer1')
    print(layer1)

    p = tf.matrix_transpose(tf.matmul(weights_output, layer1))
    print('p')
    print(p)
    return p
def CNN_layer(variable_scope, x1, x2, d):
    """
    One convolution + pooling layer over the two inputs, with optional attention.

    x1, x2 = [batch, d, s, 1] (per the original comment).
    `model_type` selects where attention is applied: before the convolution for
    "ABCNN1"/"ABCNN3", and on the pooling for "ABCNN2"/"ABCNN3".

    :return: left_wp, left_ap, right_wp, right_ap
    """
    with tf.variable_scope(variable_scope):
        if model_type in ("ABCNN1", "ABCNN3"):
            with tf.name_scope("att_mat"):
                aW = tf.get_variable(
                    name="aW",
                    shape=(s, d),
                    initializer=tf.contrib.layers.xavier_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg))

                # [batch, s, s]
                att_mat = make_attention_mat(x1, x2)

                # [batch, s, s] * [s,d] => [batch, s, d]
                # matrix transpose => [batch, d, s]
                # expand dims => [batch, d, s, 1]
                left_feature = tf.einsum("ijk,kl->ijl", att_mat, aW)
                x1_a = tf.expand_dims(tf.matrix_transpose(left_feature), -1)
                right_feature = tf.einsum("ijk,kl->ijl", tf.matrix_transpose(att_mat), aW)
                x2_a = tf.expand_dims(tf.matrix_transpose(right_feature), -1)

                # [batch, d, s, 2]
                x1 = tf.concat([x1, x1_a], axis=3)
                x2 = tf.concat([x2, x2_a], axis=3)

        left_conv = convolution(name_scope="left", x=pad_for_wide_conv(x1), d=d)
        right_conv = convolution(name_scope="right", x=pad_for_wide_conv(x2), d=d)

        left_attention = None
        right_attention = None
        if model_type in ("ABCNN2", "ABCNN3"):
            # [batch, s+w-1, s+w-1]
            att_mat = make_attention_mat(left_conv, right_conv)
            # [batch, s+w-1], [batch, s+w-1]
            left_attention = tf.reduce_sum(att_mat, axis=2)
            right_attention = tf.reduce_sum(att_mat, axis=1)

        left_wp = w_pool(variable_scope="left", x=left_conv, attention=left_attention)
        left_ap = all_pool(variable_scope="left", x=left_conv)
        right_wp = w_pool(variable_scope="right", x=right_conv, attention=right_attention)
        right_ap = all_pool(variable_scope="right", x=right_conv)

        return left_wp, left_ap, right_wp, right_ap
def scNBMF_model(G, C, k, variable_idx, sample_idx, T_, y_, psi, penalty_type, lambda_for_l1, eps=1e-8):
    '''
    scNBMF model

    G: Number of genes
    C: Number of cells
    k: Number of factors (second dimension of the weights, first of the PCs)
    variable_idx: Gene index
    sample_idx: Cell index
    T_: Total counts or read depth
    y_: Count expression matrix
    psi: Dispersion parameters computed by edgeR
    penalty_type: 1 means l1_penalty and others means l2_penalty
    lambda_for_l1: The coefficient of the l1 or l2 penalty
    eps: Small constant added inside the logarithms

    return:
        LL: loss function for the model
    '''
    # Trainable factors and a scalar offset, randomly / zero initialised.
    weights = tf.Variable(np.random.randn(G, k), name='weights')
    components = tf.Variable(np.random.randn(k, C), name='PCs')
    scaling = tf.Variable(np.array([0.]), name='Scaling')

    # Pick the rows matching each (gene, cell) observation.
    gene_weights = tf.gather(weights, variable_idx)
    gene_psi = tf.gather(psi, variable_idx)
    cell_components = tf.gather(tf.matrix_transpose(components), sample_idx)

    eta_ = tf.reduce_sum(gene_weights * cell_components, 1)
    mu_ = tf.exp(eta_ + scaling + tf.log(T_))

    LL = tf.reduce_sum(y_ * tf.log(mu_ + eps) - (y_ + gene_psi) * tf.log(mu_ + gene_psi + eps))

    penalty_fn = get_weight if penalty_type == 1 else get_weight2
    Wpenalty = penalty_fn(weights, lambda_for_l1)

    beta = 1
    return tf.reduce_mean(LL + beta * Wpenalty)
def kronecker_vec(self, X, m, n):
    """
    Rearrange the rows of `X` into an n-by-m grid of transposed blocks.

    Row `i * m + j` along the second-to-last axis of `X` is reshaped to
    [..., n, m], transposed, and placed at grid position (i, j). Blocks sharing
    the same `i` are concatenated along axis -2, and the resulting groups are
    concatenated along axis -1.
    """
    leading_dim = tf.shape(X)[:-2]

    def _block(idx):
        # One row of X, viewed as an n x m matrix and transposed.
        target_shape = tf.concat([leading_dim, [n, m]], 0)
        return tf.matrix_transpose(tf.reshape(X[..., idx, :], target_shape))

    grid = [[_block(i * m + j) for j in range(m)] for i in range(n)]
    return tf.concat([tf.concat(group, -2) for group in grid], -1)
def generator(inputs, is_training=True):
    """
    Unroll an LSTM caption generator twice from the same initial state: once
    sampling each token from a categorical distribution, once picking the
    arg-max token (the baseline).

    The output projection shares its weights with the embedding matrix
    (`softmax_w` is the transposed embedding).

    :param inputs: pair whose first element is the feature tensor fed to the cell first.
    :param is_training: when true the cell is wrapped in dropout.
    :return: sequence, logits, log_probs, baseline -- each stacked along axis 1.
    """
    feat, _ = inputs

    embedding = tf.get_variable(
        name='embedding',
        shape=[FLAGS.vocab_size, FLAGS.emb_dim],
        initializer=tf.random_uniform_initializer(-0.08, 0.08))
    softmax_w = tf.matrix_transpose(embedding)
    softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])
    batch_size = tf.shape(feat)[0]

    cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.mem_dim)
    if is_training:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, FLAGS.keep_prob, FLAGS.keep_prob)
    zero_state = cell.zero_state(batch_size, tf.float32)

    sequence, logits, log_probs, rnn_outs = [], [], [], []

    # Prime the cell with the features; both roll-outs start from this state.
    _, state = cell(feat, zero_state)
    initial_state = state
    tf.get_variable_scope().reuse_variables()

    def _advance(token_ids, current_state):
        # Embed the tokens, step the cell, and project the output to vocabulary logits.
        embedded = tf.nn.embedding_lookup(embedding, token_ids)
        rnn_out, next_state = cell(embedded, current_state)
        logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b)
        return rnn_out, logit, next_state

    # Sampled roll-out.
    for t in range(FLAGS.max_caption_length):
        if t == 0:
            rnn_inp = tf.zeros([batch_size], tf.int32) + FLAGS.start_id
        rnn_out, logit, state = _advance(rnn_inp, state)
        rnn_outs.append(rnn_out)
        categorical = tf.contrib.distributions.Categorical(logits=logit)
        fake = categorical.sample()
        log_prob = categorical.log_prob(fake)
        sequence.append(fake)
        log_probs.append(log_prob)
        logits.append(logit)
        rnn_inp = fake
    sequence = tf.stack(sequence, axis=1)
    log_probs = tf.stack(log_probs, axis=1)
    logits = tf.stack(logits, axis=1)

    # Greedy roll-out from the same initial state.
    baseline = []
    state = initial_state
    for t in range(FLAGS.max_caption_length):
        if t == 0:
            rnn_inp = tf.zeros([batch_size], tf.int32) + FLAGS.start_id
        _, logit, state = _advance(rnn_inp, state)
        fake = tf.argmax(logit, axis=1, output_type=tf.int32)
        baseline.append(fake)
        rnn_inp = fake
    baseline = tf.stack(baseline, axis=1)

    return sequence, logits, log_probs, baseline
def get_matrix_tree(r, A):
    """
    Build the matrix `d` from the scores `r` and the matrix `A` via the inverse
    of (diag(sum(A, 1)) - A + diag(r)).

    The first slice along axis 1 of the result is `r` times the diagonal of that
    inverse; the remaining slices are `A` times the broadcast diagonal of the
    inverse minus `A` times the transposed inverse.
    """
    column_sums = tf.reduce_sum(A, 1)
    laplacian = tf.matrix_diag(column_sums) - A
    augmented = laplacian + tf.matrix_diag(r)
    inverse = tf.matrix_inverse(augmented)  # batch_l, doc_l, doc_l

    inverse_diag = tf.matrix_diag_part(inverse)
    d0 = tf.multiply(r, inverse_diag)

    diag_row = tf.matrix_transpose(tf.expand_dims(inverse_diag, 2))
    from_diag = tf.multiply(A, diag_row)
    from_inverse = tf.multiply(A, tf.matrix_transpose(inverse))
    d = from_diag - from_inverse

    return tf.concat([tf.expand_dims(d0, [1]), d], 1)
def _expectation(p, lin_kern, feat1, rbf_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_lin_{.,.} :: Linear kernel
        - K_rbf_{.,.} :: RBF kernel

    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    Obtained by evaluating the expectation with the two kernels swapped and
    transposing the last two axes of the result.

    :param nghp: forwarded unchanged to `expectation`.
    :return: NxM1xM2
    """
    # Forward nghp so a caller-supplied setting is not silently dropped.
    swapped = expectation(p, (rbf_kern, feat2), (lin_kern, feat1), nghp=nghp)
    return tf.matrix_transpose(swapped)
def __affine_image(self, imgs, r, t):
    """
    Warp `imgs` with the transform built from matrix `r` and translation `t`.

    The Tensor [imgs].format is [NHWC] (per the original comment).
    NOTE(review): the reshape to [-1, 6] after padding one column suggests `r`
    is [batch, 2, 2] -- confirm against the caller.
    """
    inverse_transposed = tf.matrix_transpose(tf.matrix_inverse(r))

    # Append a zero column to every row, flatten to 6 values, then append two
    # more zeros to reach the 8-parameter projective-transform layout.
    with_zero_column = tf.pad(inverse_transposed, [[0, 0], [0, 0], [0, 1]], mode='CONSTANT')
    rm = tf.reshape(with_zero_column, [-1, 6])
    rm = tf.pad(rm, [[0, 0], [0, 2]], mode='CONSTANT')

    tm = tf.contrib.image.translations_to_projective_transforms(tf.reshape(t, [-1, 2]))
    rtm = tf.contrib.image.compose_transforms(rm, tm)
    return tf.contrib.image.transform(imgs, rtm, "BILINEAR")
def forward_pass_parallel(self, embed, weights_input, biases_input, weights_input2, biases_input2,
                          weights_input3, biases_input3, weights_output):
    """
    Forward pass with three parallel hidden branches that are summed.

    `embed` is split along axis 1 into three parts whose widths are
    `Config.embedding_size` times `Config.n_Tokens1`, `n_Tokens2` and
    `n_Tokens3`. Each part goes through its own affine layer followed by a
    cubic activation; the three results are added and projected by
    `weights_output`.

    :param embed: concatenated input embeddings.
    :param weights_input: weights of the first branch.
    :param biases_input: biases of the first branch.
    :param weights_input2: weights of the second branch.
    :param biases_input2: biases of the second branch.
    :param weights_input3: weights of the third branch.
    :param biases_input3: biases of the third branch.
    :param weights_output: weights of the output projection.
    :return: the transposed output projection.
    """
    print('forwadpass inputs')
    print(embed, weights_input, biases_input, weights_input2, biases_input2, weights_input3, biases_input3,
          weights_output)

    split_sizes = [
        Config.embedding_size * Config.n_Tokens1,
        Config.embedding_size * Config.n_Tokens2,
        Config.embedding_size * Config.n_Tokens3,
    ]
    embed1, embed2, embed3 = tf.split(embed, split_sizes, 1)
    print(embed1, embed2, embed3)

    def _cubic_branch(weights, part, biases):
        # Affine layer followed by the cubic activation.
        return tf.math.pow(tf.add(tf.matmul(weights, tf.transpose(part)), biases), 3.0)

    layer1 = _cubic_branch(weights_input, embed1, biases_input)
    layer2 = _cubic_branch(weights_input2, embed2, biases_input2)
    # The third branch must use its own biases (it previously reused biases_input2).
    layer3 = _cubic_branch(weights_input3, embed3, biases_input3)
    print(layer1, layer2, layer3)

    layer123 = layer1 + layer2 + layer3
    print('--------')
    print('layer123', layer123)
    print('--------')
    print('weights_output', weights_output)

    p = tf.matrix_transpose(tf.matmul(weights_output, layer123))
    print('--------')
    print('p', p)
    return p
def LSTM_layer(self, variables_scope, x1, x2):
    """
    Run both inputs through the shared bi-RNN and build attention outputs.

    :return: L_att, R_att, L_att_2, R_att_2 -- first-level attention over the
        two RNN outputs, and second-level attention over the attention-matrix
        features.
    """
    with tf.variable_scope(variables_scope):
        # reconstruct squeeze data
        x1 = self.squeeze_data(x1)
        x2 = self.squeeze_data(x2)

        # input data to birnn (the second call reuses the variables of the first)
        left_outputs = self.birnn_x1(x1)
        right_outputs = self.birnn_x1(x2, reuse=True)
        _length = len(left_outputs)

        L_rnn = tf.transpose(left_outputs, [1, 0, 2])
        R_rnn = tf.transpose(right_outputs, [1, 0, 2])

        # Add a trailing axis and swap axes 1 and 2 for the attention matrix.
        left_for_att = tf.transpose(tf.expand_dims(L_rnn, -1), [0, 2, 1, 3])
        right_for_att = tf.transpose(tf.expand_dims(R_rnn, -1), [0, 2, 1, 3])

        # attention matrix
        with tf.name_scope("rnn_att_mat"):
            aW = tf.get_variable(name="aW", shape=(_length, self.lstm_cell * 2))
            att_mat = self.att_mat(left_for_att, right_for_att)

            left_feature = tf.einsum("ijk,kl->ijl", att_mat, aW)
            x1_a = tf.transpose(tf.matrix_transpose(left_feature), [0, 2, 1])

            right_feature = tf.einsum("ijk,kl->ijl", tf.transpose(att_mat, [0, 2, 1]), aW)
            x2_a = tf.transpose(tf.matrix_transpose(right_feature), [0, 2, 1])

        # attention layer
        L_att = self.Attention_1(L_rnn)
        R_att = self.Attention_1(R_rnn, reuse=True)

        with tf.variable_scope("att2att"):
            L_att_2 = self.Attention_2(x1_a)
            R_att_2 = self.Attention_2(x2_a, reuse=True)

        return L_att, R_att, L_att_2, R_att_2
def batch_bilinear(x, weights_w, weights_h):
    """
    Bilinearly resample `x` at coordinates derived from `weights_w` / `weights_h`.

    x: [ batch_size, channels, height, width ]
    coords_w: [ batch_size, channels, width ]
    coords_h: [ batch_size, channels, height ]
    (shapes as given by the original comments)

    The coordinate tensors are also named and added to a graph collection.
    """
    x_shape = x.get_shape().as_list()

    coords_w = tf.identity(weights_to_coords(weights_w), name='coords_w')
    coords_h = tf.identity(weights_to_coords(weights_h), name='coords_h')
    tf.add_to_collection('70f92c137c01d89c6477c5ef22411bfe', [coords_w, coords_h])

    # idx__ : [ batch_size, channels, _, 2 ], 2 = (#batch, #channel)
    mesh = tf.meshgrid(tf.range(x_shape[1]), tf.range(x_shape[0]))
    idx = tf.expand_dims(tf.stack([mesh[1], mesh[0]], -1), 2)
    idx_h = tf.tile(idx, [1, 1, x_shape[2], 1])
    idx_w = tf.tile(idx, [1, 1, x_shape[3], 1])

    def _gather_index(prefix, coords, rounding):
        # (batch, channel) prefix followed by the rounded coordinate.
        rounded = tf.expand_dims(tf.cast(rounding(coords), tf.int32), -1)
        return tf.concat([prefix, rounded], -1)

    h_floor_idx = _gather_index(idx_h, coords_h, tf.floor)
    h_ceil_idx = _gather_index(idx_h, coords_h, tf.ceil)
    w_floor_idx = _gather_index(idx_w, coords_w, tf.floor)
    w_ceil_idx = _gather_index(idx_w, coords_w, tf.ceil)

    # Gather along the height first, then swap the last two axes so the second
    # gather indexes along the width.
    rows_floor = tf.matrix_transpose(tf.gather_nd(x, h_floor_idx))
    rows_ceil = tf.matrix_transpose(tf.gather_nd(x, h_ceil_idx))

    vals_00 = tf.gather_nd(rows_floor, w_floor_idx)
    vals_01 = tf.gather_nd(rows_floor, w_ceil_idx)
    vals_10 = tf.gather_nd(rows_ceil, w_floor_idx)
    vals_11 = tf.gather_nd(rows_ceil, w_ceil_idx)

    # Fractional parts used as interpolation weights.
    coords_x = tf.expand_dims(coords_w - tf.floor(coords_w), 3)
    coords_y = tf.expand_dims(coords_h - tf.floor(coords_h), 2)

    vals = vals_00 + \
        (vals_10 - vals_00) * coords_x + \
        (vals_01 - vals_00) * coords_y + \
        (vals_11 + vals_00 - vals_10 - vals_01) * coords_x * coords_y
    return vals
def __net_load_constants(self, variable_scope_name):
    """
    Create the non-trainable float32 variables holding the fixed constants.

    Each variable is initialised from the matching attribute on `self`.
    `cam_pos` and `light_poses` are additionally recorded in `self.endPoints`
    under `variable_scope_name` + their name.

    :return: view_mat_for_normal_t, view_mat_model_t, light_normals, light_poses, cam_pos
        (the two view matrices are returned transposed).
    """
    def _frozen(name, value):
        # Non-trainable variable with the shape and contents of `value`.
        initializer = tf.constant_initializer(value)
        return tf.get_variable(name=name,
                               dtype=tf.float32,
                               shape=value.shape,
                               trainable=False,
                               initializer=initializer)

    view_mat_for_normal_t = tf.matrix_transpose(_frozen("view_mat_for_normal", self.mat_for_normal))
    view_mat_model_t = tf.matrix_transpose(_frozen("view_mat_model", self.mat_model))

    cam_pos = _frozen("cam_pos", self.cam_pos)  # shape=[3]
    self.endPoints[variable_scope_name + "cam_pos"] = cam_pos

    light_normals = _frozen("light_normals", self.light_normals)

    light_poses = _frozen("light_poses", self.light_poses)
    self.endPoints[variable_scope_name + "light_poses"] = light_poses

    return view_mat_for_normal_t, view_mat_model_t, light_normals, light_poses, cam_pos
def __init__(self, hparams, iterator, cv=None):
    """
    Build the text encoder and, when candidate vectors are given, the scoring graph.

    :param hparams: hyper-parameters (`size_vocab`, `d`, `forget_bias` are read here).
    :param iterator: input iterator (`txt1`, `len_txt1`, and with `cv` also
        `txt2`, `len_txt2`, `indexes` are read here).
    :param cv: optional candidate vectors; when None only the first text is
        encoded and a saver is created.
    """
    self.hparams = hparams
    self.iterator = iterator
    dim = self.hparams.d

    # To compute RNN vectors, we need W and rnn_cell and dynamic_rnn
    self.W = tf.get_variable('embeddings', shape=[self.hparams.size_vocab, dim])
    txt1_vectors = tf.nn.embedding_lookup(self.W, self.iterator.txt1)
    rnn_cell = rnn.BasicLSTMCell(dim, self.hparams.forget_bias)

    with tf.variable_scope('rnn'):
        _, state_txt1 = tf.nn.dynamic_rnn(
            cell=rnn_cell,
            inputs=txt1_vectors,
            sequence_length=self.iterator.len_txt1,
            dtype=tf.float32)
    vec_txt1 = state_txt1.h
    self.vec_txt1 = vec_txt1

    if cv is None:
        self.saver = tf.train.Saver(tf.global_variables())
        return

    self.M = tf.Variable(tf.eye(dim), name='M')

    # Encode the second text with the same RNN weights.
    txt2_vectors = tf.nn.embedding_lookup(self.W, self.iterator.txt2)
    with tf.variable_scope('rnn', reuse=True):
        _, state_txt2 = tf.nn.dynamic_rnn(
            cell=rnn_cell,
            inputs=txt2_vectors,
            sequence_length=self.iterator.len_txt2,
            dtype=tf.float32)
    vec_txt2 = state_txt2.h

    # The saver is created before the candidate-vector variable exists, so it
    # only covers the variables defined up to this point.
    self.saver = tf.train.Saver(tf.global_variables())

    self.WC = tf.get_variable('candidate_vectors', shape=[cv.shape[0], cv.shape[1]])
    self.WC_assign = tf.assign(self.WC, cv)
    self.candidate_vectors = tf.nn.embedding_lookup(self.WC, self.iterator.indexes)

    # Concatenate bs x 1 x d with bs x NC x d; Result bs x NC+1 x d
    ground_truth = tf.reshape(vec_txt2, [-1, 1, dim])
    self.gt_with_candidate_vectors = tf.concat([ground_truth, self.candidate_vectors], 1)

    query = tf.reshape(tf.matmul(vec_txt1, self.M), [-1, 1, dim])
    scores = tf.matmul(query, tf.matrix_transpose(self.gt_with_candidate_vectors))
    self.scores = tf.reshape(scores, [tf.shape(vec_txt1)[0], -1])
def unpack_smm(theta_smm, name='unpack_theta_smm'):
    """
    Extract point-estimates for Student-t mixture components.

    :param theta_smm: pair (mu, L_k_raw).
    :param name: name scope for the created ops.
    :return: tf.tuple of (mu, Sigma), where Sigma = L L^T and L is the
        lower-triangular part of `L_k_raw` with a softplus applied to its diagonal.
    """
    with tf.name_scope(name):
        mu, L_k_raw = theta_smm

        # make sure that L is a valid Cholesky decomposition and compute scaling matrix
        with tf.name_scope('compute_prec'):
            lower = tf.linalg.LinearOperatorLowerTriangular(L_k_raw, name='to_triL').to_dense()
            positive_diag = tf.nn.softplus(tf.matrix_diag_part(lower), name='softplus_diag')
            lower = tf.matrix_set_diag(lower, positive_diag, name='L')
            Sigma = tf.matmul(lower, tf.matrix_transpose(lower), name='precision')

        return tf.tuple((mu, Sigma), name='theta_smm_unpacked')
def prior_fn(latent_dimension):
    """
    Build the trainable 10-component GMM prior over a `latent_dimension`-dimensional space.

    The initial covariances come from `util.positive_definate_initializer` and
    are routed through `tf.Print`. The eigenvalues of their symmetrised form are
    added to the graph but not used in the returned model.

    :return: the `.model` attribute of the constructed GMM.
    """
    num_components = 10
    cov_init = util.positive_definate_initializer([num_components] + [latent_dimension] * 2)

    symmetrised = tf.divide(cov_init + tf.matrix_transpose(cov_init), 2., name='symmetrised')
    _eigvals = tf.self_adjoint_eig(symmetrised)[0]

    cov_init = tf.Print(cov_init, [cov_init])
    gmm = parameterized_distributions.gmm.GMM(num_components,
                                              latent_dimension,
                                              cov_init=cov_init,
                                              trainable=True)
    return gmm.model
def __call__(self, x_1, x_2, reuse=False):
    """
    Return 1 / (1 + euclidean distance) between `x_1` and the transposed `x_2`,
    with the squared differences summed over axis 1.

    :param reuse: when true, variables in this object's scope are reused.
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        pairwise_diff = x_1 - tf.matrix_transpose(x_2)
        euclidean = tf.sqrt(tf.reduce_sum(tf.square(pairwise_diff), axis=1))
        return 1 / (1 + euclidean)
def get_pdist2(self, X, Y):
    """
    Pairwise squared distances between the rows of X and the rows of Y,
    computed as |x|^2 - 2 x.y + |y|^2.

    Rank-1 inputs are promoted to a single row. The result is also stored on
    `self.pdist2`.
    """
    if X.shape.ndims == 1:
        X = X[None, :]
    if Y.shape.ndims == 1:
        Y = Y[None, :]
    assert X.shape[1] == Y.shape[1]

    x_sq = tf.reduce_sum(tf.square(X), axis=1, keep_dims=True)
    cross = 2.0 * tf.matmul(X, Y, transpose_b=True)
    y_sq_row = tf.matrix_transpose(tf.reduce_sum(tf.square(Y), axis=1, keep_dims=True))

    pdist2 = (x_sq - cross) + y_sq_row
    self.pdist2 = pdist2
    return pdist2
def mult_mod(M, N, left_right):
    """
    Multiply every matrix in the batch `M` by the single matrix `N`.

    :param M: tensor of size (batch_size, n, m).
    :param N: matrix of size (m, p) for 'r', or (p, n) for 'l'.
    :param left_right: 'r' to compute M[b] @ N, 'l' to compute N @ M[b].
    :return: tensor of size (batch_size, n, p) for 'r', or (batch_size, p, m) for 'l'.
    :raises ValueError: if `left_right` is neither 'r' nor 'l'.
    """
    # Reject unknown modes up front; previously they fell through to the
    # return statement with the result never assigned.
    if left_right not in ('r', 'l'):
        raise ValueError("left_right must be 'r' or 'l', got {!r}".format(left_right))

    tensor_shape = M.shape
    dims = N.shape

    if left_right == 'r':
        n = tensor_shape[1].value
        m = dims[0]
        p = dims[1]
        # Fold the batch into the rows, multiply once, and unfold again.
        y = tf.reshape(tf.reshape(M, [-1, m]) @ N, [-1, n, p])
    else:
        m = tensor_shape[2].value
        p = dims[0]
        n = dims[1]
        # N @ M == (M^T @ N^T)^T, which lets the same fold/unfold trick apply.
        MT = tf.matrix_transpose(M)
        NT = tf.matrix_transpose(N)
        MTNT = tf.reshape(tf.reshape(MT, [-1, n]) @ NT, [-1, m, p])
        y = tf.matrix_transpose(MTNT)
    return y
def testBatchMatrixDynamicallyDefined(self):
    """Transpose a batch of two matrices fed through a placeholder created without a static shape."""
    first, first_t = [[1, 2, 3], [4, 5, 6]], [[1, 4], [2, 5], [3, 6]]
    second, second_t = [[11, 22, 33], [44, 55, 66]], [[11, 44], [22, 55], [33, 66]]
    batch = [first, second]  # Shape (2, 2, 3)
    want = [first_t, second_t]  # Shape (2, 3, 2)
    with self.test_session():
        batch_ph = tf.placeholder(tf.int32)
        result_op = tf.matrix_transpose(batch_ph)
        got = result_op.eval(feed_dict={batch_ph: batch})
        self.assertAllEqual(want, got)
def __init__(self, tensor_x, tensor_y):
    """
    :param tensor_x: (..., channel_x, sample)
    :param tensor_y: (..., channel_y, sample)

    Stores the inputs, their means over the last axis, the matrix
    M = inv(S_xx) S_xy inv(S_yy) S_yx built from the centred inputs, and its trace.
    """
    self.x = tensor_x
    self.y = tensor_y
    self.mean_x = tf.reduce_mean(tensor_x, axis=-1, keepdims=True)
    self.mean_y = tf.reduce_mean(tensor_y, axis=-1, keepdims=True)

    centred_x = self.x - self.mean_x
    centred_y = self.y - self.mean_y

    def _scatter(a, b):
        # a @ b^T over the last two axes.
        return tf.matmul(a, tf.matrix_transpose(b))

    s_xx = _scatter(centred_x, centred_x)
    s_yy = _scatter(centred_y, centred_y)
    s_xy = _scatter(centred_x, centred_y)
    s_yx = _scatter(centred_y, centred_x)

    x_part = tf.linalg.inv(s_xx) @ s_xy
    xy_part = x_part @ tf.linalg.inv(s_yy)
    self.M = xy_part @ s_yx
    self.rho = tf.linalg.trace(self.M)  # \sum\rho^2
def AffineTransformLayer(imgs, r, t):
    """
    Warp `imgs` with the transform built from matrix `r` and translation `t`,
    using bilinear interpolation.

    NOTE(review): the reshape to [-1, 6] after padding one column suggests `r`
    is [batch, 2, 2] -- confirm against the caller.
    """
    inverse_transposed = tf.matrix_transpose(tf.matrix_inverse(r))

    # Append a zero column to every row, flatten to 6 values, then append two
    # more zeros to reach the 8-parameter projective-transform layout.
    with_zero_column = tf.pad(inverse_transposed, [[0, 0], [0, 0], [0, 1]], mode='CONSTANT')
    rm = tf.pad(tf.reshape(with_zero_column, [-1, 6]), [[0, 0], [0, 2]], mode='CONSTANT')

    tm = tf.contrib.image.translations_to_projective_transforms(tf.reshape(t, [-1, 2]))
    rtm = tf.contrib.image.compose_transforms(rm, tm)
    return tf.contrib.image.transform(imgs, rtm, "BILINEAR")
def gram(layer, factor):
    """
    Get style with gram matrix.

    :param layer: activations with shape (batch, height, width, channels).
    :param factor: divisor applied to the total element count before normalising.
    :return: per-image gram matrices, divided by size(layer) / factor.
    """
    dims = tf.shape(layer)
    num_images, num_filters = dims[0], dims[3]
    size = tf.size(layer)

    # Flatten the spatial axes so each image becomes a (positions, channels) matrix.
    flat_shape = tf.stack([num_images, -1, num_filters])
    filters = tf.reshape(layer, flat_shape)

    unnormalised = tf.matmul(tf.matrix_transpose(filters), filters)
    normaliser = tf.to_float(size / factor)
    return unnormalised / normaliser
def compute_adjacency_matrix(hidden_features, inputs_data_label, num_task):
    """
    Build one signed adjacency matrix per task and stack them.

    For each task the matrix is exp(-pairwise distance of the hidden features)
    multiplied element-wise by (2 * labels @ labels^T - 1).

    :return: the stacked per-task adjacency matrices.
    """
    per_task_features = change_datastruct(hidden_features, num_task)
    per_task_labels = change_datastruct(inputs_data_label, num_task)

    def _task_adjacency(task):
        neg_dist = -compute_pairwise_dist_tf(per_task_features[task])
        labels = per_task_labels[task]
        sign = 2 * tf.matmul(labels, tf.matrix_transpose(per_task_labels[task])) - 1
        return tf.exp(neg_dist) * sign

    return tf.stack([_task_adjacency(task) for task in range(num_task)])
def gp_conditional(z, fz, x, full_cov, kernel, Kzz_chol=None):
    '''
    GP conditional f(x) | f(z)==fz

    :param z: shape [n_z, n_covariates]
    :param fz: shape [n_particles, n_z]
    :param x: shape [n_x, n_covariates]
    :param full_cov: if true return a multivariate normal with full covariance,
        otherwise an independent normal per point.
    :param kernel: callable kernel, also providing `Kdiag`.
    :param Kzz_chol: optional precomputed Cholesky factor of kernel(z, z).
    :return: a distribution with shape [n_particles, n_x]
    '''
    n_z = int(z.shape[0])
    n_particles = tf.shape(fz)[0]

    if Kzz_chol is None:
        Kzz_chol = tf.cholesky(kernel(z, z))

    # Mean[fx|fz] = Kxz @ inv(Kzz) @ fz; Cov[fx|z] = Kxx - Kxz @ inv(Kzz) @ Kzx
    # With ill-conditioned Kzz, the inverse is often asymmetric, which
    # breaks further cholesky decomposition. We compute a symmetric one.
    chol_inv = tf.matrix_triangular_solve(Kzz_chol, tf.eye(n_z))
    Kzz_inv = tf.matmul(tf.transpose(chol_inv), chol_inv)

    Kxz = kernel(x, z)  # [n_x, n_z]
    Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
    mean = tf.matmul(fz, tf.matrix_transpose(Kxz_Kzz_inv))

    if full_cov:
        cov = kernel(x, x) - tf.matmul(Kxz_Kzz_inv, tf.transpose(Kxz))
        # Same Cholesky factor for every particle.
        cov_chol = tf.tile(tf.expand_dims(tf.cholesky(cov), 0), [n_particles, 1, 1])
        return zs.distributions.MultivariateNormalCholesky(mean, cov_chol)

    # diag(AA^T) = sum(A**2, axis=-1)
    prior_var = kernel.Kdiag(x)
    explained = tf.reduce_sum(tf.matmul(Kxz, tf.matrix_transpose(chol_inv)) ** 2, axis=-1)
    std = tf.sqrt(prior_var - explained)
    return zs.distributions.Normal(mean=mean, std=std, group_ndims=1)
def create(self, fixed_embeddings, linked_embeddings, context_tensor_arrays, attention_tensor,
           during_training, stride=None):
    """Requires |stride|; otherwise see base class."""
    check.NotNone(stride, 'BiaffineDigraphNetwork requires "stride" and must be called '
                  'in the bulk feature extractor component.')

    # TODO(googleuser): Add dropout during training.
    del during_training

    # Retrieve (possibly averaged) weights.
    component = self._component
    weights_arc = component.get_variable('weights_arc')
    weights_source = component.get_variable('weights_source')
    root = component.get_variable('root')

    # Extract the source and target token activations.  Use |stride| to collapse
    # batch and beam into a single dimension.
    sources = network_units.lookup_named_tensor('sources', linked_embeddings)
    targets = network_units.lookup_named_tensor('targets', linked_embeddings)
    sources_bxnxs = tf.reshape(sources.tensor, [stride, -1, self._source_dim])
    targets_bxnxt = tf.reshape(targets.tensor, [stride, -1, self._target_dim])
    num_tokens = tf.shape(sources_bxnxs)[1]

    # Compute the arc, source, and root potentials.
    arcs_bxnxn = digraph_ops.ArcPotentialsFromTokens(sources_bxnxs, targets_bxnxt, weights_arc)
    sources_bxnxn = digraph_ops.ArcSourcePotentialsFromTokens(sources_bxnxs, weights_source)
    roots_bxn = digraph_ops.RootPotentialsFromTokens(root, targets_bxnxt, weights_arc, weights_source)

    # Combine them into a single matrix with the roots on the diagonal.
    combined_bxnxn = digraph_ops.CombineArcAndRootPotentials(arcs_bxnxn + sources_bxnxn, roots_bxn)

    # The adjacency matrix currently has sources on rows and targets on columns,
    # but we want targets on rows so that maximizing within a row corresponds to
    # selecting sources for a given target.
    targets_on_rows_bxnxn = tf.matrix_transpose(combined_bxnxn)

    return [tf.reshape(targets_on_rows_bxnxn, [-1, num_tokens])]
def _updated_mat(self, mat, v, diag):
    """Return the evaluated dense matrix whose square root is an update of `mat`.

    The result is A = (mat + v D v^T) (mat + v D v^T)^T, where D is the
    diagonal matrix with `diag` on its diagonal.  When `diag` is None, D is
    taken to be the identity, so D v^T reduces to v^T.
    """
    if diag is not None:
        scaled_vt = tf.matmul(tf.matrix_diag(diag), v, adjoint_b=True)
    else:
        scaled_vt = tf.matrix_transpose(v)

    root = mat + tf.matmul(v, scaled_vt)
    return tf.matmul(root, root, adjoint_b=True).eval()
def _conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False):
    """
    Multi-output GP with independent GP priors.
    Number of latent processes equals the number of outputs (L = P).

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: P x M x M
    - Kuf: P x M x N
    - Kff: P x N or P x N x N

    `q_sqrt` may be None (its default); in that case each per-output
    conditional is computed without a variational covariance term.

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` for a detailed explanation of
      conditional in the single-output case.
    - See the multioutput notebook for more information about the multioutput framework.
    - See above for the parameters and the return value.
    """
    logger.debug("conditional: object, SharedIndependentMof, SeparateIndependentMok, object")
    # Following are: P x M x M  -  P x M x N  -  P x N(x N)
    Kmms = Kuu(feat, kern, jitter=settings.numerics.jitter_level)  # P x M x M
    Kmns = Kuf(feat, kern, Xnew)  # P x M x N
    kern_list = kern.kernels if isinstance(kern, Combination) else [kern.kern] * len(feat.feat_list)
    Knns = tf.stack([k.K(Xnew) if full_cov else k.Kdiag(Xnew) for k in kern_list], axis=0)
    fs = tf.transpose(f)[:, :, None]  # P x M x 1

    if q_sqrt is None:
        # `tf.map_fn` cannot iterate over a None element, so q_sqrt is left
        # out of the mapped tuple and passed to base_conditional as None.
        def single_gp_conditional(t):
            Kmm, Kmn, Knn, f_single = t
            return base_conditional(Kmn, Kmm, Knn, f_single,
                                    full_cov=full_cov, q_sqrt=None, white=white)

        elems = (Kmms, Kmns, Knns, fs)
    else:
        # P x 1 x M x M  or  P x M x 1
        q_sqrts = tf.transpose(q_sqrt)[:, :, None] if q_sqrt.shape.ndims == 2 else q_sqrt[:, None, :, :]

        def single_gp_conditional(t):
            Kmm, Kmn, Knn, f_single, q_sqrt_single = t
            return base_conditional(Kmn, Kmm, Knn, f_single,
                                    full_cov=full_cov, q_sqrt=q_sqrt_single, white=white)

        elems = (Kmms, Kmns, Knns, fs, q_sqrts)

    rmu, rvar = tf.map_fn(single_gp_conditional,
                          elems,
                          (settings.float_type, settings.float_type))  # P x N x 1, P x 1 x N x N or P x N x 1

    fmu = tf.matrix_transpose(rmu[:, :, 0])  # N x P

    if full_cov:
        fvar = rvar[:, 0, :, :]  # P x N x N
    else:
        fvar = tf.transpose(rvar[..., 0])  # N x P

    return fmu, _expand_independent_outputs(fvar, full_cov, full_output_cov)
def _arccosine(self, slist1, slist2, tf_embs):
    """
    Similarity matrix between two lists of embeddings using an arc-cosine
    kernel of degree 0, i.e. cosine similarity projected into the
    [0, 1] interval.

    The norms are gathered from `self.norms` using `slist1` / `slist2`;
    angles that evaluate to NaN are replaced by pi before rescaling.
    """
    similarity = self._dot(slist1, slist2, tf_embs)

    # TODO: arc-cosine with degree 1.
    pi = tf.constant(np.pi, dtype=tf.float64)
    all_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
    left_norms = tf.gather(all_norms, slist1, name='normlist1')
    right_norms = tf.gather(all_norms, slist2, name='normlist2')
    norm_products = tf.batch_matmul(left_norms,
                                    tf.matrix_transpose(right_norms))

    cosine = tf.clip_by_value(tf.truediv(similarity, norm_products), -1, 1)
    raw_angle = tf.acos(cosine)
    angle = tf.select(tf.is_nan(raw_angle),
                      tf.ones_like(raw_angle) * pi,
                      raw_angle)
    return 1 - (angle / pi)
def K(self, X, X2=None, presliced=False):
    """Evaluate the kernel between X and X2 (X with itself when X2 is None).

    Unless `presliced` is truthy the inputs are first passed through
    `self._slice`.  The angle between inputs is obtained from the weighted
    products, and the result is
    variance / pi * J(theta) * |x|^order * |x2|^order.
    """
    if not presliced:
        X, X2 = self._slice(X, X2)

    norm_x = tf.sqrt(self._weighted_product(X))
    if X2 is not None:
        norm_x2 = tf.sqrt(self._weighted_product(X2))
    else:
        # Symmetric case: reuse the norms already computed for X.
        X2 = X
        norm_x2 = norm_x

    inner = self._weighted_product(X, X2)
    col_norm = tf.expand_dims(norm_x, -1)
    row_norm = tf.matrix_transpose(tf.expand_dims(norm_x2, -1))
    cos_theta = inner / col_norm / row_norm

    # Shrink the cosine slightly so that values in [-1, 1] map strictly
    # inside that interval before taking acos.
    jitter = 1e-15
    theta = tf.acos(jitter + (1 - 2 * jitter) * cos_theta)

    return (self.variance * (1. / np.pi) * self._J(theta)
            * col_norm ** self.order
            * row_norm ** self.order)
def _validate_correlationness(self, x): if not self.validate_args: return x checks = [ tf.assert_less_equal( tf.cast(-1., dtype=x.dtype.base_dtype), x, message='Correlations must be >= -1.'), tf.assert_less_equal( x, tf.cast(1., x.dtype.base_dtype), message='Correlations must be <= 1.'), tf.assert_near( tf.matrix_diag_part(x), tf.cast(1., x.dtype.base_dtype), message='Self-correlations must be = 1.'), tf.assert_near( x, tf.matrix_transpose(x), message='Correlation matrices must be symmetric') ] with tf.control_dependencies(checks): return tf.identity(x)
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    r"""
    Compute the expectation:
    expectation[n] = <(\Sum_i K1_i_{Z1, x_n}) (\Sum_j K2_j_{x_n, Z2})>_p(x_n)
        - \Sum_i K1_i_{.,.}, \Sum_j K2_j_{.,.} :: Sum kernels

    When both (kernel, feature) pairs compare equal, each off-diagonal pair of
    sub-kernels is evaluated once and its matrix transpose is added instead of
    evaluating the mirrored pair.

    :return: NxM1xM2
    """
    if not (kern1 == kern2 and feat1 == feat2):
        # General case: every combination of sub-kernels is evaluated.
        terms = [
            expectation(p, (k_left, feat1), (k_right, feat2), nghp=nghp)
            for k_left, k_right in it.product(kern1.kernels, kern2.kernels)
        ]
        return functools.reduce(tf.add, terms)

    # Symmetric case: avoid duplicate computation by using transposes.
    terms = []
    for idx, k_left in enumerate(kern1.kernels):
        terms.append(expectation(p, (k_left, feat1), (k_left, feat1), nghp=nghp))
        for k_right in kern1.kernels[:idx]:
            cross = expectation(p, (k_left, feat1), (k_right, feat2), nghp=nghp)
            cross += tf.matrix_transpose(cross)
            terms.append(cross)
    return functools.reduce(tf.add, terms)
def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
    """Returns a uniformly random `Tensor` of "correlation-like" matrices.

    A "correlation-like" matrix is a symmetric square matrix with all entries
    between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
    the ones that are positive semi-definite are exactly the correlation
    matrices.

    Args:
      num_rows: Python `int` dimension of the correlation-like matrices.
      batch_shape: `Tensor` or Python `tuple` of `int` shape of the
        batch to return.
      dtype: `dtype` of the `Tensor` to return.
      seed: Random seed.

    Returns:
      matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
        and dtype `dtype`.  Each entry is in [-1, 1], and each matrix
        along the bottom two dimensions is symmetric and has 1s on the
        main diagonal.
    """
    # Number of entries in a lower triangle including the diagonal.  Floor
    # division keeps this an `int` under Python 3, where `/` would produce a
    # float that is not a valid shape entry.  n * (n + 1) is always even, so
    # no precision is lost.
    num_entries = num_rows * (num_rows + 1) // 2
    ones = tf.ones(shape=[num_entries], dtype=dtype)
    # It seems wasteful to generate random values for the diagonal since
    # I am going to throw them away, but `fill_triangular` fills the
    # diagonal, so I probably need them.
    # It's not impossible that it would be more efficient to just fill
    # the whole matrix with random values instead of messing with
    # `fill_triangular`.  Then would need to filter almost half out with
    # `matrix_band_part`.
    unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
    tril = util.fill_triangular(unifs)
    symmetric = tril + tf.matrix_transpose(tril)
    # Overwrite the (doubled) diagonal with exact ones.
    diagonal_ones = tf.ones(
        shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
        dtype=dtype)
    return tf.matrix_set_diag(symmetric, diagonal_ones)
def lanczos_bidiag(operator,
                   k,
                   orthogonalize=True,
                   starting_vector=None,
                   name="lanczos_bidiag"):
    """Computes a Lanczos bidiagonalization for a linear operator.

    Computes matrices `U` of shape `[m, k+1]`, `V` of shape `[n, k]` and lower
    bidiagonal matrix `B` of shape `[k+1, k]`, that satisfy the equations
    `A * V = U * B` and `A' * U[:, :-1] = V * B[:-1, :]'`.

    The columns of `U` are orthonormal and form a basis for the Krylov subspace
    `K(A*A', U[:,0])`.

    The columns of `V` are orthonormal and form a basis for the Krylov subspace
    `K(A'*A, A' U[:,0])`.

    Args:
      operator: An object representing a linear operator with attributes:
        - shape: Either a list of integers or a 1-D `Tensor` of type `int32` of
          length 2. `shape[0]` is the dimension on the domain of the operator,
          `shape[1]` is the dimension of the co-domain of the operator. In other
          words, if operator represents an M x N matrix A, `shape` must contain
          `[M, N]`.
        - dtype: The datatype of input to and output from `apply` and
          `apply_adjoint`.
        - apply: Callable object taking a vector `x` as input and returning a
          vector with the result of applying the operator to `x`, i.e. if
          `operator` represents matrix `A`, `apply` should return `A * x`.
        - apply_adjoint: Callable object taking a vector `x` as input and
          returning a vector with the result of applying the adjoint operator
          to `x`, i.e. if `operator` represents matrix `A`, `apply_adjoint`
          should return `conj(transpose(A)) * x`.
      k: An integer or a scalar Tensor of type `int32`. Determines the maximum
        number of steps to run. If an invariant subspace is found, the algorithm
        may terminate before `k` steps have been run.
      orthogonalize: If `True`, perform full orthogonalization. If `False` no
        orthogonalization is performed.
      starting_vector: If not null, must be a `Tensor` of shape `[n]`.
        NOTE(review): the default starting vector is sampled with shape
        `operator.shape[:1]` and is used as the first left vector `u0`, so
        this presumably needs length `operator.shape[0]` -- confirm.
      name: A name scope for the operation.

    Returns:
      output: A namedtuple representing a Lanczos bidiagonalization of
        `operator` with attributes:
        u: A rank-2 `Tensor` of type `operator.dtype` and shape
          `[operator.shape[0], k_actual+1]`, where `k_actual` is the number of
          steps run.
        v: A rank-2 `Tensor` of type `operator.dtype` and shape
          `[operator.shape[1], k_actual]`, where `k_actual` is the number of
          steps run.
        alpha: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
        beta: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
    """

    # Creates a TensorArray whose entries stay readable after the first read,
    # since the Lanczos vectors are read repeatedly.
    def tarray(size, dtype, name):
        return tf.TensorArray(
            dtype=dtype, size=size, tensor_array_name=name,
            clear_after_read=False)

    # Reads a row-vector at location i in tarray and returns it as a
    # column-vector.
    def read_colvec(tarray, i):
        return tf.expand_dims(tarray.read(i), -1)

    # Writes a column-vector as a row-vector at location i in tarray.
    def write_colvec(tarray, colvec, i):
        return tarray.write(i, tf.squeeze(colvec))

    # Ephemeral class holding Lanczos bidiagonalization state:
    #   u = left Lanczos vectors
    #   v = right Lanczos vectors
    #   alpha = diagonal of B_k.
    #   beta = subdiagonal of B_k.
    # Notice that we store the left and right Lanczos vectors as the _rows_
    # of u and v. This is done because tensors are stored row-major and
    # TensorArray only supports packing along dimension 0.
    lanzcos_bidiag_state = collections.namedtuple("LanczosBidiagState",
                                                  ["u", "v", "alpha", "beta"])

    # Returns a new state with the results of step i written; u is stored at
    # index i + 1 because index 0 holds the starting vector.
    def update_state(old, i, u, v, alpha, beta):
        return lanzcos_bidiag_state(
            write_colvec(old.u, u, i + 1),
            write_colvec(old.v, v, i),
            old.alpha.write(i, alpha),
            old.beta.write(i, beta))

    def gram_schmidt_step(j, basis, v):
        """Makes v orthogonal to the j'th vector in basis."""
        # The static shape is saved and re-applied after the in-place update.
        v_shape = v.get_shape()
        basis_vec = read_colvec(basis, j)
        v -= tf.batch_matmul(basis_vec, v, adj_x=True) * basis_vec
        v.set_shape(v_shape)
        return j + 1, basis, v

    # Orthogonalizes v against basis vectors 0..i-1, then normalizes it;
    # returns whatever `util.l2normalize` returns (unpacked by callers as a
    # (vector, norm) pair).
    def orthogonalize_once(i, basis, v):
        j = tf.constant(0, dtype=tf.int32)
        _, _, v = tf.while_loop(lambda j, basis, v: j < i,
                                gram_schmidt_step, [j, basis, v])
        return util.l2normalize(v)

    # Iterated modified Gram-Schmidt orthogonalization adapted from PROPACK.
    # TODO(rmlarsen): This is possibly the slowest implementation of
    # iterated Gram-Schmidt orthogonalization since the abacus. Move to C++.
    def orthogonalize_(i, basis, v):
        v_norm = util.l2norm(v)
        v_new, v_new_norm = orthogonalize_once(i, basis, v)
        # If the norm decreases more than 1/sqrt(2), run a second
        # round of MGS. See proof in:
        #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
        #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
        # NOTE(review): the second round is applied to the original `v`, not
        # to `v_new`, so it repeats the first pass -- confirm this is intended.
        return tf.cond(v_new_norm < 0.7071 * v_norm,
                       lambda: orthogonalize_once(i, basis, v),
                       lambda: (v_new, v_new_norm))

    def stopping_criterion(i, _):
        # TODO(rmlarsen): Stop if an invariant subspace is detected.
        return i < k

    def lanczos_bidiag_step(i, ls):
        """Extends the Lanczos bidiagonalization ls by one step."""
        u = read_colvec(ls.u, i)
        r = operator.apply_adjoint(u)
        # The shape inference doesn't work across cond, save and reapply the
        # shape.
        r_shape = r.get_shape()
        # From the second step on, subtract the previous right vector scaled
        # by the previous beta.
        r = tf.cond(
            i > 0,
            lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1),
            lambda: r)
        r.set_shape(r_shape)
        if orthogonalize:
            v, alpha = orthogonalize_(i - 1, ls.v, r)
        else:
            v, alpha = util.l2normalize(r)
        p = operator.apply(v) - alpha * u
        if orthogonalize:
            u, beta = orthogonalize_(i, ls.u, p)
        else:
            u, beta = util.l2normalize(p)

        return i + 1, update_state(ls, i, u, v, alpha, beta)

    with tf.name_scope(name):
        dtype = operator.dtype
        if starting_vector is None:
            starting_vector = tf.random_uniform(
                operator.shape[:1], -1, 1, dtype=dtype)
        u0, _ = util.l2normalize(starting_vector)
        ls = lanzcos_bidiag_state(
            u=write_colvec(tarray(k + 1, dtype, "u"), u0, 0),
            v=tarray(k, dtype, "v"),
            alpha=tarray(k, dtype, "alpha"),
            beta=tarray(k, dtype, "beta"))
        i = tf.constant(0, dtype=tf.int32)
        _, ls = tf.while_loop(stopping_criterion, lanczos_bidiag_step, [i, ls])
        # The vectors were stored as rows; transpose so they become columns.
        return lanzcos_bidiag_state(
            tf.matrix_transpose(ls.u.pack()),
            tf.matrix_transpose(ls.v.pack()),
            ls.alpha.pack(),
            ls.beta.pack())
def gen_decoder(hparams,
                inputs,
                targets,
                targets_present,
                encoding_state,
                is_training,
                is_validating,
                reuse=None):
    """Define the Decoder graph.

    The Decoder will now impute tokens that have been masked from the input
    sequence.

    Args:
      hparams: Hyperparameters; `gen_rnn_size` is read here and the object is
        forwarded to the variational-dropout mask generator.
      inputs: Token ids looked up in the embedding to form the RNN inputs.
      targets: Target token ids; `targets[:, t]` is the real token at step t.
      targets_present: Mask selecting, per step, the real token (where true)
        or the sampled token (where false).
      encoding_state: Indexable pair; element 0 holds the hidden encoder
        states and element 1 the final encoder state tuple.
      is_training: Whether to apply dropout masks.
      is_validating: Whether to force teacher forcing.
      reuse: Passed to the 'decoder' variable scope and attention setup.

    Returns:
      Tuple `(sequence, logits, log_probs)`, each built with
      `tf.stack(..., axis=1)` over the per-step values.
    """
    config = get_config()
    gen_decoder_rnn_size = hparams.gen_rnn_size

    # When the embedding is shared, fetch the existing variable from the
    # 'decoder/rnn' scope instead of creating a new one below.
    if FLAGS.seq2seq_share_embedding:
        with tf.variable_scope('decoder/rnn', reuse=True):
            embedding = tf.get_variable('embedding',
                                        [FLAGS.vocab_size, gen_decoder_rnn_size])

    with tf.variable_scope('decoder', reuse=reuse):
        # Neural architecture search cell.
        cell = custom_cell.Alien(config.hidden_size)

        # Dropout masks are only generated (and `h2h_masks` only defined)
        # when training.
        if is_training:
            [h2h_masks, _, _,
             output_mask] = variational_dropout.generate_variational_dropout_masks(
                 hparams, config.keep_prob)
        else:
            output_mask = None
        cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers)

        # Hidden encoder states.
        hidden_vector_encodings = encoding_state[0]

        # Carry forward the final state tuple from the encoder.
        # State tuples.
        state_gen = encoding_state[1]

        if FLAGS.attention_option is not None:
            (attention_keys, attention_values, _,
             attention_construct_fn) = attention_utils.prepare_attention(
                 hidden_vector_encodings,
                 FLAGS.attention_option,
                 num_units=gen_decoder_rnn_size,
                 reuse=reuse)

        with tf.variable_scope('rnn'):
            sequence, logits, log_probs = [], [], []

            if not FLAGS.seq2seq_share_embedding:
                embedding = tf.get_variable('embedding',
                                            [FLAGS.vocab_size, gen_decoder_rnn_size])
            # The output projection is the transpose of the embedding matrix.
            softmax_w = tf.matrix_transpose(embedding)
            softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

            rnn_inputs = tf.nn.embedding_lookup(embedding, inputs)

            if is_training and FLAGS.keep_prob < 1:
                rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob)

            for t in xrange(FLAGS.sequence_length):
                # Variables are created on the first step and reused after.
                if t > 0:
                    tf.get_variable_scope().reuse_variables()

                # Input to the Decoder.
                if t == 0:
                    # Always provide the real input at t = 0.
                    rnn_inp = rnn_inputs[:, t]

                # If the input is present, read in the input at t.
                # If the input is not present, read in the previously generated.
                else:
                    real_rnn_inp = rnn_inputs[:, t]
                    # `fake` is the token sampled at the previous step.
                    fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)

                    # While validating, the decoder should be operating in teacher
                    # forcing regime.  Also, if we're just training with cross_entropy
                    # use teacher forcing.
                    if is_validating or (is_training and
                                         FLAGS.gen_training_strategy == 'cross_entropy'):
                        rnn_inp = real_rnn_inp
                    else:
                        rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp,
                                           fake_rnn_inp)

                # Apply the hidden-to-hidden mask to the second element of
                # each layer's state tuple.
                if is_training:
                    state_gen = list(state_gen)
                    for layer_num, per_layer_state in enumerate(state_gen):
                        per_layer_state = LSTMTuple(
                            per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num])
                        state_gen[layer_num] = per_layer_state

                # RNN.
                rnn_out, state_gen = cell_gen(rnn_inp, state_gen)

                if is_training:
                    rnn_out = output_mask * rnn_out

                if FLAGS.attention_option is not None:
                    rnn_out = attention_construct_fn(rnn_out, attention_keys,
                                                     attention_values)
                #   # TODO(liamfedus): Assert not "monotonic" attention_type.
                #   # TODO(liamfedus): FLAGS.attention_type.
                #   context_state = revised_attention_utils._empty_state()
                #   rnn_out, context_state = attention_construct_fn(
                #       rnn_out, attention_keys, attention_values, context_state, t)
                logit = tf.matmul(rnn_out, softmax_w) + softmax_b

                # Output for Decoder.
                # If input is present:   Return real at t+1.
                # If input is not present:  Return fake for t+1.
                real = targets[:, t]

                categorical = tf.contrib.distributions.Categorical(logits=logit)
                fake = categorical.sample()
                log_prob = categorical.log_prob(fake)

                output = tf.where(targets_present[:, t], real, fake)

                # Add to lists.
                sequence.append(output)
                log_probs.append(log_prob)
                logits.append(logit)

    return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack(
        log_probs, axis=1))
def testMultivariateFromScalarBatchScalarEvent(self):
    """Checks a TransformedDistribution that overrides batch/event shape.

    A scalar Normal pushed through an AffineLinearOperator bijector with
    overridden shapes is compared against an explicit MultivariateNormalFull:
    sample moments, shapes, log_prob/prob/entropy, and that the CDF-style
    functions raise NotImplementedError.
    """
    with self.test_session() as sess:
        shift = np.array([-1, 0, 1], dtype=np.float32)
        scale = la.LinearOperatorTriL(
            [[[-1., 0, 0], [2, 1, 0], [3, 2, 1]],
             [[2, 0, 0], [3, -2, 0], [4, 3, 2]]],
            is_non_singular=True,
            is_positive_definite=False)

        # Overriding shapes must be compatible w/bijector; most bijectors are
        # batch_shape agnostic and only care about event_ndims.
        # In the case of `Affine`, if we got it wrong then it would fire an
        # exception due to incompatible dimensions.
        fake_mvn = ds.TransformedDistribution(
            distribution=ds.Normal(mu=0., sigma=1.),
            bijector=bs.AffineLinearOperator(shift, scale),
            batch_shape=scale.batch_shape,  # [2]
            event_shape=[scale.domain_dimension.value],  # [3]
            validate_args=True)

        # Note: Affine elided this tile.
        actual_mean = np.tile(shift, [2, 1])
        # Since LinOp.apply doesn't support `adjoint_b` nor composition,
        # we cannot do: scale.apply(scale, adjoint_b=True).eval()
        actual_cov = scale.apply(tf.matrix_transpose(scale.to_dense())).eval()
        actual_mvn = ds.MultivariateNormalFull(mu=actual_mean, sigma=actual_cov)

        # Ensure sample works by checking first, second moments.
        n = 5e3
        y = fake_mvn.sample(int(n), seed=0)
        sample_mean = tf.reduce_mean(y, 0)
        centered_y = tf.transpose(y - sample_mean, [1, 2, 0])
        sample_cov = tf.matmul(centered_y, centered_y, transpose_b=True) / n
        [sample_mean_, sample_cov_] = sess.run([sample_mean, sample_cov])
        self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
        self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

        # Ensure all other functions work as intended.
        x = fake_mvn.sample(5, seed=0).eval()
        self.assertAllEqual([5, 2, 3], x.shape)
        self.assertAllEqual(actual_mvn.get_event_shape(),
                            fake_mvn.get_event_shape())
        self.assertAllEqual(actual_mvn.event_shape().eval(),
                            fake_mvn.event_shape().eval())
        self.assertAllEqual(actual_mvn.get_batch_shape(),
                            fake_mvn.get_batch_shape())
        self.assertAllEqual(actual_mvn.batch_shape().eval(),
                            fake_mvn.batch_shape().eval())
        self.assertAllClose(actual_mvn.log_prob(x).eval(),
                            fake_mvn.log_prob(x).eval(),
                            atol=0., rtol=1e-7)
        self.assertAllClose(actual_mvn.prob(x).eval(),
                            fake_mvn.prob(x).eval(),
                            atol=0., rtol=1e-6)
        self.assertAllClose(actual_mvn.entropy().eval(),
                            fake_mvn.entropy().eval(),
                            atol=0., rtol=1e-6)
        for unsupported_fn in (fake_mvn.log_cdf,
                               fake_mvn.cdf,
                               fake_mvn.survival_function,
                               fake_mvn.log_survival_function):
            with self.assertRaisesRegexp(
                    NotImplementedError,
                    "not implemented when overriding event_shape"):
                # Call the function directly inside the context manager.  The
                # previous code wrapped it in a second, malformed
                # `assertRaisesRegexp(...)` call that only worked because its
                # argument raised before that call was ever made.
                unsupported_fn(x)
def uncertain_conditional(Xnew_mu, Xnew_var, feat, kern, q_mu, q_sqrt, *,
                          mean_function=None, full_output_cov=False, full_cov=False, white=False):
    """
    Calculates the conditional for uncertain inputs Xnew, p(Xnew) = N(Xnew_mu, Xnew_var).
    See ``conditional`` documentation for further reference.

    :param Xnew_mu: mean of the inputs, size N x Din
    :param Xnew_var: covariance matrix of the inputs, size N x Din x Din
    :param feat: gpflow.InducingFeature object, only InducingPoints is supported
    :param kern: gpflow kernel or ekernel object.
    :param q_mu: mean inducing points, size M x Dout
    :param q_sqrt: cholesky of the covariance matrix of the inducing points, size Dout x M x M
    :param mean_function: optional mean function; ``None`` and
        ``mean_functions.Zero`` are both treated as a zero mean.
    :param full_output_cov: boolean whether to compute covariance between output dimension.
                            Influences the shape of return value ``fvar``. Default is False
    :param full_cov: must be False; ``True`` raises ``NotImplementedError``.
    :param white: boolean whether to use whitened representation. Default is False.

    :return fmean, fvar: mean and covariance of the conditional, size ``fmean`` is N x Dout,
            size ``fvar`` depends on ``full_output_cov``: if True ``f_var`` is N x Dout x Dout,
            if False then ``f_var`` is N x Dout
    :raises NotImplementedError: if ``feat`` is not an ``InducingPoints``
            instance or if ``full_cov`` is truthy.
    """

    # TODO(VD): Tensorflow 1.7 doesn't support broadcasting in``tf.matmul`` and
    # ``tf.matrix_triangular_solve``. This is reported in issue 216.
    # As a temporary workaround, we are using ``tf.einsum`` for the matrix
    # multiplications and tiling in the triangular solves.
    # The code that should be used once the bug is resolved is added in comments.

    if not isinstance(feat, InducingPoints):
        raise NotImplementedError

    if full_cov:
        # TODO(VD): ``full_cov`` True would return a ``fvar`` of shape N x N x D x D,
        # encoding the covariance between input datapoints as well.
        # This is not implemented as this feature is only used for plotting purposes.
        raise NotImplementedError

    pXnew = Gaussian(Xnew_mu, Xnew_var)

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_ind = tf.shape(q_mu)[0]  # number of inducing points (M)
    num_func = tf.shape(q_mu)[1]  # output dimension (D)

    # Keep only the lower triangle (including the diagonal) of q_sqrt.
    q_sqrt_r = tf.matrix_band_part(q_sqrt, -1, 0)  # D x M x M

    eKuf = tf.transpose(expectation(pXnew, (kern, feat)))  # M x N (psi1)
    Kuu = feat.Kuu(kern, jitter=settings.numerics.jitter_level)  # M x M
    Luu = tf.cholesky(Kuu)  # M x M

    # In the non-whitened case, q_mu and q_sqrt are first solved against Luu.
    if not white:
        q_mu = tf.matrix_triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(Luu[None, :, :], [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.matrix_triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.matrix_triangular_solve(Luu, eKuf, lower=True)  # M x N
    fmean = tf.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = expectation(pXnew, kern)  # N (psi0)
    eKuffu = expectation(pXnew, (kern, feat), (kern, feat))  # N x M x M (psi2)
    Luu_tiled = tf.tile(Luu[None, :, :], [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    # Two solves with a transpose in between apply the solve against Luu on
    # both sides of eKuffu.
    Li_eKuffu = tf.matrix_triangular_solve(Luu_tiled, eKuffu, lower=True)
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled, tf.matrix_transpose(Li_eKuffu), lower=True)  # N x M x M
    cov = tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # D x M x M

    if mean_function is None or isinstance(mean_function, mean_functions.Zero):
        # No mean function: the mean-related correction is zero.
        e_related_to_mean = tf.zeros((num_data, num_func, num_func), dtype=settings.float_type)
    else:
        # Update mean: \mu(x) + m(x)
        fmean = fmean + expectation(pXnew, mean_function)

        # Calculate: m(x) m(x)^T + m(x) \mu(x)^T + \mu(x) m(x)^T,
        # where m(x) is the mean_function and \mu(x) is fmean
        e_mean_mean = expectation(pXnew, mean_function, mean_function)  # N x D x D
        Lit_q_mu = tf.matrix_triangular_solve(Luu, q_mu, adjoint=True)
        e_mean_Kuf = expectation(pXnew, mean_function, (kern, feat))  # N x D x M
        # einsum isn't able to infer the rank of e_mean_Kuf, hence we explicitly set the rank of the tensor:
        e_mean_Kuf = tf.reshape(e_mean_Kuf, [num_data, num_func, num_ind])
        e_fmean_mean = tf.einsum("nqm,mz->nqz", e_mean_Kuf, Lit_q_mu)  # N x D x D
        e_related_to_mean = e_fmean_mean + tf.matrix_transpose(e_fmean_mean) + e_mean_mean

    if full_output_cov:
        fvar = (
            tf.matrix_diag(tf.tile((eKff - tf.trace(Li_eKuffu_Lit))[:, None], [1, num_func])) +
            tf.matrix_diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)) +
            # tf.matrix_diag(tf.trace(tf.matmul(Li_eKuffu_Lit, cov))) +
            tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu) -
            # tf.matmul(q_mu, tf.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            fmean[:, :, None] * fmean[:, None, :] +
            e_related_to_mean
        )
    else:
        # Same terms as above, restricted to the diagonal over outputs.
        fvar = (
            (eKff - tf.trace(Li_eKuffu_Lit))[:, None] +
            tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov) +
            tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu) -
            fmean ** 2 +
            tf.matrix_diag_part(e_related_to_mean)
        )

    return fmean, fvar
def generator(hparams,
              inputs,
              targets,
              targets_present,
              is_training,
              is_validating,
              reuse=None):
    """Define the Generator graph.

    G will now impute tokens that have been masked from the input sequence.

    Args:
      hparams: Hyperparameters; `gen_nas_keep_prob_0`, `gen_nas_keep_prob_1`
        and `gen_rnn_size` are read here and the object is forwarded to the
        variational-dropout mask generator.
      inputs: Token ids looked up in the embedding to form the RNN inputs.
      targets: Target token ids; `targets[:, t]` is the real token at step t.
      targets_present: Mask selecting, per step, the real token (where true)
        or the sampled token (where false).
      is_training: Whether to apply dropout masks.
      is_validating: Whether to force teacher forcing.
      reuse: Passed to the 'gen' variable scope.

    Returns:
      Tuple `(sequence, logits, log_probs, initial_state, final_state)`.  The
      first three are built with `tf.stack(..., axis=1)` over the per-step
      values; `final_state` is the RNN state after running over the real
      inputs only.
    """
    tf.logging.info(
        'Undirectional generative model is not a useful model for this MaskGAN '
        'because future context is needed. Use only for debugging purposes.')
    config = get_config()
    # The per-layer keep probabilities come from the hyperparameters.
    config.keep_prob = [hparams.gen_nas_keep_prob_0, hparams.gen_nas_keep_prob_1]
    configs.print_config(config)

    init_scale = config.init_scale
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)

    with tf.variable_scope('gen', reuse=reuse, initializer=initializer):
        # Neural architecture search cell.
        cell = custom_cell.Alien(config.hidden_size)

        # Dropout masks are only generated (and `h2h_masks` only defined)
        # when training.
        if is_training:
            [h2h_masks, _, _,
             output_mask] = variational_dropout.generate_variational_dropout_masks(
                 hparams, config.keep_prob)
        else:
            output_mask = None

        cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers)
        initial_state = cell_gen.zero_state(FLAGS.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            sequence, logits, log_probs = [], [], []
            embedding = tf.get_variable('embedding',
                                        [FLAGS.vocab_size, hparams.gen_rnn_size])
            # The output projection is the transpose of the embedding matrix.
            softmax_w = tf.matrix_transpose(embedding)
            softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

            rnn_inputs = tf.nn.embedding_lookup(embedding, inputs)

            if is_training and FLAGS.keep_prob < 1:
                rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob)

            for t in xrange(FLAGS.sequence_length):
                # Variables are created on the first step and reused after.
                if t > 0:
                    tf.get_variable_scope().reuse_variables()

                # Input to the model is the first token to provide context.  The
                # model will then predict token t > 0.
                if t == 0:
                    # Always provide the real input at t = 0.
                    state_gen = initial_state
                    rnn_inp = rnn_inputs[:, t]

                # If the input is present, read in the input at t.
                # If the input is not present, read in the previously generated.
                else:
                    real_rnn_inp = rnn_inputs[:, t]
                    # `fake` is the token sampled at the previous step.
                    fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)

                    # While validating, the decoder should be operating in teacher
                    # forcing regime.  Also, if we're just training with cross_entropy
                    # use teacher forcing.
                    if is_validating or (is_training and
                                         FLAGS.gen_training_strategy == 'cross_entropy'):
                        rnn_inp = real_rnn_inp
                    else:
                        rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp,
                                           fake_rnn_inp)

                # Apply the hidden-to-hidden mask to the second element of
                # each layer's state tuple.
                if is_training:
                    state_gen = list(state_gen)
                    for layer_num, per_layer_state in enumerate(state_gen):
                        per_layer_state = LSTMTuple(
                            per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num])
                        state_gen[layer_num] = per_layer_state

                # RNN.
                rnn_out, state_gen = cell_gen(rnn_inp, state_gen)

                if is_training:
                    rnn_out = output_mask * rnn_out

                logit = tf.matmul(rnn_out, softmax_w) + softmax_b

                # Real sample.
                real = targets[:, t]

                categorical = tf.contrib.distributions.Categorical(logits=logit)
                fake = categorical.sample()
                log_prob = categorical.log_prob(fake)

                # Output for Generator will either be generated or the input.
                #
                # If present:   Return real.
                # If not present:  Return fake.
                output = tf.where(targets_present[:, t], real, fake)

                # Add to lists.
                sequence.append(output)
                log_probs.append(log_prob)
                logits.append(logit)

            # Produce the RNN state had the model operated only
            # over real data.
            real_state_gen = initial_state
            for t in xrange(FLAGS.sequence_length):
                tf.get_variable_scope().reuse_variables()

                rnn_inp = rnn_inputs[:, t]

                # RNN.
                rnn_out, real_state_gen = cell_gen(rnn_inp, real_state_gen)

            final_state = real_state_gen

    return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack(
        log_probs, axis=1), initial_state, final_state)
def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
    """`tf.matrix_transpose` on a flat list raises a ValueError matching "should be a "."""
    rank_one_input = [1, 2, 3]
    with self.test_session(), self.assertRaisesRegexp(ValueError,
                                                      "should be a "):
        tf.matrix_transpose(rank_one_input)
def _maybe_adjoint(self, x, adjoint): if adjoint: return tf.matrix_transpose(x) else: return x
def __init__(self,
             loc=None,
             covariance_matrix=None,
             validate_args=False,
             allow_nan_stats=True,
             name="MultivariateNormalFullCovariance"):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` matrix is a `k x k` symmetric positive
    definite matrix.  In other words it is (real) symmetric with all
    eigenvalues strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]`
        where `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor`
        of same `dtype` as `loc`.  The strict upper triangle of
        `covariance_matrix` is ignored, so if `covariance_matrix` is not
        symmetric no error will be raised (unless `validate_args is True`).
        `covariance_matrix` has shape `[B1, ..., Bb, k, k]` where `b >= 0`
        and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading
        runtime performance. When `False` invalid inputs may silently render
        incorrect outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is
        raised if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
    # Must run before any other local is bound so that only the constructor
    # arguments (and `self`) are captured.
    parameters = dict(locals())

    # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
    with tf.name_scope(name) as name:
        with tf.name_scope("init", values=[loc, covariance_matrix]):
            dtype = dtype_util.common_dtype([loc, covariance_matrix], tf.float32)
            if loc is not None:
                loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)

            scale_tril = None
            if covariance_matrix is not None:
                covariance_matrix = tf.convert_to_tensor(
                    covariance_matrix, name="covariance_matrix", dtype=dtype)
                if validate_args:
                    # cholesky() ignores the upper triangular part, so
                    # symmetry has to be asserted separately.
                    symmetry_check = tf.assert_near(
                        covariance_matrix,
                        tf.matrix_transpose(covariance_matrix),
                        message="Matrix was not symmetric")
                    covariance_matrix = control_flow_ops.with_dependencies(
                        [symmetry_check], covariance_matrix)
                # No need to validate that covariance_matrix is non-singular.
                # LinearOperatorLowerTriangular has an assert_non_singular
                # method that is called by the Bijector.
                scale_tril = tf.cholesky(covariance_matrix)

            super(MultivariateNormalFullCovariance, self).__init__(
                loc=loc,
                scale_tril=scale_tril,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=name)
    self._parameters = parameters
def gen_decoder(hparams,
                inputs,
                targets,
                targets_present,
                encoding_state,
                is_training,
                is_validating,
                reuse=None):
    """Define the Decoder graph.

    The Decoder imputes tokens that have been masked from the input
    sequence, unrolling a multi-layer LSTM for `FLAGS.sequence_length`
    steps.

    Args:
      hparams: Hyperparameter object; `gen_rnn_size`, `gen_num_layers` and
        `gen_vd_keep_prob` are read from it.
      inputs: Token ids looked up in the embedding and sliced per step as
        `[:, t]`.
      targets: Ground-truth tokens, sliced per step as `[:, t]`.
      targets_present: Per-step condition passed to `tf.where`, selecting
        the real token where set and the generated one otherwise.
      encoding_state: Indexable encoder output; element 0 is handed to the
        attention setup and element 1 is the decoder's initial state.
      is_training: When truthy, the variational-dropout output mask is
        applied (and the dropout cell wrapper is used if
        `gen_vd_keep_prob < 1`).
      is_validating: When truthy, the decoder always reads the real input
        (teacher forcing).
      reuse: Forwarded to the 'decoder' variable scope, the LSTM cells and
        the attention setup.

    Returns:
      A tuple `(sequence, logits, log_probs)`. `sequence` and `log_probs`
      are the per-step values stacked along axis 1. With the
      'cross_entropy' training strategy `sequence` is the targets,
      `log_probs` is zeros and `logits` is computed in one batched matmul
      reshaped to `[-1, FLAGS.sequence_length, FLAGS.vocab_size]`;
      otherwise `logits` is the per-step logits stacked along axis 1.
    """
    rnn_size = hparams.gen_rnn_size
    cross_entropy_only = FLAGS.gen_training_strategy == 'cross_entropy'
    use_attention = FLAGS.attention_option is not None

    # Logging side effect: prints the targets tensor whenever it is evaluated.
    targets = tf.Print(targets, [targets], message='targets', summarize=50)

    if FLAGS.seq2seq_share_embedding:
        # Pick up the embedding that already exists under 'decoder/rnn'.
        with tf.variable_scope('decoder/rnn', reuse=True):
            embedding = tf.get_variable(
                'embedding', [FLAGS.vocab_size, hparams.gen_rnn_size])

    with tf.variable_scope('decoder', reuse=reuse):
        apply_dropout = is_training and hparams.gen_vd_keep_prob < 1

        def build_layer():
            """Return one LSTM layer, dropout-wrapped when applicable."""
            base_cell = tf.contrib.rnn.BasicLSTMCell(
                rnn_size,
                forget_bias=0.0,
                state_is_tuple=True,
                reuse=reuse)
            if not apply_dropout:
                return base_cell
            return variational_dropout.VariationalDropoutWrapper(
                base_cell, FLAGS.batch_size, hparams.gen_rnn_size,
                hparams.gen_vd_keep_prob, hparams.gen_vd_keep_prob)

        decoder_cell = tf.contrib.rnn.MultiRNNCell(
            [build_layer() for _ in range(hparams.gen_num_layers)],
            state_is_tuple=True)

        # Element 0: hidden encoder states (consumed by attention).
        # Element 1: final encoder state tuple, carried into the decoder.
        encoder_hidden_states = encoding_state[0]
        decoder_state = encoding_state[1]

        if use_attention:
            (attention_keys, attention_values, _,
             attention_construct_fn) = attention_utils.prepare_attention(
                 encoder_hidden_states,
                 FLAGS.attention_option,
                 num_units=rnn_size,
                 reuse=reuse)

        if is_training:
            # keep_prob + U[0, 1) lies in [keep_prob, 1 + keep_prob); its
            # floor is 0 below 1.0 and 1 from 1.0 upwards. Dividing by
            # keep_prob rescales the units that are kept.
            keep_prob = hparams.gen_vd_keep_prob
            mask_shape = tf.stack([FLAGS.batch_size, hparams.gen_rnn_size])
            output_mask = (
                tf.floor(keep_prob + tf.random_uniform(mask_shape)) /
                keep_prob)

        with tf.variable_scope('rnn'):
            sequence, step_logits, log_probs, rnn_outs = [], [], [], []

            if not FLAGS.seq2seq_share_embedding:
                embedding = tf.get_variable(
                    'embedding', [FLAGS.vocab_size, hparams.gen_rnn_size])
            # The output projection is the transposed embedding matrix.
            softmax_w = tf.matrix_transpose(embedding)
            softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

            embedded_inputs = tf.nn.embedding_lookup(embedding, inputs)
            # TODO(adai): Perhaps append IMDB labels placeholder to input at
            # each time point.

            fake = None
            for t in xrange(FLAGS.sequence_length):
                if t > 0:
                    tf.get_variable_scope().reuse_variables()

                # Decoder input. The real input is always used at t = 0,
                # while validating, and when training with plain
                # cross-entropy (teacher forcing). Otherwise the real input
                # is read where the previous target was present and the
                # previously generated token elsewhere.
                if t == 0 or is_validating or cross_entropy_only:
                    rnn_inp = embedded_inputs[:, t]
                else:
                    real_rnn_inp = embedded_inputs[:, t]
                    fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)
                    rnn_inp = tf.where(targets_present[:, t - 1],
                                       real_rnn_inp, fake_rnn_inp)

                # One RNN step.
                rnn_out, decoder_state = decoder_cell(rnn_inp, decoder_state)

                if use_attention:
                    rnn_out = attention_construct_fn(
                        rnn_out, attention_keys, attention_values)
                if is_training:
                    rnn_out *= output_mask

                rnn_outs.append(rnn_out)

                if cross_entropy_only:
                    # Per-step logits/log-probs are zero placeholders here;
                    # the real logits are computed in one shot after the
                    # loop.
                    output = targets[:, t]
                    logit = tf.zeros(
                        tf.stack([FLAGS.batch_size, FLAGS.vocab_size]))
                    log_prob = tf.zeros(tf.stack([FLAGS.batch_size]))
                else:
                    logit = tf.nn.bias_add(
                        tf.matmul(rnn_out, softmax_w), softmax_b)

                    # Decoder output: the real token where the target is
                    # present, the generated token otherwise.
                    real = targets[:, t]
                    categorical = tf.contrib.distributions.Categorical(
                        logits=logit)
                    fake = (categorical.mode() if FLAGS.use_gen_mode
                            else categorical.sample())
                    log_prob = categorical.log_prob(fake)
                    output = tf.where(targets_present[:, t], real, fake)

                sequence.append(output)
                log_probs.append(log_prob)
                step_logits.append(logit)

            if cross_entropy_only:
                flat_outs = tf.reshape(
                    tf.stack(rnn_outs, 1), [-1, rnn_size])
                logits = tf.nn.bias_add(
                    tf.matmul(flat_outs, softmax_w), softmax_b)
                logits = tf.reshape(
                    logits, [-1, FLAGS.sequence_length, FLAGS.vocab_size])
            else:
                logits = tf.stack(step_logits, axis=1)

    return (tf.stack(sequence, axis=1), logits, tf.stack(log_probs, axis=1))