def __call__(self):
    """Factorizes the pretrained embeddings with a truncated SVD and builds a
    loss that encourages the compressed embeddings to preserve the pairwise
    similarities of the original ones."""
    embed_size = self.embed_size
    row_idxs = tf.placeholder(tf.int32, shape=(None,), name='row_idxs')
    col_idxs = tf.placeholder(tf.int32, shape=(None,), name='col_idxs')

    # Truncated SVD: keep the top `embed_size` singular directions.
    S, U, _ = tf.svd(self.pretrained_vocab.embeddings)
    self.embeddings = U[:, :embed_size] * S[:embed_size]

    old_rows = tf.gather(self.pretrained_vocab.embeddings, row_idxs)
    old_cols = tf.gather(self.pretrained_vocab.embeddings, col_idxs)
    new_rows = tf.gather(self.embeddings, row_idxs)
    new_cols = tf.gather(self.embeddings, col_idxs)

    # Pairwise similarity matrices before and after compression.
    old_matmul = tf.matmul(old_rows, old_cols, transpose_b=True)
    new_matmul = tf.matmul(new_rows, new_cols, transpose_b=True)

    if self.embed_loss == 'cross_entropy':
        old_matmul = tf.expand_dims(tf.nn.softmax(old_matmul), axis=1)
        new_matmul = tf.expand_dims(tf.nn.softmax(new_matmul), axis=2)
        loss = -tf.reduce_sum(
            tf.matmul(old_matmul, tf.log(new_matmul))) / tf.to_float(
                tf.shape(row_idxs)[0])
    elif self.embed_loss == 'l2_loss':
        loss = tf.reduce_sum(
            (old_matmul - new_matmul)**2 / 2) / tf.to_float(
                tf.shape(row_idxs)[0])
    else:
        raise ValueError(
            'embed_loss must be one of ("cross_entropy", "l2_loss")')

    return {'row_idxs': row_idxs, 'col_idxs': col_idxs, 'loss': loss}

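# A minimal, self-contained sketch (assuming TensorFlow 1.x) of the idea used
# by __call__ above: compress a pretrained embedding matrix with a truncated
# SVD and measure how well pairwise dot products are preserved. The matrix
# sizes, indices, and `embed_size` are illustrative toy values, not the
# original configuration.
import numpy as np
import tensorflow as tf

pretrained = tf.constant(np.random.RandomState(0).randn(100, 32), tf.float32)
embed_size = 8
s, u, _ = tf.svd(pretrained)
compressed = u[:, :embed_size] * s[:embed_size]

idxs = tf.constant([0, 1, 2, 3])
old_sim = tf.matmul(tf.gather(pretrained, idxs), pretrained, transpose_b=True)
new_sim = tf.matmul(tf.gather(compressed, idxs), compressed, transpose_b=True)
l2 = tf.reduce_sum((old_sim - new_sim) ** 2 / 2) / tf.to_float(tf.shape(idxs)[0])

with tf.Session() as sess:
    print(sess.run(l2))  # similarity-preservation error of the compression
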
def tpu_matrix_compressor(self, a_matrix):
    """Low-rank decomposition of a_matrix using tpu operations.

    For training on tpus, we only use basic tf operations (as py_func is not
    supported).

    Args:
      a_matrix: input matrix.

    Returns:
      A list of two matrices [b_matrix, c_matrix] which is the low-rank
      decomposition of a_matrix. Rank is taken from spec.rank.
    """
    s, u, v = tf.svd(a_matrix)
    logging.info('Inside tpu_matrix_compressor: u,s,v shapes are: %s, %s, %s',
                 u.shape, s.shape, v.shape)
    rank = comp_op_utils.compute_compressed_rank_from_matrix_shape(
        tuple(a_matrix.shape.dims), self._spec.rank)
    b_matrix = u[:, :rank]
    c_matrix = tf.transpose(v)[:rank, :]
    # Split the retained singular values evenly between the two factors.
    s_mat = tf.diag(tf.sqrt(s[:rank]))
    b_matrix = tf.matmul(b_matrix, s_mat)
    c_matrix = tf.matmul(s_mat, c_matrix)
    logging.info(
        'Inside tpu_matrix_compressor: a_matrix, b_matrix, c_matrix '
        'shapes are: %s, %s, %s', a_matrix.shape, b_matrix.shape,
        c_matrix.shape)
    return [b_matrix, c_matrix]

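# A minimal, self-contained sketch (assuming TensorFlow 1.x) of the
# factorization performed by tpu_matrix_compressor above:
# a_matrix ~= b_matrix @ c_matrix, with the square roots of the top singular
# values split between the two factors. `rank` is hard-coded here instead of
# coming from comp_op_utils / self._spec.
import numpy as np
import tensorflow as tf

a = tf.constant(np.random.RandomState(0).randn(6, 4), tf.float32)
rank = 2
s, u, v = tf.svd(a)
s_mat = tf.diag(tf.sqrt(s[:rank]))
b = tf.matmul(u[:, :rank], s_mat)
c = tf.matmul(s_mat, tf.transpose(v)[:rank, :])

with tf.Session() as sess:
    a_np, approx = sess.run([a, tf.matmul(b, c)])
    print(np.linalg.norm(a_np - approx))  # residual of the rank-2 approximation
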
def _orthogonal_init(self, shape, initializer, dtype=tf.float32,
                     redundant_rank=False):
    """Returns SVD factors (u, v) of a randomly initialized matrix, for use as
    an orthogonal initialization. If `redundant_rank` is set, the factors of
    two independent random matrices are concatenated."""
    if redundant_rank:
        matrix1 = initializer(shape=shape, dtype=dtype)
        _, u1, v1 = tf.svd(matrix1, full_matrices=False, compute_uv=True)
        matrix2 = initializer(shape=shape, dtype=dtype)
        _, u2, v2 = tf.svd(matrix2, full_matrices=False, compute_uv=True)
        u = tf.concat([u1, u2], axis=1)
        v = tf.concat([v1, v2], axis=0)
    else:
        matrix = initializer(shape=shape, dtype=dtype)
        _, u, v = tf.svd(matrix, full_matrices=False, compute_uv=True)
    return u, v

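# A minimal, self-contained sketch (assuming TensorFlow 1.x) of the
# non-redundant branch of _orthogonal_init above: the SVD of a random matrix
# yields factors with orthonormal columns that can serve as an orthogonal
# initialization. The shape below is an arbitrary example.
import numpy as np
import tensorflow as tf

shape = (10, 4)
matrix = tf.random_normal(shape, dtype=tf.float32, seed=0)
_, u, v = tf.svd(matrix, full_matrices=False, compute_uv=True)
# u: (10, 4) with orthonormal columns, v: (4, 4) orthogonal.
check = tf.matmul(u, u, transpose_a=True)  # should be ~identity

with tf.Session() as sess:
    print(np.round(sess.run(check), 3))
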
def symmetric_orthogonalization(x):
    """Maps 9D input vectors onto SO(3) via symmetric orthogonalization."""
    # Inner dimensions of the input should be 3x3 matrices.
    m = tf.reshape(x, (-1, 3, 3))
    _, u, v = tf.svd(m)
    # Flip the sign of the last singular vector if needed so that det(r) = +1.
    det = tf.linalg.det(tf.matmul(u, v, transpose_b=True))
    r = tf.matmul(
        tf.concat([u[:, :, :-1], u[:, :, -1:] * tf.reshape(det, [-1, 1, 1])], 2),
        v, transpose_b=True)
    return r

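# A usage sketch (assuming TensorFlow 1.x and that symmetric_orthogonalization
# above is in scope): project random 9D vectors onto SO(3) and check that the
# results are proper rotations (orthonormal with determinant +1).
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.RandomState(0).randn(5, 9), tf.float32)
r = symmetric_orthogonalization(x)
identity_check = tf.matmul(r, r, transpose_a=True)  # ~batch of 3x3 identities
det_check = tf.linalg.det(r)                        # ~1.0 for each matrix

with tf.Session() as sess:
    rtr, dets = sess.run([identity_check, det_check])
    print(np.max(np.abs(rtr - np.eye(3))), dets)  # ~0 and values close to +1
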
def svd_orthogonalize(m):
    """Convert 9D representation to SO(3) using SVD orthogonalization.

    Args:
      m: [BATCH, 3, 3] 3x3 matrices.

    Returns:
      [BATCH, 3, 3] SO(3) rotation matrices.
    """
    m_transpose = tf.matrix_transpose(tf.math.l2_normalize(m, axis=-1))
    _, u, v = tf.svd(m_transpose)
    det = tf.linalg.det(tf.matmul(v, u, transpose_b=True))
    # Check orientation reflection.
    r = tf.matmul(
        tf.concat([v[:, :, :-1], v[:, :, -1:] * tf.reshape(det, [-1, 1, 1])], 2),
        u, transpose_b=True)
    return r

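# A usage sketch (assuming TensorFlow 1.x and that svd_orthogonalize above is
# in scope): a scaled rotation matrix is mapped back to (approximately) the
# same rotation. The z-axis rotation and the scale factor are arbitrary
# example values.
import numpy as np
import tensorflow as tf

theta = 0.3
rot = np.array([[np.cos(theta), -np.sin(theta), 0.],
                [np.sin(theta), np.cos(theta), 0.],
                [0., 0., 1.]], dtype=np.float32)
m = tf.constant(2.5 * rot[np.newaxis], tf.float32)  # [1, 3, 3] scaled rotation
r = svd_orthogonalize(m)

with tf.Session() as sess:
    print(np.max(np.abs(sess.run(r)[0] - rot)))  # expected: close to 0
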
def train_incremental_pca(step, inputs, n_components):
    """Implement the incremental PCA model from:

    D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual
    Tracking, International Journal of Computer Vision, Volume 77, Issue 1-3,
    pp. 125-141, May 2008.
    See http://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf

    Args:
      step: Training step variable.
      inputs: A float32 `Tensor` of input data.
      n_components: Number of components to keep.

    Returns:
      A tuple of `train_op` and `noise_variance` `Tensor`-s, where
      `noise_variance` is the estimated noise covariance following the
      Probabilistic PCA model from Tipping and Bishop 1999. See "Pattern
      Recognition and Machine Learning" by C. Bishop, 12.2.1 p. 574 or
      http://www.miketipping.com/papers/met-mppca.pdf.
    """
    with tf.variable_scope('IncrementalPCA', [inputs]):
        n_samples, n_features = inputs.shape
        n_samples_seen = tf.get_variable(
            'n_samples_seen', [1], dtype=tf.int32,
            initializer=tf.zeros_initializer())
        running_mean = tf.get_variable(
            'running_mean', [1, n_features], initializer=tf.zeros_initializer())
        components = tf.get_variable(
            'components', [n_components, n_features],
            initializer=tf.zeros_initializer())
        singular_vals = tf.get_variable('singular_vals', [n_components])

        n_total_samples = tf.cast(n_samples_seen + n_samples, tf.float32)

        # Updated column mean over all samples seen so far.
        col_mean = running_mean * tf.to_float(n_samples_seen)
        col_mean += tf.reduce_sum(inputs, -2, keepdims=True)
        col_mean /= n_total_samples
        col_batch_mean = tf.reduce_mean(inputs, -2, keepdims=True)

        # Correction term that accounts for the shift between the running mean
        # and the current batch mean.
        mean_correction = tf.sqrt(
            tf.to_float(
                (n_samples_seen * n_samples) / (n_samples_seen + n_samples))
        ) * (running_mean - col_batch_mean)

        # Stack the previous components (scaled by their singular values), the
        # centered batch, and the mean correction, then re-run the SVD.
        x = tf.concat(
            [
                tf.reshape(singular_vals, [-1, 1]) * components,
                inputs - col_batch_mean,
                mean_correction,
            ],
            axis=0)
        s, _, v = tf.svd(x, full_matrices=False, compute_uv=True)

        # Sign correction so the SVD output is deterministic.
        v = -tf.transpose(v)
        abs_v = tf.abs(v)
        m = tf.equal(abs_v, tf.reduce_max(abs_v, axis=-2, keepdims=True))
        m = tf.cast(m, v.dtype)
        signs = tf.sign(tf.reduce_sum(v * m, axis=-2, keepdims=True))
        v *= signs

        explained_variance = tf.square(s) / (n_total_samples - 1)
        noise_variance = tf.reduce_mean(explained_variance[n_components:])

        with tf.control_dependencies([
            components.assign(v[:n_components]),
            singular_vals.assign(s[:n_components]),
        ]):
            train_op = tf.group(
                n_samples_seen.assign_add([n_samples]),
                running_mean.assign(col_mean),
                step.assign_add(1),
                name='train_op')
        return train_op, noise_variance

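# A usage sketch (assuming TensorFlow 1.x and that train_incremental_pca above
# is in scope): fit the streaming PCA variables on batches of toy Gaussian
# data. The batch size, feature dimension, and number of components are
# illustrative; `inputs` needs a static shape because the function reads
# n_samples / n_features from it.
import numpy as np
import tensorflow as tf

step = tf.train.get_or_create_global_step()
inputs = tf.placeholder(tf.float32, shape=[32, 16])
train_op, noise_variance = train_incremental_pca(step, inputs, n_components=4)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
        batch = np.random.randn(32, 16).astype(np.float32)
        _, nv = sess.run([train_op, noise_variance], {inputs: batch})
    print(nv)  # estimated noise variance after ten batches
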
def posdef_eig_svd(mat):
    """Computes the singular values and left singular vectors of a matrix."""
    evals, evecs, _ = tf.svd(mat)
    return evals, evecs

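# A minimal, self-contained sketch (assuming TensorFlow 1.x) of why
# posdef_eig_svd above is valid: for a symmetric positive semi-definite matrix
# the singular values coincide with the eigenvalues, so tf.svd doubles as an
# eigendecomposition.
import numpy as np
import tensorflow as tf

a = np.random.RandomState(0).randn(5, 5).astype(np.float32)
mat = tf.constant(a.dot(a.T))  # symmetric PSD by construction
svd_vals, _, _ = tf.svd(mat)
eig_vals, _ = tf.self_adjoint_eig(mat)

with tf.Session() as sess:
    s, e = sess.run([svd_vals, eig_vals])
    # tf.svd sorts descending, tf.self_adjoint_eig ascending, so sort both.
    print(np.allclose(np.sort(s), np.sort(e), atol=1e-4))
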
def wct_tf(content, style, alpha, eps=1e-8):
    '''TensorFlow version of the Whitening and Coloring Transform (WCT).

    Assumes that content/style encodings have shape 1xHxWxC.

    See p.4 of the Universal Style Transfer paper for the corresponding
    equations: https://arxiv.org/pdf/1705.08086.pdf
    '''
    # Remove batch dim and reorder to CxHxW
    content_t = tf.transpose(tf.squeeze(content), (2, 0, 1))
    style_t = tf.transpose(tf.squeeze(style), (2, 0, 1))

    Cc, Hc, Wc = tf.unstack(tf.shape(content_t))
    Cs, Hs, Ws = tf.unstack(tf.shape(style_t))

    # CxHxW -> CxH*W
    content_flat = tf.reshape(content_t, (Cc, Hc * Wc))
    style_flat = tf.reshape(style_t, (Cs, Hs * Ws))

    # Content covariance
    mc = tf.reduce_mean(content_flat, axis=1, keep_dims=True)
    fc = content_flat - mc
    fcfc = tf.matmul(fc, fc, transpose_b=True) / (
        tf.cast(Hc * Wc, tf.float32) - 1.) + tf.eye(Cc) * eps

    # Style covariance
    ms = tf.reduce_mean(style_flat, axis=1, keep_dims=True)
    fs = style_flat - ms
    fsfs = tf.matmul(fs, fs, transpose_b=True) / (
        tf.cast(Hs * Ws, tf.float32) - 1.) + tf.eye(Cs) * eps

    # tf.svd is slower on GPU, see https://github.com/tensorflow/tensorflow/issues/13603
    with tf.device('/cpu:0'):
        Sc, Uc, _ = tf.svd(fcfc)
        Ss, Us, _ = tf.svd(fsfs)

    # Filter small singular values
    k_c = tf.reduce_sum(tf.cast(tf.greater(Sc, 1e-5), tf.int32))
    k_s = tf.reduce_sum(tf.cast(tf.greater(Ss, 1e-5), tf.int32))

    # Whiten content feature
    Dc = tf.diag(tf.pow(Sc[:k_c], -0.5))
    fc_hat = tf.matmul(
        tf.matmul(tf.matmul(Uc[:, :k_c], Dc), Uc[:, :k_c], transpose_b=True), fc)

    # Color content with style
    Ds = tf.diag(tf.pow(Ss[:k_s], 0.5))
    fcs_hat = tf.matmul(
        tf.matmul(tf.matmul(Us[:, :k_s], Ds), Us[:, :k_s], transpose_b=True),
        fc_hat)

    # Re-center with mean of style
    fcs_hat = fcs_hat + ms

    # Blend whiten-colored feature with original content feature
    blended = alpha * fcs_hat + (1 - alpha) * (fc + mc)

    # CxH*W -> CxHxW
    blended = tf.reshape(blended, (Cc, Hc, Wc))
    # CxHxW -> 1xHxWxC
    blended = tf.expand_dims(tf.transpose(blended, (1, 2, 0)), 0)

    return blended

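# A usage sketch (assuming TensorFlow 1.x and that wct_tf above is in scope).
# In the Universal Style Transfer setup the inputs would be VGG encoder
# features; the random arrays, shapes, and alpha=0.6 here are placeholders for
# illustration only.
import numpy as np
import tensorflow as tf

content = tf.placeholder(tf.float32, [1, 32, 32, 64])
style = tf.placeholder(tf.float32, [1, 24, 24, 64])
stylized = wct_tf(content, style, alpha=0.6)

with tf.Session() as sess:
    out = sess.run(stylized, {
        content: np.random.rand(1, 32, 32, 64).astype(np.float32),
        style: np.random.rand(1, 24, 24, 64).astype(np.float32),
    })
    print(out.shape)  # (1, 32, 32, 64): spatial size of the content input
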
def test_spectral_norm():
    np.random.seed(2019)
    input_size = 7
    kernel_size = 3
    in_channel = 2
    out_channel = 6
    # for easy testing, we assume outputs of the convolution have the same
    # spatial dimensions as the inputs, so do not change `stride` and `padding`
    stride = 1
    padding = "SAME"
    coeff = 0.9
    power_iter = 100

    w_np = np.random.normal(size=(kernel_size, kernel_size, in_channel, out_channel))
    x_np = np.random.normal(size=(1, input_size, input_size, in_channel))
    w = tf.constant(w_np, dtype=tf.float32)
    x = tf.constant(x_np, dtype=tf.float32)

    # spectral norm of the reshaped kernel
    with tf.variable_scope("sigma"):
        sigma = spectral_norm(w, coeff, power_iter, debug=True)

    # spectral norm of the unfolded kernel
    with tf.variable_scope("sigma_conv"):
        sigma_conv = spectral_norm_conv(
            w, coeff, power_iter,
            in_shape=x.shape,
            out_shape=(1, input_size, input_size, out_channel),
            stride=1,
            padding=padding,
            debug=True)

    # create y to check the unfolded kernel
    y = tf.nn.conv2d(x, filter=w, strides=stride, padding=padding)

    # svd of the reshaped kernel
    w_reshaped = tf.reshape(w, [-1, w.shape[-1]])
    s_reshaped = tf.svd(w_reshaped, compute_uv=False)[0]

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    _sigma, _sigma_conv, _s_reshaped, _y = sess.run(
        [sigma, sigma_conv, s_reshaped, y])

    # unfold the kernel: y = conv2d(x, w) <=> y = x * w_unfolded
    w_unfolded = np.zeros(
        [in_channel * (input_size**2), out_channel * (input_size**2)])
    for c1 in range(in_channel):
        for c2 in range(out_channel):
            first_row = input_size * input_size * c1
            first_col = input_size * input_size * c2
            this_block = unfold_kernel(w_np[:, :, c1, c2], input_size)
            w_unfolded[first_row:(first_row + input_size * input_size),
                       first_col:(first_col + input_size * input_size)] = this_block
    x_np_reshaped = np.reshape(np.transpose(x_np, axes=[0, 3, 1, 2]), [1, -1])
    y_unfolded = np.dot(x_np_reshaped, w_unfolded)
    _y_reshaped = np.reshape(np.transpose(_y, axes=[0, 3, 1, 2]), [1, -1])

    print("Mean Absolute Error between conv2d(x, w) and x * w_unfolded: {}.".format(
        np.mean(np.abs(_y_reshaped - y_unfolded))))
    print("Largest singular value of reshaped kernel by tf.svd: {}.".format(
        _s_reshaped))
    print("Largest singular value of reshaped kernel estimated after {} iterations: {}.".format(
        power_iter, _sigma[0, 0]))

    s_unfolded = np.linalg.svd(w_unfolded, compute_uv=False)[0]
    print("Largest singular value of unfolded kernel by np.linalg.svd: {}.".format(
        s_unfolded))
    print("Largest singular value of unfolded kernel estimated after {} iterations: {}.".format(
        power_iter, _sigma_conv[0, 0]))

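# A self-contained NumPy sketch of the power-iteration estimate that the test
# above compares against tf.svd / np.linalg.svd. `power_iteration_sigma` is a
# hypothetical helper written for illustration; it is not the repository's
# spectral_norm implementation, only the standard technique being checked.
import numpy as np

def power_iteration_sigma(w, num_iters=100, seed=0):
    """Estimates the largest singular value of a 2-D matrix `w`."""
    rng = np.random.RandomState(seed)
    u = rng.randn(w.shape[0])
    for _ in range(num_iters):
        v = w.T.dot(u)
        v /= np.linalg.norm(v)
        u = w.dot(v)
        u /= np.linalg.norm(u)
    return float(u.dot(w).dot(v))

w_np = np.random.RandomState(2019).randn(3 * 3 * 2, 6)  # reshaped 3x3x2x6 kernel
print(power_iteration_sigma(w_np))
print(np.linalg.svd(w_np, compute_uv=False)[0])  # should agree closely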