def Test(self): np.random.seed(1) x_np = np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) if is_complex: x_np += 1j * np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) for compute_uv in False, True: for full_matrices in False, True: with self.test_session() as sess: if use_static_shape_: x_tf = tf.constant(x_np) else: x_tf = tf.placeholder(dtype_) if compute_uv: s_tf, u_tf, v_tf = tf.svd(x_tf, compute_uv=compute_uv, full_matrices=full_matrices) if use_static_shape_: s_tf_val, u_tf_val, v_tf_val = sess.run([s_tf, u_tf, v_tf]) else: s_tf_val, u_tf_val, v_tf_val = sess.run([s_tf, u_tf, v_tf], feed_dict={x_tf: x_np}) else: s_tf = tf.svd(x_tf, compute_uv=compute_uv, full_matrices=full_matrices) if use_static_shape_: s_tf_val = sess.run(s_tf) else: s_tf_val = sess.run(s_tf, feed_dict={x_tf: x_np}) if compute_uv: u_np, s_np, v_np = np.linalg.svd(x_np, compute_uv=compute_uv, full_matrices=full_matrices) else: s_np = np.linalg.svd(x_np, compute_uv=compute_uv, full_matrices=full_matrices) # We explicitly avoid the situation where numpy eliminates a first # dimension that is equal to one s_np = np.reshape(s_np, s_tf_val.shape) CompareSingularValues(self, s_np, s_tf_val) if compute_uv: CompareSingularVectors(self, u_np, u_tf_val, min(shape_[-2:])) CompareSingularVectors(self, np.conj(np.swapaxes(v_np, -2, -1)), v_tf_val, min(shape_[-2:])) CheckApproximation(self, x_np, u_tf_val, s_tf_val, v_tf_val, full_matrices) CheckUnitary(self, u_tf_val) CheckUnitary(self, v_tf_val)
def Test(self):
    np.random.seed(1)
    if dtype_ in (np.float32, np.float64):
        x = np.random.uniform(
            low=-1.0, high=1.0,
            size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    elif dtype_ == np.complex64:
        x = (np.random.uniform(
            low=-1.0, high=1.0,
            size=np.prod(shape_)).reshape(shape_).astype(np.float32) +
             1j * np.random.uniform(
                 low=-1.0, high=1.0,
                 size=np.prod(shape_)).reshape(shape_).astype(np.float32))
    else:
        x = (np.random.uniform(
            low=-1.0, high=1.0,
            size=np.prod(shape_)).reshape(shape_).astype(np.float64) +
             1j * np.random.uniform(
                 low=-1.0, high=1.0,
                 size=np.prod(shape_)).reshape(shape_).astype(np.float64))
    for compute_uv in False, True:
        for full_matrices in False, True:
            with self.test_session():
                if x.ndim == 2:
                    if compute_uv:
                        tf_s, tf_u, tf_v = tf.svd(
                            tf.constant(x), compute_uv=compute_uv,
                            full_matrices=full_matrices)
                    else:
                        tf_s = tf.svd(
                            tf.constant(x), compute_uv=compute_uv,
                            full_matrices=full_matrices)
                else:
                    if compute_uv:
                        tf_s, tf_u, tf_v = tf.batch_svd(
                            tf.constant(x), compute_uv=compute_uv,
                            full_matrices=full_matrices)
                    else:
                        tf_s = tf.batch_svd(
                            tf.constant(x), compute_uv=compute_uv,
                            full_matrices=full_matrices)
                if compute_uv:
                    np_u, np_s, np_v = np.linalg.svd(
                        x, compute_uv=compute_uv, full_matrices=full_matrices)
                else:
                    np_s = np.linalg.svd(
                        x, compute_uv=compute_uv, full_matrices=full_matrices)
                CompareSingularValues(self, np_s, tf_s.eval())
                if compute_uv:
                    CompareSingularVectors(self, np_u, tf_u.eval(),
                                           min(shape_[-2:]))
                    CompareSingularVectors(self,
                                           np.conj(np.swapaxes(np_v, -2, -1)),
                                           tf_v.eval(), min(shape_[-2:]))
                    CheckApproximation(self, x, tf_u, tf_s, tf_v,
                                       full_matrices)
                    CheckUnitary(self, tf_u)
                    CheckUnitary(self, tf_v)
def testBatchAndSvd(self):
    with self.cached_session():
        mat = [[1., 2.], [2., 3.]]
        batched_mat = tf.expand_dims(mat, [0])
        result = tf.matmul(mat, mat).eval()
        result_batched = tf.batch_matmul(batched_mat, batched_mat).eval()
        self.assertAllEqual(result_batched, np.expand_dims(result, 0))
        self.assertAllEqual(
            tf.svd(mat, False, True).eval(),
            tf.svd(mat, compute_uv=False, full_matrices=True).eval())
def testWrongDimensions(self):
    # The input to batch_svd should be a tensor of at least rank 2.
    scalar = tf.constant(1.)
    with self.assertRaisesRegexp(ValueError,
                                 "Shape must be at least rank 2 but is rank 0"):
        tf.svd(scalar)
    vector = tf.constant([1., 2.])
    with self.assertRaisesRegexp(ValueError,
                                 "Shape must be at least rank 2 but is rank 1"):
        tf.svd(vector)
def __tensor_norm__(self, tensor, order):
    if order in ['Si']:
        # Schatten inf norm
        s, U, V = tf.svd(tensor, full_matrices=False)
        return tf.norm(s, ord=np.inf)
    elif order[0] == 'S':
        # Schatten norm
        s, U, V = tf.svd(tensor, full_matrices=False)
        sub_order = int(order[1:])
        return tf.norm(s, ord=sub_order)
    else:
        sub_order = int(order)
        return tf.norm(tensor, ord=sub_order)
def Test(self): np.random.seed(1) x = np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) if dtype_ == np.float32: atol = 1e-4 else: atol = 1e-14 for compute_uv in False, True: for full_matrices in False, True: with self.test_session(): if x.ndim == 2: if compute_uv: tf_s, tf_u, tf_v = tf.svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: if compute_uv: tf_s, tf_u, tf_v = tf.batch_svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.batch_svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) if compute_uv: np_u, np_s, np_v = np.linalg.svd(x, compute_uv=compute_uv, full_matrices=full_matrices) else: np_s = np.linalg.svd(x, compute_uv=compute_uv, full_matrices=full_matrices) self.assertAllClose(np_s, tf_s.eval(), atol=atol) if compute_uv: CompareSingularVectors(self, np_u, tf_u.eval(), min(shape_[-2:]), atol) CompareSingularVectors(self, np.swapaxes(np_v, -2, -1), tf_v.eval(), min(shape_[-2:]), atol) CheckApproximation(self, x, tf_u, tf_s, tf_v, full_matrices, atol) CheckUnitary(self, tf_u) CheckUnitary(self, tf_v)
def random_orthonormal_initializer(shape, dtype=tf.float32,
                                   partition_info=None):  # pylint: disable=unused-argument
    """Variable initializer that produces a random orthonormal matrix."""
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError("Expecting square shape, got %s" % shape)
    _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
    return u
def update(w_old, X, y, L2_param=0):
    '''
    w_new = w_old - w_update
    w_update = (X'RX + lambda*I)^(-1) (X'(mu-y) + lambda*w_old)
    lambda is L2_param

    w_old: dx1
    X: Nxd
    y: Nx1
    ---
    w_update: dx1
    '''
    d = X.shape.as_list()[1]
    mu = tf.sigmoid(tf.matmul(X, w_old))  # Nx1
    R_flat = mu * (1 - mu)  # element-wise, Nx1

    L2_reg_term = L2_param * tf.eye(d)
    XRX = tf.matmul(tf.transpose(X), R_flat * X) + L2_reg_term  # dxd
    S, U, V = tf.svd(XRX, full_matrices=True, compute_uv=True)
    S = tf.expand_dims(S, 1)

    # calculate pseudo inverse via SVD
    S_pinv = tf.where(tf.not_equal(S, 0), 1 / S, tf.zeros_like(S))  # not good, will produce inf when divide by 0
    XRX_pinv = tf.matmul(V, S_pinv * tf.transpose(U))

    # w = w - (X^T R X)^(-1) X^T (mu-y)
    # w_new = tf.assign(w_old, w_old - tf.matmul(tf.matmul(XRX_pinv, tf.transpose(X)), mu - y))
    w_update = tf.matmul(XRX_pinv, tf.matmul(tf.transpose(X), mu - y) + L2_param * w_old)
    return w_update
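# A minimal usage sketch for the Newton/IRLS `update` above (assumptions:
# TF 1.x session API, synthetic data, and that the update is applied
# iteratively as w <- w - w_update; none of this comes from the original repo).
import numpy as np
import tensorflow as tf

N, d = 100, 3
X_np = np.random.randn(N, d).astype(np.float32)
true_w = np.array([[1.], [-2.], [0.5]], dtype=np.float32)
y_np = (X_np @ true_w > 0).astype(np.float32)

X = tf.constant(X_np)
y = tf.constant(y_np)
w = tf.Variable(tf.zeros([d, 1]))
newton_step = tf.assign(w, w - update(w, X, y, L2_param=0.01))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):  # a few Newton steps usually suffice
        sess.run(newton_step)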
def set_similarity(self, valid_examples=None, pca=True):
    if valid_examples is None:
        if pca:
            valid_examples = np.array(range(20))
        else:
            valid_examples = np.array(range(self.num_vocabulary))
    self.valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    self.norm = tf.sqrt(
        tf.reduce_sum(tf.square(self.g_embeddings), 1, keep_dims=True))
    self.normalized_embeddings = self.g_embeddings / self.norm
    # PCA
    if self.num_vocabulary >= 20 and pca:
        emb = tf.matmul(self.normalized_embeddings,
                        tf.transpose(self.normalized_embeddings))
        s, u, v = tf.svd(emb)
        u_r = tf.strided_slice(u, begin=[0, 0],
                               end=[20, self.num_vocabulary], strides=[1, 1])
        self.normalized_embeddings = tf.matmul(u_r, self.normalized_embeddings)
    self.valid_embeddings = tf.nn.embedding_lookup(
        self.normalized_embeddings, self.valid_dataset)
    self.similarity = tf.matmul(self.valid_embeddings,
                                tf.transpose(self.normalized_embeddings))
def feedforward(self, input, is_training):
    update_sigma = []
    # 1. Get the input shape and reshape the tensor into [Batch, Dim]
    width, channel = input.shape[1], input.shape[3]
    reshape_input = tf.reshape(input, [batch_size, -1])
    trans_input = reshape_input.shape[1]

    # 2. Perform SVD and get the sigma values
    singular_values, u, _ = tf.svd(reshape_input, full_matrices=False)

    def training_fn():
        # 3. Training
        sigma1 = tf.diag(singular_values)
        sigma = tf.slice(sigma1, [0, 0],
                         [trans_input, (width * width * channel) // 4])
        pca = tf.matmul(u, sigma)
        update_sigma.append(tf.assign(self.moving_sigma,
                                      self.moving_sigma * 0.9 + sigma * 0.1))
        return pca, update_sigma

    def testing_fn():
        # 4. Testing: calculate the pca using the exponentially weighted moving averages
        pca = tf.matmul(u, self.moving_sigma)
        return pca, update_sigma

    pca, update_sigma = tf.cond(is_training, true_fn=training_fn, false_fn=testing_fn)
    pca_reshaped = tf.reshape(pca, [batch_size, (width // 2), (width // 2), channel])
    out_put = self.alpha * pca_reshaped + self.beta
    # out_put = tf.layers.batch_normalization(out_put, center=True, scale=True, training=is_training)
    return out_put, update_sigma
def align(X, Y):
    # align shapes from X to optimal transformation between X and Y
    n_pc_points = X.shape[1]
    mu_x = tf.reduce_mean(X, axis=1)
    mu_y = tf.reduce_mean(Y, axis=1)
    concat_mu_x = tf.tile(tf.expand_dims(mu_x, 1), [1, n_pc_points, 1])
    concat_mu_y = tf.tile(tf.expand_dims(mu_y, 1), [1, n_pc_points, 1])
    centered_y = tf.expand_dims(Y - concat_mu_y, 2)
    centered_x = tf.expand_dims(X - concat_mu_x, 2)

    # transpose y
    centered_y = tf.einsum('ijkl->ijlk', centered_y)
    mult_xy = tf.einsum('abij,abjk->abik', centered_y, centered_x)

    # sum
    C = tf.einsum('abij->aij', mult_xy)
    s, u, v = tf.svd(C)
    v = tf.einsum("aij->aji", v)
    R_opt = tf.einsum("aij,ajk->aik", u, v)
    t_opt = mu_y - tf.einsum("aki,ai->ak", R_opt, mu_x)
    concat_R_opt = tf.tile(tf.expand_dims(R_opt, 1), [1, n_pc_points, 1, 1])
    concat_t_opt = tf.tile(tf.expand_dims(t_opt, 1), [1, n_pc_points, 1])
    opt_labels = tf.einsum("abki,abi->abk", concat_R_opt, X) + concat_t_opt
    return opt_labels
def _iso_from_svd_decomp(env, decomp_device=None):
    with tf.device(decomp_device):
        env_r = tf.reshape(env, (env.shape[0], -1))
        s, u, v = tf.svd(env_r)
        vh = tf.linalg.adjoint(v)
        vh = tf.reshape(vh, (vh.shape[0], env.shape[1], env.shape[2]))
        return u, s, vh
def cal_cluster_loss(inputs, mask, dist_type='Euclid', use_svd=False):
    '''
    calculate the sum of Laplacian matrix eigenvalues as cluster loss
    inputs: b * seq_len * embed_dim
    return: scalar cluster_loss
    '''
    shape = inputs.shape.as_list()
    W = dist_matrix(inputs, mask, dist_type=dist_type)  # b * seq_len * seq_len
    degree = tf.reduce_sum(W, axis=2)  # b * seq_len
    degree = tf.reshape(degree, [shape[0], shape[1], 1])  # b * seq_len * 1
    tf_eye = tf.eye(shape[1], batch_shape=[shape[0]])  # b * seq_len * seq_len
    D = tf.multiply(degree, tf_eye)
    D_norm = tf.multiply(1 / tf.sqrt(degree), tf_eye)  # b * seq_len * seq_len
    L = D - W
    L = tf.matmul(D_norm, L)
    L = tf.matmul(L, D_norm)
    I = tf.eye(shape[1], batch_shape=[shape[0]])
    L = L - 0.5 * I  # friendly to power iteration
    if use_svd:
        s = tf.svd(L, compute_uv=False)
        s = s[:, 0]
    else:
        s = power_iteration(L)[0]
    s = tf.reshape(s, [shape[0]])
    return -s  # b
def pca_pool_with_mask(temp, m = 1): [N, H, W, K, C] = temp.get_shape().as_list() if m == 1: temp = tf.transpose(temp, [0,1,2,4,3]) temp = tf.reshape(temp, [-1, K, 1]) else: temp = tf.transpose(temp, [0,4,3,1,2]) temp = tf.reshape(temp, [-1, K, H*W]) # compute for svd [s, u, v] = tf.svd(tf.matmul(temp, tf.transpose(temp, [0,2,1])), compute_uv=True) # use mark to remove Eigenvector except for the first one, which is the main component temp_mark = np.zeros([K,K]) temp_mark[:,0] = 1 mark = tf.constant(temp_mark, dtype=tf.float32) # after reduce_sum actually it has been transposed automatically u = tf.reduce_sum(tf.multiply(u, mark), axis=2) u = tf.reshape(u, [-1, 1, K]) u = u / np.sqrt(K) # divide sqrt(k) to remove the effect of size of window temp = tf.matmul(u, temp)/np.sqrt(K) if m == 1: temp = tf.reshape(temp, [-1, H, W, C]) u = tf.transpose(tf.reshape(u, [N, H, W, C, K]), [0, 1, 2, 4, 3]) else: temp = tf.reshape(temp, [-1, C, H, W]) temp = tf.transpose(temp, [0, 2, 3, 1]) u = tf.transpose(tf.reshape(u, [N, C, K, 1, 1]), [0, 3, 4, 2, 1]) u = tf.multiply(u, tf.ones_like(y)) return temp, u
def SVD(X, n, name=None):
    with tf.variable_scope(name):
        sz = X.get_shape().as_list()
        if len(sz) > 2:
            x = tf.reshape(X, [sz[0], sz[1] * sz[2], sz[3]])
            n = min(n, sz[1] * sz[2], sz[3])
        else:
            x = tf.reshape(X, [sz[0], 1, -1])
            n = 1
        sz = x.get_shape().as_list()

        with tf.device('CPU'):
            g = tf.get_default_graph()
            with g.gradient_override_map({"Svd": "Svd_"}):
                s, u, v = tf.svd(x, full_matrices=False)

        s = tf.slice(s, [0, 0], [-1, n])
        s = removenan(s)
        s = s / tf.sqrt(tf.reduce_sum(tf.square(s), 1, keepdims=True) + 1e-3)

        U = tf.slice(u, [0, 0, 0], [-1, -1, n])
        V = tf.slice(v, [0, 0, 0], [-1, -1, n])
        V = removenan(V)
        U = removenan(U)
        V /= tf.sqrt(tf.reduce_sum(tf.square(V), 1, keepdims=True) + 1e-3)
        U /= tf.sqrt(tf.reduce_sum(tf.square(U), 1, keepdims=True) + 1e-3)
        return s, U, V
def orthogonal_procrustes(A, B, dtype=tf.float32):
    """
    Compute the matrix solution of the orthogonal Procrustes problem.

    Tensorflow port of `scipy.spatial.orthogonal_procrustes`

    Args:
        A: (B, M, N) array-like
        B: (B, M, N) array-like

    Returns:
        R: (B, N, N) tensor - transform for each element of the batch
        scale: (B, 1) tensor - sum of singular values of A.T @ B.
    """
    with tf.name_scope('orthogonal_procrustes'):
        A = tf.convert_to_tensor(A, dtype=dtype)
        B = tf.convert_to_tensor(B, dtype=dtype)
        if A.shape.ndims < 3:
            raise ValueError('expected ndim to be 3, but observed %s' % A.shape.ndims)
        # if A.shape != B.shape:
        #     raise ValueError('the shapes of A and B differ (%s vs %s)' % (
        #         A.shape, B.shape))
        w, u, v = tf.svd(tf.matmul(A, B, transpose_a=True))
        R = tf.matmul(u, v, transpose_b=True)
        scale = tf.reduce_sum(w, axis=-1, keepdims=True)
        return R, scale
def symsqrt(mat, eps=1e-7):
    """Symmetric square root."""
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    print("Warning, cutting off at eps")
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse(mat, eps=1e-10):
    """Computes pseudo-inverse of mat, treating eigenvalues below eps as 0."""
    s, u, v = tf.svd(mat)
    eps = 1e-10  # zero threshold for eigenvalues
    si = tf.where(tf.less(s, eps), s, 1. / s)
    return u @ tf.diag(si) @ tf.transpose(v)
def Test(self): np.random.seed(1) x = np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) for compute_uv in False, True: for full_matrices in False, True: with self.test_session(): if x.ndim == 2: if compute_uv: tf_s, tf_u, tf_v = tf.svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: if compute_uv: tf_s, tf_u, tf_v = tf.batch_svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.batch_svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) if compute_uv: np_u, np_s, np_v = np.linalg.svd( x, compute_uv=compute_uv, full_matrices=full_matrices) else: np_s = np.linalg.svd(x, compute_uv=compute_uv, full_matrices=full_matrices) CompareSingularValues(self, np_s, tf_s.eval()) if compute_uv: CompareSingularVectors(self, np_u, tf_u.eval(), min(shape_[-2:])) CompareSingularVectors(self, np.swapaxes(np_v, -2, -1), tf_v.eval(), min(shape_[-2:])) CheckApproximation(self, x, tf_u, tf_s, tf_v, full_matrices) CheckUnitary(self, tf_u) CheckUnitary(self, tf_v)
def Test(self): np.random.seed(1) x = np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) if dtype_ == np.float32: atol = 1e-4 else: atol = 1e-14 for compute_uv in False, True: for full_matrices in False, True: with self.test_session(): if x.ndim == 2: if compute_uv: tf_s, tf_u, tf_v = tf.svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: if compute_uv: tf_s, tf_u, tf_v = tf.batch_svd( tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) else: tf_s = tf.batch_svd(tf.constant(x), compute_uv=compute_uv, full_matrices=full_matrices) if compute_uv: np_u, np_s, np_v = np.linalg.svd( x, compute_uv=compute_uv, full_matrices=full_matrices) else: np_s = np.linalg.svd(x, compute_uv=compute_uv, full_matrices=full_matrices) self.assertAllClose(np_s, tf_s.eval(), atol=atol) if compute_uv: _CompareSingularVectors(self, np_u, tf_u.eval(), atol) _CompareSingularVectors(self, np.swapaxes(np_v, -2, -1), tf_v.eval(), atol)
def gradient_eig_comparision(): epsilon = 1e-4 from scipy.io import loadmat data = loadmat('/tmp/test.mat') data = data['x'] input_shape = (None, 10, 10, 3) tf_input = tf.placeholder(K.floatx(), shape=input_shape, name='tf_input') x = SecondaryStatistic(normalization=None, name='second')(tf_input) cov_mat = x # x = LogTransform(1e-4, name='log')(x) s, u, v = tf.svd(x) inner = s + epsilon inner = tf.log(inner) inner = tf.matrix_diag(inner) tf_log = tf.matmul(u, tf.matmul(inner, tf.transpose(u, [0, 2, 1]))) y_grads = tf.placeholder(tf.float32, shape=(None, 3, 3)) grad_s = tf.gradients(tf_log, s, grad_ys=y_grads)[0] # grad_v = tf.gradients(tf_log, v, grad_ys=y_grads) # not used!, so no gradient is calculated. grad_u = tf.gradients(tf_log, u, grad_ys=y_grads)[0] grad_x = tf.gradients(tf_log, tf_input, grad_ys=y_grads)[0] grad_S = pesudo_gradient(s, u, v, grad_s, grad_u) config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = K.get_session(config=config) with sess.as_default(): result = x.eval({tf_input: data}) cov_mat_eval = cov_mat.eval({tf_input: data}) grad_s_eval = grad_s.eval({ tf_input: data, y_grads: np.ones((2, 3, 3), dtype=np.float32) }) grad_u_eval = grad_u.eval({ tf_input: data, y_grads: np.ones((2, 3, 3), dtype=np.float32) }) grad_input_eval = grad_x.eval({ tf_input: data, y_grads: np.ones((2, 3, 3), dtype=np.float32) }) grad_S_eval = grad_S.eval({ tf_input: data, y_grads: np.ones((2, 3, 3), dtype=np.float32) }) # print(grad_s_eval) # print(grad_u_eval) # print(grad_input_eval) # check for gradients print(grad_S_eval) mat_grads = loadmat('/tmp/gradients.mat') mat_grads = mat_grads['lower2']['dzdx'] mat_grads = mat_grads[0][0] mat_grads = np.transpose(mat_grads, [3, 0, 1, 2]) assert_allclose(grad_input_eval, mat_grads, rtol=1e-4)
def do_pca(co_occurrence_matrix, sess, step):
    # Perform singular value decomposition on the input matrix
    s, u, v = tf.svd(tf.cast(co_occurrence_matrix, tf.float32))
    # Use the singular values to reduce the matrix to VOCAB_SIZE x EMBED_SIZE
    embed_matrix = tf.matmul(u[:, :EMBED_SIZE], tf.diag(s[:EMBED_SIZE]))
    # Save the resulting embedding matrix
    save_embed_matrix(sess, embed_matrix)
    print('%s => Step %s' % (datetime.now(), step))
def _approx_matrix(self, A, reltol=1e-6):
    s, u, v = tf.svd(A)
    atol = tf.reduce_max(s) * reltol
    s = tf.boolean_mask(s, s > atol)
    s = tf.diag(tf.concat([s, tf.zeros([tf.shape(A)[0] - tf.size(s)])], 0))
    return tf.matmul(u, tf.matmul(s, tf.transpose(v)))
def svd_model(matrix, embedding_size=256):
    with tf.Graph().as_default() as g:
        X = tf.placeholder(dtype=tf.float32, shape=matrix.shape, name="X")
        d, u, v = tf.svd(X, name="SVD")
        truncated_d = d[:embedding_size]
        truncated_u = u[:, :embedding_size]
        return g, X, truncated_d, truncated_u, v
def spectral_norm_svd(input_):
    if len(input_.shape) < 2:
        raise ValueError(
            "Spectral norm can only be applied to multi-dimensional tensors")
    w = tf.reshape(input_, (-1, input_.shape[-1]))
    s, _, _ = tf.svd(w)
    return s[0]
def TFMatrixPower(mat_, exp_):
    """
    General Matrix Power in Tensorflow.
    This is NOT differentiable as of 1.2.
    tf.matrix_inverse and tf.matrix_determinant are though.
    """
    s, u, v = tf.svd(mat_, full_matrices=True, compute_uv=True)
    return tf.transpose(
        tf.matmul(u, tf.matmul(tf.diag(tf.pow(s, exp_)), tf.transpose(v))))
def _update_w(self, features): cond = lambda c, *args: tf.less(c, self.length - 1) initial_right = self.C2s.read(1) initial_right.set_shape([None, None, None]) initial_left = self.RMPS.w_zero initial_left.set_shape([None]) right_with_dc_dfl = tf.einsum('til,tl->ti', initial_right, self.dc_dfl) combined_l_feature = tf.einsum('j,tn->tjn', initial_left, features[0]) initial_gradient = tf.einsum('tj,tin->nij', right_with_dc_dfl, combined_l_feature) _, gradient, _ = tf.while_loop( cond=cond, body=self._get_gradient_for_w, loop_vars=[1, initial_gradient, features], shape_invariants=[ tf.TensorShape([]), tf.TensorShape([None, None, None]), tf.TensorShape([None, None, None]) ]) final_right = self.RMPS.w_final final_left = self.C1s.read(self.length - 1) right_with_dc_dfl = tf.einsum('il,tl->ti', final_right, self.dc_dfl) combined_l_feature = tf.einsum('tj,tn->tjn', final_left, features[self.length - 1]) combined_all = tf.einsum('tj,tin->nij', right_with_dc_dfl, combined_l_feature) gradient = tf.add(gradient, combined_all) * self.rate_of_change updated_w = tf.add(self.RMPS.w, gradient) dims = tf.shape(updated_w) l_dim = dims[0] * dims[1] r_dim = dims[2] flattened_updated_w = tf.reshape(updated_w, [l_dim, r_dim]) s, u, v = tf.svd(flattened_updated_w) filtered_u = utils.check_nan(u, 'u', replace_nan=True) filtered_v = utils.check_nan(v, 'v', replace_nan=True) filtered_s = tf.boolean_mask(s, tf.greater(s, self.min_singular_value)) s_size = tf.size(filtered_s) # s_size = tf.Print(s_size, [s_size], message='bond dim: ') # TODO: Have min_Size settable min_size = 1 case1 = lambda: min_size case2 = lambda: self.max_size case3 = lambda: s_size m = tf.case( { tf.less(s_size, min_size): case1, tf.greater(s_size, self.max_size): case2 }, default=case3, exclusive=True) u_cropped = filtered_u[:, 0:m] # m = tf.Print(m, [m, dims, tf.shape(u_cropped), tf.shape(filtered_u), s_size, tf.shape(gradient), l_dim, r_dim]) u_cropped = tf.reshape(u_cropped, [dims[0], m, m]) v_cropped = tf.transpose(filtered_v[:, 0:m]) self.v_matrix = v_cropped self._updated_w = tf.einsum('ij,jnl->inl', v_cropped, u_cropped)
def _procrustes(x, y, compute_optimal_scale=True): """ A Numpy port of MATLAB `procrustes` function. Args matX: array NxM of targets, with N number of points and M point dimensionality matY: array NxM of inputs compute_optimal_scale: whether we compute optimal scale or force it to be 1 Returns: d: squared error after transformation z: transformed Y t: computed rotation b: scaling c: translation """ mu_x = tf.reduce_mean(x, axis=0) mu_y = tf.reduce_mean(y, axis=0) x0 = x - mu_x y0 = y - mu_y ss_x = tf.reduce_sum(tf.square(x0)) ss_y = tf.reduce_sum(tf.square(y0)) # centred Frobenius norm norm_x = tf.sqrt(ss_x) norm_y = tf.sqrt(ss_y) # scale to equal (unit) norm x0 = x0 / norm_x y0 = y0 / norm_y # optimum rotation matrix of Y a = tf.matmul(tf.transpose(x0), y0) s, u, v = tf.svd(a, full_matrices=False) t = tf.matmul(v, tf.transpose(u)) # Make sure we have a rotation det_t = tf.matrix_determinant(t) v_s = tf.concat([v[:, :-1], tf.expand_dims(v[:, -1] * tf.sign(det_t), axis=1)], axis=1) s_s = tf.concat([s[:-1], tf.expand_dims(tf.sign(det_t) * s[-1], 0)], axis=0) t = tf.matmul(v_s, tf.transpose(u)) trace_ta = tf.reduce_sum(s_s) if compute_optimal_scale: # Compute optimum scaling of Y. b = trace_ta * norm_x / norm_y d = 1 - tf.square(trace_ta) z = norm_x * trace_ta * tf.matmul(y0, t) + mu_x else: # If no scaling allowed b = 1 d = 1 + ss_y / ss_x - 2 * trace_ta * norm_y / norm_x z = norm_y * tf.matmul(y0, t) + mu_x c = tf.expand_dims(mu_x, 0) - b * tf.matmul(tf.expand_dims(mu_y, 0), t) return d, z, t, b, c
def __init__(self, target, name, do_inverses=False): self.name = name self.target = target self.do_inverses = do_inverses self.tf_svd = SvdTuple(tf.svd(target)) self.update_counter = 0 self.init = SvdTuple( ones(target.shape[0], name=name+"_s_init"), Identity(target.shape[0], name=name+"_u_init"), Identity(target.shape[0], name=name+"_v_init"), Identity(target.shape[0], name=name+"_inv_init"), ) assert self.tf_svd.s.shape == self.init.s.shape assert self.tf_svd.u.shape == self.init.u.shape assert self.tf_svd.v.shape == self.init.v.shape # assert self.tf_svd.inv.shape == self.init.inv.shape self.cached = SvdTuple( tf.Variable(self.init.s, name=name+"_s"), tf.Variable(self.init.u, name=name+"_u"), tf.Variable(self.init.v, name=name+"_v"), tf.Variable(self.init.inv, name=name+"_inv"), ) self.s = self.cached.s self.u = self.cached.u self.v = self.cached.v self.inv = self.cached.inv self.holder = SvdTuple( tf.placeholder(default_dtype, shape=self.cached.s.shape, name=name+"_s_holder"), tf.placeholder(default_dtype, shape=self.cached.u.shape, name=name+"_u_holder"), tf.placeholder(default_dtype, shape=self.cached.v.shape, name=name+"_v_holder"), tf.placeholder(default_dtype, shape=self.cached.inv.shape, name=name+"_inv_holder") ) self.update_tf_op = tf.group( self.cached.s.assign(self.tf_svd.s), self.cached.u.assign(self.tf_svd.u), self.cached.v.assign(self.tf_svd.v), self.cached.inv.assign(self.tf_svd.inv) ) self.update_external_op = tf.group( self.cached.s.assign(self.holder.s), self.cached.u.assign(self.holder.u), self.cached.v.assign(self.holder.v), ) self.update_externalinv_op = tf.group( self.cached.inv.assign(self.holder.inv), ) self.init_ops = (self.s.initializer, self.u.initializer, self.v.initializer, self.inv.initializer)
def SVD_eid(X, n, name=None): with tf.variable_scope(name): sz = X.get_shape().as_list() if len(sz) == 4: x = tf.reshape(X, [-1, sz[1] * sz[2], sz[3]]) elif len(sz) == 3: x = X else: x = tf.expand_dims(X, 1) n = 1 _, HW, D = x.get_shape().as_list() x_ = tf.stop_gradient(x) if HW / D < 3 / 2 and 2 / 3 < HW / D: with tf.device('CPU'): g = tf.get_default_graph() with g.gradient_override_map({"Svd": "Svd_"}): s, u, v = tf.svd(x_, full_matrices=False) else: if HW < D: xxt = tf.matmul(x_, x_, transpose_b=True) with tf.device('CPU'): _, u_svd, _ = tf.svd(xxt, full_matrices=False) v_svd = tf.matmul(x_, u_svd, transpose_a=True) s_svd = tf.linalg.norm(v_svd, axis=1) v_svd = removenan(v_svd / tf.expand_dims(s_svd, 1)) else: xtx = tf.matmul(x_, x_, transpose_a=True) with tf.device('CPU'): _, _, v_svd = tf.svd(xtx, full_matrices=False) u_svd = tf.matmul(x_, v_svd) s_svd = tf.linalg.norm(u_svd, axis=1) u_svd = removenan(u_svd / tf.expand_dims(s_svd, 1)) s, u, v = SVD_grad_map(x, s_svd, u_svd, v_svd) s = tf.reshape(s, [-1, min(HW, D)]) u = tf.reshape(u, [-1, HW, min(HW, D)]) v = tf.reshape(v, [-1, D, min(HW, D)]) s = tf.nn.l2_normalize(tf.slice(s, [0, 0], [-1, n]), 1) U = tf.nn.l2_normalize(tf.slice(u, [0, 0, 0], [-1, -1, n]), 1) V = tf.nn.l2_normalize(tf.slice(v, [0, 0, 0], [-1, -1, n]), 1) return s, U, V
def procrustes_conv(input_data, conv_size, stride=(1, 1), padding='SAME', name=None): kernel = _variable_with_weight_decay( 'weights', shape=[conv_size[0], conv_size[1], 1, conv_size[3]], stddev=5e-2, wd=0.0) ksizes = [1, conv_size[0], conv_size[1], 1] strides = [1, stride[0], stride[1], 1] # Get image patches patches = tf.extract_image_patches(input_data, ksizes, strides, [ 1, ] * 4, padding, name) # Vectorize resulting tensor as list of patches patches_shaped = tf.reshape(patches, [ patches.shape[0].value * patches.shape[1].value * patches.shape[2].value, conv_size[0], conv_size[1], conv_size[2] ]) # Average out color channel for procrustes rotation patches_color_avg = tf.reduce_mean(patches_shaped, 3) X = tf.reshape(patches_color_avg, [ patches.shape[0].value * patches.shape[1].value * patches.shape[2].value, conv_size[0], conv_size[1] ]) res_channels = [] rot_channels = [] # for each convolution filter for k in range(conv_size[3]): W = tf.squeeze(kernel[:, :, :, k]) # is the kernel X_T = tf.transpose(X, perm=[0, 2, 1]) M = tf.map_fn(lambda x: tf.matmul(W, x), X_T) sM = tf.svd(M, full_matrices=True) R = tf.matmul(sM[1], tf.transpose(sM[2], perm=[0, 2, 1])) y = tf.reduce_sum(tf.multiply(tf.matmul(R, X), W), axis=(1, 2)) y_shaped = tf.reshape(y, [ patches.shape[0].value, patches.shape[1].value, patches.shape[2].value, 1 ]) res_channels.append(y_shaped) r_shaped = tf.reshape(R, [ patches.shape[0].value, patches.shape[1].value, patches.shape[2].value, R.shape[1].value, R.shape[2].value, 1 ]) rot_channels.append(r_shaped) output_res = tf.concat(res_channels, 3) output_rot = tf.concat(rot_channels, 5) """ output_rot = tf.reshape(output_rot, [output_rot.shape[0].value, output_rot.shape[1].value, output_rot.shape[2].value, output_rot.shape[3].value * output_rot.shape[4].value * output_rot.shape[5].value]) output_tensor = tf.concat([output_res, output_rot], 3) """ return output_res
def fit(self):
    self._graph = tf.Graph()
    with self._graph.as_default():
        self._X = tf.placeholder(self._dtype, shape=self._data.shape)
        singular_values, u, _ = tf.svd(self._X)
        sigma = tf.diag(singular_values)
    with tf.Session(graph=self._graph) as sess:
        self._u, self._singular_values, self._sigma = sess.run(
            [u, singular_values, sigma],
            feed_dict={self._X: self._data})
def posterior_mean_and_sample(self, candidates): """Draw samples for test predictions. Given a Tensor of 'candidates' inputs, returns samples from the posterior and the posterior mean prediction for those inputs. Args: candidates: A (num-examples x num-dims) Tensor containing the inputs for which to return predictions. Returns: y_mean: The posterior mean prediction given these inputs y_sample: A sample from the posterior of the outputs given these inputs """ # Cross-covariance for test predictions w = tf.identity(self.weights_train) inds = tf.squeeze( tf.reshape( tf.tile( tf.reshape(tf.range(self.n_out), (self.n_out, 1)), (1, tf.shape(candidates)[0])), (-1, 1))) cross_cov = self.cov(tf.tile(candidates, [self.n_out, 1]), self.x_train) cross_task_cov = self.task_cov(tf.one_hot(inds, self.n_out), w) cross_cov *= cross_task_cov # Test mean prediction y_mean = tf.matmul(cross_cov, tf.matmul(self.input_inv, self.y_train)) # Test sample predictions # Note this can be done much more efficiently using Kronecker products # if all tasks are fully observed (which we won't assume) test_cov = ( self.cov(tf.tile(candidates, [self.n_out, 1]), tf.tile(candidates, [self.n_out, 1])) * self.task_cov(tf.one_hot(inds, self.n_out), tf.one_hot(inds, self.n_out)) - tf.matmul(cross_cov, tf.matmul(self.input_inv, tf.transpose(cross_cov)))) # Get the matrix square root through an SVD for drawing samples # This seems more numerically stable than the Cholesky s, _, v = tf.svd(test_cov, full_matrices=True) test_sqrt = tf.matmul(v, tf.matmul(tf.diag(s), tf.transpose(v))) y_sample = ( tf.matmul( test_sqrt, tf.random_normal([tf.shape(test_sqrt)[0], 1], dtype=tf.float64)) + y_mean) y_sample = ( tf.transpose(tf.reshape(y_sample, (self.n_out, -1))) * self.input_std + self.input_mean) return y_mean, y_sample
def tf_grass_proj(z, gdim):
    ztmp0 = tf.reshape(z, [-1, gdim[0], gdim[0]])
    # ztmp = tf.matmul(ztmp0, tf.transpose(ztmp0, perm=[0, 2, 1]))
    _, z_g, _ = tf.svd(ztmp0)
    ztmp2 = tf.matmul(z_g[:, :, :gdim[1]],
                      tf.transpose(z_g[:, :, :gdim[1]], perm=[0, 2, 1]))
    z1 = tf.reshape(ztmp2, [-1, gdim[0] * gdim[0]])
    return z1
def svd():
    with tf.name_scope("data"):
        matrix = tf.placeholder(tf.float32)  # a dtype is required; float32 is assumed here
        embed_matrix = tf.svd(matrix)
    with tf.Session() as sess:
        co_matrix = build_co_matrix()
        embed_matrix = sess.run([embed_matrix], feed_dict={matrix: co_matrix})
def p_inv(matrix):
    '''Returns the Moore-Penrose pseudoinverse'''
    s, u, v = tf.svd(matrix)
    threshold = tf.reduce_max(s) * 1e-5
    s_mask = tf.boolean_mask(s, s > threshold)
    s_inv = tf.diag(
        tf.concat([1. / s_mask, tf.zeros([tf.size(s) - tf.size(s_mask)])], 0))
    return tf.matmul(v, tf.matmul(s_inv, tf.transpose(u)))
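# A minimal usage sketch for `p_inv` above (assumptions: TF 1.x session API and
# a hypothetical rank-deficient test matrix), checking the Moore-Penrose
# property A @ pinv(A) @ A ~= A.
import numpy as np
import tensorflow as tf

np.random.seed(0)
a_np = (np.random.randn(5, 3) @ np.random.randn(3, 4)).astype(np.float32)  # rank 3
a = tf.constant(a_np)
recon = tf.matmul(a, tf.matmul(p_inv(a), a))
with tf.Session() as sess:
    print(np.allclose(sess.run(recon), a_np, atol=1e-3))  # expect True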
def svd(A, full_matrices=False, compute_uv=True, name=None):
    M, N = A.get_shape().as_list()
    P = min(M, N)
    S0, U0, V0 = map(tf.stop_gradient,
                     tf.svd(A, full_matrices=True, name=name))
    Ui = tf.transpose(U0)
    Vti = V0
    S = tf.matmul(Ui, tf.matmul(A, Vti))
    S = tf.matrix_diag_part(S)
    return S
def s_norm(tensor, order):
    s, U, V = tf.svd(tensor, full_matrices=False)
    result = None
    if type(order) in [int, float]:
        result = tf.norm(s, ord=order)
    elif type(order) in [list, tuple]:
        result = [tf.norm(s, ord=order_item) for order_item in order]
    else:
        raise ValueError('Unrecognized order of s_norm: %s' % str(order))
    return s, result
def _uinv_decomp(X_sq, cutoff=0.0, decomp_mode="eigh", decomp_device=None):
    with tf.device(decomp_device):
        if decomp_mode == "svd":
            # hermitian, positive matrix, so eigvals = singular values
            e, v, _ = tf.svd(X_sq)
        elif decomp_mode == "eigh":
            e, v = tf.linalg.eigh(X_sq)
            e = tf.cast(e, e.dtype.real_dtype)  # The values here should be real anyway
        else:
            raise ValueError("Invalid decomp_mode: {}".format(decomp_mode))

        # NOTE: Negative values are always due to precision problems.
        # NOTE: Inaccuracies here mean the final tensor is not exactly isometric!
        e_pinvsqrt = tf.where(e <= cutoff, tf.zeros_like(e), 1 / tf.sqrt(e))
        e_pinvsqrt_mat = tf.diag(tf.cast(e_pinvsqrt, v.dtype))
        X_uinv = tf.matmul(v @ e_pinvsqrt_mat, v, adjoint_b=True)
    return X_uinv, e
def _symmetric_matrix_square_root(mat, eps=1e-10):
    """Compute square root of a symmetric matrix.

    Note that this is different from an elementwise square root. We want to
    compute M' where M' = sqrt(mat) such that M' * M' = mat.

    Also note that this method **only** works for symmetric matrices.

    Args:
        mat: Matrix to take the square root of.
        eps: Small epsilon such that any element less than eps will not be square
            rooted to guard against numerical instability.

    Returns:
        Matrix square root of mat.
    """
    # Unlike numpy, tensorflow's return order is (s, u, v)
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    # Note that the v returned by Tensorflow is v = V
    # (when referencing the equation A = U S V^T)
    # This is unlike Numpy which returns v = V^T
    return tf.matmul(tf.matmul(u, tf.diag(si)), v, transpose_b=True)
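# A minimal verification sketch for `_symmetric_matrix_square_root` above
# (assumptions: TF 1.x session API and a hypothetical 3x3 symmetric positive
# definite test matrix), checking that root @ root reproduces the input.
import numpy as np
import tensorflow as tf

np.random.seed(0)
a = np.random.randn(3, 3).astype(np.float32)
spd = a @ a.T + 3. * np.eye(3, dtype=np.float32)  # symmetric positive definite
root = _symmetric_matrix_square_root(tf.constant(spd))
with tf.Session() as sess:
    root_val = sess.run(root)
print(np.allclose(root_val @ root_val, spd, atol=1e-3))  # expect True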
def testWrongDimensions(self):
    # The input to svd should be 2-dimensional tensor.
    scalar = tf.constant(1.)
    with self.assertRaises(ValueError):
        tf.svd(scalar)
    vector = tf.constant([1., 2.])
    with self.assertRaises(ValueError):
        tf.svd(vector)
    tensor = tf.constant([[[1., 2.], [3., 4.]], [[1., 2.], [3., 4.]]])
    with self.assertRaises(ValueError):
        tf.svd(tensor)

    # The input to batch_svd should be a tensor of at least rank 2.
    scalar = tf.constant(1.)
    with self.assertRaises(ValueError):
        tf.batch_svd(scalar)
    vector = tf.constant([1., 2.])
    with self.assertRaises(ValueError):
        tf.batch_svd(vector)
def __init__(self, target, name, do_inverses=False, use_resource=False): self.name = name self.target = target self.do_inverses = do_inverses self.tf_svd = SvdTuple(tf.svd(target)) self.update_counter = 0 self.use_resource = use_resource self.init = SvdTuple( ones(target.shape[0], name=name+"_s_init"), Identity(target.shape[0], name=name+"_u_init"), Identity(target.shape[0], name=name+"_v_init"), Identity(target.shape[0], name=name+"_inv_init"), ) assert self.tf_svd.s.shape == self.init.s.shape assert self.tf_svd.u.shape == self.init.u.shape assert self.tf_svd.v.shape == self.init.v.shape # assert self.tf_svd.inv.shape == self.init.inv.shape if not self.use_resource: self.cached = SvdTuple( tf.Variable(self.init.s, name=name+"_s"), tf.Variable(self.init.u, name=name+"_u"), tf.Variable(self.init.v, name=name+"_v"), tf.Variable(self.init.inv, name=name+"_inv"), ) else: from tensorflow.python.ops import resource_variable_ops as rr self.cached = SvdTuple( rr.ResourceVariable(self.init.s, name=name+"_s"), rr.ResourceVariable(self.init.u, name=name+"_u"), rr.ResourceVariable(self.init.v, name=name+"_v"), rr.ResourceVariable(self.init.inv, name=name+"_inv"), ) self.s = self.cached.s self.u = self.cached.u self.v = self.cached.v self.inv = self.cached.inv if not use_resource: self.holder = SvdTuple( tf.placeholder(default_dtype, shape=self.cached.s.shape, name=name+"_s_holder"), tf.placeholder(default_dtype, shape=self.cached.u.shape, name=name+"_u_holder"), tf.placeholder(default_dtype, shape=self.cached.v.shape, name=name+"_v_holder"), tf.placeholder(default_dtype, shape=self.cached.inv.shape, name=name+"_inv_holder") ) else: self.holder = self.init self.update_tf_op = tf.group( self.cached.s.assign(self.tf_svd.s), self.cached.u.assign(self.tf_svd.u), self.cached.v.assign(self.tf_svd.v), self.cached.inv.assign(self.tf_svd.inv) ) self.update_external_op = tf.group( self.cached.s.assign(self.holder.s), self.cached.u.assign(self.holder.u), self.cached.v.assign(self.holder.v), ) self.update_externalinv_op = tf.group( self.cached.inv.assign(self.holder.inv), ) self.init_ops = (self.s.initializer, self.u.initializer, self.v.initializer, self.inv.initializer)
def nuclear_norm_grad(x, dy):
    _, U, V = tf.svd(x, full_matrices=False, compute_uv=True)
    grad = tf.matmul(U, tf.transpose(V))
    return dy * grad
def nuclear_norm(x):
    sigma = tf.svd(x, full_matrices=False, compute_uv=False)
    norm = tf.reduce_sum(sigma)
    return norm
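# A sketch (not from the original repo) of one plausible way to wire
# `nuclear_norm` and `nuclear_norm_grad` above together, assuming a TF version
# that provides tf.custom_gradient: the forward pass only needs the singular
# values, while the backward pass uses the analytic gradient U V^T.
@tf.custom_gradient
def nuclear_norm_with_grad(x):
    sigma = tf.svd(x, full_matrices=False, compute_uv=False)

    def grad(dy):
        return nuclear_norm_grad(x, dy)

    return tf.reduce_sum(sigma), grad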
def svd_decomposition(tensor: tf.Tensor, split_axis: int, max_singular_values: Optional[int] = None, max_truncation_error: Optional[float] = None ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: """Computes the singular value decomposition (SVD) of a tensor. The SVD is performed by treating the tensor as a matrix, with an effective left (row) index resulting from combining the axes `tensor.shape[:split_axis]` and an effective right (column) index resulting from combining the axes `tensor.shape[split_axis:]`. For example, if `tensor` had a shape (2, 3, 4, 5) and `split_axis` was 2, then `u` would have shape (2, 3, 6), `s` would have shape (6), and `vh` would have shape (6, 4, 5). If `max_singular_values` is set to an integer, the SVD is truncated to keep at most this many singular values. If `max_truncation_error > 0`, as many singular values will be truncated as possible, so that the truncation error (the norm of discarded singular values) is at most `max_truncation_error`. If both `max_singular_values` snd `max_truncation_error` are specified, the number of retained singular values will be `min(max_singular_values, nsv_auto_trunc)`, where `nsv_auto_trunc` is the number of singular values that must be kept to maintain a truncation error smaller than `max_truncation_error`. The output consists of three tensors `u, s, vh` such that: ```python u[i1,...,iN, j] * s[j] * vh[j, k1,...,kM] == tensor[i1,...,iN, k1,...,kM] ``` Note that the output ordering matches numpy.linalg.svd rather than tf.svd. Args: tensor: A tensor to be decomposed. split_axis: Where to split the tensor's axes before flattening into a matrix. max_singular_values: The number of singular values to keep, or `None` to keep them all. max_truncation_error: The maximum allowed truncation error or `None` to not do any truncation. Returns: u: Left tensor factor. s: Vector of ordered singular values from largest to smallest. vh: Right tensor factor. s_rest: Vector of discarded singular values (length zero if no truncation). """ left_dims = tf.shape(tensor)[:split_axis] right_dims = tf.shape(tensor)[split_axis:] tensor = tf.reshape(tensor, [tf.reduce_prod(left_dims), tf.reduce_prod(right_dims)]) s, u, v = tf.svd(tensor) if max_singular_values is None: max_singular_values = tf.size(s, out_type=tf.int64) else: max_singular_values = tf.constant(max_singular_values, dtype=tf.int64) if max_truncation_error is not None: # Cumulative norms of singular values in ascending order. trunc_errs = tf.sqrt(tf.cumsum(tf.square(s), reverse=True)) # We must keep at least this many singular values to ensure the # truncation error is <= max_truncation_error. num_sing_vals_err = tf.count_nonzero( tf.cast(trunc_errs > max_truncation_error, dtype=tf.int32)) else: num_sing_vals_err = max_singular_values num_sing_vals_keep = tf.minimum(max_singular_values, num_sing_vals_err) # tf.svd() always returns the singular values as a vector of float{32,64}. # since tf.math_ops.real is automatically applied to s. This causes # s to possibly not be the same dtype as the original tensor, which can cause # issues for later contractions. To fix it, we recast to the original dtype. s = tf.cast(s, tensor.dtype) s_rest = s[num_sing_vals_keep:] s = s[:num_sing_vals_keep] u = u[:, :num_sing_vals_keep] v = v[:, :num_sing_vals_keep] vh = tf.linalg.adjoint(v) dim_s = tf.shape(s)[0] # must use tf.shape (not s.shape) to compile u = tf.reshape(u, tf.concat([left_dims, [dim_s]], axis=-1)) vh = tf.reshape(vh, tf.concat([[dim_s], right_dims], axis=-1)) return u, s, vh, s_rest
def sharp(input):
    s, U, V = tf.svd(input, full_matrices=False)
    return tf.matmul(U, tf.transpose(V)) * tf.reduce_sum(s), s
def _layer(self, inputs, params=None, id_layer=0): """Construct the layer id_layer in the computation graph of tensorflow. Parameters ---------- inputs: tuple of tensors (n_in) a tuple of tensor containing all the necessary inputs to construct the layer, either network inputs or previous layer output. params: tuple of tensor (n_param) a tuple with the parameter of the previous layers, used to share the parameters accross layers. This is not used if the network do not use the shared parameter. id_layer: int A layer identifier passed during the construction of the network. It should be its rank in the graph. Returns ------- outputs: tuple of tensors (n_out) st n_out = n_in, to chain the layers. params: tuple of tensors (n_param) with the parameters of this layer """ L = self.L K, p = self.D.shape Zk, X, lmbd = inputs D = tf.constant(self.D) DD = tf.constant(self.S0) if params: self.log.debug('(Layer{}) - shared params'.format(id_layer)) A, S = params else: if len(self.warm_param) > id_layer: self.log.debug('(Layer{})- warm params'.format(id_layer)) wp = self.warm_param[id_layer] else: self.log.debug('(Layer{}) - new params'.format(id_layer)) wp = [np.eye(K, dtype=np.float32), np.ones(K, dtype=np.float32) * L] A = tf.Variable(initial_value=tf.constant(wp[0], shape=[K, K]), name='A') S = tf.Variable(tf.constant(wp[1], shape=[K]), name='S') # Projection of A on the stieffel manifold with tf.name_scope("unary_projection"): _, P, Q = tf.svd(tf.cast(A, tf.float64), full_matrices=True) An = tf.matmul(P, Q, transpose_b=True) tf.add_to_collection('svd', A.assign(tf.cast(An, tf.float32))) tf.add_to_collection('Unitary', A) with tf.name_scope('unit_reg'): I = tf.constant(np.eye(K, dtype=np.float32)) r = tf.squared_difference(I, tf.matmul(A, A, transpose_a=True)) tf.add_to_collection("regularisation", tf.reduce_sum(r)) S1 = 1 / S as1 = tf.matmul(A, tf.diag(S1)) with tf.name_scope("hidden"): hk = tf.matmul(self.X, tf.matmul(D, as1, transpose_a=True)) if id_layer > 0: hk += tf.matmul(Zk, (A - tf.matmul(DD, as1))) output = soft_thresholding(hk, self.lmbd * S1) output = tf.matmul(output, A, transpose_b=True, name="output") return [output, X, lmbd], (A, S)
def wct_tf(content, style, alpha, eps=1e-8): '''TensorFlow version of Whiten-Color Transform Assume that content/style encodings have shape 1xHxWxC See p.4 of the Universal Style Transfer paper for corresponding equations: https://arxiv.org/pdf/1705.08086.pdf ''' # Remove batch dim and reorder to CxHxW content_t = tf.transpose(tf.squeeze(content), (2, 0, 1)) style_t = tf.transpose(tf.squeeze(style), (2, 0, 1)) Cc, Hc, Wc = tf.unstack(tf.shape(content_t)) Cs, Hs, Ws = tf.unstack(tf.shape(style_t)) # CxHxW -> CxH*W content_flat = tf.reshape(content_t, (Cc, Hc*Wc)) style_flat = tf.reshape(style_t, (Cs, Hs*Ws)) # Content covariance mc = tf.reduce_mean(content_flat, axis=1, keep_dims=True) fc = content_flat - mc fcfc = tf.matmul(fc, fc, transpose_b=True) / (tf.cast(Hc*Wc, tf.float32) - 1.) + tf.eye(Cc)*eps # Style covariance ms = tf.reduce_mean(style_flat, axis=1, keep_dims=True) fs = style_flat - ms fsfs = tf.matmul(fs, fs, transpose_b=True) / (tf.cast(Hs*Ws, tf.float32) - 1.) + tf.eye(Cs)*eps # tf.svd is slower on GPU, see https://github.com/tensorflow/tensorflow/issues/13603 with tf.device('/cpu:0'): Sc, Uc, _ = tf.svd(fcfc) Ss, Us, _ = tf.svd(fsfs) ## Uncomment to perform SVD for content/style with np in one call ## This is slower than CPU tf.svd but won't segfault for ill-conditioned matrices # @jit # def np_svd(content, style): # '''tf.py_func helper to run SVD with NumPy for content/style cov tensors''' # Uc, Sc, _ = np.linalg.svd(content) # Us, Ss, _ = np.linalg.svd(style) # return Uc, Sc, Us, Ss # Uc, Sc, Us, Ss = tf.py_func(np_svd, [fcfc, fsfs], [tf.float32, tf.float32, tf.float32, tf.float32]) # Filter small singular values k_c = tf.reduce_sum(tf.cast(tf.greater(Sc, 1e-5), tf.int32)) k_s = tf.reduce_sum(tf.cast(tf.greater(Ss, 1e-5), tf.int32)) # Whiten content feature Dc = tf.diag(tf.pow(Sc[:k_c], -0.5)) fc_hat = tf.matmul(tf.matmul(tf.matmul(Uc[:,:k_c], Dc), Uc[:,:k_c], transpose_b=True), fc) # Color content with style Ds = tf.diag(tf.pow(Ss[:k_s], 0.5)) fcs_hat = tf.matmul(tf.matmul(tf.matmul(Us[:,:k_s], Ds), Us[:,:k_s], transpose_b=True), fc_hat) # Re-center with mean of style fcs_hat = fcs_hat + ms # Blend whiten-colored feature with original content feature blended = alpha * fcs_hat + (1 - alpha) * (fc + mc) # CxH*W -> CxHxW blended = tf.reshape(blended, (Cc,Hc,Wc)) # CxHxW -> 1xHxWxC blended = tf.expand_dims(tf.transpose(blended, (1,2,0)), 0) return blended
def svd_tensor(t, left_axes, right_axes, nsv_max=None, auto_trunc_max_err=0.0): """Computes the singular value decomposition (SVD) of a tensor. The SVD is performed by treating the tensor as a matrix, with an effective left (row) index resulting from combining the `left_axes` of the input tensor `t` and an effective right (column) index resulting from combining the `right_axes`. Transposition is used to move axes of the input tensor into position as as required. The output retains the full index structure of the original tensor. If `nsv_max` is set to an integer, the SVD is truncated to keep `min(nsv_max, nsv)` singular values, where `nsv` is the number of singular values returned by the SVD. If `auto_trunc_max_err > 0`, as many singular values will be truncated as possible, so that the truncation error (the norm of discarded singular values) is at most `auto_trunc_max_err`. If both `nsv_max` snd `auto_trunc_max_err` are specified, the number of retained singular values will be `min(nsv_max, nsv_auto_trunc)`, where `nsv_auto_trunc` is the number of singular values that must be kept to maintain a truncation error smaller than `auto_trunc_max_err`. The output consists of three tensors `u, s, vh` such that: ```python u[i1,...,iN, j] * s[j] * vh[j, k1,...,kM] == t_tr[i1,...,iN, k1,...,kM] ``` where ```t_tr == tf.transpose(t, (*left_axes, *right_axes))```. Note that the output ordering matches numpy.linalg.svd rather than tf.svd. Args: t: A tensor to be decomposed. left_axes: The axes of `t` to be treated as the left index. right_axes: The axes of `t` to be treated as the right index. nsv_max: The number of singular values to keep, or `None` to keep them all. auto_trunc_max_err: The maximum allowed truncation error. Returns: u: Left tensor factor. s: Vector of singular values. vh: Right tensor factor. s_rest: Vector of discarded singular values (length zero if no truncation). """ t_shp = tf.shape(t) left_dims = [t_shp[i] for i in left_axes] right_dims = [t_shp[i] for i in right_axes] t_t = tf.transpose(t, (*left_axes, *right_axes)) t_tr = tf.reshape(t_t, (np.prod(left_dims), np.prod(right_dims))) s, u, v = tf.svd(t_tr) if nsv_max is None: nsv_max = tf.size(s, out_type=tf.int64) else: nsv_max = tf.cast(nsv_max, tf.int64) # Cumulative norms of singular values in ascending order. trunc_errs = tf.sqrt(tf.cumsum(tf.square(s), reverse=True)) # We must keep at least this many singular values to ensure the # truncation error is <= auto_trunc_max_err. nsv_err = tf.count_nonzero(trunc_errs > auto_trunc_max_err) nsv_keep = tf.minimum(nsv_max, nsv_err) # tf.svd() always returns the singular values as a vector of real. # Generally, however, we want to contract s with Tensors of the original # input type. To make this easy, cast here! s = tf.cast(s, t.dtype) s_rest = s[nsv_keep:] s = s[:nsv_keep] u = u[:, :nsv_keep] v = v[:, :nsv_keep] vh = tf.linalg.adjoint(v) dim_s = tf.shape(s)[0] # must use tf.shape (not s.shape) to compile u = tf.reshape(u, (*left_dims, dim_s)) vh = tf.reshape(vh, (dim_s, *right_dims)) return u, s, vh, s_rest
def pseudo_inverse_sqrt(mat, eps=1e-7):
    """half pseudo-inverse"""
    s, u, v = tf.svd(mat)
    # zero threshold for eigenvalues
    si = tf.where(tf.less(s, eps), s, 1. / tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
open("svd_in", "wb").write(body) # import requests # r = requests.get(url, auth=('usrname', 'password'), verify=False,stream=True) # r.raw.decode_content = True # with open("svd_in", 'wb') as f: # shutil.copyfileobj(r.raw, f) dtype = np.float32 matrix0 = np.genfromtxt('svd_in', delimiter= ",").astype(dtype) print(matrix0.shape) assert matrix0.shape == (784, 784) matrix = tf.placeholder(dtype) sess = tf.InteractiveSession() s0,u0,v0 = sess.run(tf.svd(matrix), feed_dict={matrix: matrix0}) print("u any NaNs: %s"% (np.isnan(u0).any(),)) print("u all NaNs: %s"% (np.isnan(u0).all(),)) print("matrix0 any NaNs: %s"% (np.isnan(matrix0).any(),)) # segfault bt # #0 0x00007fffe320e121 in Eigen::BDCSVD<Eigen::Matrix<float, -1, -1, 1, -1, -1> >::perturbCol0(Eigen::Ref<Eigen::Array<float, -1, 1, 0, -1, 1>, 0, Eigen::InnerStride<1> > const&, Eigen::Ref<Eigen::Array<float, -1, 1, 0, -1, 1>, 0, Eigen::InnerStride<1> > const&, Eigen::Ref<Eigen::Array<long, 1, -1, 1, 1, -1>, 0, Eigen::InnerStride<1> > const&, Eigen::Matrix<float, -1, 1, 0, -1, 1> const&, Eigen::Ref<Eigen::Array<float, -1, 1, 0, -1, 1>, 0, Eigen::InnerStride<1> > const&, Eigen::Ref<Eigen::Array<float, -1, 1, 0, -1, 1>, 0, Eigen::InnerStride<1> > const&, Eigen::Ref<Eigen::Array<float, -1, 1, 0, -1, 1>, 0, Eigen::InnerStride<1> >) () # from /home/yaroslav/.conda/envs/whitening/lib/python3.5/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so # #1 0x00007fffe320fa81 in Eigen::BDCSVD<Eigen::Matrix<float, -1, -1, 1, -1, -1> >::computeSVDofM(long, long, Eigen::Matrix<float, -1, -1, 0, -1, -1>&, Eigen::Matrix<float, -1, 1, 0, -1, 1>&, Eigen::Matrix<float, -1, -1, 0, -1, -1>&) () # from /home/yaroslav/.conda/envs/whitening/lib/python3.5/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so # #2 0x00007fffe321e21c in Eigen::BDCSVD<Eigen::Matrix<float, -1, -1, 1, -1, -1> >::divide(long, long, long, long, long) () # from /home/yaroslav/.conda/envs/whitening/lib/python3.5/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so # #3 0x00007fffe321dbb8 in Eigen::BDCSVD<Eigen::Matrix<float, -1, -1, 1, -1, -1> >::divide(long, long, long, long, long) () # from /home/yaroslav/.conda/envs/whitening/lib/python3.5/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so # #4 0x00007fffe32220bd in Eigen::BDCSVD<Eigen::Matrix<float, -1, -1, 1, -1, -1> >::compute(Eigen::Matrix<float, -1, -1, 1, -1, -1> const&, unsigned int) () # from /home/yaroslav/.conda/envs/whitening/lib/python3.5/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so
def wct_style_swap(content, style, alpha, patch_size=3, stride=1, eps=1e-8): '''Modified Whiten-Color Transform that performs style swap on whitened content/style encodings before coloring Assume that content/style encodings have shape 1xHxWxC ''' content_t = tf.transpose(tf.squeeze(content), (2, 0, 1)) style_t = tf.transpose(tf.squeeze(style), (2, 0, 1)) Cc, Hc, Wc = tf.unstack(tf.shape(content_t)) Cs, Hs, Ws = tf.unstack(tf.shape(style_t)) # CxHxW -> CxH*W content_flat = tf.reshape(content_t, (Cc, Hc*Wc)) style_flat = tf.reshape(style_t, (Cs, Hs*Ws)) # Content covariance mc = tf.reduce_mean(content_flat, axis=1, keep_dims=True) fc = content_flat - mc fcfc = tf.matmul(fc, fc, transpose_b=True) / (tf.cast(Hc*Wc, tf.float32) - 1.) + tf.eye(Cc)*eps # Style covariance ms = tf.reduce_mean(style_flat, axis=1, keep_dims=True) fs = style_flat - ms fsfs = tf.matmul(fs, fs, transpose_b=True) / (tf.cast(Hs*Ws, tf.float32) - 1.) + tf.eye(Cs)*eps # tf.svd is slower on GPU, see https://github.com/tensorflow/tensorflow/issues/13603 with tf.device('/cpu:0'): Sc, Uc, _ = tf.svd(fcfc) Ss, Us, _ = tf.svd(fsfs) ## Uncomment to perform SVD for content/style with np in one call ## This is slower than CPU tf.svd but won't segfault for ill-conditioned matrices # @jit # def np_svd(content, style): # '''tf.py_func helper to run SVD with NumPy for content/style cov tensors''' # Uc, Sc, _ = np.linalg.svd(content) # Us, Ss, _ = np.linalg.svd(style) # return Uc, Sc, Us, Ss # Uc, Sc, Us, Ss = tf.py_func(np_svd, [fcfc, fsfs], [tf.float32, tf.float32, tf.float32, tf.float32]) k_c = tf.reduce_sum(tf.cast(tf.greater(Sc, 1e-5), tf.int32)) k_s = tf.reduce_sum(tf.cast(tf.greater(Ss, 1e-5), tf.int32)) ### Whiten content Dc = tf.diag(tf.pow(Sc[:k_c], -0.5)) fc_hat = tf.matmul(tf.matmul(tf.matmul(Uc[:,:k_c], Dc), Uc[:,:k_c], transpose_b=True), fc) # Reshape before passing to style swap, CxH*W -> 1xHxWxC whiten_content = tf.expand_dims(tf.transpose(tf.reshape(fc_hat, [Cc,Hc,Wc]), [1,2,0]), 0) ### Whiten style before swapping Ds = tf.diag(tf.pow(Ss[:k_s], -0.5)) whiten_style = tf.matmul(tf.matmul(tf.matmul(Us[:,:k_s], Ds), Us[:,:k_s], transpose_b=True), fs) # Reshape before passing to style swap, CxH*W -> 1xHxWxC whiten_style = tf.expand_dims(tf.transpose(tf.reshape(whiten_style, [Cs,Hs,Ws]), [1,2,0]), 0) ### Style swap whitened encodings ss_feature = style_swap(whiten_content, whiten_style, patch_size, stride) # HxWxC -> CxH*W ss_feature = tf.transpose(tf.reshape(ss_feature, [Hc*Wc,Cc]), [1,0]) ### Color style-swapped encoding with style Ds_sq = tf.diag(tf.pow(Ss[:k_s], 0.5)) fcs_hat = tf.matmul(tf.matmul(tf.matmul(Us[:,:k_s], Ds_sq), Us[:,:k_s], transpose_b=True), ss_feature) fcs_hat = fcs_hat + ms ### Blend style-swapped & colored encoding with original content encoding blended = alpha * fcs_hat + (1 - alpha) * (fc + mc) # CxH*W -> CxHxW blended = tf.reshape(blended, (Cc,Hc,Wc)) # CxHxW -> 1xHxWxC blended = tf.expand_dims(tf.transpose(blended, (1,2,0)), 0) return blended
def policy_gradient(): with tf.variable_scope("policy"): params = tf.get_variable("policy_parameters", [4,2]) state = tf.placeholder("float", [None, 4], name="state") # NOTE: have to specify shape of actions so we can call # get_shape when calculating g_log_prob below actions = tf.placeholder("float", [200, 2], name="actions") advantages = tf.placeholder("float", [None,], name="advantages") linear = tf.matmul(state, params) probabilities = tf.nn.softmax(linear) my_variables = tf.trainable_variables() # calculate the probability of the chosen action given the state action_log_prob = tf.log(tf.reduce_sum( tf.multiply(probabilities, actions), reduction_indices=[1])) # calculate the gradient of the log probability at each point in time # NOTE: doing this because tf.gradients only returns a summed version action_log_prob_flat = tf.reshape(action_log_prob, (-1,)) g_log_prob = tf.stack( [tf.gradients(action_log_prob_flat[i], my_variables)[0] for i in range(action_log_prob_flat.get_shape()[0])]) g_log_prob = tf.reshape(g_log_prob, (200, 8, 1)) # calculate the policy gradient by multiplying by the advantage function g = tf.multiply(g_log_prob, tf.reshape(advantages, (200, 1, 1))) # sum over time g = 1.00 / 200.00 * tf.reduce_sum(g, reduction_indices=[0]) # calculate the Fischer information matrix and its inverse F2 = tf.map_fn(lambda x: tf.matmul(x, tf.transpose(x)), g_log_prob) F = 1.0 / 200.0 * tf.reduce_sum(F2, reduction_indices=[0]) # calculate inverse of positive definite clipped F # NOTE: have noticed small eigenvalues (1e-10) that are negative, # using SVD to clip those out, assuming they're rounding errors S, U, V = tf.svd(F) atol = tf.reduce_max(S) * 1e-6 S_inv = tf.divide(1.0, S) S_inv = tf.where(S < atol, tf.zeros_like(S), S_inv) S_inv = tf.diag(S_inv) F_inv = tf.matmul(S_inv, tf.transpose(U)) F_inv = tf.matmul(V, F_inv) # calculate natural policy gradient ascent update F_inv_g = tf.matmul(F_inv, g) # calculate a learning rate normalized such that a constant change # in the output control policy is achieved each update, preventing # any parameter changes that hugely change the output learning_rate = tf.sqrt( tf.divide(0.001, tf.matmul(tf.transpose(g), F_inv_g))) update = tf.multiply(learning_rate, F_inv_g) update = tf.reshape(update, (4, 2)) # update trainable parameters # NOTE: whenever my_variables is fetched they're also updated my_variables[0] = tf.assign_add(my_variables[0], update) return probabilities, state, actions, advantages, my_variables
def testWrongDimensions(self): # The input to svd should be 2-dimensional tensor. scalar = tf.constant(1.) with self.assertRaisesRegexp(ValueError, "Shape must be rank 2 but is rank 0"): tf.svd(scalar) vector = tf.constant([1., 2.]) with self.assertRaisesRegexp(ValueError, "Shape must be rank 2 but is rank 1"): tf.svd(vector) tensor = tf.constant([[[1., 2.], [3., 4.]], [[1., 2.], [3., 4.]]]) with self.assertRaisesRegexp(ValueError, "Shape must be rank 2 but is rank 3"): tf.svd(tensor) scalar = tf.constant(1. + 1.0j) with self.assertRaises(ValueError): tf.svd(scalar) vector = tf.constant([1. + 1.0j, 2. + 2.0j]) with self.assertRaises(ValueError): tf.svd(vector) tensor = tf.constant([[[1. + 1.0j, 2. + 2.0j], [3. + 3.0j, 4. + 4.0j]], [[1. + 1.0j, 2. + 2.0j], [3. + 3.0j, 4. + 4.0j]]]) with self.assertRaises(ValueError): tf.svd(tensor) # The input to batch_svd should be a tensor of at least rank 2. scalar = tf.constant(1.) with self.assertRaisesRegexp(ValueError, "Shape must be at least rank 2 but is rank 0"): tf.batch_svd(scalar) vector = tf.constant([1., 2.]) with self.assertRaisesRegexp(ValueError, "Shape must be at least rank 2 but is rank 1"): tf.batch_svd(vector) scalar = tf.constant(1. + 1.0j) with self.assertRaises(ValueError): tf.batch_svd(scalar) vector = tf.constant([1. + 1.0j, 2. + 2.0j]) with self.assertRaises(ValueError): tf.batch_svd(vector)