def invertible_1x1_conv(z, logdet, reverse=False, name=None, use_bias=False): with tf.variable_scope(name, "invconv"): shape = z.get_shape().as_list() w_shape = [shape[3], shape[3]] # Sample a random orthogonal matrix: w_init = np.linalg.qr(np.random.randn(*w_shape))[0].astype('float32') w = tf.get_variable("W", dtype=tf.float32, initializer=w_init) det_w = tf.matrix_determinant(tf.cast(w, 'float64')) dlogdet = tf.cast(tf.log(abs(det_w)), 'float32') * shape[1] * shape[2] if use_bias: b = tf.get_variable("bias", [1, 1, 1, shape[3]]) if not reverse: _w = w[tf.newaxis, tf.newaxis, ...] z = tf.nn.conv2d(z, _w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += dlogdet if use_bias: z += b else: if use_bias: z -= b w_inv = tf.matrix_inverse(w) _w = w_inv[tf.newaxis, tf.newaxis, ...] z = tf.nn.conv2d(z, _w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= dlogdet return z, logdet
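# Why dlogdet is scaled by shape[1] * shape[2] in the snippet above: a 1x1
# convolution applies the same channel-mixing matrix W independently at every
# spatial position, so its Jacobian is block diagonal with H*W copies of W and
# log|det| = H * W * log|det W|. A small NumPy check of that identity
# (illustrative sizes only, not part of the function above):
import numpy as np

h, w, c = 3, 4, 2
W = np.linalg.qr(np.random.randn(c, c))[0] * 1.7   # some invertible channel mixer
J = np.kron(np.eye(h * w), W)                      # Jacobian of the 1x1 conv
print(np.allclose(np.log(abs(np.linalg.det(J))),
                  h * w * np.log(abs(np.linalg.det(W)))))  # True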
def entropy(self, mean=None, cov=1): """Entropy of probability distribution. This is not vectorized with respect to any arguments. Parameters ---------- mean : tf.Tensor, optional A 1-D tensor. Defaults to zero mean. cov : tf.Tensor, optional A 1-D or 2-D tensor. Defaults to identity matrix. Returns ------- tf.Tensor A tensor of one dimension less than the input. """ if cov is 1: d = 1 det_cov = 1.0 else: cov = tf.cast(cov, dtype=tf.float32) d = get_dims(cov)[0] if len(cov.get_shape()) == 1: det_cov = tf.reduce_prod(cov) else: det_cov = tf.matrix_determinant(cov) return 0.5 * (d + d*tf.log(2*np.pi) + tf.log(det_cov))
def _get_fldj_numerical(self, bijector, x, event_ndims, eps=1.e-6, input_to_vector=tfb.Identity, output_to_vector=tfb.Identity): """Numerically approximate the forward log det Jacobian of a bijector. Args: bijector: the bijector whose Jacobian we wish to approximate x: the value for which we want to approximate the Jacobian event_ndims: number of dimensions in an event eps: epsilon to add when forming (f(x+eps)-f(x)) / eps input_to_vector: a bijector that maps the input value to a vector output_to_vector: a bijector that maps the output value to a vector Returns: A numerical approximation to the log det Jacobian of bijector.forward evaluated at x. """ x_vector = input_to_vector.forward(x) n = tf.shape(x_vector)[-1] x_plus_eps_vector = x_vector + eps * tf.eye(n, dtype=x_vector.dtype) x_plus_eps = input_to_vector.inverse(x_plus_eps_vector) f_x = bijector.forward(x) f_x_vector = output_to_vector.forward(f_x) f_x_plus_eps = bijector.forward(x_plus_eps) f_x_plus_eps_vector = output_to_vector.forward(f_x_plus_eps) jacobian_numerical = (f_x_plus_eps_vector - f_x_vector) / eps return ( tf.log(tf.abs(tf.matrix_determinant(jacobian_numerical))) + input_to_vector.forward_log_det_jacobian(x, event_ndims=event_ndims) - output_to_vector.forward_log_det_jacobian(f_x, event_ndims=event_ndims))
def entropy(self, mean=None, cov=1): """ Note entropy does not depend on its mean. Arguments ---------- mean: tf.Tensor, optional vector. Defaults to zero mean. cov: tf.Tensor, optional vector or matrix. Defaults to identity. Returns ------- tf.Tensor scalar """ if cov == 1: d = 1 det_cov = 1.0 else: cov = tf.cast(cov, dtype=tf.float32) d = get_dims(cov)[0] if len(cov.get_shape()) == 1: det_cov = tf.reduce_prod(cov) else: det_cov = tf.matrix_determinant(cov) return 0.5 * (d + d*tf.log(2*np.pi) + tf.log(det_cov))
def Test(self): with self.test_session(): np.random.seed(1) m = np.random.uniform(low=1.0, high=100.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) a = tf.constant(m) epsilon = np.finfo(dtype_).eps # Optimal stepsize for central difference is O(epsilon^{1/3}). delta = epsilon**(1.0 / 3.0) # tolerance obtained by looking at actual differences using # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build tol = 1e-3 if len(shape_) == 2: c = tf.matrix_determinant(a) else: c = tf.batch_matrix_determinant(a) out_shape = shape_[:-2] # last two dimensions hold matrices theoretical, numerical = tf.test.compute_gradient(a, shape_, c, out_shape, delta=delta) self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
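# On the choice delta = epsilon**(1/3) above: the central-difference error is an
# O(delta**2) truncation term plus an O(epsilon/delta) rounding term, and their
# sum is minimized when delta is on the order of epsilon**(1/3). A standalone
# NumPy illustration with f(x) = exp(x) at x = 1 (hypothetical, separate from
# the test above):
import numpy as np

eps = np.finfo(np.float64).eps
x, exact = 1.0, np.e
for delta in (eps ** 0.5, eps ** (1.0 / 3.0), eps ** 0.25):
    approx = (np.exp(x + delta) - np.exp(x - delta)) / (2 * delta)
    print(delta, abs(approx - exact))   # the middle choice typically gives the smallest error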
def test_logjac(self): """ We have hand-crafted the log-jacobians for speed. Check they're correct wrt a tensorflow derived version """ # there is no jacobian: loop manually def jacobian(f): return tf.pack([tf.gradients(f(self.x)[i], self.x)[0] for i in range(10)]) tf_jacs = [ tf.log(tf.matrix_determinant(jacobian(t.tf_forward))) for t in self.transforms if type(t) is not GPflow.transforms.LowerTriangular ] hand_jacs = [ t.tf_log_jacobian(self.x) for t in self.transforms if type(t) is not GPflow.transforms.LowerTriangular ] for j1, j2 in zip(tf_jacs, hand_jacs): self.assertTrue( np.allclose( self.session.run(j1, feed_dict={self.x: self.x_np}), self.session.run(j2, feed_dict={self.x: self.x_np}), ) )
def _det_large_enough_mask(x, det_bounds): """Returns whether the input matches the given determinant limit. Args: x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`. det_bounds: A floating-point `Tensor` that must broadcast to shape `[B1, ..., Bn]`, giving the desired lower bound on the determinants in `x`. Returns: mask: A floating-point `Tensor` of shape [B1, ..., Bn]. Each scalar is 1 if the corresponding matrix had determinant above the corresponding bound, otherwise 0. """ # For the curious: I wonder whether it is possible and desirable to # use a Cholesky decomposition-based algorithm for this, since the # only matrices whose determinant this code cares about will be PSD. # Didn't figure out how to code that in TensorFlow. # # Expert opinion is that it would be about twice as fast since # Cholesky is roughly half the cost of Gaussian Elimination with # Partial Pivoting. But this is less of an impact than the switch in # _psd_mask. return tf.cast( tf.matrix_determinant(x) > det_bounds, dtype=x.dtype)
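# The comment above asks about a Cholesky route: for a positive-definite matrix
# the determinant equals the squared product of the Cholesky diagonal, so the
# mask could in principle avoid a general determinant. A NumPy sketch of that
# identity (assumes strict positive definiteness; illustrative only):
import numpy as np

a = np.random.randn(4, 4)
x = a @ a.T + 4.0 * np.eye(4)                       # make x positive definite
chol_det = np.prod(np.diag(np.linalg.cholesky(x))) ** 2
print(np.isclose(chol_det, np.linalg.det(x)))       # True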
def _compareDeterminant(self, matrix_x): with self.test_session(): # Check the batch version, which should work for ndim >= 2 self._compareDeterminantBase( matrix_x, tf.batch_matrix_determinant(matrix_x)) if matrix_x.ndim == 2: # Check the simple version self._compareDeterminantBase(matrix_x, tf.matrix_determinant(matrix_x))
def test_determinants(self): with self.test_session(): for batch_shape in [(), (2, 3,)]: for k in [1, 4]: operator, mat = self._build_operator_and_mat(batch_shape, k) expected_det = tf.matrix_determinant(mat).eval() self._compare_results(expected_det, operator.det()) self._compare_results(np.log(expected_det), operator.log_det())
def testBatchGradientUnknownSize(self): with self.test_session(): batch_size = tf.constant(3) matrix_size = tf.constant(4) batch_identity = tf.tile(tf.expand_dims(tf.diag(tf.ones([matrix_size])), 0), [batch_size, 1, 1]) determinants = tf.matrix_determinant(batch_identity) reduced = tf.reduce_sum(determinants) sum_grad = tf.gradients(reduced, batch_identity)[0] self.assertAllClose(batch_identity.eval(), sum_grad.eval())
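# Why the test above expects sum_grad == batch_identity: by Jacobi's formula,
# d det(A) / dA = det(A) * inv(A).T, and for an identity matrix this is exactly I.
# A one-matrix NumPy illustration of the formula (arbitrary values):
import numpy as np

A = np.array([[2.0, 1.0], [0.5, 3.0]])
grad = np.linalg.det(A) * np.linalg.inv(A).T        # Jacobi's formula
eps = 1e-6
A_eps = A.copy()
A_eps[0, 0] += eps
finite_diff = (np.linalg.det(A_eps) - np.linalg.det(A)) / eps
print(np.isclose(grad[0, 0], finite_diff))          # True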
def test_det_dynamic(self): with self.test_session() as sess: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( shape, dtype, use_placeholder=True) op_det_v, mat_det_v = sess.run( [operator.determinant(), tf.matrix_determinant(mat)], feed_dict=feed_dict) self.assertAllClose(op_det_v, mat_det_v)
def test_det(self): with self.test_session() as sess: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: operator, mat, _ = self._operator_and_mat_and_feed_dict( shape, dtype, use_placeholder=False) op_det = operator.determinant() self.assertAllEqual(shape[:-2], op_det.get_shape()) op_det_v, mat_det_v = sess.run([op_det, tf.matrix_determinant(mat)]) self.assertAllClose(op_det_v, mat_det_v)
def construct_loss_graph(self): x = self.xp y = self.yp xs = x/self.ls K, Ki = self.construct_covariance_graph(xs) yT = tf.transpose(y) Kiy = tf.matmul(Ki, y) lK = tf.log(tf.matrix_determinant(K)) L = tf.matmul(yT, Kiy) + lK ones = tf.ones(tf.pack([tf.shape(xs)[0]]), dtype=tf.float64) L = L/tf.reduce_sum(ones) * 0.5 return L
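# The quantity built above is, up to the constant N*log(2*pi)/2 and the 1/N
# scaling, the Gaussian-process negative log marginal likelihood
# 0.5 * y^T K^{-1} y + 0.5 * log|K|. A hedged NumPy sketch of the usual
# Cholesky-based evaluation, which avoids forming K^{-1} and a raw determinant
# (toy kernel, not the class's covariance graph):
import numpy as np

n = 50
x = np.random.randn(n, 1)
K = np.exp(-0.5 * (x - x.T) ** 2) + 1e-6 * np.eye(n)    # toy RBF Gram matrix
y = np.random.randn(n, 1)

L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))     # K^{-1} y
nll = 0.5 * (y.T @ alpha).item() + np.sum(np.log(np.diag(L)))   # 0.5*log|K| = sum(log(diag(L)))
print(nll)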
def logpdf(self, x, mean=None, cov=1): """ Parameters ---------- x : np.array or tf.Tensor vector or matrix mean : np.array or tf.Tensor, optional vector. Defaults to zero mean. cov : np.array or tf.Tensor, optional vector or matrix. Defaults to identity. """ x = tf.cast(tf.convert_to_tensor(x), dtype=tf.float32) x_shape = get_dims(x) if len(x_shape) == 1: d = x_shape[0] else: d = x_shape[1] if mean is None: r = x else: mean = tf.cast(tf.convert_to_tensor(mean), dtype=tf.float32) r = x - mean if cov is 1: cov_inv = tf.diag(tf.ones([d])) det_cov = tf.constant(1.0) else: cov = tf.cast(tf.convert_to_tensor(cov), dtype=tf.float32) if len(cov.get_shape()) == 1: # vector cov_inv = tf.diag(1.0 / cov) det_cov = tf.reduce_prod(cov) else: # matrix cov_inv = tf.matrix_inverse(cov) det_cov = tf.matrix_determinant(cov) lps = -0.5*d*tf.log(2*np.pi) - 0.5*tf.log(det_cov) if len(x_shape) == 1: r = tf.reshape(r, shape=(d, 1)) lps -= 0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r) return tf.squeeze(lps) else: # TODO vectorize further out = [] for r_vec in tf.unpack(r): r_vec = tf.reshape(r_vec, shape=(d, 1)) out += [tf.squeeze(lps - 0.5 * tf.matmul( tf.matmul(r_vec, cov_inv, transpose_a=True), r_vec))] return tf.pack(out) """
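# The expression assembled above is the multivariate normal log-density
# -0.5*d*log(2*pi) - 0.5*log|cov| - 0.5*(x-mean)^T cov^{-1} (x-mean).
# A NumPy/SciPy cross-check of that formula (illustrative values only):
import numpy as np
from scipy.stats import multivariate_normal

d = 3
x = np.random.randn(d)
mean = np.zeros(d)
cov = np.diag([0.5, 1.0, 2.0])
r = x - mean
by_hand = (-0.5 * d * np.log(2 * np.pi)
           - 0.5 * np.log(np.linalg.det(cov))
           - 0.5 * r @ np.linalg.inv(cov) @ r)
print(np.isclose(by_hand, multivariate_normal.logpdf(x, mean=mean, cov=cov)))  # True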
def _compareDeterminant(self, matrix_x): with self.test_session(): if matrix_x.ndim == 2: tf_ans = tf.matrix_determinant(matrix_x) else: tf_ans = tf.batch_matrix_determinant(matrix_x) out = tf_ans.eval() shape = matrix_x.shape if shape[-1] == 0 and shape[-2] == 0: np_ans = np.ones(shape[:-2]).astype(matrix_x.dtype) else: np_ans = np.array(np.linalg.det(matrix_x)).astype(matrix_x.dtype) self.assertAllClose(np_ans, out) self.assertShapeEqual(np_ans, tf_ans)
def logpdf(self, x, mean=None, cov=1): """ Arguments ---------- x: tf.Tensor vector mean: tf.Tensor, optional vector. Defaults to zero mean. cov: tf.Tensor, optional vector or matrix. Defaults to identity. Returns ------- tf.Tensor scalar """ x = tf.cast(tf.squeeze(x), dtype=tf.float32) d = get_dims(x)[0] if mean is None: r = tf.ones([d]) * x else: mean = tf.cast(tf.squeeze(mean), dtype=tf.float32) r = x - mean if cov == 1: cov_inv = tf.diag(tf.ones([d])) det_cov = tf.constant(1.0) else: cov = tf.cast(tf.squeeze(cov), dtype=tf.float32) if len(cov.get_shape()) == 1: cov_inv = tf.diag(1.0 / cov) det_cov = tf.reduce_prod(cov) else: cov_inv = tf.matrix_inverse(cov) det_cov = tf.matrix_determinant(cov) r = tf.reshape(r, shape=(d, 1)) lps = -0.5*d*tf.log(2*np.pi) - 0.5*tf.log(det_cov) - \ 0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r) """ # TensorFlow can't reverse-mode autodiff Cholesky L = tf.cholesky(cov) L_inv = tf.matrix_inverse(L) det_cov = tf.pow(tf.matrix_determinant(L), 2) inner = dot(L_inv, r) out = -0.5*d*tf.log(2*np.pi) - \ 0.5*tf.log(det_cov) - \ 0.5*tf.matmul(tf.transpose(inner), inner) """ return tf.squeeze(lps)
def test_det(self): self._maybe_skip("det") with self.test_session() as sess: for use_placeholder in False, True: for shape in self._shapes_to_test: for dtype in self._dtypes_to_test: if dtype.is_complex: self.skipTest( "tf.matrix_determinant does not work with complex, so this " "test is being skipped.") operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( shape, dtype, use_placeholder=use_placeholder) op_det = operator.determinant() if not use_placeholder: self.assertAllEqual(shape[:-2], op_det.get_shape()) op_det_v, mat_det_v = sess.run( [op_det, tf.matrix_determinant(mat)], feed_dict=feed_dict) self.assertAC(op_det_v, mat_det_v)
def add_loss(self): """ add self.K which is the Kronecker product of self.K_c and self.K_x add self.sigma , which is the NM*NM covariance matrix of train_y get negative log likelihood of train_y, see paper nips """ temp_K=tf.tile(tf.reshape(self.K_c, [self.config.task_num, 1, self.config.task_num, 1]), [1, self.config.train_length, 1, self.config.train_length]) \ * tf.tile(tf.reshape(self.K_x, [1, self.config.train_length, 1, self.config.train_length]), [self.config.task_num, 1, self.config.task_num, 1]) self.K=tf.reshape(temp_K, [self.config.train_length*self.config.task_num, self.config.train_length*self.config.task_num]) temp_D_I=tf.tile(tf.reshape(self.D, [self.config.task_num, 1, self.config.task_num, 1]), [1, self.config.train_length, 1, self.config.train_length]) \ * tf.tile(tf.reshape(tf.constant(np.eye(self.config.train_length).astype(np.float32)), [1, self.config.train_length, 1, self.config.train_length]), [self.config.task_num, 1, self.config.task_num, 1]) D_I=tf.reshape(temp_D_I, [self.config.train_length*self.config.task_num, self.config.train_length*self.config.task_num]) self.sigma=self.K+D_I self.loss=0.5*tf.matmul(tf.matmul(tf.transpose(self.train_y), tf.matrix_inverse(self.sigma)), self.train_y) \ +0.5*tf.log(tf.matrix_determinant(self.sigma)+0.01)
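# Before the noise term D_I is added, the Kronecker-structured covariance above
# has a determinant that factorizes: det(K_c kron K_x) = det(K_c)**M * det(K_x)**T,
# with T tasks and M training points. A small NumPy check of that identity
# (toy matrices, not the model's tensors):
import numpy as np

Kc = np.array([[2.0, 0.3], [0.3, 1.5]])            # T = 2 tasks
Kx = np.array([[1.0, 0.2, 0.1],
               [0.2, 1.0, 0.2],
               [0.1, 0.2, 1.0]])                   # M = 3 training points
lhs = np.linalg.det(np.kron(Kc, Kx))
rhs = np.linalg.det(Kc) ** 3 * np.linalg.det(Kx) ** 2
print(np.isclose(lhs, rhs))                        # True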
def entropy(self, mean=None, cov=1): """ Note entropy does not depend on its mean. Parameters ---------- mean : np.array or tf.Tensor, optional vector. Defaults to zero mean. cov : np.array or tf.Tensor, optional vector or matrix. Defaults to identity. """ if cov is 1: d = 1 det_cov = 1.0 else: cov = tf.cast(tf.convert_to_tensor(cov), dtype=tf.float32) d = get_dims(cov)[0] if len(cov.get_shape()) == 1: det_cov = tf.reduce_prod(cov) else: det_cov = tf.matrix_determinant(cov) return 0.5 * (d + d*tf.log(2*np.pi) + tf.log(det_cov))
# 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1,
# 2, ..., 6
# Hint: Use tf.range() and tf.diag().
###############################################################################

out = tf.diag(tf.range(6) + 1)
print(sess.run(out))

###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

out = tf.random_normal([10, 10])
det = tf.matrix_determinant(out)
print(sess.run(out))
print(sess.run(det))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
y, idx = tf.unique(x)
print(sess.run((y)))

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
def _forward_log_det_jacobian(self, x): return -tf.matrix_determinant(x)
def _inverse_log_det_jacobian(self, x): return tf.matrix_determinant(x)
C = tf.random_uniform([3, 2]) logging.debug(sess.run( C)) # Note that we are reinitializing, hence the new random variables # Create matrix from np array D = tf.convert_to_tensor( np.array([[1., 2., 3.], [-3., -7., -1.], [0., 5., -2.]])) logging.debug(sess.run(D)) # Matrix addition/subtraction logging.debug(sess.run(A + B)) logging.debug(sess.run(B - B)) # Matrix Multiplication logging.debug(sess.run(tf.matmul(B, identity_matrix))) # Matrix Transpose logging.debug(sess.run(tf.transpose(C))) # Again, new random variables # Matrix Determinant logging.debug(sess.run(tf.matrix_determinant(D))) # Matrix Inverse logging.debug(sess.run(tf.matrix_inverse(D))) # Cholesky Decomposition logging.debug(sess.run(tf.cholesky(identity_matrix))) # Eigenvalues and Eigenvectors logging.debug(sess.run(tf.self_adjoint_eig(D)))
def main(): stddev=0.1 x_train, y_train = toy_dataset() steps = 40001 num_features = 2 num_classes = 2 num_hidden = 3 num_obs = x_train.shape[0] sess = tf.InteractiveSession() x_train = tf.constant(x_train, dtype=tf.float32) y_train = tf.constant(y_train, dtype=tf.int32) mu = tf.reduce_mean(x_train, 0) sigma = tf.div(tf.matmul(tf.transpose(x_train-mu), x_train-mu),tf.constant(num_obs-1, dtype=tf.float32)) weight = weight_variable([num_hidden, num_classes], stddev) bias = bias_variable([num_classes]) center = weight_variable([num_hidden, 1, num_features], stddev) scale = bias_variable([1]) inverse_sigma = tf.mul(scale, tf.matrix_inverse(sigma)) tmp = tf.sub(x_train, center) half = tf.einsum('ijk,kl->ijl', tmp, inverse_sigma) tmp = tf.reduce_sum(tf.mul(half, tmp), 2) tmp = tf.exp(tf.mul(tf.constant(-.5, dtype=tf.float32), tmp)) mag = tf.pow(tf.constant(2 * np.pi, dtype=tf.float32), tf.constant(-.5*num_features)) mag = tf.mul(mag, tf.pow( tf.matrix_determinant(inverse_sigma), .5)) pdf = tf.mul(mag, tf.exp(tmp)) y_lin = tf.add(tf.einsum('ij,ik->jk', pdf, weight), bias) prediction = tf.argmax(y_lin, 1) cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_lin, y_train)) learn_rate = 0.1 train_step = tf.train.GradientDescentOptimizer(learn_rate).minimize(cross_entropy) print 'Start training' start = time.time() sess.run(tf.initialize_all_variables()) c1x=[];c2x=[];c3x=[];c4x=[] c1y=[];c2y=[];c3y=[];c4y=[] plot_steps = steps/40 for i in range(steps): # if i % 100 == 0: # train_accuracy = accuracy.eval(feed_dict={x: x_train, y: y_train}) # print("step %d, training accuracy %g" % (i, train_accuracy)) train_step.run() #feed_dict={x: x_train, y: y_train}) if i%200==0: #print 'steps %d' % i print 'accuracy %.3f at steps %d' % (np.mean( prediction.eval() == np.array([0]*50+[1]*50+[0]*50)), i) if i%plot_steps == 0: tmp = center.eval().reshape([num_hidden, 2]) c1x.append(tmp[0][0]) c2x.append(tmp[1][0]) # c3x.append(tmp[2][0]) # c4x.append(tmp[3][0]) c1y.append(tmp[0][1]) c2y.append(tmp[1][1]) # c3y.append(tmp[2][1]) # c4y.append(tmp[3][1]) set_filename='Hidden%dlearnRate%.3fStep%dStddev%.3f' %(num_hidden, learn_rate, steps, stddev) set_filename = set_filename.replace('.', '_') to_save={'x_train': x_train.eval(), 'y_train':tf.argmax(y_train, 1).eval(), 'c1x':c1x, 'c2x':c2x, 'c3x':c3x, 'c4x':c4x, 'c1y':c1y, 'c2y':c2y, 'c3y':c3y, 'c4y':c4y, 'pdf': pdf.eval(), 'sigma':sigma.eval(), 'center':center.eval(), 'scale':scale.eval(), 'magnitude': mag.eval(), 'prediction':prediction.eval()} with open(set_filename, 'wb') as f: pickle.dump(to_save, f, pickle.HIGHEST_PROTOCOL) print time.time() - start
shape=[COMPONENTS, DIMENSIONS, DIMENSIONS]) initial_weights = tf.placeholder_with_default(tf.cast( tf.constant(1.0 / COMPONENTS, shape=[COMPONENTS]), tf.float64), shape=[COMPONENTS]) # trainable variables: component means, covariances, and weights means = tf.Variable(initial_means, dtype=tf.float64) covariances = tf.Variable(initial_covariances, dtype=tf.float64) weights = tf.Variable(initial_weights, dtype=tf.float64) # E-step: recomputing responsibilities with respect to the current parameter values differences = tf.subtract(tf.expand_dims(input, 0), tf.expand_dims(means, 1)) diff_times_inv_cov = tf.matmul(differences, tf.matrix_inverse(covariances)) sum_sq_dist_times_inv_cov = tf.reduce_sum(diff_times_inv_cov * differences, 2) log_coefficients = tf.expand_dims( ln2piD + tf.log(tf.matrix_determinant(covariances)), 1) log_components = -0.5 * (log_coefficients + sum_sq_dist_times_inv_cov) log_weighted = log_components + tf.expand_dims(tf.log(weights), 1) log_shift = tf.expand_dims(tf.reduce_max(log_weighted, 0), 0) exp_log_shifted = tf.exp(log_weighted - log_shift) exp_log_shifted_sum = tf.reduce_sum(exp_log_shifted, 0) gamma = exp_log_shifted / exp_log_shifted_sum # M-step: maximizing parameter values with respect to the computed responsibilities gamma_sum = tf.reduce_sum(gamma, 1) gamma_weighted = gamma / tf.expand_dims(gamma_sum, 1) means_ = tf.reduce_sum( tf.expand_dims(input, 0) * tf.expand_dims(gamma_weighted, 2), 1) differences_ = tf.subtract(tf.expand_dims(input, 0), tf.expand_dims(means_, 1)) sq_dist_matrix = tf.matmul(tf.expand_dims(differences_, 3), tf.expand_dims(differences_, 2))
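# The log_shift / exp_log_shifted lines above are the standard log-sum-exp
# stabilization: the responsibilities gamma are a softmax of log_weighted over
# components, and subtracting the per-point maximum before exponentiating avoids
# overflow/underflow. A minimal NumPy illustration of the same trick
# (hypothetical log-densities, components along the last axis here):
import numpy as np

log_weighted = np.array([[-1000.0, -1001.0, -1002.0]])   # naive exp() would underflow to 0
shift = log_weighted.max(axis=-1, keepdims=True)
gamma = np.exp(log_weighted - shift)
gamma /= gamma.sum(axis=-1, keepdims=True)
print(gamma)   # well-defined responsibilities, roughly [[0.665, 0.245, 0.090]]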
# 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1,
# 2, ..., 6
# Hint: Use tf.range() and tf.diag().
###############################################################################

square_dig = tf.diag(tf.range(1, 7, 1))
print(sess.run(square_dig))

###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

square = tf.random_normal([10, 10], mean=0, stddev=1)
det = tf.matrix_determinant(square)
print(sess.run(det))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
unique, _ = tf.unique(x)
print(sess.run(unique))

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
def init_eta_omega(self, beta, epsilon, init_eta, init_omega): # Here we define the symbolic function for the dual and the gradient self.beta = beta self.epsilon = epsilon # Init dual param values self.param_eta = init_eta self.param_omega = init_omega self.param_eta_non_lin = init_eta self.param_omega_non_lin = init_omega param_eta = tf.placeholder(dtype=tf.float32, shape=[], name="param_eta") param_omega = tf.placeholder(dtype=tf.float32, shape=[], name="param_omega") old_entropy = tf.placeholder(dtype=tf.float32, shape=[], name="old_entropy") varphis = tf.placeholder(dtype=tf.float32, shape=[None, None], name="varphis") Kt = tf.placeholder(dtype=tf.float32, shape=[None, None], name="Kt") prec = tf.placeholder(dtype=tf.float32, shape=[None, None], name="prec") Waa = tf.placeholder(dtype=tf.float32, shape=[None, None], name="Waa") Wsa = tf.placeholder(dtype=tf.float32, shape=[None, None], name="Wsa") wa = tf.placeholder(dtype=tf.float32, shape=[None, None], name="wa") # varphis = ext.new_tensor( # 'varphis', # ndim=2, # dtype=theano.config.floatX # ) # Kt = ext.new_tensor( # 'Kt', # ndim=2, # dtype=theano.config.floatX # ) # prec = ext.new_tensor( # 'prec', # ndim=2, # dtype=theano.config.floatX # ) # Waa = ext.new_tensor( # 'Waa', # ndim=2, # dtype=theano.config.floatX # ) # Wsa = ext.new_tensor( # 'Wsa', # ndim=2, # dtype=theano.config.floatX # ) # wa = ext.new_tensor( # 'wa', # ndim=2, # dtype=theano.config.floatX # ) if self.beta == 0: beta = 0 else: beta = old_entropy - self.beta # beta = self.printt('beta shape: ', beta) # log_action_prob = self.printn('log_action_prob shape: ', log_action_prob) # action_prob = self.printn('action_prob shape: ', action_prob) # q_values = self.printn('q_values shape: ', q_values) # beta = self.printn('beta shape: ', beta) # ha(s): eta * (\varphi(s)^T * K^T * \Sigma^{-1} + W_{sa}) + wa(s)) ha = tf.matmul(varphis, param_eta * tf.matmul(Kt, prec) + Wsa) + wa # hss(s): eta * (\varphi(s)^T * K^T * \Sigma^{-1} * K * \varphi(s)) varphisKt = tf.matmul(varphis, Kt) hss = param_eta * tf.reduce_sum(tf.matmul(varphisKt, prec) * varphisKt, axis=1) Haa = param_eta * prec + Waa # Haa = 0.5 * (Haa + TT.transpose(Haa)) HaaInv = tf.matrix_inverse(Haa) # The two terms 'term1' and 'term2' which come from normalizers of the # 1. Original policy distribution # 2. The distribution after completing the square sigma = tf.matrix_inverse(prec) term1 = -0.5 * param_eta * tf.log( tf.matrix_determinant(2 * np.pi * sigma)) if self.beta == 0: term2 = 0.5 * param_eta * tf.log( tf.matrix_determinant(2 * np.pi * param_eta * HaaInv)) else: term2 = 0.5 * (param_eta + param_omega) * tf.log( tf.matrix_determinant(2 * np.pi * (param_eta + param_omega) * HaaInv)) dual = param_eta * self.epsilon - param_omega * beta + \ term1 + term2 + tf.reduce_mean( 0.5 * (tf.reduce_sum(tf.matmul(ha, HaaInv) * ha, axis=1) - hss)) # Symbolic dual gradient dual_grad = tf.gradients(xs=[param_eta, param_omega], ys=dual) # Eval functions. 
f_dual = U.function( inputs=[varphis, Kt, prec, Waa, Wsa, wa] + [param_eta, param_omega, old_entropy], outputs=dual, # mode='DebugMode' # TEST ) f_dual_grad = U.function( inputs=[varphis, Kt, prec, Waa, Wsa, wa] + [param_eta, param_omega, old_entropy], outputs=dual_grad, # mode='DebugMode' # TEST ) # # # TEST # d0 = param_eta * self.epsilon - param_omega * beta # d1 = term1 # d2 = term2 # d3 = TT.mean(0.5 * (TT.sum(TT.dot(ha, HaaInv) * ha, axis=1))) # d4 = TT.mean(hss) # f_duals = ext.compile_function( # inputs=[varphis, Kt, prec, Waa, Wsa, wa] + [param_eta, param_omega, old_entropy], # outputs=[d0, d1, d2, d3, d4] # ) # # END TEST self.opt_info = dict( f_dual=f_dual, f_dual_grad=f_dual_grad, # f_duals=f_duals, # TEST )
def forward(self, inputs, grid, is_training=True, reuse=False): def preprocessing(inputs): dims = inputs.get_shape() if len(dims) == 3: inputs = tf.expand_dims(inputs, dim=0) mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3]) inputs = inputs[:, :, :, ::-1] + mean_BGR return inputs ## -----------------------depth and normal FCN-------------------------- inputs = preprocessing(inputs) with slim.arg_scope( [slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.relu, stride=1, padding='SAME', weights_initializer=weight_from_caffe(self.pretrain_weight), biases_initializer=bias_from_caffe(self.pretrain_weight)): with tf.variable_scope('fcn', reuse=reuse): ##---------------------vgg depth------------------------------------ conv1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') pool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME', scope='pool1') conv2 = slim.repeat(pool1, 2, slim.conv2d, 128, [3, 3], scope='conv2') pool2 = slim.max_pool2d(conv2, [3, 3], stride=2, padding='SAME', scope='pool2') conv3 = slim.repeat(pool2, 3, slim.conv2d, 256, [3, 3], scope='conv3') pool3 = slim.max_pool2d(conv3, [3, 3], stride=2, padding='SAME', scope='pool3') conv4 = slim.repeat(pool3, 3, slim.conv2d, 512, [3, 3], scope='conv4') pool4 = slim.max_pool2d(conv4, [3, 3], stride=1, padding='SAME', scope='pool4') conv5 = slim.repeat(pool4, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5') pool5 = slim.max_pool2d(conv5, [3, 3], stride=1, padding='SAME', scope='pool5') pool5a = slim.avg_pool2d(pool5, [3, 3], stride=1, padding='SAME', scope='pool5a') fc6 = slim.conv2d(pool5a, 1024, [3, 3], stride=1, rate=12, scope='fc6') fc6 = slim.dropout(fc6, 0.5, is_training=is_training, scope='drop6') fc7 = slim.conv2d(fc6, 1024, [1, 1], scope='fc7') fc7 = slim.dropout(fc7, 0.5, is_training=is_training, scope='drop7') pool6_1x1 = slim.avg_pool2d(fc7, [61, 81], stride=[61, 81], padding='SAME', scope='pool6_1x1') pool6_1x1_norm = slim.unit_norm(pool6_1x1, dim=3, scope='pool6_1x1_norm_new') pool6_1x1_norm_scale = pool6_1x1_norm * 10 pool6_1x1_norm_upsample = tf.tile( pool6_1x1_norm_scale, [1, 61, 81, 1], name='pool6_1x1_norm_upsample') out = tf.concat([fc7, pool6_1x1_norm_upsample], axis=-1, name='out') out_reduce = slim.conv2d(out, 256, [1, 1], activation_fn=tf.nn.relu, stride=1, scope='out_reduce', padding='SAME', weights_initializer=weight_from_caffe( self.pretrain_weight), biases_initializer=bias_from_caffe( self.pretrain_weight)) out_conv = slim.conv2d(out_reduce, 256, [3, 3], activation_fn=tf.nn.relu, stride=1, scope='out_conv', padding='SAME', weights_initializer=weight_from_caffe( self.pretrain_weight), biases_initializer=bias_from_caffe( self.pretrain_weight)) out_conv_increase = slim.conv2d( out_conv, 1024, [1, 1], activation_fn=tf.nn.relu, stride=1, scope='out_conv_increase', padding='SAME', weights_initializer=weight_from_caffe( self.pretrain_weight), biases_initializer=bias_from_caffe(self.pretrain_weight)) fc8_nyu_depth = slim.conv2d(out_conv_increase, 1, [1, 1], activation_fn=None, scope='fc8_nyu_depth') fc8_upsample = tf.image.resize_images( fc8_nyu_depth, [self.crop_size_h, self.crop_size_w], method=0, align_corners=True) # ---------------------------------------vgg depth end --------------------------------------- ## ----------------- vgg norm--------------------------------------------------------------- conv1_norm = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1_norm') pool1_norm = slim.max_pool2d(conv1_norm, [3, 3], stride=2, padding='SAME', scope='pool1_norm') conv2_norm 
= slim.repeat(pool1_norm, 2, slim.conv2d, 128, [3, 3], scope='conv2_norm') pool2_norm = slim.max_pool2d(conv2_norm, [3, 3], stride=2, padding='SAME', scope='pool2_norm') conv3_norm = slim.repeat(pool2_norm, 3, slim.conv2d, 256, [3, 3], scope='conv3_norm') pool3_norm = slim.max_pool2d(conv3_norm, [3, 3], stride=2, padding='SAME', scope='pool3_norm') conv4_norm = slim.repeat(pool3_norm, 3, slim.conv2d, 512, [3, 3], scope='conv4_norm') pool4_norm = slim.max_pool2d(conv4_norm, [3, 3], stride=1, padding='SAME', scope='pool4_norm') conv5_norm = slim.repeat(pool4_norm, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5_norm') pool5_norm = slim.max_pool2d(conv5_norm, [3, 3], stride=1, padding='SAME', scope='pool5_norm') pool5a_norm = slim.avg_pool2d(pool5_norm, [3, 3], stride=1, padding='SAME', scope='pool5a_norm') fc6_norm = slim.conv2d(pool5a_norm, 1024, [3, 3], stride=1, rate=12, scope='fc6_norm') fc6_norm = slim.dropout(fc6_norm, 0.5, is_training=is_training, scope='drop6_norm') fc7_norm = slim.conv2d(fc6_norm, 1024, [1, 1], scope='fc7_norm') fc7_norm = slim.dropout(fc7_norm, 0.5, is_training=is_training, scope='drop7_norm') pool6_1x1_norm_new = slim.avg_pool2d( fc7_norm, [61, 81], stride=[61, 81], padding='SAME', scope='pool6_1x1_norm_new') pool6_1x1_norm_norm = slim.unit_norm( pool6_1x1_norm_new, dim=3, scope='pool6_1x1_norm_new') pool6_1x1_norm_scale_norm = pool6_1x1_norm_norm * 10 pool6_1x1_norm_upsample_norm = tf.tile( pool6_1x1_norm_scale_norm, [1, 61, 81, 1], name='pool6_1x1_norm_upsample') out_norm = tf.concat([fc7_norm, pool6_1x1_norm_upsample_norm], axis=-1, name='out_norm') fc8_nyu_norm_norm = slim.conv2d(out_norm, 3, [1, 1], activation_fn=None, scope='fc8_nyu_norm_norm') fc8_upsample_norm = tf.image.resize_images( fc8_nyu_norm_norm, [self.crop_size_h, self.crop_size_w], method=0, align_corners=True) fc8_upsample_norm = slim.unit_norm(fc8_upsample_norm, dim=3) # -------------------------------------vgg norm end--------------------------------------------- # ------------- depth to normal + norm refinement--------------------------------------------------- with tf.variable_scope('noise', reuse=reuse): fc8_upsample_norm = tf.squeeze(fc8_upsample_norm) fc8_upsample_norm = tf.reshape( fc8_upsample_norm, [self.batch_size, self.crop_size_h, self.crop_size_w, 3]) norm_matrix = tf.extract_image_patches( images=fc8_upsample_norm, ksizes=[1, self.k, self.k, 1], strides=[1, 1, 1, 1], rates=[1, self.rate, self.rate, 1], padding='SAME') matrix_c = tf.reshape(norm_matrix, [ self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 3 ]) fc8_upsample_norm = tf.expand_dims(fc8_upsample_norm, axis=4) angle = tf.matmul(matrix_c, fc8_upsample_norm) valid_condition = tf.greater(angle, self.thresh) valid_condition_all = tf.tile(valid_condition, [1, 1, 1, 1, 3]) exp_depth = tf.exp(fc8_upsample * 0.69314718056) depth_repeat = tf.tile(exp_depth, [1, 1, 1, 3]) points = tf.multiply(grid, depth_repeat) point_matrix = tf.extract_image_patches( images=points, ksizes=[1, self.k, self.k, 1], strides=[1, 1, 1, 1], rates=[1, self.rate, self.rate, 1], padding='SAME') matrix_a = tf.reshape(point_matrix, [ self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 3 ]) matrix_a_zero = tf.zeros_like(matrix_a, dtype=tf.float32) matrix_a_valid = tf.where(valid_condition_all, matrix_a, matrix_a_zero) matrix_a_trans = tf.matrix_transpose(matrix_a_valid, name='matrix_transpose') matrix_b = tf.ones(shape=[ self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 1 ]) point_multi = 
tf.matmul(matrix_a_trans, matrix_a_valid, name='matrix_multiplication') with tf.device('cpu:0'): matrix_deter = tf.matrix_determinant(point_multi) inverse_condition = tf.greater(matrix_deter, 1e-5) inverse_condition = tf.expand_dims(inverse_condition, axis=3) inverse_condition = tf.expand_dims(inverse_condition, axis=4) inverse_condition_all = tf.tile(inverse_condition, [1, 1, 1, 3, 3]) diag_constant = tf.ones([3], dtype=tf.float32) diag_element = tf.diag(diag_constant) diag_element = tf.expand_dims(diag_element, axis=0) diag_element = tf.expand_dims(diag_element, axis=0) diag_element = tf.expand_dims(diag_element, axis=0) diag_matrix = tf.tile(diag_element, [ self.batch_size, self.crop_size_h, self.crop_size_w, 1, 1 ]) inversible_matrix = tf.where(inverse_condition_all, point_multi, diag_matrix) with tf.device('cpu:0'): inv_matrix = tf.matrix_inverse(inversible_matrix) generated_norm = tf.matmul( tf.matmul(inv_matrix, matrix_a_trans), matrix_b) norm_normalize = slim.unit_norm((generated_norm), dim=3) norm_normalize = tf.reshape( norm_normalize, [self.batch_size, self.crop_size_h, self.crop_size_w, 3]) norm_scale = norm_normalize * 10.0 conv1_noise = slim.repeat(norm_scale, 2, slim.conv2d, 64, [3, 3], scope='conv1_noise') pool1_noise = slim.max_pool2d(conv1_noise, [3, 3], stride=2, padding='SAME', scope='pool1_noise') # conv2_noise = slim.repeat(pool1_noise, 2, slim.conv2d, 128, [3, 3], scope='conv2_noise') conv3_noise = slim.repeat(conv2_noise, 3, slim.conv2d, 256, [3, 3], scope='conv3_noise') fc1_noise = slim.conv2d(conv3_noise, 512, [1, 1], activation_fn=tf.nn.relu, stride=1, scope='fc1_noise', padding='SAME') encode_norm_noise = slim.conv2d(fc1_noise, 3, [3, 3], activation_fn=None, stride=1, scope='encode_norm_noise', padding='SAME') encode_norm_upsample_noise = tf.image.resize_images( encode_norm_noise, [self.crop_size_h, self.crop_size_w], method=0, align_corners=True) sum_norm_noise = tf.add(norm_normalize, encode_norm_upsample_noise) norm_pred_noise = slim.unit_norm(sum_norm_noise, dim=3) norm_pred_all = tf.concat([ tf.expand_dims(tf.squeeze(fc8_upsample_norm), axis=0), norm_pred_noise, inputs * 0.00392156862 ], axis=3) norm_pred_all = slim.repeat( norm_pred_all, 3, slim.conv2d, 128, [3, 3], rate=2, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv1_norm_noise_new') norm_pred_all = slim.repeat( norm_pred_all, 3, slim.conv2d, 128, [3, 3], weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv2_norm_noise_new') norm_pred_final = slim.conv2d( norm_pred_all, 3, [3, 3], activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='norm_conv3_noise_new') norm_pred_final = slim.unit_norm((norm_pred_final), dim=3) # ------------- normal to depth + depth refinement--------------------------------------------------- with tf.variable_scope('norm_depth', reuse=reuse): grid_patch = tf.extract_image_patches( images=grid, ksizes=[1, self.k, self.k, 1], strides=[1, 1, 1, 1], rates=[1, self.rate, self.rate, 1], padding='SAME') grid_patch = tf.reshape(grid_patch, [ self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 3 ]) _, _, depth_data = tf.split(value=matrix_a, num_or_size_splits=3, axis=4) tmp_matrix_zero = tf.zeros_like(angle, dtype=tf.float32) valid_angle = tf.where(valid_condition, angle, tmp_matrix_zero) lower_matrix = 
tf.matmul(matrix_c, tf.expand_dims(grid, axis=4)) condition = tf.greater(lower_matrix, 1e-5) tmp_matrix = tf.ones_like(lower_matrix) lower_matrix = tf.where(condition, lower_matrix, tmp_matrix) lower = tf.reciprocal(lower_matrix) valid_angle = tf.where(condition, valid_angle, tmp_matrix_zero) upper = tf.reduce_sum(tf.multiply(matrix_c, grid_patch), [4]) ratio = tf.multiply(lower, tf.expand_dims(upper, axis=4)) estimate_depth = tf.multiply(ratio, depth_data) valid_angle = tf.multiply( valid_angle, tf.reciprocal( tf.tile( tf.reduce_sum(valid_angle, [3, 4], keep_dims=True) + 1e-5, [1, 1, 1, 81, 1]))) depth_stage1 = tf.reduce_sum( tf.multiply(estimate_depth, valid_angle), [3, 4]) depth_stage1 = tf.expand_dims(tf.squeeze(depth_stage1), axis=2) depth_stage1 = tf.clip_by_value(depth_stage1, 0, 10.0) exp_depth = tf.expand_dims(tf.squeeze(exp_depth), axis=2) depth_all = tf.expand_dims(tf.concat([ depth_stage1, exp_depth, tf.squeeze(inputs) * 0.00392156862 ], axis=2), axis=0) depth_pred_all = slim.repeat( depth_all, 3, slim.conv2d, 128, [3, 3], rate=2, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv1_depth_noise_new') depth_pred_all = slim.repeat( depth_pred_all, 3, slim.conv2d, 128, [3, 3], weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv2_depth_noise_new') final_depth = slim.conv2d( depth_pred_all, 1, [3, 3], activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='depth_conv3_noise_new') with tf.variable_scope('edge_refinemet', reuse=reuse): print(inputs.shape) edges = tf.py_func(myfunc_canny, [inputs], tf.float32) edges = tf.reshape(edges, [1, self.crop_size_h, self.crop_size_w, 1]) edge_input_depth = final_depth edge_input_norm = norm_pred_final # edge prediction for depth edge_inputs = tf.concat([edges, inputs * 0.00784], axis=3) edges_encoder = slim.repeat( edge_inputs, 3, slim.conv2d, 32, [3, 3], rate=2, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv1_edge_refinement') edges_encoder = slim.repeat( edges_encoder, 3, slim.conv2d, 32, [3, 3], weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='conv2_edge_refinement') edges_predictor = slim.conv2d( edges_encoder, 8, [3, 3], activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer( uniform=False), biases_initializer=tf.constant_initializer(0.0), scope='edge_weight') edges_all = edges_predictor + tf.tile(edges, [1, 1, 1, 8]) edges_all = tf.clip_by_value(edges_all, 0.0, 1.0) dlr, drl, dud, ddu, nlr, nrl, nud, ndu = tf.split( edges_all, num_or_size_splits=8, axis=3) # 4 iteration depth final_depth = propagate(edge_input_depth, dlr, drl, dud, ddu, 1) final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1) final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1) final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1) # 4 iteration norm norm_pred_final = propagate(edge_input_norm, nlr, nrl, nud, ndu, 3) norm_pred_final = slim.unit_norm((norm_pred_final), dim=3) norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3) norm_pred_final = slim.unit_norm((norm_pred_final), dim=3) norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3) norm_pred_final = 
slim.unit_norm((norm_pred_final), dim=3) norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3) norm_pred_final = slim.unit_norm((norm_pred_final), dim=3) return final_depth, fc8_upsample_norm, norm_pred_final, fc8_upsample
def log_determinant(Z):
    logdet = tf.log(tf.matrix_determinant(Z))
    return logdet
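# Taking tf.log of a raw determinant can overflow or underflow once Z gets large
# or badly scaled. When Z is known to be symmetric positive definite, a common
# alternative is to go through a Cholesky factor (a sketch under that assumption,
# not a drop-in replacement for the helper above):
import tensorflow as tf

def log_determinant_chol(Z):
    # |Z| = prod(diag(L))**2 for Z = L L^T, so log|Z| = 2 * sum(log(diag(L)))
    chol = tf.cholesky(Z)
    return 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(chol)))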
def det_loop_time(outputs, inputs): # inputs is dim_latent x dim_latent matrix return tf.matrix_determinant( tf.matmul(inputs, inputs, transpose_b=True) + 1e-6 * np.eye(self.dim_latent))
# Hint: Use tf.range() and tf.diag(). ############################################################################### diag_vals = tf.range(start=1, limit=7) diag = tf.diag(diagonal=diag_vals, name='diag') print(diag.eval()) ############################################################################### # 1f: Create a random 2-d tensor of size 10 x 10 from any distribution. # Calculate its determinant. # Hint: Look at tf.matrix_determinant(). ############################################################################### rand = tf.random_normal(shape=[10, 10]) det = tf.matrix_determinant(input=rand, name='det') print(rand.eval(), '\n', det.eval()) ############################################################################### # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]. # Return the unique elements in x # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple. ############################################################################### x5 = tf.constant(value=[5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]) uni, idx = tf.unique(x5, name='unique') print(x5.eval(), '\n', uni.eval()) ############################################################################### # 1h: Create two tensors x and y of shape 300 from any normal distribution, # as long as they are from the same distribution.
import tensorflow as tf

a = tf.constant([[1., 2.], [3., 4.]])
b = tf.constant([[5., 6.], [7., 8.]])

x1 = tf.matmul(a, b)
x2 = tf.matrix_determinant(a)
x3 = tf.matrix_inverse(a)

with tf.Session() as sess:
    y1, y2, y3 = sess.run([x1, x2, x3])
    print(y1)
    print(y2)
    print(y3)
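# For reference, the printed values are hand-checkable for these 2x2 inputs
# (up to float rounding):
#   y1 = [[19., 22.], [43., 50.]]    # a @ b
#   y2 = -2.0                        # det(a) = 1*4 - 2*3
#   y3 = [[-2., 1.], [1.5, -0.5]]    # inverse of a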
# 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1,
# 2, ..., 6
# Hint: Use tf.range() and tf.diag().
###############################################################################

# Create the diagonal matrix
x = tf.diag(tf.range(1, 7))
print(sess.run(x))

###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

# Compute the determinant
i = tf.random_normal((10, 10))
x = tf.matrix_determinant(i)
print(sess.run(x))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
value, index = tf.unique(x)
print(sess.run([value, index]))

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
def getHessianMLP(n_input, n_hidden, n_output): batch_size = 1 # Each time getHessianMLP is called, we create a new graph so that the default graph (which exists a priori) won't be filled with old ops. g = tf.Graph() with g.as_default(): # First create placeholders for inputs and targets: x_input, y_target x_input = tf.placeholder(tf.float32, shape=[batch_size, n_input]) y_target = tf.placeholder(tf.float32, shape=[batch_size, n_output]) # Start constructing a computational graph for multilayer perceptron ### Since we want to store parameters as one long vector, we first define our parameters as below and then ### reshape it later according to each layer specification. l1 = tf.truncated_normal([n_input * n_hidden, 1]) # parameters = tf.Variable(tf.concat( # [tf.zeros([n_input * n_hidden, 1]), tf.ones([n_hidden, 1]), tf.zeros([n_hidden * n_output, 1]), # tf.ones([n_output, 1])], 0)) parameters = tf.Variable(np.arange(1, 18, dtype=np.float32)) with tf.name_scope("hidden") as scope: idx_from = 0 weights = tf.reshape( tf.slice(parameters, begin=[idx_from], size=[n_input * n_hidden]), [n_input, n_hidden]) idx_from = idx_from + n_input * n_hidden biases = tf.reshape( tf.slice(parameters, begin=[idx_from], size=[n_hidden]), [n_hidden]) # tf.Variable(tf.truncated_normal([n_hidden])) hidden = tf.matmul(x_input, weights) + biases with tf.name_scope("linear") as scope: idx_from = idx_from + n_hidden weights = tf.reshape( tf.slice(parameters, begin=[idx_from], size=[n_hidden * n_output], name='linear_chen'), [n_hidden, n_output]) idx_from = idx_from + n_hidden * n_output biases = tf.reshape( tf.slice(parameters, begin=[idx_from], size=[n_output], name='linear_jie'), [n_output]) output = tf.nn.softmax(tf.matmul(hidden, weights) + biases) # Define cross entropy loss loss = -tf.reduce_sum(y_target * tf.log(output)) ### Note: We can call tf.trainable_variables to get GraphKeys.TRAINABLE_VARIABLES ### because we are using g as our default graph inside the "with" scope. # Get trainable variables tvars = tf.trainable_variables() # Get gradients of loss with repect to parameters hess = tf.hessians(loss, tvars) hess_det = tf.matrix_determinant(hess) # dloss_dw = tf.gradients(loss, tvars)[0] # dim, _ = dloss_dw.get_shape() # hess = [] # for i in range(dim): # # tf.slice: https://www.tensorflow.org/versions/0.6.0/api_docs/python/array_ops.html#slice # dfx_i = tf.slice(dloss_dw, begin=[i, 0], size=[1, 1]) # ddfx_i = tf.gradients(dfx_i, parameters)[ # 0] # whenever we use tf.gradients, make sure you get the actual tensors by putting [0] at the end # hess.append(ddfx_i) # hess = tf.squeeze(hess) init_op = tf.initialize_all_variables() with tf.Session() as sess: sess.run(init_op) feed_dict = { x_input: np.random.random([batch_size, n_input]), y_target: np.random.random([batch_size, n_output]) } print(sess.run([hess, hess_det], feed_dict))
def testWrongDimensions(self): # The input to the determinant should be a 2-dimensional tensor. tensor1 = tf.constant([1., 2.]) with self.assertRaises(ValueError): tf.matrix_determinant(tensor1)
C = tf.random_uniform([3, 2])
print(sess.run(C))  # random values
print(sess.run(C))

# Create a matrix from a numpy array
D = tf.convert_to_tensor(np.array([[1., 2., 3.], [-3., -7., -1.], [0., 5., -2.]]))
print(sess.run(D))

# Matrix addition/subtraction
print(sess.run(A + B))
print(sess.run(B - B))

# Matrix multiplication
print(sess.run(tf.matmul(A, C)))

# Matrix transpose
print(sess.run(tf.transpose(C)))  # Again, new random variables

# Matrix determinant
print(sess.run(tf.matrix_determinant(D)))

# Matrix inverse
print(sess.run(tf.matrix_inverse(D)))

# Eigenvalues and eigenvectors of the matrix
print(sess.run(tf.self_adjoint_eig(D)))

# Special functions
# (elementwise math functions omitted)
# Activation functions
x_vals = np.linspace(start=-10., stop=10., num=100)

# The rectified linear unit (ReLU) is the most commonly used nonlinearity in
# neural networks; it computes max(0, x), which is continuous but not smooth.
print(sess.run(tf.nn.relu([-3., 3., 10.])))
y_relu = sess.run(tf.nn.relu(x_vals))

# The sigmoid, also called the logistic function, 1/(1 + exp(-x)), is the most
# common continuous, smooth activation. Because its backpropagated terms tend
# toward 0 during training, it is used less often. Usage:
print(sess.run(tf.nn.sigmoid([-1., 0., 1.])))
y_sigmoid = sess.run(tf.nn.sigmoid(x_vals))
A = tf.truncated_normal([2, 3]) print(sess.run(A)) # 2x3 constant matrix: B = tf.fill([2, 3], 5.0) print(sess.run(B)) # 3x2 random uniform matrix: C = tf.random_uniform([3, 2]) print(sess.run(C)) # Create matrix from np array: D = tf.convert_to_tensor(np.array([[1., 2., 3.], [-3., -7., -1.], [0., 5., -2.]])) print(sess.run(D)) # Matrix Operations Matrix addition/subtraction: print(sess.run(A + B)) print(sess.run(B - B)) # Matrix Multiplication: print(sess.run(tf.matmul(B, identity_matrix))) # Matrix Transpose: print(sess.run(tf.transpose(C))) # Matrix Determinant: print(sess.run(tf.matrix_determinant(D))) # Matrix Inverse: print(sess.run(tf.matrix_inverse(D))) # Cholesky Decomposition: print(sess.run(tf.cholesky(identity_matrix))) # Eigenvalues and Eigenvectors: We use tf.self_adjoint_eig() function, which returns two objects, # first one is an array of eigenvalues, the second is a matrix of the eigenvectors. eigenvalues, eigenvectors = sess.run(tf.self_adjoint_eig(D)) print(eigenvalues) print(eigenvectors)
print('matrix1 = ') print(matrix1) print('matrix2 = ') print(matrix2) matrix1 = tf.constant(matrix1) matrix2 = tf.constant(matrix2) matrix_product = tf.matmul(matrix1, matrix2) matrix_sum = tf.add(matrix1, matrix2) matrix3 = np.array([(2,7,2), (1,4,2), (9,0,2)], dtype='float32') print('matrix3 = ') print(matrix3) matrix_det = tf.matrix_determinant(matrix3) with tf.Session() as sess: result1 = sess.run(matrix_product) result2 = sess.run(matrix_sum) result3 = sess.run(matrix_det) print('matrix1 * matrix2 = ') print(result1) print('matrix1 + matrix2 = ') print(result2) print('matrix3 determinant result = ') print(result3)
# 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1,
# 2, ..., 6
# Hint: Use tf.range() and tf.diag().
###############################################################################

values = tf.range(1, 7)
out = tf.diag(values)

###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

m = tf.random_normal([10, 10], mean=10, stddev=1)
out = tf.matrix_determinant(m)

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
unique_values, indices = tf.unique(x)

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y)
def build_model(self): # Compression Network # Takes x # Produces x' # Produces z = concat((z_c, z_r)) (Equations 1, 2, 3) # z_r = concat((eu_dist, cos_sim)) self.input = tf.placeholder( shape=(None, self.input_dim), dtype=tf.float32, name="input", ) encoder_1 = tf.layers.dense( inputs=self.input, units=12, activation=tf.tanh, ) encoder_2 = tf.layers.dense( inputs=encoder_1, units=4, activation=tf.tanh, ) self.z_c = tf.layers.dense( inputs=encoder_2, units=1, activation=None, ) decoder_1 = tf.layers.dense( inputs=self.z_c, units=4, activation=tf.tanh, ) decoder_2 = tf.layers.dense( inputs=decoder_1, units=12, activation=tf.tanh, ) self.recon = tf.layers.dense( inputs=decoder_2, units=self.input_dim, activation=None, ) eu_dist = tf.norm(self.input - self.recon, axis=1, keep_dims=True) / tf.norm(self.input, axis=1, keep_dims=True) cos_sim = tf.reduce_sum(self.input * self.recon, axis=1, keep_dims=True) / (tf.norm(self.input, axis=1, keep_dims=True) * tf.norm(self.recon, axis=1, keep_dims=True)) self.z_r = tf.concat((eu_dist, cos_sim), axis=1) self.z = tf.concat((self.z_c, self.z_r), axis=1) # Estimation Network # Takes z = concat((z_c, z_r)) # Produces p, where gamma = softmax(p) = soft mixture-component membership prediction (Equation 4) self.is_train = tf.placeholder( # for dropout shape=None, dtype=tf.bool, name="is_train", ) estim_1 = tf.layers.dense( inputs=self.z, units=10, activation=tf.tanh, ) estim_dropout = tf.layers.dropout( inputs=estim_1, rate=0.5, training=self.is_train, ) self.p = tf.layers.dense( inputs=estim_dropout, units=self.gmm_k, activation=None, ) self.gamma = tf.nn.softmax(self.p) # GMM parameters: gmm_dist (phi), gmm_mean (mu), gmm_cov (epsilon) (Equation 5) # self.gmm_dist = tf.expand_dims(tf.reduce_mean(self.gamma, axis=0, keep_dims=True), axis=2) self.gmm_dist = tf.transpose(tf.reduce_mean(self.gamma, axis=0, keep_dims=True)) self.gmm_mean = tf.matmul(self.gamma, self.z, transpose_a=True) / tf.transpose(tf.reduce_sum(self.gamma, axis=0, keep_dims=True)) self.diff_mean = diff_mean = tf.tile(tf.expand_dims(self.z, axis=0), tf.constant([self.gmm_k, 1, 1])) - tf.expand_dims(self.gmm_mean, axis=1) self.gmm_cov = tf.matmul(tf.transpose(diff_mean, perm=[0, 2, 1]), tf.expand_dims(tf.transpose(self.gamma), axis=2) * diff_mean) / tf.expand_dims(tf.transpose(tf.reduce_sum(self.gamma, axis=0, keep_dims=True)), axis=2) # Energy Function (Equation 6) energy_numerator = tf.exp(-0.5 * tf.reduce_sum(tf.matmul(self.diff_mean, self.gmm_cov) * self.diff_mean, axis=2)) energy_denominator = tf.expand_dims(tf.expand_dims(tf.sqrt(tf.matrix_determinant(2 * np.pi * self.gmm_cov)), axis=1), axis=2) self.energy = tf.expand_dims(-tf.log(tf.reduce_sum(tf.reduce_sum(tf.expand_dims(self.gmm_dist, axis=1) * energy_numerator / energy_denominator, axis=0), axis=0)), axis=1) # Loss Function (Equation 7) # Reconstruction loss + lmda_1 * Energy loss + lmda_2 * Diagonal loss # self.recon_loss = recon_loss = tf.losses.mean_squared_error(self.input, self.recon) self.recon_loss = recon_loss = tf.reduce_mean(tf.norm((self.input - self.recon), axis=1) ** 2) self.energy_loss = energy_loss = tf.reduce_mean(self.energy) self.diagonal_loss = diagonal_loss = tf.reduce_sum(tf.pow(tf.matrix_diag_part(self.gmm_cov), -tf.ones_like(tf.matrix_diag_part(self.gmm_cov)))) self.loss = recon_loss + self.lmda_1 * energy_loss + self.lmda_2 * diagonal_loss self.optimize = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(self.loss)
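# The energy above is E(z) = -log( sum_k phi_k * N_k(z) ), with each component
# density normalized by sqrt(det(2*pi*Sigma_k)); note det(2*pi*Sigma) equals
# (2*pi)**d * det(Sigma), so this single term already carries the (2*pi)**(d/2)
# factor. (In the DAGMM formulation the quadratic form uses Sigma_k^{-1}.)
# A small NumPy check of the determinant identity (toy covariance only):
import numpy as np

Sigma = np.array([[1.5, 0.2], [0.2, 0.8]])
d = Sigma.shape[0]
print(np.isclose(np.linalg.det(2 * np.pi * Sigma),
                 (2 * np.pi) ** d * np.linalg.det(Sigma)))   # True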
#!/usr/bin/python3
import tensorflow as tf
import numpy as np

matrix1 = np.array([(2, 2, 2), (2, 2, 2), (2, 2, 2)], dtype='int32')
matrix2 = np.array([(1, 1, 1), (1, 1, 1), (1, 1, 1)], dtype='int32')
print(matrix1)
print(matrix2)

matrix1 = tf.constant(matrix1)
matrix2 = tf.constant(matrix2)
matrix_product = tf.matmul(matrix1, matrix2)
matrix_sum = tf.add(matrix1, matrix2)

matrix_3 = np.array([(2, 7, 2), (1, 4, 2), (9, 0, 2)], dtype='float32')
print(matrix_3)
matrix_det = tf.matrix_determinant(matrix_3)

with tf.Session() as sess:
    result1 = sess.run(matrix_product)
    result2 = sess.run(matrix_sum)
    result3 = sess.run(matrix_det)

print(result1)
print(result2)
print(result3)
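# Hand check of the last value: expanding matrix_3 along its first row,
#   det = 2*(4*2 - 2*0) - 7*(1*2 - 2*9) + 2*(1*0 - 4*9) = 16 + 112 - 72 = 56,
# so result3 prints 56.0 (up to float rounding). Note that tf.matrix_determinant
# operates on floating-point matrices, which is why matrix_3 is built as float32
# while the int32 matrices are only multiplied and added.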
def log_determinant(Z):
    logdet = tf.log(tf.matrix_determinant(Z))
    return logdet
def invertible_1x1_conv(name, z, reverse=False): if True: # Set to "False" to use the LU-decomposed version with tf.variable_scope(name, reuse=tf.AUTO_REUSE): shape = int_shape(z) w_shape = [shape[-1], shape[-1]] # Sample a random orthogonal matrix: w_init = np.linalg.qr(np.random.randn( *w_shape))[0].astype('float32') w = tf.get_variable("W", dtype=tf.float32, initializer=w_init) # dlogdet = tf.linalg.LinearOperator(w).log_abs_determinant() * shape[1]*shape[2] logdet = tf.cast(tf.log(abs(tf.matrix_determinant( tf.cast(w, 'float64')))), 'float32') * shape[1]*shape[2]*shape[3] if not reverse: _w = tf.reshape(w, [1, 1, 1] + w_shape) z = tf.nn.conv3d(z, _w, [1, 1, 1, 1, 1], 'SAME', data_format='NDHWC') return z, logdet else: _w = tf.matrix_inverse(w) _w = tf.reshape(_w, [1, 1, 1]+w_shape) z = tf.nn.conv3d(z, _w, [1, 1, 1, 1, 1], 'SAME', data_format='NDHWC') return z, -logdet else: # LU-decomposed version shape = int_shape(z) with tf.variable_scope(name, reuse=tf.AUTO_REUSE): dtype = 'float64' # Random orthogonal matrix: import scipy np_w = scipy.linalg.qr(np.random.randn(shape[-1], shape[-1]))[ 0].astype('float32') np_p, np_l, np_u = scipy.linalg.lu(np_w) np_s = np.diag(np_u) np_sign_s = np.sign(np_s) np_log_s = np.log(abs(np_s)) np_u = np.triu(np_u, k=1) p = tf.get_variable("P", initializer=np_p, trainable=False) l = tf.get_variable("L", initializer=np_l) sign_s = tf.get_variable( "sign_S", initializer=np_sign_s, trainable=False) log_s = tf.get_variable("log_S", initializer=np_log_s) # S = tf.get_variable("S", initializer=np_s) u = tf.get_variable("U", initializer=np_u) p = tf.cast(p, dtype) l = tf.cast(l, dtype) sign_s = tf.cast(sign_s, dtype) log_s = tf.cast(log_s, dtype) u = tf.cast(u, dtype) w_shape = [shape[-1], shape[-1]] l_mask = np.tril(np.ones(w_shape, dtype=dtype), -1) l = l * l_mask + tf.eye(*w_shape, dtype=dtype) u = u * np.transpose(l_mask) + tf.diag(sign_s * tf.exp(log_s)) w = tf.matmul(p, tf.matmul(l, u)) if True: u_inv = tf.matrix_inverse(u) l_inv = tf.matrix_inverse(l) p_inv = tf.matrix_inverse(p) w_inv = tf.matmul(u_inv, tf.matmul(l_inv, p_inv)) else: w_inv = tf.matrix_inverse(w) w = tf.cast(w, tf.float32) w_inv = tf.cast(w_inv, tf.float32) log_s = tf.cast(log_s, tf.float32) if not reverse: w = tf.reshape(w, [1, 1, 1] + w_shape) z = tf.nn.conv3d(z, w, [1, 1, 1, 1, 1], 'SAME', data_format='NDHWC') logdet = tf.reduce_sum(log_s) * (shape[1]*shape[2]*shape[3]) return z, logdet else: w_inv = tf.reshape(w_inv, [1, 1, 1]+w_shape) z = tf.nn.conv3d( z, w_inv, [1, 1, 1, 1, 1], 'SAME', data_format='NDHWC') logdet = -tf.reduce_sum(log_s) * (shape[1]*shape[2]*shape[3]) return z, logdet
def invertible_conv2D_emerging_1x1(name, z, logdet, ksize=3, dilation=1, reverse=False, checkpoint_fn=None, decomposition=None, unit_testing=False): shape = Z.int_shape(z) batchsize, height, width, n_channels = shape assert (ksize - 1) % 2 == 0 kcent = (ksize - 1) // 2 with tf.variable_scope(name): if decomposition is None or decomposition == '': # Sample a random orthogonal matrix: w_init = np.linalg.qr(np.random.randn( shape[3], shape[3]))[0].astype('float32') w = tf.get_variable("W", dtype=tf.float32, initializer=w_init) dlogdet = tf.cast( tf.log(abs(tf.matrix_determinant(tf.cast(w, 'float64')))), 'float32') * shape[1] * shape[2] w_inv = tf.matrix_inverse(w) elif decomposition == 'PLU' or decomposition == 'LU': # LU-decomposed version dtype = 'float64' # Random orthogonal matrix: import scipy np_w = scipy.linalg.qr(np.random.randn( shape[3], shape[3]))[0].astype('float32') np_p, np_l, np_u = scipy.linalg.lu(np_w) np_s = np.diag(np_u) np_sign_s = np.sign(np_s) np_log_s = np.log(abs(np_s)) np_u = np.triu(np_u, k=1) p = tf.get_variable("P", initializer=np_p, trainable=False) l = tf.get_variable("L", initializer=np_l) sign_s = tf.get_variable("sign_S", initializer=np_sign_s, trainable=False) log_s = tf.get_variable("log_S", initializer=np_log_s) u = tf.get_variable("U", initializer=np_u) p = tf.cast(p, 'float64') l = tf.cast(l, 'float64') sign_s = tf.cast(sign_s, 'float64') log_s = tf.cast(log_s, 'float64') u = tf.cast(u, 'float64') l_mask = np.tril(np.ones([shape[3], shape[3]], dtype=dtype), -1) l = l * l_mask + tf.eye(shape[3], dtype=dtype) u = u * np.transpose(l_mask) + tf.diag(sign_s * tf.exp(log_s)) w = tf.matmul(p, tf.matmul(l, u)) u_inv = tf.matrix_inverse(u) l_inv = tf.matrix_inverse(l) p_inv = tf.matrix_inverse(p) w_inv = tf.matmul(u_inv, tf.matmul(l_inv, p_inv)) w = tf.cast(w, tf.float32) w_inv = tf.cast(w_inv, tf.float32) log_s = tf.cast(log_s, tf.float32) dlogdet = tf.reduce_sum(log_s) * (shape[1] * shape[2]) elif decomposition == 'QR': np_s = np.ones(shape[3], dtype='float32') np_u = np.zeros((shape[3], shape[3]), dtype='float32') if unit_testing: np_s = 1 + 0.02 * np.random.randn(shape[3]).astype('float32') np_u = np.random.randn(shape[3], shape[3]).astype('float32') np_u = np.triu(np_u, k=1).astype('float32') u_mask = np.triu(np.ones([shape[3], shape[3]], dtype='float32'), 1) s = tf.get_variable("S", initializer=np_s) u = tf.get_variable("U", initializer=np_u) log_s = tf.log(tf.abs(s)) r = u * u_mask + tf.diag(s) # Householder transformations I = tf.eye(shape[3]) q = I for i in range(shape[3]): v_np = np.random.randn(shape[3], 1).astype('float32') v = tf.get_variable("v_{}".format(i), initializer=v_np) vT = tf.transpose(v) q_i = I - 2 * tf.matmul(v, vT) / tf.matmul(vT, v) q = tf.matmul(q, q_i) # Modified Gram–Schmidt process # def inner(a, b): # return tf.reduce_sum(a * b) # def proj(v, u): # return u * inner(v, u) / inner(u, u) # q = [] # for i in range(shape[3]): # v_np = np.random.randn(shape[3], 1).astype('float32') # v = tf.get_variable("v_{}".format(i), initializer=v_np) # for j in range(i): # p = proj(v, q[j]) # v = v - proj(v, q[j]) # q.append(v) # q = tf.concat(q, axis=1) # q = q / tf.norm(q, axis=0, keepdims=True) q_inv = tf.transpose(q) r_inv = tf.matrix_inverse(r) w = tf.matmul(q, r) w_inv = tf.matmul(r_inv, q_inv) dlogdet = tf.reduce_sum(log_s) * (shape[1] * shape[2]) else: raise ValueError('Unknown decomposition: {}'.format(decomposition)) mask_np = get_conv_square_ar_mask(ksize, ksize, n_channels, n_channels) mask_upsidedown_np = mask_np[::-1, ::-1, ::-1, 
::-1].copy() mask = tf.constant(mask_np) mask_upsidedown = tf.constant(mask_upsidedown_np) filter_shape = [ksize, ksize, n_channels, n_channels] w1_np = get_conv_weight_np(filter_shape) w2_np = get_conv_weight_np(filter_shape) w1 = tf.get_variable('W1', dtype=tf.float32, initializer=w1_np) w2 = tf.get_variable('W2', dtype=tf.float32, initializer=w2_np) b = tf.get_variable('b', [n_channels], initializer=tf.zeros_initializer()) b = tf.reshape(b, [1, 1, 1, -1]) w1 = w1 * mask w2 = w2 * mask_upsidedown def log_abs_diagonal(w): return tf.log(tf.abs(tf.diag_part(w[kcent, kcent]))) def forward(z, logdet): w_ = tf.reshape(w, [1, 1] + [shape[3], shape[3]]) z = tf.nn.conv2d(z, w_, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += dlogdet z = tf.nn.conv2d(z, w1, [1, 1, 1, 1], dilations=[1, dilation, dilation, 1], padding='SAME', data_format='NHWC') logdet += tf.reduce_sum(log_abs_diagonal(w1)) * (height * width) if checkpoint_fn is not None: checkpoint_fn(z, logdet) z = tf.nn.conv2d(z, w2, [1, 1, 1, 1], dilations=[1, dilation, dilation, 1], padding='SAME', data_format='NHWC') logdet += tf.reduce_sum(log_abs_diagonal(w2)) * (height * width) if checkpoint_fn is not None: checkpoint_fn(z, logdet) z = z + b return z, logdet def forward_fast(z, logdet): """ Convolution with [(k+1) // 2]^2 filters. """ # Smaller versions of w1, w2. w1_s = w1[kcent:, kcent:, :, :] w2_s = w2[:-kcent, :-kcent, :, :] pad = kcent * dilation # standard filter shape: [v, u, c_in, c_out] # standard fmap shape: [b, h, w, c] w_ = tf.transpose(tf.reshape(w, [1, 1] + [shape[3], shape[3]]), (0, 1, 3, 2)) w_equiv = tf.nn.conv2d(tf.transpose(w1_s, (3, 0, 1, 2)), w_, [1, 1, 1, 1], padding='SAME') w_equiv = tf.transpose(w_equiv, (1, 2, 3, 0)) z = tf.pad(z, [[0, 0], [0, pad], [0, pad], [0, 0]], 'CONSTANT') z = tf.nn.conv2d(z, w_equiv, [1, 1, 1, 1], dilations=[1, dilation, dilation, 1], padding='VALID', data_format='NHWC') logdet += tf.reduce_sum(log_abs_diagonal(w1)) * (height * width) if checkpoint_fn is not None: checkpoint_fn(z, logdet) z = tf.pad(z, [[0, 0], [pad, 0], [pad, 0], [0, 0]], 'CONSTANT') z = tf.nn.conv2d(z, w2_s, [1, 1, 1, 1], dilations=[1, dilation, dilation, 1], padding='VALID', data_format='NHWC') logdet += tf.reduce_sum(log_abs_diagonal(w2)) * (height * width) if checkpoint_fn is not None: checkpoint_fn(z, logdet) z = z + b return z, logdet if not reverse: x, logdet = forward_fast(z, logdet) # x_, _ = forward(z, logdet) # x = tf.Print( # x, data=[tf.reduce_mean(tf.square(x - x_))], message='diff') return x, logdet else: logdet -= dlogdet logdet -= tf.reduce_sum(log_abs_diagonal(w2)) * (height * width) x = tf.py_func( Inverse(is_upper=1, dilation=dilation), inp=[z, w2, b], Tout=tf.float32, stateful=True, name='conv2dinverse2', ) logdet -= tf.reduce_sum(log_abs_diagonal(w1)) * (height * width) x = tf.py_func( Inverse(is_upper=0, dilation=dilation), inp=[x, w1, tf.zeros_like(b)], Tout=tf.float32, stateful=True, name='conv2dinverse1', ) x.set_shape(z.get_shape()) z_recon, _ = forward_fast(x, tf.zeros_like(logdet)) w_inv = tf.reshape(w_inv, [1, 1] + [shape[3], shape[3]]) x = tf.nn.conv2d(x, w_inv, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= dlogdet # mse = tf.sqrt(tf.reduce_mean(tf.pow(z_recon - z, 2))) # x = tf.Print( # x, # data=[mse], # message='RMSE of inverse', # ) return x, logdet
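# Hedged aside (not from the original repo): the 'QR' branch above parameterizes the
# orthogonal factor Q as a product of Householder reflections I - 2 v v^T / (v^T v).
# A minimal NumPy sketch of that construction; names are illustrative only.
import numpy as np

rng = np.random.default_rng(0)
n = 4
q = np.eye(n)
for _ in range(n):
    v = rng.standard_normal((n, 1))
    q = q @ (np.eye(n) - 2.0 * (v @ v.T) / (v.T @ v))    # accumulate one reflection

assert np.allclose(q.T @ q, np.eye(n), atol=1e-12)       # Q is orthogonal
assert np.isclose(abs(np.linalg.det(q)), 1.0)            # so only R contributes to log|det W|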
def invertible_1x1_conv(name, z, logdet, reverse=False): if True: # Set to "False" to use the LU-decomposed version with tf.variable_scope(name): shape = Z.int_shape(z) w_shape = [shape[3], shape[3]] # Sample a random orthogonal matrix: w_init = np.linalg.qr(np.random.randn( *w_shape))[0].astype('float32') w = tf.get_variable("W", dtype=tf.float32, initializer=w_init) # dlogdet = tf.linalg.LinearOperator(w).log_abs_determinant() * shape[1]*shape[2] dlogdet = tf.cast(tf.log(abs(tf.matrix_determinant( tf.cast(w, 'float64')))), 'float32') * shape[1]*shape[2] if not reverse: _w = tf.reshape(w, [1, 1] + w_shape) z = tf.nn.conv2d(z, _w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += dlogdet return z, logdet else: _w = tf.matrix_inverse(w) _w = tf.reshape(_w, [1, 1]+w_shape) z = tf.nn.conv2d(z, _w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= dlogdet return z, logdet else: # LU-decomposed version shape = Z.int_shape(z) with tf.variable_scope(name): dtype = 'float64' # Random orthogonal matrix: import scipy np_w = scipy.linalg.qr(np.random.randn(shape[3], shape[3]))[ 0].astype('float32') np_p, np_l, np_u = scipy.linalg.lu(np_w) np_s = np.diag(np_u) np_sign_s = np.sign(np_s) np_log_s = np.log(abs(np_s)) np_u = np.triu(np_u, k=1) p = tf.get_variable("P", initializer=np_p, trainable=False) l = tf.get_variable("L", initializer=np_l) sign_s = tf.get_variable( "sign_S", initializer=np_sign_s, trainable=False) log_s = tf.get_variable("log_S", initializer=np_log_s) # S = tf.get_variable("S", initializer=np_s) u = tf.get_variable("U", initializer=np_u) p = tf.cast(p, dtype) l = tf.cast(l, dtype) sign_s = tf.cast(sign_s, dtype) log_s = tf.cast(log_s, dtype) u = tf.cast(u, dtype) w_shape = [shape[3], shape[3]] l_mask = np.tril(np.ones(w_shape, dtype=dtype), -1) l = l * l_mask + tf.eye(*w_shape, dtype=dtype) u = u * np.transpose(l_mask) + tf.diag(sign_s * tf.exp(log_s)) w = tf.matmul(p, tf.matmul(l, u)) if True: u_inv = tf.matrix_inverse(u) l_inv = tf.matrix_inverse(l) p_inv = tf.matrix_inverse(p) w_inv = tf.matmul(u_inv, tf.matmul(l_inv, p_inv)) else: w_inv = tf.matrix_inverse(w) w = tf.cast(w, tf.float32) w_inv = tf.cast(w_inv, tf.float32) log_s = tf.cast(log_s, tf.float32) if not reverse: w = tf.reshape(w, [1, 1] + w_shape) z = tf.nn.conv2d(z, w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += tf.reduce_sum(log_s) * (shape[1]*shape[2]) return z, logdet else: w_inv = tf.reshape(w_inv, [1, 1]+w_shape) z = tf.nn.conv2d( z, w_inv, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= tf.reduce_sum(log_s) * (shape[1]*shape[2]) return z, logdet
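# Hedged aside (not from the original repo): the LU-decomposed branch stores log|s| because,
# with W = P L U, |det P| = 1 and det L = 1 (unit diagonal), so log|det W| = sum(log|diag U|)
# and no determinant op is needed in the graph. Minimal NumPy/SciPy sketch; names illustrative.
import numpy as np
import scipy.linalg

rng = np.random.default_rng(0)
c = 5
w = rng.standard_normal((c, c))
p, l, u = scipy.linalg.lu(w)                      # w == p @ l @ u
log_s = np.log(np.abs(np.diag(u)))
assert np.allclose(np.sum(log_s), np.linalg.slogdet(w)[1])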
def main(sess): t = time.time() goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal') # Define your controller here def controller(state): controller_inputs = [] for i in range(num_groups): mask = particle_mask(i * group_num_particles, (i + 1) * group_num_particles)[:, None, :] * ( 1.0 / group_num_particles) pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False) vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False) controller_inputs.append(pos) controller_inputs.append(vel) controller_inputs.append(goal) # Batch, dim controller_inputs = tf.concat(controller_inputs, axis=1) assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape controller_inputs = controller_inputs[:, :, None] assert controller_inputs.shape == (batch_size, 6 * num_groups, 1) # Batch, 6 * num_groups, 1 if nn_control: intermediate = tf.matmul( W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]), controller_inputs) # Batch, #actuations, 1 assert intermediate.shape == (batch_size, len(actuations), 1) assert intermediate.shape[2] == 1 intermediate = intermediate[:, :, 0] # Batch, #actuations actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength else: #IPython.embed() actuation = tf.expand_dims( actuation_seq[0, state.step_count // (num_steps // num_acts), :], 0) debug = { 'controller_inputs': controller_inputs[:, :, 0], 'actuation': actuation } total_actuation = 0 zeros = tf.zeros(shape=(batch_size, num_particles)) for i, group in enumerate(actuations): act = actuation[:, i:i + 1] assert len(act.shape) == 2 mask = particle_mask_from_group(group) act = act * mask # First PK stress here act = make_matrix2d(zeros, zeros, zeros, act) # Convert to Kirchhoff stress total_actuation = total_actuation + act return total_actuation, debug res = (30, 30) bc = get_bounding_box_bc(res) if config == 'B': bc[0][:, :, :5] = -1 # Sticky bc[1][:, :, :5] = 0 # Sticky sim = Simulation(dt=0.0025, num_particles=num_particles, grid_res=res, gravity=gravity, controller=controller, batch_size=batch_size, bc=bc, sess=sess) print("Building time: {:.4f}s".format(time.time() - t)) final_state = sim.initial_state['debug']['controller_inputs'] s = head * 6 final_position = final_state[:, s:s + 2] final_velocity = final_state[:, s + 2:s + 4] gamma = 0.0 loss1 = tf.reduce_sum((final_position - goal)**2) loss2 = tf.reduce_sum(final_velocity**2) loss_velocity = loss2 loss_act = tf.reduce_sum(actuation_seq**2.0) loss_zero = tf.reduce_sum(actuation_seq * 0.0) loss = loss1 + gamma * loss2 initial_positions = [[] for _ in range(batch_size)] for b in range(batch_size): for i, offset in enumerate(group_offsets): for x in range(sample_density): for y in range(sample_density): scale = 0.2 u = ((x + 0.5) / sample_density * group_sizes[i][0] + offset[0]) * scale + 0.2 v = ((y + 0.5) / sample_density * group_sizes[i][1] + offset[1]) * scale + 0.1 initial_positions[b].append([u, v]) assert len(initial_positions[0]) == num_particles initial_positions = np.array(initial_positions).swapaxes(1, 2) youngs_modulus = tf.Variable( 10.0 * tf.ones(shape=[1, 1, num_particles], dtype=tf.float32), trainable=True) initial_state = sim.get_initial_state( position=np.array(initial_positions), youngs_modulus=tf.identity(youngs_modulus)) trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if use_bfgs: B = [ tf.Variable(tf.eye(tf.size(trainable)), trainable=False) for trainable in trainables ] sess.run(tf.global_variables_initializer()) sim.set_initial_state(initial_state=initial_state) 
sym = sim.gradients_sym(loss, variables=trainables) sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3) sim.add_vector_visualization(pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50) sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3) if config == 'A': goal_input = np.array([[ 0.5 + (random.random() - 0.5) * goal_range * 2, 0.6 + (random.random() - 0.5) * goal_range ] for _ in range(batch_size)], dtype=np.float32) elif config == 'B': goal_input = np.array([[ 0.65 + (random.random() - 0.5) * goal_range * 2, 0.55 + (random.random() - 0.5) * goal_range ] for _ in range(batch_size)], dtype=np.float32) # Optimization loop #IPython.embed() #In progress code ''' memo = sim.run( initial_state=initial_state, num_steps=num_steps, iteration_feed_dict={goal: goal_input}, loss=loss) IPython.embed() def loss_callback(): memo = sim.run( initial_state=initial_state, num_steps=num_steps, iteration_feed_dict={goal: goal_input}, loss=loss) return loss ''' c1 = 1e-4 c2 = 0.9 def eval_sim(loss_tensor): memo = sim.run(initial_state=initial_state, num_steps=num_steps, iteration_feed_dict={goal: goal_input}, loss=loss_tensor) grad = sim.eval_gradients(sym=sym, memo=memo) return memo.loss, grad, memo def flatten_trainables(): return tf.concat( [tf.squeeze(ly.flatten(trainable)) for trainable in trainables], 0) def flatten_vectors(vectors): return tf.concat( [tf.squeeze(ly.flatten(vector)) for vector in vectors], 0) def assignment_run(xs): sess.run([trainable.assign(x) for x, trainable in zip(xs, trainables)]) def f_and_grad_step(step_size, x, delta_x): old_x = [x_i.eval() for x_i in x] assignment_run([ x_i + step_size * delta_x_i for x_i, delta_x_i in zip(x, delta_x) ]) #take step loss, grad, _ = eval_sim(loss) assignment_run(old_x) #revert return loss, grad def wolfe_1(delta_x, new_f, current_f, current_grad, step_size): valid = new_f <= current_f + c1 * step_size * tf.tensordot( flatten_vectors(current_grad), flatten_vectors(delta_x), 1) return valid.eval() def wolfe_2(delta_x, new_grad, current_grad, step_size): valid = np.abs( tf.tensordot(flatten_vectors(new_grad), flatten_vectors(delta_x), 1).eval()) <= -c2 * tf.tensordot( flatten_vectors(current_grad), flatten_vectors(delta_x), 1).eval() return valid def zoom(a_min, a_max, search_dirs, current_f, current_grad): while True: a_mid = (a_min + a_max) / 2.0 print('a_min: ', a_min, 'a_max: ', a_max, 'a_mid: ', a_mid) step_loss_min, step_grad_min = f_and_grad_step( a_min, trainables, search_dirs) step_loss, step_grad = f_and_grad_step(a_mid, trainables, search_dirs) valid_1 = wolfe_1(search_dirs, step_loss, current_f, current_grad, a_mid) valid_2 = wolfe_2(search_dirs, step_grad, current_grad, a_mid) if not valid_1 or step_loss >= step_loss_min: a_max = a_mid else: if valid_2: return a_mid if tf.tensordot(flatten_vectors(step_grad), flatten_vectors(search_dirs), 1) * (a_max - a_min) >= 0: a_max = a_min a_min = a_mid loss_val, grad, memo = eval_sim( loss ) #TODO: this is to get dimensions, find a better way to do this without simming old_g_flat = [None] * len(grad) old_v_flat = [None] * len(grad) t = time.time() loss_val, grad, memo = eval_sim(loss) #BFGS update: #IPython.embed() if use_pygmo: def assignment_helper(x): assignments = [] idx = 0 x = x.astype(np.float32) for v in trainables: #first, get count: var_cnt = tf.size(v).eval() assignments += [ v.assign(tf.reshape(x[idx:idx + var_cnt], v.shape)) ] idx += var_cnt sess.run(assignments) class RobotProblem: def __init__(self, use_act): self.use_act = 
use_act goal_ball = 0.002 def fitness(self, x): assignment_helper(x) if self.use_act: loss_act_val, _, _ = eval_sim(loss_act) else: loss_act_val, _, _ = eval_sim(loss_zero) loss_val, _, _ = eval_sim(loss) c1, _, memo = eval_sim(loss_velocity) sim.visualize(memo) return [ loss_act_val.astype(np.float64), loss_val.astype(np.float64) - self.goal_ball, c1.astype(np.float64) - self.goal_ball ] def get_nic(self): return 2 def get_nec(self): return 0 def gradient(self, x): assignment_helper(x) _, grad, _ = eval_sim(loss) _, grad_velocity, _ = eval_sim(loss_velocity) _, grad_act, _ = eval_sim(loss_act) return np.concatenate([ flatten_vectors(grad_act).eval().astype(np.float64), flatten_vectors(grad).eval().astype(np.float64), flatten_vectors(grad_velocity).eval().astype(np.float64) ]) #return flatten_vectors(grad).eval().astype(np.float64) def get_bounds(self): #actuation lb = [] ub = [] acts = trainables[0] lb += [-5] * tf.size(acts).eval() ub += [5] * tf.size(acts).eval() designs = trainables[1] lb += [5] * tf.size(designs).eval() ub += [20] * tf.size(designs).eval() return (lb, ub) #IPython.embed() uda = pg.nlopt("slsqp") #uda = ppnf.snopt7(screen_output = False, library = "/home/aespielberg/snopt/lib/libsnopt7.so") algo = pg.algorithm(uda) #algo.extract(pg.nlopt).local_optimizer = pg.nlopt('lbfgs') algo.extract(pg.nlopt).maxeval = 20 algo.set_verbosity(1) udp = RobotProblem(False) bounds = udp.get_bounds() mean = (np.array(bounds[0]) + np.array(bounds[1])) / 2.0 num_vars = len(mean) prob = pg.problem(udp) pop = pg.population(prob, size=1) pop.set_x(0, np.random.normal(scale=0.3, loc=mean, size=(num_vars, ))) pop.problem.c_tol = [1e-4] * prob.get_nc() #pop.problem.c_tol = [1e-4] * prob.get_nc() pop.problem.f_tol_rel = [100000.0] #IPython.embed() pop = algo.evolve(pop) IPython.embed() #IPython.embed() #We need to refactor this for real old_x = pop.champion_x udp = RobotProblem(True) prob = pg.problem(udp) pop = pg.population(prob, size=1) pop.set_x(0, old_x) pop.problem.c_tol = [1e-4] * prob.get_nc() #pop.problem.f_tol = [1e-6] pop.problem.f_tol_rel = [1e-4] pop = algo.evolve(pop) #now a second time _, _, memo = eval_sim(loss) sim.visualize(memo) return for i in range(1000000): if use_bfgs: bfgs = [None] * len(grad) B_update = [None] * len(grad) search_dirs = [None] * len(grad) #TODO: for now, assuming there is only one trainable and one grad for ease for v, g, idx in zip(trainables, grad, range(len(grad))): g_flat = ly.flatten(g) v_flat = ly.flatten(v) if B[idx] == None: B[idx] = tf.eye(tf.size(v_flat)) if i > 0: y_flat = tf.squeeze(g_flat - old_g_flat[idx]) s_flat = tf.squeeze(v_flat - old_v_flat[idx]) B_s_flat = tf.tensordot(B[idx], s_flat, 1) term_1 = -tf.tensordot(B_s_flat, tf.transpose(B_s_flat), 0) / tf.tensordot( s_flat, B_s_flat, 1) term_2 = tf.tensordot(y_flat, y_flat, 0) / tf.tensordot( y_flat, s_flat, 1) B_update[idx] = B[idx].assign(B[idx] + term_1 + term_2) sess.run([B_update[idx]]) if tf.abs(tf.matrix_determinant(B[idx])).eval() < 1e-6: sess.run([B[idx].assign(tf.eye(tf.size(v_flat)))]) search_dir = -tf.transpose(g_flat) else: #search_dir = -tf.matrix_solve_ls(B[idx],tf.transpose(g_flat), l2_regularizer=0.0, fast=True) #adding regularizer for stability search_dir = -tf.matmul( tf.linalg.inv(B[idx]), tf.transpose(g_flat)) #TODO: inverse bad,speed htis up search_dir_reshape = tf.reshape(search_dir, g.shape) search_dirs[idx] = search_dir_reshape old_g_flat[idx] = g_flat old_v_flat[idx] = v_flat.eval() #TODO: B upate #Now it's linesearch time if wolfe_search: a_max = 0.1 a_1 = a_max / 
2.0 a_0 = 0.0 iterate = 1 while True: step_loss, step_grad = f_and_grad_step( a_1, trainables, search_dirs) print(a_1) valid_1 = wolfe_1(search_dirs, step_loss, loss_val, grad, a_1) valid_2 = wolfe_2(search_dirs, step_grad, grad, a_1) print('wolfe 1: ', valid_1, 'wolfe 2: ', valid_2) if (not valid_1) or (iterate > 1 and step_loss > loss_val): print('cond1') a = zoom(a_0, a_1, search_dirs, loss_val, grad) if valid_2: print('cond2') a = a_1 break if tf.tensordot(flatten_vectors(step_grad), flatten_vectors(search_dirs), 1).eval() >= 0: print('cond3') a = zoom(a_1, a_0, search_dirs, loss_val, grad) break print('no cond') temp = a_1 a_1 = (a_1 + a_max) / 2.0 a_0 = temp iterate += 1 if iterate > 5: #close enough a = a_1 break else: a = lr for v, idx in zip(trainables, range(len(grad))): print('final a ', a) bfgs[idx] = v.assign(v + search_dirs[idx] * a) sess.run(bfgs) print('stepped!!') else: gradient_descent = [ v.assign(v - lr * g) for v, g in zip(trainables, grad) ] sess.run(gradient_descent) print('iter {:5d} time {:.3f} loss {:.4f}'.format( i, time.time() - t, memo.loss)) if i % 1 == 0: sim.visualize(memo) #in progress code '''
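# Hedged aside (not from the original code): the B[idx] update above is the standard BFGS
# Hessian-approximation update B+ = B - (B s s^T B)/(s^T B s) + (y y^T)/(y^T s).
# Minimal NumPy sketch checking the secant condition B+ s = y; names are illustrative only.
import numpy as np

def bfgs_update(B, s, y):
    Bs = B @ s
    return B - np.outer(Bs, Bs) / (s @ Bs) + np.outer(y, y) / (y @ s)

A = np.array([[3.0, 1.0], [1.0, 2.0]])            # stand-in Hessian of a quadratic objective
s = np.array([0.3, -0.7])                         # step taken
y = A @ s                                         # gradient change along the step
B_new = bfgs_update(np.eye(2), s, y)
assert np.allclose(B_new @ s, y)                  # secant condition holds after the update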
def test_MatrixDeterminant(self): t = tf.matrix_determinant(self.random(3, 3)) self.check(t)
def run_update(imu_meas, dt, prev_state, prev_covar, gfull, g, fkstat, Hk, lift_g_bias_covar, lift_a_bias_covar, lift_g_covar, lift_a_covar, nn_meas, nn_covar): pred_rot_euler = dt * imu_meas[..., 0:3] - dt * prev_state[..., 14:17] pred_rot = euler2rot(pred_rot_euler) pred_global = dt * imu_meas[..., 1:3] - dt * prev_state[ ..., 15:17] + prev_state[..., 6:8] pred_global_rot = euler2rot2param(pred_global) pred_list = [] # pos = dt * np.dot(pred_rot, x[3:6]) + (0.5 * dt * dt) * ( # np.dot(pred_global_rot, gfull) + imu_meas[3:6] + 2 * np.cross(imu_meas[0:3] - x[14:17], x[3:6]) - x[8:11]) # position prediction pred_list.append( tf.squeeze(tf.matmul(pred_rot, dt * tf.expand_dims(prev_state[..., 3:6], axis=-1))) + (0.5 * dt * dt) * \ (tf.squeeze(tf.matmul(pred_global_rot, gfull)) + imu_meas[:, 3:6] + 2 * tf.cross(imu_meas[:, 0:3] - prev_state[..., 14:17], prev_state[..., 3:6]) - prev_state[..., 8:11])) # velocity prediction pred_list.append(tf.squeeze(tf.matmul(pred_rot, tf.expand_dims(prev_state[..., 3:6], axis=-1))) + dt * \ (tf.squeeze(tf.matmul(pred_global_rot, gfull)) + imu_meas[:, 3:6] + 2 * tf.cross(imu_meas[:, 0:3] - prev_state[..., 14:17], prev_state[..., 3:6]) - prev_state[..., 8:11])) # global pitch and roll prediction pred_list.append(pred_global) # accelerometer bias prediction pred_list.append(prev_state[..., 8:11]) # relative orientation prediction pred_list.append(pred_rot_euler) # gyro bias update pred_list.append(prev_state[..., 14:17]) # pack state pred_state = tf.concat(pred_list, axis=1) # build Jacobian dRglobal_dE = tf.concat([ tf.zeros([imu_meas.shape[0], 3, 1], dtype=tf.float32), getLittleJacobian(pred_global) ], axis=-1) Fk = tf.concat([tf.concat([tf.zeros([imu_meas.shape[0], 3, 3]), dt * pred_rot + dt * dt * skew(imu_meas[:, 0:3] - prev_state[..., 14:17]), 0.5 * dt * dt * g * getLittleJacobian(pred_global), -0.5 * dt * dt * tf.eye(3, batch_shape=[imu_meas.shape[0]], dtype=tf.float32), tf.zeros([imu_meas.shape[0], 3, 3]), -dt * getJacobian(pred_rot_euler, dt * prev_state[..., 3:6]) - g * 0.5 * dt * dt * dt * dRglobal_dE + dt*dt*skew(prev_state[..., 3:6])], axis=-1), tf.concat([tf.zeros([imu_meas.shape[0], 3, 3]), pred_rot + 2 * dt * skew(imu_meas[:, 0:3] - prev_state[..., 14:17]), dt * g * getLittleJacobian(pred_global), -dt * tf.eye(3, batch_shape=[imu_meas.shape[0]], dtype=tf.float32), tf.zeros([imu_meas.shape[0], 3, 3]), -dt * getJacobian(pred_rot_euler, prev_state[..., 3:6]) + \ -g * dt * dt * dRglobal_dE + 2 * dt * skew(prev_state[..., 3:6])], axis=-1) ], axis=1) Fkfull = tf.concat([Fk, fkstat], axis=1) # Combine covariance matrices into one large matrix. 
order is measurement noise (gyro then acc), then bias noise ( #gyro then acc) noise_covar = tf.concat( (tf.concat( (lift_g_covar, tf.zeros([imu_meas.shape[0], 3, 9], dtype=tf.float32)), axis=-1), tf.concat((tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32), lift_a_covar, tf.zeros([imu_meas.shape[0], 3, 6], dtype=tf.float32)), axis=-1), tf.concat((tf.zeros([imu_meas.shape[0], 3, 6], dtype=tf.float32), lift_g_bias_covar, tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32)), axis=-1), tf.concat((tf.zeros([imu_meas.shape[0], 3, 9], dtype=tf.float32), lift_a_bias_covar), axis=-1)), axis=-2) reuse = tf.concat( (tf.zeros([imu_meas.shape[0], 3, 1], dtype=tf.float32), getLittleJacobian(pred_global)), axis=-1) dpi = tf.concat( (getJacobian(pred_rot_euler, dt * prev_state[..., 3:6]) * -dt + (0.5 * dt * dt) * (-dt * g * reuse) + dt * dt * skew(prev_state[..., 3:6]), (-0.5 * dt * dt) * tf.eye(3, batch_shape=[imu_meas.shape[0]], dtype=tf.float32), tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32), tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32)), axis=-1) dvi = tf.concat( (getJacobian(pred_rot_euler, prev_state[..., 3:6]) * -dt + dt * (-dt * g * reuse) + 2 * dt * skew(prev_state[..., 3:6]), (-dt * tf.eye(3, batch_shape=[imu_meas.shape[0]], dtype=tf.float32)), tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32), tf.zeros([imu_meas.shape[0], 3, 3], dtype=tf.float32)), axis=-1) drotglobal = tf.tile( tf.expand_dims(tfe.Variable([[0, -dt, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, -dt, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=tf.float32, trainable=False), axis=0), [imu_meas.shape[0], 1, 1]) daccbias = tf.tile( tf.expand_dims(tfe.Variable([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=tf.float32, trainable=False), axis=0), [imu_meas.shape[0], 1, 1]) drotrel = tf.tile( tf.expand_dims(tfe.Variable([[-dt, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, -dt, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, -dt, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=tf.float32, trainable=False), axis=0), [imu_meas.shape[0], 1, 1]) dgyrobias = tf.tile( tf.expand_dims(tfe.Variable([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]], dtype=tf.float32, trainable=False), axis=0), [imu_meas.shape[0], 1, 1]) J_noise = tf.concat((dpi, dvi, drotglobal, daccbias, drotrel, dgyrobias), axis=-2) # Assemble global covariance matrix Qk = tf.matmul(J_noise, tf.matmul( noise_covar, J_noise, transpose_b=True)) + 1.0e-2 * tf.eye( 17, batch_shape=[imu_meas.shape[0]], dtype=tf.float32) tf.assert_positive(tf.matrix_determinant(Qk), [Qk, J_noise, noise_covar]) pred_covar = tf.matmul( Fkfull, tf.matmul(prev_covar, Fkfull, transpose_b=True)) + Qk tf.assert_positive(tf.matrix_determinant(pred_covar), [Fkfull]) yk = tf.expand_dims(nn_meas, axis=-1) - tf.matmul( Hk, tf.expand_dims(pred_state, axis=-1)) tf.assert_positive(tf.matrix_determinant(nn_covar), [nn_covar]) Sk = tf.matmul(Hk, tf.matmul( pred_covar, Hk, transpose_b=True)) + nn_covar + 1.0 * tf.eye( 6, batch_shape=[imu_meas.shape[0]], dtype=tf.float32) tf.assert_positive(tf.matrix_determinant(Sk), [Sk]) Kk = tf.matmul(pred_covar, tf.matmul(Hk, tf.matrix_inverse(Sk), transpose_a=True)) X = tf.squeeze(tf.expand_dims(pred_state, axis=-1) + tf.matmul(Kk, yk), axis=2) covar = pred_covar - tf.matmul( Kk, tf.matmul(Hk, pred_covar), name="ekf_matmul") tf.assert_positive(tf.matrix_determinant(pred_covar), [imu_meas, prev_state, prev_covar, nn_meas, nn_covar]) return X, covar
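# Hedged aside (not from the original code): run_update above follows the standard
# extended-Kalman-filter measurement update, and the tf.assert_positive(tf.matrix_determinant(...))
# calls guard against the predicted/innovation covariances losing positive definiteness.
# Minimal NumPy sketch of the same algebra with toy dimensions; names are illustrative only.
import numpy as np

rng = np.random.default_rng(0)
n, m = 4, 2
P = np.eye(n)                                     # predicted state covariance
H = rng.standard_normal((m, n))                   # measurement Jacobian (Hk above)
R = 0.1 * np.eye(m)                               # measurement noise covariance (nn_covar above)
x = rng.standard_normal(n)                        # predicted state
z = rng.standard_normal(m)                        # measurement (nn_meas above)

S = H @ P @ H.T + R                               # innovation covariance (Sk)
assert np.linalg.det(S) > 0                       # analogue of the determinant checks above
K = P @ H.T @ np.linalg.inv(S)                    # Kalman gain (Kk)
x_post = x + K @ (z - H @ x)
P_post = P - K @ H @ P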
def testNonSquareMatrix(self): # Requesting the determinant of a non-square matrix should raise an error. with self.assertRaises(ValueError): tf.matrix_determinant( np.array([[1., 2., 3.], [3., 5., 4.]]).astype(np.float32))
# Create 4x4 Random Uniform Matrix A_rand_matrix = tf.random_uniform([4, 4]) print(sess.run(A_rand_matrix)) # Create a Matrix From a np Array ATensor_from_array = tf.convert_to_tensor( np.array([[12.7, 21.5, 13.2], [-3.3, -17.2, -11.3], [0.1, 5.1, -2.7]])) print(sess.run(ATensor_from_array)) # Calculate Matrix Addition and Subtraction print(sess.run(A_identity_matrix + A_matrix)) print(sess.run(A_identity_matrix - A_matrix)) # Calculate Matrix Transpose print(sess.run(tf.transpose(A_const_matrix))) # Calculate Matrix Determinant print(sess.run(tf.matrix_determinant(A_rand_matrix))) # Calculate Another Matrix Multiplication print(sess.run(tf.matmul(A_matrix, A_identity_matrix))) # Calculate Matrix Inverse print(sess.run(tf.matrix_inverse(A_rand_matrix))) print(sess.run(tf.matrix_inverse(A_matrix))) # Calculate Eigenvalues and Eigenvectors print(sess.run(tf.self_adjoint_eig(A_rand_matrix))) print(sess.run(tf.self_adjoint_eig(A_matrix)))
import tensorflow as tf sess = tf.InteractiveSession() x = tf.constant([[2, 5, 3, -5], [0, 3, -2, 5], [4, 3, 5, 3], [6, 1, 4, 0]]) y = tf.constant([[4, -7, 4, -3, 4], [6, 4, -7, 4, 7], [2, 3, 2, 1, 4], [1, 5, 5, 5, 2]]) floatx = tf.constant([[2., 5., 3., -5.], [0., 3., -2., 5.], [4., 3., 5., 3.], [6., 1., 4., 0.]]) tf.transpose(x).eval() # Transpose matrix tf.matmul(x, y).eval() # Matrix multiplication tf.matrix_determinant(floatx).eval() # Matrix determinant tf.matrix_inverse(floatx).eval() # Matrix inverse tf.matrix_solve(floatx, [[1], [1], [1], [1]]).eval() # Solve Matrix system
def invertible_1x1_conv(name, z, logdet, reverse=False): if True: # Set to "False" to use the LU-decomposed version with tf.variable_scope(name): shape = ops.int_shape(z) C = shape[3] w = tf.get_variable("w", shape=( C, C), dtype=tf.float32, initializer=tf.initializers.orthogonal()) dlogdet = tf.cast(tf.log(abs(tf.matrix_determinant( tf.cast(w, 'float64')))), 'float32') * shape[1]*shape[2] if not reverse: w = tf.reshape(w, [1, 1, C, C]) z = tf.nn.conv2d(z, w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += dlogdet return z, logdet else: w = tf.matrix_inverse(w) w = tf.reshape(w, [1, 1, C, C]) z = tf.nn.conv2d(z, w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= dlogdet return z, logdet else: # LU-decomposed version shape = ops.int_shape(z) with tf.variable_scope(name): dtype = 'float64' # Random orthogonal matrix: import scipy np_w = scipy.linalg.qr(np.random.randn(shape[3], shape[3]))[ 0].astype('float32') np_p, np_l, np_u = scipy.linalg.lu(np_w) # pylint: disable=E1101 np_s = np.diag(np_u) np_sign_s = np.sign(np_s) np_log_s = np.log(abs(np_s)) np_u = np.triu(np_u, k=1) p = tf.get_variable("P", initializer=np_p, trainable=False) l = tf.get_variable("L", initializer=np_l) sign_s = tf.get_variable( "sign_S", initializer=np_sign_s, trainable=False) log_s = tf.get_variable("log_S", initializer=np_log_s) # S = tf.get_variable("S", initializer=np_s) u = tf.get_variable("U", initializer=np_u) p = tf.cast(p, dtype) l = tf.cast(l, dtype) sign_s = tf.cast(sign_s, dtype) log_s = tf.cast(log_s, dtype) u = tf.cast(u, dtype) w_shape = [shape[3], shape[3]] l_mask = np.tril(np.ones(w_shape, dtype=dtype), -1) l = l * l_mask + tf.eye(*w_shape, dtype=dtype) u = u * np.transpose(l_mask) + tf.diag(sign_s * tf.exp(log_s)) w = tf.matmul(p, tf.matmul(l, u)) if True: u_inv = tf.matrix_inverse(u) l_inv = tf.matrix_inverse(l) p_inv = tf.matrix_inverse(p) w_inv = tf.matmul(u_inv, tf.matmul(l_inv, p_inv)) else: w_inv = tf.matrix_inverse(w) w = tf.cast(w, tf.float32) w_inv = tf.cast(w_inv, tf.float32) log_s = tf.cast(log_s, tf.float32) if not reverse: w = tf.reshape(w, [1, 1] + w_shape) z = tf.nn.conv2d(z, w, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet += tf.reduce_sum(log_s) * (shape[1]*shape[2]) return z, logdet else: w_inv = tf.reshape(w_inv, [1, 1]+w_shape) z = tf.nn.conv2d( z, w_inv, [1, 1, 1, 1], 'SAME', data_format='NHWC') logdet -= tf.reduce_sum(log_s) * (shape[1]*shape[2]) return z, logdet
# 2, ..., 6 # Hint: Use tf.range() and tf.diag(). ############################################################################### val = tf.range(1, 7) out_1e = tf.diag(val) # print(sess.run(out_1e)) ############################################################################### # 1f: Create a random 2-d tensor of size 10 x 10 from any distribution. # Calculate its determinant. # Hint: Look at tf.matrix_determinant(). ############################################################################### mat = tf.random_normal([10, 10]) out_1f = tf.matrix_determinant(mat) # print(sess.run(out_1f)) ############################################################################### # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]. # Return the unique elements in x # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple. ############################################################################### x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]) out_1g, _ = tf.unique(x) # print(sess.run(out_1g)) ############################################################################### # 1h: Create two tensors x and y of shape 300 from any normal distribution, # as long as they are from the same distribution.
def _det_ok_mask(x, det_bounds): return tf.cast(tf.matrix_determinant(x) > det_bounds, dtype=x.dtype)
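# Hedged aside (not from the original code): _det_ok_mask returns 1.0 where the determinant
# exceeds the bound and 0.0 elsewhere; a minimal NumPy analogue with illustrative values.
import numpy as np

x = np.stack([np.eye(2), 2.0 * np.eye(2)])               # determinants 1.0 and 4.0
det_bounds = 2.0
mask = (np.linalg.det(x) > det_bounds).astype(x.dtype)   # -> array([0., 1.])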
def __init__(self, config, output_size, latent_vector_input, data_vector_input, name_scope): """ :param config: dictionary :param output_size: int :param latent_vector_input: generalized tensorflow input :param name_scope: string """ logger = logging.getLogger(self.__class__.__name__+":"+str(name_scope)) # get config # --------- depth = get_or_default(config, 'depth', 3, logger) translation_network_config = get_or_default(config, 'translation_network', {}, logger) translation_networks_args = {'output_size': output_size, 'name_scope': name_scope, 'name': 'translation_network', 'config': translation_network_config} # --------- # affine transformation of the input triangular_params = LowerTriangularParameters(output_size, name_scope, trainable_bias=False) # generate masks, width and translation networks # --------- masks = [] t_model = [] for d in range(depth): # alternating random masks for the coupling layers if d % 2 == 0: mask = EvenMask(output_size) else: mask = OddMask(output_size) with tf.name_scope(name_scope): m = tf.Variable(mask(), dtype=tf.float32, trainable=False, name='mask') masks.append(m) # create t t_model.append(create_model(MultiLayerPerceptron, input_size=output_size, model_args=translation_networks_args)) # --------- # build the full sample model by stacking coupling layers # --------- # feed-forward mode = 'feed_forward' forward_layers = list() forward_layers.append(LowerTriangularLayer(input_tensor=latent_vector_input, lower_triangular_matrix=triangular_params.lower_triangular_matrix, bias=triangular_params.bias, mode=mode, invertible_facq=InvertibleIdentity())) for d in range(0, depth): forward_layers.append(AdditiveCouplingLayer(mode=mode, input_tensor=forward_layers[d].output, mask=masks[d], translation_model=t_model[d])) self._data_vector = forward_layers[depth].output # feed backward mode = 'feed_backward' backward_layers = list() backward_layers.append(AdditiveCouplingLayer(mode=mode, input_tensor=data_vector_input, mask=masks[depth-1], translation_model=t_model[depth-1])) for d in range(1, depth): backward_layers.append(AdditiveCouplingLayer(mode=mode, input_tensor=backward_layers[d-1].output, mask=masks[depth-1-d], translation_model=t_model[depth-1-d])) backward_layers.append(LowerTriangularLayer(input_tensor=backward_layers[depth-1].output, lower_triangular_matrix=triangular_params.lower_triangular_matrix, bias=triangular_params.bias, mode=mode, invertible_facq=InvertibleIdentity())) self._latent_vector = backward_layers[depth].output # --------- # build the likelihood model # --------- concat_log_det_jac = Concatenate(axis=1)([backward_layers[c].log_det_jacobian for c in range(depth+1)]) self._log_det_jac = Lambda(lambda x: tf.reduce_sum(x, axis=1))(concat_log_det_jac) self._log_det_jac_test = tf.log(tf.matrix_determinant(tf.stack( [tf.gradients(self._latent_vector[:, idx], data_vector_input)[0] for idx in range(output_size)], axis=1))) self._llh = Lambda( lambda x: -0.5 * output_size * np.log(2 * np.pi) - 0.5 * tf.reduce_sum(tf.square(x[0]), axis=1) + \ x[1])([self._latent_vector, self._log_det_jac])
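# Hedged aside (not from the original code): each AdditiveCouplingLayer stacked above is exactly
# invertible and volume preserving (log|det J| = 0 per coupling layer), so the log-det-Jacobian
# sum effectively reduces to the lower-triangular input layer's term. Minimal NumPy sketch;
# the mask and the tanh "translation network" below are illustrative stand-ins.
import numpy as np

rng = np.random.default_rng(0)
d = 6
mask = (np.arange(d) % 2).astype(float)           # even/odd mask, as in EvenMask/OddMask above
W = rng.standard_normal((d, d))                   # stand-in for the translation MLP

def t(v):
    return np.tanh((v * mask) @ W)                # translation depends only on the masked half

def forward(x):
    return x + (1.0 - mask) * t(x)

def inverse(y):
    return y - (1.0 - mask) * t(y)                # exact inverse: the masked half is unchanged by forward

x = rng.standard_normal(d)
assert np.allclose(inverse(forward(x)), x)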
print(tf.sqrt(tf.reduce_sum(tf.square(x))).eval()) print("Frobenius(A) = ") print(tf.sqrt(tf.reduce_sum(tf.square(A))).eval()) print("Numpy l2(x) =") print(np.linalg.norm(x.eval(session=tf.Session()))) print("Numpy Frobenius(A) =") print(np.linalg.norm(A.eval(session=tf.Session()))) # Can you write the L(inf) norm? # Orthogonal vectors; How do you make x and y orthonormal? print("x dot y") print(tf.matmul(x, y, transpose_a=True).eval()) # Eigenvalues and eigenvectors print("Numpy Eigenvalues of (A)=") e, v = np.linalg.eig(A.eval()) print(e) print("Numpy Eigenvectors of (A)=") print(v) # Frobenius norm is the square root of the trace of A*tran(A) print("Frobenius(A) = sqrt(Tr(A*tran(A))) = ") print(tf.sqrt(tf.trace(tf.matmul(A, tf.transpose(A)))).eval()) # Determinant of A is the product of its eigenvalues print("det(A)=") print(tf.matrix_determinant(A).eval()) # Determinant from eigenvalues print("det(A) as product of eigenvalues") print(tf.reduce_prod(e).eval())
############################################################################### # 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1, # 2, ..., 6 # Hint: Use tf.range() and tf.diag(). ############################################################################### x = tf.diag(tf.range(1,7)) ############################################################################### # 1f: Create a random 2-d tensor of size 10 x 10 from any distribution. # Calculate its determinant. # Hint: Look at tf.matrix_determinant(). ############################################################################### x = tf.random_uniform([10,10]) y = tf.matrix_determinant(x) ############################################################################### # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]. # Return the unique elements in x # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple. ############################################################################### x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]) y, y_ = tf.unique(x) ############################################################################### # 1h: Create two tensors x and y of shape 300 from any normal distribution, # as long as they are from the same distribution. # Use tf.less() and tf.select() to return: # - The mean squared error of (x - y) if the average of all elements in (x - y)