def _overdetermined(op, grad): """Gradients for the overdetermined case of MatrixSolveLs. This is the backprop for the solution to the normal equations of the first kind: X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B which solve the least squares problem min ||A * X - B||_F^2 + lambda ||X||_F^2. """ a = op.inputs[0] b = op.inputs[1] l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype) x = op.outputs[0] a_shape = array_ops.shape(a) batch_shape = a_shape[:-2] n = a_shape[-1] identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype) gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity chol = linalg_ops.cholesky(gramian) # Temporary z = (A^T * A + lambda * I)^{-1} * grad. z = linalg_ops.cholesky_solve(chol, grad) xzt = math_ops.matmul(x, z, adjoint_b=True) zx_sym = xzt + array_ops.matrix_transpose(xzt) grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True) grad_b = math_ops.matmul(a, z) return (grad_a, grad_b, None)
def __call__(self, input_tuple, state, scope=None): with vs.variable_scope(scope or type(self).__name__): self._init_parameters() _input, target, dInput_dF = input_tuple u, s, r, inner_spikes, dW, dF, reward, reward_mean = state s = (1.0 - tau_syn) * s + tf.concat_v2([_input, inner_spikes], 1) u = (1.0 - tau_mem) * u + mo.matmul(s, self.W) r = (1.0 - tau_refr) * r act_raw = self._activation(u) act = act_raw * tf.exp(-r) spikes = tf.where( act > tf.random_uniform([batch_size, self._num_units]), tf.ones([batch_size, self._num_units]), tf.zeros([batch_size, self._num_units]) ) hidden_spikes, _ = self.slice(spikes) _, act_visible = self.slice(act) reward_mean = (1.0 - tau_long) * reward_mean + tau_long * reward reward = (1.0 - tau_learn) * reward + tau_learn * tf.reduce_sum( target * safe_log(act_visible*dt) + (1.0 - target) * safe_log(1.0 - act_visible*dt) , 1) r += spikes * amp_refr act_grad = tf.gradients([act], [u])[0] learn_target = tf.concat_v2([hidden_spikes, target], 1) factor = tf.concat_v2([ (reward - reward_mean) * tf.ones((batch_size, hidden_size,)), tf.ones((batch_size, visible_size,)) ], 1) neuron_derivative = tf.reduce_sum( (act_grad/act_raw) * (learn_target - act) * factor, 0) Wsliced = tf.slice(self.W, [0, 0], [self._filters_num, self._num_units]) dF_deriv_part = tf.squeeze(mo.matmul(Wsliced, tf.expand_dims(neuron_derivative, 1))) dW += lrate * outer( tf.reduce_sum(s, 0), neuron_derivative ) dInput_dF = tf.reduce_mean(dInput_dF, 0) dF += lrate * dF_deriv_part * dInput_dF return GLMOutputTuple(spikes, act, factor, reward, reward_mean), GLMStateTuple(u, s, r, spikes, dW, dF, reward, reward_mean)
def power_sums_tensor(array_size, power_matrix, multiplier): r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1). Args: array_size: The number of non-trivial sums to pre-compute. power_matrix: The "A" matrix above. multiplier: The "B" matrix above Returns: A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T S[0] is the zero matrix S[1] is B S[2] is A B A^T + B ...and so on """ array_size = math_ops.cast(array_size, dtypes.int32) power_matrix = ops.convert_to_tensor(power_matrix) identity_like_power_matrix = linalg_ops.eye( array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype) identity_like_power_matrix.set_shape( ops.convert_to_tensor(power_matrix).get_shape()) transition_powers = functional_ops.scan( lambda previous_power, _: math_ops.matmul(previous_power, power_matrix), math_ops.range(array_size - 1), initializer=identity_like_power_matrix) summed = math_ops.cumsum( array_ops.concat([ array_ops.expand_dims(multiplier, 0), math_ops.matmul( batch_times_matrix(transition_powers, multiplier), transition_powers, adjoint_b=True) ], 0)) return array_ops.concat( [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" ops = [] num_inverses = len(self._inverses_by_damping) matrix_power_registered = bool(self._matpower_by_exp_and_damping) use_eig = ( self._eigendecomp or matrix_power_registered or num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) if use_eig: self.register_eigendecomp() # ensures self._eigendecomp is set eigenvalues, eigenvectors = self._eigendecomp # pylint: disable=unpacking-non-sequence for damping, inv in self._inverses_by_damping.items(): ops.append( inv.assign( math_ops.matmul(eigenvectors / (eigenvalues + damping), array_ops.transpose(eigenvectors)))) for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): ops.append( matpower.assign( math_ops.matmul(eigenvectors * (eigenvalues + damping)**exp, array_ops.transpose(eigenvectors)))) # These ops share computation and should be run on a single device. ops = [control_flow_ops.group(*ops)] else: for damping, inv in self._inverses_by_damping.items(): ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) return ops
def testPotentialCycle(self): graph1 = ops.Graph() with graph1.as_default(): a = constant_op.constant(1.0, shape=[2, 2]) b = constant_op.constant(2.0, shape=[2, 2]) matmul = math_ops.matmul(a, b) with ops.name_scope("hidden1"): c = nn_ops.relu(matmul) d = constant_op.constant(3.0, shape=[2, 2]) matmul = math_ops.matmul(c, d) orig_meta_graph, _ = meta_graph.export_scoped_meta_graph( export_scope="hidden1", graph=graph1) graph2 = ops.Graph() with graph2.as_default(): with self.assertRaisesRegexp(ValueError, "Graph contains unbound inputs"): meta_graph.import_scoped_meta_graph( orig_meta_graph, import_scope="new_hidden1") meta_graph.import_scoped_meta_graph( orig_meta_graph, import_scope="new_hidden1", input_map={ "$unbound_inputs_MatMul": constant_op.constant( 4.0, shape=[2, 2]) })
def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" ops = super(InverseProvidingFactor, self).make_inverse_update_ops() num_inverses = len(self._inverses_by_damping) matrix_power_registered = bool(self._matpower_by_exp_and_damping) use_eig = (self._eigendecomp or matrix_power_registered or num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) if use_eig: self.register_eigendecomp() # ensures self._eigendecomp is set eigenvalues, eigenvectors = self._eigendecomp # pylint: disable=unpacking-non-sequence # The matrix self._cov is positive semidefinite by construction, but the # numerical eigenvalues could be negative due to numerical errors, so here # we clip them to be at least EIGENVALUE_CLIPPING_THRESHOLD. clipped_eigenvalues = math_ops.maximum(eigenvalues, EIGENVALUE_CLIPPING_THRESHOLD) for damping, inv in self._inverses_by_damping.items(): ops.append( inv.assign( math_ops.matmul(eigenvectors / (clipped_eigenvalues + damping), array_ops.transpose(eigenvectors)))) for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): ops.append( matpower.assign( math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)** exp, array_ops.transpose(eigenvectors)))) else: for damping, inv in self._inverses_by_damping.items(): ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) return ops
def _testDefaultGraphInThread(self, constructed_event, continue_event, i): with session.Session() as s: self.assertEqual(ops.get_default_graph(), s.graph) a = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(2.0, shape=[2, 3]) c = math_ops.matmul(a, b) v = variables.Variable(c, name='var_%d' % i) # Block here until all threads have constructed their graph. constructed_event.set() continue_event.wait() assign_c_to_v = state_ops.assign(v, c) v.initializer.run() assign_c_to_v.eval() v_val = v.eval() self.assertAllEqual([[4.0, 4.0, 4.0]], v_val) d = constant_op.constant(3.0, shape=[2, 3]) e = math_ops.matmul(a, d) assign_e_to_v = state_ops.assign(v, e) e_val = e.eval() self.assertAllEqual([[6.0, 6.0, 6.0]], e_val) v_val = v.eval() self.assertAllEqual([[4.0, 4.0, 4.0]], v_val) s.run(assign_e_to_v) v_val = v.eval() self.assertAllEqual([[6.0, 6.0, 6.0]], v_val) self.assertEqual(ops.get_default_graph(), s.graph)
def Test(self): if not use_static_shape_ or a_np_.dtype in (np.int32, np.int64, np.float16): self.skipTest("Skipping infeasible gradient test.") # Transpose and possibly conjugate a_np_ and b_np_ according to the # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs) # results in a valid matrix multiplication and produces the same result as # np.matrix(a_np_) * np.matrix(b_np_) effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_) effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_) epsilon = np.finfo(a_np_.dtype).eps delta = epsilon**(1.0 / 3.0) tol = 20 * delta with self.session(), test_util.use_gpu(): theoretical, numerical = gradient_checker_v2.compute_gradient( lambda x: math_ops.matmul(x, effective_b_np, **kwargs_), [effective_a_np], delta=delta) self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol) theoretical, numerical = gradient_checker_v2.compute_gradient( lambda x: math_ops.matmul(effective_a_np, x, **kwargs_), [effective_b_np], delta=delta) self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol)
def Test(self): np_val = np.matrix(a_np_) * np.matrix(b_np_) use_gpu = True if a_np_.dtype is np.float16 and ( not test_util.CudaSupportsHalfMatMulAndConv()): use_gpu = False print("Built without fp16 matmul support for Cuda, running test on CPU.") # Transpose and possibly conjugate a_np_ and b_np_ according to the # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs) # results in a valid matrix multiplication and produces the same result as # np.matrix(a_np_) * np.matrix(b_np_) effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_) effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_) with self.cached_session() as sess, test_util.device(use_gpu): if use_static_shape_: a = constant_op.constant(effective_a_np) b = constant_op.constant(effective_b_np) res = math_ops.matmul(a, b, **kwargs_) tf_val = self.evaluate(res) else: a = array_ops.placeholder(a_np_.dtype) b = array_ops.placeholder(b_np_.dtype) res = math_ops.matmul(a, b, **kwargs_) tf_val = sess.run(res, feed_dict={a: effective_a_np, b: effective_b_np}) self.assertAllCloseAccordingToType( tf_val, np_val, float_rtol=2e-5, float_atol=2e-5, half_rtol=0.2, half_atol=0.2)
def sqrt_solve(self, x): """Computes `solve(self, x)`. Doesn't actually do the sqrt! Named as such to agree with API. To compute (M + V D V.T), we use the the Woodbury matrix identity: inv(M + V D V.T) = inv(M) - inv(M) V inv(C) V.T inv(M) where, C = inv(D) + V.T inv(M) V. See: https://en.wikipedia.org/wiki/Woodbury_matrix_identity Args: x: `Tensor` Returns: inv_of_self_times_x: `Tensor` """ minv_x = linalg_ops.matrix_triangular_solve(self._m, x) vt_minv_x = math_ops.matmul(self._v, minv_x, transpose_a=True) cinv_vt_minv_x = linalg_ops.matrix_solve( self._woodbury_sandwiched_term(), vt_minv_x) v_cinv_vt_minv_x = math_ops.matmul(self._v, cinv_vt_minv_x) minv_v_cinv_vt_minv_x = linalg_ops.matrix_triangular_solve( self._m, v_cinv_vt_minv_x) return minv_x - minv_v_cinv_vt_minv_x
def testServerDefChanged(self): """Update server def, and run ops on new cluster.""" context.set_server_def( server_def=get_server_def( ALT_JOB_NAME, local_server_port=0, remote_server_addresses=[ self._cached_server1_target, self._cached_server2_target ], task_index=0)) with ops.device("job:%s/replica:0/task:1/device:CPU:0" % ALT_JOB_NAME): x1 = array_ops.ones([2, 2]) y = math_ops.matmul(x1, x1) np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy()) # Set the server def back to JOB_NAME context.set_server_def( server_def=get_server_def( JOB_NAME, local_server_port=0, remote_server_addresses=[ self._cached_server1_target, self._cached_server2_target ], task_index=0)) with ops.device("job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME): x1 = array_ops.ones([2, 2]) y = math_ops.matmul(x1, x1) np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy())
def Test(self): np.random.seed(1) n = shape_[-1] batch_shape = shape_[:-2] np_dtype = dtype_.as_numpy_dtype a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) if dtype_.is_complex: a += 1j * np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) a += np.conj(a.T) a = np.tile(a, batch_shape + (1, 1)) if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64): atol = 1e-4 else: atol = 1e-12 np_e, np_v = np.linalg.eigh(a) with self.test_session(): if compute_v_: tf_e, tf_v = linalg_ops.self_adjoint_eig(constant_op.constant(a)) # Check that V*diag(E)*V^T is close to A. a_ev = math_ops.matmul( math_ops.matmul(tf_v, array_ops.matrix_diag(tf_e)), tf_v, adjoint_b=True) self.assertAllClose(a_ev.eval(), a, atol=atol) # Compare to numpy.linalg.eigh. CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(), tf_v.eval(), atol) else: tf_e = linalg_ops.self_adjoint_eigvals(constant_op.constant(a)) self.assertAllClose( np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
def _symmetric_matrix_square_root(mat, eps=1e-10): """Compute square root of a symmetric matrix. Note that this is different from an elementwise square root. We want to compute M' where M' = sqrt(mat) such that M' * M' = mat. Also note that this method **only** works for symmetric matrices. Args: mat: Matrix to take the square root of. eps: Small epsilon such that any element less than eps will not be square rooted to guard against numerical instability. Returns: Matrix square root of mat. """ # Unlike numpy, tensorflow's return order is (s, u, v) s, u, v = linalg_ops.svd(mat) # sqrt is unstable around 0, just use 0 in such case si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s)) # Note that the v returned by Tensorflow is v = V # (when referencing the equation A = U S V^T) # This is unlike Numpy which returns v = V^T return math_ops.matmul( math_ops.matmul(u, array_ops.diag(si)), v, transpose_b=True)
def grad(dmm, dr): return [ math_ops.matmul(dmm, b, transpose_b=True) + math_ops.matmul(array_ops.ones_like(b * dr), b, transpose_b=True), math_ops.matmul(a, dmm, transpose_b=True) + math_ops.matmul(a, array_ops.ones_like(a) * dr, transpose_b=True) ]
def _MatrixInverseGrad(op, grad): """Gradient for MatrixInverse.""" ainv = op.outputs[0] return -math_ops.matmul( ainv, math_ops.matmul(grad, ainv, transpose_b=True), transpose_a=True)
def test_lanczos_bidiag(self): np.random.seed(1) a_np = np.random.uniform( low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) tol = 1e-12 if dtype_ == np.float64 else 1e-5 with self.cached_session() as sess: if use_static_shape_: a = constant_op.constant(a_np) else: a = array_ops.placeholder(dtype_) operator = util.create_operator(a) lbd = lanczos.lanczos_bidiag( operator, steps_, orthogonalize=orthogonalize_) # The computed factorization should satisfy the equations # A * V = U * B # A' * U[:, :-1] = V * B[:-1, :]' av = math_ops.matmul(a, lbd.v) ub = lanczos.bidiag_matmul(lbd.u, lbd.alpha, lbd.beta, adjoint_b=False) atu = math_ops.matmul(a, lbd.u[:, :-1], adjoint_a=True) vbt = lanczos.bidiag_matmul(lbd.v, lbd.alpha, lbd.beta, adjoint_b=True) if use_static_shape_: av_val, ub_val, atu_val, vbt_val = sess.run([av, ub, atu, vbt]) else: av_val, ub_val, atu_val, vbt_val = sess.run([av, ub, atu, vbt], feed_dict={a: a_np}) self.assertAllClose(av_val, ub_val, atol=tol, rtol=tol) self.assertAllClose(atu_val, vbt_val, atol=tol, rtol=tol)
def benchmarkBatchMatMulBroadcast(self): for (a_shape, b_shape) in self.shape_pairs: with compat.forward_compatibility_horizon(2019, 4, 26): with ops.Graph().as_default(), \ session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix_a = variables.Variable( GetRandomNormalInput(a_shape, np.float32)) matrix_b = variables.Variable( GetRandomNormalInput(b_shape, np.float32)) variables.global_variables_initializer().run() # Use batch matmul op's internal broadcasting. self.run_op_benchmark( sess, math_ops.matmul(matrix_a, matrix_b), min_iters=50, name="batch_matmul_cpu_{}_{}".format(a_shape, b_shape)) # Manually broadcast the input matrices using the broadcast_to op. broadcasted_batch_shape = array_ops.broadcast_static_shape( matrix_a.shape[:-2], matrix_b.shape[:-2]) broadcasted_a_shape = broadcasted_batch_shape.concatenate( matrix_a.shape[-2:]) broadcasted_b_shape = broadcasted_batch_shape.concatenate( matrix_b.shape[-2:]) self.run_op_benchmark( sess, math_ops.matmul( array_ops.broadcast_to(matrix_a, broadcasted_a_shape), array_ops.broadcast_to(matrix_b, broadcasted_b_shape)), min_iters=50, name="batch_matmul_manual_broadcast_cpu_{}_{}".format( a_shape, b_shape))
def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim): """Compute the approximate sliced Wasserstein distance. Args: a: (matrix) Distribution "a" of samples (row, col). b: (matrix) Distribution "b" of samples (row, col). random_sampling_count: (int) Number of random projections to average. random_projection_dim: (int) Dimension of the random projection space. Returns: Float containing the approximate distance between "a" and "b". """ s = array_ops.shape(a) means = [] for _ in range(random_sampling_count): # Random projection matrix. proj = random_ops.random_normal( [array_ops.shape(a)[1], random_projection_dim]) proj *= math_ops.rsqrt( math_ops.reduce_sum(math_ops.square(proj), 0, keepdims=True)) # Project both distributions and sort them. proj_a = math_ops.matmul(a, proj) proj_b = math_ops.matmul(b, proj) proj_a = _sort_rows(proj_a, s[0]) proj_b = _sort_rows(proj_b, s[0]) # Pairwise Wasserstein distance. wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b)) means.append(wdist) return math_ops.reduce_mean(means)
def _sqrt_solve(self, rhs): # Recall the square root of this operator is M + VDV^T. # The Woodbury formula gives: # (M + VDV^T)^{-1} # = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1} # = M^{-1} - M^{-1} V C^{-1} V^T M^{-1} # where C is the capacitance matrix. # TODO(jvdillon) Determine if recursively applying rank-1 updates is more # efficient. May not be possible because a general n x n matrix can be # represeneted as n rank-1 updates, and solving with this matrix is always # done in O(n^3) time. m = self._operator v = self._v cchol = self._chol_capacitance(batch_mode=False) # The operators will use batch/singleton mode automatically. We don't # override. # M^{-1} rhs minv_rhs = m.solve(rhs) # V^T M^{-1} rhs vt_minv_rhs = math_ops.matmul(v, minv_rhs, transpose_a=True) # C^{-1} V^T M^{-1} rhs cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs) # V C^{-1} V^T M^{-1} rhs v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs) # M^{-1} V C^{-1} V^T M^{-1} rhs minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs) # M^{-1} - M^{-1} V C^{-1} V^T M^{-1} return minv_rhs - minv_v_cinv_vt_minv_rhs
def transition_power_noise_accumulator(self, num_steps): """Computes power sums in closed form.""" def _pack_and_reshape(*values): return array_ops.reshape( array_ops.stack(axis=1, values=values), array_ops.concat(values=[array_ops.shape(num_steps), [2, 2]], axis=0)) num_steps = math_ops.cast(num_steps, self.dtype) noise_transitions = num_steps - 1 noise_transform = ops.convert_to_tensor(self.get_noise_transform(), self.dtype) noise_covariance_transformed = math_ops.matmul( math_ops.matmul(noise_transform, self.state_transition_noise_covariance), noise_transform, adjoint_b=True) # Un-packing the transformed noise as: # [[a b] # [c d]] a, b, c, d = array_ops.unstack( array_ops.reshape(noise_covariance_transformed, [-1, 4]), axis=1) sum_of_first_n = noise_transitions * (noise_transitions + 1) / 2 sum_of_first_n_squares = sum_of_first_n * (2 * noise_transitions + 1) / 3 return _pack_and_reshape( num_steps * a + sum_of_first_n * (b + c) + sum_of_first_n_squares * d, num_steps * b + sum_of_first_n * d, num_steps * c + sum_of_first_n * d, num_steps * d)
def _SparseMatMul(t1, t2, transpose_a=False, transpose_b=False): """Helper function to create SparseMatMul op.""" assert t1 in is_sparse and t2 in is_sparse t1_sparse = is_sparse[t1] t2_sparse = is_sparse[t2] if not t1_sparse and not t2_sparse: return math_ops.matmul(t1, t2, transpose_a=transpose_a, transpose_b=transpose_b) transpose_out = False if not t1_sparse: transpose_out = True t1, t2 = t2, t1 t1_sparse, t2_sparse = t2_sparse, t1_sparse assert t1_sparse transpose_a, transpose_b = not transpose_b, not transpose_a if transpose_b: t2 = array_ops.transpose(t2) transpose_b = False m = math_ops.matmul(t1, t2, transpose_a=transpose_a, transpose_b=transpose_b, a_is_sparse=t1_sparse, b_is_sparse=t2_sparse) if transpose_out: m = array_ops.transpose(m) return m
def _QrGrad(op, dq, dr): """Gradient for Qr.""" q, r = op.outputs if q.dtype.is_complex: raise NotImplementedError("QrGrad not implemented for dtype: %s" % q.dtype) if (r.shape.ndims is None or r.shape.as_list()[-2] is None or r.shape.as_list()[-1] is None): raise NotImplementedError("QrGrad not implemented with dynamic shapes.") if r.shape.dims[-2].value != r.shape.dims[-1].value: raise NotImplementedError("QrGrad not implemented when ncols > nrows " "or full_matrices is true and ncols != nrows.") qdq = math_ops.matmul(q, dq, adjoint_a=True) qdq_ = qdq - _linalg.adjoint(qdq) rdr = math_ops.matmul(r, dr, adjoint_b=True) rdr_ = rdr - _linalg.adjoint(rdr) tril = array_ops.matrix_band_part(qdq_ + rdr_, -1, 0) def _TriangularSolve(x, r): """Equiv to matmul(x, adjoint(matrix_inverse(r))) if r is upper-tri.""" return _linalg.adjoint( linalg_ops.matrix_triangular_solve( r, _linalg.adjoint(x), lower=False, adjoint=False)) grad_a = math_ops.matmul(q, dr + _TriangularSolve(tril, r)) grad_b = _TriangularSolve(dq - math_ops.matmul(q, qdq), r) return grad_a + grad_b
def compute_cov(tensor, tensor_right=None, normalizer=None): """Compute the empirical second moment of the rows of a 2D Tensor. This function is meant to be applied to random matrices for which the true row mean is zero, so that the true second moment equals the true covariance. Args: tensor: A 2D Tensor. tensor_right: An optional 2D Tensor. If provided, this function computes the matrix product tensor^T * tensor_right instead of tensor^T * tensor. normalizer: optional scalar for the estimator (by default, the normalizer is the number of rows of tensor). Returns: A square 2D Tensor with as many rows/cols as the number of input columns. """ if normalizer is None: normalizer = array_ops.shape(tensor)[0] if tensor_right is None: cov = ( math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast( normalizer, tensor.dtype)) return (cov + array_ops.transpose(cov)) / math_ops.cast(2.0, cov.dtype) else: return (math_ops.matmul(tensor, tensor_right, transpose_a=True) / math_ops.cast(normalizer, tensor.dtype))
def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.test_session(): var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype) x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) pred = math_ops.matmul(var0, x) + var1 loss = pred * pred sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss) # TODO(apassos) calling initialize_resources on all resources here # doesn't work because the sessions and graph are reused across unit # tests and this would mean trying to reinitialize variables. Figure out # a long-term solution for this. resources.initialize_resources([var0, var1]).run() # Fetch params to validate initial values self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) self.assertAllCloseAccordingToType([3.0], var1.eval()) # Run 1 step of sgd sgd_op.run() # Validate updated params np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 np_grad = 2 * np_pred self.assertAllCloseAccordingToType( [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval()) self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
def test_while_jacobian(self): x = random_ops.random_uniform([1, 3]) y = random_ops.random_uniform([3, 3]) # out = x @ y @ y @ y @ y, where @ is matmul operator. _, out = control_flow_ops.while_loop( lambda i, _: i < 4, lambda i, out: (i + 1, math_ops.matmul(out, y)), [0, x]) def loop_fn(i): out_i = array_ops.gather(out, i, axis=1) return array_ops.reshape(gradient_ops.gradients(out_i, x)[0], [-1]) out = pfor_control_flow_ops.pfor(loop_fn, iters=3) # The above code does not work with tf.while_loop instead of pfor. So we # manually compute the expected output here. # Note that gradient of output w.r.t is (y @ y @ y @ y)^T. expected_output = y for _ in range(3): expected_output = math_ops.matmul(expected_output, y) expected_output = array_ops.transpose(expected_output, [1, 0]) with session.Session() as sess: out, expected = sess.run([out, expected_output]) self.assertAllClose(expected, out)
def runFiniteDifferences(self, shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64), scalarTest=False): with self.test_session(use_gpu=False): for shape in shapes: for batch in False, True: for dtype in dtypes: if not scalarTest: x = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) tensor = math_ops.matmul(x, array_ops.transpose(x)) / shape[0] else: # This is designed to be a faster test for larger matrices. x = constant_op.constant(np.random.randn(), dtype) R = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) e = math_ops.mul(R, x) tensor = math_ops.matmul(e, array_ops.transpose(e)) / shape[0] # Inner-most matrices in tensor are positive definite. if batch: tensor = array_ops.tile(array_ops.expand_dims(tensor, 0), [4, 1, 1]) y = linalg_ops.cholesky(tensor) if scalarTest: y = math_ops.reduce_mean(y) error = gradient_checker.compute_gradient_error(x, x._shape_as_list(), y, y._shape_as_list()) tf_logging.info("error = %f", error) if dtype == dtypes_lib.float64: self.assertLess(error, 1e-5) else: self.assertLess(error, 3e-3)
def test_mixing_eager_and_graph_tensors(self): with ops.Graph().as_default(): x1 = array_ops.ones((3, 3)) x2 = array_ops.ones((3, 3)) self.assertIsInstance(x2, ops.EagerTensor) with self.assertRaisesRegexp(TypeError, 'Graph tensors'): math_ops.matmul(x1, x2)
def _project_input(self, inputs, c_prev, m_prev, with_c): """Fills in c_prev and m_prev with projected input, for input dimensions """ conf = self._config if (inputs is not None and inputs.get_shape().with_rank(2)[1].value > 0 and len(conf.inputs) > 0): if isinstance(inputs, tuple): if len(conf.inputs) != len(inputs): raise ValueError("Expect inputs as a tuple of {} " "tensors".format(len(conf.inputs))) input_splits = inputs else: input_splits = array_ops.split( value=inputs, num_or_size_splits=len(conf.inputs), axis=1) input_sz = input_splits[0].get_shape().with_rank(2)[1].value for i, j in enumerate(conf.inputs): input_project_m = vs.get_variable( 'project_m_{}'.format(j), [input_sz, conf.num_units], dtype=inputs.dtype) m_prev[j] = math_ops.matmul(input_splits[i], input_project_m) if with_c: input_project_c = vs.get_variable( 'project_c_{}'.format(j), [input_sz, conf.num_units], dtype=inputs.dtype) c_prev[j] = math_ops.matmul(input_splits[i], input_project_c)
def _batch_sqrt_solve(self, rhs): # Recall the square root of this operator is M + VDV^T. # The Woodbury formula gives: # (M + VDV^T)^{-1} # = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1} # = M^{-1} - M^{-1} V C^{-1} V^T M^{-1} # where C is the capacitance matrix. m = self._operator v = self._v cchol = self._chol_capacitance(batch_mode=True) # The operators will use batch/singleton mode automatically. We don't # override. # M^{-1} rhs minv_rhs = m.solve(rhs) # V^T M^{-1} rhs vt_minv_rhs = math_ops.matmul(v, minv_rhs, adjoint_a=True) # C^{-1} V^T M^{-1} rhs cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs) # V C^{-1} V^T M^{-1} rhs v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs) # M^{-1} V C^{-1} V^T M^{-1} rhs minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs) # M^{-1} - M^{-1} V C^{-1} V^T M^{-1} return minv_rhs - minv_v_cinv_vt_minv_rhs
def _Underdetermined(op, grad): """Gradients for the underdetermined case of MatrixSolveLs. This is the backprop for the solution to the normal equations of the second kind: X = F(A, B) = A * (A*A^T + lambda*I)^{-1} * B that (for lambda=0) solve the least squares problem min ||X||_F subject to A*X = B. """ a = op.inputs[0] b = op.inputs[1] l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype) # pylint: disable=protected-access chol = linalg_ops._RegularizedGramianCholesky( a, l2_regularizer=l2_regularizer, first_kind=False) # pylint: enable=protected-access grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad)) # Temporary tmp = (A * A^T + lambda * I)^{-1} * B. tmp = linalg_ops.cholesky_solve(chol, b) a1 = math_ops.matmul(tmp, a, adjoint_a=True) a1 = -math_ops.matmul(grad_b, a1) a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True) a2 = math_ops.matmul(tmp, a2, adjoint_b=True) grad_a = a1 + a2 return (grad_a, grad_b, None)
def _matmul_with_sharded_variable(tensor, sharded_tensor): """Multiply tensor with each tensor in sharded_tensor and column-concatenated""" return array_ops.concat( 1, [math_ops.matmul(tensor, shard) for shard in sharded_tensor])
def jac_mul(tangent): flat_tangent = array_ops.reshape(tangent, shape=[-1]) tangent_vector = array_ops.expand_dims(flat_tangent, 1) jvp_vector = math_ops.matmul(jac_fwd, tangent_vector) return array_ops.reshape(jvp_vector, tangent.shape)
def _verifyCholesky(self, x): # Verify that LL^T == x. with self.test_session() as sess: chol = linalg_ops.cholesky(x) verification = math_ops.matmul(chol, chol, adjoint_b=True) self._verifyCholeskyBase(sess, x, chol, verification)
def weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, trainable=True, scope=None): """A tf.contrib.layers style linear prediction builder based on FeatureColumn. Generally a single example in training data is described with feature columns. This function generates weighted sum for each num_outputs. Weighted sum refers to logits in classification problems. It refers to prediction itself for linear regression problems. Example: ``` # Building model for training feature_columns = ( real_valued_column("my_feature1"), ... ) columns_to_tensor = tf.parse_example(...) logits = weighted_sum_from_feature_columns( columns_to_tensors=columns_to_tensor, feature_columns=feature_columns, num_outputs=1) loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) ``` Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: A tuple containing: * A Tensor which represents predictions of a linear model. * A dictionary which maps feature_column to corresponding Variable. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ columns_to_tensors = columns_to_tensors.copy() check_feature_columns(feature_columns) with variable_scope.variable_scope( scope, default_name='weighted_sum_from_feature_columns', values=columns_to_tensors.values()): output_tensors = [] column_to_variable = dict() transformer = _Transformer(columns_to_tensors) # pylint: disable=protected-access for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) try: embedding_lookup_arguments = column._wide_embedding_lookup_arguments( transformed_tensor) variable, predictions = _create_embedding_lookup( column, columns_to_tensors, embedding_lookup_arguments, num_outputs, trainable, weight_collections) except NotImplementedError: with variable_scope.variable_scope( None, default_name=column.name, values=columns_to_tensors.values()): tensor = column._to_dense_tensor(transformed_tensor) tensor = _maybe_reshape_input_tensor(tensor, column.name, output_rank=2) variable = [ contrib_variables.model_variable( name='weight', shape=[tensor.get_shape()[1], num_outputs], initializer=init_ops.zeros_initializer(), trainable=trainable, collections=weight_collections) ] predictions = math_ops.matmul(tensor, variable[0], name='matmul') except ValueError as ee: raise ValueError( 'Error creating weighted sum for column: {}.\n' '{}'.format(column.name, ee)) output_tensors.append( array_ops.reshape(predictions, shape=(-1, num_outputs))) column_to_variable[column] = variable _log_variable(variable) _maybe_restore_from_checkpoint(column._checkpoint_path(), variable) # pylint: enable=protected-access predictions_no_bias = math_ops.add_n(output_tensors) bias = contrib_variables.model_variable( 'bias_weight', shape=[num_outputs], initializer=init_ops.zeros_initializer(), trainable=trainable, collections=_add_variable_collection(weight_collections)) _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, column_to_variable, bias
def npairs_loss(labels, embeddings_anchor, embeddings_positive, reg_lambda=3e-3, print_losses=False): """Computes the npairs loss. Npairs loss expects paired data where a pair is composed of samples from the same labels and each pairs in the minibatch have different labels. The loss has two components. The first component is the L2 regularizer on the embedding vectors. The second component is the sum of cross entropy loss which takes each row of the pair-wise similarity matrix as logits and the remapped one-hot labels as labels. See: http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf Args: labels: 1-D tf.int32 `Tensor` of shape [batch_size/2]. embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the embedding vectors for the anchor images. Embeddings should not be l2 normalized. embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the embedding vectors for the positive images. Embeddings should not be l2 normalized. reg_lambda: Float. L2 regularization term on the embedding vectors. print_losses: Boolean. Option to print the xent and l2loss. Returns: npairs_loss: tf.float32 scalar. """ # pylint: enable=line-too-long # Add the regularizer on the embedding. reg_anchor = math_ops.reduce_mean( math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1)) reg_positive = math_ops.reduce_mean( math_ops.reduce_sum(math_ops.square(embeddings_positive), 1)) l2loss = math_ops.multiply(0.25 * reg_lambda, reg_anchor + reg_positive, name='l2loss') # Get per pair similarities. similarity_matrix = math_ops.matmul(embeddings_anchor, embeddings_positive, transpose_a=False, transpose_b=True) # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) labels_remapped = math_ops.to_float( math_ops.equal(labels, array_ops.transpose(labels))) labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keepdims=True) # Add the softmax loss. xent_loss = nn.softmax_cross_entropy_with_logits(logits=similarity_matrix, labels=labels_remapped) xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy') if print_losses: xent_loss = logging_ops.Print( xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss]) return l2loss + xent_loss
def _covariance(self): x = self._variance_scale_term() * self._mean() return array_ops.matrix_set_diag( -math_ops.matmul(x[..., array_ops.newaxis], x[..., array_ops.newaxis, :]), # outer prod self._variance())
def lambda_fn(x): return math_ops.matmul(x[0], x[1])
def random_normal_correlated_columns(shape, mean=0.0, stddev=1.0, dtype=dtypes.float32, eps=1e-4, seed=None): """Batch matrix with (possibly complex) Gaussian entries and correlated cols. Returns random batch matrix `A` with specified element-wise `mean`, `stddev`, living close to an embedded hyperplane. Suppose `shape[-2:] = (M, N)`. If `M < N`, `A` is a random `M x N` [batch] matrix with iid Gaussian entries. If `M >= N`, then the colums of `A` will be made almost dependent as follows: ``` L = random normal N x N-1 matrix, mean = 0, stddev = 1 / sqrt(N - 1) B = random normal M x N-1 matrix, mean = 0, stddev = stddev. G = (L B^H)^H, a random normal M x N matrix, living on N-1 dim hyperplane E = a random normal M x N matrix, mean = 0, stddev = eps mu = a constant M x N matrix, equal to the argument "mean" A = G + E + mu ``` Args: shape: Python list of integers. Shape of the returned tensor. Must be at least length two. mean: `Tensor` giving mean of normal to sample from. stddev: `Tensor` giving stdev of normal to sample from. dtype: `TensorFlow` `dtype` or numpy dtype eps: Distance each column is perturbed from the low-dimensional subspace. seed: Python integer seed for the RNG. Returns: `Tensor` with desired shape and dtype. Raises: ValueError: If `shape` is not at least length 2. """ dtype = dtypes.as_dtype(dtype) if len(shape) < 2: raise ValueError( "Argument shape must be at least length 2. Found: %s" % shape) # Shape is the final shape, e.g. [..., M, N] shape = list(shape) batch_shape = shape[:-2] m, n = shape[-2:] # If there is only one column, "they" are by definition correlated. if n < 2 or n < m: return random_normal(shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed) # Shape of the matrix with only n - 1 columns that we will embed in higher # dimensional space. smaller_shape = batch_shape + [m, n - 1] # Shape of the embedding matrix, mapping batch matrices # from [..., N-1, M] to [..., N, M] embedding_mat_shape = batch_shape + [n, n - 1] # This stddev for the embedding_mat ensures final result has correct stddev. stddev_mat = 1 / np.sqrt(n - 1) with ops.name_scope("random_normal_correlated_columns"): smaller_mat = random_normal(smaller_shape, mean=0.0, stddev=stddev_mat, dtype=dtype, seed=seed) if seed is not None: seed += 1287 embedding_mat = random_normal(embedding_mat_shape, dtype=dtype, seed=seed) embedded_t = math_ops.matmul(embedding_mat, smaller_mat, transpose_b=True) embedded = array_ops.matrix_transpose(embedded_t) mean_mat = array_ops.ones_like(embedded) * mean return embedded + random_normal(shape, stddev=eps, dtype=dtype) + mean_mat
def _matmul(self, x, adjoint=False, adjoint_arg=False): return math_ops.matmul(self._matrix, x, adjoint_a=adjoint, adjoint_b=adjoint_arg)
def call(self, inputs, state): """Perform a step of attention-wrapped RNN. - Step 1: Mix the `inputs` and previous step's `attention` output via `cell_input_fn`. - Step 2: Call the wrapped `cell` with this input and its previous state. - Step 3: Score the cell's output with `attention_mechanism`. - Step 4: Calculate the alignments by passing the score through the `normalizer`. - Step 5: Calculate the context vector as the inner product between the alignments and the attention_mechanism's values (memory). - Step 6: Calculate the attention output by concatenating the cell output and context through the attention layer (a linear layer with `attention_size` outputs). Args: inputs: (Possibly nested tuple of) Tensor, the input at this time step. state: An instance of `AttentionWrapperState` containing tensors from the previous time step. Returns: A tuple `(attention_or_cell_output, next_state)`, where: - `attention_or_cell_output` depending on `output_attention`. - `next_state` is an instance of `DynamicAttentionWrapperState` containing the state calculated at this time step. """ # Step 1: Calculate the true inputs to the cell based on the # previous attention value. output_prev_step = state.cell_state.h # get hr_(i-1) attention_input = self._attention_input_fn( inputs, output_prev_step) # get input to BahdanauAttention to get alpha_i alignments, raw_scores = self._attention_mechanism( attention_input, previous_alignments=state.alignments ) #alignments after softmax raw_scores(batch,sentence) expanded_alignments = array_ops.expand_dims(alignments, 1) attention_mechanism_values = self._attention_mechanism.values #.value is encoder after multply w. means wh context = math_ops.matmul( expanded_alignments, attention_mechanism_values) #means how import per word in senctce context = array_ops.squeeze(context, [1]) cell_inputs = self._cell_input_fn( inputs, context ) #concatenate input with alpha*memory and feed into root LSTM cell_state = state.cell_state cell_output, next_cell_state = self._cell(cell_inputs, cell_state) if self._attention_layer is not None: attention = self._attention_layer( array_ops.concat([cell_output, context], 1)) else: attention = context if self._alignment_history: alignment_history = state.alignment_history.write( state.time, alignments) else: alignment_history = () next_state = AttentionWrapperState(time=state.time + 1, cell_state=next_cell_state, attention=attention, alignments=alignments, alignment_history=alignment_history) if self._output_attention: return raw_scores, next_state else: return cell_output, next_state
def __call__(self, inputs, state, scope=None): """Run one step of GridRNN. Args: inputs: input Tensor, 2D, batch x input_size. Or None state: state Tensor, 2D, batch x state_size. Note that state_size = cell_state_size * recurrent_dims scope: VariableScope for the created subgraph; defaults to "GridRNNCell". Returns: A tuple containing: - A 2D, batch x output_size, Tensor representing the output of the cell after reading "inputs" when previous state was "state". - A 2D, batch x state_size, Tensor representing the new state of the cell after reading "inputs" when previous state was "state". """ state_sz = state.get_shape().as_list()[1] if self.state_size != state_sz: raise ValueError('Actual state size not same as specified: {} vs {}.'.format(state_sz, self.state_size)) conf = self._config dtype = inputs.dtype if inputs is not None else state.dtype # c_prev is `m`, and m_prev is `h` in the paper. Keep c and m here for consistency with the codebase c_prev = [None] * self._config.num_dims m_prev = [None] * self._config.num_dims cell_output_size = self._cell.state_size - conf.num_units # for LSTM : state = memory cell + output, hence cell_output_size > 0 # for GRU/RNN: state = output (whose size is equal to _num_units), hence cell_output_size = 0 for recurrent_dim, start_idx in zip(self._config.recurrents, range(0, self.state_size, self._cell.state_size)): if cell_output_size > 0: c_prev[recurrent_dim] = array_ops.slice(state, [0, start_idx], [-1, conf.num_units]) m_prev[recurrent_dim] = array_ops.slice(state, [0, start_idx + conf.num_units], [-1, cell_output_size]) else: m_prev[recurrent_dim] = array_ops.slice(state, [0, start_idx], [-1, conf.num_units]) new_output = [None] * conf.num_dims new_state = [None] * conf.num_dims with vs.variable_scope(scope or type(self).__name__): # GridRNNCell # project input if inputs is not None and sum(inputs.get_shape().as_list()) > 0 and len(conf.inputs) > 0: input_splits = array_ops.split(1, len(conf.inputs), inputs) input_sz = input_splits[0].get_shape().as_list()[1] for i, j in enumerate(conf.inputs): input_project_m = vs.get_variable('project_m_{}'.format(j), [input_sz, conf.num_units], dtype=dtype) m_prev[j] = math_ops.matmul(input_splits[i], input_project_m) if cell_output_size > 0: input_project_c = vs.get_variable('project_c_{}'.format(j), [input_sz, conf.num_units], dtype=dtype) c_prev[j] = math_ops.matmul(input_splits[i], input_project_c) _propagate(conf.non_priority, conf, self._cell, c_prev, m_prev, new_output, new_state, True) _propagate(conf.priority, conf, self._cell, c_prev, m_prev, new_output, new_state, False) output_tensors = [new_output[i] for i in self._config.outputs] output = array_ops.zeros([0, 0], dtype) if len(output_tensors) == 0 else array_ops.concat(1, output_tensors) state_tensors = [new_state[i] for i in self._config.recurrents] states = array_ops.zeros([0, 0], dtype) if len(state_tensors) == 0 else array_ops.concat(1, state_tensors) return output, states
def test_mixing_numpy_arrays_and_graph_tensors(self): with ops.Graph().as_default(): x1 = array_ops.ones((3, 3)) x2 = np.ones((3, 3), dtype='float32') with self.assertRaisesRegexp(TypeError, 'Graph tensors'): math_ops.matmul(x1, x2)
def linear(args, output_size, bias, handle=None, bias_initializer=None, kernel_initializer=None, kernel_name=_WEIGHTS_VARIABLE_NAME, bias_name=_BIAS_VARIABLE_NAME): """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. This function is originally copied from rnn_cell_impl.py and add the capability to deal with 3-D matrix. Args: args: a 2D/3D Tensor or a list of 2D/3D, batch x n, Tensors. output_size: int, second dimension of W[i]. bias: boolean, whether to add a bias term or not. handle: A Tensor. If provided, use it as the weight matrix. bias_initializer: starting value to initialize the bias (default is all zeros). kernel_initializer: starting value to initialize the weight. Returns: A 2D/3D Tensor with shape [batch x output_size] equal to sum_i(args[i] * W[i]), where W[i]s are newly created matrices. Raises: ValueError: if some of the arguments has unspecified or wrong shape. """ if args is None or (nest.is_sequence(args) and not args): raise ValueError("`args` must be specified") if not nest.is_sequence(args): # added by Chengqi ## capable for 3D tensor shape = args.get_shape() if shape.ndims > 2: scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: if handle is None: weights = vs.get_variable(kernel_name, [shape[-1].value, output_size], dtype=args.dtype, initializer=kernel_initializer) else: assert output_size == handle.get_shape().as_list()[-1], \ "ouput_size should be the same as the last dimension of handle tensor" weights = handle res = math_ops.tensordot(args, weights, [[shape.ndims - 1], [0]]) if not bias: return res with vs.variable_scope(outer_scope) as inner_scope: inner_scope.set_partitioner(None) if bias_initializer is None: bias_initializer = init_ops.constant_initializer( 0.0, dtype=args.dtype) biases = vs.get_variable(bias_name, [output_size], dtype=args.dtype, initializer=bias_initializer) return nn_ops.bias_add(res, biases) else: args = [args] # Calculate the total size of arguments on dimension 1. total_arg_size = 0 shapes = [a.get_shape() for a in args] for shape in shapes: if shape.ndims != 2: raise ValueError("linear is expecting 2D arguments: %s" % shapes) if shape[1].value is None: raise ValueError( "linear expects shape[1] to be provided for shape %s, " "but saw %s" % (shape, shape[1])) else: total_arg_size += shape[1].value dtype = [a.dtype for a in args][0] # Now the computation. scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: if handle is None: weights = vs.get_variable(kernel_name, [total_arg_size, output_size], dtype=dtype, initializer=kernel_initializer) else: assert output_size == handle.get_shape().as_list()[-1], \ "ouput_size should be the same as the last dimension of handle tensor" weights = handle if len(args) == 1: res = math_ops.matmul(args[0], weights) else: res = math_ops.matmul(array_ops.concat(args, 1), weights) if not bias: return res with vs.variable_scope(outer_scope) as inner_scope: inner_scope.set_partitioner(None) if bias_initializer is None: bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) biases = vs.get_variable(bias_name, [output_size], dtype=dtype, initializer=bias_initializer) return nn_ops.bias_add(res, biases)
def _benchmark_tf_matmul(self, m, transpose_b, num_iters): func = lambda: math_ops.matmul(m, m, transpose_b=transpose_b) self._run(func, num_iters)
def logsumexp_logit(embeddings): return math_ops.reduce_logsumexp(math_ops.matmul(embeddings, reweighted_inputs, transpose_b=True), axis=1, keep_dims=False)
def __call__(self, inputs, state, scope=None): """Run one step of LSTM. Args: inputs: input Tensor, 2D, batch x num_units. state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, batch x state_size`. If `state_is_tuple` is True, this must be a tuple of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. scope: VariableScope for the created subgraph; defaults to "LSTMCell". Returns: A tuple containing: - A `2-D, [batch x output_dim]`, Tensor representing the output of the LSTM after reading `inputs` when previous state was `state`. Here output_dim is: num_proj if num_proj was set, num_units otherwise. - Tensor(s) representing the new state of LSTM after reading `inputs` when the previous state was `state`. Same type and shape(s) as `state`. Raises: ValueError: If input size cannot be inferred from inputs via static shape inference. """ num_proj = self._num_units if self._num_proj is None else self._num_proj if self._state_is_tuple: (c_prev, m_prev) = state else: c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) dtype = inputs.dtype input_size = inputs.get_shape().with_rank(2)[1] if input_size.value is None: raise ValueError( "Could not infer input size from inputs.get_shape()[-1]") with vs.variable_scope(scope or type(self).__name__, initializer=self._initializer): # "LSTMCell" concat_w = _get_concat_variable( "W", [input_size.value + num_proj, 4 * self._num_units], dtype, self._num_unit_shards) b = vs.get_variable("B", shape=[4 * self._num_units], initializer=array_ops.zeros_initializer, dtype=dtype) # i = input_gate, j = new_input, f = forget_gate, o = output_gate cell_inputs = array_ops.concat(1, [inputs, m_prev]) lstm_matrix = nn_ops.bias_add( math_ops.matmul(cell_inputs, concat_w), b) i, j, f, o = array_ops.split(1, 4, lstm_matrix) # Diagonal connections if self._use_peepholes: w_f_diag = vs.get_variable("W_F_diag", shape=[self._num_units], dtype=dtype) w_i_diag = vs.get_variable("W_I_diag", shape=[self._num_units], dtype=dtype) w_o_diag = vs.get_variable("W_O_diag", shape=[self._num_units], dtype=dtype) if self._use_peepholes: c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) else: c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * self._activation(j)) if self._cell_clip is not None: # pylint: disable=invalid-unary-operand-type c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) # pylint: enable=invalid-unary-operand-type if self._use_peepholes: m = sigmoid(o + w_o_diag * c) * self._activation(c) else: m = sigmoid(o) * self._activation(c) if self._num_proj is not None: concat_w_proj = _get_concat_variable( "W_P", [self._num_units, self._num_proj], dtype, self._num_proj_shards) m = math_ops.matmul(m, concat_w_proj) if self._proj_clip is not None: # pylint: disable=invalid-unary-operand-type m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) # pylint: enable=invalid-unary-operand-type new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else array_ops.concat(1, [c, m])) return m, new_state
def overflow_in_float16(): out = var * 2 ** 10 out = math_ops.matmul(out, out) return array_ops.reshape(out, ())
def linear(args, output_size, bias, bias_start=0.0, name='', scope=None, var_on_cpu=False, wd=0.0, squeeze=False, initializer=None, feat=None, state=None, drop_rate=1.0): """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. Args: args: a 2D Tensor or a list of 2D, batch x n, Tensors. output_size: int, second dimension of W[i]. bias: boolean, whether to add a bias term or not. bias_start: starting value to initialize the bias; 0 by default. scope: VariableScope for the created subgraph; defaults to "Linear". Returns: A 2D Tensor with shape [batch x output_size] equal to sum_i(args[i] * W[i]), where W[i]s are newly created matrices. Raises: ValueError: if some of the arguments has unspecified or wrong shape. """ if args is None or (isinstance(args, (list, tuple)) and not args): raise ValueError("`args` must be specified") if not isinstance(args, (list, tuple)): args = [args] # Calculate the total size of arguments on dimension 1. total_arg_size = 0 shapes = [a.get_shape().as_list() for a in args] assert len(set(tuple(shape[:-1]) for shape in shapes)) <= 1 new_shapes = [flatten(shape, 2) for shape in shapes] res_shape = shapes[0][:-1] + [output_size] args = [ tf.reshape(arg, new_shape) for arg, new_shape in zip(args, new_shapes) ] for new_shape in new_shapes: if len(new_shape) != 2: raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes)) if not new_shape[1]: raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes)) else: total_arg_size += new_shape[1] # This is for Dialog Match if feat is not None: total_arg_size -= 2 if state is not None: total_arg_size += state.get_shape().as_list()[-1] # Now the computation. with vs.variable_scope(scope or "Linear"): if var_on_cpu: with tf.device("/cpu:0"): matrix = vs.get_variable("Matrix" + name, [total_arg_size, output_size], initializer=initializer) else: matrix = vs.get_variable("Matrix" + name, [total_arg_size, output_size], initializer=initializer) if wd: weight_decay = tf.mul(tf.nn.l2_loss(matrix), wd, name='weight_loss' + name) tf.add_to_collection('losses', weight_decay) # Modify for Feature Matching if feat is not None: W = vs.get_variable("Embed" + name, [total_arg_size + 2, total_arg_size + 2], initializer=initializer) args_ = tf.transpose(math_ops.matmul(args[0], W)) # [D * N] h1 = tf.transpose(tf.slice(args_, [0, 0], [total_arg_size, -1])) h2 = tf.slice(args_, [total_arg_size, 0], [2, -1]) f = tf.cast(tf.transpose(feat, [1, 0, 2]), 'float32') # [2 * N * A] res = math_ops.matmul(h1, matrix) # [N * A] for i in range(2): h2_ = tf.gather(h2, i) # [1 * N] f_ = tf.gather(f, i) # [1 * N * A] res += tf.transpose(tf.mul(h2_, tf.transpose(f_))) elif state is not None: h = tf.concat(1, [args[0], tf.cast(state, 'float32')]) # [D+A] res = math_ops.matmul(h, matrix) # [N * A] else: if len(args) == 1: res = math_ops.matmul(args[0], matrix) else: res = math_ops.matmul(array_ops.concat(1, args), matrix) if not bias: res = tf.reshape(res, res_shape, name='out' + name) if squeeze: res = tf.squeeze(res, squeeze_dims=[len(res_shape) - 1]) return res bias_term = vs.get_variable( "Bias" + name, [output_size], initializer=init_ops.constant_initializer(bias_start)) res = res + bias_term res = tf.reshape(res, res_shape, name='out' + name) if squeeze: res = tf.squeeze(res, squeeze_dims=[len(res_shape) - 1]) return res
def GraphFn(self, inp, inp1): x1 = math_ops.matmul(inp, inp1, name="matmul") # Relu to reach minimum segment size. x1 = nn.relu(x1, name="relu") return array_ops.identity(x1, name="output_0")
def _testScopedExport(self, test_dir, exported_filenames): graph = ops.Graph() with graph.as_default(): # Creates an inference graph. # Hidden 1 colocate_constraint = constant_op.constant(1.2, name="constraint") images = constant_op.constant(1.2, dtypes.float32, shape=[100, 28], name="images") with ops.name_scope("hidden1"): with graph.colocate_with(colocate_constraint.op): weights1 = variables.Variable(random_ops.truncated_normal( [28, 128], stddev=1.0 / math.sqrt(float(28))), name="weights") # The use of control_flow_ops.cond here is purely for adding test # coverage the save and restore of control flow context (which doesn't # make any sense here from a machine learning perspective). The typical # biases is a simple Variable without the conditions. biases1 = variables.Variable(control_flow_ops.cond( math_ops.less(random.random(), 0.5), lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])), name="biases") hidden1 = nn_ops.relu( math_ops.matmul(images, weights1) + biases1) # Hidden 2 with ops.name_scope("hidden2"): weights2 = variables.Variable(random_ops.truncated_normal( [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights") # The use of control_flow_ops.while_loop here is purely for adding test # coverage the save and restore of control flow context (which doesn't # make any sense here from a machine learning perspective). The typical # biases is a simple Variable without the conditions. def loop_cond(it, _): return it < 2 def loop_body(it, biases2): biases2 += constant_op.constant(0.1, shape=[32]) return it + 1, biases2 _, biases2 = control_flow_ops.while_loop( loop_cond, loop_body, [ constant_op.constant(0), variables.Variable(array_ops.zeros([32]), name="biases") ]) hidden2 = nn_ops.relu( math_ops.matmul(hidden1, weights2) + biases2) # Linear with ops.name_scope("softmax_linear"): weights3 = variables.Variable(random_ops.truncated_normal( [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights") biases3 = variables.Variable(array_ops.zeros([10]), name="biases") logits = math_ops.matmul(hidden2, weights3) + biases3 ops.add_to_collection("logits", logits) # Exports each sub-graph. # Exports the first one with unbound_inputs_col_name set to default. orig_meta_graph1, var_list = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[0]), graph=ops.get_default_graph(), export_scope="hidden1") self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) var_names = [v.name for _, v in var_list.items()] self.assertEqual(["hidden1/biases:0", "hidden1/weights:0"], sorted(var_names)) # Exports the rest with no unbound_inputs_col_name. orig_meta_graph2, _ = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[1]), graph=ops.get_default_graph(), export_scope="hidden2", unbound_inputs_col_name=None) orig_meta_graph3, _ = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[2]), graph=ops.get_default_graph(), export_scope="softmax_linear", unbound_inputs_col_name=None) return [orig_meta_graph1, orig_meta_graph2, orig_meta_graph3]
def linear(args, output_size, bias, bias_initializer=None, kernel_initializer=None, kernel_regularizer=None, bias_regularizer=None, normalize=False): """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. Args: args: a 2D Tensor or a list of 2D, batch x n, Tensors. output_size: int, second dimension of W[i]. bias: boolean, whether to add a bias term or not. bias_initializer: starting value to initialize the bias (default is all zeros). kernel_initializer: starting value to initialize the weight. kernel_regularizer: kernel regularizer bias_regularizer: bias regularizer Returns: A 2D Tensor with shape [batch x output_size] equal to sum_i(args[i] * W[i]), where W[i]s are newly created matrices. Raises: ValueError: if some of the arguments has unspecified or wrong shape. """ if args is None or (nest.is_sequence(args) and not args): raise ValueError("`args` must be specified") if not nest.is_sequence(args): args = [args] # Calculate the total size of arguments on dimension 1. total_arg_size = 0 shapes = [a.get_shape() for a in args] for shape in shapes: if shape.ndims != 2: raise ValueError("linear is expecting 2D arguments: %s" % shapes) if shape[1].value is None: raise ValueError( "linear expects shape[1] to be provided for shape %s, " "but saw %s" % (shape, shape[1])) else: total_arg_size += shape[1].value dtype = [a.dtype for a in args][0] # Now the computation. scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: weights = vs.get_variable(_WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], dtype=dtype, initializer=kernel_initializer, regularizer=kernel_regularizer) if len(args) == 1: res = math_ops.matmul(args[0], weights) else: res = math_ops.matmul(array_ops.concat(args, 1), weights) res = tf.cond(normalize, lambda: tf.contrib.layers.layer_norm(res), lambda: res) # remove the layer's bias if there is one (because it would be redundant) if not bias: return res with vs.variable_scope(outer_scope) as inner_scope: inner_scope.set_partitioner(None) if bias_initializer is None: bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) biases = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size], dtype=dtype, initializer=bias_initializer, regularizer=bias_regularizer) return nn_ops.bias_add(res, biases)
def call(self, inputs, state): """Run one step of LSTM. Args: inputs: input Tensor, 2D, `[batch, num_units]. state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, [batch, state_size]`. If `state_is_tuple` is True, this must be a tuple of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. Returns: A tuple containing: - A `2-D, [batch, output_dim]`, Tensor representing the output of the LSTM after reading `inputs` when previous state was `state`. Here output_dim is: num_proj if num_proj was set, num_units otherwise. - Tensor(s) representing the new state of LSTM after reading `inputs` when the previous state was `state`. Same type and shape(s) as `state`. Raises: ValueError: If input size cannot be inferred from inputs via static shape inference. """ num_proj = self._num_units if self._num_proj is None else self._num_proj sigmoid = math_ops.sigmoid if self._state_is_tuple: (c_prev, h_prev) = state else: c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) h_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) input_size = inputs.get_shape().with_rank(2)[1] if input_size.value is None: raise ValueError( "Could not infer input size from inputs.get_shape()[-1]") # i = input_gate, j = new_input, f = forget_gate, o = output_gate # calculate softmaxed output y_prev = softmax(math_ops.matmul(self._y_w, h_prev) + self._y_b) lstm_matrix = math_ops.matmul( array_ops.concat([inputs, h_prev, y_prev], 1), self._kernel) lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias) i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1) # Diagonal connections if self._use_peepholes: c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev + sigmoid(i + self._w_i_diag * c_prev) * self._activation(j)) else: c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * self._activation(j)) if self._cell_clip is not None: # pylint: disable=invalid-unary-operand-type c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) # pylint: enable=invalid-unary-operand-type if self._use_peepholes: h = sigmoid(o + self._w_o_diag * c) * self._activation(c) else: h = sigmoid(o) * self._activation(c) if self._num_proj is not None: h = math_ops.matmul(h, self._proj_kernel) if self._proj_clip is not None: # pylint: disable=invalid-unary-operand-type h = clip_ops.clip_by_value(h, -self._proj_clip, self._proj_clip) # pylint: enable=invalid-unary-operand-type new_state = (LSTMStateTuple(c, h) if self._state_is_tuple else array_ops.concat([c, h], 1)) return h, new_state
def __call__(self, input_x, input_t, previous_sate): """ :param input_x: tf.placeholder with shape [batch_size, input_x_depth], dtype=tf.float64 :param input_t: tf.placeholder a matrix with shape [batch_size, input_t_depth], dtype=tf.float64 :param previous_sate: the previous hidden states is a tensor with size [batch_size, hidden_state], dtype=tf.float64. if the previous state is the zero state, the size should be [batch_size,], dtype=tf.float64. :return: new hidden state with size [batch_size, hidden_state], dtype=tf.float64. """ if (previous_sate.shape.dims == 1 and previous_sate.shape[0].value != self.__hidden_state) or \ (previous_sate.shape.dims == 2 and previous_sate.shape[1].value != self.__hidden_state): raise ValueError('previous state/ zero state size incompatible') # get predefined variable td = input_t.shape[1].value hs = self.__hidden_state xd = input_x.shape[1].value gw = self.__initial_strategy_map['gate_weight'] gb = self.__initial_strategy_map['gate_bias'] cw = self.__initial_strategy_map['candidate_weight'] cb = self.__initial_strategy_map['candidate_bias'] # define the parameter a GRU cell with tf.variable_scope("cell_para", reuse=tf.AUTO_REUSE): gate_weight = tf.get_variable(name='gate_weight', shape=[td, hs * 2], initializer=gw, dtype=tf.float64) gate_bias = tf.get_variable(name='gate_bias', shape=[hs * 2], initializer=gb, dtype=tf.float64) candidate_weight = tf.get_variable(name='candidate_weight', shape=[xd + hs, hs], initializer=cw, dtype=tf.float64) candidate_bias = tf.get_variable(name='candidate_bias', shape=[hs], initializer=cb, dtype=tf.float64) with tf.name_scope('gate_calc'): gate_value = math_ops.matmul(input_t, gate_weight) gate_value = nn_ops.bias_add(gate_value, gate_bias) gate_value = math_ops.sigmoid(gate_value) with tf.name_scope('gate_split'): r, u = array_ops.split(value=gate_value, num_or_size_splits=2, axis=1) r_state = r * previous_sate with tf.name_scope('candidate_calc'): concat = array_ops.concat([input_x, r_state], axis=1) candidate = math_ops.matmul(concat, candidate_weight, name='candidate_matmul') candidate = nn_ops.bias_add(candidate, candidate_bias, name='candidate_bias_add') c = self.__activation(candidate) with tf.name_scope('new_state'): new_h = u * previous_sate + (1 - u) * c return new_h
def f(): x = constant_op.constant([[1, 2], [3, 4]]) out = math_ops.matmul(v, x) self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
def _grad(op, grad): """A gradient function for RFFT with the provided `rank` and `irfft_fn`.""" fft_length = op.inputs[1] input_shape = _array_ops.shape(op.inputs[0]) is_even = _math_ops.cast(1 - (fft_length[-1] % 2), _dtypes.complex64) def _tile_for_broadcasting(matrix, t): expanded = _array_ops.reshape( matrix, _array_ops.concat([ _array_ops.ones([_array_ops.rank(t) - 2], _dtypes.int32), _array_ops.shape(matrix) ], 0)) return _array_ops.tile( expanded, _array_ops.concat([_array_ops.shape(t)[:-2], [1, 1]], 0)) def _mask_matrix(length): """Computes t_n = exp(sqrt(-1) * pi * n^2 / line_len).""" # TODO(rjryan): Speed up computation of twiddle factors using the # following recurrence relation and cache them across invocations of RFFT. # # t_n = exp(sqrt(-1) * pi * n^2 / line_len) # for n = 0, 1,..., line_len-1. # For n > 2, use t_n = t_{n-1}^2 / t_{n-2} * t_1^2 a = _array_ops.tile( _array_ops.expand_dims(_math_ops.range(length), 0), (length, 1)) b = _array_ops.transpose(a, [1, 0]) return _math_ops.exp( -2j * np.pi * _math_ops.cast(a * b, _dtypes.complex64) / _math_ops.cast(length, _dtypes.complex64)) def _ymask(length): """A sequence of [1+0j, -1+0j, 1+0j, -1+0j, ...] with length `length`.""" return _math_ops.cast(1 - 2 * (_math_ops.range(length) % 2), _dtypes.complex64) y0 = grad[..., 0:1] if rank == 1: ym = grad[..., -1:] extra_terms = y0 + is_even * ym * _ymask(input_shape[-1]) elif rank == 2: # Create a mask matrix for y0 and ym. base_mask = _mask_matrix(input_shape[-2]) # Tile base_mask to match y0 in shape so that we can batch-matmul the # inner 2 dimensions. tiled_mask = _tile_for_broadcasting(base_mask, y0) y0_term = _math_ops.matmul(tiled_mask, _math_ops.conj(y0)) extra_terms = y0_term ym = grad[..., -1:] ym_term = _math_ops.matmul(tiled_mask, _math_ops.conj(ym)) inner_dim = input_shape[-1] ym_term = _array_ops.tile( ym_term, _array_ops.concat([ _array_ops.ones([_array_ops.rank(grad) - 1], _dtypes.int32), [inner_dim] ], 0)) * _ymask(inner_dim) extra_terms += is_even * ym_term # The gradient of RFFT is the IRFFT of the incoming gradient times a scaling # factor, plus some additional terms to make up for the components dropped # due to Hermitian symmetry. input_size = _math_ops.cast( _fft_size_for_grad(op.inputs[0], rank), _dtypes.float32) the_irfft = irfft_fn(grad, fft_length) return 0.5 * (the_irfft * input_size + _math_ops.real(extra_terms)), None
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled, num_classes, num_true=1, sampled_values=None, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", name=None): """Helper function for nce_loss and sampled_softmax_loss functions. Computes sampled output training logits and labels suitable for implementing e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see sampled_softmax_loss). Note: In the case where num_true > 1, we assign to each target class the target probability 1 / num_true so that the target probabilities sum to 1 per-example. Args: weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` objects whose concatenation along dimension 0 has shape `[num_classes, dim]`. The (possibly-partitioned) class embeddings. biases: A `Tensor` of shape `[num_classes]`. The class biases. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. Note that this format differs from the `labels` argument of `nn.softmax_cross_entropy_with_logits`. num_sampled: An `int`. The number of classes to randomly sample per batch. num_classes: An `int`. The number of possible classes. num_true: An `int`. The number of target classes per training example. sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, `sampled_expected_count`) returned by a `*_candidate_sampler` function. (if None, we default to `log_uniform_candidate_sampler`) subtract_log_q: A `bool`. whether to subtract the log expected count of the labels in the sample to get the logits of the true labels. Default is True. Turn off for Negative Sampling. remove_accidental_hits: A `bool`. whether to remove "accidental hits" where a sampled class equals one of the target classes. Default is False. partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: A name for the operation (optional). Returns: out_logits, out_labels: `Tensor` objects each with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). """ if not isinstance(weights, list): weights = [weights] with ops.op_scope(weights + [biases, inputs, labels], name, "compute_sampled_logits"): if labels.dtype != dtypes.int64: labels = math_ops.cast(labels, dtypes.int64) labels_flat = array_ops.reshape(labels, [-1]) # Sample the negative labels. # sampled shape: [num_sampled] tensor # true_expected_count shape = [batch_size, 1] tensor # sampled_expected_count shape = [num_sampled] tensor if sampled_values is None: sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler( true_classes=labels, num_true=num_true, num_sampled=num_sampled, unique=True, range_max=num_classes) # NOTE: pylint cannot tell that 'sampled_values' is a sequence # pylint: disable=unpacking-non-sequence sampled, true_expected_count, sampled_expected_count = sampled_values # pylint: enable=unpacking-non-sequence # labels_flat is a [batch_size * num_true] tensor # sampled is a [num_sampled] int tensor all_ids = array_ops.concat(0, [labels_flat, sampled]) # weights shape is [num_classes, dim] all_w = embedding_ops.embedding_lookup( weights, all_ids, partition_strategy=partition_strategy) all_b = embedding_ops.embedding_lookup(biases, all_ids) # true_w shape is [batch_size * num_true, dim] # true_b is a [batch_size * num_true] tensor true_w = array_ops.slice( all_w, [0, 0], array_ops.pack([array_ops.shape(labels_flat)[0], -1])) true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat)) # inputs shape is [batch_size, dim] # true_w shape is [batch_size * num_true, dim] # row_wise_dots is [batch_size, num_true, dim] dim = array_ops.shape(true_w)[1:2] new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim]) row_wise_dots = math_ops.mul( array_ops.expand_dims(inputs, 1), array_ops.reshape(true_w, new_true_w_shape)) # We want the row-wise dot plus biases which yields a # [batch_size, num_true] tensor of true_logits. dots_as_matrix = array_ops.reshape(row_wise_dots, array_ops.concat(0, [[-1], dim])) true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true]) true_b = array_ops.reshape(true_b, [-1, num_true]) true_logits += true_b # Lookup weights and biases for sampled labels. # sampled_w shape is [num_sampled, dim] # sampled_b is a [num_sampled] float tensor sampled_w = array_ops.slice( all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1]) sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1]) # inputs has shape [batch_size, dim] # sampled_w has shape [num_sampled, dim] # sampled_b has shape [num_sampled] # Apply X*W'+B, which yields [batch_size, num_sampled] sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True) + sampled_b if remove_accidental_hits: acc_hits = candidate_sampling_ops.compute_accidental_hits( labels, sampled, num_true=num_true) acc_indices, acc_ids, acc_weights = acc_hits # This is how SparseToDense expects the indices. acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1]) acc_ids_2d_int32 = array_ops.reshape( math_ops.cast(acc_ids, dtypes.int32), [-1, 1]) sparse_indices = array_ops.concat( 1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices") # Create sampled_logits_shape = [batch_size, num_sampled] sampled_logits_shape = array_ops.concat(0, [ array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0) ]) if sampled_logits.dtype != acc_weights.dtype: acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype) sampled_logits += sparse_ops.sparse_to_dense( sparse_indices, sampled_logits_shape, acc_weights, default_value=0.0, validate_indices=False) if subtract_log_q: # Subtract log of Q(l), prior probability that l appears in sampled. true_logits -= math_ops.log(true_expected_count) sampled_logits -= math_ops.log(sampled_expected_count) # Construct output logits and labels. The true labels/logits start at col 0. out_logits = array_ops.concat(1, [true_logits, sampled_logits]) # true_logits is a float tensor, ones_like(true_logits) is a float tensor # of ones. We then divide by num_true to ensure the per-example labels sum # to 1.0, i.e. form a proper probability distribution. out_labels = array_ops.concat(1, [ array_ops.ones_like(true_logits) / num_true, array_ops.zeros_like(sampled_logits) ]) return out_logits, out_labels
def main(argv): if len(argv) > 1: raise app.UsageError('Too many command-line arguments.') shutil.rmtree(FLAGS.saved_model_path) # Create the graph x1 = array_ops.placeholder(dtypes.int32, shape=(1, 3), name='input1') # 'y' is a read-only Reference Variable in this test case, which will be # converted to Resource Variable in the MLIR lowering pass. y = variable_scope.get_variable(name='y', initializer=[[1], [2], [3]]) r1 = math_ops.matmul(x1, y, name='result1') x2 = array_ops.placeholder(dtypes.int32, shape=(1, 3), name='input2') r21 = math_ops.matmul(x2, y, name='result21') t2 = math_ops.add(x2, 1) r22 = math_ops.matmul(t2, y, name='result22') x3 = array_ops.placeholder(dtypes.int32, shape=(1, 3), name='input3') r31 = math_ops.matmul(x3, y, name='result31') r32 = math_ops.add(x3, r31, name='result32') r33 = math_ops.add(x3, r32, name='result33') sess = session.Session() sess.run(variables.global_variables_initializer()) sm_builder = builder.SavedModelBuilder(FLAGS.saved_model_path) tensor_info_x1 = utils.build_tensor_info(x1) tensor_info_x2 = utils.build_tensor_info(x2) tensor_info_x3 = utils.build_tensor_info(x3) tensor_info_r1 = utils.build_tensor_info(r1) tensor_info_r21 = utils.build_tensor_info(r21) tensor_info_r22 = utils.build_tensor_info(r22) tensor_info_r31 = utils.build_tensor_info(r31) tensor_info_r32 = utils.build_tensor_info(r32) tensor_info_r33 = utils.build_tensor_info(r33) toy_signature = (signature_def_utils.build_signature_def( inputs={'x1': tensor_info_x1}, outputs={'r1': tensor_info_r1}, method_name=signature_constants.PREDICT_METHOD_NAME)) another_toy_signature = (signature_def_utils.build_signature_def( inputs={'x2': tensor_info_x2}, outputs={ 'r21': tensor_info_r21, 'r22': tensor_info_r22, }, method_name=signature_constants.PREDICT_METHOD_NAME)) yet_another_toy_signature = (signature_def_utils.build_signature_def( inputs={'x3': tensor_info_x3}, outputs={ 'r31': tensor_info_r31, 'r32': tensor_info_r32, 'r33': tensor_info_r33, }, method_name=signature_constants.PREDICT_METHOD_NAME)) sm_builder.add_meta_graph_and_variables( sess, [tag_constants.SERVING], signature_def_map={ 'toy': toy_signature, 'another_toy': another_toy_signature, 'yet_another_toy': yet_another_toy_signature, signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: toy_signature, }, strip_default_attrs=True) sm_builder.save()
def loss(): pred = math_ops.matmul( embedding_ops.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop return pred * pred
def _expected(mat, tangent): return (math_ops.matmul(tangent, mat, transpose_b=True) + math_ops.matmul(mat, tangent, transpose_b=True))
def _einsum_reduction(t0, t0_axis_labels, t1, t1_axis_labels, axes_to_sum): """Helper for einsum() that computes the result of a two-argument einsum(). Args: t0: a `Tensor` t0_axis_labels: a string of axis labels. This string's length must equal the rank of t0. t1: a `Tensor` t1_axis_labels: a string to axis labels. This string's length must equal the rank of t1. axes_to_sum: set of labels of axes to be summed over Returns: A `Tensor` whose elements are obtained by summing, over all axes in `axes_to_sum`, the corresponding elements of `t0` and `t1`. For example, if t0_axis_labels == 'abijk', t1_axis_labels == 'acjkl', and axes_to_sum == {j,k}, this will return a tensor x where out[a,b,c,i,l] = sum_j sum_k t0[a,b,i,j,k] * t1[a,c,j,k,l] Raises: ValueError: if the rank of `t0` does not match the length of `t0_axis_labels`, or that of `t1` does not match the length of `t1_axis_labels`. """ if len(t0_axis_labels) != len(t0.get_shape()): raise ValueError( 'Tensor t0 of rank %d does not match einsum reduction of length %d' % (len(t0.get_shape()), len(t0_axis_labels))) if len(t1_axis_labels) != len(t1.get_shape()): raise ValueError( 'Tensor t1 of rank %d does not match einsum reduction of length %d' % (len(t1.get_shape()), len(t1_axis_labels))) # This function computes the result of a two-argument einsum() using batch # matrix multiplication. This involves # 1. transposing t0 and t1 so that axes are in the correct order for # batch matrix multiplication, and # 2. reshaping t0 and t1 so that they are both of rank 3. # First, we divide axes into three groups: # * "preserved" axes are present in both inputs and the output # * "summed" axes are present in both inputs but not the output # * "broadcast" axes are present in exactly one input and the output # # As an example, if the einsum is abijk,acjkl->abcil, then "a" is a # preserved axis, "b" and "c" are broadcast axes, and "j" and "k" are # summed axes. assert all(a in t0_axis_labels and a in t1_axis_labels for a in axes_to_sum) preserved_axes = (set(t0_axis_labels) & set(t1_axis_labels)) - axes_to_sum broadcast_axes = {} for i, sym_list in enumerate([t0_axis_labels, t1_axis_labels]): broadcast_axes[i] = set(sym_list) - preserved_axes - axes_to_sum # Reorder the axes so that: # 1. preserved axes come first in both inputs # 2. in input 0, broadcast axes come next, followed by summed axes # 3. in input 1, summed axes come next, followed by broadcast axes def sort_key(input_index, a): if a in preserved_axes: return (-1, a) elif ((input_index == 0 and a in broadcast_axes[0]) or (input_index == 1 and a in axes_to_sum)): return (0, a) else: return (1, a) axis_labels = [t0_axis_labels, t1_axis_labels] sorted_axes = [ sorted(sym_list, key=lambda a: sort_key(i, a)) for i, sym_list in enumerate(axis_labels) ] inputs = [t0, t1] for i, axes_str in enumerate(axis_labels): perm = [axes_str.find(a) for a in sorted_axes[i]] inputs[i] = _transpose_if_necessary(inputs[i], perm) t0, t1 = inputs if not axes_to_sum: # In the special case where there are no axes to sum over, reduce to mul() # rather than to batch matrix multiplication. for _ in broadcast_axes[1]: t0 = array_ops.expand_dims(t0, -1) for _ in broadcast_axes[0]: t1 = array_ops.expand_dims(t1, len(preserved_axes)) product = math_ops.multiply(t0, t1) product_axes = sorted_axes[0] + sorted_axes[1][len(preserved_axes):] return product, ''.join(product_axes) else: # Reduce to matmul(). # Reshape both inputs so as to combine multiple broadcast axes # into a single axis, and combine multiple summed axes into a # single axis. t0_shape = _get_shape(t0) num_broadcast_elements_t0 = _total_size( t0_shape[len(preserved_axes):-len(axes_to_sum)]) num_summed_elements = _total_size(t0_shape[-len(axes_to_sum):]) new_shape = ( t0_shape[:len(preserved_axes)] + [num_broadcast_elements_t0, num_summed_elements]) t0 = _reshape_if_necessary(t0, new_shape) t1_shape = _get_shape(t1) num_broadcast_elements_t1 = _total_size( t1_shape[len(preserved_axes) + len(axes_to_sum):]) new_shape = ( t1_shape[:len(preserved_axes)] + [num_summed_elements, num_broadcast_elements_t1]) t1 = _reshape_if_necessary(t1, new_shape) product = math_ops.matmul(t0, t1) # Undo compaction of broadcast axes uncompacted_shape = ( t0_shape[:len(preserved_axes) + len(broadcast_axes[0])] + t1_shape[len(t1_shape) - len(broadcast_axes[1]):]) product = _reshape_if_necessary(product, uncompacted_shape) product_axes = ( sorted_axes[0][:len(preserved_axes) + len(broadcast_axes[0])] + sorted_axes[1][len(sorted_axes[1]) - len(broadcast_axes[1]):]) return product, ''.join(product_axes)