def benchmark_einsum(self):
  for equation, dim in self.cases:
    with ops.Graph().as_default(), \
        session.Session(config=benchmark.benchmark_config()) as sess, \
        ops.device('/cpu:0'):
      r = np.random.RandomState(0)
      input_subscripts = equation.split('->')[0].split(',')
      input_vars = []
      for subscript in input_subscripts:
        input_shape = (dim,) * len(subscript)
        input_vars.append(
            variables.Variable(np.array(r.randn(*input_shape), np.float32)))
      variables.global_variables_initializer().run()

      if len(input_vars) <= 2:
        self.run_op_benchmark(
            sess,
            special_math_ops.einsum(equation, *input_vars),
            min_iters=50,
            name='einsum_cpu_({})_{}'.format(equation, dim))
      else:
        for optimize in ['greedy', 'auto']:
          self.run_op_benchmark(
              sess,
              special_math_ops.einsum(equation, *input_vars, optimize=optimize),
              min_iters=50,
              name='einsum_cpu_({})_{}_{}'.format(equation, optimize, dim))
def call(self, inputs, states):
  prev_output = states[0]
  h = special_math_ops.einsum('bij,ijkl->bkl', inputs, self.kernel)
  h += array_ops.expand_dims(self.bias, axis=0)
  output = h + special_math_ops.einsum('bij,ijkl->bkl', prev_output,
                                       self.recurring_kernel)
  return output, [output]
def call(self,
         inputs,
         attention_mask=None,
         return_attention_scores=False,
         training=None):
  if not self._built_from_signature:
    self._build_from_signature(featuremap=inputs)

  # N = `num_attention_heads`
  # H = `size_per_head`
  # `query` = [B, T, N, H]
  query = self._query_dense(inputs)
  # `key` = [B, S, N, H]
  key = self._key_dense(inputs)
  # `value` = [B, S, N, H]
  value = self._value_dense(inputs)

  query = math_ops.multiply(query, 1.0 / math.sqrt(float(self._key_dim)))
  attention_scores = special_math_ops.einsum(self._dot_product_equation, key,
                                             query)
  if self.relative:
    attention_scores += self.relative_logits(query)
  attention_scores = self._masked_softmax(attention_scores, attention_mask)
  attention_scores_dropout = self._dropout_layer(
      attention_scores, training=training)
  attention_output = special_math_ops.einsum(self._combine_equation,
                                             attention_scores_dropout, value)
  # attention_output = self._output_dense(attention_output)
  hh, ww = inputs.shape[1], inputs.shape[2]
  attention_output = tf.reshape(
      attention_output, [-1, hh, ww, self.num_heads * self.key_dim])
  if return_attention_scores:
    return attention_output, attention_scores
  return attention_output
def test_input_is_placeholder(self):
  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(1, None))
    m1 = array_ops.placeholder(dtypes.int32, shape=(None, 1))
    out = special_math_ops.einsum('ij,jk->ik', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [[2], [1], [1]],
      }
      np.testing.assert_almost_equal([[7]],
                                     sess.run(out, feed_dict=feed_dict))

  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(None, 3))
    m1 = array_ops.placeholder(dtypes.int32, shape=(3,))
    out = special_math_ops.einsum('ij,j->i', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [2, 1, 1],
      }
      np.testing.assert_almost_equal([7], sess.run(out, feed_dict=feed_dict))
def test_invalid_equation(self):
  r = np.random.RandomState(0)
  cases = [
      # invalid equation format.
      ('a0->a', r.randn(5, 3)),
      ('a->a,a', r.randn(5)),
      ('a->a->a', r.randn(5)),
      ('ijk ijk', r.randn(1, 2, 3), r.randn(1, 2, 3)),
      ('ij.jk->ik', r.randn(2, 3), r.randn(3, 4)),
      # output label not present in input.
      ('a->b', r.randn(5)),
      ('ij,jk->im', r.randn(2, 3), r.randn(3, 4)),
      # wrong shape.
      ('ij,jk->ik', r.randn(1, 2, 3), r.randn(3, 4)),
      # inconsistent dimensions.
      ('ij,jk->ik', r.randn(2, 3), r.randn(4, 4)),
      # output has repeated subscripts.
      ('ij,jk->iik', r.randn(2, 3), r.randn(3, 4)),
      # too many ellipses.
      ('...ij...,jk...->ik...', r.randn(2, 3), r.randn(3, 4)),
      ('...ij,jk...->...ik...', r.randn(2, 3), r.randn(3, 4)),
      # invalid broadcast dimensions.
      ('...ij,...jk->...ik', r.randn(5, 2, 3), r.randn(7, 3, 4)),
      # output should have ellipsis when broadcasting shape is non-empty.
      ('...ij,...jk->ik', r.randn(2, 2, 3), r.randn(3, 4)),
  ]
  for args in cases:
    with self.assertRaises((ValueError, errors.InvalidArgumentError)):
      _ = special_math_ops.einsum(*args)
    placeholders = [
        array_ops.placeholder_with_default(x, shape=None) for x in args[1:]
    ]
    with self.assertRaises((ValueError, errors.InvalidArgumentError)):
      _ = self.evaluate(special_math_ops.einsum(args[0], *placeholders))
def test_input_is_placeholder(self):
  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(1, None))
    m1 = array_ops.placeholder(dtypes.int32, shape=(None, 1))
    out = special_math_ops.einsum('ij,jk->ik', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [[2], [1], [1]],
      }
      np.testing.assert_almost_equal([[7]],
                                     sess.run(out, feed_dict=feed_dict))

  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(None, 3))
    m1 = array_ops.placeholder(dtypes.int32, shape=(3,))
    out = special_math_ops.einsum('ij,j->i', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [2, 1, 1],
      }
      np.testing.assert_almost_equal([7], sess.run(out, feed_dict=feed_dict))

  # Tests for placeholders which have two or more None values.
  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(None, None, 2))
    m1 = array_ops.placeholder(dtypes.int32, shape=(2, 1))
    out = special_math_ops.einsum('ijk,kl->ijl', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[[1, 2]]],
          m1: [[3], [2]],
      }
      np.testing.assert_almost_equal([[[7]]],
                                     sess.run(out, feed_dict=feed_dict))

  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(2, 1))
    m1 = array_ops.placeholder(dtypes.int32, shape=(None, None, 2))
    out = special_math_ops.einsum('kl,ijk->ijl', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[3], [2]],
          m1: [[[1, 2]]],
      }
      np.testing.assert_almost_equal([[[7]]],
                                     sess.run(out, feed_dict=feed_dict))

  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(None, None, 2))
    m1 = array_ops.placeholder(dtypes.int32, shape=(2,))
    out = special_math_ops.einsum('ijk,k->ij', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[[1, 2]]],
          m1: [3, 2],
      }
      np.testing.assert_almost_equal([[7]],
                                     sess.run(out, feed_dict=feed_dict))
def eigen_basis_kron_product_3d(left, right, vec, transpose=False):
  if transpose:
    left_t = array_ops.transpose(left)
    right_matmul = special_math_ops.einsum("bij,jk->bik", vec, right)
    result = special_math_ops.einsum("ik,bkz->biz", left_t, right_matmul)
    return result
  else:
    raise NotImplementedError()
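# Hedged NumPy sketch (not part of the original code) checking what
# eigen_basis_kron_product_3d computes per batch element when transpose=True:
# result[b] = left.T @ vec[b] @ right, i.e. a batched application of the
# Kronecker-structured operator without materializing kron(left, right).
# The shapes below are illustrative assumptions.
import numpy as np

_left = np.random.randn(4, 4)
_right = np.random.randn(5, 5)
_vec = np.random.randn(3, 4, 5)  # batch of matrices

_via_einsum = np.einsum("ik,bkz->biz", _left.T,
                        np.einsum("bij,jk->bik", _vec, _right))
_reference = np.stack([_left.T @ v @ _right for v in _vec])
assert np.allclose(_via_einsum, _reference)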
def _compute_attention(self, query, key, value, attention_mask=None):
  # query = tf.math.multiply(query, self._query_scale)
  attn_scores = special_math_ops.einsum(self._dot_product_equation, key,
                                        query)
  # TODO: can I replace softmax here with something more log-likelihood
  # related? (i.e. continuous attention)
  attn_scores = self._masked_softmax(attn_scores, attention_mask)
  attn_output = special_math_ops.einsum(self._combine_equation, attn_scores,
                                        value)
  return attn_output, attn_scores
def loop_fn(i):
  x = array_ops.gather(x_series, 0)  # invariant.
  y = array_ops.gather(y_series, 0)  # invariant.
  x_i = array_ops.gather(x_series, i)
  y_i = array_ops.gather(y_series, i)
  z1 = special_math_ops.einsum("ab,bc->ac", x_i, y)
  z2 = special_math_ops.einsum("ab,bc->ac", x, y_i)
  z3 = special_math_ops.einsum("ab,bc->ac", x, y)
  z4 = special_math_ops.einsum("ab,bc->ac", x_i, y_i)
  z5 = special_math_ops.einsum("cd,ce->de", y_i, x_i)  # Includes transpose.
  outputs = [z1, z2, z3, z4, z5]
  return outputs
def testUnary(self):
  for dtype in self.float_types:
    self._testUnary(
        lambda x: special_math_ops.einsum('ijk->kji', x),
        np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
        expected=np.array([[[1], [2], [6]], [[3], [5], [8]]], dtype=dtype))
    with compat.forward_compatibility_horizon(2019, 10, 19):
      self._testUnary(
          lambda x: special_math_ops.einsum('ijk->kji', x),
          np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
          expected=np.array([[[1], [2], [6]], [[3], [5], [8]]], dtype=dtype))
def testMatMul(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ij,jk->ik', x, y),
        np.array([[-0.25]], dtype=dtype),
        np.array([[8]], dtype=dtype),
        expected=np.array([[-2]], dtype=dtype))
    with compat.forward_compatibility_horizon(2019, 10, 19):
      self._testBinary(
          lambda x, y: special_math_ops.einsum('ij,jk->ik', x, y),
          np.array([[-0.25]], dtype=dtype),
          np.array([[8]], dtype=dtype),
          expected=np.array([[-2]], dtype=dtype))
def testReducedIndices(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ij,j->', x, y),
        np.array([[1, 3], [2, 5], [6, 8]], dtype=dtype),
        np.array([3, 2], dtype=dtype),
        expected=np.array(59, dtype=dtype))
    with compat.forward_compatibility_horizon(2019, 10, 19):
      self._testBinary(
          lambda x, y: special_math_ops.einsum('ij,j->', x, y),
          np.array([[1, 3], [2, 5], [6, 8]], dtype=dtype),
          np.array([3, 2], dtype=dtype),
          expected=np.array(59, dtype=dtype))
def testImplicitForm(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ijk,kji', x, y),
        np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
        np.array([[[1], [3], [2]], [[5], [6], [8]]], dtype=dtype),
        expected=np.array(128, dtype=dtype))
    with compat.forward_compatibility_horizon(2019, 10, 19):
      self._testBinary(
          lambda x, y: special_math_ops.einsum('ijk,kji', x, y),
          np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
          np.array([[[1], [3], [2]], [[5], [6], [8]]], dtype=dtype),
          expected=np.array(128, dtype=dtype))
def _compute_attention(self, query, key, value, attention_mask=None):
  """Applies Dot-product attention with query, key, value tensors.

  This function defines the computation inside `call` with projected
  multi-head Q, K, V inputs. Users can override this function for
  customized attention implementation.

  Args:
    query: Projected query `Tensor` of shape `[B, T, N, key_dim]`.
    key: Projected key `Tensor` of shape `[B, T, N, key_dim]`.
    value: Projected value `Tensor` of shape `[B, T, N, value_dim]`.
    attention_mask: a boolean mask of shape `[B, T, S]`, that prevents
      attention to certain positions.

  Returns:
    attention_output: Multi-headed outputs of attention computation.
    attention_scores: Multi-headed attention weights.
  """
  # Note: Applying scalar multiply at the smaller end of einsum improves
  # XLA performance, but may introduce slight numeric differences in
  # the Transformer attention head.
  query = math_ops.multiply(query, 1.0 / math.sqrt(float(self._key_dim)))

  # Take the dot product between "query" and "key" to get the raw
  # attention scores.
  attention_scores = special_math_ops.einsum(self._dot_product_equation, key,
                                             query)

  # Normalize the attention scores to probabilities.
  # `attention_scores` = [B, N, T, S]
  if attention_mask is not None:
    # The expand dim happens starting from the `num_heads` dimension,
    # (<batch_dims>, num_heads, <query_attention_dims, key_attention_dims>)
    mask_expansion_axes = [-len(self._attention_axes) * 2 - 1]
    for _ in range(len(attention_scores.shape) - len(attention_mask.shape)):
      attention_mask = array_ops.expand_dims(
          attention_mask, axis=mask_expansion_axes)
  attention_scores = self._masked_softmax(attention_scores, attention_mask)

  # This is actually dropping out entire tokens to attend to, which might
  # seem a bit unusual, but is taken from the original Transformer paper.
  attention_scores_dropout = self._dropout_layer(attention_scores)

  # `context_layer` = [B, T, N, H]
  attention_output = special_math_ops.einsum(self._combine_equation,
                                             attention_scores_dropout, value)
  return attention_output, attention_scores
def _compute_attention(self,
                       query,
                       key,
                       value,
                       attention_mask=None,
                       training=None):
  """Applies Dot-product attention with query, key, value tensors.

  This function defines the computation inside `call` with projected
  multi-head Q, K, V inputs. Users can override this function for
  customized attention implementation.

  Args:
    query: Projected query `Tensor` of shape `[B, T, N, key_dim]`.
    key: Projected key `Tensor` of shape `[B, T, N, key_dim]`.
    value: Projected value `Tensor` of shape `[B, T, N, value_dim]`.
    attention_mask: a boolean mask of shape `[B, T, S]`, that prevents
      attention to certain positions.
    training: Python boolean indicating whether the layer should behave in
      training mode (adding dropout) or in inference mode (doing nothing).

  Returns:
    attention_output: Multi-headed outputs of attention computation.
    attention_scores: Multi-headed attention weights.
  """
  # Note: Applying scalar multiply at the smaller end of einsum improves
  # XLA performance, but may introduce slight numeric differences in
  # the Transformer attention head.
  query = math_ops.multiply(query, 1.0 / math.sqrt(float(self._key_dim)))

  # Take the dot product between "query" and "key" to get the raw
  # attention scores.
  attention_scores = special_math_ops.einsum(self._dot_product_equation, key,
                                             query)

  attention_scores = self._masked_softmax(attention_scores, attention_mask)

  # This is actually dropping out entire tokens to attend to, which might
  # seem a bit unusual, but is taken from the original Transformer paper.
  attention_scores_dropout = self._dropout_layer(
      attention_scores, training=training)

  # `context_layer` = [B, T, N, H]
  attention_output = special_math_ops.einsum(self._combine_equation,
                                             attention_scores_dropout, value)
  return attention_output, attention_scores
def testUnary(self):
  for dtype in self.float_types:
    self._testUnary(
        lambda x: special_math_ops.einsum('ijk->kji', x),
        np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
        expected=np.array([[[1], [2], [6]], [[3], [5], [8]]], dtype=dtype))
def testReducedIndices(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ij,j->', x, y),
        np.array([[1, 3], [2, 5], [6, 8]], dtype=dtype),
        np.array([3, 2], dtype=dtype),
        expected=np.array(59, dtype=dtype))
def testMatMul(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ij,jk->ik', x, y),
        np.array([[-0.25]], dtype=dtype),
        np.array([[8]], dtype=dtype),
        expected=np.array([[-2]], dtype=dtype))
def einsum(subscripts, *operands, **kwargs):  # pylint: disable=missing-docstring
  casting = kwargs.get('casting', 'safe')
  optimize = kwargs.get('optimize', False)
  if casting == 'safe':
    operands = np_array_ops._promote_dtype(*operands)  # pylint: disable=protected-access
  elif casting == 'no':
    operands = [np_array_ops.asarray(x) for x in operands]
  else:
    raise ValueError(
        'Invalid value for argument `casting`. '
        f'Expected casting="safe" or casting="no". Received: casting={casting}')
  if not optimize:
    # TF doesn't have a "no optimization" option.
    # TODO(wangpeng): Print a warning that np and tf use different
    # optimizations.
    tf_optimize = 'greedy'
  elif optimize == True:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
    tf_optimize = 'greedy'
  elif optimize == 'greedy':
    tf_optimize = 'greedy'
  elif optimize == 'optimal':
    tf_optimize = 'optimal'
  else:
    raise ValueError(
        'Invalid value for argument `optimize`. '
        'Expected one of {True, "greedy", "optimal"}. '
        f'Received: optimize={optimize}')

  res = special_math_ops.einsum(subscripts, *operands, optimize=tf_optimize)
  return res
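# Hedged usage sketch for the NumPy-compatible wrapper above, assuming it is
# the variant exposed publicly as tf.experimental.numpy.einsum. Note how the
# `optimize` values map: False, True and 'greedy' all use TF's 'greedy'
# contraction path, while 'optimal' requests the exhaustive contraction-order
# search.
import numpy as np
import tensorflow as tf

a = np.random.randn(2, 3).astype(np.float32)
b = np.random.randn(3, 4).astype(np.float32)
out = tf.experimental.numpy.einsum('ij,jk->ik', a, b, optimize='optimal')
print(out.shape)  # (2, 4)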
def test_invalid_keyword_arguments(self):
  m0 = array_ops.placeholder(dtypes.int32, shape=(1, None))
  m1 = array_ops.placeholder(dtypes.int32, shape=(None, 1))
  with self.assertRaisesRegexp(
      TypeError,
      'invalid keyword arguments for this function: invalid1, invalid2'):
    _ = special_math_ops.einsum(
        'ij,jk->ik',
        m0,
        m1,
        name="name",
        invalid1="value1",
        invalid2="value2")
def call(self, inputs, state):
  """c_k_RNN basic operations."""
  # e.g. scipy.special.binom(3, [0, 1, 2, 3]) = array([1., 3., 3., 1.])
  # coeff_mat = math_ops.cast(
  #     scipy.special.binom(self._c_n, np.arange(self._c_n)) *
  #     np.power(-1, np.flip(np.arange(self._c_n))),
  #     dtype=dtypes.float32)
  # np.power(-1, np.arange(c_n) + 1) is the (-1)^n term.

  # State dimension is [batch_size, c_k * num_hidden];
  # we want [batch_size, c_k, num_hidden].
  full_state = state[:, :self._num_units * (self._c_n - 1)]
  # full_state records the entire c_k timestep states; now we discard the
  # earliest state from the previous step.
  state = gen_array_ops.reshape(state, [-1, self._c_n, self._num_units])

  # tanh(W[h,x] + b)
  current_state = math_ops.matmul(
      array_ops.concat([inputs, state[:, 0, :]], 1), self._kernel)
  current_state = nn_ops.bias_add(current_state, self._bias)
  current_state = self._activation(current_state)
  current_state += special_math_ops.einsum('ijk,jk->ik', state,
                                           self._kernel_A)
  # current_state = special_math_ops.einsum('ijk,jk->ik', state, self._kernel_A) + \
  #     special_math_ops.einsum('ij,j->ij', current_state,
  #                             (1 - math_ops.reduce_sum(self._kernel_A, 0)))
  # Einstein summation, state: [batch_size, c_k, num_hidden],
  # kernel_A: [c_k, num_hidden, num_hidden], result: [batch_size, num_hidden].
  full_state = array_ops.concat([current_state, full_state], axis=1)
  output = array_ops.concat(
      [self._kernel[inputs.get_shape().as_list()[1]:, :], self._kernel_A],
      axis=0)
  return output, full_state
def test_invalid_keyword_arguments(self):
  r = np.random.RandomState(0)
  a = array_ops.placeholder_with_default(r.randn(2, 3), shape=(2, 3))
  b = array_ops.placeholder_with_default(r.randn(3, 4), shape=(3, 4))
  with self.assertRaises(TypeError):
    _ = special_math_ops.einsum(
        'ij,jk->ik', a, b, name='name', invalid1='value1', invalid2='value2')
def run_test(self, axes, expanded_axes=None):
  expanded_axes = expanded_axes if expanded_axes is not None else axes
  all_axes = {
      ax: np.random.randint(4, 12) for ax in expanded_axes if ax.isalpha()
  }

  input_vals = []
  input_axes, _, _ = axes.partition('->')

  for idx in input_axes.split(','):
    shape = [all_axes[ax] for ax in idx if ax.isalpha()]
    input_vals.append(np.random.random(shape))

  input_tensors = [constant_op.constant(val) for val in input_vals]
  output_tensor = special_math_ops.einsum(axes, *input_tensors)

  with self.session(use_gpu=True):
    output_value = self.evaluate(output_tensor)

  correct_value = 0
  if axes == 'ijji':
    output = math_ops.trace(*input_tensors)
    correct_value = self.evaluate(output)
  else:
    correct_value = np.einsum(axes, *input_vals)

  err = np.abs(correct_value - output_value).max()
  self.assertLess(err, 1e-8)
def testImplicitForm(self):
  for dtype in self.float_types:
    self._testBinary(
        lambda x, y: special_math_ops.einsum('ijk,kji', x, y),
        np.array([[[1, 3], [2, 5], [6, 8]]], dtype=dtype),
        np.array([[[1], [3], [2]], [[5], [6], [8]]], dtype=dtype),
        expected=np.array(128, dtype=dtype))
def benchmarkEinsum(self):
  for equation, dim in self.cases:
    with ops.Graph().as_default(), \
        session.Session(config=benchmark.benchmark_config()) as sess, \
        ops.device('/cpu:0'):
      r = np.random.RandomState(0)
      input_subscripts = equation.split('->')[0].split(',')
      input_vars = []
      for subscript in input_subscripts:
        input_shape = (dim,) * len(subscript)
        input_vars.append(
            variables.Variable(np.array(r.randn(*input_shape), np.float32)))
      self.evaluate(variables.global_variables_initializer())

      # Call einsum_v1.
      self.run_op_benchmark(
          sess,
          special_math_ops.einsum(equation, *input_vars),
          min_iters=50,
          name='einsum_v1_cpu_({})_{}'.format(equation, dim))

      # Call gen_linalg_ops.einsum.
      self.run_op_benchmark(
          sess,
          gen_linalg_ops.einsum(input_vars, equation),
          min_iters=50,
          name='einsum_v2_cpu_({})_{}'.format(equation, dim))
def _convdiag_sum_of_squares(self, patches, outputs_grad):
  # This computes the sum of the squares of the per-training-case "gradients".
  # It does this simply by computing a giant tensor containing all of these,
  # doing an entry-wise square, and then summing along the batch dimension.
  case_wise_gradients = special_math_ops.einsum("bijk,bijl->bkl", patches,
                                                outputs_grad)
  return math_ops.reduce_sum(math_ops.square(case_wise_gradients), axis=0)
def einsum(subscripts, *operands, **kwargs):  # pylint: disable=missing-docstring
  casting = kwargs.get('casting', 'safe')
  optimize = kwargs.get('optimize', False)
  if casting == 'safe':
    operands = np_array_ops._promote_dtype(*operands)  # pylint: disable=protected-access
  elif casting == 'no':
    operands = [np_array_ops.asarray(x) for x in operands]
  else:
    raise ValueError('casting policy not supported: %s' % casting)
  if not optimize:
    # TF doesn't have a "no optimization" option.
    # TODO(wangpeng): Print a warning that np and tf use different
    # optimizations.
    tf_optimize = 'greedy'
  elif optimize == True:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
    tf_optimize = 'greedy'
  elif optimize == 'greedy':
    tf_optimize = 'greedy'
  elif optimize == 'optimal':
    tf_optimize = 'optimal'
  else:
    raise ValueError('`optimize` method not supported: %s' % optimize)

  operands = [x.data for x in operands]
  res = special_math_ops.einsum(subscripts, *operands, optimize=tf_optimize)
  res = np_utils.tensor_to_ndarray(res)
  return res
def call(self, inputs):
  ret = special_math_ops.einsum(self.equation, inputs, self.kernel)
  if self.bias is not None:
    ret += self.bias
  if self.activation is not None:
    ret = self.activation(ret)
  return ret
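# Hedged stand-alone sketch of the einsum-based dense forward pass in the
# call() above, written with public TF ops. The equation, kernel and bias
# shapes, and the ReLU activation are illustrative assumptions, not the
# layer's actual configuration.
import tensorflow as tf

equation = 'ab,bc->ac'
inputs = tf.random.normal([8, 32])
kernel = tf.Variable(tf.random.normal([32, 64]))
bias = tf.Variable(tf.zeros([64]))

ret = tf.einsum(equation, inputs, kernel)
ret += bias            # plays the role of `self.bias`
ret = tf.nn.relu(ret)  # plays the role of `self.activation`
print(ret.shape)       # (8, 64)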
def test_dim_mismatch(self):
  for axes, input_shapes in self.dim_mismatch_cases:
    inputs = [
        array_ops.placeholder(dtypes.float32, shape=shape)
        for shape in input_shapes
    ]
    with self.assertRaises(ValueError):
      _ = special_math_ops.einsum(axes, *inputs)
def test_invalid(self):
  for axes in self.invalid_cases:
    inputs = [
        array_ops.placeholder(dtypes.float32, shape=(3, 4)),
        array_ops.placeholder(dtypes.float32, shape=(3, 4)),
    ]
    with self.assertRaises(ValueError):
      _ = special_math_ops.einsum(axes, *inputs)
def _compute_new_cov(self, idx=0):
  with _maybe_colocate_with(self._outputs_grads[idx],
                            self._colocate_cov_ops_with_inputs):
    batch_size = array_ops.shape(self._patches)[0]

    transformed_inputs = special_math_ops.einsum(
        "bijk,kl->bijl", self._patches, self._input_factor_eigen_basis)
    transformed_outputs_grads = special_math_ops.einsum(
        "bijk,kl->bijl", self._outputs_grads[idx],
        self._output_factor_eigen_basis)

    new_scale = special_math_ops.einsum("bijk,bijl->bkl", transformed_inputs,
                                        transformed_outputs_grads)
    new_cov = math_ops.reduce_sum(math_ops.square(new_scale), axis=0)
    new_cov /= math_ops.cast(batch_size, new_scale.dtype)
    return new_cov
def _check_gradient(self, s, *input_shapes):
  with self.cached_session():
    r = np.random.RandomState(0)
    inputs = [np.array(r.randn(*shape)) for shape in input_shapes]
    input_tensors = [constant_op.constant(x, shape=x.shape) for x in inputs]
    analytical, numerical = gradient_checker_v2.compute_gradient(
        lambda *xs: special_math_ops.einsum(s, *xs), input_tensors)
    self.assertLess(
        gradient_checker_v2.max_error(analytical, numerical), 1e-4)
def test_input_is_placeholder(self):
  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(1, None))
    m1 = array_ops.placeholder(dtypes.int32, shape=(None, 1))
    out = special_math_ops.einsum('ij,jk->ik', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [[2], [1], [1]],
      }
      np.testing.assert_almost_equal([[7]],
                                     sess.run(out, feed_dict=feed_dict))

  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.int32, shape=(None, 3))
    m1 = array_ops.placeholder(dtypes.int32, shape=(3,))
    out = special_math_ops.einsum('ij,j->i', m0, m1)
    with session.Session() as sess:
      feed_dict = {
          m0: [[1, 2, 3]],
          m1: [2, 1, 1],
      }
      np.testing.assert_almost_equal([7], sess.run(out, feed_dict=feed_dict))
def call(self, inputs, states):
  # inputs should be in [(batch, input_1), (batch, input_2, input_3)]
  # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)]
  flatten_inputs = nest.flatten(inputs)
  s1, s2 = states
  output_1 = math_ops.matmul(flatten_inputs[0], self.kernel_1)
  output_2_3 = special_math_ops.einsum('bij,ijkl->bkl', flatten_inputs[1],
                                       self.kernel_2_3)
  state_1 = s1 + output_1
  state_2_3 = s2 + output_2_3
  output = [output_1, output_2_3]
  new_states = NestedState(s1=state_1, s2=state_2_3)
  return output, new_states
def run_test(self, axes):
  all_axes = {ax: np.random.randint(4, 12) for ax in axes if ax.isalpha()}

  input_vals = []
  input_axes, _, _ = axes.partition('->')

  for idx in input_axes.split(','):
    shape = [all_axes[ax] for ax in idx]
    input_vals.append(np.random.random(shape))

  input_tensors = [constant_op.constant(val) for val in input_vals]
  output_tensor = special_math_ops.einsum(axes, *input_tensors)

  with self.test_session(use_gpu=True):
    output_value = output_tensor.eval()

  correct_value = np.einsum(axes, *input_vals)

  err = np.abs(correct_value - output_value).max()
  print(axes, err)
  assert err < 1e-8
def test_ellipses_with_unknown_input_dim(self):
  with ops.Graph().as_default():
    m0 = array_ops.placeholder(dtypes.float32)
    m1 = array_ops.placeholder_with_default([[3, 2]], shape=(None, 2))
    with self.assertRaises(ValueError):
      _ = special_math_ops.einsum('...jkl,...j->...kl', m0, m1)
def test_repeated_axis_single_input(self):
  x = array_ops.placeholder(dtypes.float32, shape=[2, 2])
  with self.assertRaises(ValueError):
    _ = special_math_ops.einsum('ii->', x)
def test_multiple_ellipses(self):
  m0 = array_ops.placeholder_with_default([[[[1, 2]], [[2, 1]]]],
                                          shape=(None, 2, None, 2))
  m1 = array_ops.placeholder_with_default([[3, 2]], shape=(None, 2))
  out = special_math_ops.einsum('...jkl,...j->...kl', m0, m1)
  self.assertAllClose([[[7, 8]]], self.evaluate(out))
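# Hedged NumPy cross-check (not part of the test above) of the broadcasted
# equation it exercises; np.einsum supports the same ellipsis notation.
import numpy as np

m0 = np.array([[[[1, 2]], [[2, 1]]]])  # shape (1, 2, 1, 2)
m1 = np.array([[3, 2]])                # shape (1, 2)
print(np.einsum('...jkl,...j->...kl', m0, m1))  # [[[7 8]]]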