def test_transpose_0d_1d_operands():
    """Expect ValueError when C.transpose is called without a permutation
    on an input of shape ``()`` and on an input of shape ``2``."""
    for operand_shape in ((), 2):
        operand = C.input(operand_shape)
        with pytest.raises(ValueError):
            C.transpose(operand)
def __cntk_cov__(m, rowvar: bool = False):
    """Build a covariance expression for ``m`` out of CNTK ops.

    Args:
        m: operand exposing ``.shape`` with at most two axes. An operand
            with fewer than two axes is reshaped to a single row.
        rowvar: when false (the default) and ``m`` has more than one row,
            ``m`` is transposed before the covariance is computed.

    Returns:
        ``(centered @ centered^T) / (n - 1)`` passed through ``C.squeeze``,
        where ``n`` is the size of axis 1 after the optional transpose.

    Raises:
        ValueError: if ``m`` has more than two axes.
    """
    rank = len(m.shape)
    if rank > 2:
        raise ValueError('m has more than 2 dimensions')
    if rank < 2:
        m = C.reshape(m, (1, -1))

    if not (rowvar or m.shape[0] == 1):
        m = C.transpose(m, [1, 0])

    # NOTE(review): a single observation (m.shape[1] == 1) divides by zero here.
    scale = 1.0 / (m.shape[1] - 1)
    m -= C.reduce_mean(m, axis=1)
    centered_t = C.transpose(m, [1, 0])
    return scale * C.squeeze(m @ centered_t)
def test_Transpose(tmpdir):
    """Run C.transpose models through the verify_no_input / verify_one_input
    helpers: once on a constant operand and twice on an input variable."""
    values = np.arange(24).reshape(2, 3, 4).astype('f')
    x_var = C.input_variable(values.shape)

    # Constant operand: the model has no free inputs.
    verify_no_input(C.transpose(values, perm=(2, 0, 1)), tmpdir, 'Transpose_0')

    # Input-variable operand, two different permutations.
    input_cases = (
        ((2, 0, 1), 'Transpose_1'),
        ((0, 2, 1), 'Transpose_1_2'),
    )
    for perm, case_name in input_cases:
        verify_one_input(C.transpose(x_var, perm=perm), values, tmpdir, case_name)
def __cntk_cov2__(m):
    """Build a covariance expression over the unpacked batch of ``m``.

    ``m`` is flattened, unpacked with ``C.unpack_batch`` and transposed.
    The observation count is derived inside the graph (sum of the axis-0
    mean of a ones tensor) rather than read from a static shape.

    Returns:
        ``(centered @ centered^T) / (count - 1)`` passed through ``C.squeeze``.
    """
    rows = C.transpose(C.unpack_batch(C.reshape(m, -1)), [1, 0])

    # Each column of the ones tensor averages to 1, so the sum equals the
    # size of axis 1.
    column_ones = C.reduce_mean(C.ones_like(rows), axis=0)
    count = C.reduce_sum(column_ones)
    scale = 1.0 / (count - 1)

    rows -= C.reduce_mean(rows, axis=1)
    rows_t = C.transpose(rows, [1, 0])
    return scale * C.squeeze(rows @ rows_t)
def test_Transpose(tmpdir, dtype):
    """Run C.transpose models through the verify_no_input / verify_one_input
    helpers with ``dtype`` installed as the CNTK default dtype."""
    with C.default_options(dtype=dtype):
        values = np.arange(24).reshape(2, 3, 4).astype(dtype)
        x_var = C.input_variable(values.shape)

        # Constant operand: the model has no free inputs.
        verify_no_input(C.transpose(values, perm=(2, 0, 1)), tmpdir, 'Transpose_0')

        # Input-variable operand, two different permutations.
        input_cases = (
            ((2, 0, 1), 'Transpose_1'),
            ((0, 2, 1), 'Transpose_1_2'),
        )
        for perm, case_name in input_cases:
            verify_one_input(C.transpose(x_var, perm=perm), values, tmpdir, case_name)
def test_transpose():
    """Compare C.transpose with np.transpose for every axis permutation of
    a 4-d and a 3-d array, including negative axis indices."""
    from itertools import permutations

    def matches_numpy(array, perm):
        return np.array_equal(C.transpose(array, perm).eval(), np.transpose(array, perm))

    four_d = np.arange(120, dtype=np.float32).reshape(2, 3, 4, 5)
    for perm in permutations(range(4)):
        assert matches_numpy(four_d, perm)

    # test permutations over odd number of axes just in case
    three_d = four_d.reshape(6, 4, 5)
    for perm in permutations(range(3)):
        assert matches_numpy(three_d, perm)

    # test negative numbers
    for perm in permutations(range(3)):
        negative_perm = [axis - 3 for axis in perm]
        assert matches_numpy(three_d, negative_perm)
def multivariate_kl_divergence(input_layer):
    """Build ``0.5 * (log(det(S2) / det(S1)) - d + trace(S2^-1 S1) + diff^T S2^-1 diff)``.

    ``S1`` is ``C.cov2(input_layer)`` and ``mu1`` is the axis-0 mean of the
    unpacked batch. The reference side is fixed: ``mu2`` is all zeros and
    ``S2`` is the identity, so ``S2^-1`` is ``S2`` itself. ``d`` is
    ``input_layer.shape[0]`` and ``diff`` is ``mu2 - mu1``.

    Presumably this is the KL divergence between the batch distribution and
    a standard normal -- confirm against the callers.
    """
    _dim = input_layer.shape[0]
    batch_values = C.unpack_batch(input_layer)

    mean_batch = C.transpose(C.reduce_mean(batch_values, axis=0), [1, 0])
    cov_batch = C.cov2(input_layer)

    mean_ref = C.zeros_like(mean_batch)
    cov_ref = C.Constant(np.eye(_dim))
    cov_ref_inv = cov_ref  # identity matrix

    log_det_ratio = C.log(C.det(cov_ref) / C.det(cov_batch))
    trace_term = C.trace(cov_ref_inv @ cov_batch)
    quadratic_term = (
        C.transpose((mean_ref - mean_batch), [1, 0])
        @ cov_ref_inv
        @ (mean_ref - mean_batch)
    )
    return 0.5 * (log_det_ratio - _dim + trace_term + quadratic_term)
def convolution(input, name, **kwargs):
    """Apply a convolution whose weights come from ``__weights_dict[name]``.

    The stored weight array has its last two axes moved to the front (last
    axis first). ``input`` has axis ``dim - 2`` moved to the front before
    ``ops.convolution`` is called, and the result has its first axis moved
    back to the end. A ``'bias'`` entry, when present, is reshaped to
    ``[-1, 1, ..., 1]`` and added before the final transpose.

    Args:
        input: operand to convolve; presumably of rank ``dim - 1`` -- confirm.
        name: key into ``__weights_dict`` and prefix of the parameter names.
        **kwargs: forwarded unchanged to ``ops.convolution``.
    """
    entry = __weights_dict[name]
    dim = entry['weights'].ndim
    leading_axes = list(range(0, dim - 2))

    kernel = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + leading_axes)
    w = cntk.Parameter(init=kernel, name=name + '_weight')

    input = cntk.transpose(input, [dim - 2] + leading_axes)
    layer = ops.convolution(w, input, **kwargs)

    if 'bias' in entry:
        bias_shape = [-1] + [1] * (dim - 2)
        b = cntk.Parameter(init=np.reshape(entry['bias'], bias_shape), name=name + '_bias')
        layer = layer + b

    return cntk.transpose(layer, list(range(1, dim - 1)) + [0])
def test_depth_to_space(image_shape, num_channels, block_size, device_id, precision):
    """Compare C.depth_to_space with an equivalent reshape/transpose/reshape
    graph on an image whose pixels all hold their own channel index."""
    cntk_device(device_id)  # return value is not used by this test
    from cntk.internal import sanitize_dtype_cntk

    np_dtype = PRECISION_TO_TYPE[precision]
    channel_ids = np.reshape(range(num_channels), (num_channels, 1, 1))
    input_val = np.tile(np.array(channel_ids, dtype=np_dtype), (1, ) + image_shape)
    img = C.input_variable((num_channels, ) + image_shape,
                           dtype=sanitize_dtype_cntk(np_dtype))

    # Result from depth_to_space node.
    output_test = C.depth_to_space(img, block_size).eval({img: input_val})

    # Reference result from simulating depth_to_space with other CNTK ops.
    h, w = image_shape
    out_channels = num_channels // (block_size**2)
    split_blocks = C.reshape(img, (block_size, block_size, out_channels, h, w))
    interleaved = C.transpose(split_blocks, [2, 3, 0, 4, 1])
    simulated = C.reshape(interleaved, (out_channels, h * block_size, w * block_size))
    output_ref = simulated.eval({img: input_val})

    assert np.array_equal(output_test, output_ref)
def test_unpack_axis_times_transpose_unpack_axis(output_rank, x_input_shape, x_data, y_input_shape, y_data):
    """Check C.times on two unpacked-batch operands, the second with all of
    its axes reversed, against np.tensordot."""
    # test free axis times from unpack batch
    x = C.input_variable(x_input_shape)
    y = C.input_variable(y_input_shape)
    x_unpacked = C.unpack_batch(x)
    y_unpacked = C.unpack_batch(y)

    # Full axis reversal, the same order np.transpose uses by default.
    reversed_axes = range(len(y_unpacked.shape) - 1, -1, -1)
    y_reversed = C.transpose(y_unpacked, reversed_axes)

    product = C.times(x_unpacked, y_reversed, output_rank=output_rank)
    cntk_result = product.eval({x: x_data, y: y_data})

    contracted_axes = len(x_data.shape) - output_rank
    np_result = np.tensordot(x_data, np.transpose(y_data), axes=contracted_axes)
    np.testing.assert_allclose(np_result, cntk_result)
def test_transpose_backward():
    """Check the gradient through C.transpose: the gradient of
    sum(cos(transpose(x))), once permuted, must match the gradient of
    sum(cos(xt)) evaluated at the pre-transposed data."""
    shape = (2, 3, 4)
    perm = (2, 0, 1)
    values = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
    values_t = np.transpose(values, perm)

    x = C.input_variable(shape, needs_gradient=True)
    through_transpose = C.reduce_sum(C.cos(C.transpose(x, perm)))

    xt = C.input_variable(values_t.shape, needs_gradient=True)
    direct = C.reduce_sum(C.cos(xt))

    grad_x = np.squeeze(through_transpose.grad({x: values}))
    grad_xt = np.squeeze(direct.grad({xt: values_t}))
    assert np.allclose(np.transpose(grad_x, perm), grad_xt)
def test_transpose_backward():
    """Gradient check for C.transpose.

    Differentiates sum(cos(transpose(x, p))) with respect to x and
    sum(cos(xt)) with respect to xt, feeding xt the numpy-transposed data.
    The first gradient, permuted by p, must equal the second.
    """
    shape = (2, 3, 4)
    p = (2, 0, 1)
    permuted_shape = tuple(shape[axis] for axis in p)
    x0 = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)

    x = C.input_variable(shape, needs_gradient=True)
    xt = C.input_variable(permuted_shape, needs_gradient=True)
    loss_via_transpose = C.reduce_sum(C.cos(C.transpose(x, p)))
    loss_direct = C.reduce_sum(C.cos(xt))

    grad_wrt_x = np.squeeze(loss_via_transpose.grad({x: x0}))
    grad_wrt_xt = np.squeeze(loss_direct.grad({xt: np.transpose(x0, p)}))

    assert np.allclose(np.transpose(grad_wrt_x, p), grad_wrt_xt)
def cnwindow(mna, window):
    """Gather every ``window``-sized patch of the last two axes of ``mna``.

    One ``C.slice`` is taken per (row offset, column offset) pair inside the
    window, and the slices are chained with ``C.splice`` along axis 1. The
    result is reshaped to ``(*lead, *window, out_rows, out_cols)``, its last
    five axes are reversed, and it is reshaped once more with a singleton
    axis inserted.

    The fixed six-axis permutation implies that ``mna`` has four static
    axes and ``window`` has two entries.
    """
    in_shape = mna.shape
    out_rows = (in_shape[-2] - window[-2]) + 1
    out_cols = (in_shape[-1] - window[-1]) + 1
    windowed_shape = (*in_shape[:-2], *window, out_rows, out_cols)

    stacked = None
    for row_off in range(window[0]):
        row_end = row_off + out_rows
        for col_off in range(window[1]):
            patch = C.slice(mna, [-2, -1], [row_off, col_off], [row_end, col_off + out_cols])
            stacked = patch if stacked is None else C.splice(stacked, patch, axis=1)

    windowed = C.reshape(stacked, shape=windowed_shape)
    reversed_axes = C.transpose(windowed, (0, 5, 4, 3, 2, 1))
    final_shape = (windowed_shape[0], *windowed_shape[5:3:-1], 1, *windowed_shape[3:0:-1])
    return C.reshape(reversed_axes, final_shape)
def build_model_cntk(max_features, max_len):
    """Assemble the CNTK model graph on a placeholder of shape ``(max_len,)``.

    Pipeline: sparse one-hot over ``max_features`` classes -> 128-wide
    embedding -> transpose (1, 0) -> Convolution1D(7, 32, relu) ->
    MaxPooling(5, strides=5) -> Convolution1D(7, 32, relu) ->
    GlobalMaxPooling -> Dense(1, sigmoid).

    Returns:
        The composed CNTK function; its input is still the placeholder
        named ``'x_placeholder'``.
    """
    tokens = cntk.placeholder(shape=(max_len, ), name='x_placeholder')
    one_hot = cntk.one_hot(tokens, num_classes=max_features, sparse_output=True)

    embedded = cntk.layers.Embedding(128)(one_hot)
    # Swap the two static axes before the 1-d convolutions.
    embedded_t = cntk.transpose(embedded, (1, 0))

    conv_a = cntk.layers.Convolution1D(
        filter_shape=7, num_filters=32, activation=cntk.relu)(embedded_t)
    pooled = cntk.layers.MaxPooling(filter_shape=(5, ), strides=5)(conv_a)
    conv_b = cntk.layers.Convolution1D(
        filter_shape=7, num_filters=32, activation=cntk.relu)(pooled)
    global_max = cntk.layers.GlobalMaxPooling()(conv_b)

    return cntk.layers.Dense(shape=1, activation=cntk.sigmoid)(global_max)
def test_unpack_axis_times_transpose_unpack_axis(output_rank, x_input_shape, x_data, y_input_shape, y_data):
    """Multiply an unpacked batch by the axis-reversed unpacked batch of a
    second input with C.times, then compare with np.tensordot."""
    # test free axis times from unpack batch
    x = C.input_variable(x_input_shape)
    y = C.input_variable(y_input_shape)

    lhs = C.unpack_batch(x)
    rhs = C.unpack_batch(y)
    rhs_rank = len(rhs.shape)
    rhs_reversed = C.transpose(rhs, range(rhs_rank - 1, -1, -1))

    cntk_result = C.times(lhs, rhs_reversed, output_rank=output_rank).eval({x: x_data, y: y_data})

    np_result = np.tensordot(
        x_data,
        np.transpose(y_data),
        axes=len(x_data.shape) - output_rank,
    )
    np.testing.assert_allclose(np_result, cntk_result)
def test_depth_to_space(image_shape, num_channels, block_size, device_id, precision):
    """Check that the depth_to_space node agrees with a reference graph
    built from reshape, transpose and reshape."""
    cntk_device(device_id)  # return value is not used by this test
    from cntk.internal import sanitize_dtype_cntk

    element_type = PRECISION_TO_TYPE[precision]
    full_shape = (num_channels,) + image_shape

    # Each channel plane is filled with its own channel index.
    per_channel = np.array(np.reshape(range(num_channels), (num_channels, 1, 1)),
                           dtype=element_type)
    input_val = np.tile(per_channel, (1,) + image_shape)
    img = C.input_variable(full_shape, dtype=sanitize_dtype_cntk(element_type))

    # Result from depth_to_space node.
    depth_to_space_op = C.depth_to_space(img, block_size)
    output_test = depth_to_space_op.eval({img: input_val})

    # Reference result from simulating depth_to_space with other CNTK ops.
    h, w = image_shape
    reduced_channels = num_channels // (block_size**2)
    reference = C.reshape(
        C.transpose(
            C.reshape(img, (block_size, block_size, reduced_channels, h, w)),
            [2, 3, 0, 4, 1]),
        (reduced_channels, h * block_size, w * block_size))
    output_ref = reference.eval({img: input_val})

    assert np.array_equal(output_test, output_ref)
def test_transpose():
    """
    Test for transpose()

    Draws random shapes of rank 2 to 5 (every extent in [2, 5)) and random
    axis permutations, then checks that transpose() agrees with
    np.transpose. The whole sweep is repeated five times.

    :return: Nothing
    """
    repeat_for = 5
    for _repeat in range(repeat_for):
        for i in range(1, 5):
            rank = i + 1
            # Plain Python ints, not numpy integers, are handed to transpose().
            permutation = [int(p) for p in np.random.permutation(rank)]
            shape = [np.random.randint(2, 5) for _ in range(rank)]

            # np.prod replaces np.product, which NumPy 2.0 removed.
            entries = np.prod(shape)
            data = np.arange(entries).reshape(shape)

            np_transposed = np.transpose(np.copy(data), np.copy(permutation))
            by_transposeCNTK = transpose(np.ascontiguousarray(data), permutation).eval()

            # np.array_equal replaces np.alltrue(a == b), also removed in
            # NumPy 2.0, and additionally rejects a shape mismatch that
            # would otherwise broadcast.
            assert np.array_equal(np_transposed, by_transposeCNTK)
def test_eye_like(operand, sparse_output, device_id, precision):
    """Exercise C.eye_like on numpy input, on an input variable, on an
    unpacked batch and on its transpose.

    Each graph-based case checks the forward value, checks that the
    gradient is all zeros, and checks that the function evaluates the same
    after a save / load_model round-trip. The test then verifies that no
    gradient flows through eye_like into its inputs, and that eye_like
    raises for operands with a sequence axis or with an axis count other
    than two (dynamic axes included).
    """
    np_eye_like = lambda matrix: np.eye(matrix.shape[0], matrix.shape[1], dtype=np.float32)
    operand = AA(operand).astype(np.float32)
    expected = np_eye_like(operand)
    expected_grad = np.zeros_like(operand).reshape(expected.shape)

    # Sparse results must be densified before numpy can compare them.
    my_eval = (lambda f, arg: f.eval(arg).todense()) if sparse_output else (lambda f, arg: f.eval(arg))

    from .. import eye_like
    import cntk as C
    import tempfile
    import os

    def check_with_save_and_load(func, arg_var, expected_value, file_name):
        # Forward value and gradient of the freshly built function.
        actual = my_eval(func, {arg_var: operand})
        grad = func.grad({arg_var: operand})
        np.testing.assert_almost_equal(actual, expected_value)
        np.testing.assert_almost_equal(grad, expected_grad)

        # Save / load round-trip. The file is removed even when loading or
        # the comparison fails, so a failing run leaves nothing behind in
        # the shared temp directory.
        model_path = os.path.join(tempfile.gettempdir(), file_name)
        func.save(model_path)
        try:
            reloaded = C.load_model(model_path)
            np.testing.assert_almost_equal(
                my_eval(reloaded, {reloaded.arguments[0]: operand}), expected_value)
        finally:
            os.remove(model_path)

    #testing with direct numpy input
    y = C.eye_like(operand, sparse_output=sparse_output)
    actual = y.eval().todense() if sparse_output else y.eval()
    np.testing.assert_almost_equal(actual, expected)

    #testing through input_variable
    #test load and save:
    x = C.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
    check_with_save_and_load(
        C.eye_like(x, sparse_output=sparse_output), x, expected, 'eye_like_test')
    check_with_save_and_load(
        C.eye_like(C.unpack_batch(x), sparse_output=sparse_output),
        x, expected, 'eye_like_test2')
    check_with_save_and_load(
        C.eye_like(C.transpose(C.unpack_batch(x), (1, 0)), sparse_output=sparse_output),
        x, expected.transpose(), 'eye_like_test3')

    #test pass through gradients
    #test direct input: no gradients pass through to inputs
    data = operand
    #sparse are not supported for some of the following basic operations
    op = lambda x: eye_like(x, sparse_output=False)
    w = C.parameter(x.shape, init=np.ones(x.shape).astype(np.float32) * 3.0)
    expected_x_backward = np.zeros_like(data)
    expected_w_backward = np.zeros_like(w)

    op_func = op(x)
    grad = op_func.grad({x: data}, [x])
    np.testing.assert_almost_equal(grad, expected_x_backward)

    # test inputs through sub-expressions: no gradients pass through to
    # inputs (e.g. x, w) of the subexpression (e.g. x * w here)
    op_func = op(x * w)
    grad = op_func.grad({x: data}, [w, x])
    np.testing.assert_almost_equal(grad[x], expected_x_backward)
    np.testing.assert_almost_equal(grad[w], expected_w_backward)

    # testing inputs through shared sub-expressions: no gradients pass
    # through eye_like to the inputs (e.g. x, w) of the subexpression
    # (e.g. x * w here), therefore the gradients depend only on how the
    # shared expression participates in the other expressions:
    shared_exp = x * w
    op_func = op(shared_exp) + x + w + shared_exp
    ref_op_func = x + w + shared_exp
    grad = op_func.grad({x: data}, [w, x])
    ref_grad = ref_op_func.grad({x: data}, [w, x])
    np.testing.assert_almost_equal(grad[x], ref_grad[x])
    np.testing.assert_almost_equal(grad[w], ref_grad[w])

    #test expecting exception with sequence axis
    with pytest.raises(Exception):
        #no sequence axis is allowed
        x = C.sequence.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no more than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((3, 3), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no less than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
def test_Transpose(tmpdir):
    """Pass a C.transpose model built on a constant 2x3x4 array to the
    verify_no_input helper."""
    constant_operand = np.arange(24).reshape((2, 3, 4)).astype('f')
    verify_no_input(
        C.transpose(constant_operand, perm=(2, 0, 1)),
        tmpdir,
        'Transpose_0',
    )
def test_eye_like(operand, sparse_output, device_id, precision):
    """Exercise C.eye_like on numpy input, on an input variable, on an
    unpacked batch and on its transpose.

    Each graph-based case checks the forward value, checks that the
    gradient is all zeros, and checks that the function evaluates the same
    after a save / load_model round-trip. Finally eye_like must raise for
    operands with a sequence axis or with an axis count other than two
    (dynamic axes included).
    """
    np_eye_like = lambda matrix: np.eye(matrix.shape[0], matrix.shape[1], dtype=np.float32)
    operand = AA(operand).astype(np.float32)
    expected = np_eye_like(operand)
    expected_grad = np.zeros_like(operand).reshape(expected.shape)

    # Sparse results must be densified before numpy can compare them.
    my_eval = (lambda f, arg: f.eval(arg).todense()) if sparse_output else (lambda f, arg: f.eval(arg))

    from .. import eye_like
    import cntk as C
    import tempfile
    import os

    def check_with_save_and_load(func, arg_var, expected_value, file_name):
        # Forward value and gradient of the freshly built function.
        actual = my_eval(func, {arg_var: operand})
        grad = func.grad({arg_var: operand})
        np.testing.assert_almost_equal(actual, expected_value)
        np.testing.assert_almost_equal(grad, expected_grad)

        # Save / load round-trip. The file is removed even when loading or
        # the comparison fails, so a failing run leaves nothing behind in
        # the shared temp directory.
        model_path = os.path.join(tempfile.gettempdir(), file_name)
        func.save(model_path)
        try:
            reloaded = C.load_model(model_path)
            np.testing.assert_almost_equal(
                my_eval(reloaded, {reloaded.arguments[0]: operand}), expected_value)
        finally:
            os.remove(model_path)

    #testing with direct numpy input
    y = C.eye_like(operand, sparse_output=sparse_output)
    actual = y.eval().todense() if sparse_output else y.eval()
    np.testing.assert_almost_equal(actual, expected)

    #testing through input_variable
    #test load and save:
    x = C.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
    check_with_save_and_load(
        C.eye_like(x, sparse_output=sparse_output), x, expected, 'eye_like_test')
    check_with_save_and_load(
        C.eye_like(C.unpack_batch(x), sparse_output=sparse_output),
        x, expected, 'eye_like_test2')
    check_with_save_and_load(
        C.eye_like(C.transpose(C.unpack_batch(x), (1, 0)), sparse_output=sparse_output),
        x, expected.transpose(), 'eye_like_test3')

    #test expecting exception with sequence axis
    with pytest.raises(Exception):
        #no sequence axis is allowed
        x = C.sequence.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no more than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((3, 3), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no less than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
def pooling(input, **kwargs):
    """Apply ``ops.pooling`` with the last static axis moved to the front.

    The last axis of ``input`` is rotated to position 0, ``ops.pooling`` is
    applied with ``**kwargs`` forwarded unchanged, and the first axis of
    the result is rotated back to the end.
    """
    rank = len(input.output.shape)
    last_axis_first = [rank - 1] + list(range(0, rank - 1))
    first_axis_last = list(range(1, rank)) + [0]

    pooled = ops.pooling(cntk.transpose(input, last_axis_first), **kwargs)
    return cntk.transpose(pooled, first_axis_last)
def lrn(input, **kwargs):
    """Apply the ``BlockApiSetup.lrn`` block with the last static axis moved
    to the front.

    The last axis of ``input`` is rotated to position 0, the block built by
    ``BlockApiSetup.lrn(**kwargs)`` is applied, and the first axis of the
    result is rotated back to the end.
    """
    rank = len(input.output.shape)
    last_axis_first = [rank - 1] + list(range(0, rank - 1))
    first_axis_last = list(range(1, rank)) + [0]

    rotated = cntk.transpose(input, last_axis_first)
    normalized = BlockApiSetup.lrn(**kwargs)(rotated)
    return cntk.transpose(normalized, first_axis_last)
def test_eye_like(operand, sparse_output, device_id, precision):
    """Exercise C.eye_like on numpy input, on an input variable, on an
    unpacked batch and on its transpose.

    Each graph-based case checks the forward value, checks that the
    gradient is all zeros, and checks that the function evaluates the same
    after a save / load_model round-trip. Finally eye_like must raise for
    operands with a sequence axis or with an axis count other than two
    (dynamic axes included).
    """
    np_eye_like = lambda matrix: np.eye(
        matrix.shape[0], matrix.shape[1], dtype=np.float32)
    operand = AA(operand).astype(np.float32)
    expected = np_eye_like(operand)
    expected_grad = np.zeros_like(operand).reshape(expected.shape)

    # Sparse results must be densified before numpy can compare them.
    if sparse_output:
        my_eval = lambda f, arg: f.eval(arg).todense()
    else:
        my_eval = lambda f, arg: f.eval(arg)

    from .. import eye_like
    import cntk as C
    import tempfile
    import os

    def check_with_save_and_load(func, arg_var, expected_value, file_name):
        # Forward value and gradient of the freshly built function.
        actual = my_eval(func, {arg_var: operand})
        grad = func.grad({arg_var: operand})
        np.testing.assert_almost_equal(actual, expected_value)
        np.testing.assert_almost_equal(grad, expected_grad)

        # Save / load round-trip. The file is removed even when loading or
        # the comparison fails, so a failing run leaves nothing behind in
        # the shared temp directory.
        model_path = os.path.join(tempfile.gettempdir(), file_name)
        func.save(model_path)
        try:
            reloaded = C.load_model(model_path)
            np.testing.assert_almost_equal(
                my_eval(reloaded, {reloaded.arguments[0]: operand}),
                expected_value)
        finally:
            os.remove(model_path)

    #testing with direct numpy input
    y = C.eye_like(operand, sparse_output=sparse_output)
    actual = y.eval().todense() if sparse_output else y.eval()
    np.testing.assert_almost_equal(actual, expected)

    #testing through input_variable
    #test load and save:
    x = C.input_variable(operand.shape[1:],
                         dtype=np.float32,
                         needs_gradient=True)
    check_with_save_and_load(
        C.eye_like(x, sparse_output=sparse_output),
        x, expected, 'eye_like_test')
    check_with_save_and_load(
        C.eye_like(C.unpack_batch(x), sparse_output=sparse_output),
        x, expected, 'eye_like_test2')
    check_with_save_and_load(
        C.eye_like(C.transpose(C.unpack_batch(x), (1, 0)),
                   sparse_output=sparse_output),
        x, expected.transpose(), 'eye_like_test3')

    #test expecting exception with sequence axis
    with pytest.raises(Exception):
        #no sequence axis is allowed
        x = C.sequence.input_variable(operand.shape[1:],
                                      dtype=np.float32,
                                      needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no more than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((3, 3), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)

    with pytest.raises(Exception):
        #no less than 2 axes is allowed (including any dynamic axes)
        x = C.input_variable((), dtype=np.float32, needs_gradient=True)
        cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
def test_Transpose(tmpdir):
    """Build C.transpose with perm (2, 0, 1) on a constant float array of
    shape (2, 3, 4) and pass it to verify_no_input."""
    flat = np.arange(24)
    operand = flat.reshape(2, 3, 4).astype('f')
    transposed_model = C.transpose(operand, perm=(2, 0, 1))
    verify_no_input(transposed_model, tmpdir, 'Transpose_0')
def attention_layer(self, context, query, dim):
    """Build the attention block over ``context`` and ``query``.

    Two bias-free ReLU Dense projections to ``self.hidden_dim`` are applied
    to placeholders of shape ``(dim,)``. An attention expression is written
    once over two untyped placeholders (``cln_mem_ph``, ``cln_inp_ph``) and
    then cloned twice with different substitutions: once with the projected
    query as memory (``weighted_q``) and once with the projected context on
    both sides (``c2c``). The result splices ``input_ph``, ``weighted_q``
    and ``c2c`` and is wrapped with ``C.as_block``, binding ``input_ph`` to
    ``context`` and ``input_mem`` to ``query``.

    The trailing shape comments use ``[dynamic axes][static axes]``;
    presumably ``c`` and ``q`` are the context and query sequence axes and
    ``d`` is the hidden size -- confirm.

    Args:
        context: bound to the ``(dim,)`` placeholder feeding the "Wqu" projection.
        query: bound to the ``(dim,)`` placeholder feeding the second projection.
        dim: static size of both placeholders.

    Returns:
        The block function named ``'attention_layer'``.
    """
    input_ph = C.placeholder(shape=(dim, ))
    input_mem = C.placeholder(shape=(dim, ))
    with C.layers.default_options(bias=False, activation=C.relu):
        attn_proj_enc = C.layers.Dense(self.hidden_dim, init=glorot_uniform(), input_rank=1, name="Wqu")
        attn_proj_dec = C.layers.Dense(self.hidden_dim, init=glorot_uniform(), input_rank=1)

    inputs_ = attn_proj_enc(input_ph)  # [#,c][d]
    memory_ = attn_proj_dec(input_mem)  # [#,q][d]

    # Untyped placeholders: the expression below is written once and
    # instantiated through clone() further down.
    cln_mem_ph = C.placeholder()  # [#,q][?=d]
    cln_inp_ph = C.placeholder()  # [#,c][?=d]
    unpack_inputs, inputs_mask = C.sequence.unpack(
        cln_inp_ph, 0).outputs  # [#][*=c,d] [#][*=c]
    expand_inputs = C.sequence.broadcast_as(unpack_inputs, cln_mem_ph)  # [#,q][*=c,d]
    # Dot-product scores scaled by sqrt(hidden_dim).
    matrix = C.reshape(
        C.times_transpose(cln_mem_ph, expand_inputs) / (self.hidden_dim**0.5), (-1, ))  # [#,q][*=c]
    # Positions where the unpack mask is zero get -1e30 so that softmax
    # gives them (numerically) zero weight.
    matrix = C.element_select(
        C.sequence.broadcast_as(inputs_mask, cln_mem_ph), matrix, C.constant(-1e30))
    logits = C.softmax(matrix, axis=0, name='level 1 weight')  # [#,q][*=c]
    trans_expand_inputs = C.transpose(expand_inputs, [1, 0])  # [#,q][d,*=c]
    q_over_c = C.reshape(
        C.reduce_sum(logits * trans_expand_inputs, axis=1), (-1, )) / (self.hidden_dim**0.5)  # [#,q][d]
    new_q = C.splice(cln_mem_ph, q_over_c)  # [#,q][2*d]

    # Second level: same scores, attended over the other sequence axis.
    unpack_matrix, matrix_mask = C.sequence.unpack(
        matrix, 0).outputs  # [#][*=q,*=c] [#][*=q]
    inputs_mask_s = C.to_sequence(C.reshape(inputs_mask, (-1, 1)))  # [#,c'][1]
    trans_matrix = C.to_sequence_like(C.transpose(unpack_matrix, [1, 0]), inputs_mask_s)  # [#,c'][*=q]
    # Keep only the steps selected by inputs_mask_s.
    trans_matrix = C.sequence.gather(trans_matrix, inputs_mask_s)  # [#,c2][*=q]
    trans_matrix = C.element_select(
        C.sequence.broadcast_as(matrix_mask, trans_matrix), trans_matrix, C.constant(-1e30))
    # NOTE(review): trans_matrix is annotated [#,c2][*=q] above, so the
    # "*=c" in the next shape comment looks like a typo for "*=q" -- confirm.
    logits2 = C.softmax(trans_matrix, axis=0, name='level 2 weight')  # [#,c2][*=c]
    unpack_new_q, new_q_mask = C.sequence.unpack(
        new_q, 0).outputs  # [#][*=q,2*d] [#][*=q]
    expand_new_q = C.transpose(
        C.sequence.broadcast_as(unpack_new_q, trans_matrix), [1, 0])  # [#,c2][2d,*=q]
    c_over_q = C.reshape(C.reduce_sum(logits2 * expand_new_q, axis=1), (-1, )) / (2 * self.hidden_dim)**0.5  # [#,c2][2d]
    c_over_q = C.reconcile_dynamic_axes(c_over_q, cln_inp_ph)

    # Instantiate the template; CloneMethod.share is passed so the clones
    # share parameters with the template.
    weighted_q = c_over_q.clone(C.CloneMethod.share, {
        cln_mem_ph: memory_,
        cln_inp_ph: inputs_
    })  # [#,c][2d]
    c2c = q_over_c.clone(C.CloneMethod.share, {
        cln_mem_ph: inputs_,
        cln_inp_ph: inputs_
    })  # [#,c][2d]

    att_context = C.splice(input_ph, weighted_q, c2c)  # 2d+2d+2d
    return C.as_block(att_context, [(input_ph, context), (input_mem, query)],
                      'attention_layer', 'attention_layer')
def DigitCaps(input, num_capsules, dim_out_vector, routings=3, name='DigitCaps'):
    '''
    Function to create an instance of a digit capsule.

    Args:
        input: Input Tensor. It is reshaped to (1152, 1, 1, 8), so it must
            hold 1152 input capsules of 8 values each.
        num_capsules (int): Number of output capsules
        dim_out_vector (int): Number of dimensions of the capsule output vector
        routings (int, optional): The number of routing iterations (at least 1)
        name (str, optional): The name of the Function instance in the network.

    Returns:
        Capsule outputs reshaped to (num_capsules, dim_out_vector, 1).

    Raises:
        ValueError: if ``routings`` is smaller than 1.
    '''
    if routings < 1:
        raise ValueError('routings must be at least 1')

    # Layout of the incoming capsules; fixed by the reshape of `input` below.
    num_in_capsules = 1152
    dim_in_vector = 8

    # Learnable Parameters
    W = ct.Parameter(shape=(num_in_capsules, num_capsules, dim_out_vector, dim_in_vector),
                     init=ct.normal(0.01), name=name + '_Weights')

    # reshape input for broadcasting on all output capsules
    input = ct.reshape(input, (num_in_capsules, 1, 1, dim_in_vector), name='reshape_input')

    # Output shape = [#](num_in_capsules, num_capsules, dim_out_vector, 1)
    u_hat = ct.reduce_sum(W * input, axis=3)

    # we don't need gradients on routing
    u_hat_stopped = ct.stop_gradient(u_hat, name='stop_gradient')

    # all the routing logits (Bij) are initialized to zero for each routing.
    Bij = ct.Constant(np.zeros((num_in_capsules, num_capsules, 1, 1), dtype=np.float32))

    # line 3, for r iterations do
    for r_iter in range(routings):
        # line 4: for all capsule i in layer l: ci <- softmax(bi) => Cij
        # Output shape = [#][num_in_capsules, num_capsules, 1, 1]
        Cij = ct.softmax(Bij, axis=1)

        if r_iter == routings - 1:
            # At last iteration, use `u_hat` in order to receive gradients
            # from the following graph.
            # line 5: for all capsule j in layer (l + 1): sj <- sum(cij * u_hat)
            Sj = ct.reduce_sum(ct.element_times(Cij, u_hat, 'weighted_u_hat'), axis=0)

            # line 6: for all capsule j in layer (l + 1): vj <- squash(sj)
            # Output shape = [#][1, num_capsules, dim_out_vector, 1]
            Vj = Squash(Sj)
        else:
            # line 5: for all capsule j in layer (l + 1): sj <- sum(cij * u_hat)
            Sj = ct.reduce_sum(ct.element_times(Cij, u_hat_stopped), axis=0)

            # line 6: for all capsule j in layer (l + 1): vj <- squash(sj)
            # Output shape = [#][1, num_capsules, dim_out_vector, 1]
            Vj = Squash(Sj)

            # line 7: for all capsule i in layer l and capsule j in layer
            # (l + 1): bij <- bij + ^uj|i * vj
            # The logits are not read after the final iteration, so they
            # are only updated on the non-final ones.
            # Output shape = [#][1, num_capsules, 1, dim_out_vector]
            Vj_Transpose = ct.transpose(
                ct.reshape(Vj, (1, num_capsules, dim_out_vector, 1)),
                (0, 1, 3, 2), name='Vj_Transpose')

            # Output shape = [#][num_in_capsules, num_capsules, 1, 1]
            UV = ct.reduce_sum(
                ct.reshape(u_hat_stopped, (num_in_capsules, num_capsules, 1, dim_out_vector))
                * Vj_Transpose, axis=3)
            Bij += UV

    # Output shape = [#][num_capsules, dim_out_vector, 1]
    Vj = ct.reshape(Vj, (num_capsules, dim_out_vector, 1), name='digit_caps_output')
    return Vj