def test_gather_op(device_id, precision):
    a_data = [AA([[0], [1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3], [4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1, 1], [1, 1], [0, 0],
                                [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices coming from a learnable parameter: no gradients
    # should be passed through the indices -- zeros should be passed instead.
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2, 4], [3, 5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})
    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # The following small model tests the memory reuse issue of the gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # Need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    input_data = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label_data = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: input_data, y: label_data})
    # The 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # The other three rows should stay at 1.
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
def test_Gather(tmpdir):
    c = np.asarray([[[0], [1]], [[4], [5]]]).astype('f')
    x = C.input_variable((2, 1))
    d = np.arange(12).reshape(6, 2).astype('f')
    y = C.constant(d)
    model = C.gather(y, x)
    verify_one_input(model, c, tmpdir, 'Gather_1')
def test_Gather_With_Axis(tmpdir):
    data = np.asarray([[[111, 112], [121, 122], [131, 132]],
                       [[211, 212], [221, 222], [231, 232]]]).astype('f')
    indices = np.asarray([[0, 1, 1], [1, 1, 1]])
    x = C.input_variable(np.shape(data))
    y = C.input_variable(np.shape(indices))
    axis = 1
    model = C.gather(data, y, axis)
    verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
def test_gather_op_with_axis(device_id, precision):
    data = np.array([[1.0, 1.2, 1.9],
                     [2.3, 3.4, 3.9],
                     [4.5, 5.7, 5.9]]).astype(PRECISION_TO_TYPE[precision])
    indices = np.array([0, 2]).astype(PRECISION_TO_TYPE[precision])
    output = np.array([[1.0, 1.9],
                       [2.3, 3.9],
                       [4.5, 5.9]]).astype(PRECISION_TO_TYPE[precision])
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=1)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)

    data = np.array([[[1.0, 1.2, 1.9]],
                     [[2.3, 3.4, 3.9]],
                     [[4.5, 5.7, 5.9]]]).astype(PRECISION_TO_TYPE[precision])
    indices = np.array([0, 2]).astype(PRECISION_TO_TYPE[precision])
    output = np.array([[[1.0, 1.9]],
                       [[2.3, 3.9]],
                       [[4.5, 5.9]]]).astype(PRECISION_TO_TYPE[precision])
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=2)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)
def test_Gather(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        c = np.asarray([[[0], [1]], [[4], [5]]]).astype(dtype)
        x = C.input_variable((2, 1))
        d = np.arange(12).reshape(6, 2).astype(dtype)
        y = C.constant(d)
        model = C.gather(y, x)
        verify_one_input(model, c, tmpdir, 'Gather_1')
def test_op_gather_grad(device_id):
    dim = 10
    ii = C.sequence.input_variable(())
    param = C.parameter((dim, 1),
                        init=np.reshape(np.arange(dim), (dim, 1)).astype(np.float32))
    ss = C.gather(param, ii)
    data = [[0], [0, 1, 2], [1, 2, 3, 4, 5, 6]]
    grad1 = ss.grad(data, wrt=[param])
    # gather(param, ii) must produce the same gradient as the equivalent
    # one-hot matrix product.
    ss2 = C.times(C.one_hot(ii, num_classes=dim, sparse_output=False), param)
    grad2 = ss2.grad(data, wrt=[param])
    assert np.array_equal(grad1, grad2)
def test_Gather_With_Axis(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([[[111, 112], [121, 122], [131, 132]],
                           [[211, 212], [221, 222], [231, 232]]]).astype(dtype)
        indices = np.asarray([[0, 1, 1], [1, 1, 1]])
        x = C.input_variable(np.shape(data))
        y = C.input_variable(np.shape(indices))
        axis = 1
        model = C.gather(data, y, axis)
        verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
def test_gather_op_backward(device_id, precision):
    a_data = [AA([[0], [1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3], [4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2, 1), dtype=PRECISION_TO_TYPE[precision])
    r_data = np.arange(12).reshape(6, 2).astype(PRECISION_TO_TYPE[precision])
    r = C.parameter(shape=r_data.shape, init=r_data)
    g = C.gather(r, a)
    grad = g.grad(a_data, wrt=[r])
    expected = np.asarray([[1., 1.], [1., 1.], [0., 0.],
                           [1., 1.], [1., 1.], [0., 0.]]).astype(PRECISION_TO_TYPE[precision])
    assert np.array_equal(grad, expected)

    # test without dynamic axis
    data = np.array([[1.0, 1.2, 1.9],
                     [2.3, 3.4, 3.9],
                     [4.5, 5.7, 5.9]]).astype(PRECISION_TO_TYPE[precision])
    indices = np.array([0, 2]).astype(PRECISION_TO_TYPE[precision])
    expected = np.array([[1., 1., 1.],
                         [0., 0., 0.],
                         [1., 1., 1.]]).astype(PRECISION_TO_TYPE[precision])
    x = C.input_variable(dynamic_axes=[], shape=(3, 3), needs_gradient=True,
                         dtype=PRECISION_TO_TYPE[precision])
    i = C.constant(indices, dtype=PRECISION_TO_TYPE[precision])
    y = C.gather(x, i)
    grad = y.grad(data, wrt=[x])
    assert np.allclose(expected, grad)
def bilateral_slice(im, guide, guide_no_grad):
    # Flatten data for gather op
    flat_grid = grid_scale * C.reshape(
        grid, [grid_sz * grid_sz * sigma_r * o_chans * (i_chans + 1)])
    # flat_grid_u = C.unpack_batch(flat_grid)

    # Make sure we do something that requires the gradient w.r.t. guide
    scaled_guide = guide_scale * guide
    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        scaled_guide, xx, yy, sz, grid_sz, sigma_r)
    wx = C.abs(gx_d - 0.5 - fx_d)
    wy = C.abs(gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
        guide_no_grad, xx, yy, sz, grid_sz, sigma_r)

    out_chans = []
    for chan in range(o_chans):
        output_components = []
        for ix, x in enumerate([fx, cx]):
            wx_ = (1 - wx) if ix == 0 else wx
            for iy, y in enumerate([fy, cy]):
                wy_ = (1 - wy) if iy == 0 else wy
                for iz, z in enumerate([fz, cz]):
                    wz_ = (1 - wz) if iz == 0 else wz
                    linear_idx = x + grid_sz * (y + grid_sz * (z + sigma_r * (cc + chan * (i_chans + 1))))
                    flat_linear_idx = C.reshape(linear_idx, [(i_chans + 1) * sz * sz])

                    # Slice
                    interp = C.gather(flat_grid, flat_linear_idx)
                    interp_fsz = C.reshape(interp, [i_chans + 1, sz, sz]) * wx_ * wy_ * wz_
                    output_components.append(interp_fsz)
        out_coeffs = sum(output_components)
        out_chan = C.reduce_sum(out_coeffs[:i_chans] * (im_scale * im) + out_coeffs[-1], 0)
        out_chans.append(out_chan)
    out = C.splice(*out_chans, axis=0)
    return out
def gather(operand, condition, name=''):
    '''
    TBA

    Example:
        TBA

    Args:
        operand: the symbolic tensor operand denoting a sequence
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import gather
    operand = sanitize_input(operand, get_data_type(operand))
    condition = sanitize_input(condition, get_data_type(condition))
    return gather(operand, condition, name).output()
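# A minimal usage sketch for the sequence gather wrapper above, written
# against the public C.sequence.gather API (this example is an assumption
# added for illustration, not part of the original source; the variable
# names are made up). sequence.gather keeps only the sequence steps whose
# condition flag is nonzero.
import numpy as np
import cntk as C

x = C.sequence.input_variable(shape=(1,))
flags = C.sequence.input_variable(shape=(1,))
picked = C.sequence.gather(x, flags)

x_data = [np.array([[1], [2], [3], [4]], dtype=np.float32)]
flag_data = [np.array([[0], [1], [0], [1]], dtype=np.float32)]
# Steps 2 and 4 have nonzero flags, so we expect [[2.], [4.]]
print(picked.eval({x: x_data, flags: flag_data}))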
def flatten_and_gather(x, y):
    # w, h, c, chan_idx, batch_idx and flat_im come from the enclosing scope.
    linear_idx = x + w * y + w * h * chan_idx + w * h * c * batch_idx
    flat_linear_idx = C.reshape(linear_idx, [-1])
    return C.reshape(C.gather(flat_im, flat_linear_idx), linear_idx.shape)
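# Self-contained sketch of the flatten-and-gather pattern used above
# (added for illustration; the dimensions and names are assumptions):
# C.gather with 1-D linear indices into a flattened tensor emulates
# multi-dimensional fancy indexing.
import numpy as np
import cntk as C

h, w = 2, 3
im = C.constant(np.arange(h * w, dtype=np.float32).reshape(h, w))
flat_im = C.reshape(im, [-1])                         # shape (6,)
xs = C.constant(np.array([0, 2], dtype=np.float32))   # column indices
ys = C.constant(np.array([1, 0], dtype=np.float32))   # row indices
linear_idx = xs + w * ys                              # row-major linear offsets
picked = C.gather(flat_im, C.reshape(linear_idx, [-1]))
print(picked.eval())  # expected: [3., 2.], i.e. im[1,0] and im[0,2]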
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two-layer hierarchical softmax function with sequence axis input:

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes
        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter

    Returns:
        output_prob: class:`~cntk.ops.functions.Function`
        class_probs: class:`~cntk.ops.functions.Function`
        all_probs: a list of class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    class_probs = C.softmax(b1 + C.times(input_var, w1))

    w2_temp = C.gather(w2s, target_class)
    w2 = reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = times(input_var, w2)
    probs_in_class = softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)

    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
import cntk as C
import numpy as np

c = np.asarray([0, 1]).astype('f')
x = C.input_variable((2,), needs_gradient=True, dynamic_axes=[])
y = C.input_variable((6,), needs_gradient=False, dynamic_axes=[])

output = C.gather(x, y)
loss = C.reduce_sum(output)
print(loss.grad({y: np.arange(6).reshape(6).astype('f'), x: c}))
def main(): print("version", C.__version__) bs = 1 n_chans = 1 sigma_s = 16 sigma_r = 12 # 4x4x1024x1024 # 4x12x64x64 sz = 256 # sz = 1024 small_sz = sz // sigma_s yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz)) cc, bb = np.meshgrid(np.arange(0, n_chans), np.arange(0, bs)) xx = np.expand_dims(xx, 0) xx = np.expand_dims(xx, 0) yy = np.expand_dims(yy, 0) yy = np.expand_dims(yy, 0) bb = np.expand_dims(bb, 2) bb = np.expand_dims(bb, 3) cc = np.expand_dims(cc, 2) cc = np.expand_dims(cc, 3) # Compute graph grid = C.Parameter([bs, n_chans, sigma_r, small_sz, small_sz], ) # grid = C.input_variable( # [bs, n_chans, sigma_r, small_sz, small_sz], # dynamic_axes=[], needs_gradient=True) guide = C.input_variable([bs, sz, sz], dynamic_axes=[], needs_gradient=True) guide_non_diff = C.input_variable([bs, sz, sz], dynamic_axes=[]) # Coordinates xx = C.Constant(xx, xx.shape) yy = C.Constant(yy, yy.shape) cc = C.Constant(cc, cc.shape) bb = C.Constant(bb, bb.shape) gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord( guide, xx, yy, sz, small_sz, sigma_r, bs) # Trilerp weights wx = (gx_d - 0.5 - fx_d) wy = (gy_d - 0.5 - fy_d) wz = C.abs(gz_d - 0.5 - fz_d) # Enclosing cell gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(guide_non_diff, xx, yy, sz, small_sz, sigma_r, bs) output_components = [] for ix, x in enumerate([fx, cx]): wx_ = (1 - wx) if ix == 0 else wx for iy, y in enumerate([fy, cy]): wy_ = (1 - wy) if iy == 0 else wy for iz, z in enumerate([fz, cz]): wz_ = (1 - wz) if iz == 0 else wz linear_idx = x + small_sz * (y + small_sz * (z + sigma_r * (cc + n_chans * bb))) # Flatten data for gather op flat_grid = C.reshape( grid, [bs * small_sz * small_sz * sigma_r * n_chans]) flat_linear_idx = C.reshape(linear_idx, [bs * n_chans * sz * sz]) # Slice interp = C.gather(flat_grid, flat_linear_idx) interp_fsz = C.reshape(interp, [bs, n_chans, sz, sz]) output_components.append(interp_fsz * wz_ * wx_ * wy_) out = sum(output_components) loss = C.squared_error(out, guide) # svg = C.logging.graph.plot(out, "/output/graph.svg") grid_data = np.random.uniform(size=(bs, n_chans, sigma_r, small_sz, small_sz)).astype(np.float32) # guide_data = np.random.uniform( # size=(bs, sz, sz)).astype(np.float32) guide_data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype( np.float32) guide_data = np.expand_dims(guide_data, 0) / 255.0 inputs = {guide: guide_data, guide_non_diff: guide_data}
def flatten_and_gather(x, y, z):
    # gw, gh, gd, cg, c_idx, batch_idx and flat_grid come from the enclosing scope.
    linear_idx = x + gw * y + gw * gh * z + c_idx * gw * gh * gd + batch_idx * gw * gh * gd * cg
    flat_linear_idx = C.reshape(linear_idx, [-1])
    return C.reshape(C.gather(flat_grid, flat_linear_idx), linear_idx.shape)