Exemple #1
0
def test_gather_op(device_id, precision):
    """Check ``C.gather`` forward results and its gradient w.r.t. the table.

    A 6x2 parameter holding the values ``0..11`` is indexed with two batches
    of row indices. The test asserts the gathered rows and the gradient that
    flows back into the parameter.

    Args:
        device_id: test fixture value; not read inside this test body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of the
            index data.
    """
    index_dtype = PRECISION_TO_TYPE[precision]

    a_data = [
        AA([[0], [1]], dtype=index_dtype),
        AA([[3], [4]], dtype=index_dtype),
    ]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Pass the shape tuple itself: ``r_data.data`` is the array's raw buffer
    # object, not its dimensions.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Rows 0, 1, 3 and 4 are each selected once; rows 2 and 5 never are.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray(
        [[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # Two indices per sample position.
    b_data = [
        AA([[0, 2], [1, 3]], dtype=index_dtype),
        AA([[2, 4], [3, 5]], dtype=index_dtype),
    ]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)
Exemple #2
0
def test_gather_op(device_id, precision):
    """Check ``C.gather`` forward values, gradients, and a small training step.

    The test has four parts:
      1. gather rows of a 6x2 parameter and compare against expected values;
      2. check the gradient w.r.t. the parameter, including the case where the
         indices are computed from another parameter (which must receive a
         zero gradient);
      3. gather with two indices per position;
      4. train one minibatch on a small model (gather followed by
         ``sequence.unpack``) and check which rows of the table changed.

    Args:
        device_id: test fixture value; not read inside this test body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of the
            index data.
    """
    index_dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=index_dtype),
              AA([[3], [4]], dtype=index_dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Pass the shape tuple itself: ``r_data.data`` is the array's raw buffer
    # object, not its dimensions.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Rows 0, 1, 3 and 4 are each selected once; rows 2 and 5 never are.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray(
        [[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # Gather with indices derived from a learnable parameter: no gradient
    # should pass through the indices, so that parameter must receive zeros.
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad(
        {a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params],
                          np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=index_dtype),
              AA([[2, 4], [3, 5]], dtype=index_dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # The following small model tests the memory reuse issue of the gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # The unpack node is needed to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    # Named to avoid shadowing the ``input`` builtin: 10 samples, each a
    # 3x4 block that only ever references rows 1 and 2 of ``w``.
    index_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    labels = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: index_batch, y: labels})
    # The 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # The other three rows should stay at 1.
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
Exemple #3
0
def test_gather_op(device_id, precision):
    """Exercise ``C.gather``: forward pass, gradients, and one training step.

    Sections, in order:
      * forward gather of rows from a 6x2 parameter filled with ``0..11``;
      * gradient w.r.t. that parameter, also when the indices are the product
        of the input and another parameter (whose gradient must be zero);
      * forward gather with two indices per position;
      * a single ``train_minibatch`` call on a gather + ``sequence.unpack``
        model, asserting which rows of the gathered table were modified.

    Args:
        device_id: test fixture value; not read inside this test body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of the
            index data.
    """
    idx_type = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=idx_type),
              AA([[3], [4]], dtype=idx_type)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # ``shape`` must be the dimension tuple; ``r_data.data`` (used
    # previously) is the ndarray's buffer object.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Only the rows that were looked up (0, 1, 3, 4) get a gradient of one.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray(
        [[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # Indices that depend on a learnable parameter: nothing should be
    # back-propagated through the indices (zeros are expected).
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad(
        {a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params],
                          np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=idx_type),
              AA([[2, 4], [3, 5]], dtype=idx_type)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # Small model that tests the memory reuse issue of the gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # The unpack node is required to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    # Avoid shadowing the ``input`` builtin; the batch only references
    # rows 1 and 2 of ``w``.
    x_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    y_batch = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: x_batch, y: y_batch})
    # The 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # The remaining three rows should keep their initial value of 1.
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
Exemple #4
0
def test_Gather(tmpdir):
    """Build a gather model over a constant 6x2 table and hand it to ``verify_one_input``.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
    """
    index_values = np.asarray([[[0], [1]], [[4], [5]]]).astype('f')
    index_input = C.input_variable((2, 1))
    # The table being indexed: values 0..11 laid out as six rows of two.
    table = C.constant(np.arange(12).reshape(6, 2).astype('f'))
    verify_one_input(C.gather(table, index_input), index_values, tmpdir,
                     'Gather_1')
Exemple #5
0
def test_Gather_With_Axis(tmpdir):
    """Build a gather model with an explicit axis and hand it to ``verify_one_input``.

    The reference data is passed to ``C.gather`` directly as a NumPy array;
    only the indices are a model input.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
    """
    data = np.asarray([
        [[111, 112], [121, 122], [131, 132]],
        [[211, 212], [221, 222], [231, 232]],
    ]).astype('f')
    indices = np.asarray([[0, 1, 1], [1, 1, 1]])
    # The original also created an input variable shaped like ``data`` that
    # was never connected to the model; it has been dropped.
    y = C.input_variable(np.shape(indices))
    axis = 1
    model = C.gather(data, y, axis)
    verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
Exemple #6
0
def test_gather_op_with_axis(device_id, precision):
    """Check ``C.gather`` with an explicit ``axis`` on constant operands.

    Two cases are evaluated: a 3x3 matrix gathered along ``axis=1`` and a
    3x1x3 tensor gathered along ``axis=2``. In both, indices 0 and 2 are
    used and the expected output keeps the first and last entry of each row.

    Args:
        device_id: passed to ``cntk_device`` to choose the evaluation device.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of all
            arrays.
    """
    dtype = PRECISION_TO_TYPE[precision]

    data = np.array([[1.0, 1.2, 1.9], [2.3, 3.4, 3.9], [4.5, 5.7, 5.9]]).astype(dtype)
    # A single conversion suffices; the original applied the same astype twice.
    indices = np.array([0, 2]).astype(dtype)
    output = np.array([[1.0, 1.9], [2.3, 3.9], [4.5, 5.9]]).astype(dtype)
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=1)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)

    data = np.array([[[1.0, 1.2, 1.9]], [[2.3, 3.4, 3.9]], [[4.5, 5.7, 5.9]]]).astype(dtype)
    indices = np.array([0, 2]).astype(dtype)
    output = np.array([[[1.0, 1.9]], [[2.3, 3.9]], [[4.5, 5.9]]]).astype(dtype)
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=2)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)
Exemple #7
0
def test_gather_op_with_axis(device_id, precision):
    """Evaluate ``C.gather`` along a chosen axis for a 2-D and a 3-D constant.

    Indices ``[0, 2]`` are gathered along ``axis=1`` of a 3x3 matrix and
    along ``axis=2`` of a 3x1x3 tensor; results are compared with
    ``np.allclose`` against hand-written expectations.

    Args:
        device_id: passed to ``cntk_device`` to choose the evaluation device.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of all
            arrays.
    """
    np_type = PRECISION_TO_TYPE[precision]

    # --- 2-D reference, gather along axis 1 ---
    data = np.array([[1.0, 1.2, 1.9], [2.3, 3.4, 3.9], [4.5, 5.7, 5.9]]).astype(np_type)
    # The duplicated ``.astype(...)`` from the original was a no-op and is removed.
    indices = np.array([0, 2]).astype(np_type)
    output = np.array([[1.0, 1.9], [2.3, 3.9], [4.5, 5.9]]).astype(np_type)
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=1)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)

    # --- 3-D reference, gather along axis 2 ---
    data = np.array([[[1.0, 1.2, 1.9]], [[2.3, 3.4, 3.9]], [[4.5, 5.7, 5.9]]]).astype(np_type)
    indices = np.array([0, 2]).astype(np_type)
    output = np.array([[[1.0, 1.9]], [[2.3, 3.9]], [[4.5, 5.9]]]).astype(np_type)
    x = C.constant(data)
    i = C.constant(indices)
    y = C.gather(x, i, axis=2)
    z = y.eval({}, device=cntk_device(device_id))
    assert np.allclose(output, z)
Exemple #8
0
def test_Gather(tmpdir, dtype):
    """Build a gather model for the given dtype and hand it to ``verify_one_input``.

    The test is skipped for ``np.float16``.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
        dtype: NumPy dtype used for the data and as the CNTK default dtype.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    with C.default_options(dtype=dtype):
        index_values = np.asarray([[[0], [1]], [[4], [5]]]).astype(dtype)
        index_input = C.input_variable((2, 1))
        # Table of 0..11 arranged as six rows of two values.
        table = C.constant(np.arange(12).reshape(6, 2).astype(dtype))
        verify_one_input(C.gather(table, index_input), index_values, tmpdir,
                         'Gather_1')
Exemple #9
0
def test_Gather(tmpdir, dtype):
    """Verify a simple gather model through ``verify_one_input``.

    Skipped when ``dtype`` is ``np.float16``; otherwise the model is built
    with ``dtype`` as the CNTK default.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
        dtype: NumPy dtype for the index values and the gathered table.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    with C.default_options(dtype=dtype):
        indices = np.asarray([[[0], [1]], [[4], [5]]]).astype(dtype)
        indices_var = C.input_variable((2, 1))
        reference = np.arange(12).reshape(6, 2).astype(dtype)
        gather_model = C.gather(C.constant(reference), indices_var)
        verify_one_input(gather_model, indices, tmpdir, 'Gather_1')
Exemple #10
0
def test_op_gather_grad(device_id):
    """Compare the gradient of ``C.gather`` with a one-hot ``times`` lookup.

    Both expressions look up rows of the same ``(dim, 1)`` parameter for a
    batch of variable-length index sequences; their gradients w.r.t. the
    parameter must be equal.

    Args:
        device_id: test fixture value; not read inside this test body.
    """
    dim = 10
    seq_indices = C.sequence.input_variable(())
    table_init = np.arange(dim).reshape(dim, 1).astype(np.float32)
    param = C.parameter((dim, 1), init=table_init)

    # Three sequences of different lengths.
    batch = [[0], [0, 1, 2], [1, 2, 3, 4, 5, 6]]

    via_gather = C.gather(param, seq_indices)
    grad_gather = via_gather.grad(batch, wrt=[param])

    one_hot_rows = C.one_hot(seq_indices, num_classes=dim, sparse_output=False)
    via_times = C.times(one_hot_rows, param)
    grad_times = via_times.grad(batch, wrt=[param])

    assert np.array_equal(grad_gather, grad_times)
Exemple #11
0
def test_Gather_With_Axis(tmpdir, dtype):
    """Build a gather-with-axis model for ``dtype`` and hand it to ``verify_one_input``.

    The test is skipped for ``np.float16``. The reference data is passed to
    ``C.gather`` directly as a NumPy array; only the indices are a model input.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
        dtype: NumPy dtype for the data and the CNTK default dtype.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([
            [[111, 112], [121, 122], [131, 132]],
            [[211, 212], [221, 222], [231, 232]],
        ]).astype(dtype)
        indices = np.asarray([[0, 1, 1], [1, 1, 1]])
        # The original also created an input variable shaped like ``data``
        # that was never connected to the model; it has been dropped.
        y = C.input_variable(np.shape(indices))
        axis = 1
        model = C.gather(data, y, axis)
        verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
Exemple #12
0
def test_Gather_With_Axis(tmpdir, dtype):
    """Verify a gather model that uses an explicit axis via ``verify_one_input``.

    Skipped when ``dtype`` is ``np.float16``. The gathered data is a NumPy
    array given straight to ``C.gather``; the indices are the single model
    input.

    Args:
        tmpdir: directory fixture forwarded to ``verify_one_input``.
        dtype: NumPy dtype for the data and the CNTK default dtype.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([
            [[111, 112], [121, 122], [131, 132]],
            [[211, 212], [221, 222], [231, 232]],
        ]).astype(dtype)
        indices = np.asarray([[0, 1, 1], [1, 1, 1]])
        # An unused input variable mirroring ``data``'s shape was removed:
        # it never took part in the model.
        y = C.input_variable(np.shape(indices))
        axis = 1
        model = C.gather(data, y, axis)
        verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
Exemple #13
0
def test_gather_op_backward(device_id, precision):
    """Check the gradient of ``C.gather`` w.r.t. the gathered operand.

    Two cases are covered: a parameter table indexed by a batched input, and
    an input without dynamic axes indexed by a constant.

    Args:
        device_id: test fixture value; not read inside this test body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of all
            arrays and variables.
    """
    dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=dtype),
              AA([[3], [4]], dtype=dtype)]
    a = C.input_variable((2, 1), dtype=dtype)
    r_data = np.arange(12).reshape(6, 2).astype(dtype)
    # Pass the shape tuple itself: ``r_data.data`` is the array's raw buffer
    # object, not its dimensions.
    r = C.parameter(shape=r_data.shape, init=r_data)
    g = C.gather(r, a)
    grad = g.grad(a_data, wrt=[r])
    # Rows 0, 1, 3 and 4 are each looked up once; rows 2 and 5 never are.
    expected = np.asarray(
        [[1., 1.], [1., 1.], [0., 0.], [1., 1.], [1., 1.], [0., 0.]]).astype(dtype)
    assert np.array_equal(grad, expected)

    # Test without dynamic axis.
    data = np.array([[1.0, 1.2, 1.9], [2.3, 3.4, 3.9], [4.5, 5.7, 5.9]]).astype(dtype)
    # A single conversion suffices; the original applied the same astype twice.
    indices = np.array([0, 2]).astype(dtype)
    expected = np.array([[1., 1., 1.], [0., 0., 0.], [1., 1., 1.]]).astype(dtype)
    x = C.input_variable(dynamic_axes=[], shape=(3, 3), needs_gradient=True, dtype=dtype)
    i = C.constant(indices, dtype=dtype)
    y = C.gather(x, i)
    grad = y.grad(data, wrt=[x])
    assert np.allclose(expected, grad)
Exemple #14
0
def test_gather_op(device_id, precision):
    """Check ``C.gather`` forward values and the gradient w.r.t. the table.

    A 6x2 parameter filled with ``0..11`` is indexed first with one index per
    position and then with two; the gathered rows and the parameter gradient
    are compared with hand-written expectations.

    Args:
        device_id: test fixture value; not read inside this test body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the dtype of the
            index data.
    """
    index_dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=index_dtype),
              AA([[3], [4]], dtype=index_dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # ``shape`` expects the dimension tuple; ``r_data.data`` (used
    # previously) is the ndarray's buffer object.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Rows 0, 1, 3 and 4 are each selected once; rows 2 and 5 never are.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray(
        [[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    b_data = [AA([[0, 2], [1, 3]], dtype=index_dtype),
              AA([[2, 4], [3, 5]], dtype=index_dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)
Exemple #15
0
    def bilateral_slice(im, guide, guide_no_grad):
        """Build the graph that slices ``grid`` at guide-dependent positions and applies it to ``im``.

        This is a closure: ``grid``, ``grid_scale``, ``guide_scale``,
        ``im_scale``, ``xx``, ``yy``, ``cc``, ``sz``, ``grid_sz``, ``sigma_r``,
        ``i_chans`` and ``o_chans`` are read from the enclosing scope.

        Args:
            im: operand multiplied (after scaling by ``im_scale``) with the
                first ``i_chans`` gathered coefficients.
            guide: operand used, scaled by ``guide_scale``, to compute the
                interpolation weights.
            guide_no_grad: operand used to compute the integer cell
                coordinates that feed the gather indices.
                NOTE(review): the name suggests a copy of ``guide`` that does
                not receive gradients -- confirm against the caller.

        Returns:
            The per-output-channel results spliced along axis 0.
        """
        # Flatten data for gather op
        flat_grid = grid_scale * C.reshape(
            grid, [grid_sz * grid_sz * sigma_r * o_chans * (i_chans + 1)])
        # flat_grid_u = C.unpack_batch(flat_grid)

        # Make sure we do sth that requires the gradient w.r.t guide
        scaled_guide = guide_scale * guide
        gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
            scaled_guide, xx, yy, sz, grid_sz, sigma_r)
        # Per-axis interpolation weights, taken as absolute values.
        wx = C.abs(gx_d - 0.5 - fx_d)
        wy = C.abs(gy_d - 0.5 - fy_d)
        wz = C.abs(gz_d - 0.5 - fz_d)

        # Enclosing cell
        # (gx, gy, gz are unpacked but not used below; only the f*/c* pairs are.)
        gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
            guide_no_grad, xx, yy, sz, grid_sz, sigma_r)

        out_chans = []
        for chan in range(o_chans):
            output_components = []
            # Visit the 2 x 2 x 2 combinations of (f, c) coordinates; the
            # first of each pair is weighted by (1 - w), the second by w.
            for ix, x in enumerate([fx, cx]):
                wx_ = (1 - wx) if ix == 0 else wx
                for iy, y in enumerate([fy, cy]):
                    wy_ = (1 - wy) if iy == 0 else wy
                    for iz, z in enumerate([fz, cz]):
                        wz_ = (1 - wz) if iz == 0 else wz

                        # Linear offset into flat_grid, with x varying fastest,
                        # then y, z, cc and finally the output channel.
                        linear_idx = x + grid_sz * (y + grid_sz *
                                                    (z + sigma_r *
                                                     (cc + chan *
                                                      (i_chans + 1))))
                        flat_linear_idx = C.reshape(linear_idx,
                                                    [(i_chans + 1) * sz * sz])
                        # Slice
                        interp = C.gather(flat_grid, flat_linear_idx)
                        interp_fsz = C.reshape(
                            interp, [i_chans + 1, sz, sz]) * wx_ * wy_ * wz_
                        output_components.append(interp_fsz)

            # Sum the eight weighted lookups, then apply them to the image:
            # the first i_chans slices multiply the scaled image, the last
            # slice is added as an offset before reducing over axis 0.
            out_coeffs = sum(output_components)
            out_chan = C.reduce_sum(
                out_coeffs[:i_chans] * (im_scale * im) + out_coeffs[-1], 0)
            out_chans.append(out_chan)
        out = C.splice(*out_chans, axis=0)

        return out
Exemple #16
0
def gather(operand, condition, name=''):
    '''
    Sanitize ``operand`` and ``condition`` and forward them to
    ``cntk.gather``, returning the ``output()`` of the resulting node.

    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network
    Returns:
        the value returned by calling ``output()`` on the result of ``cntk.gather``
    '''
    # Imported locally under an alias so the call below is clearly the
    # cntk-level function rather than this wrapper.
    from cntk import gather as cntk_gather

    sanitized_operand = sanitize_input(operand, get_data_type(operand))
    sanitized_condition = sanitize_input(condition, get_data_type(condition))
    node = cntk_gather(sanitized_operand, sanitized_condition, name)
    return node.output()
Exemple #17
0
def gather(operand, condition, name=''):
    '''
    Wrapper around ``cntk.gather``: both arguments are passed through
    ``sanitize_input`` (using the data type reported by ``get_data_type``)
    before the call, and the node's ``output()`` is returned.

    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network
    Returns:
        the result of ``output()`` on the node created by ``cntk.gather``
    '''
    from cntk import gather as _gather_impl

    return _gather_impl(
        sanitize_input(operand, get_data_type(operand)),
        sanitize_input(condition, get_data_type(condition)),
        name,
    ).output()
Exemple #18
0
 def flatten_and_gather(x, y):
     """Gather from ``flat_im`` at the linear offsets built from ``x`` and ``y``.

     ``w``, ``h``, ``c``, ``chan_idx``, ``batch_idx`` and ``flat_im`` come
     from the enclosing scope. The result is reshaped to the shape of the
     computed index tensor.
     """
     offsets = x + w * y + w * h * chan_idx + w * h * c * batch_idx
     # gather operates on the flattened index list; restore the shape after.
     flat_offsets = C.reshape(offsets, [-1])
     picked = C.gather(flat_im, flat_offsets)
     return C.reshape(picked, offsets.shape)
Exemple #19
0
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int (kept for interface compatibility; not read by this function)
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: :class:`~cntk.ops.functions.Function`
        class_probs: :class:`~cntk.ops.functions.Function`
        all_probs: a list of :class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    # The outputs are split into ceil(sqrt(N)) classes, each holding the
    # same number of outputs.
    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # First layer: distribution over classes.
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # Second layer: pick the weights/bias belonging to the target class.
    # All ops are taken from the ``C`` namespace for consistency with the
    # rest of this function.
    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    # Select the probability of the target output within its class.
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    # Select the probability of the target class.
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
Exemple #20
0
import cntk as C
import numpy as np

# Reference values: a two-element float32 vector.
c = np.asarray([0, 1], dtype='f')
# Index values 0..5 as float32.
# NOTE(review): these indices exceed the two-element reference ``x`` --
# confirm whether exercising out-of-range indices is the purpose of this script.
index_values = np.arange(6, dtype='f')

# Neither input has dynamic axes; only ``x`` requests a gradient.
x = C.input_variable((2), needs_gradient=True, dynamic_axes=[])
y = C.input_variable((6), needs_gradient=False, dynamic_axes=[])

output = C.gather(x, y)
loss = C.reduce_sum(output)

feed = {y: index_values, x: c}
print(loss.grad(feed))
Exemple #21
0
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int (accepted so existing callers keep working; this function does not read it)
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: :class:`~cntk.ops.functions.Function`
        class_probs: :class:`~cntk.ops.functions.Function`
        all_probs: a list of :class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    # Number of classes is ceil(sqrt(num_output_classes)); every class holds
    # that same number of outputs.
    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # Layer 1: probabilities over the classes.
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # Layer 2: look up the weight matrix and bias of the target class.
    # Every op is referenced through ``C`` so the function does not depend on
    # separately imported bare names.
    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    # Dot with the one-hot target to keep only the target output's probability.
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    # Dot with the one-hot target to keep only the target class's probability.
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
Exemple #22
0
def main():
    """Build a CNTK graph that slices a parameter grid at guide-dependent positions.

    The visible body prints the CNTK version, sets up coordinate constants,
    builds the gather-based interpolation graph and a squared-error loss, and
    prepares NumPy input data.

    NOTE(review): as shown here, the function ends after ``inputs`` is built;
    ``loss``, ``grid_data`` and ``inputs`` are never consumed. The snippet
    looks truncated -- confirm against the full source.
    """
    print("version", C.__version__)
    bs = 1
    n_chans = 1

    sigma_s = 16
    sigma_r = 12

    # 4x4x1024x1024
    # 4x12x64x64

    sz = 256
    # sz = 1024
    # Grid resolution: full size divided (integer division) by sigma_s.
    small_sz = sz // sigma_s

    # Coordinate grids over the image and over (channel, batch).
    yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
    cc, bb = np.meshgrid(np.arange(0, n_chans), np.arange(0, bs))

    # Add two leading singleton axes to the spatial coordinates ...
    xx = np.expand_dims(xx, 0)
    xx = np.expand_dims(xx, 0)
    yy = np.expand_dims(yy, 0)
    yy = np.expand_dims(yy, 0)

    # ... and two trailing singleton axes to the batch/channel coordinates.
    bb = np.expand_dims(bb, 2)
    bb = np.expand_dims(bb, 3)
    cc = np.expand_dims(cc, 2)
    cc = np.expand_dims(cc, 3)

    # Compute graph
    grid = C.Parameter([bs, n_chans, sigma_r, small_sz, small_sz], )
    # grid = C.input_variable(
    #     [bs, n_chans, sigma_r, small_sz, small_sz],
    #     dynamic_axes=[], needs_gradient=True)
    guide = C.input_variable([bs, sz, sz],
                             dynamic_axes=[],
                             needs_gradient=True)
    # Second guide input, declared without needs_gradient; used only for the
    # cell coordinates below.
    guide_non_diff = C.input_variable([bs, sz, sz], dynamic_axes=[])

    # Coordinates
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    cc = C.Constant(cc, cc.shape)
    bb = C.Constant(bb, bb.shape)

    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        guide, xx, yy, sz, small_sz, sigma_r, bs)

    # Trilerp weights
    # NOTE(review): only wz is wrapped in C.abs; wx and wy are not --
    # confirm whether that asymmetry is intended.
    wx = (gx_d - 0.5 - fx_d)
    wy = (gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    # (gx, gy, gz are unpacked but not used below.)
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(guide_non_diff, xx, yy, sz,
                                                    small_sz, sigma_r, bs)

    output_components = []
    # Visit the 2 x 2 x 2 combinations of (f, c) coordinates; the first of
    # each pair is weighted by (1 - w), the second by w.
    for ix, x in enumerate([fx, cx]):
        wx_ = (1 - wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
            wy_ = (1 - wy) if iy == 0 else wy
            for iz, z in enumerate([fz, cz]):
                wz_ = (1 - wz) if iz == 0 else wz
                # Linear offset into the flattened grid, with x varying
                # fastest, then y, z, channel and batch.
                linear_idx = x + small_sz * (y + small_sz *
                                             (z + sigma_r *
                                              (cc + n_chans * bb)))

                # Flatten data for gather op
                flat_grid = C.reshape(
                    grid, [bs * small_sz * small_sz * sigma_r * n_chans])
                flat_linear_idx = C.reshape(linear_idx,
                                            [bs * n_chans * sz * sz])

                # Slice
                interp = C.gather(flat_grid, flat_linear_idx)
                interp_fsz = C.reshape(interp, [bs, n_chans, sz, sz])
                output_components.append(interp_fsz * wz_ * wx_ * wy_)

    # Sum of the eight weighted lookups.
    out = sum(output_components)
    loss = C.squared_error(out, guide)

    # svg = C.logging.graph.plot(out, "/output/graph.svg")

    grid_data = np.random.uniform(size=(bs, n_chans, sigma_r, small_sz,
                                        small_sz)).astype(np.float32)

    # guide_data = np.random.uniform(
    #     size=(bs, sz, sz)).astype(np.float32)
    # Load an image from a fixed path, average over axis 2, crop to sz x sz.
    # NOTE(review): `skio` is presumably skimage.io -- verify the import.
    guide_data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
        np.float32)
    # Add a leading axis and scale by 1/255.
    guide_data = np.expand_dims(guide_data, 0) / 255.0

    # The same data feeds both guide inputs.
    inputs = {guide: guide_data, guide_non_diff: guide_data}
Exemple #23
0
 def flatten_and_gather(x, y, z):
     """Gather from ``flat_grid`` at the linear offsets built from ``x``, ``y`` and ``z``.

     ``gw``, ``gh``, ``gd``, ``cg``, ``c_idx``, ``batch_idx`` and
     ``flat_grid`` come from the enclosing scope. The result is reshaped to
     the shape of the computed index tensor.
     """
     offsets = x + gw * y + gw * gh * z + c_idx * gw * gh * gd + batch_idx * gw * gh * gd * cg
     # gather operates on the flattened index list; restore the shape after.
     flat_offsets = C.reshape(offsets, [-1])
     picked = C.gather(flat_grid, flat_offsets)
     return C.reshape(picked, offsets.shape)