Example #1
import numpy as np
import cntk as C

def test_convert_dynamic_axis():
    # test fixed batch size
    batch_size = 4
    a = C.parameter(shape=(batch_size, 2, 3), init=1)
    dynamic_a = C.to_batch(a)
    assert len(dynamic_a.dynamic_axes) == 1
    assert dynamic_a.shape == (2, 3)

    x = C.input_variable((2, 3))
    y = x * dynamic_a

    # test gradient: d(x * dynamic_a)/da equals the input data
    data = np.arange(batch_size * 2 * 3).reshape(batch_size, 2, 3).astype('f')
    assert np.array_equal(y.grad({x: data}, [a]), data)

    const_a = C.unpack_batch(y)
    assert len(const_a.dynamic_axes) == 0
    assert const_a.shape == (C.FreeDimension, 2, 3)

    f = C.assign(a, const_a)
    f.eval({x: data})
    assert np.array_equal(a.value, data)

    # test reshape across the unpacked batch axis
    x = C.input_variable((2, 3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, 2, 3)

    const_y = C.reshape(const_x, (-1, 3))
    assert const_y.shape == (C.FreeDimension, 3)
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (3, )

    z = y * 2
    expected = data.reshape((8, 3)) * 2
    assert np.array_equal(z.eval({x: data}), expected)

    # test inferred dimension
    x = C.input_variable((C.InferredDimension, 3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, C.InferredDimension, 3)

    const_y = const_x * 2
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (C.InferredDimension, 3)
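
The test above packs a parameter's fixed leading dimension into the dynamic batch axis with C.to_batch, and C.unpack_batch reverses the conversion into a static FreeDimension axis so the result can be reshaped or assigned back into a parameter. Below is a condensed sketch of that round trip, assuming the same import cntk as C / import numpy as np as above; the variable names are purely illustrative.

p = C.parameter(shape=(4, 2, 3), init=1)   # leading dim plays the role of a fixed batch size
p_batched = C.to_batch(p)                  # shape (2, 3) with one dynamic (batch) axis
x = C.input_variable((2, 3))
packed = C.unpack_batch(x * p_batched)     # shape (FreeDimension, 2, 3), no dynamic axes
update = C.assign(p, packed)               # write the per-sample results back into the parameter
update.eval({x: np.ones((4, 2, 3), dtype=np.float32)})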
Example #2
import math

import numpy as np
import cntk as C

def hierarchical_softmax_layer(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two-layer hierarchical softmax function.

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=1), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=2), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=3), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=4), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.input_variable(input_dim)
        >>> h_target_class = C.input_variable([1])
        >>> h_target_output_in_class = C.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(minibatch_size * input_dim, dtype = np.float32), (minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(minibatch_size, dtype = np.float32), (minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})
        array([[ 0.031305],
               [ 0.003239],
               [ 0.990064]], dtype=float32)

    Args:
        input_var: :class:`~cntk.ops.functions.Function` that outputs a tensor with a batch axis
        num_output_classes: int
        target_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with a batch axis
        target_output_in_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with a batch axis
        batch_size: int
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: :class:`~cntk.ops.functions.Function`
        class_probs: :class:`~cntk.ops.functions.Function`
        all_probs: a list of :class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    class_probs = C.softmax(b1 + C.times(input_var, w1))

    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))
        cia = C.constant(i, shape=[batch_size, 1])
        cia = C.to_batch(cia)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        cia = C.sequence.broadcast_as(cia, class_probs)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
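
The returned output_prob is the probability of the target word, factored hierarchically as p(word) = p(class) * p(word | class), while all_probs holds the per-class output distributions needed to recover the full softmax. A hedged sketch of turning it into a training criterion follows, assuming the inputs and parameters are built exactly as in the docstring's doctest; the negative log-likelihood loss is a common choice here and not something this example prescribes.

h_z, class_probs, all_probs = hierarchical_softmax_layer(
    h_input, num_output_classes, h_target_class, h_target_output_in_class,
    minibatch_size, w1, b1, w2s, b2s)
loss = -C.log(h_z)   # negative log-likelihood of the target word probability
loss.eval({h_input: a, h_target_class: target_labels,
           h_target_output_in_class: target_output_in_labels})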