Example #1
def Linear(shape, _inf, bias=True, init=_default_initializer, init_bias=0, input_rank=None, map_rank=None):
    out_shape = _as_tuple(shape)

    # TODO: implement the full semantics of the BrainScript code
    #inputShape =
    #    if       BS.Constants.IsNone (inputRank) then Inferred  # not given: one Inferred, which will get expanded
    #    else if !BS.Constants.IsNone (mapRank)   then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
    #    else Repeat (inputRank, Inferred)
    #W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
    #b = ParameterTensor {outDim, initValue=0}
    #outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
    #inferInputRankToMap =
    #    if      !BS.Constants.IsNone (inputRank) then -1  # means not specified
    #    else if  BS.Constants.IsNone (mapRank)   then 0   # default to 'use all input dims'
    #    else mapRank
    #apply (x) =
    #    if bias
    #    then Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap) + b
    #    else Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap)

    W = Parameter(_inf.shape + out_shape, init=init     , name='W')
    b = Parameter(             out_shape, init=init_bias, name='b') if bias else None
    x = Placeholder(_inf=_inf, name='linear_arg')
    apply_x = Function.__matmul__(x, W) + b if bias else \
              Function.__matmul__(x, W)
    _name_and_extend_Function(apply_x, 'Linear')
    return apply_x
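
For reference, here is what the layer computes once shapes are resolved, as a runnable NumPy sketch (shapes are illustrative; the CNTK version infers the input dimension from _inf):

import numpy as np

x = np.random.rand(3, 5).astype(np.float32)  # batch of 3, input dim 5
W = np.random.rand(5, 4).astype(np.float32)  # learned weight, output dim 4
b = np.zeros(4, dtype=np.float32)            # learned bias (init_bias=0)
y = x @ W + b                                # what apply_x computes per sample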
Example #2
def Embedding(shape, _inf, weights=None, init=_default_initializer, transpose=False):
    shape = _as_tuple(shape)
    full_shape = (shape + _inf.shape) if transpose else (_inf.shape + shape)
    if weights is None:  # no weights given: learn the embedding
        E = Parameter(full_shape, init=init, name='E')
    else:                # weights given: use them as constant
        UntestedBranchError("Embedding, from constant")
        E = Constant(full_shape, init=weights, name='E')  # TODO: can 'weights' be a CNTK object already? Then how to do this?
    x = Placeholder(_inf=_inf, name='embedding_arg')
    apply_x = Function.__matmul__(E, x) if transpose else \
              Function.__matmul__(x, E)     # x is expected to be sparse one-hot
    _name_and_extend_Function(apply_x, 'Embedding')
    return apply_x
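
The matmul with a sparse one-hot input is just a table lookup: a one-hot row selects the matching row of E. A runnable NumPy sketch (sizes are illustrative):

import numpy as np

vocab_size, emb_dim = 6, 3
E = np.random.rand(vocab_size, emb_dim)
x = np.eye(vocab_size)[[2, 5]]        # one-hot rows for tokens 2 and 5
assert np.allclose(x @ E, E[[2, 5]])  # matmul == row lookup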
Example #3
    def gru(dh, x):

        dhs = Sdh(dh)  # previous value, stabilized
        # note: input does not get a stabilizer here, user is meant to do that outside

        # projected contribution from input(s), hidden, and bias
        projx3 = b + times(x, W)
        projh2 = times(dhs, H)

        zt_proj = slice (projx3, stack_axis, 0*stacked_dim, 1*stacked_dim) + slice (projh2, stack_axis, 0*stacked_dim, 1*stacked_dim)
        rt_proj = slice (projx3, stack_axis, 1*stacked_dim, 2*stacked_dim) + slice (projh2, stack_axis, 1*stacked_dim, 2*stacked_dim)
        ct_proj = slice (projx3, stack_axis, 2*stacked_dim, 3*stacked_dim)

        zt = sigmoid (zt_proj)        # update gate z(t)

        rt = sigmoid (rt_proj)        # reset gate r(t)

        rs = dhs * rt        # previous hidden state, gated by the reset gate r(t)
        ct = activation (ct_proj + times(rs, H1))

        ht = (1 - zt) * ct + zt * dhs # hidden state ht / output

        # for comparison: CUDNN_GRU
        # i(t) = sigmoid(W_i x(t) +          R_i h(t-1)  + b_Wi + b_Ru)
        # r(t) = sigmoid(W_r x(t) +          R_r h(t-1)  + b_Wr + b_Rr)   --same up to here
        # h'(t) =   tanh(W_h x(t) + r(t) .* (R_h h(t-1)) + b_Wh + b_Rh)   --r applied after projection? Would make life easier!
        # h(t) = (1 - i(t) .* h'(t)) + i(t) .* h(t-1)                     --TODO: need to confirm bracketing with NVIDIA

        h = times(Sht(ht), Wmr) if has_projection else \
            ht

        # returns the new state as a named output (GRU has a single state, so no tuple)
        return Function.NamedOutput(h=h)
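
The same step in plain NumPy, as a minimal sketch assuming tanh activation and neither stabilizers nor projection (all names are local to the sketch): W is (input_dim, 3h) for the stacked [z; r; c] input projections, H is (h, 2h) for the stacked [z; r] recurrent projections, and H1 is (h, h) for the candidate, matching the slicing above:

import numpy as np

def sigmoid(v):
    return 1 / (1 + np.exp(-v))

def gru_step(dh, x, W, H, H1, b):
    h_dim = dh.shape[0]
    projx3 = b + x @ W  # stacked [z; r; c] contributions from the input
    projh2 = dh @ H     # stacked [z; r] contributions from h(t-1)
    zt = sigmoid(projx3[0*h_dim:1*h_dim] + projh2[0*h_dim:1*h_dim])  # update gate
    rt = sigmoid(projx3[1*h_dim:2*h_dim] + projh2[1*h_dim:2*h_dim])  # reset gate
    ct = np.tanh(projx3[2*h_dim:3*h_dim] + (rt * dh) @ H1)           # candidate
    return (1 - zt) * ct + zt * dh                                   # new h(t)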
Example #4
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
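
LambdaFunc itself is defined elsewhere in this test module. A minimal sketch consistent with how it is used here, an identity UserFunction that fires a callback whenever a predicate on the input holds (names and details are assumptions):

import cntk as C
from cntk.ops.functions import UserFunction

class LambdaFuncSketch(UserFunction):
    def __init__(self, arg, when=lambda arg: True, execute=lambda arg: None, name=''):
        self.when = when
        self.execute = execute
        super(LambdaFuncSketch, self).__init__([arg], name=name)

    def infer_outputs(self):
        # identity op: the output mirrors the single input
        return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)]

    def forward(self, argument, device=None, outputs_to_retain=None):
        if self.when(argument):
            self.execute(argument)   # side effect only
        return None, argument        # no state; pass the value through

    def backward(self, state, root_gradients):
        return root_gradients        # identity: gradients pass through unchanged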
Example #5
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Example #6
def test_override_deserialize(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    filepath = str(tmpdir / 'test_override_deserialize.dat')
    op.save(filepath)

    Function.register_udf_deserialize_callback(MyPlus._op_name(),
                                               lambda *x: MyPlusPlus(*x))

    op_reloaded = Function.load(filepath, device=dev)

    np.random.seed(1)

    for i in range(5):
        x_value = np.random.random((2, 2)).astype(np.float32)
        x_data = C.NDArrayView.from_dense(x_value, device=dev)
        result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)
        expected = 2 * (np.matmul(2 * (x_value + c1_value), w_value) + c2_value)
        assert np.allclose(result, expected)
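
MyPlus and MyPlusPlus are defined elsewhere in this module; the factor of 2 in expected implies that MyPlusPlus returns twice the plain sum. A hedged sketch of that override (class name and details are assumptions consistent with the arithmetic):

import cntk as C
from cntk.ops.functions import UserFunction

class MyPlusPlusSketch(UserFunction):
    def __init__(self, arg1, arg2, name='++'):
        super(MyPlusPlusSketch, self).__init__([arg1, arg2], name=name)

    def infer_outputs(self):
        return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)]

    def forward(self, arguments, device=None, outputs_to_retain=None):
        return None, 2 * (arguments[0] + arguments[1])  # '++' doubles the sum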
Example #8
def _inject_name(f, name):
    '''
    Call this at the end of any layer or block that takes an optional name argument.
    '''
    if name:
        if not isinstance(f, Function):
            f = Function(f)
        if len(f.outputs) == 1:
            f = alias(f, name=name)
        else:
            f = combine(list(f.outputs),
                        name=name)  # BUGBUG: Does this actually name things?
    return f
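
A hedged usage sketch (assumes the CNTK v2 API and that this module's Function/alias/combine are in scope, as above):

import cntk as C

x = C.input_variable(4)
f = C.times(x, C.parameter((4, 2)))
f = _inject_name(f, 'proj')  # single output -> alias(f, name='proj')
print(f.name)                # 'proj'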
Example #9
    def lstm(dh, dc, x):

        dhs = Sdh(dh)  # previous values, stabilized
        dcs = Sdc(dc)
        # note: input does not get a stabilizer here, user is meant to do that outside

        # projected contribution from input(s), hidden, and bias
        proj4 = b + times(x, W) + times(dhs, H)

        it_proj = slice(proj4, stack_axis, 0 * stacked_dim,
                        1 * stacked_dim)  # split along stack_axis
        bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
        ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
        ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

        # helper to inject peephole connection if requested
        def peep(x, c, C):
            return x + C * c if use_peepholes else x

        it = sigmoid(peep(it_proj, dcs, Ci))  # input gate(t)
        # TODO: should both activations be replaced?
        bit = it * activation(bit_proj)  # applied to tanh of input network

        ft = sigmoid(peep(ft_proj, dcs, Cf))  # forget-me-not gate(t)
        bft = ft * dc  # applied to cell(t-1)

        ct = bft + bit  # c(t) is sum of both

        ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
        ht = ot * activation(ct)  # applied to tanh(cell(t))

        c = ct  # cell value
        h = times(Sht(ht), Wmr) if has_projection else \
            ht

        # returns the new state as a tuple with names but order matters
        return (Function.NamedOutput(h=h), Function.NamedOutput(c=c))
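
The same step in plain NumPy, as a minimal sketch assuming tanh activation, no peepholes, no stabilizers and no projection; W is (input_dim, 4h), H is (h, 4h) and b is (4h,), stacked as [i; c; f; o] to match the slices above:

import numpy as np

def sigmoid(v):
    return 1 / (1 + np.exp(-v))

def lstm_step(dh, dc, x, W, H, b):
    h_dim = dh.shape[0]
    proj4 = b + x @ W + dh @ H                  # one fused projection, then split
    it = sigmoid(proj4[0*h_dim:1*h_dim])        # input gate i(t)
    bit = it * np.tanh(proj4[1*h_dim:2*h_dim])  # gated candidate input
    ft = sigmoid(proj4[2*h_dim:3*h_dim])        # forget gate f(t)
    ct = ft * dc + bit                          # new cell state c(t)
    ot = sigmoid(proj4[3*h_dim:4*h_dim])        # output gate o(t)
    ht = ot * np.tanh(ct)                       # new hidden state h(t)
    return ht, ct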
Example #10
def test_native_user_function(tmpdir):

    if not C.cntk_py.is_native_user_function_registered('NativeUserTimesOp'):
        C.ops.register_native_user_function(
            'NativeUserTimesOp',
            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
            'CreateUserTimesFunction')

    dev = C.cpu()
    x = C.input_variable((2))
    w = C.parameter((2, 2),
                    init=np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32),
                    device=dev)
    attributes = {
        'param_rank': 2,
        'padding': True,
        'none': None,
        'nested lists': [[1, 2, 3], [4, 5, 6]],
        'string': 'string',
        'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3))
    }

    def verify_attributes(udf):
        for k, v in attributes.items():
            if not isinstance(v, np.ndarray):
                assert udf.attributes[k] == v
            else:
                assert (udf.attributes[k] == v).all()

    op = C.ops.native_user_function('NativeUserTimesOp', [w, x], attributes,
                                    'native_user_times_function')

    verify_attributes(op.owner)

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    x_data = C.NDArrayView.from_dense(np.asarray([[0.1, 0.2], [-0.1, 0.3]],
                                                 dtype=np.float32),
                                      device=dev)
    result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)

    assert np.allclose(result, [[-0.05, 0.5], [-0.2, 0.25]])

    native_times_primitive = op_reloaded.find_by_name(
        'native_user_times_function')

    verify_attributes(native_times_primitive)
Example #11
def test_both_flavors_of_user_functions(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)
    op_reloaded = Function.load(filepath, device=dev)

    np.random.seed(1)

    for i in range(5):
        x_value = np.random.random((2, 2)).astype(np.float32)
        x_data = C.NDArrayView.from_dense(x_value, device=dev)
        result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)
        expected = np.matmul((x_value + c1_value), w_value) + c2_value
        assert np.allclose(result, expected)
Example #12
def test_override_serialize(tmpdir):
    dev = C.cpu()
    a, b = 1.2322341, -0.29084
    op = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    op = MyPlusPlus([op, op], '+++')
    op = MyPlusPlus([op, op], '++++')
    op = C.user_function(op)
    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)

    assert result1 == op_reloaded.eval({}, device=dev)
Example #15
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters,
                       lr_per_sample,
                       momentum_time_constant,
                       True,
                       minibatch_size=0)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    with open(filepath, 'rb') as f:
        buf = f.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
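
MyPlus is defined elsewhere in this test module. A sketch of the shape it must have for this test to pass (call counters bumped in forward/backward and round-tripped through serialize/deserialize; details are assumptions):

import cntk as C
from cntk.ops.functions import UserFunction

class MyPlusSketch(UserFunction):
    def __init__(self, arg1, arg2, name='my_plus'):
        super(MyPlusSketch, self).__init__([arg1, arg2], name=name)
        self.forward_calls = 0
        self.backward_calls = 0

    def infer_outputs(self):
        return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)]

    def forward(self, arguments, device=None, outputs_to_retain=None):
        self.forward_calls += 1
        return None, arguments[0] + arguments[1]  # elementwise plus

    def backward(self, state, root_gradients):
        self.backward_calls += 1
        return root_gradients                     # d(a+b)/da = 1

    def serialize(self):
        # saved with the model; surfaces as serialize()['state'] after reload
        return {'forward_calls': self.forward_calls,
                'backward_calls': self.backward_calls}

    @staticmethod
    def deserialize(inputs, name, state):
        f = MyPlusSketch(inputs[0], inputs[1], name)
        f.forward_calls = state['forward_calls']
        f.backward_calls = state['backward_calls']
        return f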
Example #16
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    with open(filepath, 'rb') as f:
        buf = f.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Example #17
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = parameter(shape=(dim, ), init=1)
    i = input(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
    m = user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)
    z = Function.load(
        filepath,
        udf_factory_callback_map={
            'conditional_exec_lambda':
            lambda x, *unused: LambdaFunc(
                x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
        })
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (z+0, z+0), \
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                      True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Example #18
def test_native_user_function(tmpdir):

    if not C.cntk_py.is_native_user_function_registered('NativeUserTimesOp'):
        C.ops.register_native_user_function('NativeUserTimesOp', 'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'), 'CreateUserTimesFunction')

    dev = C.cpu()
    x = C.input_variable((2))
    w = C.parameter((2, 2), init=np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32), device=dev)
    attributes = {'param_rank': 2,
                  'padding': True,
                  'none': None,
                  'nested lists': [[1, 2, 3], [4, 5, 6]],
                  'string': 'string',
                  'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3))
                  }

    def verify_attributes(udf):
        for k, v in attributes.items():
            if not isinstance(v, np.ndarray):
                assert udf.attributes[k] == v
            else:
                assert (udf.attributes[k] == v).all()

    op = C.ops.native_user_function('NativeUserTimesOp', [w, x], attributes, 'native_user_times_function')

    verify_attributes(op.owner)

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    x_data = C.NDArrayView.from_dense(np.asarray([[0.1, 0.2], [-0.1, 0.3]], dtype=np.float32), device=dev)
    result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)

    assert np.allclose(result, [[-0.05, 0.5], [-0.2, 0.25]])

    native_times_primitive = op_reloaded.find_by_name('native_user_times_function')

    verify_attributes(native_times_primitive)
Example #19
if __name__ == '__main__':
    #try_set_default_device(cpu())

    from _cntk_py import set_fixed_random_seed
    set_fixed_random_seed(1)

    # hook up data
    vocab, i2w, w2i = get_vocab(os.path.join(DATA_DIR, VOCAB_FILE))

    # create inputs and create model
    model = create_model()

    # train
    train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), True)
    valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
    train(train_reader, valid_reader, vocab, i2w, model, max_epochs=30, epoch_size=908241)

    test_epoch = 10
    model = Function.load(model_path(test_epoch))

    # test string error rate on decoded output
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_decoding(test_reader, model, i2w)

    # test same metric same as in training on test set
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_metric(test_reader, model)

    # try the model out in an interactive session
    interactive_session(model, vocab, i2w, show_attention=True)
Example #20
def BlockFunction(op_name, name):
    '''
    Decorator for defining a @Function as a BlockFunction. Same as @Function, but wrap the content into an as_block().
    '''
    return lambda f: Function(f, make_block=True, op_name=op_name, name=name)
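
A hedged usage sketch (assumes CNTK 2.x, where the decorator is importable as cntk.layers.blocks.BlockFunction):

import cntk as C
from cntk.layers.blocks import BlockFunction

W = C.parameter((4, 2), init=C.glorot_uniform())
b = C.parameter((2,))

@BlockFunction('MyDense', 'dense1')
def my_dense(x):
    return C.times(x, W) + b  # shows up as one 'MyDense' block in the graph

y = my_dense(C.input_variable(4))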
Example #21
    def rnn(dh, x):
        dhs = Sdh(dh)  # previous value, stabilized
        ht = activation (times(x, W) + times(dhs, H) + b)
        h = times(Sht(ht), Wmr) if has_projection else \
            ht
        return Function.NamedOutput(h=h)
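
The recurrence itself in plain NumPy, assuming tanh activation and no stabilizer or projection:

import numpy as np

def rnn_step(dh, x, W, H, b):
    return np.tanh(x @ W + dh @ H + b)  # h(t) = act(x W + h(t-1) H + b)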
Example #22
    # hook up data
    vocab, i2w, w2i = get_vocab(os.path.join(DATA_DIR, VOCAB_FILE))

    # create inputs and create model
    model = create_model()

    # train
    train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), True)
    valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
    train(train_reader,
          valid_reader,
          vocab,
          i2w,
          model,
          max_epochs=30,
          epoch_size=908241)

    test_epoch = 10
    model = Function.load(model_path(test_epoch))

    # test string error rate on decoded output
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_decoding(test_reader, model, i2w)

    # test same metric same as in training on test set
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_metric(test_reader, model)

    # try the model out in an interactive session
    interactive_session(model, vocab, i2w, show_attention=True)