def Linear(shape, _inf, bias=True, init=_default_initializer, init_bias=0, input_rank=None, map_rank=None):
    out_shape = _as_tuple(shape)

    # TODO: implement the full semantics of the BrainScript code
    #inputShape =
    #    if BS.Constants.IsNone (inputRank) then Inferred  # not given: one Inferred, which will get expanded
    #    else if !BS.Constants.IsNone (mapRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
    #    else Repeat (inputRank, Inferred)
    #W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
    #b = ParameterTensor {outDim, initValue=0}
    #outputRank = Length (_AsArray (outDim))  # support outputs with tensor layouts
    #inferInputRankToMap =
    #    if !BS.Constants.IsNone (inputRank) then -1  # means not specified
    #    else if BS.Constants.IsNone (mapRank) then 0  # default to 'use all input dims'
    #    else mapRank
    #apply (x) =
    #    if bias
    #    then Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap) + b
    #    else Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap)

    W = Parameter(_inf.shape + out_shape, init=init,      name='W')
    b = Parameter(             out_shape, init=init_bias, name='b') if bias else None
    x = Placeholder(_inf=_inf, name='linear_arg')
    apply_x = Function.__matmul__(x, W) + b if bias else \
              Function.__matmul__(x, W)
    _name_and_extend_Function(apply_x, 'Linear')
    return apply_x
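# Why this shape layout: W has shape input_shape + out_shape, so the matmul
# contracts away the input dimensions and leaves out_shape, i.e. y = x @ W (+ b).
# A minimal, self-contained NumPy sketch of that computation (illustrative
# names only, not the CNTK API), assuming a 1-D input of dim 3 and out_shape (2,):
import numpy as np

def linear_sketch(x, W, b=None):
    # same contraction Function.__matmul__(x, W) performs, plus optional bias
    y = x @ W
    return y + b if b is not None else y

x = np.array([1.0, 2.0, 3.0])
W = np.ones((3, 2))
b = np.zeros(2)
print(linear_sketch(x, W, b))  # -> [6. 6.]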
def Embedding(shape, _inf, weights=None, init=_default_initializer, transpose=False):
    shape = _as_tuple(shape)
    full_shape = (shape + _inf.shape) if transpose else (_inf.shape + shape)
    if weights is None:  # no weights given: learn the embedding
        E = Parameter(full_shape, init=init, name='E')
    else:                # weights given: use them as constant
        UntestedBranchError("Embedding, from constant")
        E = Constant(full_shape, init=weights, name='E')  # TODO: can 'weights' be a CNTK object already? Then how to do this?
    x = Placeholder(_inf=_inf, name='embedding_arg')
    apply_x = Function.__matmul__(E, x) if transpose else \
              Function.__matmul__(x, E)  # x is expected to be sparse one-hot
    _name_and_extend_Function(apply_x, 'Embedding')
    return apply_x
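# Why multiplying a (sparse) one-hot x by E implements an embedding lookup:
# the product selects row i of E for token index i. A NumPy sketch
# (illustrative names only):
import numpy as np

vocab_size, emb_dim = 4, 3
E = np.arange(vocab_size * emb_dim, dtype=np.float32).reshape(vocab_size, emb_dim)
x = np.eye(vocab_size, dtype=np.float32)[2]  # one-hot for token index 2
assert np.allclose(x @ E, E[2])              # lookup of row 2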
def gru(dh, x):
    dhs = Sdh(dh)  # previous value, stabilized
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    projx3 = b + times(x, W)
    projh2 = times(dhs, H)

    zt_proj = slice (projx3, stack_axis, 0*stacked_dim, 1*stacked_dim) + slice (projh2, stack_axis, 0*stacked_dim, 1*stacked_dim)
    rt_proj = slice (projx3, stack_axis, 1*stacked_dim, 2*stacked_dim) + slice (projh2, stack_axis, 1*stacked_dim, 2*stacked_dim)
    ct_proj = slice (projx3, stack_axis, 2*stacked_dim, 3*stacked_dim)

    zt = sigmoid (zt_proj)         # update gate z(t)
    rt = sigmoid (rt_proj)         # reset gate r(t)
    rs = dhs * rt                  # "cell" c
    ct = activation (ct_proj + times(rs, H1))
    ht = (1 - zt) * ct + zt * dhs  # hidden state ht / output

    # for comparison: CUDNN_GRU
    # i(t) = sigmoid(W_i x(t) +          R_i h(t-1)  + b_Wi + b_Ri)
    # r(t) = sigmoid(W_r x(t) +          R_r h(t-1)  + b_Wr + b_Rr)  --same up to here
    # h'(t) =   tanh(W_h x(t) + r(t) .* (R_h h(t-1)) + b_Wh + b_Rh)  --r applied after projection? Would make life easier!
    # h(t) = (1 - i(t)) .* h'(t) + i(t) .* h(t-1)                    --TODO: need to confirm bracketing with NVIDIA

    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    # returns the new state as a tuple with names but order matters
    return Function.NamedOutput(h=h)
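# A self-contained NumPy reference for one GRU step as wired up above, with the
# stacked projection unrolled into separate weight blocks and the stabilizers
# omitted (illustrative names, not the CNTK API):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(x, h_prev, Wz, Wr, Wc, Hz, Hr, H1, bz, br, bc):
    z = sigmoid(x @ Wz + h_prev @ Hz + bz)        # update gate z(t)
    r = sigmoid(x @ Wr + h_prev @ Hr + br)        # reset gate r(t)
    c = np.tanh(x @ Wc + (r * h_prev) @ H1 + bc)  # candidate; reset applied before the H1 projection
    return (1 - z) * c + z * h_prev               # new hidden state h(t)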
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)
    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: LambdaFunc(x,
                                      when=lambda arg: np.sum(arg) > 1,
                                      execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample,
                                        momentum_time_constant, True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def test_override_deserialize(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    filepath = str(tmpdir / 'test_override_deserialize.dat')
    op.save(filepath)

    Function.register_udf_deserialize_callback(MyPlus._op_name(),
                                               lambda *x: MyPlusPlus(*x))

    op_reloaded = Function.load(filepath, device=dev)

    np.random.seed(1)

    for i in range(5):
        x_value = np.random.random((2, 2)).astype(np.float32)
        x_data = C.NDArrayView.from_dense(x_value, device=dev)
        result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)
        expected = 2 * (np.matmul(2 * (x_value + c1_value), w_value) + c2_value)
        assert np.allclose(result, expected)
def _inject_name(f, name):
    '''
    Call this at the end of any layer or block that takes an optional name argument.
    '''
    if name:
        if not isinstance(f, Function):
            f = Function(f)
        if len(f.outputs) == 1:
            f = alias(f, name=name)
        else:
            f = combine(list(f.outputs), name=name)  # BUGBUG: Does this actually name things?
    return f
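# Hedged usage sketch: a layer factory that accepts an optional name would call
# _inject_name on its result just before returning (illustrative only, assuming
# the surrounding CNTK layers module is in scope):
#
#   def Dense(shape, activation=identity, name=''):
#       ...
#       apply_x = ...  # the composed Function
#       return _inject_name(apply_x, name)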
def lstm(dh, dc, x):
    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H)

    it_proj  = slice(proj4, stack_axis, 0 * stacked_dim, 1 * stacked_dim)  # split along stack_axis
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj  = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj  = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # helper to inject peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))      # input gate(t)
    # TODO: should both activations be replaced?
    bit = it * activation(bit_proj)           # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))      # forget-me-not gate(t)
    bft = ft * dc                             # applied to cell(t-1)

    ct = bft + bit                            # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * activation(ct)                  # applied to tanh(cell(t))

    c = ct                                    # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    # returns the new state as a tuple with names but order matters
    return (Function.NamedOutput(h=h), Function.NamedOutput(c=c))
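# A self-contained NumPy reference for one LSTM step matching the gate order
# above (input, candidate, forget, output), with optional diagonal peepholes;
# stabilizers and the output projection are omitted (illustrative names):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def lstm_step(x, h_prev, c_prev, W, H, b, Ci=None, Cf=None, Co=None):
    proj4 = b + x @ W + h_prev @ H  # all four gate projections, stacked
    n = h_prev.shape[-1]
    it_p, bit_p, ft_p, ot_p = (proj4[..., k * n:(k + 1) * n] for k in range(4))
    peep = lambda p, c, P: p + P * c if P is not None else p
    it = sigmoid(peep(it_p, c_prev, Ci))   # input gate(t)
    ft = sigmoid(peep(ft_p, c_prev, Cf))   # forget gate(t)
    c = ft * c_prev + it * np.tanh(bit_p)  # new cell state c(t)
    ot = sigmoid(peep(ot_p, c, Co))        # output gate peeks at the new cell
    h = ot * np.tanh(c)                    # new hidden state h(t)
    return h, c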
def test_native_user_function(tmpdir):
    if not C.cntk_py.is_native_user_function_registered('NativeUserTimesOp'):
        C.ops.register_native_user_function(
            'NativeUserTimesOp',
            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
            'CreateUserTimesFunction')

    dev = C.cpu()
    x = C.input_variable((2))
    w = C.parameter((2, 2),
                    init=np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32),
                    device=dev)

    attributes = {
        'param_rank': 2,
        'padding': True,
        'none': None,
        'nested lists': [[1, 2, 3], [4, 5, 6]],
        'string': 'string',
        'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3))
    }

    def verify_attributes(udf):
        for k, v in attributes.items():
            if not isinstance(v, np.ndarray):
                assert udf.attributes[k] == v
            else:
                assert (udf.attributes[k] == v).all()

    op = C.ops.native_user_function('NativeUserTimesOp', [w, x], attributes,
                                    'native_user_times_function')

    verify_attributes(op.owner)

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    x_data = C.NDArrayView.from_dense(np.asarray([[0.1, 0.2], [-0.1, 0.3]],
                                                 dtype=np.float32), device=dev)
    result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)

    assert np.allclose(result, [[-0.05, 0.5], [-0.2, 0.25]])

    native_times_primitive = op_reloaded.find_by_name('native_user_times_function')

    verify_attributes(native_times_primitive)
def test_both_flavors_of_user_functions(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)

    np.random.seed(1)

    for i in range(5):
        x_value = np.random.random((2, 2)).astype(np.float32)
        x_data = C.NDArrayView.from_dense(x_value, device=dev)
        result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)
        expected = np.matmul((x_value + c1_value), w_value) + c2_value
        assert np.allclose(result, expected)
def test_override_serialize(tmpdir):
    dev = C.cpu()
    a, b = 1.2322341, -0.29084
    op = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    op = MyPlusPlus([op, op], '+++')
    op = MyPlusPlus([op, op], '++++')
    op = C.user_function(op)
    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)

    assert result1 == op_reloaded.eval({}, device=dev)
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True, minibatch_size=0)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')
    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']
    assert state['forward_calls'] == state['backward_calls'] == 100
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = parameter(shape=(dim,), init=1)
    i = input(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)
    z = Function.load(filepath,
                      udf_factory_callback_map={
                          'conditional_exec_lambda':
                              lambda x, *unused: LambdaFunc(x,
                                                            when=lambda arg: np.sum(arg) > 1,
                                                            execute=cb.inc)
                      })

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (z + 0, z + 0),
                      [momentum_sgd(z.parameters, lr_per_sample,
                                    momentum_time_constant, True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
if __name__ == '__main__':
    #try_set_default_device(cpu())

    from _cntk_py import set_fixed_random_seed
    set_fixed_random_seed(1)

    # hook up data
    vocab, i2w, w2i = get_vocab(os.path.join(DATA_DIR, VOCAB_FILE))

    # create inputs and create model
    model = create_model()

    # train
    train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), True)
    valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
    train(train_reader, valid_reader, vocab, i2w, model, max_epochs=30, epoch_size=908241)

    test_epoch = 10
    model = Function.load(model_path(test_epoch))

    # test string error rate on decoded output
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_decoding(test_reader, model, i2w)

    # test the same metric as in training on the test set
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_metric(test_reader, model)

    # try the model out in an interactive session
    interactive_session(model, vocab, i2w, show_attention=True)
def BlockFunction(op_name, name):
    '''
    Decorator for defining a @Function as a BlockFunction. Same as @Function,
    but wraps the content into an as_block().
    '''
    return lambda f: Function(f, make_block=True, op_name=op_name, name=name)
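# Hedged usage sketch (assuming the CNTK layers environment where Function,
# Dense, and relu are in scope): the decorated composite then appears as a
# single named block in the model graph, e.g. for find_by_name():
#
#   @BlockFunction('MyLayer', 'my_layer')
#   def my_layer(x):
#       return Dense(64, activation=relu)(x)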
def rnn(dh, x):
    dhs = Sdh(dh)  # previous value, stabilized
    ht = activation (times(x, W) + times(dhs, H) + b)
    h = times(Sht(ht), Wmr) if has_projection else \
        ht
    return Function.NamedOutput(h=h)
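# The plain recurrence above in NumPy, for reference (illustrative names;
# stabilizer and output projection omitted): h(t) = activation(x W + h(t-1) H + b)
import numpy as np

def rnn_step(x, h_prev, W, H, b, activation=np.tanh):
    return activation(x @ W + h_prev @ H + b)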