def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op, device_id):
    if device_id == -1:
        pytest.skip('only runs on GPU')
    input_dim = 5
    hidden_dim = 3
    data = [np.random.random((20, input_dim)).astype(np.float32),
            np.random.random((10, input_dim)).astype(np.float32),
            np.random.random((40, input_dim)).astype(np.float32)]
    input_var = C.sequence.input_variable(shape=(input_dim,))
    W1 = C.parameter((-1, 1), init=C.glorot_uniform())
    W2 = C.parameter((-1, 1), init=C.glorot_uniform())
    cudnn_rnn1 = C.optimized_rnnstack(input_var, W1, hidden_dim, num_layers=num_layers,
                                      bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
    cudnn_rnn2 = C.optimized_rnnstack(dense1, W2, hidden_dim, num_layers=num_layers,
                                      bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
    # test shared parameter W2
    cudnn_rnn3 = C.optimized_rnnstack(dense2, W2, hidden_dim, num_layers=num_layers,
                                      bidirectional=bidirectional, recurrent_op=recurrent_op)

    def blocked(d):
        blocked_W = C.parameter((-1, d), init=C.glorot_uniform())

        @C.layers.BlockFunction('', '')
        def func(x):
            return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
        return func

    cudnn_model = C.layers.Sequential([blocked(hidden_dim), blocked(2 * hidden_dim),
                                       blocked(3 * hidden_dim)])(cudnn_rnn3)
    cudnn_out = cudnn_model.eval({input_var: data})
    model = C.misc.convert_optimized_rnnstack(cudnn_model)
    # make sure the original cuDNN model is intact
    cudnn_out2 = cudnn_model.eval({input_var: data})
    assert all(np.allclose(cudnn_out[i], cudnn_out2[i]) for i in range(len(cudnn_out)))
    model_out = model.eval({model.arguments[0]: data})
    assert all(np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op, device_id):
    if device_id == -1:
        pytest.skip('only runs on GPU')
    input_dim = 5
    hidden_dim = 3
    data = [np.random.random((20, input_dim)).astype(np.float32),
            np.random.random((10, input_dim)).astype(np.float32),
            np.random.random((40, input_dim)).astype(np.float32)]
    input_var = C.sequence.input_variable(shape=(input_dim,))
    W1 = C.parameter((-1, 1), init=C.glorot_uniform())
    W2 = C.parameter((-1, 1), init=C.glorot_uniform())
    cudnn_rnn1 = C.optimized_rnnstack(input_var, W1, hidden_dim, num_layers=num_layers,
                                      bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
    cudnn_rnn2 = C.optimized_rnnstack(dense1, W2, hidden_dim, num_layers=num_layers,
                                      bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
    # test shared parameter W2
    cudnn_model = C.optimized_rnnstack(dense2, W2, hidden_dim, num_layers=num_layers,
                                       bidirectional=bidirectional, recurrent_op=recurrent_op)
    cudnn_out = cudnn_model.eval({input_var: data})
    model = C.utils.convert_optimized_rnnstack(cudnn_model)
    # make sure the original cuDNN model is intact
    cudnn_out2 = cudnn_model.eval({input_var: data})
    assert all(np.allclose(cudnn_out[i], cudnn_out2[i]) for i in range(len(cudnn_out)))
    model_out = model.eval({model.arguments[0]: data})
    assert all(np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
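# A distilled round trip of the conversion exercised by the tests above: build a
# cuDNN-backed optimized_rnnstack, convert it to a generic CNTK graph with
# C.misc.convert_optimized_rnnstack, and check the outputs match. A minimal sketch
# with illustrative dimensions, assuming a GPU build of CNTK (optimized_rnnstack
# itself only evaluates on GPU).
import numpy as np
import cntk as C

def _convert_roundtrip_sketch(in_dim=5, hidden_dim=3):
    x = C.sequence.input_variable((in_dim,))
    W = C.parameter((C.InferredDimension, in_dim), init=C.glorot_uniform())
    cudnn_model = C.optimized_rnnstack(x, W, hidden_dim, num_layers=1,
                                       bidirectional=False, recurrent_op='lstm')
    generic_model = C.misc.convert_optimized_rnnstack(cudnn_model)
    seq = [np.random.random((8, in_dim)).astype(np.float32)]  # one 8-step sequence
    assert np.allclose(cudnn_model.eval({x: seq}),
                       generic_model.eval({generic_model.arguments[0]: seq}))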
def _func(operand):
    return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
def test_rnn(device_id):
    if device_id == -1:
        pytest.skip('Test only runs on GPU')
    batch_size = 8
    sequence_len = 100
    vocab_dim = 20
    embed_dim = 10
    hidden_dim = 7

    # plain LSTM Recurrence in float16
    input = C.cast(C.sequence.input_variable(()), np.float16)
    with C.default_options(dtype=np.float16):
        embed = C.layers.Embedding(embed_dim)(C.one_hot(input, num_classes=vocab_dim,
                                                        sparse_output=False))
        z = C.layers.Recurrence(C.layers.LSTM(hidden_dim))(embed)
    feed = np.floor(np.random.rand(batch_size, sequence_len).astype(np.float32) * (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)

    # cuDNN optimized_rnnstack in float16; the weight parameter must match the dtype
    num_layers = 2
    W = C.parameter((C.InferredDimension, embed_dim), init=C.glorot_uniform(), dtype=np.float16)
    with C.default_options(dtype=np.float16):
        z = C.optimized_rnnstack(embed, W, hidden_dim, num_layers)
    feed = np.floor(np.random.rand(batch_size, sequence_len).astype(np.float32) * (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)
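# The float16 pattern above, reduced to its essentials: inputs are cast to
# np.float16 and the optimized_rnnstack weight parameter is created with
# dtype=np.float16 so dtypes agree under C.default_options(dtype=np.float16).
# A sketch only, GPU-only, with illustrative dimensions.
import numpy as np
import cntk as C

def _fp16_rnnstack_sketch(in_dim=10, hidden_dim=7):
    x = C.cast(C.sequence.input_variable((in_dim,)), np.float16)
    W = C.parameter((C.InferredDimension, in_dim), init=C.glorot_uniform(), dtype=np.float16)
    with C.default_options(dtype=np.float16):
        return C.optimized_rnnstack(x, W, hidden_dim, num_layers=2)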
def test_cntk_cudnn():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_lstm()
    else:
        cntk_baseline_lstm()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    input_var = C.sequence.input_variable(shape=(in_dim,))
    data = {input_var: data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)

    W = C.parameter((-1, dim,), init=C.glorot_uniform())
    cudnn_fwbw = C.optimized_rnnstack(input_var, W, dim, 1, bidirectional=True, recurrent_op='lstm')
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn', var_type=cstk.RnnAttr,
             attr=cstk.RnnAttr(bidirectional=True, op_type='lstm',
                               input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn_out')

    ci.assign('cntk_birnn_cudnn', load=True, load_name='cntk_birnn')
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')

    ci.fetch('cntk_birnn_cudnn', save=True)
    ci.assign('cntk_birnn_cudnn', load=True)
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')

    ci.reset()
def func(x):
    return C.optimized_rnnstack(x, W, hidden_dim, num_layers, bidirectional,
                                recurrent_op=recurrent_op, name=name)
def GenRNN():
    feature = C.sequence.input_variable((64,), np.float32)
    model = C.optimized_rnnstack(feature,
                                 C.parameter((C.InferredDimension, 64,), init=C.glorot_uniform()),
                                 128, 2, True, 'rnnReLU')
    data_feature = np.random.rand(1, 16, 64).astype(np.float32)
    data_output = np.asarray(model.eval(data_feature))
    Save('test_RNN', model, data_feature, data_output)
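# The positional call in GenRNN above maps onto the keyword signature used
# elsewhere in this section (weights, hidden_size, num_layers, bidirectional,
# recurrent_op); a sketch assuming a GPU device, with dimensions following GenRNN.
import numpy as np
import cntk as C

feature = C.sequence.input_variable((64,), np.float32)
W = C.parameter((C.InferredDimension, 64), init=C.glorot_uniform())
model = C.optimized_rnnstack(feature, weights=W, hidden_size=128, num_layers=2,
                             bidirectional=True, recurrent_op='rnnReLU')
output = model.eval(np.random.rand(1, 16, 64).astype(np.float32))  # one 16-step sequence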
def test_OptimizedRNNStack(bidirectional, num_layers, input_size, hidden_size,
                           recurrent_op, tmpdir, device_id):
    pytest.skip('Need to support new ONNX spec.')
    if device_id == -1:
        pytest.skip('Test only runs on GPU')
    dev = cntk_device(device_id)
    from _cntk_py import constant_initializer
    model_filename = ('optimized_rnn_stack_' + ('bi' if bidirectional else 'uni') +
                      '_layers' + str(num_layers) + '_inp' + str(input_size) +
                      '_hid' + str(hidden_size))
    W = C.parameter((C.InferredDimension, input_size), constant_initializer(0.1), device=dev)
    x = C.sequence.input_variable(shape=(input_size,))
    s = np.asarray(np.random.uniform(-1, 1, (5, input_size)), dtype=np.float32)
    f = C.optimized_rnnstack(x, W, hidden_size, num_layers,
                             bidirectional=bidirectional, recurrent_op=recurrent_op,
                             name='MyRnnStack')
    f.parameters[0].value = np.reshape(
        np.arange(np.prod(f.parameters[0].value.shape), dtype=np.float32),
        f.parameters[0].value.shape)
    verify_one_input(f, s, tmpdir, model_filename)
def test_cntk_cudnn():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_lstm()
    else:
        cntk_baseline_lstm()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    input_var = C.sequence.input_variable(shape=(in_dim,))
    data = {input_var: data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)

    W = C.parameter((-1, dim,), init=C.glorot_uniform())
    cudnn_fwbw = C.optimized_rnnstack(input_var, W, dim, 1, bidirectional=True, recurrent_op='lstm')
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn', var_type=cstk.RnnAttr,
             attr=cstk.RnnAttr(bidirectional=True, op_type='lstm',
                               input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn_out')

    ci.assign('cntk_birnn_cudnn', load=True, load_name='birnn')
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='birnn_out', rtol=1e-4, atol=1e-6)

    ci.fetch('cntk_birnn_cudnn', save=True)
    ci.assign('cntk_birnn_cudnn', load=True)
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='birnn_out', rtol=1e-4, atol=1e-6)

    # test assign with value (LSTM has 4 gates, so weights are num_gates*dim wide)
    num_gates = 4
    ci.assign('cntk_birnn_cudnn',
              value=cstk.RnnArgs(fw_W=np.random.random((in_dim, num_gates * dim)).astype(np.float32),
                                 fw_H=np.random.random((dim, num_gates * dim)).astype(np.float32),
                                 fw_b=np.random.random((num_gates * dim,)).astype(np.float32),
                                 bw_W=np.random.random((in_dim, num_gates * dim)).astype(np.float32),
                                 bw_H=np.random.random((dim, num_gates * dim)).astype(np.float32),
                                 bw_b=np.random.random((num_gates * dim,)).astype(np.float32)))
    ci.reset()
def func(x):
    return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op, device_id):
    if device_id == -1:
        pytest.skip('only runs on GPU')
    input_dim = 5
    hidden_dim = 3
    batches = [[np.random.random((20, input_dim)).astype(np.float32),
                np.random.random((10, input_dim)).astype(np.float32),
                np.random.random((40, input_dim)).astype(np.float32)],
               [np.random.random((1, input_dim)).astype(np.float32)]]
    for data in batches:
        input_var = C.sequence.input_variable(shape=(input_dim,))
        W1 = C.parameter((-1, 1), init=C.glorot_uniform())
        W2 = C.parameter((-1, 1), init=C.glorot_uniform())
        cudnn_rnn1 = C.optimized_rnnstack(input_var, W1, hidden_dim, num_layers=num_layers,
                                          bidirectional=bidirectional, recurrent_op=recurrent_op)
        dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
        cudnn_rnn2 = C.optimized_rnnstack(dense1, W2, hidden_dim, num_layers=num_layers,
                                          bidirectional=bidirectional, recurrent_op=recurrent_op)
        dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
        # test shared parameter W2
        cudnn_rnn3 = C.optimized_rnnstack(dense2, W2, hidden_dim, num_layers=num_layers,
                                          bidirectional=bidirectional, recurrent_op=recurrent_op)

        def blocked(d):
            blocked_W = C.parameter((-1, d), init=C.glorot_uniform())

            @C.layers.BlockFunction('', '')
            def func(x):
                return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
            return func

        cudnn_model = C.layers.Sequential([blocked(hidden_dim), blocked(2 * hidden_dim),
                                           blocked(3 * hidden_dim)])(cudnn_rnn3)
        cudnn_out = cudnn_model.eval({input_var: data})
        model = C.misc.convert_optimized_rnnstack(cudnn_model)
        # make sure the original cuDNN model is intact
        cudnn_out2 = cudnn_model.eval({input_var: data})
        assert all(np.allclose(cudnn_out[i], cudnn_out2[i]) for i in range(len(cudnn_out)))
        model_out = model.eval({model.arguments[0]: data})
        assert all(np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
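# The small `func` closures above are what the convert tests wrap via
# C.layers.BlockFunction and compose with C.layers.Sequential; a sketch of that
# composition pattern, assuming a GPU build of CNTK (dimensions illustrative).
import cntk as C

def rnn_block(d):
    blocked_W = C.parameter((-1, d), init=C.glorot_uniform())

    @C.layers.BlockFunction('', '')
    def func(x):
        return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
    return func

stacked = C.layers.Sequential([rnn_block(3), rnn_block(6), rnn_block(9)])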