def test_save_cudnn_rnn(self):
  np.random.seed(5218)
  X = K.variable(np.random.rand(25, 12, 8))
  num_layers = 2
  num_gates = 'lstm'
  skip_input = False
  is_bidirectional = False
  path = '/tmp/rnn'
  weights, biases = K.init_rnn(input_dim=8, hidden_dim=18,
                               b_init=init_ops.random_normal_initializer(),
                               num_layers=num_layers, num_gates=num_gates,
                               skip_input=skip_input,
                               is_bidirectional=is_bidirectional)
  rnn = N.CudnnRNN(num_units=18,
                   W_init=weights, b_init=biases,
                   rnn_mode=num_gates, num_layers=num_layers,
                   skip_input=skip_input, is_bidirectional=is_bidirectional,
                   return_states=False, dropout=0.,
                   name="CudnnRNNTest")
  y = rnn(X)
  K.initialize_all_variables()
  y = K.eval(y)
  N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
  # restore the saved model in a fresh process and print its variable sums and
  # output statistics, so they can be compared against the current process
  test_script = r"""
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'gpu,float32,seed=5218'
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import init_ops
from odin.config import randint
from odin import backend as K, nnet as N

np.random.seed(5218)
X = K.variable(np.random.rand(25, 12, 8))
rnn = N.deserialize("%s", force_restore_vars=True)
y = rnn(X)
K.initialize_all_variables()
y = K.eval(y)
print(len(rnn.variables),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Weight)),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Bias)),
      y.sum(),
      (y**2).sum())
""" % path
  outputs = run_script(test_script)[1]
  num_variables, w, b, s1, s2 = outputs.split(' ')
  assert int(num_variables) == len(rnn.variables)
  assert np.allclose(float(w),
                     sum(np.sum(K.eval(i)) for i in rnn.variables
                         if K.role.has_roles(i, K.role.Weight)))
  assert np.allclose(float(b),
                     sum(np.sum(K.eval(i)) for i in rnn.variables
                         if K.role.has_roles(i, K.role.Bias)))
  assert np.allclose(float(s1), y.sum())
  assert np.allclose(float(s2), (y**2).sum())
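# ---------------------------------------------------------------------------
# A minimal sketch of the `run_script` helper used above (an assumption about
# its behaviour, not the actual test utility): write the snippet to a temporary
# file, run it with a fresh interpreter, and return (return_code, stdout) so
# that `run_script(...)[1]` yields the printed values.
import os
import subprocess
import sys
import tempfile


def run_script(source):
  with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
    f.write(source)
    script_path = f.name
  try:
    proc = subprocess.run([sys.executable, script_path],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          universal_newlines=True)
    return proc.returncode, proc.stdout.strip()
  finally:
    os.remove(script_path)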
def test_cudnn_rnn_nnet(self):
  if get_device() == 'cpu':
    return
  print()
  np.random.seed(1208)
  batch_size = 6
  hidden_size = 4
  X_linear = K.placeholder(shape=(None, 3, 8), name='X_linear')
  X_skip = K.placeholder(shape=(None, 3, hidden_size), name='X_skip')
  for direction_mode in ['bidirectional', 'unidirectional']:
    is_bidirectional = direction_mode == 'bidirectional'
    for nb_layers in [2]:
      real_layers = nb_layers * 2 if is_bidirectional else nb_layers
      for rnn_mode in ['gru', 'lstm', 'rnn_relu', 'rnn_tanh']:
        for init_state, init_state_name in zip(
            [None,  # no initial state
             K.init.uniform,  # initializer function
             K.variable(np.random.rand(real_layers, 1, hidden_size)),  # variable, shared over batch
             K.variable(np.random.rand(real_layers, batch_size, hidden_size)),  # variable, per sample
             K.zeros(shape=(real_layers, 1, hidden_size)),  # tensor, shared over batch
             K.ones(shape=(real_layers, batch_size, hidden_size))],  # tensor, per sample
            ['None', 'Function', 'Var1', 'VarB', 'Tensor1', 'TensorB']):
          for input_mode in ['linear', 'skip']:
            if input_mode == 'linear':
              X = X_linear
              x = np.random.rand(batch_size, 3, 8)
            else:
              X = X_skip
              x = np.random.rand(batch_size, 3, hidden_size)
            start = timeit.default_timer()
            f = N.CudnnRNN(num_units=hidden_size, rnn_mode=rnn_mode,
                           input_mode=input_mode, num_layers=nb_layers,
                           direction_mode=direction_mode,
                           params_split=False, return_states=True)
            # build the graph, compile it into a callable and run on real data
            y = f(X, h0=init_state, c0=init_state)
            f = K.function(X, y)
            output = f(x)
            benchmark = timeit.default_timer() - start
            # concrete output shapes must match the symbolic shapes, with None
            # standing in for the batch dimension
            self.assertTrue(
                [list(i.shape) for i in output] ==
                [[batch_size if j is None else j for j in K.get_shape(i)]
                 for i in y])
            print("*PASSED* [Layers]%s [Mode]%-8s [Input]%-6s "
                  "[Direction]%-12s [State]%s [Benchmark]%.4f" %
                  (nb_layers, rnn_mode, input_mode, direction_mode,
                   init_state_name, benchmark))
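# ---------------------------------------------------------------------------
# Illustration (a sketch added here, not part of the test above) of the
# initial-state convention the test assumes: `h0`/`c0` have shape
# (num_layers * num_directions, batch_size, hidden_size), and a batch
# dimension of 1 (the 'Var1'/'Tensor1' cases) is shared over the whole batch.
import numpy as np

nb_layers, bidirectional = 2, True
batch_size, hidden_size = 6, 4
real_layers = nb_layers * (2 if bidirectional else 1)
h0_shared = np.zeros((real_layers, 1, hidden_size))               # broadcast over batch
h0_per_sample = np.zeros((real_layers, batch_size, hidden_size))  # one state per sample
assert h0_per_sample.shape == (4, 6, 4)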
                        name='input%d' % i)
          for i, shape in enumerate(train.shape)]
print("Inputs:", ctext(inputs, 'cyan'))
# ====== create the network ====== #
f_encoder = N.Sequence([
    N.Dimshuffle(pattern=(0, 1, 2, 'x')),
    N.Conv(num_filters=32, filter_size=(7, 7), b_init=None, activation=K.linear),
    N.BatchNorm(),
    N.Pool(pool_size=(3, 2), strides=2),
], debug=True, name='Encoder')
f_latent = N.Sequence([
    N.Flatten(outdim=3),
    N.CudnnRNN(num_units=128, num_layers=1, is_bidirectional=False,
               rnn_mode='lstm'),
], debug=True, name='Latent')
f_decoder = N.Sequence([
    N.Flatten(outdim=2),
    N.Dense(num_units=1024, b_init=None, activation=K.linear),
    N.BatchNorm(axes=0, activation=K.relu)
], debug=True, name='Decoder')
f_output = N.Sequence([
    N.Dense(len(digits), activation=K.linear)
], debug=True, name='Output')
# ====== applying ====== #
E = f_encoder(inputs[0])
# ===========================================================================
# Build network
# ===========================================================================
ops = N.Sequence([
    N.Dimshuffle((0, 1, 2, 'x')) if USE_MNIST_DATA else N.Dimshuffle((0, 2, 3, 1)),
    N.Conv(32, filter_size=3, strides=1, pad='same', activation=K.linear),
    N.BatchNorm(axes='auto', activation=K.relu),
    N.Pool(pool_size=2, strides=None),
    N.Dimshuffle(pattern=(0, 3, 1, 2)),
    N.Flatten(outdim=3),
    N.CudnnRNN(18, initial_states=None, rnn_mode='lstm', num_layers=2,
               input_mode='linear', direction_mode='unidirectional',
               params_split=False),
    N.Flatten(outdim=2),
    N.Dense(128, activation=K.relu),
    N.Dense(10, activation=K.softmax)
], debug=True)
ops = cPickle.loads(cPickle.dumps(ops))  # test if the ops is pickle-able
K.set_training(True)
y_pred_train = ops(X)
K.set_training(False)
y_pred_score = ops(X)
def test_cudnn_rnn(self):
  if get_ngpu() == 0:
    return
  print()
  batch_size = 2
  time_steps = 5
  input_dim = 12
  hidden_dim = 8
  X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                 dtype='float32', name='X')
  for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
    for num_layers in [1, 2]:
      for W_init in [init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)]:
        for b_init in [0, 1]:
          for bidirectional in (True, False):
            for skip_input in (False,):
              print('RNNmode:%s' % rnn_mode,
                    "#Layers:%d" % num_layers,
                    'Bidirectional:%s' % bidirectional,
                    'SkipInput:%s' % skip_input)
              weights, biases = K.init_rnn(
                  input_dim=input_dim, hidden_dim=hidden_dim,
                  num_gates=rnn_mode, num_layers=num_layers,
                  W_init=W_init, b_init=b_init,
                  skip_input=skip_input, cudnn_vector=False,
                  is_bidirectional=bidirectional, name=None)
              # ====== check number of params ====== #
              params1 = K.params_to_cudnn(weights, biases)
              n = params1.shape[0].value
              nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional')
              nb_params = K.eval(nb_params)
              assert n == nb_params
              # ====== check canonical shape match ====== #
              kwargs = {
                  'num_layers': num_layers,
                  'num_units': hidden_dim,
                  'input_mode': 'skip_input' if skip_input else 'linear_input',
                  'direction': 'bidirectional' if bidirectional else 'unidirectional'
              }
              if rnn_mode == 'lstm':
                rnn = cudnn_rnn.CudnnLSTM(**kwargs)
              elif rnn_mode == 'gru':
                rnn = cudnn_rnn.CudnnGRU(**kwargs)
              elif rnn_mode == 'rnn_relu':
                rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
              elif rnn_mode == 'rnn_tanh':
                rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
              rnn.build(input_shape=(None, None, input_dim))
              assert len(weights) == len(rnn.canonical_weight_shapes)
              assert len(biases) == len(rnn.canonical_bias_shapes)
              for w, s in zip(weights, rnn.canonical_weight_shapes):
                assert tuple(w.shape.as_list()) == s
              # ====== check params conversion ====== #
              K.initialize_all_variables()
              params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional',
                  weights=weights, biases=biases)
              assert np.all(K.eval(params1) == K.eval(params2))
              # ====== odin cudnn implementation ====== #
              name = 'TEST' + uuid(length=25)
              outputs = K.cudnn_rnn(X=X, num_units=hidden_dim,
                                    rnn_mode=rnn_mode, num_layers=num_layers,
                                    parameters=None, skip_input=skip_input,
                                    is_bidirectional=bidirectional,
                                    dropout=0.1, name=name)
              K.initialize_all_variables()
              s0 = K.eval(outputs[0]).sum()
              s1 = K.eval(outputs[1]).sum()
              all_variables = K.get_all_variables(scope=name)
              new_weights = [i for i in all_variables
                             if K.role.has_roles(i, roles=K.role.Weight)]
              new_biases = [i for i in all_variables
                            if K.role.has_roles(i, roles=K.role.Bias)]
              new_weights, new_biases = K.sort_cudnn_params(
                  new_weights, new_biases, rnn_mode=rnn_mode)
              assert len(weights) == len(new_weights)
              assert len(biases) == len(new_biases)
              for i, j in zip(weights + biases, new_weights + new_biases):
                assert i.name.split('/')[-1] == j.name.split('/')[-1]
              # ====== CudnnRNN wrapper ====== #
              rnn = N.CudnnRNN(num_units=hidden_dim,
                               W_init=new_weights, b_init=new_biases,
                               rnn_mode=rnn_mode, num_layers=num_layers,
                               skip_input=skip_input,
                               is_bidirectional=bidirectional,
                               return_states=True, dropout=0.)
              outputs = rnn(X)
              K.initialize_all_variables()
              y0 = K.eval(outputs[0]).sum()
              y1 = K.eval(outputs[1]).sum()
              assert y0 == s0
              assert y1 == s1
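# ---------------------------------------------------------------------------
# A hand-computed cross-check (a sketch added for illustration, not part of the
# original test) of the opaque parameter count that the test verifies through
# cudnn_rnn_opaque_params_size. It assumes the standard CuDNN canonical layout
# for 'linear' input mode: per layer and direction there are `gates` input
# matrices, `gates` recurrent matrices and 2 * `gates` bias vectors, and layers
# above the first receive an input of size hidden_dim * num_directions.
def expected_cudnn_param_count(rnn_mode, num_layers, hidden_dim, input_dim,
                               bidirectional=False):
  gates = {'lstm': 4, 'gru': 3, 'rnn_relu': 1, 'rnn_tanh': 1}[rnn_mode]
  directions = 2 if bidirectional else 1
  total = 0
  for layer in range(num_layers):
    layer_input = input_dim if layer == 0 else hidden_dim * directions
    per_direction = (gates * hidden_dim * layer_input    # input-to-hidden weights
                     + gates * hidden_dim * hidden_dim   # hidden-to-hidden weights
                     + 2 * gates * hidden_dim)           # input and recurrent biases
    total += per_direction * directions
  return total

# e.g. a 2-layer bidirectional LSTM with input_dim=12 and hidden_dim=8
assert expected_cudnn_param_count('lstm', 2, 8, 12, bidirectional=True) == \
    2 * (4 * 8 * 12 + 4 * 8 * 8 + 2 * 4 * 8) + \
    2 * (4 * 8 * 16 + 4 * 8 * 8 + 2 * 4 * 8)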