def test_save_cudnn_rnn(self):
  np.random.seed(5218)
  X = K.variable(np.random.rand(25, 12, 8))
  num_layers = 2
  num_gates = 'lstm'
  skip_input = False
  is_bidirectional = False
  path = '/tmp/rnn'
  weights, biases = K.init_rnn(input_dim=8, hidden_dim=18,
                               b_init=init_ops.random_normal_initializer(),
                               num_layers=num_layers,
                               num_gates=num_gates,
                               skip_input=skip_input,
                               is_bidirectional=is_bidirectional)
  rnn = N.CudnnRNN(num_units=18,
                   W_init=weights,
                   b_init=biases,
                   rnn_mode=num_gates,
                   num_layers=num_layers,
                   skip_input=skip_input,
                   is_bidirectional=is_bidirectional,
                   return_states=False,
                   dropout=0.,
                   name="CudnnRNNTest")
  y = rnn(X)
  K.initialize_all_variables()
  y = K.eval(y)
  N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
  test_script = r"""
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'gpu,float32,seed=5218'
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import init_ops
from odin.config import randint
from odin import backend as K, nnet as N

np.random.seed(5218)
X = K.variable(np.random.rand(25, 12, 8))
rnn = N.deserialize("%s", force_restore_vars=True)
y = rnn(X)
K.initialize_all_variables()
y = K.eval(y)
print(len(rnn.variables),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Weight)),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Bias)),
      y.sum(),
      (y**2).sum())
""" % path
  outputs = run_script(test_script)[1]
  num_variables, w, b, s1, s2 = outputs.split(' ')
  assert int(num_variables) == len(rnn.variables)
  assert np.allclose(
      float(w),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Weight)))
  assert np.allclose(
      float(b),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Bias)))
  assert np.allclose(float(s1), y.sum())
  assert np.allclose(float(s2), (y**2).sum())
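# -----------------------------------------------------------------------
# For orientation: the save/load round-trip verified above, reduced to a
# minimal sketch. It reuses only calls that appear in the test and, like
# the test itself (which deserializes in a separate process via
# run_script), it assumes a fresh graph/session; '/tmp/rnn_demo' and the
# op name are arbitrary illustrative values.
# -----------------------------------------------------------------------
import numpy as np
from odin import backend as K, nnet as N

X = K.variable(np.random.rand(25, 12, 8))
rnn = N.CudnnRNN(num_units=18, rnn_mode='lstm', num_layers=2,
                 return_states=False, dropout=0., name="RoundTripDemo")
y_before = rnn(X)
K.initialize_all_variables()
y_before = K.eval(y_before)

N.serialize(nnops=rnn, path='/tmp/rnn_demo', binary_output=True, override=True)

# In a fresh process, restoring the op together with its saved variables
# must reproduce the same output for the same input.
restored = N.deserialize('/tmp/rnn_demo', force_restore_vars=True)
y_after = restored(X)
K.initialize_all_variables()
assert np.allclose(y_before, K.eval(y_after))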
def _initialize(self):
  input_shape = self.input_shape_map['X']
  weights, biases = K.init_rnn(input_dim=int(input_shape[-1]),
                               hidden_dim=int(self.num_units),
                               W_init=self.W_init,
                               b_init=self.b_init,
                               num_layers=self.num_layers,
                               num_gates=self.rnn_mode,
                               skip_input=self.skip_input,
                               is_bidirectional=self.is_bidirectional,
                               cudnn_vector=False)
  self._weights_name = [w.name for w in weights]
  self._biases_name = [b.name for b in biases]
  for i in weights + biases:
    self.get_variable_nnop(name=i.name.split('/')[-1].split(':')[0],
                           shape=i.shape.as_list(),
                           initializer=i)
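# The get_variable_nnop() registration above strips the enclosing scope and
# the ':0' output suffix from each TensorFlow variable name, so every
# canonical parameter is stored under its bare name. A small illustration
# (the variable name here is hypothetical):
full_name = 'CudnnRNNTest/lstm_layer0/weight_input_0:0'
bare_name = full_name.split('/')[-1].split(':')[0]
assert bare_name == 'weight_input_0'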
def test_cudnn_rnn(self):
  if get_ngpu() == 0:
    return
  print()
  batch_size = 2
  time_steps = 5
  input_dim = 12
  hidden_dim = 8
  X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                 dtype='float32',
                 name='X')
  for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
    for num_layers in [1, 2]:
      for W_init in [init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)]:
        for b_init in [0, 1]:
          for bidirectional in (True, False):
            for skip_input in (False,):
              print('RNNmode:%s' % rnn_mode,
                    "#Layers:%d" % num_layers,
                    'Bidirectional:%s' % bidirectional,
                    'SkipInput:%s' % skip_input)
              weights, biases = K.init_rnn(
                  input_dim=input_dim, hidden_dim=hidden_dim,
                  num_gates=rnn_mode, num_layers=num_layers,
                  W_init=W_init, b_init=b_init,
                  skip_input=skip_input, cudnn_vector=False,
                  is_bidirectional=bidirectional, name=None)
              # ====== check number of params ====== #
              params1 = K.params_to_cudnn(weights, biases)
              n = params1.shape[0].value
              nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional')
              nb_params = K.eval(nb_params)
              assert n == nb_params
              # ====== check canonical shape match ====== #
              kwargs = {
                  'num_layers': num_layers,
                  'num_units': hidden_dim,
                  'input_mode': 'skip_input' if skip_input else 'linear_input',
                  'direction': 'bidirectional' if bidirectional else 'unidirectional'
              }
              if rnn_mode == 'lstm':
                rnn = cudnn_rnn.CudnnLSTM(**kwargs)
              elif rnn_mode == 'gru':
                rnn = cudnn_rnn.CudnnGRU(**kwargs)
              elif rnn_mode == 'rnn_relu':
                rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
              elif rnn_mode == 'rnn_tanh':
                rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
              rnn.build(input_shape=(None, None, input_dim))
              assert len(weights) == len(rnn.canonical_weight_shapes)
              assert len(biases) == len(rnn.canonical_bias_shapes)
              for w, s in zip(weights, rnn.canonical_weight_shapes):
                assert tuple(w.shape.as_list()) == s
              # ====== check params conversion ====== #
              K.initialize_all_variables()
              params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional',
                  weights=weights, biases=biases)
              assert np.all(K.eval(params1) == K.eval(params2))
              # ====== odin cudnn implementation ====== #
              name = 'TEST' + uuid(length=25)
              outputs = K.cudnn_rnn(X=X, num_units=hidden_dim,
                                    rnn_mode=rnn_mode, num_layers=num_layers,
                                    parameters=None, skip_input=skip_input,
                                    is_bidirectional=bidirectional,
                                    dropout=0.1, name=name)
              K.initialize_all_variables()
              s0 = K.eval(outputs[0]).sum()
              s1 = K.eval(outputs[1]).sum()
              all_variables = K.get_all_variables(scope=name)
              new_weights = [i for i in all_variables
                             if K.role.has_roles(i, roles=K.role.Weight)]
              new_biases = [i for i in all_variables
                            if K.role.has_roles(i, roles=K.role.Bias)]
              new_weights, new_biases = K.sort_cudnn_params(
                  new_weights, new_biases, rnn_mode=rnn_mode)
              # compare re-created parameters against the originals
              assert len(new_weights) == len(weights)
              assert len(new_biases) == len(biases)
              for i, j in zip(weights + biases, new_weights + new_biases):
                assert i.name.split('/')[-1] == j.name.split('/')[-1]
              # ====== CudnnRNN wrapper ====== #
              rnn = N.CudnnRNN(num_units=hidden_dim,
                               W_init=new_weights,
                               b_init=new_biases,
                               rnn_mode=rnn_mode,
                               num_layers=num_layers,
                               skip_input=skip_input,
                               is_bidirectional=bidirectional,
                               return_states=True,
                               dropout=0.)
              outputs = rnn(X)
              K.initialize_all_variables()
              y0 = K.eval(outputs[0]).sum()
              y1 = K.eval(outputs[1]).sum()
              assert y0 == s0
              assert y1 == s1
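# The canonical-parameter counts asserted in test_cudnn_rnn() follow from
# the cuDNN layout: per layer and per direction there is one input-to-hidden
# and one hidden-to-hidden matrix for each gate, plus one bias vector per
# matrix. An illustrative helper (not part of ODIN) for the expected count:
def expected_canonical_count(rnn_mode, num_layers, bidirectional):
  gates = {'lstm': 4, 'gru': 3, 'rnn_relu': 1, 'rnn_tanh': 1}[rnn_mode]
  directions = 2 if bidirectional else 1
  # one input-to-hidden + one hidden-to-hidden matrix per gate
  return num_layers * directions * 2 * gates

# e.g. a 2-layer bidirectional LSTM: 2 layers * 2 directions * 8 = 32 matrices
assert expected_canonical_count('lstm', num_layers=2, bidirectional=True) == 32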