def make_node(self, Z, c, y0, i, *args):
  """
  :param Z: {input,output,forget} gate + cell state. 3d (time,batch,dim*4)
  :param c: initial cell state. 2d (batch,dim)
  :param y0: output of t = -1 (for recursion at t = 0). 2d (batch,dim)
  :param i: index. 2d (time,batch) -> 0 or 1
  :param args: custom_inputs + initial_state_vars: other inputs for the custom function
  """
  from Device import have_gpu
  assert have_gpu()

  assert len(args) == self._get_num_custom_vars() + self._get_num_state_vars(), self.recurrent_transform
  custom_inputs = args[:self._get_num_custom_vars()]
  initial_state_vars = args[self._get_num_custom_vars():]

  custom_inputs = [gpu_contiguous(as_cuda_ndarray_variable(x)) for x in custom_inputs]
  initial_state_vars = [gpu_contiguous(as_cuda_ndarray_variable(x)) for x in initial_state_vars]
  Z = gpu_contiguous(as_cuda_ndarray_variable(Z))
  c = gpu_contiguous(as_cuda_ndarray_variable(c))
  y0 = gpu_contiguous(as_cuda_ndarray_variable(y0))
  i = gpu_contiguous(as_cuda_ndarray_variable(T.cast(i, 'float32')))
  assert Z.dtype == "float32"
  assert c.dtype == "float32"
  assert y0.dtype == "float32"
  for x in custom_inputs: assert x.dtype == "float32"
  for x in initial_state_vars: assert x.dtype == "float32"
  assert Z.ndim == 3
  assert c.ndim == 2
  assert y0.ndim == 2
  assert i.ndim == 2

  seq_state_vars = [self._seq_var_for_initial_state_var(x) for x in initial_state_vars]
  return theano.Apply(self,
                      [Z, c, y0, i] + custom_inputs + initial_state_vars,
                      # results: (output) Y, (gates and cell state) H, (final cell state) d, state vars sequences
                      [Z.type(), Z.type(), c.type()] + seq_state_vars)
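# For orientation, a minimal sketch (a hypothetical helper, not part of the op)
# of how the outputs of the resulting Apply node line up with the output list
# built in make_node above: Y, H, d, then one sequence per extra state var.
def unpack_custom_lstm_outputs(op, Z, c, y0, i, custom_inputs, initial_state_vars):
  """Hypothetical helper: call the op instance and unpack its outputs."""
  outputs = op(Z, c, y0, i, *(custom_inputs + initial_state_vars))
  Y, H, d = outputs[0], outputs[1], outputs[2]  # output, gates+cell states, final cell state
  state_var_seqs = list(outputs[3:])            # per-time-step sequences of the state vars
  return Y, H, d, state_var_seqs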
def make_node(self, Z, c, y0, i, freq, W_re, *args):
  """
  :param Z: {input,output,forget} gate + cell state. 3d (time,batch,dim*4)
  :param c: initial cell state. 2d (batch,dim)
  :param y0: output of t = -1 (for recursion at t = 0). 2d (batch,dim)
  :param i: index. 2d (time,batch) -> 0 or 1
  :param freq: call frequency to custom function. int
  :param W_re: recurrent matrix. 2d (dim,dim*4)
  :param args: custom_inputs + initial_state_vars: other inputs for the custom function
  """
  from Device import have_gpu
  assert have_gpu()

  assert len(args) == self._get_num_custom_vars() + self._get_num_state_vars(), self.recurrent_transform
  custom_inputs = args[:self._get_num_custom_vars()]
  initial_state_vars = args[self._get_num_custom_vars():]

  custom_inputs = [gpu_contiguous(as_cuda_ndarray_variable(x)) for x in custom_inputs]
  initial_state_vars = [gpu_contiguous(as_cuda_ndarray_variable(x)) for x in initial_state_vars]
  Z = gpu_contiguous(as_cuda_ndarray_variable(Z))
  c = gpu_contiguous(as_cuda_ndarray_variable(c))
  y0 = gpu_contiguous(as_cuda_ndarray_variable(y0))
  i = gpu_contiguous(as_cuda_ndarray_variable(T.cast(i, 'float32')))
  W_re = gpu_contiguous(as_cuda_ndarray_variable(W_re))
  self.freq = gpu_contiguous(as_cuda_ndarray_variable(freq))
  assert Z.dtype == "float32"
  assert c.dtype == "float32"
  assert y0.dtype == "float32"
  assert W_re.dtype == "float32"
  for x in custom_inputs: assert x.dtype == "float32"
  for x in initial_state_vars: assert x.dtype == "float32"
  assert Z.ndim == 3
  assert c.ndim == 2
  assert y0.ndim == 2
  assert i.ndim == 2
  assert W_re.ndim == 2

  seq_state_vars = [self._seq_var_for_initial_state_var(x) for x in initial_state_vars]
  return theano.Apply(self,
                      [Z, c, y0, i, freq, W_re] + custom_inputs + initial_state_vars,
                      # results: (output) Y, (gates and cell state) H, (final cell state) d, state vars sequences
                      [Z.type(), Z.type(), c.type()] + seq_state_vars)
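# As a quick shape reference, a sketch of numpy inputs consistent with the
# docstring contract above. Sizes are chosen arbitrarily; this mirrors the
# documented shapes, it is not code from the op itself.
import numpy
n_time, n_batch, n_cells = 5, 4, 8
Z = numpy.zeros((n_time, n_batch, n_cells * 4), dtype="float32")  # gates + cell input, dim*4
c = numpy.zeros((n_batch, n_cells), dtype="float32")              # initial cell state
y0 = numpy.zeros((n_batch, n_cells), dtype="float32")             # output at t = -1
i = numpy.ones((n_time, n_batch), dtype="int8")                   # index mask, 0 or 1
W_re = numpy.zeros((n_cells, n_cells * 4), dtype="float32")       # recurrent matrix (dim, dim*4)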
import unittest
import theano
import theano.tensor as T
from Device import have_gpu
import RecurrentTransform
import CustomLSTMFunctions

CustomLSTMFunctions.debug_function_hook = True

def get_attention(att_class, **kwargs):
  import OpLSTMCustom
  recurrent_transform = RecurrentTransform.get_dummy_recurrent_transform(att_class.name, **kwargs)
  assert isinstance(recurrent_transform, att_class)
  f = OpLSTMCustom.register_func(recurrent_transform)
  return f

LSTMCustomTestOpNoInplaceInstance = get_attention(RecurrentTransform.AttentionTest)
LSTMCustomDotAttentionOpNoInplaceInstance = get_attention(RecurrentTransform.AttentionDot)

from OpLSTM import LSTMOpInstance

@unittest.skipIf(not have_gpu(), "no gpu on this system")
def test_does_not_crash():
  Z = T.ftensor3('Z')
  W_re = T.fmatrix('W_re')
  W_att_in = T.fmatrix('W_att_in')
  c = T.fmatrix('c')  # initial state
  y0 = T.fmatrix('y0')  # initial activation
  i = T.matrix('i', dtype='int8')
  Y, H, d = LSTMCustomTestOpNoInplaceInstance(Z, c, y0, i, W_re, W_att_in)
  f = theano.function(inputs=[Z, c, y0, i, W_re, W_att_in], outputs=Y)

  n_T = 5
  n_batch = 4
  n_inp_dim = 3
  n_cells = 8
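# The body of test_does_not_crash is cut off above after the size constants.
# Below is a plausible continuation, shown only as a hedged sketch: random
# numpy inputs of the declared shapes are fed to the compiled function. The
# W_att_in shape is an assumption; this is not the original test code.
import numpy

def run_test_does_not_crash_body(f, n_T=5, n_batch=4, n_cells=8):
  """Hypothetical continuation: feed random data, check the call succeeds."""
  Z = numpy.random.ranf((n_T, n_batch, 4 * n_cells)).astype("float32")
  c = numpy.zeros((n_batch, n_cells), dtype="float32")
  y0 = numpy.zeros((n_batch, n_cells), dtype="float32")
  i = numpy.ones((n_T, n_batch), dtype="int8")
  W_re = numpy.random.ranf((n_cells, 4 * n_cells)).astype("float32")
  W_att_in = numpy.random.ranf((n_cells, 4 * n_cells)).astype("float32")  # shape assumed
  Y = f(Z, c, y0, i, W_re, W_att_in)  # the point of the test: this should not crash
  assert Y.shape == (n_T, n_batch, n_cells)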
from nose.tools import assert_equal, assert_is_instance, assert_in, assert_not_in, assert_true, assert_false
import unittest
from Device import have_gpu

def test_have_gpu():
  have_gpu()

@unittest.skipIf(not have_gpu(), "no gpu on this system")
def test_cuda():
  import theano.sandbox.cuda as theano_cuda
  assert_true(theano_cuda.cuda_available, "Theano CUDA support not available. Check that nvcc is in $PATH.")
  if theano_cuda.cuda_enabled:  # already enabled when $THEANO_FLAGS=device=gpu
    print("CUDA already enabled")
  else:
    print("Call theano_cuda.use")
    theano_cuda.use(device="gpu", force=True)
  try:
    import cuda_ndarray.cuda_ndarray as cuda
  except ImportError as exc:
    raise Exception("Theano CUDA support seems broken: %s" % exc)
  id = cuda.active_device_number(); """ :type: int """
  device_name = cuda.active_device_name(); """ :type: str """
  print("id: %i" % id)
  print("dev name: %s" % device_name)
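# If the GPU tests are skipped because no device is configured, the usual fix
# is to set the Theano flags before theano is imported. Shown as a usage note
# (standard Theano configuration), not as part of the test module.
import os
# Equivalent to running with THEANO_FLAGS=device=gpu,floatX=float32 in the shell.
os.environ.setdefault("THEANO_FLAGS", "device=gpu,floatX=float32")
import theano  # the flags must be set before this import takes effect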