import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, GRULayer, MergeLayer


def test_gru_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output
    # of a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`.
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = GRULayer(l_inp, n_units, hid_init=l_inp_h)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = GRULayer(l_inp, n_units, hid_init=Xh_test)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano expressions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(
        l_rec_inp_layer, {l_inp: X, l_inp_h: Xh})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval(
        {X: X_test, Xh: Xh_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
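# For reference: the comparison above can also be compiled once with
# `theano.function` instead of repeated `.eval()` calls. A minimal,
# self-contained sketch of the `hid_init`-as-`Layer` pattern; the shapes
# and names below are illustrative, not taken from the test suite.
def example_gru_hid_init_layer():
    n_units = 7
    l_inp = InputLayer((None, 2, 3))
    l_inp_h = InputLayer((None, n_units))
    l_gru = GRULayer(l_inp, n_units, hid_init=l_inp_h)

    X, Xh = T.tensor3(), T.matrix()
    out = lasagne.layers.get_output(l_gru, {l_inp: X, l_inp_h: Xh})
    f = theano.function([X, Xh], out)  # compiled once, reusable

    x = np.ones((2, 2, 3), dtype=theano.config.floatX)
    h0 = np.zeros((2, n_units), dtype=theano.config.floatX)
    return f(x, h0).shape  # (2, 2, 7): (batch, seq_len, n_units)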
class InputModule(MergeLayer):
    # Input Module, which uses SemMemModule and lasagne's GRULayer.
    # (SemMemModule is defined elsewhere in this codebase.)
    def __init__(self, incomings, voc_size, hid_state_size,
                 SemMem=None, GRU=None, **kwargs):
        super(InputModule, self).__init__(incomings, **kwargs)
        if SemMem is not None:
            self.SemMem = SemMem
        else:
            self.SemMem = SemMemModule(incomings[0], voc_size,
                                       hid_state_size, **kwargs)
        if GRU is not None:
            self.GRU = GRU
        else:
            self.GRU = GRULayer(self.SemMem, hid_state_size)
        self.voc_size = voc_size
        self.hid_state_size = hid_state_size

    def get_params(self, **tags):
        # Because InputModule uses an external GRULayer's parameters, we
        # have to expose them here so that they are included in training.
        return self.GRU.get_params(**tags)

    def get_output_shape_for(self, input_shape):
        return (None, None, self.hid_state_size)

    def get_output_for(self, inputs, **kwargs):
        # input with size (batch, sentences, words)
        input = inputs[0]
        # original size of input_word is (batch, sentences);
        # input_word with size (batch x sentences,) after flatten
        input_word = T.flatten(inputs[1])
        word_dropout = inputs[2]

        # Apply word embedding.
        # sentence_rep with size (batch x sentences, words, emb_dim)
        sentence_rep = self.SemMem.get_output_for([input, word_dropout])

        # Apply GRU layer.
        # gru_outs with size (batch x sentences, words, hid_state_size)
        gru_outs = self.GRU.get_output_for([sentence_rep])

        # Extract the candidate fact for each sentence from the GRU output
        # at the position of the last real word (before the nil padding),
        # e.g. "John went to the hallway nil nil nil"
        #      -> [GRU1, ..., GRU8] -> GRU5
        # hid_extract with size (batch x sentences, hid_state_size)
        hid_extract = gru_outs[T.arange(gru_outs.shape[0], dtype='int32'),
                               input_word - 1]

        # candidate_facts with size (batch, sentences, hid_state_size)
        candidate_facts = T.reshape(
            hid_extract, (-1, input.shape[1], self.hid_state_size))
        return candidate_facts
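# A hypothetical wiring of InputModule, shown for illustration only: the
# exact incomings, their shapes, and the SemMemModule constructor are
# assumptions about the surrounding DMN code, not confirmed by it.
def example_input_module():
    voc_size, hid_state_size = 100, 80

    # (batch, sentences, words) word indices of each story
    l_story = InputLayer((None, None, None), input_var=T.itensor3())
    # (batch, sentences) position of each sentence's last real word
    l_word = InputLayer((None, None), input_var=T.imatrix())
    # (batch, sentences, words) word-level dropout mask
    l_dropout = InputLayer((None, None, None), input_var=T.tensor3())

    # output: candidate facts of shape (batch, sentences, hid_state_size)
    return InputModule([l_story, l_word, l_dropout],
                       voc_size, hid_state_size)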