import numpy
import theano
from numpy.testing import assert_allclose, assert_raises
from theano import tensor

from blocks.bricks import MLP, Identity, Tanh
from blocks.model import Model


def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)
    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values.
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test that an exception is raised if parameter shapes do not match.
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11,
                                              dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling.
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper)
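# A short persistence sketch building on the API exercised above: the
# dict returned by get_parameter_values maps parameter names to numpy
# arrays, so it can be round-tripped through numpy.savez. The helper
# names and the '|' name-mangling convention below are illustrative
# assumptions, not part of the Blocks API.
def save_parameter_values(model, path):
    # numpy.savez archive member names should not contain '/', so
    # mangle the separator (assumes '|' never occurs in a name).
    values = {name.replace('/', '|'): value
              for name, value in model.get_parameter_values().items()}
    numpy.savez(path, **values)


def load_parameter_values(model, path):
    with numpy.load(path) as archive:
        model.set_parameter_values({name.replace('|', '/'): value
                                    for name, value in archive.items()})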
# extract_parameter_values and inject_parameter_values live in the
# historical blocks.dump module (an assumption: these two tests target
# an older Blocks release, which is also why they use the old `params`
# attribute below).
from blocks.dump import extract_parameter_values, inject_parameter_values


def test_extract_parameter_values():
    mlp = MLP([Identity(), Identity()], [10, 20, 10])
    mlp.allocate()
    param_values = extract_parameter_values(mlp)
    assert len(param_values) == 4
    assert isinstance(param_values['/mlp/linear_0.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_0.b'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.b'], numpy.ndarray)
def test_inject_parameter_values():
    mlp = MLP([Identity()], [10, 10])
    mlp.allocate()
    param_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    inject_parameter_values(mlp, param_values)
    # Older Blocks exposed parameters through the `params` attribute
    # rather than `parameters`.
    assert numpy.all(
        mlp.linear_transformations[0].params[0].get_value() == 2)
    assert numpy.all(
        mlp.linear_transformations[0].params[1].get_value() == 3)
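# A round-trip sketch for the two helpers above: values extracted from
# one brick can be injected into a second, structurally identical brick,
# e.g. to copy a trained network. The helper name is illustrative.
def copy_brick_parameters(source_brick, target_brick):
    # Both bricks must already be allocated with matching architectures
    # so that the parameter names and shapes line up.
    inject_parameter_values(target_brick,
                            extract_parameter_values(source_brick))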
import theano
from theano import tensor as T

from blocks.bricks import (MLP, Feedforward, Identity, Initializable,
                           Rectifier, Tanh)
from blocks.bricks.base import application, lazy
from blocks.initialization import Constant, IsotropicGaussian
from blocks.roles import BIAS, WEIGHT, add_role
from blocks.utils import shared_floatx_nans

# LookUpTable, getParams and flat_submatrix are assumed to be
# project-local helpers; they are not part of Blocks itself.


class Window(Initializable, Feedforward):
    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        super(Window, self).__init__(**kwargs)
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.n_tables = len(self.vect_size)
        self.tables = [LookUpTable(self.vect_size[i], self.n_mot[i],
                                   weights_init=IsotropicGaussian(0.001),
                                   use_bias=False)
                       for i in range(self.n_tables)]
        self.mlp = MLP(
            activations=[Tanh()] * len(self.n_hidden) + [Identity()],
            dims=([self.dwin * sum(self.vect_size)] + self.n_hidden +
                  [self.n_out]),
            weights_init=IsotropicGaussian(0.001),
            biases_init=Constant(0.))
        self.parameters = []
        self.children = self.tables + [self.mlp]

    def _initialize(self):
        for i in range(self.n_tables):
            self.tables[i].initialize()
        self.mlp.initialize()
        W = self.parameters[0]
        self.weights_init.initialize(W, self.rng)

    def _allocate(self):
        for i in range(self.n_tables):
            self.tables[i].allocate()
        self.mlp.allocate()
        # Transition matrix gathering all lookup-table weights.
        W = shared_floatx_nans((sum(self.n_mot), sum(self.vect_size)),
                               name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)

    def update_transition_matrix(self):
        # Copy each lookup table's weights into its block of the
        # transition matrix W.
        W_tmp = self.parameters[0]
        params_lookup = [getParams(table, T.itensor3())
                         for table in self.tables]
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            W_tmp_value = W_tmp.get_value()
            p_value = params_lookup[i][0].get_value()
            W_tmp_value[index_row:index_row + p_value.shape[1],
                        index_col:index_col + p_value.shape[0]] = \
                p_value.transpose()
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]
            W_tmp.set_value(W_tmp_value)

    def update_lookup_weights(self):
        # Inverse of update_transition_matrix: write the blocks of W
        # back into the individual lookup tables.
        W_tmp = self.parameters[0]
        params_lookup = [getParams(table, T.itensor3())
                         for table in self.tables]
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            W_tmp_value = W_tmp.get_value().transpose()
            p_value = params_lookup[i][0].get_value()
            params_lookup[i][0].set_value(
                W_tmp_value[index_col:index_col + p_value.shape[0],
                            index_row:index_row + p_value.shape[1]])
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]

    def get_Params(self):
        params = getParams(self.mlp, T.matrix())
        self.update_transition_matrix()
        weights = []
        biases = []
        for p in params:
            if p.ndim == 1:
                biases.append(p)
            else:
                weights.append(p)
        if len(params[0].name) == 1:
            # The parameters have not been renamed yet.
            if weights[0].shape[-1].eval() == self.n_out:
                weights.reverse()
                biases.reverse()
            # Prepend the lookup-table weights.
            weights = [self.parameters[0]] + weights
            assert len(weights) == len(biases) + 1
            for index, w in enumerate(weights):
                w.name = "layer_" + str(index) + "_" + w.name
            for index, b in enumerate(biases):
                b.name = ("layer_" +
                          str(index + len(weights) - len(biases)) +
                          "_" + b.name)
        else:
            weights = [self.parameters[0]] + weights
        return weights, biases

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        # One column of word indices per lookup table; each output has
        # shape (batch_size, vect_size[i], dwin).
        outputs = [self.tables[i].apply(input_[:, i])
                   for i in range(self.n_tables)]
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        # (sum(vect_size), batch_size, dwin)
        output = T.concatenate(outputs, axis=0)
        output = output.dimshuffle((1, 0, 2))
        shape = output.shape
        output = output.reshape((shape[0], shape[1] * shape[2]))
        return self.mlp.apply(output)

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        input_ = input_.dimshuffle(('x', 0, 1))
        # Each output has shape (batch_size, vect_size[i], nb_words).
        outputs = [self.tables[i].apply(input_[:, i])
                   for i in range(self.n_tables)]
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        # (sum(vect_size), batch_size, nb_words)
        output = T.concatenate(outputs, axis=0)
        return output.dimshuffle((1, 2, 0))

    def _push_allocation_config(self):
        for i in range(self.n_tables):
            self.tables[i]._push_allocation_config()
        self.mlp._push_allocation_config()
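# A minimal usage sketch for Window. The hyperparameter values are
# illustrative, and the input layout (an integer tensor of shape
# (batch_size, n_tables, dwin), one row of word indices per lookup
# table) is inferred from how apply() slices its input.
window = Window(dwin=5, n_mot=[10000, 100], vect_size=[50, 10],
                n_hidden=[300])
window.initialize()  # allocates the lookup tables, the MLP and W
words = T.itensor3('words')
scores = window.apply(words)  # (batch_size, n_out) scores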
class ConvPoolNlp(Initializable, Feedforward):
    """A layer that applies a convolution followed by max-pooling to an
    embedded input sentence."""
    @lazy(allocation=['n_out', 'dwin', 'vector_size', 'n_hidden_layer'])
    def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
        super(ConvPoolNlp, self).__init__(**kwargs)
        self.vector_size = vector_size
        self.n_hidden_layer = n_hidden_layer
        self.dwin = dwin
        self.n_out = n_out
        self.rectifier = Rectifier()
        # The second dimension has fixed size sum(vect_size) minus the
        # filter-size borders.
        self.mlp = MLP(
            activations=[Rectifier()] * len(self.n_hidden_layer) +
                        [Identity()],
            dims=[self.n_out] + self.n_hidden_layer + [2],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0.))
        self.parameters = []
        self.children = [self.mlp, self.rectifier]

    def _allocate(self):
        W = shared_floatx_nans((self.n_out, self.dwin * self.vector_size),
                               name='W')
        b = shared_floatx_nans((self.n_out,), name='b')
        add_role(b, BIAS)
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.parameters.append(b)
        self.mlp.allocate()

    @property
    def W(self):
        return self.parameters[0]

    @property
    def b(self):
        return self.parameters[1]

    def _initialize(self):
        W, b = self.parameters
        self.weights_init.initialize(W, self.rng)
        self.biases_init.initialize(b, self.rng)
        self.mlp.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        W, b = self.parameters
        # Apply the same affine map to every dwin-wide window of the
        # sentence, then max-pool over window positions.
        convolved_inputs, _ = theano.scan(
            fn=lambda i, A, W, b: T.dot(
                W, flat_submatrix(input_, i, self.dwin)) + b,
            sequences=T.arange(input_.shape[0] - self.dwin),
            non_sequences=[input_, W, b])
        output = T.concatenate([convolved_inputs])
        output = T.max(output, axis=0)
        output = output.dimshuffle(('x', 0))
        return self.mlp.apply(output)

    def _push_allocation_config(self):
        self.mlp._push_allocation_config()
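# A minimal usage sketch for ConvPoolNlp, assuming flat_submatrix is a
# project-local helper that extracts and flattens the dwin-wide window
# of rows starting at position i. Hyperparameters are illustrative.
conv = ConvPoolNlp(n_out=100, dwin=3, vector_size=60, n_hidden_layer=[200])
conv.initialize()
sentence = T.matrix('sentence')  # (n_words, vector_size) embedded words
prediction = conv.apply(sentence)  # (1, 2) class scores from the MLP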