def build_squarederror_regression(self):
    """Build a squared-error (MSE) regression network over sparse inputs.

    Architecture: sparse input -> (optional sparse dropout) -> tanh hidden
    layer of ``self.hid_size`` units -> (optional dropout) -> linear output
    of 2 units (presumably lat/lon coordinates -- confirm against callers).

    Side effects: sets ``self.X_sym``, ``self.Y_sym``,
    ``self.X_autoencoder_sym``, ``self.Y_autoencoder_sym``, ``self.l_out``
    and compiles ``self.f_train``, ``self.f_val``, ``self.f_predict``.
    """
    # Symbolic inputs: sparse CSR matrix for features, dense matrix for targets.
    self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
    self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)
    # Declared for autoencoder experiments; not used in this graph.
    self.X_autoencoder_sym = T.matrix(name='x_autoencoder', dtype=self.dtype)
    self.Y_autoencoder_sym = T.matrix(name='y_autoencoder', dtype=self.dtype)

    l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                          input_var=self.X_sym)
    if self.drop_out and self.dropout_coef > 0:
        # Sparse-aware dropout on the raw input features.
        l_in_text = lasagne_layers.SparseInputDropoutLayer(
            l_in_text, p=self.dropout_coef)
    l_hid_text = SparseInputDenseLayer(
        l_in_text,
        num_units=self.hid_size,
        nonlinearity=lasagne.nonlinearities.tanh,
        W=lasagne.init.GlorotUniform())
    if self.drop_out and self.dropout_coef > 0:
        l_hid_text = lasagne.layers.dropout(l_hid_text, p=self.dropout_coef)
    self.l_out = lasagne.layers.DenseLayer(
        l_hid_text,
        num_units=2,
        nonlinearity=lasagne.nonlinearities.linear,
        W=lasagne.init.GlorotUniform())

    # Training output keeps dropout active; eval output is deterministic.
    output = lasagne.layers.get_output(self.l_out, self.X_sym)
    loss = lasagne.objectives.squared_error(output, self.Y_sym).mean()
    output_eval = lasagne.layers.get_output(self.l_out, self.X_sym,
                                            deterministic=True)

    if self.regul_coef:
        # Elastic-net style: split each layer's coefficient 50/50 between
        # L1 and L2 penalties.
        l1_share_out = 0.5
        l1_share_hid = 0.5
        regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
        logging.info(
            'regul coefficient for output and hidden lasagne_layers is ' +
            str(self.regul_coef))
        # BUGFIX: the original code computed the output-layer penalties and
        # then overwrote them with the hidden-layer penalties, so the output
        # layer was never regularized. Accumulate all four terms instead.
        l1_penalty_out = lasagne.regularization.regularize_layer_params(
            self.l_out, l1) * regul_coef_out * l1_share_out
        l2_penalty_out = lasagne.regularization.regularize_layer_params(
            self.l_out, l2) * regul_coef_out * (1 - l1_share_out)
        l1_penalty_hid = lasagne.regularization.regularize_layer_params(
            l_hid_text, l1) * regul_coef_hid * l1_share_hid
        l2_penalty_hid = lasagne.regularization.regularize_layer_params(
            l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)
        loss = (loss + l1_penalty_out + l2_penalty_out +
                l1_penalty_hid + l2_penalty_hid)

    parameters = lasagne.layers.get_all_params(self.l_out, trainable=True)
    updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3,
                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
    self.f_train = theano.function([self.X_sym, self.Y_sym], loss,
                                   updates=updates, on_unused_input='warn')
    self.f_val = theano.function([self.X_sym, self.Y_sym], loss,
                                 on_unused_input='warn')
    self.f_predict = theano.function([self.X_sym], output_eval,
                                     on_unused_input='warn')
def build(self):
    """Build a bivariate-Gaussian mixture-density network over sparse inputs.

    Architecture: sparse input -> (optional sparse dropout) -> tanh hidden
    layer -> linear output with ``self.n_bigaus_comp * 6`` units, unpacked
    by ``self.unpack_params`` into (mus, sigmas, corxy, pis) and trained
    with the negative log-likelihood ``self.nll_loss``.

    Side effects: sets ``self.X_sym``, ``self.Y_sym``, ``self.l_out_gaus``
    and compiles ``self.f_train``, ``self.f_val``, ``self.f_predict``
    (predict returns the raw mixture parameters, not point estimates).
    """
    self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
    self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)

    l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                          input_var=self.X_sym)
    if self.drop_out and self.dropout_coef > 0:
        l_in_text = lasagne_layers.SparseInputDropoutLayer(
            l_in_text, p=self.dropout_coef)
    l_hid_text = SparseInputDenseLayer(
        l_in_text,
        num_units=self.hid_size,
        nonlinearity=lasagne.nonlinearities.tanh,
        W=lasagne.init.GlorotUniform())
    # NOTE(review): hidden-layer dropout was deliberately disabled here
    # (commented out in the original); input dropout above is kept.
    # 6 params per mixture component: mu_x, mu_y, sigma_x, sigma_y, rho, pi.
    self.l_out_gaus = lasagne.layers.DenseLayer(
        l_hid_text,
        num_units=self.n_bigaus_comp * 6,
        nonlinearity=lasagne.nonlinearities.linear,
        W=lasagne.init.GlorotUniform())

    output = lasagne.layers.get_output(self.l_out_gaus, self.X_sym)
    mus, sigmas, corxy, pis = self.unpack_params(output,
                                                 n_comp=self.n_bigaus_comp)
    loss = self.nll_loss(mus, sigmas, corxy, pis, self.Y_sym)

    if self.regul_coef:
        # Elastic-net style: split each layer's coefficient 50/50 between
        # L1 and L2 penalties.
        l1_share_out = 0.5
        l1_share_hid = 0.5
        regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
        logging.info(
            'regul coefficient for output and hidden lasagne_layers is ' +
            str(self.regul_coef))
        # BUGFIX: the original code computed the output-layer penalties and
        # then overwrote them with the hidden-layer penalties, so the output
        # layer was never regularized. Accumulate all four terms instead.
        l1_penalty_out = lasagne.regularization.regularize_layer_params(
            self.l_out_gaus, l1) * regul_coef_out * l1_share_out
        l2_penalty_out = lasagne.regularization.regularize_layer_params(
            self.l_out_gaus, l2) * regul_coef_out * (1 - l1_share_out)
        l1_penalty_hid = lasagne.regularization.regularize_layer_params(
            l_hid_text, l1) * regul_coef_hid * l1_share_hid
        l2_penalty_hid = lasagne.regularization.regularize_layer_params(
            l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)
        loss = (loss + l1_penalty_out + l2_penalty_out +
                l1_penalty_hid + l2_penalty_hid)

    parameters = lasagne.layers.get_all_params(self.l_out_gaus,
                                               trainable=True)
    updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3,
                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
    self.f_train = theano.function([self.X_sym, self.Y_sym], loss,
                                   updates=updates, on_unused_input='warn')
    self.f_val = theano.function([self.X_sym, self.Y_sym], loss,
                                 on_unused_input='warn')
    self.f_predict = theano.function([self.X_sym],
                                     [mus, sigmas, corxy, pis],
                                     on_unused_input='warn')