def forward_scan(self, x):
    h0 = shared_zero_matrix((self.hidden_dim, ), 'h0_forward')
    c0 = shared_zero_matrix((self.hidden_dim, ), 'c0_forward')
    hs, _ = theano.scan(fn=self._step,
                        sequences=x,
                        outputs_info=[h0, c0],
                        non_sequences=[self.W, self.U, self.b],
                        )
    # scan returns [h_seq, c_seq]; keep only the hidden states
    return hs[0]
def backward_scan(self, x):
    h0_backward = shared_zero_matrix((self.hidden_dim, ), 'h0_backward')
    c0_backward = shared_zero_matrix((self.hidden_dim, ), 'c0_backward')
    h_backwards, _ = theano.scan(fn=self._step,
                                 sequences=x,
                                 outputs_info=[h0_backward, c0_backward],
                                 non_sequences=[self.W_backward, self.U_backward, self.b_backward],
                                 go_backwards=True,
                                 )
    # scan returns [h_seq, c_seq]; keep the hidden states and restore
    # the original (forward) time order
    return h_backwards[0][::-1]
def backward_scan_batch(self, x, mask, batch_size):
    h0_backward = shared_zero_matrix((batch_size, self.hidden_dim), 'h0_backward')
    c0_backward = shared_zero_matrix((batch_size, self.hidden_dim), 'c0_backward')
    h_backwards, _ = theano.scan(fn=self._step_batch,
                                 sequences=[T.transpose(x, (1, 0, 2)),   # (batch, max_len, dim) -> (max_len, batch, dim)
                                            T.transpose(mask, (1, 0))],  # (batch, max_len) -> (max_len, batch)
                                 outputs_info=[h0_backward, c0_backward],
                                 non_sequences=[self.W_backward, self.U_backward, self.b_backward],
                                 go_backwards=True,
                                 )
    # (max_len, batch, dim) -> (batch, max_len, dim), then restore forward time order
    return T.transpose(h_backwards[0], (1, 0, 2))[:, ::-1]
def get_update(self, loss, params, norm_exc_params=[]): logger.info("Update Parameters: %s" % params) updates = OrderedDict({}) accumulators = OrderedDict({}) grad_params = [] for param in params: accumulators[param] = shared_zero_matrix(param.get_value().shape, name="acc_%s" % param.name) gp = T.grad(loss, param) grad_params.append(gp) for param, gp in zip(params, grad_params): exp_sr = accumulators[param] up_exp_sr = exp_sr + T.sqr(gp).sum() updates[exp_sr] = up_exp_sr step = (self.lr / (T.sqrt(up_exp_sr) + self.epsilon)) * gp stepped_param = param - step param_name = param.name if self.norm_lim > 0 and (param.get_value(borrow=True).ndim == 2) and (param_name not in norm_exc_params): col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0)) desired_norms = T.clip(col_norms, 0, T.sqrt(self.norm_lim)) scale = desired_norms / (1e-7 + col_norms) updates[param] = stepped_param * scale else: updates[param] = stepped_param return updates
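# Usage sketch (an assumption, not part of the original file; `model` and
# `lookup_table` are hypothetical placeholders): wiring get_update into a
# compiled Theano function.
#
#   optimizer = AdaGradOptimizer(lr=0.95, norm_lim=16)
#   updates = optimizer.get_update(model.loss, model.params,
#                                  norm_exc_params=[p.name for p in lookup_table.params])
#   train_fn = theano.function(inputs=[x, y], outputs=model.loss, updates=updates)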
def forward_scan_batch(self, x, mask, batch_size):
    h0 = shared_zero_matrix((batch_size, self.hidden_dim), 'h0_forward')
    c0 = shared_zero_matrix((batch_size, self.hidden_dim), 'c0_forward')
    hs, _ = theano.scan(fn=self._step_batch,
                        sequences=[T.transpose(x, (1, 0, 2)),   # (batch, max_len, dim) -> (max_len, batch, dim)
                                   T.transpose(mask, (1, 0))],  # (batch, max_len) -> (max_len, batch)
                        outputs_info=[h0, c0],
                        non_sequences=[self.W, self.U, self.b],
                        )
    # (max_len, batch, dim) -> (batch, max_len, dim)
    return T.transpose(hs[0], (1, 0, 2))
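# A minimal sketch of the batched LSTM transition the scans above expect
# (the real `_step_batch` is defined elsewhere in the repo; this is an
# assumption based on the [in, forget, output, recurrent] packing of W/U/b
# documented in __init__ and on the (sequences, outputs_info, non_sequences)
# argument order of theano.scan). Plain sigmoid/tanh stand in for the
# configurable Activation objects.
def _step_batch_sketch(x_t, m_t, h_tm1, c_tm1, W, U, b):
    pre = T.dot(x_t, W.T) + T.dot(h_tm1, U.T) + b  # (batch, 4 * hidden)
    n = W.shape[0] // 4
    i = T.nnet.sigmoid(pre[:, 0 * n:1 * n])        # input gate
    f = T.nnet.sigmoid(pre[:, 1 * n:2 * n])        # forget gate
    o = T.nnet.sigmoid(pre[:, 2 * n:3 * n])        # output gate
    g = T.tanh(pre[:, 3 * n:4 * n])                # candidate state
    c_t = f * c_tm1 + i * g
    h_t = o * T.tanh(c_t)
    m = m_t.dimshuffle(0, 'x')                     # (batch,) -> (batch, 1)
    # padded positions (mask == 0) carry the previous state forward
    return m * h_t + (1 - m) * h_tm1, m * c_t + (1 - m) * c_tm1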
def __init__(self, num_in, initializer=default_initializer):
    self.W = shared_rand_matrix(shape=(num_in, 1), name="logistic_W", initializer=initializer)
    self.b = shared_zero_matrix((1, ), name='logistic_b')
    self.params = [self.W, self.b]
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
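# A plausible forward pass for this logistic-regression layer (a sketch;
# the actual forward method is not shown in this file): a single sigmoid
# unit over the input.
#
#   def forward(self, x):
#       # x: (batch, num_in) -> probability of the positive class, (batch,)
#       return T.nnet.sigmoid(T.dot(x, self.W) + self.b).flatten()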
def __init__(self, in_dim, hidden_dim, pooling, activation, gates=("sigmoid", "sigmoid", "sigmoid"), prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) super(BiLSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, gates, prefix, initializer, dropout, verbose) self.out_dim = hidden_dim * 2 # Composition Function Weight -- Gates # W [in, forget, output, recurrent] self.W_forward, self.W_forward.name = self.W, prefix + "W_forward" self.W_backward = shared_rand_matrix( (self.hidden_dim * 4, self.in_dim), prefix + 'W_backward', initializer) # U [in, forget, output, recurrent] self.U_forward, self.U_forward.name = self.U, prefix + "U_forward" self.U_backward = shared_rand_matrix( (self.hidden_dim * 4, self.hidden_dim), prefix + 'U_backward', initializer) # b [in, forget, output, recurrent] self.b_forward, self.b_forward.name = self.b, prefix + "b_forward" self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b_backward') self.params = [ self.W_forward, self.U_forward, self.b_forward, self.W_backward, self.U_backward, self.b_backward ] self.norm_params = [ self.W_forward, self.U_forward, self.W_backward, self.U_backward ] self.l1_norm = T.sum( [T.sum(T.abs_(param)) for param in self.norm_params]) self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params]) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Input Gate: %s' % self.in_gate.method) logger.debug('Forget Gate: %s' % self.forget_gate.method) logger.debug('Output Gate: %s' % self.out_gate.method) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, num_in, num_out, initializer=default_initializer):
    self.num_in = num_in
    self.num_out = num_out
    self.W = shared_rand_matrix(shape=(num_in, num_out), name="softmax_W", initializer=initializer)
    self.b = shared_zero_matrix((num_out, ), 'softmax_b')
    self.params = [self.W, self.b]
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
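# A plausible forward/loss pair for this softmax classifier (a sketch; the
# actual `forward` and `loss` methods, used elsewhere in this file, are
# defined in the repo):
#
#   def forward(self, x):
#       # x: (batch, num_in) -> class probabilities (batch, num_out)
#       return T.nnet.softmax(T.dot(x, self.W) + self.b)
#
#   def loss(self, x, y):
#       # mean negative log-likelihood of the gold labels y: (batch,)
#       prob = self.forward(x)
#       return -T.mean(T.log(prob[T.arange(y.shape[0]), y]))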
def __init__(self, in_dim, hidden_dim, pooling, activation, prefix="", initializer=default_initializer, dropout=0, verbose=True): super(BiRecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, prefix, initializer, dropout, verbose) if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.out_dim = hidden_dim * 2 # Forward Direction - Backward Direction # Feed-Forward Matrix (hidden, in) self.W_forward = self.W self.W_forward.name = prefix + "W_forward" self.W_backward = shared_rand_matrix( (self.hidden_dim, self.in_dim), prefix + 'W_backward', initializer) # Bias Term (hidden,) self.b_forward = self.b self.b_forward.name = prefix + "b_forward" self.b_backward = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_backward') # Recurrent Matrix (hidden, hidden) self.U_forward = self.U self.U_forward.name = prefix + "U_forward" self.U_backward = shared_rand_matrix( (self.hidden_dim, self.hidden_dim), prefix + 'U_backward', initializer) self.params = [ self.W_forward, self.W_backward, self.U_forward, self.U_backward, self.b_forward, self.b_backward ] self.norm_params = [ self.W_forward, self.W_backward, self.U_forward, self.U_backward ] # L1, L2 Norm self.l1_norm = T.sum( [T.sum(T.abs_(param)) for param in self.norm_params]) self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params]) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def get_update(self, loss, params, norm_exc_params=[]): logger.info("Update Parameters: %s" % params) rho = self.lr epsilon = self.epsilon norm_lim = self.norm_lim updates = OrderedDict({}) exp_sqr_grads = OrderedDict({}) exp_sqr_ups = OrderedDict({}) gparams = [] for param in params: exp_sqr_grads[param] = shared_zero_matrix(param.get_value().shape, name="exp_grad_%s" % param.name) gp = T.grad(loss, param) exp_sqr_ups[param] = shared_zero_matrix(param.get_value().shape, name="exp_ups_%s" % param.name) gparams.append(gp) for param, gp in zip(params, gparams): exp_sg = exp_sqr_grads[param] exp_su = exp_sqr_ups[param] up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp) updates[exp_sg] = up_exp_sg step = -(T.sqrt(exp_su + epsilon) / T.sqrt(up_exp_sg + epsilon)) * gp updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step) stepped_param = param + step param_name = param.name if self.norm_lim > 0 and (param.get_value(borrow=True).ndim == 2) and (param_name not in norm_exc_params): col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0)) desired_norms = T.clip(col_norms, 0, T.sqrt(norm_lim)) scale = desired_norms / (1e-7 + col_norms) updates[param] = stepped_param * scale else: updates[param] = stepped_param return updates
def __init__(self, in_dim, hidden_dim, pooling, activation, gates=("sigmoid", "sigmoid", "sigmoid"), prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.in_dim = in_dim self.hidden_dim = hidden_dim self.out_dim = hidden_dim self.pooling = pooling self.act = Activation(activation) self.in_gate, self.forget_gate, self.out_gate = Activation( gates[0]), Activation(gates[1]), Activation(gates[2]) self.dropout = dropout # W [in, forget, output, recurrent] (4 * hidden, in) self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim), prefix + 'W', initializer) # U [in, forget, output, recurrent] (4 * hidden, hidden) self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim), prefix + 'U', initializer) # b [in, forget, output, recurrent] (4 * hidden,) self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b') self.params = [self.W, self.U, self.b] self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U)) self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Input Gate: %s' % self.in_gate.method) logger.debug('Forget Gate: %s' % self.forget_gate.method) logger.debug('Output Gate: %s' % self.out_gate.method) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation, prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.in_dim = in_dim self.out_dim = hidden_dim self.hidden_dim = hidden_dim self.pooling = pooling self.dropout = dropout self.act = Activation(activation) # Composition Function Weight # Feed-Forward Matrix (hidden, in) self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_forward', initializer) # Bias Term (hidden) self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward') # Recurrent Matrix (hidden, hidden) self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim), prefix + 'U_forward', initializer) self.params = [self.W, self.U, self.b] self.norm_params = [self.W, self.U] # L1, L2 Norm self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U)) self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def forward_sequence_batch(self, x, mask, batch_size):
    """
    :param x:    (batch, max_len, dim)
    :param mask: (batch, max_len)
    :param batch_size:
    """
    h0 = shared_zero_matrix((batch_size, self.hidden_dim), 'h0')
    hs, _ = theano.scan(fn=self._step_batch,
                        sequences=[T.transpose(x, (1, 0, 2)),   # (batch, max_len, dim) -> (max_len, batch, dim)
                                   T.transpose(mask, (1, 0))],  # (batch, max_len) -> (max_len, batch)
                        outputs_info=[h0],
                        non_sequences=[self.W, self.U, self.b],
                        )
    # (max_len, batch, dim) -> (batch, max_len, dim)
    return T.transpose(hs, (1, 0, 2))
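# A minimal sketch of the masked simple-RNN transition this scan expects
# (the real `_step_batch` is defined elsewhere in the repo; tanh stands in
# for the configurable Activation object):
def _rnn_step_batch_sketch(x_t, m_t, h_tm1, W, U, b):
    h_t = T.tanh(T.dot(x_t, W.T) + T.dot(h_tm1, U.T) + b)  # (batch, hidden)
    m = m_t.dimshuffle(0, 'x')
    # padded positions keep the previous hidden state
    return m * h_t + (1 - m) * h_tm1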
def __init__(self, lookup_table, recurrent_encoder, in_dim, hidden_dim, num_label,
             pooling, activation, batch_size=64, initializer=default_initializer,
             dropout=0, verbose=True):
    self.batch_size = batch_size
    word_index = T.imatrix()  # (batch, max_len)
    gold_truth = T.ivector()  # (batch, )
    rnn_encoder = recurrent_encoder(in_dim=in_dim, hidden_dim=hidden_dim,
                                    pooling=pooling, activation=activation,
                                    initializer=initializer, dropout=dropout,
                                    verbose=verbose)
    mask = (word_index > 0) * one_float32
    word_embedding = lookup_table.W[word_index]
    rnn_output = rnn_encoder.forward_batch(word_embedding, mask, batch_size)
    classifier = SoftmaxClassifier(num_in=rnn_encoder.out_dim, num_out=num_label,
                                   initializer=initializer)
    classifier_output = classifier.forward(rnn_output)
    loss = classifier.loss(rnn_output, gold_truth)
    params = lookup_table.params + classifier.params + rnn_encoder.params
    sgd_optimizer = AdaGradOptimizer(lr=0.95, norm_lim=16)
    except_norm_list = [param.name for param in lookup_table.params]
    updates = sgd_optimizer.get_update(loss, params, except_norm_list)
    # shared data containers; contents are replaced when data is loaded
    self.train_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    self.train_y = shared_zero_matrix((1, ), dtype=np.int32)
    self.dev_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    self.test_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    index = T.ivector()
    self.train_batch = theano.function(inputs=[index],
                                       outputs=[classifier_output, loss],
                                       updates=updates,
                                       givens={word_index: self.train_x[index],
                                               gold_truth: self.train_y[index]})
    self.get_norm = theano.function(inputs=[],
                                    outputs=[lookup_table.l2_norm, classifier.l2_norm])
    self.pred_train_batch = theano.function(inputs=[index],
                                            outputs=classifier_output,
                                            givens={word_index: self.train_x[index]})
    self.pred_dev_batch = theano.function(inputs=[index],
                                          outputs=classifier_output,
                                          givens={word_index: self.dev_x[index]})
    self.pred_test_batch = theano.function(inputs=[index],
                                           outputs=classifier_output,
                                           givens={word_index: self.test_x[index]})
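# Usage sketch (an assumption; `model`, `train_word_indices`, and
# `train_labels` are hypothetical names): load padded index matrices into
# the shared containers, then train on minibatches of row indices.
#
#   model.train_x.set_value(train_word_indices)  # (num_train, max_len) int32
#   model.train_y.set_value(train_labels)        # (num_train,) int32
#   all_idx = np.arange(num_train).astype(np.int32)
#   for batch in np.array_split(all_idx, num_batches):
#       pred, loss = model.train_batch(batch)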
def __init__(self, lookup_table, in_dim, hidden_dims, labels_nums, activation,
             highway=False, batch_size=64, initializer=default_initializer,
             optimizer=None, dropout=0, verbose=True):
    self.batch_size = batch_size
    self.num_task = len(labels_nums)
    word_index = T.itensor3()  # (batch, num_query, max_len)
    gold_truth = T.ivector()   # (batch, )
    mask_query = (word_index > 0) * T.constant(1, dtype=theano.config.floatX)
    mask_user = (T.sum(word_index, axis=2) > 0) * T.constant(1, dtype=theano.config.floatX)
    word_embedding = lookup_table.W[word_index]
    # pooling (max / sum / averaging): words -> query, then queries -> user
    hidden = get_pooling_batch_word(word_embedding, mask_query, "averaging")
    hidden = get_pooling_batch(hidden, mask_user, "averaging")
    # hidden = T.mean(hidden, axis=1)
    if len(hidden_dims) == 0 or hidden_dims[0] == 0:
        nn_output = hidden
        nn_output_dim = in_dim
    elif highway:
        encoder = HighwayLayer(in_dim=in_dim, activation=activation,
                               initializer=initializer, dropout=dropout, verbose=verbose)
        nn_output = encoder.forward_batch(hidden)
        nn_output_dim = encoder.out_dim
    else:
        encoder = MultiHiddenLayer(in_dim=in_dim, hidden_dims=hidden_dims,
                                   activation=activation, initializer=initializer,
                                   dropout=dropout, verbose=verbose)
        nn_output = encoder.forward_batch(hidden)
        nn_output_dim = encoder.out_dim
    if optimizer is None:
        sgd_optimizer = AdaGradOptimizer(lr=0.95, norm_lim=16)
    else:
        sgd_optimizer = optimizer
    self.train_x = shared_zero_matrix((batch_size, 1, 1), dtype=np.int32)
    self.train_y = shared_zero_matrix((1, 1), dtype=np.int32)
    self.dev_x = shared_zero_matrix((batch_size, 1, 1), dtype=np.int32)
    self.test_x = shared_zero_matrix((batch_size, 1, 1), dtype=np.int32)
    self.train_batch_list = list()
    self.pred_train_batch_list = list()
    self.pred_dev_batch_list = list()
    self.pred_test_batch_list = list()
    self.get_y_list = list()
    index = T.ivector()
    classifier_list = list()
    classifier_output_list = list()
    classifier_loss_list = list()
    classifier_param_list = list()
    classifier_updates_list = list()
    # one softmax classifier (and one set of compiled functions) per task
    for i in xrange(len(labels_nums)):
        classifier = SoftmaxClassifier(num_in=nn_output_dim, num_out=labels_nums[i],
                                       initializer=initializer)
        classifier_list.append(classifier)
        classifier_output_list.append(classifier_list[i].forward(nn_output))
        classifier_loss_list.append(classifier_list[i].loss(nn_output, gold_truth))
        if len(hidden_dims) == 0 or hidden_dims[0] == 0:
            classifier_param_list.append(lookup_table.params + classifier.params)
        else:
            classifier_param_list.append(lookup_table.params + classifier.params + encoder.params)
        except_norm_list = [param.name for param in lookup_table.params]
        classifier_updates_list.append(
            sgd_optimizer.get_update(classifier_loss_list[i], classifier_param_list[i],
                                     except_norm_list))
        train_batch = theano.function(inputs=[index],
                                      outputs=[classifier_output_list[i], classifier_loss_list[i]],
                                      updates=classifier_updates_list[i],
                                      givens={word_index: self.train_x[index],
                                              gold_truth: self.train_y[index, i]})
        self.train_batch_list.append(train_batch)
        pred_train_batch = theano.function(inputs=[index],
                                           outputs=classifier_output_list[i],
                                           givens={word_index: self.train_x[index]})
        self.pred_train_batch_list.append(pred_train_batch)
        pred_dev_batch = theano.function(inputs=[index],
                                         outputs=classifier_output_list[i],
                                         givens={word_index: self.dev_x[index]})
        self.pred_dev_batch_list.append(pred_dev_batch)
        pred_test_batch = theano.function(inputs=[index],
                                          outputs=classifier_output_list[i],
                                          givens={word_index: self.test_x[index]})
        self.pred_test_batch_list.append(pred_test_batch)
        self.get_y_list.append(theano.function(inputs=[index],
                                               outputs=self.train_y[index, i]))
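# Usage sketch (an assumption; `model` and `batch_indices` are hypothetical
# names): round-robin multi-task training, one minibatch per task per step,
# using the per-task function lists built above.
#
#   for batch in batch_indices:
#       for task in xrange(model.num_task):
#           pred, loss = model.train_batch_list[task](batch)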
def __init__(self, key_index, label_num, pretrain_name=None, encoder='lstm',
             word_dim=300, hidden='100_100', dropout=0.5,
             regularization_weight=0.0001, optimizer_name='adagrad', lr=0.1,
             norm_lim=-1, label2index_filename=None):
    self.label2index, self.index2label = self.load_label_index(label2index_filename, label_num)
    self.indexs = T.imatrix()   # (batch, max_len)
    self.golden = T.ivector()   # (batch, )
    self.max_len = T.iscalar()  # max length of the current batch
    self.s1_mask = self.indexs[:, :self.max_len] > 0
    self.s1_mask = self.s1_mask * T.constant(1.0, dtype=theano.config.floatX)
    if pretrain_name is None:
        self.embedding = WordEmbedding(key_index, dim=word_dim,
                                       initializer=UniformInitializer(scale=0.01))
    else:
        self.embedding = WordEmbedding(key_index, filename=pretrain_name,
                                       normalize=False, binary=True)
        assert self.embedding.dim == word_dim
    self.word_embeddings = self.embedding[self.indexs[:, :self.max_len]]
    if type(hidden) is str:
        hidden_dims = [int(hid) for hid in hidden.split('_')]
    else:
        hidden_dims = [hidden]
    if encoder == 'lstm':
        encoder_layer = LSTMEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                    pooling='final', prefix="LSTM_", dropout=dropout)
    elif encoder == 'bilstm':
        encoder_layer = BiLSTMEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                      pooling='final', prefix="BiLSTM_",
                                      bidirection_shared=True, dropout=dropout)
    elif encoder == 'recurrent':
        encoder_layer = RecurrentEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                         pooling='final', prefix="Recurrent_", dropout=dropout)
    elif encoder == 'birecurrent':
        encoder_layer = BiRecurrentEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                           pooling='final', prefix="BiRecurrent_",
                                           bidirection_shared=True, dropout=dropout)
    elif encoder == 'gru':
        encoder_layer = GRUEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                   pooling='final', prefix="GRU_", dropout=dropout)
    elif encoder == 'bigru':
        encoder_layer = BiGRUEncoder(in_dim=word_dim, hidden_dim=hidden_dims[0],
                                     pooling='final', prefix="BiGRU_",
                                     bidirection_shared=True, dropout=dropout)
    elif encoder == 'cbow':
        encoder_layer = CBOWLayer(in_dim=word_dim)
    elif encoder == 'cnn':
        encoder_layer = MultiFilterConvolutionLayer(in_dim=word_dim,
                                                    hidden_dim=hidden_dims[0],
                                                    pooling='max', prefix="ConvLayer_",
                                                    kernel_sizes=CONV_FILTER_SIZES)
    else:
        raise NotImplementedError
    self.text_embedding = encoder_layer.forward_batch(self.word_embeddings, self.s1_mask)
    if len(hidden_dims) > 1:
        hidden_layer = MultiHiddenLayer(in_dim=encoder_layer.out_dim,
                                        hidden_dims=hidden_dims[1:], dropout=dropout,
                                        prefix='Full_Connected_Layer_')
        classifier_input = hidden_layer.forward_batch(self.text_embedding)
        classifier_input_dim = hidden_layer.out_dim
    else:
        classifier_input = self.text_embedding
        classifier_input_dim = encoder_layer.out_dim
    self.classifier = SoftmaxClassifier(classifier_input_dim, label_num, dropout=dropout)
    self.predict_loss = self.classifier.loss(classifier_input, self.golden)
    self.predict_prob = self.classifier.forward_batch(classifier_input)
    self.predict_label = T.argmax(self.predict_prob, axis=1)
    """Params in TextClassifier"""
    self.params = self.classifier.params + encoder_layer.params
    self.l2_norm = self.classifier.l2_norm + encoder_layer.l2_norm
    if len(hidden_dims) > 1:
        self.params += hidden_layer.params
        self.l2_norm += hidden_layer.l2_norm
    self.l2_loss = regularization_weight * self.l2_norm / 2
    self.loss = self.predict_loss + self.l2_loss
    """Optimizer and Loss"""
    if optimizer_name == 'adagrad':
        sgd_optimizer = AdaGradOptimizer(lr=lr, norm_lim=norm_lim)
    elif optimizer_name == 'adadelta':
        sgd_optimizer = AdaDeltaOptimizer(lr=lr, norm_lim=norm_lim)
    elif optimizer_name == 'sgd':
        sgd_optimizer = SGDOptimizer(lr=lr, norm_lim=norm_lim)
    elif optimizer_name == 'momentum':
        sgd_optimizer = SGDMomentumOptimizer(lr=lr, norm_lim=norm_lim)
    elif optimizer_name == 'adam':
        sgd_optimizer = AdamOptimizer(lr=lr, norm_lim=norm_lim)
    else:
        raise NotImplementedError
    self.train_indexs = T.ivector()
    self.train_data_x = shared_zero_matrix(shape=(5, 5), name="train_data_x", dtype=np.int32)
    self.train_data_y = shared_zero_matrix(shape=(5, ), name="train_data_y", dtype=np.int32)
    self.model_params = self.params + self.embedding.params
    """Theano Function"""
    # the norm-exclusion list expects parameter names, not the parameters
    norm_exc_names = [param.name for param in self.embedding.params]
    if EMBEDDING_LR > 0:
        # Optimize Embedding with a separate fixed-lr SGD optimizer
        embedding_updates = SGDOptimizer(lr=EMBEDDING_LR, norm_lim=-1).get_update(
            self.loss, self.embedding.params)
        updates = sgd_optimizer.get_update(self.loss, self.params,
                                           norm_exc_params=norm_exc_names)
        updates.update(embedding_updates)
    elif EMBEDDING_LR < 0:
        # Optimize Embedding using Global Optimizer
        self.params += self.embedding.params
        updates = sgd_optimizer.get_update(self.loss, self.params,
                                           norm_exc_params=norm_exc_names)
    else:
        # Fix Embedding (EMBEDDING_LR == 0)
        updates = sgd_optimizer.get_update(self.loss, self.params,
                                           norm_exc_params=norm_exc_names)
    self.train_batch = theano.function(
        inputs=[self.train_indexs, self.max_len],
        outputs=[self.loss, self.predict_loss, self.l2_loss],
        updates=updates,
        givens=[(self.indexs, self.train_data_x[self.train_indexs]),
                (self.golden, self.train_data_y[self.train_indexs])])
    self.loss_batch = theano.function(
        inputs=[self.indexs, self.golden, self.max_len],
        outputs=[self.loss, self.predict_loss, self.l2_loss])
    self.pred_prob_batch = theano.function(
        inputs=[self.indexs, self.max_len],
        outputs=[self.predict_prob])
    self.pred_label_batch = theano.function(
        inputs=[self.indexs, self.max_len],
        outputs=[self.predict_label])
    self.get_l2_loss = theano.function(
        inputs=[],
        outputs=[self.l2_loss, self.l2_norm])
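# Usage sketch (an assumption; `clf`, `train_x`, `train_y`, and `batches`
# are hypothetical names): the compiled functions slice every batch down to
# its true maximum length via the `max_len` input.
#
#   clf = TextClassifier(key_index, label_num=5, encoder='lstm')
#   clf.train_data_x.set_value(train_x)  # (num_train, pad_len) int32
#   clf.train_data_y.set_value(train_y)  # (num_train,) int32
#   for batch in batches:                # batch: int32 row indices
#       max_len = int((train_x[batch] > 0).sum(axis=1).max())
#       loss, pred_loss, l2_loss = clf.train_batch(batch, max_len)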