def get_iter(self, params, grad_params):
    """
    Get params' update values (AdaDelta).
    :param params: list/tuple of shared parameters
    :param grad_params: list/tuple of gradients w.r.t. params
    :return: params_updates    OrderedDict, key: param, value: updated value
             optimizer_updates OrderedDict, key: accumulator in optimizer, value: updated value
    """
    params_updates = OrderedDict({})
    optimizer_updates = OrderedDict({})
    rho = self.decay_rate
    epsilon = self.epsilon
    exp_sqr_grads = OrderedDict({})
    exp_sqr_ups = OrderedDict({})
    for param in params:
        exp_sqr_grads[param] = shared_zero_matrix(param.get_value().shape,
                                                  name="exp_grad_%s" % param.name)
        exp_sqr_ups[param] = shared_zero_matrix(param.get_value().shape,
                                                name="exp_ups_%s" % param.name)
    for param, gp in zip(params, grad_params):
        exp_sg = exp_sqr_grads[param]
        exp_su = exp_sqr_ups[param]
        # Accumulate the running average of squared gradients
        up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp)
        # Step = -RMS(previous updates) / RMS(gradients) * gradient, scaled by lr
        step = -(T.sqrt(exp_su + epsilon) / T.sqrt(up_exp_sg + epsilon)) * gp * self.lr
        stepped_param = param + step
        # Accumulate the running average of squared updates
        optimizer_updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
        optimizer_updates[exp_sg] = up_exp_sg
        params_updates[param] = stepped_param
    return params_updates, optimizer_updates
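# For reference, the AdaDelta rule above (Zeiler, 2012) reads as the small NumPy-style
# sketch below; values are hypothetical, and note that this implementation additionally
# scales the step by self.lr (vanilla AdaDelta uses lr = 1):
#
#     # rho, epsilon, lr = 0.95, 1e-6, 1.0
#     # exp_sg = rho * exp_sg + (1 - rho) * grad ** 2
#     # step   = -(np.sqrt(exp_su + epsilon) / np.sqrt(exp_sg + epsilon)) * grad * lr
#     # exp_su = rho * exp_su + (1 - rho) * step ** 2
#     # param += step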
def __init__(self, in_dim, hidden_dim, initializer=default_initializer, normalize=True, dropout=0,
             reconstructe=True, activation="tanh", verbose=True):
    """
    :param in_dim: input dimension
    :param hidden_dim: hidden dimension
    :param initializer: random initializer
    :param normalize: whether to normalize
    :param dropout: dropout rate
    :param activation: activation function
    :param verbose: whether to emit debug-level logs
    :return:
    """
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    assert self.in_dim == self.hidden_dim
    self.initializer = initializer
    self.normalize = normalize
    self.dropout = dropout
    self.verbose = verbose
    self.act = Activation(activation)
    # Composition Function Weight
    # (dim, 2 * dim)
    self.W = shared_rand_matrix((self.hidden_dim, 2 * self.in_dim), 'W', initializer=initializer)
    # (dim, )
    self.b = shared_zero_matrix((self.hidden_dim, ), 'b')
    # Reconstruction Function Weight
    # (2 * dim, dim)
    self.Wr = shared_rand_matrix((2 * self.in_dim, self.hidden_dim), 'Wr', initializer=initializer)
    # (2 * dim, )
    self.br = shared_zero_matrix((self.in_dim * 2, ), 'br')
    self.params = [self.W, self.b, self.Wr, self.br]
    self.norm_params = [self.W, self.Wr]
    self.l1_norm = sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of RAE built, summarized below:')
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Normalize: %s' % self.normalize)
        logger.debug('Activation: %s' % self.act)
        logger.debug('Dropout Rate: %s' % self.dropout)
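# A rough sketch of how these weights are typically used in a recursive autoencoder
# (assumed usage; the actual forward pass is defined elsewhere in this repo):
#
#     # c1, c2: child vectors, each of shape (dim,)
#     # p = act(W  @ concat(c1, c2) + b)     composition, shape (dim,)
#     # r = act(Wr @ p + br)                 reconstruction, shape (2 * dim,)
#     # reconstruction loss ~ || r - concat(c1, c2) ||^2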
def forward_scan(self, x):
    h0 = shared_zero_matrix((self.hidden_dim, ), 'h0_forward')
    c0 = shared_zero_matrix((self.hidden_dim, ), 'c0_forward')
    hs, _ = theano.scan(
        fn=self._step,
        sequences=x,
        outputs_info=[h0, c0],
        non_sequences=[self.W, self.U, self.b],
    )
    return hs[0]
def backward_scan(self, x):
    h0_backward = shared_zero_matrix(self.hidden_dim, 'h0_backward')
    c0_backward = shared_zero_matrix(self.hidden_dim, 'c0_backward')
    h_backwards, _ = theano.scan(
        fn=self._step,
        sequences=x,
        outputs_info=[h0_backward, c0_backward],
        non_sequences=[self.W_backward, self.U_backward, self.b_backward],
        go_backwards=True,
    )
    # go_backwards scans the input in reverse, so flip the hidden states back
    # to the original time order before returning them
    return h_backwards[0][::-1]
def __init__(self, in_dim, hidden_dim, activation, prefix="",
             initializer=default_initializer, dropout=0, verbose=True):
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    self.in_dim = in_dim
    self.hidden_dim = hidden_dim
    self.out_dim = hidden_dim
    self.act = Activation(activation)
    self.dropout = dropout
    self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W', initializer)
    self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
    self.params = [self.W, self.b]
    self.norm_params = [self.W]
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, activation, hidden_dim=None, transform_gate="sigmoid", prefix="",
             initializer=default_initializer, dropout=0, verbose=True):
    # By construction, in_dim and out_dim have to match, so W_T and W_H are square matrices.
    if hidden_dim is not None:
        assert in_dim == hidden_dim
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(HighwayLayer, self).__init__(in_dim, in_dim, activation, prefix,
                                       initializer, dropout, verbose)
    self.transform_gate = Activation(transform_gate)
    self.W_H, self.W_H.name = self.W, prefix + "W_H"
    self.b_H, self.b_H.name = self.b, prefix + "b_H"
    self.W_T = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_T', initializer)
    self.b_T = shared_zero_matrix((self.hidden_dim,), prefix + 'b_T')
    self.params = [self.W_H, self.W_T, self.b_H, self.b_T]
    self.norm_params = [self.W_H, self.W_T]
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Transform Gate: %s' % self.transform_gate.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
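# A minimal sketch of the highway combination these weights support, following
# Srivastava et al. (2015) (assumed forward pass; the class's own forward method
# is defined elsewhere):
#
#     # x: input vector of shape (in_dim,)
#     # h = act(W_H @ x + b_H)              candidate transform
#     # t = sigmoid(W_T @ x + b_T)          transform gate
#     # y = t * h + (1 - t) * x             carry the raw input where t is small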
def __init__(self, lookup_table, in_dim, hidden_dims, num_label, activation, batch_size=64,
             initializer=default_initializer, dropout=0, verbose=True):
    self.batch_size = batch_size
    word_index = T.imatrix()  # (batch, max_len)
    gold_truth = T.ivector()  # (batch, )
    encoder = MultiHiddenLayer(in_dim=in_dim, hidden_dims=hidden_dims, activation=activation,
                               initializer=initializer, dropout=dropout, verbose=verbose)
    mask = (word_index > 0) * T.constant(1, dtype=theano.config.floatX)
    word_embedding = lookup_table.W[word_index]
    # Average of the non-padding word embeddings
    hidden = T.sum(word_embedding * mask[:, :, None], axis=1) / T.sum(mask, axis=1)[:, None]
    encoder_output = encoder.forward_batch(hidden)
    classifier = SoftmaxClassifier(num_in=encoder.out_dim, num_out=num_label, initializer=initializer)
    classifier_output = classifier.forward(encoder_output)
    loss = classifier.loss(encoder_output, gold_truth)
    params = lookup_table.params + classifier.params + encoder.params
    sgd_optimizer = AdaGradOptimizer(lr=0.95, norm_lim=16)
    except_norm_list = [param.name for param in lookup_table.params]
    updates = sgd_optimizer.get_update(loss, params, except_norm_list)
    self.train_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    self.train_y = shared_zero_matrix(1, dtype=np.int32)
    self.dev_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    self.test_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
    index = T.ivector()
    self.train_batch = theano.function(inputs=[index],
                                       outputs=[classifier_output, loss],
                                       updates=updates,
                                       givens={word_index: self.train_x[index],
                                               gold_truth: self.train_y[index]})
    self.get_norm = theano.function(inputs=[],
                                    outputs=[lookup_table.l2_norm, classifier.l2_norm])
    self.pred_train_batch = theano.function(inputs=[index],
                                            outputs=classifier_output,
                                            givens={word_index: self.train_x[index]})
    self.pred_dev_batch = theano.function(inputs=[index],
                                          outputs=classifier_output,
                                          givens={word_index: self.dev_x[index]})
    self.pred_test_batch = theano.function(inputs=[index],
                                           outputs=classifier_output,
                                           givens={word_index: self.test_x[index]})
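# A hedged usage sketch (hypothetical names and data; assumes padded word-index
# matrices and labels have been loaded into the shared buffers via set_value):
#
#     # model = BagOfWordsClassifier(lookup_table, in_dim=50, hidden_dims=[100],
#     #                              num_label=2, activation='tanh')
#     # model.train_x.set_value(train_word_indices.astype(np.int32))
#     # model.train_y.set_value(train_labels.astype(np.int32))
#     # for batch_indices in np.array_split(np.arange(len(train_labels)), num_batches):
#     #     probs, loss = model.train_batch(batch_indices.astype(np.int32))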
def get_iter(self, params, grad_params):
    """
    Get params' update values (Adam).
    :param params: list/tuple of shared parameters
    :param grad_params: list/tuple of gradients w.r.t. params
    :return: params_updates    OrderedDict, key: param, value: updated value
             optimizer_updates OrderedDict, key: accumulator in optimizer, value: updated value
    """
    params_updates = OrderedDict({})
    optimizer_updates = OrderedDict({})
    epsilon = self.epsilon
    first_moment_bias = OrderedDict({})
    second_moment_bias = OrderedDict({})
    rho1 = self.first_decay_rate
    rho2 = self.second_decay_rate
    acc_rho1 = shared_scalar(value=rho1, name="adam_acc_rho1")
    acc_rho2 = shared_scalar(value=rho2, name="adam_acc_rho2")
    for param in params:
        first_moment_bias[param] = shared_zero_matrix(param.get_value().shape,
                                                      name="fir_mom_bias%s" % param.name)
        second_moment_bias[param] = shared_zero_matrix(param.get_value().shape,
                                                       name="sec_mom_bias%s" % param.name)
    for param, gp in zip(params, grad_params):
        first_mb = first_moment_bias[param]
        second_mb = second_moment_bias[param]
        # Update biased first moment estimate
        up_first_mb = rho1 * first_mb + (1 - rho1) * gp
        # Update biased second moment estimate
        up_second_mb = rho2 * second_mb + (1 - rho2) * T.sqr(gp)
        # Correct bias in first moment
        correct_first_mb = up_first_mb / (1 - acc_rho1)
        # Correct bias in second moment
        correct_second_mb = up_second_mb / (1 - acc_rho2)
        # Compute step
        step = correct_first_mb / (T.sqrt(correct_second_mb) + epsilon)
        # Apply update
        stepped_param = param - step * self.lr
        optimizer_updates[first_mb] = up_first_mb
        optimizer_updates[second_mb] = up_second_mb
        optimizer_updates[acc_rho1] = acc_rho1 * rho1
        optimizer_updates[acc_rho2] = acc_rho2 * rho2
        params_updates[param] = stepped_param
    return params_updates, optimizer_updates
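# For orientation, the bias-corrected update above matches Adam (Kingma & Ba, 2015);
# a compact sketch with the usual defaults rho1 = 0.9, rho2 = 0.999:
#
#     # at step t:
#     # m_t = rho1 * m_{t-1} + (1 - rho1) * g
#     # v_t = rho2 * v_{t-1} + (1 - rho2) * g ** 2
#     # m_hat = m_t / (1 - rho1 ** t)        <- acc_rho1 tracks rho1 ** t
#     # v_hat = v_t / (1 - rho2 ** t)        <- acc_rho2 tracks rho2 ** t
#     # param -= lr * m_hat / (sqrt(v_hat) + epsilon)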
def __init__(self, in_dim, hidden_dim, kernel_size=3, padding='same', pooling='max', dilation_rate=1.0,
             activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
    """
    Init Function for ConvolutionLayer
    :param in_dim: input dimension
    :param hidden_dim: number of filters (hidden dimension)
    :param kernel_size: convolution window size
    :param padding: 'same', 'valid'
    :param pooling: 'max', 'mean', 'min'
    :param dilation_rate: dilation rate of the convolution
    :param activation: activation function name
    :param prefix: parameter name prefix
    :param initializer: weight initializer
    :param dropout: dropout rate
    :param verbose: whether to emit debug-level logs
    """
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    self.kernel_size = kernel_size
    self.padding = padding
    self.dilation_rate = dilation_rate
    self.pooling = pooling
    self.dropout = dropout
    self.act = Activation(activation)
    self.padding_size = int(self.dilation_rate * (self.kernel_size - 1))
    # Composition Function Weight
    # Kernel Matrix (kernel_size, hidden, in)
    self.W = shared_rand_matrix((self.kernel_size, self.hidden_dim, self.in_dim), prefix + 'W', initializer)
    # Bias Term (hidden,)
    self.b = shared_zero_matrix((self.hidden_dim,), prefix + 'b')
    self.params = [self.W, self.b]
    self.norm_params = [self.W]
    # L1, L2 Norm
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Filter Num (Hidden): %d' % self.hidden_dim)
        logger.debug('Kernel Size (Windows): %d' % self.kernel_size)
        logger.debug('Padding method: %s' % self.padding)
        logger.debug('Dilation Rate: %s' % self.dilation_rate)
        logger.debug('Padding Size: %s' % self.padding_size)
        logger.debug('Pooling method: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
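# A small reference for the shapes these options imply (assumed convention, mirroring
# the parameters logged above); with sequence length L, kernel size k, dilation d:
#
#     # padding_size = int(d * (k - 1))
#     # 'same'  -> output length stays L      (pad the ends, then convolve)
#     # 'valid' -> output length L - k + 1    (no padding)
#     # pooling ('max' / 'mean' / 'min') then collapses the length axis to a
#     # single (hidden_dim,) vector per sequence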
def __init__(self, entity_dim, relation_num, activation='tanh', hidden=5, keep_normal=False,
             initializer=default_initializer, prefix='', verbose=True):
    super(NeuralTensorModel, self).__init__()
    self.entity_dim = entity_dim
    self.relation_num = relation_num
    self.hidden = hidden
    self.slice_seq = T.arange(hidden)
    self.keep_normal = keep_normal
    # (relation_num, entity_dim, entity_dim, hidden)
    self.W = shared_rand_matrix((relation_num, self.entity_dim, self.entity_dim, self.hidden),
                                prefix + 'NTN_W', initializer)
    # (relation_num, hidden)
    self.U = shared_ones_matrix((relation_num, self.hidden), name=prefix + 'NTN_U')
    if keep_normal:
        # (relation_num, 2 * entity_dim, hidden)
        self.V = shared_rand_matrix((relation_num, self.entity_dim * 2, self.hidden),
                                    prefix + 'NTN_V', initializer)
        # (relation_num, hidden)
        self.b = shared_zero_matrix((relation_num, self.hidden), name=prefix + 'NTN_B')
        self.params = [self.W, self.V, self.U, self.b]
        self.norm_params = [self.W, self.V, self.U, self.b]
    else:
        self.params = [self.W]
        self.norm_params = [self.W]
    self.act = Activation(activation)
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of Tensor Model built, summarized below:')
        logger.debug('Entity Dimension: %d' % self.entity_dim)
        logger.debug('Hidden Dimension: %d' % self.hidden)
        logger.debug('Relation Number: %d' % self.relation_num)
        logger.debug('Initializer: %s' % initializer)
        logger.debug('Activation: %s' % activation)
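# For orientation, the standard Neural Tensor Network score these parameters suggest
# (Socher et al., 2013) looks roughly as follows (assumed usage; the actual scoring
# function lives elsewhere in this repo):
#
#     # e1, e2: entity vectors of shape (entity_dim,); r: relation index
#     # bilinear[k] = e1 @ W[r, :, :, k] @ e2   for k in range(hidden)
#     # standard    = V[r].T @ concat(e1, e2) + b[r]        (only if keep_normal)
#     # score       = U[r] @ act(bilinear + standard)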
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', prefix="",
             initializer=default_initializer, dropout=0, verbose=True):
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(RecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, dropout)
    self.in_dim = in_dim
    self.out_dim = hidden_dim
    self.hidden_dim = hidden_dim
    self.pooling = pooling
    self.dropout = dropout
    self.act = Activation(activation)
    # Composition Function Weight
    # Feed-Forward Matrix (hidden, in)
    self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_forward', initializer)
    # Bias Term (hidden,)
    self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
    # Recurrent Matrix (hidden, hidden)
    self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim), prefix + 'U_forward', initializer)
    self.params = [self.W, self.U, self.b]
    self.norm_params = [self.W, self.U]
    # L1, L2 Norm
    self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
    self.l2_norm = T.sum(self.W ** 2) + T.sum(self.U ** 2)
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', gates=("sigmoid", "sigmoid", "sigmoid"),
             prefix="", initializer=OrthogonalInitializer(), dropout=0, verbose=True):
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(LSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, dropout)
    self.in_gate, self.forget_gate, self.out_gate = Activation(gates[0]), Activation(gates[1]), Activation(gates[2])
    # W [in, forget, output, recurrent] (4 * hidden, in)
    self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim), prefix + 'W', initializer)
    # U [in, forget, output, recurrent] (4 * hidden, hidden)
    self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim), prefix + 'U', initializer)
    # b [in, forget, output, recurrent] (4 * hidden,)
    self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')
    self.params = [self.W, self.U, self.b]
    self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
    self.l2_norm = T.sum(self.W ** 2) + T.sum(self.U ** 2)
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Input Gate: %s' % self.in_gate.method)
        logger.debug('Forget Gate: %s' % self.forget_gate.method)
        logger.debug('Output Gate: %s' % self.out_gate.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
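# The stacked (4 * hidden) weight layout above is usually consumed by slicing one
# block per gate inside the step function, roughly as sketched here (assumed usage,
# with h = hidden_dim):
#
#     # pre = W @ x_t + U @ h_prev + b                     (4 * hidden,)
#     # i = in_gate(pre[0:h]);      f = forget_gate(pre[h:2*h])
#     # o = out_gate(pre[2*h:3*h]); g = act(pre[3*h:4*h])
#     # c_t = f * c_prev + i * g
#     # h_t = o * act(c_t)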
def forward_sequence_batch(self, x, mask, batch_size):
    """
    :param x: (batch, max_len, dim)
    :param mask: (batch, max_len)
    :param batch_size:
    """
    h0 = shared_zero_matrix((batch_size, self.hidden_dim), 'h0')
    hs, _ = theano.scan(
        fn=self._step_batch,
        sequences=[T.transpose(x, (1, 0, 2)),   # (batch, max_len, dim) -> (max_len, batch, dim)
                   T.transpose(mask, (1, 0))],  # (batch, max_len) -> (max_len, batch)
        outputs_info=[h0],
        non_sequences=[self.W, self.U, self.b],
    )
    # (max_len, batch, dim) -> (batch, max_len, dim)
    return T.transpose(hs, (1, 0, 2))
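# The per-timestep masking handled by _step_batch typically looks like the sketch
# below (assumed behaviour, inferred from the mask being scanned alongside x):
#
#     # h_new = act(x_t @ W.T + h_prev @ U.T + b)                     candidate state
#     # h_t   = mask_t[:, None] * h_new + (1 - mask_t[:, None]) * h_prev
#     # i.e. padded positions simply carry the previous hidden state forward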
def get_iter(self, params, grad_params):
    """
    Get params' update values (SGD with momentum).
    Optimization section in the DEEP LEARNING book.
    :param params: list/tuple of shared parameters
    :param grad_params: list/tuple of gradients w.r.t. params
    :return: params_updates    OrderedDict, key: param, value: updated value
             optimizer_updates OrderedDict, key: accumulator in optimizer, value: updated value
    """
    params_updates = OrderedDict({})
    optimizer_updates = OrderedDict({})
    velocity = OrderedDict({})
    for param in params:
        velocity[param] = shared_zero_matrix(param.get_value().shape, name="vel_%s" % param.name)
    for param, gp in zip(params, grad_params):
        vel_para = velocity[param]
        # v <- momentum * v - lr * gradient
        up_vel_para = self.momentum * vel_para - self.lr * gp
        step = up_vel_para
        stepped_param = param + step
        optimizer_updates[vel_para] = up_vel_para
        params_updates[param] = stepped_param
    return params_updates, optimizer_updates
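# This is classical (non-Nesterov) momentum; with hypothetical momentum = 0.9 and
# lr = 0.01 the update loop above reduces to:
#
#     # v = 0.9 * v - 0.01 * grad
#     # param = param + v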
def get_iter(self, params, grad_params):
    """
    Get params' update values (AdaGrad).
    Optimization section in the DEEP LEARNING book.
    :param params: list/tuple of shared parameters
    :param grad_params: list/tuple of gradients w.r.t. params
    :return: params_updates    OrderedDict, key: param, value: updated value
             optimizer_updates OrderedDict, key: accumulator in optimizer, value: updated value
    """
    params_updates = OrderedDict({})
    optimizer_updates = OrderedDict({})
    accumulators = OrderedDict({})
    for param in params:
        accumulators[param] = shared_zero_matrix(param.get_value().shape, name="acc_%s" % param.name)
    for param, gp in zip(params, grad_params):
        exp_sr = accumulators[param]
        # Accumulate the sum of squared gradients
        up_exp_sr = exp_sr + T.sqr(gp)
        step = (self.lr / (T.sqrt(up_exp_sr) + self.epsilon)) * gp
        stepped_param = param - step
        optimizer_updates[exp_sr] = up_exp_sr
        params_updates[param] = stepped_param
    return params_updates, optimizer_updates
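# AdaGrad in two lines, mirroring the accumulator logic above (values hypothetical):
#
#     # acc += grad ** 2
#     # param -= lr * grad / (np.sqrt(acc) + epsilon)
#     # the effective learning rate shrinks for parameters with large past gradients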
def __init__(self, num_in, num_out, initializer=default_initializer, dropout=0, verbose=True):
    self.num_in = num_in
    self.num_out = num_out
    self.dropout = dropout
    self.W = shared_rand_matrix(shape=(num_in, num_out), name="softmax_W", initializer=initializer)
    self.b = shared_zero_matrix((num_out, ), 'softmax_b')
    self.params = [self.W, self.b]
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension : %d' % self.num_in)
        logger.debug('Output Label Num: %d' % self.num_out)
        logger.debug('Dropout Rate    : %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', prefix="",
             initializer=default_initializer, dropout=0, bidirection_shared=False, verbose=True):
    super(BiRecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation,
                                             prefix, initializer, dropout, verbose)
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    self.out_dim = hidden_dim * 2
    # Forward Direction - Backward Direction
    if bidirection_shared:
        # Feed-Forward Matrix (hidden, in)
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_shared"
        self.W_backward = self.W_forward
        # Bias Term (hidden,)
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_shared"
        self.b_backward = self.b_forward
        # Recurrent Matrix (hidden, hidden)
        self.U_forward = self.U
        self.U_forward.name = prefix + "U_shared"
        self.U_backward = self.U_forward
        self.params = [self.W_forward, self.U_forward, self.b_forward]
        self.norm_params = [self.W_forward, self.U_forward]
    else:
        # Feed-Forward Matrix (hidden, in)
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_forward"
        self.W_backward = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_backward', initializer)
        # Bias Term (hidden,)
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_backward')
        # Recurrent Matrix (hidden, hidden)
        self.U_forward = self.U
        self.U_forward.name = prefix + "U_forward"
        self.U_backward = shared_rand_matrix((self.hidden_dim, self.hidden_dim), prefix + 'U_backward', initializer)
        self.params = [self.W_forward, self.W_backward, self.U_forward, self.U_backward,
                       self.b_forward, self.b_backward]
        self.norm_params = [self.W_forward, self.W_backward, self.U_forward, self.U_backward]
    # L1, L2 Norm
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation='tanh', gates=("sigmoid", "sigmoid", "sigmoid"),
             prefix="", initializer=default_initializer, bidirection_shared=False, dropout=0, verbose=True):
    if verbose:
        logger.debug('Building {}...'.format(self.__class__.__name__))
    super(BiLSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, gates,
                                        prefix, initializer, dropout, verbose)
    self.out_dim = hidden_dim * 2
    # Composition Function Weight -- Gates
    if bidirection_shared:
        # W [in, forget, output, recurrent]
        self.W_forward, self.W_forward.name = self.W, prefix + "W_shared"
        self.W_backward = self.W_forward
        # U [in, forget, output, recurrent]
        self.U_forward, self.U_forward.name = self.U, prefix + "U_shared"
        self.U_backward = self.U_forward
        # b [in, forget, output, recurrent]
        self.b_forward, self.b_forward.name = self.b, prefix + "b_shared"
        self.b_backward = self.b_forward
        self.params = [self.W_forward, self.U_forward, self.b_forward]
        self.norm_params = [self.W_forward, self.U_forward]
    else:
        # W [in, forget, output, recurrent]
        self.W_forward, self.W_forward.name = self.W, prefix + "W_forward"
        self.W_backward = shared_rand_matrix((self.hidden_dim * 4, self.in_dim), prefix + 'W_backward', initializer)
        # U [in, forget, output, recurrent]
        self.U_forward, self.U_forward.name = self.U, prefix + "U_forward"
        self.U_backward = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim), prefix + 'U_backward', initializer)
        # b [in, forget, output, recurrent]
        self.b_forward, self.b_forward.name = self.b, prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b_backward')
        self.params = [self.W_forward, self.U_forward, self.b_forward,
                       self.W_backward, self.U_backward, self.b_backward]
        self.norm_params = [self.W_forward, self.U_forward, self.W_backward, self.U_backward]
    self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
    self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])
    if verbose:
        logger.debug('Architecture of {} built'.format(self.__class__.__name__))
        if bidirection_shared:
            logger.debug('Forward/Backward Shared Parameter')
        logger.debug('Input dimension: %d' % self.in_dim)
        logger.debug('Hidden dimension: %d' % self.hidden_dim)
        logger.debug('Pooling methods: %s' % self.pooling)
        logger.debug('Activation Func: %s' % self.act.method)
        logger.debug('Input Gate: %s' % self.in_gate.method)
        logger.debug('Forget Gate: %s' % self.forget_gate.method)
        logger.debug('Output Gate: %s' % self.out_gate.method)
        logger.debug('Dropout Rate: %f' % self.dropout)