Example #1
 def get_iter(self, params, grad_params):
     """
     get params' update values
     :param params: list/ tuple
     :param grad_params: list/ tuple
     :return: params_updates     OrderedDict({}) key: param, value: updated value
              optimizer_updates  OrderedDict({}) key: accumulator in optimizer, value: updated value
     """
     params_updates = OrderedDict({})
     optimizer_updates = OrderedDict({})
     rho = self.decay_rate
     epsilon = self.epsilon
     exp_sqr_grads = OrderedDict({})
     exp_sqr_ups = OrderedDict({})
     for param in params:
         exp_sqr_grads[param] = shared_zero_matrix(param.get_value().shape,
                                                   name="exp_grad_%s" %
                                                   param.name)
         exp_sqr_ups[param] = shared_zero_matrix(param.get_value().shape,
                                                 name="exp_ups_%s" %
                                                 param.name)
     for param, gp in zip(params, grad_params):
         exp_sg = exp_sqr_grads[param]
         exp_su = exp_sqr_ups[param]
         up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp)
         step = -(T.sqrt(exp_su + epsilon) /
                  T.sqrt(up_exp_sg + epsilon)) * gp * self.lr
         stepped_param = param + step
         optimizer_updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
         optimizer_updates[exp_sg] = up_exp_sg
         params_updates[param] = stepped_param
     return params_updates, optimizer_updates
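
For reference, the update this get_iter computes, written out (note that, unlike standard AdaDelta, the code additionally scales the step by self.lr):

    E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2
    \Delta\theta_t = -\, lr \cdot \frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\, g_t
    E[\Delta\theta^2]_t = \rho\, E[\Delta\theta^2]_{t-1} + (1-\rho)\, \Delta\theta_t^2
    \theta_t = \theta_{t-1} + \Delta\theta_t
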
Example #2
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 initializer=default_initializer,
                 normalize=True,
                 dropout=0,
                 reconstructe=True,
                 activation="tanh",
                 verbose=True):
        """
        :param in_dim:          input dimension
        :param hidden_dim:      hidden dimension
        :param initializer:     random initializer
        :param normalize:       whether to normalize
        :param dropout:         dropout rate
        :param activation:      activation function
        :param verbose:         whether to log debug output
        :return:
        """
        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        assert self.in_dim == self.hidden_dim

        self.initializer = initializer
        self.normalize = normalize
        self.dropout = dropout
        self.verbose = verbose
        self.act = Activation(activation)
        # Composition Function Weight
        # (dim, 2 * dim)
        self.W = shared_rand_matrix((self.hidden_dim, 2 * self.in_dim),
                                    'W',
                                    initializer=initializer)
        # (dim, )
        self.b = shared_zero_matrix((self.hidden_dim, ), 'b')
        # Reconstruction Function Weight
        # (2 * dim, dim)
        self.Wr = shared_rand_matrix((2 * self.in_dim, self.hidden_dim),
                                     'Wr',
                                     initializer=initializer)
        # (2 * dim, )
        self.br = shared_zero_matrix((self.in_dim * 2, ), 'br')
        self.params = [self.W, self.b, self.Wr, self.br]
        self.norm_params = [self.W, self.Wr]

        self.l1_norm = sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of RAE built finished, summarized as below: ')
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Normalize:        %s' % self.normalize)
            logger.debug('Activation:       %s' % self.act)
            logger.debug('Dropout Rate:     %s' % self.dropout)
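
The forward pass is not part of this snippet; the following is a minimal, self-contained sketch (plain Theano, assuming a standard recursive-autoencoder composition/reconstruction and illustrative names) of how the W/b/Wr/br shapes above are typically used:

    import numpy as np
    import theano
    import theano.tensor as T

    dim = 4
    rng = np.random.RandomState(0)
    floatX = theano.config.floatX
    W = theano.shared(rng.uniform(-0.1, 0.1, (dim, 2 * dim)).astype(floatX), name='W')    # composition weight
    b = theano.shared(np.zeros(dim, dtype=floatX), name='b')
    Wr = theano.shared(rng.uniform(-0.1, 0.1, (2 * dim, dim)).astype(floatX), name='Wr')  # reconstruction weight
    br = theano.shared(np.zeros(2 * dim, dtype=floatX), name='br')

    c1, c2 = T.vector('c1'), T.vector('c2')
    children = T.concatenate([c1, c2])              # (2 * dim,)
    p = T.tanh(T.dot(W, children) + b)              # compose two children into one parent: (dim,)
    p = p / T.sqrt(T.sum(p ** 2))                   # length-normalize, as `normalize=True` suggests
    rec = T.tanh(T.dot(Wr, p) + br)                 # reconstruct the children: (2 * dim,)
    rec_loss = T.sum((rec - children) ** 2)         # squared reconstruction error
    compose = theano.function([c1, c2], [p, rec_loss])
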
Example #3
 def forward_scan(self, x):
     h0 = shared_zero_matrix((self.hidden_dim, ), 'h0_forward')
     c0 = shared_zero_matrix((self.hidden_dim, ), 'c0_forward')
     hs, _ = theano.scan(
         fn=self._step,
         sequences=x,
         outputs_info=[h0, c0],
         non_sequences=[self.W, self.U, self.b],
     )
     return hs[0]
Example #4
 def backward_scan(self, x):
     h0_backward = shared_zero_matrix(self.hidden_dim, 'h0_backward')
     c0_backward = shared_zero_matrix(self.hidden_dim, 'c0_backward')
     h_backwards, _ = theano.scan(
         fn=self._step,
         sequences=x,
         outputs_info=[h0_backward, c0_backward],
         non_sequences=[self.W_backward, self.U_backward, self.b_backward],
         go_backwards=True,
     )
     return h_backwards[0][::-1]
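
forward_scan and backward_scan are typically combined into a bidirectional encoding by concatenating their outputs; a minimal sketch (assuming self is the encoder that owns both methods and T is theano.tensor):

    def bidirectional_scan(self, x):
        forward_h = self.forward_scan(x)    # (len, hidden), left-to-right states
        backward_h = self.backward_scan(x)  # (len, hidden), already re-reversed to align with x
        # concatenate along the feature axis -> (len, 2 * hidden)
        return T.concatenate([forward_h, backward_h], axis=1)
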
Example #5
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = hidden_dim
        self.act = Activation(activation)
        self.dropout = dropout
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W', initializer)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
        self.params = [self.W, self.b]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #6
    def __init__(self, in_dim, activation, hidden_dim=None, transform_gate="sigmoid", prefix="",
                 initializer=default_initializer, dropout=0, verbose=True):
        # By construction the dimensions of in_dim and out_dim have to match, and hence W_T and W_H are square matrices.
        if hidden_dim is not None:
            assert in_dim == hidden_dim
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(HighwayLayer, self).__init__(in_dim, in_dim, activation, prefix, initializer, dropout, verbose)
        self.transform_gate = Activation(transform_gate)
        self.W_H, self.W_H.name = self.W, prefix + "W_H"
        self.b_H, self.b_H.name = self.b, prefix + "b_H"
        self.W_T = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_T', initializer)
        self.b_T = shared_zero_matrix((self.hidden_dim,), prefix + 'b_T')
        self.params = [self.W_H, self.W_T, self.b_H, self.b_T]
        self.norm_params = [self.W_H, self.W_T]
        self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Transform Gate:   %s' % self.transform_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
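
The constructor only defines the parameters; the highway combination they imply is the standard one, which is also why in_dim and hidden_dim must match:

    H(x) = act(W_H x + b_H), \qquad T(x) = transform\_gate(W_T x + b_T)
    y = T(x) \odot H(x) + (1 - T(x)) \odot x
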
Example #7
    def __init__(self, lookup_table, in_dim, hidden_dims, num_label, activation,
                 batch_size=64, initializer=default_initializer, dropout=0, verbose=True):
        self.batch_size = batch_size
        word_index = T.imatrix()  # (batch, max_len)
        gold_truth = T.ivector()  # (batch, 1)
        encoder = MultiHiddenLayer(in_dim=in_dim, hidden_dims=hidden_dims, activation=activation,
                                   initializer=initializer, dropout=dropout, verbose=verbose)
        mask = (word_index > 0) * T.constant(1, dtype=theano.config.floatX)
        word_embedding = lookup_table.W[word_index]
        hidden = T.sum(word_embedding * mask[:, :, None], axis=1) / T.sum(mask, axis=1)[:, None]
        rnn_output = encoder.forward_batch(hidden)
        classifier = SoftmaxClassifier(num_in=encoder.out_dim, num_out=num_label, initializer=initializer)
        classifier_output = classifier.forward(rnn_output)
        loss = classifier.loss(rnn_output, gold_truth)
        params = lookup_table.params + classifier.params + encoder.params
        sgd_optimizer = AdaGradOptimizer(lr=0.95, norm_lim=16)
        except_norm_list = [param.name for param in lookup_table.params]
        updates = sgd_optimizer.get_update(loss, params, except_norm_list)

        self.train_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
        self.train_y = shared_zero_matrix(1, dtype=np.int32)
        self.dev_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)
        self.test_x = shared_zero_matrix((batch_size, 1), dtype=np.int32)

        index = T.ivector()
        self.train_batch = theano.function(inputs=[index],
                                           outputs=[classifier_output, loss],
                                           updates=updates,
                                           givens={word_index: self.train_x[index],
                                                   gold_truth: self.train_y[index]}
                                           )
        self.get_norm = theano.function(inputs=[],
                                        outputs=[lookup_table.l2_norm, classifier.l2_norm])
        self.pred_train_batch = theano.function(inputs=[index],
                                                outputs=classifier_output,
                                                givens={word_index: self.train_x[index]}
                                                )
        self.pred_dev_batch = theano.function(inputs=[index],
                                              outputs=classifier_output,
                                              givens={word_index: self.dev_x[index]}
                                              )
        self.pred_test_batch = theano.function(inputs=[index],
                                               outputs=classifier_output,
                                               givens={word_index: self.test_x[index]}
                                               )
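
A hedged usage sketch (the model and data names below are hypothetical; train_x, train_y, batch_size, and train_batch are the members defined above):

    # model = <this class>(lookup_table, in_dim, hidden_dims, num_label, activation='tanh')
    # model.train_x.set_value(train_word_idx.astype(np.int32))  # (num_train, max_len) padded word ids
    # model.train_y.set_value(train_labels.astype(np.int32))    # (num_train,) gold labels
    # for start in range(0, num_train, model.batch_size):
    #     batch = np.arange(start, start + model.batch_size, dtype=np.int32)
    #     probs, batch_loss = model.train_batch(batch)           # one parameter update per mini-batch
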
Example #8
 def get_iter(self, params, grad_params):
     """
     get params' update values
     :param params: list/ tuple
     :param grad_params: list/ tuple
     :return: params_updates     OrderedDict({}) key: param, value: updated value
              optimizer_updates  OrderedDict({}) key: accumulator in optimizer, value: updated value
     """
     params_updates = OrderedDict({})
     optimizer_updates = OrderedDict({})
     epsilon = self.epsilon
     first_moment_bias = OrderedDict({})
     second_moment_bias = OrderedDict({})
     rho1 = self.first_decay_rate
     rho2 = self.second_decay_rate
     acc_rho1 = shared_scalar(value=rho1, name="adam_acc_rho1")
     acc_rho2 = shared_scalar(value=rho2, name="adam_acc_rho2")
     for param in params:
         first_moment_bias[param] = shared_zero_matrix(
             param.get_value().shape, name="fir_mom_bias%s" % param.name)
         second_moment_bias[param] = shared_zero_matrix(
             param.get_value().shape, name="sec_mom_bias%s" % param.name)
     for param, gp in zip(params, grad_params):
         first_mb = first_moment_bias[param]
         second_mb = second_moment_bias[param]
         # Update biased first moment estimate
         up_first_mb = rho1 * first_mb + (1 - rho1) * gp
         # Update biased second moment estimate
         up_second_mb = rho2 * second_mb + (1 - rho2) * T.sqr(gp)
         # Correct bias in first moment
         correct_first_mb = up_first_mb / (1 - acc_rho1)
         # Correct bias in second moment
         correct_second_mb = up_second_mb / (1 - acc_rho2)
         # Compute step
         step = correct_first_mb / (T.sqrt(correct_second_mb) + epsilon)
         # Apply Update
         stepped_param = param - step * self.lr
         optimizer_updates[first_mb] = up_first_mb
         optimizer_updates[second_mb] = up_second_mb
         optimizer_updates[acc_rho1] = acc_rho1 * rho1
         optimizer_updates[acc_rho2] = acc_rho2 * rho2
         params_updates[param] = stepped_param
     return params_updates, optimizer_updates
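
This is the Adam rule as implemented above, where acc_rho1 and acc_rho2 track rho1^t and rho2^t for the bias correction:

    m_t = \rho_1 m_{t-1} + (1-\rho_1)\, g_t
    v_t = \rho_2 v_{t-1} + (1-\rho_2)\, g_t^2
    \hat{m}_t = m_t / (1 - \rho_1^t), \qquad \hat{v}_t = v_t / (1 - \rho_2^t)
    \theta_t = \theta_{t-1} - lr \cdot \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
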
Example #9
    def __init__(self, in_dim, hidden_dim, kernel_size=3, padding='same', pooling='max', dilation_rate=1.0,
                 activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
        """
        Init Function for ConvolutionLayer
        :param in_dim:
        :param hidden_dim:
        :param kernel_size:
        :param padding: 'same', 'valid'
        :param pooling: 'max', 'mean', 'min'
        :param dilation_rate:
        :param activation:
        :param prefix:
        :param initializer:
        :param dropout:
        :param verbose:
        """
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        self.padding_size = int(self.dilation_rate * (self.kernel_size - 1))
        # Composition Function Weight
        # Kernel Matrix (kernel_size, hidden, in)
        self.W = shared_rand_matrix((self.kernel_size, self.hidden_dim, self.in_dim), prefix + 'W', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim,), prefix + 'b')

        self.params = [self.W, self.b]
        self.norm_params = [self.W]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W ** 2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Filter Num  (Hidden): %d' % self.hidden_dim)
            logger.debug('Kernel Size (Windows): %d' % self.kernel_size)
            logger.debug('Padding method :  %s' % self.padding)
            logger.debug('Dilation Rate  :  %s' % self.dilation_rate)
            logger.debug('Padding Size   :  %s' % self.padding_size)
            logger.debug('Pooling method :  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
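
A quick worked example of the padding arithmetic above (the values are illustrative):

    kernel_size, dilation_rate = 3, 1.0
    padding_size = int(dilation_rate * (kernel_size - 1))  # = 2: kernel_size - 1 padded positions preserve the sequence length for 'same'
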
Example #10
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='tanh',
                 hidden=5,
                 keep_normal=False,
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(NeuralTensorModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        self.hidden = hidden
        self.slice_seq = T.arange(hidden)
        self.keep_normal = keep_normal
        # (relation_num, entity_dim, entity_dim, hidden)
        self.W = shared_rand_matrix(
            (relation_num, self.entity_dim, self.entity_dim, self.hidden),
            prefix + 'NTN_W', initializer)
        # (relation_num, hidden)
        self.U = shared_ones_matrix((relation_num, self.hidden),
                                    name=prefix + 'NTN_U')
        if keep_normal:
            # (relation_num, 2 * entity_dim, hidden)
            self.V = shared_rand_matrix(
                (relation_num, self.entity_dim * 2, self.hidden),
                prefix + 'NTN_V', initializer)
            # (relation_num, hidden)
            self.b = shared_zero_matrix((relation_num, self.hidden),
                                        name=prefix + 'NTN_B')
            self.params = [self.W, self.V, self.U, self.b]
            self.norm_params = [self.W, self.V, self.U, self.b]
        else:
            self.params = [self.W]
            self.norm_params = [self.W]
        self.act = Activation(activation)
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of Tensor Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
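
The parameter shapes above match the standard Neural Tensor Network score (the forward pass is not part of this snippet), with one bilinear slice per hidden unit:

    g(e_1, r, e_2) = u_r^{\top} f\left( e_1^{\top} W_r^{[1:k]} e_2 + V_r [e_1; e_2] + b_r \right)
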
Example #11
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(RecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                               activation, dropout)

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        # Composition Function Weight
        # Feed-Forward Matrix (hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W_forward', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
        # Recurrent Matrix (hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim),
                                    prefix + 'U_forward', initializer)

        self.params = [self.W, self.U, self.b]
        self.norm_params = [self.W, self.U]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #12
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=OrthogonalInitializer(),
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(LSTMEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                          activation, dropout)
        self.in_gate, self.forget_gate, self.out_gate = Activation(
            gates[0]), Activation(gates[1]), Activation(gates[2])

        # W [in, forget, output, recurrent] (4 * hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim),
                                    prefix + 'W', initializer)
        # U [in, forget, output, recurrent] (4 * hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim),
                                    prefix + 'U', initializer)
        # b [in, forget, output, recurrent] (4 * hidden,)
        self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')

        self.params = [self.W, self.U, self.b]
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
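
Assuming the usual LSTM step over the four stacked slices of W, U, and b (input, forget, and output gates plus the candidate block), the recurrence being parameterized here is:

    i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i)
    f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f)
    o_t = \sigma(W_o x_t + U_o h_{t-1} + b_o)
    \tilde{c}_t = \tanh(W_c x_t + U_c h_{t-1} + b_c)
    c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t, \qquad h_t = o_t \odot \tanh(c_t)
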
Example #13
 def forward_sequence_batch(self, x, mask, batch_size):
     """
     :param x: (batch, max_len, dim)
     :param mask:  (batch, max_len)
     :param batch_size:
     """
     h0 = shared_zero_matrix((batch_size, self.hidden_dim), 'h0')
     hs, _ = theano.scan(
         fn=self._step_batch,
         sequences=[
             # (batch, max_len, dim) -> (max_len, batch, dim)
             T.transpose(x, (1, 0, 2)),
             # (batch, max_len) -> (max_len, batch)
             T.transpose(mask, (1, 0)),
         ],
         outputs_info=[h0],
         non_sequences=[self.W, self.U, self.b],
     )
     # (max_len, batch, dim) -> (batch, max_len, dim)
     return T.transpose(hs, (1, 0, 2))
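
_step_batch itself is not shown in this snippet; a minimal sketch of the masked recurrent step such a scan typically expects (illustrative only, with the shapes from the comments above and W, U, b shaped as in the plain recurrent encoder):

    import theano.tensor as T

    def step_batch(x_t, m_t, h_prev, W, U, b):
        # x_t: (batch, in_dim), m_t: (batch,), h_prev: (batch, hidden)
        h_t = T.tanh(T.dot(x_t, W.T) + T.dot(h_prev, U.T) + b)
        # keep the previous state at padded positions (mask == 0)
        return m_t[:, None] * h_t + (1.0 - m_t[:, None]) * h_prev
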
Example #14
 def get_iter(self, params, grad_params):
     """
     get params' update values
     Optimization section in the Deep Learning book
     :param params: list/ tuple
     :param grad_params: list/ tuple
     :return: params_updates     OrderedDict({}) key: param, value: updated value
              optimizer_updates  OrderedDict({}) key: accumulator in optimizer, value: updated value
     """
     params_updates = OrderedDict({})
     optimizer_updates = OrderedDict({})
     velocity = OrderedDict({})
     for param in params:
         velocity[param] = shared_zero_matrix(param.get_value().shape,
                                              name="vel_%s" % param.name)
     for param, gp in zip(params, grad_params):
         vel_para = velocity[param]
         up_vel_para = self.momentum * vel_para - self.lr * gp
         step = up_vel_para
         stepped_param = param + step
         optimizer_updates[vel_para] = up_vel_para
         params_updates[param] = stepped_param
     return params_updates, optimizer_updates
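
This is classical momentum SGD:

    v_t = \mu\, v_{t-1} - lr \cdot g_t, \qquad \theta_t = \theta_{t-1} + v_t
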
Example #15
 def get_iter(self, params, grad_params):
     """
     get params' update values
     Optimization section in the Deep Learning book
     :param params: list/ tuple
     :param grad_params: list/ tuple
     :return: params_updates     OrderedDict({}) key: param, value: updated value
              optimizer_updates  OrderedDict({}) key: accumulator in optimizer, value: updated value
     """
     params_updates = OrderedDict({})
     optimizer_updates = OrderedDict({})
     accumulators = OrderedDict({})
     for param in params:
         accumulators[param] = shared_zero_matrix(param.get_value().shape,
                                                  name="acc_%s" %
                                                  param.name)
     for param, gp in zip(params, grad_params):
         exp_sr = accumulators[param]
         up_exp_sr = exp_sr + T.sqr(gp)
         step = (self.lr / (T.sqrt(up_exp_sr) + self.epsilon)) * gp
         stepped_param = param - step
         optimizer_updates[exp_sr] = up_exp_sr
         params_updates[param] = stepped_param
     return params_updates, optimizer_updates
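
That is, the AdaGrad rule, with the squared gradients accumulated without decay:

    r_t = r_{t-1} + g_t^2, \qquad \theta_t = \theta_{t-1} - \frac{lr}{\sqrt{r_t} + \epsilon}\, g_t
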
Example #16
    def __init__(self,
                 num_in,
                 num_out,
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        self.num_in = num_in
        self.num_out = num_out
        self.dropout = dropout

        self.W = shared_rand_matrix(shape=(num_in, num_out),
                                    name="softmax_W",
                                    initializer=initializer)
        self.b = shared_zero_matrix((num_out, ), 'softmax_b')
        self.params = [self.W, self.b]
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension : %d' % self.num_in)
            logger.debug('Output Label Num: %d' % self.num_out)
            logger.debug('Dropout Rate    : %f' % self.dropout)
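
The forward and loss functions are not included in this snippet; a self-contained sketch of what a softmax classifier with the (num_in, num_out) W and b above typically computes (a standard formulation, not necessarily the exact repo code):

    import numpy as np
    import theano
    import theano.tensor as T

    num_in, num_out = 8, 3
    floatX = theano.config.floatX
    W = theano.shared(np.zeros((num_in, num_out), dtype=floatX), name='softmax_W')
    b = theano.shared(np.zeros((num_out,), dtype=floatX), name='softmax_b')

    x = T.matrix('x')                                    # (batch, num_in)
    y = T.ivector('y')                                   # (batch,) gold labels
    prob = T.nnet.softmax(T.dot(x, W) + b)               # (batch, num_out)
    nll = -T.mean(T.log(prob[T.arange(y.shape[0]), y]))  # negative log-likelihood
    predict = theano.function([x], T.argmax(prob, axis=1))
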
Example #17
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 bidirection_shared=False,
                 verbose=True):
        super(BiRecurrentEncoder,
              self).__init__(in_dim, hidden_dim, pooling, activation, prefix,
                             initializer, dropout, verbose)
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.out_dim = hidden_dim * 2
        # Forward Direction - Backward Direction
        if bidirection_shared:
            # Feed-Forward Matrix (hidden, in)
            self.W_forward = self.W
            self.W_forward.name = prefix + "W_shared"
            self.W_backward = self.W_forward
            # Bias Term (hidden,)
            self.b_forward = self.b
            self.b_forward.name = prefix + "b_shared"
            self.b_backward = self.b_forward
            # Recurrent Matrix (hidden, hidden)
            self.U_forward = self.U
            self.U_forward.name = prefix + "U_shared"
            self.U_backward = self.U_forward

            self.params = [self.W_forward, self.U_forward, self.b_forward]
            self.norm_params = [self.W_forward, self.U_forward]
        else:
            # Feed-Forward Matrix (hidden, in)
            self.W_forward = self.W
            self.W_forward.name = prefix + "W_forward"
            self.W_backward = shared_rand_matrix(
                (self.hidden_dim, self.in_dim), prefix + 'W_backward',
                initializer)
            # Bias Term (hidden,)
            self.b_forward = self.b
            self.b_forward.name = prefix + "b_forward"
            self.b_backward = shared_zero_matrix((self.hidden_dim, ),
                                                 prefix + 'b_backward')
            # Recurrent Matrix (hidden, hidden)
            self.U_forward = self.U
            self.U_forward.name = prefix + "U_forward"
            self.U_backward = shared_rand_matrix(
                (self.hidden_dim, self.hidden_dim), prefix + 'U_backward',
                initializer)

            self.params = [
                self.W_forward, self.W_backward, self.U_forward,
                self.U_backward, self.b_forward, self.b_backward
            ]
            self.norm_params = [
                self.W_forward, self.W_backward, self.U_forward,
                self.U_backward
            ]
        # L1, L2 Norm
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Example #18
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=default_initializer,
                 bidirection_shared=False,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(BiLSTMEncoder,
              self).__init__(in_dim, hidden_dim, pooling, activation, gates,
                             prefix, initializer, dropout, verbose)
        self.out_dim = hidden_dim * 2
        # Composition Function Weight -- Gates
        if bidirection_shared:
            # W [in, forget, output, recurrent]
            self.W_forward, self.W_forward.name = self.W, prefix + "W_shared"
            self.W_backward = self.W_forward
            # U [in, forget, output, recurrent]
            self.U_forward, self.U_forward.name = self.U, prefix + "U_shared"
            self.U_backward = self.U_forward
            # b [in, forget, output, recurrent]
            self.b_forward, self.b_forward.name = self.b, prefix + "b_shared"
            self.b_backward = self.b_forward

            self.params = [self.W_forward, self.U_forward, self.b_forward]
            self.norm_params = [self.W_forward, self.U_forward]
        else:
            # W [in, forget, output, recurrent]
            self.W_forward, self.W_forward.name = self.W, prefix + "W_forward"
            self.W_backward = shared_rand_matrix(
                (self.hidden_dim * 4, self.in_dim), prefix + 'W_backward',
                initializer)
            # U [in, forget, output, recurrent]

            self.U_forward, self.U_forward.name = self.U, prefix + "U_forward"
            self.U_backward = shared_rand_matrix(
                (self.hidden_dim * 4, self.hidden_dim), prefix + 'U_backward',
                initializer)
            # b [in, forget, output, recurrent]
            self.b_forward, self.b_forward.name = self.b, prefix + "b_forward"
            self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ),
                                                 prefix + 'b_backward')
            self.params = [
                self.W_forward, self.U_forward, self.b_forward,
                self.W_backward, self.U_backward, self.b_backward
            ]
            self.norm_params = [
                self.W_forward, self.U_forward, self.W_backward,
                self.U_backward
            ]

        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            if bidirection_shared:
                logger.debug('%s' % "Forward/Backward Shared Parameter")
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)