Example 1
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation,
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(BiLSTMEncoder,
              self).__init__(in_dim, hidden_dim, pooling, activation, gates,
                             prefix, initializer, dropout, verbose)
        self.out_dim = hidden_dim * 2
        # Composition Function Weight -- Gates
        # W [in, forget, output, recurrent]
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_forward"
        self.W_backward = shared_rand_matrix(
            (self.hidden_dim * 4, self.in_dim), prefix + 'W_backward',
            initializer)
        # U [in, forget, output, recurrent]

        self.U_forward = self.U
        self.U_forward.name = prefix + "U_forward"
        self.U_backward = shared_rand_matrix(
            (self.hidden_dim * 4, self.hidden_dim), prefix + 'U_backward',
            initializer)
        # b [in, forget, output, recurrent]
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ),
                                             prefix + 'b_backward')

        self.params = [
            self.W_forward, self.U_forward, self.b_forward, self.W_backward,
            self.U_backward, self.b_backward
        ]
        self.norm_params = [
            self.W_forward, self.U_forward, self.W_backward, self.U_backward
        ]
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built, summarized below:'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
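
The constructor above only allocates the forward and backward parameter blocks; the step and pooling logic lives elsewhere in the class. As a rough illustration of why out_dim is set to hidden_dim * 2, here is a small numpy sketch (not the repository's code) of pooling each direction over time and concatenating the results:

import numpy as np

def pool_and_concat(forward_states, backward_states, pooling="max"):
    """Toy illustration: pool each direction over time, then concatenate,
    which is why the constructor sets out_dim = hidden_dim * 2."""
    pool = {"max": lambda h: h.max(axis=0),
            "mean": lambda h: h.mean(axis=0),
            "final": lambda h: h[-1]}[pooling]
    return np.concatenate([pool(forward_states), pool(backward_states)])

seq_len, hidden_dim = 7, 4
h_fwd = np.random.randn(seq_len, hidden_dim)   # forward hidden states
h_bwd = np.random.randn(seq_len, hidden_dim)   # backward hidden states
sentence_vec = pool_and_concat(h_fwd, h_bwd, pooling="max")
print(sentence_vec.shape)                      # (8,) == hidden_dim * 2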
Example 2
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        super(BiRecurrentEncoder,
              self).__init__(in_dim, hidden_dim, pooling, activation, prefix,
                             initializer, dropout, verbose)
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.out_dim = hidden_dim * 2
        # Forward Direction - Backward Direction
        # Feed-Forward Matrix (hidden, in)
        self.W_forward = self.W
        self.W_forward.name = prefix + "W_forward"
        self.W_backward = shared_rand_matrix(
            (self.hidden_dim, self.in_dim), prefix + 'W_backward', initializer)
        # Bias Term (hidden,)
        self.b_forward = self.b
        self.b_forward.name = prefix + "b_forward"
        self.b_backward = shared_zero_matrix((self.hidden_dim, ),
                                             prefix + 'b_backward')
        # Recurrent Matrix (hidden, hidden)
        self.U_forward = self.U
        self.U_forward.name = prefix + "U_forward"
        self.U_backward = shared_rand_matrix(
            (self.hidden_dim, self.hidden_dim), prefix + 'U_backward',
            initializer)

        self.params = [
            self.W_forward, self.W_backward, self.U_forward, self.U_backward,
            self.b_forward, self.b_backward
        ]
        self.norm_params = [
            self.W_forward, self.W_backward, self.U_forward, self.U_backward
        ]
        # L1, L2 Norm
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built, summarized below:'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
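
As in Example 1, only the weight matrices are collected in norm_params, so the biases stay out of the regularizers. A small numpy sketch of the same L1/L2 accumulation, using hypothetical toy matrices in place of the shared variables:

import numpy as np

rng = np.random.default_rng(0)
norm_params = [rng.standard_normal((4, 3)),   # W_forward
               rng.standard_normal((4, 3)),   # W_backward
               rng.standard_normal((4, 4)),   # U_forward
               rng.standard_normal((4, 4))]   # U_backward

# Mirrors T.sum([T.sum(T.abs_(p)) for p in norm_params]) and the squared version.
l1_norm = sum(np.abs(p).sum() for p in norm_params)
l2_norm = sum((p ** 2).sum() for p in norm_params)
print(l1_norm, l2_norm)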
Example 3
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation,
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = hidden_dim
        self.pooling = pooling
        self.act = Activation(activation)
        self.in_gate, self.forget_gate, self.out_gate = Activation(
            gates[0]), Activation(gates[1]), Activation(gates[2])
        self.dropout = dropout

        # W [in, forget, output, recurrent] (4 * hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim),
                                    prefix + 'W', initializer)
        # U [in, forget, output, recurrent] (4 * hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim),
                                    prefix + 'U', initializer)
        # b [in, forget, output, recurrent] (4 * hidden,)
        self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')

        self.params = [self.W, self.U, self.b]
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built, summarized below:'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
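
This base class packs the input, forget, output, and candidate blocks into single (4 * hidden_dim, ...) matrices. The step function that slices them is not part of the excerpt, so the following numpy sketch assumes the [in, forget, output, recurrent] ordering given in the comments; it shows one LSTM step with the stacked parameters:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, W, U, b):
    """One LSTM step with stacked (4 * hidden, ...) parameters. The
    [input, forget, output, candidate] slicing order follows the comments
    above; the repository's own step function is not shown here."""
    hidden = h_prev.shape[0]
    pre = W @ x_t + U @ h_prev + b                 # (4 * hidden,)
    i = sigmoid(pre[0 * hidden:1 * hidden])        # input gate
    f = sigmoid(pre[1 * hidden:2 * hidden])        # forget gate
    o = sigmoid(pre[2 * hidden:3 * hidden])        # output gate
    g = np.tanh(pre[3 * hidden:4 * hidden])        # candidate ("recurrent") block
    c_t = f * c_prev + i * g
    h_t = o * np.tanh(c_t)
    return h_t, c_t

in_dim, hidden_dim = 3, 5
rng = np.random.default_rng(0)
W = rng.standard_normal((4 * hidden_dim, in_dim))
U = rng.standard_normal((4 * hidden_dim, hidden_dim))
b = np.zeros(4 * hidden_dim)
h, c = np.zeros(hidden_dim), np.zeros(hidden_dim)
h, c = lstm_step(rng.standard_normal(in_dim), h, c, W, U, b)
print(h.shape, c.shape)                            # (5,) (5,)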
Example 4
    def __init__(self, num_in, initializer=default_initializer):
        self.W = shared_rand_matrix(shape=(num_in, 1),
                                    name="logistic_W",
                                    initializer=initializer)
        self.b = shared_zero_matrix(np.asarray([0]), name='logistic_b')
        self.params = [self.W, self.b]

        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)
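
The (num_in, 1) weight and single bias imply a binary logistic layer. The forward pass is not shown in the excerpt; a minimal numpy sketch of what it would compute, assuming a standard sigmoid output:

import numpy as np

def logistic_forward(x, W, b):
    """Probability of the positive class for a (num_in,) feature vector,
    matching the (num_in, 1) weight shape allocated above."""
    return 1.0 / (1.0 + np.exp(-(x @ W + b)))

num_in = 6
rng = np.random.default_rng(0)
W = rng.standard_normal((num_in, 1)) * 0.01
b = np.zeros(1)
print(logistic_forward(rng.standard_normal(num_in), W, b))  # shape (1,) probability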
Example 5
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        # Composition Function Weight
        # Feed-Forward Matrix (hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W_forward', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
        # Recurrent Matrix (hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim),
                                    prefix + 'U_forward', initializer)

        self.params = [self.W, self.U, self.b]
        self.norm_params = [self.W, self.U]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built, summarized below:'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
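
With W of shape (hidden, in), U of shape (hidden, hidden), and bias b, this is a plain recurrent encoder. A minimal numpy sketch of the recurrence and pooling such a class would run; tanh stands in for the configurable Activation, and the pooling options shown are assumptions:

import numpy as np

def rnn_encode(X, W, U, b, pooling="max"):
    """Plain recurrent encoder: h_t = tanh(W x_t + U h_{t-1} + b),
    then pool the hidden states over time."""
    h = np.zeros(U.shape[0])
    states = []
    for x_t in X:                       # X: (seq_len, in_dim)
        h = np.tanh(W @ x_t + U @ h + b)
        states.append(h)
    states = np.stack(states)
    return states.max(axis=0) if pooling == "max" else states.mean(axis=0)

in_dim, hidden_dim, seq_len = 3, 4, 6
rng = np.random.default_rng(0)
W = rng.standard_normal((hidden_dim, in_dim)) * 0.1
U = rng.standard_normal((hidden_dim, hidden_dim)) * 0.1
b = np.zeros(hidden_dim)
print(rnn_encode(rng.standard_normal((seq_len, in_dim)), W, U, b).shape)  # (4,)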
Example 6
    def __init__(self, num_in, num_out, initializer=default_initializer):
        self.num_in = num_in
        self.num_out = num_out

        self.W = shared_rand_matrix(shape=(num_in, num_out),
                                    name="softmax_W",
                                    initializer=initializer)
        self.b = shared_zero_matrix((num_out, ), 'softmax_b')
        self.params = [self.W, self.b]
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)
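
The softmax layer only allocates W and b here. As a rough numpy sketch, its forward pass would map a (num_in,) input to a distribution over num_out classes:

import numpy as np

def softmax_forward(x, W, b):
    """Class distribution for a (num_in,) input with W of shape (num_in, num_out)."""
    scores = x @ W + b
    scores -= scores.max()              # subtract max for numerical stability
    e = np.exp(scores)
    return e / e.sum()

num_in, num_out = 5, 3
rng = np.random.default_rng(0)
W = rng.standard_normal((num_in, num_out)) * 0.01
b = np.zeros(num_out)
probs = softmax_forward(rng.standard_normal(num_in), W, b)
print(probs, probs.sum())               # probabilities that sum to 1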
Example 7
    def __init__(self, entity_dim, relation_num, k=50, activation='tanh',
                 initializer=default_initializer, prefix='', verbose=True):
        super(SingleLayerModel, self).__init__()
        self.k = k
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, k, entity_dim)
        self.W_1 = shared_rand_matrix((relation_num, self.k, self.entity_dim), prefix + 'W_1', initializer)
        # (relation_num, k, entity_dim)
        self.W_2 = shared_rand_matrix((relation_num, self.k, self.entity_dim), prefix + 'W_2', initializer)
        # (relation_num, k, )
        self.u = shared_rand_matrix((relation_num, self.k, ), prefix + 'u', initializer)
        self.act = Activation(activation)
        self.params = [self.W_1, self.W_2, self.u]

        self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + T.sum(T.abs_(self.u))
        self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + T.sum(self.u ** 2)

        if verbose:
            logger.debug('Architecture of Single Layer Model built, summarized below:')
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('K Dimension:      %d' % self.k)
            logger.debug('Relation Number:  %d' % self.relation_num)
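
The parameter shapes match a single-layer relation scorer in which a triple is scored as u_r · tanh(W_1[r] e1 + W_2[r] e2). The scoring method itself is not shown above, so the following numpy sketch is an inference from the shapes rather than the repository's implementation:

import numpy as np

def single_layer_score(e1, e2, r, W_1, W_2, u):
    """Score a (head, relation, tail) triple from the parameter shapes above:
    u[r] . tanh(W_1[r] @ e1 + W_2[r] @ e2)."""
    return u[r] @ np.tanh(W_1[r] @ e1 + W_2[r] @ e2)

entity_dim, relation_num, k = 10, 3, 50
rng = np.random.default_rng(0)
W_1 = rng.standard_normal((relation_num, k, entity_dim)) * 0.1
W_2 = rng.standard_normal((relation_num, k, entity_dim)) * 0.1
u = rng.standard_normal((relation_num, k)) * 0.1
print(single_layer_score(rng.standard_normal(entity_dim),
                         rng.standard_normal(entity_dim), r=1,
                         W_1=W_1, W_2=W_2, u=u))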
Example 8
    def __init__(self,
                 w=None,
                 size=10000,
                 dim=50,
                 initializer=default_initializer,
                 prefix=""):
        if w is None:
            # Randomly initialized embedding matrix
            self.size = size
            self.dim = dim
            self.W = shared_rand_matrix(shape=(self.size, self.dim),
                                        initializer=initializer,
                                        name=prefix + 'Embedding')
        else:
            # Embedding matrix initialized from the given array w
            self.size = w.shape[0]
            self.dim = w.shape[1]
            self.W = shared_matrix(np.array(w, dtype=theano.config.floatX),
                                   name=prefix + 'Embedding')
        self.params = [self.W]
        # Norms are computed over all rows except row 0
        self.l1_norm = T.sum(T.abs_(self.W[1:]))
        self.l2_norm = T.sum(self.W[1:] ** 2)
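
The embedding matrix is either randomly initialized or built from a given array w, and both norms skip row 0 (commonly a padding row, though that is an assumption here). A small numpy sketch of the lookup and the norms:

import numpy as np

size, dim = 8, 4
rng = np.random.default_rng(0)
W = rng.standard_normal((size, dim)) * 0.1
W[0] = 0.0                              # treat row 0 as a padding row (assumption)

word_ids = np.array([3, 1, 0, 5])       # 0 would be the padding id
sentence = W[word_ids]                  # embedding lookup, shape (4, dim)

# Mirrors the norms above, which deliberately exclude row 0.
l1_norm = np.abs(W[1:]).sum()
l2_norm = (W[1:] ** 2).sum()
print(sentence.shape, l1_norm, l2_norm)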
Example 9
    def __init__(self,
                 source_dim,
                 target_dim,
                 initializer=default_initializer,
                 config=None,
                 verbose=True):
        self.source_dim = source_dim
        self.target_dim = target_dim
        self.alpha = config.alpha
        self.uniform_range = config.uniform_range
        self.normalize = config.normalize
        self.weight_rec = config.weight_rec
        self.weight_sem = config.weight_sem
        self.weight_l2 = config.weight_l2
        self.dropout = config.dropout
        self.verbose = verbose
        self.learning_rate = config.optimizer.param["lr"]
        self.source_encoder = NegativePhraseRAE(self.source_dim,
                                                initializer=initializer,
                                                normalize=self.normalize,
                                                dropout=self.dropout,
                                                verbose=self.verbose)
        self.target_encoder = NegativePhraseRAE(self.target_dim,
                                                initializer=initializer,
                                                normalize=self.normalize,
                                                dropout=self.dropout,
                                                verbose=self.verbose)
        self.source_pos = self.source_encoder.output
        self.source_neg = self.source_encoder.neg_output
        self.target_pos = self.target_encoder.output
        self.target_neg = self.target_encoder.neg_output
        # Define Bilingual Parameters
        self.Wsl = shared_rand_matrix(size=(self.target_dim, self.source_dim),
                                      name="Wsl",
                                      initializer=initializer)
        self.Wtl = shared_rand_matrix(size=(self.source_dim, self.target_dim),
                                      name="Wtl",
                                      initializer=initializer)
        self.bsl = shared_rand_matrix(size=(self.target_dim, ),
                                      name="bsl",
                                      initializer=initializer)
        self.btl = shared_rand_matrix(size=(self.source_dim, ),
                                      name="btl",
                                      initializer=initializer)
        self.param = [self.Wsl, self.Wtl, self.bsl, self.btl]
        self.loss_l2 = sum(T.sum(param**2)
                           for param in [self.Wsl, self.Wtl]) * self.weight_sem

        def sem_distance(p1, w1, b1, p2):
            transform_p1 = T.tanh(T.dot(w1, p1) + b1)
            return T.sum((p2 - transform_p1)**2) / 2

        def sem_sim_distance(p1, w1, b1, p2):
            transform_p1 = T.tanh(T.dot(w1, p1) + b1)
            return cosine_similarity(transform_p1, p2)

        self.source_pos_sem = sem_distance(self.source_pos, self.Wsl, self.bsl,
                                           self.target_pos)
        self.target_pos_sem = sem_distance(self.target_pos, self.Wtl, self.btl,
                                           self.source_pos)
        self.source_neg_sem = sem_distance(self.source_pos, self.Wsl, self.bsl,
                                           self.target_neg)
        self.target_neg_sem = sem_distance(self.target_pos, self.Wtl, self.btl,
                                           self.source_neg)
        self.source_tar_sim = sem_sim_distance(self.source_pos, self.Wsl,
                                               self.bsl, self.target_pos)
        self.target_src_sim = sem_sim_distance(self.target_pos, self.Wtl,
                                               self.btl, self.source_pos)
        self.max_margin_source = T.maximum(
            0.0, self.source_pos_sem - self.source_neg_sem + 1.0)
        self.max_margin_target = T.maximum(
            0.0, self.target_pos_sem - self.target_neg_sem + 1.0)

        self.loss_sem = self.max_margin_source + self.max_margin_target
        self.loss_rec = self.source_encoder.loss_rec + self.target_encoder.loss_rec
        self.loss_l2 = self.loss_l2 + (
            self.source_encoder.loss_l2 +
            self.target_encoder.loss_l2) * self.weight_rec
        self.loss = self.alpha * self.loss_rec + \
                    (1 - self.alpha) * self.loss_sem + \
                    self.loss_l2
        self.params = self.source_encoder.params + self.target_encoder.params + self.param
        self.inputs = [
            self.source_encoder.pos_vectors, self.source_encoder.neg_vectors,
            self.target_encoder.pos_vectors, self.target_encoder.neg_vectors
        ]
        self.input_grad = T.grad(self.loss, self.inputs)
        grads = T.grad(self.loss, self.params)
        self.updates = OrderedDict()
        self.single_updates = OrderedDict()
        self.grad = {}
        for param, grad in zip(self.params, grads):
            g = theano.shared(
                np.asarray(np.zeros_like(param.get_value()),
                           dtype=theano.config.floatX))
            self.grad[param] = g
            self.updates[g] = g + grad
            self.single_updates[param] = param - grad * self.learning_rate
        self._compute_result_grad = theano.function(
            inputs=[
                self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.source_encoder.neg_vectors, self.source_encoder.neg_seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq,
                self.target_encoder.neg_vectors, self.target_encoder.neg_seq
            ],
            outputs=[
                self.alpha * self.loss_rec,
                (1 - self.alpha) * self.loss_sem, self.loss_l2
            ] + self.input_grad,
            updates=self.updates,
            allow_input_downcast=True)
        self._compute_result_grad_single = theano.function(
            inputs=[
                self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.source_encoder.neg_vectors, self.source_encoder.neg_seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq,
                self.target_encoder.neg_vectors, self.target_encoder.neg_seq
            ],
            outputs=[
                self.alpha * self.loss_rec,
                (1 - self.alpha) * self.loss_sem, self.loss_l2
            ] + self.input_grad,
            updates=self.single_updates,
            allow_input_downcast=True)
        self.get_source_output = theano.function(
            inputs=[self.source_encoder.pos_vectors, self.source_encoder.seq],
            outputs=self.source_encoder.output,
            allow_input_downcast=True)
        self.get_target_output = theano.function(
            inputs=[self.target_encoder.pos_vectors, self.target_encoder.seq],
            outputs=self.target_encoder.output,
            allow_input_downcast=True)
        self.get_sem_distance = theano.function(
            inputs=[
                self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq
            ],
            outputs=[self.source_tar_sim, self.target_src_sim],
            allow_input_downcast=True)
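
The core of the bilingual objective is the transformed squared distance sem_distance and the margin-1 hinge built from its positive and negative versions. A numpy restatement of just that piece, with toy shapes and random phrase vectors standing in for the encoder outputs:

import numpy as np

def sem_distance(p1, w1, b1, p2):
    """Squared error between the transformed source phrase and the target
    phrase, as in the Theano graph above."""
    transform_p1 = np.tanh(w1 @ p1 + b1)
    return ((p2 - transform_p1) ** 2).sum() / 2.0

source_dim, target_dim = 4, 5
rng = np.random.default_rng(0)
Wsl = rng.standard_normal((target_dim, source_dim)) * 0.1
bsl = np.zeros(target_dim)
source_pos = rng.standard_normal(source_dim)
target_pos = rng.standard_normal(target_dim)
target_neg = rng.standard_normal(target_dim)

pos = sem_distance(source_pos, Wsl, bsl, target_pos)
neg = sem_distance(source_pos, Wsl, bsl, target_neg)
max_margin_source = max(0.0, pos - neg + 1.0)   # hinge with margin 1
print(pos, neg, max_margin_source)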
Example 10
    def __init__(self,
                 vectors,
                 dim,
                 normalize,
                 dropout,
                 activation,
                 initializer=default_initializer,
                 verbose=True):
        """
        :param vectors: a theano tensor variable
        :param dim:
        :param uniform_range:
        :param normalize:
        :param dropout:
        :param verbose:
        :return:
        """
        self.vectors = vectors
        self.dim = dim
        self.normalize = normalize
        self.dropout = dropout
        self.verbose = verbose
        self.act = Activation(activation)
        # Composition Function Weight
        self.W = shared_rand_matrix((self.dim, 2 * self.dim),
                                    'W',
                                    initializer=initializer)
        self.b = shared_rand_matrix((self.dim, ), 'b', 0)
        # Reconstruction Function Weight
        self.Wr = shared_rand_matrix((2 * self.dim, self.dim),
                                     'Wr',
                                     initializer=initializer)
        self.br = shared_rand_matrix((self.dim * 2, ), 'br', 0)
        self.params = [self.W, self.b, self.Wr, self.br]

        self.seq = T.lmatrix()
        self.left_vector = T.vector()
        self.right_vector = T.vector()

        self.zero = theano.shared(np.array([0.0], dtype=theano.config.floatX))
        self.scan_result, _ = theano.scan(self.encode,
                                          sequences=[self.seq],
                                          outputs_info=[self.vectors, None],
                                          name="pos_rae_build")
        self.loss_rec = T.sum(self.scan_result[1])
        self.loss_l1 = sum([T.sum(T.abs_(param)) for param in self.params])
        self.loss_l2 = sum([T.sum(param**2) for param in self.params])
        # all history vector in scan
        self.history_output = self.scan_result[0]
        self.all_output = self.history_output[-1]
        # final output
        # self.output = self.all_output[-1]
        # Just for two compose
        self.two_compose_result, self.two_compose_rec = self.compose(
            self.left_vector, self.right_vector)
        self.compose_two = theano.function(
            inputs=[self.left_vector, self.right_vector],
            outputs=[self.two_compose_result, self.two_compose_rec])
        # Compose Vectors: N vectors -> N-1 Vectors
        compose_vector = T.fmatrix()
        compose_len = T.iscalar()
        path = T.imatrix()
        hs, _ = theano.scan(fn=self.compose_step,
                            sequences=T.arange(compose_len - 1),
                            non_sequences=compose_vector,
                            name="compose_phrase")
        comp_vec = hs[0]
        comp_rec = hs[1]
        min_index = T.argmin(comp_rec)
        """compose_result, _ = theano.scan(fn=self.greed_step,
                                        sequences=[T.arange(compose_len - 1), T.arange(compose_len - 1)],
                                        non_sequences=[compose_vector, path])"""
        self._compose_vectors = theano.function(
            [compose_vector, compose_len], [comp_vec[min_index], min_index])
        """self._compose_result = theano.function([compose_vector, compose_len, path], [path])"""

        if verbose:
            logger.debug(
                'Architecture of RAE built, summarized below:')
            logger.debug('Hidden dimension: %d' % self.dim)
            logger.debug('Normalize:        %s' % self.normalize)
            logger.debug('Dropout Rate:     %s' % self.dropout)
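
The constructor only declares the composition (W, b) and reconstruction (Wr, br) weights; the compose/encode methods themselves are not in the excerpt. A numpy sketch of one composition step consistent with these shapes (the unit-length normalization is an assumption tied to the normalize flag):

import numpy as np

def compose(left, right, W, b, Wr, br, normalize=True):
    """Compose two child vectors into a parent and score the reconstruction,
    following the parameter shapes above; the repository's own compose() is
    not shown, so details such as normalization are assumptions."""
    children = np.concatenate([left, right])          # (2 * dim,)
    parent = np.tanh(W @ children + b)                # (dim,)
    if normalize:
        parent = parent / np.linalg.norm(parent)
    reconstruction = np.tanh(Wr @ parent + br)        # (2 * dim,)
    rec_error = ((reconstruction - children) ** 2).sum()
    return parent, rec_error

dim = 4
rng = np.random.default_rng(0)
W = rng.standard_normal((dim, 2 * dim)) * 0.1
b = np.zeros(dim)
Wr = rng.standard_normal((2 * dim, dim)) * 0.1
br = np.zeros(2 * dim)
parent, rec = compose(rng.standard_normal(dim), rng.standard_normal(dim),
                      W, b, Wr, br)
print(parent.shape, rec)                              # (4,) and a scalar error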