Example #1
    def __init__(self, x, c, y, n_words, batch_size, lr, init_emb,
                 vocab_w_size, w_emb_dim, w_hidden_dim, c_emb_dim,
                 c_hidden_dim, output_dim, vocab_c_size, window, opt):
        assert window % 2 == 1, 'Window size must be odd'
        """ input """
        self.x = x  # 1D: n_words * batch_size, 2D: window; elem=word id
        self.x_v = x.flatten()  # 1D: n_words * batch_size * window; elem=word id
        self.c = c  # 1D: n_words * batch_size, 2D: window, 3D: max_len_char, 4D: window; elem=char id
        self.y = y
        self.batch_size = batch_size
        self.n_words = n_words
        self.lr = lr

        n_phi = (w_emb_dim + c_hidden_dim) * window
        max_len_char = T.cast(self.c.shape[2], 'int32')
        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_w_size, w_emb_dim))

        self.pad = build_shared_zeros((1, c_emb_dim))
        self.e_c = theano.shared(sample_norm_dist(vocab_c_size - 1, c_emb_dim))
        self.emb_c = T.concatenate([self.pad, self.e_c], 0)

        self.W_in = theano.shared(sample_weights(n_phi, w_hidden_dim))
        self.W_c = theano.shared(sample_weights(c_emb_dim * window, c_hidden_dim))
        self.W_out = theano.shared(sample_weights(w_hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(w_hidden_dim))
        self.b_c = theano.shared(sample_weights(c_hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.e_c, self.W_in, self.W_c, self.W_out, self.b_in, self.b_c, self.b_y]
        """ look up embedding """
        self.x_emb = self.emb[self.x_v]  # 1D: batch_size*n_words * window, 2D: emb_dim
        self.c_emb = self.emb_c[self.c]  # 1D: batch_size*n_words, 2D: window, 3D: max_len_char, 4D: window, 5D: n_c_emb
        self.x_emb_r = self.x_emb.reshape((x.shape[0], x.shape[1], -1))
        """ convolution """
        self.c_phi = T.max(T.dot(self.c_emb.reshape((batch_size * n_words, window, max_len_char, -1)), self.W_c) + self.b_c, 2)  # 1D: batch_size*n_words, 2D: window, 3D: c_hidden_dim
        self.x_phi = T.concatenate([self.x_emb_r, self.c_phi], axis=2)
        """ forward """
        self.h = relu(T.dot(self.x_phi.reshape((batch_size * n_words, n_phi)), self.W_in) + self.b_in)
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)
        """ predict """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)
        """ loss """
        self.log_p = T.log(self.p_y_given_x)[T.arange(batch_size * n_words), self.y]
        self.nll = -T.sum(self.log_p)
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, self.x_emb, self.x, self.lr)
Example #2
    def __init__(self, name, w, c, b, y, lr,
                 init_w_emb, vocab_w_size, vocab_c_size,
                 w_emb_dim, c_emb_dim, w_hidden_dim, c_hidden_dim, output_dim,
                 window, opt):

        assert window % 2 == 1, 'Window size must be odd'

        """ input """
        self.name = name
        self.w = w
        self.c = c
        self.b = b
        self.y = y
        self.lr = lr
        self.input = [self.w, self.c, self.b, self.y, self.lr]

        n_phi = w_emb_dim + c_emb_dim * window
        n_words = w.shape[0]

        """ params """
        if init_w_emb is not None:
            self.emb = theano.shared(init_w_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_w_size, w_emb_dim))

        self.emb_c = theano.shared(sample_norm_dist(vocab_c_size, c_emb_dim))
        self.W_in = theano.shared(sample_weights(w_hidden_dim, 1, window, n_phi))
        self.W_c = theano.shared(sample_weights(c_hidden_dim, 1, window, c_emb_dim))
        self.W_out = theano.shared(sample_weights(w_hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(w_hidden_dim, 1))
        self.b_c = theano.shared(sample_weights(c_hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        """ pad """
        # floor division so the pad size stays an int under Python 3
        self.zero = theano.shared(np.zeros(shape=(1, 1, window // 2, n_phi), dtype=theano.config.floatX))
        self.zero_c = theano.shared(np.zeros(shape=(1, 1, window // 2, c_emb_dim), dtype=theano.config.floatX))

        self.params = [self.emb_c, self.W_in, self.W_c, self.W_out, self.b_in, self.b_c, self.b_y]

        """ look up embedding """
        x_emb = self.emb[self.w]  # x_emb: 1D: n_words, 2D: w_emb_dim
        c_emb = self.emb_c[self.c]  # c_emb: 1D: n_chars, 2D: c_emb_dim

        """ create feature """
        c_phi = self.create_char_feature(self.b, c_emb, self.zero_c) + self.b_c  # 1D: n_words, 2D: c_hidden_dim(50)
        x_phi = T.concatenate([x_emb, c_phi], axis=1)  # 1D: n_words, 2D: w_emb_dim(100) + c_hidden_dim(50)

        """ convolution """
        x_padded = T.concatenate([self.zero, x_phi.reshape((1, 1, x_phi.shape[0], x_phi.shape[1])), self.zero], axis=2)  # 1D: 1, 2D: 1, 3D: n_words + 2*(window//2), 4D: n_phi
        x_in = conv2d(input=x_padded, filters=self.W_in)  # 1D: 1, 2D: w_hidden_dim(300), 3D: n_words, 4D: 1

        """ feed-forward computation """
        h = relu(x_in.reshape((x_in.shape[1], x_in.shape[2])) + T.repeat(self.b_in, T.cast(x_in.shape[2], 'int32'), 1)).T  # 1D: n_words, 2D: w_hidden_dim
        self.o = T.dot(h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)

        """ prediction """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)

        """ cost function """
        self.nll = -T.sum(T.log(self.p_y_given_x)[T.arange(n_words), self.y])
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, x_emb, self.w, self.lr)
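
Likewise, a hedged sketch of how this sentence-level variant might be compiled. Model is again a hypothetical class name; create_char_feature is a method of the same class that is not shown in the snippet, and what b encodes is internal to it, so it is treated here simply as an int vector. Note that this example additionally needs conv2d (from theano.tensor.nnet import conv2d) at module level.

import theano
import theano.tensor as T

# Symbolic inputs; self.input in the snippet collects exactly these five.
w = T.ivector('w')    # word ids of one sentence
c = T.ivector('c')    # char ids (consumed by create_char_feature)
b = T.ivector('b')    # auxiliary indices for create_char_feature (assumed int vector)
y = T.ivector('y')    # gold label ids
lr = T.fscalar('lr')  # learning rate, supplied per call

model = Model(name='tagger', w=w, c=c, b=b, y=y, lr=lr,
              init_w_emb=None, vocab_w_size=10000, vocab_c_size=100,
              w_emb_dim=100, c_emb_dim=10, w_hidden_dim=300, c_hidden_dim=50,
              output_dim=45, window=5, opt='adagrad')

train = theano.function(inputs=model.input,  # [w, c, b, y, lr]
                        outputs=[model.nll, model.result],
                        updates=model.updates)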