Example No. 1
    def _init_nn(self):
        """Initialize neural network.

        """
        self.intm_dim = max(MIN_DIM, self.ndim - (self.ndim - self.n_y) / 2)
        # indices of word embeddings
        self.W_INDICES_ARG1 = TT.ivector(name="W_INDICES_ARG1")
        self.W_INDICES_ARG2 = TT.ivector(name="W_INDICES_ARG2")
        # connective's index
        self.CONN_INDEX = TT.iscalar(name="CONN_INDEX")
        # initialize the matrix of word embeddings
        self.init_w_emb()
        # word embeddings of the arguments
        self.EMB_ARG1 = self.W_EMB[self.W_INDICES_ARG1]
        self.EMB_ARG2 = self.W_EMB[self.W_INDICES_ARG2]
        # connective's embedding
        self._init_conn_emb()
        self.EMB_CONN = self.CONN_EMB[self.CONN_INDEX]
        # perform matrix decomposition
        _, _, self.ARG1 = TT.nlinalg.svd(self.EMB_ARG1,
                                         full_matrices=True)
        _, _, self.ARG2 = TT.nlinalg.svd(self.EMB_ARG2,
                                         full_matrices=True)
        self.ARG_DIFF = self.ARG1 - self.ARG2
        # map decomposed matrices to the intermediate level
        self.ARG_DIFF2I = theano.shared(value=HE_UNIFORM((self.ndim, 1)),
                                        name="ARG_DIFF2I")
        self.arg_diff_bias = theano.shared(value=HE_UNIFORM((1, self.ndim)),
                                           name="arg_diff_bias")
        self._params.extend([self.ARG_DIFF2I, self.arg_diff_bias])
        self.ARGS = (TT.dot(self.ARG_DIFF, self.ARG_DIFF2I).T +
                     self.arg_diff_bias).flatten()
        # define final units
        self.I = TT.concatenate((self.ARGS, self.EMB_CONN))
        self.I2Y = theano.shared(value=HE_UNIFORM((self.n_y,
                                                   self.ndim + self.intm_dim)),
                                 name="I2Y")
        self.y_bias = theano.shared(value=HE_UNIFORM((1, self.n_y)),
                                    name="y_bias")
        self._params.extend([self.I2Y, self.y_bias])
        self.Y_pred = TT.nnet.softmax(TT.dot(self.I2Y, self.I).T + self.y_bias)
        # initialize cost and optimization functions
        self.Y_gold = TT.vector(name="Y_gold")
        self._cost = TT.sum((self.Y_pred - self.Y_gold) ** 2)
        self._dev_cost = TT.sum((self.Y_pred - self.Y_gold) ** 2)
        self._pred_class = TT.argmax(self.Y_pred)
        grads = TT.grad(self._cost, wrt=self._params)
        self._init_funcs(grads)
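
A note on the decomposition step: TT.nlinalg.svd with full_matrices=True returns U, S and V^T, and only the third output (the right singular vectors) is kept, so both arguments map to (ndim, ndim) matrices no matter how many tokens they contain, which makes their difference well-defined. A minimal numpy sketch of that shape behaviour (the token counts below are arbitrary assumptions):

# numpy.linalg.svd mirrors TT.nlinalg.svd: with full_matrices=True the
# third output has shape (ndim, ndim) regardless of the number of rows.
import numpy as np

ndim = 100
emb_arg1 = np.random.rand(7, ndim)   # embeddings of a 7-token argument
emb_arg2 = np.random.rand(5, ndim)   # embeddings of a 5-token argument

_, _, arg1 = np.linalg.svd(emb_arg1, full_matrices=True)
_, _, arg2 = np.linalg.svd(emb_arg2, full_matrices=True)

assert arg1.shape == arg2.shape == (ndim, ndim)
arg_diff = arg1 - arg2               # fixed-size representation of the pair
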
Example No. 2
    def _init_w_emb(self):
        """Initialize task-specific word embeddings.

        """
        self.W_EMB = theano.shared(
            value=HE_UNIFORM((self.w_i, self.ndim)), name="W_EMB")
        self._params.append(self.W_EMB)
Example No. 3
    def _init_w_emb(self):
        """Initialize task-specific word embeddings.

        """
        self.W_EMB = theano.shared(value=HE_UNIFORM((self.w_i, self.ndim)),
                                   name="W_EMB")
        self._params.append(self.W_EMB)
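
These snippets rely on the module-level HE_UNIFORM initializer, whose definition is not shown in the examples. A plausible stand-in (an assumption, not the project's actual code) is the standard He-uniform scheme, sampling weights from U(-sqrt(6 / fan_in), +sqrt(6 / fan_in)):

# Hypothetical stand-in for HE_UNIFORM; the real project may differ.
import numpy as np

def he_uniform(shape, dtype="float32"):
    """Sample a weight matrix with He-uniform initialization."""
    fan_in = shape[-1]
    limit = np.sqrt(6. / fan_in)
    return np.random.uniform(-limit, limit, size=shape).astype(dtype)

# e.g. an embedding matrix for w_i = 10000 words with ndim = 100
w_emb_init = he_uniform((10000, 100))
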
Example No. 4
    def _init_nn(self):
        """Initialize neural network.

        """
        self.intm_dim = max(MIN_DIM, self.ndim - (self.ndim - self.n_y) / 2)
        # indices of word embeddings
        self.W_INDICES_ARG1 = TT.ivector(name="W_INDICES_ARG1")
        self.W_INDICES_ARG2 = TT.ivector(name="W_INDICES_ARG2")
        # connective's index
        self.CONN_INDEX = TT.iscalar(name="CONN_INDEX")
        # initialize the matrix of word embeddings
        self.init_w_emb()
        # word embeddings of the arguments
        self.EMB_ARG1 = self.W_EMB[self.W_INDICES_ARG1]
        self.EMB_ARG2 = self.W_EMB[self.W_INDICES_ARG2]
        # connective's embedding
        self._init_conn_emb()
        self.EMB_CONN = self.CONN_EMB[self.CONN_INDEX]
        # perform matrix decomposition
        _, _, self.ARG1 = TT.nlinalg.svd(self.EMB_ARG1, full_matrices=True)
        _, _, self.ARG2 = TT.nlinalg.svd(self.EMB_ARG2, full_matrices=True)
        self.ARG_DIFF = self.ARG1 - self.ARG2
        # map decomposed matrices to the intermediate level
        self.ARG_DIFF2I = theano.shared(value=HE_UNIFORM((self.ndim, 1)),
                                        name="ARG_DIFF2I")
        self.arg_diff_bias = theano.shared(value=HE_UNIFORM((1, self.ndim)),
                                           name="arg_diff_bias")
        self._params.extend([self.ARG_DIFF2I, self.arg_diff_bias])
        self.ARGS = (TT.dot(self.ARG_DIFF, self.ARG_DIFF2I).T +
                     self.arg_diff_bias).flatten()
        # define final units
        self.I = TT.concatenate((self.ARGS, self.EMB_CONN))
        self.I2Y = theano.shared(
            value=HE_UNIFORM((self.n_y, self.ndim + self.intm_dim)),
            name="I2Y")
        self.y_bias = theano.shared(value=HE_UNIFORM((1, self.n_y)),
                                    name="y_bias")
        self._params.extend([self.I2Y, self.y_bias])
        self.Y_pred = TT.nnet.softmax(TT.dot(self.I2Y, self.I).T + self.y_bias)
        # initialize cost and optimization functions
        self.Y_gold = TT.vector(name="Y_gold")
        self._cost = TT.sum((self.Y_pred - self.Y_gold)**2)
        self._dev_cost = TT.sum((self.Y_pred - self.Y_gold)**2)
        self._pred_class = TT.argmax(self.Y_pred)
        grads = TT.grad(self._cost, wrt=self._params)
        self._init_funcs(grads)
Example No. 5
    def _init_conn_emb(self):
        """Initialize task-specific connective embeddings.

        """
        self.CONN_EMB = theano.shared(
            value=HE_UNIFORM((self.c_i, self.intm_dim)),
            name="CONN_EMB")
        self._params.append(self.CONN_EMB)
Example No. 6
    def _init_conn_emb(self):
        """Initialize task-specific connective embeddings.

        """
        self.CONN_EMB = theano.shared(
            value=HE_UNIFORM((self.c_i, self.intm_dim)),
            name="CONN_EMB")
        self._params.append(self.CONN_EMB)
Example No. 7
    def _init_nn(self):
        """Initialize neural network.

        """
        self.intm_dim = max(100, self.ndim - (self.ndim - self.n_y) / 2)
        # indices of word embeddings
        self.W_INDICES_ARG1 = TT.ivector(name="W_INDICES_ARG1")
        self.W_INDICES_ARG2 = TT.ivector(name="W_INDICES_ARG2")
        # connective's index
        self.CONN_INDEX = TT.iscalar(name="CONN_INDEX")
        # initialize the matrix of word embeddings
        self.init_w_emb()
        # word embeddings of the arguments
        self.EMB_ARG1 = self.W_EMB[self.W_INDICES_ARG1]
        self.EMB_ARG2 = self.W_EMB[self.W_INDICES_ARG2]
        # connective's embedding
        self._init_conn_emb()
        self.EMB_CONN = self.CONN_EMB[self.CONN_INDEX]
        # initialize forward LSTM unit
        invars = ((self.EMB_ARG1, False), (self.EMB_ARG2, False))
        params, outvars = self._init_lstm(invars)
        self._params.extend(params)
        self.F_OUT_ARG1, self.F_OUT_ARG2 = outvars
        self.F_ARG1 = TT.mean(self.F_OUT_ARG1, axis=0)
        self.F_ARG2 = TT.mean(self.F_OUT_ARG2, axis=0)
        # define final units
        self.I = TT.concatenate((self.F_ARG1, self.F_ARG2, self.EMB_CONN))
        self.I2Y = theano.shared(
            value=HE_UNIFORM((self.n_y, self.intm_dim * 3)),
            name="I2Y")
        self.y_bias = theano.shared(value=HE_UNIFORM((1, self.n_y)),
                                    name="y_bias")
        self._params.extend([self.I2Y, self.y_bias])
        self.Y_pred = TT.nnet.softmax(TT.dot(self.I2Y, self.I).T + self.y_bias)
        # initialize cost and optimization functions
        self.Y_gold = TT.vector(name="Y_gold")
        self._cost = TT.sum((self.Y_pred - self.Y_gold)**2)
        self._dev_cost = TT.sum((self.Y_pred - self.Y_gold)**2)
        self._pred_class = TT.argmax(self.Y_pred)
        grads = TT.grad(self._cost, wrt=self._params)
        self._init_funcs(grads)
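
The feature assembly in this variant is easy to check by hand: each argument's LSTM outputs (one intm_dim-sized state per token) are mean-pooled over time and concatenated with the intm_dim-sized connective embedding, which is why I2Y expects exactly intm_dim * 3 input features. A minimal numpy sketch (token counts and intm_dim are arbitrary assumptions):

import numpy as np

intm_dim = 100
out_arg1 = np.random.rand(7, intm_dim)   # LSTM states of a 7-token argument
out_arg2 = np.random.rand(5, intm_dim)   # LSTM states of a 5-token argument
emb_conn = np.random.rand(intm_dim)      # connective embedding

i_vec = np.concatenate((out_arg1.mean(axis=0),
                        out_arg2.mean(axis=0),
                        emb_conn))
assert i_vec.shape == (3 * intm_dim,)
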
Example No. 8
    def _init_w2v_emb(self):
        """Initialize word2vec embedding matrix.

        """
        w_emb = np.empty((self.w_i, self.ndim))
        w_emb[self.unk_w_i, :] = 1e-2  # prevent zeros in this row
        for w, i in self.w2emb_i.iteritems():
            if i == self.unk_w_i:
                continue
            w_emb[i] = self.w2v[w]
        self.W_EMB = theano.shared(value=floatX(w_emb), name="W_EMB")
        # We unload the embeddings before training to free up memory.  Feel
        # free to comment out the line below if you have plenty of RAM.
        self.w2v.unload()
Example No. 9
    def __init__(self, a_w2v=False, a_lstsq=False, a_max_iters=MAX_ITERS):
        """Class constructor.

        Args:
          a_w2v (bool):
            use pre-trained word2vec instance
          a_lstsq (bool):
            pre-train task-specific word embeddings, but use the least-squares
            method to generate embeddings for unknown words from generic
            word2vec vectors
          a_max_iters (int):
            maximum number of iterations

        """
        # access to the original word2vec resource
        if a_lstsq:
            a_w2v = True
        if a_w2v:
            self.w2v = Word2Vec  # singleton object
        else:
            self.w2v = None
        self.lstsq = a_lstsq
        self._plain_w2v = self.w2v and not self.lstsq
        # matrix mapping word2vec to task-specific embeddings
        self.max_iters = a_max_iters
        self.w2emb = None
        self.ndim = -1  # vector dimensionality will be initialized later
        self.intm_dim = -1
        # mapping from word to its embedding index
        self.unk_w_i = 0
        self._aux_keys = set((0, ))
        self.w_i = 1
        self.w2emb_i = dict()
        # mapping from connective to its embedding index
        self.unk_c_i = 0
        self.c_i = 1
        self.c2emb_i = dict()
        # variables needed for training
        self._trained = False
        self._params = []
        self._w_stat = self._pred_class = None
        self.use_dropout = theano.shared(floatX(0.))
        self.W_EMB = self.CONN_EMB = self._cost = self._dev_cost = None
        # initialize theano functions to None
        self._reset_funcs()
        # set up functions for obtaining word embeddings at train and test
        # times
        self._init_wemb_funcs()
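
The a_lstsq option described in the docstring is not shown in these examples; conceptually it fits a linear map from the generic word2vec space to the task-specific embedding space on known words and then projects unknown words through it. A hedged sketch of that idea (all names below are illustrative, not the project's API):

import numpy as np

def fit_w2v_to_task_map(w2v_vecs, task_vecs):
    """Solve task_vecs ~ w2v_vecs @ M in the least-squares sense."""
    M, _, _, _ = np.linalg.lstsq(w2v_vecs, task_vecs)
    return M

# usage sketch: embed an out-of-vocabulary word via its word2vec vector
# unk_task_vec = np.dot(w2v_vec_of_unknown_word, M)
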
Example No. 10
    def __init__(self, a_w2v=False, a_lstsq=False, a_max_iters=MAX_ITERS):
        """Class constructor.

        Args:
          a_w2v (bool):
            use pre-trained word2vec instance
          a_lstsq (bool):
            pre-train task-specific word embeddings, but use the least-squares
            method to generate embeddings for unknown words from generic
            word2vec vectors
          a_max_iters (int):
            maximum number of iterations

        """
        # access to the original word2vec resource
        if a_lstsq:
            a_w2v = True
        if a_w2v:
            self.w2v = Word2Vec  # singleton object
        else:
            self.w2v = None
        self.lstsq = a_lstsq
        self._plain_w2v = self.w2v and not self.lstsq
        # matrix mapping word2vec to task-specific embeddings
        self.max_iters = a_max_iters
        self.w2emb = None
        self.ndim = -1    # vector dimensionality will be initialized later
        self.intm_dim = -1
        # mapping from word to its embedding index
        self.unk_w_i = 0
        self._aux_keys = set((0, ))
        self.w_i = 1
        self.w2emb_i = dict()
        # mapping from connective to its embedding index
        self.unk_c_i = 0
        self.c_i = 1
        self.c2emb_i = dict()
        # variables needed for training
        self._trained = False
        self._params = []
        self._w_stat = self._pred_class = None
        self.use_dropout = theano.shared(floatX(0.))
        self.W_EMB = self.CONN_EMB = self._cost = self._dev_cost = None
        # initialize theano functions to None
        self._reset_funcs()
        # set up functions for obtaining word embeddings at train and test
        # times
        self._init_wemb_funcs()
Example No. 11
    def _init_w2v_emb(self):
        """Initialize word2vec embedding matrix.

        """
        w_emb = np.empty((self.w_i, self.ndim))
        w_emb[self.unk_w_i, :] = 1e-2  # prevent zeros in this row
        for w, i in self.w2emb_i.iteritems():
            if i == self.unk_w_i:
                continue
            w_emb[i] = self.w2v[w]
        self.W_EMB = theano.shared(value=floatX(w_emb),
                                   name="W_EMB")
        # We unload the embeddings before training to free up memory.  Feel
        # free to comment out the line below if you have plenty of RAM.
        self.w2v.unload()
Example No. 12
    def _init_lstm(self, a_invars, a_sfx="-forward"):
        """Initialize LSTM layer.

        Args:
          a_invars (iterable):
            pairs of symbolic theano input variables and their go_backwards
            flags
          a_sfx (str):
            suffix to use for function and parameter names

        Returns:
          (2-tuple):
            parameters to be optimized and list of symbolic outputs from the
            function

        """
        intm_dim = self.intm_dim
        # initialize transformation matrices and bias term
        W_dim = (intm_dim, self.ndim)
        W = np.concatenate([ORTHOGONAL(W_dim),
                            ORTHOGONAL(W_dim),
                            ORTHOGONAL(W_dim),
                            ORTHOGONAL(W_dim)], axis=0)
        W = theano.shared(value=W, name="W" + a_sfx)

        U_dim = (intm_dim, intm_dim)
        U = np.concatenate([ORTHOGONAL(U_dim),
                            ORTHOGONAL(U_dim),
                            ORTHOGONAL(U_dim),
                            ORTHOGONAL(U_dim)], axis=0)
        U = theano.shared(value=U, name="U" + a_sfx)

        V = ORTHOGONAL(U_dim)  # V for vendetta
        V = theano.shared(value=V, name="V" + a_sfx)

        b_dim = (1, intm_dim * 4)
        b = theano.shared(value=HE_UNIFORM(b_dim), name="b" + a_sfx)

        params = [W, U, V, b]

        # initialize dropout units
        w_do = theano.shared(value=floatX(np.ones((4 * intm_dim, ))),
                             name="w_do")
        w_do = self._init_dropout(w_do)
        u_do = theano.shared(value=floatX(np.ones((4 * intm_dim, ))),
                             name="u_do")
        u_do = self._init_dropout(u_do)

        # custom function for splitting up matrix parts
        def _slice(_x, n, dim):
            if _x.ndim == 3:
                return _x[:, :, n * dim:(n + 1) * dim]
            return _x[:, n * dim:(n + 1) * dim]

        # define recurrent LSTM unit
        def _step(x_, h_, c_, W, U, V, b, w_do, u_do):
            """Recurrent LSTM unit.

            Note:
              The general order of function parameters to fn is:
              sequences (if any), prior result(s) (if needed),
              non-sequences (if any).

            Args:
              x_ (theano.shared): input vector
              h_ (theano.shared): output vector
              c_ (theano.shared): memory state
              W (theano.shared): input transform matrix
              U (theano.shared): inner-state transform matrix
              V (theano.shared): output transform matrix
              b (theano.shared): bias vector
              w_do (TT.col): dropout unit for the W matrix
              u_do (TT.col): dropout unit for the U matrix

            Returns:
              (2-tuple(h, c)):
                new hidden and memory states

            """
            # pre-compute common terms:
            # W \in R^{236 x 100}
            # x \in R^{1 x 100}
            # U \in R^{236 x 59}
            # h \in R^{1 x 59}
            # b \in R^{1 x 236}
            # w_do \in R^{236 x 1}
            # u_do \in R^{236 x 1}

            # xhb \in R^{1 x 236}
            xhb = (TT.dot(W * w_do.dimshuffle((0, 'x')), x_.T)
                   + TT.dot(U * u_do.dimshuffle((0, 'x')), h_.T)).T + b
            # i \in R^{1 x 59}
            i = TT.nnet.sigmoid(_slice(xhb, 0, intm_dim))
            # f \in R^{1 x 59}
            f = TT.nnet.sigmoid(_slice(xhb, 1, intm_dim))
            # c \in R^{1 x 59}
            c = TT.tanh(_slice(xhb, 2, intm_dim))
            c = i * c + f * c_
            # V \in R^{59 x 59}
            # o \in R^{1 x 59}
            o = TT.nnet.sigmoid(_slice(xhb, 3, intm_dim) + TT.dot(V, c.T).T)
            # h \in R^{1 x 59}
            h = o * TT.tanh(c)
            # return current output and memory state
            return h.flatten(), c.flatten()

        m = 0
        n = intm_dim
        ov = None
        outvars = []
        for iv, igbw in a_invars:
            m = iv.shape[0]
            ret, _ = theano.scan(_step,
                                 sequences=[iv],
                                 outputs_info=[
                                     floatX(np.zeros((n, ))),
                                     floatX(np.zeros((n, )))
                                 ],
                                 non_sequences=[W, U, V, b, w_do, u_do],
                                 name="LSTM" + str(iv) + a_sfx,
                                 n_steps=m,
                                 truncate_gradient=TRUNCATE_GRADIENT,
                                 go_backwards=igbw)
            ov = ret[0]
            outvars.append(ov)
        return params, outvars
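
The _slice helper assumes that the four LSTM gates (input, forget, cell candidate, output) are stacked along one axis of size 4 * intm_dim, which matches how W, U and b are built above. A minimal numpy illustration of that layout (intm_dim is an arbitrary example value):

import numpy as np

intm_dim = 59
xhb = np.random.rand(1, 4 * intm_dim)    # pre-activations for all four gates

def gate(n):
    """Return the slice holding the n-th gate's pre-activations."""
    return xhb[:, n * intm_dim:(n + 1) * intm_dim]

i_gate, f_gate, c_cand, o_gate = (gate(n) for n in range(4))
assert i_gate.shape == (1, intm_dim)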