Example #1
    def encode(self, x=None):
        if x is None:
            x = CharLSTMEmbeddings.create_placeholder(self.name)
        self.x = x
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            Wch = tf.get_variable(
                "Wch",
                initializer=tf.constant_initializer(self.weights, dtype=tf.float32, verify_shape=True),
                shape=[self.vsz, self.dsz],
                trainable=True
            )
            # Zero the PAD row so padding characters embed to all-zeros
            ech0 = tf.scatter_update(Wch, tf.constant(Offsets.PAD, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, self.dsz]))

            # x is [B, T, W] char ids; flatten to [B*T, W] so each row is one word
            shape = tf.shape(x)
            B = shape[0]
            T = shape[1]
            W = shape[2]
            flat_chars = tf.reshape(x, [-1, W])
            # A word's length is the number of non-PAD characters it contains
            word_lengths = tf.reduce_sum(tf.cast(tf.not_equal(flat_chars, Offsets.PAD), tf.int32), axis=1)
            with tf.control_dependencies([ech0]):
                embed_chars = tf.nn.embedding_lookup(Wch, flat_chars)

            fwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
            bwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
            _, rnn_state = tf.nn.bidirectional_dynamic_rnn(fwd_lstm, bwd_lstm, embed_chars, sequence_length=word_lengths, dtype=tf.float32)

            # Concatenate the final forward/backward hidden states and restore [B, T, lstmsz]
            result = tf.concat([rnn_state[0][-1].h, rnn_state[1][-1].h], axis=1)
            return tf.reshape(result, [B, T, self.lstmsz])
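A minimal sketch of how this encoder might be driven in a TF1 session. The `embed` instance and the sizes below are hypothetical; the class is assumed to have been constructed by the surrounding framework with `lstmsz=200`:

import numpy as np
import tensorflow as tf

# Hypothetical batch: 8 sentences, 20 words, up to 15 chars per word.
# Ids start at 1 on the assumption that Offsets.PAD == 0.
char_ids = np.random.randint(1, 50, size=(8, 20, 15)).astype(np.int32)

word_vecs = embed.encode()  # builds its own [B, T, W] placeholder, stored on embed.x
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(word_vecs, feed_dict={embed.x: char_ids})
    print(out.shape)        # (8, 20, 200) when lstmsz == 200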
Example #2
    def pool(self, word_embeddings, dsz, init, **kwargs):
        """LSTM with dropout yielding a final-state as output
        
        :param word_embeddings: The input word embeddings
        :param dsz: The input word embedding depth
        :param init: The tensorflow initializer to use (currently ignored)
        :param kwargs: See below
        
        :Keyword Arguments:
        * *rnnsz* -- (``int``) The number of hidden units (defaults to `hsz`)
        * *hsz* -- (``int``) Backoff for `rnnsz` (defaults to `100`)
        * *rnntype* -- (``str``) `blstm` or `lstm` (defaults to `lstm`)
        * *layers* -- (``int``) The number of stacked LSTM layers (defaults to `1`)

        :return: A `[B, hsz]` tensor holding the final hidden state
        """
        hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
        if isinstance(hsz, list):
            hsz = hsz[0]

        rnntype = kwargs.get('rnntype', 'lstm')
        nlayers = int(kwargs.get('layers', 1))

        if rnntype == 'blstm':
            rnnfwd = stacked_lstm(hsz, self.pkeep, nlayers)
            rnnbwd = stacked_lstm(hsz, self.pkeep, nlayers)
            ((_, _), (fw_final_state,
                      bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
                          rnnfwd,
                          rnnbwd,
                          word_embeddings,
                          sequence_length=self.lengths,
                          dtype=tf.float32)
            # The forward and backward final states are summed, so the output width stays hsz
            output_state = fw_final_state[-1].h + bw_final_state[-1].h
            out_hsz = hsz

        else:
            rnnfwd = stacked_lstm(hsz, self.pkeep, nlayers)
            _, output_state = tf.nn.dynamic_rnn(rnnfwd,
                                                word_embeddings,
                                                sequence_length=self.lengths,
                                                dtype=tf.float32)
            output_state = output_state[-1].h
            out_hsz = hsz

        combine = tf.reshape(output_state, [-1, out_hsz])
        return combine
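Note that this variant joins the two directions by summing their final states, so the pooled width stays at `hsz`; the variant below concatenates `hsz//2`-wide halves instead. A toy sketch of the difference, with hypothetical shapes:

import tensorflow as tf

fw = tf.zeros([4, 100])                  # stand-in forward final state
bw = tf.zeros([4, 100])                  # stand-in backward final state
summed = fw + bw                         # [4, 100]: width unchanged
joined = tf.concat([fw, bw], axis=-1)    # [4, 200]: widths add up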
Example #3
    def pool(self, word_embeddings, dsz, init, **kwargs):
        """LSTM with dropout yielding a final-state as output

        :param word_embeddings: The input word embeddings
        :param dsz: The input word embedding depth
        :param init: The tensorflow initializer to use (currently ignored)
        :param kwargs: See below

        :Keyword Arguments:
        * *rnnsz* -- (``int``) The number of hidden units (defaults to `hsz`)
        * *hsz* -- (``int``) Backoff for `rnnsz`, typically a result of stacking params. This keeps things simple so
          it's easy to do things like residual connections between LSTM and post-LSTM stacking layers

        :return: A `[B, hsz]` tensor holding the final hidden state
        """
        hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
        vdrop = bool(kwargs.get('variational_dropout', False))
        if isinstance(hsz, list):
            hsz = hsz[0]

        rnntype = kwargs.get('rnn_type', kwargs.get('rnntype', 'lstm'))
        nlayers = int(kwargs.get('layers', 1))

        if rnntype == 'blstm':
            rnnfwd = stacked_lstm(hsz//2, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
            rnnbwd = stacked_lstm(hsz//2, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
            ((_, _), (fw_final_state, bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(rnnfwd,
                                                                                         rnnbwd,
                                                                                         word_embeddings,
                                                                                         sequence_length=self.lengths,
                                                                                         dtype=tf.float32)
            # The output of the BRNN function needs to be joined on the H axis
            output_state = tf.concat([fw_final_state[-1].h, bw_final_state[-1].h], -1)
            out_hsz = hsz

        else:
            rnnfwd = stacked_lstm(hsz, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
            _, output_state = tf.nn.dynamic_rnn(rnnfwd, word_embeddings, sequence_length=self.lengths, dtype=tf.float32)
            output_state = output_state[-1].h
            out_hsz = hsz

        combine = tf.reshape(output_state, [-1, out_hsz])
        return combine
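A hedged sketch of invoking this pooling layer; the `model` instance, its `lengths` wiring, and the sizes are hypothetical:

# word_embeddings: a [B, T, 300] tensor produced upstream (hypothetical)
pooled = model.pool(word_embeddings,
                    dsz=300,
                    init=None,                # currently ignored
                    rnnsz=400,                # each direction gets 400 // 2 = 200 units
                    rnntype='blstm',
                    layers=2,
                    variational_dropout=True)
# pooled: [B, 400], the concatenated forward/backward final hidden states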
Example #4
    def encode(self, x=None):
        if x is None:
            x = CharLSTMEmbeddings.create_placeholder(self.name)
        self.x = x
        with tf.variable_scope(self.scope):
            Wch = tf.get_variable("Wch",
                                  initializer=tf.constant_initializer(
                                      self.weights,
                                      dtype=tf.float32,
                                      verify_shape=True),
                                  shape=[self.vsz, self.dsz],
                                  trainable=True)
            # Zero the PAD row so padding characters embed to all-zeros
            ech0 = tf.scatter_update(
                Wch, tf.constant(Offsets.PAD, dtype=tf.int32, shape=[1]),
                tf.zeros(shape=[1, self.dsz]))

            shape = tf.shape(x)
            B = shape[0]
            T = shape[1]
            W = shape[2]
            flat_chars = tf.reshape(x, [-1, W])
            # A word's length is the number of non-PAD characters it contains
            word_lengths = tf.reduce_sum(
                tf.cast(tf.not_equal(flat_chars, Offsets.PAD), tf.int32), axis=1)
            with tf.control_dependencies([ech0]):
                embed_chars = tf.nn.embedding_lookup(Wch, flat_chars)

            fwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
            bwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
            _, rnn_state = tf.nn.bidirectional_dynamic_rnn(
                fwd_lstm,
                bwd_lstm,
                embed_chars,
                sequence_length=word_lengths,
                dtype=tf.float32)

            result = tf.concat([rnn_state[0][-1].h, rnn_state[1][-1].h],
                               axis=1)
            return tf.reshape(result, [B, T, self.lstmsz])
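This second `encode` variant differs from Example #1 only in formatting and in omitting `reuse=tf.AUTO_REUSE` on the variable scope. The two methods compose: `encode` turns `[B, T, W]` char ids into `[B, T, lstmsz]` word vectors, which `pool` (Examples #2 and #3) reduces to a fixed-size vector. A sketch of that composition, with hypothetical instance names:

import tensorflow as tf

# `embeddings` (a CharLSTMEmbeddings) and `model` (the pooling classifier)
# are assumed to be constructed elsewhere; names are illustrative only.
chars = tf.placeholder(tf.int32, shape=[None, None, None], name='chars')
word_vecs = embeddings.encode(chars)        # [B, T, lstmsz]
pooled = model.pool(word_vecs,
                    dsz=embeddings.lstmsz,  # word depth produced by encode
                    init=None,              # ignored by pool
                    rnnsz=200,
                    rnntype='blstm')        # pooled: [B, 200]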