Example #1
    def __init__(self,
                 word_emb: np.ndarray,
                 char_emb: np.ndarray,
                 context_limit: int = 450,
                 question_limit: int = 150,
                 char_limit: int = 16,
                 train_char_emb: bool = True,
                 char_hidden_size: int = 100,
                 encoder_hidden_size: int = 75,
                 attention_hidden_size: int = 75,
                 keep_prob: float = 0.7,
                 min_learning_rate: float = 0.001,
                 noans_token: bool = False,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        self.init_word_emb = word_emb
        self.init_char_emb = char_emb
        self.context_limit = context_limit
        self.question_limit = question_limit
        self.char_limit = char_limit
        self.train_char_emb = train_char_emb
        self.char_hidden_size = char_hidden_size
        self.hidden_size = encoder_hidden_size
        self.attention_hidden_size = attention_hidden_size
        self.keep_prob = keep_prob
        self.min_learning_rate = min_learning_rate
        self.noans_token = noans_token

        self.word_emb_dim = self.init_word_emb.shape[1]
        self.char_emb_dim = self.init_char_emb.shape[1]

        self.last_impatience = 0
        self.lr_impatience = 0

        # Prefer the fast cuDNN GRU kernel when a GPU is available; otherwise
        # fall back to the weight-compatible CPU implementation.
        if check_gpu_existence():
            self.GRU = CudnnGRU
        else:
            self.GRU = CudnnCompatibleGRU

        # Allow soft placement (fall back to CPU for ops without GPU kernels)
        # and grow GPU memory on demand instead of reserving it all up front.
        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()

        self._init_optimizer()

        self.sess.run(tf.global_variables_initializer())

        # Try to load the model (if model files exist, the model is restored from them)
        if self.load_path is not None:
            self.load()
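A minimal sketch of the fallback pattern the example above relies on (the import paths below are assumed from DeepPavlov's layout and are not verified):

from deeppavlov.core.common.check_gpu import check_gpu_existence
from deeppavlov.core.layers.tf_layers import CudnnGRU, CudnnCompatibleGRU

# Pick the fast cuDNN kernel on GPU hosts, the weight-compatible cell otherwise.
GRU = CudnnGRU if check_gpu_existence() else CudnnCompatibleGRU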
Example #2
def cudnn_lstm_wrapper(units, n_hidden, n_layers=1, trainable_initial_states=None, seq_lengths=None, initial_h=None,
                       initial_c=None, name='cudnn_lstm', reuse=False):
    if check_gpu_existence():
        return cudnn_lstm(units, n_hidden, n_layers, trainable_initial_states,
                          seq_lengths, initial_h, initial_c, name, reuse)

    log.warning('tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell is used. '
                'This is fine for inference, but a model trained with this cell '
                'can NOT be used with tf.contrib.cudnn_rnn.CudnnLSTMCell later.')

    return cudnn_compatible_lstm(units, n_hidden, n_layers, trainable_initial_states,
                                 seq_lengths, initial_h, initial_c, name, reuse)
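A hedged usage sketch for the wrapper above (TF 1.x; the return structure is assumed to be per-step outputs plus final states, matching the cuDNN layers used elsewhere in this codebase):

import tensorflow as tf

units = tf.placeholder(tf.float32, shape=[None, None, 300])  # [batch, time, features]
seq_lengths = tf.placeholder(tf.int32, shape=[None])

# Same call site whether or not a GPU is present; the wrapper decides.
outputs, state = cudnn_lstm_wrapper(units, n_hidden=128, seq_lengths=seq_lengths)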
Example #3
    def _build_cudnn_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
        if not check_gpu_existence():
            raise RuntimeError('Usage of cuDNN RNN layers requires a GPU along with the cuDNN library')
        sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
        for n, n_hidden in enumerate(n_hidden_list):
            with tf.variable_scope(cell_type.upper() + '_' + str(n)):
                if cell_type.lower() == 'lstm':
                    units, _ = cudnn_bi_lstm(units, n_hidden, sequence_lengths)
                elif cell_type.lower() == 'gru':
                    units, _ = cudnn_bi_gru(units, n_hidden, sequence_lengths)
                else:
                    raise RuntimeError('Wrong cell type "{}"! Only "gru" and "lstm" are supported.'.format(cell_type))
                units = tf.concat(units, -1)
                if intra_layer_dropout and n != len(n_hidden_list) - 1:
                    units = variational_dropout(units, self._dropout_ph)
        return units
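The tf.to_int32(tf.reduce_sum(mask, axis=1)) line above converts a 0/1 padding mask into per-sequence lengths; a tiny self-contained illustration (TF 1.x):

import tensorflow as tf

mask = tf.constant([[1., 1., 1., 0.],
                    [1., 1., 0., 0.]])
lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
with tf.Session() as sess:
    print(sess.run(lengths))  # [3 2]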
Example #4
    def __init__(self, **kwargs):

        if not check_gpu_existence():
            raise RuntimeError('SquadModel requires GPU')

        self.opt = deepcopy(kwargs)
        self.init_word_emb = self.opt['word_emb']
        self.init_char_emb = self.opt['char_emb']
        self.context_limit = self.opt['context_limit']
        self.question_limit = self.opt['question_limit']
        self.char_limit = self.opt['char_limit']
        self.char_hidden_size = self.opt['char_hidden_size']
        self.hidden_size = self.opt['encoder_hidden_size']
        self.attention_hidden_size = self.opt['attention_hidden_size']
        self.keep_prob = self.opt['keep_prob']
        self.learning_rate = self.opt['learning_rate']
        self.min_learning_rate = self.opt['min_learning_rate']
        self.learning_rate_patience = self.opt['learning_rate_patience']
        self.grad_clip = self.opt['grad_clip']
        self.weight_decay = self.opt['weight_decay']
        self.word_emb_dim = self.init_word_emb.shape[1]
        self.char_emb_dim = self.init_char_emb.shape[1]

        self.last_impatience = 0
        self.lr_impatience = 0

        self.sess_config = tf.ConfigProto(allow_soft_placement=True)
        self.sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=self.sess_config)

        self._init_graph()

        self._init_optimizer()

        self.sess.run(tf.global_variables_initializer())

        super().__init__(**kwargs)
        # Try to load the model (if model files exist, the model is restored from them)
        if self.load_path is not None:
            self.load()
            if self.weight_decay < 1.0:
                # weight_decay < 1.0 implies averaged weights were kept; assign them in
                self.sess.run(self.assign_vars)
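The weight_decay < 1.0 branch above suggests that exponentially averaged copies of the weights are maintained during training and assigned back at load time. A hedged, self-contained sketch of that mechanism (TF 1.x; assign_vars in the model is only assumed to be built this way):

import tensorflow as tf

w = tf.get_variable('w', shape=[3], initializer=tf.zeros_initializer())
ema = tf.train.ExponentialMovingAverage(decay=0.999)
maintain_averages_op = ema.apply([w])         # run after each training step
assign_vars = [tf.assign(w, ema.average(w))]  # swap averaged weights back in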
Example #5
    def __init__(self, n_classes: int = 2,
                 dropout_keep_prob: float = 0.5,
                 return_probas: bool = False, **kwargs):
        """

        Args:
            n_classes: number of classes for classification
            dropout_keep_prob: probability of keeping the hidden state, in the range 0 to 1;
                0.5 works well in most cases.
            return_probas: whether to return confidence scores for the relation being
                appropriate instead of hard class labels
            **kwargs: additional parameters passed on to the base class
        """
        kwargs.setdefault('learning_rate_drop_div', 10.0)
        kwargs.setdefault('learning_rate_drop_patience', 5.0)
        kwargs.setdefault('clip_norm', 5.0)

        super().__init__(**kwargs)

        self.n_classes = n_classes
        self.dropout_keep_prob = dropout_keep_prob
        self.return_probas = return_probas
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        
        if check_gpu_existence():
            self.GRU = CudnnGRU
        else:
            self.GRU = CudnnCompatibleGRU

        self.question_ph = tf.placeholder(tf.float32, [None, None, 300])
        self.rel_emb_ph = tf.placeholder(tf.float32, [None, None, 300])

        # Nonzero embedding components mark real tokens; padded steps are all
        # zeros, so casting to bool and reducing yields token masks and lengths.
        r_mask_2 = tf.cast(self.rel_emb_ph, tf.bool)
        r_len_2 = tf.reduce_sum(tf.cast(r_mask_2, tf.int32), axis=2)
        r_mask = tf.cast(r_len_2, tf.bool)
        r_len = tf.reduce_sum(tf.cast(r_mask, tf.int32), axis=1)
        # Mean-pool the relation embedding over its real tokens (divide_no_nan
        # guards against empty sequences).
        rel_emb = tf.math.divide_no_nan(tf.reduce_sum(self.rel_emb_ph, axis=1),
                                        tf.cast(tf.expand_dims(r_len, axis=1), tf.float32))

        self.y_ph = tf.placeholder(tf.int32, shape=(None,))
        self.one_hot_labels = tf.one_hot(self.y_ph, depth=self.n_classes, dtype=tf.float32)
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')

        # Same padding-mask trick for the question tokens
        q_mask_2 = tf.cast(self.question_ph, tf.bool)
        q_len_2 = tf.reduce_sum(tf.cast(q_mask_2, tf.int32), axis=2)
        q_mask = tf.cast(q_len_2, tf.bool)
        q_len = tf.reduce_sum(tf.cast(q_mask, tf.int32), axis=1)

        question_dr = variational_dropout(self.question_ph, keep_prob=self.keep_prob_ph)
        b_size = tf.shape(self.question_ph)[0]

        with tf.variable_scope("question_encode"):
            rnn = self.GRU(num_layers=2, num_units=75, batch_size=b_size, input_size=300, keep_prob=self.keep_prob_ph)
            q = rnn(question_dr, seq_len=q_len)

        with tf.variable_scope("attention"):
            rel_emb_exp = tf.expand_dims(rel_emb, axis=1)
            dot_products = tf.reduce_sum(tf.multiply(q, rel_emb_exp), axis=2)
            s_mask = softmax_mask(dot_products, q_mask)
            att_weights = tf.expand_dims(tf.nn.softmax(s_mask), axis=2)
            self.s_r = tf.reduce_sum(tf.multiply(att_weights, q), axis=1)

            self.logits = tf.layers.dense(tf.multiply(self.s_r, rel_emb), self.n_classes,
                                          activation=None, use_bias=False)
            self.y_pred = tf.argmax(self.logits, axis=-1)

            loss_tensor = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.one_hot_labels, logits=self.logits)

            self.loss = tf.reduce_mean(loss_tensor)
            self.train_op = self.get_train_op(self.loss)

        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
        self.load()
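The example above derives token masks and lengths directly from the float embeddings: any nonzero component casts to True, so all-zero padded steps contribute nothing. A tiny demonstration of the trick (TF 1.x):

import tensorflow as tf

emb = tf.constant([[[0.5, -0.2], [0.0, 0.0], [0.0, 0.0]]])  # 1 real token, 2 pads
tok_mask = tf.cast(tf.reduce_sum(tf.cast(tf.cast(emb, tf.bool), tf.int32), axis=2), tf.bool)
length = tf.reduce_sum(tf.cast(tok_mask, tf.int32), axis=1)
with tf.Session() as sess:
    print(sess.run(length))  # [1]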
Example #6
    def __init__(self, embedder, tag_vocab, ner_vocab, pos_vocab, sess=None):

        # check gpu
        if not check_gpu_existence():
            raise RuntimeError('Ontonotes NER model requires GPU with cuDNN!')

        n_hidden = (256, 256, 256)
        token_embeddings_dim = 100
        n_tags = len(tag_vocab)

        # Create placeholders
        x_word = tf.placeholder(dtype=tf.float32,
                                shape=[None, None, token_embeddings_dim],
                                name='x_word')
        x_char = tf.placeholder(dtype=tf.int32,
                                shape=[None, None, None],
                                name='x_char')

        # Features
        x_pos = tf.placeholder(dtype=tf.float32,
                               shape=[None, None, len(pos_vocab)],
                               name='x_pos')  # Senna
        x_ner = tf.placeholder(dtype=tf.float32,
                               shape=[None, None, len(ner_vocab)],
                               name='x_ner')  # Senna
        x_capi = tf.placeholder(dtype=tf.float32,
                                shape=[None, None],
                                name='x_capi')

        y_true = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='y_tag')
        mask = tf.placeholder(dtype=tf.float32,
                              shape=[None, None],
                              name='mask')
        sequence_lengths = tf.reduce_sum(mask, axis=1)

        # Concat features to embeddings
        emb = tf.concat(
            [x_word, tf.expand_dims(x_capi, 2), x_pos, x_ner], axis=2)

        # The network
        units = emb
        for n, n_h in enumerate(n_hidden):
            with tf.variable_scope('RNN_' + str(n)):
                units, _ = cudnn_bi_lstm(units, n_h,
                                         tf.to_int32(sequence_lengths))

        # Classifier
        with tf.variable_scope('Classifier'):
            units = tf.layers.dense(units,
                                    n_hidden[-1],
                                    kernel_initializer=xavier_initializer())
            logits = tf.layers.dense(units,
                                     n_tags,
                                     kernel_initializer=xavier_initializer())

        # CRF
        _, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, y_true, tf.to_int32(sequence_lengths))

        # Initialize session
        if sess is None:
            sess = tf.Session()

        self._ner_tagger = SennaNERTagger('download/senna/')
        self._pos_tagger = SennaChunkTagger('download/senna/')

        self._x_w = x_word
        self._x_c = x_char
        self._x_capi = x_capi
        self.x_pos = x_pos
        self.x_ner = x_ner
        self._y_true = y_true
        self._mask = mask
        self._sequence_lengths = sequence_lengths
        self._token_embeddings_dim = token_embeddings_dim

        self._pos_dict = pos_vocab
        self._ner_dict = ner_vocab
        self._tag_dict = tag_vocab

        self._logits = logits
        self._transition_params = transition_params

        self._sess = sess
        sess.run(tf.global_variables_initializer())
        self._embedder = embedder
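The example above stores the CRF transition parameters but not the decoding step. At inference time, tag sequences would typically be recovered with Viterbi decoding over those parameters; a self-contained sketch with dummy scores (TF 1.x contrib API):

import numpy as np
import tensorflow as tf

unary = np.random.randn(3, 4).astype(np.float32)        # 3 tokens, 4 tags
transitions = np.random.randn(4, 4).astype(np.float32)  # learned in the model
tags, score = tf.contrib.crf.viterbi_decode(unary, transitions)
print(tags)  # best-scoring tag sequence, e.g. [2, 0, 3]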