Example #1
 def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout):
     # Stack bidirectional RNN layers; each layer concatenates its forward
     # and backward outputs along the feature axis.
     for n, n_hidden in enumerate(n_hidden_list):
         units, _ = bi_rnn(units, n_hidden, cell_type=cell_type, name='Layer_' + str(n))
         units = tf.concat(units, -1)
         # Variational dropout between layers, but not after the last one.
         if intra_layer_dropout and n != len(n_hidden_list) - 1:
             units = variational_dropout(units, self._dropout_ph)
     return units
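
A minimal usage sketch, called from inside the model class (shapes and
hyperparameters are hypothetical; bi_rnn and variational_dropout are assumed
to come from the same module as the example above):

    # token embeddings: [batch_size, max_seq_len, emb_dim]
    units = tf.placeholder(tf.float32, [None, None, 100])
    # two stacked bi-LSTM layers of 128 hidden units each, with
    # variational dropout applied between (not after) them
    output = self._build_rnn(units, n_hidden_list=[128, 128],
                             cell_type='lstm', intra_layer_dropout=True)
    # output shape: [batch_size, max_seq_len, 2 * 128]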
Example #2
 def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
     # Recover per-sample sequence lengths from the binary padding mask so
     # the RNN skips padded positions.
     sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
     for n, n_hidden in enumerate(n_hidden_list):
         units, _ = bi_rnn(units, n_hidden, cell_type=cell_type,
                           seq_lengths=sequence_lengths, name='Layer_' + str(n))
         units = tf.concat(units, -1)
         if intra_layer_dropout and n != len(n_hidden_list) - 1:
             units = variational_dropout(units, self._dropout_ph)
     return units
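
This variant differs from Example #1 only in the mask argument. For
illustration, a mask of the kind consumed here can be built from raw
sequence lengths with tf.sequence_mask (the lengths below are hypothetical):

    lengths = tf.constant([3, 5, 2])
    mask = tf.cast(tf.sequence_mask(lengths, maxlen=5), tf.float32)
    # mask == [[1, 1, 1, 0, 0],
    #          [1, 1, 1, 1, 1],
    #          [1, 1, 0, 0, 0]]
    # tf.reduce_sum(mask, axis=1) recovers [3, 5, 2], as in the example above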
Example #3
    def _init_graph(self) -> None:
        self._init_placeholders()

        units = super()._init_graph()

        with tf.variable_scope('ner'):
            units = token_from_subtoken(units, self.y_masks_ph)
            if self.use_birnn:
                units, _ = bi_rnn(units,
                                  self.birnn_hidden_size,
                                  cell_type=self.birnn_cell_type,
                                  seq_lengths=self.seq_lengths,
                                  name='birnn')
                units = tf.concat(units, -1)
            # arc (head) prediction: separate projections for heads and dependents
            head_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
            head_embeddings = tf.nn.dropout(head_embeddings, self.embeddings_keep_prob_ph)
            dep_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
            dep_embeddings = tf.nn.dropout(dep_embeddings, self.embeddings_keep_prob_ph)
            # Biaffine attention scores every (dependent, head) pair;
            # argmax over the last axis picks the most likely head per token.
            self.dep_head_similarities = biaffine_attention(dep_embeddings, head_embeddings)
            self.dep_heads = tf.argmax(self.dep_head_similarities, -1)
            self.dep_head_probs = tf.nn.softmax(self.dep_head_similarities)
            # for dependency types
            head_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
            head_embeddings = tf.nn.dropout(head_embeddings, self.embeddings_keep_prob_ph)
            dep_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
            dep_embeddings = tf.nn.dropout(dep_embeddings, self.embeddings_keep_prob_ph)
            # match each word with its head: label logits are computed for
            # (word, head) pairs, so head embeddings are gathered at the
            # head indices given by self.y_head_ph
            head_embeddings = gather_indexes(head_embeddings, self.y_head_ph)
            self.dep_logits = biaffine_layer(dep_embeddings, head_embeddings,
                                             deps_dim=self.state_size, heads_dim=self.state_size,
                                             output_dim=self.n_deps)
            self.deps = tf.argmax(self.dep_logits, -1)
            self.dep_probs = tf.nn.softmax(self.dep_logits)
            if self.predict_tags:
                tag_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
                tag_embeddings = tf.nn.dropout(tag_embeddings, self.embeddings_keep_prob_ph)
                self.tag_logits = tf.layers.dense(tag_embeddings, units=self.n_tags)
                self.tags = tf.argmax(self.tag_logits, -1)
                self.tag_probs = tf.nn.softmax(self.tag_logits)
        with tf.variable_scope("loss"):
            tag_mask = self._get_tag_mask()
            y_mask = tf.cast(tag_mask, tf.float32)
            # total loss = arc loss (head selection) + label loss (dependency type)
            self.loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y_head_ph,
                                                               logits=self.dep_head_similarities,
                                                               weights=y_mask)
            self.loss += tf.losses.sparse_softmax_cross_entropy(labels=self.y_dep_ph,
                                                                logits=self.dep_logits,
                                                                weights=y_mask)
            if self.predict_tags:
                tag_loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y_tag_ph,
                                                                  logits=self.tag_logits,
                                                                  weights=y_mask)
                self.loss += self.tag_weight_ph * tag_loss
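
For reference, the bilinear core of a scorer like biaffine_attention above
can be written in a few lines. This is an illustrative sketch, not the
biaffine_attention implementation called above; the variable U and the use
of a plain bilinear term (full biaffine layers also add linear and bias
terms) are assumptions:

    def biaffine_scores_sketch(deps, heads):
        # deps, heads: [batch, seq_len, state_size]
        state_size = deps.get_shape().as_list()[-1]
        U = tf.get_variable('U', [state_size, state_size],
                            initializer=tf.glorot_uniform_initializer())
        # scores[b, i, j] = deps[b, i] . U . heads[b, j]
        tmp = tf.einsum('bid,de->bie', deps, U)
        scores = tf.einsum('bie,bje->bij', tmp, heads)
        return scores  # [batch, seq_len, seq_len]; argmax over j picks i's head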
Example #4
    def _init_graph(self) -> None:
        self._init_placeholders()

        units = super()._init_graph()

        with tf.variable_scope('ner'):
            if self.use_birnn:
                units, _ = bi_rnn(units,
                                  self.birnn_hidden_size,
                                  cell_type=self.birnn_cell_type,
                                  seq_lengths=self.seq_lengths,
                                  name='birnn')
                units = tf.concat(units, -1)
            # TODO: maybe add one more layer?
            logits = tf.layers.dense(units,
                                     units=self.n_tags,
                                     name="output_dense")

            self.logits = token_from_subtoken(logits, self.y_masks_ph)

            # CRF: learn tag transition scores and use the sequence-level
            # negative log-likelihood as the training loss
            if self.use_crf:
                transition_params = tf.get_variable(
                    'Transition_Params',
                    shape=[self.n_tags, self.n_tags],
                    initializer=tf.zeros_initializer())
                log_likelihood, transition_params = \
                    tf.contrib.crf.crf_log_likelihood(self.logits,
                                                      self.y_ph,
                                                      self.seq_lengths,
                                                      transition_params)
                loss_tensor = -log_likelihood
                self._transition_params = transition_params

            self.y_predictions = tf.argmax(self.logits, -1)
            self.y_probas = tf.nn.softmax(self.logits, axis=2)

        with tf.variable_scope("loss"):
            tag_mask = self._get_tag_mask()
            y_mask = tf.cast(tag_mask, tf.float32)
            if self.use_crf:
                self.loss = tf.reduce_mean(loss_tensor)
            else:
                self.loss = tf.losses.sparse_softmax_cross_entropy(
                    labels=self.y_ph, logits=self.logits, weights=y_mask)
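
Note that y_predictions above is a plain per-token argmax even when the CRF
is enabled; decoding that respects the learned transition scores would use
Viterbi instead. A sketch with the TF1 contrib API, reusing the tensor names
from the example above:

    # Viterbi decoding under the learned transition matrix
    viterbi_tags, viterbi_score = tf.contrib.crf.crf_decode(
        self.logits,              # [batch, max_seq_len, n_tags] unary scores
        self._transition_params,  # [n_tags, n_tags] learned transitions
        self.seq_lengths)         # [batch] true sequence lengths
    # viterbi_tags: [batch, max_seq_len] int32 best tag sequence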
Example #5
    def _init_graph(self) -> None:
        self._init_placeholders()

        self.seq_lengths = tf.reduce_sum(self.y_masks_ph, axis=1)

        self.bert = BertModel(config=self.bert_config,
                              is_training=self.is_train_ph,
                              input_ids=self.input_ids_ph,
                              input_mask=self.input_masks_ph,
                              token_type_ids=self.token_types_ph,
                              use_one_hot_embeddings=False)

        encoder_layers = [
            self.bert.all_encoder_layers[i] for i in self.encoder_layer_ids
        ]

        with tf.variable_scope('ner'):
            layer_weights = tf.get_variable('layer_weights_',
                                            shape=len(encoder_layers),
                                            initializer=tf.ones_initializer(),
                                            trainable=True)
            layer_weights = tf.unstack(layer_weights / len(encoder_layers))
            # TODO: maybe stack and reduce_sum is faster
            # learned weighted sum of the selected BERT encoder layers
            units = sum(w * l for w, l in zip(layer_weights, encoder_layers))
            units = tf.nn.dropout(units, keep_prob=self.keep_prob_ph)
            if self.use_birnn:
                units, _ = bi_rnn(units,
                                  self.birnn_hidden_size,
                                  cell_type=self.birnn_cell_type,
                                  seq_lengths=self.seq_lengths,
                                  name='birnn')
                units = tf.concat(units, -1)
            # TODO: maybe add one more layer?
            logits = tf.layers.dense(units,
                                     units=self.n_tags,
                                     name="output_dense")

            self.logits = self.token_from_subtoken(logits, self.y_masks_ph)

            # Build a [batch, max_len] mask that is 1 at positions before each
            # sequence's end and 0 afterwards: one-hot the last valid index,
            # then take a reversed cumulative sum.
            max_length = tf.reduce_max(self.seq_lengths)
            one_hot_max_len = tf.one_hot(self.seq_lengths - 1, max_length)
            tag_mask = tf.cumsum(one_hot_max_len[:, ::-1], axis=1)[:, ::-1]

            # CRF
            if self.use_crf:
                transition_params = tf.get_variable(
                    'Transition_Params',
                    shape=[self.n_tags, self.n_tags],
                    initializer=tf.zeros_initializer())
                log_likelihood, transition_params = \
                    tf.contrib.crf.crf_log_likelihood(self.logits,
                                                      self.y_ph,
                                                      self.seq_lengths,
                                                      transition_params)
                loss_tensor = -log_likelihood
                self._transition_params = transition_params

            self.y_predictions = tf.argmax(self.logits, -1)
            self.y_probas = tf.nn.softmax(self.logits, axis=2)

        with tf.variable_scope("loss"):
            y_mask = tf.cast(tag_mask, tf.float32)
            if self.use_crf:
                self.loss = tf.reduce_mean(loss_tensor)
            else:
                self.loss = tf.losses.sparse_softmax_cross_entropy(
                    labels=self.y_ph, logits=self.logits, weights=y_mask)
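
The TODO comment above suggests replacing the Python-level sum over layers
with a single fused op. A sketch of that alternative, where layer_weights is
the raw [n_layers] variable before tf.unstack:

    # stack once, broadcast the weights, and reduce in a single op
    stacked = tf.stack(encoder_layers, axis=0)  # [n_layers, batch, seq, hidden]
    units = tf.reduce_sum(
        stacked * layer_weights[:, None, None, None] / len(encoder_layers),
        axis=0)                                 # [batch, seq, hidden]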