import tensorflow as tf

# NOTE: the helper layers used below (bi_rnn, variational_dropout,
# token_from_subtoken, biaffine_attention, biaffine_layer, gather_indexes)
# and BertModel are assumed to be imported from the surrounding library;
# the exact import paths are omitted here.


def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout):
    # Stack bidirectional RNN layers; each layer's forward and backward
    # outputs are concatenated along the feature axis.
    for n, n_hidden in enumerate(n_hidden_list):
        units, _ = bi_rnn(units, n_hidden, cell_type=cell_type,
                          name='Layer_' + str(n))
        units = tf.concat(units, -1)
        # Apply dropout between layers, but not after the last one.
        if intra_layer_dropout and n != len(n_hidden_list) - 1:
            units = variational_dropout(units, self._dropout_ph)
    return units
def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
    # Variant that respects padding: summing the binary mask per row gives
    # the true sequence lengths, which bi_rnn uses to stop at the last
    # real time step.
    sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
    for n, n_hidden in enumerate(n_hidden_list):
        units, _ = bi_rnn(units, n_hidden, cell_type=cell_type,
                          seq_lengths=sequence_lengths,
                          name='Layer_' + str(n))
        units = tf.concat(units, -1)
        if intra_layer_dropout and n != len(n_hidden_list) - 1:
            units = variational_dropout(units, self._dropout_ph)
    return units
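# NOTE: a minimal sketch of what variational_dropout is assumed to do
# (Gal & Ghahramani style): sample one dropout mask per sequence and reuse
# it at every time step instead of resampling per step. This is an
# illustration under that assumption, not the library's implementation.
def variational_dropout(units, keep_prob):
    # units: [batch, time, features]; fixing the time dimension of the
    # noise shape to 1 broadcasts the same mask across all time steps.
    noise_shape = [tf.shape(units)[0], 1, tf.shape(units)[2]]
    return tf.nn.dropout(units, keep_prob, noise_shape=noise_shape)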
def _init_graph(self) -> None:
    self._init_placeholders()
    units = super()._init_graph()

    with tf.variable_scope('ner'):
        # Collapse subtoken-level BERT outputs to one vector per token.
        units = token_from_subtoken(units, self.y_masks_ph)
        if self.use_birnn:
            units, _ = bi_rnn(units,
                              self.birnn_hidden_size,
                              cell_type=self.birnn_cell_type,
                              seq_lengths=self.seq_lengths,
                              name='birnn')
            units = tf.concat(units, -1)

        # Head prediction: separate head/dependent projections feed a
        # biaffine scorer over all token pairs.
        head_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
        head_embeddings = tf.nn.dropout(head_embeddings, self.embeddings_keep_prob_ph)
        dep_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
        dep_embeddings = tf.nn.dropout(dep_embeddings, self.embeddings_keep_prob_ph)
        self.dep_head_similarities = biaffine_attention(dep_embeddings, head_embeddings)
        self.dep_heads = tf.argmax(self.dep_head_similarities, -1)
        self.dep_head_probs = tf.nn.softmax(self.dep_head_similarities)

        # Dependency-type prediction: a second pair of projections, scored
        # against each word's gold head.
        head_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
        head_embeddings = tf.nn.dropout(head_embeddings, self.embeddings_keep_prob_ph)
        dep_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
        dep_embeddings = tf.nn.dropout(dep_embeddings, self.embeddings_keep_prob_ph)
        # Match each word with its head.
        head_embeddings = gather_indexes(head_embeddings, self.y_head_ph)
        self.dep_logits = biaffine_layer(dep_embeddings,
                                         head_embeddings,
                                         deps_dim=self.state_size,
                                         heads_dim=self.state_size,
                                         output_dim=self.n_deps)
        self.deps = tf.argmax(self.dep_logits, -1)
        self.dep_probs = tf.nn.softmax(self.dep_logits)

        if self.predict_tags:
            tag_embeddings = tf.layers.dense(units, units=self.state_size, activation="relu")
            tag_embeddings = tf.nn.dropout(tag_embeddings, self.embeddings_keep_prob_ph)
            self.tag_logits = tf.layers.dense(tag_embeddings, units=self.n_tags)
            self.tags = tf.argmax(self.tag_logits, -1)
            self.tag_probs = tf.nn.softmax(self.tag_logits)

    with tf.variable_scope("loss"):
        tag_mask = self._get_tag_mask()
        y_mask = tf.cast(tag_mask, tf.float32)
        # Arc loss + label loss, restricted to real (non-padding) tokens.
        self.loss = tf.losses.sparse_softmax_cross_entropy(
            labels=self.y_head_ph, logits=self.dep_head_similarities, weights=y_mask)
        self.loss += tf.losses.sparse_softmax_cross_entropy(
            labels=self.y_dep_ph, logits=self.dep_logits, weights=y_mask)
        if self.predict_tags:
            tag_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=self.y_tag_ph, logits=self.tag_logits, weights=y_mask)
            self.loss += self.tag_weight_ph * tag_loss
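# NOTE: biaffine_attention and gather_indexes are library helpers. The
# sketches below are hedged assumptions about their semantics, added for
# clarity; they are not the actual implementations.

def biaffine_attention(deps, heads, name='biaffine_attention'):
    # deps, heads: [batch, time, dim]. Returns [batch, time, time] arc
    # scores, assuming score[b, i, j] = [dep_i; 1]^T W head_j (the bias is
    # folded into W via the appended constant 1).
    with tf.variable_scope(name):
        dim = deps.get_shape().as_list()[-1]
        W = tf.get_variable('W', shape=[dim + 1, dim],
                            initializer=tf.glorot_uniform_initializer())
        deps = tf.concat([deps, tf.ones_like(deps[..., :1])], axis=-1)
        return tf.einsum('bid,dh,bjh->bij', deps, W, heads)


def gather_indexes(sequence, positions):
    # sequence: [batch, time, dim]; positions: [batch, time] of indices.
    # Returns out[b, i] = sequence[b, positions[b, i]], aligning each word
    # with the embedding of its (gold) head.
    batch_size = tf.shape(sequence)[0]
    seq_len = tf.shape(sequence)[1]
    batch_idx = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seq_len])
    full_idx = tf.stack([batch_idx, tf.cast(positions, tf.int32)], axis=-1)
    return tf.gather_nd(sequence, full_idx)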
def _init_graph(self) -> None:
    self._init_placeholders()
    units = super()._init_graph()

    with tf.variable_scope('ner'):
        if self.use_birnn:
            units, _ = bi_rnn(units,
                              self.birnn_hidden_size,
                              cell_type=self.birnn_cell_type,
                              seq_lengths=self.seq_lengths,
                              name='birnn')
            units = tf.concat(units, -1)
        # TODO: maybe add one more layer?
        logits = tf.layers.dense(units, units=self.n_tags, name="output_dense")
        # Keep one logit vector per token (first subtoken of each word).
        self.logits = token_from_subtoken(logits, self.y_masks_ph)

        # CRF
        if self.use_crf:
            transition_params = tf.get_variable(
                'Transition_Params',
                shape=[self.n_tags, self.n_tags],
                initializer=tf.zeros_initializer())
            log_likelihood, transition_params = \
                tf.contrib.crf.crf_log_likelihood(self.logits,
                                                  self.y_ph,
                                                  self.seq_lengths,
                                                  transition_params)
            loss_tensor = -log_likelihood
            self._transition_params = transition_params

        self.y_predictions = tf.argmax(self.logits, -1)
        self.y_probas = tf.nn.softmax(self.logits, axis=2)

    with tf.variable_scope("loss"):
        tag_mask = self._get_tag_mask()
        y_mask = tf.cast(tag_mask, tf.float32)
        if self.use_crf:
            self.loss = tf.reduce_mean(loss_tensor)
        else:
            self.loss = tf.losses.sparse_softmax_cross_entropy(
                labels=self.y_ph, logits=self.logits, weights=y_mask)
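# NOTE: token_from_subtoken maps subtoken-level outputs back to token level
# by keeping only positions where the mask is 1 (the first subtoken of each
# word) and left-aligning them. A hedged sketch of that assumed behavior,
# not the library's implementation:
def token_from_subtoken(units, mask):
    # units: [batch, n_subtokens, dim]; mask: [batch, n_subtokens] with ones
    # at the first subtoken of every word.
    # Returns [batch, n_tokens, dim], zero-padded on the right.
    mask = tf.cast(mask, tf.int32)
    batch_size = tf.shape(units)[0]
    dim = tf.shape(units)[2]
    n_tokens = tf.reduce_max(tf.reduce_sum(mask, axis=1))
    keep = tf.equal(mask, 1)
    src_idx = tf.where(keep)                       # [N, 2] (row, subtoken)
    kept_vectors = tf.gather_nd(units, src_idx)    # [N, dim]
    # cumsum - 1 ranks each kept position within its row -> target slot.
    slots = tf.boolean_mask(tf.cumsum(mask, axis=1) - 1, keep)
    dst_idx = tf.stack([tf.cast(src_idx[:, 0], tf.int32), slots], axis=1)
    return tf.scatter_nd(dst_idx, kept_vectors,
                         tf.stack([batch_size, n_tokens, dim]))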
def _init_graph(self) -> None:
    self._init_placeholders()
    self.seq_lengths = tf.reduce_sum(self.y_masks_ph, axis=1)
    self.bert = BertModel(config=self.bert_config,
                          is_training=self.is_train_ph,
                          input_ids=self.input_ids_ph,
                          input_mask=self.input_masks_ph,
                          token_type_ids=self.token_types_ph,
                          use_one_hot_embeddings=False)

    encoder_layers = [self.bert.all_encoder_layers[i]
                      for i in self.encoder_layer_ids]

    with tf.variable_scope('ner'):
        # Learned scalar weights mix the selected encoder layers into a
        # single representation.
        layer_weights = tf.get_variable('layer_weights_',
                                        shape=len(encoder_layers),
                                        initializer=tf.ones_initializer(),
                                        trainable=True)
        layer_weights = tf.unstack(layer_weights / len(encoder_layers))
        # TODO: maybe stack and reduce_sum is faster
        units = sum(w * l for w, l in zip(layer_weights, encoder_layers))
        units = tf.nn.dropout(units, keep_prob=self.keep_prob_ph)

        if self.use_birnn:
            units, _ = bi_rnn(units,
                              self.birnn_hidden_size,
                              cell_type=self.birnn_cell_type,
                              seq_lengths=self.seq_lengths,
                              name='birnn')
            units = tf.concat(units, -1)

        # TODO: maybe add one more layer?
        logits = tf.layers.dense(units, units=self.n_tags, name="output_dense")
        self.logits = self.token_from_subtoken(logits, self.y_masks_ph)

        # Build a binary mask with ones up to each sequence length: start
        # from the one-hot of each last position, reverse, cumsum, reverse.
        max_length = tf.reduce_max(self.seq_lengths)
        one_hot_max_len = tf.one_hot(self.seq_lengths - 1, max_length)
        tag_mask = tf.cumsum(one_hot_max_len[:, ::-1], axis=1)[:, ::-1]

        # CRF
        if self.use_crf:
            transition_params = tf.get_variable(
                'Transition_Params',
                shape=[self.n_tags, self.n_tags],
                initializer=tf.zeros_initializer())
            log_likelihood, transition_params = \
                tf.contrib.crf.crf_log_likelihood(self.logits,
                                                  self.y_ph,
                                                  self.seq_lengths,
                                                  transition_params)
            loss_tensor = -log_likelihood
            self._transition_params = transition_params

        self.y_predictions = tf.argmax(self.logits, -1)
        self.y_probas = tf.nn.softmax(self.logits, axis=2)

    with tf.variable_scope("loss"):
        y_mask = tf.cast(tag_mask, tf.float32)
        if self.use_crf:
            self.loss = tf.reduce_mean(loss_tensor)
        else:
            self.loss = tf.losses.sparse_softmax_cross_entropy(
                labels=self.y_ph, logits=self.logits, weights=y_mask)
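# NOTE: the one_hot/cumsum construction of tag_mask above is equivalent to
# the built-in helper, shown here for reference:
#
#     tag_mask = tf.sequence_mask(self.seq_lengths, maxlen=max_length,
#                                 dtype=tf.float32)
#
# e.g. for seq_lengths = [2], max_length = 4: one_hot(1, 4) = [0, 1, 0, 0],
# reversed -> [0, 0, 1, 0], cumsum -> [0, 0, 1, 1], reversed -> [1, 1, 0, 0].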