Example 1
    def forward(self, features, labels, mode, params):
        outputs = dict()
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

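        # Feature keys are expected to look like "<field>/<namespace>"
        # (e.g. "premise/tokens"); only features under the configured
        # vocabulary namespace are embedded with BERT below.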
        for (feature_key, feature) in features.items():
            if '/' not in feature_key:
                continue
            feature_key_fields = feature_key.split("/")
            feature_namespace = feature_key_fields[1].strip()
            field_name = feature_key_fields[0].strip()
            if feature_namespace == self._vocab_namespace:
                with tf.variable_scope("embedding/"+self._vocab_namespace, reuse=tf.AUTO_REUSE):
                    input_ids = feature
                    input_mask = None
                    if self._mask_namespace:
                        mask_feature_key = field_name+"/"+self._mask_namespace
                        if mask_feature_key in features:
                            input_mask = features[field_name+"/"+self._mask_namespace]
                        else:
                            logger.warning("The mask namespace %s with field name %s is not in features (%s)"
                                           % (self._mask_namespace, field_name, mask_feature_key))
                    if input_mask is None:
                        input_length, input_mask = nn.length(input_ids)
                    else:
                        input_length, _ = nn.length(input_ids)
                    model = BertModel(
                        config=self._bert_config,
                        is_training=is_training,
                        input_ids=input_ids,
                        input_mask=input_mask,
                        use_one_hot_embeddings=self._use_one_hot_embeddings)

                    embedding_output = model.get_sequence_output()

                    if self._remove_bos_eos:
                        embedding_output = nn.remove_bos_eos(embedding_output, input_length)

                    dropout_rate = params.get('dropout_rate')
                    if dropout_rate is None:
                        dropout_rate = self._dropout_rate
                    emb_drop = tf.layers.dropout(embedding_output, dropout_rate, training=is_training)
                    if self._projection_dim:
                        emb_drop = tf.layers.dense(emb_drop, self._projection_dim, use_bias=False,
                                                   kernel_initializer=initializers.xavier_initializer())
                    outputs[feature_key] = emb_drop
        return outputs
Example 2
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

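            # Encode premise and hypothesis separately with LSTMs before matching.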
            h_s, c1 = nn.lstm(premise_tokens,
                              self._hidden_dim,
                              seq_len=prem_seq_lengths,
                              name='premise')
            h_t, c2 = nn.lstm(hypothesis_tokens,
                              self._hidden_dim,
                              seq_len=hyp_seq_lengths,
                              name='hypothesis')

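        # Match-LSTM: the cell attends over the premise states (h_s) at each
        # step while consuming the hypothesis states (h_t).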
        lstm_m = MatchLSTMCell(self._hidden_dim, h_s, prem_mask)

        k_m, _ = tf.nn.dynamic_rnn(lstm_m,
                                   h_t,
                                   hyp_seq_lengths,
                                   dtype=tf.float32)

        k_valid = select(k_m, hyp_seq_lengths)
        output_dict = self._make_output(k_valid, params)

        if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
            if 'label/labels' not in features:
                raise ConfigureError(
                    "The input features should contain label with vocabulary namespace "
                    "labels int %s dataset." % mode)
            labels_embedding = features_embedding['label/labels']
            labels = features['label/labels']

            loss = self._make_loss(labels=labels_embedding,
                                   logits=output_dict['logits'],
                                   params=params)
            output_dict['loss'] = loss
            metrics = dict()
            metrics['accuracy'] = tf.metrics.accuracy(
                labels=labels, predictions=output_dict['predictions'])
            metrics['precision'] = tf.metrics.precision(
                labels=labels, predictions=output_dict['predictions'])
            metrics['recall'] = tf.metrics.recall(
                labels=labels, predictions=output_dict['predictions'])
            # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)
            output_dict['metrics'] = metrics
            # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
            #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
        return output_dict
Example 3
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2

            #prem_mask = tf.expand_dims(prem_mask, -1)
            prem_mask = tf.cast(prem_mask, tf.bool)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)

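            # Directional self-attention over the premise: forward and backward
            # encodings are combined by a fusion gate, then pooled into a single
            # sentence vector for classification.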
            with tf.variable_scope('san_fb1'):
                x_fw1 = query_encode_san(premise_tokens, prem_mask,
                                         'forward')  # bs, ql, vec
                x_bw1 = query_encode_san(premise_tokens, prem_mask,
                                         'backward')  # bs, ql, vec
                x_fusion = fusion_gate(premise_tokens, prem_mask, x_fw1,
                                       x_bw1)  # bs, ql, vec
            with tf.variable_scope('san_md'):
                x_code = query_encode_md(x_fusion, prem_mask)  # bs, vec

                pre_logits = tf.nn.relu(
                    linear(x_code,
                           self._hidden_dim,
                           True,
                           scope='pre_logits_linear',
                           is_train=True))  # bs, vec
                logits = linear(pre_logits,
                                self._num_classes,
                                False,
                                scope='get_output',
                                is_train=True)  # bs, cn

            output_dict = self._make_output(logits, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example 4
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

            # 2.Input Encoder
            # 2.1 Highway Encoder
            query_emb = premise_tokens
            doc_emb = hypothesis_tokens
            query_len = prem_seq_lengths
            doc_len = hyp_seq_lengths
            query_mask = prem_mask
            doc_mask = hyp_mask
            project_dim = premise_tokens.shape[-1].value
            query_length = tf.shape(premise_tokens)[1]
            doc_length = tf.shape(hypothesis_tokens)[1]

            query_output = nn.highway_network(query_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training,
                                              scope="query_highway")
            doc_output = nn.highway_network(doc_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training,
                                            scope="doc_highway")

            # # 2.2 Co-Attention
            M = tf.Variable(tf.random_normal([project_dim, project_dim], stddev=0.1))
            tmp = tf.einsum("ijk,kl->ijl", query_output, M)
            S = tf.matmul(tmp, doc_output, transpose_b=True)  # [batch, q, d]
            S_mask = tf.matmul(query_mask, doc_mask, transpose_b=True)
            S_mean = S * S_mask  # zero out padded positions for mean pooling
            S_align_max = S + (1. - S_mask) * tf.float32.min  # push padded positions to -inf for max pooling

            # 2.2.1 Extractive Pooling
            # Max Pooling
            query_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=2, keepdims=True), axis=1)
            query_maxpooling = tf.reduce_sum(query_score * query_output, axis=1) # [batch, r]

            doc_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=1, keepdims=True), axis=2)
            doc_maxpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1) # [batch, r]

            # Mean Pooling
            query_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=2, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(doc_len, tf.float32)+self._eps, -1), -1)), axis=1)
            query_meanpooling = tf.reduce_sum(query_score * query_output, axis=1)  # [batch, r]
            doc_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=1, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(query_len, tf.float32)+self._eps, -1), -1)), axis=2)
            doc_meanpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1)  # [batch, r]

            # 2.2.2 Alignment Pooling
            query_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=2), doc_output)  # [batch, q, r]
            doc_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=1), query_output, transpose_a=True)  # [batch, d, r]

            # 2.2.3 Intra Attention
            query_selfattn = nn.self_attention(query_output, query_len)
            doc_selfattn = nn.self_attention(doc_output, doc_len)

            # 2.3 Multi-Cast Attention
            query_maxpooling = tf.tile(tf.expand_dims(query_maxpooling, axis=1), [1, query_length, 1])
            query_meanpooling = tf.tile(tf.expand_dims(query_meanpooling, axis=1), [1, query_length, 1])
            doc_maxpooling = tf.tile(tf.expand_dims(doc_maxpooling, axis=1), [1, doc_length, 1])
            doc_meanpooling = tf.tile(tf.expand_dims(doc_meanpooling, axis=1), [1, doc_length, 1])

            query_max_fc, query_max_fm, query_max_fs = self.cast_attention(query_maxpooling, query_emb, self.nn_fc, name="query_max_pooling")
            query_mean_fc, query_mean_fm, query_mean_fs = self.cast_attention(query_meanpooling, query_emb, self.nn_fc, name="query_mean_pooling")
            query_align_fcm, query_align_fm, query_align_fs = self.cast_attention(query_alignment, query_emb, self.nn_fc, name="query_align_pooling")
            query_selfattn_fc, query_selfattn_fm, query_selfattn_fs = self.cast_attention(query_selfattn, query_emb, self.nn_fc, name="query_self_pooling")

            doc_max_fc, doc_max_fm, doc_max_fs = self.cast_attention(doc_maxpooling, doc_emb, self.nn_fc, name="doc_max_pooling")
            doc_mean_fc, doc_mean_fm, doc_mean_fs = self.cast_attention(doc_meanpooling, doc_emb, self.nn_fc, name="doc_mean_pooling")
            doc_align_fcm, doc_align_fm, doc_align_fs = self.cast_attention(doc_alignment, doc_emb, self.nn_fc, name="doc_align_pooling")
            doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs = self.cast_attention(doc_selfattn, doc_emb, self.nn_fc, name="doc_self_pooling")

            query_cast = tf.concat(
                [query_max_fc, query_max_fm, query_max_fs, query_mean_fc, query_mean_fm, query_mean_fs, query_align_fcm,
                 query_align_fm, query_align_fs, query_selfattn_fc, query_selfattn_fm, query_selfattn_fs, query_output],
                axis=2)
            doc_cast = tf.concat(
                [doc_max_fc, doc_max_fm, doc_max_fs, doc_mean_fc, doc_mean_fm, doc_mean_fs, doc_align_fcm,
                 doc_align_fm, doc_align_fs, doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs, doc_output], axis=2)

            # query_cast = tf.concat(
            #     [
            #      query_output],
            #     axis=2)
            # doc_cast = tf.concat(
            #     [doc_output], axis=2)

            query_cast = tf.layers.dropout(query_cast, self._dropout_rate, training=is_training)
            doc_cast = tf.layers.dropout(doc_cast, self._dropout_rate, training=is_training)

            query_hidden, _ = nn.bi_lstm(query_cast, self._hidden_dim, name="query_lstm")
            doc_hidden, _ = nn.bi_lstm(doc_cast, self._hidden_dim, name="doc_lstm")

            query_hidden = tf.concat(query_hidden, axis=2)
            doc_hidden = tf.concat(doc_hidden, axis=2)
            query_hidden = tf.layers.dropout(query_hidden, self._dropout_rate, training=is_training)
            doc_hidden = tf.layers.dropout(doc_hidden, self._dropout_rate, training=is_training)

            #query_hidden_max = query_hidden + (1. - query_mask) * tf.float32.min
            #doc_hidden_max = doc_hidden + (1. - doc_mask) * tf.float32.min
            query_hidden_mean = query_hidden * query_mask
            doc_hidden_mean = doc_hidden * doc_mask

            query_sum = tf.reduce_sum(query_hidden_mean, axis=1)
            query_mean = tf.div(query_sum, tf.expand_dims(tf.cast(query_len, tf.float32), -1) + self._eps)

            query_max = tf.reduce_max(query_hidden_mean, axis=1)
            query_final = tf.concat([query_mean, query_max], axis=1)

            doc_sum = tf.reduce_sum(doc_hidden_mean, axis=1)
            doc_mean = tf.div(doc_sum, tf.expand_dims(tf.cast(doc_len, tf.float32), -1) + self._eps)

            doc_max = tf.reduce_max(doc_hidden_mean, axis=1)
            doc_final = tf.concat([doc_mean, doc_max], axis=1)

            final = tf.concat([query_final, doc_final, query_final * doc_final, query_final - doc_final], axis=1)
            #yout = nn.highway_network(final, 2, dropout_rate=self._drop_rate, is_trainging=is_training)
            # MLP layer
            yout = tf.contrib.layers.fully_connected(final, self._hidden_dim, scope='fc1')
            # Dropout applied to classifier

            output_dict = self._make_output(yout, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = []
                # debug_ops = [query_mean_fs]#[query_maxpooling, query_max_fc] [query_max_fm, query_max_fs],[query_mean_fc, query_mean_fm] , ,
                # for op in debug_ops:
                #     output_dict['debugs'].append(tf.shape(op))
                # output_dict['debugs'].append(query_length)
            return output_dict
Example 5
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)

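            # Encode the premise with a bi-directional LSTM; padding is masked out below.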
            premise_outs, c1 = nn.bi_lstm(premise_tokens,
                                          self._hidden_dim,
                                          seq_len=prem_seq_lengths,
                                          name='premise')

            premise_bi = tf.concat(premise_outs, axis=2)

            premise_bi = premise_bi * prem_mask

            eps = 1e-11
            ### Mean pooling
            premise_sum = tf.reduce_sum(premise_bi, 1)
            premise_ave = tf.div(
                premise_sum,
                tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1) +
                eps)

            # MLP layer
            h_mlp = tf.contrib.layers.fully_connected(premise_ave,
                                                      self._hidden_dim,
                                                      scope='fc1')
            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            output_dict = self._make_output(h_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['map'] = tf.metrics.average_precision_at_k(
                    labels=tf.cast(labels, tf.int64),
                    predictions=output_dict['logits'],
                    k=2)
                metrics['precision_1'] = tf.metrics.precision_at_k(
                    labels=tf.cast(labels, tf.int64),
                    predictions=output_dict['logits'],
                    k=1,
                    class_id=1)

                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example 6
    def forward(self, features, labels, mode, params):
        global_step = tf.train.get_or_create_global_step()
        dropout_keep_rate = tf.train.exponential_decay(self._keep_prob, global_step,
                                                       self._dropout_decay_step, self._dropout_decay_rate,
                                                       staircase=False, name='dropout_keep_rate')
        tf.summary.scalar('dropout_keep_rate', dropout_keep_rate)

        params.add_hparam('dropout_rate', 1 - dropout_keep_rate)
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_ins = []
            hypothesis_ins = []

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

            premise_ins.append(premise_tokens)
            hypothesis_ins.append(hypothesis_tokens)

            premise_chars = features_embedding.get('premise/chars', None)
            hypothesis_chars = features_embedding.get('hypothesis/chars', None)

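            # Optional character-level features: a shared 1-D convolution with
            # max pooling over the characters of premise and hypothesis.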
            if premise_chars is not None and hypothesis_chars is not None:

                with tf.variable_scope("conv") as scope:
                    conv_pre = nn.multi_conv1d_max(premise_chars, self._char_filter_size, self._char_filter_channel_dims,
                                                   "VALID", is_training, dropout_keep_rate, scope='conv')
                    scope.reuse_variables()
                    conv_hyp = nn.multi_conv1d_max(hypothesis_chars, self._char_filter_size, self._char_filter_channel_dims,
                                                   "VALID", is_training, dropout_keep_rate, scope='conv')
                    #conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size])
                    #conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size])

                    premise_ins.append(conv_pre)
                    hypothesis_ins.append(conv_hyp)

            premise_pos = features_embedding.get('premise/pos_tags', None)
            hypothesis_pos = features_embedding.get('hypothesis/pos_tags', None)

            if premise_pos is not None and hypothesis_pos is not None:
                premise_ins.append(premise_pos)
                hypothesis_ins.append(hypothesis_pos)

            premise_exact_match = features.get('premise/exact_match_labels', None)
            hypothesis_exact_match = features.get('hypothesis/exact_match_labels', None)

            if premise_exact_match is not None and hypothesis_exact_match is not None:
                premise_ins.append(tf.expand_dims(tf.cast(premise_exact_match, tf.float32), -1))
                hypothesis_ins.append(tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32), -1))

            premise_in = tf.concat(premise_ins, axis=2)
            hypothesis_in = tf.concat(hypothesis_ins, axis=2)

            with tf.variable_scope("highway") as scope:
                premise_in = nn.highway_network(premise_in, self._highway_num_layers)
                scope.reuse_variables()
                hypothesis_in = nn.highway_network(hypothesis_in, self._highway_num_layers)

            with tf.variable_scope("prepro") as scope:
                pre = premise_in
                hyp = hypothesis_in
                for i in range(self._num_self_att_enc_layers):
                    with tf.variable_scope("attention_encoder_%s" % i, reuse=False):
                        pre_att = nn.self_attention(pre, prem_seq_lengths, func='tri_linear',
                                                    scope="premise_self_attention")
                        p = nn.fuse_gate(pre, pre_att, scope="premise_fuse_gate")
                        hyp_att = nn.self_attention(hyp, hyp_seq_lengths, func='tri_linear',
                                                    scope="hypothesis_self_attention")
                        h = nn.fuse_gate(hyp, hyp_att, scope="hypothesis_fuse_gate")

                        pre = p
                        hyp = h
                        nn.variable_summaries(p, "p_self_enc_summary_layer_{}".format(i))
                        nn.variable_summaries(h, "h_self_enc_summary_layer_{}".format(i))

            with tf.variable_scope("main") as scope:
                pre = p
                hyp = h

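                # Interaction tensor: element-wise product of every premise
                # position with every hypothesis position.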
                with tf.variable_scope("interaction"):
                    pre_length = tf.shape(pre)[1]
                    hyp_length = tf.shape(hyp)[1]
                    pre_new = tf.tile(tf.expand_dims(pre, 2), [1, 1, hyp_length, 1])
                    hyp_new = tf.tile(tf.expand_dims(hyp, 1), [1, pre_length, 1, 1])
                    bi_att_mx = pre_new * hyp_new

                    # mask = tf.expand_dims(tf.sequence_mask(query_len, tf.shape(query)[1], dtype=tf.float32),
                    #                       axis=2) * \
                    #        tf.expand_dims(tf.sequence_mask(key_len, tf.shape(key)[1], dtype=tf.float32), axis=1)
                    bi_att_mx = tf.layers.dropout(bi_att_mx, 1-dropout_keep_rate, training=is_training)

                with tf.variable_scope("dense_net"):
                    dim = bi_att_mx.get_shape().as_list()[-1]
                    act = tf.nn.relu if self._first_scale_down_layer_relu else None
                    fm = tf.contrib.layers.convolution2d(bi_att_mx,
                                                         int(dim * self._dense_net_first_scale_down_ratio),
                                                         self._first_scale_down_kernel, padding="SAME",
                                                         activation_fn=act)

                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="first_dense_net_block")
                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='second_transition_layer')
                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="second_dense_net_block")
                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='third_transition_layer')
                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="third_dense_net_block")

                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='fourth_transition_layer')

                    shape_list = list(fm.get_shape())
                    #print(shape_list)
                    premise_final = tf.reshape(fm, [-1, shape_list[1] * shape_list[2] * shape_list[3]])

            output_dict = self._make_output(premise_final, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                #######l2 loss#################
                l2loss = tf.constant(0., dtype=tf.float32)  # fallback so the total loss is defined when L2 regularization is disabled
                if self._l2_loss:
                    if self._sigmoid_growing_l2loss:
                        weights_added = tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if
                                                  tensor.name.endswith("weights:0") or tensor.name.endswith('kernel:0') or tensor.name.endswith('filter:0')])
                        full_l2_step = tf.constant(self._weight_l2loss_step_full_reg, dtype=tf.int32, shape=[],
                                                   name='full_l2reg_step')
                        full_l2_ratio = tf.constant(self._l2_regularization_ratio, dtype=tf.float32, shape=[],
                                                    name='l2_regularization_ratio')
                        gs_flt = tf.cast(global_step, tf.float32)
                        half_l2_step_flt = tf.cast(full_l2_step / 2, tf.float32)

                        # (self.global_step - full_l2_step / 2)
                        # tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32)
                        # l2loss_ratio = tf.sigmoid( tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32)) * full_l2_ratio
                        l2loss_ratio = tf.sigmoid(((gs_flt - half_l2_step_flt) * 8) / half_l2_step_flt) * full_l2_ratio
                        tf.summary.scalar('l2loss_ratio', l2loss_ratio)
                        l2loss = weights_added * l2loss_ratio
                    else:
                        l2loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if
                                           tensor.name.endswith("weights:0") or tensor.name.endswith(
                                               'kernel:0')]) * tf.constant(self._l2_regularization_ratio,
                                                                           dtype='float', shape=[],
                                                                           name='l2_regularization_ratio')
                    tf.summary.scalar('l2loss', l2loss)
                ######diff loss###############################
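                # Difference penalty: keep the premise-side and hypothesis-side
                # self-attention / fuse-gate weights close to each other.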
                diffs = []
                for i in range(self._num_self_att_enc_layers):
                    for tensor in tf.trainable_variables():
                        #print(tensor.name)
                        if tensor.name == "diin/prepro/attention_encoder_{}/premise_self_attention/similar_mat/similar_func/arg/kernel:0".format(
                                i):
                            l_lg = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_self_attention/similar_mat/similar_func/arg/kernel:0".format(
                                i):
                            r_lg = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_1/kernel:0".format(i):
                            l_fg_lhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_1/kernel:0".format(
                                i):
                            r_fg_lhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_1/kernel:0".format(i):
                            l_fg_rhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_1/kernel:0".format(
                                i):
                            r_fg_rhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_2/kernel:0".format(i):
                            l_fg_lhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_2/kernel:0".format(
                                i):
                            r_fg_lhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_2/kernel:0".format(i):
                            l_fg_rhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_2/kernel:0".format(
                                i):
                            r_fg_rhs_2 = tensor

                        if tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_3/kernel:0".format(
                                i):
                            l_fg_lhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_3/kernel:0".format(
                                i):
                            r_fg_lhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_3/kernel:0".format(
                                i):
                            l_fg_rhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_3/kernel:0".format(
                                i):
                            r_fg_rhs_3 = tensor

                    diffs += [l_lg - r_lg, l_fg_lhs_1 - r_fg_lhs_1, l_fg_rhs_1 - r_fg_rhs_1, l_fg_lhs_2 - r_fg_lhs_2,
                              l_fg_rhs_2 - r_fg_rhs_2]
                    diffs += [l_fg_lhs_3 - r_fg_lhs_3, l_fg_rhs_3 - r_fg_rhs_3]
                diff_loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in diffs]) * tf.constant(
                    self._diff_penalty_loss_ratio, dtype='float', shape=[], name='diff_penalty_loss_ratio')
                tf.summary.scalar('diff_loss', diff_loss)
                ###############################
                output_dict['loss'] = loss + l2loss + diff_loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])

                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example 7
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            # prem_mask = tf.expand_dims(prem_mask, -1)
            # hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            with tf.variable_scope("Attend"):
                F_a_bar = self._feedForwardBlock(premise_tokens,
                                                 self._hidden_dim,
                                                 'F',
                                                 is_training=is_training)
                F_b_bar = self._feedForwardBlock(hypothesis_tokens,
                                                 self._hidden_dim,
                                                 'F',
                                                 isReuse=True,
                                                 is_training=is_training)

                # e_i,j = F'(a_hat, b_hat) = F(a_hat).T * F(b_hat) (1)
                #alignment_attention = Attention(self.hidden_size, self.hidden_size)
                #alpha = alignment_attention(F_b_bar, F_a_bar, keys_mask=self.query_mask)
                #beta = alignment_attention(F_a_bar, F_b_bar, keys_mask=self.doc_mask)
                alpha, beta = nn.bi_uni_attention(F_a_bar,
                                                  F_b_bar,
                                                  query_len=prem_seq_lengths,
                                                  key_len=hyp_seq_lengths)

            with tf.variable_scope("Compare"):
                a_beta = tf.concat([premise_tokens, alpha], axis=2)
                b_alpha = tf.concat([hypothesis_tokens, beta], axis=2)

                # v_1,i = G([a_bar_i, beta_i])
                # v_2,j = G([b_bar_j, alpha_j]) (3)
                v_1 = self._feedForwardBlock(a_beta,
                                             self._hidden_dim,
                                             'G',
                                             is_training=is_training)
                v_2 = self._feedForwardBlock(b_alpha,
                                             self._hidden_dim,
                                             'G',
                                             isReuse=True,
                                             is_training=is_training)

            with tf.variable_scope("Aggregate"):
                # v1 = \sum_{i=1}^l_a v_{1,i}
                # v2 = \sum_{j=1}^l_b v_{2,j} (4)
                v1_sum = tf.reduce_sum(v_1, axis=1)
                v2_sum = tf.reduce_sum(v_2, axis=1)

                # y_hat = H([v1, v2]) (5)
                v = tf.concat([v1_sum, v2_sum], axis=1)

                ff_outputs = self._feedForwardBlock(v,
                                                    self._hidden_dim,
                                                    'H',
                                                    is_training=is_training)

                output_dict = self._make_output(ff_outputs, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                #metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [tf.shape(hypothesis_tokens), tf.shape(premise_tokens),
                #                          tf.shape(alpha), tf.shape(beta)]
            return output_dict
Example 8
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

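            # Local matching signal: an exact-match (XOR) interaction between
            # premise and hypothesis token ids, convolved and passed through
            # dense layers to produce a score.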
            lm_xor = keras.layers.Lambda(self._xor_match)([premise_tokens_ids, hypothesis_tokens_ids])
            lm_conv = keras.layers.Conv1D(
                self._lm_filters,
                premise_tokens_ids.shape[1].value,
                padding='valid',
                activation=self._activation_func
            )(lm_xor)

            lm_conv = keras.layers.Dropout(self._dropout_rate)(
                lm_conv, training=is_training)
            lm_feat = keras.layers.Reshape((lm_conv.shape[2].value, ))(lm_conv)
            for hidden_size in self._lm_hidden_sizes:
                lm_feat = keras.layers.Dense(
                    hidden_size,
                    activation=self._activation_func
                )(lm_feat)
            lm_drop = keras.layers.Dropout(self._dropout_rate)(
                lm_feat, training=is_training)
            lm_score = keras.layers.Dense(1)(lm_drop)

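            # Distributed matching signal: convolve the embedded premise and
            # hypothesis separately, pool, combine them with an element-wise
            # product, and score with dense layers; the final output adds the
            # local and distributed scores.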
            dm_q_conv = keras.layers.Conv1D(
                self._dm_filters,
                self._dm_kernel_size,
                padding='same',
                activation=self._activation_func
            )(premise_tokens)
            dm_q_conv = keras.layers.Dropout(self._dropout_rate)(
                dm_q_conv, training=is_training)
            dm_q_mp = keras.layers.MaxPooling1D(
                pool_size=premise_tokens_ids.shape[1].value)(dm_q_conv)
            dm_q_rep = keras.layers.Reshape((dm_q_mp.shape[2].value, ))(dm_q_mp)
            dm_q_rep = keras.layers.Dense(self._dm_q_hidden_size)(
                dm_q_rep)
            dm_q_rep = keras.layers.Lambda(lambda x: tf.expand_dims(x, 1))(
                dm_q_rep)

            dm_d_conv1 = keras.layers.Conv1D(
                self._dm_filters,
                self._dm_kernel_size,
                padding='same',
                activation=self._activation_func
            )(hypothesis_tokens)
            dm_d_conv1 = keras.layers.Dropout(self._dropout_rate)(
                dm_d_conv1, training=is_training)
            dm_d_mp = keras.layers.MaxPooling1D(
                pool_size=self._dm_d_mpool)(dm_d_conv1)
            dm_d_conv2 = keras.layers.Conv1D(
                self._dm_filters, 1,
                padding='same',
                activation=self._activation_func
            )(dm_d_mp)
            dm_d_conv2 = keras.layers.Dropout(self._dropout_rate)(
                dm_d_conv2, training=is_training)

            h_dot = dm_q_rep * dm_d_conv2 #keras.layers.Lambda(self._hadamard_dot)([dm_q_rep, dm_d_conv2])
            dm_feat = keras.layers.Reshape((h_dot.shape[1].value*h_dot.shape[2].value, ))(h_dot)
            for hidden_size in self._dm_hidden_sizes:
                dm_feat = keras.layers.Dense(hidden_size)(dm_feat)
            dm_feat_drop = keras.layers.Dropout(self._dropout_rate)(
                dm_feat, training=is_training)
            dm_score = keras.layers.Dense(1)(dm_feat_drop)

            add = keras.layers.Add()([lm_score, dm_score])

            # Get prediction
            output_dict = self._make_output(add, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])
                # metrics['map'] = tf.metrics.average_precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=2)
                # metrics['precision_1'] = tf.metrics.precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=1, class_id=1)

                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example 9
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            #########Word Embedding####################
            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_ins = []
            hypothesis_ins = []

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            premise_ins.append(premise_tokens)
            hypothesis_ins.append(hypothesis_tokens)

            premise_chars = features_embedding.get('premise/chars', None)
            hypothesis_chars = features_embedding.get('hypothesis/chars', None)

            if premise_chars is not None and hypothesis_chars is not None:
                with tf.variable_scope("conv") as scope:
                    conv_pre = nn.multi_conv1d_max(
                        premise_chars,
                        self._char_filter_size,
                        self._char_filter_channel_dims,
                        "VALID",
                        is_training,
                        self._dropout_rate,
                        scope='conv')
                    scope.reuse_variables()
                    conv_hyp = nn.multi_conv1d_max(
                        hypothesis_chars,
                        self._char_filter_size,
                        self._char_filter_channel_dims,
                        "VALID",
                        is_training,
                        self._dropout_rate,
                        scope='conv')
                    # conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size])
                    # conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size])

                    premise_ins.append(conv_pre)
                    hypothesis_ins.append(conv_hyp)

            premise_pos = features_embedding.get('premise/pos_tags', None)
            hypothesis_pos = features_embedding.get('hypothesis/pos_tags',
                                                    None)

            if premise_pos is not None and hypothesis_pos is not None:
                premise_ins.append(premise_pos)
                hypothesis_ins.append(hypothesis_pos)

            premise_exact_match = features.get('premise/exact_match_labels',
                                               None)
            hypothesis_exact_match = features.get(
                'hypothesis/exact_match_labels', None)

            if premise_exact_match is not None and hypothesis_exact_match is not None:
                premise_ins.append(
                    tf.expand_dims(tf.cast(premise_exact_match, tf.float32),
                                   -1))
                hypothesis_ins.append(
                    tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32),
                                   -1))

            premise_in = tf.concat(premise_ins, axis=2)
            hypothesis_in = tf.concat(hypothesis_ins, axis=2)

            premise_in = nn.highway_network(premise_in,
                                            2,
                                            output_size=self._hidden_dim,
                                            dropout_rate=self._dropout_rate,
                                            is_trainging=is_training,
                                            scope="premise_highway")
            hypothesis_in = nn.highway_network(hypothesis_in,
                                               2,
                                               output_size=self._hidden_dim,
                                               dropout_rate=self._dropout_rate,
                                               is_trainging=is_training,
                                               scope="hypothesis_highway")

            ########Attention Stack-GRU################
            def gru_network(input, input_len, name="gru_network"):
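                # Stacked GRU with dense connections: each layer's output is
                # concatenated to its input, so later layers see all earlier
                # representations.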
                with tf.variable_scope(name):
                    gru_input = input
                    for i in range(self._num_rnn_layer):
                        with tf.variable_scope("layer_%s" % i):
                            seq, c1 = nn.gru(gru_input,
                                             self._hidden_dim,
                                             seq_len=input_len,
                                             initializer=self._initializer)
                            gru_input = tf.concat([gru_input, seq], axis=2)
                return gru_input

            premise_gru = gru_network(premise_in,
                                      prem_seq_lengths,
                                      name='premise_gru_network')
            hypothesis_gru = gru_network(hypothesis_in,
                                         hyp_seq_lengths,
                                         name='hypothesis_gru_network')

            premise_gru = premise_gru * prem_mask
            hypothesis_gru = hypothesis_gru * hyp_mask
            #########
            premise_att = nn.attention_pool(premise_gru,
                                            self._hidden_dim,
                                            seq_len=prem_seq_lengths,
                                            initializer=self._initializer,
                                            name='premise_attention_pool')
            hypothesis_att = nn.attention_pool(
                hypothesis_gru,
                self._hidden_dim,
                seq_len=hyp_seq_lengths,
                initializer=self._initializer,
                name='hypothesis_attention_pool')

            ############Dynamic Re-read Mechanism################

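            # Re-read one sentence several times: at each step an attention
            # summary of h_seq_a, conditioned on the other sentence's vector
            # h_b and on the current state h_a_pre, updates the state through
            # a GRU cell.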
            def dynamic_reread(h_seq_a,
                               h_a,
                               h_b,
                               h_a_len,
                               name="dymanic_reread"):
                with tf.variable_scope(name):
                    h_a_pre = h_a
                    # h_a_pre = nn.highway_layer(h_a, self._hidden_dim, initializer=self._initializer,
                    #                            scope="h_a_pre_highway")
                    # h_seq_a = nn.highway_layer(h_seq_a, self._hidden_dim, initializer=self._initializer,
                    #                            scope="h_seq_a_highway")
                    # h_b = nn.highway_layer(h_b, self._hidden_dim, initializer=self._initializer,
                    #                        scope="h_b_highway")
                    #####
                    w_d = tf.get_variable(
                        "w_d_weights",
                        (h_seq_a.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    u_d = tf.get_variable(
                        "u_d_weights",
                        (h_a_pre.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    m_d = tf.get_variable(
                        "m_d_weights",
                        (h_b.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    omega_d = tf.get_variable("omega_d_weights",
                                              (h_a_pre.shape[-1].value, 1),
                                              initializer=self._initializer)
                    ##########
                    m_d_h_b = tf.tensordot(h_b, m_d, axes=[-1, 0])
                    h_seq_a_w_d = tf.tensordot(h_seq_a, w_d, axes=[-1, 0])

                    if h_a_len is not None:
                        mask = tf.expand_dims(tf.sequence_mask(
                            h_a_len, tf.shape(h_seq_a)[1], dtype=tf.float32),
                                              axis=2)
                    else:
                        mask = None
                    gru_cell = tf.nn.rnn_cell.GRUCell(
                        h_a_pre.shape[-1].value,
                        kernel_initializer=self._initializer)

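                    # Each re-read step scores positions with
                    # omega_d^T * tanh(W_d h_seq_a + U_d h_a_pre + M_d h_b),
                    # applies a softmax sharpened by beta, and feeds the
                    # attended summary into the GRU cell.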
                    for i in range(self._reread_length):
                        u_d_h_a_pre = tf.tensordot(h_a_pre, u_d, axes=[-1, 0])
                        m_a = tf.nn.tanh(
                            h_seq_a_w_d +
                            tf.expand_dims(m_d_h_b + u_d_h_a_pre, 1))
                        m_a = tf.tensordot(m_a, omega_d, axes=[-1, 0])
                        if mask is not None:
                            m_a = m_a + (1. - mask) * tf.float32.min
                        alpha = tf.nn.softmax(self._beta * m_a, axis=1)
                        alpha = tf.reduce_sum(alpha * h_seq_a, axis=1)
                        gru_output, gru_state = gru_cell(alpha, h_a_pre)
                        h_a_pre = gru_state
                    return gru_output

            premise_v = dynamic_reread(premise_gru,
                                       premise_att,
                                       hypothesis_att,
                                       prem_seq_lengths,
                                       name='premise_dynamic_reread')
            hypothesis_v = dynamic_reread(hypothesis_gru,
                                          hypothesis_att,
                                          premise_att,
                                          hyp_seq_lengths,
                                          name='hypothesis_dynamic_reread')

            ########label prediction##############

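            # Standard matching features [a, b, a * b, a - b] built from both
            # the attention-pooled vectors (h) and the re-read vectors (v).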
            h = tf.concat([
                premise_att, hypothesis_att, hypothesis_att * premise_att,
                hypothesis_att - premise_att
            ],
                          axis=-1)
            v = tf.concat([
                premise_v, hypothesis_v, hypothesis_v * premise_v,
                hypothesis_v - premise_v
            ],
                          axis=-1)

            # h MLP layer
            h_mlp = tf.layers.dense(h,
                                    self._hidden_dim,
                                    activation=tf.nn.relu,
                                    kernel_initializer=self._initializer,
                                    name='h_fc1')
            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            h_logits = tf.layers.dense(h_drop,
                                       self._num_classes,
                                       activation=None,
                                       kernel_initializer=self._initializer,
                                       name='h_logits')

            p_h = tf.nn.softmax(h_logits)

            # # MLP layer
            v_mlp = tf.layers.dense(v,
                                    self._hidden_dim,
                                    activation=tf.nn.relu,
                                    kernel_initializer=self._initializer,
                                    name='v_fc1')
            # Dropout applied to classifier
            v_drop = tf.layers.dropout(v_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            v_logits = tf.layers.dense(v_drop,
                                       self._num_classes,
                                       activation=None,
                                       kernel_initializer=self._initializer,
                                       name='v_logits')

            p_v = tf.nn.softmax(v_logits)
            ####
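            # Sigmoid gates weight the two feature vectors before fusion.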
            alpha_h = tf.layers.dense(h,
                                      1,
                                      activation=tf.nn.sigmoid,
                                      kernel_initializer=self._initializer,
                                      bias_initializer=tf.zeros_initializer())
            alpha_v = tf.layers.dense(v,
                                      1,
                                      activation=tf.nn.sigmoid,
                                      kernel_initializer=self._initializer,
                                      bias_initializer=tf.zeros_initializer())
            # # h MLP layer
            fuse_mlp = tf.layers.dense(alpha_h * h + alpha_v * v,
                                       self._hidden_dim,
                                       activation=tf.nn.relu,
                                       kernel_initializer=self._initializer,
                                       name='fuse_fc1')
            # Dropout applied to classifier
            fuse_drop = tf.layers.dropout(fuse_mlp,
                                          self._dropout_rate,
                                          training=is_training)
            #Get prediction
            output_dict = self._make_output(fuse_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                h_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=labels_embedding, logits=h_logits))
                v_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=labels_embedding, logits=v_logits))
                fuse_loss = self._make_loss(labels=labels_embedding,
                                            logits=output_dict['logits'],
                                            params=params)

                output_dict['loss'] = v_loss + h_loss + fuse_loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example n. 10
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

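            # Term gating: a single-unit projection of the premise embeddings,
            # masked and softmaxed over the sequence axis, gives per-term
            # attention weights.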
            dense_output = tf.layers.dense(premise_tokens, 1, use_bias=False)
            dense_output += (1 - prem_mask) * tf.float32.min
            attention_probs = tf.nn.softmax(dense_output, axis=1)

            # Matching histogram of top-k
            # shape = [B, M, N]
            matching_matrix = tf.matmul(tf.nn.l2_normalize(premise_tokens,
                                                           axis=2),
                                        tf.nn.l2_normalize(hypothesis_tokens,
                                                           axis=2),
                                        transpose_b=True)
            # shape = [B, M, K]
            matching_topk = tf.nn.top_k(matching_matrix,
                                        k=self._top_k,
                                        sorted=True)[0]

            # Feedforward matching topk
            # shape = [B, M, 1]
            dense_output = matching_topk
            for i in range(self._mlp_num_layers):
                dense_output = tf.layers.Dense(
                    self._mlp_num_units,
                    activation=self._mlp_activation_func,
                    use_bias=True)(dense_output)
            dense_output = tf.layers.Dense(
                self._mlp_num_fan_out,
                activation=self._mlp_activation_func,
                use_bias=True)(dense_output)

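            # Final score: attention-weighted sum of the per-term matching
            # features.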
            # shape = [B, 1, 1]
            dot_score = tf.matmul(attention_probs,
                                  dense_output,
                                  transpose_a=True)
            flatten_score = tf.reshape(dot_score, [-1, 1])
            # Get prediction
            output_dict = self._make_output(flatten_score, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                # metrics['map'] = tf.metrics.average_precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=2)
                # metrics['precision_1'] = tf.metrics.precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=1, class_id=1)

                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example n. 11
0
    def forward(self, features, labels, mode, params):
        if self._sim_func != 'tensor' and self._num_tensor_dim != 1:
            self._num_tensor_dim = 1
            logger.warning(
                "The similarity function is tensor layer. The number of tensor dim is not effective."
            )
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)
            prem_hyp_mask = tf.matmul(prem_mask, hyp_mask, transpose_b=True)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            premise_outs, c1 = nn.bi_lstm(premise_tokens,
                                          self._hidden_dim,
                                          seq_len=prem_seq_lengths,
                                          name='premise')
            hypothesis_outs, c2 = nn.bi_lstm(hypothesis_tokens,
                                             self._hidden_dim,
                                             seq_len=hyp_seq_lengths,
                                             name='hypothesis')
            premise_bi = tf.concat(premise_outs, axis=2)
            hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

            max_premise_length = premise_tokens.shape[1].value
            max_hypothesis_length = hypothesis_tokens.shape[1].value

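            # Build the interaction tensor with one of three similarity
            # functions: a neural tensor layer ('tensor'), cosine similarity
            # ('cosine'), or a bilinear form ('bilinear').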
            if self._sim_func == 'tensor':
                M = tf.Variable(
                    tf.random_normal([
                        self._num_tensor_dim, 2 * self._hidden_dim,
                        2 * self._hidden_dim
                    ],
                                     stddev=0.1))
                W = tf.Variable(
                    tf.random_normal([4 * self._hidden_dim, 1], stddev=0.1))
                bias = tf.Variable(tf.zeros([1]), name="tensor_bias")
                premise_ex = tf.tile(tf.expand_dims(premise_bi, axis=2),
                                     [1, 1, max_hypothesis_length, 1])
                hypothesis_ex = tf.tile(tf.expand_dims(hypothesis_bi, axis=1),
                                        [1, max_premise_length, 1, 1])
                tensor = []
                tmp2 = tf.einsum("abcd,df->abcf",
                                 tf.concat([premise_ex, hypothesis_ex],
                                           axis=3), W)  # [N, L1, L2, 1]
                tmp2 = tf.squeeze(tmp2, axis=3)
                for i in range(self._num_tensor_dim):
                    tmp1 = tf.einsum("abc,cd->abd", premise_bi,
                                     M[i])  # [N, L1, 2d]
                    tmp1 = tf.matmul(tmp1, hypothesis_bi,
                                     transpose_b=True)  # [N, L1, L2]
                    tensor.append(tf.nn.relu(tmp1 + tmp2 + bias))
                tensor = tf.concat([tensor], axis=0)
            elif self._sim_func == 'cosine':
                tensor = tf.matmul(tf.nn.l2_normalize(premise_bi, axis=-1),
                                   tf.nn.l2_normalize(hypothesis_bi, axis=-1),
                                   transpose_b=True)  # [N, L1, L2]
            elif self._sim_func == 'bilinear':
                M = tf.Variable(
                    tf.random_normal(
                        [2 * self._hidden_dim, 2 * self._hidden_dim],
                        stddev=0.1))
                b = tf.Variable(
                    tf.random_normal(
                        [max_premise_length, max_hypothesis_length],
                        stddev=0.1))
                bilinear = tf.einsum("abc,cd->abd", premise_bi,
                                     M)  # [N, L1, 2d]
                tensor = tf.matmul(bilinear, hypothesis_bi,
                                   transpose_b=True) + b  # [N, L1, L2]
            else:
                raise ConfigureError(
                    "The simility function %s is not supported. "
                    "The mvlstm only support simility function for [cosine, bilinear, tensor]."
                    % self._sim_func)

            tensor *= prem_hyp_mask
            # 3.1 k-Max Pooling
            matrix_in = tf.reshape(
                tensor, [-1, max_premise_length * max_hypothesis_length])
            values, indices = tf.nn.top_k(matrix_in,
                                          k=self._num_k,
                                          sorted=False)
            kmax = tf.reshape(values, [-1, self._num_tensor_dim * self._num_k])

            # MLP layer
            h_mlp_1 = tf.contrib.layers.fully_connected(kmax,
                                                        self._num_tensor_dim *
                                                        self._num_k,
                                                        scope='fc1')
            h_mlp_1_drop = tf.layers.dropout(h_mlp_1,
                                             self._dropout_rate,
                                             training=is_training)
            h_mlp_2 = tf.contrib.layers.fully_connected(h_mlp_1_drop,
                                                        self._num_tensor_dim *
                                                        self._num_k // 2,
                                                        scope='fc2')

            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp_2,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            output_dict = self._make_output(h_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])

                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
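The k-max pooling step above flattens the interaction tensor and keeps only the k strongest matching values per example before the fully connected layers. A minimal standalone sketch of the same trick (hypothetical shapes, TF 1.x style to match the code above):

    import tensorflow as tf

    batch, l1, l2, k = 8, 10, 12, 5
    matching = tf.random_normal([batch, l1, l2])    # interaction matrix [B, L1, L2]
    flat = tf.reshape(matching, [-1, l1 * l2])      # flatten the L1 x L2 position grid
    kmax, _ = tf.nn.top_k(flat, k=k, sorted=False)  # keep the k strongest values per example
    # kmax has shape [batch, k] and would feed the MLP that produces the matching score.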
Example n. 12
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)

            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            premise_outs, c1 = nn.bi_lstm(premise_tokens,
                                          self._hidden_dim,
                                          seq_len=prem_seq_lengths,
                                          name='premise')
            hypothesis_outs, c2 = nn.bi_lstm(hypothesis_tokens,
                                             self._hidden_dim,
                                             seq_len=hyp_seq_lengths,
                                             name='hypothesis')

            premise_bi = tf.concat(premise_outs, axis=2)
            hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

            premise_bi *= prem_mask
            hypothesis_bi *= hyp_mask

            ### Attention ###
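            # Soft alignment: each premise position attends over the
            # hypothesis and vice versa (dot-product attention).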
            premise_attns, hypothesis_attns = nn.bi_uni_attention(
                premise_bi,
                hypothesis_bi,
                prem_seq_lengths,
                hyp_seq_lengths,
                func="dot")

            # Enhanced local inference features: difference and element-wise product.
            prem_diff = tf.subtract(premise_bi, premise_attns)
            prem_mul = tf.multiply(premise_bi, premise_attns)
            hyp_diff = tf.subtract(hypothesis_bi, hypothesis_attns)
            hyp_mul = tf.multiply(hypothesis_bi, hypothesis_attns)

            m_a = tf.concat([premise_bi, premise_attns, prem_diff, prem_mul],
                            2)
            m_b = tf.concat(
                [hypothesis_bi, hypothesis_attns, hyp_diff, hyp_mul], 2)

            ### Inference Composition ###

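            # Compose local inference information with a second BiLSTM over
            # the enhanced representations.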
            v1_outs, c3 = nn.bi_lstm(m_a,
                                     self._hidden_dim,
                                     seq_len=prem_seq_lengths,
                                     name='v1')
            v2_outs, c4 = nn.bi_lstm(m_b,
                                     self._hidden_dim,
                                     seq_len=hyp_seq_lengths,
                                     name='v2')

            v1_bi = tf.concat(v1_outs, axis=2)
            v2_bi = tf.concat(v2_outs, axis=2)

            v1_bi = v1_bi * prem_mask
            v2_bi = v2_bi * hyp_mask

            ### Pooling Layer ###
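            # Average and max pooling over time; eps guards against division
            # by zero for empty sequences.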
            eps = 1e-11
            v_1_sum = tf.reduce_sum(v1_bi, 1)
            v_1_ave = tf.div(
                v_1_sum,
                tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1) +
                eps)

            v_2_sum = tf.reduce_sum(v2_bi, 1)
            v_2_ave = tf.div(
                v_2_sum,
                tf.expand_dims(tf.cast(hyp_seq_lengths, tf.float32), -1) + eps)

            v_1_max = tf.reduce_max(v1_bi, 1)
            v_2_max = tf.reduce_max(v2_bi, 1)

            v = tf.concat([v_1_ave, v_2_ave, v_1_max, v_2_max], 1)

            # MLP layer
            h_mlp = tf.contrib.layers.fully_connected(v,
                                                      self._hidden_dim,
                                                      activation_fn=tf.nn.tanh,
                                                      scope='fc1')

            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp,
                                       self._dropout_rate,
                                       training=is_training)

            # Get prediction
            output_dict = self._make_output(h_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)

                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          v_1_ave, v_2_ave, h_mlp, logits]
            return output_dict