Example #1
    def _init_graph(self):
        self._init_placeholders()

        # One-hot targets over token positions for the gold answer start/end
        seq_len = tf.shape(self.input_ids_ph)[-1]
        self.y_st = tf.one_hot(self.y_st_ph, depth=seq_len)
        self.y_end = tf.one_hot(self.y_end_ph, depth=seq_len)

        self.bert = BertModel(config=self.bert_config,
                              is_training=self.is_train_ph,
                              input_ids=self.input_ids_ph,
                              input_mask=self.input_masks_ph,
                              token_type_ids=self.token_types_ph,
                              use_one_hot_embeddings=False,
                              )

        # Token-level BERT outputs: [batch_size, seq_len, hidden_size]
        last_layer = self.bert.get_sequence_output()
        hidden_size = last_layer.get_shape().as_list()[-1]
        bs = tf.shape(last_layer)[0]

        with tf.variable_scope('squad'):
            output_weights = tf.get_variable('output_weights', [2, hidden_size],
                                             initializer=tf.truncated_normal_initializer(stddev=0.02))
            output_bias = tf.get_variable('output_bias', [2], initializer=tf.zeros_initializer())

            # Project each token representation to two scores: answer start and answer end
            last_layer_rs = tf.reshape(last_layer, [-1, hidden_size])

            logits = tf.matmul(last_layer_rs, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            logits = tf.reshape(logits, [bs, -1, 2])
            logits = tf.transpose(logits, [2, 0, 1])

            # Start and end logits, each of shape [batch_size, seq_len]
            logits_st, logits_end = tf.unstack(logits, axis=0)

            # Only context tokens (token_type == 1) are valid answer positions
            logit_mask = self.token_types_ph
            # [CLS] token is used as no answer
            mask = tf.concat([tf.ones((bs, 1), dtype=tf.int32), tf.zeros((bs, seq_len - 1), dtype=tf.int32)], axis=-1)
            logit_mask = logit_mask + mask

            logits_st = softmax_mask(logits_st, logit_mask)
            logits_end = softmax_mask(logits_end, logit_mask)
            start_probs = tf.nn.softmax(logits_st)
            end_probs = tf.nn.softmax(logits_end)

            # outer[b, i, j] = P(start = i) * P(end = j): a score for every candidate span
            outer = tf.matmul(tf.expand_dims(start_probs, axis=2), tf.expand_dims(end_probs, axis=1))
            outer_logits = tf.exp(tf.expand_dims(logits_st, axis=2) + tf.expand_dims(logits_end, axis=1))

            context_max_len = tf.reduce_max(tf.reduce_sum(self.token_types_ph, axis=1))

            # Zero out spans with end < start or with length above max_ans_length
            max_ans_length = tf.cast(tf.minimum(20, context_max_len), tf.int64)
            outer = tf.matrix_band_part(outer, 0, max_ans_length)
            outer_logits = tf.matrix_band_part(outer_logits, 0, max_ans_length)

            # Probability that an answer exists: 1 - P(start = [CLS]) * P(end = [CLS])
            self.yp_score = 1 - tf.nn.softmax(logits_st)[:, 0] * tf.nn.softmax(logits_end)[:, 0]

            self.start_probs = start_probs
            self.end_probs = end_probs
            # Best span: joint argmax over the span score matrix
            self.start_pred = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.end_pred = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            self.yp_logits = tf.reduce_max(tf.reduce_max(outer_logits, axis=2), axis=1)

        with tf.variable_scope("loss"):
            # Cross-entropy over start and end positions, averaged across the batch
            loss_st = tf.nn.softmax_cross_entropy_with_logits(logits=logits_st, labels=self.y_st)
            loss_end = tf.nn.softmax_cross_entropy_with_logits(logits=logits_end, labels=self.y_end)
            self.loss = tf.reduce_mean(loss_st + loss_end)
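
Both examples call a softmax_mask helper that is not shown here. A minimal sketch, assuming the common R-Net-style convention of additively masking logits before a softmax (the implementation below is that convention, not code taken from these snippets):

import tensorflow as tf

def softmax_mask(val, mask):
    # Add a very large negative constant at masked positions so that a
    # subsequent softmax assigns them effectively zero probability.
    return val + (1 - tf.cast(mask, tf.float32)) * -1e30

In Example #1 the mask covers the context tokens plus the [CLS] position, so the start/end distributions can place mass on [CLS] as the "no answer" outcome.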
Example #2
    def __init__(self, n_classes: int = 2,
                 dropout_keep_prob: float = 0.5,
                 return_probas: bool = False, **kwargs):
        """

        Args:
            n_classes: number of classes for classification
            dropout_keep_prob: Probability of keeping the hidden state, values from 0 to 1. 0.5 works well
                in most cases.
            return_probas: whether to return confidences of the relation to be appropriate or not
            **kwargs:
        """
        kwargs.setdefault('learning_rate_drop_div', 10.0)
        kwargs.setdefault('learning_rate_drop_patience', 5.0)
        kwargs.setdefault('clip_norm', 5.0)

        super().__init__(**kwargs)

        self.n_classes = n_classes
        self.dropout_keep_prob = dropout_keep_prob
        self.return_probas = return_probas
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        # Prefer the fast cuDNN GRU on GPU, with a CPU-compatible fallback
        if check_gpu_existence():
            self.GRU = CudnnGRU
        else:
            self.GRU = CudnnCompatibleGRU

        # [batch, seq_len, 300] token embeddings for the question and the relation
        self.question_ph = tf.placeholder(tf.float32, [None, None, 300])
        self.rel_emb_ph = tf.placeholder(tf.float32, [None, None, 300])

        # Non-padding mask and token lengths for the relation; rel_emb is the mean of
        # its token embeddings (divide_no_nan guards against all-padding input)
        r_mask_2 = tf.cast(self.rel_emb_ph, tf.bool)
        r_len_2 = tf.reduce_sum(tf.cast(r_mask_2, tf.int32), axis=2)
        r_mask = tf.cast(r_len_2, tf.bool)
        r_len = tf.reduce_sum(tf.cast(r_mask, tf.int32), axis=1)
        rel_emb = tf.math.divide_no_nan(tf.reduce_sum(self.rel_emb_ph, axis=1),
                                        tf.cast(tf.expand_dims(r_len, axis=1), tf.float32))

        self.y_ph = tf.placeholder(tf.int32, shape=(None,))
        self.one_hot_labels = tf.one_hot(self.y_ph, depth=self.n_classes, dtype=tf.float32)
        self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')

        # Non-padding mask and token lengths for the question
        q_mask_2 = tf.cast(self.question_ph, tf.bool)
        q_len_2 = tf.reduce_sum(tf.cast(q_mask_2, tf.int32), axis=2)
        q_mask = tf.cast(q_len_2, tf.bool)
        q_len = tf.reduce_sum(tf.cast(q_mask, tf.int32), axis=1)

        # Dropout with a single mask shared across time steps
        question_dr = variational_dropout(self.question_ph, keep_prob=self.keep_prob_ph)
        b_size = tf.shape(self.question_ph)[0]

        with tf.variable_scope("question_encode"):
            # Two-layer GRU encoder; its output size matches the 300-d relation
            # embedding used in the elementwise comparisons below
            rnn = self.GRU(num_layers=2, num_units=75, batch_size=b_size, input_size=300, keep_prob=self.keep_prob_ph)
            q = rnn(question_dr, seq_len=q_len)

        with tf.variable_scope("attention"):
            # Dot-product attention: score each question position against the relation embedding
            rel_emb_exp = tf.expand_dims(rel_emb, axis=1)
            dot_products = tf.reduce_sum(tf.multiply(q, rel_emb_exp), axis=2, keep_dims=False)
            # Mask padding positions before the softmax
            s_mask = softmax_mask(dot_products, q_mask)
            att_weights = tf.expand_dims(tf.nn.softmax(s_mask), axis=2)
            # Attention-weighted summary of the question
            self.s_r = tf.reduce_sum(tf.multiply(att_weights, q), axis=1)

            # Classification logits from the elementwise interaction of the question
            # summary and the relation embedding; output size follows n_classes
            self.logits = tf.layers.dense(tf.multiply(self.s_r, rel_emb), self.n_classes, activation=None, use_bias=False)
            self.y_pred = tf.argmax(self.logits, axis=-1)

            loss_tensor = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.one_hot_labels, logits=self.logits)

            self.loss = tf.reduce_mean(loss_tensor)
            self.train_op = self.get_train_op(self.loss)

        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
        self.load()
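
The variational_dropout helper is likewise external to this snippet. A minimal sketch, assuming the usual interpretation of variational (Gal & Ghahramani-style) dropout for sequences, where one mask is sampled per example and reused at every time step:

import tensorflow as tf

def variational_dropout(inputs, keep_prob):
    # inputs: [batch, time, features]. Broadcasting the noise over the time
    # dimension applies the same dropout mask at every step of the sequence.
    noise_shape = [tf.shape(inputs)[0], 1, tf.shape(inputs)[-1]]
    return tf.nn.dropout(inputs, keep_prob=keep_prob, noise_shape=noise_shape)

Because keep_prob_ph defaults to 1.0 in the graph above, this dropout is a no-op at inference time unless a smaller keep probability is fed during training.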