Example #1
 def _preprocess(self,
                 Xw,
                 Xw_len,
                 Xc,
                 Xc_len,
                 scope="preprocess_layers",
                 reuse=False):
     with tf.variable_scope(scope, reuse=reuse):
         Xw_embedded, size_w = embedded(Xw,
                                        self.embeddings_w[0],
                                        self.embeddings_w[1],
                                        self.config.wv_config["train_w"],
                                        scope="embedded_w")
         Xc_embedded, size_c = embedded(Xc,
                                        self.embeddings_c[0],
                                        self.embeddings_c[1],
                                        self.config.wv_config["train_c"],
                                        scope="embedded_c")
         batch_size, seq_len = tf.shape(Xw)[0], tf.shape(Xw)[1]
         Xc_embedded = tf.reshape(Xc_embedded,
                                  shape=[batch_size * seq_len, -1, size_c])
         Xc_len = tf.reshape(Xc_len, shape=[
             batch_size * seq_len,
         ])
         Xc_embedded, size_c = bi_gru(Xc_embedded, Xc_len,
                                      (self.config.char_dim, ), 2,
                                      self.initializer, 1.0, "bi_gru_c2w")
         Xc_embedded = tf.reshape(Xc_embedded,
                                  shape=[batch_size, seq_len, size_c])
         X_embedded = tf.concat([Xw_embedded, Xc_embedded], axis=-1)
         out_w, out_w_size = bi_gru(X_embedded, Xw_len,
                                    (self.config.bi_dim, ), 1,
                                    self.initializer, 1.0, "bi_gru__wc")
         return out_w, out_w_size
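The `_preprocess` method above embeds word and character ids, composes a per-token character representation with a character-level bi-GRU, concatenates it with the word embedding, and runs a sentence-level bi-GRU over the result. Below is a minimal usage sketch; the placeholder shapes, the length computation via `tf.sign`, and the `model` instance are assumptions for illustration, not part of the original listing (TF 1.x style).

# Hypothetical usage sketch (assumes an instantiated model object exposing _preprocess).
import tensorflow as tf

Xw = tf.placeholder(tf.int32, shape=[None, None])        # word ids: (batch, seq_len)
Xc = tf.placeholder(tf.int32, shape=[None, None, None])  # char ids: (batch, seq_len, word_len)
Xw_len = tf.reduce_sum(tf.sign(Xw), axis=-1)             # sentence lengths from the padding mask
Xc_len = tf.reduce_sum(tf.sign(Xc), axis=-1)             # per-token word lengths

# out_w: (batch, seq_len, out_w_size) contextual token representations
out_w, out_w_size = model._preprocess(Xw, Xw_len, Xc, Xc_len)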
Example #2
 def _encode(self, Xw, Xw_l, Xc, Xc_l, scope="encode_layers", reuse=False):
     with tf.variable_scope(scope, reuse=reuse):
         Xw_embedded, size_w = embedded(Xw,
                                        self.embeddings_w[0],
                                        self.embeddings_w[1],
                                        self.config.wv_config["train_w"],
                                        scope="embedded_w")
         Xc_embedded, size_c = embedded(Xc,
                                        self.embeddings_c[0],
                                        self.embeddings_c[1],
                                        self.config.wv_config["train_c"],
                                        scope="embedded_c")
         batch_size = tf.shape(Xw)[0]
         # char
         v0, v0_size = attention_han(Xc_embedded, self.config.un_dim,
                                     self.initializer, "attention_han_c")
         v1, v1_size = bi_gru(Xc_embedded, Xc_l, (self.config.bi_dim, ), 2,
                              self.initializer, 1.0, "bi_gru_c")
         char_v = tf.reshape(tf.concat([v0, v1], axis=-1),
                             [batch_size, v0_size + v1_size])
         # word
         v0, v0_size = attention_han(Xw_embedded, self.config.un_dim,
                                     self.initializer, "attention_han_w")
         v1, v1_size = bi_gru(Xw_embedded, Xw_l, (self.config.bi_dim, ), 2,
                              self.initializer, 1.0, "bi_gru_w")
         word_v = tf.reshape(tf.concat([v0, v1], axis=-1),
                             [batch_size, v0_size + v1_size])
         # phrase
         Xp_embedded, size_p = conv_with_max_pool(Xw_embedded, (2, 3, 4, 5),
                                                  size_w // 4, False,
                                                  tf.nn.selu,
                                                  self.initializer,
                                                  "conv_w2p")
         v0, v0_size = attention_han(Xp_embedded, self.config.un_dim,
                                     self.initializer, "attention_han_p")
         v1, v1_size = bi_gru(Xp_embedded, Xw_l, (self.config.bi_dim, ), 2,
                              self.initializer, 1.0, "bi_gru_p")
         phrase_v = tf.reshape(tf.concat([v0, v1], axis=-1),
                               [batch_size, v0_size + v1_size])
         return char_v, word_v, phrase_v
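`_encode` returns three fixed-size sentence vectors at character, word, and phrase granularity (the phrase view comes from the convolutions with window sizes 2 to 5 over the word embeddings; each sequence is pooled with HAN-style attention plus a bi-GRU encoder). The listing does not show how these vectors are consumed downstream; the sketch below is only one plausible pairwise head, and the X1*/X2* inputs, the `model` instance, and the dense layer are assumptions.

# Hypothetical pairwise head (assumption; not from the original listing).
c1, w1, p1 = model._encode(X1w, X1w_l, X1c, X1c_l, scope="encode_layers")
c2, w2, p2 = model._encode(X2w, X2w_l, X2c, X2c_l, scope="encode_layers", reuse=True)

v1 = tf.concat([c1, w1, p1], axis=-1)   # sentence 1 representation
v2 = tf.concat([c2, w2, p2], axis=-1)   # sentence 2 representation

# Simple matching features: concatenation, absolute difference, element-wise product.
features = tf.concat([v1, v2, tf.abs(v1 - v2), v1 * v2], axis=-1)
logits = tf.layers.dense(features, 1)   # binary similarity logit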
    def build_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            with tf.variable_scope("placeholders"):
                self.X1w = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name="sent1w_ph")
                self.X2w = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name="sent2w_ph")
                self.X1c = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None, None],
                                          name="sent1c_ph")
                self.X2c = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None, None],
                                          name="sent2c_ph")
                self.y = tf.placeholder(dtype=tf.int32,
                                        shape=[
                                            None,
                                        ],
                                        name="label_ph")
                self.keep_prob = tf.placeholder_with_default(
                    1.0, shape=[], name="keep_prob_ph")
                self.X1w_mask = tf.sign(self.X1w, name="sent1w_mask")
                self.X2w_mask = tf.sign(self.X2w, name="sent2w_mask")
                self.X1c_mask = tf.sign(self.X1c, name="sent1c_mask")
                self.X2c_mask = tf.sign(self.X2c, name="sent2c_mask")
                self.X1w_l = tf.reduce_sum(self.X1w_mask,
                                           axis=-1,
                                           name="sent1w_len")
                self.X2w_l = tf.reduce_sum(self.X2w_mask,
                                           axis=-1,
                                           name="sent2w_len")
                self.X1c_l = tf.reduce_sum(self.X1c_mask,
                                           axis=-1,
                                           name="sent1c_len")
                self.X2c_l = tf.reduce_sum(self.X2c_mask,
                                           axis=-1,
                                           name="sent2c_len")

            X1_f, X1_b = self._preprocess(self.X1w,
                                          self.X1w_l,
                                          self.X1c,
                                          self.X1c_l,
                                          scope="preprocess_layers")
            X2_f, X2_b = self._preprocess(self.X2w,
                                          self.X2w_l,
                                          self.X2c,
                                          self.X2c_l,
                                          scope="preprocess_layers",
                                          reuse=True)

            with tf.variable_scope("match_layers"):
                # Shapes: (batch_size, num_sentence_words, 8*multi-perspective_dims)
                match_1_to_2_out, match_2_to_1_out = bilateral_matching(
                    X1_f, X1_b, X2_f, X2_b, self.X1w_mask, self.X2w_mask,
                    self.keep_prob, self.config.mp_dim)

            # Aggregate the representations from the matching functions.
            with tf.variable_scope("aggregate_layers"):
                seq_1_fb, _ = bi_gru(match_1_to_2_out, self.X1w_l,
                                     (self.config.bi_dim, ), 2,
                                     self.initializer, 1.0, "bi_gru")
                seq_2_fb, _ = bi_gru(match_2_to_1_out,
                                     self.X2w_l, (self.config.bi_dim, ),
                                     2,
                                     self.initializer,
                                     1.0,
                                     "bi_gru",
                                     reuse=True)
                combined_aggregated_representation = tf.concat(
                    [seq_1_fb, seq_2_fb], -1)

            with tf.variable_scope("fc_layers"):
                h = tf.nn.dropout(combined_aggregated_representation,
                                  keep_prob=self.keep_prob)
                h = tf.layers.dense(h,
                                    self.config.un_dim,
                                    activation=tf.nn.selu,
                                    kernel_initializer=self.initializer)
                h = tf.nn.dropout(h, keep_prob=self.keep_prob)
                pi = 0.01
                self.logits = tf.layers.dense(
                    h,
                    1,
                    kernel_initializer=self.initializer,
                    bias_initializer=tf.constant_initializer(-np.log((1 - pi) /
                                                                     pi)))
            self.pos_prob = tf.nn.sigmoid(self.logits)
            self.var_list = [v for v in tf.global_variables()]
            if self.config.fine_tune:
                self.var_list_trainable = [
                    v for v in tf.trainable_variables()
                    if "embedded" in v.name or "fc" in v.name
                ]
            else:
                self.var_list_trainable = [v for v in tf.trainable_variables()]

            with tf.name_scope("Loss"):
                self.loss_op = build_loss(labels=self.y,
                                          logits=self.logits,
                                          focal=self.config.focal,
                                          alpha=self.config.alpha,
                                          gamma=self.config.gamma)

            with tf.name_scope("Optimize"):
                self.adam_op = tf.train.AdamOptimizer(learning_rate=self.config.init_learning_rate). \
                    minimize(self.loss_op, var_list=self.var_list_trainable)
                self.sgd_op = tf.train.MomentumOptimizer(learning_rate=self.config.init_learning_rate, momentum=0.9). \
                    minimize(self.loss_op, var_list=self.var_list_trainable)

            with tf.name_scope("Prediction"):
                self.predicted = tf.cast(tf.greater_equal(
                    self.pos_prob, self.config.threshold),
                                         dtype=tf.int32)

            with tf.name_scope("Summary"):
                self.summaries = build_summaries()
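In `build_graph`, the bias of the final logits layer is initialized to -log((1 - pi) / pi) with pi = 0.01, so the sigmoid output starts out close to pi; this is the prior-probability initialization commonly paired with focal loss (note `self.config.focal` in the loss) to keep easy negatives from dominating the loss early in training. A quick standalone check of that identity, separate from the graph:

import numpy as np

pi = 0.01
b = -np.log((1 - pi) / pi)       # initial bias of the logits layer
print(1.0 / (1.0 + np.exp(-b)))  # sigmoid(b) == pi == 0.01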