Esempio n. 1
0
def normal_sampling(features, t, k=100):
    """Keep the k highest-scoring points and weight their features by score.

    A sigmoid score is computed for every point with two
    ``model_utils.dense_layer`` calls, a non-negative noise term (ReLU of a
    truncated normal with stddev ``t ** 2``) is added, and the points are
    ranked by the noisy score.

    Args:
        features: tensor whose static shape unpacks as (b, n, d).
        t: temperature-like value; its square is the noise stddev.
        k: number of top-ranked points to keep.

    Returns:
        A pair ``(sub_features, score)``: the k best points' features, each
        multiplied by its noisy score (b, k, d), and the noisy scores of all
        points (b, n).
    """
    batch, num_points, dim = features.get_shape().as_list()

    # Score every point independently on the flattened (b*n, d) view.
    flat_features = tf.reshape(features, [-1, dim])  # b*n, d
    hidden = model_utils.dense_layer(flat_features, 256,
                                     'score_h1')  # b*n, 256
    raw_score = model_utils.dense_layer(
        hidden, 1, 'score', activation=tf.nn.sigmoid)  # b*n, 1
    score = tf.reshape(raw_score, [batch, num_points])  # b, n

    # The ReLU keeps only the positive half of the noise, so scores can
    # only be pushed upwards.
    perturbation = tf.random.truncated_normal([batch, num_points],
                                              stddev=t ** 2)  # b, n
    score = score + tf.nn.relu(perturbation)  # b, n

    # top_k over all n entries yields a full descending sort.
    ranked_score, ranked_idx = tf.nn.top_k(score, num_points)  # b, n

    # Build (batch_index, point_index) pairs for gather_nd.
    batch_ids = tf.range(batch)  # b
    batch_ids = tf.expand_dims(batch_ids, axis=-1)  # b, 1
    batch_ids = tf.tile(batch_ids, [1, num_points])  # b, n
    batch_ids = tf.reshape(batch_ids, [-1])  # b*n
    point_ids = tf.reshape(ranked_idx, [-1])  # b*n
    gather_index = tf.stack([batch_ids, point_ids], axis=1)  # b*n, 2
    gather_index = tf.reshape(gather_index,
                              [batch, num_points, 2])  # b, n, 2
    ranked_features = tf.gather_nd(features, gather_index)  # b, n, d

    # Only the leading k entries are used; the remainder is discarded.
    split_sizes = [k, num_points - k]
    kept_features, _ = tf.split(ranked_features, split_sizes, axis=1)
    kept_scores, _ = tf.split(ranked_score, split_sizes, axis=1)

    # Broadcast each kept score across the feature dimension.
    weights = tf.expand_dims(kept_scores, axis=2)  # b, k, 1
    weights = tf.tile(weights, [1, 1, dim])  # b, k, d
    sub_features = weights * kept_features  # b, k, d
    return sub_features, tf.reshape(score, [batch, num_points])
Esempio n. 2
0
    def build_model(self):
        """Build the sentence-pair classifier with an adversarial cost term.

        ``self.e_hypo`` and ``self.e_prem`` each go through ``lstm_layer`` and
        ``attention_layer`` (scopes "hypo"/"prem" and
        "encoder_hypo"/"encoder_prem"). The two attended outputs are
        concatenated and projected to 3 logits by ``dense_layer``.

        A second 3-way ``dense_layer`` ("adv_encoder", ``activation=None``) is
        applied directly to the flattened, concatenated inputs. The reciprocal
        of its mean cross-entropy is added to the main loss with weight 0.01.

        Sets ``alphas_hypo``, ``alphas_prem``, ``logits``, ``y``, ``cost``,
        ``accuracy``, ``optimizer`` and ``train_op`` on ``self``.
        """
        # The final LSTM states are not used further.
        rnn_outputs_hypo, _ = lstm_layer(
            self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
            "hypo")
        rnn_outputs_prem, _ = lstm_layer(
            self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
            "prem")

        last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                        rnn_outputs_hypo,
                                                        "encoder_hypo",
                                                        sparse=self.sparse)
        last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                        rnn_outputs_prem,
                                                        "encoder_prem",
                                                        sparse=self.sparse)
        self.alphas_hypo = alphas_hypo
        self.alphas_prem = alphas_prem

        encoded_pair = tf.concat([last_output_hypo, last_output_prem], axis=1)
        self.logits = dense_layer(encoded_pair,
                                  3,
                                  activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # Flatten each input to (batch, shape[1] * shape[2]).  Each tensor is
        # flattened with its OWN dimensions: the premise previously borrowed
        # the hypothesis' last dimension, which is only correct when the two
        # happen to be equal.
        # Assumes e_hypo / e_prem are rank-3 with static dims 1 and 2.
        adv_in_hypo = tf.reshape(
            self.e_hypo, [-1, self.e_hypo.shape[1] * self.e_hypo.shape[2]])
        adv_in_prem = tf.reshape(
            self.e_prem, [-1, self.e_prem.shape[1] * self.e_prem.shape[2]])

        adv_logits = dense_layer(tf.concat([adv_in_hypo, adv_in_prem], axis=1),
                                 3,
                                 activation=None,
                                 name="adv_encoder")

        # Both cross-entropy terms share the same one-hot targets.
        labels = tf.one_hot(self.y_holder, depth=3)

        # Reciprocal of the adversary's loss: this term grows as the
        # adversary's cross-entropy shrinks.
        adv_cost = 1 / tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                    logits=adv_logits))

        # The op applies softmax internally, so it receives raw logits.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                    logits=self.logits))
        self.cost = self.cost + 0.01 * adv_cost

        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 3
0
def concrete_sampling(features, t, k=100):
    """Draw k soft (relaxed) point selections and mix features with them.

    A per-point logit is computed with two ``model_utils.dense_layer`` calls.
    For each of the k samples, Gumbel noise is added to the logits, the sum is
    divided by ``t * 10.`` and passed through a softmax over the points. The
    resulting weights are used to average the input features.

    Args:
        features: tensor whose static shape unpacks as (b, n, d).
        t: temperature-like value; the noisy logits are divided by ``t * 10.``.
        k: number of soft samples to draw per batch element.

    Returns:
        A pair ``(sub_features, alpha)``: the weighted feature mixtures
        (b, k, d) and the noise-free per-point logits (b, n).
    """
    batch, num_points, dim = features.get_shape().as_list()

    # Per-point logits computed on the flattened (b*n, d) view.
    flat_features = tf.reshape(features, [-1, dim])  # b*n, d
    hidden = model_utils.dense_layer(flat_features, 256,
                                     'alpha_h')  # b*n, 256
    alpha = model_utils.dense_layer(hidden, 1, 'alpha',
                                    activation=None)  # b*n, 1

    # Repeat the logits once per sample.
    logits = tf.reshape(alpha, [batch, 1, num_points])  # b, 1, n
    logits = tf.tile(logits, [1, k, 1])  # b, k, n

    # Gumbel noise from uniform samples: g = -log(-log(u)).
    u = tf.random_uniform([batch, k, num_points])  # b, k, n
    neg_log_u = -tf.log(u)  # b, k, n
    gumbel = -tf.log(neg_log_u)  # b, k, n

    # Temperature-scaled softmax over the point axis.
    relaxed = tf.nn.softmax((logits + gumbel) / (t * 10.), axis=-1)  # b, k, n

    sub_features = tf.matmul(relaxed, features)  # b, k, d
    return sub_features, tf.reshape(alpha, [batch, num_points])
Esempio n. 4
0
    def build_model(self):
        """Build the plain sentence-pair classifier graph.

        ``self.e_hypo`` and ``self.e_prem`` each go through ``lstm_layer`` and
        ``attention_layer``; the attended outputs are concatenated and
        projected to 3 logits. The loss is the mean softmax cross-entropy
        against one-hot ``self.y_holder``, minimised with plain gradient
        descent.

        Sets ``alphas_hypo``, ``alphas_prem``, ``logits``, ``y``, ``cost``,
        ``accuracy``, ``optimizer`` and ``train_op`` on ``self``.
        """
        # Recurrent encoders; the final states are not needed afterwards.
        hypo_seq, _ = lstm_layer(self.e_hypo, self.lstm_size, self.batch_size,
                                 self.seq_len_hypo, "hypo")
        prem_seq, _ = lstm_layer(self.e_prem, self.lstm_size, self.batch_size,
                                 self.seq_len_prem, "prem")

        # Attention pooling; the weights are kept on self for inspection.
        hypo_vec, self.alphas_hypo = attention_layer(
            self.attention_size, hypo_seq, "encoder_hypo", sparse=self.sparse)
        prem_vec, self.alphas_prem = attention_layer(
            self.attention_size, prem_seq, "encoder_prem", sparse=self.sparse)

        # 3-way prediction head on the concatenated sentence vectors.
        pair_vec = tf.concat([hypo_vec, prem_vec], axis=1)
        self.logits = dense_layer(pair_vec, 3, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # The cross-entropy op takes raw logits, not the softmax output.
        targets = tf.one_hot(self.y_holder, depth=3)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        self.cost = tf.reduce_mean(per_example_loss)

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 5
0
    def build_model(self):
        """Build the single-sentence binary classifier with dropout.

        ``self.e`` goes through ``lstm_layer`` and ``attention_layer`` (scope
        "pred_encoder"). Dropout with ``self.keep_probs`` is applied to the
        attended output before the 2-way projection. The loss is the mean
        softmax cross-entropy against one-hot ``self.y_holder``.

        Sets ``alphas``, ``logits``, ``y``, ``cost``, ``accuracy``,
        ``optimizer`` and ``train_op`` on ``self``.
        """
        # The final LSTM state is not used.
        sequence_out, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                     self.seq_len)

        attended, self.alphas = attention_layer(
            self.attention_size, sequence_out, "pred_encoder",
            sparse=self.sparse)

        # Regularise the sentence vector before classification.
        attended = tf.nn.dropout(attended, self.keep_probs)

        self.logits = dense_layer(attended, 2, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # The cross-entropy op takes raw logits, not the softmax output.
        targets = tf.one_hot(self.y_holder, depth=2)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        self.cost = tf.reduce_mean(per_example_loss)

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 6
0
    def build_model(self):
        """Build the binary classifier with a regulariser on attention.

        ``self.e`` (commented in the original as batch_size x sentence_length
        x emb_dim) goes through ``lstm_layer`` and ``attention_layer`` (scope
        "encoder"), then a 2-way projection. The loss is the mean softmax
        cross-entropy plus ``get_reg`` evaluated on the attention weights with
        ``lam=self.lam`` and ``type=self.reg``.

        Sets ``alphas``, ``logits``, ``y``, ``cost``, ``accuracy``,
        ``optimizer`` and ``train_op`` on ``self``.
        """
        # The final LSTM state is not used.
        sequence_out, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                     self.seq_len)

        attended, self.alphas = attention_layer(
            self.attention_size, sequence_out, "encoder", sparse=self.sparse)

        self.logits = dense_layer(attended, 2, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # softmax_cross_entropy_with_logits applies softmax internally, so it
        # must be fed the unscaled logits rather than self.y.
        targets = tf.one_hot(self.y_holder, depth=2)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        data_loss = tf.reduce_mean(per_example_loss)

        # Penalty term computed from the attention weights.
        attention_penalty = get_reg(self.alphas, lam=self.lam, type=self.reg)
        self.cost = data_loss + attention_penalty

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 7
0
    def build_model(self):
        """Build a single dense-layer binary classifier on the flattened input.

        ``self.e`` is reshaped to (-1, shape[1] * shape[2]) and fed to one
        2-unit ``dense_layer`` with ``activation=None``. The loss is the mean
        softmax cross-entropy against one-hot ``self.y_holder``.

        Sets ``logits``, ``cost``, ``optimizer``, ``train_op``, ``y`` and
        ``accuracy`` on ``self``.
        """
        # Collapse dimensions 1 and 2 of the input into one feature axis.
        flat_width = self.e.shape[1] * self.e.shape[2]
        flat_input = tf.reshape(self.e, [-1, flat_width])

        self.logits = dense_layer(flat_input, 2, name="pred_out",
                                  activation=None)

        # The cross-entropy op takes raw logits, not the softmax output.
        targets = tf.one_hot(self.y_holder, depth=2)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        self.cost = tf.reduce_mean(per_example_loss)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)

        # Probabilities and accuracy are for evaluation only.
        self.y = tf.nn.softmax(self.logits)

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)
Esempio n. 8
0
    def build_model(self):
        """Build the binary classifier with a hand-rolled adversarial term.

        Main path: ``self.e`` goes through ``lstm_layer``,
        ``attention_layer`` (scope "pred_encoder") and dropout with
        ``self.keep_probs``, then a 2-way projection.

        Adversary: the flattened input is multiplied by ``self.w_adv`` and
        offset by ``self.b_adv`` (variables "w" and "b"). The reciprocal of
        the adversary's mean cross-entropy is added to the main loss with
        weight 0.01.

        Sets ``alphas``, ``logits``, ``y``, ``w_adv``, ``b_adv``, ``cost``,
        ``accuracy``, ``optimizer`` and ``train_op`` on ``self``.
        """
        # The final LSTM state is not used.
        sequence_out, _ = lstm_layer(self.e, self.lstm_size, self.batch_size,
                                     self.seq_len)

        attended, self.alphas = attention_layer(
            self.attention_size, sequence_out, "pred_encoder",
            sparse=self.sparse)

        attended = tf.nn.dropout(attended, self.keep_probs)

        self.logits = dense_layer(attended, 2, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # Debug adversary: explicit weight/bias variables in place of a
        # dense_layer call, exposed on self.
        flat_width = self.e.shape[1] * self.e.shape[2]
        adv_in = tf.reshape(self.e, [-1, flat_width])
        self.w_adv = tf.get_variable(
            "w",
            shape=[adv_in.shape[-1], 2],
            initializer=tf.truncated_normal_initializer())
        self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
        adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv

        # Reciprocal of the adversary's loss: this term grows as the
        # adversary's cross-entropy shrinks.
        adv_targets = tf.one_hot(self.y_holder, depth=2)
        adv_per_example = tf.nn.softmax_cross_entropy_with_logits(
            labels=adv_targets, logits=adv_logits)
        adv_cost = 1 / tf.reduce_mean(adv_per_example)

        # Main classification loss on the raw logits.
        targets = tf.one_hot(self.y_holder, depth=2)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        task_loss = tf.reduce_mean(per_example_loss)
        self.cost = task_loss + 0.01 * adv_cost

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 9
0
    def build_model(self):
        """Build the sentence-pair classifier with attention regularisation.

        ``self.e_hypo`` and ``self.e_prem`` (commented in the original as
        batch_size x sentence_length x emb_dim) each go through ``lstm_layer``
        and ``attention_layer``. The attended outputs are concatenated and
        projected to 3 logits. The loss is the mean softmax cross-entropy plus
        ``get_reg`` evaluated on both sets of attention weights.

        Sets ``alphas_hypo``, ``alphas_prem``, ``logits``, ``y``, ``cost``,
        ``accuracy``, ``optimizer`` and ``train_op`` on ``self``.
        """
        # Recurrent encoders; the final states are not needed afterwards.
        hypo_seq, _ = lstm_layer(self.e_hypo, self.lstm_size, self.batch_size,
                                 self.seq_len_hypo, "hypo")
        prem_seq, _ = lstm_layer(self.e_prem, self.lstm_size, self.batch_size,
                                 self.seq_len_prem, "prem")

        hypo_vec, hypo_alphas = attention_layer(
            self.attention_size, hypo_seq, "encoder_hypo", sparse=self.sparse)
        prem_vec, prem_alphas = attention_layer(
            self.attention_size, prem_seq, "encoder_prem", sparse=self.sparse)
        self.alphas_hypo = hypo_alphas
        self.alphas_prem = prem_alphas

        # 3-way prediction head on the concatenated sentence vectors.
        pair_vec = tf.concat([hypo_vec, prem_vec], axis=1)
        self.logits = dense_layer(pair_vec, 3, activation=None,
                                  name="pred_out")
        self.y = tf.nn.softmax(self.logits)

        # softmax_cross_entropy_with_logits applies softmax internally, so it
        # must be fed the unscaled logits rather than self.y.
        targets = tf.one_hot(self.y_holder, depth=3)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=targets, logits=self.logits)
        data_loss = tf.reduce_mean(per_example_loss)

        # One penalty per attention distribution, summed before being added
        # to the data loss.
        hypo_penalty = get_reg(hypo_alphas, lam=self.lam, type=self.reg)
        prem_penalty = get_reg(prem_alphas, lam=self.lam, type=self.reg)
        self.cost = data_loss + (hypo_penalty + prem_penalty)

        predicted = tf.argmax(self.y, 1)
        hits = tf.cast(tf.equal(self.y_holder, predicted), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
Esempio n. 10
0
    def build_model(self):
        """Build a two-branch sentence-pair classifier with a projection step.

        Two parallel encoder stacks are created over the same inputs
        (``self.e_hypo`` / ``self.e_prem``): a "prediction" branch (scopes
        "hypo"/"prem", "encoder_*") and a "key-word model" branch (scopes
        "kwm_*").  Their attended outputs feed three ``dense_layer`` heads; the
        final logits are the "conv_loss" head's output with the component
        lying in the column space of the "conv_H" head's output subtracted.

        The loss is the mean softmax cross-entropy on those projected logits
        plus ``get_reg`` on the key-word branch's attention weights.

        Sets ``alphas_hypo``, ``alphas_prem``, ``temp``, ``logits``, ``y``,
        ``cost``, ``optimizer``, ``train_op`` and ``accuracy`` on ``self``.
        """
        # Prediction branch: one LSTM + attention per sentence.
        rnn_outputs_hypo, final_state_hypo = lstm_layer(
            self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
            "hypo")
        rnn_outputs_prem, final_state_prem = lstm_layer(
            self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
            "prem")

        last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                        rnn_outputs_hypo,
                                                        "encoder_hypo",
                                                        sparse=self.sparse)
        last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                        rnn_outputs_prem,
                                                        "encoder_prem",
                                                        sparse=self.sparse)
        self.alphas_hypo = alphas_hypo
        self.alphas_prem = alphas_prem
        # last_output = tf.nn.dropout(last_output, self.keep_probs)

        # Key-word model (kwm) branch: same inputs and sizes, separate scopes.
        kwm_rnn_outputs_hypo, kwm_final_state_hypo = lstm_layer(
            self.e_hypo,
            self.lstm_size,
            self.batch_size,
            self.seq_len_hypo,
            scope="kwm_hypo")
        kwm_rnn_outputs_prem, kwm_final_state_prem = lstm_layer(
            self.e_prem,
            self.lstm_size,
            self.batch_size,
            self.seq_len_prem,
            scope="kwm_prem")
        kwm_last_output_hypo, kwm_alphas_hypo = attention_layer(
            self.attention_size,
            kwm_rnn_outputs_hypo,
            "kwm_encoder_hypo",
            sparse=self.sparse)
        kwm_last_output_prem, kwm_alphas_prem = attention_layer(
            self.attention_size,
            kwm_rnn_outputs_prem,
            "kwm_encoder_prem",
            sparse=self.sparse)

        # Sentence-pair representation of each branch.
        last_output = tf.concat([last_output_hypo, last_output_prem], axis=1)
        kwm_last_output = tf.concat(
            [kwm_last_output_hypo, kwm_last_output_prem], axis=1)

        ############################
        # Hex #########################

        h_fc1 = last_output  # prediction-branch features
        h_fc2 = kwm_last_output  # key-word-branch features

        # Hex layer definition
        # The string literal below is disabled code: an earlier version with a
        # single shared weight matrix W_cl and bias b_cl.
        # NOTE(review): the three dense_layer calls further down use distinct
        # names ("conv_pred", "conv_H", "conv_loss"); whether they share
        # weights depends on dense_layer -- presumably they do not. Confirm
        # this is intended.
        """
        self.W_cl_1 = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
        self.W_cl_2 = tf.Variable(tf.random_normal([1200, 3]), trainable=True)
        self.b_cl = tf.Variable(tf.random_normal((3,)), trainable=True)
        self.W_cl = tf.concat([self.W_cl_1, self.W_cl_2], 0)
        """

        # Compute prediction using [h_fc1, 0(pad)]
        pad = tf.zeros_like(h_fc2, tf.float32)
        # print(pad.shape) -> (?, 600)

        yconv_contact_pred = tf.nn.dropout(tf.concat([h_fc1, pad], 1),
                                           self.keep_probs)

        # y_conv_pred = tf.matmul(yconv_contact_pred, self.W_cl) + self.b_cl
        y_conv_pred = dense_layer(yconv_contact_pred, 3, name="conv_pred")

        # NOTE(review): this assignment is overwritten by the projected logits
        # further down, so y_conv_pred never reaches self.y, the cost or the
        # accuracy.
        self.logits = y_conv_pred  # Prediction

        # Compute loss using [h_fc1, h_fc2] and [0(pad2), h_fc2]
        pad2 = tf.zeros_like(h_fc1, tf.float32)

        yconv_contact_H = tf.concat([pad2, h_fc2], 1)
        # Get Fg: head output from the key-word features alone (no dropout).
        # y_conv_H = tf.matmul(yconv_contact_H, self.W_cl) + self.b_cl  # get Fg
        y_conv_H = dense_layer(yconv_contact_H, 3, name="conv_H")

        yconv_contact_loss = tf.nn.dropout(tf.concat([h_fc1, h_fc2], 1),
                                           self.keep_probs)
        # Get Fb: head output from both feature sets.
        # y_conv_loss = tf.matmul(yconv_contact_loss, self.W_cl) + self.b_cl  # get Fb
        y_conv_loss = dense_layer(yconv_contact_loss, 3, name="conv_loss")

        # temp = Fg^T Fg (presumably 3 x 3, given the 3-unit head -- confirm).
        # Exposed on self so it can be fetched/inspected.
        temp = tf.matmul(y_conv_H, y_conv_H, transpose_a=True)
        self.temp = temp

        # Fb - Fg (Fg^T Fg)^-1 Fg^T Fb: subtract from Fb its projection onto
        # the column space of Fg.
        # NOTE(review): tf.matrix_inverse has no guard here; if temp is
        # singular or ill-conditioned this may fail or produce unstable values.
        y_conv_loss = y_conv_loss - tf.matmul(
            tf.matmul(tf.matmul(y_conv_H, tf.matrix_inverse(temp)),
                      y_conv_H,
                      transpose_b=True), y_conv_loss)  # get loss

        # The projected output is used for both the loss and the predictions.
        self.logits = y_conv_loss
        self.y = tf.nn.softmax(self.logits)

        # The cross-entropy op is fed the raw (projected) logits, not self.y.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
                self.y_holder, depth=3),
                                                    logits=self.logits))

        # Regularize kwm attention (only the key-word branch is penalised).
        reg1 = get_reg(kwm_alphas_hypo, lam=self.lam, type=self.reg)
        reg2 = get_reg(kwm_alphas_prem, lam=self.lam, type=self.reg)

        self.cost += reg1 + reg2

        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.cost)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))