Example #1
 def testNonBatchMatrix(self):
   matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
   expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
   with self.test_session():
     transposed = tf.batch_matrix_transpose(matrix)
     self.assertEqual((3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
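Note: tf.batch_matrix_transpose is the old (pre-1.0) TensorFlow name for this op; it transposes the two innermost dimensions of a tensor. A minimal sketch of the same check in later TensorFlow releases, assuming tf.linalg.matrix_transpose as the replacement (eager execution, so no session is needed):

import tensorflow as tf

matrix = [[1, 2, 3], [4, 5, 6]]                  # shape (2, 3)
transposed = tf.linalg.matrix_transpose(matrix)  # shape (3, 2)
print(transposed.numpy().tolist())               # [[1, 4], [2, 5], [3, 6]]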
Example #2
 def testNonBatchMatrix(self):
     matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
     expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
     with self.test_session():
         transposed = tf.batch_matrix_transpose(matrix)
         self.assertEqual((3, 2), transposed.get_shape())
         self.assertAllEqual(expected_transposed, transposed.eval())
Example #3
 def testNonBatchMatrixDynamicallyDefined(self):
     matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
     expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
     with self.test_session():
         matrix_ph = tf.placeholder(tf.int32)
         transposed = tf.batch_matrix_transpose(matrix_ph)
         self.assertAllEqual(expected_transposed,
                             transposed.eval(feed_dict={matrix_ph: matrix}))
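When a placeholder is created without a shape, the static shape of the result is unknown, which is why the dynamically defined tests on this page assert only on the evaluated values. A minimal sketch of what stays unknown, assuming the same pre-1.0 API as the surrounding examples:

matrix_ph = tf.placeholder(tf.int32)              # shape fully unknown
transposed = tf.batch_matrix_transpose(matrix_ph)
print(transposed.get_shape())                     # <unknown>, so no static shape assert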
Example #4
 def testNonBatchMatrixDynamicallyDefined(self):
   matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
   expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
   with self.test_session():
     matrix_ph = tf.placeholder(tf.int32)
     transposed = tf.batch_matrix_transpose(matrix_ph)
     self.assertAllEqual(
         expected_transposed,
         transposed.eval(feed_dict={matrix_ph: matrix}))
Example #5
 def testBatchMatrix(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]
   expected_transposed = [matrix_0_t, matrix_1_t]
   with self.test_session():
     transposed = tf.batch_matrix_transpose(batch_matrix)
     self.assertAllEqual(expected_transposed, transposed.eval())
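The batched behaviour tested above, transposing only the two trailing axes, matches a plain axis permutation. A small NumPy sketch for comparison (NumPy is used here for illustration and is not part of the original test):

import numpy as np

batch = np.array([[[1, 2, 3], [4, 5, 6]],
                  [[11, 22, 33], [44, 55, 66]]])  # shape (2, 2, 3)
batch_t = np.transpose(batch, (0, 2, 1))          # shape (2, 3, 2)
print(batch_t.tolist())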
Example #6
 def testBatchMatrix(self):
     matrix_0 = [[1, 2, 3], [4, 5, 6]]
     matrix_0_t = [[1, 4], [2, 5], [3, 6]]
     matrix_1 = [[11, 22, 33], [44, 55, 66]]
     matrix_1_t = [[11, 44], [22, 55], [33, 66]]
     batch_matrix = [matrix_0, matrix_1]
     expected_transposed = [matrix_0_t, matrix_1_t]
     with self.test_session():
         transposed = tf.batch_matrix_transpose(batch_matrix)
         self.assertAllEqual(expected_transposed, transposed.eval())
Example #7
 def testBatchMatrixDynamicallyDefined(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
   expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
   with self.test_session():
     batch_matrix_ph = tf.placeholder(tf.int32)
     transposed = tf.batch_matrix_transpose(batch_matrix_ph)
     self.assertAllEqual(
         expected_transposed,
         transposed.eval(feed_dict={batch_matrix_ph: batch_matrix}))
Example #8
 def testBatchMatrixDynamicallyDefined(self):
     matrix_0 = [[1, 2, 3], [4, 5, 6]]
     matrix_0_t = [[1, 4], [2, 5], [3, 6]]
     matrix_1 = [[11, 22, 33], [44, 55, 66]]
     matrix_1_t = [[11, 44], [22, 55], [33, 66]]
     batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
     expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
     with self.test_session():
         batch_matrix_ph = tf.placeholder(tf.int32)
         transposed = tf.batch_matrix_transpose(batch_matrix_ph)
         self.assertAllEqual(
             expected_transposed,
             transposed.eval(feed_dict={batch_matrix_ph: batch_matrix}))
Example #9
  def _updated_mat(self, mat, v, diag):
    # Get dense matrix defined by its square root, which is an update of `mat`:
    # A = (mat + v D v^T) (mat + v D v^T)^T
    # D is the diagonal matrix with `diag` on the diagonal.

    # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
    if diag is None:
      diag_vt = tf.batch_matrix_transpose(v)
    else:
      diag_mat = tf.batch_matrix_diag(diag)
      diag_vt = tf.batch_matmul(diag_mat, v, adj_y=True)

    v_diag_vt = tf.batch_matmul(v, diag_vt)
    sqrt = mat + v_diag_vt
    a = tf.batch_matmul(sqrt, sqrt, adj_y=True)
    return a.eval()
Example #10
    def _updated_mat(self, mat, v, diag):
        # Get dense matrix defined by its square root, which is an update of `mat`:
        # A = (mat + v D v^T) (mat + v D v^T)^T
        # D is the diagonal matrix with `diag` on the diagonal.

        # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
        if diag is None:
            diag_vt = tf.batch_matrix_transpose(v)
        else:
            diag_mat = tf.batch_matrix_diag(diag)
            diag_vt = tf.batch_matmul(diag_mat, v, adj_y=True)

        v_diag_vt = tf.batch_matmul(v, diag_vt)
        sqrt = mat + v_diag_vt
        a = tf.batch_matmul(sqrt, sqrt, adj_y=True)
        return a.eval()
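For reference, _updated_mat builds the dense matrix A = (mat + V D V^T)(mat + V D V^T)^T, where D defaults to the identity. A minimal single-matrix NumPy sketch of the same arithmetic (the TF version above is batched; the helper name updated_mat_np is an assumption of this note):

import numpy as np

def updated_mat_np(mat, v, diag=None):
    # D = diag(diag), or the identity when diag is None.
    d = np.eye(v.shape[1]) if diag is None else np.diag(diag)
    sqrt = mat + v @ d @ v.T
    return sqrt @ sqrt.T                          # A = sqrt sqrt^T

mat = np.eye(3)
v = np.array([[1.0], [2.0], [3.0]])               # rank-1 update
print(updated_mat_np(mat, v, diag=[0.5]))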
Example #11
 def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
   vector = [1, 2, 3]
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, "should be a "):
       tf.batch_matrix_transpose(vector)
Example #12
 def testNonBatchMatrix(self):
   matrix = [[1, 2, 3], [4, 5, 6]]
   expected_transposed = [[1, 4], [2, 5], [3, 6]]
   with self.test_session():
     transposed = tf.batch_matrix_transpose(matrix)
     self.assertAllEqual(expected_transposed, transposed.eval())
Example #13
 def testNonBatchMatrix(self):
     matrix = [[1, 2, 3], [4, 5, 6]]
     expected_transposed = [[1, 4], [2, 5], [3, 6]]
     with self.test_session():
         transposed = tf.batch_matrix_transpose(matrix)
         self.assertAllEqual(expected_transposed, transposed.eval())
Example #14
    def __init__(self,
                 sequence_length,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 embedding_size_lex,
                 attention_depth_w2v,
                 attention_depth_lex,
                 l2_reg_lambda=0.0,
                 l1_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x_2c = tf.placeholder(
            tf.float32, [None, sequence_length, embedding_size, 2],
            name="input_x_2c")
        self.input_x = tf.placeholder(tf.float32,
                                      [None, sequence_length, embedding_size],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        self.input_x_lexicon = tf.placeholder(
            tf.float32, [None, sequence_length, embedding_size_lex],
            name="input_x_lexicon")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        l1_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.embedded_chars = self.input_x
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)
            print self.embedded_chars_expanded

            # lexicon embedding
            self.embedded_chars_lexicon = self.input_x_lexicon
            self.embedded_chars_expanded_lexicon = tf.expand_dims(
                self.embedded_chars_lexicon, -1)

            print '[self.embedded_chars]', self.embedded_chars
            print '[self.embedded_chars_expanded]', self.embedded_chars_expanded

            print '[self.embedded_chars_lexicon]', self.embedded_chars_lexicon
            print '[self.embedded_chars_expanded_lexicon]', self.embedded_chars_expanded_lexicon

        attention_outputs = []
        with tf.name_scope("pre-attention"):
            U_shape = [embedding_size, attention_depth_w2v]  # (400, 60)
            self.U_w2v = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                     name="U_w2v")
            U_shape = [embedding_size_lex, attention_depth_lex]  # (15, 60)
            self.U_lex = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                     name="U_lex")

            self.embedded_chars_tr = tf.batch_matrix_transpose(
                self.embedded_chars)
            self.embedded_chars_lexicon_tr = tf.batch_matrix_transpose(
                self.embedded_chars_lexicon)
            print '[self.embedded_chars_lexicon_tr]', self.embedded_chars_lexicon_tr

            def fn_matmul_w2v(previous_output, current_input):
                print(current_input.get_shape())
                current_output = tf.matmul(current_input, self.U_w2v)
                print 'previous_output', previous_output
                print 'current_output', current_output
                return current_output

            def fn_matmul_lex(previous_output, current_input):
                print(current_input.get_shape())
                current_output = tf.matmul(current_input, self.U_lex)
                print 'previous_output', previous_output
                print 'current_output', current_output
                return current_output

            initializer = tf.constant(np.zeros(
                [sequence_length, attention_depth_w2v]),
                                      dtype=tf.float32)
            WU_w2v = tf.scan(fn_matmul_w2v,
                             self.embedded_chars,
                             initializer=initializer)
            print '[WU_w2v]', WU_w2v

            initializer = tf.constant(np.zeros(
                [sequence_length, attention_depth_lex]),
                                      dtype=tf.float32)
            LU_lex = tf.scan(fn_matmul_lex,
                             self.embedded_chars_lexicon,
                             initializer=initializer)
            print '[LU_lex]', LU_lex

            WU_w2v_expanded = tf.expand_dims(WU_w2v, -1)
            print '[WU_w2v_expanded]', WU_w2v_expanded  # (?, 60(seq_len), 60(depth), 1)

            w2v_pool = tf.nn.max_pool(WU_w2v_expanded,
                                      ksize=[1, 1, attention_depth_w2v, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID',
                                      name="w2v_pool")

            print '[w2v_pool]', w2v_pool  # (?, 60(seq_len), 1, 1) #select attention for w2v

            LU_lex_expanded = tf.expand_dims(LU_lex, -1)
            print '[LU_lex_expanded]', LU_lex_expanded  # (?, 60(seq_len), 60(depth), 1)

            lex_pool = tf.nn.max_pool(LU_lex_expanded,
                                      ksize=[1, 1, attention_depth_lex, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID',
                                      name="lex_pool")

            print '[lex_pool]', lex_pool  # (?, 60(seq_len), 1, 1) #select attention for lex

            w2v_pool_sq = tf.expand_dims(
                tf.squeeze(w2v_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[w2v_pool_sq]', w2v_pool_sq

            lex_pool_sq = tf.expand_dims(
                tf.squeeze(lex_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[lex_pool_sq]', lex_pool_sq

            attentioned_w2v = tf.batch_matmul(self.embedded_chars_tr,
                                              w2v_pool_sq)
            attentioned_lex = tf.batch_matmul(self.embedded_chars_lexicon_tr,
                                              lex_pool_sq)

            attentioned_w2v_sq = tf.squeeze(attentioned_w2v, squeeze_dims=[2])
            attentioned_lex_sq = tf.squeeze(attentioned_lex, squeeze_dims=[2])

            print '[attentioned_w2v]', attentioned_w2v_sq
            print '[attentioned_lex]', attentioned_lex_sq
            attention_outputs.append(attentioned_w2v_sq)
            attention_outputs.append(attentioned_lex_sq)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 2, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")

                # l2_loss += tf.nn.l2_loss(W)/1000
                # l2_loss += tf.nn.l2_loss(b)/1000

                conv = tf.nn.conv2d(self.input_x_2c,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        self.appended_pool = tf.concat(
            1, [self.h_pool_flat, attention_outputs[0], attention_outputs[1]])
        print '[self.appended_pool]', self.appended_pool
        num_filters_total = num_filters_total + embedding_size + embedding_size_lex

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.appended_pool,
                                        self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W) / 30
            l2_loss += tf.nn.l2_loss(b) / 30
            l1_loss += tf.reduce_sum(tf.abs(W))
            l1_loss += tf.reduce_sum(tf.abs(b))
            self._b = b
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(
                losses) + l2_reg_lambda * l2_loss + l1_reg_lambda * l1_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")

        with tf.name_scope("avg_f1"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            self.preds = self.predictions

            # positive recall
            pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
            posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
            posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
            correct_predictions_pr = tf.equal(posg_golds, posg_preds)
            pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"),
                                   name="pos_recall")

            # positive precision
            pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
            posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
            posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
            correct_predictions_pp = tf.equal(posp_golds, posp_preds)
            pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"),
                                   name="pos_precision")

            # negative recall
            neg_gold_sel = tf.equal(self.golds, 0)  # negative_gold
            negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
            negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
            correct_predictions_nr = tf.equal(negg_golds, negg_preds)
            self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr,
                                                "float"),
                                        name="neg_recall")

            # negative precision
            neg_pred_sel = tf.equal(self.preds, 0)  # negative_pred
            negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
            negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
            correct_predictions_np = tf.equal(negp_golds, negp_preds)
            self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np,
                                                "float"),
                                        name="neg_precision")

            self.f1_neg = 2 * self.neg_p * self.neg_r / (self.neg_p +
                                                         self.neg_r) * 100
            self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r) * 100

            self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
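In the pre-attention scope above, tf.scan multiplies each example's (sequence_length, embedding_size) slice by U_w2v while ignoring previous_output, so it is effectively a batched matrix product. A small NumPy sketch of that equivalence (shapes follow the comments in the code; einsum is used for illustration only):

import numpy as np

batch, seq_len, emb, depth = 4, 60, 400, 60
x = np.random.rand(batch, seq_len, emb).astype(np.float32)
U = np.random.rand(emb, depth).astype(np.float32)

scanned = np.stack([xi @ U for xi in x])         # one slice per scan step
batched = np.einsum('bse,ed->bsd', x, U)         # single batched product
assert np.allclose(scanned, batched, atol=1e-3)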
Example #15
    def __init__(self,
                 sequence_length,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 embedding_size_lex,
                 num_filters_lex,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32,
                                      [None, sequence_length, embedding_size],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # lexicon input
        self.input_x_lexicon = tf.placeholder(
            tf.float32, [None, sequence_length, embedding_size_lex],
            name="input_x_lexicon")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        self.h_list = []
        self.h_lex_list = []

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.embedded_chars = self.input_x
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)
            print self.embedded_chars_expanded

            # lexicon embedding
            self.embedded_chars_lexicon = self.input_x_lexicon
            self.embedded_chars_expanded_lexicon = tf.expand_dims(
                self.embedded_chars_lexicon, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                U_shape = [num_filters, num_filters_lex]  # (256, 9)
                U = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                name="U")

                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W_E = tf.Variable(tf.truncated_normal(filter_shape,
                                                      stddev=0.1),
                                  name="W_E")
                b_E = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                  name="b_E")

                conv_E = tf.nn.conv2d(self.embedded_chars_expanded,
                                      W_E,
                                      strides=[1, 1, 1, 1],
                                      padding="VALID",
                                      name="conv")

                # Apply nonlinearity
                w2v_conv = tf.nn.relu(tf.nn.bias_add(conv_E, b_E),
                                      name="relu_E")  # (?, 59, 1, 256)
                self.h_list.append(w2v_conv)

                # for the lex
                filter_shape = [
                    filter_size, embedding_size_lex, 1, num_filters_lex
                ]
                W_L = tf.Variable(tf.truncated_normal(filter_shape,
                                                      stddev=0.1),
                                  name="W_L")
                b_L = tf.Variable(tf.constant(0.1, shape=[num_filters_lex]),
                                  name="b_L")

                conv_L = tf.nn.conv2d(self.embedded_chars_expanded_lexicon,
                                      W_L,
                                      strides=[1, 1, 1, 1],
                                      padding="VALID",
                                      name="conv")
                # Apply nonlinearity
                lex_conv = tf.nn.relu(tf.nn.bias_add(conv_L, b_L),
                                      name="relu_L")  # (?, 59, 1, 9)
                self.h_lex_list.append(lex_conv)

                w2v_sq = tf.squeeze(w2v_conv, squeeze_dims=[2])  # (?, 59, 256)
                lex_sq = tf.squeeze(lex_conv, squeeze_dims=[2])  # (?, 59, 9)

                print '[w2v_sq]', w2v_sq

                w2v_sq_tr = tf.batch_matrix_transpose(w2v_sq)
                print '[w2v_sq_tr]', w2v_sq_tr

                lex_sq_tr = tf.batch_matrix_transpose(lex_sq)
                print '[lex_sq_tr]', lex_sq_tr

                def fn(previous_output, current_input):
                    print(current_input.get_shape())
                    current_output = tf.matmul(U, current_input)
                    print 'previous_output', previous_output
                    print 'current_output', current_output
                    return current_output

                initializer = tf.constant(np.zeros([num_filters, 59]),
                                          dtype=tf.float32)

                Ulex = tf.scan(fn, lex_sq_tr, initializer=initializer)
                print '[Ulex]', Ulex

                WUL = tf.batch_matmul(w2v_sq, Ulex)
                print '[WUL]', WUL

                WUL_expanded = tf.expand_dims(WUL, -1)
                print '[WUL_expanded]', WUL_expanded

                # Maxpooling over the outputs
                row_pool = tf.nn.max_pool(
                    WUL_expanded,
                    ksize=[1, 1, sequence_length - filter_size + 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="row_pool")

                print '[row_pool]', row_pool

                col_pool = tf.nn.max_pool(
                    WUL_expanded,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="col_pool")

                print '[col_pool]', col_pool

                row_pool_sq = tf.expand_dims(
                    tf.squeeze(row_pool, squeeze_dims=[2, 3]),
                    -1)  # (?, 59, 1)
                print '[row_pool_sq]', row_pool_sq

                col_pool_sq = tf.expand_dims(
                    tf.squeeze(col_pool, squeeze_dims=[1, 3]),
                    -1)  # (?, 59, 1)
                print '[col_pool_sq]', col_pool_sq

                print '[w2v_sq_tr]', w2v_sq_tr
                print '[lex_sq_tr]', lex_sq_tr

                attentioned_w2v = tf.batch_matmul(w2v_sq_tr, col_pool_sq)
                attentioned_lex = tf.batch_matmul(lex_sq_tr, row_pool_sq)

                attentioned_w2v_sq = tf.squeeze(attentioned_w2v,
                                                squeeze_dims=[2])
                attentioned_lex_sq = tf.squeeze(attentioned_lex,
                                                squeeze_dims=[2])

                print '[attentioned_w2v]', attentioned_w2v_sq
                print '[attentioned_lex]', attentioned_lex_sq

                pooled_outputs.append(attentioned_w2v_sq)
                pooled_outputs.append(attentioned_lex_sq)

        # Combine all the pooled features
        num_filters_total = num_filters * len(
            filter_sizes) + num_filters_lex * len(filter_sizes)
        print '[pooled_outputs]', len(pooled_outputs)
        self.h_pool = tf.concat(1, pooled_outputs)
        print '[self.h_pool]', self.h_pool
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        print '[self.h_pool_flat]', self.h_pool_flat

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W) / 30
            l2_loss += tf.nn.l2_loss(b) / 30
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")

        with tf.name_scope("avg_f1"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            self.preds = self.predictions

            # positive recall
            pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
            posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
            posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
            correct_predictions_pr = tf.equal(posg_golds, posg_preds)
            pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"),
                                   name="pos_recall")

            # positive precision
            pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
            posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
            posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
            correct_predictions_pp = tf.equal(posp_golds, posp_preds)
            pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"),
                                   name="pos_precision")

            # negative recall
            neg_gold_sel = tf.equal(self.golds, 0)  # negative_gold
            negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
            negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
            correct_predictions_nr = tf.equal(negg_golds, negg_preds)
            self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr,
                                                "float"),
                                        name="neg_recall")

            # negative precision
            neg_pred_sel = tf.equal(self.preds, 0)  # negative_pred
            negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
            negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
            correct_predictions_np = tf.equal(negp_golds, negp_preds)
            self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np,
                                                "float"),
                                        name="neg_precision")

            self.f1_neg = 2 * self.neg_p * self.neg_r / (
                self.neg_p + self.neg_r + 0.00001) * 100
            self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r + 0.00001) * 100

            self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
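The WUL tensor above is a bilinear compatibility grid between the word and lexicon convolution features, WUL[b, i, j] = w2v[b, i] . U . lex[b, j], which the row and column max-pools then reduce to per-position attention weights. A minimal NumPy sketch of that grid (shapes follow the comments in the code; einsum is used for illustration only):

import numpy as np

batch, seq, nf, nf_lex = 2, 59, 256, 9
w2v = np.random.rand(batch, seq, nf).astype(np.float32)      # w2v_sq
lex = np.random.rand(batch, seq, nf_lex).astype(np.float32)  # lex_sq
U = np.random.rand(nf, nf_lex).astype(np.float32)

WUL = np.einsum('bin,nm,bjm->bij', w2v, U, lex)
print(WUL.shape)                                             # (2, 59, 59)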
Example #16
    def __init__(
      self, sequence_length, num_classes,
      embedding_size, filter_sizes, num_filters, embedding_size_lex, num_filters_lex, l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length, embedding_size], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # lexicon input
        self.input_x_lexicon = tf.placeholder(tf.float32, [None, sequence_length, embedding_size_lex],
                                              name="input_x_lexicon")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        self.h_list=[]
        self.h_lex_list = []

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.embedded_chars = self.input_x
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            print self.embedded_chars_expanded

            # lexicon embedding
            self.embedded_chars_lexicon = self.input_x_lexicon
            self.embedded_chars_expanded_lexicon = tf.expand_dims(self.embedded_chars_lexicon, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                U_shape = [num_filters, num_filters_lex] # (256, 9)
                U = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1), name="U")

                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W_E = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_E")
                b_E = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b_E")

                conv_E = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W_E,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")

                # Apply nonlinearity
                w2v_conv = tf.nn.relu(tf.nn.bias_add(conv_E, b_E), name="relu_E") # (?, 59, 1, 256)
                self.h_list.append(w2v_conv)

                # for the lex
                filter_shape = [filter_size, embedding_size_lex, 1, num_filters_lex]
                W_L = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_L")
                b_L = tf.Variable(tf.constant(0.1, shape=[num_filters_lex]), name="b_L")

                conv_L = tf.nn.conv2d(
                    self.embedded_chars_expanded_lexicon,
                    W_L,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                lex_conv = tf.nn.relu(tf.nn.bias_add(conv_L, b_L), name="relu_L") # (?, 59, 1, 9)
                self.h_lex_list.append(lex_conv)

                w2v_sq = tf.squeeze(w2v_conv, squeeze_dims=[2]) # (?, 59, 256)
                lex_sq = tf.squeeze(lex_conv, squeeze_dims=[2]) # (?, 59, 9)

                print '[w2v_sq]', w2v_sq

                w2v_sq_tr = tf.batch_matrix_transpose(w2v_sq)
                print '[w2v_sq_tr]', w2v_sq_tr

                lex_sq_tr = tf.batch_matrix_transpose(lex_sq)
                print '[lex_sq_tr]', lex_sq_tr

                def fn(previous_output, current_input):
                    print(current_input.get_shape())
                    current_output = tf.matmul(U, current_input)
                    print 'previous_output', previous_output
                    print 'current_output', current_output
                    return current_output

                initializer = tf.constant(np.zeros([num_filters,59]), dtype=tf.float32)

                Ulex = tf.scan(fn, lex_sq_tr, initializer=initializer)
                print '[Ulex]', Ulex

                WUL = tf.batch_matmul(w2v_sq, Ulex)
                print '[WUL]', WUL


                WUL_expanded = tf.expand_dims(WUL, -1)
                print '[WUL_expanded]', WUL_expanded

                # Maxpooling over the outputs
                row_pool = tf.nn.max_pool(
                    WUL_expanded,
                    ksize=[1, 1, sequence_length - filter_size + 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="row_pool")

                print '[row_pool]', row_pool

                col_pool = tf.nn.max_pool(
                    WUL_expanded,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="col_pool")

                print '[col_pool]', col_pool

                row_pool_sq = tf.expand_dims(tf.squeeze(row_pool, squeeze_dims=[2, 3]), -1)  # (?, 59, 1)
                print '[row_pool_sq]', row_pool_sq

                col_pool_sq = tf.expand_dims(tf.squeeze(col_pool, squeeze_dims=[1, 3]), -1)  # (?, 59, 1)
                print '[col_pool_sq]', col_pool_sq

                print '[w2v_sq_tr]', w2v_sq_tr
                print '[lex_sq_tr]', lex_sq_tr

                attentioned_w2v = tf.batch_matmul(w2v_sq_tr, col_pool_sq)
                attentioned_lex = tf.batch_matmul(lex_sq_tr, row_pool_sq)

                attentioned_w2v_sq = tf.squeeze(attentioned_w2v, squeeze_dims=[2])
                attentioned_lex_sq = tf.squeeze(attentioned_lex, squeeze_dims=[2])

                print '[attentioned_w2v]', attentioned_w2v_sq
                print '[attentioned_lex]', attentioned_lex_sq


                pooled_outputs.append(attentioned_w2v_sq)
                pooled_outputs.append(attentioned_lex_sq)


        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes) + num_filters_lex * len(filter_sizes)
        print '[pooled_outputs]', len(pooled_outputs)
        self.h_pool = tf.concat(1, pooled_outputs)
        print '[self.h_pool]', self.h_pool
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        print '[self.h_pool_flat]', self.h_pool_flat

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)/30
            l2_loss += tf.nn.l2_loss(b)/30
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")


        with tf.name_scope("avg_f1"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            self.preds = self.predictions

            # positive recall
            pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
            posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
            posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
            correct_predictions_pr = tf.equal(posg_golds, posg_preds)
            pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"), name="pos_recall")

            # positive precision
            pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
            posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
            posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
            correct_predictions_pp = tf.equal(posp_golds, posp_preds)
            pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"), name="pos_precision")

            # negative recall
            neg_gold_sel = tf.equal(self.golds, 0)  # negative_gold
            negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
            negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
            correct_predictions_nr = tf.equal(negg_golds, negg_preds)
            self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr, "float"), name="neg_recall")

            # negative precision
            neg_pred_sel = tf.equal(self.preds, 0)  # negative_pred
            negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
            negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
            correct_predictions_np = tf.equal(negp_golds, negp_preds)
            self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np, "float"), name="neg_precision")

            self.f1_neg = 2 * self.neg_p * self.neg_r / (self.neg_p + self.neg_r + 0.00001) * 100
            self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r + 0.00001) * 100

            self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
Example #17
 def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
     vector = [1, 2, 3]
     with self.test_session():
         with self.assertRaisesRegexp(ValueError, "should be a "):
             tf.batch_matrix_transpose(vector)
Example #18
    def __init__(
            self, sequence_length, num_classes,
            embedding_size, filter_sizes, num_filters, embedding_size_lex,
            attention_depth_w2v, attention_depth_lex, l2_reg_lambda=0.0,
            l1_reg_lambda=0.0):


        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length, embedding_size], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # lexicon input
        self.input_x_lexicon = tf.placeholder(tf.float32, [None, sequence_length, embedding_size_lex],
                                              name="input_x_lexicon")

        self.input_x_concat = tf.concat(2, [self.input_x, self.input_x_lexicon])
        print 'self.input_x_concat', self.input_x_concat

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.embedded_chars = self.input_x
            self.embedded_chars_lexicon = self.input_x_lexicon
            self.embedded_chars_expanded = tf.expand_dims(self.input_x_concat, -1)

        attention_outputs = []
        with tf.name_scope("pre-attention"):
            U_shape = [embedding_size, attention_depth_w2v]  # (400, 60)
            self.U_w2v = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1), name="U_w2v")
            U_shape = [embedding_size_lex, attention_depth_lex]  # (15, 60)
            self.U_lex = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1), name="U_lex")

            self.embedded_chars_tr = tf.batch_matrix_transpose(self.embedded_chars)
            self.embedded_chars_lexicon_tr = tf.batch_matrix_transpose(self.embedded_chars_lexicon)
            print '[self.embedded_chars_lexicon_tr]', self.embedded_chars_lexicon_tr

            def fn_matmul_w2v(previous_output, current_input):
                print(current_input.get_shape())
                current_output = tf.matmul(current_input, self.U_w2v)
                print 'previous_output', previous_output
                print 'current_output', current_output
                return current_output

            def fn_matmul_lex(previous_output, current_input):
                print(current_input.get_shape())
                current_output = tf.matmul(current_input, self.U_lex)
                print 'previous_output', previous_output
                print 'current_output', current_output
                return current_output

            initializer = tf.constant(np.zeros([sequence_length, attention_depth_w2v]), dtype=tf.float32)
            WU_w2v = tf.scan(fn_matmul_w2v, self.embedded_chars, initializer=initializer)
            print '[WU_w2v]', WU_w2v

            initializer = tf.constant(np.zeros([sequence_length, attention_depth_lex]), dtype=tf.float32)
            LU_lex = tf.scan(fn_matmul_lex, self.embedded_chars_lexicon, initializer=initializer)
            print '[LU_lex]', LU_lex

            WU_w2v_expanded = tf.expand_dims(WU_w2v, -1)
            print '[WU_w2v_expanded]', WU_w2v_expanded  # (?, 60(seq_len), 60(depth), 1)

            w2v_pool = tf.nn.max_pool(
                WU_w2v_expanded,
                ksize=[1, 1, attention_depth_w2v, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="w2v_pool")

            print '[w2v_pool]', w2v_pool  # (?, 60(seq_len), 1, 1) #select attention for w2v

            LU_lex_expanded = tf.expand_dims(LU_lex, -1)
            print '[LU_lex_expanded]', LU_lex_expanded  # (?, 60(seq_len), 60(depth), 1)

            lex_pool = tf.nn.max_pool(
                LU_lex_expanded,
                ksize=[1, 1, attention_depth_lex, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="lex_pool")

            print '[lex_pool]', lex_pool  # (?, 60(seq_len), 1, 1) #select attention for lex

            w2v_pool_sq = tf.expand_dims(tf.squeeze(w2v_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[w2v_pool_sq]', w2v_pool_sq

            lex_pool_sq = tf.expand_dims(tf.squeeze(lex_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[lex_pool_sq]', lex_pool_sq

            attentioned_w2v = tf.batch_matmul(self.embedded_chars_tr, w2v_pool_sq)
            attentioned_lex = tf.batch_matmul(self.embedded_chars_lexicon_tr, lex_pool_sq)

            attentioned_w2v_sq = tf.squeeze(attentioned_w2v, squeeze_dims=[2])
            attentioned_lex_sq = tf.squeeze(attentioned_lex, squeeze_dims=[2])

            print '[attentioned_w2v]', attentioned_w2v_sq
            print '[attentioned_lex]', attentioned_lex_sq
            attention_outputs.append(attentioned_w2v_sq)
            attention_outputs.append(attentioned_lex_sq)


        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size+embedding_size_lex, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")

                # l2_loss += tf.nn.l2_loss(W)/1000
                # l2_loss += tf.nn.l2_loss(b)/1000

                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        self.appended_pool = tf.concat(1, [self.h_pool_flat, attention_outputs[0], attention_outputs[1]])
        print '[self.appended_pool]', self.appended_pool
        num_filters_total = num_filters_total + embedding_size + embedding_size_lex

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.appended_pool, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)/30
            l2_loss += tf.nn.l2_loss(b)/30
            self._b = b
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")


        with tf.name_scope("avg_f1"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            self.preds = self.predictions

            # positive recall
            pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
            posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
            posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
            correct_predictions_pr = tf.equal(posg_golds, posg_preds)
            pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"), name="pos_recall")

            # positive precision
            pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
            posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
            posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
            correct_predictions_pp = tf.equal(posp_golds, posp_preds)
            pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"), name="pos_precision")

            # negative recall
            neg_gold_sel = tf.equal(self.golds, 0)  # negative_gold
            negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
            negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
            correct_predictions_nr = tf.equal(negg_golds, negg_preds)
            self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr, "float"), name="neg_recall")

            # negative precision
            neg_pred_sel = tf.equal(self.preds, 0)  # negative_pred
            negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
            negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
            correct_predictions_np = tf.equal(negp_golds, negp_preds)
            self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np, "float"), name="neg_precision")

            self.f1_neg = 2 * self.neg_p * self.neg_r / (self.neg_p + self.neg_r + 0.00001) * 100
            self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r + 0.00001) * 100

            self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
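The avg_f1 scope in these models computes a macro-averaged F1 over the negative (class 0) and positive (class 2) labels, ignoring the neutral class, with a small epsilon guarding against empty denominators. A minimal NumPy sketch of the same metric (the function name avg_f1 is an assumption of this note):

import numpy as np

def avg_f1(golds, preds, eps=1e-5):
    def f1(cls):
        # Recall over gold members of cls, precision over predicted members.
        r = np.mean(preds[golds == cls] == cls) if np.any(golds == cls) else 0.0
        p = np.mean(golds[preds == cls] == cls) if np.any(preds == cls) else 0.0
        return 2 * p * r / (p + r + eps) * 100
    return (f1(0) + f1(2)) / 2

golds = np.array([0, 0, 1, 2, 2, 2])
preds = np.array([0, 1, 1, 2, 2, 0])
print(avg_f1(golds, preds))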