Example #1
    def _factorized_reduction(self, x, out_filters, stride, is_training):
        """Reduces the shape of x without information loss due to striding."""
        assert out_filters % 2 == 0, (
            "Need even number of filters when using this factorized reduction."
        )
        if stride == 1:
            with tf.variable_scope("path_conv"):
                x = _conv_opt(x, 1, out_filters)
                x = batch_norm(x, is_training)
                return x

        actual_data_format = "channels_first"  # only supports channels-first (NCW) layout
        #stride_spec = self._get_strides(stride)
        # Skip path 1
        path1 = tf.layers.max_pooling1d(x,
                                        1,
                                        stride,
                                        "VALID",
                                        data_format=actual_data_format)
        #path1 = tf.nn.max_pool(x, [1, 1, 1], stride_spec, "VALID", data_format="NCHW")
        #print("after max_pool:", path1.shape)

        with tf.variable_scope("path1_conv"):
            path1 = _conv_opt(path1, 1, out_filters // 2)

        print("after conv:", path1.shape)

        # Skip path 2
        # First pad with 0's on the right, then shift the input to
        # include those 0's that were added.

        pad_arr = [[0, 0], [0, 0], [0, 1]]
        path2 = tf.pad(x, pad_arr)[:, :, 1:]
        inp_c = path2.get_shape()[1].value
        if inp_c > 1:
            concat_axis = 1
        else:
            concat_axis = 2

        path2 = tf.layers.max_pooling1d(path2,
                                        1,
                                        stride,
                                        "VALID",
                                        data_format=actual_data_format)
        #path2 = tf.nn.max_pool(path2, [1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
        with tf.variable_scope("path2_conv"):
            path2 = _conv_opt(path2, 1, out_filters // 2)

        # Concat and apply BN
        final_path = tf.concat(values=[path1, path2], axis=concat_axis)
        final_path = batch_norm(final_path, is_training)

        return final_path
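
A minimal NumPy sketch of the stride-and-shift idea behind _factorized_reduction, assuming channels-first (batch, channels, length) inputs. The real method also projects each path to out_filters // 2 with a 1x1 convolution and applies batch norm; this sketch keeps only the two pooling paths.

import numpy as np

def factorized_reduction_1d(x, stride=2):
    # x: (batch, channels, length) -> (batch, 2 * channels, ceil(length / stride))
    path1 = x[:, :, ::stride]                                # plain strided subsampling
    shifted = np.pad(x, ((0, 0), (0, 0), (0, 1)))[:, :, 1:]  # pad one zero on the right, shift left by one
    path2 = shifted[:, :, ::stride]                          # subsample the shifted sequence
    return np.concatenate([path1, path2], axis=1)            # concatenate along the channel axis

x = np.arange(2 * 3 * 8, dtype=np.float32).reshape(2, 3, 8)
print(factorized_reduction_1d(x).shape)  # (2, 6, 4)
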
Example #2
            def post_process_out(inputs, out):
                '''Form skip connection and perform batch norm'''
                optional_inputs = inputs[1]
                print("post_process_out::", inputs, optional_inputs)
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope("skip"):
                        #print("layers",layers)
                        inputs = layers[-1]

                        inp_d = inputs.get_shape()[1].value
                        inp_l = inputs.get_shape()[2].value

                        out.set_shape([None, out_filters, inp_l])
                        try:
                            out = tf.add_n(
                                [out,
                                 tf.reduce_sum(optional_inputs, axis=0)])
                        except Exception as e:
                            print(e)
                        out = batch_norm(out, is_training)
                layers.append(out)
                return out
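
post_process_out adds the chosen optional inputs onto the op output before batch norm. Below is a framework-free sketch of that residual sum, assuming every tensor shares the (batch, filters, length) shape; the try/except in the original guards against mismatches during search, and batch norm is omitted here.

import numpy as np

def add_skip(out, optional_inputs):
    # out: (batch, filters, length); optional_inputs: list of arrays with the same shape
    if optional_inputs:
        out = out + np.sum(np.stack(optional_inputs, axis=0), axis=0)
    return out  # batch norm would follow here

out = np.ones((2, 4, 8), dtype=np.float32)
skips = [np.full((2, 4, 8), 0.5), np.full((2, 4, 8), 0.25)]
print(add_skip(out, skips)[0, 0, 0])  # 1.75
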
Example #3
    def _model(self,
               doc,
               bow_doc,
               datasets,
               is_training,
               reuse=False,
               mode="train"):

        with tf.variable_scope(self.name, reuse=reuse):
            layers = []

            if is_training:
                self.valid_lengths = []
            with tf.variable_scope('embed'):
                regularizer = tf.contrib.layers.l2_regularizer(
                    scale=self.l2_reg)
                if self.embedding_model == "none":
                    embedding = create_weight(
                        "w",
                        shape=self.embedding["none"].shape,
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                elif self.embedding_model == "glove":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                elif self.embedding_model == "word2vec":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                elif self.embedding_model == "all":
                    embedding_glove = create_weight(
                        "w_glove",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                    print("embedding_glove: {0}".format(
                        embedding_glove.get_shape()))
                    embedding_word2vec = create_weight(
                        "w_word2vec",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                    print("embedding_word2vec: {0}".format(
                        embedding_word2vec.get_shape()))
                    embedding = tf.concat(
                        [embedding_glove, embedding_word2vec], axis=0)
                    print("join embedding: {0}".format(embedding.get_shape()))
                field_embedding = create_weight(
                    "w_field",
                    shape=self.embedding["field"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)

                self.final_embedding = embedding
                print("embedding: {0}".format(embedding))
                print("doc: {0}".format(doc))
                print("bow_doc: {0}".format(bow_doc))

                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size

                if self.sliding_window:
                    doc, sliding_windows = self._to_sliding_window(doc,
                                                                   batch_size,
                                                                   size=64,
                                                                   step=32)
                    bow_doc, _ = self._to_sliding_window(bow_doc,
                                                         batch_size,
                                                         size=64,
                                                         step=32)
                    print("doc after sliding window: {0}".format(doc))

                if is_training:
                    embedding = tf.nn.dropout(embedding,
                                              keep_prob=self.embed_keep_prob)

                doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
                field_embedding = tf.nn.embedding_lookup(field_embedding,
                                                         bow_doc,
                                                         max_norm=None)
                if self.input_field_embedding:
                    doc = tf.add_n([doc, field_embedding])
                doc = tf.transpose(doc, [0, 2, 1])

                print("doc_shape", doc.shape)
                inp_c = doc.shape[1]
                inp_w = doc.shape[2]
                #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
                doc = tf.reshape(doc, [-1, inp_c, inp_w])
                field_embedding = tf.transpose(field_embedding, [0, 2, 1])
                #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
                field_embedding = tf.reshape(field_embedding,
                                             [-1, inp_c, inp_w])

                print("after: doc, field_embedding", doc.shape,
                      field_embedding.shape)

            x = doc
            pos_batch_size = 1
            # initialize pos_embedding for transformer
            if self.input_positional_encoding:
                out_filters = 300
            else:
                out_filters = self.out_filters
            if self.is_sinusolid:
                pos_embedding = self._positional_encoding(
                    x,
                    pos_batch_size,
                    is_training,
                    num_units=out_filters,
                    zero_pad=False,
                    scale=False,
                    scope="enc_pe")
            else:
                pos_embedding = self._embedding(tf.tile(
                    tf.expand_dims(tf.range(inp_w), 0), [pos_batch_size, 1]),
                                                vocab_size=inp_w,
                                                num_units=out_filters,
                                                reuse=tf.AUTO_REUSE,
                                                zero_pad=True,
                                                scale=False,
                                                scope="enc_pe")
            print("pos embedding: {0}".format(pos_embedding))
            pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
            #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
            print("pos embedding: {0}".format(pos_embedding))
            if self.input_positional_encoding:
                x += pos_embedding

            out_filters = self.out_filters
            with tf.variable_scope("init_conv"):  # adjust out_filter dimension
                #print("init_x", x.shape)
                x = _conv_opt(x, 1, self.out_filters)
                x = batch_norm(x, is_training)

                layers.append(x)

            # several operations for NNI
            def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                        is_training, pos_embedding,
                                        field_embedding):
                '''Add a fixed pooling layer every four layers'''
                with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                    pos_embedding = self._factorized_reduction(
                        pos_embedding, out_filters, 2, is_training)

                with tf.variable_scope(
                        "field_embed_pool_{0}".format(layer_id)):
                    field_embedding = self._factorized_reduction(
                        field_embedding, out_filters, 2, is_training)

                #out_filters *= 2
                with tf.variable_scope("pool_at_{0}".format(layer_id)):
                    pooled_layers = []
                    for i, layer in enumerate(layers):
                        #print("pooling_layer", i, layer)
                        with tf.variable_scope("from_{0}".format(i)):
                            x = self._factorized_reduction(
                                layer, out_filters, 2, is_training)
                            #print("after x ", x)
                        pooled_layers.append(x)

                    layers = pooled_layers

                    return layers, out_filters

            def post_process_out(inputs, out):
                '''Form skip connection and perform batch norm'''
                optional_inputs = inputs[1]
                print("post_process_out::", inputs, optional_inputs)
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope("skip"):
                        #print("layers",layers)
                        inputs = layers[-1]

                        inp_d = inputs.get_shape()[1].value
                        inp_l = inputs.get_shape()[2].value

                        out.set_shape([None, out_filters, inp_l])
                        try:
                            out = tf.add_n(
                                [out,
                                 tf.reduce_sum(optional_inputs, axis=0)])
                        except Exception as e:
                            print(e)
                        out = batch_norm(out, is_training)
                layers.append(out)
                return out

            global layer_id
            layer_id = -1

            def get_layer_id():
                global layer_id
                layer_id += 1
                return 'layer_' + str(layer_id)

            size = [1, 3, 5, 7]
            separables = [False, False, False, False]

            def conv(inputs, size, separable=False):
                # res_layers is pre_layers that are chosen to form skip connection
                # layers[-1] is always the latest input
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('conv_' + str(size) + (
                            '_separable' if separable else '')):
                        #print("conv_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = conv_op(dealed_inputs,
                                      size,
                                      is_training,
                                      out_filters,
                                      out_filters,
                                      start_idx=None,
                                      separable=separable)
                #layers.append(out)
                return out

            def pool(inputs, ptype):
                assert ptype in ['avg',
                                 'max'], "pooling type must be avg or max"

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('pooling_' + str(ptype)):
                        #print("pool_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = pool_op(dealed_inputs,
                                      is_training,
                                      out_filters,
                                      out_filters,
                                      ptype,
                                      start_idx=None)
                #layers.append(out)
                return out

            def rnn(inputs):

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_6'):
                        #print("rnn_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = recur_op(dealed_inputs,
                                       is_training,
                                       out_filters,
                                       out_filters,
                                       start_idx=0,
                                       lstm_x_keep_prob=self.lstm_x_keep_prob,
                                       lstm_h_keep_prob=self.lstm_h_keep_prob,
                                       lstm_o_keep_prob=self.lstm_o_keep_prob,
                                       var_rec=self.var_rec)
                #layers.append(out)
                return out

            def attention(inputs):

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_7'):
                        #print("attention_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = attention_op(
                            dealed_inputs,
                            pos_embedding,
                            field_embedding,
                            is_training,
                            out_filters,
                            out_filters,
                            start_idx=0,
                            positional_encoding=self.positional_encoding,
                            attention_keep_prob=self.attention_keep_prob,
                            do_field_embedding=self.field_embedding)
                #layers.append(out)
                return out

            def final_process(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('final_out'):
                        print("final_inputs::", inputs)
                        dealed_inputs = tf.reduce_mean(inputs[1], axis=0)
                        print("dealed_inputs::", dealed_inputs)
                        out = dealed_inputs
                        #out = tf.reduce_mean(inputs[1], axis=0)
                        print("final_out::", inputs, out)
                layers.append(out)
                return out

            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x],
          optional_input_size: 1,
          layer_output: layer_0_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_0_out_0)],
          optional_inputs: [],
          optional_input_size: 1,
          layer_output: layer_0_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out],
          optional_input_size: 1,
          layer_output: layer_1_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_1_out_0)],
          optional_inputs: [layer_0_out],
          optional_input_size: 1,
          layer_output: layer_1_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out],
          optional_input_size: 1,
          layer_output: layer_2_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_2_out_0)],
          optional_inputs: [layer_0_out, layer_1_out],
          optional_input_size: 1,
          layer_output: layer_2_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out],
          optional_input_size: 1,
          layer_output: layer_3_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_3_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
          optional_input_size: 1,
          layer_output: layer_3_out
      }
      )"""
            layers, out_filters = add_fixed_pooling_layer(
                3, layers, out_filters, is_training, pos_embedding,
                field_embedding)
            x, layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-5:]
            print("layer_out", x, layer_0_out, layer_1_out, layer_2_out,
                  layer_3_out)
            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out],
          optional_input_size: 1,
          layer_output: layer_4_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_4_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
          optional_input_size: 1,
          layer_output: layer_4_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
          optional_input_size: 1,
          layer_output: layer_5_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_5_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
          optional_input_size: 1,
          layer_output: layer_5_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
          optional_input_size: 1,
          layer_output: layer_6_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_6_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
          optional_input_size: 1,
          layer_output: layer_6_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
          optional_input_size: 1,
          layer_output: layer_7_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_7_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
          optional_input_size: 1,
          layer_output: layer_7_out
      }
      )"""
            layers, out_filters = add_fixed_pooling_layer(
                7, layers, out_filters, is_training, pos_embedding,
                field_embedding)
            (x, layer_0_out, layer_1_out, layer_2_out, layer_3_out,
             layer_4_out, layer_5_out, layer_6_out, layer_7_out) = layers[-9:]
            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
          optional_input_size: 1,
          layer_output: layer_8_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_8_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
          optional_input_size: 1,
          layer_output: layer_8_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
          optional_input_size: 1,
          layer_output: layer_9_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_9_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
          optional_input_size: 1,
          layer_output: layer_9_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
          optional_input_size: 1,
          layer_output: layer_10_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_10_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
          optional_input_size: 1,
          layer_output: layer_10_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
          optional_input_size: 1,
          layer_output: layer_11_out_1
      },
      {
          layer_choice: [post_process_out(out=layer_11_out_1)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
          optional_input_size: 1,
          layer_output: layer_11_out
      },
      {
          layer_choice: [final_process()],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out, layer_11_out],
          optional_input_size: 1,
          layer_output: final_out
      }
      )"""

            print("len_layers: ", len(layers))
            x = final_out

            if self.sliding_window:
                x = self._from_sliding_window(x, batch_size, sliding_windows)

            class_num = self.class_num
            with tf.variable_scope("fc"):
                if not self.is_output_attention:
                    x = tf.reduce_mean(x, 2)
                else:
                    batch_size = x.get_shape()[0].value
                    inp_d = x.get_shape()[1].value
                    inp_l = x.get_shape()[2].value

                    final_attention_query = create_weight(
                        "query",
                        shape=[1, inp_d],
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                    if is_training or mode == "valid":
                        batch_size = self.batch_size
                    else:
                        batch_size = self.eval_batch_size
                    final_attention_query = tf.tile(final_attention_query,
                                                    [batch_size, 1])
                    print("final_attention_query: {0}".format(
                        final_attention_query))

                    # put channel to the last dim
                    x = tf.transpose(x, [0, 2, 1])
                    x = tf.reshape(x, [-1, inp_l, inp_d])
                    print("x: {0}".format(x))
                    x = multihead_attention(
                        queries=final_attention_query,
                        keys=x,
                        pos_embedding=pos_embedding,
                        field_embedding=field_embedding,
                        num_units=inp_d,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=is_training,
                        causality=False,
                        positional_encoding=self.positional_encoding)
                    print("x: {0}".format(x))
                    x = tf.reshape(x, [-1, 1, inp_d])
                    x = tf.reduce_sum(x, axis=1)
                    print("x: {0}".format(x))
                if is_training:
                    x = tf.nn.dropout(x, self.keep_prob)
                x = tf.layers.dense(x, units=class_num)

        return x
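
The @nni.mutable_layers annotations above define the search space: each searchable layer picks one candidate op (conv with kernel size 1/3/5/7, avg/max pooling, rnn, or attention) and at most one earlier output as its input, and post_process_out then folds in a skip connection. Below is a rough, framework-free sketch of that control flow with toy ops and a random stand-in for the NNI tuner; every name in it is illustrative, not the annotation compiler's actual output.

import random

def run_search_space(x, num_layers=4, seed=0):
    rng = random.Random(seed)
    ops = {  # toy stand-ins for the real conv/pool/rnn/attention ops
        "conv_1": lambda v: v + 1, "conv_3": lambda v: v + 3,
        "avg_pool": lambda v: v / 2, "max_pool": lambda v: v,
        "rnn": lambda v: 2 * v, "attention": lambda v: v - 1,
    }
    layer_outputs = [x]                           # x plays the role of the init_conv output
    for _ in range(num_layers):
        op_name = rng.choice(sorted(ops))         # layer_choice
        op_input = rng.choice(layer_outputs)      # optional_inputs, optional_input_size: 1
        out = ops[op_name](op_input)
        skip = rng.choice(layer_outputs)          # input picked for post_process_out
        layer_outputs.append(out + skip)          # skip connection; batch norm omitted
    return layer_outputs[-1]                      # final_process would average the chosen outputs

print(run_search_space(1.0, num_layers=4))
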
Example #4
    def _fixed_layer(self, inputs, pos_embedding, field_embedding, layer_id,
                     prev_layers, final_flags, start_idx, pre_idx, out_filters,
                     is_training):
        """
      Args:
        layer_id: current layer
        prev_layers: cache of previous layers. for skip connections
        start_idx: where to start looking at. technically, we can infer this
          from layer_id, but why bother...
        is_training: for batch_norm
      """
        if len(prev_layers) > 0:
            inputs = prev_layers[-1]

        if len(prev_layers) > 0:
            if self.multi_path:
                pre_layer_id = self.sample_arc[start_idx]
                start_idx += 1
                num_pre_layers = len(prev_layers)
                if num_pre_layers > 5:
                    num_pre_layers = 5
                matched = False
                for i in range(0, num_pre_layers):
                    if pre_layer_id == i:
                        layer_idx = len(prev_layers) - 1 - i
                        final_flags[layer_idx] = 0
                        matched = True
                        inputs = prev_layers[layer_idx]
                if not matched:
                    final_flags[-1] = 0
                    inputs = prev_layers[-1]
            else:
                final_flags[-1] = 0

        size = [1, 3, 5, 7]
        separables = [False, False, False, False]
        actual_data_format = "channels_first"  # NCHW

        out = inputs
        count = self.sample_arc[start_idx]
        if count in [0, 1, 2, 3]:
            filter_size = size[count]
            separable = separables[count]
            with tf.variable_scope("conv_{0}x{0}".format(filter_size)):
                out = tf.nn.relu(out)
                out = conv_op(out, filter_size, is_training, out_filters,
                              out_filters)
                out = batch_norm(out, is_training)
        elif count == 4:
            with tf.variable_scope("average_pool"):
                out = pool_op(out, is_training, out_filters, out_filters,
                              "avg")
        elif count == 5:
            with tf.variable_scope("max_pool"):
                out = pool_op(out, is_training, out_filters, out_filters,
                              "max")
        elif count == 7:
            with tf.variable_scope("out_attention"):
                out = attention_op(
                    out,
                    pos_embedding,
                    field_embedding,
                    is_training,
                    out_filters,
                    out_filters,
                    start_idx=0,
                    positional_encoding=self.positional_encoding,
                    attention_keep_prob=self.attention_keep_prob,
                    do_field_embedding=self.field_embedding)
                out = batch_norm(out, is_training)
        elif count == 6:
            with tf.variable_scope("rnn"):
                out = recur_op(out,
                               is_training,
                               out_filters,
                               out_filters,
                               start_idx=0,
                               lstm_x_keep_prob=self.lstm_x_keep_prob,
                               lstm_h_keep_prob=self.lstm_h_keep_prob,
                               lstm_o_keep_prob=self.lstm_o_keep_prob,
                               var_rec=self.var_rec)
        else:
            raise ValueError("Unknown operation number '{0}'".format(count))

        if layer_id > 0:
            skip_start = start_idx + 1
            skip = self.sample_arc[skip_start:skip_start + layer_id]
            total_skip_channels = np.sum(skip) + 1

            res_layers = []
            for i in range(layer_id):
                if skip[i] == 1:
                    res_layers.append(prev_layers[i])
                    final_flags[i] = 0
            prev = res_layers + [out]

            if not self.skip_concat:
                out = tf.add_n(prev)
            else:
                prev = tf.concat(prev, axis=1)
                out = prev
                print(out, out_filters)
                with tf.variable_scope("skip"):
                    out = tf.nn.relu(out)
                    out = conv_op(out, 1, is_training, out_filters,
                                  out_filters)
            out = batch_norm(out, is_training)

        return out
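
_fixed_layer consumes a flat sample_arc: for each layer, one entry selects the op (indices 0-3 map to conv kernel sizes 1/3/5/7, 4 avg pool, 5 max pool, 6 rnn, 7 attention) and the next layer_id entries are skip flags over previous layers. A small decoding sketch under that layout, ignoring the extra multi_path entry; the arc below is made up for illustration.

OP_NAMES = ["conv_1", "conv_3", "conv_5", "conv_7",
            "avg_pool", "max_pool", "rnn", "attention"]

def decode_arc(sample_arc, num_layers):
    decoded, start_idx = [], 0
    for layer_id in range(num_layers):
        op = OP_NAMES[sample_arc[start_idx]]                               # op choice for this layer
        skips = list(sample_arc[start_idx + 1:start_idx + 1 + layer_id])   # skip flags over previous layers
        decoded.append((op, skips))
        start_idx += 1 + layer_id                                          # same bookkeeping as the caller
    return decoded

print(decode_arc([2, 6, 1, 0, 0, 1, 4, 1, 1, 0], num_layers=4))
# [('conv_5', []), ('rnn', [1]), ('conv_1', [0, 1]), ('avg_pool', [1, 1, 0])]
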
Example #5
    def _model(self,
               doc,
               bow_doc,
               datasets,
               is_training,
               reuse=False,
               mode="train"):

        with tf.variable_scope(self.name, reuse=reuse):
            layers = []
            final_flags = []

            if is_training:
                self.valid_lengths = []
            with tf.variable_scope('embed'):
                regularizer = tf.contrib.layers.l2_regularizer(
                    scale=self.l2_reg)
                if self.embedding_model == "none":
                    embedding = create_weight(
                        "w",
                        shape=self.embedding["none"].shape,
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                elif self.embedding_model == "glove":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                elif self.embedding_model == "word2vec":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                elif self.embedding_model == "all":
                    embedding_glove = create_weight(
                        "w_glove",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                    print("embedding_glove: {0}".format(
                        embedding_glove.get_shape()))
                    embedding_word2vec = create_weight(
                        "w_word2vec",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                    print("embedding_word2vec: {0}".format(
                        embedding_word2vec.get_shape()))
                    embedding = tf.concat(
                        [embedding_glove, embedding_word2vec], axis=0)
                    print("join embedding: {0}".format(embedding.get_shape()))
                field_embedding = create_weight(
                    "w_field",
                    shape=self.embedding["field"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)

                self.final_embedding = embedding
                print("embedding: {0}".format(embedding))
                print("doc: {0}".format(doc))
                print("bow_doc: {0}".format(bow_doc))

                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size

                if self.sliding_window:
                    doc, sliding_windows = self._to_sliding_window(doc,
                                                                   batch_size,
                                                                   size=64,
                                                                   step=32)
                    bow_doc, _ = self._to_sliding_window(bow_doc,
                                                         batch_size,
                                                         size=64,
                                                         step=32)
                    print("doc after sliding window: {0}".format(doc))

                if is_training:
                    embedding = tf.nn.dropout(embedding,
                                              keep_prob=self.embed_keep_prob)

                doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
                field_embedding = tf.nn.embedding_lookup(field_embedding,
                                                         bow_doc,
                                                         max_norm=None)
                if self.input_field_embedding:
                    doc = tf.add_n([doc, field_embedding])
                doc = tf.transpose(doc, [0, 2, 1])

                print("doc_shape", doc.shape)
                inp_c = doc.shape[1]
                inp_w = doc.shape[2]
                #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
                doc = tf.reshape(doc, [-1, inp_c, inp_w])
                field_embedding = tf.transpose(field_embedding, [0, 2, 1])
                #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
                field_embedding = tf.reshape(field_embedding,
                                             [-1, inp_c, inp_w])

                print("after: doc, field_embedding", doc.shape,
                      field_embedding.shape)

            x = doc
            pos_batch_size = 1
            # initialize pos_embedding for transformer
            if self.input_positional_encoding:
                out_filters = 300
            else:
                out_filters = self.out_filters
            if self.is_sinusolid:
                pos_embedding = self._positional_encoding(
                    x,
                    pos_batch_size,
                    is_training,
                    num_units=out_filters,
                    zero_pad=False,
                    scale=False,
                    scope="enc_pe")
            else:
                pos_embedding = self._embedding(tf.tile(
                    tf.expand_dims(tf.range(inp_w), 0), [pos_batch_size, 1]),
                                                vocab_size=inp_w,
                                                num_units=out_filters,
                                                reuse=tf.AUTO_REUSE,
                                                zero_pad=True,
                                                scale=False,
                                                scope="enc_pe")
            print("pos embedding: {0}".format(pos_embedding))
            pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
            #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
            print("pos embedding: {0}".format(pos_embedding))
            if self.input_positional_encoding:
                x += pos_embedding

            out_filters = self.out_filters
            with tf.variable_scope("init_conv"):  # adjust out_filter dimension
                #print("init_x", x.shape)
                x = _conv_opt(x, 1, self.out_filters)
                x = batch_norm(x, is_training)

            def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                        is_training, pos_embedding,
                                        field_embedding):
                '''Add a fixed pooling layer every four layers'''
                with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                    pos_embedding = self._factorized_reduction(
                        pos_embedding, out_filters, 2, is_training)

                with tf.variable_scope(
                        "field_embed_pool_{0}".format(layer_id)):
                    field_embedding = self._factorized_reduction(
                        field_embedding, out_filters, 2, is_training)

                #out_filters *= 2
                with tf.variable_scope("pool_at_{0}".format(layer_id)):
                    pooled_layers = []
                    for i, layer in enumerate(layers):
                        #print("pooling_layer", i, layer)
                        with tf.variable_scope("from_{0}".format(i)):
                            x = self._factorized_reduction(
                                layer, out_filters, 2, is_training)
                            #print("after x ", x)
                        pooled_layers.append(x)

                    layers = pooled_layers

                    return layers, out_filters

            start_idx = 0
            print("xxxxx", x)

            for layer_id in range(self.num_layers):
                with tf.variable_scope("layer_{0}".format(layer_id)):
                    print("layers", layers)
                    print("layer_id, x", layer_id, x)

                    x = self._fixed_layer(x, pos_embedding, field_embedding,
                                          layer_id, layers, final_flags,
                                          start_idx, 0, out_filters,
                                          is_training)

                    layers.append(x)
                    if self.fixed_arc is not None:
                        final_flags.append(1)

                    print("sample_arc: {0}".format(self.sample_arc[start_idx]))
                    if layer_id in self.pool_layers:
                        layers, out_filters = add_fixed_pooling_layer(
                            layer_id, layers, out_filters, is_training,
                            pos_embedding, field_embedding)

                start_idx += 1 + layer_id
                if self.multi_path:
                    start_idx += 1
                print(layers[-1])

            print("all_layers:", layers)
            final_layers = []
            final_layers_idx = []
            for i in range(0, len(layers)):
                if self.all_layer_output:
                    if self.num_last_layer_output == 0:
                        final_layers.append(layers[i])
                        final_layers_idx.append(i)
                    elif i >= max(
                        (len(layers) - self.num_last_layer_output), 0):
                        final_layers.append(layers[i])
                        final_layers_idx.append(i)
                elif self.fixed_arc is not None and final_flags[i] == 1:
                    final_layers.append(layers[i])
                    final_layers_idx.append(i)
                elif self.fixed_arc is None:
                    final_layers.append(final_flags[i] * layers[i])

            if self.fixed_arc is not None:
                print("final_layers: {0}".format(' '.join(
                    [str(idx) for idx in final_layers_idx])))

            if self.fixed_arc is not None and self.output_linear_combine:
                x = self._linear_combine(final_layers)
            else:
                x = tf.add_n(final_layers)

            if self.sliding_window:
                x = self._from_sliding_window(x, batch_size, sliding_windows)

            class_num = self.class_num
            with tf.variable_scope("fc"):
                if not self.is_output_attention:
                    x = tf.reduce_mean(x, 2)
                else:
                    batch_size = x.get_shape()[0].value
                    inp_d = x.get_shape()[1].value
                    inp_l = x.get_shape()[2].value

                    final_attention_query = create_weight(
                        "query",
                        shape=[1, inp_d],
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                    if is_training or mode == "valid":
                        batch_size = self.batch_size
                    else:
                        batch_size = self.eval_batch_size
                    final_attention_query = tf.tile(final_attention_query,
                                                    [batch_size, 1])
                    print("final_attention_query: {0}".format(
                        final_attention_query))

                    # put channel to the last dim
                    x = tf.transpose(x, [0, 2, 1])
                    x = tf.reshape(x, [-1, inp_l, inp_d])
                    print("x: {0}".format(x))
                    x = multihead_attention(
                        queries=final_attention_query,
                        keys=x,
                        pos_embedding=pos_embedding,
                        field_embedding=field_embedding,
                        num_units=inp_d,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=is_training,
                        causality=False,
                        positional_encoding=self.positional_encoding)
                    print("x: {0}".format(x))
                    x = tf.reshape(x, [-1, 1, inp_d])
                    x = tf.reduce_sum(x, axis=1)
                    print("x: {0}".format(x))
                if is_training:
                    x = tf.nn.dropout(x, self.keep_prob)
                x = tf.layers.dense(x, units=class_num)

        return x
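
After the layer loop, the fixed-arc model decides which layer outputs feed the classifier, driven by all_layer_output, num_last_layer_output, and the final_flags cleared by _fixed_layer whenever a layer is consumed by a later one. A pure-Python sketch of that selection for the fixed-arc case (the search-time branch that scales outputs by their flags is left out); the inputs are placeholders.

def select_final_layers(layers, final_flags, all_layer_output, num_last_layer_output):
    final_layers = []
    for i in range(len(layers)):
        if all_layer_output:
            # keep every layer, or only the last num_last_layer_output of them
            if num_last_layer_output == 0 or i >= max(len(layers) - num_last_layer_output, 0):
                final_layers.append(layers[i])
        elif final_flags[i] == 1:
            # keep only layers whose outputs were never reused by a later layer
            final_layers.append(layers[i])
    return final_layers

layers = ["L0", "L1", "L2", "L3"]
flags = [0, 1, 0, 1]
print(select_final_layers(layers, flags, False, 0))  # ['L1', 'L3']
print(select_final_layers(layers, flags, True, 2))   # ['L2', 'L3']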