Example #1
    def __init__(self, is_trainable=True):
        self.model_name = ModelName.resnet_clf
        super(ResnetClf, self).__init__(model_name=self.model_name,
                                        is_trainable=True,
                                        train_last_block=False)

        # self.base_image_model = Resnet(restore=False, is_trainable=False, train_last_block=False)
        self.resnet_representation = self.avg_pool_representation

        activation = tf.nn.relu
        self.initializer_type = 'normal' if activation == tf.nn.relu else 'xavier'
        dim_D = 2048

        with tf.variable_scope(self.model_name.name):
            # FC1 [dim_d] -> [512]
            with tf.variable_scope('image_fc1'):
                W_fc1 = weight_variable(is_trainable=is_trainable,
                                        shape=[dim_D, 512],
                                        initializer_type=self.initializer_type,
                                        name='W_fc1')
                b_fc1 = bias_variable(is_trainable=is_trainable,
                                      shape=[512],
                                      name='b_fc1')

                if use_batch_norm:
                    h_fc1 = batch_norm_dense_activation(
                        inputs=tf.matmul(self.resnet_representation, W_fc1) +
                        b_fc1,
                        is_training=self.train_mode,
                        activation=activation,
                        is_trainable=is_trainable)
                else:
                    h_fc1 = activation(
                        tf.matmul(self.resnet_representation, W_fc1) + b_fc1)
                h_fc1_drop = tf.nn.dropout(h_fc1, self.dropout_keep_prob)

            # Softmax
            with tf.variable_scope('softmax'):
                W_softmax = weight_variable(
                    is_trainable=is_trainable,
                    shape=[512, NUM_CLASSES],
                    initializer_type=self.initializer_type)
                b_softmax = bias_variable(is_trainable=is_trainable,
                                          shape=[NUM_CLASSES])
                self.scores = tf.matmul(h_fc1_drop, W_softmax) + b_softmax

            with tf.variable_scope('optimization'):
                # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
                self._set_predictions_optimizer_and_loss()
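
A minimal sketch, purely an assumption, of what the weight_variable and bias_variable helpers used throughout these examples might look like in TensorFlow 1.x; the keyword arguments (shape, name, is_trainable, initializer_type, stddev) mirror the calls above, but the real implementations in each project are not shown on this page.

import tensorflow as tf

def weight_variable(shape, is_trainable=True, initializer_type='normal',
                    stddev=0.1, name='weights'):
    # 'xavier' (Glorot) initialization for tanh-style activations,
    # truncated normal for ReLU, as the initializer_type choices above suggest.
    if initializer_type == 'xavier':
        initializer = tf.glorot_uniform_initializer()
    else:
        initializer = tf.truncated_normal_initializer(stddev=stddev)
    return tf.get_variable(name, shape=shape, initializer=initializer,
                           trainable=is_trainable)

def bias_variable(shape, name='biases', is_trainable=True):
    # Small constant initialization; the trainable flag mirrors weight_variable.
    return tf.get_variable(name, shape=shape,
                           initializer=tf.constant_initializer(0.1),
                           trainable=is_trainable)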
Example #2
def retrieveBiases(sess, shape, layer_name):
    """Re-open the layer's 'biases' variable scope with reuse=True and fetch the bias values."""
    with tf.name_scope(layer_name):
        with tf.variable_scope('biases', reuse=True):
            biases = bias_variable(shape, layer_name)
            bias_values = sess.run(biases)

    return bias_values
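
A hypothetical call to retrieveBiases, assuming the bias was originally created by bias_variable under a matching 'biases' variable scope so that reuse=True can find it; the scope layout and layer name here are assumptions, not shown above.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch the current values of a [512]-dim bias registered under 'image_fc1'.
    fc1_biases = retrieveBiases(sess, shape=[512], layer_name='image_fc1')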
Example #3
    def __init__(self,
                 is_primary_model=True,
                 is_trainable=True,
                 train_last_layers=False):
        model_name = ModelName.image_cnn_v2
        # with tf.variable_scope(model_name.name):
        super(ImageCNNV2, self).__init__(model_name=model_name,
                                         is_primary_model=is_primary_model)

        last_pool_image_dim = int(IMAGE_SIZE / 32)

        # MODEL CONFIG:
        self.config = {
            'USE_AVG_POOLING': True,
            'IMAGE_SIZE': IMAGE_SIZE,
            'activation': tf.nn.relu,
        }

        activation = self.config['activation']

        # MODEL DEFINITION
        # ================================================================================
        # conv 1 # padding='SAME'
        # [IMG,IMG,3] -> [IMG,IMG,32]
        with tf.variable_scope('image_conv1'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv1 = weight_variable(is_trainable=is_trainable,
                                      shape=[7, 7, 3, 32])
            b_conv1 = bias_variable(is_trainable=is_trainable, shape=[32])
            if use_batch_norm:
                self.h_conv1 = batch_norm_conv_activation(
                    is_trainable=is_trainable,
                    inputs=conv2d(x=self.x_image, W=W_conv1) + b_conv1,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv1 = activation(
                    conv2d(x=self.x_image, W=W_conv1) + b_conv1)

        # conv1-pool 1
        # [IMG,IMG,32] -> [IMG/2,IMG/2,32] # ksize=[1,2,2,1], strides=[1,2,2,1]
        with tf.variable_scope('image_pool1'):
            self.h_pool1 = max_pool(self.h_conv1)
        # # norm
        # with tf.variable_scope('image_norm1'):
        #     h_norm1 = lr_norm(self.h_pool1, 4)

        # conv2
        # [IMG/2,IMG/2,32] -> [IMG/2,IMG/2,64]
        with tf.variable_scope('image_conv2'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv2 = weight_variable(is_trainable=is_trainable,
                                      shape=[5, 5, 32, 64])
            b_conv2 = bias_variable(is_trainable=is_trainable, shape=[64])
            if use_batch_norm:
                self.h_conv2 = batch_norm_conv_activation(
                    is_trainable=is_trainable,
                    inputs=conv2d(x=self.h_pool1, W=W_conv2) + b_conv2,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv2 = activation(
                    conv2d(x=self.h_pool1, W=W_conv2) + b_conv2)

        # conv3
        # [IMG/2,IMG/2,64] -> [IMG/2,IMG/2,64]
        with tf.variable_scope('image_conv3'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv3 = weight_variable(is_trainable=is_trainable,
                                      shape=[5, 5, 64, 64])
            b_conv3 = bias_variable(is_trainable=is_trainable, shape=[64])
            if use_batch_norm:
                self.h_conv3 = batch_norm_conv_activation(
                    is_trainable=is_trainable,
                    inputs=conv2d(x=self.h_conv2, W=W_conv3) + b_conv3,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv3 = activation(
                    conv2d(x=self.h_conv2, W=W_conv3) + b_conv3)

        # conv3-pool2
        # [IMG/2,IMG/2,64] -> [IMG/4,IMG/4,64]
        with tf.variable_scope('image_pool2'):
            self.h_pool2 = max_pool(self.h_conv3)

        # conv4
        # [IMG/4,IMG/4,64] -> [IMG/4,IMG/4,128]
        with tf.variable_scope('image_conv4'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv4 = weight_variable(is_trainable=is_trainable,
                                      shape=[3, 3, 64, 128])
            b_conv4 = bias_variable(is_trainable=is_trainable, shape=[128])
            if use_batch_norm:
                self.h_conv4 = batch_norm_conv_activation(
                    is_trainable=is_trainable,
                    inputs=conv2d(x=self.h_pool2, W=W_conv4) + b_conv4,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv4 = activation(
                    conv2d(x=self.h_pool2, W=W_conv4) + b_conv4)

        # conv5
        # [IMG/4,IMG/4,128] -> [IMG/4,IMG/4,128]
        with tf.variable_scope('image_conv5'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv5 = weight_variable(is_trainable=is_trainable,
                                      shape=[3, 3, 128, 128])
            b_conv5 = bias_variable(is_trainable=is_trainable, shape=[128])
            if use_batch_norm:
                self.h_conv5 = batch_norm_conv_activation(
                    is_trainable=is_trainable,
                    inputs=conv2d(x=self.h_conv4, W=W_conv5) + b_conv5,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv5 = activation(
                    conv2d(x=self.h_conv4, W=W_conv5) + b_conv5)

        # conv5-pool3
        # [IMG/4,IMG/4,128] -> [IMG/8,IMG/8,128]
        with tf.variable_scope('image_pool3'):
            self.h_pool3 = max_pool(self.h_conv5)

        last_layers_trainable = is_trainable or train_last_layers

        # conv6
        # [IMG/8,IMG/8,128] -> [IMG/8,IMG/8,256]
        with tf.variable_scope('image_conv6'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv6 = weight_variable(is_trainable=last_layers_trainable,
                                      shape=[3, 3, 128, 256])
            b_conv6 = bias_variable(is_trainable=last_layers_trainable,
                                    shape=[256])
            if use_batch_norm:
                self.h_conv6 = batch_norm_conv_activation(
                    is_trainable=last_layers_trainable,
                    inputs=conv2d(x=self.h_pool3, W=W_conv6) + b_conv6,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv6 = activation(
                    conv2d(x=self.h_pool3, W=W_conv6) + b_conv6)

        # conv7
        # [IMG/8,IMG/8,256] -> [IMG/8,IMG/8,256]
        with tf.variable_scope('image_conv7'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv7 = weight_variable(is_trainable=last_layers_trainable,
                                      shape=[3, 3, 256, 256])
            b_conv7 = bias_variable(is_trainable=last_layers_trainable,
                                    shape=[256])
            if use_batch_norm:
                self.h_conv7 = batch_norm_conv_activation(
                    is_trainable=last_layers_trainable,
                    inputs=conv2d(x=self.h_conv6, W=W_conv7) + b_conv7,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv7 = activation(
                    conv2d(x=self.h_conv6, W=W_conv7) + b_conv7)

        # conv7-pool4
        # [IMG/8,IMG/8,256] -> [IMG/16,IMG/16,256]
        with tf.variable_scope('image_pool4'):
            self.h_pool4 = max_pool(self.h_conv7)

        # conv8
        # [IMG/16,IMG/16,256] -> [IMG/16,IMG/16,512]
        with tf.variable_scope('image_conv8'):
            # [filter_size, filter_size, channel_size, num_filters]
            W_conv8 = weight_variable(is_trainable=last_layers_trainable,
                                      shape=[3, 3, 256, 512])
            b_conv8 = bias_variable(is_trainable=last_layers_trainable,
                                    shape=[512])
            if use_batch_norm:
                self.h_conv8 = batch_norm_conv_activation(
                    is_trainable=last_layers_trainable,
                    inputs=conv2d(x=self.h_pool4, W=W_conv8) + b_conv8,
                    is_training=self.train_mode,
                    activation=activation)
            else:
                self.h_conv8 = activation(
                    conv2d(x=self.h_pool4, W=W_conv8) + b_conv8)

        if self.config['USE_AVG_POOLING']:
            # conv8-avgPool
            # [IMG/16, IMG/16, 512] -> [512]
            with tf.variable_scope('image_avg_pool'):
                self.h_pool5_flat = tf.reduce_mean(self.h_conv8,
                                                   axis=[1, 2],
                                                   name="avg_pool")
        else:
            # conv8-pool5
            # [IMG/16,IMG/16,512] -> [IMG/32,IMG/32,512]
            with tf.variable_scope('image_pool5'):
                self.h_pool5 = max_pool(self.h_conv8)
                # Flatten last pool layer
                self.h_pool5_flat = tf.reshape(self.h_pool5,
                                               shape=[
                                                   -1, last_pool_image_dim *
                                                   last_pool_image_dim * 512
                                               ],
                                               name='h_pool5_flat')

        if not self.config['USE_AVG_POOLING']:
            # FC0 [image_dim*image_dim*512] -> [512]
            with tf.variable_scope('image_fc0'):
                W_fc0 = weight_variable(is_trainable=is_trainable,
                                        shape=[
                                            last_pool_image_dim *
                                            last_pool_image_dim * 512, 512
                                        ],
                                        name='W_fc0')
                b_fc0 = bias_variable(is_trainable=is_trainable,
                                      shape=[512],
                                      name='b_fc0')
                if use_batch_norm:
                    self.h_fc0 = batch_norm_dense_activation(
                        inputs=tf.nn.xw_plus_b(x=self.h_pool5_flat,
                                               weights=W_fc0,
                                               biases=b_fc0),
                        is_training=self.train_mode,
                        activation=activation,
                        is_trainable=is_trainable)
                else:
                    self.h_fc0 = activation(
                        tf.matmul(self.h_pool5_flat, W_fc0) + b_fc0)
                self.h_fc0_drop = tf.nn.dropout(self.h_fc0,
                                                self.dropout_keep_prob)
            last_layer = self.h_fc0_drop
        else:
            last_layer = self.h_pool5_flat

        if is_trainable:
            # FC1 [512] -> [256]
            with tf.variable_scope('image_fc1'):
                W_fc1 = weight_variable(is_trainable=is_trainable,
                                        shape=[512, 256])
                b_fc1 = bias_variable(is_trainable=is_trainable, shape=[256])
                if use_batch_norm:
                    self.h_fc1 = batch_norm_dense_activation(
                        inputs=tf.nn.xw_plus_b(x=last_layer,
                                               weights=W_fc1,
                                               biases=b_fc1),
                        is_training=self.train_mode,
                        activation=activation,
                        is_trainable=is_trainable)
                else:
                    self.h_fc1 = activation(
                        tf.matmul(last_layer, W_fc1) + b_fc1)
                # dropout
                self.h_fc1_drop = tf.nn.dropout(self.h_fc1,
                                                self.dropout_keep_prob)

            # Softmax
            with tf.variable_scope('softmax'):
                self.W_softmax = weight_variable(is_trainable=is_trainable,
                                                 shape=[256, NUM_CLASSES])
                self.b_softmax = bias_variable(is_trainable=is_trainable,
                                               shape=[NUM_CLASSES])
                self.probabilities = tf.nn.softmax(
                    tf.matmul(self.h_fc1_drop, self.W_softmax) +
                    self.b_softmax)

            # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
            if self.is_primary_model:
                print("%s is a primary model, making optimizations" %
                      self.model_name.name)
                self._set_predictions_optimizer_and_loss()
            else:
                print("%s not primary model, skipping optimizations" %
                      self.model_name.name)
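
ImageCNNV2 relies on a few helpers defined elsewhere in its project: conv2d, max_pool, and the two batch-norm wrappers. A rough sketch of plausible TF 1.x implementations, assuming the SAME padding and 2x2 pooling implied by the shape comments (the conv2d used in Example #4 below has a different signature):

import tensorflow as tf

def conv2d(x, W, strides=(1, 1, 1, 1)):
    # SAME padding keeps the spatial size, as the shape comments assume.
    return tf.nn.conv2d(x, W, strides=list(strides), padding='SAME')

def max_pool(x):
    # 2x2 pooling with stride 2 halves the spatial dimensions.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')

def batch_norm_conv_activation(inputs, is_training, activation, is_trainable=True):
    # Batch-normalize the pre-activation, then apply the nonlinearity.
    normed = tf.layers.batch_normalization(inputs, training=is_training,
                                           trainable=is_trainable)
    return activation(normed)

# The dense wrapper used for fully connected layers is identical in spirit.
batch_norm_dense_activation = batch_norm_conv_activation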
Example #4
    def __init__(self, is_trainable=True):
        super(ImageCNN, self).__init__(model_name=ModelName.image_cnn)

        # MODEL DEFINITION
        # ================================================================================
        # conv 1 # padding='SAME'
        with tf.variable_scope('image_conv1'):
            # [filter_size, filter_size, channel_size, num_filters]
            self.W_conv1 = weight_variable(is_trainable=is_trainable,
                                           shape=[7, 7, 3, 64],
                                           stddev=5e-2)
            self.b_conv1 = bias_variable(is_trainable=is_trainable, shape=[64])
            self.h_conv1 = tf.nn.relu(
                conv2d(self.x_image, self.W_conv1, is_training=self.train_mode)
                + self.b_conv1)
        # [128,128,64] -> [64,64,64] # ksize=[1,2,2,1], strides=[1,2,2,1]
        with tf.variable_scope('image_pool1'):
            self.h_pool1 = max_pool(self.h_conv1)

        # conv2
        with tf.variable_scope('image_conv2'):
            self.W_conv2 = weight_variable(is_trainable=is_trainable,
                                           shape=[7, 7, 64, 128],
                                           stddev=5e-2)
            self.b_conv2 = bias_variable(is_trainable=is_trainable,
                                         shape=[128])
            self.h_conv2 = tf.nn.relu(
                conv2d(self.h_pool1,
                       self.W_conv2,
                       isnorm=False,
                       is_training=self.train_mode) + self.b_conv2)
        # [64,64,128] -> [32,32,128]
        with tf.variable_scope('image_pool2'):
            self.h_pool2 = max_pool(self.h_conv2)

        # conv 3 [32,32,128] -> [16,16,256]
        with tf.variable_scope('image_conv3'):
            self.W_conv3 = weight_variable(is_trainable=is_trainable,
                                           shape=[5, 5, 128, 256],
                                           stddev=5e-2)
            self.b_conv3 = bias_variable(is_trainable=is_trainable,
                                         shape=[256])
            self.h_conv3 = tf.nn.relu(
                conv2d(self.h_pool2,
                       self.W_conv3,
                       strides=[1, 2, 2, 1],
                       isnorm=False,
                       is_training=self.train_mode) + self.b_conv3)
        # [16,16,256] -> [8,8,256]
        with tf.variable_scope('image_pool3'):
            self.h_pool3 = max_pool(self.h_conv3)
        # norm 3
        with tf.variable_scope('image_norm3'):
            self.h_norm3 = lr_norm(self.h_pool3, 4)

        # conv 4 [8,8,256] -> [2,2,512]
        with tf.variable_scope('image_conv4'):
            self.W_conv4 = weight_variable(is_trainable=is_trainable,
                                           shape=[5, 5, 256, 512],
                                           stddev=5e-2)
            self.b_conv4 = bias_variable(is_trainable=is_trainable,
                                         shape=[512])
            self.h_conv4 = tf.nn.relu(
                conv2d(self.h_norm3,
                       self.W_conv4,
                       strides=[1, 4, 4, 1],
                       isnorm=False,
                       is_training=self.train_mode) + self.b_conv4)
        # [2,2,512] -> [1,1,512]
        with tf.variable_scope('image_pool4'):
            self.h_pool4 = max_pool(self.h_conv4)

        # fc
        with tf.variable_scope('image_fc1'):
            self.W_fc1 = weight_variable(is_trainable=is_trainable,
                                         shape=[512, 256],
                                         stddev=0.04)
            self.b_fc1 = bias_variable(is_trainable=is_trainable, shape=[256])
            self.h_pool4_flat = tf.reshape(self.h_pool4, [-1, 512])
            self.h_fc1 = tf.nn.relu(
                tf.matmul(self.h_pool4_flat, self.W_fc1) + self.b_fc1)
            # dropout
            self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.dropout_keep_prob)
        # softmax
        with tf.variable_scope('softmax'):
            self.W_softmax = weight_variable(is_trainable=is_trainable,
                                             shape=[256, NUM_CLASSES],
                                             stddev=0.01)
            self.b_softmax = bias_variable(is_trainable=is_trainable,
                                           shape=[NUM_CLASSES])
            self.probabilities = tf.nn.softmax(
                tf.matmul(self.h_fc1_drop, self.W_softmax) + self.b_softmax)

        # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
        self._set_predictions_optimizer_and_loss()
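
This ImageCNN variant also calls lr_norm (and a conv2d wrapper that accepts strides, isnorm and is_training arguments not shown here). lr_norm is presumably a thin wrapper over TensorFlow's local response normalization; a hypothetical version:

import tensorflow as tf

def lr_norm(x, depth_radius, name='lr_norm'):
    # AlexNet-style local response normalization across neighbouring channels.
    return tf.nn.lrn(x, depth_radius=depth_radius, bias=1.0,
                     alpha=1e-4, beta=0.75, name=name)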
Example #5
    def __init__(self,
                 is_primary_model,
                 is_trainable,
                 filter_sizes=(1, 2, 3),
                 num_filters=128,
                 activation=tf.nn.tanh):
        """Define the TF elements for HieCoAtt's text representation.

        Parameters
        ----------
        :param filter_sizes: tuple, containing the different filter sizes for convolution

        :param num_filters: int, number of filters of each size

        :param activation: activation function from tf.nn (only tf.nn.relu, tf.nn.tanh supported)

        :param is_primary_model: bool, whether this model's output is used to perform the task

        :param is_trainable: bool, whether parameters can be updated during training
        """
        assert activation == tf.nn.relu or activation == tf.nn.tanh
        self.activation = activation
        self.initializer_type = 'xavier' if self.activation == tf.nn.tanh else 'normal'

        with tf.variable_scope(ModelName.hie_text.name):
            super(HieText, self).__init__(model_name=ModelName.hie_text,
                                          is_primary_model=is_primary_model,
                                          is_trainable=is_trainable)

            # Convert num_filters to list corresponding to number of filters for each filter size
            if isinstance(num_filters, int):
                num_filters = [num_filters] * len(filter_sizes)
            # The phrase-level feature dimension should match the word-level
            # feature dimension, so num_filters must equal EMBED_SIZES.
            assert num_filters == EMBED_SIZES

            self.config = {
                'filter_sizes': filter_sizes,
                'num_filters': num_filters,
                'activation':
                'relu' if self.activation == tf.nn.relu else 'tanh'
            }

            # Shape of self.word_level: [BATCH_SIZE, TEXT_LENGTH, EMBED_SIZES]
            self.word_level = self.seq_embedded

            # Convolution for phrase level
            self.conv_output = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.name_scope("conv-%s" % filter_size):
                    # Convolution Layer
                    filter_shape = [
                        filter_size, EMBED_SIZES, 1, num_filters[i]
                    ]
                    W = weight_variable(is_trainable=is_trainable,
                                        shape=filter_shape,
                                        initializer_type=self.initializer_type,
                                        name='W%d' % i)
                    twopadding = filter_size - 1  # (h+2p-f)/s + 1 = h #s=1 #same height padding
                    top_padding = twopadding // 2
                    bottom_padding = twopadding - top_padding
                    self.x_padded = tf.pad(
                        self.x_text, [[0, 0], [top_padding, bottom_padding],
                                      [0, 0], [0, 0]])
                    conv = tf.nn.conv2d(self.x_padded,
                                        W,
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name="conv")
                    bn_conv = batch_norm(conv, num_filters[i],
                                         self.phase_train)
                    h = self.activation(bn_conv, name="activation")
                    self.conv_output.append(h)

            # Shape of full_conv_output: [BATCH_SIZE, TEXT_LENGTH, len(filter_sizes), num_filters]
            full_conv_output = tf.concat(self.conv_output, 2)

            # Phrase level output shape: [BATCH_SIZE, TEXT_LENGTH, num_filters]
            self.phase_level = tf.reduce_max(full_conv_output, 2)

            # Sentence Level
            lstm_cell = rnn.BasicLSTMCell(EMBED_SIZES)
            if self.dropout_keep_prob is not None:
                lstm_cell = rnn.DropoutWrapper(
                    lstm_cell, output_keep_prob=self.dropout_keep_prob)
            self.lstm_outputs, states = tf.nn.dynamic_rnn(lstm_cell,
                                                          self.phase_level,
                                                          dtype=tf.float32)
            # Sentence_level text output
            # [BATCH_SIZE, TEXT_LENGTH, num_filters]
            self.sentence_level = tf.concat(self.lstm_outputs, 1)

            # Concatenate the different levels.
            # We tried the hierarchical approach in Lu et al., but it gave inferior results.
            self.final_text_embedding_spatial = tf.concat(
                values=(self.word_level, self.phase_level,
                        self.sentence_level),
                axis=-1)
            self.final_text_embedding = tf.reduce_mean(
                self.final_text_embedding_spatial, axis=1)
            # Add dropout
            with tf.variable_scope("dropout"):
                self.final_text_embedding = tf.nn.dropout(
                    self.final_text_embedding, self.dropout_keep_prob)
            self.final_embedding_dimension = self.final_text_embedding.shape[
                1].value

            if is_trainable:
                # Final (unnormalized) scores and predictions
                with tf.variable_scope("output"):
                    W_o = weight_variable(
                        is_trainable=is_trainable,
                        shape=[self.final_embedding_dimension, NUM_CLASSES],
                        name='W_o',
                        initializer_type='xavier'
                        if USE_MULTILABEL else 'normal')
                    b_o = bias_variable(is_trainable=is_trainable,
                                        shape=[NUM_CLASSES],
                                        name='b_o')
                    self.scores = tf.nn.xw_plus_b(self.final_text_embedding,
                                                  W_o,
                                                  b_o,
                                                  name="scores")

                if self.is_primary_model:
                    self._set_predictions_optimizer_and_loss()
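
Both text models call a batch_norm(conv, n_out, phase_train) helper whose definition is not shown. A common TF 1.x implementation of that signature tracks moving averages of the batch statistics and switches on phase_train; the sketch below follows that pattern and is an assumption, not code from the project.

import tensorflow as tf

def batch_norm(x, n_out, phase_train, decay=0.99, eps=1e-3):
    # Normalize a [batch, height, width, n_out] conv output.
    with tf.variable_scope('bn'):
        beta = tf.get_variable('beta', [n_out],
                               initializer=tf.zeros_initializer())
        gamma = tf.get_variable('gamma', [n_out],
                                initializer=tf.ones_initializer())
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            # During training, update the moving averages and use batch stats.
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean),
                                     ema.average(batch_var)))
        return tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)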
Example #6
    def __init__(self,
                 filter_sizes=(1, 2, 3),
                 num_filters=(128, 256, 256),
                 activation=tf.nn.relu,
                 is_primary_model=True,
                 is_trainable=True,
                 embed_size_multiplier=1.):
        """Create an embedding matrix, a lookup procedure on it for text sequence input,
            convoultion using multiple filter sizes, and final optimizations if is_primary_model=True.

        Parameters
        ----------
        :param filter_sizes: tuple, containing the different filter sizes for convolution

        :param num_filters: int or tuple, should be of the same length as filter_sizes,
            denoting the number of filters of each size. If int, the same number of filters is used for all sizes.

        :param activation: activation function from tf.nn (only tf.nn.relu, tf.nn.tanh supported)

        :param is_primary_model: bool, whether this model's output is used to perform the task

        :param is_trainable: bool, whether parameters can be updated during training

        :param embed_size_multiplier: float, factor with which to multiply default embedding size
        """
        with tf.variable_scope(ModelName.text_cnn.name):
            super(TextCNN,
                  self).__init__(model_name=ModelName.text_cnn,
                                 is_primary_model=is_primary_model,
                                 is_trainable=is_trainable,
                                 embed_size_multiplier=embed_size_multiplier)

            # Convert num_filters to list corresponding to number of filters for each filter size
            if isinstance(num_filters, int):
                num_filters = [num_filters] * len(filter_sizes)

            assert len(num_filters) == len(filter_sizes)
            assert activation == tf.nn.relu or activation == tf.nn.tanh

            self.config = {
                'filter_sizes': filter_sizes,
                'num_filters': num_filters,
                'activation': 'relu' if activation == tf.nn.relu else 'tanh'
            }
            # convolution + maxpool
            self.pooled_outputs = []

            for i, filter_size in enumerate(filter_sizes):
                with tf.variable_scope("conv-maxpool-%s" % filter_size):
                    # Convolution Layer
                    filter_shape = [
                        filter_size, self.embedding_size, 1, num_filters[i]
                    ]
                    initializer_type = "normal" if activation == tf.nn.relu else "xavier"
                    W_o = weight_variable(is_trainable=is_trainable,
                                          shape=filter_shape,
                                          initializer_type=initializer_type,
                                          name='W%d' % i)
                    # pad to prevent dimension reduction
                    twopadding = filter_size - 1  # (h+2p-f)/s + 1 = h #s=1 #same height padding
                    top_padding = twopadding // 2
                    bottom_padding = twopadding - top_padding
                    self.x_padded = tf.pad(
                        self.x_text, [[0, 0], [top_padding, bottom_padding],
                                      [0, 0], [0, 0]])
                    # Do convolution + batch_norm + activation
                    conv = tf.nn.conv2d(self.x_padded,
                                        W_o,
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name="conv")
                    bn_conv = batch_norm(conv, num_filters[i],
                                         self.phase_train)
                    h = activation(bn_conv, name="activation")

                    # Maxpooling over the outputs
                    pooled = tf.nn.max_pool(h,
                                            ksize=[1, TEXT_LENTH, 1, 1],
                                            strides=[1, 1, 1, 1],
                                            padding='VALID',
                                            name="pool")

                    self.pooled_outputs.append(pooled)

            # Combine all the pooled features
            self.final_embedding_dimension = sum(num_filters)
            self.h_pool = tf.concat(self.pooled_outputs, 3)
            self.h_pool_flat = tf.reshape(self.h_pool,
                                          [-1, self.final_embedding_dimension])

            # Add dropout
            with tf.variable_scope("dropout"):
                self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                            self.dropout_keep_prob)

            if is_trainable:
                # Final (unnormalized) scores and predictions
                with tf.variable_scope("output"):
                    W_o = weight_variable(
                        is_trainable=is_trainable,
                        shape=[self.final_embedding_dimension, NUM_CLASSES],
                        name='W_o',
                        initializer_type='xavier'
                        if USE_MULTILABEL else 'normal')
                    b_o = bias_variable(is_trainable=is_trainable,
                                        shape=[NUM_CLASSES],
                                        name='b_o')
                    self.scores = tf.nn.xw_plus_b(
                        self.h_drop, W_o, b_o,
                        name="scores")  # unnormalized scores

                if self.is_primary_model:
                    self._set_predictions_optimizer_and_loss()
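
The padding arithmetic used in both text models follows from the inline comment (h + 2p - f)/s + 1 = h: with stride 1, a total padding of filter_size - 1 keeps the sequence length unchanged, which is why twopadding = filter_size - 1 is split into top and bottom halves before tf.pad. A quick check:

def conv_output_length(h, f, total_padding, stride=1):
    # Length after a VALID convolution over an explicitly padded input.
    return (h + total_padding - f) // stride + 1

for f in (1, 2, 3):                        # the filter_sizes used above
    total = f - 1                          # twopadding = filter_size - 1
    top, bottom = total // 2, total - total // 2
    assert conv_output_length(50, f, top + bottom) == 50  # length preserved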