def inception_conv_layers(layer_dict,
                          inputs=None,
                          pretrained_dict=None,
                          bn=False,
                          wd=0,
                          init_w=None,
                          is_training=True,
                          trainable=True,
                          conv_stride=2):
    """Build the convolutional stem that precedes the inception modules.

    Graph, in creation order: ``conv1_7x7_s2`` -> pad -> ``pool1`` ->
    ``pool1_lrn`` -> ``conv2_3x3_reduce`` -> ``conv2_3x3`` -> pad ->
    ``pool2`` -> ``pool2_lrn``.

    Args:
        layer_dict: Mutable dict shared between layer builders. Its
            ``'cur_input'`` entry is read when ``inputs`` is None and is set
            to the stem output before returning.
        inputs: Input tensor; falls back to ``layer_dict['cur_input']``.
        pretrained_dict, bn, wd, init_w, is_training, trainable: Forwarded
            unchanged to every ``L.conv`` call through ``arg_scope``.
        conv_stride: Stride handed to the first convolution only.

    Returns:
        The output of the second local response normalization.
    """
    net = layer_dict['cur_input'] if inputs is None else inputs
    layer_dict['cur_input'] = net

    def _pad_pool_lrn(feature_map, pool_name):
        """Pad, max-pool and LRN-normalize ``feature_map``."""
        # One extra element is appended at the end of axes 1 and 2 before the
        # VALID pooling (presumably the spatial axes of an NHWC tensor --
        # TODO confirm the layout used by L.conv).
        paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
        padded = tf.pad(feature_map, paddings, 'CONSTANT')
        pooled, _ = L.max_pool(layer_dict=layer_dict,
                               inputs=padded,
                               stride=2,
                               filter_size=3,
                               padding='VALID',
                               name=pool_name)
        return tf.nn.local_response_normalization(pooled,
                                                  depth_radius=2,
                                                  alpha=2e-05,
                                                  beta=0.75,
                                                  name=pool_name + '_lrn')

    # Defaults applied to every L.conv call made inside the scope below.
    conv_defaults = dict(layer_dict=layer_dict,
                         pretrained_dict=pretrained_dict,
                         bn=bn,
                         nl=tf.nn.relu,
                         init_w=init_w,
                         trainable=trainable,
                         is_training=is_training,
                         wd=wd,
                         add_summary=False)

    with tf.contrib.framework.arg_scope([L.conv], **conv_defaults):
        stem = L.conv(7,
                      64,
                      inputs=net,
                      name='conv1_7x7_s2',
                      stride=conv_stride)
        stem = _pad_pool_lrn(stem, 'pool1')

        reduced = L.conv(1, 64, inputs=stem, name='conv2_3x3_reduce')
        expanded = L.conv(3, 192, inputs=reduced, name='conv2_3x3')
        stem_out = _pad_pool_lrn(expanded, 'pool2')

    layer_dict['cur_input'] = stem_out
    return stem_out
def inception_layers(layer_dict,
                     inputs=None,
                     pretrained_dict=None,
                     bn=False,
                     init_w=None,
                     wd=0,
                     trainable=True,
                     is_training=True):
    """Stack the inception modules 3a-5b with two max-pools in between.

    The modules and pools communicate through ``layer_dict['cur_input']``;
    no tensor is passed explicitly between the calls made here.

    Args:
        layer_dict: Mutable dict shared between layer builders. When
            ``inputs`` is given it is stored under ``'cur_input'`` first.
        inputs: Optional input tensor for the first module.
        pretrained_dict, bn, init_w, wd, trainable, is_training: Forwarded
            unchanged to every ``inception_layer`` call through ``arg_scope``.

    Returns:
        ``layer_dict['cur_input']`` as it stands after the last module.
    """
    if inputs is not None:
        layer_dict['cur_input'] = inputs

    # Each entry: (modules of the stage, name of the pool that closes it).
    # The six integers are handed positionally to inception_layer; their
    # meaning is defined by that function (not visible here).
    plan = (
        ((('inception_3a', (64, 96, 128, 16, 32, 32)),
          ('inception_3b', (128, 128, 192, 32, 96, 64))),
         'pool3'),
        ((('inception_4a', (192, 96, 208, 16, 48, 64)),
          ('inception_4b', (160, 112, 224, 24, 64, 64)),
          ('inception_4c', (128, 128, 256, 24, 64, 64)),
          ('inception_4d', (112, 144, 288, 32, 64, 64)),
          ('inception_4e', (256, 160, 320, 32, 128, 128))),
         'pool4'),
        ((('inception_5a', (256, 160, 320, 32, 128, 128)),
          ('inception_5b', (384, 192, 384, 48, 128, 128))),
         None),
    )

    module_defaults = dict(layer_dict=layer_dict,
                           pretrained_dict=pretrained_dict,
                           bn=bn,
                           init_w=init_w,
                           trainable=trainable,
                           is_training=is_training,
                           wd=wd)

    with tf.contrib.framework.arg_scope([inception_layer], **module_defaults):
        for modules, closing_pool in plan:
            for module_name, sizes in modules:
                inception_layer(*sizes, name=module_name)
            if closing_pool is not None:
                L.max_pool(layer_dict,
                           stride=2,
                           filter_size=3,
                           name=closing_pool)

    return layer_dict['cur_input']
    def new_model_forward(weights, inputs, make_vars=False):
        """Run a five-convolution forward pass using explicit weights.

        ``weights`` is indexed as ``[kernel0, bias0, ..., kernel4, bias4]``.
        The first four convolutions are each followed by
        ``Layers.max_pool`` and a ReLU; the fifth is followed by
        ``Layers.global_pool``.

        :param weights: sequence of at least ten kernels/biases, either
            ``np.ndarray`` or TensorFlow tensors/variables.
        :param inputs: input tensor fed to the first convolution.
        :param make_vars: when True, every ``np.ndarray`` entry of
            ``weights`` is wrapped in a ``tf.Variable`` first.
        :return: the pooled result reshaped to ``[-1, n]`` where ``n`` is
            the last dimension of ``weights[-1]``.
        """
        if make_vars:
            wrapped = []
            for entry in weights:
                if isinstance(entry, np.ndarray):
                    entry = tf.Variable(entry)
                wrapped.append(entry)
            weights = wrapped

        net = inputs

        # Four identical groups: conv (stride 1, SAME) + bias, pool, ReLU.
        for kernel_idx in (0, 2, 4, 6):
            net = tf.nn.conv2d(net,
                               weights[kernel_idx], [1, 1, 1, 1],
                               padding='SAME')
            net += weights[kernel_idx + 1]
            net = Layers.max_pool(net)
            net = tf.nn.relu(net)

        # Last convolution has no ReLU and is pooled globally instead.
        net = tf.nn.conv2d(net, weights[8], [1, 1, 1, 1], padding='SAME')
        net += weights[9]
        net = Layers.global_pool(net)

        # The width of the flattened output is taken from the final weight
        # entry; arrays and tensors expose their shape differently.
        final_entry = weights[-1]
        if isinstance(final_entry, np.ndarray):
            width = final_entry.shape[-1]
        else:
            width = final_entry.shape.as_list()[-1]
        return tf.reshape(net, [-1, width])
    def _build_model(self, **kwargs):
        """
        Build the detection network graph.

        The backbone is a chain of 20 stages; each stage is a convolution
        (``conv_layer`` or ``depth_point_layer``), ``batchNormalization`` and
        a leaky ReLU with alpha=0.1. Stages 1, 2, 5, 8 and 13 additionally end
        with ``max_pool(..., 2, 2, padding='SAME')``. Stage 21 builds a skip
        connection from the un-pooled activation of stage 13, stage 22 is one
        more convolution stage, and a final 1x1 ``conv_layer`` produces the
        raw predictions.

        :param kwargs: dict, extra arguments for building YOLO.
                -image_mean: np.ndarray, mean image for each input channel, shape: (C,).
                 Defaults to 0.0 when not given.
        :return d: dict, containing outputs on each layer (keys ``conv<i>``,
                 ``batch_norm<i>``, ``leaky_relu<i>``, ``pool<i>``, the
                 ``skip_*`` entries, ``concat21``, ``logit`` and ``pred``).
        """

        d = dict()
        x_mean = kwargs.pop('image_mean', 0.0)

        # input
        X_input = self.X - x_mean
        is_train = self.is_train

        # Keyword arguments shared by every stage convolution except stage 7.
        default_conv_args = dict(padding='SAME',
                                 use_bias=False,
                                 weights_stddev=0.01)
        # NOTE(review): stage 7 is the only stage that does not pass
        # use_bias=False and instead sets biases_value=0.0. This is kept
        # exactly as found; confirm whether it is intentional before changing
        # it, since it affects which variables the stage creates.
        conv_args_by_stage = {
            7: dict(padding='SAME', weights_stddev=0.01, biases_value=0.0),
        }

        def _stage(idx, depth_point, side, depth, source, pooled):
            """Add stage ``idx`` to ``d`` and return the tensor it outputs."""
            conv_key = 'conv{}'.format(idx)
            bn_key = 'batch_norm{}'.format(idx)
            act_key = 'leaky_relu{}'.format(idx)
            conv_args = conv_args_by_stage.get(idx, default_conv_args)
            with tf.variable_scope('layer{}'.format(idx)):
                make_conv = depth_point_layer if depth_point else conv_layer
                d[conv_key] = make_conv(source, side, 1, depth, **conv_args)
                d[bn_key] = batchNormalization(d[conv_key], is_train)
                d[act_key] = tf.nn.leaky_relu(d[bn_key], alpha=0.1)
                result = d[act_key]
                if pooled:
                    pool_key = 'pool{}'.format(idx)
                    d[pool_key] = max_pool(result, 2, 2, padding='SAME')
                    result = d[pool_key]
            print('layer{}.shape'.format(idx), result.get_shape().as_list())
            return result

        # Columns: stage index, use depth_point_layer, kernel side,
        # output depth, followed by max-pool.
        # Trailing shape notes are carried over from the original annotations
        # and assume a 416x416x3 input -- TODO confirm against the caller.
        backbone = (
            (1, False, 3, 32, True),      # (416, 416, 3) --> (208, 208, 32)
            (2, True, 3, 64, True),       # (208, 208, 32) --> (104, 104, 64)
            (3, True, 3, 128, False),     # (104, 104, 64) --> (104, 104, 128)
            (4, False, 1, 64, False),     # (104, 104, 128) --> (104, 104, 64)
            (5, True, 3, 128, True),      # (104, 104, 64) --> (52, 52, 128)
            (6, True, 3, 256, False),     # (52, 52, 128) --> (52, 52, 256)
            (7, False, 1, 128, False),    # (52, 52, 256) --> (52, 52, 128)
            (8, True, 3, 256, True),      # (52, 52, 128) --> (26, 26, 256)
            (9, True, 3, 512, False),     # (26, 26, 256) --> (26, 26, 512)
            (10, False, 1, 256, False),   # (26, 26, 512) --> (26, 26, 256)
            (11, True, 3, 512, False),    # (26, 26, 256) --> (26, 26, 512)
            (12, False, 1, 256, False),   # (26, 26, 512) --> (26, 26, 256)
            (13, True, 3, 512, True),     # (26, 26, 256) --> (13, 13, 512)
            (14, True, 3, 1024, False),   # (13, 13, 512) --> (13, 13, 1024)
            (15, False, 1, 512, False),   # (13, 13, 1024) --> (13, 13, 512)
            (16, True, 3, 1024, False),   # (13, 13, 512) --> (13, 13, 1024)
            (17, False, 1, 512, False),   # (13, 13, 1024) --> (13, 13, 512)
            (18, True, 3, 1024, False),   # (13, 13, 512) --> (13, 13, 1024)
            (19, True, 3, 1024, False),   # (13, 13, 1024) --> (13, 13, 1024)
            (20, True, 3, 1024, False),   # (13, 13, 1024) --> (13, 13, 1024)
        )

        net = X_input
        for idx, depth_point, side, depth, pooled in backbone:
            net = _stage(idx, depth_point, side, depth, net, pooled)

        # Skip connection: the stage-13 activation taken before its pooling
        # is squeezed to 64 channels, rearranged with space_to_depth and
        # concatenated (on the last axis) with the stage-20 output.
        with tf.variable_scope('layer21'):
            d['skip_connection'] = conv_layer(d['leaky_relu13'], 1, 1, 64,
                                              **default_conv_args)
            d['skip_batch'] = batchNormalization(d['skip_connection'],
                                                 is_train)
            d['skip_leaky_relu'] = tf.nn.leaky_relu(d['skip_batch'], alpha=0.1)
            d['skip_space_to_depth_x2'] = tf.space_to_depth(
                d['skip_leaky_relu'], block_size=2)
            d['concat21'] = tf.concat(
                [d['skip_space_to_depth_x2'], d['leaky_relu20']], axis=-1)
        # (13, 13, 1024) --> (13, 13, 256+1024)
        print('layer21.shape', d['concat21'].get_shape().as_list())

        # (13, 13, 1280) --> (13, 13, 1024)
        _stage(22, True, 3, 1024, d['concat21'], False)

        # Prediction head: one (5 + num_classes) vector per anchor per cell.
        output_channel = self.num_anchors * (5 + self.num_classes)
        d['logit'] = conv_layer(d['leaky_relu22'],
                                1,
                                1,
                                output_channel,
                                padding='SAME',
                                use_bias=True,
                                weights_stddev=0.01,
                                biases_value=0.1)
        pred_shape = (-1, self.grid_size[0], self.grid_size[1],
                      self.num_anchors, 5 + self.num_classes)
        d['pred'] = tf.reshape(d['logit'], pred_shape)
        print('pred.shape', d['pred'].get_shape().as_list())
        # (13, 13, 1024) --> (13, 13, num_anchors , (5 + num_classes))

        return d
# Example #5
# 0
    def _build_model(self, **kwargs):
        """
        Build the classification network graph.

        Three blocks of conv -> ReLU -> max-pool -> dropout are followed by
        two 1024-unit fully connected layers with ReLU, a final fully
        connected layer with one output per class, and a softmax.

        :param kwargs: dict, extra arguments for building AlexNet.
            - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
              Defaults to 0.0.
            - dropout_prob: float, drop probability used by the dropout ops
              while ``self.is_train`` is true (keep probability is 1 otherwise).
              Defaults to 0.0.
        :return d: dict, containing outputs on each layer.
        """
        d = dict()    # Collects every intermediate tensor by name.
        X_mean = kwargs.pop('image_mean', 0.0)
        dropout_prob = kwargs.pop('dropout_prob', 0.0)
        # Number of classes is read from the last dimension of the labels.
        num_classes = int(self.y.get_shape()[-1])

        # Keep probability: 1 - dropout_prob in training, 1 at inference.
        keep_prob = tf.cond(self.is_train,
                            lambda: 1. - dropout_prob,
                            lambda: 1.)

        # Mean subtraction on the raw input.
        X_input = self.X - X_mean

        # Convolutional blocks 1-3: conv (3, stride 1) - relu - pool - drop.
        # Only the convolution lives inside the variable scope; the ReLU,
        # pooling and dropout ops are created outside of it.
        # NOTE(review): each block feeds the *pooled* tensor to the next
        # convolution, so drop1 and drop2 are stored in ``d`` but never
        # consumed downstream; only drop3 reaches the classifier. Confirm
        # whether that is intended.
        feed = X_input
        for idx, depth in enumerate((64, 128, 256), start=1):
            conv_key = 'conv{}'.format(idx)
            relu_key = 'relu{}'.format(idx)
            pool_key = 'pool{}'.format(idx)
            drop_key = 'drop{}'.format(idx)

            with tf.variable_scope(conv_key):
                # conv_layer(x, side_l, stride, out_depth, padding='SAME', **kwargs)
                d[conv_key] = conv_layer(feed, 3, 1, depth, padding='SAME',
                                         weights_stddev=0.01,
                                         biases_value=1.0)
                print('{}.shape'.format(conv_key),
                      d[conv_key].get_shape().as_list())
            d[relu_key] = tf.nn.relu(d[conv_key])
            # max_pool(x, side_l, stride, padding='SAME')
            d[pool_key] = max_pool(d[relu_key], 2, 1, padding='SAME')
            d[drop_key] = tf.nn.dropout(d[pool_key], keep_prob)
            print('{}.shape'.format(pool_key),
                  d[pool_key].get_shape().as_list())
            feed = d[pool_key]

        # Flatten everything but the leading (batch) dimension.
        f_dim = int(np.prod(d['drop3'].get_shape()[1:]))
        f_emb = tf.reshape(d['drop3'], [-1, f_dim])

        # Fully connected layers fc4 and fc5, each followed by a ReLU.
        hidden = f_emb
        for idx in (4, 5):
            fc_key = 'fc{}'.format(idx)
            relu_key = 'relu{}'.format(idx)
            with tf.variable_scope(fc_key):
                d[fc_key] = fc_layer(hidden, 1024,
                                     weights_stddev=0.005, biases_value=0.1)
            d[relu_key] = tf.nn.relu(d[fc_key])
            print('{}.shape'.format(fc_key),
                  d[relu_key].get_shape().as_list())
            hidden = d[relu_key]

        # Output layer; created outside any variable scope, as before.
        d['logits'] = fc_layer(hidden, num_classes,
                               weights_stddev=0.01, biases_value=0.0)
        print('logits.shape', d['logits'].get_shape().as_list())

        # Class probabilities.
        d['pred'] = tf.nn.softmax(d['logits'])

        return d
# Example #6
# 0
    def _build_model(self, **kwargs):
        """
        Build model.
        :param kwargs: dict, extra arguments for building YOLO.
                -image_mean: np.ndarray, mean image for each input channel, shape: (C,).
        :return d: dict, containing outputs on each layer.
        """

        d = dict()
        x_mean = kwargs.pop('image_mean', 0.0)
        pretrain = kwargs.pop('pretrain', False)
        frontend = kwargs.pop('frontend', 'resnet_v2_50')

        # input
        X_input = self.X - x_mean
        is_train = self.is_train

        # Feature Extractor
        if pretrain:
            frontend_dir = os.path.join('pretrained_models',
                                        '{}.ckpt'.format(frontend))
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                logits, end_points = resnet_v2.resnet_v2_50(
                    self.X, is_training=self.is_train)
                d['init_fn'] = slim.assign_from_checkpoint_fn(
                    model_path=frontend_dir,
                    var_list=slim.get_model_variables(frontend))
            convs = [
                end_points[frontend + '/block{}'.format(x)] for x in [4, 2, 1]
            ]
            d['conv_s32'] = convs[0]
            d['conv_s16'] = convs[1]
        else:
            # Build ConvNet
            #conv1 - batch_norm1 - leaky_relu1 - pool1
            with tf.variable_scope('layer1'):
                d['conv1'] = conv_bn_relu(X_input, 32, (3, 3), is_train)
                d['pool1'] = max_pool(d['conv1'], 2, 2, padding='SAME')
            # (416, 416, 3) --> (208, 208, 32)

            #conv2 - batch_norm2 - leaky_relu2 - pool2
            with tf.variable_scope('layer2'):
                d['conv2'] = conv_bn_relu(d['pool1'], 64, (3, 3), is_train)
                d['pool2'] = max_pool(d['conv2'], 2, 2, padding='SAME')
            # (208, 208, 32) --> (104, 104, 64)

            #conv3 - batch_norm3 - leaky_relu3
            with tf.variable_scope('layer3'):
                d['conv3'] = conv_bn_relu(d['pool2'], 128, (3, 3), is_train)
            # (104, 104, 64) --> (104, 104, 128)

            #conv4 - batch_norm4 - leaky_relu4
            with tf.variable_scope('layer4'):
                d['conv4'] = conv_bn_relu(d['conv3'], 64, (1, 1), is_train)
            # (104, 104, 128) --> (104, 104, 64)

            #conv5 - batch_norm5 - leaky_relu5 - pool5
            with tf.variable_scope('layer5'):
                d['conv5'] = conv_bn_relu(d['conv4'], 128, (3, 3), is_train)
                d['pool5'] = max_pool(d['conv5'], 2, 2, padding='SAME')
            # (104, 104, 64) --> (52, 52, 128)

            #conv6 - batch_norm6 - leaky_relu6
            with tf.variable_scope('layer6'):
                d['conv6'] = conv_bn_relu(d['pool5'], 256, (3, 3), is_train)
            # (52, 52, 128) --> (52, 52, 256)

            #conv7 - batch_norm7 - leaky_relu7
            with tf.variable_scope('layer7'):
                d['conv7'] = conv_bn_relu(d['conv6'], 128, (1, 1), is_train)
            # (52, 52, 256) --> (52, 52, 128)

            #conv8 - batch_norm8 - leaky_relu8 - pool8
            with tf.variable_scope('layer8'):
                d['conv8'] = conv_bn_relu(d['conv7'], 256, (3, 3), is_train)
                d['pool8'] = max_pool(d['conv8'], 2, 2, padding='SAME')
            # (52, 52, 128) --> (26, 26, 256)

            #conv9 - batch_norm9 - leaky_relu9
            with tf.variable_scope('layer9'):
                d['conv9'] = conv_bn_relu(d['pool8'], 512, (3, 3), is_train)
            # (26, 26, 256) --> (26, 26, 512)

            #conv10 - batch_norm10 - leaky_relu10
            with tf.variable_scope('layer10'):
                d['conv10'] = conv_bn_relu(d['conv9'], 256, (1, 1), is_train)
            # (26, 26, 512) --> (26, 26, 256)

            #conv11 - batch_norm11 - leaky_relu11
            with tf.variable_scope('layer11'):
                d['conv11'] = conv_bn_relu(d['conv10'], 512, (3, 3), is_train)
            # (26, 26, 256) --> (26, 26, 512)

            #conv12 - batch_norm12 - leaky_relu12
            with tf.variable_scope('layer12'):
                d['conv12'] = conv_bn_relu(d['conv11'], 256, (1, 1), is_train)
            # (26, 26, 512) --> (26, 26, 256)

            #conv13 - batch_norm13 - leaky_relu13 - pool13
            with tf.variable_scope('layer13'):
                d['conv13'] = conv_bn_relu(d['conv12'], 512, (3, 3), is_train)
                d['pool13'] = max_pool(d['conv13'], 2, 2, padding='SAME')
            # (26, 26, 256) --> (13, 13, 512)

            #conv14 - batch_norm14 - leaky_relu14
            with tf.variable_scope('layer14'):
                d['conv14'] = conv_bn_relu(d['pool13'], 1024, (3, 3), is_train)
            # (13, 13, 512) --> (13, 13, 1024)

            #conv15 - batch_norm15 - leaky_relu15
            with tf.variable_scope('layer15'):
                d['conv15'] = conv_bn_relu(d['conv14'], 512, (1, 1), is_train)
            # (13, 13, 1024) --> (13, 13, 512)

            #conv16 - batch_norm16 - leaky_relu16
            with tf.variable_scope('layer16'):
                d['conv16'] = conv_bn_relu(d['conv15'], 1024, (3, 3), is_train)
            # (13, 13, 512) --> (13, 13, 1024)

            #conv17 - batch_norm16 - leaky_relu17
            with tf.variable_scope('layer17'):
                d['conv17'] = conv_bn_relu(d['conv16'], 512, (1, 1), is_train)
            # (13, 13, 1024) --> (13, 13, 512)

            #conv18 - batch_norm18 - leaky_relu18
            with tf.variable_scope('layer18'):
                d['conv18'] = conv_bn_relu(d['conv17'], 1024, (3, 3), is_train)
            # (13, 13, 512) --> (13, 13, 1024)

            #conv19 - batch_norm19 - leaky_relu19
            with tf.variable_scope('layer19'):
                d['conv19'] = conv_bn_relu(d['conv18'], 1024, (3, 3), is_train)
            # (13, 13, 1024) --> (13, 13, 1024)
            d['conv_s32'] = d['conv19']
            d['conv_s16'] = d['conv13']

        #Detection Layer
        #conv20 - batch_norm20 - leaky_relu20
        with tf.variable_scope('layer20'):
            d['conv20'] = conv_bn_relu(d['conv_s32'], 1024, (3, 3), is_train)
        # (13, 13, 1024) --> (13, 13, 1024)

        # concatenate layer20 and layer 13 using space to depth
        with tf.variable_scope('layer21'):
            d['skip_connection'] = conv_bn_relu(d['conv_s16'], 64, (1, 1),
                                                is_train)
            d['skip_space_to_depth_x2'] = tf.space_to_depth(
                d['skip_connection'], block_size=2)
            d['concat21'] = tf.concat(
                [d['skip_space_to_depth_x2'], d['conv20']], axis=-1)
        # (13, 13, 1024) --> (13, 13, 256+1024)

        #conv22 - batch_norm22 - leaky_relu22
        with tf.variable_scope('layer22'):
            d['conv22'] = conv_bn_relu(d['concat21'], 1024, (3, 3), is_train)
        # (13, 13, 1280) --> (13, 13, 1024)

        output_channel = self.num_anchors * (5 + self.num_classes)
        d['logits'] = conv_layer(d['conv22'],
                                 output_channel, (1, 1), (1, 1),
                                 padding='SAME',
                                 use_bias=True)
        d['pred'] = tf.reshape(d['logits'],
                               (-1, self.grid_size[0], self.grid_size[1],
                                self.num_anchors, 5 + self.num_classes))
        # (13, 13, 1024) --> (13, 13, num_anchors , (5 + num_classes))
        return d
# Example #7
# 0
    def _prepare_module(self):
        """
        Instantiate every layer of the network and register it on ``self.module``.

        Layers are created in this order: ``conv1`` .. ``conv19`` (each of
        ``conv1``, ``conv2``, ``conv5``, ``conv8`` and ``conv13`` is directly
        followed by a ``poolN`` entry), then the detection head ``conv20``,
        ``skip_connection``, ``conv21``, ``conv22`` and finally ``logits``.

        Reads ``self.num_anchors`` and ``self.num_classes`` to size the
        ``logits`` layer, and overwrites ``self.module`` with a ``ModuleList``
        holding the layers in creation order.

        :return d: OrderedDict, mapping layer name to layer object.
        """
        # One row per backbone block:
        #   (layer index, in_channels, out_channels, kernel_size, followed_by_pool)
        # Every block uses stride 1; padding is kernel_size // 2, i.e. 1 for the
        # 3x3 kernels and 0 for the 1x1 kernels.
        backbone = (
            (1, 3, 32, 3, True),
            (2, 32, 64, 3, True),
            (3, 64, 128, 3, False),
            (4, 128, 64, 1, False),
            (5, 64, 128, 3, True),
            (6, 128, 256, 3, False),
            (7, 256, 128, 1, False),
            (8, 128, 256, 3, True),
            (9, 256, 512, 3, False),
            (10, 512, 256, 1, False),
            (11, 256, 512, 3, False),
            (12, 512, 256, 1, False),
            (13, 256, 512, 3, True),
            (14, 512, 1024, 3, False),
            (15, 1024, 512, 1, False),
            (16, 512, 1024, 3, False),
            (17, 1024, 512, 1, False),
            (18, 512, 1024, 3, False),
            (19, 1024, 1024, 3, False),
        )

        d = OrderedDict()
        for idx, c_in, c_out, ksize, pooled in backbone:
            d['conv{}'.format(idx)] = ConvBnAct(
                c_in, c_out, ksize, stride=1, padding=ksize // 2)
            if pooled:
                d['pool{}'.format(idx)] = max_pool(2, 2)

        # Detection layer
        d['conv20'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

        # Skip branch: 1x1 reduction from 512 to 64 channels, then
        # SpaceToDepth(2). The 1280 input channels of conv22 match
        # 64 * 2 * 2 + 1024, i.e. this branch concatenated with a
        # 1024-channel map (the concatenation itself is not done here).
        skip_reduce = ConvBnAct(512, 64, 1, stride=1, padding=0)
        d['skip_connection'] = nn.Sequential(skip_reduce, SpaceToDepth(2))
        d['conv21'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

        d['conv22'] = ConvBnAct(1280, 1024, 3, stride=1, padding=1)

        # Final 1x1 convolution with bias: one (5 + num_classes) vector per anchor.
        output_channel = self.num_anchors * (5 + self.num_classes)
        d['logits'] = conv2d(
            1024, output_channel, 1, stride=1, padding=0, bias=True)

        # Register all layers, in creation order, so they are tracked as submodules.
        self.module = nn.ModuleList(d.values())
        return d
    def _build_model(self, **kwargs):
        """
        Build model.

        Constructs five convolutional stages followed by three fully connected
        layers and a softmax. Reads ``self.X`` (input tensor), ``self.y`` (only
        its last dimension, used as the number of classes) and
        ``self.is_train`` (boolean tensor selecting the dropout keep
        probability). The shape comments in the body assume a 227x227x3 input.

        :param kwargs: dict, extra arguments for building AlexNet.
            - image_mean: np.ndarray, mean image for each input channel, shape: (C,).
              Defaults to 0.0, i.e. no mean subtraction.
            - dropout_prob: float, the probability of dropping out each unit in FC layer.
              Defaults to 0.0, i.e. dropout disabled.
        :return d: dict, containing outputs on each layer.
        """
        d = dict()  # Dictionary to save intermediate values returned from each layer.
        X_mean = kwargs.pop('image_mean', 0.0)
        dropout_prob = kwargs.pop('dropout_prob', 0.0)
        num_classes = int(self.y.get_shape()[-1])

        # The probability of keeping each unit for dropout layers:
        # (1 - dropout_prob) while training, 1.0 (no dropout) otherwise.
        keep_prob = tf.cond(self.is_train, lambda: 1. - dropout_prob,
                            lambda: 1.)

        # input
        X_input = self.X - X_mean  # perform mean subtraction

        # NOTE(review): the positional arguments of conv_layer below are
        # presumably (kernel side length, stride, output depth) -- confirm
        # against the conv_layer definition.

        # conv1 - relu1 - pool1
        with tf.variable_scope('conv1'):
            d['conv1'] = conv_layer(X_input,
                                    11,
                                    4,
                                    96,
                                    padding='VALID',
                                    weights_stddev=0.01,
                                    biases_value=0.0)
            print('conv1.shape', d['conv1'].get_shape().as_list())
        d['relu1'] = tf.nn.relu(d['conv1'])
        # (227, 227, 3) --> (55, 55, 96)
        d['pool1'] = max_pool(d['relu1'], 3, 2, padding='VALID')
        # (55, 55, 96) --> (27, 27, 96)
        print('pool1.shape', d['pool1'].get_shape().as_list())

        # conv2 - relu2 - pool2
        with tf.variable_scope('conv2'):
            d['conv2'] = conv_layer(d['pool1'],
                                    5,
                                    1,
                                    256,
                                    padding='SAME',
                                    weights_stddev=0.01,
                                    biases_value=0.1)
            print('conv2.shape', d['conv2'].get_shape().as_list())
        d['relu2'] = tf.nn.relu(d['conv2'])
        # (27, 27, 96) --> (27, 27, 256)
        d['pool2'] = max_pool(d['relu2'], 3, 2, padding='VALID')
        # (27, 27, 256) --> (13, 13, 256)
        print('pool2.shape', d['pool2'].get_shape().as_list())

        # conv3 - relu3
        with tf.variable_scope('conv3'):
            d['conv3'] = conv_layer(d['pool2'],
                                    3,
                                    1,
                                    384,
                                    padding='SAME',
                                    weights_stddev=0.01,
                                    biases_value=0.0)
            print('conv3.shape', d['conv3'].get_shape().as_list())
        d['relu3'] = tf.nn.relu(d['conv3'])
        # (13, 13, 256) --> (13, 13, 384)

        # conv4 - relu4
        with tf.variable_scope('conv4'):
            d['conv4'] = conv_layer(d['relu3'],
                                    3,
                                    1,
                                    384,
                                    padding='SAME',
                                    weights_stddev=0.01,
                                    biases_value=0.1)
            print('conv4.shape', d['conv4'].get_shape().as_list())
        d['relu4'] = tf.nn.relu(d['conv4'])
        # (13, 13, 384) --> (13, 13, 384)

        # conv5 - relu5 - pool5
        with tf.variable_scope('conv5'):
            d['conv5'] = conv_layer(d['relu4'],
                                    3,
                                    1,
                                    256,
                                    padding='SAME',
                                    weights_stddev=0.01,
                                    biases_value=0.1)
            print('conv5.shape', d['conv5'].get_shape().as_list())
        d['relu5'] = tf.nn.relu(d['conv5'])
        # (13, 13, 384) --> (13, 13, 256)
        d['pool5'] = max_pool(d['relu5'], 3, 2, padding='VALID')
        # (13, 13, 256) --> (6, 6, 256)
        print('pool5.shape', d['pool5'].get_shape().as_list())

        # Flatten feature maps: collapse every non-batch dimension into one.
        f_dim = int(np.prod(d['pool5'].get_shape()[1:]))
        f_emb = tf.reshape(d['pool5'], [-1, f_dim])
        # (6, 6, 256) --> (9216)

        # fc6 - relu6 - drop6
        with tf.variable_scope('fc6'):
            d['fc6'] = fc_layer(f_emb,
                                4096,
                                weights_stddev=0.005,
                                biases_value=0.1)
        d['relu6'] = tf.nn.relu(d['fc6'])
        d['drop6'] = tf.nn.dropout(d['relu6'], keep_prob)
        # (9216) --> (4096)
        print('drop6.shape', d['drop6'].get_shape().as_list())

        # fc7 - relu7 - drop7
        with tf.variable_scope('fc7'):
            d['fc7'] = fc_layer(d['drop6'],
                                4096,
                                weights_stddev=0.005,
                                biases_value=0.1)
        d['relu7'] = tf.nn.relu(d['fc7'])
        d['drop7'] = tf.nn.dropout(d['relu7'], keep_prob)
        # (4096) --> (4096)
        print('drop7.shape', d['drop7'].get_shape().as_list())

        # fc8
        # Consumes the dropped-out fc7 activations so that dropout is applied
        # after both hidden FC layers; with keep_prob == 1 (not training) this
        # is the same tensor value as relu7.
        with tf.variable_scope('fc8'):
            d['logits'] = fc_layer(d['drop7'],
                                   num_classes,
                                   weights_stddev=0.01,
                                   biases_value=0.0)
        # (4096) --> (num_classes)

        # softmax
        d['pred'] = tf.nn.softmax(d['logits'])

        return d
def inception_layer(conv_11_size,
                    conv_33_reduce_size,
                    conv_33_size,
                    conv_55_reduce_size,
                    conv_55_size,
                    pool_size,
                    layer_dict,
                    inputs=None,
                    bn=False,
                    wd=0,
                    init_w=None,
                    pretrained_dict=None,
                    trainable=True,
                    is_training=True,
                    name='inception'):
    """Build one four-branch Inception block and return the concatenated output.

    Branches, all starting from ``inputs``:
      * 1x1 conv with ``conv_11_size`` outputs
      * 1x1 reduce (``conv_33_reduce_size``) then 3x3 conv (``conv_33_size``)
      * 1x1 reduce (``conv_55_reduce_size``) then 5x5 conv (``conv_55_size``)
      * 3x3 stride-1 'SAME' max pool then 1x1 conv (``pool_size``)

    The four branch outputs are concatenated along axis 3. Every op is named
    ``'<name>_<suffix>'``.

    Args:
        conv_11_size, conv_33_reduce_size, conv_33_size, conv_55_reduce_size,
            conv_55_size, pool_size: output dimensions of the convolutions
            listed above.
        layer_dict: dict shared between layers. ``layer_dict['cur_input']`` is
            read when ``inputs`` is None; on return both
            ``layer_dict['cur_input']`` and ``layer_dict[name]`` hold the
            block output.
        inputs: input tensor, or None to use ``layer_dict['cur_input']``.
        bn, wd, init_w, pretrained_dict, trainable, is_training: forwarded
            unchanged to every ``L.conv`` call through ``arg_scope``.
        name: prefix for all op names and the key under which the output is
            stored in ``layer_dict``.

    Returns:
        The concatenated output tensor.
    """

    def scoped(suffix):
        # Prefix an op suffix with this block's name, e.g. 'inception_1x1'.
        return '{}_{}'.format(name, suffix)

    inputs = layer_dict['cur_input'] if inputs is None else inputs
    layer_dict['cur_input'] = inputs

    # Keyword arguments applied to every L.conv call inside the scope below.
    conv_defaults = dict(layer_dict=layer_dict,
                         pretrained_dict=pretrained_dict,
                         bn=bn,
                         nl=tf.nn.relu,
                         init_w=init_w,
                         trainable=trainable,
                         is_training=is_training,
                         wd=wd,
                         add_summary=False)

    # NOTE(review): the second conv of each branch is called without `inputs`;
    # presumably L.conv then reads layer_dict['cur_input'], which the preceding
    # reduce conv / max pool is expected to have updated -- confirm against
    # L.conv and L.max_pool. The call order below must therefore be kept.
    with tf.contrib.framework.arg_scope([L.conv], **conv_defaults):
        # Branch 1: 1x1
        branch_1x1 = L.conv(filter_size=1,
                            out_dim=conv_11_size,
                            inputs=inputs,
                            name=scoped('1x1'))

        # Branch 2: 1x1 reduce -> 3x3
        L.conv(filter_size=1,
               out_dim=conv_33_reduce_size,
               inputs=inputs,
               name=scoped('3x3_reduce'))
        branch_3x3 = L.conv(filter_size=3,
                            out_dim=conv_33_size,
                            name=scoped('3x3'))

        # Branch 3: 1x1 reduce -> 5x5
        L.conv(filter_size=1,
               out_dim=conv_55_reduce_size,
               inputs=inputs,
               name=scoped('5x5_reduce'))
        branch_5x5 = L.conv(filter_size=5,
                            out_dim=conv_55_size,
                            name=scoped('5x5'))

        # Branch 4: 3x3 max pool -> 1x1 projection
        L.max_pool(layer_dict=layer_dict,
                   inputs=inputs,
                   stride=1,
                   filter_size=3,
                   padding='SAME',
                   name=scoped('pool'))
        branch_pool = L.conv(filter_size=1,
                             out_dim=pool_size,
                             name=scoped('pool_proj'))

        branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
        output = tf.concat(branches, 3, name=scoped('concat'))

        # Publish the result as the next layer's input and under this block's name.
        layer_dict['cur_input'] = output
        layer_dict[name] = output
    return output