Ejemplo n.º 1
0
  def __call__(self, inputs):
    """Build the detection graph for `inputs` and return the filtered boxes.

    Everything is created inside `tf.variable_scope(self.scope)`. The input is
    transposed with permutation [0, 3, 1, 2] when `self.data_format` is
    'channels_first', divided by 255, and fed to `darknet53`. Three
    `feature_pyramid_network` + `yolo_layer` stages follow, using the anchor
    slices `_ANCHORS[6:]`, `_ANCHORS[3:6]` and `_ANCHORS[:3]` in that order.
    Between stages the previous stage's route tensor is merged with a
    backbone route tensor.

    Args:
      inputs: Input tensor. Presumably a channels-last image batch, given the
        transpose applied for 'channels_first' -- TODO confirm with callers.

    Returns:
      The value returned by `non_max_suppression` for the three detection
      outputs concatenated along axis 1 and passed through `build_boxes`.
    """
    with tf.variable_scope(self.scope):
      data_format = self.data_format
      channels_first = data_format == 'channels_first'
      channel_axis = 1 if channels_first else 3

      def detection_head(features, anchors):
        # One yolo_layer call; only the anchor slice differs between stages.
        return yolo_layer(features,
                          n_classes=self.n_classes,
                          anchors=anchors,
                          img_size=self.input_size,
                          data_format=data_format)

      def merge_with_route(features, route, filters):
        # 1x1 conv -> batch norm -> leaky ReLU, then upsample to the shape of
        # `route` and concatenate with it along the channel axis.
        merged = Conv2D(features, filters=filters, kernel_size=1,
                        data_format=data_format)
        merged = BatchNormalization(merged, data_format=data_format)
        merged = LeakyReLU(merged)
        target_shape = route.get_shape().as_list()
        merged = upsample(merged, out_shape=target_shape,
                          data_format=data_format)
        return tf.concat([merged, route], axis=channel_axis)

      net = inputs
      if channels_first:
        net = tf.transpose(net, [0, 3, 1, 2])

      net = net / 255

      net, route2, route4 = darknet53(net, data_format=data_format)

      # First stage: anchors _ANCHORS[6:].
      net, route1 = feature_pyramid_network(net, filters=512,
                                            data_format=data_format)
      detect1 = detection_head(net, _ANCHORS[6:])

      # Second stage: merge route1 with route2, anchors _ANCHORS[3:6].
      net = merge_with_route(route1, route2, filters=256)
      net, route3 = feature_pyramid_network(net, filters=256,
                                            data_format=data_format)
      detect2 = detection_head(net, _ANCHORS[3:6])

      # Third stage: merge route3 with route4, anchors _ANCHORS[:3].
      net = merge_with_route(route3, route4, filters=128)
      net, _ = feature_pyramid_network(net, filters=128,
                                       data_format=data_format)
      detect3 = detection_head(net, _ANCHORS[:3])

      detections = tf.concat([detect1, detect2, detect3], axis=1)
      boxes = build_boxes(detections)
      return non_max_suppression(boxes,
                                 n_classes=self.n_classes,
                                 max_output_size=self.max_output_size,
                                 iou_threshold=self.iou_threshold,
                                 confidence_threshold=self.confidence_threshold)
Ejemplo n.º 2
0
def _pnet(styles, data_format="channels_last"):
    """Build the "pnet" graph for `styles` and return `(gammas, betas)`.

    The input is rescaled with ``((styles / 255.) - 0.5) * 2`` and passed
    through three `conv` layers, five `residual_block`s and two `upsample`
    layers, all inside the "pnet" variable scope (reuse=tf.AUTO_REUSE).
    The outputs of the conv and upsample layers (not the residual blocks)
    are collected in a dict keyed by layer name and handed to `_pnet_fc`.

    Args:
        styles: Input tensor. Presumably pixel values in [0, 255], given the
            rescaling -- TODO confirm.
        data_format: Forwarded unchanged to every layer builder.

    Returns:
        The two values produced by `_pnet_fc`, as a `(gammas, betas)` tuple.
    """
    with tf.variable_scope("pnet", reuse=tf.AUTO_REUSE):
        net = ((styles / 255.) - 0.5) * 2
        layer_outputs = {}

        # Convolutions: (name, filters, kernel_size, strides); outputs recorded.
        for layer_name, n_filters, k_size, stride in (
                ("conv1", 32, 9, 1),
                ("conv2", 64, 3, 2),
                ("conv3", 128, 3, 2)):
            net = conv(net, name=layer_name, data_format=data_format,
                       filters=n_filters, kernel_size=k_size, strides=stride)
            layer_outputs[layer_name] = net

        # Residual trunk; these outputs are not recorded.
        for block_name in ("res1", "res2", "res3", "res4", "res5"):
            net = residual_block(net,
                                 name=block_name,
                                 data_format=data_format,
                                 filters=128,
                                 kernel_size=3)

        # Upsampling layers: (name, filters); outputs recorded.
        for layer_name, n_filters in (("up1", 64), ("up2", 32)):
            net = upsample(net, name=layer_name, data_format=data_format,
                           filters=n_filters, kernel_size=3, strides=2)
            layer_outputs[layer_name] = net

        gammas, betas = _pnet_fc(layer_outputs)

    return gammas, betas
Ejemplo n.º 3
0
 def model_fn(inputs):
     """Build the "tnet" graph for `inputs` and return the clipped output.

     The input is rescaled with ``((inputs / 255.) - 0.5) * 2`` and then run
     through, in order: three `conv` layers, five `residual_block`s, two
     `upsample` layers and a final `conv` with `with_bn=False` and
     `with_relu=False`. The result is clipped to [0, 255].

     `data_format` is not a parameter of this function; it is resolved from
     an enclosing (or module) scope that is not visible here.

     Args:
         inputs: Input tensor. Presumably pixel values in [0, 255], given the
             rescaling -- TODO confirm.

     Returns:
         The output of `tf.clip_by_value(..., 0., 255.)` on the last layer.
     """
     # Ordered (builder, keyword arguments) pairs describing the layer stack.
     layer_plan = (
         (conv, dict(name="conv1", filters=32, kernel_size=9, strides=1)),
         (conv, dict(name="conv2", filters=64, kernel_size=3, strides=2)),
         (conv, dict(name="conv3", filters=128, kernel_size=3, strides=2)),
         (residual_block, dict(name="res1", filters=128, kernel_size=3)),
         (residual_block, dict(name="res2", filters=128, kernel_size=3)),
         (residual_block, dict(name="res3", filters=128, kernel_size=3)),
         (residual_block, dict(name="res4", filters=128, kernel_size=3)),
         (residual_block, dict(name="res5", filters=128, kernel_size=3)),
         (upsample, dict(name="up1", filters=64, kernel_size=3, strides=2)),
         (upsample, dict(name="up2", filters=32, kernel_size=3, strides=2)),
         (conv, dict(name="conv4", filters=3, kernel_size=9, strides=1,
                     with_bn=False, with_relu=False)),
     )

     with tf.variable_scope("tnet", reuse=tf.AUTO_REUSE):
         net = ((inputs / 255.) - 0.5) * 2
         for build_layer, params in layer_plan:
             net = build_layer(net, data_format=data_format, **params)
         out = tf.clip_by_value(net, 0., 255.)
     return out
Ejemplo n.º 4
0
    def build_nework(self, input_data):
        """Build the three prediction branches on top of `backbone.DarkNet53`.

        The backbone returns `(route_1, route_2, input_data)`. Each branch
        applies five alternating 1x1 / 3x3 convolutions, a 3x3 "obj branch"
        convolution, and a final 1x1 convolution created with
        `activate=False, bn=False` whose output channel count is
        `self.anchor_per_scale * (self.num_class + 5)`. Between branches the
        features are reduced by a 1x1 convolution, upsampled with
        `layers.upsample`, and concatenated on the last axis with a backbone
        route tensor.

        Args:
            input_data: Tensor passed to `backbone.DarkNet53`.

        Returns:
            Tuple `(conv_lbbox, conv_mbbox, conv_sbbox)` of the three raw
            prediction tensors.
        """
        trainable = self.trainable

        def conv_chain(tensor, specs):
            # Apply convolution layers one after another; `specs` holds
            # (filters_shape, layer_name) pairs in application order.
            for filters_shape, layer_name in specs:
                tensor = layers.convolution_layer(tensor, filters_shape,
                                                  trainable, layer_name)
            return tensor

        def prediction_head(tensor, narrow, wide, branch_name, bbox_name):
            # 3x3 convolution (narrow -> wide channels) followed by the 1x1
            # prediction convolution with no activation and no batch norm.
            branch = layers.convolution_layer(tensor, (3, 3, narrow, wide),
                                              trainable, branch_name)
            return layers.convolution_layer(
                branch,
                (1, 1, wide, self.anchor_per_scale * (self.num_class + 5)),
                trainable,
                bbox_name,
                activate=False,
                bn=False)

        def upsample_and_concat(tensor, route, upsample_name, scope_name):
            # Upsample, then concatenate with `route` on the last axis inside
            # the given variable scope.
            tensor = layers.upsample(tensor, name=upsample_name,
                                     method=self.upsample_method)
            with tf.variable_scope(scope_name):
                return tf.concat([tensor, route], axis=-1)

        route_1, route_2, features = backbone.DarkNet53(input_data, trainable)

        # Large branch: keep convolving the last DarkNet output, then predict.
        features = conv_chain(features, (
            ((1, 1, 1024, 512), 'conv52'),
            ((3, 3, 512, 1024), 'conv53'),
            ((1, 1, 1024, 512), 'conv54'),
            ((3, 3, 512, 1024), 'conv55'),
            ((1, 1, 1024, 512), 'conv56'),
        ))
        # Original note gives this output as (13, 13, 256); not verifiable
        # from this code -- TODO confirm the channel count.
        conv_lbbox = prediction_head(features, 512, 1024,
                                     'conv_lobj_branch', 'conv_lbbox')

        # Medium branch: combine the upsampled features with route_2.
        # NOTE: the scope is named 'route_1' although route_2 is concatenated.
        features = conv_chain(features, (((1, 1, 512, 256), 'conv57'),))
        # Original notes: (26, 26, 256) after upsampling, (26, 26, 768)
        # after the concat.
        features = upsample_and_concat(features, route_2,
                                       'upsample0', 'route_1')
        features = conv_chain(features, (
            ((1, 1, 768, 256), 'conv58'),
            ((3, 3, 256, 512), 'conv59'),
            ((1, 1, 512, 256), 'conv60'),
            ((3, 3, 256, 512), 'conv61'),
            ((1, 1, 512, 256), 'conv62'),
        ))
        # Original note: medium-branch output is (26, 26, 255).
        conv_mbbox = prediction_head(features, 256, 512,
                                     'conv_mobj_branch', 'conv_mbbox')

        # Small branch: combine the upsampled features with route_1.
        # NOTE: the scope is named 'route_2' although route_1 is concatenated.
        features = conv_chain(features, (((1, 1, 256, 128), 'conv63'),))
        # Original notes: (52, 52, 128) after upsampling, (52, 52, 384)
        # after the concat.
        features = upsample_and_concat(features, route_1,
                                       'upsample1', 'route_2')
        features = conv_chain(features, (
            ((1, 1, 384, 128), 'conv64'),
            ((3, 3, 128, 256), 'conv65'),
            ((1, 1, 256, 128), 'conv66'),
            ((3, 3, 128, 256), 'conv67'),
            ((1, 1, 256, 128), 'conv68'),
        ))
        # Original note: small-branch output is (52, 52, 255).
        conv_sbbox = prediction_head(features, 128, 256,
                                     'conv_sobj_branch', 'conv_sbbox')

        return conv_lbbox, conv_mbbox, conv_sbbox
Ejemplo n.º 5
0
def _tnet(contents, gammas, betas, data_format="channels_last"):
    """Build the "tnet" graph for `contents` using externally supplied BN params.

    The input is rescaled with ``((contents / 255.) - 0.5) * 2`` and run
    through three `conv` layers, five `residual_block`s, two `upsample`
    layers and a final `conv` created with `with_bn=False` and
    `with_relu=False`. The result is clipped to [0, 255]. All ops are created
    inside the "tnet" variable scope (reuse=tf.AUTO_REUSE).

    Args:
        contents: Input tensor. Presumably pixel values in [0, 255], given
            the rescaling -- TODO confirm.
        gammas: Mapping indexed by "conv1".."conv3", "up1", "up2" and, for
            each residual block, "<name>_1" and "<name>_2".
        betas: Mapping indexed by the same keys as `gammas`.
        data_format: Forwarded unchanged to every layer builder.

    Returns:
        The output of `tf.clip_by_value(..., 0., 255.)` on the last layer.
    """
    with tf.variable_scope("tnet", reuse=tf.AUTO_REUSE):
        net = ((contents / 255.) - 0.5) * 2

        # Convolutions: (name, filters, kernel_size, strides).
        for layer_name, n_filters, k_size, stride in (
                ("conv1", 32, 9, 1),
                ("conv2", 64, 3, 2),
                ("conv3", 128, 3, 2)):
            net = conv(net,
                       name=layer_name,
                       data_format=data_format,
                       filters=n_filters,
                       kernel_size=k_size,
                       strides=stride,
                       bn_gamma=gammas[layer_name],
                       bn_beta=betas[layer_name])

        # Residual blocks take two gamma/beta pairs, keyed "<name>_1" and
        # "<name>_2".
        for block_name in ("res1", "res2", "res3", "res4", "res5"):
            first_key = block_name + "_1"
            second_key = block_name + "_2"
            net = residual_block(net,
                                 name=block_name,
                                 data_format=data_format,
                                 filters=128,
                                 kernel_size=3,
                                 bn_gamma1=gammas[first_key],
                                 bn_beta1=betas[first_key],
                                 bn_gamma2=gammas[second_key],
                                 bn_beta2=betas[second_key])

        # Upsampling layers: (name, filters).
        for layer_name, n_filters in (("up1", 64), ("up2", 32)):
            net = upsample(net,
                           name=layer_name,
                           data_format=data_format,
                           filters=n_filters,
                           kernel_size=3,
                           strides=2,
                           bn_gamma=gammas[layer_name],
                           bn_beta=betas[layer_name])

        # Final projection to 3 filters, without batch norm or ReLU.
        projected = conv(net,
                         name="conv4",
                         data_format=data_format,
                         filters=3,
                         kernel_size=9,
                         strides=1,
                         with_bn=False,
                         with_relu=False)
        out = tf.clip_by_value(projected, 0., 255.)
    return out