def __call__(self, inputs):
    """Generate the computation graph."""
    with tf.variable_scope(self.scope):
        if self.data_format == 'channels_first':
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        # Normalize pixel values to [0, 1].
        inputs = inputs / 255

        inputs, route2, route4 = darknet53(inputs, data_format=self.data_format)

        # First detection head on the coarsest feature map.
        inputs, route1 = feature_pyramid_network(
            inputs, filters=512, data_format=self.data_format)
        detect1 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[6:],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Upsample and concatenate with a Darknet-53 skip connection.
        inputs = Conv2D(route1, filters=256, kernel_size=1,
                        data_format=self.data_format)
        inputs = BatchNormalization(inputs, data_format=self.data_format)
        inputs = LeakyReLU(inputs)
        upsample_size = route2.get_shape().as_list()
        inputs = upsample(inputs, out_shape=upsample_size,
                          data_format=self.data_format)
        axis = 1 if self.data_format == 'channels_first' else 3
        inputs = tf.concat([inputs, route2], axis=axis)

        # Second detection head.
        inputs, route3 = feature_pyramid_network(
            inputs, filters=256, data_format=self.data_format)
        detect2 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[3:6],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Upsample and concatenate with the next skip connection.
        inputs = Conv2D(route3, filters=128, kernel_size=1,
                        data_format=self.data_format)
        inputs = BatchNormalization(inputs, data_format=self.data_format)
        inputs = LeakyReLU(inputs)
        upsample_size = route4.get_shape().as_list()
        inputs = upsample(inputs, out_shape=upsample_size,
                          data_format=self.data_format)
        axis = 1 if self.data_format == 'channels_first' else 3
        inputs = tf.concat([inputs, route4], axis=axis)

        # Third detection head on the finest feature map.
        inputs, _ = feature_pyramid_network(
            inputs, filters=128, data_format=self.data_format)
        detect3 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[:3],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Merge the three scales, decode boxes, and apply non-max suppression.
        inputs = tf.concat([detect1, detect2, detect3], axis=1)
        inputs = build_boxes(inputs)
        boxes_dicts = non_max_suppression(
            inputs, n_classes=self.n_classes,
            max_output_size=self.max_output_size,
            iou_threshold=self.iou_threshold,
            confidence_threshold=self.confidence_threshold)

        return boxes_dicts
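# The slicing of _ANCHORS above assumes the nine YOLOv3 anchors are stored
# smallest-to-largest, so each detection head receives the three anchors matched
# to its grid. A minimal sketch of that split, assuming the standard COCO anchor
# set (the real _ANCHORS constant is defined elsewhere in the source):
_ANCHORS_SKETCH = [(10, 13), (16, 30), (33, 23),        # finest grid, small objects
                   (30, 61), (62, 45), (59, 119),       # intermediate grid
                   (116, 90), (156, 198), (373, 326)]   # coarsest grid, large objects

anchors_detect1 = _ANCHORS_SKETCH[6:]    # coarsest feature map (stride 32)
anchors_detect2 = _ANCHORS_SKETCH[3:6]   # intermediate feature map (stride 16)
anchors_detect3 = _ANCHORS_SKETCH[:3]    # finest feature map (stride 8)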
def _pnet(styles, data_format="channels_last"):
    with tf.variable_scope("pnet", reuse=tf.AUTO_REUSE):
        pnet_outs = {}
        # Rescale pixel values from [0, 255] to [-1, 1].
        inputs = ((styles / 255.) - 0.5) * 2
        pnet_outs["conv1"] = conv1 = conv(inputs, name="conv1", data_format=data_format,
                                          filters=32, kernel_size=9, strides=1)
        pnet_outs["conv2"] = conv2 = conv(conv1, name="conv2", data_format=data_format,
                                          filters=64, kernel_size=3, strides=2)
        pnet_outs["conv3"] = conv3 = conv(conv2, name="conv3", data_format=data_format,
                                          filters=128, kernel_size=3, strides=2)
        res1 = residual_block(conv3, name="res1", data_format=data_format,
                              filters=128, kernel_size=3)
        res2 = residual_block(res1, name="res2", data_format=data_format,
                              filters=128, kernel_size=3)
        res3 = residual_block(res2, name="res3", data_format=data_format,
                              filters=128, kernel_size=3)
        res4 = residual_block(res3, name="res4", data_format=data_format,
                              filters=128, kernel_size=3)
        res5 = residual_block(res4, name="res5", data_format=data_format,
                              filters=128, kernel_size=3)
        pnet_outs["up1"] = up1 = upsample(res5, name="up1", data_format=data_format,
                                          filters=64, kernel_size=3, strides=2)
        pnet_outs["up2"] = upsample(up1, name="up2", data_format=data_format,
                                    filters=32, kernel_size=3, strides=2)
        # Map the collected activations to per-layer (gamma, beta) pairs.
        gammas, betas = _pnet_fc(pnet_outs)
        return gammas, betas
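# _pnet_fc (defined elsewhere) flattens the collected activations and predicts
# one (gamma, beta) pair per normalized layer of the transfer network. A minimal
# NumPy sketch of the underlying idea, conditional normalization, assuming
# channels-last features and per-channel parameters; the real normalization is
# implemented inside conv/residual_block/upsample:
import numpy as np

def conditional_norm_sketch(x, gamma, beta, eps=1e-5):
    """Normalize each sample over (H, W) per channel, then apply the
    externally supplied, style-dependent scale and shift.
    x: (N, H, W, C); gamma, beta: (C,)."""
    mean = x.mean(axis=(1, 2), keepdims=True)
    var = x.var(axis=(1, 2), keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma.reshape(1, 1, 1, -1) * x_hat + beta.reshape(1, 1, 1, -1)

# Toy usage: a 4x4 feature map with 32 channels and identity parameters.
_feats = np.random.randn(1, 4, 4, 32).astype(np.float32)
_out = conditional_norm_sketch(_feats, np.ones(32, np.float32), np.zeros(32, np.float32))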
def model_fn(inputs):
    with tf.variable_scope("tnet", reuse=tf.AUTO_REUSE):
        inputs = ((inputs / 255.) - 0.5) * 2
        conv1 = conv(inputs, name="conv1", data_format=data_format,
                     filters=32, kernel_size=9, strides=1)
        conv2 = conv(conv1, name="conv2", data_format=data_format,
                     filters=64, kernel_size=3, strides=2)
        conv3 = conv(conv2, name="conv3", data_format=data_format,
                     filters=128, kernel_size=3, strides=2)
        res1 = residual_block(conv3, name="res1", data_format=data_format,
                              filters=128, kernel_size=3)
        res2 = residual_block(res1, name="res2", data_format=data_format,
                              filters=128, kernel_size=3)
        res3 = residual_block(res2, name="res3", data_format=data_format,
                              filters=128, kernel_size=3)
        res4 = residual_block(res3, name="res4", data_format=data_format,
                              filters=128, kernel_size=3)
        res5 = residual_block(res4, name="res5", data_format=data_format,
                              filters=128, kernel_size=3)
        up1 = upsample(res5, name="up1", data_format=data_format,
                       filters=64, kernel_size=3, strides=2)
        up2 = upsample(up1, name="up2", data_format=data_format,
                       filters=32, kernel_size=3, strides=2)
        conv4 = conv(up2, name="conv4", data_format=data_format,
                     filters=3, kernel_size=9, strides=1,
                     with_bn=False, with_relu=False)
        out = tf.clip_by_value(conv4, 0., 255.)
        return out
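# Two range conventions are easy to miss in model_fn: the input is rescaled
# from [0, 255] to [-1, 1], while conv4's output is clipped straight to
# [0, 255] with no inverse rescaling, so the last convolution is expected to
# produce pixel-range values itself. A quick check of both mappings:
import numpy as np

_pixels = np.array([0., 127.5, 255.])
print(((_pixels / 255.) - 0.5) * 2)           # [-1.  0.  1.]  -> network input range
print(np.clip([-40., 100., 300.], 0., 255.))  # [  0. 100. 255.]  -> valid pixel output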
def build_nework(self, input_data):
    route_1, route_2, input_data = backbone.DarkNet53(
        input_data, self.trainable)

    # Apply additional 1x1/3x3 convolutions to the final DarkNet-53 output
    # and produce the prediction branch for large objects.
    input_data = layers.convolution_layer(input_data, (1, 1, 1024, 512),
                                          self.trainable, 'conv52')
    input_data = layers.convolution_layer(input_data, (3, 3, 512, 1024),
                                          self.trainable, 'conv53')
    input_data = layers.convolution_layer(input_data, (1, 1, 1024, 512),
                                          self.trainable, 'conv54')
    input_data = layers.convolution_layer(input_data, (3, 3, 512, 1024),
                                          self.trainable, 'conv55')
    input_data = layers.convolution_layer(input_data, (1, 1, 1024, 512),
                                          self.trainable, 'conv56')
    conv_lobj_branch = layers.convolution_layer(input_data, (3, 3, 512, 1024),
                                                self.trainable, 'conv_lobj_branch')

    # Large-object output, shape (13, 13, anchor_per_scale * (num_class + 5)),
    # e.g. (13, 13, 255) for COCO.
    conv_lbbox = layers.convolution_layer(
        conv_lobj_branch,
        (1, 1, 1024, self.anchor_per_scale * (self.num_class + 5)),
        self.trainable, 'conv_lbbox', activate=False, bn=False)

    # Upsample and concatenate with route_2 to build the medium-object branch.
    input_data = layers.convolution_layer(input_data, (1, 1, 512, 256),
                                          self.trainable, 'conv57')
    input_data = layers.upsample(
        input_data, name='upsample0', method=self.upsample_method)  # (26, 26, 256)

    with tf.variable_scope('route_1'):
        input_data = tf.concat([input_data, route_2], axis=-1)  # (26, 26, 768)

    input_data = layers.convolution_layer(input_data, (1, 1, 768, 256),
                                          self.trainable, 'conv58')
    input_data = layers.convolution_layer(input_data, (3, 3, 256, 512),
                                          self.trainable, 'conv59')
    input_data = layers.convolution_layer(input_data, (1, 1, 512, 256),
                                          self.trainable, 'conv60')
    input_data = layers.convolution_layer(input_data, (3, 3, 256, 512),
                                          self.trainable, 'conv61')
    input_data = layers.convolution_layer(input_data, (1, 1, 512, 256),
                                          self.trainable, 'conv62')
    conv_mobj_branch = layers.convolution_layer(input_data, (3, 3, 256, 512),
                                                self.trainable, 'conv_mobj_branch')

    # Medium-object output, shape (26, 26, 255).
    conv_mbbox = layers.convolution_layer(
        conv_mobj_branch,
        (1, 1, 512, self.anchor_per_scale * (self.num_class + 5)),
        self.trainable, 'conv_mbbox', activate=False, bn=False)

    # Upsample and concatenate with route_1 to build the small-object branch.
    input_data = layers.convolution_layer(input_data, (1, 1, 256, 128),
                                          self.trainable, 'conv63')
    input_data = layers.upsample(
        input_data, name='upsample1', method=self.upsample_method)  # (52, 52, 128)

    with tf.variable_scope('route_2'):
        input_data = tf.concat([input_data, route_1], axis=-1)  # (52, 52, 384)

    input_data = layers.convolution_layer(input_data, (1, 1, 384, 128),
                                          self.trainable, 'conv64')
    input_data = layers.convolution_layer(input_data, (3, 3, 128, 256),
                                          self.trainable, 'conv65')
    input_data = layers.convolution_layer(input_data, (1, 1, 256, 128),
                                          self.trainable, 'conv66')
    input_data = layers.convolution_layer(input_data, (3, 3, 128, 256),
                                          self.trainable, 'conv67')
    input_data = layers.convolution_layer(input_data, (1, 1, 256, 128),
                                          self.trainable, 'conv68')
    conv_sobj_branch = layers.convolution_layer(input_data, (3, 3, 128, 256),
                                                self.trainable, 'conv_sobj_branch')

    # Small-object output, shape (52, 52, 255).
    conv_sbbox = layers.convolution_layer(
        conv_sobj_branch,
        (1, 1, 256, self.anchor_per_scale * (self.num_class + 5)),
        self.trainable, 'conv_sbbox', activate=False, bn=False)

    return conv_lbbox, conv_mbbox, conv_sbbox
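# Sanity check of the head shapes noted in the comments above. The values
# anchor_per_scale = 3, num_class = 80 (COCO) and a 416x416 input are
# assumptions used only for this illustration:
anchor_per_scale = 3
num_class = 80
input_size = 416

head_channels = anchor_per_scale * (num_class + 5)   # 4 box coords + objectness + classes
print(head_channels)                                 # 255

for stride in (32, 16, 8):                           # conv_lbbox, conv_mbbox, conv_sbbox
    grid = input_size // stride
    print((grid, grid, head_channels))               # (13, 13, 255) (26, 26, 255) (52, 52, 255)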
def _tnet(contents, gammas, betas, data_format="channels_last"):
    with tf.variable_scope("tnet", reuse=tf.AUTO_REUSE):
        inputs = ((contents / 255.) - 0.5) * 2
        conv1 = conv(inputs, name="conv1", data_format=data_format,
                     filters=32, kernel_size=9, strides=1,
                     bn_gamma=gammas["conv1"], bn_beta=betas["conv1"])
        conv2 = conv(conv1, name="conv2", data_format=data_format,
                     filters=64, kernel_size=3, strides=2,
                     bn_gamma=gammas["conv2"], bn_beta=betas["conv2"])
        conv3 = conv(conv2, name="conv3", data_format=data_format,
                     filters=128, kernel_size=3, strides=2,
                     bn_gamma=gammas["conv3"], bn_beta=betas["conv3"])
        res1 = residual_block(conv3, name="res1", data_format=data_format,
                              filters=128, kernel_size=3,
                              bn_gamma1=gammas["res1_1"], bn_beta1=betas["res1_1"],
                              bn_gamma2=gammas["res1_2"], bn_beta2=betas["res1_2"])
        res2 = residual_block(res1, name="res2", data_format=data_format,
                              filters=128, kernel_size=3,
                              bn_gamma1=gammas["res2_1"], bn_beta1=betas["res2_1"],
                              bn_gamma2=gammas["res2_2"], bn_beta2=betas["res2_2"])
        res3 = residual_block(res2, name="res3", data_format=data_format,
                              filters=128, kernel_size=3,
                              bn_gamma1=gammas["res3_1"], bn_beta1=betas["res3_1"],
                              bn_gamma2=gammas["res3_2"], bn_beta2=betas["res3_2"])
        res4 = residual_block(res3, name="res4", data_format=data_format,
                              filters=128, kernel_size=3,
                              bn_gamma1=gammas["res4_1"], bn_beta1=betas["res4_1"],
                              bn_gamma2=gammas["res4_2"], bn_beta2=betas["res4_2"])
        res5 = residual_block(res4, name="res5", data_format=data_format,
                              filters=128, kernel_size=3,
                              bn_gamma1=gammas["res5_1"], bn_beta1=betas["res5_1"],
                              bn_gamma2=gammas["res5_2"], bn_beta2=betas["res5_2"])
        up1 = upsample(res5, name="up1", data_format=data_format,
                       filters=64, kernel_size=3, strides=2,
                       bn_gamma=gammas["up1"], bn_beta=betas["up1"])
        up2 = upsample(up1, name="up2", data_format=data_format,
                       filters=32, kernel_size=3, strides=2,
                       bn_gamma=gammas["up2"], bn_beta=betas["up2"])
        conv4 = conv(up2, name="conv4", data_format=data_format,
                     filters=3, kernel_size=9, strides=1,
                     with_bn=False, with_relu=False)
        out = tf.clip_by_value(conv4, 0., 255.)
        return out
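# Hypothetical wiring of the two networks above in a TF1-style graph; the
# 256x256 RGB placeholder shapes are assumptions, and conv/residual_block/
# upsample/_pnet_fc must be importable for this sketch to build:
import tensorflow as tf

content = tf.placeholder(tf.float32, [None, 256, 256, 3], name="content")
style = tf.placeholder(tf.float32, [None, 256, 256, 3], name="style")

gammas, betas = _pnet(style)              # style image -> per-layer (gamma, beta)
stylized = _tnet(content, gammas, betas)  # content rendered with style-conditioned norm

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed float images in the [0, 255] range for both placeholders when running.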