def __call__(self, inputs):
    """Generate Computation Graph"""
    with tf.variable_scope(self.scope):
        if self.data_format == 'channels_first':
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        # Normalize pixel values to [0, 1].
        inputs = inputs / 255

        inputs, route2, route4 = darknet53(inputs, data_format=self.data_format)

        # Detection at the coarsest scale (largest anchors).
        inputs, route1 = feature_pyramid_network(inputs, filters=512,
                                                 data_format=self.data_format)
        detect1 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[6:],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Upsample and concatenate with the mid-level Darknet feature map.
        inputs = Conv2D(route1, filters=256, kernel_size=1,
                        data_format=self.data_format)
        inputs = BatchNormalization(inputs, data_format=self.data_format)
        inputs = LeakyReLU(inputs)
        upsample_size = route2.get_shape().as_list()
        inputs = upsample(inputs, out_shape=upsample_size,
                          data_format=self.data_format)
        axis = 1 if self.data_format == 'channels_first' else 3
        inputs = tf.concat([inputs, route2], axis=axis)

        # Detection at the intermediate scale.
        inputs, route3 = feature_pyramid_network(inputs, filters=256,
                                                 data_format=self.data_format)
        detect2 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[3:6],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Upsample again and concatenate with the early Darknet feature map.
        inputs = Conv2D(route3, filters=128, kernel_size=1,
                        data_format=self.data_format)
        inputs = BatchNormalization(inputs, data_format=self.data_format)
        inputs = LeakyReLU(inputs)
        upsample_size = route4.get_shape().as_list()
        inputs = upsample(inputs, out_shape=upsample_size,
                          data_format=self.data_format)
        axis = 1 if self.data_format == 'channels_first' else 3
        inputs = tf.concat([inputs, route4], axis=axis)

        # Detection at the finest scale (smallest anchors).
        inputs, _ = feature_pyramid_network(inputs, filters=128,
                                            data_format=self.data_format)
        detect3 = yolo_layer(inputs, n_classes=self.n_classes,
                             anchors=_ANCHORS[:3],
                             img_size=self.input_size,
                             data_format=self.data_format)

        # Combine detections from all three scales, decode boxes, and run NMS.
        inputs = tf.concat([detect1, detect2, detect3], axis=1)
        inputs = build_boxes(inputs)
        boxes_dicts = non_max_suppression(
            inputs, n_classes=self.n_classes,
            max_output_size=self.max_output_size,
            iou_threshold=self.iou_threshold,
            confidence_threshold=self.confidence_threshold)

        return boxes_dicts
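The `__call__` graph above relies on an `upsample` helper that is not shown in this section. A minimal sketch of what such a helper might look like, assuming nearest-neighbor resizing via the TF 1.x `tf.image.resize_nearest_neighbor` op and the same `out_shape`/`data_format` arguments used above; this is an assumption, not the code from this listing:

import tensorflow as tf

def upsample(inputs, out_shape, data_format):
    # Hypothetical helper: nearest-neighbor upsampling to the spatial size of
    # the route tensor whose static shape list is passed in as out_shape.
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
        new_height, new_width = out_shape[2], out_shape[3]
    else:
        new_height, new_width = out_shape[1], out_shape[2]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
    return inputs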
def darknet53_residual_block(inputs, filters, data_format, strides=1):
    """Creates a residual block for Darknet."""
    shortcut = inputs

    inputs = Conv2D(inputs, filters=filters, kernel_size=1,
                    strides=strides, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)

    filters *= 2

    inputs = Conv2D(inputs, filters=filters, kernel_size=3,
                    strides=strides, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)

    inputs += shortcut
    return inputs
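The blocks in this section call Conv2D, BatchNormalization, LeakyReLU, and MaxPooling2D as plain functions rather than as Keras layer objects; those wrappers are defined elsewhere in the codebase. A minimal sketch of what they might look like, assuming thin wrappers over the TF 1.x `tf.layers` API; the leak slope, batch-norm constants, and padding choice here are assumptions, not taken from this section:

import tensorflow as tf

_LEAKY_RELU_ALPHA = 0.1  # assumed leak slope

def Conv2D(inputs, filters, kernel_size, data_format, strides=1):
    # Assumed wrapper over tf.layers.conv2d; the reference Darknet code usually
    # applies explicit "fixed" padding before strided convolutions, omitted here.
    return tf.layers.conv2d(
        inputs, filters=filters, kernel_size=kernel_size, strides=strides,
        padding='same', use_bias=False, data_format=data_format)

def BatchNormalization(inputs, data_format, training=False):
    axis = 1 if data_format == 'channels_first' else 3
    return tf.layers.batch_normalization(
        inputs, axis=axis, momentum=0.9, epsilon=1e-5, training=training)

def LeakyReLU(inputs):
    return tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU_ALPHA)

def MaxPooling2D(inputs, pool_size, strides, data_format):
    return tf.layers.max_pooling2d(
        inputs, pool_size=pool_size, strides=strides,
        padding='same', data_format=data_format)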
def feature_pyramid_network(inputs, filters, data_format):
    """Creates convolution operations layer used after Darknet."""
    inputs = Conv2D(inputs, filters=filters, kernel_size=1,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    route = inputs  # branch reused by the upsample path in __call__

    inputs = Conv2D(inputs, filters=2 * filters, kernel_size=3,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    return inputs, route
def darknet(inputs, data_format):
    """Creates Darknet model"""
    filters = 16
    for _ in range(4):
        inputs = Conv2D(inputs, filters, kernel_size=3, data_format=data_format)
        inputs = BatchNormalization(inputs, data_format=data_format)
        inputs = LeakyReLU(inputs)
        inputs = MaxPooling2D(inputs, pool_size=[2, 2], strides=[2, 2],
                              data_format=data_format)
        filters *= 2

    inputs = Conv2D(inputs, filters=256, kernel_size=3, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    route = inputs  # layers 8

    inputs = MaxPooling2D(inputs, pool_size=[2, 2], strides=[2, 2],
                          data_format=data_format)
    inputs = Conv2D(inputs, filters=512, kernel_size=3, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)

    inputs = MaxPooling2D(inputs, pool_size=[2, 2], strides=[1, 1],
                          data_format=data_format)
    inputs = Conv2D(inputs, filters=1024, kernel_size=3, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)

    return inputs, route
def darknet53(inputs, data_format):
    """Creates Darknet53 model"""
    inputs = Conv2D(inputs, filters=32, kernel_size=3, data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)

    inputs = Conv2D(inputs, filters=64, kernel_size=3, strides=2,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    inputs = darknet53_residual_block(inputs, filters=32, data_format=data_format)

    inputs = Conv2D(inputs, filters=128, kernel_size=3, strides=2,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    for _ in range(2):
        inputs = darknet53_residual_block(inputs, filters=64,
                                          data_format=data_format)

    inputs = Conv2D(inputs, filters=256, kernel_size=3, strides=2,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    for _ in range(8):
        inputs = darknet53_residual_block(inputs, filters=128,
                                          data_format=data_format)
    route4 = inputs  # layers 36

    inputs = Conv2D(inputs, filters=512, kernel_size=3, strides=2,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    for _ in range(8):
        inputs = darknet53_residual_block(inputs, filters=256,
                                          data_format=data_format)
    route2 = inputs  # layers 61

    inputs = Conv2D(inputs, filters=1024, kernel_size=3, strides=2,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    for _ in range(4):
        inputs = darknet53_residual_block(inputs, filters=512,
                                          data_format=data_format)

    return inputs, route2, route4
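Putting the TF pieces together, the `__call__` method above would typically be invoked on an image placeholder and evaluated in a TF 1.x session. A rough usage sketch; the class name `Yolo_v3`, its constructor arguments, and the 416x416 input size are assumptions, not taken from this section:

import numpy as np
import tensorflow as tf

# Hypothetical construction; the constructor signature is an assumption.
model = Yolo_v3(n_classes=80, input_size=(416, 416), max_output_size=10,
                iou_threshold=0.5, confidence_threshold=0.5,
                data_format='channels_last')

images = tf.placeholder(tf.float32, [None, 416, 416, 3])
boxes_dicts = model(images)  # builds the graph defined in __call__

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randint(0, 256, size=(1, 416, 416, 3)).astype(np.float32)
    detections = sess.run(boxes_dicts, feed_dict={images: batch})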
def network_definition(self):
    n_convfilter = [96, 128, 256, 256, 256, 256]
    n_fc_filters = [1024]
    n_deconvfilter = [128, 128, 128, 64, 32, 2]
    input_shape = (self.batch_size, 3, self.img_w, self.img_h)

    # Encoder: 2D convolutional layers shared across all input views.
    x = InputLayer(input_shape)
    conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
    conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
    pool1 = PoolLayer(conv1b)

    conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
    conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
    conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
    pool2 = PoolLayer(conv2c)

    conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
    conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
    conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
    pool3 = PoolLayer(conv3b)

    conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
    conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
    pool4 = PoolLayer(conv4b)

    conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
    conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
    conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
    pool5 = PoolLayer(conv5b)

    conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
    conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
    pool6 = PoolLayer(conv6b)

    flat6 = FlattenLayer(pool6)
    fc7 = TensorProductLayer(flat6, n_fc_filters[0])

    # Hidden-state grid of shape
    # (batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox, n_gru_vox).
    s_shape = (self.batch_size, self.n_gru_vox, n_deconvfilter[0],
               self.n_gru_vox, self.n_gru_vox)

    # Dummy 3D grid hidden representations
    prev_s = InputLayer(s_shape)

    t_x_s_update = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
    t_x_s_reset = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    reset_gate = SigmoidLayer(t_x_s_reset)
    rs = EltwiseMultiplyLayer(reset_gate, prev_s)
    t_x_rs = FCConv3DLayer(
        rs, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
        # Scan function cannot use compiled function.
        input_ = InputLayer(input_shape, x_curr)
        conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
        rect1a_ = LeakyReLU(conv1a_)
        conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
        rect1_ = LeakyReLU(conv1b_)
        pool1_ = PoolLayer(rect1_)

        conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
        rect2a_ = LeakyReLU(conv2a_)
        conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
        rect2_ = LeakyReLU(conv2b_)
        conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
        res2_ = AddLayer(conv2c_, rect2_)
        pool2_ = PoolLayer(res2_)

        conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
        rect3a_ = LeakyReLU(conv3a_)
        conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
        rect3_ = LeakyReLU(conv3b_)
        conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
        res3_ = AddLayer(conv3c_, rect3_)
        pool3_ = PoolLayer(res3_)

        conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
        rect4a_ = LeakyReLU(conv4a_)
        conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
        rect4_ = LeakyReLU(conv4b_)
        pool4_ = PoolLayer(rect4_)

        conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
        rect5a_ = LeakyReLU(conv5a_)
        conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
        rect5_ = LeakyReLU(conv5b_)
        conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
        res5_ = AddLayer(conv5c_, rect5_)
        pool5_ = PoolLayer(res5_)

        conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
        rect6a_ = LeakyReLU(conv6a_)
        conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
        rect6_ = LeakyReLU(conv6b_)
        res6_ = AddLayer(pool5_, rect6_)
        pool6_ = PoolLayer(res6_)

        flat6_ = FlattenLayer(pool6_)
        fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
        rect7_ = LeakyReLU(fc7_)

        prev_s_ = InputLayer(s_shape, prev_s_tensor)

        t_x_s_update_ = FCConv3DLayer(
            prev_s_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_update.params)
        t_x_s_reset_ = FCConv3DLayer(
            prev_s_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_reset.params)

        update_gate_ = SigmoidLayer(t_x_s_update_)
        comp_update_gate_ = ComplementLayer(update_gate_)
        reset_gate_ = SigmoidLayer(t_x_s_reset_)

        rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
        t_x_rs_ = FCConv3DLayer(
            rs_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_rs.params)
        tanh_t_x_rs_ = TanhLayer(t_x_rs_)

        gru_out_ = AddLayer(
            EltwiseMultiplyLayer(update_gate_, prev_s_),
            EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

        return gru_out_.output, update_gate_.output

    s_update, _ = theano.scan(
        recurrence,
        sequences=[self.x],  # along with images, feed in the index of the current frame
        outputs_info=[T.zeros_like(np.zeros(s_shape),
                                   dtype=theano.config.floatX),
                      T.zeros_like(np.zeros(s_shape),
                                   dtype=theano.config.floatX)])
    update_all = s_update[-1]
    s_all = s_update[0]
    s_last = s_all[-1]

    # Decoder: 3D upsampling convolutions from the final GRU hidden state.
    gru_s = InputLayer(s_shape, s_last)
    unpool7 = Unpool3DLayer(gru_s)
    conv7a = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
    rect7a = LeakyReLU(conv7a)
    conv7b = Conv3DLayer(rect7a, (n_deconvfilter[1], 3, 3, 3))
    rect7 = LeakyReLU(conv7b)
    res7 = AddLayer(unpool7, rect7)

    unpool8 = Unpool3DLayer(res7)
    conv8a = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
    rect8a = LeakyReLU(conv8a)
    conv8b = Conv3DLayer(rect8a, (n_deconvfilter[2], 3, 3, 3))
    rect8 = LeakyReLU(conv8b)
    res8 = AddLayer(unpool8, rect8)

    unpool9 = Unpool3DLayer(res8)
    conv9a = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
    rect9a = LeakyReLU(conv9a)
    conv9b = Conv3DLayer(rect9a, (n_deconvfilter[3], 3, 3, 3))
    rect9 = LeakyReLU(conv9b)
    conv9c = Conv3DLayer(unpool9, (n_deconvfilter[3], 1, 1, 1))
    res9 = AddLayer(conv9c, rect9)

    conv10a = Conv3DLayer(res9, (n_deconvfilter[4], 3, 3, 3))
    rect10a = LeakyReLU(conv10a)
    conv10b = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
    rect10 = LeakyReLU(conv10b)
    conv10c = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
    res10 = AddLayer(conv10c, rect10)

    conv11 = Conv3DLayer(res10, (n_deconvfilter[5], 3, 3, 3))

    mse_loss = MseLoss3D(conv11.output)
    self.loss = mse_loss.loss(self.y)
    self.params = get_trainable_params()
    self.output = mse_loss.prediction()
    self.activations = [update_all]
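Once network_definition has populated self.loss and self.params, training would proceed by compiling a Theano function with gradient-based updates. A minimal sketch, assuming plain SGD inside the same class, that self.x and self.y are the symbolic input and target tensors used above, and that self.params is a flat list of Theano shared variables; the learning rate and the update rule are assumptions, not taken from this section:

import theano
import theano.tensor as T

# Hypothetical training-step compilation; assumes self.params contains Theano
# shared variables directly (some codebases wrap them in parameter objects, in
# which case the underlying shared variable must be unwrapped first).
learning_rate = 1e-4  # assumed value
grads = T.grad(self.loss, self.params)
updates = [(p, p - learning_rate * g) for p, g in zip(self.params, grads)]

train_step = theano.function(
    inputs=[self.x, self.y],
    outputs=self.loss,
    updates=updates)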