Example #1
  def __call__(self, inputs):
    """Generate Computation Graph"""
    with tf.variable_scope(self.scope):
      if self.data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

      inputs = inputs / 255

      inputs, route2, route4 = darknet53(inputs, data_format=self.data_format)

      inputs, route1 = feature_pyramid_network(inputs, filters=512, data_format=self.data_format)
      detect1 = yolo_layer(inputs,
                           n_classes=self.n_classes,
                           anchors=_ANCHORS[6:],
                           img_size=self.input_size,
                           data_format=self.data_format)

      inputs = Conv2D(route1, filters=256, kernel_size=1, data_format=self.data_format)
      inputs = BatchNormalization(inputs, data_format=self.data_format)
      inputs = LeakyReLU(inputs)

      upsample_size = route2.get_shape().as_list()
      inputs = upsample(inputs, out_shape=upsample_size, data_format=self.data_format)
      axis = 1 if self.data_format == 'channels_first' else 3
      inputs = tf.concat([inputs, route2], axis=axis)

      inputs, route3 = feature_pyramid_network(inputs, filters=256, data_format=self.data_format)
      detect2 = yolo_layer(inputs,
                           n_classes=self.n_classes,
                           anchors=_ANCHORS[3:6],
                           img_size=self.input_size,
                           data_format=self.data_format)

      inputs = Conv2D(route3, filters=128, kernel_size=1, data_format=self.data_format)
      inputs = BatchNormalization(inputs, data_format=self.data_format)
      inputs = LeakyReLU(inputs)

      upsample_size = route4.get_shape().as_list()
      inputs = upsample(inputs, out_shape=upsample_size, data_format=self.data_format)
      axis = 1 if self.data_format == 'channels_first' else 3
      inputs = tf.concat([inputs, route4], axis=axis)

      inputs, _ = feature_pyramid_network(inputs, filters=128, data_format=self.data_format)
      detect3 = yolo_layer(inputs,
                           n_classes=self.n_classes,
                           anchors=_ANCHORS[:3],
                           img_size=self.input_size,
                           data_format=self.data_format)

      inputs = tf.concat([detect1, detect2, detect3], axis=1)
      inputs = build_boxes(inputs)
      boxes_dicts = non_max_suppression(inputs,
                                        n_classes=self.n_classes,
                                        max_output_size=self.max_output_size,
                                        iou_threshold=self.iou_threshold,
                                        confidence_threshold=self.confidence_threshold)
      return boxes_dicts
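
The graph above calls Conv2D, BatchNormalization, LeakyReLU, upsample (and, in Example #4, MaxPooling2D) as plain functions rather than Keras layer objects; those helpers are not shown in these examples. A minimal sketch of what they might look like, assuming a TF 1.x tf.layers-based implementation with 'same' padding (the padding scheme and the batch-norm / leaky-ReLU constants below are assumptions, not taken from the original):

import tensorflow as tf

_BN_MOMENTUM = 0.9        # assumed value
_BN_EPSILON = 1e-5        # assumed value
_LEAKY_RELU_ALPHA = 0.1   # assumed value

def Conv2D(inputs, filters, kernel_size, strides=1, data_format='channels_last'):
  # Hypothetical functional wrapper around tf.layers.conv2d.
  return tf.layers.conv2d(inputs, filters=filters, kernel_size=kernel_size,
                          strides=strides, padding='same', use_bias=False,
                          data_format=data_format)

def BatchNormalization(inputs, data_format='channels_last', training=False):
  axis = 1 if data_format == 'channels_first' else 3
  return tf.layers.batch_normalization(inputs, axis=axis, momentum=_BN_MOMENTUM,
                                       epsilon=_BN_EPSILON, training=training)

def LeakyReLU(inputs):
  return tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU_ALPHA)

def MaxPooling2D(inputs, pool_size, strides, data_format='channels_last'):
  # 'same' padding keeps the spatial size when strides=[1, 1] (see Example #4).
  return tf.layers.max_pooling2d(inputs, pool_size=pool_size, strides=strides,
                                 padding='same', data_format=data_format)

def upsample(inputs, out_shape, data_format='channels_last'):
  # Hypothetical nearest-neighbour upsampling to a target feature-map shape.
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
    new_height, new_width = out_shape[2], out_shape[3]
  else:
    new_height, new_width = out_shape[1], out_shape[2]
  inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 3, 1, 2])
  return inputs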
Example #2
def darknet53_residual_block(inputs, filters, data_format, strides=1):
  """Creates a residual block for Darknet."""
  shortcut = inputs

  inputs = Conv2D(inputs, filters=filters, kernel_size=1, strides=strides, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  filters *= 2
  inputs = Conv2D(inputs, filters=filters, kernel_size=3, strides=strides, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  inputs += shortcut
  return inputs
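
A usage note, not from the original: the same strides value is applied to both convolutions while shortcut stays untouched, so the residual addition only shape-checks when the block is called with strides=1 (the default), as Darknet-53 in Example #5 does; downsampling is handled by the separate stride-2 convolutions between blocks. A hypothetical shape check under the wrappers sketched after Example #1:

x = tf.placeholder(tf.float32, [None, 52, 52, 128])  # assumed NHWC feature map
y = darknet53_residual_block(x, filters=64, data_format='channels_last')
# 1x1/64 conv -> 3x3/128 conv -> add shortcut: y keeps the shape [None, 52, 52, 128]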
Example #3
def feature_pyramid_network(inputs, data_format):
    """Creates convolution operations layer used after Darknet"""
    inputs = Conv2D(inputs,
                    filters=256,
                    kernel_size=1,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    route = inputs

    inputs = Conv2D(inputs,
                    filters=512,
                    kernel_size=3,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    return inputs, route
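
For context (an inference from how such routes are consumed in Example #1, not part of the original): the 3x3/512 branch feeds the detection head at this scale, while the 1x1/256 route branch is what later gets reduced, upsampled and concatenated with a finer backbone feature map. A hypothetical call with a stride-32 feature map, reusing the wrappers sketched after Example #1:

feats = tf.placeholder(tf.float32, [None, 13, 13, 1024])  # assumed stride-32 features
head, route = feature_pyramid_network(feats, data_format='channels_last')
# head  -> [None, 13, 13, 512], fed to the detection layer for this scale
# route -> [None, 13, 13, 256], reused for the upsample-and-concat path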
Example #4
def darknet(inputs, data_format):
    """Creates Darknet model"""
    filters = 16
    for _ in range(4):
        inputs = Conv2D(inputs,
                        filters,
                        kernel_size=3,
                        data_format=data_format)
        inputs = BatchNormalization(inputs, data_format=data_format)
        inputs = LeakyReLU(inputs)
        inputs = MaxPooling2D(inputs,
                              pool_size=[2, 2],
                              strides=[2, 2],
                              data_format=data_format)
        filters *= 2

    inputs = Conv2D(inputs,
                    filters=256,
                    kernel_size=3,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    route = inputs  # layer 8
    inputs = MaxPooling2D(inputs,
                          pool_size=[2, 2],
                          strides=[2, 2],
                          data_format=data_format)

    inputs = Conv2D(inputs,
                    filters=512,
                    kernel_size=3,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    inputs = MaxPooling2D(inputs,
                          pool_size=[2, 2],
                          strides=[1, 1],
                          data_format=data_format)

    inputs = Conv2D(inputs,
                    filters=1024,
                    kernel_size=3,
                    data_format=data_format)
    inputs = BatchNormalization(inputs, data_format=data_format)
    inputs = LeakyReLU(inputs)
    return inputs, route
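
A rough shape trace, assuming a 416x416 RGB input in NHWC and 'same'-padded convolution/pooling wrappers like those sketched after Example #1 (these assumptions are not stated in the original):

images = tf.placeholder(tf.float32, [None, 416, 416, 3])
feats, route = darknet(images, data_format='channels_last')
# loop of 4 conv+pool blocks: 416 -> 208 -> 104 -> 52 -> 26, filters 16 -> 128
# route -> [None, 26, 26, 256]  (stride-16 branch kept for the second scale)
# the stride-1 max pool keeps 13x13, so feats -> [None, 13, 13, 1024]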
Example #5
def darknet53(inputs, data_format):
  """Creates Darknet53 model"""
  inputs = Conv2D(inputs, filters=32, kernel_size=3, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)
  inputs = Conv2D(inputs, filters=64, kernel_size=3, strides=2, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  inputs = darknet53_residual_block(inputs, filters=32, data_format=data_format)

  inputs = Conv2D(inputs, filters=128, kernel_size=3, strides=2, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  for _ in range(2):
    inputs = darknet53_residual_block(inputs, filters=64, data_format=data_format)

  inputs = Conv2D(inputs, filters=256, kernel_size=3, strides=2, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  for _ in range(8):
    inputs = darknet53_residual_block(inputs, filters=128, data_format=data_format)
  route4 = inputs  # layer 36

  inputs = Conv2D(inputs, filters=512, kernel_size=3, strides=2, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  for _ in range(8):
    inputs = darknet53_residual_block(inputs, filters=256, data_format=data_format)
  route2 = inputs  # layer 61

  inputs = Conv2D(inputs, filters=1024, kernel_size=3, strides=2, data_format=data_format)
  inputs = BatchNormalization(inputs, data_format=data_format)
  inputs = LeakyReLU(inputs)

  for _ in range(4):
    inputs = darknet53_residual_block(inputs, filters=512, data_format=data_format)
  return inputs, route2, route4
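
A rough shape check for the three returned tensors, under the same assumptions as above (416x416 NHWC input, 'same'-padded wrappers; not stated in the original):

images = tf.placeholder(tf.float32, [None, 416, 416, 3])
feats, route2, route4 = darknet53(images, data_format='channels_last')
# route4 -> [None, 52, 52, 256]   stride  8, concatenated for the finest scale
# route2 -> [None, 26, 26, 512]   stride 16, concatenated for the middle scale
# feats  -> [None, 13, 13, 1024]  stride 32, fed to the first detection head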
Example #6
    def network_definition(self):
        n_convfilter    = [96, 128, 256, 256, 256, 256]
        n_fc_filters    = [1024]
        n_deconvfilter  = [128, 128, 128, 64, 32, 2]
        input_shape     = (self.batch_size, 3, self.img_w, self.img_h)

        x = InputLayer(input_shape)
        conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
        conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
        pool1 = PoolLayer(conv1b)

        conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
        conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
        pool2 = PoolLayer(conv2c)

        conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
        conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
        pool3 = PoolLayer(conv3b)

        conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4b)

        conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
        conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
        pool5 = PoolLayer(conv5b)

        conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
        pool6 = PoolLayer(conv6b)

        flat6 = FlattenLayer(pool6)
        fc7 = TensorProductLayer(flat6, n_fc_filters[0])

        # 3D GRU hidden state: n_deconvfilter[0] (=128) channels on an n_gru_vox**3 voxel grid
        s_shape = (self.batch_size, self.n_gru_vox, n_deconvfilter[0], self.n_gru_vox, self.n_gru_vox)

        # Dummy 3D grid hidden representations
        prev_s = InputLayer(s_shape)

        t_x_s_update = FCConv3DLayer(prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_reset = FCConv3DLayer(prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        reset_gate = SigmoidLayer(t_x_s_reset)

        rs = EltwiseMultiplyLayer(reset_gate, prev_s)
        t_x_rs = FCConv3DLayer(rs, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
            # theano.scan cannot call an already-compiled function, so the encoder
            # graph is rebuilt here, reusing the parameters of the layers defined above.
            input_ = InputLayer(input_shape, x_curr)
            conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
            rect1a_ = LeakyReLU(conv1a_)
            conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
            rect1_ = LeakyReLU(conv1b_)
            pool1_ = PoolLayer(rect1_)

            conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
            rect2a_ = LeakyReLU(conv2a_)
            conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
            rect2_ = LeakyReLU(conv2b_)
            conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
            res2_ = AddLayer(conv2c_, rect2_)
            pool2_ = PoolLayer(res2_)

            conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
            rect3a_ = LeakyReLU(conv3a_)
            conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
            rect3_ = LeakyReLU(conv3b_)
            conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
            res3_ = AddLayer(conv3c_, rect3_)
            pool3_ = PoolLayer(res3_)

            conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
            rect4a_ = LeakyReLU(conv4a_)
            conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
            rect4_ = LeakyReLU(conv4b_)
            pool4_ = PoolLayer(rect4_)

            conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
            rect5a_ = LeakyReLU(conv5a_)
            conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
            rect5_ = LeakyReLU(conv5b_)
            conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
            res5_ = AddLayer(conv5c_, rect5_)
            pool5_ = PoolLayer(res5_)

            conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
            rect6a_ = LeakyReLU(conv6a_)
            conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
            rect6_ = LeakyReLU(conv6b_)
            res6_ = AddLayer(pool5_, rect6_)
            pool6_ = PoolLayer(res6_)

            flat6_ = FlattenLayer(pool6_)
            fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
            rect7_ = LeakyReLU(fc7_)

            prev_s_ = InputLayer(s_shape, prev_s_tensor)

            t_x_s_update_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_update.params)

            t_x_s_reset_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_reset.params)

            update_gate_ = SigmoidLayer(t_x_s_update_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
            t_x_rs_ = FCConv3DLayer(
                rs_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3), params=t_x_rs.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_)

            gru_out_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_.output, update_gate_.output

        s_update, _ = theano.scan(
            recurrence,
            sequences=[self.x],  # one input image (view) per scan step
            outputs_info=[T.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX),
                          T.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX)])

        update_all = s_update[-1]
        s_all = s_update[0]
        s_last = s_all[-1]
        gru_s = InputLayer(s_shape, s_last)
        unpool7 = Unpool3DLayer(gru_s)
        conv7a = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
        rect7a = LeakyReLU(conv7a)
        conv7b = Conv3DLayer(rect7a, (n_deconvfilter[1], 3, 3, 3))
        rect7 = LeakyReLU(conv7b)
        res7 = AddLayer(unpool7, rect7)

        unpool8 = Unpool3DLayer(res7)
        conv8a = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
        rect8a = LeakyReLU(conv8a)
        conv8b = Conv3DLayer(rect8a, (n_deconvfilter[2], 3, 3, 3))
        rect8 = LeakyReLU(conv8b)
        res8 = AddLayer(unpool8, rect8)

        unpool9 = Unpool3DLayer(res8)
        conv9a = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
        rect9a = LeakyReLU(conv9a)
        conv9b = Conv3DLayer(rect9a, (n_deconvfilter[3], 3, 3, 3))
        rect9 = LeakyReLU(conv9b)

        conv9c = Conv3DLayer(unpool9, (n_deconvfilter[3], 1, 1, 1))
        res9 = AddLayer(conv9c, rect9)

        conv10a = Conv3DLayer(res9, (n_deconvfilter[4], 3, 3, 3))
        rect10a = LeakyReLU(conv10a)
        conv10b = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
        rect10 = LeakyReLU(conv10b)

        conv10c = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
        res10 = AddLayer(conv10c, rect10)

        conv11 = Conv3DLayer(res10, (n_deconvfilter[5], 3, 3, 3))
        mse_loss = MseLoss3D(conv11.output)

        self.loss = mse_loss.loss(self.y)
        self.params = get_trainable_params()
        self.output = mse_loss.prediction()
        self.activations = [update_all]
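
For reference (a simplification, not code from the original): stripped of the 3-D convolutions, the state update that recurrence builds with SigmoidLayer, ComplementLayer, TanhLayer, EltwiseMultiplyLayer and AddLayer is a GRU-style blend of the previous hidden state and a tanh candidate, with ComplementLayer supplying 1 - u. A minimal numpy sketch of just that arithmetic:

import numpy as np

def gru_state_update(prev_s, t_x_s_update, t_x_rs):
    # u = sigmoid(t_x_s_update); candidate = tanh(t_x_rs)
    u = 1.0 / (1.0 + np.exp(-t_x_s_update))
    candidate = np.tanh(t_x_rs)
    # gru_out_ = u * prev_s + (1 - u) * candidate (the two EltwiseMultiplyLayers + AddLayer)
    return u * prev_s + (1.0 - u) * candidate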