Beispiel #1
0
  def create_heads(self, connections, params_dicts):
    """
    Builds one SSD head per input connection and merges the outputs of all heads.

    For every (tensor, params) pair a priors generator is selected from the keys of
    ``params`` ('clustered_sizes' -> prior_box_clusterd, 'box_specs' -> prior_box_specs,
    otherwise prior_box), the priors are generated as a numpy array, and a score/bbox
    head is attached with ``self._add_single_ssd_head``.

    Side effects: appends ``[spatial_dims, num_priors_per_pixel]`` to ``self.priors_info``
    for every head and sets ``self.mbox_loc``, ``self.mbox_conf``, ``self.mbox_priorbox``,
    ``self.logits``, ``self.priors_array``, ``self.priors`` and ``self.predictions``.

    :param connections: sequence of feature tensors, one per head
    :param params_dicts: sequence of dicts, one per head; 'prefix' is required, 'suffix' is
      optional (defaults to ''), every other key is forwarded as a keyword argument to the
      selected priors function
    :return: dict with keys 'locs' (reshaped to (-1, total_priors, 4)), 'confs' (sigmoid of the
      logits) and 'logits' (reshaped to (-1, total_priors, num_classes))
    """
    image_size = get_spatial_dims(self.input_shape, self.data_format)

    with tf.variable_scope('ssd_heads'):
      scores, bboxes, priors = [], [], []
      priors_array = []
      for head_id, (tensor, params) in enumerate(zip(connections, params_dicts)):
        with tf.variable_scope('head_{}'.format(head_id)):
          if 'clustered_sizes' in params:
            priors_fn = prior_box_clusterd
          elif 'box_specs' in params:
            priors_fn = prior_box_specs
          else:
            priors_fn = prior_box
          # 'prefix' and 'suffix' only name the head; they are not priors-generator arguments.
          fn_params = {k: v for k, v in params.items() if k not in ('prefix', 'suffix')}
          fn_params['data_format'] = self.data_format

          numpy_priors, num_priors_per_pixel = priors_fn(tensor, image_size, **fn_params)

          # Use the same data format the priors were generated with, so that the consistency
          # check and priors_info agree with the generator for non-default layouts.
          spatial_dims = get_spatial_dims(tensor, self.data_format)
          assert np.prod(spatial_dims) * num_priors_per_pixel == numpy_priors.shape[2] // 4
          self.priors_info.append([spatial_dims, num_priors_per_pixel])
          priors_array.append(numpy_priors)

          priors_tensor = tf.convert_to_tensor(numpy_priors, name='{}_priorbox'.format(params['prefix']))
          priors.append(priors_tensor)

          score, bbox = self._add_single_ssd_head(tensor, self.num_classes, num_priors_per_pixel, params['prefix'],
                                                  params.get('suffix', ''))
          scores.append(score)
          bboxes.append(bbox)

      with tf.name_scope('concat_reshape_softmax'):
        # Gather all predictions; with a single head there is nothing to concatenate.
        self.mbox_loc = tf.concat(bboxes, axis=-1, name='mbox_loc') if len(connections) > 1 else bboxes[0]
        self.mbox_conf = tf.concat(scores, axis=-1, name='mbox_conf') if len(connections) > 1 else scores[0]
        self.mbox_priorbox = tf.concat(priors, axis=-1, name='mbox_priorbox') if len(connections) > 1 else priors[0]

        total_priors = self.mbox_conf.get_shape()[-1] // self.num_classes
        self.mbox_loc = tf.reshape(self.mbox_loc, shape=(-1, total_priors, 4), name='mbox_loc_final')
        self.logits = tf.reshape(self.mbox_conf, shape=(-1, total_priors, self.num_classes), name='mbox_conf_logits')
        # Confidences use an element-wise sigmoid over the logits (not a softmax across classes).
        self.mbox_conf = tf.sigmoid(self.logits, name='mbox_conf_final')

    # NOTE(review): the (2, -1, 4) layout appears to assume every head generated priors with
    # variances (two rows per priors array) - confirm against the params passed by callers.
    self.priors_array = np.reshape(np.concatenate(priors_array, axis=-1), (2, -1, 4))
    self.priors = tf.reshape(self.mbox_priorbox, shape=(1, 2, -1, 4), name='mbox_priorbox_final')
    assert self.priors_array.shape[1] == total_priors

    self.predictions = dict(locs=self.mbox_loc, confs=self.mbox_conf, logits=self.logits)
    return self.predictions
def prior_box_clusterd(blob,
                       image_size,
                       clustered_sizes,
                       step,
                       clip=False,
                       offset=0.5,
                       variance=None,
                       data_format='NHWC'):
    """
    Generates a numpy array of priors in caffe format from clustered box sizes.

    For every cell of the feature map one box per entry of ``clustered_sizes`` is emitted
    as [xmin, ymin, xmax, ymax]. Cell centers are normalized by ``image_size``; the clustered
    sizes are added to the normalized centers as-is (they are not divided by ``image_size``).

    :param blob: input feature blob (we only need spatial dimensions from it); a blob whose
      shape has length 2 is treated as a 1x1 feature map
    :param image_size: input image size (height, width)
    :param clustered_sizes: list of (height, width) tuples
    :param step: distance between neighbouring cell centers, either a scalar or a
      (step_y, step_x) list/tuple
    :param clip: clip priors to the [0, 1] range
    :param offset: a subpixel offset for priors location
    :param variance: optional sequence of length 4 with variances to encode in the priors array
      (any sized sequence is accepted, including a numpy array)
    :param data_format: NHWC or NCHW
    :return: tuple (priors_array, num_priors_per_pixel) where priors_array is a float32 array of
      shape (1, 2 if variance is given else 1, layer_height * layer_width * num_priors_per_pixel * 4)
    """
    assert variance is None or len(variance) == 4
    assert data_format in ['NHWC', 'NCHW']

    if isinstance(step, (list, tuple)):
        step_y, step_x = step
    else:
        step_y, step_x = step, step

    if len(blob.get_shape()) == 2:
        layer_height = layer_width = 1
    else:
        layer_height, layer_width = get_spatial_dims(blob, data_format)

    # Explicit None check: truth-testing a numpy array of variances would raise ValueError.
    has_variance = variance is not None

    num_priors_per_pixel = len(clustered_sizes)
    num_priors = layer_height * layer_width * num_priors_per_pixel
    top_shape = 1, (2 if has_variance else 1), num_priors * 4

    anchors = []
    for height in range(layer_height):
        for width in range(layer_width):
            center_x = (width + offset) * step_x / image_size[1]
            center_y = (height + offset) * step_y / image_size[0]

            for (box_rows, box_cols) in clustered_sizes:
                xmin = center_x - box_cols / 2.
                ymin = center_y - box_rows / 2.
                xmax = center_x + box_cols / 2.
                ymax = center_y + box_rows / 2.
                anchors.extend([xmin, ymin, xmax, ymax])

    if clip:
        anchors = np.clip(anchors, 0., 1.).tolist()

    if has_variance:
        # Second row of the output: the 4 variances repeated once per prior.
        anchors.extend(list(variance) * num_priors)

    assert len(anchors) == np.prod(top_shape)
    priors_array = np.array([anchors], dtype=np.float32).reshape(top_shape)
    return priors_array, num_priors_per_pixel
def prior_box_specs(blob,
                    image_size,
                    box_specs,
                    step,
                    clip=False,
                    offset=0.5,
                    variance=None,
                    data_format='NHWC'):
    """
    Generates a numpy array of priors in caffe format from explicit box specifications.

    For every cell of the feature map one box per entry of ``box_specs`` is emitted as
    [xmin, ymin, xmax, ymax], normalized by ``image_size``. A spec [size, aspect_ratio]
    produces a box of width ``size * sqrt(aspect_ratio)`` and height
    ``size / sqrt(aspect_ratio)``, expressed in the same units as ``step`` and ``image_size``.

    :param blob: input feature blob (we only need spatial dimensions from it); a blob whose
      shape has length 2 is treated as a 1x1 feature map
    :param image_size: input image size (height, width)
    :param box_specs: list of pairs [size, aspect_ratio]
    :param step: distance between neighbouring cell centers, either a scalar or a
      (step_y, step_x) list/tuple
    :param clip: clip priors to the [0, 1] range
    :param offset: a subpixel offset for priors location
    :param variance: optional sequence of length 4 with variances to encode in the priors array
      (any sized sequence is accepted, including a numpy array)
    :param data_format: NHWC or NCHW
    :return: tuple (priors_array, num_priors_per_pixel) where priors_array is a float32 array of
      shape (1, 2 if variance is given else 1, layer_height * layer_width * num_priors_per_pixel * 4)
    """
    assert isinstance(box_specs, list)
    assert variance is None or len(variance) == 4
    assert data_format in ['NHWC', 'NCHW']

    if isinstance(step, (list, tuple)):
        step_y, step_x = step
    else:
        step_y, step_x = step, step

    if len(blob.get_shape()) == 2:
        layer_height = layer_width = 1
    else:
        layer_height, layer_width = get_spatial_dims(blob, data_format)

    # Explicit None check: truth-testing a numpy array of variances would raise ValueError.
    has_variance = variance is not None

    # Exactly one prior per spec is emitted for each cell.
    num_priors_per_pixel = len(box_specs)
    num_priors = layer_height * layer_width * num_priors_per_pixel

    # Box extents depend only on the spec, so compute the half sizes once instead of per cell.
    half_extents = []
    for size, aspect_ratio in box_specs:
        ratio_root = math.sqrt(aspect_ratio)
        half_extents.append((size * ratio_root / 2., size / ratio_root / 2.))

    anchors = []
    for height in range(layer_height):
        for width in range(layer_width):
            center_y = (height + offset) * step_y
            center_x = (width + offset) * step_x

            for half_w, half_h in half_extents:
                xmin = (center_x - half_w) / image_size[1]
                ymin = (center_y - half_h) / image_size[0]
                xmax = (center_x + half_w) / image_size[1]
                ymax = (center_y + half_h) / image_size[0]
                anchors.extend([xmin, ymin, xmax, ymax])

    if clip:
        anchors = np.clip(anchors, 0., 1.).tolist()

    if has_variance:
        # Second row of the output: the 4 variances repeated once per prior.
        anchors.extend(list(variance) * num_priors)

    top_shape = 1, (2 if has_variance else 1), num_priors * 4

    assert len(anchors) == np.prod(top_shape)
    priors_array = np.array([anchors], dtype=np.float32).reshape(top_shape)
    return priors_array, num_priors_per_pixel