def create_heads(self, connections, params_dicts):
    """Build one SSD head per connection and merge all heads' outputs.

    For every (tensor, params) pair a priors array is generated with
    `prior_box_clusterd`, `prior_box_specs` or `prior_box` (chosen by which
    key is present in `params`) and a prediction head is attached with
    `self._add_single_ssd_head`. The per-head outputs are then concatenated
    and reshaped.

    Side effects: appends to `self.priors_info` and sets `self.mbox_loc`,
    `self.mbox_conf`, `self.mbox_priorbox`, `self.logits`,
    `self.priors_array`, `self.priors` and `self.predictions`.

    :param connections: feature tensors, one per head
    :param params_dicts: one dict per head; must contain 'prefix', may contain
      'suffix'; every other entry is forwarded as a keyword argument to the
      selected priors function
    :return: dict with keys 'locs', 'confs' and 'logits'
    """
    image_size = get_spatial_dims(self.input_shape, self.data_format)

    with tf.variable_scope('ssd_heads'):
        scores, bboxes, priors = [], [], []
        priors_array = []
        for head_id, (tensor, params) in enumerate(zip(connections, params_dicts)):
            with tf.variable_scope('head_{}'.format(head_id)):
                if 'clustered_sizes' in params:
                    priors_fn = prior_box_clusterd
                elif 'box_specs' in params:
                    priors_fn = prior_box_specs
                else:
                    priors_fn = prior_box

                # 'prefix' / 'suffix' only name the head; they are not priors parameters.
                fn_params = {k: v for k, v in params.items() if k not in ('prefix', 'suffix')}
                fn_params['data_format'] = self.data_format
                numpy_priors, num_priors_per_pixel = priors_fn(tensor, image_size, **fn_params)

                # Read the spatial size with the same data format the priors function was given,
                # so the consistency check and `priors_info` agree with it for every layout.
                spatial_dims = get_spatial_dims(tensor, self.data_format)
                assert np.prod(spatial_dims) * num_priors_per_pixel == numpy_priors.shape[2] // 4
                self.priors_info.append([spatial_dims, num_priors_per_pixel])
                priors_array.append(numpy_priors)

                priors_tensor = tf.convert_to_tensor(numpy_priors, name='{}_priorbox'.format(params['prefix']))
                priors.append(priors_tensor)

                score, bbox = self._add_single_ssd_head(tensor, self.num_classes, num_priors_per_pixel,
                                                        params['prefix'], params.get('suffix', ''))
                scores.append(score)
                bboxes.append(bbox)

        with tf.name_scope('concat_reshape_softmax'):
            # Gather all predictions; a single head needs no concat op.
            single_head = len(connections) <= 1
            self.mbox_loc = bboxes[0] if single_head else tf.concat(bboxes, axis=-1, name='mbox_loc')
            self.mbox_conf = scores[0] if single_head else tf.concat(scores, axis=-1, name='mbox_conf')
            self.mbox_priorbox = priors[0] if single_head else tf.concat(priors, axis=-1, name='mbox_priorbox')

            total_priors = self.mbox_conf.get_shape()[-1] // self.num_classes
            self.mbox_loc = tf.reshape(self.mbox_loc, shape=(-1, total_priors, 4), name='mbox_loc_final')
            self.logits = tf.reshape(self.mbox_conf, shape=(-1, total_priors, self.num_classes),
                                     name='mbox_conf_logits')
            # NOTE: despite the scope name, confidences come from a sigmoid, not a softmax.
            self.mbox_conf = tf.sigmoid(self.logits, name='mbox_conf_final')

            # The (2, -1, 4) layout only matches `total_priors` when every head's priors array has
            # two rows (boxes + variances); the assert below catches any other configuration.
            self.priors_array = np.reshape(np.concatenate(priors_array, axis=-1), (2, -1, 4))
            self.priors = tf.reshape(self.mbox_priorbox, shape=(1, 2, -1, 4), name='mbox_priorbox_final')
            assert self.priors_array.shape[1] == total_priors

    self.predictions = dict(locs=self.mbox_loc, confs=self.mbox_conf, logits=self.logits)
    return self.predictions
def prior_box_clusterd(blob, image_size, clustered_sizes, step, clip=False, offset=0.5, variance=None,
                       data_format='NHWC'):
    """
    Generates numpy array of priors in caffe format from a fixed list of box sizes

    :param blob: input feature blob (we only need spatial dimension from it); a rank-2 blob is
      treated as a 1x1 feature map
    :param image_size: input image size (height, width)
    :param clustered_sizes: list of (height, width) tuples; they are added to the normalized box
      centers as-is, i.e. they are not divided by the image size
    :param step: distance between neighbouring prior centers in the same units as image_size;
      either a scalar or a (step_y, step_x) list/tuple
    :param clip: clip priors to image bounding box
    :param offset: a subpixel offset for priors location
    :param variance: optional array of length 4 with variances to encode in priors array
    :param data_format: NHWC or NCHW
    :return: tuple (priors_array, num_priors_per_pixel); priors_array is a float32 array of shape
      (1, C, layer_height * layer_width * num_priors_per_pixel * 4) where C is 2 when variance is
      given (row 0 holds boxes as xmin, ymin, xmax, ymax; row 1 the repeated variances), else 1
    """
    assert variance is None or len(variance) == 4
    assert data_format in ['NHWC', 'NCHW']

    step_y, step_x = step if isinstance(step, (list, tuple)) else (step, step)

    if len(blob.get_shape()) == 2:
        layer_height = layer_width = 1
    else:
        layer_height, layer_width = get_spatial_dims(blob, data_format)

    # Explicit check instead of `if variance:` -- truth-testing a numpy array raises ValueError.
    has_variance = variance is not None and len(variance) > 0

    num_priors_per_pixel = len(clustered_sizes)
    top_shape = 1, (2 if has_variance else 1), layer_height * layer_width * num_priors_per_pixel * 4

    anchors = []
    for row in range(layer_height):
        center_y = (row + offset) * step_y / image_size[0]
        for col in range(layer_width):
            center_x = (col + offset) * step_x / image_size[1]
            for box_rows, box_cols in clustered_sizes:
                half_h = box_rows / 2.
                half_w = box_cols / 2.
                anchors.extend([center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h])

    # Clip before the variances are appended so that only box coordinates are affected.
    if clip:
        anchors = np.clip(anchors, 0., 1.).tolist()

    if has_variance:
        anchors.extend(list(variance) * layer_width * layer_height * num_priors_per_pixel)

    assert len(anchors) == np.prod(top_shape)
    priors_array = np.array([anchors], dtype=np.float32).reshape(top_shape)
    return priors_array, num_priors_per_pixel
def prior_box_specs(blob, image_size, box_specs, step, clip=False, offset=0.5, variance=None,
                    data_format='NHWC'):
    """
    Generates numpy array of priors in caffe format from [size, aspect_ratio] specs

    :param blob: input feature blob (we only need spatial dimension from it); a rank-2 blob is
      treated as a 1x1 feature map
    :param image_size: input image size (height, width)
    :param box_specs: list of pairs [size, aspect_ratio]; each box has width size * sqrt(aspect_ratio)
      and height size / sqrt(aspect_ratio), in the same units as image_size
    :param step: distance between neighbouring prior centers in the same units as image_size;
      either a scalar or a (step_y, step_x) list/tuple
    :param clip: clip priors to image bounding box
    :param offset: a subpixel offset for priors location
    :param variance: optional array of length 4 with variances to encode in priors array
    :param data_format: NHWC or NCHW
    :return: tuple (priors_array, num_priors_per_pixel); priors_array is a float32 array of shape
      (1, C, layer_height * layer_width * num_priors_per_pixel * 4) where C is 2 when variance is
      given (row 0 holds boxes as xmin, ymin, xmax, ymax normalized by the image size; row 1 the
      repeated variances), else 1
    """
    assert isinstance(box_specs, list)
    assert variance is None or len(variance) == 4
    assert data_format in ['NHWC', 'NCHW']

    step_y, step_x = step if isinstance(step, (list, tuple)) else (step, step)

    if len(blob.get_shape()) == 2:
        layer_height = layer_width = 1
    else:
        layer_height, layer_width = get_spatial_dims(blob, data_format)

    # Explicit check instead of `if variance:` -- truth-testing a numpy array raises ValueError.
    has_variance = variance is not None and len(variance) > 0

    # Box half-extents depend only on the spec, so compute them once rather than per pixel.
    half_extents = []
    for size, aspect_ratio in box_specs:
        ratio_root = math.sqrt(aspect_ratio)
        half_extents.append((size * ratio_root / 2., size / ratio_root / 2.))

    img_height, img_width = image_size[0], image_size[1]
    anchors = []
    for row in range(layer_height):
        center_y = (row + offset) * step_y
        for col in range(layer_width):
            center_x = (col + offset) * step_x
            for half_w, half_h in half_extents:
                anchors.extend([(center_x - half_w) / img_width,
                                (center_y - half_h) / img_height,
                                (center_x + half_w) / img_width,
                                (center_y + half_h) / img_height])

    # Clip before the variances are appended so that only box coordinates are affected.
    if clip:
        anchors = np.clip(anchors, 0., 1.).tolist()

    # One prior per spec at every location; no need to recover the count by dividing len(anchors).
    num_priors_per_pixel = len(box_specs)

    if has_variance:
        anchors.extend(list(variance) * layer_height * layer_width * num_priors_per_pixel)

    top_shape = 1, (2 if has_variance else 1), layer_height * layer_width * num_priors_per_pixel * 4
    assert len(anchors) == np.prod(top_shape)
    priors_array = np.array([anchors], dtype=np.float32).reshape(top_shape)
    return priors_array, num_priors_per_pixel