def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5, score_threshold=0.3):
    """Apply non-max suppression (NMS) to a set of boxes.

    Arguments:
        scores -- tensor of shape (None,), output of yolo_filter_boxes()
        boxes -- tensor of shape (None, 4), output of yolo_filter_boxes(),
            already scaled to the image size
        classes -- tensor of shape (None,), output of yolo_filter_boxes()
        max_boxes -- integer, maximum number of predicted boxes to keep
        iou_threshold -- float, "intersection over union" threshold for NMS
        score_threshold -- float, minimum score used for NMS filtering

    Returns:
        scores, boxes, classes -- tensors holding at most `max_boxes`
            surviving detections, gathered in suppression order. Note the
            number of kept boxes is at most max_boxes.
    """
    # tf.image.non_max_suppression expects an int32 tensor for the box budget.
    keep_limit = K.constant(max_boxes, dtype='int32')

    # Indices of the boxes that survive suppression.
    keep = tf.image.non_max_suppression(boxes, scores, keep_limit,
                                        iou_threshold, score_threshold)

    # Select only the surviving entries from each tensor.
    return (K.gather(scores, keep),
            K.gather(boxes, keep),
            K.gather(classes, keep))
def call(self, inputs, training=None):
    """Apply a class-conditional affine transform (per-class gamma/beta).

    Args:
        inputs: list ``[x, class_labels]`` where ``x`` is the feature tensor
            and ``class_labels`` has shape (batch, 1).
        training: unused; kept for Keras layer-call compatibility.

    Returns:
        ``x`` scaled by the per-class gamma (if ``self.scale``) and shifted
        by the per-class beta (if ``self.center``).
    """
    class_labels = K.squeeze(inputs[1], axis=1)
    inputs = inputs[0]
    input_shape = K.int_shape(inputs)
    # NOTE: a `reduction_axes` list was previously built and pruned here but
    # never used anywhere in this method — removed as dead code.
    normed = inputs
    # Broadcast shape: per-sample parameters along axis 0, per-feature along
    # self.axis (when the layer has a feature axis configured).
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[0] = K.shape(inputs)[0]
    if self.axis is not None:
        broadcast_shape[self.axis] = input_shape[self.axis]
    if self.scale:
        broadcast_gamma = K.reshape(K.gather(self.gamma, class_labels),
                                    broadcast_shape)
        normed = normed * broadcast_gamma
    if self.center:
        broadcast_beta = K.reshape(K.gather(self.beta, class_labels),
                                   broadcast_shape)
        normed = normed + broadcast_beta
    return normed
def call(self, inputs, **kwargs):
    """Select the final detections: optional top-k pre-filtering followed by
    non-max suppression over the best class score per anchor."""
    backend = retina_net_tensorflow_backend()

    # TODO: support batch size > 1 — only the first sample is processed.
    boxes, classification, detections = (tensor[0] for tensor in inputs)

    # Best class score per anchor drives both top-k selection and NMS.
    scores = K.max(classification, axis=1)

    # Selecting the best anchors theoretically improves speed at the cost of
    # minor performance.
    if self.top_k:
        scores, keep = backend.top_k(scores, self.top_k, sorted=False)
        boxes = K.gather(boxes, keep)
        classification = K.gather(classification, keep)
        detections = K.gather(detections, keep)

    keep = backend.non_max_suppression(boxes, scores,
                                       max_output_size=self.max_boxes,
                                       iou_threshold=self.nms_threshold)
    detections = K.gather(detections, keep)

    # Restore the leading batch dimension on the surviving detections.
    return K.expand_dims(detections, axis=0)
def call(self, inputs):
    """Per-class convolution: each sample in the batch is convolved with the
    kernel (and bias) selected by that sample's class label."""

    def _conv_one_sample(packed):
        # packed is (sample, kernel) or (sample, kernel, bias) for one entry.
        sample = K.expand_dims(packed[0], axis=0)
        result = K.conv2d(sample,
                          packed[1],
                          strides=self.strides,
                          padding=self.padding,
                          data_format=self.data_format,
                          dilation_rate=self.dilation_rate)
        if self.bias is not None:
            result = K.bias_add(result, packed[2],
                                data_format=self.data_format)
        return K.squeeze(result, axis=0)

    x = inputs[0]
    classes = K.squeeze(inputs[1], axis=1)

    # Gather the per-sample parameters once, then map over the batch.
    elems = [x, K.gather(self.kernel, classes)]
    if self.bias is not None:
        elems.append(K.gather(self.bias, classes))
    outputs = K.map_fn(_conv_one_sample, elems, dtype='float32')

    return outputs if self.activation is None else self.activation(outputs)
def YOLOEval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=20, score_threshold=.6, iou_threshold=.5):
    '''Return NMS-filtered boxes, scores and class ids for the given YOLO output.'''
    num_layers = len(yolo_outputs)
    # Anchor indices used by each output layer (3-layer vs. 2-layer model).
    anchor_mask = ([[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3
                   else [[3, 4, 5], [1, 2, 3]])
    # Network input resolution: the first feature map has stride 32.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode every layer and pool all candidate boxes together.
    boxes, box_scores = [], []
    for layer in range(num_layers):
        layer_boxes, layer_scores = YOLOBoxesAndScores(
            yolo_outputs[layer], anchors[anchor_mask[layer]], num_classes,
            input_shape, image_shape)
        boxes.append(layer_boxes)
        box_scores.append(layer_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Per-class: threshold, suppress, and tag survivors with their class id.
    boxes_, scores_, classes_ = [], [], []
    for cls in range(num_classes):
        cls_boxes = tf.boolean_mask(boxes, mask[:, cls])
        cls_scores = tf.boolean_mask(box_scores[:, cls], mask[:, cls])
        keep = tf.image.non_max_suppression(cls_boxes, cls_scores,
                                            max_boxes_tensor,
                                            iou_threshold=iou_threshold)
        kept_scores = K.gather(cls_scores, keep)
        boxes_.append(K.gather(cls_boxes, keep))
        scores_.append(kept_scores)
        classes_.append(K.ones_like(kept_scores, dtype='int32') * cls)

    return (K.concatenate(boxes_, axis=0),
            K.concatenate(scores_, axis=0),
            K.concatenate(classes_, axis=0))
def yolo_eval(yolo_outputs, image_shape, max_boxes=10, score_threshold=.6, iou_threshold=.5):
    """ Evaluate YOLO model on given input batch and return filtered boxes.

    Parameters
    ----------
    yolo_outputs: Tuple
        Contains box_confidence, box_xy, box_wh, box_class_probs variables
        from yolo_head function
    image_shape: tf.Tensor
        A Tensor contains image shapes
    max_boxes: int
        Maximum boxes
    score_threshold: float
        Probability threshold value
    iou_threshold: float
        IOU threshold value

    Returns
    -------
    boxes, scores, classes: (tf.Tensor, tf.Tensor, tf.Tensor)
        A tuple of Tensors contains boxes, scores and classes.
    """
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
    boxes = boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = filter_boxes(box_confidence, boxes,
                                          box_class_probs,
                                          threshold=score_threshold)

    # Scale boxes back to original image shape.
    height, width = image_shape[0], image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # FIX: the previous implementation created a K.variable and ran its
    # initializer through the session on every call — an acknowledged hack.
    # tf.image.non_max_suppression only needs an int32 tensor, so a constant
    # suffices (and matches how the other eval functions in this file do it).
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    nms_index = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                             iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
def yolo_eval(yolo_outputs,  # produce size-relative predicted boxes via NMS
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    # Default anchor assignment per output layer (full vs. two-layer model).
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # The coarsest feature map has stride 32.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode each output layer and pool the candidates.
    all_boxes, all_scores = [], []
    for layer_idx in range(num_layers):
        layer_boxes, layer_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_idx], anchors[anchor_mask[layer_idx]],
            num_classes, input_shape, image_shape)
        all_boxes.append(layer_boxes)
        all_scores.append(layer_scores)
    boxes = K.concatenate(all_boxes, axis=0)
    box_scores = K.concatenate(all_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    boxes_ = []
    scores_ = []
    classes_ = []
    # TODO: use keras backend instead of tf.
    for c in range(num_classes):
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(K.ones_like(class_box_scores, 'int32') * c)

    return (K.concatenate(boxes_, axis=0),
            K.concatenate(scores_, axis=0),
            K.concatenate(classes_, axis=0))
def call(self, inputs):
    """Class-conditional convolution over a whole batch in one op.

    Each sample is convolved with the kernel (and bias) selected by its
    class id; the per-sample kernels are packed into the channel dimension
    and applied with a single depthwise convolution.

    Args:
        inputs: list ``[x, class_ids]`` where ``x`` is the image tensor and
            ``class_ids`` indexes into ``self.kernel`` / ``self.bias``.

    Raises:
        ValueError: if ``self.data_format`` is not a known data format.
    """
    # FIX: the original raise referenced an undefined local `data_format`
    # (NameError for any invalid non-None value); report the real attribute.
    if self.data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(self.data_format))
    strides = (1,) + self.strides + (1,)

    x = inputs[0]
    cls = K.squeeze(inputs[1], axis=-1)

    # Kernel preprocess: pack per-sample kernels into the channel axis.
    kernel = K.gather(self.kernel, cls)  # (bs, w, h, c)
    kernel = tf.transpose(kernel, [1, 2, 3, 0])  # (w, h, c, bs)
    kernel = K.reshape(kernel, (self.kernel_size[0],
                                self.kernel_size[1], -1))  # (w, h, c * bs)
    kernel = K.expand_dims(kernel, axis=-1)  # (w, h, c * bs, 1)

    # FIX: was misspelled 'channles_first', so the transpose never ran and
    # channels-first inputs were convolved with the wrong layout.
    if self.data_format == 'channels_first':
        x = tf.transpose(x, [0, 2, 3, 1])
    bs, w, h, c = K.int_shape(x)  # (bs, w, h, c)
    x = tf.transpose(x, [1, 2, 3, 0])  # (w, h, c, bs)
    x = K.reshape(x, (w, h, -1))  # (w, h, c * bs)
    x = K.expand_dims(x, axis=0)  # (1, w, h, c * bs)

    padding = _preprocess_padding(self.padding)
    outputs = tf.nn.depthwise_conv2d(x, kernel,
                                     strides=strides,
                                     padding=padding,
                                     rate=self.dilation_rate)  # (1, w, h, c * bs)

    # Unpack the batch dimension back out of the channel axis.
    _, w, h, _ = K.int_shape(outputs)
    outputs = K.reshape(outputs, [w, h, self.filters, -1])  # (w, h, c, bs)
    outputs = tf.transpose(outputs, [3, 0, 1, 2])  # (bs, w, h, c)

    if self.bias is not None:
        # self.bias: (num_cls, out) — gather each sample's bias and broadcast
        # it over the spatial dimensions.
        bias = tf.gather(self.bias, cls)  # (bs, out)
        bias = tf.expand_dims(bias, axis=1)
        bias = tf.expand_dims(bias, axis=1)  # (bs, 1, 1, out)
        outputs += bias

    # FIX: same 'channles_first' typo on the output restore path.
    if self.data_format == 'channels_first':
        outputs = tf.transpose(outputs, [0, 3, 1, 2])
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs, training=None):
    """Embedding lookup with spectral normalization of the embedding matrix.

    FIX: the body referenced ``training`` but it was never a parameter, so
    every call raised NameError. Added ``training=None`` (the keyword Keras
    already passes to ``call``), which is backward compatible.

    Args:
        inputs: integer tensor of indices (cast to int32 if needed).
        training: Keras training-phase flag; when falsy the power-iteration
            vector ``self.u`` is not updated.

    Returns:
        The rows of the spectrally-normalized embedding matrix at ``inputs``.
    """
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')

    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        # According to the paper, a single power-iteration step is enough.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    W_shape = self.embeddings.shape.as_list()
    # Flatten the tensor so the largest singular value is estimated over
    # a 2-D matrix.
    W_reshaped = K.reshape(self.embeddings, [-1, W_shape[-1]])
    _u, _v = power_iteration(W_reshaped, self.u)
    # Calculate sigma (estimate of the largest singular value).
    sigma = K.dot(_v, W_reshaped)
    sigma = K.dot(sigma, K.transpose(_u))
    # Normalize the weight matrix by sigma.
    W_bar = W_reshaped / sigma
    # Reshape the weight tensor back; during training also persist the
    # updated power-iteration vector before the reshape is consumed.
    if training in {0, False}:
        W_bar = K.reshape(W_bar, W_shape)
    else:
        with tf.control_dependencies([self.u.assign(_u)]):
            W_bar = K.reshape(W_bar, W_shape)
    self.embeddings = W_bar
    out = K.gather(self.embeddings, inputs)
    return out
def compute_mask_loss(boxes, masks, annotations, masks_target, width, height, iou_threshold=0.5, mask_size=(28, 28)):
    """Binary cross-entropy mask loss for boxes matched to annotations by IoU."""
    # Overlap of every predicted box with every annotation.
    iou = overlap(boxes, annotations)
    argmax_overlaps_inds = K.argmax(iou, axis=1)
    max_iou = K.max(iou, axis=1)

    # Keep only boxes whose best IoU clears the threshold.
    keep = tf.where(K.greater_equal(max_iou, iou_threshold))
    boxes = tf.gather_nd(boxes, keep)
    masks = tf.gather_nd(masks, keep)
    argmax_overlaps_inds = K.cast(tf.gather_nd(argmax_overlaps_inds, keep),
                                  'int32')
    labels = K.cast(K.gather(annotations[:, 4], argmax_overlaps_inds),
                    'int32')

    # Normalize box coordinates to [0, 1] in (y1, x1, y2, x2) order, as
    # expected by tf.image.crop_and_resize.
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    h_scale = K.cast(height, dtype=K.floatx()) - 1
    w_scale = K.cast(width, dtype=K.floatx()) - 1
    boxes = K.stack([y1 / h_scale,
                     x1 / w_scale,
                     (y2 - 1) / h_scale,
                     (x2 - 1) / w_scale], axis=1)

    # Crop and resize masks_target; append a fake channel dimension because
    # crop_and_resize expects NHWC input.
    masks_target = K.expand_dims(masks_target, axis=3)
    masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                            argmax_overlaps_inds, mask_size)
    masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

    # Pick, per box, the predicted mask belonging to the annotated class.
    masks = tf.transpose(masks, (0, 3, 1, 2))
    label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)
    masks = tf.gather_nd(masks, label_indices)

    # Mean binary cross-entropy over all retained mask pixels.
    mask_loss = K.binary_crossentropy(masks_target, masks)
    normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
    normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
    return K.sum(mask_loss) / normalizer
def call(self, inputs):
    """Class-conditional dense layer: each sample is projected by the weight
    matrix (and bias) selected by its class id."""
    features = inputs[0]
    class_ids = K.squeeze(inputs[1], axis=1)

    # Per-sample weights: (bs, in, out).
    sample_kernels = K.gather(self.kernel, class_ids)
    # Batched matmul of (bs, 1, in) x (bs, in, out) -> (bs, 1, out).
    projected = tf.matmul(K.expand_dims(features, axis=1), sample_kernels)
    output = K.squeeze(projected, axis=1)  # (bs, out)

    if self.bias is not None:
        output += K.gather(self.bias, class_ids)

    return output if self.activation is None else self.activation(output)
def _filter_detections(scores, labels):
    """Filter one class's detections: score threshold, then optional NMS.

    Relies on the enclosing scope for `boxes`, `nms`, `score_threshold`,
    `max_detections` and `nms_threshold`. Returns a (k, 2) tensor of
    [anchor_index, label] pairs.
    """
    # Anchors whose score clears the threshold.
    indices = tf.where(K.greater(scores, score_threshold))

    if nms:
        filtered_boxes = tf.gather_nd(boxes, indices)
        filtered_scores = K.gather(scores, indices)[:, 0]

        # Suppress overlapping boxes, keeping at most max_detections.
        nms_indices = tf.image.non_max_suppression(
            filtered_boxes, filtered_scores,
            max_output_size=max_detections,
            iou_threshold=nms_threshold)

        # Keep only the anchor indices that survived NMS.
        indices = K.gather(indices, nms_indices)

    # Pair each surviving anchor index with its label.
    labels = tf.gather_nd(labels, indices)
    return K.stack([indices[:, 0], labels], axis=1)
def get_total_loss(content_losses, style_losses, total_var_loss,
                   content_weights, style_weights, tv_weights, class_targets):
    """Combine content, style and total-variation losses into one scalar.

    Each loss term is weighted by the per-class weight gathered via
    ``class_targets`` before being accumulated.

    Returns:
        (total_loss, weighted_content_losses, weighted_style_losses,
         weighted_tv_loss)
    """
    total_loss = K.variable(0.)
    # FIX: these accumulators were appended to and returned but never
    # initialized, raising NameError on the first iteration.
    weighted_content_losses = []
    weighted_style_losses = []

    # Compute content losses
    for loss in content_losses:
        weighted_loss = K.mean(K.gather(content_weights, class_targets) * loss)
        weighted_content_losses.append(weighted_loss)
        total_loss += weighted_loss

    # Compute style losses
    for loss in style_losses:
        weighted_loss = K.mean(K.gather(style_weights, class_targets) * loss)
        weighted_style_losses.append(weighted_loss)
        total_loss += weighted_loss

    # Compute total-variation loss
    weighted_tv_loss = K.mean(
        K.gather(tv_weights, class_targets) * total_var_loss)
    total_loss += weighted_tv_loss

    return (total_loss, weighted_content_losses, weighted_style_losses,
            weighted_tv_loss)
def call(self, inputs):
    """Embedding lookup with optional row-level dropout on the embedding matrix.

    During training (``self.training``), whole vocabulary rows are dropped
    with probability ``self.dropout`` and the survivors rescaled by
    ``1 / retain_p`` so the expected embedding is unchanged.
    """
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    # FIX: boundary was `> 1.`, which let dropout == 1.0 through even though
    # the warning message (and the guard below) require the range [0, 1).
    if self.dropout < 0. or self.dropout >= 1.:
        logging.warning('WARNING: value of dropout not in [0, 1), '
                        'automatically set to 0.')
        self.dropout = 0.
    if 0. < self.dropout < 1. and self.training:
        retain_p = 1. - self.dropout
        # Bernoulli mask per vocabulary row, scaled to keep the expectation.
        self.B = K.random_binomial((self.input_dim,), p=retain_p) * \
            (1. / retain_p)
        self.B = K.expand_dims(self.B)
        self.W = self.embeddings * self.B
    else:
        self.W = self.embeddings
    out = K.gather(self.W, inputs)
    return out
def call(self, inputs, training=None):
    # Class-conditional batch normalization: statistics are computed/tracked
    # as in standard BN, while gamma/beta are looked up per sample from the
    # class label carried in inputs[1].
    class_labels = K.squeeze(inputs[1], axis=1)
    inputs = inputs[0]
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]
    # Determines whether broadcasting is needed.
    # NOTE(review): on Python 3 a list never compares equal to a range
    # object, so this is always True — confirm whether that is intended.
    needs_broadcasting = (sorted(reduction_axes) != range(ndim)[:-1])
    # Per-sample parameter shape: batch size along axis 0, feature count
    # along self.axis.
    param_broadcast = [1] * len(input_shape)
    param_broadcast[self.axis] = input_shape[self.axis]
    param_broadcast[0] = K.shape(inputs)[0]
    if self.scale:
        broadcast_gamma = K.reshape(K.gather(self.gamma, class_labels),
                                    param_broadcast)
    else:
        # NOTE(review): when scale/center are disabled these stay None and
        # the final `out * broadcast_gamma + broadcast_beta` would fail —
        # confirm this layer is always built with scale and center enabled.
        broadcast_gamma = None
    if self.center:
        broadcast_beta = K.reshape(K.gather(self.beta, class_labels),
                                   param_broadcast)
    else:
        broadcast_beta = None
    # Normalize without gamma/beta; the per-class parameters are applied
    # manually at the bottom.
    normed, mean, variance = K.normalize_batch_in_training(
        inputs, gamma=None, beta=None,
        reduction_axes=reduction_axes, epsilon=self.epsilon)
    if training in {0, False}:
        # NOTE(review): this early return skips the gamma/beta application
        # performed at the bottom — verify the asymmetry is intended.
        return normed
    else:
        # Track running statistics for inference.
        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_variance, variance,
                                    self.momentum)
        ], inputs)

    def normalize_inference():
        # Inference path: normalize with the tracked moving statistics.
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            return K.batch_normalization(inputs,
                                         broadcast_moving_mean,
                                         broadcast_moving_variance,
                                         beta=None, gamma=None,
                                         epsilon=self.epsilon)
        else:
            return K.batch_normalization(inputs,
                                         self.moving_mean,
                                         self.moving_variance,
                                         beta=None, gamma=None,
                                         epsilon=self.epsilon)

    # Pick the normalized form corresponding to the training phase.
    out = K.in_train_phase(normed, normalize_inference, training=training)
    return out * broadcast_gamma + broadcast_beta
def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
    # Mask loss: match predicted boxes to annotations by IoU, crop the
    # ground-truth masks to the matched boxes, and compute binary
    # cross-entropy against the predicted mask of the annotated class.
    # split up the different predicted blobs
    boxes = y_pred[:, :, :4]
    masks = y_pred[:, :, 4:]

    # split up the different blobs
    annotations = y_true[:, :, :5]
    # image width/height are packed into y_true at fixed positions
    width = K.cast(y_true[0, 0, 5], dtype='int32')
    height = K.cast(y_true[0, 0, 6], dtype='int32')
    masks_target = y_true[:, :, 7:]

    # reshape the masks back to their original size
    masks_target = K.reshape(
        masks_target,
        (K.shape(masks_target)[0] * K.shape(masks_target)[1], height, width))
    masks = K.reshape(masks, (K.shape(masks)[0] * K.shape(masks)[1],
                              mask_size[0], mask_size[1], -1))

    # batch size > 1 fix: flatten batch and detection dimensions together
    boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
    annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

    # compute overlap of boxes with annotations
    iou = overlap(boxes, annotations)
    argmax_overlaps_inds = K.argmax(iou, axis=1)
    max_iou = K.max(iou, axis=1)

    # filter those with IoU > 0.5
    indices = tf.where(K.greater_equal(max_iou, iou_threshold))
    boxes = tf.gather_nd(boxes, indices)
    masks = tf.gather_nd(masks, indices)
    argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
    argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
    # annotation column 4 holds the class label of the matched annotation
    labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
    labels = K.cast(labels, 'int32')

    # make normalized boxes in (y1, x1, y2, x2) order for crop_and_resize
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    boxes = K.stack([
        y1 / (K.cast(height, dtype=K.floatx()) - 1),
        x1 / (K.cast(width, dtype=K.floatx()) - 1),
        (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
        (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
    ], axis=1)

    # crop and resize masks_target
    # append a fake channel dimension (crop_and_resize expects NHWC)
    masks_target = K.expand_dims(masks_target, axis=3)
    masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                            argmax_overlaps_inds, mask_size)

    # remove fake channel dimension
    masks_target = masks_target[:, :, :, 0]

    # gather the predicted masks using the annotation label
    masks = tf.transpose(masks, (0, 3, 1, 2))
    label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)
    masks = tf.gather_nd(masks, label_indices)

    # compute mask loss (mean binary cross-entropy over retained pixels)
    mask_loss = K.binary_crossentropy(masks_target, masks)
    normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
    normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
    mask_loss = K.sum(mask_loss) / normalizer

    return mask_loss
def filter_detections(boxes, classification, other=None, class_specific_filter=True, nms=True, score_threshold=0.05, max_detections=300, nms_threshold=0.5):
    """Filter detections using the boxes and classification values.

    Args:
        boxes: Tensor of shape (num_boxes, 4) containing the boxes in
            (x1, y1, x2, y2) format.
        classification: Tensor of shape (num_boxes, num_classes) containing
            the classification scores.
        other: List of tensors of shape (num_boxes, ...) to filter along with
            the boxes and classification scores. Defaults to an empty list.
        class_specific_filter: Whether to perform filtering per class, or take
            the best scoring class and filter those.
        nms: Flag to enable/disable non maximum suppression.
        score_threshold: Threshold used to prefilter the boxes with.
        max_detections: Maximum number of detections to keep.
        nms_threshold: Threshold for the IoU value to determine when a box
            should be suppressed.

    Returns:
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the
        (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the
        predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered
        other[i] data.
        In case there are less than max_detections detections, the tensors
        are padded with -1's.
    """
    # FIX: `other` previously used a mutable default argument ([]).
    if other is None:
        other = []

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(K.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes, filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = K.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tf.gather_nd(labels, indices)
        indices = K.stack([indices[:, 0], labels], axis=1)
        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(K.int_shape(classification)[1]):
            scores = classification[:, c]
            labels = c * tf.ones((K.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))
        # concatenate indices to single tensor
        indices = K.concatenate(all_indices, axis=0)
    else:
        scores = K.max(classification, axis=1)
        labels = K.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(
        scores, k=K.minimum(max_detections, K.shape(scores)[0]))

    # filter input using the final set of indices
    indices = K.gather(indices[:, 0], top_indices)
    boxes = K.gather(boxes, indices)
    labels = K.gather(labels, top_indices)
    other_ = [K.gather(o, indices) for o in other]

    # zero pad the outputs so every tensor has exactly max_detections rows
    pad_size = K.maximum(0, max_detections - K.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = K.cast(labels, 'int32')
    pads = lambda x: [[0, pad_size]] + [[0, 0] for _ in range(1, K.ndim(x))]
    other_ = [tf.pad(o, pads(o), constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(K.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_