def infer(model, cls_names, input, score_threshold=0.25, iou_threshold=0.213):
    """Run ``model`` on ``input`` and return class-wise NMS-filtered detections.

    Every layer returned by ``model.predict(input)`` is decoded with
    ``decode_layer``.  Only batch element 0 of each decoded layer is examined.
    A cell/anchor is kept when ``max class confidence * objectness`` exceeds
    ``score_threshold``; the survivors then go through greedy per-class
    non-maximum suppression.

    Args:
        model: Object exposing ``predict(input)`` that returns an iterable of
            raw layer outputs.
        cls_names: Indexable collection of class names; ``len(cls_names)`` is
            the number of classes and ``cls_names[i]`` labels class ``i``.
        input: Passed unchanged to ``model.predict``.  (The name shadows the
            builtin but is kept so keyword callers keep working.)
        score_threshold: Minimum score for a candidate box.
        iou_threshold: A lower-scored box of the same class is dropped when
            its IoU with an already selected box is not below this value.

    Returns:
        ``(boxes, scores, labels)`` -- three parallel lists.  ``boxes`` holds
        ``(center_x, center_y, width, height)`` tuples, ``scores`` the values
        returned by ``.numpy()`` on the score, and ``labels`` the matching
        entries of ``cls_names``.  Classes appear in index order and, within
        a class, boxes are in descending score order.
    """
    pred_boxes = [[] for _ in range(len(cls_names))]
    decoded = [
        decode_layer(layer, i)
        for i, layer in enumerate(model.predict(input))
    ]
    for xywh, obj, conf in decoded:
        # Axis 3 is the anchor axis (it is the last index before the 4 box
        # values), so read its size instead of assuming three anchors.
        gw, gh, num_anchors = xywh.shape[1:4]
        for ix in range(gw):
            for iy in range(gh):
                for ir in range(num_anchors):
                    x, y, w, h = xywh[0, ix, iy, ir]
                    cls = np.argmax(conf[0, ix, iy, ir])
                    objectness = conf[0, ix, iy, ir, cls] * obj[0, ix, iy, ir]
                    if objectness > score_threshold:
                        ltrb = [
                            x - 0.5 * w,
                            y - 0.5 * h,
                            x + 0.5 * w,
                            y + 0.5 * h,
                        ]
                        pred_boxes[cls].append((objectness.numpy(), ltrb))

    def iou(box1, box2):
        """Return the IoU of two (l, t, r, b) boxes; 0 when the union is empty."""
        l = max(box1[0], box2[0])
        t = max(box1[1], box2[1])
        r = min(box1[2], box2[2])
        b = min(box1[3], box2[3])
        inter = max(0, r - l) * max(0, b - t)
        union = ((box1[2] - box1[0]) * (box1[3] - box1[1]) +
                 (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter)
        # Degenerate boxes would otherwise divide by zero / yield NaN.
        if union > 0:
            return inter / union
        return 0.0

    boxes = []
    scores = []
    labels = []
    for cls in range(len(cls_names)):
        # Sort on the score alone so ties never compare coordinate lists.
        remaining = sorted(pred_boxes[cls], key=lambda pred: pred[0])
        while remaining:
            # Pop the best box explicitly: this guarantees progress even when
            # a box would not suppress itself through the IoU test.
            score, box = remaining.pop()
            boxes.append((
                (box[0] + box[2]) / 2,
                (box[1] + box[3]) / 2,
                box[2] - box[0],
                box[3] - box[1],
            ))
            scores.append(score)
            labels.append(cls_names[cls])
            remaining = [
                (other_score, other_box)
                for other_score, other_box in remaining
                if iou(box, other_box) < iou_threshold
            ]
    return boxes, scores, labels
def decode_prediction(prediction, num_classes, score_threshold=0.25,
                      iou_threshold=0.213):
    """Turn raw layer outputs into class-wise NMS-filtered detections.

    Each layer is decoded with ``utils.decode_layer`` and its boxes converted
    with ``utils.xywh_to_ltrb``.  A cell/anchor becomes a candidate when
    ``max class confidence * objectness`` exceeds ``score_threshold``;
    candidates then go through greedy per-class non-maximum suppression.

    NOTE(review): candidates from every batch element are pooled into the
    same per-class lists, so the result is only per-image when the batch
    size is 1 -- confirm against callers.

    Args:
        prediction: Iterable of raw layer outputs, in layer order.
        num_classes: Number of classes; class ids are ``0..num_classes - 1``.
        score_threshold: Minimum score for a candidate box.
        iou_threshold: A lower-scored box of the same class is dropped when
            its IoU with an already selected box is not below this value.

    Returns:
        ``(boxes, scores, labels)`` -- three parallel lists.  ``boxes`` holds
        four-element lists as produced by ``utils.xywh_to_ltrb``, ``scores``
        the NumPy score values, and ``labels`` integer class ids.  Classes
        appear in ascending order and, within a class, boxes are in
        descending score order.
    """
    pred_boxes = [[] for _ in range(num_classes)]
    for i, layer in enumerate(prediction):
        xywh, obj, conf = utils.decode_layer(layer, i)
        ltrb = utils.xywh_to_ltrb(xywh)
        objectness = tf.math.reduce_max(conf, axis=-1) * obj
        clss = tf.argmax(conf, axis=-1)
        detected = tf.where(objectness > score_threshold)
        # Gather every detection in one op per tensor instead of slicing
        # tensors once per detection inside a Python loop.
        det_scores = tf.gather_nd(objectness, detected).numpy()
        det_classes = tf.gather_nd(clss, detected).numpy()
        det_boxes = tf.gather_nd(ltrb, detected).numpy()
        for score, cls, box in zip(det_scores, det_classes, det_boxes):
            pred_boxes[int(cls)].append((score, list(box)))

    def iou(box1, box2):
        """Return the IoU of two (l, t, r, b) boxes; 0 when the union is empty."""
        l = max(box1[0], box2[0])
        t = max(box1[1], box2[1])
        r = min(box1[2], box2[2])
        b = min(box1[3], box2[3])
        inter = max(0, r - l) * max(0, b - t)
        union = ((box1[2] - box1[0]) * (box1[3] - box1[1]) +
                 (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter)
        # Degenerate boxes would otherwise divide by zero / yield NaN.
        if union > 0:
            return inter / union
        return 0.0

    boxes = []
    scores = []
    labels = []
    for cls in range(num_classes):
        # Sort on the score alone so ties never compare coordinate lists.
        remaining = sorted(pred_boxes[cls], key=lambda pred: pred[0])
        while remaining:
            # Pop the best box explicitly: this guarantees progress even when
            # a box would not suppress itself through the IoU test.
            score, box = remaining.pop()
            boxes.append(box)
            scores.append(score)
            labels.append(cls)
            remaining = [
                (other_score, other_box)
                for other_score, other_box in remaining
                if iou(box, other_box) < iou_threshold
            ]
    return boxes, scores, labels
def get_decode_out(self, img):
    """Run the model on ``img`` and return the decoded result as a NumPy array.

    The raw output of ``self.model`` is passed to ``decode_layer`` with
    ``num_dets=80``; the decoded object's ``.data`` is moved to the CPU and
    converted with ``.numpy()``.
    """
    raw_output = self.model(img)
    decoded = decode_layer(raw_output, num_dets=80)
    return decoded.data.cpu().numpy()
def calc_loss(layer_id, gt, preds, debug=False):
    """Compute the detection loss of one output layer against ground truth.

    The result is ``(0.05 * box_loss + 1.0 * obj_loss + 0.5 * cls_loss)``
    divided by the batch size, where

    * ``box_loss`` sums ``1 - GIoU`` between each real ground-truth box and
      the prediction at its assigned cell/anchor,
    * ``cls_loss`` sums the squared error between the predicted class vector
      and the one-hot label at those positions,
    * ``obj_loss`` sums ``(1 - obj)^2`` at assigned positions plus ``obj^2``
      at unassigned positions whose best IoU with any ground-truth box is
      below 0.7.

    Args:
        layer_id: Index of the output layer; selects the row of the
            module-level ``anchor_sizes`` table and is forwarded to
            ``utils.decode_layer``.
        gt: Ground-truth tensor whose last axis holds four box values
            followed by the class label at index 4.  A label of ``-1`` marks
            an entry that is masked out of the box and class terms.
            Presumably shaped (batch, count, >=5) with boxes as normalised
            (x, y, w, h) -- TODO confirm against the data pipeline.
        preds: Raw layer output; its first three dimensions are read as
            ``(batch, gw, gh)``.
        debug: Unused; kept for signature compatibility.

    Returns:
        Scalar loss tensor.
    """
    gt_boxes = gt[..., :4]
    gt_labels = tf.cast(gt[..., 4], tf.int32)
    gt_count = tf.shape(gt_labels)[-1]
    gt_mask = tf.where(gt_labels == -1, 0.0, 1.0)

    layer_xywh, layer_obj, layer_cls = utils.decode_layer(preds, layer_id)
    cls_count = layer_cls.shape[-1]

    pred_shape = tf.shape(preds)
    batch_size = pred_shape[0]
    gw = pred_shape[1]
    gh = pred_shape[2]

    # Grid cell containing each ground-truth box, clipped to the grid.
    ix = tf.cast(
        tf.math.floor(tf.cast(gw, tf.float32) * gt_boxes[..., 0]), tf.int32)
    iy = tf.cast(
        tf.math.floor(tf.cast(gh, tf.float32) * gt_boxes[..., 1]), tf.int32)
    ix = tf.clip_by_value(ix, 0, gw - 1)
    iy = tf.clip_by_value(iy, 0, gh - 1)

    # Anchor assignment: compare width/height only by placing both the
    # ground-truth box and every anchor at the origin.
    box_shape = tf.shape(gt_labels)
    zeros = tf.zeros_like(gt_labels, dtype=tf.float32)
    gt_origin = tf.stack(
        [zeros, zeros, gt_boxes[..., 2], gt_boxes[..., 3]], axis=-1)
    gt_shift = tf.stack([gt_origin, gt_origin, gt_origin], axis=1)

    def anchor_at_origin(ir):
        """Return anchor ``ir`` of this layer as an origin box per gt entry."""
        size = anchor_sizes[layer_id][ir]
        # Anchor sizes are divided by 608.0 to match the box units.
        width = tf.cast(tf.fill(box_shape, size[0]), dtype=tf.float32) / 608.0
        height = tf.cast(tf.fill(box_shape, size[1]), dtype=tf.float32) / 608.0
        return tf.stack([zeros, zeros, width, height], axis=-1)

    anchors = tf.stack([anchor_at_origin(ir) for ir in range(3)], axis=1)
    ious = utils.calc_ious(gt_shift, anchors)
    ious_argmax = tf.cast(tf.argmax(ious, axis=1), dtype=tf.int32)

    batch_idx = tf.tile(tf.range(batch_size)[:, tf.newaxis], [1, gt_count])
    # NOTE(review): the index order is (batch, iy, ix, anchor) while the mask
    # below is allocated as (batch, gw, gh, 3); the two only agree for square
    # grids -- confirm the axis order produced by utils.decode_layer.
    indices = tf.stack([batch_idx, iy, ix, ious_argmax], axis=-1)

    pred_boxes = tf.gather_nd(layer_xywh, indices)
    box_loss = tf.math.reduce_sum(
        gt_mask * (1.0 - utils.calc_gious(pred_boxes, gt_boxes)))

    cls_one_hot = tf.one_hot(gt_labels, cls_count)
    pred_cls = tf.gather_nd(layer_cls, indices)
    cls_diffs = tf.math.reduce_sum(
        tf.math.square(pred_cls - cls_one_hot), axis=-1)
    cls_loss = tf.math.reduce_sum(gt_mask * cls_diffs)

    # Mark the cells/anchors that own a real (label != -1) ground-truth box.
    indices_not_null = tf.gather_nd(indices, tf.where(gt_labels != -1))
    truth_mask = tf.tensor_scatter_nd_update(
        tf.zeros((batch_size, gw, gh, 3)),
        indices_not_null,
        tf.ones_like(indices_not_null, dtype=tf.float32)[:, 0])
    inv_truth_mask = 1.0 - truth_mask

    obj_loss = tf.math.reduce_sum(tf.math.square(1 - layer_obj) * truth_mask)

    # Unassigned predictions are only penalised when their best IoU with any
    # ground-truth entry is below 0.7.
    # NOTE(review): entries labelled -1 also take part in this max; harmless
    # only if such entries never overlap a prediction -- confirm.
    gt_boxes_exp = tf.tile(
        tf.reshape(gt_boxes, (batch_size, 1, 1, 1, gt_count, 4)),
        [1, gw, gh, 3, 1, 1])
    pred_boxes_exp = tf.tile(
        tf.reshape(layer_xywh, (batch_size, gw, gh, 3, 1, 4)),
        [1, 1, 1, 1, gt_count, 1])
    best_iou = tf.math.reduce_max(
        utils.calc_ious(gt_boxes_exp, pred_boxes_exp), axis=-1)
    iou_mask = tf.cast(best_iou < 0.7, tf.float32)
    obj_loss += tf.math.reduce_sum(
        tf.math.square(layer_obj) * inv_truth_mask * iou_mask)

    total = 0.05 * box_loss + 1.0 * obj_loss + 0.5 * cls_loss
    return total / tf.cast(batch_size, dtype=tf.float32)