def run_k_mean(n_anchors, boxes, centroids):
    '''
    Perform one K-means assignment/update step on training ground truth
    to generate anchors.

    NOTE: Euclidean distance produces larger errors for larger boxes.
    Therefore YOLOv2 (YOLO9000) does not use it; the distance is instead

        d(box, centroid) = 1 - IoU(box, centroid)

    Every box is assigned to the closest current centroid (group 0 when no
    centroid is closer than distance 1); each new centroid is the mean
    width/height of the boxes assigned to it.

    :param n_anchors: number of anchor groups (presumably len(centroids))
    :param boxes:     iterable of ground-truth boxes exposing `w` and `h`
    :param centroids: current anchors, compared to boxes with `box_iou`
    :return:
        new_centroids: list of `n_anchors` updated anchors; an anchor whose
                       group is empty keeps w = h = 0
        avg_iou:       mean IoU between each box and the new centroid of
                       its group (0.0 when there are no boxes)
        loss:          sum over boxes of the distance to the closest
                       current centroid
    '''
    loss = 0
    groups = [[] for _ in range(n_anchors)]
    new_centroids = [Box(0, 0, 0, 0) for _ in range(n_anchors)]

    # Assignment step: accumulate w/h sums per group.
    for box in boxes:
        min_distance = 1
        group_index = 0
        for i, centroid in enumerate(centroids):
            distance = 1 - box_iou(box, centroid)  # Used in YOLO9000
            if distance < min_distance:
                min_distance = distance
                group_index = i
        groups[group_index].append(box)
        loss += min_distance
        new_centroids[group_index].w += box.w
        new_centroids[group_index].h += box.h

    # Update step: turn the sums into means (empty groups are left at 0).
    for centroid, members in zip(new_centroids, groups):
        if members:
            centroid.w /= len(members)
            centroid.h /= len(members)

    iou = 0
    counter = 0
    for anchor, members in zip(new_centroids, groups):
        for gt_box in members:
            iou += box_iou(gt_box, anchor)
            counter += 1

    # Guard against an empty `boxes` input, which used to raise
    # ZeroDivisionError here.
    avg_iou = iou / counter if counter else 0.0
    return new_centroids, avg_iou, loss
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    Every anchor is matched to the ground-truth box it overlaps most, and
    the offsets follow the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
        Labels are shifted by +1; the background / ignore thresholding is
        currently disabled (see the commented lines below), so every anchor
        receives the label of its best-matching object.
    '''
    # self.anchor_boxes is used as xyxy here (it is fed to box_iou together
    # with the xyxy ground truth and then converted with 'xyxy2xywh').
    anchors_xyxy = self.anchor_boxes
    max_ious, max_ids = box_iou(anchors_xyxy, boxes).max(1)

    matched = change_box_order(boxes[max_ids], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    offset_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    offset_wh = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([offset_xy, offset_wh], 1)

    cls_targets = 1 + labels[max_ids]
    # cls_targets[max_ious<0.5] = 0
    # ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
    # cls_targets[ignore] = -1  # mark ignored to -1
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Matching is done in two passes: first a greedy one-to-one (bipartite)
    matching on the IoU matrix, then every still-unmatched default box whose
    best IoU is >= 0.5 is assigned to its best-overlapping object.

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        Values at unmatched anchors are computed against object 0 and are
        not meaningful.
      cls_targets: (tensor) encoded class labels, sized [#anchors,];
        0 marks an unmatched (background) anchor, otherwise label + 1.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the max value index(row & col) of a 2D tensor.'''
        v, i = x.max(0)
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes_new  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    index = torch.LongTensor(len(default_boxes)).fill_(-1)

    # Pass 1: repeatedly take the globally best (anchor, object) pair and
    # zero its row and column so neither can be matched again.
    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # Pass 2: threshold matching for the remaining anchors.
    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        # Index with the boolean mask directly. The previous
        # `ious[mask.nonzero().squeeze()]` collapsed to a 1-D tensor when
        # exactly one anchor was selected, which made `.max(1)` fail.
        index[mask] = ious[mask].max(1)[1]

    boxes = boxes[index.clamp(min=0)]  # negative index not supported
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)
    cls_targets = 1 + labels[index.clamp(min=0)]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets
def processBoxes(self, boxes, max_iou = 0.25):
    """Remove at most one offending box from `boxes` (in place).

    A box is offending when
      * its best class probability is below `self.FLAGS.threshold`, or
      * it has the same class as another box and the two overlap with
        IoU > `max_iou` (the box with the smaller w + h is dropped), or
      * it has the same class as another box and lies strictly inside it.

    The function returns right after the first removal, which keeps the
    in-place `boxes.remove(...)` safe while iterating.

    :param boxes:   mutable list of boxes exposing `probs`, `class_num`,
                    `x`, `y`, `w`, `h`
    :param max_iou: IoU above which two same-class boxes are in conflict
    :return: False if a box was removed (presumably the caller invokes the
             method again until it returns True), True if nothing had to
             be removed
    """
    #self.say('processBoxes, got {} boxes'.format(len(boxes)))
    threshold = self.FLAGS.threshold

    def strictly_inside(inner, outer):
        # x/y/w/h are compared as left/top + extent, exactly as before.
        return (inner.x > outer.x) and \
               (inner.x + inner.w < outer.x + outer.w) and \
               (inner.y > outer.y) and \
               (inner.y + inner.h < outer.y + outer.h)

    for box in boxes:
        if box.probs[np.argmax(box.probs)] < threshold:
            boxes.remove(box)
            return False

    for box_one in boxes:
        for box_two in boxes:
            # Only two *different* boxes of the *same* class can conflict;
            # boxes of different classes are allowed to overlap.
            if box_one != box_two and box_one.class_num == box_two.class_num:
                if not BoundBox.isRectsIntersect(box_one, box_two):
                    continue

                iou = BoundBox.box_iou(box_one, box_two)
                if iou > max_iou:
                    # Two boxes conflict: the larger one (by w + h) survives.
                    if box_one.w + box_one.h > box_two.w + box_two.h:
                        boxes.remove(box_two)
                    else:
                        boxes.remove(box_one)
                    return False

                # One box inside another of the same class is not accepted;
                # the inner one is dropped.
                if strictly_inside(box_one, box_two):
                    boxes.remove(box_one)
                    return False
                if strictly_inside(box_two, box_one):
                    boxes.remove(box_two)
                    return False

    return True
def random_crop(img, boxes, labels, min_scale=0.3, max_aspect_ratio=2.):
    '''Randomly crop a PIL image.

    For each minimum-IoU level in (0, 0.1, 0.3, 0.5, 0.7, 0.9) up to 100
    random windows are tried; the first window whose IoU with every box is
    at least that level becomes a candidate. One candidate (the full image
    is always among them) is then chosen at random. Boxes whose centre
    falls outside the chosen window are dropped, the rest are shifted and
    clamped to the window.

    Args:
      img: (PIL.Image) image.
      boxes: (tensor) bounding boxes, sized [#obj, 4].
      labels: (tensor) bounding box labels, sized [#obj,].
      min_scale: (float) minimal image width/height scale.
      max_aspect_ratio: (float) maximum width/height aspect ratio.
        Currently unused: the aspect-ratio jitter is disabled (see the
        commented lines below), so crops keep the image aspect ratio.

    Returns:
      img: (PIL.Image) cropped image.
      boxes: (tensor) object boxes; a single all-zero box when no object
        centre lies inside the crop.
      labels: (tensor) object labels; a single 0 label in that case.
    '''
    imw, imh = img.size
    params = [(0, 0, imw, imh)]  # crop roi (x,y,w,h) out
    for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
        for _ in range(100):
            scale = random.uniform(min_scale, 1)
            # w = int(imw * scale * math.sqrt(aspect_ratio))
            # h = int(imh * scale / math.sqrt(aspect_ratio))
            w = int(imw * scale)
            h = int(imh * scale)

            # "+ 1" makes the last valid offset reachable and, more
            # importantly, keeps randrange() from raising ValueError on an
            # empty range when the crop is as large as the image
            # (scale == 1, e.g. min_scale=1).
            x = random.randrange(imw - w + 1)
            y = random.randrange(imh - h + 1)

            roi = torch.tensor([[x, y, x + w, y + h]], dtype=torch.float)
            ious = box_iou(boxes, roi)
            if ious.min() >= min_iou:
                params.append((x, y, w, h))
                break

    x, y, w, h = random.choice(params)
    img = img.crop((x, y, x + w, y + h))

    center = (boxes[:, :2] + boxes[:, 2:]) / 2
    mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
         & (center[:, 1] >= y) & (center[:, 1] <= y + h)
    if mask.any():
        boxes = boxes[mask] - torch.tensor([x, y, x, y], dtype=torch.float)
        boxes = box_clamp(boxes, 0, 0, w, h)
        labels = labels[mask]
    else:
        boxes = torch.tensor([[0, 0, 0, 0]], dtype=torch.float)
        labels = torch.tensor([0], dtype=torch.long)
    return img, boxes, labels
def GetProcessedBoxes(self, raw_boxes, raw_yolo_coords = False):
    """Filter raw detections and return the boxes that survive.

    A raw box is kept as a candidate when its best class probability is at
    least `self.FLAGS.threshold`. A candidate is then rejected when another
    candidate of the same class intersects it and either
      * their IoU exceeds `self.FLAGS.slice_max_iou` and the other box's
        best probability is not lower, or
      * the candidate lies inside the other box (`isMeInsideThat`).

    Every positive best probability is echoed to stdout ('prob', value).

    :param raw_boxes: iterable of boxes exposing `probs` and `class_num`
    :param raw_yolo_coords: forwarded to `BoundBox.isRectsIntersect` and
                            `isMeInsideThat`
    :return: list of accepted boxes, in their original relative order
    """
    flags = self.FLAGS
    threshold = flags.threshold
    max_iou = flags.slice_max_iou

    # non zero boxes
    candidates = []
    for raw in raw_boxes:
        best = raw.probs[np.argmax(raw.probs)]
        if best > 0.0:
            print('prob', best)
        if best >= threshold:
            candidates.append(raw)

    def rejected(position, current):
        # True as soon as one same-class rival beats or contains `current`.
        for rival_position, rival in enumerate(candidates):
            if position != rival_position and current.class_num == rival.class_num:
                if BoundBox.isRectsIntersect(current, rival, raw_yolo_coords):
                    if BoundBox.box_iou(current, rival) > max_iou:
                        own_prob = current.probs[np.argmax(current.probs)]
                        rival_prob = rival.probs[np.argmax(rival.probs)]
                        if own_prob <= rival_prob:
                            return True
                    if current.isMeInsideThat(rival, raw_yolo_coords):
                        # One box inside other while them both the same
                        # class? It's definitely not ok
                        return True
        return False

    return [box for position, box in enumerate(candidates)
            if not rejected(position, box)]
def findboxes(self, net_out):
    """Decode a flat YOLOv1 output vector into boxes and run per-class NMS.

    `net_out` is split into class probabilities (S*S*C values),
    confidences (S*S*B values) and coordinates (the remainder, reshaped to
    [S*S, B, 4]). Class scores not above `self.FLAGS.threshold` are zeroed,
    then for every class, boxes overlapping a higher-scoring box with
    IoU >= 0.4 get that class score set to 0.

    :param net_out: flat array supporting slicing and `reshape`
    :return: list of S*S*B BoundBox objects (sorted by the last class
             processed), with suppressed scores set to 0
    """
    meta, FLAGS = self.meta, self.FLAGS
    threshold = FLAGS.threshold
    sqrt = meta['sqrt'] + 1
    C, B, S = meta['classes'], meta['num'], meta['side']

    SS = S * S  # number of grid cells
    prob_size = SS * C  # class probabilities
    conf_end = prob_size + SS * B  # confidences for each grid cell end here

    # net_out = net_out[0]
    probs = net_out[0:prob_size].reshape([SS, C])
    confs = net_out[prob_size:conf_end].reshape([SS, B])
    cords = net_out[conf_end:].reshape([SS, B, 4])

    boxes = []
    for grid in range(SS):
        col, row = grid % S, grid // S
        for b in range(B):
            bx = BoundBox(C)
            bx.c = confs[grid, b]
            bx.x = (cords[grid, b, 0] + col) / S
            bx.y = (cords[grid, b, 1] + row) / S
            bx.w = cords[grid, b, 2]**sqrt
            bx.h = cords[grid, b, 3]**sqrt
            scores = probs[grid, :] * bx.c
            scores *= (scores > threshold)
            bx.probs = scores
            boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for bx in boxes:
            bx.class_num = c
        boxes = sorted(boxes, key=prob_compare, reverse=True)
        for i, boxi in enumerate(boxes):
            if boxi.probs[c] == 0:
                continue
            for boxj in boxes[i + 1:]:
                if box_iou(boxi, boxj) >= .4:
                    boxj.probs[c] = 0.

    return boxes
def encode(self, boxes, labels, input_size):
    """Encode target bounding boxes and class labels.

    Each anchor is matched to the ground-truth box with the highest IoU and
    the offsets follow the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj, ].
      input_size: (int/tuple) model input size of (w, h), should be the same.

    Returns:
      loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4].
      cls_trues: (tensor, float32) encoded class labels, sized [#anchors, ]:
        0 where the best IoU is below 0.5, -1 (ignored) where it lies
        strictly between 0.4 and 0.5, otherwise the matched label.
    """
    input_size = _make_list_input_size(input_size)
    boxes = tf.reshape(boxes, [-1, 4])
    anchor_boxes = self._get_anchor_boxes(input_size)

    boxes = change_box_order(boxes, 'xyxy2xywh')
    boxes = boxes * tf.tile(input_size, [2])  # scaled back to original size

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ids = tf.argmax(ious, axis=1)
    max_ious = tf.reduce_max(ious, axis=1)

    matched = tf.gather(boxes, max_ids)  # broadcast automatically, [#anchors, 4]
    anchor_xy = anchor_boxes[:, :2]
    anchor_wh = anchor_boxes[:, 2:]
    loc_trues = tf.concat(
        [(matched[:, :2] - anchor_xy) / anchor_wh,
         tf.log(matched[:, 2:] / anchor_wh)], 1)

    cls_trues = tf.gather(labels, max_ids)  # TODO: check if needs add 1 here
    background = max_ious < 0.5
    cls_trues = tf.where(background, tf.zeros_like(cls_trues), cls_trues)
    # ignore ious between (0.4, 0.5), and marked as -1
    ignore = (max_ious > 0.4) & (max_ious < 0.5)
    cls_trues = tf.where(ignore, tf.ones_like(cls_trues) * -1, cls_trues)
    return loc_trues, tf.cast(cls_trues, tf.float32)
def encode(self, boxes, labels):
    '''Encode ground-truth boxes and labels against the SSD default boxes.

    Matching runs in two passes: a greedy one-to-one matching on the IoU
    matrix, followed by assigning every still-unmatched default box whose
    best IoU is >= 0.5 to its best-overlapping object. Offsets are encoded
    with variances (0.1, 0.2).

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded boxes, sized [#anchors, 4]; the values
        are meaningless where cls_targets == 0.
      cls_targets: (tensor) sized [#anchors,]; > 0 for positive anchors
        (label + 1), 0 for all others.
    '''
    anchors_xyxy = change_box_order(self.default_boxes, 'xywh2xyxy')  # stored as xywh

    ious = box_iou(anchors_xyxy, boxes)  # [#anchors, #obj]
    # index[a] = index of the box matched to anchor a, -1 while unmatched
    index = torch.LongTensor(len(anchors_xyxy)).fill_(-1)

    remaining = ious.clone()
    while True:
        # Locate the globally largest remaining IoU: best anchor per
        # object first, then the best object among those.
        best_per_obj, best_anchor_per_obj = remaining.max(0)
        obj = best_per_obj.max(0)[1].item()
        anchor = best_anchor_per_obj[obj]
        if remaining[anchor, obj] < 1e-6:
            break
        index[anchor] = obj  # record the box matched to this anchor
        # Zero the used row/column to mark them as visited; once everything
        # is zero the loop condition above terminates the search.
        remaining[anchor, :] = 0
        remaining[:, obj] = 0

    # Anchors not matched in the first pass whose IoU with some box is >= 0.5
    unmatched = (index < 0) & (ious.max(1)[0] >= 0.5)
    if unmatched.any():
        # [1] selects the argmax, i.e. the index of the best box
        index[unmatched] = ious[unmatched].max(1)[1]

    gather_index = index.clamp(min=0)  # negative index not supported
    matched = change_box_order(boxes[gather_index], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    variances = (0.1, 0.2)
    loc_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh / variances[0]
    loc_wh = torch.log(matched[:, 2:] / anchor_wh) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[gather_index]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets
def encode_(self, image, boxes, labels):
    '''Encode target bounding boxes and class labels.

    Every default box is matched to the ground-truth box with the highest
    IoU. Offsets follow the Faster RCNN box coder, additionally divided by
    the variances (0.1, 0.2):
      tx = (x - anchor_x) / anchor_w / 0.1
      ty = (y - anchor_y) / anchor_h / 0.1
      tw = log(w / anchor_w) / 0.2
      th = log(h / anchor_h) / 0.2

    Args:
      image: not used by this method.
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,]:
        label + 1 where the best IoU is >= 0.5, 0 where it is <= 0.4 and
        -1 (ignored) where it lies strictly between 0.4 and 0.5.
    '''
    anchors_xyxy = change_box_order(self.default_boxes, 'xywh2xyxy')  # stored as xywh

    max_ious, max_ids = box_iou(anchors_xyxy, boxes).max(1)
    matched = change_box_order(boxes[max_ids], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    variances = (0.1, 0.2)
    # variances = (1, 1)
    offset_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh / variances[0]
    offset_wh = torch.log(matched[:, 2:] / anchor_wh) / variances[1]
    loc_targets = torch.cat([offset_xy, offset_wh], 1)

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.5] = 0
    # ignore ious between [0.4,0.5]: mark ignored to -1
    in_ignore_band = (max_ious > 0.4) & (max_ious < 0.5)
    cls_targets[in_ignore_band] = -1
    # return loc_targets, cls_targets, self.default_boxes, default_boxes_
    return loc_targets, cls_targets
def findboxes(self, net_out):
    """Decode a YOLOv2 output tensor into boxes and run per-class NMS.

    `net_out` is reshaped to [H, W, B, -1]; the first five values of each
    cell/anchor entry are x, y, w, h and confidence, the rest are class
    logits. Class scores (softmax * confidence) not above
    `meta['thresh']` are zeroed, then for every class, boxes overlapping a
    higher-scoring box with IoU >= 0.4 get that class score set to 0.

    :param net_out: array supporting `reshape` and numpy-style indexing
    :return: list of H*W*B BoundBox objects (sorted by the last class
             processed), with suppressed scores set to 0
    """
    # meta
    meta = self.meta
    H, W, _ = meta['out_size']
    threshold = meta['thresh']
    C, B = meta['classes'], meta['num']
    anchors = meta['anchors']
    net_out = net_out.reshape([H, W, B, -1])

    boxes = []
    for row in range(H):
        for col in range(W):
            for b in range(B):
                cell = net_out[row, col, b]
                raw_x, raw_y, raw_w, raw_h, raw_c = cell[:5]
                bx = BoundBox(C)
                bx.c = expit(raw_c)
                bx.x = (col + expit(raw_x)) / W
                bx.y = (row + expit(raw_y)) / H
                bx.w = math.exp(raw_w) * anchors[2 * b + 0] / W
                bx.h = math.exp(raw_h) * anchors[2 * b + 1] / H
                scores = _softmax(cell[5:]) * bx.c
                scores *= scores > threshold
                bx.probs = scores
                boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for bx in boxes:
            bx.class_num = c
        boxes = sorted(boxes, key=prob_compare, reverse=True)
        for i, boxi in enumerate(boxes):
            if boxi.probs[c] == 0:
                continue
            for boxj in boxes[i + 1:]:
                if box_iou(boxi, boxj) >= .4:
                    boxj.probs[c] = 0.

    return boxes
def postprocess(self, net_out, im, save=True):
    """
    Takes net output, draw predictions, save to disk

    The flat YOLOv1 output is decoded into boxes, per-class NMS
    (IoU >= 0.4) is applied, and every box whose best class score exceeds
    `FLAGS.threshold` is either drawn on the image or, when `FLAGS.json`
    is set, collected into a JSON list.

    :param net_out: flat network output (class probs, confidences, coords)
    :param im: image path, or an already loaded image as np.ndarray.
               A path is required whenever something is written to disk
               (JSON mode or save=True), since the output name is derived
               from it.
    :param save: when False (and not in JSON mode) the annotated image is
                 returned instead of being written
    :return: the annotated image if `save` is False and JSON mode is off,
             otherwise None
    """
    meta, FLAGS = self.meta, self.FLAGS
    threshold, sqrt = FLAGS.threshold, meta['sqrt'] + 1
    C, B, S = meta['classes'], meta['num'], meta['side']
    colors, labels = meta['colors'], meta['labels']

    boxes = []
    SS = S * S  # number of grid cells
    prob_size = SS * C  # class probabilities
    conf_size = SS * B  # confidences for each grid cell
    #net_out = net_out[0]
    probs = net_out[0:prob_size]
    confs = net_out[prob_size:(prob_size + conf_size)]
    cords = net_out[(prob_size + conf_size):]
    probs = probs.reshape([SS, C])
    confs = confs.reshape([SS, B])
    cords = cords.reshape([SS, B, 4])

    for grid in range(SS):
        for b in range(B):
            bx = BoundBox(C)
            bx.c = confs[grid, b]
            bx.x = (cords[grid, b, 0] + grid % S) / S
            bx.y = (cords[grid, b, 1] + grid // S) / S
            bx.w = cords[grid, b, 2]**sqrt
            bx.h = cords[grid, b, 3]**sqrt
            p = probs[grid, :] * bx.c
            p *= (p > threshold)
            bx.probs = p
            boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for i in range(len(boxes)):
            boxes[i].class_num = c
        boxes = sorted(boxes, key=prob_compare, reverse=True)
        for i in range(len(boxes)):
            boxi = boxes[i]
            if boxi.probs[c] == 0:
                continue
            for j in range(i + 1, len(boxes)):
                boxj = boxes[j]
                if box_iou(boxi, boxj) >= .4:
                    boxes[j].probs[c] = 0.

    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else:
        imgcv = im

    h, w, _ = imgcv.shape
    json_entries = []
    for b in boxes:
        max_indx = np.argmax(b.probs)
        max_prob = b.probs[max_indx]
        if max_prob <= threshold:
            continue
        label = labels[max_indx]

        # Box centre/size are relative; convert to clamped pixel corners.
        left = max(int((b.x - b.w / 2.) * w), 0)
        right = min(int((b.x + b.w / 2.) * w), w - 1)
        top = max(int((b.y - b.h / 2.) * h), 0)
        bot = min(int((b.y + b.h / 2.) * h), h - 1)
        thick = int((h + w) // 300)
        mess = '{}'.format(label)
        if FLAGS.json:
            json_entries.append(
                ('{"label":"%s",'
                 '"topleft":{"x":%d,"y":%d},'
                 '"bottomright":{"x":%d,"y":%d}}') %
                (mess, left, top, right, bot))
            continue

        cv2.rectangle(imgcv, (left, top), (right, bot),
                      colors[max_indx], thick)
        cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                    colors[max_indx], thick // 3)

    if FLAGS.json:
        # The output name must be known before the JSON file is written;
        # it used to be assigned only further down, after this branch had
        # already read it.
        outfolder = os.path.join(FLAGS.test, 'out')
        img_name = os.path.join(outfolder, im.split('/')[-1])
        textFile = os.path.splitext(img_name)[0] + ".json"
        # Joining the entries yields "[]" for zero detections, where
        # stripping a trailing ",\n" from "[" produced the invalid "]".
        with open(textFile, 'w') as f:
            f.write("[" + ",\n".join(json_entries) + "]")
        return

    if not save:
        return imgcv

    outfolder = os.path.join(FLAGS.test, 'out')
    img_name = os.path.join(outfolder, im.split('/')[-1])
    cv2.imwrite(img_name, imgcv)
def postprocess(self, net_out, im, save=True):
    """
    Takes net output, draw net_out, save to disk

    The YOLOv2 output is decoded into boxes, per-class NMS (IoU >= 0.4)
    is applied, and every box whose best class score exceeds
    `meta['thresh']` is drawn on the image.

    :param net_out: network output, reshaped here to [H, W, B, -1]
    :param im: image path, or an already loaded image as np.ndarray
               (a path is required when save=True, since the output name
               is derived from it)
    :param save: when False the annotated image is returned instead of
                 being written to <FLAGS.test>/out/
    :return: the annotated image if `save` is False, otherwise None
    """
    # meta
    meta = self.meta
    H, W, _ = meta['out_size']
    threshold = meta['thresh']
    C, B = meta['classes'], meta['num']
    anchors = meta['anchors']
    net_out = net_out.reshape([H, W, B, -1])

    boxes = list()
    for row in range(H):
        for col in range(W):
            for b in range(B):
                bx = BoundBox(C)
                bx.x, bx.y, bx.w, bx.h, bx.c = net_out[row, col, b, :5]
                bx.c = expit(bx.c)
                bx.x = (col + expit(bx.x)) / W
                bx.y = (row + expit(bx.y)) / H
                bx.w = math.exp(bx.w) * anchors[2 * b + 0] / W
                bx.h = math.exp(bx.h) * anchors[2 * b + 1] / H
                classes = net_out[row, col, b, 5:]
                bx.probs = _softmax(classes) * bx.c
                bx.probs *= bx.probs > threshold
                boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for i in range(len(boxes)):
            boxes[i].class_num = c
        # Greedy NMS must visit boxes from the highest to the lowest score
        # so that a strong detection suppresses its weaker overlaps; with
        # an ascending sort the weakest box would win instead.
        boxes = sorted(boxes, key=prob_compare, reverse=True)
        for i in range(len(boxes)):
            boxi = boxes[i]
            if boxi.probs[c] == 0:
                continue
            for j in range(i + 1, len(boxes)):
                boxj = boxes[j]
                if box_iou(boxi, boxj) >= .4:
                    boxes[j].probs[c] = 0.

    colors = meta['colors']
    labels = meta['labels']
    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else:
        imgcv = im
    h, w, _ = imgcv.shape
    for b in boxes:
        max_indx = np.argmax(b.probs)
        max_prob = b.probs[max_indx]
        if max_prob <= threshold:
            continue
        # Single-class models are labelled generically.
        label = labels[max_indx] if C > 1 else 'object'

        # Box centre/size are relative; convert to clamped pixel corners.
        left = max(int((b.x - b.w / 2.) * w), 0)
        right = min(int((b.x + b.w / 2.) * w), w - 1)
        top = max(int((b.y - b.h / 2.) * h), 0)
        bot = min(int((b.y + b.h / 2.) * h), h - 1)
        thick = int((h + w) / 300)
        cv2.rectangle(imgcv, (left, top), (right, bot),
                      colors[max_indx], thick)
        mess = '{}'.format(label)
        cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                    colors[max_indx], thick // 3)

    if not save:
        return imgcv
    outfolder = os.path.join(self.FLAGS.test, 'out')
    img_name = os.path.join(outfolder, im.split('/')[-1])
    cv2.imwrite(img_name, imgcv)


# Legacy variant, kept for reference (disabled):
#
# def _postprocess(self, net_out, im, save = True):
#     """
#     Takes net output, draw net_out, save to disk
#     """
#     # meta
#     meta = self.meta
#     H, W, _ = meta['out_size']
#     threshold = meta['thresh']
#     C, B = meta['classes'], meta['num']
#     anchors = meta['anchors']
#     net_out = net_out.reshape([H, W, B, -1])
#
#     boxes = list()
#     for row in range(H):
#         for col in range(W):
#             for b in range(B):
#                 bx = BoundBox(C)
#                 bx.x, bx.y, bx.w, bx.h, bx.c = net_out[row, col, b, :5]
#                 bx.c = expit(bx.c)
#                 bx.x = (col + expit(bx.x)) / W
#                 bx.y = (row + expit(bx.y)) / H
#                 bx.w = math.exp(bx.w) * anchors[2 * b + 0] / W
#                 bx.h = math.exp(bx.h) * anchors[2 * b + 1] / H
#                 p = net_out[row, col, b, 5:] * bx.c
#                 mi = np.argmax(p)
#                 if p[mi] < threshold*2: continue
#                 bx.ind = mi; bx.pi = p[mi]
#                 boxes.append(bx)
#
#     # non max suppress boxes
#     boxes = sorted(boxes, cmp = prob_compare2)
#     for i in range(len(boxes)):
#         boxi = boxes[i]
#         if boxi.pi == 0: continue
#         for j in range(i + 1, len(boxes)):
#             boxj = boxes[j]
#             areaj = boxj.w * boxj.h
#             if box_intersection(boxi, boxj)/areaj >= .4:
#                 boxes[j].pi = 0.
#
#     colors = meta['colors']
#     labels = meta['labels']
#     if type(im) is not np.ndarray:
#         imgcv = cv2.imread(im)
#     else: imgcv = im
#     h, w, _ = imgcv.shape
#     for b in boxes:
#         if b.pi > 0.:
#             label = labels[b.ind]
#             left  = int ((b.x - b.w/2.) * w)
#             right = int ((b.x + b.w/2.) * w)
#             top   = int ((b.y - b.h/2.) * h)
#             bot   = int ((b.y + b.h/2.) * h)
#             if left  < 0    :  left = 0
#             if right > w - 1: right = w - 1
#             if top   < 0    :   top = 0
#             if bot   > h - 1:   bot = h - 1
#             thick = int((h+w)/300)
#             cv2.rectangle(imgcv,
#                 (left, top), (right, bot),
#                 colors[b.ind], thick)
#             mess = '{}'.format(label)
#             cv2.putText(imgcv, mess, (left, top - 12),
#                 0, 1e-3 * h, colors[b.ind], thick // 3)
#
#     if not save: return imgcv
#     outfolder = os.path.join(self.FLAGS.test, 'out')
#     img_name = os.path.join(outfolder, im.split('/')[-1])
#     cv2.imwrite(img_name, imgcv)
def eval_list(boxes, self):
    """Print IoU / recall / precision / F-score for every annotated image.

    Ground truth is loaded with `udacity_voc_csv(self.FLAGS.valAnn,
    self.meta['labels'])`. For each ground-truth box, the predictions of
    the same image and the same class are compared with `box_iou`; the
    ground-truth box counts as correct when the best IoU exceeds 0.5.

    :param boxes: predictions; each entry is indexed as entry[0] (image
                  name) and entry[1][2][0] (class followed by the corner
                  coordinates at positions 1..4)
    :param self:  object providing `FLAGS.valAnn` and `meta['labels']`
    :return: None (results are printed, one line per image)

    NOTE(review): `total` and `proposals` are reset after every image,
    while `correct` and `avg_iou` keep accumulating across images —
    confirm that this is intended.
    """
    actuals = udacity_voc_csv(self.FLAGS.valAnn, self.meta['labels'])
    # Unique image names, in order of first appearance.
    image_names = list(OrderedDict.fromkeys([entry[0] for entry in actuals]))

    iou_thresh = 0.5
    total = 0.0
    proposals = 0.0
    correct = 0.0
    avg_iou = 0.0
    groundBox = BoundBox(20)
    prdiction = BoundBox(20)

    def set_from_corners(target, corners):
        # corners are (x0, y0, x1, y1); BoundBox stores centre + size.
        target.x = (corners[2] + corners[0]) / 2
        target.y = (corners[1] + corners[3]) / 2
        target.w = (corners[2] - corners[0])
        target.h = (corners[3] - corners[1])

    for name in image_names:
        for boxgt in actuals:
            if name[1:] == boxgt[0][1:]:
                total = total + 1
                best_iou = 0
                gt_entry = boxgt[1][2][0]
                for box in boxes:
                    pred_entry = box[1][2][0]
                    if (box[0] == boxgt[0][1:]
                            and pred_entry[0] == gt_entry[0]):
                        proposals = proposals + 1
                        set_from_corners(groundBox, gt_entry[1:5])
                        set_from_corners(prdiction, pred_entry[1:5])
                        best_iou = max(box_iou(groundBox, prdiction), best_iou)
                if best_iou > iou_thresh:
                    avg_iou += best_iou
                    correct = correct + 1

        precision = 0 if proposals == 0 else 1.0 * correct / proposals
        recall = 1.0 * correct / total
        if correct == 0:
            fscore = 0
            IOU = 0
        else:
            fscore = 2.0 * precision * recall / (precision + recall)
            IOU = avg_iou / correct
        proposals = 0
        total = 0
        print(
            "Image no:", name[1:],
            "IOU: %f, Recal: %f, Precision: %f, Fscore: %f" %
            (IOU, recall, precision, fscore))
def interpret_output(output):
    """Turn a region-layer output tensor into a list of detections.

    `output` is indexed as [row, col, box, value]: the first `cfg.coords`
    values are box coordinates, the value at index `cfg.coords` is the
    objectness scale, and the values after it are class probabilities.
    Scores (class probability * scale) at or above `cfg.threshold` are
    kept, sorted by decreasing score and filtered with class-agnostic NMS
    (`cfg.iou_threshold`).

    NOTE: the coordinate slice of `output` is a view, so decoding the
    boxes with `get_region_box` overwrites those values in `output`.

    :param output: array of shape (cell_size, cell_size, boxes_per_cell,
                   coords + 1 + num_class) — presumably a numpy array
    :return: list of [class_name, b0, b1, b2, b3, score] with b0..b3 the
             decoded box values returned by `get_region_box`
    """
    grid_shape = (cfg.cell_size, cfg.cell_size, cfg.boxes_per_cell,
                  cfg.num_class)
    class_probs = np.reshape(output[:, :, :, cfg.coords + 1:], grid_shape)
    scales = output[:, :, :, cfg.coords]

    # Debug output; written so it prints the same under Python 2 and 3.
    print("probs %s" % (grid_shape,))
    probs = class_probs * scales[:, :, :, np.newaxis]
    print(probs[probs > 0.5])
    print(np.where(probs > 0.5))

    boxes = output[:, :, :, :cfg.coords]
    for row in range(cfg.cell_size):
        for col in range(cfg.cell_size):
            for n in range(cfg.boxes_per_cell):
                boxes[row, col, n] = get_region_box(boxes, col, row, n,
                                                    cfg.anchors)

    filter_mat_probs = np.array(probs >= cfg.threshold, dtype='bool')
    rows, cols, slots, class_ids = np.nonzero(filter_mat_probs)
    boxes_filtered = boxes[rows, cols, slots]
    probs_filtered = probs[filter_mat_probs]
    # np.nonzero and boolean indexing enumerate the hits in the same
    # (row-major) order, so the 4th index is the class of each score.
    # Taking argmax of the boolean mask instead gave every hit of a box
    # the id of its *first* class above threshold.
    classes_num_filtered = class_ids

    order = np.argsort(probs_filtered)[::-1]  # best score first
    boxes_filtered = boxes_filtered[order]
    probs_filtered = probs_filtered[order]
    classes_num_filtered = classes_num_filtered[order]

    # Greedy NMS: a kept box zeroes every later box it overlaps too much.
    for i in range(len(boxes_filtered)):
        if probs_filtered[i] == 0:
            continue
        for j in range(i + 1, len(boxes_filtered)):
            if box.box_iou(boxes_filtered[i],
                           boxes_filtered[j]) > cfg.iou_threshold:
                probs_filtered[j] = 0.0

    filter_iou = np.array(probs_filtered > 0.0, dtype='bool')
    boxes_filtered = boxes_filtered[filter_iou]
    probs_filtered = probs_filtered[filter_iou]
    classes_num_filtered = classes_num_filtered[filter_iou]

    result = []
    for i in range(len(boxes_filtered)):
        result.append([
            cfg.cls[classes_num_filtered[i]], boxes_filtered[i][0],
            boxes_filtered[i][1], boxes_filtered[i][2],
            boxes_filtered[i][3], probs_filtered[i]
        ])

    return result
def encode(self, boxes, labels, input_size, pos_iou_threshold=0.5, neg_iou_threshold=0.4):
    """Encode target bounding boxes and class labels.

    Each anchor is matched to the ground-truth box with the highest IoU and
    the offsets follow the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj, ].
      input_size: (int/tuple) model input size of (w, h), should be the same.
      pos_iou_threshold: anchors whose best IoU is below this get label 0.
      neg_iou_threshold: anchors whose best IoU lies strictly between this
        value and pos_iou_threshold are marked ignored (-1).

    Returns:
      loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4]; when
        `conf.use_secondbig_loss_constrain` is set, the offsets to the
        second-best box are appended, giving [#anchors, 8].
      cls_trues: (tensor, float32) encoded class labels, sized [#anchors, ].
    """
    input_size = _make_list_input_size(input_size)
    boxes = tf.reshape(boxes, [-1, 4])
    anchor_boxes = self._get_anchor_boxes(input_size)

    boxes = change_box_order(boxes, 'xyxy2xywh')
    # scaled back to original size  ####exchange these two lines????
    boxes = boxes * tf.tile(input_size, [2])

    anchor_xy = anchor_boxes[:, :2]
    anchor_wh = anchor_boxes[:, 2:]

    ious = box_iou(anchor_boxes, boxes, order='xywh')  # [#anchor, num_bboxes]
    max_ids = tf.argmax(ious, axis=1)  # [#anchor,]
    max_ious = tf.reduce_max(ious, axis=1)  # [#anchor,]

    gboxes = tf.gather(boxes, max_ids)  # broadcast automatically, [#anchors, 4]
    loc_trues = tf.concat(
        [(gboxes[:, :2] - anchor_xy) / anchor_wh,
         tf.log(gboxes[:, 2:] / anchor_wh)], 1)  # [#anchors, 4]

    cls_trues = tf.gather(labels, max_ids)  # TODO: check if needs add 1 here
    cls_trues = tf.where(max_ious < pos_iou_threshold,
                         tf.zeros_like(cls_trues), cls_trues)
    # ignore ious between (0.4, 0.5), and marked as -1
    ignore = (max_ious > neg_iou_threshold) & (max_ious < pos_iou_threshold)
    cls_trues = tf.where(ignore, tf.ones_like(cls_trues) * -1, cls_trues)
    cls_trues = tf.cast(cls_trues, tf.float32)

    # second bigger iou
    if conf.use_secondbig_loss_constrain:
        # Subtracting a one-hot of the best match (1 at the argmax column)
        # pushes it below the other entries, so argmax now yields the
        # second-best box.
        num_boxes = tf.shape(ious, out_type=tf.int32)[1]
        ious = ious - tf.one_hot(max_ids, num_boxes)
        second_max_ids = tf.argmax(ious, axis=1)  # [#anchor,]
        sec_gboxes = tf.gather(boxes, second_max_ids)  # [#anchors, 4]
        sec_loc_trues = tf.concat(
            [(sec_gboxes[:, :2] - anchor_xy) / anchor_wh,
             tf.log(sec_gboxes[:, 2:] / anchor_wh)], 1)
        loc_trues = tf.concat([loc_trues, sec_loc_trues], 1)

    return loc_trues, cls_trues
def postprocess(self, net_out, im, save=True):
    """
    Takes net output, draw predictions, save to disk

    The flat YOLOv1 output is decoded into boxes, per-class NMS
    (IoU >= 0.4) is applied, and every box whose best class score exceeds
    the label-specific threshold `_thresh.get(label, FLAGS.threshold)` is
    drawn on the image.

    :param net_out: flat network output (class probs, confidences, coords)
    :param im: image path, or an already loaded image as np.ndarray
               (a path is required when save=True, since the output name
               is derived from it)
    :param save: when False the annotated image is returned instead of
                 being written to <FLAGS.test>/out/
    :return: the annotated image if `save` is False, otherwise None
    """
    meta, FLAGS = self.meta, self.FLAGS
    threshold, sqrt = FLAGS.threshold, meta['sqrt'] + 1
    C, B, S = meta['classes'], meta['num'], meta['side']
    colors, labels = meta['colors'], meta['labels']

    boxes = []
    SS = S * S  # number of grid cells
    prob_size = SS * C  # class probabilities
    conf_size = SS * B  # confidences for each grid cell
    #net_out = net_out[0]
    probs = net_out[0:prob_size]
    confs = net_out[prob_size:(prob_size + conf_size)]
    cords = net_out[(prob_size + conf_size):]
    probs = probs.reshape([SS, C])
    confs = confs.reshape([SS, B])
    cords = cords.reshape([SS, B, 4])

    for grid in range(SS):
        for b in range(B):
            bx = BoundBox(C)
            bx.c = confs[grid, b]
            bx.x = (cords[grid, b, 0] + grid % S) / S
            bx.y = (cords[grid, b, 1] + grid // S) / S
            bx.w = cords[grid, b, 2]**sqrt
            bx.h = cords[grid, b, 3]**sqrt
            p = probs[grid, :] * bx.c
            p *= (p > threshold)
            bx.probs = p
            boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for i in range(len(boxes)):
            boxes[i].class_num = c
        # Greedy NMS must visit boxes from the highest to the lowest score
        # so that a strong detection suppresses its weaker overlaps; with
        # an ascending sort the weakest box would win instead.
        boxes = sorted(boxes, key=prob_compare, reverse=True)
        for i in range(len(boxes)):
            boxi = boxes[i]
            if boxi.probs[c] == 0:
                continue
            for j in range(i + 1, len(boxes)):
                boxj = boxes[j]
                if box_iou(boxi, boxj) >= .4:
                    boxes[j].probs[c] = 0.

    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else:
        imgcv = im

    h, w, _ = imgcv.shape
    for b in boxes:
        max_indx = np.argmax(b.probs)
        max_prob = b.probs[max_indx]
        label = labels[max_indx]
        if max_prob <= _thresh.get(label, threshold):
            continue

        # Box centre/size are relative; convert to clamped pixel corners.
        left = max(int((b.x - b.w / 2.) * w), 0)
        right = min(int((b.x + b.w / 2.) * w), w - 1)
        top = max(int((b.y - b.h / 2.) * h), 0)
        bot = min(int((b.y + b.h / 2.) * h), h - 1)
        thick = int((h + w) // 150)
        cv2.rectangle(imgcv, (left, top), (right, bot),
                      colors[max_indx], thick)
        mess = '{}'.format(label)
        cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                    colors[max_indx], thick // 3)

    if not save:
        return imgcv
    outfolder = os.path.join(FLAGS.test, 'out')
    img_name = os.path.join(outfolder, im.split('/')[-1])
    cv2.imwrite(img_name, imgcv)
def postprocess(self, net_out, im, save = True):
    """
    Takes net output, draw net_out, save to disk

    The YOLOv2 output is decoded into boxes, per-class NMS (IoU >= 0.4)
    is applied, and every box whose best class score exceeds
    `meta['thresh']` is either drawn on the image or, when
    `self.FLAGS.json` is set, collected into a JSON list written next to
    the output image name.

    :param net_out: network output, reshaped here to [H, W, B, -1]
    :param im: image path, or an already loaded image as np.ndarray.
               A path is required whenever something is written to disk
               (JSON mode or save=True), since the output name is derived
               from it.
    :param save: when False (and not in JSON mode) the annotated image is
                 returned instead of being written
    :return: the annotated image if `save` is False and JSON mode is off,
             otherwise None
    """
    # meta
    meta = self.meta
    H, W, _ = meta['out_size']
    threshold = meta['thresh']
    C, B = meta['classes'], meta['num']
    anchors = meta['anchors']
    net_out = net_out.reshape([H, W, B, -1])

    boxes = list()
    for row in range(H):
        for col in range(W):
            for b in range(B):
                bx = BoundBox(C)
                bx.x, bx.y, bx.w, bx.h, bx.c = net_out[row, col, b, :5]
                bx.c = expit(bx.c)
                bx.x = (col + expit(bx.x)) / W
                bx.y = (row + expit(bx.y)) / H
                bx.w = math.exp(bx.w) * anchors[2 * b + 0] / W
                bx.h = math.exp(bx.h) * anchors[2 * b + 1] / H
                classes = net_out[row, col, b, 5:]
                bx.probs = _softmax(classes) * bx.c
                bx.probs *= bx.probs > threshold
                boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for i in range(len(boxes)):
            boxes[i].class_num = c
        boxes = sorted(boxes, key = prob_compare, reverse = True)
        for i in range(len(boxes)):
            boxi = boxes[i]
            if boxi.probs[c] == 0: continue
            for j in range(i + 1, len(boxes)):
                boxj = boxes[j]
                if box_iou(boxi, boxj) >= .4:
                    boxes[j].probs[c] = 0.

    colors = meta['colors']
    labels = meta['labels']
    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else: imgcv = im
    h, w, _ = imgcv.shape

    json_entries = []
    for b in boxes:
        max_indx = np.argmax(b.probs)
        max_prob = b.probs[max_indx]
        if max_prob <= threshold:
            continue
        label = labels[max_indx]

        # Box centre/size are relative; convert to clamped pixel corners.
        left  = max(int((b.x - b.w/2.) * w), 0)
        right = min(int((b.x + b.w/2.) * w), w - 1)
        top   = max(int((b.y - b.h/2.) * h), 0)
        bot   = min(int((b.y + b.h/2.) * h), h - 1)
        thick = int((h+w)/300)
        mess = '{}'.format(label)
        if self.FLAGS.json:
            json_entries.append(
                ('{"label":"%s",'
                 '"topleft":{"x":%d,"y":%d},'
                 '"bottomright":{"x":%d,"y":%d}}') %
                (mess, left, top, right, bot))
            continue

        cv2.rectangle(imgcv,
            (left, top), (right, bot),
            colors[max_indx], thick)
        cv2.putText(imgcv, mess, (left, top - 12),
            0, 1e-3 * h, colors[max_indx], thick//3)

    if self.FLAGS.json:
        outfolder = os.path.join(self.FLAGS.test, 'out')
        img_name = os.path.join(outfolder, im.split('/')[-1])
        textFile = os.path.splitext(img_name)[0] + ".json"
        # Joining the entries yields "[]" for zero detections, where
        # stripping a trailing ",\n" from "[" produced the invalid "]".
        with open(textFile, 'w') as f:
            f.write("[" + ",\n".join(json_entries) + "]")
        return

    # Return before touching `im.split`: the output path is only needed
    # for writing, and `im` may be an ndarray when save is False.
    if not save: return imgcv

    outfolder = os.path.join(self.FLAGS.test, 'out')
    img_name = os.path.join(outfolder, im.split('/')[-1])
    cv2.imwrite(img_name, imgcv)
def postprocess(self, net_out, im, save = True, check= False):
    """
    Takes net output, draw net_out, save to disk

    The YOLOv2 output is decoded into boxes, per-class NMS (IoU >= 0.4)
    is applied, and every box whose best class score exceeds
    `meta['thresh']` is either drawn on the image or, when
    `self.FLAGS.json` is set, collected as a dict (label, confidence,
    topleft, bottomright) for a JSON dump.

    :param net_out: network output, reshaped here to [H, W, B, -1]
    :param im: image path, or an already loaded image as np.ndarray.
               A path is required whenever something is written to disk,
               since the output name is derived from it.
    :param save: when False the image is returned instead of written
    :param check: when True the JSON file is not written even in JSON mode.
                  NOTE(review): `check` is read here as gating only the
                  JSON dump — confirm against the callers.
    :return: the image if `save` is False and no JSON file was written,
             otherwise None
    """
    # meta
    meta = self.meta
    H, W, _ = meta['out_size']
    threshold = meta['thresh']
    C, B = meta['classes'], meta['num']
    anchors = meta['anchors']
    net_out = net_out.reshape([H, W, B, -1])

    boxes = list()
    for row in range(H):
        for col in range(W):
            for b in range(B):
                bx = BoundBox(C)
                bx.x, bx.y, bx.w, bx.h, bx.c = net_out[row, col, b, :5]
                bx.c = expit(bx.c)
                bx.x = (col + expit(bx.x)) / W
                bx.y = (row + expit(bx.y)) / H
                bx.w = math.exp(bx.w) * anchors[2 * b + 0] / W
                bx.h = math.exp(bx.h) * anchors[2 * b + 1] / H
                classes = net_out[row, col, b, 5:]
                bx.probs = _softmax(classes) * bx.c
                bx.probs *= bx.probs > threshold
                boxes.append(bx)

    # non max suppress boxes
    for c in range(C):
        for i in range(len(boxes)):
            boxes[i].class_num = c
        # Greedy NMS must visit boxes from the highest to the lowest score
        # so that a strong detection suppresses its weaker overlaps; with
        # an ascending sort the weakest box would win instead.
        boxes = sorted(boxes, key = prob_compare, reverse = True)
        for i in range(len(boxes)):
            boxi = boxes[i]
            if boxi.probs[c] == 0: continue
            for j in range(i + 1, len(boxes)):
                boxj = boxes[j]
                if box_iou(boxi, boxj) >= .4:
                    boxes[j].probs[c] = 0.

    colors = meta['colors']
    labels = meta['labels']
    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else: imgcv = im
    h, w, _ = imgcv.shape

    resultsForJSON = []
    for b in boxes:
        max_indx = np.argmax(b.probs)
        max_prob = b.probs[max_indx]
        if max_prob <= threshold:
            continue
        # Single-class models are labelled generically.
        label = labels[max_indx] if C > 1 else 'object'

        # Box centre/size are relative; convert to clamped pixel corners.
        left  = max(int((b.x - b.w/2.) * w), 0)
        right = min(int((b.x + b.w/2.) * w), w - 1)
        top   = max(int((b.y - b.h/2.) * h), 0)
        bot   = min(int((b.y + b.h/2.) * h), h - 1)
        thick = int((h+w)/300)
        mess = '{}'.format(label)
        #print(("label", mess, "confidence", max_prob, "topleft ","x ", left, "y ", top, "bottomright", " x", right, "y", bot))
        if self.FLAGS.json:
            resultsForJSON.append({"label": mess, "confidence": float('%.2f' % max_prob), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}})
            continue

        cv2.rectangle(imgcv,
            (left, top), (right, bot),
            colors[max_indx], thick)
        cv2.putText(imgcv, mess, (left, top - 12),
            0, 1e-3 * h, colors[max_indx],thick//3)

    if check == False and self.FLAGS.json:
        outfolder = os.path.join(self.FLAGS.test, 'out')
        img_name = os.path.join(outfolder, im.split('/')[-1])
        textJSON = json.dumps(resultsForJSON)
        textFile = os.path.splitext(img_name)[0] + ".json"
        with open(textFile, 'w') as f:
            f.write(textJSON)
        return

    # Return before touching `im.split`: the output path is only needed
    # for writing, and `im` may be an ndarray when save is False.
    if not save: return imgcv

    outfolder = os.path.join(self.FLAGS.test, 'out')
    img_name = os.path.join(outfolder, im.split('/')[-1])
    cv2.imwrite(img_name, imgcv)