def _load_kitti_annotation(self):
    """Load the KITTI-style label file of every image index.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._label_path>/<index>.txt`` is read.  Each line is split on single
    spaces; field 0 is the class name, fields 1 and 2 are read as truncation
    and occlusion, and fields 4-7 as ``xmin, ymin, xmax, ymax``.

    Lines whose class is not a key of ``self._class_to_idx`` are skipped, as
    are level-4 objects when ``self.mc.EXCLUDE_HARD_EXAMPLES`` is set.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` where
        ``x, y, w, h`` is the output of ``bbox_transform_inv`` (presumably
        centre-x, centre-y, width, height -- confirm against that helper).

    Raises:
        AssertionError: if a kept box has a negative minimum coordinate or a
            minimum larger than its maximum.
    """

    def _get_obj_level(obj):
        """Return a difficulty level from 1 (easiest) to 4 (hardest)."""
        height = float(obj[7]) - float(obj[5]) + 1
        truncation = float(obj[1])
        occlusion = float(obj[2])
        if height >= 40 and truncation <= 0.15 and occlusion <= 0:
            return 1
        elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
            return 2
        elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
            return 3
        else:
            return 4

    idx2annotation = {}
    for index in self._image_idx:
        filename = os.path.join(self._label_path, index + '.txt')
        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            lines = f.readlines()

        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_idx[obj[0].lower().strip()]
            except KeyError:
                # Class is not in the lookup table: ignore this object.
                continue

            if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                continue

            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls])

        idx2annotation[index] = bboxes

    return idx2annotation
def _load_pascal_annotation(self):
    """Parse the per-image XML annotation files under ``Annotations``.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._data_path>/Annotations/<index>.xml`` is parsed.  ``object``
    elements whose ``difficult`` flag is non-zero are dropped; for the rest
    the ``bndbox`` corners are shifted by -1 and converted with
    ``bbox_transform_inv``.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` entries,
        with ``cls`` looked up in ``self._class_to_idx``.

    Raises:
        AssertionError: if a box has a negative minimum coordinate (after the
            shift) or a minimum larger than its maximum.
    """

    def _zero_based(box_node, tag):
        # Make pixel indexes 0-based
        return float(box_node.find(tag).text) - 1

    annotations = {}
    for index in self._image_idx:
        xml_path = os.path.join(self._data_path, 'Annotations', index + '.xml')
        root = ET.parse(xml_path)
        kept = [
            node for node in root.findall('object')
            if int(node.find('difficult').text) == 0
        ]

        entries = []
        for node in kept:
            box_node = node.find('bndbox')
            xmin, xmax, ymin, ymax = [
                _zero_based(box_node, tag)
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            ]
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.xml' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.xml' \
                .format(ymin, ymax, index)
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            class_name = node.find('name').text.lower().strip()
            entries.append([x, y, w, h, self._class_to_idx[class_name]])

        annotations[index] = entries

    return annotations
def _load_coco_annotation(self):
    """Load the plain-text label file of every image index.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._label_path>/<index>.txt`` is read.  Each line is split on single
    spaces: field 0 is the class name and fields 1-4 are read as
    ``xmin, ymin, xmax, ymax``.  Lines whose class is not a key of
    ``self._class_to_idx`` are skipped.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` where
        ``x, y, w, h`` is the output of ``bbox_transform_inv``.

    Raises:
        AssertionError: if a kept box has a negative minimum coordinate or a
            minimum larger than its maximum.
    """
    idx2annotation = {}
    for index in self._image_idx:
        filename = os.path.join(self._label_path, index + '.txt')
        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            lines = f.readlines()

        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_idx[obj[0].lower().strip()]
            except KeyError:
                # Class is not in the lookup table: ignore this object.
                continue

            xmin = float(obj[1])
            ymin = float(obj[2])
            xmax = float(obj[3])
            ymax = float(obj[4])
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls])

        idx2annotation[index] = bboxes

    return idx2annotation
def _load_vid_annotation(self):
    """Parse the per-frame XML annotation files.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._anno_path>/<index>.xml`` is parsed and every ``object`` element
    is converted: its ``bndbox`` corners go through ``bbox_transform_inv`` and
    its ``name`` is looked up in ``self._raw_cname_to_idx``.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` entries.

    Raises:
        AssertionError: if a box has a negative minimum coordinate or a
            minimum larger than its maximum.
    """

    def _corner(box_node, tag):
        return float(box_node.find(tag).text)

    annotations = {}
    for index in self._image_idx:
        xml_path = os.path.join(self._anno_path, index + '.xml')
        root = ET.parse(xml_path)

        entries = []
        for node in root.findall('object'):
            box_node = node.find('bndbox')
            xmin, xmax, ymin, ymax = [
                _corner(box_node, tag)
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            ]
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.xml' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.xml' \
                .format(ymin, ymax, index)
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            raw_name = node.find('name').text.lower().strip()
            entries.append([x, y, w, h, self._raw_cname_to_idx[raw_name]])

        annotations[index] = entries

    return annotations
def _filter_ann(self, line_str, filter_cond=None):
    '''Parse one annotation line and classify it as true / ignore / error.

    input:
        line_str: space-separated annotation line with the fields
            ``label x y w h occ_flag vis_x vis_y vis_w vis_h``
            optionally followed by ``ignore_flag`` and an orientation angle
            (the angle is not used).
        filter_cond: mapping; the keys read here are 'lbls', 'ilbls',
            'hRng' and 'vRng' (see @ _load_caltech_annotation).  The default
            of None is not usable: the mapping is always indexed.
    return:
        [x, y, w, h, gt_class, 0]
        bboxes: [cent_x,cent_y,w,h] as produced by bbox_transform_inv
        gt_class: 0/-1,true/ignore; -2 marks an erroneous annotation, for
            which the box is returned as all zeros.
    '''
    strs = line_str.split(' ')
    str_len = len(strs)
    label = strs[0]
    x = float(strs[1])
    y = float(strs[2])
    w = float(strs[3])
    h = float(strs[4])
    occ_flag = float(strs[5])
    # Fields 6 and 7 (visible-box origin) are not needed below.
    vis_w = float(strs[8])
    vis_h = float(strs[9])

    # The ignore flag is optional; index 10 only exists when there are at
    # least 11 fields (the previous "> 9" guard raised IndexError on
    # 10-field lines, and the unused angle at index 11 did the same for
    # 11-field lines).
    ignore_flag = 0
    if str_len > 10:
        ignore_flag = float(strs[10])

    # priority: error_anno>ignore>true
    err_flag = x < 0 or y < 0 or w <= 0 or h <= 0
    err_flag = err_flag or (label != filter_cond['lbls']
                            and label != filter_cond['ilbls'])
    if err_flag:
        return [0, 0, 0, 0, -2, 0]

    # Boxes reaching beyond the hard-coded 640x480 frame, or carrying the
    # "ignore" label, are kept but marked as ignore.
    ignore_flag = ignore_flag or (x + w) > 640 or (
        y + h) > 480 or label == filter_cond['ilbls']
    # NOTE(review): the 'hRng' bounds are compared against the width ``w``;
    # confirm whether the height ``h`` was intended.
    ignore_flag = ignore_flag or (
        not filter_cond['hRng'][1] > w > filter_cond['hRng'][0])
    if occ_flag:
        vis_ratio = 1.0 * vis_w * vis_h / w / h
        ignore_flag = ignore_flag or (
            not filter_cond['vRng'][1] > vis_ratio > filter_cond['vRng'][0])

    # Clip the far corner to the frame before converting the box.
    x2 = min(x + w, 640)
    y2 = min(y + h, 480)
    x, y, w, h = bbox_transform_inv([x, y, x2, y2])
    if ignore_flag:
        return [x, y, w, h, -1, 0]
    return [x, y, w, h, 0, 0]
def _load_kitti_annotation(self):
    """Load KITTI-style labels and drop images without usable boxes.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._label_path>/<index>.txt`` is read.  An index is removed from
    ``self._image_idx`` when it ends with ``'.png'``, when its label file
    does not exist, or when no box survives filtering.  A box is skipped when
    its class is not a key of ``self._class_to_idx``, when it is a level-4
    object and ``self.mc.EXCLUDE_HARD_EXAMPLES`` is set, or when its corners
    are invalid (negative minimum, or minimum larger than maximum).

    Side effects:
        ``self._image_idx`` is replaced by a list without the removed indices.

    Returns:
        dict mapping each remaining image index to a non-empty list of
        ``[x, y, w, h, cls]`` where ``x, y, w, h`` is the output of
        ``bbox_transform_inv``.
    """

    def _get_obj_level(obj):
        """Return a difficulty level from 1 (easiest) to 4 (hardest)."""
        height = float(obj[7]) - float(obj[5]) + 1
        truncation = float(obj[1])
        occlusion = float(obj[2])
        if height >= 40 and truncation <= 0.15 and occlusion <= 0:
            return 1
        elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
            return 2
        elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
            return 3
        else:
            return 4

    idx2annotation = {}
    # A set keeps the final membership filter linear in the number of images.
    removed = set()
    for index in self._image_idx:
        filename = os.path.join(self._label_path, index + '.txt')
        if index.endswith('.png') or not os.path.exists(filename):
            removed.add(index)
            continue

        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            lines = f.readlines()

        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_idx[obj[0].lower().strip()]
            except KeyError:
                # Class is not in the lookup table: ignore this object.
                continue

            if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                continue

            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            # Invalid boxes are dropped silently instead of aborting the load.
            if xmin < 0.0 or xmin > xmax or ymin < 0.0 or ymin > ymax:
                continue
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls])

        if not bboxes:
            removed.add(index)
            continue
        idx2annotation[index] = bboxes

    self._image_idx = [
        index for index in self._image_idx if index not in removed
    ]
    return idx2annotation
def _load_kitti_annotation(self):
    """Load KITTI-style labels and record the image-to-network scale.

    For each ``index`` in ``self._image_idx`` the file
    ``<self._label_path>/<index>.txt`` is read.  Lines whose class is not a
    key of ``self._class_to_idx`` are skipped, as are level-4 objects when
    ``self.mc.EXCLUDE_HARD_EXAMPLES`` is set.

    Side effects:
        For every image that has at least one kept object, the image at
        ``self._image_path_at(index)`` is read and ``self.x_scale`` /
        ``self.y_scale`` are set to ``mc.IMAGE_WIDTH / width`` and
        ``mc.IMAGE_HEIGHT / height`` of that image.  The attributes therefore
        hold the values for the last such image.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` where
        ``x, y, w, h`` is the output of ``bbox_transform_inv``.

    Raises:
        AssertionError: if a kept box has a negative minimum coordinate or a
            minimum larger than its maximum.
    """

    def _get_obj_level(obj):
        """Return a difficulty level from 1 (easiest) to 4 (hardest)."""
        height = float(obj[7]) - float(obj[5]) + 1
        truncation = float(obj[1])
        occlusion = float(obj[2])
        if height >= 40 and truncation <= 0.15 and occlusion <= 0:
            return 1
        elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
            return 2
        elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
            return 3
        else:
            return 4

    idx2annotation = {}
    for index in self._image_idx:
        filename = os.path.join(self._label_path, index + '.txt')
        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            lines = f.readlines()

        bboxes = []
        scale_known = False
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_idx[obj[0].lower().strip()]
            except KeyError:
                # Class is not in the lookup table: ignore this object.
                continue

            if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                continue

            # The image size is the same for every object of this file, so
            # the image is decoded once, on the first kept object, instead of
            # once per object.
            if not scale_known:
                im = np.expand_dims(
                    cv2.imread(self._image_path_at(index),
                               cv2.IMREAD_GRAYSCALE), -1)
                orig_h, orig_w = [float(v) for v in im.shape[:2]]
                self.x_scale = self.mc.IMAGE_WIDTH / orig_w
                self.y_scale = self.mc.IMAGE_HEIGHT / orig_h
                scale_known = True

            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls])

        idx2annotation[index] = bboxes

    return idx2annotation
def geteval_op_list(self):
    """get all tensorflow operations regarding this model evaluation.

    Returns:
        list with the entries of (prediction_boxes, score, cls_idx_per_img,
        filenames, widths, heights, viz_op, det_boxes, det_probs, det_class)
        that are not None, in that order.
    """
    # NOTE(review): this tensor is built but neither stored nor returned.  It
    # is kept because creating it adds ops named 'bbox' to the graph; confirm
    # whether it is still needed.
    tf.transpose(tf.stack(
        util.bbox_transform_inv([
            self.det_boxes["xmins"], self.det_boxes["ymins"],
            self.det_boxes["xmaxs"], self.det_boxes["ymaxs"]
        ])), (1, 2, 0), name='bbox')

    candidates = [
        self.prediction_boxes, self.score, self.cls_idx_per_img,
        self.filenames, self.widths, self.heights, self.viz_op,
        self.det_boxes, self.det_probs, self.det_class
    ]
    # Identity test: "!= None" dispatches to the operand's own comparison
    # operator, which array-like objects may overload.  A real list is
    # returned so the result is the same on Python 2 and 3 (filter() is lazy
    # on Python 3).
    return [op for op in candidates if op is not None]
def _load_kitti_annotation(mc, filenames, class_to_idx):
    """Load the KITTI-style label file of every given image index.

    Label files are read from ``<mc.DATA_PATH>/training/label_2/<index>.txt``.
    Each line is split on single spaces; field 0 is the class name, fields 1
    and 2 are read as truncation and occlusion, and fields 4-7 as
    ``xmin, ymin, xmax, ymax``.

    Args:
        mc: configuration object; ``DATA_PATH`` and ``EXCLUDE_HARD_EXAMPLES``
            are read.
        filenames: iterable of image indices (file names without extension).
        class_to_idx: mapping from lower-cased class name to class index.
            Objects whose class is missing from it are skipped.

    Returns:
        dict mapping image index to a list of ``[x, y, w, h, cls]`` where
        ``x, y, w, h`` is the output of ``util.bbox_transform_inv``.

    Raises:
        AssertionError: if a kept box has a negative minimum coordinate or a
            minimum larger than its maximum.
    """

    def _get_obj_level(obj):
        """Return a difficulty level from 1 (easiest) to 4 (hardest)."""
        height = float(obj[7]) - float(obj[5]) + 1
        truncation = float(obj[1])
        occlusion = float(obj[2])
        if height >= 40 and truncation <= 0.15 and occlusion <= 0:
            return 1
        elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
            return 2
        elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
            return 3
        else:
            return 4

    label_path = os.path.join(mc.DATA_PATH, 'training', 'label_2')
    idx2annotation = {}
    for index in filenames:
        filename = os.path.join(label_path, index + '.txt')
        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            lines = f.readlines()

        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = class_to_idx[obj[0].lower().strip()]
            except KeyError:
                # Class is not in the lookup table: ignore this object.
                continue

            if mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                continue

            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            assert xmin >= 0.0 and xmin <= xmax, \
                'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)
            assert ymin >= 0.0 and ymin <= ymax, \
                'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)
            x, y, w, h = util.bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls])

        idx2annotation[index] = bboxes

    return idx2annotation
def drift(self, image, gt_boxes):
    """Randomly shift an image and its boxes (drift augmentation).

    With probability governed by ``mc.DRIFT_PROB`` (the inputs are returned
    untouched when a uniform draw exceeds it) a random offset ``(dx, dy)`` is
    drawn, bounded by ``mc.DRIFT_X`` / ``mc.DRIFT_Y`` and by the smallest box
    origin so that no box is pushed out of the image.  The image content is
    copied into a zero-filled canvas of size ``(H - dy, W - dx, 3)`` and the
    boxes are moved by the same offset.

    Args:
        image: 3-D array indexed as [row, column, channel].
        gt_boxes: 2-D float array, one box per row.  Columns 0 and 2 are
            scaled by the image width and columns 1 and 3 by the height, so
            they are presumably normalised corner coordinates -- confirm
            against ``bbox_transform_inv``.  The array is not modified.

    Returns:
        (image, gt_boxes): either the inputs themselves (no drift) or a new
        float32 image and a new box array divided by the new image size.

    Raises:
        AssertionError: if a box starts outside the image.
    """
    mc = self.mc
    if np.random.rand() > mc.DRIFT_PROB:
        return image, gt_boxes

    ori_height, ori_width, _ = [int(v) for v in image.shape]

    # Work on a copy: the in-place scaling below previously left the caller's
    # array in pixel units after the call.
    gt_boxes = gt_boxes.copy()
    gt_boxes[:, 0::2] *= ori_width
    gt_boxes[:, 1::2] *= ori_height
    gt_boxes = np.array([bbox_transform_inv(box) for box in gt_boxes])

    # Ensures that no ground-truth box is cut out of the image.
    max_drift_x = min(gt_boxes[:, 0] - gt_boxes[:, 2] / 2.0 + 1)
    max_drift_y = min(gt_boxes[:, 1] - gt_boxes[:, 3] / 2.0 + 1)
    assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

    # dy is drawn before dx; keep this order so seeded runs stay reproducible.
    dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
    dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

    # shift bbox
    gt_boxes[:, 0] = gt_boxes[:, 0] - dx
    gt_boxes[:, 1] = gt_boxes[:, 1] - dy

    # distort image: a positive offset crops the top/left of the source, a
    # negative one pads the top/left of the destination with zeros.
    orig_h = ori_height - dy
    orig_w = ori_width - dx
    orig_x, dist_x = max(dx, 0), max(-dx, 0)
    orig_y, dist_y = max(dy, 0), max(-dy, 0)

    distorted_im = np.zeros(
        (int(orig_h), int(orig_w), 3)).astype(np.float32)
    distorted_im[dist_y:, dist_x:, :] = image[orig_y:, orig_x:, :]
    im = distorted_im

    gt_boxes = np.array([bbox_transform(box) for box in gt_boxes])
    height, width, _ = [int(v) for v in im.shape]
    gt_boxes[:, 0::2] /= width
    gt_boxes[:, 1::2] /= height
    return im, gt_boxes
def _define_bbox(pred_bbox_delta, ANCHOR_BOX):
    """Turn predicted box deltas into boxes clipped to the image.

    ``pred_bbox_delta`` is unstacked along axis 2 into four deltas which are
    applied to the anchor columns 0-3 (read as x, y, w, h).  The result is
    converted with ``util.bbox_transform``, clipped to
    ``[0, mc.IMAGE_WIDTH - 1]`` / ``[0, mc.IMAGE_HEIGHT - 1]``, converted back
    with ``util.bbox_transform_inv`` and returned stacked and transposed with
    permutation ``(1, 2, 0)``.

    ``mc`` and ``self`` are taken from the enclosing scope.
    """
    dx, dy, dw, dh = tf.unstack(pred_bbox_delta, axis=2)

    # set_anchors(mc, scale)
    a_x, a_y = ANCHOR_BOX[:, 0], ANCHOR_BOX[:, 1]
    a_w, a_h = ANCHOR_BOX[:, 2], ANCHOR_BOX[:, 3]

    center_x = tf.identity(a_x + dx * a_w, name='bbox_cx')
    center_y = tf.identity(a_y + dy * a_h, name='bbox_cy')
    width = tf.identity(
        a_w * util.safe_exp(dw, mc.EXP_THRESH), name='bbox_width')
    height = tf.identity(
        a_h * util.safe_exp(dh, mc.EXP_THRESH), name='bbox_height')

    summaries = (
        (dx, 'delta_x'), (dy, 'delta_y'), (dw, 'delta_w'), (dh, 'delta_h'),
        (center_x, 'bbox_cx'), (center_y, 'bbox_cy'),
        (width, 'bbox_width'), (height, 'bbox_height'),
    )
    for tensor, tag in summaries:
        self._activation_summary(tensor, tag)

    with tf.variable_scope('trimming'):
        x_lo, y_lo, x_hi, y_hi = util.bbox_transform(
            [center_x, center_y, width, height])

        # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
        # pixels. Same for y.
        x_lo = tf.minimum(
            tf.maximum(0.0, x_lo), mc.IMAGE_WIDTH - 1.0, name='bbox_xmin')
        self._activation_summary(x_lo, 'box_xmin')

        y_lo = tf.minimum(
            tf.maximum(0.0, y_lo), mc.IMAGE_HEIGHT - 1.0, name='bbox_ymin')
        self._activation_summary(y_lo, 'box_ymin')

        x_hi = tf.maximum(
            tf.minimum(mc.IMAGE_WIDTH - 1.0, x_hi), 0.0, name='bbox_xmax')
        self._activation_summary(x_hi, 'box_xmax')

        y_hi = tf.maximum(
            tf.minimum(mc.IMAGE_HEIGHT - 1.0, y_hi), 0.0, name='bbox_ymax')
        self._activation_summary(y_hi, 'box_ymax')

        clipped = util.bbox_transform_inv([x_lo, y_lo, x_hi, y_hi])
        stacked = tf.stack(clipped)
        det_boxes = tf.transpose(stacked, (1, 2, 0), name='bbox')

    return det_boxes
if (IMAGE_TYPE == 'GROUND_TRUTH'): xmin = int(obj[2]) ymin = int(obj[3]) xmax = int(obj[4]) ymax = int(obj[5]) print(filename) assert xmin >= 0.0 and xmin <= xmax, \ 'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \ .format(xmin, xmax, index) assert ymin >= 0.0 and ymin <= ymax, \ 'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \ .format(ymin, ymax, index) x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax]) bboxes.append([x, y, w, h]) # print(im.size()) im = im.astype(np.float32, copy=False) im = cv2.resize(im, (1920, 1200)) color = (0, 0, 255) cv2.rectangle(im, (xmin, ymin), (xmax, ymax), color, 1) file_name = index out_file_name = os.path.join("./data/out/test_example", 'out_' + file_name + ".png") cv2.imwrite(out_file_name, im) print('Image detection output saved to {}'.format(out_file_name)) idx2annotation[index] = bboxes
def _add_yolo_interpret_graph(self):
    """Interpret yolo output.

    Reshapes ``self.preds`` to ``(N, H, W, B, 5 + C)`` and derives from it:

    * ``self.pred_conf``  -- sigmoid confidence, shape (N, H, W, B, 1)
    * ``self.bbox_x/_y``  -- sigmoid offset plus the grid column / row index
    * ``self.bbox_w/_h``  -- exp of the prediction times ``mc.ANCHOR_BOX``
    * ``self.bbox``       -- the four values above stacked on axis 4
    * ``self.raw_boxes``  -- boxes scaled by image size / grid size,
      reshaped to (N, H*W*B, 4)
    * ``self.det_boxes``  -- raw boxes passed through ``util.bbox_transform``,
      ``self._trim_bbox`` and ``util.bbox_transform_inv``
    * ``self.probs``, ``self.det_probs``, ``self.det_class`` -- class scores
      multiplied by the confidence, their (N, H*W*B, C) reshape, and the
      arg-max class index.
    """
    mc = self.mc
    with tf.variable_scope('interpret_output') as scope:
        # TODO(jeff): add summary
        N = mc.BATCH_SIZE
        H, W, B = mc.NET_OUT_SHAPE
        C = mc.CLASSES
        preds = self.preds
        preds = tf.reshape(self.preds, (N, H, W, B, 5 + C))

        # confidence
        # NOTE(review): the confidence logit is read from channel 5, which is
        # also the first channel of the class slice ``5:`` used for the
        # probabilities below, while channel 4 is never read -- confirm the
        # intended channel layout.
        self.pred_conf = tf.sigmoid(tf.reshape(preds[:, :, :, :, 5],
                                               (N, H, W, B, 1)),
                                    name='conf')

        # bbox scale
        # x / y: sigmoid of the prediction plus the index of the grid cell
        # along the W / H axis (the range tensor broadcasts over the others).
        self.bbox_x = tf.reshape(tf.add(
            tf.sigmoid(preds[:, :, :, :, 0]),
            tf.reshape(tf.to_float(tf.range(0, W, 1)), (1, 1, W, 1))),
            (N, H, W, B, 1),
            name='bbox_x_ratio')
        self.bbox_y = tf.reshape(tf.add(
            tf.sigmoid(preds[:, :, :, :, 1]),
            tf.reshape(tf.to_float(tf.range(0, H, 1)), (1, H, 1, 1))),
            (N, H, W, B, 1),
            name='bbox_y_ratio')
        # w / h: exponential of the prediction scaled by the anchor size
        # stored in the last axis of mc.ANCHOR_BOX (index 0 and 1).
        self.bbox_w = tf.reshape(tf.multiply(tf.exp(preds[:, :, :, :, 2]),
                                             mc.ANCHOR_BOX[:, :, :, 0]),
                                 (N, H, W, B, 1),
                                 name='bbox_w_ratio')
        self.bbox_h = tf.reshape(tf.multiply(tf.exp(preds[:, :, :, :, 3]),
                                             mc.ANCHOR_BOX[:, :, :, 1]),
                                 (N, H, W, B, 1),
                                 name='bbox_h_ratio')
        self.bbox = tf.stack(
            [self.bbox_x, self.bbox_y, self.bbox_w, self.bbox_h],
            axis=4,
            name='bbox_ratio')

        # bbox prediction
        # Grid units -> pixels: one grid cell covers IMAGE_WIDTH / W by
        # IMAGE_HEIGHT / H pixels.
        w_scale = float(mc.IMAGE_WIDTH) / W
        h_scale = float(mc.IMAGE_HEIGHT) / H
        self.raw_boxes = tf.reshape(tf.stack([
            self.bbox_x * w_scale, self.bbox_y * h_scale,
            self.bbox_w * w_scale, self.bbox_h * h_scale
        ], axis=4), (N, H * W * B, 4),
            name='raw_bbox')

        # trim bbox
        # Both conversions run as Python callbacks (tf.py_func) around
        # self._trim_bbox.
        self.det_boxes = tf.py_func(lambda x: util.bbox_transform_inv(x), [
            self._trim_bbox(
                tf.py_func(lambda x: util.bbox_transform(x),
                           [self.raw_boxes], tf.float32))
        ], tf.float32, name='det_boxes')

        # prob
        self.probs = tf.multiply(self._smooth_softmax(preds[:, :, :, :, 5:]),
                                 self.pred_conf,
                                 name='probs')

        # class prediction
        self.det_probs = tf.reshape(
            #tf.reduce_max(self.probs, 4),
            self.probs,
            (N, H * W * B, C),
            name='score')
        self.det_class = tf.reshape(tf.argmax(self.probs, 4),
                                    (N, H * W * B),
                                    name='class_idx')
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits the last axis of ``self.preds`` into class scores, confidence
    scores and box deltas, decodes the deltas against ``mc.ANCHOR_BOX``
    according to ``mc.ENCODING_TYPE`` ('normal', 'asymmetric_linear' or
    'asymmetric_log'), optionally decodes four extra offsets when
    ``mc.EIGHT_POINT_REGRESSION`` is set, clips the boxes to the image and
    computes the IOU against ``self.box_input``.

    Attributes set: ``pred_class_probs``, ``pred_conf``, ``pred_box_delta``,
    ``num_objects``, ``det_boxes_uncropped``, ``det_boxes``, ``ious``,
    ``det_probs``, ``det_class``.
    """
    # NOTE(review): the nesting of this block was reconstructed from syntax;
    # confirm the placement of the shared summaries after the ENCODING_TYPE
    # branches against the original layout.
    mc = self.mc

    with tf.variable_scope('interpret_output') as scope:
        preds = self.preds

        # probability
        # The first ANCHOR_PER_GRID * CLASSES channels are class scores,
        # soft-maxed per anchor.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(
                tf.reshape(preds[:, :, :, :num_class_probs],
                           [-1, mc.CLASSES])),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        # The next ANCHOR_PER_GRID channels are one confidence per anchor.
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
        self.pred_conf = tf.sigmoid(tf.reshape(
            preds[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS]),
            name='pred_confidence_score')

        # bbox_delta
        # All remaining channels; self.num_mask_params values per anchor.
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, num_confidence_scores:],
            [mc.BATCH_SIZE, mc.ANCHORS, self.num_mask_params],
            name='bbox_delta')

        # number of object. Used to normalize bbox and classification loss
        self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')

    with tf.variable_scope('bbox') as scope:
        with tf.variable_scope('stretching'):
            # Unstack the deltas: 8 values per anchor with eight-point
            # regression, 4 otherwise.  'normal' encoding predicts
            # (x, y, w, h) deltas, the other encodings predict corner deltas.
            if self.mc.EIGHT_POINT_REGRESSION:
                if mc.ENCODING_TYPE == 'normal':
                    delta_x, delta_y, delta_w, delta_h, \
                        delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                            self.pred_box_delta, axis=2)
                else:
                    delta_xmin, delta_ymin, delta_xmax, delta_ymax, \
                        delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                            self.pred_box_delta, axis=2)
            else:
                if mc.ENCODING_TYPE == 'normal':
                    delta_x, delta_y, delta_w, delta_h = tf.unstack(
                        self.pred_box_delta, axis=2)
                else:
                    delta_xmin, delta_ymin, delta_xmax, delta_ymax = tf.unstack(
                        self.pred_box_delta, axis=2)

            anchor_x = mc.ANCHOR_BOX[:, 0]
            anchor_y = mc.ANCHOR_BOX[:, 1]
            anchor_w = mc.ANCHOR_BOX[:, 2]
            anchor_h = mc.ANCHOR_BOX[:, 3]

            if mc.ENCODING_TYPE == 'asymmetric_linear':
                # Corner deltas are added linearly to the anchor corners,
                # scaled by the anchor width / height.
                xmins_a, ymins_a, xmaxs_a, ymaxs_a = util.bbox_transform(
                    np.transpose(mc.ANCHOR_BOX))
                xmins = tf.identity(xmins_a + delta_xmin * anchor_w,
                                    name='bbox_xmin_uncropped')
                ymins = tf.identity(ymins_a + delta_ymin * anchor_h,
                                    name='bbox_ymin_uncropped')
                xmaxs = tf.identity(xmaxs_a + delta_xmax * anchor_w,
                                    name='bbox_xmax_uncropped')
                ymaxs = tf.identity(ymaxs_a + delta_ymax * anchor_h,
                                    name='bbox_ymax_uncropped')
                box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                    [xmins, ymins, xmaxs, ymaxs])
                self._activation_summary(delta_xmin, 'delta_xmin')
                self._activation_summary(delta_ymin, 'delta_ymin')
                self._activation_summary(delta_xmax, 'delta_xmax')
                self._activation_summary(delta_ymax, 'delta_ymax')
            elif mc.ENCODING_TYPE == 'asymmetric_log':
                # Each corner is the anchor centre moved by
                # anchor_size * (exp(delta) - EPSILON).
                EPSILON = 0.5
                xmins = tf.identity(
                    anchor_x -
                    (anchor_w *
                     (util.safe_exp(delta_xmin, mc.EXP_THRESH) - EPSILON)),
                    name='bbox_xmin_uncropped')
                ymins = tf.identity(
                    anchor_y -
                    (anchor_h *
                     (util.safe_exp(delta_ymin, mc.EXP_THRESH) - EPSILON)),
                    name='bbox_ymin_uncropped')
                xmaxs = tf.identity(
                    anchor_x +
                    (anchor_w *
                     (util.safe_exp(delta_xmax, mc.EXP_THRESH) - EPSILON)),
                    name='bbox_xmax_uncropped')
                ymaxs = tf.identity(
                    anchor_y +
                    (anchor_h *
                     (util.safe_exp(delta_ymax, mc.EXP_THRESH) - EPSILON)),
                    name='bbox_ymax_uncropped')
                box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                    [xmins, ymins, xmaxs, ymaxs])
                self._activation_summary(delta_xmin, 'delta_xmin')
                self._activation_summary(delta_ymin, 'delta_ymin')
                self._activation_summary(delta_xmax, 'delta_xmax')
                self._activation_summary(delta_ymax, 'delta_ymax')
            else:
                # 'normal' encoding: centre shifted by delta * anchor size,
                # size scaled by exp(delta).
                box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                           name='bbox_cx')
                box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                           name='bbox_cy')
                box_width = tf.identity(
                    anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                    name='bbox_width')
                box_height = tf.identity(
                    anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                    name='bbox_height')
                self._activation_summary(delta_x, 'delta_x')
                self._activation_summary(delta_y, 'delta_y')
                self._activation_summary(delta_w, 'delta_w')
                self._activation_summary(delta_h, 'delta_h')

            self._activation_summary(box_center_x, 'bbox_cx')
            self._activation_summary(box_center_y, 'bbox_cy')
            self._activation_summary(box_width, 'bbox_width')
            self._activation_summary(box_height, 'bbox_height')

            if self.mc.EIGHT_POINT_REGRESSION:
                # The four extra offsets are decoded relative to the anchor
                # diagonal: diag * exp(delta) - EPSILON.
                EPSILON = 1e-8
                anchor_diag = (mc.ANCHOR_BOX[:, 2]**2 +
                               mc.ANCHOR_BOX[:, 3]**2)**(0.5)
                box_of1 = tf.identity(
                    (anchor_diag * util.safe_exp(delta_of1, mc.EXP_THRESH)) -
                    EPSILON,
                    name='bbox_of1')
                box_of2 = tf.identity(
                    (anchor_diag * util.safe_exp(delta_of2, mc.EXP_THRESH)) -
                    EPSILON,
                    name='bbox_of2')
                box_of3 = tf.identity(
                    (anchor_diag * util.safe_exp(delta_of3, mc.EXP_THRESH)) -
                    EPSILON,
                    name='bbox_of3')
                box_of4 = tf.identity(
                    (anchor_diag * util.safe_exp(delta_of4, mc.EXP_THRESH)) -
                    EPSILON,
                    name='bbox_of4')
                self._activation_summary(delta_of1, 'delta_of1')
                self._activation_summary(delta_of2, 'delta_of2')
                self._activation_summary(delta_of3, 'delta_of3')
                self._activation_summary(delta_of4, 'delta_of4')
                self._activation_summary(box_of1, 'box_of1')
                self._activation_summary(box_of2, 'box_of2')
                self._activation_summary(box_of3, 'box_of3')
                self._activation_summary(box_of4, 'box_of4')

        with tf.variable_scope('trimming'):
            # Corner form of the decoded boxes.  For the asymmetric encodings
            # without eight-point regression the corners computed in the
            # 'stretching' scope are reused as they are.
            if self.mc.EIGHT_POINT_REGRESSION:
                xmins, ymins, xmaxs, ymaxs, box_of1, box_of2, box_of3, box_of4 = util.bbox_transform2(
                    [
                        box_center_x, box_center_y, box_width, box_height,
                        box_of1, box_of2, box_of3, box_of4
                    ])
            else:
                if mc.ENCODING_TYPE == 'normal':
                    xmins, ymins, xmaxs, ymaxs = util.bbox_transform([
                        box_center_x, box_center_y, box_width, box_height
                    ])

            # Keep a copy of the boxes before they are clipped to the image.
            if self.mc.EIGHT_POINT_REGRESSION:
                self.det_boxes_uncropped = tf.transpose(
                    tf.stack(
                        util.bbox_transform_inv2([
                            xmins, ymins, xmaxs, ymaxs, box_of1, box_of2,
                            box_of3, box_of4
                        ])), (1, 2, 0),
                    name='bbox_uncropped')
            else:
                self.det_boxes_uncropped = tf.transpose(
                    tf.stack(
                        util.bbox_transform_inv(
                            [xmins, ymins, xmaxs, ymaxs])), (1, 2, 0),
                    name='bbox_uncropped')

            # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
            # pixels. Same for y.
            xmins = tf.minimum(tf.maximum(0.0, xmins),
                               mc.IMAGE_WIDTH - 1.0,
                               name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(tf.maximum(0.0, ymins),
                               mc.IMAGE_HEIGHT - 1.0,
                               name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                               0.0,
                               name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                               0.0,
                               name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            # Only the four corners are clipped; the extra offsets are passed
            # through unchanged.
            if self.mc.EIGHT_POINT_REGRESSION:
                self.det_boxes = tf.transpose(tf.stack(
                    util.bbox_transform_inv2([
                        xmins, ymins, xmaxs, ymaxs, box_of1, box_of2, box_of3,
                        box_of4
                    ])), (1, 2, 0),
                    name='bbox')
            else:
                self.det_boxes = tf.transpose(tf.stack(
                    util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                    (1, 2, 0),
                    name='bbox')

        with tf.variable_scope('IOU'):
            def _tensor_iou(box1, box2):
                # box1 / box2 are sequences whose first four entries are used
                # as xmin, ymin, xmax, ymax.  The result is multiplied by the
                # input mask reshaped to [BATCH_SIZE, ANCHORS].
                with tf.variable_scope('intersection'):
                    xmin = tf.maximum(box1[0], box2[0], name='xmin')
                    ymin = tf.maximum(box1[1], box2[1], name='ymin')
                    xmax = tf.minimum(box1[2], box2[2], name='xmax')
                    ymax = tf.minimum(box1[3], box2[3], name='ymax')

                    w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                    h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                    intersection = tf.multiply(w, h, name='intersection')

                with tf.variable_scope('union'):
                    w1 = tf.subtract(box1[2], box1[0], name='w1')
                    h1 = tf.subtract(box1[3], box1[1], name='h1')
                    w2 = tf.subtract(box2[2], box2[0], name='w2')
                    h2 = tf.subtract(box2[3], box2[1], name='h2')

                    union = w1 * h1 + w2 * h2 - intersection

                # mc.EPSILON keeps the division defined when the union is 0.
                return intersection/(union+mc.EPSILON) \
                    * tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

            if self.mc.EIGHT_POINT_REGRESSION:
                tensor_det_boxes = util.bbox_transform2(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform2(
                    tf.unstack(self.box_input, axis=2))
            else:
                tensor_det_boxes = util.bbox_transform(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform(
                    tf.unstack(self.box_input, axis=2))

            # self.ious must already exist (it is assigned to, not created).
            self.ious = self.ious.assign(
                _tensor_iou(tensor_det_boxes, tensor_input_boxes))
            self._activation_summary(self.ious, 'conf_score')

    with tf.variable_scope('probability') as scope:
        self._activation_summary(self.pred_class_probs, 'class_probs')

        # Final score = class probability * confidence, per anchor and class.
        probs = tf.multiply(self.pred_class_probs,
                            tf.reshape(self.pred_conf,
                                       [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                            name='final_class_prob')

        self._activation_summary(probs, 'final_class_prob')

        self.det_probs = tf.reduce_max(probs, 2, name='score')
        self.det_class = tf.argmax(probs, 2, name='class_idx')
def Get_feed_data(self):
    """Assemble one training batch.

    A shuffled batch is read with ``self.read_batch_gt_data``.  Every image
    has ``mc.BGR_MEANS`` subtracted in place, goes through
    ``self.Preprocess``, is mirrored horizontally on a random draw and is
    resized to ``(mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT)``.  The boxes follow the
    mirroring and each one is appended to a flat list as
    ``[image_no, label, 0, b0, b1, b2, b3]`` (all floats), which is handed to
    ``self.parse_gt_data``, ``self._match_bbox`` and
    ``self._sparse_to_dense``.

    Returns:
        (input_images, gt_boxes_dense, gt_labels_dense, input_mask,
        all_match_overlaps)

    Raises:
        AssertionError: if an image ends up without boxes, if the filter
            index list returned by ``Preprocess`` does not match its boxes,
            or if no anchor match was produced.
    """
    mc = self.mc
    raw_boxes, raw_labels, raw_images = self.read_batch_gt_data(shuffle=True)
    batch_gt_boxes = np.array(raw_boxes)
    batch_gt_labels = np.array(raw_labels)
    batch_image = np.array(raw_images)

    input_images, gt_data = [], []
    for img_no in range(len(batch_gt_boxes)):
        im = batch_image[img_no]
        im -= mc.BGR_MEANS

        gt_bbox = np.array(batch_gt_boxes[img_no])
        gt_label = np.array(batch_gt_labels[img_no])
        if len(gt_bbox) == 0:
            print('len(gt_bbox) == 0')
            raw_input('pause')

        im, gt_bbox, kept_idx = self.Preprocess(im, gt_bbox)
        assert len(kept_idx) == len(gt_bbox)
        assert len(gt_bbox) != 0

        # Keep only the labels of the boxes that survived preprocessing.
        gt_label = np.array([gt_label[k] for k in kept_idx])

        orig_h, orig_w, _ = (float(v) for v in im.shape)

        # mirror: scale to pixels, flip the first coordinate of the converted
        # boxes if the image is flipped, then convert and normalise back.
        gt_bbox[:, 0::2] *= orig_w
        gt_bbox[:, 1::2] *= orig_h
        centers = np.array([bbox_transform_inv(b) for b in gt_bbox])
        if np.random.randint(2) > 0.5:
            im = im[:, ::-1, :]
            centers[:, 0] = orig_w - 1 - centers[:, 0]
        gt_bbox = np.array([bbox_transform(c) for c in centers])
        gt_bbox[:, 0::2] /= orig_w
        gt_bbox[:, 1::2] /= orig_h

        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        input_images.append(im)

        # One flat row per box: image number, label, a constant 0, four
        # box values.
        gt_data.extend(
            [float(img_no), float(gt_label[j]), float(0)]
            + [float(gt_bbox[j][k]) for k in range(4)]
            for j in range(len(gt_bbox)))

    gt_boxes, gt_labels = self.parse_gt_data(gt_data)
    all_match_indices, all_match_overlaps = self._match_bbox(
        mc.ANCHOR_BOX, gt_boxes)
    assert len(all_match_indices) != 0

    gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
        gt_boxes, gt_labels, all_match_indices)
    # Checks the boxes of the last image processed in the loop above.
    assert len(gt_bbox) != 0
    return (input_images, gt_boxes_dense, gt_labels_dense, input_mask,
            all_match_overlaps)
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits the last axis of ``self.preds`` into class scores and four box
    deltas per anchor (there is no separate confidence channel in this
    variant), decodes the deltas against ``mc.ANCHOR_BOX`` and clips the
    resulting boxes to the image.

    Attributes set: ``pred_class_probs``, ``pred_box_delta``, ``det_boxes``,
    ``det_probs``, ``det_class``.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output') as scope:
        preds = self.preds

        # probability
        # With a single class a sigmoid is used; a softmax over one value
        # would always yield 1.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        if mc.CLASSES == 1:
            self.pred_class_probs = tf.reshape(
                tf.sigmoid(
                    tf.reshape(preds[:, :, :, :num_class_probs],
                               [-1, mc.CLASSES])),
                [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                name='pred_class_probs')
        else:
            self.pred_class_probs = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(preds[:, :, :, :num_class_probs],
                               [-1, mc.CLASSES])),
                [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                name='pred_class_probs')

        # bbox_delta
        # All channels after the class scores, four values per anchor.
        self.pred_box_delta = tf.reshape(preds[:, :, :, num_class_probs:],
                                         [mc.BATCH_SIZE, mc.ANCHORS, 4],
                                         name='bbox_delta')

    with tf.variable_scope('bbox') as scope:
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchor_x = mc.ANCHOR_BOX[:, 0]
            anchor_y = mc.ANCHOR_BOX[:, 1]
            anchor_w = mc.ANCHOR_BOX[:, 2]
            anchor_h = mc.ANCHOR_BOX[:, 3]

            # Centre shifted by delta * anchor size, size scaled by
            # exp(delta) (via util.safe_exp with threshold mc.EXP_THRESH).
            box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                       name='bbox_cx')
            box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                       name='bbox_cy')
            box_width = tf.identity(anchor_w *
                                    util.safe_exp(delta_w, mc.EXP_THRESH),
                                    name='bbox_width')
            box_height = tf.identity(anchor_h *
                                     util.safe_exp(delta_h, mc.EXP_THRESH),
                                     name='bbox_height')

            self._activation_summary(delta_x, 'delta_x')
            self._activation_summary(delta_y, 'delta_y')
            self._activation_summary(delta_w, 'delta_w')
            self._activation_summary(delta_h, 'delta_h')

            self._activation_summary(box_center_x, 'bbox_cx')
            self._activation_summary(box_center_y, 'bbox_cy')
            self._activation_summary(box_width, 'bbox_width')
            self._activation_summary(box_height, 'bbox_height')

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [box_center_x, box_center_y, box_width, box_height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
            # pixels. Same for y.
            xmins = tf.minimum(tf.maximum(0.0, xmins),
                               mc.IMAGE_WIDTH - 1.0,
                               name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(tf.maximum(0.0, ymins),
                               mc.IMAGE_HEIGHT - 1.0,
                               name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                               0.0,
                               name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                               0.0,
                               name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            # Back from corners to the representation produced by
            # util.bbox_transform_inv, stacked and transposed with
            # permutation (1, 2, 0).
            self.det_boxes = tf.transpose(tf.stack(
                util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                (1, 2, 0),
                name='bbox')

    with tf.variable_scope('probability') as scope:
        self._activation_summary(self.pred_class_probs, 'class_probs')

        # The class probabilities are used directly as detection scores.
        probs = self.pred_class_probs

        self.det_probs = tf.reduce_max(probs, 2, name='score')
        self.det_class = tf.argmax(probs, 2, name='class_idx')
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits ``self.preds`` along its last axis into class scores, confidence
    scores and box deltas, decodes the boxes against ``mc.ANCHOR_BOX`` and
    stores the results on ``self``:

    * ``pred_class_probs`` -- softmax, ``[BATCH_SIZE, ANCHORS, CLASSES]``.
    * ``pred_conf`` -- sigmoid, ``[BATCH_SIZE, ANCHORS]``.
    * ``pred_box_delta`` -- ``[BATCH_SIZE, ANCHORS, 4]``.
    * ``num_objects`` -- sum of ``self.input_mask``.
    * ``det_boxes`` -- decoded boxes clipped to the image bounds.
    * ``ious`` -- result of assigning the masked IOU between ``det_boxes``
      and ``self.box_input`` to the existing ``self.ious``.
    * ``det_probs`` / ``det_class`` -- max and argmax over classes of
      class probability times confidence.
    """
    cfg = self.mc

    with tf.variable_scope('interpret_output'):
        raw_output = self.preds

        # Channel layout along the last axis:
        #   [class scores | confidence scores | box deltas]
        prob_end = cfg.ANCHOR_PER_GRID * cfg.CLASSES
        class_logits = tf.reshape(
            raw_output[:, :, :, :prob_end], [-1, cfg.CLASSES])
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(class_logits),
            [cfg.BATCH_SIZE, cfg.ANCHORS, cfg.CLASSES],
            name='pred_class_probs')

        conf_end = cfg.ANCHOR_PER_GRID + prob_end
        conf_logits = tf.reshape(
            raw_output[:, :, :, prob_end:conf_end],
            [cfg.BATCH_SIZE, cfg.ANCHORS])
        self.pred_conf = tf.sigmoid(
            conf_logits, name='pred_confidence_score')

        self.pred_box_delta = tf.reshape(
            raw_output[:, :, :, conf_end:],
            [cfg.BATCH_SIZE, cfg.ANCHORS, 4],
            name='bbox_delta')

        # number of object. Used to normalize bbox and classification loss
        self.num_objects = tf.reduce_sum(
            self.input_mask, name='num_objects')

    with tf.variable_scope('bbox'):
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchors = cfg.ANCHOR_BOX
            anchor_x = anchors[:, 0]
            anchor_y = anchors[:, 1]
            anchor_w = anchors[:, 2]
            anchor_h = anchors[:, 3]

            center_x = tf.identity(
                anchor_x + delta_x * anchor_w, name='bbox_cx')
            center_y = tf.identity(
                anchor_y + delta_y * anchor_h, name='bbox_cy')
            width = tf.identity(
                anchor_w * util.safe_exp(delta_w, cfg.EXP_THRESH),
                name='bbox_width')
            height = tf.identity(
                anchor_h * util.safe_exp(delta_h, cfg.EXP_THRESH),
                name='bbox_height')

            summaries = (
                (delta_x, 'delta_x'),
                (delta_y, 'delta_y'),
                (delta_w, 'delta_w'),
                (delta_h, 'delta_h'),
                (center_x, 'bbox_cx'),
                (center_y, 'bbox_cy'),
                (width, 'bbox_width'),
                (height, 'bbox_height'),
            )
            for tensor, tag in summaries:
                self._activation_summary(tensor, tag)

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [center_x, center_y, width, height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use
            # zero-based pixels. Same for y.
            last_col = cfg.IMAGE_WIDTH - 1.0
            last_row = cfg.IMAGE_HEIGHT - 1.0

            xmins = tf.minimum(
                tf.maximum(0.0, xmins), last_col, name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(
                tf.maximum(0.0, ymins), last_row, name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(
                tf.minimum(last_col, xmaxs), 0.0, name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(
                tf.minimum(last_row, ymaxs), 0.0, name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            stacked = tf.stack(
                util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs]))
            self.det_boxes = tf.transpose(stacked, (1, 2, 0), name='bbox')

    with tf.variable_scope('IOU'):
        def _masked_iou(box_a, box_b):
            """IOU of two corner-format box lists, multiplied by input_mask."""
            with tf.variable_scope('intersection'):
                left = tf.maximum(box_a[0], box_b[0], name='xmin')
                top = tf.maximum(box_a[1], box_b[1], name='ymin')
                right = tf.minimum(box_a[2], box_b[2], name='xmax')
                bottom = tf.minimum(box_a[3], box_b[3], name='ymax')

                # Negative extents mean the boxes do not overlap.
                inter_w = tf.maximum(0.0, right - left, name='inter_w')
                inter_h = tf.maximum(0.0, bottom - top, name='inter_h')
                intersection = tf.multiply(
                    inter_w, inter_h, name='intersection')

            with tf.variable_scope('union'):
                w_a = tf.subtract(box_a[2], box_a[0], name='w1')
                h_a = tf.subtract(box_a[3], box_a[1], name='h1')
                w_b = tf.subtract(box_b[2], box_b[0], name='w2')
                h_b = tf.subtract(box_b[3], box_b[1], name='h2')

                union = w_a * h_a + w_b * h_b - intersection

            # EPSILON keeps the division defined when the union is zero.
            overlap = intersection / (union + cfg.EPSILON)
            mask = tf.reshape(
                self.input_mask, [cfg.BATCH_SIZE, cfg.ANCHORS])
            return overlap * mask

        predicted_corners = util.bbox_transform(
            tf.unstack(self.det_boxes, axis=2))
        target_corners = util.bbox_transform(
            tf.unstack(self.box_input, axis=2))
        self.ious = self.ious.assign(
            _masked_iou(predicted_corners, target_corners))
        self._activation_summary(self.ious, 'conf_score')

    with tf.variable_scope('probability'):
        self._activation_summary(self.pred_class_probs, 'class_probs')

        # Weight each class probability by the anchor's confidence.
        confidence = tf.reshape(
            self.pred_conf, [cfg.BATCH_SIZE, cfg.ANCHORS, 1])
        final_probs = tf.multiply(
            self.pred_class_probs, confidence, name='final_class_prob')

        self._activation_summary(final_probs, 'final_class_prob')

        self.det_probs = tf.reduce_max(final_probs, 2, name='score')
        self.det_class = tf.argmax(final_probs, 2, name='class_idx')
def _load_annotation(self): def _get_obj_level(obj): height = float(obj[7]) - float(obj[5]) + 1 truncation = float(obj[1]) occlusion = float(obj[2]) if height >= 40 and truncation <= 0.15 and occlusion <= 0: return 1 elif height >= 25 and truncation <= 0.3 and occlusion <= 1: return 2 elif height >= 25 and truncation <= 0.5 and occlusion <= 2: return 3 else: return 4 idx2annotation = {} idx2annotation1 = {} idx2annotation2 = {} idx2annotation3 = {} # ff = open('train.txt', 'w') for index in self._image_idx: filename = os.path.join(self._label_path, index+'.txt') print(self._label_path, index) with open(filename, 'r') as f: lines = f.readlines() f.close() bboxes = [] landmarks = [] poses = [] ages = [] falsex = 0 for line in lines: obj = line.strip().split(' ') #print obj try: cls = self._class_to_idx[obj[0].lower().strip()] pose = self._pose_to_idx[obj[1].lower().strip()] if '01baby' in obj[2]: obj2 = obj[2].split('01baby') age = self._age_to_idx['01baby'.strip()] else: obj2 = obj[2].split('02adult') age = self._age_to_idx['02adult'.strip()] #print cls except: continue if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3: continue xmin = float(obj2[1])#(obj[3]) ymin = float(obj[3]) xmax = float(obj[4]) ymax = float(obj[5]) # print(xmin, xmax) # if xmin < 0.0: # falsex = 1 # continue # if ymin < 0.0: # falsex = 1 # continue assert xmin >= 0.0 and xmin <= xmax, \ 'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \ .format(xmin, xmax, index) assert ymin >= 0.0 and ymin <= ymax, \ 'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \ .format(ymin, ymax, index) x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax]) bboxes.append([x, y, w, h, cls]) x0 = float(obj[6]) x1 = float(obj[7]) x2 = float(obj[8]) x3 = float(obj[9]) x4 = float(obj[10]) y0 = float(obj[11]) y1 = float(obj[12]) y2 = float(obj[13]) y3 = float(obj[14]) y4 = float(obj[15]) landmarks.append([x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, cls]) poses.append([pose]) ages.append([age]) 
# if falsex==1: # continue # ff.write(index+"\n") idx2annotation[index] = bboxes idx2annotation1[index] = landmarks idx2annotation2[index] = poses idx2annotation3[index] = ages # ff.close() # sdfs return idx2annotation, idx2annotation1, idx2annotation2, idx2annotation3
def analyze_detections(self, detection_file_dir, det_error_file):
    """Classify detection errors and write the offending boxes to a file.

    Detections are read from ``<detection_file_dir>/<idx>.txt`` for every
    ``idx`` in ``self._image_idx`` (class name in field 0, xmin/ymin/xmax/
    ymax in fields 4-7, score in the last field), sorted by descending
    score and stored in ``self._det_rois``. For each image that has ground
    truth in ``self._rois``, only the top ``len(ground truth)`` detections
    are scored; each is counted as correct, repeated, localization error,
    classification error or background error based on its best IOU with
    the ground truth. Errors and missed ground-truth boxes are appended to
    ``det_error_file``.

    Args:
        detection_file_dir: directory containing one ``<idx>.txt`` per image.
        det_error_file: path of the error log to (over)write.

    Returns:
        dict with the raw counts and the per-detection error ratios plus
        recall. A ratio whose denominator is zero (no scored detections or
        no ground-truth objects) is reported as 0.0.
    """
    def _save_detection(f, idx, error_type, det, score):
        # det is center format [cx, cy, w, h, cls, ...]; written as corners.
        f.write(
            '{:s} {:s} {:.1f} {:.1f} {:.1f} {:.1f} {:s} {:.3f}\n'.format(
                idx, error_type,
                det[0] - det[2] / 2., det[1] - det[3] / 2.,
                det[0] + det[2] / 2., det[1] + det[3] / 2.,
                self._classes[int(det[4])], score))

    def _ratio(count, total):
        # Guard against empty evaluations instead of dividing by zero.
        return count / total if total else 0.0

    # load detections
    self._det_rois = {}
    for idx in self._image_idx:
        det_file_name = os.path.join(detection_file_dir, idx + '.txt')
        with open(det_file_name) as f:
            lines = f.readlines()

        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            cls = self._class_to_idx[obj[0].lower().strip()]
            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            score = float(obj[-1])
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls, score])
        # Highest-scoring detections first.
        bboxes.sort(key=lambda box: box[-1], reverse=True)
        self._det_rois[idx] = bboxes

    # do error analysis (counters are floats so the ratios below are true
    # divisions and the reported values keep their float formatting)
    num_objs = 0.
    num_dets = 0.
    num_correct = 0.
    num_loc_error = 0.
    num_cls_error = 0.
    num_bg_error = 0.
    num_repeated_error = 0.
    num_detected_obj = 0.

    with open(det_error_file, 'w') as f:
        for idx in self._image_idx:
            gt_bboxes = np.array(self._rois[idx])
            num_gt = len(gt_bboxes)
            num_objs += num_gt
            if num_gt < 1:
                continue

            detected = [False] * num_gt
            # Only the top num_gt detections are scored; anything beyond
            # that never contributed to the counts or the error file.
            for det in self._det_rois[idx][:num_gt]:
                num_dets += 1
                ious = batch_iou(gt_bboxes[:, :4], det[:4])
                max_iou = np.max(ious)
                gt_idx = np.argmax(ious)
                if max_iou > 0.1:
                    if gt_bboxes[gt_idx, 4] == det[4]:
                        if max_iou >= 0.5:
                            if not detected[gt_idx]:
                                num_correct += 1
                                detected[gt_idx] = True
                            else:
                                num_repeated_error += 1
                        else:
                            num_loc_error += 1
                            _save_detection(f, idx, 'loc', det, det[5])
                    else:
                        num_cls_error += 1
                        _save_detection(f, idx, 'cls', det, det[5])
                else:
                    num_bg_error += 1
                    _save_detection(f, idx, 'bg', det, det[5])

            for gt, hit in zip(gt_bboxes, detected):
                if not hit:
                    _save_detection(f, idx, 'missed', gt, -1.0)
            num_detected_obj += sum(detected)

    pct_correct = _ratio(num_correct, num_dets)
    pct_loc = _ratio(num_loc_error, num_dets)
    pct_cls = _ratio(num_cls_error, num_dets)
    pct_bg = _ratio(num_bg_error, num_dets)
    pct_repeated = _ratio(num_repeated_error, num_dets)
    recall = _ratio(num_detected_obj, num_objs)

    print('Detection Analysis:')
    print(' Number of detections: {}'.format(num_dets))
    print(' Number of objects: {}'.format(num_objs))
    print(' Percentage of correct detections: {}'.format(pct_correct))
    print(' Percentage of localization error: {}'.format(pct_loc))
    print(' Percentage of classification error: {}'.format(pct_cls))
    print(' Percentage of background error: {}'.format(pct_bg))
    print(' Percentage of repeated detections: {}'.format(pct_repeated))
    print(' Recall: {}'.format(recall))

    out = {}
    out['num of detections'] = num_dets
    out['num of objects'] = num_objs
    out['% correct detections'] = pct_correct
    out['% localization error'] = pct_loc
    out['% classification error'] = pct_cls
    out['% background error'] = pct_bg
    out['% repeated error'] = pct_repeated
    out['% recall'] = recall
    return out
def Get_feed_data(self):
    """Build one training batch: augmented images plus dense targets.

    Reads a shuffled batch via ``self.read_batch_gt_data``. For each sample
    it subtracts ``mc.BGR_MEANS`` in place, runs ``self.Preprocess``, keeps
    the labels selected by the returned filter indices, flips the sample
    horizontally when ``np.random.randint(2)`` returns 1, and resizes the
    image to ``(mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT)``. Box coordinates are
    scaled by the image size for the flip and divided by it again
    afterwards (presumably they are normalized to [0, 1] -- confirm against
    ``Preprocess``).

    Returns:
        ``(input_images, gt_boxes_dense, gt_labels_dense, input_mask,
        all_match_overlaps)`` where the dense targets come from
        ``self._sparse_to_dense`` and the overlaps from ``self._math_bbox``.
    """
    cfg = self.mc
    raw_boxes, raw_labels, raw_images = self.read_batch_gt_data(shuffle=True)
    raw_boxes = np.array(raw_boxes)
    raw_labels = np.array(raw_labels)
    raw_images = np.array(raw_images)

    input_images = []
    gt_rows = []
    for sample_id in range(len(raw_boxes)):
        image = raw_images[sample_id]
        # In-place mean subtraction (also updates raw_images).
        image -= cfg.BGR_MEANS
        boxes = np.array(raw_boxes[sample_id])
        labels = np.array(raw_labels[sample_id])

        image, boxes, kept_idx = self.Preprocess(image, boxes)
        assert len(kept_idx) == len(boxes)
        # Keep only the labels of the boxes that survived preprocessing.
        labels = np.array([labels[k] for k in kept_idx])

        img_h, img_w, _ = map(float, image.shape)

        # Mirror: scale to pixel units, flip the center x, scale back.
        boxes[:, 0::2] *= img_w
        boxes[:, 1::2] *= img_h
        centers = np.array([bbox_transform_inv(box) for box in boxes])
        if np.random.randint(2) > 0.5:
            image = image[:, ::-1, :]
            centers[:, 0] = img_w - 1 - centers[:, 0]
        boxes = np.array([bbox_transform(box) for box in centers])
        boxes[:, 0::2] /= img_w
        boxes[:, 1::2] /= img_h

        image = cv2.resize(image, (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT))
        input_images.append(image)

        # One row per box: [sample id, label, 0, four box coordinates].
        gt_rows.extend(
            [sample_id, labels[j], 0, box[0], box[1], box[2], box[3]]
            for j, box in enumerate(boxes))

    gt_boxes, gt_labels = self.parse_gt_data(gt_rows)
    all_match_indices, all_match_overlaps = self._math_bbox(
        cfg.ANCHOR_BOX, gt_boxes)
    gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
        gt_boxes, gt_labels, all_match_indices)

    return (input_images, gt_boxes_dense, gt_labels_dense, input_mask,
            all_match_overlaps)