def getLosses(clss, regr, img_data, C, module):
    """Count the proposals whose last class column is 0 after ROI labelling.

    The RPN outputs are turned into proposals with ``roi_helpers.rpn_to_roi``
    and labelled with ``roi_helpers.calc_iou``.  A proposal is reported as
    "positive" when the last entry of its ``Y1`` row is 0 (presumably the last
    class column is background - TODO confirm against ``class_mapping``).

    Args:
        clss: RPN classification output, forwarded to ``rpn_to_roi``.
        regr: RPN regression output, forwarded to ``rpn_to_roi``.
        img_data: Per-image annotation object, forwarded to ``calc_iou``.
        C: Configuration object forwarded to both helpers.
        module: Opaque value forwarded to both helpers and printed.

    Returns:
        ``(-1, -1)`` when ``calc_iou`` returns no ROIs; otherwise
        ``(count, indices)`` where ``indices`` is the 1-D index array of the
        positive proposals and ``count`` is its length.
    """
    R = roi_helpers.rpn_to_roi(clss, regr, C, K.image_dim_ordering(), module,
                               use_regr=True, overlap_thresh=0.5, max_boxes=300)
    # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format.
    # `class_mapping` is not a parameter; it is looked up as a module-level name.
    X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping, module)

    # Debug output kept as-is because it is observable behaviour.
    print(module)
    print(R)

    # The original read, resized and zero-padded the image here, but never
    # used the result.  That work is dropped: it cost file I/O on every call
    # and could raise for reasons unrelated to the returned values.
    if X2 is None:
        return -1, -1

    # np.where with a single argument returns a 1-tuple of index arrays, so
    # take element 0 directly; a length check on the tuple is always true.
    pos_samples = np.where(Y1[0, :, -1] == 0)[0]
    return len(pos_samples), pos_samples
def calc_iou(R, img_data, C, class_mapping):
    """Label region proposals against the ground truth and build classifier targets.

    Every row of ``R`` is rounded to integer ``(x1, y1, x2, y2)`` and compared,
    through ``data_generators.iou``, with each ground-truth box after that box
    has been rescaled to the resized image and divided by ``C.rpn_stride``.
    Proposals whose best IoU is below ``C.classifier_min_overlap`` are dropped,
    those below ``C.classifier_max_overlap`` are labelled ``'bg'``, and the
    rest take the class of the best-matching box plus regression targets.

    Args:
        R: 2-D array with one ``(x1, y1, x2, y2)`` proposal per row
            (presumably already in stride-divided coordinates - TODO confirm).
        img_data: Mapping with ``'bboxes'``, ``'width'`` and ``'height'``.
        C: Config exposing ``im_size``, ``rpn_stride``,
            ``classifier_min_overlap``, ``classifier_max_overlap`` and
            ``classifier_regr_std``.
        class_mapping: Mapping from class name (including ``'bg'``) to index.

    Returns:
        ``(X, Y1, Y2, IoUs)``.  ``X`` holds the kept ROIs as ``(x, y, w, h)``,
        ``Y1`` the one-hot class labels and ``Y2`` the regression labels
        followed by the regression targets; each has a leading axis of size 1.
        ``IoUs`` is the plain list of best overlaps.  Four ``None`` values are
        returned when no proposal is kept.

    Raises:
        RuntimeError: if a kept proposal matches neither overlap band.
    """
    gt_boxes = img_data['bboxes']
    width, height = img_data['width'], img_data['height']
    # get image dimensions for resizing
    resized_width, resized_height = data_generators.get_new_img_size(
        width, height, C.im_size)

    # Ground-truth boxes on the stride-divided grid, stored as (x1, x2, y1, y2).
    x_scale = resized_width / float(width)
    y_scale = resized_height / float(height)
    gta = np.zeros((len(gt_boxes), 4))
    for row, gt in enumerate(gt_boxes):
        gta[row, 0] = int(round(gt['x1'] * x_scale / C.rpn_stride))
        gta[row, 1] = int(round(gt['x2'] * x_scale / C.rpn_stride))
        gta[row, 2] = int(round(gt['y1'] * y_scale / C.rpn_stride))
        gta[row, 3] = int(round(gt['y2'] * y_scale / C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []  # for debugging only

    for ix in range(R.shape[0]):
        x1, y1, x2, y2 = (int(round(v)) for v in R[ix, :])

        # First ground-truth box with the strictly largest overlap wins.
        best_iou = 0.0
        best_bbox = -1
        for gt_idx, (gx1, gx2, gy1, gy2) in enumerate(gta):
            overlap = data_generators.iou([gx1, gy1, gx2, gy2], [x1, y1, x2, y2])
            if overlap > best_iou:
                best_iou = overlap
                best_bbox = gt_idx

        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])
        IoUs.append(best_iou)

        if C.classifier_max_overlap <= best_iou:
            cls_name = gt_boxes[best_bbox]['class']
            gx1, gx2, gy1, gy2 = gta[best_bbox]
            cxg = (gx1 + gx2) / 2.0
            cyg = (gy1 + gy2) / 2.0
            cx = x1 + w / 2.0
            cy = y1 + h / 2.0
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gx2 - gx1) / float(w))
            th = np.log((gy2 - gy1) / float(h))
        elif C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            # hard negative example
            cls_name = 'bg'
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        class_num = class_mapping[cls_name]
        one_hot = [0] * len(class_mapping)
        one_hot[class_num] = 1
        y_class_num.append(one_hot)

        # One 4-slot group per class index; all-zero for background ROIs.
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:label_pos + 4] = [sx * tx, sy * ty, sw * tw, sh * th]
            labels[label_pos:label_pos + 4] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if not x_roi:
        return None, None, None, None

    X = np.array(x_roi)
    Y1 = np.array(y_class_num)
    Y2 = np.concatenate(
        [np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1)

    return (np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0),
            np.expand_dims(Y2, axis=0), IoUs)
def calc_iou(R, img_data, C, class_mapping):
    """Build face / background classifier targets for a set of proposals.

    Every row of ``R`` is rounded to integer ``(x1, y1, x2, y2)`` and matched,
    through ``data_generators.iou``, to the ground-truth box with the largest
    overlap.  Proposals below ``C.classifier_min_overlap`` are dropped, those
    below ``C.classifier_max_overlap`` become background, and the rest become
    face samples carrying the pose, gender, landmark-visibility, landmark and
    box-regression targets of the matched ground-truth box.

    Args:
        R: 2-D array with one ``(x1, y1, x2, y2)`` proposal per row
            (presumably already in stride-divided coordinates - TODO confirm).
        img_data: Mapping with ``'bboxes'``, ``'width'`` and ``'height'``.
            Each box provides ``x1, x2, y1, y2, roll, pitch, yaw, sex,
            feature_visible, feature_x, feature_y`` (the feature entries are
            assumed to hold 21 values each - TODO confirm).
        C: Config exposing ``im_size``, ``rpn_stride``,
            ``classifier_min_overlap``, ``classifier_max_overlap`` and
            ``classifier_regr_std``.
        class_mapping: Unused; kept so the signature matches the other
            ``calc_iou`` variants.

    Returns:
        ``[X, Y]`` where ``X`` holds the kept ROIs as ``(x, y, w, h)`` and
        ``Y`` concatenates, per ROI, class (2), pose (3), gender (2),
        visibility, landmarks and box regression (4); both have a leading
        axis of size 1.  Returns ``(None, None, None)`` when no proposal is
        kept.

    Raises:
        RuntimeError: if a kept proposal matches neither overlap band.
    """
    bboxes = img_data['bboxes']  # all the ground-truth boxes of one image
    width, height = img_data['width'], img_data['height']
    # get image dimensions for resizing
    resized_width, resized_height = data_generators.get_new_img_size(
        width, height, C.im_size)

    # Landmarks are scaled per axis, exactly like the box corners below.
    # The original scaled the y landmarks with the width ratio.
    x_ratio = (resized_width / float(width)) / float(C.rpn_stride)
    y_ratio = (resized_height / float(height)) / float(C.rpn_stride)
    sx, sy, sw, sh = C.classifier_regr_std

    # Ground-truth boxes on the stride-divided grid, stored as (x1, x2, y1, y2).
    gta = np.zeros((len(bboxes), 4))
    for bbox_num, bbox in enumerate(bboxes):
        gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height)) / C.rpn_stride))
        gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height)) / C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_gender = []
    y_pose = []
    y_viz = []
    y_landmark = []

    # For each proposal find the ground-truth box with the best overlap.
    for ix in range(R.shape[0]):
        x1, y1, x2, y2 = (int(round(v)) for v in R[ix, :])

        best_iou = 0.0
        best_bbox = -1
        for bbox_num in range(len(bboxes)):
            curr_iou = data_generators.iou(
                [gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]],
                [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num

        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])

        # Background defaults; overwritten below for face samples.
        pose_label = [0, 0, 0]
        gender_label = [0, 0]
        viz_label = np.zeros(21)
        landmark_label = [0] * 42
        coords = [0] * 4

        if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            # hard negative example
            class_label = [0, 1]
        elif C.classifier_max_overlap <= best_iou:
            class_label = [1, 0]
            # Read the attributes of the MATCHED box.  The original used the
            # stale `bbox` left over from the loop above, i.e. always the last
            # ground-truth box of the image.
            matched = bboxes[best_bbox]
            pose_label = [matched['roll'], matched['pitch'], matched['yaw']]
            gender_label[int(matched['sex'] == 'f')] = 1

            cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
            cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0
            cx = x1 + w / 2.0
            cy = y1 + h / 2.0
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
            th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
            coords = [sx * tx, sy * ty, sw * tw, sh * th]

            viz_label = matched['feature_visible']
            # Move the landmarks from image coordinates onto the stride grid,
            # then express them relative to the ROI centre and size.
            # np.asarray lets plain lists work as well as arrays.
            feat_x = np.asarray(matched['feature_x'], dtype=float) * x_ratio
            feat_y = np.asarray(matched['feature_y'], dtype=float) * y_ratio
            trans_x = [sx * (xi - cx) * 1. / w for xi in feat_x]
            trans_y = [sy * (yi - cy) * 1. / h for yi in feat_y]
            landmark_label = trans_x + trans_y
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        y_class_num.append(class_label)
        y_gender.append(gender_label)
        y_pose.append(pose_label)
        y_viz.append(viz_label)
        y_class_regr_coords.append(coords)
        y_landmark.append(landmark_label)

    if not x_roi:
        # NOTE(review): three values here versus a two-element list below;
        # kept unchanged because callers may depend on either shape.
        return None, None, None

    X = np.array(x_roi)
    Y = np.concatenate(
        [np.array(y_class_num), np.array(y_pose), np.array(y_gender),
         np.array(y_viz), np.array(y_landmark), np.array(y_class_regr_coords)],
        axis=-1)

    return [np.expand_dims(X, axis=0), np.expand_dims(Y, axis=0)]
def calc_iou(R, img_data, C, class_mapping):
    """Turn region proposals into classifier training targets.

    Each row of ``R`` is rounded to integer ``(x1, y1, x2, y2)`` and scored
    with ``data_generators.iou`` against every ground-truth box (rescaled to
    the resized image and divided by ``C.rpn_stride``).  A proposal is skipped
    when its best IoU is below ``C.classifier_min_overlap``, labelled ``'bg'``
    when below ``C.classifier_max_overlap``, and otherwise given the class and
    regression targets of its best-matching box.

    Args:
        R: 2-D array with one ``(x1, y1, x2, y2)`` proposal per row
            (presumably already in stride-divided coordinates - TODO confirm).
        img_data: Mapping with ``'bboxes'``, ``'width'`` and ``'height'``.
        C: Config exposing ``im_size``, ``rpn_stride``,
            ``classifier_min_overlap``, ``classifier_max_overlap`` and
            ``classifier_regr_std``.
        class_mapping: Mapping from class name (including ``'bg'``) to index.

    Returns:
        ``(X, Y1, Y2)``: the kept ROIs as ``(x, y, w, h)``, their one-hot
        class labels, and the regression labels followed by the regression
        targets, each with a leading axis of size 1.  Three ``None`` values
        are returned when no proposal is kept.

    Raises:
        RuntimeError: if a kept proposal matches neither overlap band.
    """
    annotations = img_data['bboxes']
    width, height = img_data['width'], img_data['height']
    # get image dimensions for resizing
    resized_width, resized_height = data_generators.get_new_img_size(
        width, height, C.im_size)

    # Ground-truth boxes on the stride-divided grid, stored as (x1, x2, y1, y2).
    w_scale = resized_width / float(width)
    h_scale = resized_height / float(height)
    gta = np.zeros((len(annotations), 4))
    for idx, ann in enumerate(annotations):
        gta[idx, 0] = int(round(ann['x1'] * w_scale / C.rpn_stride))
        gta[idx, 1] = int(round(ann['x2'] * w_scale / C.rpn_stride))
        gta[idx, 2] = int(round(ann['y1'] * h_scale / C.rpn_stride))
        gta[idx, 3] = int(round(ann['y2'] * h_scale / C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []

    for ix in range(R.shape[0]):
        x1, y1, x2, y2 = (int(round(v)) for v in R[ix, :])

        # First ground-truth box with the strictly largest overlap wins.
        best_iou = 0.0
        best_bbox = -1
        for idx, (gx1, gx2, gy1, gy2) in enumerate(gta):
            score = data_generators.iou([gx1, gy1, gx2, gy2], [x1, y1, x2, y2])
            if score > best_iou:
                best_iou = score
                best_bbox = idx

        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])

        if C.classifier_max_overlap <= best_iou:
            cls_name = annotations[best_bbox]['class']
            gx1, gx2, gy1, gy2 = gta[best_bbox]
            cxg = (gx1 + gx2) / 2.0
            cyg = (gy1 + gy2) / 2.0
            cx = x1 + w / 2.0
            cy = y1 + h / 2.0
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gx2 - gx1) / float(w))
            th = np.log((gy2 - gy1) / float(h))
        elif C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            # hard negative example
            cls_name = 'bg'
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        class_num = class_mapping[cls_name]
        one_hot = [0] * len(class_mapping)
        one_hot[class_num] = 1
        y_class_num.append(one_hot)

        # One 4-slot group per class index; all-zero for background ROIs.
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:label_pos + 4] = [sx * tx, sy * ty, sw * tw, sh * th]
            labels[label_pos:label_pos + 4] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if not x_roi:
        return None, None, None

    X = np.array(x_roi)
    Y1 = np.array(y_class_num)
    Y2 = np.concatenate(
        [np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1)

    return (np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0),
            np.expand_dims(Y2, axis=0))
def calc_iou(R, img_data, C, class_mapping):
    """Assign each proposal a class and regression target from the ground truth.

    Proposals arrive as ``(x1, y1, x2, y2)`` rows and are emitted as
    ``(x, y, w, h)``.  Ground-truth boxes are rescaled to the resized image
    and divided by ``C.rpn_stride`` before ``iou`` compares them with each
    rounded proposal.

    Args:
        R: 2-D array with one ``(x1, y1, x2, y2)`` proposal per row
            (presumably already in stride-divided coordinates - TODO confirm).
        img_data: Mapping with ``'bboxes'``, ``'width'`` and ``'height'``.
        C: Config exposing ``im_size``, ``rpn_stride``,
            ``classifier_min_overlap``, ``classifier_max_overlap`` and
            ``classifier_regr_std``.
        class_mapping: Mapping from class name (including ``'bg'``) to index.

    Returns:
        ``(X, Y1, Y2, IoUs)``: the kept ROIs, their one-hot class labels, the
        regression labels followed by the regression targets (each with a
        leading axis of size 1), and the list of best overlaps.  Four ``None``
        values are returned when no proposal reaches
        ``C.classifier_min_overlap``.

    Raises:
        RuntimeError: if a kept proposal matches neither overlap band.
    """
    gt_list = img_data['bboxes']
    width, height = img_data['width'], img_data['height']
    # get image dimensions for resizing
    resized_width, resized_height = get_new_img_size(width, height, C.im_size)

    # Ground-truth boxes on the stride-divided grid, stored as (x1, x2, y1, y2).
    # Example: x1 = 40 with an 800 -> 600 resize and stride 16 gives
    # 40 * (600 / 800) / 16 = 1.875, which rounds to 2.
    horiz = resized_width / float(width)
    vert = resized_height / float(height)
    gta = np.zeros((len(gt_list), 4))
    for pos, gt in enumerate(gt_list):
        gta[pos, 0] = int(round(gt['x1'] * horiz / C.rpn_stride))
        gta[pos, 1] = int(round(gt['x2'] * horiz / C.rpn_stride))
        gta[pos, 2] = int(round(gt['y1'] * vert / C.rpn_stride))
        gta[pos, 3] = int(round(gt['y2'] * vert / C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []  # for debugging only

    # One iteration per proposal row of R.
    for ix in range(R.shape[0]):
        x1, y1, x2, y2 = (int(round(v)) for v in R[ix, :])

        # Scan every ground-truth box; the first one with the strictly
        # largest IoU is kept.
        best_iou = 0.0
        best_bbox = -1
        for pos, (gx1, gx2, gy1, gy2) in enumerate(gta):
            candidate = iou([gx1, gy1, gx2, gy2], [x1, y1, x2, y2])
            if candidate > best_iou:
                best_iou = candidate
                best_bbox = pos

        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])
        IoUs.append(best_iou)

        if C.classifier_max_overlap <= best_iou:
            cls_name = gt_list[best_bbox]['class']
            gx1, gx2, gy1, gy2 = gta[best_bbox]
            cxg = (gx1 + gx2) / 2.0
            cyg = (gy1 + gy2) / 2.0
            cx = x1 + w / 2.0
            cy = y1 + h / 2.0
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gx2 - gx1) / float(w))
            th = np.log((gy2 - gy1) / float(h))
        elif C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            # hard negative example
            cls_name = 'bg'
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        class_num = class_mapping[cls_name]
        one_hot = [0] * len(class_mapping)
        one_hot[class_num] = 1
        y_class_num.append(one_hot)

        # One 4-slot group per class index; all-zero for background ROIs.
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:label_pos + 4] = [sx * tx, sy * ty, sw * tw, sh * th]
            labels[label_pos:label_pos + 4] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if not x_roi:
        return None, None, None, None

    # ROIs that reached the minimum overlap with some ground-truth box.
    X = np.array(x_roi)
    # One-hot class labels for those ROIs.
    Y1 = np.array(y_class_num)
    # Regression labels followed by the regression targets.
    Y2 = np.concatenate(
        [np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1)

    return (np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0),
            np.expand_dims(Y2, axis=0), IoUs)
def calc_iou(R,img_data,C, class_mapping): bboxes = img_data['bboxes'] (width,height) = (img_data['width'],img_data['height']) #get image dimensions for resizing (resized_width,resized_height) = data_generators.get_new_img_size(width,height,C.im_size) gta = np.zeros((len(bboxes),4)) for bbox_num,bbox in enumerate(bboxes): # get the GT box coordinates, and resize to account for image resizing gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width)) / C.rpn_stride )) gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width)) / C.rpn_stride )) gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height)) / C.rpn_stride)) gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height)) / C.rpn_stride)) x_roi = [] y_class_num = [] y_class_regr_coords = [] y_class_regr_label = [] IoUs = [] #for debugging only for ix in range(R.shape[0]): (x1,y1,x2,y2) = R[ix,:] x1 = int(round(x1)) y1 = int(round(y1)) x2 = int(round(x2)) y2 = int(round(y2)) best_iou =0.0 best_bbox = -1 for bbox_num in range(len(bboxes)): curr_iou = data_generators.iou([gta[bbox_num, 0],gta[bbox_num, 2],gta[bbox_num, 1],gta[bbox_num, 3]], [x1,y1,x2,y2]) if curr_iou>best_iou: best_iou=curr_iou best_bbox = bbox_num if best_iou < C.classifier_min_overlap: continue else: w = x2-x1 h = y2-y1 x_roi.append([x1,y1,w,h]) IoUs.append(best_iou) if C.classifier_min_overlap <=best_iou < C.classifier_max_overlap: cls_name = 'bg' elif C.classifier_max_overlap <=best_iou: cls_name = bboxes[best_bbox]['class'] cxg = (gta[best_bbox,0]+gta[best_bbox,1])/2.0 cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0 cx = x1+w/2.0 cy = y1+h/2.0 tx = (cxg-cx)/float(w) ty = (cyg-cy)/float(h) tw = np.log((gta[best_bbox,1]-gta[best_bbox,0])/float(w)) th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h)) else: print('roi={}'.format(best_iou)) raise RuntimeError class_num = class_mapping[cls_name] class_label = len(class_mapping)*[0] class_label[class_num] = 1 
y_class_num.append(copy.deepcopy(class_label))