def run_wbf(predictions, image_index, image_size=1024, iou_thr=0.55, skip_box_thr=0.5, weights=None):
    """Fuse one image's detections from several models with weighted boxes fusion.

    Args:
        predictions: One entry per model. ``predictions[m][image_index]`` must be
            a mapping with ``'boxes'`` and ``'scores'`` tensors (anything that
            supports ``.data.cpu().numpy()``).
        image_index: Index of the image inside every model's prediction list.
        image_size: Boxes are divided by ``image_size - 1`` before fusion and
            multiplied by the same factor afterwards.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Score threshold forwarded to ``weighted_boxes_fusion``.
        weights: Optional per-model weights forwarded to
            ``weighted_boxes_fusion``. Previously this argument was accepted
            but silently replaced by ``None``.

    Returns:
        Tuple ``(boxes, scores, labels)`` as returned by the fusion call, with
        ``boxes`` scaled back by ``image_size - 1``.
    """
    scale = image_size - 1
    boxes = [
        prediction[image_index]['boxes'].data.cpu().numpy() / scale
        for prediction in predictions
    ]
    scores = [
        prediction[image_index]['scores'].data.cpu().numpy()
        for prediction in predictions
    ]
    # Every detection gets the same label (1.0): fusion is single-class here.
    labels = [
        np.ones(prediction[image_index]['scores'].shape[0])
        for prediction in predictions
    ]
    boxes, scores, labels = weighted_boxes_fusion(
        boxes, scores, labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * scale
    return boxes, scores, labels
def run_wbf(predictions, image_index, image_size=512, iou_thr=0.44, skip_box_thr=0.43, weights=None):
    """Fuse one image's detections from several models with weighted boxes fusion.

    Args:
        predictions: One entry per model. ``predictions[m][image_index]`` must be
            a mapping with ``'boxes'`` and ``'scores'`` arrays that support
            division, ``.tolist()`` and ``.shape``.
        image_index: Index of the image inside every model's prediction list.
        image_size: Boxes are divided by ``image_size - 1`` before fusion and
            multiplied by the same factor afterwards.
        iou_thr: IoU threshold forwarded to the fusion call.
        skip_box_thr: Score threshold forwarded to the fusion call.
        weights: Optional per-model weights forwarded to the fusion call.
            Previously this argument was accepted but silently replaced by
            ``None``.

    Returns:
        Tuple ``(boxes, scores, labels)`` as returned by
        ``ensemble_boxes.weighted_boxes_fusion``, with ``boxes`` scaled back by
        ``image_size - 1``.
    """
    scale = image_size - 1
    boxes = [
        (prediction[image_index]['boxes'] / scale).tolist()
        for prediction in predictions
    ]
    scores = [
        prediction[image_index]['scores'].tolist()
        for prediction in predictions
    ]
    # Every detection gets the integer label 1: fusion is single-class here.
    labels = [
        np.ones(prediction[image_index]['scores'].shape[0]).astype(int).tolist()
        for prediction in predictions
    ]
    boxes, scores, labels = ensemble_boxes.weighted_boxes_fusion(
        boxes, scores, labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * scale
    return boxes, scores, labels
def run_wbf(boxes, scores, image_size=1024, iou_thr=0.55, skip_box_thr=0.1, weights=None):
    """Run weighted boxes fusion over a single model's detections.

    The boxes are converted with ``coco2pascal``, normalised by ``image_size``,
    fused, scaled back by the same ``image_size`` and converted back with
    ``pascal2coco``.

    Two defects of the earlier version are fixed here:
    * the boxes were divided by ``image_size`` but multiplied back by
      ``image_size - 1``, which shrank every fused box slightly;
    * ``weights`` was accepted but ``None`` was always passed to the fusion.

    Args:
        boxes: Array of boxes accepted by ``coco2pascal``.
        scores: Array of scores; ``scores.shape[0]`` is the number of boxes.
        image_size: Normalisation factor applied before and after fusion.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Score threshold forwarded to ``weighted_boxes_fusion``.
        weights: Optional weights forwarded to ``weighted_boxes_fusion``.

    Returns:
        Tuple ``(boxes, scores, labels)``; ``boxes`` is the output of
        ``pascal2coco`` applied to the rescaled fused boxes.
    """
    boxes = [coco2pascal(boxes) / image_size]
    # Single-class problem: every detection carries the label 1.0.
    labels = [np.ones(scores.shape[0]).tolist()]
    scores = [scores]
    boxes, scores, labels = weighted_boxes_fusion(
        boxes, scores, labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    # Undo the normalisation with the same factor that was used above.
    boxes = boxes * image_size
    boxes = pascal2coco(boxes)
    return boxes, scores, labels
def run_wbf(predictions, image_index, image_size=TRAIN_SIZE, iou_thr=PredictConfig.IOU_THRESH, iou_thr2=PredictConfig.IOU_THRESH2, skip_box_thr=PredictConfig.SKIP_THRESH, weights=None):
    """Two-pass weighted boxes fusion for one image.

    Pass 1 fuses the per-model detections using their own labels.  Pass 2
    fuses the pass-1 result again with every label forced to ``1.`` (using
    ``iou_thr2``), so overlapping boxes of different classes are merged.
    Each pass-2 box then takes the score and label of the first pass-1 box
    whose IoU with it exceeds 0.2.

    The earlier version stored the index of the *pass-2* box and used it to
    index the *pass-1* scores/labels, so the returned scores and labels did not
    belong to the returned boxes (and the three outputs could differ in
    length).  The pass-1 index of the matching box is used now, and pass-2
    boxes without any match are dropped so the outputs stay aligned.

    Args:
        predictions: One entry per model. ``predictions[m][image_index]`` must be
            a mapping with ``'boxes'``, ``'scores'`` and ``'labels'`` arrays.
        image_index: Index of the image inside every model's prediction list.
        image_size: Boxes are divided by ``image_size - 1`` before fusion and
            multiplied by the same factor afterwards.
        iou_thr: IoU threshold of the first (per-label) fusion.
        iou_thr2: IoU threshold of the second (label-agnostic) fusion.
        skip_box_thr: Score threshold forwarded to both fusion calls.
        weights: Optional weights forwarded to both fusion calls.

    Returns:
        Tuple ``(new_boxes, scores, labels)`` of equal length: the rescaled
        pass-2 boxes plus the score and label of their matching pass-1 box.
    """
    scale = image_size - 1
    boxes = [
        (prediction[image_index]['boxes'] / scale).tolist()
        for prediction in predictions
    ]
    scores = [
        prediction[image_index]['scores'].tolist()
        for prediction in predictions
    ]
    labels = [
        prediction[image_index]['labels'].tolist()
        for prediction in predictions
    ]
    boxes, scores, labels = weighted_boxes_fusion(
        boxes, scores, labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

    # Second pass: same boxes, but all with label 1. so classes can merge.
    new_labels = [1.] * len(labels)
    new_boxes, new_scores, new_labels = weighted_boxes_fusion(
        [boxes], [scores], [new_labels],
        weights=weights, iou_thr=iou_thr2, skip_box_thr=skip_box_thr)

    kept_new = []  # indices into new_boxes that found a pass-1 match
    kept_src = []  # index of the matching pass-1 box, aligned with kept_new
    for new_idx, new_box in enumerate(new_boxes):
        for src_idx, box in enumerate(boxes):
            if bb_intersection_over_union(box, new_box) > 0.2:
                kept_new.append(new_idx)
                kept_src.append(src_idx)
                break

    scores = [scores[i] for i in kept_src]
    labels = [labels[i] for i in kept_src]
    new_boxes = new_boxes[kept_new] * scale
    return new_boxes, scores, labels
def run_wbf(list_boxes, list_scores, list_classes, im_w=1024, im_h=1024, weights=None, iou_thr=0.5, skip_box_thr=0.4):
    """Fuse per-model detections given in pixel coordinates.

    Each model's boxes are copied as float32, clipped at 0, and normalised
    column-wise (columns 0 and 2 by ``im_w``, columns 1 and 3 by ``im_h``).
    The fused boxes are scaled back the same way.  The module-level flag
    ``is_wbf2`` selects ``weighted_boxes_fusion_customized`` instead of
    ``weighted_boxes_fusion``.

    Args:
        list_boxes: Per-model arrays of boxes (need ``.astype`` and 2-D indexing).
        list_scores: Per-model scores, passed through unchanged.
        list_classes: Per-model labels, passed through unchanged.
        im_w: Divisor/multiplier for columns 0 and 2.
        im_h: Divisor/multiplier for columns 1 and 3.
        weights: Per-model weights forwarded to the fusion function.
        iou_thr: IoU threshold (only used by the non-customized fusion).
        skip_box_thr: Score threshold forwarded to the fusion function.

    Returns:
        Tuple ``(boxes, scores, labels)`` with ``boxes`` back in pixel units.
    """
    # (column, dimension) pairs in the order the columns are rescaled.
    column_dims = ((0, im_w), (2, im_w), (1, im_h), (3, im_h))

    enboxes, enscores, enlabels = [], [], []
    for model_boxes, model_scores, model_classes in zip(list_boxes, list_scores, list_classes):
        unit_boxes = model_boxes.astype(np.float32).clip(min=0)
        for col, dim in column_dims:
            unit_boxes[:, col] = unit_boxes[:, col] / dim
        enboxes.append(unit_boxes)
        enscores.append(model_scores)
        enlabels.append(model_classes)

    if is_wbf2:
        # NOTE(review): this branch uses a fixed IoU of 0.5 and ignores
        # ``iou_thr`` — confirm whether that is intentional.
        fuse, fuse_iou = weighted_boxes_fusion_customized, 0.5
    else:
        fuse, fuse_iou = weighted_boxes_fusion, iou_thr

    boxes, scores, labels = fuse(
        enboxes, enscores, enlabels,
        weights=weights, iou_thr=fuse_iou, skip_box_thr=skip_box_thr)

    for col, dim in column_dims:
        boxes[:, col] = boxes[:, col] * dim
    return boxes, scores, labels
def merge_bb_wbf(im_x_axis_size, im_y_axis_size, bb_df, label_column, x_min_col, y_min_col, x_max_col, y_max_col, iou_thr, scores_col=None):
    """Merge overlapping boxes of one image with ZFTurbo's weighted boxes fusion.

    ``bb_df`` is indexed positionally (``bb_df[:, col]``), i.e. it is treated
    as a 2-D array whose columns are addressed by the ``*_col`` arguments.

    Args:
        im_x_axis_size: Divisor for the x columns (and multiplier afterwards).
        im_y_axis_size: Divisor for the y columns (and multiplier afterwards).
        bb_df: 2-D array with one row per box.
        label_column: Column index holding the labels.
        x_min_col, y_min_col, x_max_col, y_max_col: Column indices of the box corners.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        scores_col: Column index holding the scores; when ``None`` every box
            gets score 1.

    Returns:
        Array built with ``np.c_`` whose columns are the four rescaled fused
        box coordinates, then the label, then the score.
    """
    scale = [im_x_axis_size, im_y_axis_size, im_x_axis_size, im_y_axis_size]
    corner_cols = [x_min_col, y_min_col, x_max_col, y_max_col]

    # Fusion expects coordinates in [0, 1].
    unit_boxes = np.divide(bb_df[:, corner_cols], scale)

    if scores_col is not None:
        confidences = bb_df[:, scores_col]
    else:
        # No scores available: treat every box as equally confident.
        confidences = [1] * bb_df.shape[0]

    # A single "model" with weight 1; threshold 0 keeps every box.
    fused, fused_scores, fused_labels = weighted_boxes_fusion(
        [unit_boxes], [confidences], [bb_df[:, label_column]],
        weights=[1], iou_thr=iou_thr, skip_box_thr=0)

    # Back to the original coordinate units.
    return np.c_[fused * scale, fused_labels, fused_scores]
def ensemble_models(ipt_json_paths, opt_json_path, img_ann_path, weights, method='weighted_boxes_fusion', iou_thr=0.3, skip_box_thr=0.0001, sigma=0.1):
    """Ensemble several detection result files image by image and save the result.

    Every input file is loaded with ``json_to_lisdict``; for each image the
    per-model boxes/scores/labels are combined with the selected method and
    the merged results are written with ``lisdict_to_json``.

    Images are taken from the union of all models' results (in order of first
    appearance).  Previously only the images present in the *first* model were
    processed, so detections for images that only later models reported were
    dropped, and an empty ``ipt_json_paths`` raised ``IndexError``.

    Args:
        ipt_json_paths: Paths of the per-model result files.
        opt_json_path: Output path handed to ``lisdict_to_json``.
        img_ann_path: Annotation file loaded with ``mmcv.load``; its
            ``'images'`` list supplies the per-image info keyed by ``'id'``.
        weights: Per-model weights forwarded to the ensembling function.
        method: ``'nms'``, ``'soft_nms'`` or ``'non_maximum_weighted'``; any
            other value selects weighted boxes fusion.
        iou_thr: IoU threshold forwarded to the ensembling function.
        skip_box_thr: Score threshold (``thresh`` for soft-NMS).
        sigma: Sigma for soft-NMS.
    """
    img_info_dict = {
        img_info['id']: img_info
        for img_info in mmcv.load(img_ann_path)['images']
    }
    res_dicts = [
        json_to_lisdict(json_path, img_info_dict)
        for json_path in ipt_json_paths
    ]

    # Ordered union of the image ids seen by any model.
    img_ids = list(dict.fromkeys(
        img_id for model_res in res_dicts for img_id in model_res))

    res_dict = {}
    for img_id in img_ids:
        boxes_list, scores_list, labels_list = [], [], []
        for model_res in res_dicts:
            if img_id not in model_res:
                # Keep one (empty) slot per model so the weights stay aligned.
                boxes_list.append([])
                scores_list.append([])
                labels_list.append([])
            else:
                boxes_list.append(model_res[img_id]['boxes'])
                scores_list.append(model_res[img_id]['scores'])
                labels_list.append(model_res[img_id]['labels'])

        if method == 'nms':
            boxes, scores, labels = nms(
                boxes_list, scores_list, labels_list,
                weights=weights, iou_thr=iou_thr)
        elif method == 'soft_nms':
            boxes, scores, labels = soft_nms(
                boxes_list, scores_list, labels_list,
                weights=weights, iou_thr=iou_thr, sigma=sigma,
                thresh=skip_box_thr)
        elif method == 'non_maximum_weighted':
            boxes, scores, labels = non_maximum_weighted(
                boxes_list, scores_list, labels_list,
                weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
        else:
            boxes, scores, labels = weighted_boxes_fusion(
                boxes_list, scores_list, labels_list,
                weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
        res_dict[img_id] = dict(boxes=boxes, scores=scores, labels=labels)

    lisdict_to_json(res_dict, opt_json_path, img_info_dict)
def wbf_per_sample(sample_predictions, weights, iou_thr, score_thr):
    """Fuse the predictions of several models for a single sample.

    Each prediction is converted with ``mmdet2wbf`` into boxes, scores and
    labels, the per-model lists are fused with ``weighted_boxes_fusion`` and
    the fused boxes are multiplied by the module-level ``IMAGE_SIZE``.

    Args:
        sample_predictions: Iterable with one prediction per model.
        weights: Per-model weights forwarded to the fusion call.
        iou_thr: IoU threshold forwarded to the fusion call.
        score_thr: Forwarded as ``skip_box_thr``.

    Returns:
        A one-element list holding an array whose rows are the four scaled
        box coordinates followed by the score.  The fused labels are not
        returned.
    """
    per_model = []
    for prediction in sample_predictions:
        model_boxes, model_scores, model_labels = mmdet2wbf(prediction)
        per_model.append((model_boxes, model_scores, model_labels))

    bboxes, scores, labels = weighted_boxes_fusion(
        boxes_list=[entry[0] for entry in per_model],
        scores_list=[entry[1] for entry in per_model],
        labels_list=[entry[2] for entry in per_model],
        weights=weights,
        iou_thr=iou_thr,
        skip_box_thr=score_thr,
    )

    scaled_boxes = bboxes * IMAGE_SIZE
    score_column = scores.reshape(-1, 1)
    return [np.concatenate([scaled_boxes, score_column], axis=1)]
def wbf(im, boxes, scores, classes):
    """Run weighted boxes fusion on one image's detections.

    The boxes are normalised by the image size (columns 0 and 2 by the width,
    columns 1 and 3 by the height), fused with an IoU threshold of 0.2 and
    scaled back.

    The normalisation is done on a float copy.  Previously the caller's
    ``boxes`` array was divided in place, so it came back normalised (and the
    in-place division failed outright for integer arrays).

    Args:
        im: Object whose ``size`` attribute unpacks to ``(width, height)``.
        boxes: Array of boxes with four columns; must support ``.tolist()``.
        scores: Array of scores; must support ``.tolist()``.
        classes: Array of labels; must support ``.tolist()``.

    Returns:
        Tuple ``(boxes, scores)`` of the fused boxes (scaled back by the image
        size) and their scores.  The fused labels are discarded.
    """
    w, h = im.size
    scale = np.array([w, h, w, h], dtype=np.float64)

    # Build a fresh array from the values so the caller's data is untouched.
    normalized = np.asarray(boxes.tolist(), dtype=np.float64).reshape(-1, 4) / scale

    fused_boxes, fused_scores, _ = weighted_boxes_fusion(
        [normalized.tolist()], [scores.tolist()], [classes.tolist()],
        weights=None, iou_thr=0.2)

    return fused_boxes * scale, fused_scores
def ensemble_multibox(boxes, scores, labels, iou_thr, sigma, skip_box_thr, weights=None, method='wbf'):
    """Combine per-model detections with the selected ensembling method.

    Args:
        boxes: Per-model box lists.
        scores: Per-model score lists.
        labels: Per-model label lists.
        iou_thr: IoU threshold forwarded to the selected function.
        sigma: Sigma, used by ``'soft_nms'`` only.
        skip_box_thr: Score threshold (``thresh`` for ``'soft_nms'``; unused by
            ``'nms'``).
        weights: Per-model weights forwarded to the selected function.
        method: ``'nms'``, ``'soft_nms'``, ``'nms_weight'`` or ``'wbf'``.

    Returns:
        Tuple ``(boxes, scores, labels)``.  For an unrecognised ``method`` the
        inputs are returned unchanged.
    """
    if method == 'nms':
        merged = nms(boxes, scores, labels, weights=weights, iou_thr=iou_thr)
    elif method == 'soft_nms':
        merged = soft_nms(
            boxes, scores, labels,
            weights=weights, sigma=sigma, iou_thr=iou_thr, thresh=skip_box_thr)
    elif method == 'nms_weight':
        merged = non_maximum_weighted(
            boxes, scores, labels,
            weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    elif method == 'wbf':
        merged = weighted_boxes_fusion(
            boxes, scores, labels,
            weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    else:
        # Unknown method: pass the inputs through untouched.
        merged = (boxes, scores, labels)

    boxes, scores, labels = merged
    return boxes, scores, labels
def wbf(image_id):
    """Ensemble all models' predictions for one image into a one-row DataFrame.

    For each model-level prediction frame (module globals), the row for
    ``image_id`` is looked up, its ``PredictionString`` is split by
    ``extract_data`` into a "large" set and a regular set of boxes, and the two
    sets are fused separately across models with ``weighted_boxes_fusion``
    (using the module-level ``weights``, ``iou_thr``/``skip_box_thr`` and
    ``iou_thr_large``/``skip_box_thr_large``).  Both fused sets are rescaled
    with ``scale_data`` and concatenated, regular boxes first.

    The model order below is the order in which the per-model lists are built,
    so it must match the order of the module-level ``weights``.

    Args:
        image_id: Identifier matched against the ``'image_id'`` column of every
            prediction frame.

    Returns:
        ``pandas.DataFrame`` with columns ``['image_id', 'PredictionString']``
        and a single row for ``image_id``.

    Raises:
        IndexError: If a prediction frame has no row for ``image_id``.
    """
    img_height, img_width = get_height_width(image_id)

    model_predictions = (
        yolov5_fold0_tta_pred,
        yolov5_fold1_tta_pred,
        yolov5_fold2_tta_pred,
        yolov5_fold3_tta_pred,
        yolov5_fold4_tta_pred,
        detectron2_r101fpn3x_pred,
        vfnet_r101fpn_pred,
        vfnet_r101fpn_fold0_pred,
        vfnet_r101fpn_fold1_pred,
        vfnet_r101fpn_fold2_pred,
        vfnet_r101fpn_fold3_pred,
        vfnet_r101fpn_fold4_pred,
    )

    boxes_lst, scores_lst, labels_lst = [], [], []
    boxes_large_lst, scores_large_lst, labels_large_lst = [], [], []

    for model_pred in model_predictions:
        prediction_string = model_pred[
            model_pred['image_id'] == image_id]['PredictionString'].values[0]
        (model_boxes_large_lst, model_scores_large_lst, model_labels_large_lst,
         model_boxes_lst, model_scores_lst, model_labels_lst) = extract_data(
            prediction_string, img_height, img_width)

        boxes_large_lst.append(model_boxes_large_lst)
        scores_large_lst.append(model_scores_large_lst)
        labels_large_lst.append(model_labels_large_lst)
        boxes_lst.append(model_boxes_lst)
        scores_lst.append(model_scores_lst)
        labels_lst.append(model_labels_lst)

    boxes_large, scores_large, labels_large = weighted_boxes_fusion(
        boxes_large_lst, scores_large_lst, labels_large_lst,
        weights=weights, iou_thr=iou_thr_large, skip_box_thr=skip_box_thr_large)
    boxes, scores, labels = weighted_boxes_fusion(
        boxes_lst, scores_lst, labels_lst,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

    boxes_large = scale_data(boxes_large, img_height, img_width)
    boxes = scale_data(boxes, img_height, img_width)

    # Join the two fused sets: regular boxes first, then the large ones.
    final_boxes = np.append(boxes, boxes_large, axis=0)
    final_scores = np.append(scores, scores_large, axis=0)
    final_labels = np.append(labels, labels_large, axis=0)

    merged_data = convert_data_to_row(final_boxes, final_scores, final_labels)
    merged_data = pd.DataFrame(
        [[image_id, merged_data]], columns=['image_id', 'PredictionString'])
    return merged_data
def ensemble_models(*pred_df, path, evaluation=False):
    """Fuse the boxes of several prediction DataFrames and write them to CSV.

    All frames are stacked, class names are replaced by their numeric id, and
    for every image all rows are fused as a single box list with
    ``weighted_boxes_fusion`` (``iou_thr=0.6``, ``skip_box_thr=0.0001``,
    no weights).  The result is written to ``path / 'pred_df_ensembled.csv'``.

    ``DataFrame.append`` (removed in pandas 2.0) is replaced by ``pd.concat``,
    and calling the function without any frame now raises a clear error
    instead of failing on an unbound local.

    Args:
        *pred_df: One or more DataFrames with the columns ``image_id``,
            ``class_name``, ``score``, ``x_min``, ``y_min``, ``x_max``,
            ``y_max``.
        path: Directory-like object supporting ``path / 'file.csv'``.
        evaluation: When true, ``class_name`` in the output is the textual
            class name and the coordinate columns are ordered
            ``x_min, x_max, y_min, y_max``; otherwise ``class_name`` is the
            integer id and the order is ``x_min, y_min, x_max, y_max``.

    Raises:
        ValueError: If no prediction frame is given.
    """
    if not pred_df:
        raise ValueError("ensemble_models() needs at least one prediction DataFrame")

    # concat returns a new frame, so the callers' frames are not modified below.
    pred_df = pd.concat(list(pred_df))
    image_ids = pred_df["image_id"].unique()

    class_dict = {
        0: 'Aortic enlargement',
        1: 'Atelectasis',
        2: 'Calcification',
        3: 'Cardiomegaly',
        4: 'Consolidation',
        5: 'ILD',
        6: 'Infiltration',
        7: 'Lung Opacity',
        8: 'Nodule/Mass',
        9: 'Other lesion',
        10: 'Pleural effusion',
        11: 'Pleural thickening',
        12: 'Pneumothorax',
        13: 'Pulmonary fibrosis',
        14: "No finding"
    }
    # Replace textual class names by their numeric id; other values are kept.
    for class_id, class_name in class_dict.items():
        pred_df.loc[pred_df.class_name == class_name, 'class_name'] = class_id

    iou_thr = 0.6
    skip_box_thr = 0.0001

    results = []
    for image_id in tqdm(image_ids, total=len(image_ids)):
        # All annotations for the current image, treated as one "model".
        data = pred_df[pred_df["image_id"] == image_id]
        boxes_list = [data[["x_min", "y_min", "x_max", "y_max"]].values.tolist()]
        scores_list = [data["score"].tolist()]
        labels_list = [data["class_name"].tolist()]

        boxes, scores, labels = weighted_boxes_fusion(
            boxes_list, scores_list, labels_list,
            weights=None, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

        # Key order differs between the two formats on purpose: it defines
        # the column order of the written CSV.
        for idx, box in enumerate(boxes):
            if evaluation:
                results.append({
                    "image_id": image_id,
                    "class_name": class_dict[int(labels[idx])],
                    "score": scores[idx],
                    "x_min": box[0],
                    "x_max": box[2],
                    "y_min": box[1],
                    "y_max": box[3]
                })
            else:
                results.append({
                    "image_id": image_id,
                    "class_name": int(labels[idx]),
                    "score": scores[idx],
                    "x_min": box[0],
                    "y_min": box[1],
                    "x_max": box[2],
                    "y_max": box[3]
                })

    results = pd.DataFrame(results)
    results.to_csv(path / 'pred_df_ensembled.csv', index=False)
def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold=0.5, iou_threshold=0.2, use_WBF=False, WBF_thr=0.5, WBF_skip_thr=0.75, input_size=512):
    """Turn raw detector outputs into per-image detections.

    Anchors are decoded with ``regressBoxes`` and clipped with ``clipBoxes``.
    For every image in the batch the anchors whose best class score exceeds
    ``threshold`` are kept and then reduced either with ``batched_nms`` or,
    when ``use_WBF`` is true, with ``weighted_boxes_fusion`` (boxes divided by
    ``input_size - 1`` for the fusion, multiplied back and cast to ``int``).

    Args:
        x: Input batch; ``x.shape[0]`` is the number of images.
        anchors: Anchors passed to ``regressBoxes``.
        regression: Regression output passed to ``regressBoxes``.
        classification: Class scores, indexed as ``[image, anchor, class]``.
        regressBoxes: Callable ``(anchors, regression) -> boxes``.
        clipBoxes: Callable ``(boxes, x) -> boxes``.
        threshold: Minimum best-class score for an anchor to be kept.
        iou_threshold: IoU threshold for ``batched_nms``.
        use_WBF: Use weighted boxes fusion instead of NMS.
        WBF_thr: IoU threshold for the fusion.
        WBF_skip_thr: Score threshold for the fusion.
        input_size: Normalisation size used around the fusion.

    Returns:
        List with one dict per image holding ``'rois'``, ``'class_ids'`` and
        ``'scores'``; all three are empty arrays when nothing is detected.
    """
    def _no_detections():
        return {
            'rois': np.array(()),
            'class_ids': np.array(()),
            'scores': np.array(()),
        }

    decoded = clipBoxes(regressBoxes(anchors, regression), x)
    scores = torch.max(classification, dim=2, keepdim=True)[0]
    keep_mask = (scores > threshold)[:, :, 0]

    out = []
    for i in range(x.shape[0]):
        keep = keep_mask[i]
        if keep.sum() == 0:
            out.append(_no_detections())
            continue

        boxes_i = decoded[i, keep, ...]
        scores_i = scores[i, keep, ...]
        class_scores, class_ids = classification[i, keep, ...].permute(1, 0).max(dim=0)

        if use_WBF:
            # The kept anchors form a single "model" for the fusion.
            boxes_WBF, scores_WBF, labels_WBF = weighted_boxes_fusion(
                boxes_i.unsqueeze(0).cpu().numpy() / (input_size - 1),
                scores_i[:, 0].unsqueeze(0).cpu().numpy(),
                class_ids.unsqueeze(0).cpu().numpy(),
                weights=None,
                iou_thr=WBF_thr,
                skip_box_thr=WBF_skip_thr)
            if boxes_WBF.shape[0] == 0:
                out.append(_no_detections())
            else:
                out.append({
                    'rois': (boxes_WBF * (input_size - 1)).astype(int),
                    'class_ids': labels_WBF.astype(int),
                    'scores': scores_WBF
                })
            continue

        nms_idx = batched_nms(boxes_i, scores_i[:, 0], class_ids, iou_threshold=iou_threshold)
        if nms_idx.shape[0] == 0:
            out.append(_no_detections())
            continue

        kept_classes = class_ids[nms_idx]
        kept_scores = class_scores[nms_idx]
        kept_boxes = boxes_i[nms_idx, :]
        out.append({
            'rois': kept_boxes.cpu().numpy(),
            'class_ids': kept_classes.cpu().numpy(),
            'scores': kept_scores.cpu().numpy(),
        })
    return out
def txt_visdrone(img_dir, save_path, ensemble_path, normalized_result=False):
    """Run the detector over every file in ``img_dir`` and write one text file per image.

    Each image is run at one or several short-side sizes (depending on
    ``cfgs.MULTI_SCALSE_TESTING``); the per-scale detections are normalised to
    [0, 1] and merged with ``weighted_boxes_fusion``.

    Args:
        img_dir: Directory whose entries are all read with ``cv2.imread``.
        save_path: Output directory used when ``normalized_result`` is false.
            It is created if missing in either mode.
        ensemble_path: Output directory used when ``normalized_result`` is
            true.  It is created if missing in either mode.
        normalized_result: When false, boxes are written in raw-image pixels
            as ``x, y, width, height``.  When true, the fused boxes are written
            as returned by the fusion (normalised ``xmin, ymin, xmax, ymax``)
            and the file starts with a ``raw_w,raw_h`` line.

    Raises:
        IndexError: If the checkpoint to restore is the pretrained backbone
            weights (``cfgs.PRETRAIN_BACKBONE_WEIGHTS``).
    """
    # initialize
    net = BuildFrameworks(False)
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    # Input normalisation depends on the backbone: scale to [0, 1] and
    # standardise for the *_v1d backbones, plain mean subtraction otherwise.
    if cfgs.PRETRAIN_BACKBONE_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_tensor = img_plac / 255 - tf.constant([[cfgs.PIXEL_MEAN_]])
        img_tensor = img_tensor / tf.constant([cfgs.PIXEL_STD])
    else:
        img_tensor = img_plac - tf.constant([[cfgs.PIXEL_MEAN]])
    # Add the batch dimension.
    img_tensor = tf.expand_dims(img_tensor, axis=0)
    # detection result dict
    det_dict = net.build_framworks(input_img_batch=img_tensor)
    final_boxes, final_scores, final_category = det_dict[
        "final_boxes"], det_dict["final_scores"], det_dict["final_category"]
    restorer, restore_ckpt = net.get_restorer()
    if restore_ckpt == cfgs.PRETRAIN_BACKBONE_WEIGHTS:
        raise IndexError(
            "----->Please select the trained weights rather than pretrained weights."
        )
    img_name = os.listdir(path=img_dir)
    with tf.Session() as sess:
        restorer.restore(sess, restore_ckpt)
        for i in range(len(img_name)):
            # read images
            img_array = cv2.imread(os.path.join(img_dir, img_name[i]))
            raw_h = img_array.shape[0]
            raw_w = img_array.shape[1]
            # One entry per test scale; these lists are the fusion inputs.
            detected_scores_, detected_boxes_, detected_categories_ = [], [], []
            start = time.time()
            if cfgs.MULTI_SCALSE_TESTING:
                short_size_len = cfgs.SHORT_SIZE_LIMITATION_LIST
            else:
                short_size_len = [cfgs.SHORT_SIZE_LIMITATION]
            for short_size in short_size_len:
                # Resize so the shorter side equals short_size, capping the
                # longer side at cfgs.MAX_LENGTH.
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), cfgs.MAX_LENGTH)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w),
                                       cfgs.MAX_LENGTH), short_size
                img_resize = cv2.resize(img_array, (new_w, new_h))
                # The channel axis is reversed on feed (cv2.imread yields BGR).
                resized_img_, final_boxes_, final_scores_, final_category_ = sess.run(
                    [img_plac, final_boxes, final_scores, final_category],
                    feed_dict={img_plac: img_resize[:, :, ::-1]})
                # Drop boxes that have degenerated into a line (zero width or height).
                inds_inside = np.where(
                    (final_boxes_[:, 2] > final_boxes_[:, 0])
                    & (final_boxes_[:, 3] > final_boxes_[:, 1]))[0]
                final_boxes_ = final_boxes_[inds_inside, :]
                final_scores_ = final_scores_[inds_inside]
                final_category_ = final_category_[inds_inside]
                xmin, ymin, xmax, ymax = final_boxes_[:, 0], final_boxes_[:, 1], \
                                         final_boxes_[:, 2], final_boxes_[:, 3]
                resized_h, resized_w = resized_img_.shape[
                    0], resized_img_.shape[1]
                # normalized boxes
                xmin = xmin / resized_w
                xmax = xmax / resized_w
                ymin = ymin / resized_h
                ymax = ymax / resized_h
                resized_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
                detected_scores_.append(final_scores_)
                detected_boxes_.append(resized_boxes)
                detected_categories_.append(final_category_)
            end = time.time()
            # Merge the detections of all scales.
            from ensemble_boxes import weighted_boxes_fusion
            boxes, scores, labels = weighted_boxes_fusion(detected_boxes_, detected_scores_, detected_categories_, weights=None, iou_thr=0.6, skip_box_thr=0.001, conf_type='avg')
            detected_scores = np.array(scores)
            detected_boxes = np.array(boxes)
            detected_categories = np.array(labels)
            if not normalized_result:
                # map coordinate to raw picture
                xmin, ymin, xmax, ymax = detected_boxes[:, 0] * raw_w, detected_boxes[:, 1] * raw_h, \
                                         detected_boxes[:, 2] * raw_w, detected_boxes[:, 3] * raw_h
                # Written as x, y, width, height.
                boxes = np.transpose(
                    np.stack([xmin, ymin, xmax - xmin, ymax - ymin]))
                result_full_path = save_path + \
                    str('/' + img_name[i].split('.')[0]) + '.txt'
            else:
                # `boxes` stays as returned by the fusion (normalised corners).
                result_full_path = ensemble_path +\
                    str('/' + img_name[i].split('.')[0]) + '.txt'
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            if not os.path.exists(ensemble_path):
                os.makedirs(ensemble_path)
            with open(result_full_path, 'w') as fout:
                num = 0
                if normalized_result:
                    # Header line with the raw image size.
                    fout.write(str(raw_w))
                    fout.write(',')
                    fout.write(str(raw_h))
                    fout.write('\n')
                for j, box in enumerate(boxes):
                    # At most 500 lines are written per image.
                    if num < 500:
                        # Categories 1 and 12 are never written (presumably the
                        # dataset's ignored/other classes — confirm); the
                        # remaining ids are shifted down by one on output.
                        if detected_categories[j] != 1 and detected_categories[
                                j] != 12:
                            fout.write(str(box[0]))
                            fout.write(',')
                            fout.write(str(box[1]))
                            fout.write(',')
                            fout.write(str(box[2]))
                            fout.write(',')
                            fout.write(str(box[3]))
                            fout.write(',')
                            fout.write(str(detected_scores[j]))
                            fout.write(',')
                            fout.write(str(detected_categories[j] - 1))
                            fout.write(',-1,')
                            fout.write('-1' + '\n')
                            num += 1
                    else:
                        break
                # NOTE(review): redundant — the `with` block already closes the file.
                fout.close()
            tools.view_bar(
                '{}, time cost: {}s'.format(img_name[i], (end - start)), i + 1,
                len(img_name))
def visualized_visdrone(img_dir, save_path):
    """Run the detector over every file in ``img_dir`` and save annotated images.

    Each image is run at one or several short-side sizes (depending on
    ``cfgs.MULTI_SCALSE_TESTING``); the per-scale detections are normalised to
    [0, 1], merged with ``weighted_boxes_fusion``, mapped back to raw-image
    pixels, filtered by ``cfgs.INFERENCE_SCORE_THRSHOLD`` and drawn.

    Args:
        img_dir: Directory whose entries are all read with ``cv2.imread``.
        save_path: Directory the annotated images are written to, under their
            original file names.

    Raises:
        IndexError: If the checkpoint to restore is the pretrained backbone
            weights (``cfgs.PRETRAIN_BACKBONE_WEIGHTS``).
    """
    # initialize
    net = BuildFrameworks(False)
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    # Input normalisation depends on the backbone: scale to [0, 1] and
    # standardise for the *_v1d backbones, plain mean subtraction otherwise.
    if cfgs.PRETRAIN_BACKBONE_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_tensor = img_plac / 255 - tf.constant([[cfgs.PIXEL_MEAN_]])
        img_tensor = img_tensor / tf.constant([cfgs.PIXEL_STD])
    else:
        img_tensor = img_plac - tf.constant([[cfgs.PIXEL_MEAN]])
    # Add the batch dimension.
    img_tensor = tf.expand_dims(img_tensor, axis=0)
    # detection result dict
    det_dict = net.build_framworks(input_img_batch=img_tensor)
    final_boxes, final_scores, final_category = det_dict[
        "final_boxes"], det_dict["final_scores"], det_dict["final_category"]
    restorer, restore_ckpt = net.get_restorer()
    if restore_ckpt == cfgs.PRETRAIN_BACKBONE_WEIGHTS:
        raise IndexError("----->Please select the trained weights rather than pretrained weights.")
    img_name = os.listdir(path=img_dir)
    with tf.Session() as sess:
        restorer.restore(sess, restore_ckpt)
        for i in range(len(img_name)):
            # read images
            # The channel axis is reversed right after loading (cv2.imread yields BGR).
            img_array = cv2.imread(os.path.join(img_dir, img_name[i]))[:, :, ::-1]
            raw_h = img_array.shape[0]
            raw_w = img_array.shape[1]
            # One entry per test scale; these lists are the fusion inputs.
            detected_scores_, detected_boxes_, detected_categories_ = [], [], []
            start=time.time()
            if cfgs.MULTI_SCALSE_TESTING:
                short_size_len=cfgs.SHORT_SIZE_LIMITATION_LIST
            else:
                short_size_len=[cfgs.SHORT_SIZE_LIMITATION]
            for short_size in short_size_len:
                # Resize so the shorter side equals short_size, capping the
                # longer side at cfgs.MAX_LENGTH.
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(int(short_size * float(raw_w) / raw_h), cfgs.MAX_LENGTH)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w), cfgs.MAX_LENGTH), short_size
                img_resize = cv2.resize(img_array, (new_w, new_h))
                resized_img_, final_boxes_, final_scores_, final_category_ = sess.run(
                    [img_plac, final_boxes, final_scores, final_category],
                    feed_dict={img_plac: img_resize})
                # Drop boxes that have degenerated into a line (zero width or height).
                inds_inside = np.where(
                    (final_boxes_[:, 2] > final_boxes_[:, 0]) &
                    (final_boxes_[:, 3] > final_boxes_[:, 1])
                )[0]
                final_boxes_ = final_boxes_[inds_inside, :]
                final_scores_ = final_scores_[inds_inside]
                final_category_ = final_category_[inds_inside]
                xmin, ymin, xmax, ymax = final_boxes_[:, 0], final_boxes_[:, 1], \
                                         final_boxes_[:, 2], final_boxes_[:, 3]
                resized_h, resized_w = resized_img_.shape[0], resized_img_.shape[1]
                # normalized boxes
                xmin = xmin / resized_w
                xmax = xmax / resized_w
                ymin = ymin / resized_h
                ymax = ymax / resized_h
                resized_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
                detected_scores_.append(final_scores_)
                detected_boxes_.append(resized_boxes)
                detected_categories_.append(final_category_)
            end=time.time()
            # Merge the detections of all scales.
            from ensemble_boxes import weighted_boxes_fusion
            boxes, scores, labels = weighted_boxes_fusion(detected_boxes_, detected_scores_, detected_categories_, weights=None, iou_thr=0.6, skip_box_thr=0.001, conf_type='avg')
            detected_scores = np.array(scores)
            detected_boxes = np.array(boxes)
            detected_categories = np.array(labels)
            # Map the normalised corners back to raw-image pixels.
            xmin, ymin, xmax, ymax = detected_boxes[:, 0] * raw_w, detected_boxes[:, 1] * raw_h, \
                                     detected_boxes[:, 2] * raw_w, detected_boxes[:, 3] * raw_h
            detected_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            # Only detections at or above the configured score are drawn.
            show_indices = detected_scores >= cfgs.INFERENCE_SCORE_THRSHOLD
            detected_scores = detected_scores[show_indices]
            detected_boxes = detected_boxes[show_indices]
            detected_categories = detected_categories[show_indices]
            # NOTE(review): img_array is the raw image loaded above, not the
            # normalised network input, so this "de-normalisation" shifts and
            # scales raw pixel values — confirm this is what the drawing
            # helper expects.
            if cfgs.PRETRAIN_BACKBONE_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                draw_img = (img_array * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
            else:
                draw_img = img_array + np.array(cfgs.PIXEL_MEAN)
            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img, boxes=detected_boxes, labels=detected_categories, scores=detected_scores, in_graph=False)
            # Reverse the channel axis again before writing with cv2.
            cv2.imwrite(os.path.join(save_path, img_name[i]), final_detections[:, :, ::-1])
            tools.view_bar('{}, time cost: {}s'.format(img_name[i], (end - start)), i + 1, len(img_name))
def ensemble(
        subm_list,
        iou_same=0.5,
        out_path=None,
        skip_box_thr=0.00000001,
):
    """Ensemble several submission CSVs with weighted boxes fusion.

    Every submission must contain exactly the same ``image_id`` values.  Each
    ``PredictionString`` is read as whitespace-separated groups of six values
    ``cls prob x1 y1 x2 y2``; coordinates are normalised with the image sizes
    from ``get_train_test_image_sizes()`` (indexed as ``(height, width)``),
    fused per image and written back in pixels.

    Changes compared with the earlier version: the output file is managed by a
    ``with`` block (it was left open if anything failed mid-way), the id check
    uses ``is None`` so an empty first submission no longer bypasses it, and
    the output line is built with ``str.join``.

    Args:
        subm_list: Iterable of ``(csv_path, weight)`` pairs.
        iou_same: IoU threshold forwarded to ``weighted_boxes_fusion``.
        out_path: Output CSV path; defaults to
            ``SUBM_PATH + 'ensemble_iou_{iou_same}.csv'``.
        skip_box_thr: Score threshold forwarded to ``weighted_boxes_fusion``.

    Returns:
        The path of the written CSV.

    Raises:
        ValueError: If ``subm_list`` is empty.
        SystemExit: If the submissions do not share the same image ids.
    """
    sizes = get_train_test_image_sizes()
    preds = []
    weights = []
    checker = None
    for path, weight in subm_list:
        s = pd.read_csv(path)
        s.sort_values('image_id', inplace=True)
        s.reset_index(drop=True, inplace=True)
        ids = tuple(s['image_id'])
        if checker is None:
            checker = ids
        elif ids != checker:
            print(set(checker) - set(ids))
            print('Different IDS!', len(ids), path)
            exit()
        preds.append(s['PredictionString'].values)
        weights.append(weight)

    if checker is None:
        raise ValueError('subm_list must contain at least one submission')

    if out_path is None:
        out_path = SUBM_PATH + 'ensemble_iou_{}.csv'.format(iou_same)

    with open(out_path, 'w') as out:
        out.write('image_id,PredictionString\n')
        for j, image_id in enumerate(checker):
            boxes_list = []
            scores_list = []
            labels_list = []
            for model_preds in preds:
                boxes = []
                scores = []
                labels = []
                p1 = model_preds[j]
                # A missing PredictionString is read by pandas as NaN.
                if str(p1) != 'nan':
                    arr = p1.strip().split(' ')
                    for k in range(0, len(arr), 6):
                        cls = int(arr[k])
                        prob = float(arr[k + 1])
                        x1 = float(arr[k + 2]) / sizes[image_id][1]
                        y1 = float(arr[k + 3]) / sizes[image_id][0]
                        x2 = float(arr[k + 4]) / sizes[image_id][1]
                        y2 = float(arr[k + 5]) / sizes[image_id][0]
                        boxes.append([x1, y1, x2, y2])
                        scores.append(prob)
                        labels.append(cls)
                boxes_list.append(boxes)
                scores_list.append(scores)
                labels_list.append(labels)

            boxes, scores, labels = weighted_boxes_fusion(
                boxes_list,
                scores_list,
                labels_list,
                iou_thr=iou_same,
                skip_box_thr=skip_box_thr,
                weights=weights,
                allows_overflow=True
            )

            if len(boxes) == 0:
                # Nothing fused: emit class 14 with the fixed 0 0 1 1 box.
                out.write('{},14 1 0 0 1 1\n'.format(image_id))
                continue

            parts = []
            for box, prob, label in zip(boxes, scores, labels):
                cls = int(label)
                if cls == 14:
                    # Class 14 always carries the fixed 0 0 1 1 box.
                    parts.append('{} {} {} {} {} {}'.format(cls, prob, 0, 0, 1, 1))
                else:
                    x1 = int(box[0] * sizes[image_id][1])
                    y1 = int(box[1] * sizes[image_id][0])
                    x2 = int(box[2] * sizes[image_id][1])
                    y2 = int(box[3] * sizes[image_id][0])
                    parts.append('{} {} {} {} {} {}'.format(cls, prob, x1, y1, x2, y2))
            out.write('{},{}\n'.format(image_id, ' '.join(parts)))

    return out_path
def merge_boxes_center_net(div_scale=4, iou_same=0.4):
    """Merge the duplicate training annotations of every image and save them as CSV.

    Reads ``INPUT_PATH + 'train.csv'`` and the image sizes from
    ``OUTPUT_PATH + 'image_width_height_train.csv'``.  For each image the boxes
    of all classes except 14 are normalised, fused with
    ``weighted_boxes_fusion`` (every box has score 1.0, ``allows_overflow`` is
    on) and written in the coordinates of the image reduced by ``div_scale``.
    Images that only contain class 14 get a single line with empty fields.

    The output file is now managed by a ``with`` block; before, it stayed open
    (and possibly unflushed) whenever a sanity check called ``exit()`` or an
    exception was raised.

    Args:
        div_scale: Integer factor by which the image size is divided for the
            output coordinates.
        iou_same: IoU threshold forwarded to ``weighted_boxes_fusion``.

    Raises:
        SystemExit: If a normalised box has ``x2 < x1``, ``y2 < y1``,
            ``x2 > 1`` or ``y2 > 1``.
    """
    train = pd.read_csv(INPUT_PATH + 'train.csv')
    unique_images = train['image_id'].unique()
    print(len(train), len(unique_images))
    cls_names = get_classes_array()

    # image_id -> (height, width)
    sizes_df = pd.read_csv(OUTPUT_PATH + 'image_width_height_train.csv')
    sizes = {
        image_id: (height, width)
        for image_id, height, width in zip(
            sizes_df['image_id'], sizes_df['height'], sizes_df['width'])
    }

    out_path = OUTPUT_PATH + 'boxes_description_iou_{}_div_{}.csv'.format(iou_same, div_scale)
    with open(out_path, 'w') as out:
        out.write('id,x1,y1,x2,y2,class,score\n')
        for index, group in train.groupby('image_id'):
            print(index, len(group))

            # Images annotated only with class 14 get an empty line.
            if not (group['class_id'] != 14).any():
                out.write('{},,,,,,\n'.format(index))
                continue

            boxes = []
            scores = []
            labels = []
            for _, row in group.iterrows():
                if row['class_id'] == 14:
                    continue
                image_id = row['image_id']
                x1 = row['x_min'] / sizes[image_id][1]
                y1 = row['y_min'] / sizes[image_id][0]
                x2 = row['x_max'] / sizes[image_id][1]
                y2 = row['y_max'] / sizes[image_id][0]
                # Sanity checks on the normalised coordinates.
                if x2 < x1:
                    print('Strange x2 < x1')
                    exit()
                if y2 < y1:
                    print('Strange y2 < y1')
                    exit()
                if x2 > 1:
                    print('Strange x2 > 1')
                    exit()
                if y2 > 1:
                    print('Strange y2 > 1')
                    exit()
                boxes.append([x1, y1, x2, y2])
                labels.append(row['class_id'])
                scores.append(1.0)

            print(len(boxes), len(labels), len(scores))
            boxes, scores, labels = weighted_boxes_fusion(
                [boxes], [scores], [labels],
                iou_thr=iou_same, weights=None, allows_overflow=True)
            print(len(boxes), len(labels), len(scores))

            # Coordinates are written for images reduced by div_scale.
            scale_y = sizes[index][0] // div_scale
            scale_x = sizes[index][1] // div_scale
            boxes[:, 0] *= scale_x
            boxes[:, 1] *= scale_y
            boxes[:, 2] *= scale_x
            boxes[:, 3] *= scale_y
            boxes = np.round(boxes).astype(np.int32)
            labels = labels.astype(np.int32)

            for i in range(len(boxes)):
                # Cap the (overflowing) fused score at 3.
                if scores[i] > 3:
                    scores[i] = 3
                out.write("{},{},{},{},{},{},{:.0f}\n".format(
                    index, boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3],
                    cls_names[labels[i]], scores[i]))
def ensemble_object_detectors(list_object_detection_predictions,
                              original_image_df,
                              height_col,
                              width_col,
                              iou_thr,
                              weights_list,
                              technique_names=("Faster RCNN", "YOLOv5")):
    """Ensemble per-model detections with ZFTurbo's weighted boxes fusion.

    For every unique ``image_id`` in ``original_image_df`` the boxes of each
    prediction frame are divided by ``[width, height, width, height]``, passed
    through ``check_normalization`` and fused with ``weighted_boxes_fusion``
    (``skip_box_thr=0``). The fused boxes are multiplied back by the image
    dimensions.

    Args:
        list_object_detection_predictions: Prediction DataFrames, one per
            model, with columns ``image_id``, ``x_min``, ``y_min``, ``x_max``,
            ``y_max``, ``label`` and ``confidence_score``.
        original_image_df: DataFrame with an ``image_id`` column plus the
            width/height columns named below.
        height_col: Name of the image-height column in ``original_image_df``.
        width_col: Name of the image-width column in ``original_image_df``.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        weights_list: One fusion weight per prediction frame.
        technique_names: Model names handed to ``check_normalization``, in
            the same order as the prediction frames. Predictions, names and
            weights are zipped together, so only as many models are used as
            the shortest of the three; with the default, at most two.

    Returns:
        DataFrame with columns ``image_id``, ``x_min``, ``y_min``, ``x_max``,
        ``y_max``, ``label``, ``confidence_score``. Images for which fusion
        produced no box get one placeholder row with box ``(0, 0, 1, 1)``,
        label 14 and confidence 1 (label 14 is presumably the "no finding"
        class - TODO confirm).
    """
    output_columns = ["image_id", "x_min", "y_min", "x_max", "y_max",
                      "label", "confidence_score"]
    ensembled_outputs = []
    original_image_ids = original_image_df["image_id"].unique()

    # perform wbf for each image
    for o_id in original_image_ids:
        # first matching row supplies the original image width and height
        width, height = original_image_df.loc[
            original_image_df["image_id"] == o_id,
            [width_col, height_col]].values[0]
        dimensions = [width, height, width, height]

        normalized_arr = []
        labels_arr = []
        scores_arr = []
        weights_arr = []
        for prediction, technique, weights in zip(
                list_object_detection_predictions, technique_names,
                weights_list):
            image_data = prediction[prediction["image_id"] == o_id]
            bboxes = image_data.loc[:, ["x_min", "y_min", "x_max", "y_max"]]
            # normalize the bounding boxes so they are between 0 and 1
            normalized = np.divide(bboxes, dimensions)
            normalized = check_normalization(normalized, technique, o_id)

            normalized_arr.append(normalized.values)
            labels_arr.append(image_data.loc[:, "label"].values)
            scores_arr.append(image_data.loc[:, "confidence_score"].values)
            weights_arr.append(weights)

        boxes_merged, scores_merged, labels_merged = weighted_boxes_fusion(
            normalized_arr,
            scores_arr,
            labels_arr,
            weights=weights_arr,
            iou_thr=iou_thr,
            skip_box_thr=0)

        # convert the fused boxes back to non-normalized coordinates
        fused_boxes = boxes_merged * dimensions
        for merged_box, merged_score, merged_label in zip(
                fused_boxes, scores_merged, labels_merged):
            ensembled_outputs.append({
                "image_id": o_id,
                "x_min": merged_box[0],
                "y_min": merged_box[1],
                "x_max": merged_box[2],
                "y_max": merged_box[3],
                "label": merged_label,
                "confidence_score": merged_score
            })

    # Explicit columns keep the "image_id" lookup below valid even when no
    # box was fused at all (an empty list yields a column-less frame).
    ensembled_outputs_df = pd.DataFrame(ensembled_outputs,
                                        columns=output_columns)

    # handle image ids that ended up without any fused box
    missing_ids = np.setdiff1d(original_image_ids,
                               ensembled_outputs_df["image_id"].unique())
    if len(missing_ids) == 0:
        return ensembled_outputs_df

    missing_data_df = pd.DataFrame(
        [{
            "image_id": missing_id,
            "x_min": 0,
            "y_min": 0,
            "x_max": 1,
            "y_max": 1,
            "label": 14,
            "confidence_score": 1
        } for missing_id in missing_ids],
        columns=output_columns)

    if ensembled_outputs_df.empty:
        return missing_data_df

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    # and, like append, keeps the original row indices.
    return pd.concat([ensembled_outputs_df, missing_data_df])
def boxes_fusion_single_image(boxes,
                              scores,
                              classes,
                              image_shape,
                              nms_thresh=0.5,
                              topk_per_image=-1,
                              method='nms',
                              device="cpu"):
    """Fuse several sets of detections for one image into one ``Instances``.

    Each entry of ``boxes`` / ``scores`` / ``classes`` holds one set of
    detections for the same image (presumably one per model or TTA pass -
    confirm with the caller). Boxes are clipped to the image, boxes narrower
    or shorter than 1.0 are dropped, and the rest are normalized to [0, 1]
    and merged with ``weighted_boxes_fusion``.

    Unlike the previous version, the input lists are not modified.

    Args:
        boxes: Sequence of per-set box tensors accepted by ``Boxes``.
        scores: Sequence of per-set score tensors, aligned with ``boxes``.
        classes: Sequence of per-set class tensors, aligned with ``boxes``.
        image_shape: Image size; index 0 scales the y coordinates and index 1
            the x coordinates (i.e. ``(height, width)`` for x1,y1,x2,y2 boxes).
        nms_thresh: IoU threshold forwarded to ``weighted_boxes_fusion``.
        topk_per_image: If >= 0, keep only the first ``topk_per_image`` fused
            detections.
        method: ``"nms"`` or ``"wbf"``.
        device: Device the returned tensors are moved to.

    Returns:
        ``Instances`` with ``pred_boxes``, ``scores`` and ``pred_classes``.

    Raises:
        AssertionError: If ``method`` is unsupported or the three input
            sequences differ in length.
    """
    assert method in ["nms", "wbf"], f"Not implemented method {method}"
    assert len(scores) == len(boxes) and len(scores) == len(classes), \
        "Length of boxes, scores, classes is not equal!"

    # Collect the filtered, normalized arrays in fresh lists so the caller's
    # lists (and the tensors stored in them) are left as they were passed in.
    norm_boxes = []
    kept_scores = []
    kept_classes = []
    for raw_boxes, raw_scores, raw_classes in zip(boxes, scores, classes):
        img_boxes = Boxes(raw_boxes)
        img_boxes.clip(image_shape)
        # filter out boxes whose width or height is below the threshold
        keep = img_boxes.nonempty(1.0)
        box_arr = img_boxes[keep].tensor.cpu().numpy()
        box_arr[:, 0::2] = box_arr[:, 0::2] / image_shape[1]
        box_arr[:, 1::2] = box_arr[:, 1::2] / image_shape[0]
        norm_boxes.append(box_arr)
        kept_scores.append(raw_scores[keep].cpu().numpy())
        kept_classes.append(raw_classes[keep].cpu().numpy())

    # NOTE(review): both "nms" and "wbf" have always dispatched to
    # weighted_boxes_fusion with identical arguments; the method flag is
    # validated but does not change the algorithm. Confirm whether "nms"
    # was meant to call a real NMS routine before changing this.
    fused_boxes, fused_scores, fused_classes = weighted_boxes_fusion(
        norm_boxes,
        kept_scores,
        kept_classes,
        iou_thr=nms_thresh)

    if topk_per_image >= 0:
        fused_boxes = fused_boxes[:topk_per_image]
        fused_scores = fused_scores[:topk_per_image]
        fused_classes = fused_classes[:topk_per_image]

    # resize back to the image shape
    fused_boxes[:, 0::2] = fused_boxes[:, 0::2] * image_shape[1]
    fused_boxes[:, 1::2] = fused_boxes[:, 1::2] * image_shape[0]

    # to tensor
    box_tensor = torch.from_numpy(fused_boxes).to(device=device)
    score_tensor = torch.from_numpy(fused_scores).to(device=device)
    class_tensor = torch.from_numpy(fused_classes).to(device=device)

    result = Instances(image_shape)
    pred_boxes = Boxes(box_tensor)
    pred_boxes.clip(image_shape)
    result.pred_boxes = pred_boxes
    result.scores = score_tensor
    result.pred_classes = class_tensor
    return result