def _eval(self, data_dict):
    """Evaluate one batch and record its metrics in the running log.

    ``data_dict`` is passed to ``get_eval`` together with ``self.config``;
    the returned dictionary is then read to fill ``self._running_log``.
    Nothing is returned.
    """
    results = get_eval(data_dict=data_dict, config=self.config)

    # dump
    log = self._running_log
    log["lang_acc"] = results["lang_acc"].item()
    log["ref_acc"] = np.mean(results["ref_acc"])
    log["seg_acc"] = results["seg_acc"].item()
    # stored as returned by get_eval, without any reduction
    log['ref_iou'] = results['ref_iou']
def visualize(args):
    """Run the model over the "val" split and dump results for every batch.

    Reads ``args.no_nms`` to decide whether a post-processing configuration
    is handed to ``get_eval``; ``args`` is also forwarded to the data, model
    and dump helpers. Relies on a module-level ``DC`` dataset config.
    """
    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "val", DC, False)

    # model
    model = get_model(args)

    # config: post-processing is disabled entirely when NMS is switched off
    if args.no_nms:
        post_processing = None
    else:
        post_processing = {
            'remove_empty_box': True,
            'use_3d_nms': True,
            'nms_iou': 0.25,
            'use_old_type_nms': False,
            'cls_nms': True,
            'per_class_proposal': True,
            'conf_thresh': 0.05,
            'dataset_config': DC,
        }

    # evaluate
    print("visualizing...")
    for batch in tqdm(dataloader):
        for name in batch:
            batch[name] = batch[name].cuda()

        # feed
        outputs = model(batch)
        _, outputs = get_loss(
            data_dict=outputs,
            config=DC,
            detection=True,
            reference=True,
        )
        outputs = get_eval(
            data_dict=outputs,
            config=DC,
            reference=True,
            post_processing=post_processing,
        )

        # visualize
        dump_results(args, scanrefer, outputs, DC)

    print("done!")
def _eval(self, data_dict):
    """Evaluate one batch and record its metrics in the running log.

    ``get_eval`` is called with ``self.config``, ``self.reference`` and
    ``self.use_lang_classifier``; its output is then copied into
    ``self._running_log``. Entries read with ``.item()`` are stored as
    Python scalars, the others are averaged with ``np.mean``. Nothing is
    returned.
    """
    results = get_eval(
        data_dict=data_dict,
        config=self.config,
        reference=self.reference,
        use_lang_classifier=self.use_lang_classifier,
    )

    # dump
    log = self._running_log
    log["lang_acc"] = results["lang_acc"].item()
    log["ref_acc"] = np.mean(results["ref_acc"])
    for name in ("obj_acc", "pos_ratio", "neg_ratio"):
        log[name] = results[name].item()
    for log_key, result_key in (
        ("iou_rate_0.25", "ref_iou_rate_0.25"),
        ("iou_rate_0.5", "ref_iou_rate_0.5"),
    ):
        log[log_key] = np.mean(results[result_key])
def _ref_select(mask_row, others_row, mask_value=None, others_value=None):
    """Return a boolean selector of samples matching both values (None = any)."""
    chosen = np.ones(mask_row.shape, dtype=bool)
    if mask_value is not None:
        chosen &= (mask_row == mask_value)
    if others_value is not None:
        chosen &= (others_row == others_value)
    return chosen


def _ref_subset_metrics(ref_acc_row, iou_row, chosen):
    """Return (ref_acc, acc@0.25iou, acc@0.5iou) over the chosen samples, zeros if none."""
    total = int(np.count_nonzero(chosen))
    if total == 0:
        return 0, 0, 0
    picked = iou_row[chosen]
    return (
        np.mean(ref_acc_row[chosen]),
        np.count_nonzero(picked >= 0.25) / total,
        np.count_nonzero(picked >= 0.5) / total,
    )


def _ref_summarize(ref_acc, ious, masks, others):
    """Build the per-subset sample counts (first run) and mean scores (all runs)."""
    # None means "do not filter on this attribute", i.e. the "overall" rows.
    multiple_groups = {"unique": 0, "multiple": 1, "overall": None}
    others_groups = {"not_in_others": 0, "in_others": 1, "overall": None}

    stats, scores = {}, {}
    for k, v in multiple_groups.items():
        stats[k], scores[k] = {}, {}
        for k_o, v_o in others_groups.items():
            stats[k][k_o] = np.count_nonzero(
                _ref_select(masks[0], others[0], v, v_o))
            per_run = [
                _ref_subset_metrics(
                    ref_acc[i], ious[i],
                    _ref_select(masks[i], others[i], v, v_o))
                for i in range(masks.shape[0])
            ]
            scores[k][k_o] = {
                "ref_acc": np.mean([m[0] for m in per_run]),
                "acc@0.25iou": np.mean([m[1] for m in per_run]),
                "acc@0.5iou": np.mean([m[2] for m in per_run]),
            }
    return stats, scores


def eval_ref(args):
    """Evaluate referring-expression localization on the "val" split.

    When ``<args.use_checkpoint>/scores.p`` does not exist, the model is run
    over the validation dataloader (seeded with ``args.manual_seed``), the
    per-sample predictions are pickled to ``predictions.p`` and the raw
    per-sample scores to ``scores.p``. Otherwise the cached scores are
    loaded. In both cases the sample counts and the mean ``ref_acc``,
    ``acc@0.25iou`` and ``acc@0.5iou`` are printed for every combination of
    unique/multiple/overall and not_in_others/in_others/overall, followed by
    the language classification accuracy.

    Args:
        args: namespace providing ``manual_seed`` and ``use_checkpoint``;
            it is also forwarded to ``get_dataloader`` and ``get_model``.
    """
    print("evaluate...")
    # constant
    DC = ScannetDatasetConfig()

    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer()

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "val")

    # model
    model = get_model(args)

    # random seeds
    seeds = [args.manual_seed]

    # only these batch entries are moved to the GPU
    cuda_keys = (
        'lang_feat', 'lang_len', 'object_cat', 'lidar', 'point_min',
        'point_max', 'mlm_label', 'ref_center_label',
        'ref_size_residual_label',
    )

    # evaluate
    print("evaluating...")
    score_path = os.path.join(args.use_checkpoint, "scores.p")
    pred_path = os.path.join(args.use_checkpoint, "predictions.p")
    if not os.path.exists(score_path):
        ref_acc_all = []
        ious_all = []
        masks_all = []
        others_all = []
        lang_acc_all = []
        for seed in seeds:
            # reproducibility
            torch.manual_seed(seed)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            np.random.seed(seed)

            print("generating the scores for seed {}...".format(seed))
            ref_acc = []
            ious = []
            masks = []
            others = []
            lang_acc = []
            predictions = {}
            for data in tqdm(dataloader):
                for key in data:
                    if key in cuda_keys:
                        data[key] = data[key].cuda()

                # feed; evaluation never back-propagates, so skip autograd
                with torch.no_grad():
                    data = model(data)
                    data = get_loss(data, DC)
                    data = get_eval(
                        data_dict=data,
                        config=DC,
                    )

                ref_acc += data["ref_acc"]
                ious += data["ref_iou"]
                masks += data["ref_multiple_mask"]
                others += data["ref_others_mask"]
                lang_acc.append(data["lang_acc"].item())

                # store predictions
                ids = data["scan_idx"].detach().cpu().numpy()
                for i in range(ids.shape[0]):
                    entry = scanrefer[ids[i]]
                    record = (
                        predictions
                        .setdefault(entry["scene_id"], {})
                        .setdefault(entry["object_id"], {})
                        .setdefault(entry["ann_id"], {})
                    )
                    record["pred_bbox"] = data["pred_bboxes"][i]
                    record["gt_bbox"] = data["gt_bboxes"][i]
                    record["iou"] = data["ref_iou"][i]

            # save the last predictions (overwritten on every seed)
            with open(pred_path, "wb") as f:
                pickle.dump(predictions, f)

            # save to global
            ref_acc_all.append(ref_acc)
            ious_all.append(ious)
            masks_all.append(masks)
            others_all.append(others)
            lang_acc_all.append(lang_acc)

        # convert to numpy array
        ref_acc = np.array(ref_acc_all)
        ious = np.array(ious_all)
        masks = np.array(masks_all)
        others = np.array(others_all)
        lang_acc = np.array(lang_acc_all)

        # save the global scores
        with open(score_path, "wb") as f:
            pickle.dump(
                {
                    "ref_acc": ref_acc_all,
                    "ious": ious_all,
                    "masks": masks_all,
                    "others": others_all,
                    "lang_acc": lang_acc_all,
                },
                f,
            )
    else:
        print("loading the scores...")
        # NOTE: unpickling can execute arbitrary code; only load score files
        # that were written by this function.
        with open(score_path, "rb") as f:
            cached = pickle.load(f)

        # unpack
        ref_acc = np.array(cached["ref_acc"])
        ious = np.array(cached["ious"])
        masks = np.array(cached["masks"])
        others = np.array(cached["others"])
        lang_acc = np.array(cached["lang_acc"])

    # evaluation stats and aggregated scores
    stats, scores = _ref_summarize(ref_acc, ious, masks, others)

    # report
    print("\nstats:")
    for k_s in stats.keys():
        for k_o in stats[k_s].keys():
            print("{} | {}: {}".format(k_s, k_o, stats[k_s][k_o]))

    for k_s in scores.keys():
        print("\n{}:".format(k_s))
        for k_m in scores[k_s].keys():
            for metric in scores[k_s][k_m].keys():
                print("{} | {} | {}: {:.4f}".format(
                    k_s, k_m, metric, scores[k_s][k_m][metric]))

    print("\nlanguage classification accuracy: {:.4f}".format(np.mean(lang_acc)))
def eval_det(args):
    """Evaluate object detection on the "val" split and print the results.

    Every batch is moved to the GPU, run through the model, the loss and the
    evaluation helpers, and then fed to one ``APCalculator`` per IoU
    threshold (0.25 and 0.5). The mean ``sem_acc`` and the metrics reported
    by each calculator are printed at the end.

    Args:
        args: namespace forwarded to ``get_scanrefer``, ``get_dataloader``
            and ``get_model``.
    """
    print("evaluate detection...")
    # constant
    dataset_config = ScannetDatasetConfig()

    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(
        args, scanrefer, scene_list, "val", dataset_config)

    # model
    model = get_model(args, dataset_config)

    # config
    post_processing = {
        "remove_empty_box": True,
        "use_3d_nms": True,
        "nms_iou": 0.25,
        "use_old_type_nms": False,
        "cls_nms": True,
        "per_class_proposal": True,
        "conf_thresh": 0.05,
        "dataset_config": dataset_config,
    }
    iou_thresholds = [0.25, 0.5]
    ap_calculators = []
    for threshold in iou_thresholds:
        ap_calculators.append(
            APCalculator(threshold, dataset_config.class2type))

    sem_acc = []
    for batch in tqdm(dataloader):
        for name in batch:
            batch[name] = batch[name].cuda()

        # feed
        with torch.no_grad():
            outputs = model(batch)
            _, outputs = get_loss(
                data_dict=outputs,
                config=dataset_config,
                detection=True,
                reference=False,
            )
            outputs = get_eval(
                data_dict=outputs,
                config=dataset_config,
                reference=False,
                post_processing=post_processing,
            )

        sem_acc.append(outputs["sem_acc"].item())

        predicted = parse_predictions(outputs, post_processing)
        groundtruth = parse_groundtruths(outputs, post_processing)
        for calculator in ap_calculators:
            calculator.step(predicted, groundtruth)

    # aggregate object detection results and report
    print("\nobject detection sem_acc: {}".format(np.mean(sem_acc)))
    for threshold, calculator in zip(iou_thresholds, ap_calculators):
        print()
        print("-" * 10, "iou_thresh: %f" % (threshold), "-" * 10)
        metrics = calculator.compute_metrics()
        for key in metrics:
            print("eval %s: %f" % (key, metrics[key]))
def _ref_select(mask_row, others_row, mask_value=None, others_value=None):
    """Return a boolean selector of samples matching both values (None = any)."""
    chosen = np.ones(mask_row.shape, dtype=bool)
    if mask_value is not None:
        chosen &= (mask_row == mask_value)
    if others_value is not None:
        chosen &= (others_row == others_value)
    return chosen


def _ref_subset_metrics(ref_acc_row, iou_row, chosen):
    """Return (ref_acc, acc@0.25iou, acc@0.5iou) over the chosen samples, zeros if none."""
    total = int(np.count_nonzero(chosen))
    if total == 0:
        return 0, 0, 0
    picked = iou_row[chosen]
    return (
        np.mean(ref_acc_row[chosen]),
        np.count_nonzero(picked >= 0.25) / total,
        np.count_nonzero(picked >= 0.5) / total,
    )


def _ref_summarize(ref_acc, ious, masks, others):
    """Build the per-subset sample counts (first run) and mean scores (all runs)."""
    # None means "do not filter on this attribute", i.e. the "overall" rows.
    multiple_groups = {"unique": 0, "multiple": 1, "overall": None}
    others_groups = {"not_in_others": 0, "in_others": 1, "overall": None}

    stats, scores = {}, {}
    for k, v in multiple_groups.items():
        stats[k], scores[k] = {}, {}
        for k_o, v_o in others_groups.items():
            stats[k][k_o] = np.count_nonzero(
                _ref_select(masks[0], others[0], v, v_o))
            per_run = [
                _ref_subset_metrics(
                    ref_acc[i], ious[i],
                    _ref_select(masks[i], others[i], v, v_o))
                for i in range(masks.shape[0])
            ]
            scores[k][k_o] = {
                "ref_acc": np.mean([m[0] for m in per_run]),
                "acc@0.25iou": np.mean([m[1] for m in per_run]),
                "acc@0.5iou": np.mean([m[2] for m in per_run]),
            }
    return stats, scores


def eval_ref(args):
    """Evaluate referring-expression localization on the "val" split.

    Scores are (re)generated when ``scores.p`` is missing from
    ``CONF.PATH.OUTPUT/<args.folder>``, when ``args.force`` is set, or when
    ``args.repeat > 1``; otherwise the cached scores are loaded. Generation
    runs the model once per seed (``args.seed`` followed by
    ``0, 2, 4, ...`` for the remaining repeats), pickles the predictions of
    the last seed to ``predictions.p`` and the raw per-sample scores to
    ``scores.p``. In both cases the sample counts and the mean ``ref_acc``,
    ``acc@0.25iou`` and ``acc@0.5iou`` are printed for every combination of
    unique/multiple/overall and not_in_others/in_others/overall, followed by
    the language classification accuracy.

    Args:
        args: namespace providing ``no_nms``, ``seed``, ``repeat``,
            ``folder``, ``force``, ``no_lang_cls``, ``use_oracle``,
            ``use_cat_rand`` and ``use_best``; it is also forwarded to
            ``get_scanrefer``, ``get_dataloader`` and ``get_model``.
    """
    print("evaluate localization...")
    # constant
    DC = ScannetDatasetConfig()

    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "val", DC)

    # model
    model = get_model(args, DC)

    # config
    POST_DICT = {
        "remove_empty_box": True,
        "use_3d_nms": True,
        "nms_iou": 0.25,
        "use_old_type_nms": False,
        "cls_nms": True,
        "per_class_proposal": True,
        "conf_thresh": 0.05,
        "dataset_config": DC,
    } if not args.no_nms else None

    # random seeds
    seeds = [args.seed] + [2 * i for i in range(args.repeat - 1)]

    # evaluate
    print("evaluating...")
    score_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "scores.p")
    pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "predictions.p")
    gen_flag = (
        not os.path.exists(score_path)) or args.force or args.repeat > 1
    if gen_flag:
        ref_acc_all = []
        ious_all = []
        masks_all = []
        others_all = []
        lang_acc_all = []
        for seed in seeds:
            # reproducibility
            torch.manual_seed(seed)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            np.random.seed(seed)

            print("generating the scores for seed {}...".format(seed))
            ref_acc = []
            ious = []
            masks = []
            others = []
            lang_acc = []
            predictions = {}
            for data in tqdm(dataloader):
                for key in data:
                    data[key] = data[key].cuda()

                # feed; evaluation never back-propagates, so skip autograd
                with torch.no_grad():
                    data = model(data)
                    _, data = get_loss(
                        data_dict=data,
                        config=DC,
                        detection=True,
                        reference=True,
                        use_lang_classifier=not args.no_lang_cls,
                    )
                    data = get_eval(
                        data_dict=data,
                        config=DC,
                        reference=True,
                        use_lang_classifier=not args.no_lang_cls,
                        use_oracle=args.use_oracle,
                        use_cat_rand=args.use_cat_rand,
                        use_best=args.use_best,
                        post_processing=POST_DICT,
                    )

                ref_acc += data["ref_acc"]
                ious += data["ref_iou"]
                masks += data["ref_multiple_mask"]
                others += data["ref_others_mask"]
                lang_acc.append(data["lang_acc"].item())

                # store predictions
                ids = data["scan_idx"].detach().cpu().numpy()
                for i in range(ids.shape[0]):
                    entry = scanrefer[ids[i]]
                    record = (
                        predictions
                        .setdefault(entry["scene_id"], {})
                        .setdefault(entry["object_id"], {})
                        .setdefault(entry["ann_id"], {})
                    )
                    record["pred_bbox"] = data["pred_bboxes"][i]
                    record["gt_bbox"] = data["gt_bboxes"][i]
                    record["iou"] = data["ref_iou"][i]

            # save the last predictions (overwritten on every seed)
            with open(pred_path, "wb") as f:
                pickle.dump(predictions, f)

            # save to global
            ref_acc_all.append(ref_acc)
            ious_all.append(ious)
            masks_all.append(masks)
            others_all.append(others)
            lang_acc_all.append(lang_acc)

        # convert to numpy array
        ref_acc = np.array(ref_acc_all)
        ious = np.array(ious_all)
        masks = np.array(masks_all)
        others = np.array(others_all)
        lang_acc = np.array(lang_acc_all)

        # save the global scores
        with open(score_path, "wb") as f:
            pickle.dump(
                {
                    "ref_acc": ref_acc_all,
                    "ious": ious_all,
                    "masks": masks_all,
                    "others": others_all,
                    "lang_acc": lang_acc_all,
                },
                f,
            )
    else:
        print("loading the scores...")
        # NOTE: unpickling can execute arbitrary code; only load score files
        # that were written by this function.
        with open(score_path, "rb") as f:
            cached = pickle.load(f)

        # unpack
        ref_acc = np.array(cached["ref_acc"])
        ious = np.array(cached["ious"])
        masks = np.array(cached["masks"])
        others = np.array(cached["others"])
        lang_acc = np.array(cached["lang_acc"])

    # evaluation stats and aggregated scores
    stats, scores = _ref_summarize(ref_acc, ious, masks, others)

    # report
    print("\nstats:")
    for k_s in stats.keys():
        for k_o in stats[k_s].keys():
            print("{} | {}: {}".format(k_s, k_o, stats[k_s][k_o]))

    for k_s in scores.keys():
        print("\n{}:".format(k_s))
        for k_m in scores[k_s].keys():
            for metric in scores[k_s][k_m].keys():
                print("{} | {} | {}: {}".format(k_s, k_m, metric,
                                                scores[k_s][k_m][metric]))

    print("\nlanguage classification accuracy: {}".format(np.mean(lang_acc)))