def compute_average_precision_classification(ground_truth, prediction):
    """Compute average precision (classification task) between ground truth
    and prediction data frames.

    Predictions are visited by decreasing score. A prediction is a true
    positive when its video still has an unmatched ground truth row;
    otherwise it is a false positive. Each ground truth row is matched at
    most once, so only the highest scoring prediction of a video counts.
    This code is greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 'score']

    Outputs
    -------
    ap : float
        Average precision score, as returned by ``interpolated_prec_rec``.
    """
    npos = float(len(ground_truth))
    # lock_gt[k] is the rank of the prediction matched to ground truth row k
    # (by position), or -1 while that row is still unmatched.
    lock_gt = np.full(len(ground_truth), -1.0)

    # Sort predictions by decreasing score order. ``argsort`` yields
    # positions, so ``iloc`` is used: ``loc`` would only be correct when the
    # frame happens to carry a default 0..n-1 index.
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros(len(prediction))
    fp = np.zeros(len(prediction))

    # Assigning true positives to ground truth instances.
    for idx, video_id in enumerate(prediction['video-id'].values):
        # Positions (not index labels) of the ground truth rows of this video.
        gt_positions = np.flatnonzero(
            (ground_truth['video-id'] == video_id).values)
        # Keep only the rows that no earlier prediction has claimed.
        free_positions = gt_positions[lock_gt[gt_positions] < 0]
        if free_positions.size == 0:
            # No ground truth in this video, or all of it already matched.
            fp[idx] = 1
            continue
        tp[idx] = 1
        lock_gt[free_positions[0]] = idx

    # Computing prec-rec. The builtin ``float`` replaces the ``np.float``
    # alias, which was removed in NumPy 1.24.
    tp = np.cumsum(tp).astype(float)
    fp = np.cumsum(fp).astype(float)
    rec = tp / npos
    prec = tp / (tp + fp)
    return interpolated_prec_rec(prec, rec)
def compute_mAP_N(result, this_cls_pred, this_cls_gt):
    """Compute the normalized mean average precision of one class.

    For every tIoU threshold, a prediction counts as a true positive when
    its matched ground truth id column is non-null and as a false positive
    otherwise. Precision is normalized with
    ``result.average_num_instance_per_class`` and the per-threshold average
    precisions are averaged.

    Parameters
    ----------
    result : object
        Must expose ``tiou_thresholds``, ``matched_gt_id_cols`` (one column
        name per threshold) and ``average_num_instance_per_class``.
    this_cls_pred : df
        Predictions of the class; must contain the columns named in
        ``result.matched_gt_id_cols``.
        NOTE(review): rows are presumably already sorted by decreasing
        score, since the cumulative sums follow row order -- confirm with
        the caller.
    this_cls_gt : df
        Ground truth of the class. Required fields: ['gt-id']

    Outputs
    -------
    mAP_N : float
        Mean over the tIoU thresholds of the normalized average precision.
    """
    num_thresholds = len(result.tiou_thresholds)
    num_predictions = len(this_cls_pred)
    instances_per_class = result.average_num_instance_per_class

    ap = np.zeros(num_thresholds)
    tp = np.zeros((num_thresholds, num_predictions))
    fp = np.zeros((num_thresholds, num_predictions))

    for tidx in range(num_thresholds):
        # A null matched ground truth id means the prediction was unmatched.
        unmatched = pd.isnull(
            this_cls_pred[result.matched_gt_id_cols[tidx]]).values
        fp[tidx, unmatched] = 1
        tp[tidx, ~unmatched] = 1

    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # removed in NumPy 1.24.
    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / len(np.unique(this_cls_gt['gt-id']))
    normalized_tp = recall_cumsum * instances_per_class
    precision_cumsum = normalized_tp / (normalized_tp + fp_cumsum)

    for tidx in range(num_thresholds):
        ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx, :],
                                         recall_cumsum[tidx, :])

    return ap.mean()
def compute_average_precision_detection(ground_truth,
                                         prediction,
                                         tiou_thresholds=np.linspace(
                                             0.5, 0.95, 10)):
    """Compute average precision (detection task) between ground truth and
    prediction data frames.

    If multiple predictions match the same ground truth segment, only the
    one with the highest score is counted as a true positive. This code is
    greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thresholds : 1darray, optional
        Temporal intersection over union thresholds.

    Outputs
    -------
    ap : 1darray
        Average precision score for each entry of ``tiou_thresholds``.
    """
    ap = np.zeros(len(tiou_thresholds))
    if prediction.empty:
        # Nothing to score: same zero result as the sibling multi-threshold
        # implementation in this file.
        return ap

    npos = float(len(ground_truth))
    # Give the ground truth a positional index so that the 'index' column
    # produced by reset_index() below can address lock_gt safely.
    ground_truth = ground_truth.reset_index(drop=True)
    # lock_gt[t, k] is the rank of the prediction matched to ground truth
    # row k at threshold t, or -1 while unmatched.
    lock_gt = np.full((len(tiou_thresholds), len(ground_truth)), -1.0)

    # Sort predictions by decreasing score order (argsort gives positions,
    # hence iloc rather than loc).
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros((len(tiou_thresholds), len(prediction)))
    fp = np.zeros((len(tiou_thresholds), len(prediction)))

    # Adaptation to query faster
    ground_truth_gbvn = ground_truth.groupby('video-id')

    # Assigning true positives to ground truth instances.
    for idx, this_pred in tqdm.tqdm(prediction.iterrows()):
        try:
            # Check if there is at least one ground truth in the video.
            ground_truth_videoid = ground_truth_gbvn.get_group(
                this_pred['video-id'])
        except KeyError:
            # get_group raises KeyError for a video without ground truth.
            fp[:, idx] = 1
            continue

        this_gt = ground_truth_videoid.reset_index()
        gt_rows = this_gt['index'].values
        tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
                               this_gt[['t-start', 't-end']].values)
        # Visit the candidate ground truths by decreasing tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for tidx, tiou_thr in enumerate(tiou_thresholds):
            for jdx in tiou_sorted_idx:
                if tiou_arr[jdx] < tiou_thr:
                    # Every remaining candidate overlaps even less.
                    break
                if lock_gt[tidx, gt_rows[jdx]] >= 0:
                    # Already claimed by a higher scoring prediction.
                    continue
                # Assign as true positive after the filters above.
                tp[tidx, idx] = 1
                lock_gt[tidx, gt_rows[jdx]] = idx
                break

            # Below threshold, or every good candidate already locked.
            if tp[tidx, idx] == 0:
                fp[tidx, idx] = 1

    for tidx in range(len(tiou_thresholds)):
        # Computing prec-rec. The builtin ``float`` replaces the
        # ``np.float`` alias, which was removed in NumPy 1.24.
        this_tp = np.cumsum(tp[tidx, :]).astype(float)
        this_fp = np.cumsum(fp[tidx, :]).astype(float)
        rec = this_tp / npos
        prec = this_tp / (this_tp + this_fp)
        ap[tidx] = interpolated_prec_rec(prec, rec)

    return ap
def compute_average_precision_detection(ground_truth, prediction,
                                         tiou_thr=0.5):
    """Compute average precision (detection task) between ground truth and
    prediction data frames.

    If multiple predictions match the same ground truth segment, only the
    one with the highest score is counted as a true positive. This code is
    greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thr : (float, optional)
        Temporal intersection over union threshold.

    Outputs
    -------
    ap : float
        Average precision score, as returned by ``interpolated_prec_rec``.
    """
    npos = float(len(ground_truth))
    # lock_gt[k] is the rank of the prediction matched to ground truth row k
    # (by position), or -1 while that row is still unmatched.
    lock_gt = np.full(len(ground_truth), -1.0)

    # Sort predictions by decreasing score order (argsort gives positions,
    # hence iloc rather than loc).
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros(len(prediction))
    fp = np.zeros(len(prediction))

    # Assigning true positives to ground truth instances.
    for idx in range(len(prediction)):
        this_pred = prediction.loc[idx]
        gt_mask = (ground_truth['video-id'] == this_pred['video-id']).values

        # Check if there is at least one ground truth in the video.
        if not gt_mask.any():
            fp[idx] = 1
            continue

        # Positions (not index labels) of this video's ground truth rows.
        gt_positions = np.flatnonzero(gt_mask)
        this_gt = ground_truth.iloc[gt_positions]
        tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
                               this_gt[['t-start', 't-end']].values)

        # Visit the candidate ground truths by decreasing tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for jdx in tiou_sorted_idx:
            if tiou_arr[jdx] < tiou_thr:
                # Every remaining candidate overlaps even less.
                break
            if lock_gt[gt_positions[jdx]] >= 0:
                # Already claimed by a higher scoring prediction.
                continue
            # Assign as true positive after the filters above.
            tp[idx] = 1
            lock_gt[gt_positions[jdx]] = idx
            break

        # Below threshold, or every good candidate was already locked. The
        # second case used to leave the prediction uncounted.
        if tp[idx] == 0:
            fp[idx] = 1

    # Computing prec-rec. The builtin ``float`` replaces the ``np.float``
    # alias, which was removed in NumPy 1.24.
    tp = np.cumsum(tp).astype(float)
    fp = np.cumsum(fp).astype(float)
    rec = tp / npos
    prec = tp / (tp + fp)

    # Precision goes first, matching every other call of
    # interpolated_prec_rec in this file.
    return interpolated_prec_rec(prec, rec)
def compute_average_precision_detection(ground_truth, prediction,
                                         tiou_thresholds=np.linspace(0.5, 0.95, 10)):
    """Compute average precision (detection task) between ground truth and
    prediction data frames.

    If multiple predictions match the same ground truth segment, only the
    one with the highest score is counted as a true positive. This code is
    greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thresholds : 1darray, optional
        Temporal intersection over union thresholds.

    Outputs
    -------
    ap : 1darray
        Average precision score for each entry of ``tiou_thresholds``
        (all zeros when ``prediction`` is empty).
    """
    ap = np.zeros(len(tiou_thresholds))
    if prediction.empty:
        return ap

    npos = float(len(ground_truth))
    # Give the ground truth a positional index so that the 'index' column
    # produced by reset_index() below can address lock_gt safely.
    ground_truth = ground_truth.reset_index(drop=True)
    # lock_gt[t, k] is the rank of the prediction matched to ground truth
    # row k at threshold t, or -1 while unmatched.
    lock_gt = np.full((len(tiou_thresholds), len(ground_truth)), -1.0)

    # Sort predictions by decreasing score order (argsort gives positions,
    # hence iloc rather than loc).
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros((len(tiou_thresholds), len(prediction)))
    fp = np.zeros((len(tiou_thresholds), len(prediction)))

    # Adaptation to query faster
    ground_truth_gbvn = ground_truth.groupby('video-id')

    # Assigning true positives to ground truth instances.
    for idx, this_pred in prediction.iterrows():
        try:
            # Check if there is at least one ground truth in the video.
            ground_truth_videoid = ground_truth_gbvn.get_group(
                this_pred['video-id'])
        except KeyError:
            # get_group raises KeyError for a video without ground truth.
            fp[:, idx] = 1
            continue

        this_gt = ground_truth_videoid.reset_index()
        gt_rows = this_gt['index'].values
        tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
                               this_gt[['t-start', 't-end']].values)
        # Visit the candidate ground truths by decreasing tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for tidx, tiou_thr in enumerate(tiou_thresholds):
            for jdx in tiou_sorted_idx:
                if tiou_arr[jdx] < tiou_thr:
                    # Every remaining candidate overlaps even less.
                    break
                if lock_gt[tidx, gt_rows[jdx]] >= 0:
                    # Already claimed by a higher scoring prediction.
                    continue
                # Assign as true positive after the filters above.
                tp[tidx, idx] = 1
                lock_gt[tidx, gt_rows[jdx]] = idx
                break

            # Below threshold, or every good candidate already locked.
            if tp[tidx, idx] == 0:
                fp[tidx, idx] = 1

    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # removed in NumPy 1.24.
    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / npos
    precision_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum)

    for tidx in range(len(tiou_thresholds)):
        ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx, :],
                                         recall_cumsum[tidx, :])

    return ap
def compute_average_precision_detection(ground_truth, prediction,
                                         tiou_thr=0.5):
    """Compute average precision (detection task) between ground truth and
    prediction data frames.

    If multiple predictions match the same ground truth segment, only the
    one with the highest score is counted as a true positive. This code is
    greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thr : (float, optional)
        Temporal intersection over union threshold.

    Outputs
    -------
    ap : float
        Average precision score, as returned by ``interpolated_prec_rec``.
    """
    npos = float(len(ground_truth))
    # lock_gt[k] is the rank of the prediction matched to ground truth row k
    # (by position), or -1 while that row is still unmatched.
    lock_gt = np.full(len(ground_truth), -1.0)

    # Sort predictions by decreasing score order (argsort gives positions,
    # hence iloc rather than loc).
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros(len(prediction))
    fp = np.zeros(len(prediction))

    # Assigning true positives to ground truth instances.
    for idx in range(len(prediction)):
        this_pred = prediction.loc[idx]
        gt_mask = (ground_truth['video-id'] == this_pred['video-id']).values

        # Check if there is at least one ground truth in the video.
        if not gt_mask.any():
            fp[idx] = 1
            continue

        # Positions (not index labels) of this video's ground truth rows.
        gt_positions = np.flatnonzero(gt_mask)
        this_gt = ground_truth.iloc[gt_positions]
        tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
                               this_gt[['t-start', 't-end']].values)

        # Visit the candidate ground truths by decreasing tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for jdx in tiou_sorted_idx:
            if tiou_arr[jdx] < tiou_thr:
                # Every remaining candidate overlaps even less.
                break
            if lock_gt[gt_positions[jdx]] >= 0:
                # Already claimed by a higher scoring prediction.
                continue
            # Assign as true positive after the filters above.
            tp[idx] = 1
            lock_gt[gt_positions[jdx]] = idx
            break

        # Below threshold, or every good candidate was already locked. The
        # second case used to leave the prediction uncounted.
        if tp[idx] == 0:
            fp[idx] = 1

    # Computing prec-rec. The builtin ``float`` replaces the ``np.float``
    # alias, which was removed in NumPy 1.24.
    tp = np.cumsum(tp).astype(float)
    fp = np.cumsum(fp).astype(float)
    rec = tp / npos
    prec = tp / (tp + fp)

    return interpolated_prec_rec(prec, rec)
def compute_average_precision_detection(ground_truth, prediction,
                                         tiou_thresholds=np.linspace(0.5, 0.95, 10),
                                         normalize_ap=False,
                                         average_num_instance_per_class=None,
                                         minimum_normalized_precision_threshold_for_detection=0.05):
    """Compute average precision (detection task) between ground truth and
    prediction data frames.

    If multiple predictions match the same ground truth instance, only the
    one with the highest score is counted as a true positive. This code is
    greatly inspired by Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end', 'gt-id']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score',
        'prediction-id']
    tiou_thresholds : 1darray, optional
        Temporal intersection over union thresholds.
    normalize_ap : bool, optional
        When True, precision is computed with
        ``average_num_instance_per_class`` in place of the number of ground
        truth instances.
    average_num_instance_per_class : number, optional
        Normalization constant; must be provided when ``normalize_ap`` is
        True.
    minimum_normalized_precision_threshold_for_detection : float, optional
        Only used when ``normalize_ap`` is True: predictions whose cumulative
        normalized precision is at or below this value are turned into
        false positives before precision and recall are recomputed.

    Outputs
    -------
    ap : 1darray
        Average precision score for each tIoU threshold.
    matched_gt_id : 2darray
        Shape (len(tiou_thresholds), len(prediction)); the 'gt-id' matched
        to each score-sorted prediction, NaN where unmatched.
    prediction_id : 1darray
        'prediction-id' values in the same score-sorted order.
    recall : 1darray
        Final cumulative recall per threshold (0 if ``prediction`` is empty).
    precision : 1darray
        Final cumulative precision per threshold (0 if ``prediction`` is
        empty).
    """
    gt_id_lst = np.unique(ground_truth['gt-id'].values)
    # Maps each distinct gt-id to its column in lock_gt.
    gt_id_to_index = dict(zip(gt_id_lst, range(len(gt_id_lst))))
    # lock_gt[t, k] is the rank of the prediction matched to the k-th
    # distinct gt-id at threshold t, or -1 while unmatched.
    lock_gt = np.full((len(tiou_thresholds), len(gt_id_to_index)), -1.0)

    npos = float(len(gt_id_lst))

    # Sort predictions by decreasing score order (argsort gives positions,
    # hence iloc rather than loc).
    sort_idx = prediction['score'].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros((len(tiou_thresholds), len(prediction)))
    fp = np.zeros((len(tiou_thresholds), len(prediction)))
    matched_gt_id = np.nan * np.zeros((len(tiou_thresholds), len(prediction)))

    ap = np.zeros(len(tiou_thresholds))
    if prediction.empty:
        return ap, matched_gt_id, prediction['prediction-id'].values, 0, 0

    # Adaptation to query faster
    ground_truth_gbvn = ground_truth.groupby('video-id')

    # Assigning true positives to ground truth instances.
    for idx, this_pred in prediction.iterrows():
        try:
            # Check if there is at least one ground truth in the video.
            ground_truth_videoid = ground_truth_gbvn.get_group(
                this_pred['video-id'])
        except KeyError:
            # get_group raises KeyError for a video without ground truth.
            fp[:, idx] = 1
            continue

        this_gt = ground_truth_videoid.reset_index()
        this_gt_ids = this_gt['gt-id'].values
        tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
                               this_gt[['t-start', 't-end']].values)
        # Visit the candidate ground truths by decreasing tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for tidx, tiou_thr in enumerate(tiou_thresholds):
            for jdx in tiou_sorted_idx:
                if tiou_arr[jdx] < tiou_thr:
                    # Every remaining candidate overlaps even less.
                    break
                gt_slot = gt_id_to_index[this_gt_ids[jdx]]
                if lock_gt[tidx, gt_slot] >= 0:
                    # Already claimed by a higher scoring prediction.
                    continue
                # Assign as true positive after the filters above.
                tp[tidx, idx] = 1
                lock_gt[tidx, gt_slot] = idx
                matched_gt_id[tidx, idx] = this_gt_ids[jdx]
                break

            # Below threshold, or every good candidate already locked.
            if tp[tidx, idx] == 0:
                fp[tidx, idx] = 1

    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # removed in NumPy 1.24.
    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / npos

    if normalize_ap:
        precision_cumsum = recall_cumsum * average_num_instance_per_class / \
            (recall_cumsum * average_num_instance_per_class + fp_cumsum)

        # Demote predictions at or below the precision floor to false
        # positives, then recompute the cumulative statistics.
        discard_index = precision_cumsum <= minimum_normalized_precision_threshold_for_detection
        tp[discard_index] = 0
        fp[discard_index] = 1
        matched_gt_id[discard_index] = np.nan

        tp_cumsum = np.cumsum(tp, axis=1).astype(float)
        fp_cumsum = np.cumsum(fp, axis=1).astype(float)
        recall_cumsum = tp_cumsum / npos

        precision_cumsum = recall_cumsum * average_num_instance_per_class / \
            (recall_cumsum * average_num_instance_per_class + fp_cumsum)
    else:
        precision_cumsum = recall_cumsum * npos / (recall_cumsum * npos + fp_cumsum)

    for tidx in range(len(tiou_thresholds)):
        ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx, :],
                                         recall_cumsum[tidx, :])

    recall = recall_cumsum[:, -1]
    precision = precision_cumsum[:, -1]

    return ap, matched_gt_id, prediction['prediction-id'].values, recall, precision
def diagnose(self):
    """Analyzes the error types and add the results to self.prediction DataFrame.
    Computes the average-mAP gain after removing each error type.

    Sets ``fp_error_types``, ``fp_error_types_count``,
    ``fp_error_types_count_df``, ``fp_error_types_precentage_df``,
    ``ap_gain`` and ``average_mAP_gain`` on the instance, and prints a
    summary when ``self.verbose`` is truthy.

    [WARNING]: diagnose() can only be run after evaluate() has finished
    """
    # Augment the prediction DataFrame with the error types
    self.fp_error_types = self.wrapper_analyze_fp_error_types()
    self.fp_error_types_count = {}

    for tidx, column_name in enumerate(self.fp_error_type_cols):
        self.prediction[column_name] = self.fp_error_types[tidx]

        this_tiou = self.tiou_thresholds[tidx]
        # Start every error type at zero so types that never occur are
        # still reported.
        self.fp_error_types_count[this_tiou] = dict.fromkeys(
            self.fp_error_types_legned.keys(), 0)
        error_ids, counts = np.unique(self.fp_error_types[tidx],
                                      return_counts=True)
        for error_id, count in zip(error_ids, counts):
            self.fp_error_types_count[this_tiou][
                self.fp_error_types_inverse_legned[error_id]] = count

    self.fp_error_types_count_df = pd.DataFrame(self.fp_error_types_count)
    self.fp_error_types_count_df['avg'] = self.fp_error_types_count_df.mean(axis=1)
    self.fp_error_types_precentage_df = self.fp_error_types_count_df / len(self.prediction)

    # Computes the average-mAP gain after removing each error type
    self.ap_gain, self.average_mAP_gain = {}, {}

    for err_name, err_code in self.fp_error_types_legned.items():
        # Entries with a falsy code (e.g. 0) are skipped.
        if err_code:
            self.ap_gain[err_name] = np.zeros(
                (len(self.tiou_thresholds), len(self.activity_index)))
            for cidx in self.activity_index.values():
                this_pred_df = self.prediction[
                    self.prediction['label'] == cidx].reset_index(drop=True)
                sort_idx = this_pred_df['score'].values.argsort()[::-1]
                this_pred_df = this_pred_df.loc[sort_idx].reset_index(drop=True)
                this_gt_df = self.ground_truth[self.ground_truth['label'] == cidx]

                npos = len(this_gt_df)

                for tidx in range(len(self.tiou_thresholds)):
                    this_error_types = this_pred_df[self.fp_error_type_cols[tidx]].T.values
                    # The builtin ``int`` / ``float`` replace the
                    # ``np.int`` / ``np.float`` aliases, which were removed
                    # in NumPy 1.24.
                    tp = (~np.isnan(this_pred_df[self.matched_gt_id_cols[tidx]].T)).astype(int)
                    # Drop the predictions tagged with this error type.
                    tp = tp[this_error_types != err_code]
                    fp = np.abs(tp - 1)

                    # Computing prec-rec
                    this_tp = np.cumsum(tp).astype(float)
                    this_fp = np.cumsum(fp).astype(float)
                    rec = this_tp / npos
                    if self.normalize_ap:
                        prec = rec * self.average_num_instance_per_class / (
                            rec * self.average_num_instance_per_class + this_fp)
                    else:
                        prec = rec * npos / (rec * npos + this_fp)
                    self.ap_gain[err_name][tidx, cidx] = interpolated_prec_rec(prec, rec)
            self.average_mAP_gain[err_name] = self.ap_gain[err_name].mean() - self.average_mAP

    if self.verbose:
        print('[DIAGNOSIS] Analysis of false positive error types.')
        print('\tPercentage of each error type:\n{}'.format(self.fp_error_types_precentage_df))
        print('\tAverage mAP gain after removing each error type:\n{}'.format(self.average_mAP_gain))
import numpy as np
from utils import interpolated_prec_rec

# Small hand-written precision/recall curve used to exercise
# interpolated_prec_rec; the two arrays are paired element by element.
prec = np.array([0.0, 1.0, 0.5, 0.2, 0.1])
rec = np.array([0.0, 0.1, 0.5, 0.8, 1.0])

# Precision is passed first, recall second.
ap = interpolated_prec_rec(prec, rec)
print(ap)