def total_iou_matrix(self, item, label_weights=None, algorithm=None, qval=None, per_label=False):
    """
    Build the full IOU matrix between shapes of this eval item and ``item``.

    Every (groundtruth, prediction) pair whose labels have non-zero text
    similarity produces one entry holding the label-weighted IOU.

    :param item: to be compared with self
    :param label_weights: weight of particular label
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :param per_label: calculate per label or overall
    :return: list of dicts with keys 'iou', 'weight', 'prediction', 'groundtruth'
    """
    # NOTE(review): `per_label` is accepted but never used in this method —
    # confirm whether per-label output was intended here.
    weights_by_label = label_weights or {}
    comparator = get_text_comparator(algorithm, qval)
    pairs = []
    for gt_value in self.get_values_iter():
        for pred_value in item.get_values_iter():
            # skip pairs whose labels are entirely dissimilar
            if texts_similarity(gt_value[self._shape_key],
                                pred_value[self._shape_key], comparator) == 0:
                continue
            # unknown labels default to weight 1
            weight = sum(weights_by_label.get(label, 1)
                         for label in gt_value[self._shape_key])
            pairs.append({
                'iou': self._iou(gt_value, pred_value) * weight,
                'weight': weight,
                'prediction': pred_value,
                'groundtruth': gt_value,
            })
    return pairs
def intersection(self, item, label_weights=None, algorithm=None, qval=None):
    """
    Weighted average over ``item``'s values of the best match score against
    this item's values.

    :param item: to be compared with self
    :param label_weights: weight of particular label (missing labels weigh 1)
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :return: 1 if both items are empty, 0 if exactly one is empty,
        otherwise the weighted mean best-match score
    """
    comparator = get_text_comparator(algorithm, qval)
    label_weights = label_weights or {}
    someone_is_empty = self.empty ^ item.empty
    if someone_is_empty:
        # exactly one of the two items is empty -> nothing can match
        return 0
    if self.empty and item.empty:
        # both empty -> perfect agreement
        return 1
    gt_values = self.get_values()
    total_score, total_weight = 0, 0
    for pred_value in item.get_values_iter():
        # find the best matching span inside gt_values;
        # default=0 guards against an empty gt_values sequence, where a bare
        # max() would raise ValueError (same semantics as the extended
        # intersection variant, which scores such predictions as 0)
        best_matching_score = max(
            map(partial(self._match, y=pred_value, f=comparator), gt_values),
            default=0)
        if self._shape_key in pred_value:
            weight = sum(
                label_weights.get(l, 1)
                for l in pred_value[self._shape_key])
        else:
            # prediction carries no labels -> neutral weight
            weight = 1
        total_score += weight * best_matching_score
        total_weight += weight
    if total_weight == 0:
        return 0
    return total_score / total_weight
def match(self, item, algorithm='Levenshtein', qval=1):
    """
    Mean pairwise label similarity between this item's values and ``item``'s.

    Values are compared positionally (``zip`` truncates to the shorter
    sequence); returns 0 when there is nothing to compare.

    :param item: to be compared with self
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :return: average similarity score
    """
    comparator = get_text_comparator(algorithm, qval)
    scores = [
        texts_similarity(gt_value[self._shape_key],
                         pred_value[self._shape_key], comparator)
        for gt_value, pred_value in zip(self.get_values_iter(),
                                        item.get_values_iter())
    ]
    # max(..., 1) in the denominator keeps the empty case at 0 instead of
    # raising ZeroDivisionError
    return sum(scores) / max(len(scores), 1)
def total_iou(self, item, label_weights=None, algorithm=None, qval=None, per_label=False):
    """
    For each shape in current eval item, we compute IOU with identically
    labeled shape with largest intersection. This is a suboptimal metric
    since it doesn't consider cases where multiple boxes from self coincide
    with a single box from item.

    :param item: to be compared with self
    :param label_weights: weight of particular label
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :param per_label: calculate per label or overall
    :return: dict {label: mean best IOU} when per_label, otherwise the
        label-weighted average of best IOUs (0.0 when there are no values)
    """
    label_weights = label_weights or {}
    comparator = get_text_comparator(algorithm, qval)
    per_label_ious = defaultdict(list)
    overall_ious, overall_weights = [], []
    for gt_value in self.get_values_iter():
        best_iou = 0
        for pred_value in item.get_values_iter():
            # only predictions with similar labels can contribute
            if texts_similarity(gt_value[self._shape_key],
                                pred_value[self._shape_key], comparator) == 0:
                continue
            best_iou = max(self._iou(gt_value, pred_value), best_iou)
        if per_label:
            # per-label mode ignores label weights: scores are just averaged
            for label in gt_value[self._shape_key]:
                per_label_ious[label].append(best_iou)
        else:
            weight = sum(label_weights.get(label, 1)
                         for label in gt_value[self._shape_key])
            overall_ious.append(best_iou * weight)
            overall_weights.append(weight)
    if per_label:
        return {label: float(np.mean(values))
                for label, values in per_label_ious.items()}
    return np.average(overall_ious, weights=overall_weights) if overall_ious else 0.0
def total_iou(self, item, label_weights=None, algorithm=None, qval=None):
    """
    For each shape in current eval item, we compute IOU with identically
    labeled shape with largest intersection. This is a suboptimal metric
    since it doesn't consider cases where multiple boxes from self coincide
    with a single box from item.

    :param item: to be compared with self
    :param label_weights: weight of particular label
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :return: label-weighted average of per-groundtruth best IOUs
        (0.0 when there are no values)
    """
    weights_map = label_weights or {}
    comparator = get_text_comparator(algorithm, qval)
    scores, weights = [], []
    for gt_value in self.get_values_iter():
        best_iou = 0
        for pred_value in item.get_values_iter():
            # dissimilar labels never contribute to the IOU
            if texts_similarity(gt_value[self._shape_key],
                                pred_value[self._shape_key], comparator) == 0:
                continue
            best_iou = max(self._iou(gt_value, pred_value), best_iou)
        # unknown labels default to weight 1
        weight = sum(weights_map.get(label, 1)
                     for label in gt_value[self._shape_key])
        scores.append(best_iou * weight)
        weights.append(weight)
    return np.average(scores, weights=weights) if scores else 0.0
def intersection(self, item, label_weights=None, algorithm=None, qval=None, per_label=False, iou_threshold=None):
    """
    Weighted (or per-label averaged) best-match score of ``item``'s values
    against this item's values.

    :param item: to be compared with self
    :param label_weights: weight of particular label (missing labels weigh 1;
        ignored when per_label is True)
    :param algorithm: algorithm of comparing values
    :param qval: q value
    :param per_label: when True, return a dict of average scores keyed by
        label (optionally prefixed with the value of 'when_label_value')
    :param iou_threshold: when set, each best-match score is binarized to
        1.0/0.0 depending on whether it exceeds the threshold
    :return: per_label -> dict {label: average score};
        otherwise a single weighted-average score in [0, 1]
    """
    comparator = get_text_comparator(algorithm, qval)
    label_weights = label_weights or {}
    # XOR: true when exactly one of the two items is empty
    someone_is_empty = self.empty ^ item.empty
    if someone_is_empty and not per_label:
        return 0
    if self.empty and item.empty:
        # both empty: perfect agreement (empty dict in per-label mode)
        return {} if per_label else 1
    gt_values = self.get_values()
    if per_label:
        total_score, total_weight = defaultdict(int), defaultdict(int)
    else:
        total_score, total_weight = 0, 0
    for pred_value in item.get_values_iter():
        if len(gt_values) == 0:
            # for empty gt values, matching score for current prediction is the lowest
            best_matching_score = 0
        else:
            # find the best matching span inside gt_values
            best_matching_score = max(
                map(partial(self._match, y=pred_value, f=comparator), gt_values))
        if iou_threshold is not None:
            # make hard decision w.r.t. threshold whether current spans are matched
            best_matching_score = float(
                best_matching_score > iou_threshold)
        if per_label:
            # for per-label mode, label weights are unimportant - only scores are averaged
            prefix = pred_value.get('when_label_value', '')
            if prefix:
                prefix += ':'
            for l in pred_value[self._shape_key]:
                total_score[prefix + l] += best_matching_score
                total_weight[prefix + l] += 1
        else:
            # when aggregating scores each individual label weight is taken into account
            if self._shape_key in pred_value:
                weight = sum(
                    label_weights.get(l, 1)
                    for l in pred_value[self._shape_key])
            else:
                # prediction carries no labels: neutral weight
                weight = 1
            total_score += weight * best_matching_score
            total_weight += weight
    if per_label:
        # average per-label score
        for l in total_score:
            if total_weight[l] == 0:
                total_score[l] = 0
            else:
                total_score[l] /= total_weight[l]
        return total_score
    # otherwise return overall score
    if total_weight == 0:
        return 0
    return total_score / total_weight