def precision_score(y_true, y_pred, average='micro', suffix=False): """Compute the precision. The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of true positives and ``fp`` the number of false positives. The precision is intuitively the ability of the classifier not to label as positive a sample. The best value is 1 and the worst value is 0. Args: y_true : 2d array. Ground truth (correct) target values. y_pred : 2d array. Estimated targets as returned by a tagger. Returns: score : float. Example: >>> from seqeval.metrics import precision_score >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> precision_score(y_true, y_pred) 0.50 """ true_entities = set(get_entities(y_true, suffix)) pred_entities = set(get_entities(y_pred, suffix)) nb_correct = len(true_entities & pred_entities) nb_pred = len(pred_entities) score = nb_correct / nb_pred if nb_pred > 0 else 0 return score
def post_processing(cls, logit_list, id2label, **kwargs): """ 由于ner返回需要提供实体在文本中的位置,因此输入需要包含原始文本 args: logit_list:模型预测结果 id2label: 标签id到标签的映射字典 input_list:输入token转换后的ids input_texts:原始文本 tokenizer:分词工具,辅助定位实体在原始文本中的位置 """ input_list = kwargs.get("input_list") input_texts = kwargs.get("input_texts", None) tokenizer = kwargs.get("tokenizer") result_list = [] for i in range(len(input_list)): inputs = input_list[i] logits = logit_list[i] text = "" rematch_list = [] if input_texts: text = input_texts[i] input_tokens = tokenizer.convert_ids_to_tokens(inputs) rematch_list = cls.rematch(text, input_tokens) pred_ids = np.argmax(logits, -1) input_ids = inputs labels = [id2label.get(i, "O") for i in pred_ids] chunk_list = get_entities(labels) # [('PER', 0, 1), ('LOC', 3, 3)] entity_list = [] for chunk in chunk_list: entity_type = chunk[0] tokens = [] begin = -1 end = -1 select_ids = [] for i in range(chunk[1], chunk[2] + 1): if input_ids[i] > 0: tokens.append( tokenizer._convert_id_to_token(input_ids[i])) select_ids.append(i) if len(tokens) > 0: entity = tokenizer.convert_tokens_to_string(tokens) entity = cls.clean_space(entity) if input_texts: begin = rematch_list[select_ids[0]][0] end = rematch_list[select_ids[-1]][-1] entity = text[begin:(end + 1)].strip() if entity: entity_list.append([entity, entity_type, begin, end]) result_list.append(entity_list) return result_list
def f1_score(y_true, y_pred, average='micro', suffix=False): """Compute the F1 score. The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. The relative contribution of precision and recall to the F1 score are equal. The formula for the F1 score is:: F1 = 2 * (precision * recall) / (precision + recall) Args: y_true : 2d array. Ground truth (correct) target values. y_pred : 2d array. Estimated targets as returned by a tagger. Returns: score : float. Example: >>> from seqeval.metrics import f1_score >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> f1_score(y_true, y_pred) 0.50 """ true_entities = set(get_entities(y_true, suffix)) pred_entities = set(get_entities(y_pred, suffix)) nb_correct = len(true_entities & pred_entities) nb_pred = len(pred_entities) nb_true = len(true_entities) p = nb_correct / nb_pred if nb_pred > 0 else 0 r = nb_correct / nb_true if nb_true > 0 else 0 score = 2 * p * r / (p + r) if p + r > 0 else 0 return score
def classification_report(y_true, y_pred, digits=2, suffix=False): """Build a text report showing the main classification metrics. Args: y_true : 2d array. Ground truth (correct) target values. y_pred : 2d array. Estimated targets as returned by a classifier. digits : int. Number of digits for formatting output floating point values. Returns: report : string. Text summary of the precision, recall, F1 score for each class. Examples: >>> from seqeval.metrics import classification_report >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']] >>> print(classification_report(y_true, y_pred)) precision recall f1-score support <BLANKLINE> MISC 0.00 0.00 0.00 1 PER 1.00 1.00 1.00 1 <BLANKLINE> micro avg 0.50 0.50 0.50 2 macro avg 0.50 0.50 0.50 2 <BLANKLINE> """ true_entities = set(get_entities(y_true, suffix)) pred_entities = set(get_entities(y_pred, suffix)) name_width = 0 d1 = defaultdict(set) d2 = defaultdict(set) for e in true_entities: d1[e[0]].add((e[1], e[2])) name_width = max(name_width, len(e[0])) for e in pred_entities: d2[e[0]].add((e[1], e[2])) last_line_heading = 'macro avg' width = max(name_width, len(last_line_heading), digits) headers = ["precision", "recall", "f1-score", "support"] head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers) report = head_fmt.format(u'', *headers, width=width) report += u'\n\n' row_fmt = u'{:>{width}s} ' + u' {:>9.{digits}f}' * 3 + u' {:>9}\n' ps, rs, f1s, s = [], [], [], [] for type_name, true_entities in d1.items(): pred_entities = d2[type_name] nb_correct = len(true_entities & pred_entities) nb_pred = len(pred_entities) nb_true = len(true_entities) p = nb_correct / nb_pred if nb_pred > 0 else 0 r = nb_correct / nb_true if nb_true > 0 else 0 f1 = 2 * p * r / (p + r) if p + r > 0 else 0 report += row_fmt.format(*[type_name, p, r, f1, nb_true], width=width, digits=digits) ps.append(p) rs.append(r) f1s.append(f1) s.append(nb_true) report += u'\n' # compute averages report += row_fmt.format('micro avg', precision_score(y_true, y_pred, suffix=suffix), recall_score(y_true, y_pred, suffix=suffix), f1_score(y_true, y_pred, suffix=suffix), np.sum(s), width=width, digits=digits) report += row_fmt.format(last_line_heading, np.average(ps, weights=s), np.average(rs, weights=s), np.average(f1s, weights=s), np.sum(s), width=width, digits=digits) return report