def test_collect_named_entities_same_type_in_sequence():
    """Check two back-to-back LOC spans followed by a trailing 'O' tag.

    The second 'B-LOC' directly follows the first span's 'I-LOC', and the
    expectation is two separate LOC entities rather than one merged span.
    Expected offsets line up with the tag positions, end position included.
    """
    tag_sequence = ['O', 'B-LOC', 'I-LOC', 'B-LOC', 'I-LOC', 'O']

    found = collect_named_entities(tag_sequence)

    first_loc = Entity(e_type='LOC', start_offset=1, end_offset=2)
    second_loc = Entity(e_type='LOC', start_offset=3, end_offset=4)
    assert found == [first_loc, second_loc]
def test_collect_named_entities_entity_goes_until_last_token():
    """Check that a span ending on the very last tag is still reported.

    The sequence has no trailing 'O', so the second LOC span runs up to the
    final position (index 4) and is expected in the result alongside the
    first span.
    """
    tag_sequence = ['O', 'B-LOC', 'I-LOC', 'B-LOC', 'I-LOC']

    found = collect_named_entities(tag_sequence)

    first_loc = Entity(e_type='LOC', start_offset=1, end_offset=2)
    last_loc = Entity(e_type='LOC', start_offset=3, end_offset=4)
    assert found == [first_loc, last_loc]
} # overall results results = { 'strict': deepcopy(metrics_results), 'ent_type': deepcopy(metrics_results), 'partial': deepcopy(metrics_results), 'exact': deepcopy(metrics_results) } # results aggregated by entity type evaluation_agg_entities_type = {e: deepcopy(results) for e in ['Symptom']} for true_ents, pred_ents in zip(y_test_, y_pred_): tmp_results, tmp_agg_results = compute_metrics( collect_named_entities(true_ents), collect_named_entities(pred_ents), ['Symptom']) for eval_schema in results.keys(): for metric in metrics_results.keys(): results[eval_schema][metric] += tmp_results[eval_schema][metric] # Calculate global precision and recall results = compute_precision_recall_wrapper(results) # aggregate results by entity type for e_type in ['Symptom']: for eval_schema in tmp_agg_results[e_type]:
def test_collect_named_entities_no_entity():
    """Check that a sequence made only of 'O' tags yields an empty list."""
    all_outside = ['O'] * 5  # five 'O' tags, no B-/I- tags at all

    found = collect_named_entities(all_outside)

    assert found == []
def test_collect_named_entities_sequence_has_only_one_entity():
    """Check a sequence that is one LOC span from first tag to last.

    There are no 'O' tags on either side; the single expected entity covers
    positions 0 through 2.
    """
    tag_sequence = ['B-LOC', 'I-LOC', 'I-LOC']

    found = collect_named_entities(tag_sequence)

    whole_sequence = Entity(e_type='LOC', start_offset=0, end_offset=2)
    assert found == [whole_sequence]