def test_collect_named_entities_same_type_in_sequence():
    """Two back-to-back LOC spans must come out as two separate entities."""
    bio_tags = ['O', 'B-LOC', 'I-LOC', 'B-LOC', 'I-LOC', 'O']
    found = collect_named_entities(bio_tags)
    # Each expected span covers a B- tag and the I- tag right after it.
    wanted = [
        Entity(e_type='LOC', start_offset=first, end_offset=last)
        for first, last in ((1, 2), (3, 4))
    ]
    assert found == wanted
def test_collect_named_entities_entity_goes_until_last_token():
    """An entity whose last tag is the final token must still be collected."""
    bio_tags = ['O', 'B-LOC', 'I-LOC', 'B-LOC', 'I-LOC']
    found = collect_named_entities(bio_tags)
    # The second span ends at index 4, the last position of the sequence.
    wanted = [
        Entity(e_type='LOC', start_offset=first, end_offset=last)
        for first, last in ((1, 2), (3, 4))
    ]
    assert found == wanted
Beispiel #3
0
}

# Overall results: every evaluation schema starts from its own deep copy of
# metrics_results.
results = {
    schema: deepcopy(metrics_results)
    for schema in ('strict', 'ent_type', 'partial', 'exact')
}

# Results aggregated by entity type: each type gets its own deep copy of the
# overall structure built above.
evaluation_agg_entities_type = {
    entity_type: deepcopy(results) for entity_type in ['Symptom']
}

for true_ents, pred_ents in zip(y_test_, y_pred_):
    tmp_results, tmp_agg_results = compute_metrics(
        collect_named_entities(true_ents), collect_named_entities(pred_ents),
        ['Symptom'])

    for eval_schema in results.keys():
        for metric in metrics_results.keys():
            results[eval_schema][metric] += tmp_results[eval_schema][metric]

    # Calculate global precision and recall

    results = compute_precision_recall_wrapper(results)

    # aggregate results by entity type

    for e_type in ['Symptom']:

        for eval_schema in tmp_agg_results[e_type]:
def test_collect_named_entities_no_entity():
    """A sequence made only of 'O' tags must yield an empty entity list."""
    outside_only = ['O'] * 5
    found = collect_named_entities(outside_only)
    assert found == []
def test_collect_named_entities_sequence_has_only_one_entity():
    """A sequence that is one LOC span end to end must yield one entity."""
    bio_tags = ['B-LOC', 'I-LOC', 'I-LOC']
    found = collect_named_entities(bio_tags)
    # The single span starts at the first token and ends at the last one.
    whole_sequence = Entity(e_type='LOC', start_offset=0, end_offset=2)
    assert found == [whole_sequence]