def scores():
    """Aggregate a fixed token-level confusion matrix into a score.

    Builds a Counter of (true_tag, predicted_tag) counts, wraps it in an
    EvaluationResult, and returns whatever Evaluator.calculate_score
    produces for that single result.
    """
    confusion = Counter(
        {
            ("O", "O"): 30,
            ("ANIMAL", "ANIMAL"): 4,
            ("ANIMAL", "O"): 2,
            ("O", "ANIMAL"): 1,
            ("PERSON", "PERSON"): 2,
        }
    )
    # The model's prediction is irrelevant here: scoring consumes the
    # precomputed confusion counts, not a live prediction.
    evaluator = Evaluator(model=MockTokensModel(prediction=None))
    return evaluator.calculate_score([EvaluationResult(results=confusion)])
def test_evaluate_multiple_tokens_correct_statistics():
    """A fully correct multi-token (B/I/L) entity prediction yields
    perfect PII precision and recall.

    Fix: the fixture text previously read "amaericanus" while the token
    list spells "americanus"; the full text now matches its tokens.
    """
    prediction = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["ANIMAL"])
    sample = InputSample(
        "I am the walrus americanus magnifico", masked=None, spans=None
    )
    # Tokens/tags are assigned directly so the test does not depend on a
    # tokenizer; they must stay consistent with the full text above.
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    evaluated = evaluator.evaluate_sample(sample, prediction)
    evaluation = evaluator.calculate_score([evaluated])

    # Prediction equals the gold tags, so both metrics must be exact.
    assert evaluation.pii_precision == 1
    assert evaluation.pii_recall == 1
def test_evaluate_multiple_examples_correct_statistics():
    """Half-right predictions over repeated samples give 0.5 precision/recall.

    The model predicts PERSON on tokens 0 and 3; the gold tags mark tokens
    3 and 5. One of two predicted entities is correct (precision 0.5) and
    one of two gold entities is found (recall 0.5), regardless of how many
    copies of the sample are evaluated.
    """
    prediction = ["U-PERSON", "O", "O", "U-PERSON", "O", "O"]
    evaluator = Evaluator(
        model=MockTokensModel(prediction=prediction),
        entities_to_keep=["PERSON"],
    )

    sample = InputSample("My name is Raphael or David", masked=None, spans=None)
    sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    # Four identical copies: the ratios are unchanged by repetition.
    results = evaluator.evaluate_all([sample] * 4)
    scores = evaluator.calculate_score(results)

    assert scores.pii_precision == 0.5
    assert scores.pii_recall == 0.5
def test_evaluate_multiple_entities_to_keep_correct_statistics():
    """Per-pair confusion counts are correct when several entity types
    are kept but only ANIMAL appears in the data.

    Expected pairs over 4 tokens: two true negatives (O, O), one true
    positive (ANIMAL, ANIMAL), and one false positive (O, ANIMAL).
    """
    # NOTE(review): full_text/masked don't line up with the tokens/tags
    # assigned below; the tags are the ground truth here — confirm the
    # fixture strings are intentional.
    prediction = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    keep = ["ANIMAL", "PLANT", "SPACESHIP"]
    evaluator = Evaluator(
        model=MockTokensModel(prediction=prediction),
        entities_to_keep=keep,
    )

    sample = InputSample(
        full_text="I dog the walrus",
        masked="I [ANIMAL] the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    result = evaluator.evaluate_sample(sample, prediction)

    assert result.results[("O", "O")] == 2
    assert result.results[("ANIMAL", "ANIMAL")] == 1
    assert result.results[("O", "ANIMAL")] == 1
def test_evaluator_simple():
    """Smoke test: a single-token entity predicted exactly right scores
    perfect PII precision and recall."""
    # NOTE(review): entities_to_keep is given to the model here, whereas
    # most sibling tests pass it to Evaluator — confirm which is intended.
    prediction = ["O", "O", "O", "U-ANIMAL"]
    mock = MockTokensModel(prediction=prediction, entities_to_keep=["ANIMAL"])
    evaluator = Evaluator(model=mock)

    sample = InputSample(
        full_text="I am the walrus",
        masked="I am the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    per_sample = evaluator.evaluate_sample(sample, prediction)
    final = evaluator.calculate_score([per_sample])

    # Prediction matches the gold tags exactly.
    assert final.pii_precision == 1
    assert final.pii_recall == 1
def test_evaluate_multiple_examples_ignore_entity_correct_statistics():
    """When TENNIS_PLAYER is kept alongside PERSON, predicting David as
    TENNIS_PLAYER still counts as detected PII, so precision/recall are 1.

    Fix: this test called evaluate_all/calculate_score on the mock model
    itself instead of constructing an Evaluator, unlike every sibling
    test in this file; evaluation now goes through Evaluator.
    """
    prediction = ["O", "O", "O", "U-PERSON", "O", "U-TENNIS_PLAYER"]
    model = MockTokensModel(prediction=prediction,
                            labeling_scheme='BILOU',
                            entities_to_keep=['PERSON', 'TENNIS_PLAYER'])
    evaluator = Evaluator(model=model,
                          entities_to_keep=['PERSON', 'TENNIS_PLAYER'])

    input_sample = InputSample("My name is Raphael or David",
                               masked=None, spans=None)
    input_sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    input_sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    evaluated = evaluator.evaluate_all(
        [input_sample, input_sample, input_sample, input_sample])
    scores = evaluator.calculate_score(evaluated)

    # Both gold PERSON tokens are flagged as some kept PII entity.
    assert scores.pii_precision == 1
    assert scores.pii_recall == 1