def predict(self, sample: InputSample) -> List[str]:
    nlpArtifacts = None
    if self.withNlpArtifacts:
        nlpArtifacts = self.__make_nlp_artifacts(sample.full_text)

    results = self.recognizer.analyze(sample.full_text, self.entities, nlpArtifacts)

    starts = []
    ends = []
    tags = []
    scores = []
    for res in results:
        # Guard against recognizers that return a missing/None start offset.
        if not res.start:
            res.start = 0
        starts.append(res.start)
        ends.append(res.end)
        tags.append(res.entity_type)
        scores.append(res.score)

    response_tags = span_to_tag(
        scheme=self.labeling_scheme,
        text=sample.full_text,
        start=starts,
        end=ends,
        tag=tags,
        tokens=sample.tokens,
        scores=scores,
        io_tags_only=self.compare_by_io,
    )
    # If the sample carries no gold tags, default them all to "O" (outside).
    if len(sample.tags) == 0:
        sample.tags = ["O" for _ in response_tags]
    return response_tags
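# Illustrative sketch (not part of the evaluator): span_to_tag, as used above,
# projects character-offset spans returned by a recognizer onto the sample's
# tokens and emits per-token tags. The toy function below shows the idea for a
# plain IO scheme only; the name toy_span_to_io_tags and its signature are
# hypothetical, and the real span_to_tag also handles BIO/BILOU and scores.
def toy_span_to_io_tags(text, tokens, starts, ends, tags):
    token_tags = []
    cursor = 0
    for token in tokens:
        token_start = text.index(token, cursor)
        token_end = token_start + len(token)
        cursor = token_end
        label = "O"
        for start, end, tag in zip(starts, ends, tags):
            # Label the token if it overlaps a detected span.
            if token_start < end and token_end > start:
                label = tag
                break
        token_tags.append(label)
    return token_tags

# Example: a PERSON span covering characters 11-16 ("David"):
# toy_span_to_io_tags("My name is David", ["My", "name", "is", "David"],
#                     [11], [16], ["PERSON"])  ->  ["O", "O", "O", "PERSON"]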
def test_evaluate_sample_wrong_entities_to_keep_correct_statistics():
    prediction = ["O", "O", "O", "U-ANIMAL"]
    model = MockTokensModel(prediction=prediction, entities_to_keep=["SPACESHIP"])

    sample = InputSample(
        full_text="I am the walrus", masked="I am the [ANIMAL]", spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluated = model.evaluate_sample(sample)
    assert evaluated.results[("O", "O")] == 4
def test_evaluate_same_entity_correct_statistics():
    prediction = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["ANIMAL"])

    sample = InputSample(
        full_text="I dog the walrus", masked="I [ANIMAL] the [ANIMAL]", spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluation_result = evaluator.evaluate_sample(sample, prediction)
    assert evaluation_result.results[("O", "O")] == 2
    assert evaluation_result.results[("ANIMAL", "ANIMAL")] == 1
    assert evaluation_result.results[("O", "ANIMAL")] == 1
def test_evaluate_multiple_tokens_correct_statistics():
    prediction = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["ANIMAL"])

    sample = InputSample(
        "I am the walrus americanus magnifico", masked=None, spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    evaluated = evaluator.evaluate_sample(sample, prediction)
    evaluation = evaluator.calculate_score([evaluated])

    assert evaluation.pii_precision == 1
    assert evaluation.pii_recall == 1
def test_evaluate_multiple_examples_correct_statistics():
    prediction = ["U-PERSON", "O", "O", "U-PERSON", "O", "O"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["PERSON"])

    input_sample = InputSample("My name is Raphael or David", masked=None, spans=None)
    input_sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    input_sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    evaluated = evaluator.evaluate_all(
        [input_sample, input_sample, input_sample, input_sample]
    )
    scores = evaluator.calculate_score(evaluated)

    # Each sample yields one true positive ("Raphael"), one false positive ("My")
    # and one false negative ("David"), hence precision = recall = 0.5.
    assert scores.pii_precision == 0.5
    assert scores.pii_recall == 0.5
def test_evaluator_simple():
    prediction = ["O", "O", "O", "U-ANIMAL"]
    model = MockTokensModel(prediction=prediction, entities_to_keep=["ANIMAL"])

    sample = InputSample(
        full_text="I am the walrus", masked="I am the [ANIMAL]", spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluated = model.evaluate_sample(sample)
    final_evaluation = model.calculate_score([evaluated])

    assert final_evaluation.pii_precision == 1
    assert final_evaluation.pii_recall == 1
def test_evaluate_multiple_entities_to_keep_correct_statistics():
    prediction = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    model = MockTokensModel(
        prediction=prediction,
        labeling_scheme="BIO",
        entities_to_keep=["ANIMAL", "PLANT", "SPACESHIP"],
    )

    sample = InputSample(
        full_text="I dog the walrus", masked="I [ANIMAL] the [ANIMAL]", spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluation_result = model.evaluate_sample(sample)
    assert evaluation_result.results[("O", "O")] == 2
    assert evaluation_result.results[("ANIMAL", "ANIMAL")] == 1
    assert evaluation_result.results[("O", "ANIMAL")] == 1
def test_evaluate_multiple_tokens_no_match_match_correct_statistics():
    prediction = ["O", "O", "O", "B-SPACESHIP", "L-SPACESHIP", "O"]
    model = MockTokensModel(prediction=prediction, entities_to_keep=["ANIMAL"])

    sample = InputSample(
        "I am the walrus americanus magnifico", masked=None, spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    evaluated = model.evaluate_sample(sample)
    evaluation = model.calculate_score([evaluated])

    # No ANIMAL tokens are predicted at all (SPACESHIP is not a kept entity),
    # so precision is undefined (NaN) and recall is 0.
    assert np.isnan(evaluation.pii_precision)
    assert evaluation.pii_recall == 0
def test_evaluate_multiple_examples_ignore_entity_correct_statistics():
    prediction = ["O", "O", "O", "U-PERSON", "O", "U-TENNIS_PLAYER"]
    model = MockTokensModel(
        prediction=prediction,
        labeling_scheme="BILOU",
        entities_to_keep=["PERSON", "TENNIS_PLAYER"],
    )

    input_sample = InputSample("My name is Raphael or David", masked=None, spans=None)
    input_sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    input_sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    evaluated = model.evaluate_all(
        [input_sample, input_sample, input_sample, input_sample]
    )
    scores = model.calculate_score(evaluated)

    # The PII-level metrics ignore the specific entity type, so predicting
    # TENNIS_PLAYER where the gold tag is PERSON still counts as detected PII.
    assert scores.pii_precision == 1
    assert scores.pii_recall == 1
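# For readers without the test utilities at hand: the MockTokensModel used in
# the tests above can be thought of as a model whose predict() ignores the
# input sample and returns a canned tag sequence. The class below is only a
# minimal sketch under that assumption (the name TinyMockModel is
# hypothetical); the actual mock also accepts arguments such as
# entities_to_keep and labeling_scheme, presumably forwarding them to the
# evaluation base class.
class TinyMockModel:
    def __init__(self, prediction=None):
        self.prediction = prediction

    def predict(self, sample):
        # Return the pre-defined tags regardless of the sample's content.
        return self.prediction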
        self.analyze_template = template
        return

    requested_fields = []
    for entity in entities:
        for field in template["fields"]:
            if entity == field["name"]:
                requested_fields.append(field)

    new_template = {"fields": requested_fields}
    self.analyze_template = new_template


if __name__ == "__main__":
    # Example:
    text = "My siblings are Dan and magen"
    bilou_tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    presidio = PresidioAPIEvaluator(verbose=True, all_fields=True, compare_by_io=True)
    tokens = tokenize(text)

    s = InputSample(text, masked=None, spans=None)
    s.tokens = tokens
    s.tags = bilou_tags

    evaluated_sample = presidio.evaluate_sample(s)
    p, r, entity_recall, f, mistakes = presidio.calculate_score([evaluated_sample])

    print(
        "Precision = {}\n"
        "Recall = {}\n"
        "F_3 = {}\n"
        "Errors = {}".format(p, r, f, mistakes)
    )