def predict(self, sample: InputSample) -> List[str]:
    """Predict PII tags for a sample using a stanza model.

    :param sample: InputSample with text
    :return: list of tags (one label per spaCy token)
    """
    doc = self.model(sample.full_text)

    # Stanza tokens might not be consistent with spaCy's tokens.
    # Use spaCy tokenization (not stanza's) to maintain consistency
    # with other models. Done up front so the no-entity branch below
    # also has tokens to measure against (previously it would fail or
    # return a wrong-length list when sample.tokens was empty).
    if not sample.tokens:
        sample.tokens = tokenize(sample.full_text)

    if doc.ents:
        tags, texts, start, end = zip(
            *[(s.label_, s.text, s.start_char, s.end_char) for s in doc.ents]
        )
        # Create tags (label per token) based on stanza spans and spaCy tokens.
        tags = span_to_tag(
            scheme=self.labeling_scheme,
            text=sample.full_text,
            starts=start,
            ends=end,
            tags=tags,
            tokens=sample.tokens,
        )
    else:
        # No entities detected: every token is outside ("O").
        tags = ["O"] * len(sample.tokens)

    if len(tags) != len(sample.tokens):
        print("mismatch between input tokens and new tokens")
    return tags
def predict(self, sample: InputSample) -> List[str]:
    """Predict PII tags for a sample using a Flair model.

    :param sample: InputSample with text
    :return: list of tags (one label per spaCy token)
    """
    sentence = Sentence(text=sample.full_text, use_tokenizer=self.spacy_tokenizer)
    self.model.predict(sentence)
    ents = sentence.get_spans("ner")

    # Flair tokens might not be consistent with spaCy's tokens (even when
    # using the spaCy tokenizer). Use spaCy tokenization to maintain
    # consistency with other models. Done up front so the no-entity branch
    # below also has tokens to measure against (previously it would fail or
    # return a wrong-length list when sample.tokens was empty).
    if not sample.tokens:
        sample.tokens = tokenize(sample.full_text)

    if ents:
        tags, texts, start, end = zip(
            *[(ent.tag, ent.text, ent.start_pos, ent.end_pos) for ent in ents]
        )
        # Flair's tag for PERSON is PER; normalize to PERSON.
        tags = [tag if tag != "PER" else "PERSON" for tag in tags]

        # Create tags (label per token) based on Flair spans and spaCy tokens.
        tags = span_to_tag(
            scheme="IO",
            text=sample.full_text,
            starts=start,
            ends=end,
            tags=tags,
            tokens=sample.tokens,
        )
    else:
        # No entities detected: every token is outside ("O").
        tags = ["O"] * len(sample.tokens)

    if len(tags) != len(sample.tokens):
        print("mismatch between input tokens and new tokens")
    return tags
def test_evaluate_sample_wrong_entities_to_keep_correct_statistics():
    """When the predicted entity type is not in entities_to_keep,
    every token is counted as a ('O', 'O') match."""
    predicted_tags = ["O", "O", "O", "U-ANIMAL"]
    mock_model = MockTokensModel(
        prediction=predicted_tags, entities_to_keep=['SPACESHIP']
    )

    sample = InputSample(
        full_text="I am the walrus",
        masked="I am the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    result = mock_model.evaluate_sample(sample)
    assert result.results[("O", "O")] == 4
def test_evaluate_same_entity_correct_statistics():
    """One correct ANIMAL match plus one false positive yield
    2x ('O','O'), 1x ('ANIMAL','ANIMAL') and 1x ('O','ANIMAL')."""
    predicted_tags = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    mock_model = MockTokensModel(prediction=predicted_tags)
    evaluator = Evaluator(model=mock_model, entities_to_keep=["ANIMAL"])

    # NOTE(review): full_text says "dog" but tokens[1] is "am" — the
    # evaluation is tag-based so the assertions are unaffected; confirm
    # whether the fixture text should match the tokens.
    sample = InputSample(
        full_text="I dog the walrus",
        masked="I [ANIMAL] the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    outcome = evaluator.evaluate_sample(sample, predicted_tags)
    assert outcome.results[("O", "O")] == 2
    assert outcome.results[("ANIMAL", "ANIMAL")] == 1
    assert outcome.results[("O", "ANIMAL")] == 1
def test_evaluate_multiple_tokens_correct_statistics():
    """A fully correct multi-token (BIL) ANIMAL span scores perfect
    precision and recall."""
    predicted_tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]
    mock_model = MockTokensModel(prediction=predicted_tags)
    evaluator = Evaluator(model=mock_model, entities_to_keep=["ANIMAL"])

    # NOTE(review): full_text spells "amaericanus" while tokens use
    # "americanus" — tag-based evaluation is unaffected; verify intent.
    sample = InputSample(
        "I am the walrus amaericanus magnifico", masked=None, spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    per_sample = evaluator.evaluate_sample(sample, predicted_tags)
    scores = evaluator.calculate_score([per_sample])

    assert scores.pii_precision == 1
    assert scores.pii_recall == 1
def test_evaluate_multiple_examples_correct_statistics():
    """Per sample: one true positive, one false positive and one false
    negative PERSON — 0.5 precision and 0.5 recall across four copies."""
    predicted_tags = ["U-PERSON", "O", "O", "U-PERSON", "O", "O"]
    mock_model = MockTokensModel(prediction=predicted_tags)
    evaluator = Evaluator(model=mock_model, entities_to_keep=["PERSON"])

    sample = InputSample("My name is Raphael or David", masked=None, spans=None)
    sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    per_sample_results = evaluator.evaluate_all([sample] * 4)
    scores = evaluator.calculate_score(per_sample_results)

    assert scores.pii_precision == 0.5
    assert scores.pii_recall == 0.5
def test_evaluator_simple():
    """A prediction identical to the gold tags gives perfect
    precision and recall."""
    predicted_tags = ["O", "O", "O", "U-ANIMAL"]
    mock_model = MockTokensModel(
        prediction=predicted_tags, entities_to_keep=['ANIMAL']
    )

    sample = InputSample(
        full_text="I am the walrus",
        masked="I am the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    per_sample = mock_model.evaluate_sample(sample)
    scores = mock_model.calculate_score([per_sample])

    assert scores.pii_precision == 1
    assert scores.pii_recall == 1
def test_evaluate_multiple_entities_to_keep_correct_statistics():
    """With several entities_to_keep, only ANIMAL appears in the data:
    one match, one false positive and two ('O','O') pairs."""
    predicted_tags = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    mock_model = MockTokensModel(
        prediction=predicted_tags,
        labeling_scheme='BIO',
        entities_to_keep=['ANIMAL', 'PLANT', 'SPACESHIP'],
    )

    sample = InputSample(
        full_text="I dog the walrus",
        masked="I [ANIMAL] the [ANIMAL]",
        spans=None,
    )
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    outcome = mock_model.evaluate_sample(sample)
    assert outcome.results[("O", "O")] == 2
    assert outcome.results[("ANIMAL", "ANIMAL")] == 1
    assert outcome.results[("O", "ANIMAL")] == 1
def test_evaluate_multiple_tokens_no_match_match_correct_statistics():
    """Predicting only an out-of-scope SPACESHIP span while missing the
    ANIMAL span leaves precision undefined (NaN) and recall at zero."""
    predicted_tags = ["O", "O", "O", "B-SPACESHIP", "L-SPACESHIP", "O"]
    mock_model = MockTokensModel(
        prediction=predicted_tags, entities_to_keep=['ANIMAL']
    )

    sample = InputSample(
        "I am the walrus amaericanus magnifico", masked=None, spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    per_sample = mock_model.evaluate_sample(sample)
    scores = mock_model.calculate_score([per_sample])

    assert np.isnan(scores.pii_precision)
    assert scores.pii_recall == 0
def test_evaluate_multiple_examples_ignore_entity_correct_statistics():
    """A PERSON mislabeled as TENNIS_PLAYER still counts as detected PII,
    so PII-level precision and recall stay at 1."""
    predicted_tags = ["O", "O", "O", "U-PERSON", "O", "U-TENNIS_PLAYER"]
    mock_model = MockTokensModel(
        prediction=predicted_tags,
        labeling_scheme='BILOU',
        entities_to_keep=['PERSON', 'TENNIS_PLAYER'],
    )

    sample = InputSample("My name is Raphael or David", masked=None, spans=None)
    sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    per_sample_results = mock_model.evaluate_all([sample] * 4)
    scores = mock_model.calculate_score(per_sample_results)

    assert scores.pii_precision == 1
    assert scores.pii_recall == 1
self.analyze_template = template return requested_fields = [] for entity in entities: for field in template['fields']: if entity == field['name']: requested_fields.append(field) new_template = {'fields': requested_fields} self.analyze_template = new_template if __name__ == "__main__": # Example: text = "My siblings are Dan and magen" bilou_tags = ['O', 'O', 'O', 'U-PERSON', 'O', 'U-PERSON'] presidio = PresidioAPIEvaluator(verbose=True, all_fields=True, compare_by_io=True) tokens = tokenize(text) s = InputSample(text, masked=None, spans=None) s.tokens = tokens s.tags = bilou_tags evaluated_sample = presidio.evaluate_sample(s) p, r, entity_recall, f, mistakes = presidio.calculate_score([evaluated_sample]) print("Precision = {}\n" "Recall = {}\n" "F_3 = {}\n" "Errors = {}".format(p, r, f, mistakes))