def test_manual_max_count():
    """Check suggestion counts with and without a per-token ``MAX_COUNT``.

    Two matchers are built with ``default_max_count=1``:

    * from the unmodified ``short_match_dict`` -- one suggestion expected;
    * from a copy in which one suggestion token carries ``MAX_COUNT=2`` --
      two suggestions expected.
    """
    # Local import keeps this block self-contained.
    import copy

    sentence = "They read us the stories they themselves had written."

    # use short match dict
    # default_max_count=1
    # expect 1 suggestion
    r_matcher = ReplaceMatcher(
        nlp,
        match_dict=short_match_dict,
        lm_path="./replacy/resources/test.arpa",
        filter_suggestions=True,
        default_max_count=1,
        debug=True,
    )
    spans = r_matcher(sentence)
    assert len(spans[0]._.suggestions) == 1

    # MAX_COUNT=2 for ['sing', 'give']
    # default_max_count=1
    # expect 2 suggestions
    #
    # Apply the override to a deep copy: writing into the shared
    # ``short_match_dict`` would leak MAX_COUNT=2 into every later use of
    # that dict and make the outcome depend on test execution order.
    custom_match_dict = copy.deepcopy(short_match_dict)
    custom_match_dict["match-1"]["suggestions"][0][1]["MAX_COUNT"] = 2
    r_matcher = ReplaceMatcher(
        nlp,
        match_dict=custom_match_dict,
        lm_path="./replacy/resources/test.arpa",
        filter_suggestions=True,
        default_max_count=1,
        debug=True,
    )
    spans = r_matcher(sentence)
    assert len(spans[0]._.suggestions) == 2
def test_span_filter_component():
    """Compare the span count for "hyuck hyuck" before and after adding ``filter_spans``."""
    matcher = ReplaceMatcher(nlp, match_dict)

    # Default pipeline: overlapping spans are all reported.
    unfiltered_count = len(matcher("hyuck hyuck"))
    assert (
        unfiltered_count == 3
    ), "without span overlap filtering there are three spans (one for each hyuck, and one for both)"

    # Insert the overlap filter ahead of the "joiner" pipe and match again.
    matcher.add_pipe(filter_spans, before="joiner")
    filtered_count = len(matcher("hyuck hyuck"))
    assert filtered_count == 1, "with span overlap filtering there is only one span"
def test_multiple_suggestions_max_count():
    """Expect three suggestions on the first span when matching with ``short_match_dict_2_sugg``."""
    matcher_options = {
        "match_dict": short_match_dict_2_sugg,
        "lm_path": "./replacy/resources/test.arpa",
        "filter_suggestions": True,
        "debug": True,
    }
    matcher = ReplaceMatcher(nlp, **matcher_options)

    first_span = matcher("They read us the stories they themselves had written.")[0]
    suggestion_count = len(first_span._.suggestions)
    assert suggestion_count == 3
def test_valid_format():
    """Run ``ReplaceMatcher.validate_match_dict`` on the dict returned by ``get_match_dict``."""
    # The test passes as long as validation does not raise.
    ReplaceMatcher.validate_match_dict(get_match_dict())
}, { "LOWER": "issue" }], "suggestions": [[{ "LOWER": "an" }, { "PATTERN_REF": -1 }]], "tests": { "positive": [], "negative": [] }, }, } replaCy = ReplaceMatcher(nlp, match_dict) aa = ArticleAgreer() replaCy.add_pipe(filter_spans, name="filter_spans", before="joiner") replaCy.add_pipe(aa, name="article_agreer", after="joiner") def test_a_to_an(): orig = "I have a problem." span = replaCy(orig)[0] replacement = span._.suggestions[0] assert (orig.replace( span.text, replacement) == "I have an answer."), "Automatically corrects a to an" def test_an_to_a():
That way, failures log which test case failed, not just one in a long list I would do this, but I am pretty sure I did it once a few PRs ago, and I guess it got overwritten """ import pytest import spacy from replacy import ReplaceMatcher from replacy.db import get_match_dict from functional import seq xfail = pytest.mark.xfail nlp = spacy.load("en_core_web_sm") match_dict = get_match_dict() r_matcher = ReplaceMatcher(nlp, match_dict) rule_all_suggs_pos = [] rule_all_suggs_neg = [] for rule_name in r_matcher.match_dict: rule_suggestions = [] for suggestion in r_matcher.match_dict[rule_name]["suggestions"]: rule_suggestions.append(" ".join([t["TEXT"] for t in suggestion])) rule_suggestions = ( seq(rule_suggestions) .map(lambda phrase: nlp(phrase)) .map(lambda doc: " ".join([token.lemma_ for token in doc])) .list()
def test_component_added_after_filter_is_called():
    """A component added after "filter" must run: the first suggestion equals ``NewComponent.gibberish``."""
    matcher = ReplaceMatcher(nlp, match_dict)
    matcher.add_pipe(garbler, after="filter")

    first_span = matcher("hyuck, that's funny")[0]
    top_suggestion = first_span._.suggestions[0]
    assert top_suggestion == NewComponent.gibberish
def test_add_pipe_after():
    """``add_pipe(..., after="filter")`` places the new pipe directly after "filter"."""
    expected_order = ["sorter", "filter", "garbler", "joiner"]

    matcher = ReplaceMatcher(nlp, match_dict)
    matcher.add_pipe(garbler, after="filter")

    assert matcher.pipe_names == expected_order
def test_add_pipe_first():
    """``add_pipe(..., first=True)`` places the new pipe at the head of the pipeline."""
    expected_order = ["garbler", "sorter", "filter", "joiner"]

    matcher = ReplaceMatcher(nlp, match_dict)
    matcher.add_pipe(garbler, first=True)

    assert matcher.pipe_names == expected_order
def test_default_pipe():
    """A freshly built matcher reports the pipes "sorter", "filter", "joiner", in that order."""
    default_order = ["sorter", "filter", "joiner"]
    assert ReplaceMatcher(nlp, match_dict).pipe_names == default_order
"TEXT": "exact", "FROM_TEMPLATE_ID": 1 }]], "match_hook": [{ "name": "succeeded_by_phrase", "args": "revenge", "match_if_predicate_is": True, }], "test": { "positive": [ "And at the same time extract revenge on those he so despises?", # 0 "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team.", # 1 "In fact, the farmer was so mean to this young man he determined to extract revenge.", # 2 "And at the same time extract revenge on the whites he so despises?", # 10 sic ], "negative": ["Mother flavours her custards with lemon extract."], }, } } r_matcher = ReplaceMatcher(nlp, match_dict, allow_multiple_whitespaces=True) def test_multiple_whites(): sents = match_dict["extract-revenge"]["test"]["positive"] for sent in sents: assert len(r_matcher(sent)), "Should correct with multiple whitespaces" suggestion = r_matcher(sent)[0].text.strip() assert "extract" in suggestion, "Should correct with multiple whitespaces"
import json import pytest import spacy from replacy import ReplaceMatcher from replacy.db import get_match_dict nlp = spacy.load("en_core_web_sm") with open("replacy/resources/match_dict.json", "r") as md: match_dict = json.load(md) r_matcher = ReplaceMatcher(nlp, match_dict) r_matcher.match_dict.update({ "sometest": { "patterns": [{ "LOWER": "sometest" }], "suggestions": [[{ "TEXT": "this part isn't the point" }]], "test": { "positive": ["positive test"], "negative": ["negative test"] }, "comment": "this is an example comment", "description": 'The expression is "make do".', "category": "R:VERB", "yo": "yoyo", "whoa": ["it's", "a", "list"], "damn": {
import pytest import spacy from replacy import ReplaceMatcher from replacy.db import get_match_dict nlp = spacy.load("en_core_web_sm") lm_path = "replacy/resources/test.arpa" match_dict = get_match_dict() r_matcher = ReplaceMatcher(nlp, match_dict, lm_path=lm_path) dumb_matcher = ReplaceMatcher(nlp, match_dict, lm_path=None) test_examples = [ { "sent": "This x a sentence.", "span_start": 1, "span_end": 2, "suggestions": ["are", "were", "is"], "best_suggestion": "is", }, { "sent": "This is x sentence.", "span_start": 2, "span_end": 3, "suggestions": ["two", "a", "cat"], "best_suggestion": "a", }, { "sent": "This is a sentences.",
# Sentences compared against matcher output elsewhere in this file.
# NOTE(review): presumably the expected result when default_max_count=1 -- confirm against the test that uses it.
output_default_max_count_1 = [
    "They sang us a stories THEY themselves wrote",
    "They sang us a story THEY themselves made",
    "They gave us a stories THEY themselves made",
    "They gave us a story THEY themselves wrote",
    "They sang us the stories THEY themselves made",
    "They sang us the story THEY themselves wrote",
    "They gave us the stories THEY themselves wrote",
    "They gave us the story THEY themselves made",
    "They sang us some stories THEY themselves created",
    "They gave us some story THEY themselves created",
]

# Matcher with suggestion filtering enabled and no explicit max count;
# its suggestions for the sample sentence are computed once at import time.
r_matcher1 = ReplaceMatcher(
    nlp,
    match_dict=match_dict,
    lm_path="./replacy/resources/test.arpa",
    filter_suggestions=True,
)
spans = r_matcher1("They read us the stories they themselves had written.")
suggestions = spans[0]._.suggestions


def test_suggestions():
    """Compare the module-level ``suggestions`` with ``outputs`` (defined outside this view)."""
    assert suggestions == outputs


r_matcher_max_count_1 = ReplaceMatcher(
    nlp,
    match_dict=match_dict,
    lm_path="./replacy/resources/test.arpa",