예제 #1
0
def test_manual_max_count():
    """Check that a per-entry ``MAX_COUNT`` overrides ``default_max_count``.

    A matcher built from the unmodified short match dict with
    ``default_max_count=1`` must produce exactly one suggestion for the first
    span. A second matcher, built from a copy of that dict in which
    ``suggestions[0][1]["MAX_COUNT"]`` of ``"match-1"`` is set to 2, must
    produce exactly two.
    """
    # Function-scope import so this test does not depend on file-level imports.
    import copy

    sentence = "They read us the stories they themselves had written."

    # use short match dict
    # default_max_count=1
    # expect 1 suggestion
    r_matcher = ReplaceMatcher(
        nlp,
        match_dict=short_match_dict,
        lm_path="./replacy/resources/test.arpa",
        filter_suggestions=True,
        default_max_count=1,
        debug=True,
    )
    spans = r_matcher(sentence)
    assert len(spans[0]._.suggestions) == 1

    # MAX_COUNT=2 for ['sing', 'give']
    # default_max_count=1
    # expect 2 suggestions
    #
    # Apply the override to a deep copy: writing into the shared module-level
    # dict would make the first assertion above fail on a re-run and would
    # leak the override into any other test that reads short_match_dict.
    overridden_match_dict = copy.deepcopy(short_match_dict)
    overridden_match_dict["match-1"]["suggestions"][0][1]["MAX_COUNT"] = 2

    r_matcher = ReplaceMatcher(
        nlp,
        match_dict=overridden_match_dict,
        lm_path="./replacy/resources/test.arpa",
        filter_suggestions=True,
        default_max_count=1,
        debug=True,
    )
    spans = r_matcher(sentence)
    suggestions = spans[0]._.suggestions

    assert len(suggestions) == 2
예제 #2
0
def test_span_filter_component():
    """Adding ``filter_spans`` before "joiner" reduces three spans to one."""
    text = "hyuck hyuck"
    replaCy = ReplaceMatcher(nlp, match_dict)

    # Baseline: no overlap filtering registered yet.
    unfiltered = replaCy(text)
    assert (
        len(unfiltered) == 3
    ), "without span overlap filtering there are three spans (one for each hyuck, and one for both)"

    # Register the filter and run the same text again.
    replaCy.add_pipe(filter_spans, before="joiner")
    filtered = replaCy(text)
    assert len(filtered) == 1, "with span overlap filtering there is only one span"
예제 #3
0
def test_multiple_suggestions_max_count():
    """The first span gets three suggestions with ``short_match_dict_2_sugg``."""
    matcher_options = {
        "match_dict": short_match_dict_2_sugg,
        "lm_path": "./replacy/resources/test.arpa",
        "filter_suggestions": True,
        "debug": True,
    }
    r_matcher = ReplaceMatcher(nlp, **matcher_options)

    first_span = r_matcher("They read us the stories they themselves had written.")[0]
    assert len(first_span._.suggestions) == 3
예제 #4
0
def test_valid_format():
    """Run ``ReplaceMatcher.validate_match_dict`` on the dict from ``get_match_dict()``."""
    ReplaceMatcher.validate_match_dict(get_match_dict())
예제 #5
0
        }, {
            "LOWER": "issue"
        }],
        "suggestions": [[{
            "LOWER": "an"
        }, {
            "PATTERN_REF": -1
        }]],
        "tests": {
            "positive": [],
            "negative": []
        },
    },
}

# Module-level matcher shared by the tests in this file.
replaCy = ReplaceMatcher(nlp, match_dict)
aa = ArticleAgreer()
# Register two extra components relative to the existing "joiner" pipe:
# filter_spans is placed before it, the ArticleAgreer instance after it.
replaCy.add_pipe(filter_spans, name="filter_spans", before="joiner")
replaCy.add_pipe(aa, name="article_agreer", after="joiner")


def test_a_to_an():
    """Substituting the first suggestion for the matched span yields "an answer"."""
    orig = "I have a problem."
    span = replaCy(orig)[0]
    replacement = span._.suggestions[0]

    # Apply the suggestion to the sentence, then compare the full result.
    corrected = orig.replace(span.text, replacement)
    assert corrected == "I have an answer.", "Automatically corrects a to an"


def test_an_to_a():
예제 #6
0
That way, a failure reports exactly which test case failed, not just one failure in a long list.

I would do this, but I am pretty sure I already did it once a few PRs ago, and I guess it got overwritten.
"""
import pytest
import spacy
from replacy import ReplaceMatcher
from replacy.db import get_match_dict
from functional import seq

# Short alias for pytest's expected-failure marker.
xfail = pytest.mark.xfail

# spaCy pipeline used to build the matcher and to process suggestion phrases.
nlp = spacy.load("en_core_web_sm")

# Matcher built from the match dict returned by get_match_dict().
match_dict = get_match_dict()
r_matcher = ReplaceMatcher(nlp, match_dict)


# Module-level accumulators, initially empty.
# NOTE(review): presumably filled per rule with positive / negative test
# cases further down the file — confirm against the code that appends to them.
rule_all_suggs_pos = []
rule_all_suggs_neg = []

for rule_name in r_matcher.match_dict:
    rule_suggestions = []
    for suggestion in r_matcher.match_dict[rule_name]["suggestions"]:
        rule_suggestions.append(" ".join([t["TEXT"] for t in suggestion]))

    rule_suggestions = (
        seq(rule_suggestions)
        .map(lambda phrase: nlp(phrase))
        .map(lambda doc: " ".join([token.lemma_ for token in doc]))
        .list()
예제 #7
0
def test_component_added_after_filter_is_called():
    """With ``garbler`` added after "filter", the first suggestion is the gibberish value."""
    replaCy = ReplaceMatcher(nlp, match_dict)
    replaCy.add_pipe(garbler, after="filter")

    first_span = replaCy("hyuck, that's funny")[0]
    top_suggestion = first_span._.suggestions[0]
    assert top_suggestion == NewComponent.gibberish
예제 #8
0
def test_add_pipe_after():
    """``add_pipe(..., after="filter")`` places the component right after "filter"."""
    expected_order = ["sorter", "filter", "garbler", "joiner"]

    replaCy = ReplaceMatcher(nlp, match_dict)
    replaCy.add_pipe(garbler, after="filter")

    assert replaCy.pipe_names == expected_order
예제 #9
0
def test_add_pipe_first():
    """``add_pipe(..., first=True)`` places the component at the head of the pipeline."""
    expected_order = ["garbler", "sorter", "filter", "joiner"]

    replaCy = ReplaceMatcher(nlp, match_dict)
    replaCy.add_pipe(garbler, first=True)

    assert replaCy.pipe_names == expected_order
예제 #10
0
def test_default_pipe():
    """A freshly built matcher exposes the pipes "sorter", "filter", "joiner" in order."""
    default_order = ["sorter", "filter", "joiner"]
    replaCy = ReplaceMatcher(nlp, match_dict)
    assert replaCy.pipe_names == default_order
예제 #11
0
            "TEXT": "exact",
            "FROM_TEMPLATE_ID": 1
        }]],
        "match_hook": [{
            "name": "succeeded_by_phrase",
            "args": "revenge",
            "match_if_predicate_is": True,
        }],
        "test": {
            "positive": [
                "And at the same time extract revenge on those he so despises?",  # 0
                "Watch as Tampa Bay extracts  revenge against his former Los Angeles Rams team.",  # 1
                "In fact, the farmer was so mean to this young man he determined to extract   revenge.",  # 2
                "And at the same time extract          revenge on the whites he so despises?",  # 10 sic
            ],
            "negative": ["Mother flavours her custards with lemon extract."],
        },
    }
}

# Matcher under test, built with allow_multiple_whitespaces=True.
r_matcher = ReplaceMatcher(nlp, match_dict, allow_multiple_whitespaces=True)


def test_multiple_whites():
    """Every positive "extract-revenge" sentence must produce a matching span.

    For each sentence the matcher has to return at least one span, and the
    text of the first span (stripped of surrounding whitespace) has to
    contain "extract".
    """
    sents = match_dict["extract-revenge"]["test"]["positive"]
    for sent in sents:
        # Run the matcher once per sentence and reuse the result for both checks.
        spans = r_matcher(sent)
        # Include the sentence in the message so a failure names its test case.
        assert len(spans), f"Should correct with multiple whitespaces: {sent!r}"

        # This is the text of the matched span, not one of its suggestions.
        matched_text = spans[0].text.strip()
        assert (
            "extract" in matched_text
        ), f"Should correct with multiple whitespaces: {sent!r}"
예제 #12
0
import json

import pytest
import spacy
from replacy import ReplaceMatcher
from replacy.db import get_match_dict

# spaCy pipeline handed to the matcher.
nlp = spacy.load("en_core_web_sm")

# Read the match dict with an explicit encoding: open() otherwise falls back
# to the locale's default, which is not UTF-8 on every platform.
with open("replacy/resources/match_dict.json", "r", encoding="utf-8") as md:
    match_dict = json.load(md)

# Build the matcher only after the file has been closed; it needs the parsed
# dict, not the open handle.
r_matcher = ReplaceMatcher(nlp, match_dict)

r_matcher.match_dict.update({
    "sometest": {
        "patterns": [{
            "LOWER": "sometest"
        }],
        "suggestions": [[{
            "TEXT": "this part isn't the point"
        }]],
        "test": {
            "positive": ["positive test"],
            "negative": ["negative test"]
        },
        "comment": "this is an example comment",
        "description": 'The expression is "make do".',
        "category": "R:VERB",
        "yo": "yoyo",
        "whoa": ["it's", "a", "list"],
        "damn": {
예제 #13
0
import pytest
import spacy

from replacy import ReplaceMatcher
from replacy.db import get_match_dict

# spaCy pipeline shared by both matchers below.
nlp = spacy.load("en_core_web_sm")
# Path passed to ReplaceMatcher as its lm_path argument.
lm_path = "replacy/resources/test.arpa"

match_dict = get_match_dict()
# Matcher constructed with lm_path set.
r_matcher = ReplaceMatcher(nlp, match_dict, lm_path=lm_path)

# Same pipeline and match dict, but constructed with lm_path=None.
dumb_matcher = ReplaceMatcher(nlp, match_dict, lm_path=None)

test_examples = [
    {
        "sent": "This x a sentence.",
        "span_start": 1,
        "span_end": 2,
        "suggestions": ["are", "were", "is"],
        "best_suggestion": "is",
    },
    {
        "sent": "This is x sentence.",
        "span_start": 2,
        "span_end": 3,
        "suggestions": ["two", "a", "cat"],
        "best_suggestion": "a",
    },
    {
        "sent": "This is a sentences.",
예제 #14
0
# Ten literal sentence strings.
# NOTE(review): presumably the expected suggestions for a matcher built with
# default_max_count=1 — the consumer is not visible here; confirm.
output_default_max_count_1 = [
    "They sang us a stories THEY themselves wrote",
    "They sang us a story THEY themselves made",
    "They gave us a stories THEY themselves made",
    "They gave us a story THEY themselves wrote",
    "They sang us the stories THEY themselves made",
    "They sang us the story THEY themselves wrote",
    "They gave us the stories THEY themselves wrote",
    "They gave us the story THEY themselves made",
    "They sang us some stories THEY themselves created",
    "They gave us some story THEY themselves created",
]

# Matcher with filter_suggestions=True and no default_max_count argument.
r_matcher1 = ReplaceMatcher(
    nlp,
    match_dict=match_dict,
    lm_path="./replacy/resources/test.arpa",
    filter_suggestions=True,
)

# Computed once at import time; test_suggestions compares `suggestions`
# (the suggestions of the first returned span) against `outputs`.
spans = r_matcher1("They read us the stories they themselves had written.")
suggestions = spans[0]._.suggestions


def test_suggestions():
    """The module-level ``suggestions`` must equal the module-level ``outputs``."""
    expected = outputs
    assert suggestions == expected


r_matcher_max_count_1 = ReplaceMatcher(
    nlp,
    match_dict=match_dict,
    lm_path="./replacy/resources/test.arpa",