def __init__(self, vocab, validate=False):
    """Create empty internal registries and pick a pattern validator.

    Args:
        vocab: Stored unchanged on ``self.vocab``.
        validate: Stored unchanged on ``self.validate``. Defaults to False.
    """
    # Internal bookkeeping containers, all empty at construction time.
    self._specs = {}
    self._patterns = {}
    self._callbacks = {}
    self._seen_attrs = set()

    self.vocab = vocab
    self.validate = validate

    # Prefer a JSON-schema based validator bound to TOKEN_PATTERN_SCHEMA.
    # If any of the names involved is undefined in this module (NameError),
    # fall back to ``validate_token_pattern`` instead.
    # NOTE(review): presumably the helpers are imported conditionally
    # depending on the installed spaCy version -- confirm against the
    # module-level imports.
    try:
        self._validator = partial(
            validate_json,
            validator=get_json_validator(TOKEN_PATTERN_SCHEMA),
        )
    except NameError:
        self._validator = validate_token_pattern
def validator():
    """Return the result of ``get_json_validator(TOKEN_PATTERN_SCHEMA)``.

    NOTE(review): this looks like a pytest fixture whose decorator is not
    visible in this excerpt -- confirm.
    """
    token_pattern_validator = get_json_validator(TOKEN_PATTERN_SCHEMA)
    return token_pattern_validator
def test_doc_to_json_valid_training(doc):
    """Check that ``doc.to_json()`` produces no errors against TRAINING_SCHEMA.

    Args:
        doc: Object exposing ``to_json()``.
            NOTE(review): presumably supplied by a pytest fixture -- confirm.
    """
    serialized = doc.to_json()
    schema_validator = get_json_validator(TRAINING_SCHEMA)
    # validate_json receives a one-element list wrapping the serialized doc.
    problems = validate_json([serialized], schema_validator)
    assert not problems
def training_schema_validator():
    """Return the result of ``get_json_validator(TRAINING_SCHEMA)``.

    NOTE(review): this looks like a pytest fixture whose decorator is not
    visible in this excerpt -- confirm.
    """
    training_validator = get_json_validator(TRAINING_SCHEMA)
    return training_validator
import pytest from spacy.errors import MatchPatternError from spikex.defaults import spacy_version if spacy_version < 3: from functools import partial from spacy.util import get_json_validator, validate_json from spikex.matcher._schemas import TOKEN_PATTERN_SCHEMA validator = partial( validate_json, validator=get_json_validator(TOKEN_PATTERN_SCHEMA) ) else: from spacy.schemas import validate_token_pattern # type: ignore validator = validate_token_pattern from spikex.matcher import Matcher # (pattern, num errors with validation, num errors identified with minimal # checks) TEST_PATTERNS = [ # Bad patterns flagged in all cases ([{"XX": "foo"}], 1, 1), ([{"IS_ALPHA": {"==": True}}, {"LIKE_NUM": None}], 2, 1), ([{"IS_PUNCT": True, "OP": "$"}], 1, 1), ([{"_": "foo"}], 1, 1), ('[{"TEXT": "foo"}, {"LOWER": "bar"}]', 1, 1),