Code Example #1
import spacy
from spacy.lang.en import English
from spacy.util import load_config_from_str


def test_hyphen_in_config():
    # Config string with a hyphen inside a list value, which the config
    # parser has to preserve verbatim
    hyphen_config_str = """
    [nlp]
    lang = "en"
    pipeline = ["my_punctual_component"]

    [components]

    [components.my_punctual_component]
    factory = "my_punctual_component"
    punctuation = ["?","-"]
    """

    @spacy.Language.factory("my_punctual_component")
    class MyPunctualComponent:
        name = "my_punctual_component"

        def __init__(self, nlp, name, punctuation):
            self.punctuation = punctuation

    # Build the pipeline from the config and check the hyphen survived
    nlp = English.from_config(load_config_from_str(hyphen_config_str))
    assert nlp.get_pipe("my_punctual_component").punctuation == ["?", "-"]
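
The same component could also be registered with a function-style factory, which is the more common pattern in spaCy v3. A minimal sketch (the factory name punctual_component_fn and its default_config are illustrative, not part of the original test):

import spacy

@spacy.Language.factory(
    "punctual_component_fn",  # hypothetical name, for illustration only
    default_config={"punctuation": ["?"]},
)
def create_punctual_component(nlp, name, punctuation):
    # A pipeline component only needs to be callable on a Doc
    def component(doc):
        return doc  # no-op; real logic would use `punctuation`

    return component

nlp = spacy.blank("en")
nlp.add_pipe("punctual_component_fn", config={"punctuation": ["?", "-"]})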
Code Example #2
import pickle

from spacy.lang.en import English
from spacy.training import Example
from spacy.util import load_config_from_str

# CONFIG_ISSUE_6950 is the config string defined elsewhere in the test module
def test_issue6950():
    """Test that the nlp object with initialized tok2vec with listeners pickles
    correctly (and doesn't have lambdas).
    """
    nlp = English.from_config(load_config_from_str(CONFIG_ISSUE_6950))
    nlp.initialize(lambda: [Example.from_dict(nlp.make_doc("hello"), {"tags": ["V"]})])
    pickle.dumps(nlp)  # must not raise, even though initialize received a lambda
    nlp("hello")
    pickle.dumps(nlp)  # still picklable after processing text
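
As a quick follow-up check of the same property, the pickled pipeline can be round-tripped and used again; a minimal sketch, assuming the nlp object from the test above:

data = pickle.dumps(nlp)
nlp2 = pickle.loads(data)
nlp2("hello")  # the restored pipeline should still process text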
Code Example #3
from spacy.lang.en import English
from spacy.training import Example
from spacy.util import load_config_from_str

# CONFIG and TRAIN_DATA are defined elsewhere in the test module
def test_issue7029():
    """Test that an empty document doesn't mess up an entire batch."""
    nlp = English.from_config(load_config_from_str(CONFIG))
    train_examples = []
    for t in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
    optimizer = nlp.initialize(get_examples=lambda: train_examples)
    for i in range(50):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
    texts = ["first", "second", "third", "fourth", "and", "then", "some", ""]
    # Predicted tags must be independent of batch size
    docs1 = list(nlp.pipe(texts, batch_size=1))
    docs2 = list(nlp.pipe(texts, batch_size=4))
    assert [doc[0].tag_ for doc in docs1[:-1]] == [doc[0].tag_ for doc in docs2[:-1]]
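
The [:-1] slices leave out the last doc because the final text is the empty string, so doc[0] would raise an IndexError there. Checking the empty doc directly is straightforward; a sketch under the same assumptions as the test above:

assert len(docs1[-1]) == 0  # the empty text yields a doc with no tokens
assert len(docs2[-1]) == 0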
Code Example #4
import spacy
from spacy.tokens import DocBin
from spacy.training.initialize import init_nlp
from spacy.util import load_config_from_str, make_tempdir

# CONFIG_ISSUE_6908 is defined elsewhere in the test module;
# component_name is supplied by pytest parametrization
def test_issue6908(component_name):
    """Test initializing textcat with labels in a list."""

    def create_data(out_file):
        # Write a one-doc training corpus with category annotations
        nlp = spacy.blank("en")
        doc = nlp.make_doc("Some text")
        doc.cats = {"label1": 0, "label2": 1}
        out_data = DocBin(docs=[doc]).to_bytes()
        with out_file.open("wb") as file_:
            file_.write(out_data)

    with make_tempdir() as tmp_path:
        train_path = tmp_path / "train.spacy"
        create_data(train_path)
        config_str = CONFIG_ISSUE_6908.replace("TEXTCAT_PLACEHOLDER", component_name)
        config_str = config_str.replace("TRAIN_PLACEHOLDER", train_path.as_posix())
        config = load_config_from_str(config_str)
        init_nlp(config)
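
To verify what create_data wrote, the .spacy file can be loaded back through DocBin; a minimal sketch, assuming train_path from the test above and that the cats round-trip as floats:

doc_bin = DocBin().from_disk(train_path)
docs = list(doc_bin.get_docs(spacy.blank("en").vocab))
assert docs[0].cats == {"label1": 0.0, "label2": 1.0}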
Code Example #5
from spacy.lang.en import English
from spacy.training import Example
from spacy.util import load_config_from_str

# CONFIG and TRAIN_DATA are defined elsewhere in the test module
def test_empty_doc():
    """Test that an empty document gets processed correctly."""
    nlp = English.from_config(load_config_from_str(CONFIG))
    train_examples = []
    for t in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
    optimizer = nlp.initialize(get_examples=lambda: train_examples)
    for i in range(2):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
    # The last text is empty, so its doc is skipped in the comparisons below
    texts = ["first", "second", "third", "fourth", "and", "then", "some", ""]

    # run as normal
    nlp.select_pipes(enable=["transformer", "tagger"])
    docs1 = list(nlp.pipe(texts, batch_size=1))
    docs2 = list(nlp.pipe(texts, batch_size=4))
    assert [doc[0].tag_ for doc in docs1[:-1]] == [doc[0].tag_ for doc in docs2[:-1]]

    # disable the transformer (the listener will produce random output)
    nlp.select_pipes(enable=["tagger"])
    docs1 = list(nlp.pipe(texts, batch_size=1))
    docs2 = list(nlp.pipe(texts, batch_size=4))
    assert [doc[0].tag_ for doc in docs1[:-1]] == [doc[0].tag_ for doc in docs2[:-1]]
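
select_pipes can also be used as a context manager, which restores the previous pipe state on exit instead of changing it permanently; a minimal sketch, assuming the nlp object from the test above:

with nlp.select_pipes(enable=["tagger"]):
    nlp("first")  # the transformer is disabled only inside this block
# on exit, the previously enabled pipes are restored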