    def test_lf_applier_pandas_spacy_preprocessor_memoized(self) -> None:
        spacy = SpacyPreprocessor(text_field="text", doc_field="doc")
        spacy.memoize = True

        @labeling_function(pre=[spacy])
        def first_is_name(x: DataPoint) -> int:
            return 0 if x.doc[0].pos_ == "PROPN" else -1

        @labeling_function(pre=[spacy])
        def has_verb(x: DataPoint) -> int:
            return 0 if sum(t.pos_ == "VERB" for t in x.doc) > 0 else -1

        df = pd.DataFrame(dict(text=TEXT_DATA))
        df = dd.from_pandas(df, npartitions=2)
        applier = DaskLFApplier([first_is_name, has_verb])
        L = applier.apply(df)
        np.testing.assert_equal(L, L_TEXT_EXPECTED)
    def test_spacy_preprocessor(self) -> None:
        x = SimpleNamespace(text="Jane plays soccer.")
        preprocessor = SpacyPreprocessor("text", "doc")
        x_preprocessed = preprocessor(x)
        assert x_preprocessed is not None
        self.assertEqual(len(x_preprocessed.doc), 4)
        token = x_preprocessed.doc[0]
        self.assertEqual(token.text, "Jane")
        self.assertEqual(token.pos_, "PROPN")
    def test_lf_applier_pandas_spacy_preprocessor(self) -> None:
        spacy = SpacyPreprocessor(text_field="text", doc_field="doc")

        @labeling_function(pre=[spacy])
        def first_is_name(x: DataPoint) -> int:
            return 0 if x.doc[0].pos_ == "PROPN" else -1

        @labeling_function(pre=[spacy])
        def has_verb(x: DataPoint) -> int:
            return 0 if sum(t.pos_ == "VERB" for t in x.doc) > 0 else -1

        df = pd.DataFrame(dict(text=TEXT_DATA))
        applier = PandasLFApplier([first_is_name, has_verb])
        L = applier.apply(df, progress_bar=False)
        np.testing.assert_equal(L, L_TEXT_EXPECTED)
Example #4
    def _create_preprocessor(
            cls, parameters: SpacyPreprocessorParameters) -> SpacyPreprocessor:
        return SpacyPreprocessor(**parameters._asdict())
Example #5
# For more info, see the [`SpacyPreprocessor` documentation](https://snorkel.readthedocs.io/en/master/packages/_autosummary/preprocess/snorkel.preprocess.nlp.SpacyPreprocessor.html#snorkel.preprocess.nlp.SpacyPreprocessor).
#
#
# If you prefer to use a different NLP tool, you can also wrap that as a `Preprocessor` and use it in the same way.
# For more info, see the [`preprocessor` documentation](https://snorkel.readthedocs.io/en/master/packages/_autosummary/preprocess/snorkel.preprocess.preprocessor.html#snorkel.preprocess.preprocessor).
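#
# As a rough sketch (not part of this tutorial), a custom preprocessor can be written with
# the `preprocessor` decorator from `snorkel.preprocess`; the `count_tokens` function and
# its whitespace tokenization below are illustrative assumptions.

# %%
from snorkel.preprocess import preprocessor


@preprocessor(memoize=True)
def count_tokens(x):
    # Attach a simple whitespace token count to the data point.
    x.num_tokens = len(x.text.split())
    return x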

# %% [markdown] {"tags": ["md-exclude"]}
# If the spaCy English model wasn't already installed, the next cell may raise an exception.
# If this happens, restart the kernel and re-execute the cells up to this point.

# %%
from snorkel.preprocess.nlp import SpacyPreprocessor

# The SpacyPreprocessor parses the text in text_field and
# stores the new enriched representation in doc_field
spacy = SpacyPreprocessor(text_field="text", doc_field="doc", memoize=True)


# %%
@labeling_function(pre=[spacy])
def has_person(x):
    """Ham comments mention specific people and are short."""
    if len(x.doc) < 20 and any([ent.label_ == "PERSON" for ent in x.doc.ents]):
        return HAM
    else:
        return ABSTAIN


# %% [markdown]
# Because spaCy is such a common preprocessor for NLP applications, we also provide a
# [prebuilt `labeling_function`-like decorator that uses spaCy](https://snorkel.readthedocs.io/en/master/packages/_autosummary/labeling/snorkel.labeling.lf.nlp.nlp_labeling_function.html#snorkel.labeling.lf.nlp.nlp_labeling_function).
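#
# A minimal sketch of the LF above rewritten with that decorator (the `has_person_nlp`
# name is ours); the decorator runs a `SpacyPreprocessor` for us and exposes the parsed
# `Doc` as `x.doc`:

# %%
from snorkel.labeling.lf.nlp import nlp_labeling_function


@nlp_labeling_function()
def has_person_nlp(x):
    """Ham comments mention specific people and are short."""
    if len(x.doc) < 20 and any([ent.label_ == "PERSON" for ent in x.doc.ents]):
        return HAM
    else:
        return ABSTAIN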
Example #6
    def _create_preprocessor(
            cls, parameters: SpacyPreprocessorParameters) -> SpacyPreprocessor:
        preprocessor = SpacyPreprocessor(**parameters._asdict())
        make_spark_preprocessor(preprocessor)
        return preprocessor
Example #7
    # inF is a JSON-lines file of dev-set tweets opened earlier (not shown here);
    # allDev collects the parsed records.
    for line in inF:
        allDev.append(json.loads(line))

df_dev = pd.DataFrame(allDev)

df_train = df_tweets.loc[~df_tweets['id'].isin(df_dev['id'])]

# label mappings
BE = 0
NL = 1
ABSTAIN = -1

# spaCy preprocessor for Dutch
spacy_preproc = SpacyPreprocessor('text',
                                  'doc',
                                  language='nl_core_news_sm',
                                  memoize=True,
                                  disable=['tagger', 'parser'])


## RULES
@labeling_function()
def country_code(x):
    # country_code based on tweet location:
    # precise but low coverage
    if x.country_code == 'BE':
        return BE
    elif x.country_code == 'NL':
        return NL
    else:
        return ABSTAIN
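

# A hedged sketch (not part of the original script) of a rule that actually uses the
# spacy_preproc defined above; NL_CITIES is an illustrative, made-up lookup set.
NL_CITIES = {"Amsterdam", "Rotterdam", "Utrecht", "Eindhoven", "Groningen"}


@labeling_function(pre=[spacy_preproc])
def mentions_dutch_city(x):
    # Exact token match against the illustrative city list on the parsed doc
    if any(tok.text in NL_CITIES for tok in x.doc):
        return NL
    return ABSTAIN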