Esempi in Python per get_stopwords

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: enlp.processing.stdtools

Metodo/funzione: get_stopwords

Esempi su hotexamples.com: 12

get_stopwords in Python: 12 esempi trovati. Questi sono i migliori esempi reali in Python per enlp.processing.stdtools.get_stopwords, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

def test_get_stopwords_norwegian():
    """Check the Norwegian stop words returned by ``get_stopwords()``.

    The second element of the returned triple must equal a list built
    directly from ``STOP_WORDS_NB``.
    """
    # Expected value, taken straight from the Norwegian stop-word constant.
    expected_nb = list(STOP_WORDS_NB)

    # Only the second element of the triple is relevant for this test.
    _, actual_nb, _ = get_stopwords()

    assert expected_nb == actual_nb

Esempio n. 2

Mostra file

def test_get_stopwords_english():
    """Check the English stop words returned by ``get_stopwords()``.

    The third element of the returned triple must equal a list built
    directly from ``STOP_WORDS_EN``.
    """
    # Expected value, taken straight from the English stop-word constant.
    expected_en = list(STOP_WORDS_EN)

    # Only the third element of the triple is relevant for this test.
    _, _, actual_en = get_stopwords()

    assert expected_en == actual_en

Esempio n. 3

Mostra file

def test_get_stopwords_english():
    """Check the English stop words returned by ``get_stopwords()``.

    The third element of the returned triple must equal a list built from
    the ``STOP_WORDS`` object imported from ``spacy.lang.en.stop_words``.
    """
    # Import locally so the expected value comes straight from spaCy.
    from spacy.lang.en.stop_words import STOP_WORDS as spacy_en_stops
    expected_en = list(spacy_en_stops)

    # Only the third element of the triple is relevant for this test.
    _, _, actual_en = get_stopwords()

    assert expected_en == actual_en

Esempio n. 4

Mostra file

def test_get_stopwords_full():
    """Check the combined stop words returned by ``get_stopwords()``.

    The first element of the returned triple must equal the English stop
    words followed by the Norwegian ones, each converted with ``str``.
    """
    # Expected value: English entries first, then Norwegian, all as str.
    combined = [*STOP_WORDS_EN, *STOP_WORDS_NB]
    expected_all = list(map(str, combined))

    # Only the first element of the triple is relevant for this test.
    actual_all, _, _ = get_stopwords()

    assert expected_all == actual_all

Esempio n. 5

Mostra file

def test_get_stopwords_full():
    """Check the combined stop words returned by ``get_stopwords()``.

    The first element of the returned triple must equal spaCy's English
    stop words followed by its Norwegian ones, each converted with ``str``.
    """
    # Alias the two imports so neither shadows the other.
    from spacy.lang.en.stop_words import STOP_WORDS as spacy_en_stops
    from spacy.lang.nb.stop_words import STOP_WORDS as spacy_nb_stops

    # Expected value: English entries first, then Norwegian, all as str.
    combined = list(spacy_en_stops) + list(spacy_nb_stops)
    expected_all = list(map(str, combined))

    # Only the first element of the triple is relevant for this test.
    actual_all, _, _ = get_stopwords()

    assert expected_all == actual_all

Esempio n. 6

Mostra file

File: test_kw_extraction.py Progetto: rafaelschlatter/eNLP

def test_keyphrase_extraction():
    """Check the first keyphrase extracted from the example text.

    Reads the example text file, runs ``kw.keyphrase_list`` on it with the
    English stop words and ``with_scores=False``, and expects the first
    returned entry to be ``"word embeddings"``.
    """
    # arrange
    sample_path = './examples/example_data/en_historynlp.txt'
    with open(sample_path, "r") as handle:
        text = handle.read()

    # Only the English stop words (third element) are used below.
    _, _, english_stops = stdt.get_stopwords()

    # act
    keyphrases = kw.keyphrase_list(text, stopwords=english_stops, with_scores=False)

    # assert
    top_phrase = keyphrases[0]
    assert top_phrase == "word embeddings"

Esempio n. 7

Mostra file

NOTE: This example shows how to run the procedure; however, due to the small dataset used,
the results are likely to be nonsensical.
"""

import enlp.understanding.topics as tp
import enlp.processing.stdtools as stdt
import spacy

###############################################################################
# Load example text and get stopwords

# Read the whole example file into a single string.
with open("example_data/en_nlptexts.txt", "r") as file:
    text=file.read()

# get_stopwords() is unpacked into three values: combined, Norwegian and
# English stop words (going by the variable names used here).
all_stopwords, stopwords_nb, stopwords_en = stdt.get_stopwords()


###############################################################################
# Preprocess text - for this example we have a very small corpus to allow the documentation
# to build therefore we will split the single document into paragraphs for processing to
# imitate multiple document input and we will also remove stopwords and punctuation  as the text is too small.

# Split text into paragraphs (separated by a blank line) to imitate documents
docs = text.split('\n\n')
# Replace the remaining newlines inside each paragraph with a space
docs = [d.replace('\n',' ') for d in docs]

# Because example text is small, remove stopwords and punctuation.
# Only the setup is visible here: the spaCy model is loaded and the stop
# words are fetched; the removal itself presumably follows in code not shown.
en = spacy.load('en_core_web_md')
# NOTE(review): second call to get_stopwords(); the same triple was already
# unpacked above under different names - confirm whether both are needed.
stopwords, stops_nb, stops_en = stdt.get_stopwords()

Esempio n. 8

Mostra file

"""
Removing Stopwords
==================
XXX
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as pltgs

from enlp.processing.stdtools import get_stopwords

# Close every open figure up front; per the original author's note this is
# very important on Read the Docs to avoid it crashing due to memory.
plt.close('all')

###############################################################################
# Fetch the stop words and show a few English entries

# Unpack the three values returned by get_stopwords().
stopwords_func, stopwords_nb_func, stopwords_en_func = get_stopwords()

# Print the first five entries of the English stop words.
first_five_en = stopwords_en_func[:5]
print(first_five_en)

Esempio n. 9

Mostra file

File: ex_keyphrases.py Progetto: rafaelschlatter/eNLP

"""
Keyphrase Extraction (English)
==============================
The following example uses a python implementation of the Rapid Automatic Keyword Extraction algorithm to extract
keyphrases from a text.
"""

import pandas as pd
from enlp.processing.stdtools import get_stopwords
from enlp.understanding.keywords import keyphrase_list

###############################################################################
# Load example text and get stopwords

# Read the whole example file into a single string.
with open("example_data/en_historynlp.txt", "r") as handle:
    text = handle.read()

# Only ``stopwords_en`` is used below; the other two values are kept under
# their original names.
all_stopwords, stopwords_nb, stopwords_en = get_stopwords()

###############################################################################
# Extract keyphrases

keyphrases = keyphrase_list(text, stopwords=stopwords_en)

# Tabulate the results and print the first ten rows.
keyphrase_table = pd.DataFrame(keyphrases, columns=['score', 'keyphrase'])
print(keyphrase_table.head(10))

Esempio n. 10

Mostra file

def all_stopwords():
    """Return the first element of the triple produced by ``get_stopwords()``."""
    combined, _, _ = get_stopwords()
    return combined

Esempio n. 11

Mostra file

def norwegian_stopwords():
    """Return the second element of the triple produced by ``get_stopwords()``."""
    _, norwegian, _ = get_stopwords()
    return norwegian

Esempio n. 12

Mostra file

def english_stopwords():
    """Return the third element of the triple produced by ``get_stopwords()``."""
    _, _, english = get_stopwords()
    return english