Python get_stopwords 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: enlp.processing.stdtools

메소드/함수: get_stopwords

hotexamples.com에서의 예제들: 12

Python get_stopwords - 12개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 enlp.processing.stdtools.get_stopwords에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

def test_get_stopwords_norwegian():
    """Check the second value returned by ``get_stopwords()`` against ``list(STOP_WORDS_NB)``."""
    # arrange - build the expected list straight from the constant
    expected_nb = list(STOP_WORDS_NB)

    # act - only the middle item of the returned triple is examined here
    _, actual_nb, _ = get_stopwords()

    # assert
    assert expected_nb == actual_nb

예제 #2

파일 보기

def test_get_stopwords_english():
    """Check the third value returned by ``get_stopwords()`` against ``list(STOP_WORDS_EN)``."""
    # arrange - build the expected list straight from the constant
    expected_en = list(STOP_WORDS_EN)

    # act - only the last item of the returned triple is examined here
    _, _, actual_en = get_stopwords()

    # assert
    assert expected_en == actual_en

예제 #3

파일 보기

def test_get_stopwords_english():
    """Check the third value returned by ``get_stopwords()`` against spaCy's ``en`` stop words."""
    # arrange - take the expected list directly from spaCy's ``en`` module
    from spacy.lang.en.stop_words import STOP_WORDS as spacy_en_stops
    expected_en = list(spacy_en_stops)

    # act - only the last item of the returned triple is examined here
    _, _, actual_en = get_stopwords()

    # assert
    assert expected_en == actual_en

예제 #4

파일 보기

def test_get_stopwords_full():
    """Check the first value returned by ``get_stopwords()`` against EN + NB stop words.

    The expected list is ``STOP_WORDS_EN`` followed by ``STOP_WORDS_NB``, with
    every entry converted through ``str``.
    """
    # arrange - concatenate both constants (EN first, then NB) and stringify
    expected_en = list(STOP_WORDS_EN)
    expected_nb = list(STOP_WORDS_NB)
    expected_all = list(map(str, expected_en + expected_nb))

    # act - only the first item of the returned triple is examined here
    actual_all, _, _ = get_stopwords()

    # assert
    assert expected_all == actual_all

예제 #5

파일 보기

def test_get_stopwords_full():
    """Check the first value returned by ``get_stopwords()`` against spaCy's EN + NB stop words.

    The expected list is spaCy's ``en`` stop words followed by its ``nb`` stop
    words, with every entry converted through ``str``.
    """
    # arrange - pull both sets from spaCy under distinct aliases
    from spacy.lang.en.stop_words import STOP_WORDS as spacy_en_stops
    expected_en = list(spacy_en_stops)

    from spacy.lang.nb.stop_words import STOP_WORDS as spacy_nb_stops
    expected_nb = list(spacy_nb_stops)

    expected_all = list(map(str, expected_en + expected_nb))

    # act - only the first item of the returned triple is examined here
    actual_all, _, _ = get_stopwords()

    # assert
    assert expected_all == actual_all

예제 #6

파일 보기

파일: test_kw_extraction.py 프로젝트: rafaelschlatter/eNLP

def test_keyphrase_extraction():
    """Extract keyphrases from the ``en_historynlp.txt`` example and check the first one."""
    # arrange
    sample_path = './examples/example_data/en_historynlp.txt'
    with open(sample_path, "r") as handle:
        sample_text = handle.read()

    # Only the third returned list is passed on as the stopword list
    _, _, english_stops = stdt.get_stopwords()

    # act
    extracted = kw.keyphrase_list(sample_text, stopwords=english_stops, with_scores=False)

    # assert
    assert extracted[0] == "word embeddings"

예제 #7

파일 보기

NOTE: This is an example to show how to run the procedure; however, due to the small dataset used,
the results are likely to be nonsensical.
"""

import enlp.understanding.topics as tp
import enlp.processing.stdtools as stdt
import spacy

###############################################################################
# Load example text and get stopwords

with open("example_data/en_nlptexts.txt", "r") as file:
    text=file.read()

all_stopwords, stopwords_nb, stopwords_en = stdt.get_stopwords()


###############################################################################
# Preprocess text - for this example we have a very small corpus to allow the documentation
# to build, therefore we will split the single document into paragraphs for processing to
# imitate multiple document input, and we will also remove stopwords and punctuation as the text is too small.

# Split text into paragraphs (separated by blank lines) to imitate documents
docs = text.split('\n\n')
# Replace the remaining newlines with spaces so each paragraph becomes a single line
docs = [d.replace('\n',' ') for d in docs]

# Because example text is small, remove stopwords and punctuation
en = spacy.load('en_core_web_md')
# NOTE(review): get_stopwords() was already called above and unpacked into
# all_stopwords / stopwords_nb / stopwords_en; this second call looks redundant - confirm.
stopwords, stops_nb, stops_en = stdt.get_stopwords()

예제 #8

파일 보기

"""
Removing Stopwords
==================
XXX
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as pltgs

from enlp.processing.stdtools import get_stopwords

# Close every open figure up front - very important for Read the Docs,
# to avoid it crashing due to memory.
plt.close('all')

###############################################################################
# Some text

# Unpack the three lists returned by get_stopwords()
stopwords_func, stopwords_nb_func, stopwords_en_func = get_stopwords()

# Show the first five entries of the third list
print(stopwords_en_func[:5])

예제 #9

파일 보기

파일: ex_keyphrases.py 프로젝트: rafaelschlatter/eNLP

"""
Keyphrase Extraction (English)
==============================
The following example uses a python implementation of the Rapid Automatic Keyword Extraction algorithm to extract
keyphrases from a text.
"""

import pandas as pd
from enlp.processing.stdtools import get_stopwords
from enlp.understanding.keywords import keyphrase_list

###############################################################################
# Load example text and get stopwords

with open("example_data/en_historynlp.txt", "r") as file:
    text = file.read()

all_stopwords, stopwords_nb, stopwords_en = get_stopwords()

###############################################################################
# Extract keyphrases

keyphrases = keyphrase_list(text, stopwords=stopwords_en)

# Print the first ten rows of the result as a score / keyphrase table
print(pd.DataFrame(keyphrases, columns=['score', 'keyphrase']).head(10))

예제 #10

파일 보기

def all_stopwords():
    """Return the first of the three values produced by ``get_stopwords()``."""
    combined, _, _ = get_stopwords()
    return combined

예제 #11

파일 보기

def norwegian_stopwords():
    """Return the second of the three values produced by ``get_stopwords()``."""
    _, norwegian, _ = get_stopwords()
    return norwegian

예제 #12

파일 보기

def english_stopwords():
    """Return the third of the three values produced by ``get_stopwords()``."""
    _, _, english = get_stopwords()
    return english