import re

# Match absolute http(s) URLs on the ics.uci.edu domain.
# Dots are escaped so '.' is a literal dot, not the regex "any character"
# wildcard (the unescaped pattern also matched e.g. "http://statics-uci-edu").
re_matcher = re.compile(r"^https?://.*ics\.uci\.edu")


def get_links(html):
    """Return the href values of all <a> tags in *html* whose href
    matches the module-level ``re_matcher`` pattern.

    :param html: HTML document as a string (or bytes) parseable by
        BeautifulSoup's ``html.parser`` backend.
    :return: list of href strings, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    # find_all is the modern bs4 name; findAll is a deprecated alias.
    return [a.get('href')
            for a in soup.find_all('a', attrs={'href': re_matcher})]


def hasdigit(token):
    """Return True if *token* contains at least one digit character."""
    for ch in token:
        if ch.isdigit():
            return True
    return False


# Module-level stopword checker shared by check_token below.
# Stopwords comes from the project's `stopwords` module; presumably it
# exposes is_stop(token) -> bool — confirm against that module.
stopw = Stopwords()


def check_token(token):
    return not stopw.is_stop(token) and not hasdigit(
        token) and len(token) > 1 and len(token) < 20


def add_token(token):
    """Stub — currently does nothing with *token*.

    NOTE(review): presumably intended to record the token in an index
    or frequency table; confirm intended behavior before relying on it.
    """
    pass


# Matches any single character that is NOT a lowercase letter, a digit,
# or an apostrophe (raw string: the pattern contains no escapes, so the
# compiled regex is identical).
nonalphanum = re.compile(r"[^0-9a-z']")


def tokenize_text(intext):
# --- Beispiel #2 (paste-artifact marker: a second, separate example begins here) ---
import atexit
import os

from zope.component import provideUtility
from zope.component.interfaces import IFactory
from zope.component.testing import setUp
from zopyx.txng3.core.interfaces import IParser, IStopwords, IThesaurus
from zopyx.txng3.core.lexicon import LexiconFactory
from zopyx.txng3.core.storage import StorageWithTermFrequencyFactory
from zopyx.txng3.core.thesaurus import GermanThesaurus

from index import Index
from parsers.english import EnglishParser
from splitter import SplitterFactory
from stopwords import Stopwords

# Set up the zope.component registry and register the TextIndexNG3
# utilities (splitter, parser, stopwords, lexicon, storage, thesaurus)
# that the indexing code resolves by interface + name.
setUp()
_utilities = (
    (SplitterFactory, IFactory, 'txng.splitters.default'),
    (EnglishParser(), IParser, 'txng.parsers.en'),
    (Stopwords(), IStopwords, 'txng.stopwords'),
    (LexiconFactory, IFactory, 'txng.lexicons.default'),
    (StorageWithTermFrequencyFactory, IFactory, 'txng.storages.default'),
    (GermanThesaurus, IThesaurus, 'txng.thesaurus.de'),
)
for _component, _iface, _name in _utilities:
    provideUtility(_component, _iface, _name)

# Best-effort interactive shell history: load ~/.pyhist now and arrange
# for it to be saved on interpreter exit.
try:
    import atexit
    import os
    import readline

    histfile = os.path.expanduser('~/.pyhist')
    readline.read_history_file(histfile)
    atexit.register(readline.write_history_file, histfile)
except (ImportError, OSError):
    # No readline module (e.g. Windows) or no/unreadable history file yet.
    # Narrowed from a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt.
    pass


class Text: