Python lowerstrip Beispiele

Programmiersprache: Python

Namespace / Paketname: text.utils

Methode / Funktion: lowerstrip

Beispiele auf hotexamples.com: 6

Python lowerstrip – 6 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion text.utils.lowerstrip, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

Datei: blob.py Projekt: scraping-xx/TextBlob

 def word_counts(self):
     '''Map each normalized word of this text to its number of occurrences.

     Every entry of ``self.words`` is passed through ``lowerstrip`` before
     being tallied, so entries that normalize to the same string share one
     counter.

     :returns: a ``defaultdict(int)`` keyed by the normalized word.
     '''
     frequencies = defaultdict(int)
     for entry in self.words:
         # defaultdict(int) starts unseen keys at 0, so no membership test
         # is needed before incrementing.
         frequencies[lowerstrip(entry)] += 1
     return frequencies

Beispiel #2

Datei anzeigen

Datei: classifiers.py Projekt: scraping-xx/TextBlob

def contains_extractor(document):
    '''Build a ``contains(<word>) -> True`` feature dict for a document.

    :param document: Either a string, which is tokenized with
        ``WordTokenizer`` (punctuation excluded) and lower-cased, or an
        iterable of words, each normalized with ``lowerstrip(w, all=False)``.
    :returns: dict with one ``u'contains(<word>)'`` key per distinct word,
        every value being ``True``.
    '''
    tokenizer = WordTokenizer()
    if not isinstance(document, basestring):
        # Already split into words: only normalize each item.
        vocabulary = set(lowerstrip(item, all=False) for item in document)
    else:
        raw_tokens = tokenizer.itokenize(document, include_punc=False)
        vocabulary = set(token.lower() for token in raw_tokens)
    features = {}
    for token in vocabulary:
        features[u'contains({0})'.format(token)] = True
    return features

Beispiel #3

Datei anzeigen

Datei: blob.py Projekt: scraping-xx/TextBlob

 def __init__(self, text, tokenizer=None,
              pos_tagger=None, np_extractor=None, analyzer=None,
              parser=None, classifier=None, clean_html=False):
     '''Store the raw text and set up the processing models.

     :param text: The text to wrap; must be a ``basestring`` instance.
     :param tokenizer, pos_tagger, np_extractor, analyzer, parser,
         classifier: Forwarded unchanged to ``_initialize_models``.
     :param clean_html: No longer supported; any truthy value raises.
     :raises TypeError: if ``text`` is not a string.
     :raises NotImplementedError: if ``clean_html`` is truthy.
     '''
     # The type check comes first so a non-string ``text`` always reports
     # TypeError, even when ``clean_html`` is also set.
     if not isinstance(text, basestring):
         template = ('The `text` argument passed to `__init__(text)` '
                     'must be a string, not {0}')
         raise TypeError(template.format(type(text)))
     if clean_html:
         raise NotImplementedError(
             "clean_html has been deprecated. "
             "To remove HTML markup, use BeautifulSoup's "
             "get_text() function"
         )
     self.raw = text
     self.string = text
     self.stripped = lowerstrip(self.raw, all=True)
     _initialize_models(
         self, tokenizer, pos_tagger, np_extractor, analyzer,
         parser, classifier,
     )

Beispiel #4

Datei anzeigen

Datei: classifiers.py Projekt: scraping-xx/TextBlob

def basic_extractor(document, train_set):
    '''Report which words of ``train_set`` occur in ``document``.

    :param document: The text to extract features from. A string is
        tokenized with ``WordTokenizer`` (punctuation excluded) and
        lower-cased; any other iterable has each item normalized with
        ``lowerstrip(w, all=False)``.
    :param train_set: Training data set, a list of tuples of the form
        ``(words, label)``.
    :returns: dict mapping ``u'contains(<word>)'`` to a bool for every word
        returned by ``_get_words_from_dataset(train_set)``.
    '''
    tokenizer = WordTokenizer()
    vocabulary = _get_words_from_dataset(train_set)
    if not isinstance(document, basestring):
        # Already split into words: only normalize each item.
        present = set(lowerstrip(item, all=False) for item in document)
    else:
        raw_tokens = tokenizer.itokenize(document, include_punc=False)
        present = set(token.lower() for token in raw_tokens)
    features = {}
    for word in vocabulary:
        features[u'contains({0})'.format(word)] = word in present
    return features

Beispiel #5

Datei anzeigen

Datei: test_utils.py Projekt: ratancs/TextBlob

 def test_lowerstrip(self):
     '''``lowerstrip(self.text)`` must equal ``'this has punctuation'``.'''
     expected = 'this has punctuation'
     actual = lowerstrip(self.text)
     assert_equal(actual, expected)

Beispiel #6

Datei anzeigen

 def test_lowerstrip(self):
     '''``lowerstrip(self.text)`` must equal ``'this. has. punctuation'``.'''
     expected = 'this. has. punctuation'
     actual = lowerstrip(self.text)
     assert_equal(actual, expected)