Python WordAnalyzer Examples

Programming Language: Python

Namespace/Package Name: scripts.algorithms.tfidf

Class/Type: WordAnalyzer

Examples at hotexamples.com: 3

Python WordAnalyzer - 3 examples found. These are the top-rated real-world Python examples of scripts.algorithms.tfidf.WordAnalyzer, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

analyzer(3)

init(3)

Frequently Used Methods

analyzer (3)

init (3)

Example #1

Show file

File: test_tfidf.py Project: samiratzn/patent_app_detect

    def test_WordAnalyser_unigrams_with_stopwords(self):
        """Unigram analysis of a plain sentence leaves out 'Some' and 'to'.

        The analyzer is initialised with this test case's tokenizer and
        preprocess fixtures and an n-gram range of (1, 1); the remaining
        words are expected back in their original order.
        """
        WordAnalyzer.init(
            tokenizer=self.word_tokenizer,
            preprocess=self.preprocess,
            ngram_range=(1, 1),
        )

        sentence = 'Some test words to ignore safely'
        wanted = ['test', 'words', 'ignore', 'safely']

        produced = WordAnalyzer.analyzer(sentence)

        self.assertListEqual(wanted, produced)

Example #2

Show file

File: test_tfidf.py Project: samiratzn/patent_app_detect

    def test_WordAnalyser_unigrams_with_punctuation(self):
        """Unigram analysis keeps a hyphenated word but drops "someone's".

        The input contains a comma, a hyphenated token and a possessive.
        The expected output keeps 'except-hyphens' as one token, has no
        trailing comma on 'words', and contains neither "someone's" nor
        the words 'Some', 'to', 'but' and 'including'.
        """
        WordAnalyzer.init(
            tokenizer=self.word_tokenizer,
            preprocess=self.preprocess,
            ngram_range=(1, 1),
        )

        sentence = "Some test words, to ignore except-hyphens but including someone's ownership"
        wanted = ['test', 'words', 'ignore', 'except-hyphens', 'ownership']

        produced = WordAnalyzer.analyzer(sentence)

        self.assertListEqual(wanted, produced)

Example #3

Show file

File: test_tfidf.py Project: samiratzn/patent_app_detect

    def test_WordAnalyser_ngrams_dont_cross_punctuation_or_stop_words(self):
        """With an n-gram range of (1, 3), n-grams stay inside each word run.

        The expected list holds all unigrams first, then bigrams, then the
        single trigram. No expected n-gram joins words across the comma or
        across "but someone's": 'words except-hyphens' and 'metal metal'
        are both absent.
        """
        WordAnalyzer.init(
            tokenizer=self.word_tokenizer,
            preprocess=self.preprocess,
            ngram_range=(1, 3),
        )

        sentence = "Some test words, except-hyphens metal but someone's metal fish bucket"

        # Grouped by n-gram length; concatenation preserves the exact order.
        unigrams = [
            'test', 'words', 'except-hyphens', 'metal', 'metal', 'fish',
            'bucket',
        ]
        bigrams = [
            'test words', 'except-hyphens metal', 'metal fish', 'fish bucket',
        ]
        trigrams = ['metal fish bucket']
        wanted = unigrams + bigrams + trigrams

        produced = WordAnalyzer.analyzer(sentence)

        self.assertListEqual(wanted, produced)