Esempi in Python per wordFrequency

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: linguistic_tools

Metodo/funzione: wordFrequency

Esempi su hotexamples.com: 6

wordFrequency in Python: 6 esempi trovati. Questi sono i migliori esempi reali in Python per linguistic_tools.wordFrequency, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

 def test_wordFrequency(self):
     """Check that wordFrequency reports per-token counts for three casings.

     The same sentence is counted as written, lower-cased and upper-cased.
     The mixed-case expectation (3 for 'The', not 5) pins the counting as
     case-sensitive.
     """
     sample = 'The the The the The quick Quick quick Quick quick'
     mixed_counts = linguistic_tools.wordFrequency(sample)
     lower_counts = linguistic_tools.wordFrequency(sample.lower())
     upper_counts = linguistic_tools.wordFrequency(sample.upper())
     # 'The' appears three times as written; folding the case merges all five.
     self.assertEqual(3, mixed_counts.get('The'))
     self.assertEqual(5, lower_counts.get('the'))
     self.assertEqual(5, upper_counts.get('THE'))

Esempio n. 2

Mostra file

 def test_tfIDF(self):
     """Check documentFrequency over three word-frequency dictionaries.

     The three "documents" are the same sentence as written, lower-cased
     and upper-cased. The expected totals are consistent with
     documentFrequency summing the per-document counts of each token
     (e.g. 'the': 2 + 5) -- NOTE(review): confirm against its implementation.
     """
     sample = 'The the The the The quick Quick quick Quick quick'
     documents = (sample, sample.lower(), sample.upper())
     per_document_counts = [
         linguistic_tools.wordFrequency(text) for text in documents
     ]
     doc_freq = linguistic_tools.documentFrequency(per_document_counts)
     expected_totals = (('the', 7), ('QUICK', 5), ('Quick', 2))
     for token, expected in expected_totals:
         self.assertEqual(expected, doc_freq[token])

Esempio n. 3

Mostra file

 def test_countWordsinWordlist(self):
     """Check countAllWordsInWordList for a one-entry word list.

     Only the lower-case 'the' is listed, and the sentence contains it
     exactly twice, so the expected total is 2.
     """
     counts = linguistic_tools.wordFrequency(
         'The the The the The quick Quick quick Quick quick')
     total = linguistic_tools.countAllWordsInWordList(['the'], counts)
     self.assertEqual(2, total)

Esempio n. 4

Mostra file

 def test_countConstructedWordlist(self):
     """Check constructWordListFrequency keeps only the listed words.

     The result is queried like a dict: the listed 'the' must carry its
     count of 2, while the unlisted 'The' must be absent (or zero).
     """
     counts = linguistic_tools.wordFrequency(
         'The the The the The quick Quick quick Quick quick')
     listed_words = ['the']
     restricted = linguistic_tools.constructWordListFrequency(
         listed_words, counts)
     self.assertEqual(2, restricted.get('the', 0))
     # 'The' is not in the word list, so the default of 0 is expected.
     self.assertEqual(0, restricted.get('The', 0))

Esempio n. 5

Mostra file

 def test_cumWordCount(self):
     """Check that the values returned by cumulativePercentWords sum to 1.

     The values are presumably floating-point fractions (TODO confirm), so
     the total is compared with a tolerance rather than exact equality;
     exact comparison of a float sum is brittle under rounding.
     """
     word_frequency = linguistic_tools.wordFrequency(
         'The the The the The quick Quick quick Quick quick')
     d_word_freq = linguistic_tools.cumulativePercentWords(word_frequency)
     total_percent = sum(d_word_freq.values())
     # assertAlmostEqual still passes on exact equality, so this is a strict
     # relaxation of the previous assertEqual check.
     self.assertAlmostEqual(1, total_percent)

Esempio n. 6

Mostra file

def mainthread(file):
    """Compute upper-cased word frequencies for one file if its CIK is wanted.

    The header is located from the leading lines of the file, the first
    'CENTRAL INDEX KEY' entry is stored back into the header dictionary, and
    the file is only processed further when that key (as an int) is in the
    module-level ``int_list``.

    Args:
        file: Path of the file to open (text mode, decoding errors ignored).

    Returns:
        A ``(word_frequencies, header_dict)`` tuple when the CIK is in
        ``int_list``; ``False`` otherwise.
    """
    cik_key = 'CENTRAL INDEX KEY'
    with open(file, 'r', errors='ignore') as handle:
        # The argument to readlines is a size hint: only the leading lines
        # (roughly the first 10000 characters) are handed to the header search.
        snippet = handle.readlines(10000)
        header_dict = headerSearch.searchEdgarHeader(textSnippet=snippet)
        # Use the first CIK in the EDGAR file.
        header_dict[cik_key] = cleaning_tools.splitEDGARHeader(
            header_dict, cik_key)[0]

        if int(header_dict.get(cik_key, -99)) not in int_list:
            return False

        # Rewind: the header read above consumed the start of the file.
        handle.seek(0)
        document = linguistic_tools.parse_document(
            handle.read(), purge_tables=True)
        frequencies = linguistic_tools.wordFrequency(document.upper())
        return (frequencies, header_dict)