Python get_word_count Beispiele

Programmiersprache: Python

Namespace / Paketname: word_count

Methode / Funktion: get_word_count

Beispiele auf hotexamples.com: 9

Python get_word_count – 9 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion word_count.get_word_count, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

Datei: fighting_lexicon.py Projekt: jerryyao-uofc/NLP_Project

def get_top_distinguishing(input_file,
                           other_file_list,
                           data_dir,
                           output_file,
                           vocab_size=100):
    """Write the highest-scoring words of ``input_file`` versus other files.

    Steps, as performed below:

    1. Ensure ``<data_dir>/bigram_phrases.txt`` exists (created with
       ``wc.find_bigrams`` from ``input_file`` when missing) and load it.
    2. Count tokens of ``input_file`` and, summed together, of every file
       in ``other_file_list``.
    3. Score the words with ``get_informative_alpha`` and
       ``log_odds_normalized_diff``.
    4. Keep the top ``vocab_size`` scored words accepted by the filter
       regex and write them to ``output_file``.

    Args:
        input_file: Input handed to the ``wc`` bigram/count helpers
            (presumably a file path -- confirm against ``wc``).
        other_file_list: Iterable of inputs whose counts are summed and
            used as the comparison counts.
        data_dir: Directory that holds (or receives) ``bigram_phrases.txt``.
        output_file: Destination passed to ``utils.write_word_dict``.
        vocab_size: Forwarded as ``top`` to ``wc.get_word_dict``.
    """
    bigram_file = "%s/bigram_phrases.txt" % data_dir
    if not os.path.exists(bigram_file):
        wc.find_bigrams(input_file, bigram_file)
    bigram_dict = wc.load_bigrams(bigram_file)
    word_cnts = wc.get_word_count(input_file,
                                  bigram_dict=bigram_dict,
                                  words_func=wc.get_mixed_tokens)
    # Sum the counts of all comparison files into a single mapping.
    other_cnts = collections.defaultdict(int)
    for filename in other_file_list:
        tmp_cnts = wc.get_word_count(filename,
                                     bigram_dict=bigram_dict,
                                     words_func=wc.get_mixed_tokens)
        for w in tmp_cnts:
            other_cnts[w] += tmp_cnts[w]
    alphas = get_informative_alpha(word_cnts, other_cnts)
    word_score = log_odds_normalized_diff(word_cnts, other_cnts, alphas)
    # Raw string: "\w" in a normal literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern text itself is unchanged.
    vocab_dict = wc.get_word_dict(word_score,
                                  top=vocab_size,
                                  filter_regex=r"\w\w+")
    utils.write_word_dict(vocab_dict, word_cnts, output_file)

Beispiel #2

Datei anzeigen

def get_mallet_input_from_words(input_file, data_dir, vocab_size=10000):
    """Build the vocabulary file and the ``data.input`` file in ``data_dir``.

    Steps, as performed below:

    1. Ensure ``<data_dir>/bigram_phrases.txt`` exists (created with
       ``wc.find_bigrams`` from ``input_file`` when missing) and load it.
    2. Count tokens of ``input_file`` and keep the top ``vocab_size``
       entries accepted by the filter regex.
    3. Write the vocabulary to ``<data_dir>/data.word_id.dict``.
    4. Call ``convert_word_count_mallet`` to produce
       ``<data_dir>/data.input``.

    Args:
        input_file: Input handed to the ``wc`` helpers and to
            ``convert_word_count_mallet`` (presumably a file path --
            confirm against those helpers).
        data_dir: Directory used for the bigram, vocabulary and output
            files.
        vocab_size: Forwarded as ``top`` to ``wc.get_word_dict``.
    """
    bigram_file = "%s/bigram_phrases.txt" % data_dir
    vocab_file = "%s/data.word_id.dict" % data_dir
    mallet_file = "%s/data.input" % data_dir

    if not os.path.exists(bigram_file):
        wc.find_bigrams(input_file, bigram_file)
    bigram_dict = wc.load_bigrams(bigram_file)
    word_cnts = wc.get_word_count(input_file,
                                  bigram_dict=bigram_dict,
                                  words_func=wc.get_mixed_tokens)
    # Raw string: "\w" in a normal literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern text itself is unchanged.
    vocab_dict = wc.get_word_dict(word_cnts,
                                  top=vocab_size,
                                  filter_regex=r"\w\w+")
    utils.write_word_dict(vocab_dict, word_cnts, vocab_file)
    # The tokenizer is pre-bound to the same bigrams used for counting.
    convert_word_count_mallet(
        vocab_dict,
        input_file,
        mallet_file,
        words_func=functools.partial(wc.get_mixed_tokens,
                                     bigram_dict=bigram_dict))

Beispiel #3

Datei anzeigen

Datei: mallet_topics.py Projekt: jerryyao-uofc/NLP_Project

def get_mallet_input_from_words(input_file, data_dir, vocab_size=10000):
    """Build the vocabulary file and the ``data.input`` file in ``data_dir``.

    Work that already exists on disk is skipped:

    * ``<data_dir>/bigram_phrases.txt`` is only generated (with
      ``wc.find_bigrams``) when it is missing.
    * If both ``<data_dir>/data.word_id.dict`` and ``<data_dir>/data.input``
      exist, the function prints a message and returns early.

    Otherwise it counts the tokens of ``input_file``, keeps the top
    ``vocab_size`` entries accepted by the filter regex, writes the
    vocabulary file and calls ``convert_word_count_mallet`` to produce
    ``data.input``.

    Args:
        input_file: Input handed to the ``wc`` helpers and to
            ``convert_word_count_mallet`` (presumably a file path --
            confirm against those helpers).
        data_dir: Directory used for the bigram, vocabulary and output
            files.
        vocab_size: Forwarded as ``top`` to ``wc.get_word_dict``.
    """
    bigram_file = "%s/bigram_phrases.txt" % data_dir
    vocab_file = "%s/data.word_id.dict" % data_dir
    mallet_file = "%s/data.input" % data_dir

    if not os.path.exists(bigram_file):
        wc.find_bigrams(input_file, bigram_file)
    else:
        print("get_mallet_input_from_words: bigram file found at: {}, skipping".format(bigram_file))
    if os.path.exists(vocab_file) and os.path.exists(mallet_file):
        print("get_mallet_input_from_words: both data.word_id.dict and data.input found, skipping")
        return
    bigram_dict = wc.load_bigrams(bigram_file)
    word_cnts = wc.get_word_count(input_file, bigram_dict=bigram_dict,
                                  words_func=wc.get_mixed_tokens)
    # Raw string: "\w" in a normal literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern text itself is unchanged.
    vocab_dict = wc.get_word_dict(word_cnts,
                                  top=vocab_size,
                                  filter_regex=r"\w\w+")
    utils.write_word_dict(vocab_dict, word_cnts, vocab_file)
    # The tokenizer is pre-bound to the same bigrams used for counting.
    convert_word_count_mallet(vocab_dict, input_file,
                              mallet_file,
                              words_func=functools.partial(
                                  wc.get_mixed_tokens,
                                  bigram_dict=bigram_dict))

Beispiel #4

Datei anzeigen

 def test2(self):
     """Check that get_word_count reports 1 for the string 'Word'."""
     counted = word_count.get_word_count('Word')
     self.assertEqual(counted, 1)

Beispiel #5

Datei anzeigen

 def test1(self):
     """Check that get_word_count reports 4 for 'This is a sentence'."""
     counted = word_count.get_word_count('This is a sentence')
     self.assertEqual(counted, 4)

Beispiel #6

Datei anzeigen

 def test_three(self):
     """Check that get_word_count reports 0 for a string of only spaces."""
     counted = word_count.get_word_count('     ')
     assert counted == 0

Beispiel #7

Datei anzeigen

 def test_two(self):
     """Check that get_word_count reports 1 for the string 'Word'."""
     counted = word_count.get_word_count('Word')
     assert counted == 1

Beispiel #8

Datei anzeigen

 def test_one(self):
     """Check that get_word_count reports 4 for 'This is a sentence'."""
     counted = word_count.get_word_count('This is a sentence')
     assert counted == 4

Beispiel #9

Datei anzeigen

 def test3(self):
     """Check that get_word_count reports 0 for a string of only spaces."""
     counted = word_count.get_word_count('     ')
     self.assertEqual(counted, 0)