Example #1
import json
from nltk import FreqDist
from nltk.corpus import knbc

cp = knbc.words()    # all tokens in the KNB blog corpus
dist = FreqDist(cp)  # word -> frequency
# Total number of tokens in the corpus, |C|.
total_count = dist.N()

if __name__ == "__main__":
    line = input()
    out = {"corpus_size": len(dist), "total_count": total_count}  # vocabulary size, token count |C|
    for word in line.split(" "):
        out[word] = dist[word]
    print(json.dumps(out))
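
# The printed JSON (token total plus per-word counts) is enough to estimate
# unigram probabilities downstream. A minimal consumer sketch; this helper is
# an assumption, not part of the original script:
def unigram_probs(json_line):
    data = json.loads(json_line)
    total = data["total_count"]
    # Everything except the two bookkeeping keys is a word count.
    return {w: c / total for w, c in data.items()
            if w not in ("corpus_size", "total_count")}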
Example #2

import csv
from collections import defaultdict
from nltk.corpus import knbc, jeita

# State assumed from the elided top of the script (names taken from the calls below):
dictionary = {}                      # Kansai form -> list of standard forms
frequency_bigram = defaultdict(int)  # (prev, word) bigram -> count
frequency_word = defaultdict(int)    # word -> count

def bigram(prev, word):
    # Pair a word with its predecessor.
    return (prev, word)

def bigram_freq_given(bg):
    # Count of a bigram seen so far (defaultdict yields 0 for unseen keys).
    return frequency_bigram[bg]

def word_freq(word):
    # Count of a word seen so far.
    return frequency_word[word]

# out.write(line + ',' + tagged + '\n')

if __name__ == '__main__':
    with open('word_list.csv', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            kansai = row[0].replace('〜', '')
            standard = row[1].replace('〜', '').split('・')
            if kansai not in dictionary:
                dictionary[kansai] = standard

    total = 0
    prev = 'BOS'
    for word in knbc.words():
        bg = bigram(prev, word)
        frequency_bigram[bg] = bigram_freq_given(bg) + 1
        frequency_word[word] = word_freq(word) + 1
        prev = word
        # total += 1
        # print(word + ', ' + str(frequency_word[word]))
    prev = 'BOS'
    for word in jeita.words():
        bg = bigram(prev, word)
        # Same accumulation as the KNBC loop above.
        frequency_bigram[bg] = bigram_freq_given(bg) + 1
        frequency_word[word] = word_freq(word) + 1
        prev = word
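
# With both count tables filled, the maximum-likelihood bigram probability
# P(w | prev) = C(prev, w) / C(prev) comes straight from the helpers above.
# A minimal sketch; this function is an assumption, not shown in the snippet:
def bigram_prob(prev_word, word):
    if word_freq(prev_word) == 0:
        return 0.0
    return bigram_freq_given(bigram(prev_word, word)) / word_freq(prev_word)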
Example #3
#JPop Band Name and Hits Generator
#aka. chickenberry
#Jonisha McKiddy | Julie Evans

###import/corpus###
import re
import nltk
from nltk.corpus import knbc
jc = knbc.words()
c = nltk.corpus.words.words()
from nltk.corpus import PlaintextCorpusReader
#portal=r"C:\Users\JuJuBee Marie\Google Drive\linguistics\comp ling\chickenberry"
corpus_root = r"C:\Users\ses71_000\Desktop\programming"
pc = PlaintextCorpusReader(corpus_root, 'portal 12text.txt')
#cfd_pc=nltk.ConditionalFreqDist(nltk.bigrams(pc))
#cpd_pc=nltk.ConditionalProbDist(cfd_pc, nltk.MLEProbDist)

###dictionary###
f = open(r'C:\Users\ses71_000\Desktop\edict.csv', encoding='utf-8')
#def searchable(w):
#[word for line in f for word in line.split()]
#print(w, word)

#srch=[word for word in f for word in line.split()]

read = f.readlines()
char = {}
for line in read:
    l = line.replace(',,,,,,,,,,,,,,,,,,,,', '')  # replace() returns a new string; keep the result
    p = re.split(r'\|', l)                        # split the entry into fields on '|'
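    # Assumed continuation (not in the original snippet): key each entry by its
    # first '|'-separated field; the edict.csv column layout is a guess here.
    if len(p) > 1:
        char[p[0].strip()] = p[1].strip()
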
import nltk
from nltk.corpus import jeita
from nltk.corpus import knbc

jfull_t = nltk.Text(jeita.words())           # create NLTK Text from JEITA Corpus
kfull_t = nltk.Text(knbc.words())            # create NLTK Text from KNB Corpus

fdist_jfull = nltk.FreqDist(jfull_t)         # create frequency dist from JEITA Corpus
print(fdist_jfull.most_common(50))           # 50 most common words in JEITA Corpus
fdist_kfull = nltk.FreqDist(kfull_t)         # create frequency dist from KNB Corpus
print(fdist_kfull.most_common(50))           # 50 most common words in KNB Corpus

# words that appear in the same context (same words on either side) as '人'
jfull_t.similar("人")   # Text.similar() prints its results itself and returns None
kfull_t.similar("人")
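
# The filters below reference jsingle_t and fdist_j, which this excerpt never
# defines; they presumably come from an elided single-file pass. A plausible
# sketch (the fileid is taken from the print labels below):
jsingle_t = nltk.Text(jeita.words('a0010.chasen'))
fdist_j = nltk.FreqDist(jsingle_t)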
print(
    "words in a0010.chasen with 3 or more characters, that appear 3 or more times:"
)
print(sorted(w for w in set(jsingle_t) if len(w) >= 3 and fdist_j[w] >= 3))
print("words in a0010.chasen ending in しい:")
print(sorted(w for w in set(jsingle_t) if w.endswith('しい')))
print("words in a0010.chasen starting with 見:")
print(sorted(w for w in set(jsingle_t) if w.startswith('見')))
print("words in a0010.chasen which contain 山:")
print(sorted(w for w in set(jsingle_t) if '山' in w))
print("words in a0010.chasen which contain 上 or 下:")
print(sorted(w for w in set(jsingle_t) if '上' in w or '下' in w))

# create NLTK texts for each corpus in full
jfull_t = nltk.Text(jeita.words())
kfull_t = nltk.Text(knbc.words())

# the frequency distribution tells us the frequency of each vocabulary item in the text
fdist_jfull = nltk.FreqDist(jfull_t)
print("50 most common words in the full JEITA corpus:")
print(fdist_jfull.most_common(50))
fdist_kfull = nltk.FreqDist(kfull_t)
print("50 most common words in the full KNB corpus:")
print(fdist_kfull.most_common(50))

# a collocation is a sequence of words that occur together unusually often
print(
    "collocations (words that occur together unusually often) in the JEITA corpus:"
)
jfull_t.collocations()  # collocations() also prints directly and returns None
print(