def demo4():
    """Train a NaiveBayes classifier on three genesis texts and print the
    probability it reports for each of them on an English sample.

    Two features are registered: "2-tup", the two-character slices of the
    space-joined tokens, and "words", the tokens themselves.  The first 400
    items of the "english-kjv", "french" and "finnish" texts are the
    training data; items 150-199 of "english-kjv" are then classified.
    """
    from nltk_contrib import classify
    from nltk import detect
    from nltk.corpora import genesis
    from itertools import islice

    def char_bigrams(tokens):
        # Join once: the original re-joined the tokens for every index,
        # which made feature extraction quadratic in the text length.
        text = ' '.join(tokens)
        return [text[n:n + 2] for n in range(len(text) - 1)]

    fd = detect.feature({"2-tup": char_bigrams, "words": lambda t: t})
    classifier = classify.NaiveBayes(fd)

    languages = ("english-kjv", "french", "finnish")
    training_data = {}
    for lang in languages:
        training_data[lang] = list(islice(genesis.raw(lang), 0, 400))
    classifier.train(training_data)

    # NOTE(review): the sample (items 150-199) lies inside the English
    # training slice (items 0-399) -- confirm this overlap is intended.
    sample = list(islice(genesis.raw("english-kjv"), 150, 200))
    result = classifier.get_class_probs(sample)

    for lang in languages:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (lang, result.prob(lang)))
def demo():
    """Print the detector output for the first ten items of brown.words("a").

    The detector is built from two feature functions: "initial" (the first
    character of the token) and "len" (the token's length).
    """
    from nltk.corpus import brown
    from nltk import detect

    detector = detect.feature({
        # t[:1] is simply empty for an empty token, where t[0] raised
        # IndexError; for any non-empty token the result is unchanged.
        "initial": lambda t: list(t[:1]),
        "len": lambda t: [len(t)],
    })

    # The loop walks individual words, not sentences.
    for word in brown.words("a")[:10]:
        # A single-argument print() emits the same text on Python 2 and 3.
        print(detector(word))
def demo():
    """Run a two-feature detector over the first ten items of
    brown.words('a') and print what it returns for each one.

    Features: 'initial' is the token's first character, 'len' its length.
    """
    from nltk.corpus import brown
    from nltk import detect

    def initial(token):
        # Slicing never raises, so an empty token yields no feature value
        # instead of the IndexError that token[0] produced.
        return list(token[:1])

    def length(token):
        return [len(token)]

    detector = detect.feature({'initial': initial, 'len': length})

    # Each item is a single word, not a sentence.
    for word in brown.words('a')[:10]:
        # A single-argument print() emits the same text on Python 2 and 3.
        print(detector(word))
def demo():
    """Train NaiveBayes on two toy strings and print its class dictionary
    for the input "a".

    The single feature, "1-tup", is the list of the input's characters.
    Every entry of the dictionary returned by get_class_dict is printed as
    "<class> : <value>".
    """
    from nltk_contrib import classify
    from nltk import detect

    fd = detect.feature({"1-tup": lambda t: list(t)})
    classifier = classify.NaiveBayes(fd)

    training_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("a")
    for cls in result:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (cls, result[cls]))
def demo():
    """Train the Spearman classifier on two toy strings and print its class
    dictionary for the input "a".

    The single feature, "1-tup", is the list of the input's elements.
    Every entry of the dictionary returned by get_class_dict is printed as
    "<class> : <value>".
    """
    from nltk_contrib import classify
    from nltk import detect

    # list(t) replaces the index loop [t[n] for n in range(len(t))].
    fd = detect.feature({"1-tup": lambda t: list(t)})
    classifier = classify.spearman.Spearman(fd)

    training_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("a")
    for cls in result:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (cls, result[cls]))
def demo2():
    """Train NaiveBayes on two toy strings using two-character features and
    print its class dictionary for the input "aababb".

    Every entry of the dictionary returned by get_class_dict is printed as
    "<class> : <value>".
    """
    from nltk_contrib import classify
    from nltk import detect

    def two_tuples(text):
        # Stop one short of the end: range(len(text)) also produced the
        # slice text[-1:], a lone character among the two-character items.
        return [text[n:n + 2] for n in range(len(text) - 1)]

    fd = detect.feature({"2-tup": two_tuples})
    classifier = classify.NaiveBayes(fd)

    training_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("aababb")
    for cls in result:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (cls, result[cls]))
def demo2():
    """Classify "aababb" with a NaiveBayes model trained on two toy strings
    and print each entry of the resulting class dictionary.

    The only feature, "2-tup", is the list of overlapping two-character
    slices of the input.
    """
    from nltk_contrib import classify
    from nltk import detect

    fd = detect.feature({
        # len(t) - 1 start positions give only full two-character slices;
        # the original range(len(t)) appended a trailing one-character one.
        "2-tup": lambda t: [t[n:n + 2] for n in range(len(t) - 1)],
    })
    classifier = classify.NaiveBayes(fd)
    classifier.train({"class a": "aaaaaab", "class b": "bbbbbba"})

    result = classifier.get_class_dict("aababb")
    for cls in result:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (cls, result[cls]))
def demo4():
    """Train NaiveBayes on the "english-kjv", "french" and "finnish" genesis
    texts, classify an English sample and print the probability reported
    for each of the three classes.

    Features: "2-tup" is the list of two-character slices of the tokens
    joined with single spaces; "words" is the token sequence unchanged.
    Training uses items 0-399 of each text, the sample is items 150-199 of
    "english-kjv".
    """
    from nltk_contrib import classify
    from nltk import detect
    from nltk.corpora import genesis
    from itertools import islice

    def joined_bigrams(tokens):
        # Build the joined text a single time; the original lambda
        # re-joined the tokens for every index (quadratic work).
        joined = ' '.join(tokens)
        return [joined[n:n + 2] for n in range(len(joined) - 1)]

    def first_items(name, start, stop):
        # Materialise items start..stop-1 of the named genesis text.
        return list(islice(genesis.raw(name), start, stop))

    fd = detect.feature({"2-tup": joined_bigrams, "words": lambda t: t})
    classifier = classify.NaiveBayes(fd)

    names = ("english-kjv", "french", "finnish")
    training_data = {}
    for name in names:
        training_data[name] = first_items(name, 0, 400)
    classifier.train(training_data)

    # NOTE(review): items 150-199 are also part of the English training
    # data (items 0-399) -- confirm this overlap is intended.
    result = classifier.get_class_probs(first_items("english-kjv", 150, 200))

    for name in names:
        # A single-argument print() emits the same text on Python 2 and 3.
        print("%s : %s" % (name, result.prob(name)))
from nltk_contrib import classify
from nltk import detect
from nltk.corpus import udhr
import string
import sys


def run(classifier, training_data, gold_data):
    """Train *classifier* and print how many gold samples it labels correctly.

    classifier    -- object providing train(data) and get_class(sample)
    training_data -- mapping passed unchanged to classifier.train
    gold_data     -- mapping from expected label to the sample to classify

    Prints "<correct> in <total> correct" and returns None.
    """
    classifier.train(training_data)
    correct = 0
    for lang in gold_data:
        if classifier.get_class(gold_data[lang]) == lang:
            correct += 1
    # A single-argument print() emits the same text on Python 2 and 3.
    print("%s in %s correct" % (correct, len(gold_data)))


def _char_bigrams(tokens):
    """Return the two-character slices of the tokens joined by spaces."""
    # ' '.join matches string.join's default separator.  The range is taken
    # over the joined text: the original used len(tokens) - 1, which stopped
    # after that many characters and dropped the rest of the text.
    text = ' '.join(tokens)
    return [text[n:n + 2] for n in range(len(text) - 1)]


def _report(label, classifier):
    """Write *label*, then let run() print the score on the same line."""
    # run() returns None, so printing its return value (as the original
    # `print label, run(...)` did) added a stray "None" line per classifier.
    # The extra space keeps the spacing the comma-separated print produced.
    sys.stdout.write(label + " ")
    run(classifier, training_data, gold_data)


# features: character bigrams
fd = detect.feature({"char-bigrams": _char_bigrams})

training_data = udhr.langs(['English-Latin1', 'French_Francais-Latin1',
                            'Indonesian-Latin1', 'Zapoteco-Latin1'])

# Items 0-49 of each language are held out as gold data; items 100-199
# replace the full text as training data.
gold_data = {}
for lang in training_data:
    gold_data[lang] = training_data[lang][:50]
    training_data[lang] = training_data[lang][100:200]

_report("Cosine classifier: ", classify.Cosine(fd))
_report("Naivebayes classifier: ", classify.NaiveBayes(fd))
_report("Spearman classifier: ", classify.Spearman(fd))
import string def run(classifier, training_data, gold_data): classifier.train(training_data) correct = 0 for lang in gold_data: cls = classifier.get_class(gold_data[lang]) if cls == lang: correct += 1 print correct, "in", len(gold_data), "correct" # features: character bigrams fd = detect.feature({ "char-bigrams": lambda t: [string.join(t)[n:n + 2] for n in range(len(t) - 1)] }) training_data = udhr.langs([ 'English-Latin1', 'French_Francais-Latin1', 'Indonesian-Latin1', 'Zapoteco-Latin1' ]) gold_data = {} for lang in training_data: gold_data[lang] = training_data[lang][:50] training_data[lang] = training_data[lang][100:200] print "Cosine classifier: ", run(classify.Cosine(fd), training_data, gold_data) print "Naivebayes classifier: ",