Esempio n. 1
0
def printgeg():
    """Print some basic statistics about the corpus.

    Loads the corpus with ``corpus(5)`` (presumably five authors, going by
    the printed label) and prints:

    * the type/token ratio: number of distinct items divided by the total
      number of items;
    * the total number of items in the corpus.
    """
    wrds = corpus(5)
    token_count = len(wrds)
    type_count = len(set(wrds))
    print("type/token ratio vijf auteurs:")
    # Types divided by tokens, matching the printed label. An empty corpus
    # reports 0.0 rather than raising ZeroDivisionError.
    tt_ratio = float(type_count) / token_count if token_count else 0.0
    print(tt_ratio)
    print("documentlengte/aantal woorden:")
    print(token_count)
from classifier import train, p_feat_cat, p_feature
from features import *
from help_functions import *
import time
from nltk import NaiveBayesClassifier
import nltk
import operator
import pickle
import datetime
from math import *
import winsound
from time import time, sleep
import webbrowser
import os

# Load the corpus; 50 is passed straight to corpus()
# (presumably the number of authors to include -- TODO confirm).
corp = corpus(50)
# Disabled lemmatization step, kept for reference:
# corp = lemmatize_corpus(corp0)
# writetofile(corp,"lemmatized_corpus.pkl")
# NOTE: this rebinds the name `compactcorpus` from the callable to its return
# value, so compactcorpus() cannot be called again after this line.
compactcorpus = compactcorpus(corp)

print "corpus build"
# The compacted corpus exposes .keys(); presumably it is keyed by author
# (TODO confirm against the compactcorpus() helper).
authors = compactcorpus.keys()


def document_features(document):
    """Build a presence-feature dict for one document.

    Maps every entry of the module-level ``word_features`` to a boolean
    telling whether that entry is contained in ``document``.
    """
    return {feature: feature in document for feature in word_features}