classes = collections.defaultdict(lambda: 0)

    for item in itemlist:
        if getElement(item, 'status') == "Annotated":
            url = getElement(item, 'url')
            title = getElement(item, 'filename')
            readability = getElement(item, 'readability')
            
            classes[int( float(readability) + 0.5)] += 1

            #getStatisticsFromInternet(url)
            #print item.attributes['ID'].value, title, readability, url
            #fo.write("%s,,,%f,,,%f\n" % (title, float(fkre), float(readability)))
            
            myTextObj = processFile(title, dataDir, fo, float(readability), removeWikiGarbage=False, wiki=False)
            tokens = myTextObj.tokens()
            
            documents.append(doc(title, readability, tokens))

            allTokensSet.update(tokens.keys())
            
#    print "NUMBER OF VALID = ", len(allTokensSet)
    #allTokens = dict( zip(allTokensSet, range(1, len(allTokensSet) + 1 )))
    allTokens = list(allTokensSet)

    #buildSVMInput(documents, allTokens)
    buildNBInput(documents, allTokens)

    for c,v in classes.iteritems():
        print "CLASS %s - %d items" % (c,v) 
from __future__ import division
import os
import re
import sys
from myWikiText import MyWikiText
from processFile import *

if __name__ == "__main__":
    # Batch driver: for each input directory, write a "<name>.out" file in
    # the current working directory by calling printLabels once and then
    # processFile once per directory entry. Both helpers are defined outside
    # this section (presumably in the processFile module -- confirm).

    # Input directories come from the command line; with no arguments the
    # two default directories are used.
    if len(sys.argv) < 2:
        paths = ["data/en", "data/simple"]
    else:
        paths = sys.argv[1:]

    # Single-argument parenthesised print: same output under Python 2, and
    # still valid syntax under Python 3.
    print(paths)
    for path in paths:
        dataDir = path
        # Drop every "/" so the output name is flat, e.g. "data/en" ->
        # "dataen.out".
        outFileName = path.replace("/", "")

        # The with-block guarantees the output file is closed even when
        # printLabels or processFile raises part-way through a directory.
        with open(outFileName + ".out", "w") as fo:
            print("Writing %s.out" % (outFileName,))

            printLabels(fo)

            # os.listdir returns bare entry names in arbitrary order;
            # processFile receives the directory separately.
            for title in os.listdir(dataDir):
                processFile(title, dataDir, fo, 0.0, removeWikiGarbage=True, wiki=True)
import os
import re
import sys
from myWikiText import MyWikiText
from processFile import *

if __name__ == "__main__":
    # Single-file driver: writes a header row to stdout and then calls
    # processFile (defined outside this section) on one hard-coded file,
    # passing stdout as the output stream.

    dataDir = "data/simple"
    title = "Zoonosis.simple"
    fo = sys.stdout

    # Column labels of the header row, in output order. Building the row
    # with join keeps the separator and the label count in sync
    # automatically, unlike a hand-maintained 13-placeholder format string.
    # NOTE(review): "avgWordsPerSentece" is misspelled, but it is emitted at
    # runtime and may be matched by consumers of the output, so it is kept
    # verbatim -- confirm before correcting.
    labels = [
        "filename",
        "numWords",
        "numSentences",
        "numSyllables",
        "avgWordLengthSyl",
        "avgWordLengthInChars",
        "avgSenLengthInChars",
        "avgWordsPerSentece",
        "avgSyllablesPerSentence",
        "fleschReadingEase",
        "fleschKincaidGradeLevel",
        "colemanLiauIndex",
        "lixIndex",
    ]
    fo.write(",,,".join(labels) + "\n")

    processFile(title, dataDir, fo)

    # fo is sys.stdout, which this script does not own: flush it instead of
    # closing it so any later output (e.g. at interpreter exit) still works.
    fo.flush()