def writeToFile(self):
    """Dump the contents of the Tk text widget to user_input.txt.

    NOTE(review): relies on END (presumably Tkinter's) and on
    self.text_area being a Text widget -- the enclosing class is
    outside this chunk; confirm against the class definition.
    """
    # "-1c" strips the trailing newline Tk always appends to widget text.
    # `with` guarantees the file is closed even if get()/write() raises.
    with open('user_input.txt', 'w') as out:
        out.write(self.text_area.get("1.0", END + "-1c"))


##############################################################################
# initialize variables (renamed from `input`, which shadowed the builtin)
input_file = "user_input.txt"
fio = FileIo(input_file)
ip = InputProcessor()
bp = BlockProcessor()

# initial setup: process input, tokenize, find parts of speech
the2DArray = ip.processInput(fio.getFile())
the2DArray = bp.removeCommas(the2DArray)
tokenized = ip.tokenize(the2DArray)
pos = bp.posTagger(the2DArray)

##############################################################################
# noun and verb phrase chunking
# raw string so the backslash in PP\$ reaches the chunker untouched
# (in a non-raw string "\$" is an invalid escape -- a warning in Python 3)
chunkPattern = r"""
 NP: {<DT|PP\$>?<CD>?(<JJ>|<JJR>|<JJS>)*(<NN>|<NNP>|<NNPS>|<NNS>|<POS>)+}
     {<NNP>+}
     {<NN>+}
     {<PRP>+}
     {<DT><JJ>}
 VP: {<MD|TO|RB>?<VB.*>+<RB>?<VB.*>?}
     {<VB.*>+}
 """
phraseChunk = bp.phraseChunker(tokenized, chunkPattern)


def _apply_counts(array, counts, column):
    """Write per-block counts into `column` of `array`; return (array, total).

    `counts` maps block index -> count and carries the grand total under
    the key 'total' (as produced by BlockProcessor's count* methods).
    Defaults the total to 0 so a missing 'total' key cannot raise
    NameError downstream.
    """
    total = 0
    for key, value in counts.items():
        # `==`, not `is`: identity comparison against a str literal only
        # works by accident of CPython string interning.
        if key == 'total':
            total = value
        else:
            array = bp.updateArray(array, key, column, value)
    return array, total


##############################################################################
# count each part of speech per block, update the2DArray, remember totals
# (was seven copy-pasted loops; one helper keeps them in lockstep)
the2DArray, totalNouns = _apply_counts(the2DArray, bp.countNouns(pos), 'nounCount')
the2DArray, totalVerbs = _apply_counts(the2DArray, bp.countVerbs(pos), 'verbCount')
the2DArray, totalAdjectives = _apply_counts(the2DArray, bp.countAdjectives(pos), 'adjectiveCount')
the2DArray, totalPronouns = _apply_counts(the2DArray, bp.countPronouns(pos), 'pronounCount')
the2DArray, totalAdverbs = _apply_counts(the2DArray, bp.countAdverbs(pos), 'adverbCount')
the2DArray, totalOther = _apply_counts(the2DArray, bp.countOther(pos), 'otherCount')
the2DArray, totalWordCount = _apply_counts(the2DArray, bp.wordCount(tokenized), 'totalWordCount')

##############################################################################
# update the last row of the2DArray with the grand totals
lastRow = len(the2DArray) - 1
for column, total in (
        ('nounCount', totalNouns),
        ('verbCount', totalVerbs),
        ('adjectiveCount', totalAdjectives),
        ('pronounCount', totalPronouns),
        ('adverbCount', totalAdverbs),
        ('otherCount', totalOther),
        ('totalWordCount', totalWordCount)):
    the2DArray = bp.updateArray(the2DArray, lastRow, column, total)

##############################################################################
# process distinct word count and TF-IDF
distinctWordCountArray = bp.distinctWordCount(tokenized)
tf_idfArray = bp.tf_idf_Count(tokenized)

##############################################################################
# ask user for directory name where the output csv files will be saved to
dirname = tkFileDialog.askdirectory(initialdir="/",
                                    title="Choose Directory Location for Results")
outputDirBase = dirname + '/'

# csv result files go in a folder named teamNLP, teamNLP2, teamNLP3, ...
# -- the first name that does not already exist in the chosen location
count = 1
baseName = 'teamNLP'
outputFileName = outputDirBase + baseName
while os.path.exists(outputFileName):
    count += 1
    outputFileName = outputDirBase + baseName + str(count)
os.mkdir(outputFileName)  # create folder in user's chosen directory location

numpy.savetxt(outputFileName + '/the2DArray.csv',
              the2DArray, delimiter=",", fmt="%s")
numpy.savetxt(outputFileName + '/distinctWordCountArray.csv',
              distinctWordCountArray, delimiter=",", fmt="%s")
numpy.savetxt(outputFileName + '/tf_idfArray.csv',
              tf_idfArray, delimiter=",", fmt="%s")
from InputProcessor import * from BlockProcessor import * import nltk from Switch import * import sys import os if __name__ == "__main__": os.system('clear') input = "../input.txt" #input = "input2" fio = FileIo(input) ip = InputProcessor() bp = BlockProcessor() processInput = ip.processInput(fio.getFile()) tokenized = ip.tokenize(processInput) pos = bp.posTagger(processInput) print "Original input text:" print "###################################################################################\n\n" fio.toString(); print "\n###################################################################################\n\n" if (len(sys.argv) == 2): choice = str(sys.argv[1]) else: choice = raw_input(""" Please enter the number of the test to run: \tq) quit