def writeToFile(self):
    """Save the text area to disk, analyse it, and export the results as CSV.

    Steps, in order:
      1. Write the contents of ``self.text_area`` to ``user_input.txt``.
      2. Feed that file through ``InputProcessor`` / ``BlockProcessor``
         (comma removal, tokenizing, part-of-speech tagging, phrase
         chunking).
      3. Store the per-block and total counts for each part of speech, and
         for words, in ``the2DArray`` via ``bp.updateArray``.
      4. Compute the distinct-word-count and TF-IDF arrays.
      5. Ask the user for a directory and write ``the2DArray.csv``,
         ``distinctWordCountArray.csv`` and ``tf_idfArray.csv`` into a new
         ``teamNLP`` (or ``teamNLP<N>``) folder inside it.

    Returns None.  If the directory dialog is cancelled, nothing is exported.
    """
    inputPath = 'user_input.txt'

    # The index END + "-1c" stops one character before the end of the
    # widget's text.  ``with`` guarantees the file is closed even if the
    # write raises.
    with open(inputPath, 'w') as outFile:
        outFile.write(self.text_area.get("1.0", END + "-1c"))

    ##########################################################################
    # initial setup, process input, tokenize, find parts of speech
    fio = FileIo(inputPath)
    ip = InputProcessor()
    bp = BlockProcessor()

    the2DArray = ip.processInput(fio.getFile())
    the2DArray = bp.removeCommas(the2DArray)
    tokenized = ip.tokenize(the2DArray)
    pos = bp.posTagger(the2DArray)

    ##########################################################################
    # noun and verb phrase chunking
    # Raw string: "\$" must reach the chunker as backslash + dollar.
    chunkPattern = r"""
        NP: {<DT|PP\$>?<CD>?(<JJ>|<JJR>|<JJS>)*(<NN>|<NNP>|<NNPS>|<NNS>|<POS>)+}
            {<NNP>+}
            {<NN>+}
            {<PRP>+}
            {<DT><JJ>}
        VP: {<MD|TO|RB>?<VB.*>+<RB>?<VB.*>?}
            {<VB.*>+}
        """
    # NOTE(review): the chunker's result is not used anywhere in this method;
    # the call is kept in case it has side effects -- confirm whether it can
    # be removed.
    bp.phraseChunker(tokenized, chunkPattern)

    ##########################################################################
    # count each category per block and in total, update the2DArray
    # (column name in the2DArray, counting function, data it counts over)
    counters = (
        ('nounCount', bp.countNouns, pos),
        ('verbCount', bp.countVerbs, pos),
        ('adjectiveCount', bp.countAdjectives, pos),
        ('pronounCount', bp.countPronouns, pos),
        ('adverbCount', bp.countAdverbs, pos),
        ('otherCount', bp.countOther, pos),
        ('totalWordCount', bp.wordCount, tokenized),
    )

    totals = {}
    for column, counter, data in counters:
        for key, value in counter(data).items():
            # Compare by value: ``is`` on a string only works by accident of
            # interning and would otherwise send 'total' to updateArray.
            if key == 'total':
                totals[column] = value
            else:
                the2DArray = bp.updateArray(the2DArray, key, column, value)

    ##########################################################################
    # update the2DArray with totals (written to the last row)
    for column, _counter, _data in counters:
        the2DArray = bp.updateArray(
            the2DArray, len(the2DArray) - 1, column, totals[column])

    ##########################################################################
    # process distinct word count and TF-IDF
    distinctWordCountArray = bp.distinctWordCount(tokenized)
    tf_idfArray = bp.tf_idf_Count(tokenized)

    ##########################################################################
    # ask user for directory name where the output csv files will be saved to
    dirname = tkFileDialog.askdirectory(
        initialdir="/", title="Choose Directory Location for Results")
    if not dirname:
        # Dialog was cancelled: do not fall back to writing under "/".
        return

    # csv result files go in a folder named teamNLP; if that already exists
    # in the chosen location, try teamNLP2, teamNLP3, ... until a free name
    # is found.
    baseName = 'teamNLP'
    outputDir = os.path.join(dirname, baseName)
    suffix = 1
    while os.path.exists(outputDir):
        suffix += 1
        outputDir = os.path.join(dirname, baseName + str(suffix))
    os.mkdir(outputDir)  # create folder in user's chosen directory location

    for fileName, data in (
        ('the2DArray.csv', the2DArray),
        ('distinctWordCountArray.csv', distinctWordCountArray),
        ('tf_idfArray.csv', tf_idfArray),
    ):
        numpy.savetxt(os.path.join(outputDir, fileName), data,
                      delimiter=",", fmt="%s")
other = bp.countOther(pos) print "\nTotal other found: " + str(other) print "\n###################################################################################\n\n" break if case('12'): print"Preparing to run wordCount()" print "\n###################################################################################\n\n" wordCountDict = bp.countOther(pos) print "Total word count: " + str(wordCountDict) + "\n\n" print "\n###################################################################################\n\n" break if case('13'): print"Preparing to run updateArray()" print "\n###################################################################################\n\n" #processInput = bp.updateArray(processInput,5,'breakIt',42) processInput = bp.updateArray(processInput,5,'nounCount',42) processInput = bp.updateArray(processInput,5,'verbCount',42) processInput = bp.updateArray(processInput,5,'pronounCount',42) processInput = bp.updateArray(processInput,5,'adjectiveCount',42) processInput = bp.updateArray(processInput,5,'adverbCount',42) processInput = bp.updateArray(processInput,5,'otherCount',42) processInput = bp.updateArray(processInput,5,'totalWordCount',42) print processInput print "\n###################################################################################\n\n" break if case('14'): print "Preparing to run distinctWordCount()" print "\n###################################################################################\n\n" temp = bp.distinctWordCount(tokenized) for line in temp: print line