def main():
    """Build the global ``top_words`` vocabulary from the training files.

    Reads the user input via ``parser.getInput()``, parses the files listed
    under its ``'train'`` key, tokenizes the text found at index 3 of every
    parsed record, and stores the first 500 keys of the resulting frequency
    distribution in the module-level ``top_words``.  Finally aggregates the
    features of the parsed data and prints the first ten feature rows.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Join with a space so the last word of one record cannot fuse with the
    # first word of the next one before tokenization.
    allsent = ' '.join(f[3] for f in pdata)

    # Load the stopword list once instead of once per token, and use a set
    # for constant-time membership tests.
    english_stopwords = set(stopwords.words('english'))

    # Lowercase *before* filtering: the previous code tested the raw token,
    # so capitalised stopwords such as "The" slipped through the filter.
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    # NOTE(review): slicing keys() assumes FreqDist.keys() returns a list
    # ordered by decreasing frequency -- confirm against the NLTK version
    # this project pins.
    top_words = all_words.keys()[:500]

    featdata = featureAggregator(pdata)
    print(featdata[:10])
def main():
    """Classify the test files and write the predictions to ``../../output.txt``.

    Obtains a classifier from ``getClassifier()``, parses the files listed
    under the ``'test'`` key of the user input, aggregates their features and
    classifies the feature set stored at index 4 of every feature row.

    Each prediction is echoed to stdout and written as one tab-separated
    line: ``<row[0]>\\t<row[1]>\\t<label>`` where the label is ``1`` for
    ``'pos'``, ``0`` for ``'neutral'`` and ``-1`` for anything else.
    """
    classifier = getClassifier()
    userInput = getInput()
    fileList = parser.getFiles(userInput['test'])
    pdata = parser.parseFiles(fileList)
    featdata = extractor.featureAggregator(pdata)

    # Diagnostic dump of one parsed record (raises IndexError when fewer
    # than five records were parsed, exactly as before).
    print(pdata[4])

    # Classifier label -> numeric label written to the output file; every
    # label not listed here is reported as '-1'.
    label_for = {'pos': '1', 'neutral': '0'}

    # The context manager guarantees the file is closed even if
    # classification of a row raises.
    with open('../../output.txt', 'w') as outputFileObj:
        for featdatarow in featdata:
            label = label_for.get(classifier.classify(featdatarow[4]), '-1')
            print("%s %s %s" % (featdatarow[0], featdatarow[1], label))
            outputFileObj.write(
                str(featdatarow[0]) + '\t' + str(featdatarow[1]) + '\t'
                + label + '\n')
def main():
    """Build the vocabulary, show one sample item and report classifier accuracy.

    Steps:
      1. Parse the files listed under the ``'train'`` key of the user input.
      2. Tokenize the text at index 3 of every parsed record and store the
         first 500 keys of the frequency distribution in the module-level
         ``top_words``.
      3. Aggregate features and print the item at index 20 together with
         its feature dictionary (index 4 of that item).
      4. Pass the feature data to ``splitfeatdata`` and print the returned
         accuracy values and their mean.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    parsedata = parser.parseFiles(fileList)

    # Join with a space so words at record boundaries do not fuse together.
    allsent = ' '.join(f[3] for f in parsedata)

    # Load the stopword list once (not once per token) into a set.
    english_stopwords = set(stopwords.words('english'))

    # Lowercase before filtering so capitalised stopwords ("The", "And")
    # are removed as well.
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    # NOTE(review): relies on FreqDist.keys() being a frequency-ordered
    # list -- confirm against the NLTK version in use.
    top_words = all_words.keys()[:500]

    featdata = extractor.featureAggregator(parsedata)

    # Item 20 is used as the demonstration sample; this raises IndexError
    # when fewer than 21 items are available.
    sample = featdata[20]
    print("Sample Data Item:\n\n")
    print("%20s %4s %4s %20s" % ("FILENAME", "LINENUM", "VOTE", "SENTENCE"))
    print("-" * 79)
    print("%10s %4s %4s %20s" % (sample[0], sample[1], sample[2], sample[3]))
    print("\n\nFeatures of this Data Item")
    print("-" * 79)
    for key, val in sample[4].items():
        print("%50s : %10s" % (key, val))

    allacc = splitfeatdata(featdata)
    print("\n\n")
    print("-" * 60)
    # One-element tuple so formatting also works if allacc is itself a tuple.
    print("Accuracy Values: %s" % (allacc,))
    print("==" * 60)
    # float() keeps the mean from being floored by Python 2 integer division.
    print("Overall Classifier Accuracy %4.4f " % (sum(allacc) / float(len(allacc))))
def main():
    """Compute the global ``top_words`` list and print a preview of the features.

    Parses the files listed under the ``'train'`` key of the user input,
    tokenizes the text at index 3 of each parsed record, keeps the first 500
    keys of the token frequency distribution in the module-level
    ``top_words``, then aggregates features for the parsed data and prints
    the first ten rows.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # A space separator keeps the final word of one record from being
    # glued to the first word of the following record.
    allsent = ' '.join(record[3] for record in pdata)

    # Fetch the stopword list a single time; a set gives O(1) lookups.
    english_stopwords = set(stopwords.words('english'))

    # Tokens are lowercased first so the stopword test is case-insensitive
    # (the stopword list itself is compared against the lowercased form).
    tokens = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in tokens if w not in english_stopwords)

    # NOTE(review): assumes FreqDist.keys() yields a list sorted by
    # decreasing frequency -- verify for the installed NLTK release.
    top_words = all_words.keys()[:500]

    featdata = featureAggregator(pdata)
    print(featdata[:10])