# Keep only the `nfeatures` top-ranked features (selection method: IG) and
# rebuild the corpus restricted to them.
nfeatures = 10000
f = corpus.feature_selection(top=nfeatures, method=IG)
corpus = corpus.filter(features=f)

# Every print below passes exactly one argument in parentheses, so the output
# is byte-identical to the Python 2 print statement. The join reproduces the
# single space the print statement inserted between comma-separated items.
print(' '.join(['Bayes Classifier on Reduced dataset of',
                str(nfeatures),
                ' features']))
print('-------------------------')
print('(Accuracy, Precision,REcall,F-Measure)')
print(Bayes.test(corpus, folds=10))

# Testing Model on sample Dataset
print('Testing Model on Sample Dataset')
classifier = Bayes()
for document in corpus.documents:
    classifier.train(document, type=document.type)

# In the file top 10 are negative tweets and rest are positive tweets
# The with-block closes the test file even if classification raises midway.
with open('test_20', 'r') as ft:
    for line in ft:
        t = Document(line)
        # NOTE(review): each test document is appended to `corpus`, so it is
        # also part of the export below -- confirm this is intended.
        corpus.append(t)
        print(line.strip() + ' ' + str(classifier.classify(t)))

# Export reduced Features file to Arff file
print('Exporting Corpus to Weka format')
corpus.export('Weka_PAtterns.arff', format=WEKA)

# f_neg / f_pos are not defined in this section; presumably they are the
# input files opened earlier in the script -- verify against the code above.
f_neg.close()
f_pos.close()