nfeatures=10000
f=corpus.feature_selection(top=nfeatures,method=IG)
corpus=corpus.filter(features=f)
print 'Bayes Classifier on Reduced dataset of', nfeatures,' features'
print  '-------------------------'
print  '(Accuracy, Precision,REcall,F-Measure)'
print Bayes.test(corpus,folds=10)


#Testing Model on sample Dataset
print 'Testing Model on Sample Dataset'
classifier = Bayes()
for document in corpus.documents:
    classifier.train(document,type=document.type)
# In the file top 10 are negative tweets and rest are positive tweets
ft=open('test_20','r')
test_lines=ft.readlines()
for line in test_lines:
	t=(Document(line))
	corpus.append(t)
	print line.strip()+' '+str(classifier.classify(t))
ft.close()

#Export reduced Features file to Arff file
print 'Exporting Corpus to Weka format'
corpus.export('Weka_PAtterns.arff',format=WEKA)

# Release the two handles opened earlier in the script (outside this excerpt;
# presumably the negative and positive input files -- confirm).
for open_handle in (f_neg, f_pos):
    open_handle.close()