def supperclassify(train_set, train_label, test_set, test_label):
    '''Compare several meta-classification methods on the base classifiers' outputs.'''
    # Plain majority voting over the base classifiers' predictions.
    train_voted = voting(train_set)
    aux = train_voted == train_label
    correct = sum(aux.astype(int))
    # Use 100.0 so the division stays floating point under Python 2.
    _accuracy = (correct * 100.0) / len(train_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        train_label, train_voted)
    print 'Estimator VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    print '\n'

    # Weighted voting: learn one weight (lambda) per base classifier on the
    # training split, then apply the weighted vote to the test split.
    lambdas = weighted_voting_getlambdas(train_set, train_label)
    results = weighted_voting(test_set, lambdas)
    aux = results == test_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100.0) / len(test_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(
        test_label, results)
    print 'Estimator W_VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score

    # Train stacked (super) classifiers on the base classifiers' outputs and
    # evaluate each of them on the held-out outputs.
    rf = clf.classifier_randomForest(train_set, train_label)
    results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF')
    lr = clf.logistic_regression(train_set, train_label)
    results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR')
    svm = clf.classifier_svm(train_set, train_label)
    results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM')
    rbf = clf.rbf_classifier(train_set, train_label)
    results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF')
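# The voting helpers called above (`voting`, `weighted_voting_getlambdas`,
# `weighted_voting`) are defined elsewhere in the repo. The sketch below is a
# minimal, hypothetical reconstruction of what they are assumed to do: each
# row of `predictions` holds one label per base classifier; plain voting
# takes the per-row majority, and weighted voting weights each classifier by
# its training accuracy. The `_sketch` names are illustrative, not the
# project's own.
import numpy as np


def voting_sketch(predictions):
    # Majority vote per sample: the most frequent label across classifiers.
    votes = []
    for row in np.asarray(predictions):
        values, counts = np.unique(row, return_counts=True)
        votes.append(values[np.argmax(counts)])
    return np.array(votes)


def weighted_voting_getlambdas_sketch(predictions, labels):
    # One lambda per classifier: its accuracy on the training predictions.
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    return np.mean(predictions == labels[:, None], axis=0)


def weighted_voting_sketch(predictions, lambdas):
    # Accumulate each classifier's weight into the bucket of the label it
    # predicted, then pick the label with the largest total weight per row.
    predictions = np.asarray(predictions)
    classes = np.unique(predictions)
    scores = np.zeros((predictions.shape[0], len(classes)))
    for j, c in enumerate(classes):
        scores[:, j] = ((predictions == c) * lambdas).sum(axis=1)
    return classes[np.argmax(scores, axis=1)]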
'''
Training the different classifiers.
'''
svm = clf.classifier_svm(tweets_features, train_labels)
rf = clf.classifier_randomForest(tweets_features, train_labels)
ada = clf.adaboost(tweets_features, train_labels)
lr = clf.logistic_regression(tweets_features, train_labels)

'''
Test the different classifiers with the test tweets.
'''
pred = vectorizer.transform(test_tweets)
pred = pred.toarray()
_results, _accuracyLR, _precisionLR, _recallLR, _f_measureLR = clf.evaluateResults(
    lr, pred, test_labels, estimator_name='Logistic regression',
    file_name=results_folder)
_results, _accuracyRF, _precisionRF, _recallRF, _f_measureRF = clf.evaluateResults(
    rf, pred, test_labels, estimator_name='RF', file_name=results_folder)
_results, _accuracySVM, _precisionSVM, _recallSVM, _f_measureSVM = clf.evaluateResults(
    svm, pred, test_labels, estimator_name='SVM', file_name=results_folder)
_results, _accuracyADA, _precisionADA, _recallADA, _f_measureADA = clf.evaluateResults(
    ada, pred, test_labels, estimator_name='ADABOOST', file_name=results_folder)

# Accumulate per-partition scores so averages can be reported later.
accuracyLR.append(_accuracyLR)
precisionLR.append(_precisionLR)
recallLR.append(_recallLR)
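# `clf.evaluateResults` lives in the project's classifier module and is not
# shown in this excerpt. The sketch below reconstructs its assumed contract
# from how it is called: predict with the fitted estimator, score with
# scikit-learn, optionally log to a file under `file_name` (the exact file
# layout inside that folder is an assumption), and return the tuple
# (predictions, accuracy, precision, recall, f_measure).
import os
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def evaluate_results_sketch(estimator, test_set, test_labels,
                            estimator_name='', file_name=None):
    # Predict and score with scikit-learn's standard metrics.
    predictions = estimator.predict(test_set)
    accuracy = accuracy_score(test_labels, predictions) * 100
    precision, recall, f_measure, _ = precision_recall_fscore_support(
        test_labels, predictions, average='macro')
    print 'Estimator', estimator_name
    print 'Average Accuracy:\t', accuracy
    print 'Average Precision:\t', precision
    print 'Average Recall:\t', recall
    print 'Average F1 Measure:\t', f_measure
    if file_name is not None:
        # Hypothetical results file inside the results folder.
        with open(os.path.join(file_name, 'results.txt'), 'a') as handle:
            handle.write('%s\t%.2f\n' % (estimator_name, accuracy))
    return predictions, accuracy, precision, recall, f_measure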
# oneVSall_rf = clf.onevsall(tweets_features, train_labels, estimator)

'''
Test the different classifiers with the test tweets.
'''
pred = vectorizer.transform(test_tweets)
pred = pred.toarray()
# pred = SelectKBest(chi2, k=4500).fit_transform(pred, test_labels)

# Only the SVM is evaluated in this run; the other estimators are kept
# commented out from earlier experiments.
# evaluateResults(lda, pred, test_labels, estimator_name='LDA')
# results, accuracyLR, precisionLR, recallLR, f_measureLR = clf.evaluateResults(
#     lr, pred, test_labels, estimator_name='Logistic regression')
# results, accuracyRF, precisionRF, recallRF, f_measureRF = clf.evaluateResults(
#     forest, pred, test_labels, estimator_name='RF')
results, accuracySVM, precisionSVM, recallSVM, f_measureSVM = clf.evaluateResults(
    svm, pred, test_labels, estimator_name='SVM')
# results, accuracyADA, precisionADA, recallADA, f_measureADA = clf.evaluateResults(
#     ada, pred, test_labels, estimator_name='ADABOOST')
# results, accuracyMLP, precisionMLP, recallMLP, f_measureMLP = clf.evaluateResults(
#     mlp, pred, test_labels, estimator_name='MLP')
# results, accuracyOVASVM, precisionOVASVM, recallOVASVM, f_measureOVASVM = clf.evaluateResults(
#     oneVSall_svm, pred, test_labels, estimator_name='one versus all SVM')
# evaluateResults(oneVSall_mlp, pred, test_labels, estimator_name='one versus all MLP')
# results, accuracyOVARF, precisionOVARF, recallOVARF, f_measureOVARF = clf.evaluateResults(
#     oneVSall_rf, pred, test_labels, estimator_name='one versus all RF')

print 'Accuracy SVM:\t', accuracySVM
print 'Average Precision:\t', precisionSVM
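# The `vectorizer` used above is created earlier in the script and is not
# shown in this excerpt. The sketch below is a minimal reconstruction of the
# assumed pattern: fit the vocabulary on the training tweets only, then reuse
# the same fitted object on the test tweets so both splits share one feature
# space. CountVectorizer and max_features=5000 are assumptions; the project
# may use a TF-IDF variant or different settings.
from sklearn.feature_extraction.text import CountVectorizer


def build_features_sketch(train_tweets, test_tweets):
    vectorizer = CountVectorizer(max_features=5000)
    # fit_transform learns the vocabulary from the training split only ...
    tweets_features = vectorizer.fit_transform(train_tweets).toarray()
    # ... and transform projects the test split onto that same vocabulary,
    # which avoids leaking test data into the feature space.
    pred = vectorizer.transform(test_tweets).toarray()
    return vectorizer, tweets_features, pred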
'''
Train the super classifier on the test set.
'''
print '\nCreating test set for super classifier ... '
val_tweet_trans = vectorizer.transform(validation_tweets)
val_tweet_trans = val_tweet_trans.toarray()
test_results = clf.test_classifiers(val_tweet_trans, validation_labels,
                                    classifiers)

'''
Now we have train_results and test_results. Let's train and test a
super classifier on them.
'''
print '\nTraining super classifier ... '
super_clf = clf.rbf_classifier(train_results, test_labels)

print '\nEvaluating super classifier ... '
results, accuracy, precision, recall, f_measure = clf.evaluateResults(
    super_clf, test_results, validation_labels,
    estimator_name='Super Classifier')

print '\n\nSuperClassify partition', j, '\n'
diagnose.supperclassify(train_results, test_labels, test_results,
                        validation_labels)
# np.savetxt("train_results_3.csv", train_results, delimiter=",")
# np.savetxt("train_labels_3.csv", test_labels, delimiter=",")
# np.savetxt("test_results_3.csv", test_results, delimiter=",")
# np.savetxt("test_labels_3.csv", validation_labels, delimiter=",")
# printResults(accuracy, precision, recall, f_measure, name="SUPER CLASSIFICATOR")
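# `clf.test_classifiers` is defined in the project's classifier module; the
# sketch below is an assumed reconstruction of the stacking step it performs:
# run every fitted base classifier on one split and stack their per-sample
# predictions column-wise, producing the (n_samples, n_classifiers) matrix the
# super classifier consumes as features. Note the design choice visible
# above: the super classifier is trained on the base classifiers' outputs for
# the test split (train_results paired with test_labels) and evaluated on
# their outputs for the validation split, so the base and super classifiers
# never share a training set.
import numpy as np


def test_classifiers_sketch(features, labels, classifiers):
    # One column of predicted labels per base classifier.
    columns = [estimator.predict(features) for estimator in classifiers]
    meta_features = np.column_stack(columns)
    # `labels` is accepted for interface parity with the project's version,
    # which presumably also scores each base classifier on this split.
    return meta_features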