def supperclassify(train_set, train_label, test_set, test_label): '''Different methods''' train_voted = voting(train_set) aux = train_voted == train_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(train_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class( train_label, train_voted) print 'Estimator VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score print '\n' lambdas = weighted_voting_getlambdas(train_set, train_label) results = weighted_voting(test_set, lambdas) aux = results == test_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(test_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class( test_label, results) print 'Estimator W_VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score rf = clf.classifier_randomForest(train_set, train_label) results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF') lr = clf.logistic_regression(train_set, train_label) results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR') svm = clf.classifier_svm(train_set, train_label) results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM') rbf = clf.rbf_classifier(train_set, train_label) results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF')
def supperclassify(train_set, train_label, test_set, test_label): '''Different methods''' train_voted = voting(train_set) aux = train_voted == train_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(train_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(train_label, train_voted) print 'Estimator VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score print '\n' lambdas = weighted_voting_getlambdas(train_set, train_label) results = weighted_voting(test_set, lambdas) aux = results == test_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(test_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(test_label, results) print 'Estimator W_VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score rf = clf.classifier_randomForest(train_set, train_label) results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF') lr = clf.logistic_regression(train_set, train_label) results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR') svm = clf.classifier_svm(train_set, train_label) results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM') rbf = clf.rbf_classifier(train_set, train_label) results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF') # import pdb; pdb.set_trace()
# NOTE(review): whitespace-mangled fragment of a larger scope -- the
# loop/function header that defines `tweet`, `tweets`, `vectorizer`,
# `train_results`, `test_labels`, `classifiers` and `diagnose` is not
# visible here, so the code is left byte-identical.
# Visible steps: collect cleaned tweet texts; vectorize them with the
# previously-fitted `vectorizer`; run the base classifiers on the SEPLN
# set (label argument is 0, presumably a placeholder -- confirm); train
# an RBF super-classifier on `train_results`/`test_labels` and predict;
# then also compute weighted-voting and plain-voting predictions.
# The final `polarity` array maps numeric classes to 4 polarity names
# (the 6-class variant with 'N+'/'P+' is left commented out).
tweets.append(tweet['clean']) # labels.append(tweet['class']) tweets_SEPLN = np.array(tweets) # labels_SEPLN = np.array(labels) print '\nCreating Test set for super classifier ... ' val_tweet_trans = vectorizer.transform(tweets_SEPLN) val_tweet_trans = val_tweet_trans.toarray() SEPLN_results = clf.test_classifiers(val_tweet_trans, 0, classifiers) ''' Now we have a train_results and test_results. Lets train and test a super classifier ''' print '\nTraining super classifier ... ' super_clf = clf.rbf_classifier(train_results, test_labels) print '\nEvaluating Super classifier ... ' rbf_results = super_clf.predict(SEPLN_results) # rbf_results, _, _, _, _ = clf.evaluateResults(super_clf, train_results, test_labels) # validation_labels, # estimator_name='Supper Classifier') import classify_diagnosis as cd lambdas = cd.weighted_voting_getlambdas(train_results, test_labels) w_results = cd.weighted_voting(SEPLN_results, lambdas) v_results = diagnose.voting(SEPLN_results) # polarity = np.array(['NONE', 'N+', 'N', 'NEU', 'P', 'P+']) polarity = np.array(['NONE', 'N', 'NEU', 'P'])
# NOTE(review): whitespace-mangled fragment of a larger scope -- the
# surrounding loop/function that defines `j`, `vectorizer`,
# `test_tweet_trans`, `test_labels`, `validation_tweets`,
# `validation_labels`, `classifiers` and `diagnose` is not visible here,
# so the code is left byte-identical.
# Visible steps: build the super-classifier's TRAIN matrix from the base
# classifiers' outputs on the test partition; vectorize the validation
# tweets and build its TEST matrix the same way; fit an RBF
# super-classifier and evaluate it on the validation labels; then run
# `diagnose.supperclassify` over the same four arrays for partition `j`.
# The trailing commented-out np.savetxt lines look like leftover
# debugging dumps of the four matrices.
train_results = clf.test_classifiers(test_tweet_trans, test_labels, classifiers) ''' Train the super classifier on the test set ''' print '\nCreating Test set for super classifier ... ' val_tweet_trans = vectorizer.transform(validation_tweets) val_tweet_trans = val_tweet_trans.toarray() test_results = clf.test_classifiers(val_tweet_trans, validation_labels, classifiers) ''' Now we have a train_results and test_results. Lets train and test a super classifier ''' print '\nTraining super classifier ... ' super_clf = clf.rbf_classifier(train_results, test_labels) print '\nEvaluating Super classifier ... ' results, accuracy, precision, recall, f_measure = clf.evaluateResults(super_clf, test_results, validation_labels, estimator_name='Supper Classifier') print '\n\nSuperClassify partition', j, '\n' diagnose.supperclassify(train_results, test_labels, test_results, validation_labels) # np.savetxt("train_results_3.csv", train_results, delimiter=",") # np.savetxt("train_labels_3.csv", test_labels, delimiter=",") # np.savetxt("test_results_3.csv", test_results, delimiter=",") # np.savetxt("test_labels_3.csv", validation_labels, delimiter=",") # import pdb; pdb.set_trace()