def tester(self):
    _, _ = self.load_progress()
    # allocated for a per-class confusion count (never filled in below;
    # the plot is built from pred_list/labels_list instead)
    confusion = torch.zeros(self.opts.num_classes, self.opts.num_classes)

    # predictions and labels to be fed into the confusion-matrix plot
    pred_list = []
    labels_list = []
    y_label = np.arange(self.opts.num_classes)
    y_label = [str(e) for e in y_label]

    # eval mode to disable dropout
    self.RNN.eval()

    # used for overall accuracy
    correct = 0
    total = 0
    for data, label, lengths in self.data_loader:
        data, label, lengths = util.sort_batch(data, label, lengths)

        # run the data through the RNN
        pred = self.RNN(data, lengths)

        # pick the argmax over the class dimension
        output = torch.max(pred, 1)[1]
        # renamed loop variables to avoid shadowing output/label
        for out, lab in zip(output, label):
            pred_list.append(out.cpu().item())
            labels_list.append(lab.item())
            if out.cpu().item() == lab.item():
                correct += 1
            total += 1

    # np.int was removed from NumPy; plain int is the correct dtype here
    confusionMatrix.plot_confusion_matrix(np.array(labels_list, dtype=int),
                                          np.array(pred_list, dtype=int),
                                          np.array(y_label),
                                          title="ConfusionMatrix")
    plt.show()
    print("Test Accuracy", correct / float(total))
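# util.sort_batch above is project-specific and not shown here. A minimal
# sketch of what such a helper typically does -- assuming padded data/label
# tensors indexed along the batch dimension and a 1-D lengths tensor -- since
# pack_padded_sequence historically required batches sorted by descending
# length (an assumption, not the project's actual code):
def sort_batch_sketch(data, labels, lengths):
    """Sort a padded batch by descending sequence length (illustrative only)."""
    lengths, sorted_idx = lengths.sort(descending=True)
    return data[sorted_idx], labels[sorted_idx], lengths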
def test(self):
    steps = 0
    correct = 0
    total = 0

    # predictions and labels to be fed into the confusion-matrix plot
    pred_list = []
    labels_list = []
    y_label = np.arange(self.opts.num_classes)
    y_label = [str(e) for e in y_label]

    self.model.eval()
    # note: this evaluates on self.trainloader, not a held-out test loader
    for images, labels in iter(self.trainloader):
        steps += 1
        output = self.model(images.to(device))
        # pick the argmax over the class dimension
        pred = torch.max(output, 1)[1]
        # .item() assumes a batch size of 1
        pred_list.append(pred.cpu().item())
        labels_list.append(labels.item())
        if pred.cpu().item() == labels.item():
            correct += 1
        total += 1

    confusionMatrix.plot_confusion_matrix(np.array(labels_list, dtype=int),
                                          np.array(pred_list, dtype=int),
                                          np.array(y_label),
                                          title="ConfusionMatrix")
    plt.show()
    print("Test Accuracy", correct / float(total))
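# Both test loops rely on a project-local confusionMatrix module. A rough,
# hypothetical stand-in with the same (y_true, y_pred, classes, title)
# signature, built on sklearn/matplotlib -- not the project's actual helper:
def plot_confusion_matrix_sketch(y_true, y_pred, classes, title="ConfusionMatrix"):
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots()
    im = ax.imshow(cm, cmap="Blues")
    ax.set(xticks=np.arange(len(classes)), yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           xlabel="Predicted label", ylabel="True label", title=title)
    # annotate each cell with its count
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, cm[i, j], ha="center", va="center")
    fig.colorbar(im)
    return ax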
def calc_with_RandomForestClassifier():
    # import data from HDF5
    gammas = h5.File("../data/gammas.hdf5", "r")
    protons = h5.File("../data/protons.hdf5", "r")

    # convert to pandas
    gamma_array_df = pd.DataFrame(data=dict(gammas['array_events']))
    gamma_runs_df = pd.DataFrame(data=dict(gammas['runs']))
    gamma_telescope_df = pd.DataFrame(data=dict(gammas['telescope_events']))
    proton_array_df = pd.DataFrame(data=dict(protons['array_events']))
    proton_runs_df = pd.DataFrame(data=dict(protons['runs']))
    proton_telescope_df = pd.DataFrame(data=dict(protons['telescope_events']))

    # merge array and telescope data, then shuffle protons and gammas together
    gamma_merge = pd.merge(gamma_array_df, gamma_telescope_df, on="array_event_id")
    proton_merge = pd.merge(proton_array_df, proton_telescope_df, on="array_event_id")
    data = pd.concat([gamma_merge, proton_merge])
    data = shuffle(data)
    print(proton_runs_df['mc_max_energy'])

    # isolate Monte Carlo truth and drop columns not used as features
    mc_attributes = [
        'mc_az', 'mc_alt', 'mc_core_x', 'mc_core_y', 'mc_energy',
        'mc_corsika_primary_id', 'mc_height_first_interaction'
    ]
    mc_data = data[mc_attributes]
    data = data.drop(mc_attributes, axis=1)

    ID = data['array_event_id']
    droped_information = [
        'telescope_type_name', 'x', 'y', 'telescope_event_id', 'telescope_id',
        'run_id_y', 'run_id_x', 'pointing_altitude', 'camera_name',
        'camera_id', 'array_event_id', 'pointing_azimuth'
    ]
    droped_data = data[droped_information]
    data = data.drop(droped_information, axis=1)
    truth = mc_data['mc_corsika_primary_id']

    # fit and predict with the RandomForestClassifier
    clf = RandomForestClassifier()
    predictions = cross_val_predict(clf, data, truth, cv=10)

    # compute and plot the confusion matrix
    cm = confusion_matrix(truth, predictions, labels=(0, 101))
    class_names = ('Gamma', 'Proton')
    confusionMatrix.plot_confusion_matrix(cm, classes=class_names, normalize=True,
                                          title='Normalized confusion matrix')
    # plt.show()
    plt.savefig('plots/CM_RFClassifier.pdf')
    plt.close()

    # ROC curve for cross-validation, adapted from:
    # http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html
    cv = StratifiedKFold(n_splits=6)
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    ID = pd.Series(ID)
    tprs2 = []
    aucs2 = []
    for train, test in cv.split(data, truth):
        propability_means = pd.DataFrame({'prob_1': [], 'prob_2': []})
        probas_ = clf.fit(data.iloc[train],
                          truth.iloc[train]).predict_proba(data.iloc[test])
        fpr, tpr, thresholds = roc_curve(truth.iloc[test], probas_[:, 1],
                                         pos_label=101)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)

        # average the probabilities over every array_event_id
        test_ID = ID.iloc[test]
        probas = pd.DataFrame({
            'propability_1': probas_[:, 0],
            'propability_2': probas_[:, 1],
            'array_event_id': test_ID
        })
        unique_ID = np.array(test_ID.unique())
        x = truth.iloc[test]
        truth_id = pd.concat([x, test_ID], axis=1)
        truth_unique = pd.Series([], name='mc_corsika_primary_id')
        for i in unique_ID:
            prob1_mean = np.mean(probas.propability_1[probas.array_event_id == i])
            prob2_mean = np.mean(probas.propability_2[probas.array_event_id == i])
            prob_mean = pd.DataFrame([{'prob_1': prob1_mean,
                                       'prob_2': prob2_mean}])
            propability_means = pd.concat([propability_means, prob_mean],
                                          ignore_index=True)
            y = truth_id.mc_corsika_primary_id[truth_id.array_event_id == i].iloc[0]
            y = pd.Series(y, name='mc_corsika_primary_id')
            truth_unique = pd.concat([truth_unique, y], ignore_index=True)

        fpr2, tpr2, threshold2 = roc_curve(truth_unique.values,
                                           propability_means.iloc[:, 1].values,
                                           pos_label=101)
        tprs2.append(interp(mean_fpr, fpr2, tpr2))
        tprs2[-1][0] = 0.0
        roc_auc2 = auc(fpr2, tpr2)
        aucs2.append(roc_auc2)

    # ROC without per-event averaging
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    plt.plot(mean_fpr, mean_tpr, color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
             lw=2, alpha=0.8)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title("Receiver operating characteristic")
    plt.legend(loc="best")
    # plt.show()
    plt.savefig('plots/ROC_RFClassifier.pdf')
    plt.close()
    print(r'AUC without mean over event_id: %0.2f $\pm$ %0.2f' % (mean_auc, std_auc))

    # ROC with per-event averaging
    mean_tpr2 = np.mean(tprs2, axis=0)
    mean_tpr2[-1] = 1.0
    mean_auc2 = auc(mean_fpr, mean_tpr2)
    std_auc2 = np.std(aucs2)
    plt.plot(mean_fpr, mean_tpr2, color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc2, std_auc2),
             lw=2, alpha=0.8)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title("Receiver operating characteristic for averaged probabilities")
    plt.legend(loc="best")
    # plt.show()
    plt.savefig('plots/ROC_RFClassifier_meaned.pdf')
    plt.close()
    print(r'AUC with mean over event_id: %0.2f $\pm$ %0.2f' % (mean_auc2, std_auc2))
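# Aside: the per-event averaging loop in calc_with_RandomForestClassifier
# recomputes a boolean mask for every unique array_event_id. A sketch of an
# equivalent (and much faster) groupby formulation, taking the probas and
# truth_id frames built inside the fold loop and returning the same quantities:
def mean_probas_per_event(probas, truth_id):
    """Groupby equivalent of the mask-per-ID loop above (illustrative)."""
    propability_means = (probas
                         .groupby('array_event_id')[['propability_1',
                                                     'propability_2']]
                         .mean()
                         .rename(columns={'propability_1': 'prob_1',
                                          'propability_2': 'prob_2'}))
    # one truth label per array event (the label is constant within an event),
    # aligned to the same event order as the averaged probabilities
    truth_unique = (truth_id
                    .groupby('array_event_id')['mc_corsika_primary_id']
                    .first()
                    .loc[propability_means.index])
    return propability_means, truth_unique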
rf_classifier = GridSearchCV(estimator=RandomForestClassifier(),
                             param_grid=rf_parameters,
                             scoring='f1_weighted',
                             cv=5,
                             n_jobs=-1)
rf_classifier = rf_classifier.fit(X_train, y_train)
print('Best parameters value: ' + str(rf_classifier.best_params_) + '\n')
print('Best score on 5-fold cross validation: ' + str(rf_classifier.best_score_) + '\n')

y_pred_rf = rf_classifier.predict(X_test)
# map integer class indices back to readable activity labels
y_test_rf, y_pred_rf = get_labels(labels_mapping, y_test, y_pred_rf)

cm_rf = confusion_matrix(y_test_rf, y_pred_rf)
print('Confusion Matrix:\n')
print(cm_rf)
plot_confusion_matrix(cm_rf, filename='RF_cm.png',
                      title='Random Forest Activity Recognition')
print('Accuracy score: ' + str(accuracy_score(y_test_rf, y_pred_rf)))
print('F1 score: ' + str(f1_score(y_test_rf, y_pred_rf, average='weighted')) + '\n')

# export the best estimator as a Core ML model for on-device inference
rf_coreml_model = coremltools.converters.sklearn.convert(
    rf_classifier.best_estimator_)
rf_coreml_model.save('rf_ar.mlmodel')

print("Support vector machine grid search cross validation training...")
svm_parameters = [{'kernel': ['linear', 'poly', 'rbf'], 'gamma': ['scale']}]
svm_classifier = GridSearchCV(estimator=SVC(),
                              param_grid=svm_parameters,
                              scoring='f1_weighted',
                              cv=5,
                              n_jobs=-1)  # closing argument assumed to mirror the RF grid search; the source is cut off here
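# A quick way to sanity-check the exported Core ML model from Python before
# shipping it. The input feature names depend on how the converter named
# them, so inspect the spec first; model.predict itself only runs on macOS.
checked_model = coremltools.models.MLModel('rf_ar.mlmodel')
print(checked_model.get_spec().description)  # lists input/output features
# sample = {'<input_feature_name>': feature_vector}   # hypothetical key
# print(checked_model.predict(sample))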