def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)
    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)
    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins, weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)
    if verbose:
        print("Mean absolute deviation (MAD) =", mad)
        print("Mean squared error (MSE) =", mse)
        print("Mean absolute percentage error (MAPE) =", mape)
        print("Cohen kappa score =", kappa)
    return {"mad": mad, "mse": mse, "mape": mape, "kappa": kappa}
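# The helpers this function leans on (CustomBins, get_bin_custom,
# mean_absolute_percentage_error) are not shown above. A minimal MAPE sketch,
# with the divisor clamped away from zero as an assumption:
import numpy as np

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    # Guard against division by zero for targets at (or near) zero.
    return np.mean(np.abs((y_true - y_pred) / np.maximum(np.abs(y_true), 1e-8))) * 100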
def analise():
    datasets = load_data_from_pickle()
    classifier = get_conv_classifier()
    given_answers = list(classifier.predict(datasets.test.data)['classes'])
    wrong_answer_buckets = np.zeros(5)
    for i, test_data in enumerate(datasets.test.data):
        right_answer = datasets.test.target[i]
        given_answer = given_answers[i]
        if right_answer != given_answer:
            wrong_answer_buckets[right_answer] += 1
    print(wrong_answer_buckets / sum(wrong_answer_buckets))
    confusion_matrix = metrics.confusion_matrix(datasets.test.target, given_answers,
                                                labels=range(5))
    print(confusion_matrix)
    cohen_kappa_score = metrics.cohen_kappa_score(datasets.test.target, given_answers,
                                                  labels=range(5))
    print(cohen_kappa_score)
    # Note: jaccard_similarity_score was removed in scikit-learn 0.23;
    # jaccard_score is its successor on current versions.
    jaccard_similarity_score = metrics.jaccard_similarity_score(datasets.test.target,
                                                                given_answers)
    print(jaccard_similarity_score)
    report = metrics.classification_report(
        datasets.test.target, given_answers, labels=range(5),
        target_names=['NORTH', 'EAST', 'SOUTH', 'WEST', 'STILL'])
    print(report)
def kappa_scorer(y_true, y_pred):
    '''
    Cohen's kappa: a statistic that measures inter-annotator agreement.
    :param y_true: ground-truth labels
    :param y_pred: predicted labels
    :return: the kappa coefficient
    '''
    kappa_coef = cohen_kappa_score(y_true, y_pred)
    return kappa_coef
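# A minimal usage sketch: make_scorer turns kappa_scorer into a scorer that
# cross_val_score or GridSearchCV can consume. The LogisticRegression/iris
# choices are illustrative assumptions, not part of the original code.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         cv=5, scoring=make_scorer(kappa_scorer))
print(scores.mean())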
def f1_score(preds, true_labels):
    # Note: scikit-learn's convention is (y_true, y_pred). matthews_corrcoef
    # and cohen_kappa_score are symmetric in their arguments, but precision and
    # recall are not, so the local `precision`/`recall` helpers must expect
    # (preds, true_labels) in this order.
    mcc = matthews_corrcoef(preds, true_labels)
    print("mcc ", mcc)
    kappa = cohen_kappa_score(preds, true_labels)
    print("kappa ", kappa)
    p = precision(preds, true_labels)
    print("precision ", p)
    r = recall(preds, true_labels)
    print("recall", r)
    return 2 * p * r / (p + r)
def calcMetricsClassification(model, train, target, n_folds=5):
    res = cross_val_predict(estimator=model, X=train, y=target, cv=n_folds, n_jobs=1)
    acc = metrics.accuracy_score(target, res)
    f1 = metrics.f1_score(target, res)
    kappa = metrics.cohen_kappa_score(target, res)
    mix = classificationMix(target, res)
    scores = {'acc': acc, 'f1': f1, 'kappa': kappa, 'mix': mix}
    return scores
def output_metrics(model, images_test, test_label_classes):
    print('Predicting labels for test data...')
    # predict_classes() was removed in TF 2.6; np.argmax(model.predict(...), axis=-1)
    # is the modern equivalent.
    preds = model.predict_classes(images_test)
    test_label_classes = test_label_classes.astype(int)  # np.int was removed in NumPy 1.24
    #np.save('C:\\ML\\IS622\\test_labels.npy', test_labels)
    #np.save('C:\\ML\\IS622\\preds_classes.npy', preds)
    #test_labels = np.load('C:\\ML\\IS622\\test_labels_classes.npy')
    print('Confusion Matrix: ')
    print(confusion_matrix(test_label_classes, preds))
    print('Kappa score: ', cohen_kappa_score(test_label_classes, preds))
    print('Accuracy score: ', accuracy_score(test_label_classes, preds))
def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)
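# A hand check of the first assertion, working directly from the Table 1
# contingency matrix [[20, 20], [10, 50]] over 100 items:
po = (20 + 50) / 100            # observed agreement: 0.70
pe = 0.40 * 0.30 + 0.60 * 0.70  # chance agreement from the marginals: 0.54
print((po - pe) / (1 - pe))     # (0.70 - 0.54) / 0.46 ≈ 0.348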
def compare_dicts(dict1, dict2):
    """Computes various correlation metrics between two dictionaries."""
    v1, v2, common_keys = dict_vectors(dict1, dict2)
    sp = tuple(spearmanr(v1, v2))
    ps = pearsonr(v1, v2)
    cn = cos_number(v1, v2)
    kappa = cohen_kappa_score(v1.round(), v2.round())
    dist1 = v1 / v1.sum()
    dist2 = v2 / v2.sum()
    return {'dict1': len(dict1), 'dict2': len(dict2), 'common': len(common_keys),
            'kl': entropy(dist1, dist2), 'js': js_divergence(dist1, dist2),
            'spearman': sp, 'pearson': ps, 'cos': cn, 'kappa': kappa}
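# dict_vectors is not defined in this snippet; a plausible sketch, assuming it
# aligns the two dictionaries on their shared keys and returns numpy vectors:
import numpy as np

def dict_vectors(dict1, dict2):
    common_keys = sorted(set(dict1) & set(dict2))
    v1 = np.array([dict1[k] for k in common_keys], dtype=float)
    v2 = np.array([dict2[k] for k in common_keys], dtype=float)
    return v1, v2, common_keys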
def classificationMix(target, res):
    """
    A combined classification metric used as the heuristic for GA optimization
    in classification.
    :param target: ground-truth labels
    :param res: predicted labels
    :return: the product accuracy * f1 * kappa
    """
    acc = metrics.accuracy_score(target, res)
    f1 = metrics.f1_score(target, res)
    kappa = metrics.cohen_kappa_score(target, res)
    mix = acc * f1 * kappa
    return mix
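# A quick sanity check of the combined heuristic on hypothetical labels. Note
# that cohen_kappa_score can be negative, which flips the sign of the product.
from sklearn import metrics

target = [0, 0, 1, 1, 1]
res = [0, 1, 1, 1, 1]
print(metrics.accuracy_score(target, res)
      * metrics.f1_score(target, res)
      * metrics.cohen_kappa_score(target, res))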
def cohens_kappa():
    data_folder = '/Users/fpena/UCC/Thesis/datasets/context/manuallyLabeledReviews/'
    business_type = Constants.ITEM_TYPE
    file_name = data_folder + '%s_%s_reviews.json'
    labelers = [
        # 'francisco',
        'diego',
        'mesut',
        'rohit',
    ]
    all_records = [
        load_data(file_name % (labeler, business_type)) for labeler in labelers
    ]
    rater1 = [record['review_type'] for record in all_records[0]]
    rater2 = [record['review_type'] for record in all_records[1]]
    rater3 = [record['review_type'] for record in all_records[2]]

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))] + [
        [2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("Observed agreement " + str(ratingtask.avg_Ao()))
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater3)))
    print("sklearn kappa " + str(cohen_kappa_score(rater2, rater3)))
def evaluate_performance(prefix=None, classifier=None, issues_train=None,
                         priority_train=None, issues_test=None, priority_test=None):
    """
    Calculates performance metrics for a classifier.

    :param prefix: A prefix, for identifying the classifier.
    :param classifier: The classifier, previously fitted.
    :param issues_train: Train features.
    :param priority_train: Train class.
    :param issues_test: Test features.
    :param priority_test: Test class.
    :return: Train accuracy, test accuracy, test kappa, test weighted F1, and
        precision and recall per class.
    """
    train_accuracy = None
    if issues_train is not None and priority_train is not None:
        train_accuracy = classifier.score(issues_train, priority_train)
        print(prefix, ': Training accuracy ', train_accuracy)
        train_predictions = classifier.predict(issues_train)
        print(prefix, " :TRAIN DATA SET")
        print(classification_report(y_true=priority_train, y_pred=train_predictions))
    test_accuracy = classifier.score(issues_test, priority_test)
    print(prefix, ': Test accuracy ', test_accuracy)
    test_predictions = classifier.predict(issues_test)
    test_kappa = cohen_kappa_score(priority_test, test_predictions)
    print(prefix, ": Test Kappa: ", test_kappa)
    print(prefix, " :TEST DATA SET")
    print(classification_report(y_true=priority_test, y_pred=test_predictions))
    labels = np.sort(np.unique(np.concatenate((priority_test.values,
                                               test_predictions))))
    test_f1_score = f1_score(y_true=priority_test, y_pred=test_predictions,
                             average='weighted')
    precision_scores = precision_score(y_true=priority_test, y_pred=test_predictions,
                                       average=None)
    all_scores = precision_recall_fscore_support(y_true=priority_test,
                                                 y_pred=test_predictions,
                                                 average=None)
    precision_per_class = {label: score
                           for label, score in zip(labels, precision_scores)}
    recall_index = 1  # index of the recall array in the precision_recall_fscore_support tuple
    recall_per_class = {label: recall
                        for label, recall in zip(labels, all_scores[recall_index])}
    return train_accuracy, test_accuracy, test_kappa, test_f1_score, \
        defaultdict(lambda: 0, precision_per_class), \
        defaultdict(lambda: 0, recall_per_class)
def toy_cohens_kappa():
    # rater1 = [1, 1, 1, 0]
    # rater2 = [1, 1, 0, 0]
    # rater3 = [0, 1, 1]
    rater1 = ['s', 's', 's', 'g', 'u']
    rater2 = ['s', 's', 'g', 'g', 's']

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))]
    # + [[2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
def classificationByRegressionResults(self, test, y_test, preds, scaler, resultIndex):
    i = 0
    length = len(y_test)

    testTmp = test.copy()
    testTmp[self.labelCol] = y_test
    testTmp = pd.DataFrame(scaler.inverse_transform(testTmp), columns=testTmp.columns)
    y_test = testTmp[self.labelCol]

    testTmp = test.copy()
    testTmp[self.labelCol] = preds
    testTmp = pd.DataFrame(scaler.inverse_transform(testTmp), columns=testTmp.columns)
    preds = testTmp[self.labelCol]

    # Threshold the regression outputs into two classes.
    while i < length:
        if y_test[i] > 1400:
            y_test[i] = 1
        else:
            y_test[i] = 0
        if preds[i] > 1400:
            preds[i] = 1
        else:
            preds[i] = 0
        i += 1

    # Cast to int directly: pd.factorize() re-encodes labels by order of first
    # appearance, which can silently swap the 0/1 classes.
    y_test = y_test.astype(int).values
    preds = preds.astype(int).values
    print(preds)
    print(y_test)
    print(pd.crosstab(y_test, preds, rownames=['Actual Species'],
                      colnames=['Predicted Species']))

    res = self.results[resultIndex]
    res.accuracy += metrics.accuracy_score(y_test, preds)
    res.precision += metrics.precision_score(y_test, preds)
    res.recall += metrics.recall_score(y_test, preds)
    res.k_cohen += metrics.cohen_kappa_score(y_test, preds)
    res.f1_measure += metrics.f1_score(y_test, preds)
    self.results[resultIndex] = res
def cohens_kappa(codes1, codes2):
    '''Cohen's Kappa statistic. Assumes 2 coders, no missing data.

    Cohen, J. (1960). A Coefficient of Agreement for Nominal Scales.
    Educational and Psychological Measurement, XX(1), 37–46.
    '''
    code_counts = count_codes([codes1, codes2], min_coders=2, keep_coder_counts=True)
    s = summarize(code_counts)
    s['cohens_kappa'] = 0
    code_cols = codes1.columns.values
    x = codes1.copy()
    y = codes2.copy()
    x['n_coders'] = code_counts['xxx_n_coders_xxx']
    y['n_coders'] = code_counts['xxx_n_coders_xxx']
    x = x[x['n_coders'] == 2]
    y = y[y['n_coders'] == 2]
    for c in code_cols:
        k = cohen_kappa_score(x[c].values, y[c].values)
        s.loc[c, 'cohens_kappa'] = k  # .ix was removed from pandas; .loc is the replacement
    return (s[['cohens_kappa']], s['cohens_kappa'].mean())
def func(folder1, folder2):
    labs1 = []
    labs2 = []
    oldfnames = os.listdir(folder1)
    newfnames = os.listdir(folder2)
    counts = defaultdict(int)
    wordcounts = defaultdict(int)
    for fname in oldfnames:
        if fname not in newfnames:
            print("UH OH")
        else:
            print(fname)
            # open them both, compare lines.
            with open(folder1 + "/" + fname) as f:
                lines1 = f.readlines()
            with open(folder2 + "/" + fname) as f:
                lines2 = f.readlines()
            if len(lines1) != len(lines2):
                print("!!! length of files doesn't match. Old:", len(lines1),
                      " New:", len(lines2))
                return
            for old, new in zip(lines1, lines2):
                sold = old.split("\t")
                snew = new.split("\t")
                if len(sold) > 5 and len(snew) > 5:
                    labs1.append(sold[0])
                    labs2.append(snew[0])
                    counts[sold[0] + ":" + snew[0]] += 1
                    if sold[0] != snew[0]:
                        wordcounts[sold[0] + ":" + snew[0] + ":" + sold[5]] += 1

    k = cohen_kappa_score(labs1, labs2)
    print("cohen's kappa", k)
    labels = sorted(set([key.split(":")[0] for key in counts]))
    #from sklearn.metrics import confusion_matrix
    print(labels)
    print(confusion_matrix(labs1, labs2, labels=labels))

    miss = 0
    disagreement = 0
    agree = 0
    for key in counts:
        # ignore all matching.
        if key == "O:O":
            continue
        if len(set(key.split(":"))) == 1:
            agree += counts[key]
        elif "O" in key.split(":"):
            miss += counts[key]
        else:
            disagreement += counts[key]
    print("agree:", agree)
    print("miss:", miss)
    print("disagreement:", disagreement)
    for misses in sortmap(wordcounts, k=20):
        print(misses)
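# sortmap is not defined in this snippet; a plausible sketch, assuming it
# yields the k most frequent entries of a dict in descending order:
def sortmap(d, k=20):
    return sorted(d.items(), key=lambda kv: kv[1], reverse=True)[:k]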
test_data = hormone_gene_bins[str(test_bin)]
neg_test_data = neg_hormone_gene_bins[str(test_bin)]
X_test_pos = transform_X_values(test_data, _train_marked)
X_test_neg = transform_X_values(neg_test_data, _train_marked)
X_test = np.concatenate([X_test_pos, X_test_neg])
y_test_pos = np.ones((X_test_pos.shape[0], ), dtype=int)
# The negative labels must match the number of negative samples (the original
# reused X_test_pos.shape[0] here, an apparent copy-paste slip).
y_test_neg = np.zeros((X_test_neg.shape[0], ), dtype=int)
y_test = np.concatenate([y_test_pos, y_test_neg])
#X_test = min_max_scaler.transform(X_test)

# get results on the test set
y_pred_test = classifier.predict(X_test)
if classifier_type == 'svm':
    y_dec_score_test = classifier.decision_function(X_test)
else:
    # Note: predict_proba returns an (n, 2) array for binary problems;
    # roc_auc_score below expects the positive-class column (e.g. [:, 1]).
    y_dec_score_test = classifier.predict_proba(X_test)
print("Testing results: fold-" + str(test_bin))
print("Kappa score: " + str(cohen_kappa_score(y_test, y_pred_test)))
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
np.save('./BioEmbedS/output/y_fold_' + str(test_bin) + '.npy', y_test)
np.save('./BioEmbedS/output/y_pred_fold_' + str(test_bin) + '.npy', y_pred_test)
np.save('./BioEmbedS/output/y_dec_score_fold_' + str(test_bin) + '.npy',
        y_dec_score_test)
print("ROC-AUC score: " + str(roc_auc_score(y_test, y_dec_score_test)))
precision, recall, _ = precision_recall_curve(y_test, y_dec_score_test)
print("PR-AUC score: " + str(auc(recall, precision)))
get_binned_results(test_data, neg_test_data, _train_marked, classifier)
print("\n")
                          classes, cmap='RdBu', figsz=(30, 15),
                          title='Classification Report')
plt.savefig("image_clr.png", dpi=200, format='png', bbox_inches='tight',
            pad_inches=0.25)
plt.close()

fig = plt.figure(figsize=(9, 10))
import sklearn.metrics as sm
acc = str(round(sm.accuracy_score(predictions, actuals) * 100, 3))
kappa = str(round(sm.cohen_kappa_score(predictions, actuals), 3))
fig.suptitle("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + "*** ACCURACY = " + acc +
             "% | COHEN'S KAPPA = " + kappa + " ***",
             fontsize=17.5, fontweight="bold")
#import math, scipy.stats as ss
#rmse = str(round(math.sqrt(sm.mean_squared_error(predictions, actuals)), 3))
#prc = str(round(ss.pearsonr(predictions, actuals), 3))
#fig.suptitle("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + "*** RMSE = " + rmse +
#             " | PEARSON'S CORRELATION = " + prc + " ***",
#             fontsize=17.5, fontweight="bold")

fig.add_subplot(221)
plt.imshow(plt.imread("image_cm.png"))
plt.axis('off')
os.remove("image_cm.png")
fig.add_subplot(222)
plt.imshow(plt.imread("image_auroc.png"))
plt.axis('off')
logger.info("Selected freq. bands: %s" % freq_bands[subject - 1]) fbcsp = create_fbcsp(freq_bands[subject - 1], n_components) clf = Pipeline([('fbcsp', fbcsp), ('clf', estimator)]) clf.fit(X_train, y_train) n_samples = raw_epochs_test.n_samples scores = np.zeros((n_samples, 2)) length = raw_epochs_test.length time_points = np.linspace(0, length, n_samples, endpoint=False) for i, t in enumerate(time_points): X_test = raw_epochs_test.window(start_time=t, win_len=-win_len) y_pred = clf.predict(X_test) scores[i, 0] = accuracy_score(y_test, y_pred) * 100 scores[i, 1] = cohen_kappa_score(y_test, y_pred) filename = "%s/%s_subject_%d" % (util.LOG_DIR, args.d, subject) np.savetxt(filename, scores, fmt="%.3f", header=header) arg_max = np.argmax(scores[:, 0]) msg = "Max. accuracy = %.3f at time point %d" logger.info(msg % (scores[arg_max, 0], arg_max)) arg_max = np.argmax(scores[:, 1]) msg = "Max. kappa = %.3f at time point %d" logger.info(msg % (scores[arg_max, 1], arg_max)) logger.info("") end = time.time() util.log_exec_time(end - start)
# Creating the Tf-Idf model
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=2000, min_df=3, max_df=0.6)
TF_X = vectorizer.fit_transform(docs)
TF_X.toarray()

#**************************comparing KMeans and EM*****************************
num_cluster = 3
Kmeans_labels = tfidf_kmeans(TF_X, k=num_cluster)
#print(len(Kmeans_labels))
EM_labels = tfidf_EM(TF_X)
#print(len(EM_labels))
print('Kappa for KMeans and EM:',
      metrics.cohen_kappa_score(Kmeans_labels, EM_labels, weights='linear'))

#**************************comparing KMeans and HC*****************************
num_cluster = 3
Kmeans_labels = tfidf_kmeans(TF_X, k=num_cluster)
#print(len(Kmeans_labels))
HC_labels = tfidf_hc(TF_X, k=num_cluster)
#print(len(HC_labels))
print('Kappa for KMeans and HC:',
      metrics.cohen_kappa_score(Kmeans_labels, HC_labels, weights='linear'))

#**************************comparing EM and HC*********************************
num_cluster = 3
EM_labels = tfidf_EM(TF_X)
#print(len(EM_labels))
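# Caveat worth flagging: cluster IDs are arbitrary, so kappa between two
# clusterings depends on how each algorithm happens to number its clusters
# (and weights='linear' treats those IDs as ordinal). A permutation-invariant
# cross-check, reusing the label arrays above, is the adjusted Rand index:
from sklearn.metrics import adjusted_rand_score

print('ARI for KMeans and EM:', adjusted_rand_score(Kmeans_labels, EM_labels))
print('ARI for KMeans and HC:', adjusted_rand_score(Kmeans_labels, HC_labels))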
print(tpr1)
print(thresholds1)
roc_auc1 = auc(fpr1, tpr1)
print('Roc_auc1: %.4f' % roc_auc1)

#################################################################################################
# calculate AUC
AUC = roc_auc_score(y_test_A, y_score)
print('AUC1: %.4f' % AUC)
print('Accuracy1: %.4f' % ACC)
print('Sensitivity1: %.4f' % sensitivity)
print('Specificity1: %.4f' % specificity)
print('Precision1: %.4f' % precision)
print('F1score1: %.4f' % f1score)
# print(Recall)
Cohen = cohen_kappa_score(y_test_A, y_pred)
print('Cohen1: %.4f' % Cohen)
###################################################################################################

####################################################################################
lw = 2
plt.figure()
plt.plot(fpr1, tpr1, 'o-', ms=2, label='Combined-ROI(AUC = %0.4f)' % roc_auc1)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.yticks(np.arange(0, 1.05, step=0.1))
plt.xlabel('1-Specificity (False Positive Rate)', fontsize='large', fontweight='bold')
plt.ylabel('Sensitivity (True Positive Rate)', fontsize='large',
'''Model fitting'''
tpot_classifier.fit(Xtrain, ytrain)
'''Prediction'''
rf_y_pred = tpot_classifier.predict(Xtest)
rf_y_prob = [probs[1] for probs in tpot_classifier.predict_proba(Xtest)]
ypred_df = pd.DataFrame(rf_y_pred, columns=['Label pred'])
pathy_pred = ('C:/Users/jkgv1/OneDrive/Escritorio/' + 'ypred' + 'fold' +
              str(fold) + '.xlsx')
ypred_df.to_excel(pathy_pred)

'''Validation and performance metrics'''
print('RF')
print(confusion_matrix(ytest, rf_y_pred))
print('kappa', cohen_kappa_score(ytest, rf_y_pred))
report = precision_recall_fscore_support(ytest, rf_y_pred, average='weighted')
auc_test_RF[fold] = roc_auc_score(ytest, rf_y_pred, average='weighted')
kappa_test_RF[fold] = cohen_kappa_score(ytest, rf_y_pred)
f1_test_RF[fold] = report[2]
# Note: report[0] is the weighted precision, not accuracy, so this entry is
# mislabeled by its variable name.
acc_test_RF[fold] = report[0]

'''AUC computation'''
fpr, tpr, _ = roc_curve(ytest, rf_y_pred)
roc_auc = auc(fpr, tpr)
interp_tpr = interp(mean_fpr, fpr, tpr)
interp_tpr[0] = 0.0
tprs.append(interp_tpr)
aucs.append(roc_auc)
def evaluate(self):
    print("Evaluating")
    yp = {}
    ycn = {}
    yc = {}
    ytn = {}
    yt = {}
    try:
        metrics = pickle.load(open(self._fileEvaluation, 'rb'))
    except FileNotFoundError:
        metrics = {}
    table = [[
        "task", "average", "MAPs", "MAPc", "accur.", "kappa", "prec.",
        "recall", "f1score"
    ]]
    na = ' '
    for task in self._tasks:
        table.append([" ", " ", " ", " ", " ", " ", " ", " "])
        print(task)
        yp[task] = self.model[task].predict_proba(self.XTest[task])
        yt[task] = self.yTest[task]
        ytn[task] = self.lb[task].inverse_transform(yt[task])
        yc[task] = np.zeros(yt[task].shape, int)  # np.int was removed in NumPy 1.24
        for i, p in enumerate(yp[task]):
            yc[task][i][np.argmax(p)] = 1
        ycn[task] = self.lb[task].inverse_transform(yc[task])

        metrics[task] = {}
        metrics[task]['MAPs'] = MAPScorer().samplesScore(yt[task], yp[task])
        metrics[task]['MAPc'] = MAPScorer().classesScore(yt[task], yp[task])
        metrics[task]['accuracy'] = accuracy_score(yt[task], yc[task])
        metrics[task]['kappa'] = cohen_kappa_score(ytn[task], ycn[task])
        metrics[task]['precision'] = {}
        metrics[task]['recall'] = {}
        metrics[task]['f1score'] = {}
        table.append([
            task, na,
            "{:.3f}".format(metrics[task]['MAPs']),
            "{:.3f}".format(metrics[task]['MAPc']),
            "{:.3f}".format(metrics[task]['accuracy']),
            "{:.3f}".format(metrics[task]['kappa']), na, na, na
        ])
        for avg in ['micro', 'macro', 'weighted']:
            (metrics[task]['precision'][avg], metrics[task]['recall'][avg],
             metrics[task]['f1score'][avg], _) = precision_recall_fscore_support(
                 yt[task], yc[task], average=avg)
            table.append([
                task, avg, na, na, na, na,
                "{:.3f}".format(metrics[task]['precision'][avg]),
                "{:.3f}".format(metrics[task]['recall'][avg]),
                "{:.3f}".format(metrics[task]['f1score'][avg])
            ])
        metrics[task]['pr-curve'] = {}
        (metrics[task]['pr-curve']['x'], metrics[task]['pr-curve']['y'],
         metrics[task]['pr-curve']['auc']) = self._calculateMicroMacroCurve(
             lambda y, s: (lambda t: (t[1], t[0]))(precision_recall_curve(y, s)),
             yt[task], yp[task])
        metrics[task]['roc-curve'] = {}
        (metrics[task]['roc-curve']['x'], metrics[task]['roc-curve']['y'],
         metrics[task]['roc-curve']['auc']) = self._calculateMicroMacroCurve(
             lambda y, s: (lambda t: (t[0], t[1]))(roc_curve(y, s)),
             yt[task], yp[task])
    pickle.dump(metrics, open(self._fileEvaluation, "wb"))
    print(tabulate(table))
    res = np.hstack((res, X2[:, 2 * (i - 1):2 * i], XX[:, arr1[i - 1]:arr1[i]]))
res = np.hstack((res, X2[:, 6:8]))
#[ 0  7 10 12 14 18 20 23 25 28 30 33 35 38 40 44 46]

# 1. RandomForest + onehot + cost
standardLst = [10, 12, 14, 25, 38, 51, 59]
print("1.RandomForest + onehot +cost:")
for i in range(len(standardLst)):
    XXXX = res[:, :standardLst[i]]
    X_train, X_test, y_train, y_test = train_test_split(XXXX, y, random_state=0,
                                                        test_size=.2)
    print("X_train.shape: {}".format(X_train.shape))
    # random forest
    tree = RandomForestClassifier(random_state=0)
    tree.fit(X_train, y_train)
    # print("Accuracy on train set: {:.4f}".format(tree.score(X_train, y_train)))
    # print("Accuracy on test set: {:.4f}".format(tree.score(X_test, y_test)))
    y_pred = tree.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print("  Kappa: {:.4f}".format(kappa))
    # fip = tree.feature_importances_
    # fips = [sum(fip[:10])]
    # for tmp in range(1, i + 1):
    #     fips.append(sum(fip[:standardLst[tmp]]) - sum(fips))
    # result = [format(x, '.2%') for x in fips]
    # print("Feature importances:\n{}".format(result))
def Teste(self):
    '''
    Why macro-averaged recall? Because this metric is computed from the hits of
    each class individually and then averaged. If a class has no pixels, the
    recall is penalized, and no class may be dropped from the image. (2)
    Accuracy can be inflated by many hits in a single dominant class, while a
    class with few representatives can disappear in the process without the
    algorithm being penalized.
    '''
    # groups 2 and 3 as training, group 1 as test
    clf1 = MLClassifier()
    #clf1.defclasses_indices(self.class_indices)
    grupoTreinamento = []
    grupoTreinamentoGT = []
    grupoTreinamento.extend(self.amostra_grupo2)
    grupoTreinamento.extend(self.amostra_grupo3)
    grupoTreinamentoGT.extend(self.grupo2_GT)
    grupoTreinamentoGT.extend(self.grupo3_GT)
    clf1.fit(grupoTreinamento, grupoTreinamentoGT)
    pred1 = clf1.predict(self.amostra_grupo1)
    s1 = recall_score(self.grupo1_GT, pred1, average='macro')
    p1 = precision_score(self.grupo1_GT, pred1, average='macro')
    a1 = accuracy_score(self.grupo1_GT, pred1, normalize=True)
    k1 = cohen_kappa_score(self.grupo1_GT, pred1)
    f11 = f1_score(self.grupo1_GT, pred1, average='macro')

    # groups 1 and 3 as training, group 2 as test
    clf2 = MLClassifier()
    #clf2.defclasses_indices(self.class_indices)
    grupoTreinamento = []
    grupoTreinamentoGT = []
    grupoTreinamento.extend(self.amostra_grupo1)
    grupoTreinamento.extend(self.amostra_grupo3)
    grupoTreinamentoGT.extend(self.grupo1_GT)
    grupoTreinamentoGT.extend(self.grupo3_GT)
    clf2.fit(grupoTreinamento, grupoTreinamentoGT)
    pred2 = clf2.predict(self.amostra_grupo2)
    s2 = recall_score(self.grupo2_GT, pred2, average='macro')
    p2 = precision_score(self.grupo2_GT, pred2, average='macro')
    a2 = accuracy_score(self.grupo2_GT, pred2, normalize=True)
    k2 = cohen_kappa_score(self.grupo2_GT, pred2)
    f12 = f1_score(self.grupo2_GT, pred2, average='macro')

    # groups 1 and 2 as training, group 3 as test
    clf3 = MLClassifier()
    #clf3.defclasses_indices(self.class_indices)
    grupoTreinamento = []
    grupoTreinamentoGT = []
    grupoTreinamento.extend(self.amostra_grupo1)
    grupoTreinamento.extend(self.amostra_grupo2)
    grupoTreinamentoGT.extend(self.grupo1_GT)
    grupoTreinamentoGT.extend(self.grupo2_GT)
    clf3.fit(grupoTreinamento, grupoTreinamentoGT)
    pred3 = clf3.predict(self.amostra_grupo3)
    s3 = recall_score(self.grupo3_GT, pred3, average='macro')
    p3 = precision_score(self.grupo3_GT, pred3, average='macro')
    a3 = accuracy_score(self.grupo3_GT, pred3, normalize=True)
    k3 = cohen_kappa_score(self.grupo3_GT, pred3)
    f13 = f1_score(self.grupo3_GT, pred3, average='macro')

    return ((s1 + s2 + s3) / 3, (p1 + p2 + p3) / 3, (a1 + a2 + a3) / 3,
            (k1 + k2 + k3) / 3, (f11 + f12 + f13) / 3)
pred_classes_df = pd.DataFrame(pred_classes)
true_classes_df = pd.DataFrame(true_classes)
results = pd.concat([probabilities_df, pred_labels_df, true_labels_df,
                     pred_classes_df, true_classes_df], axis=1)
results.columns = ['Prob_Barley', 'Prob_Canola', 'Prob_Chickpea', 'Prob_Lentils',
                   'Prob_Wheat', 'Pred_label', 'True_label', 'Pred_class',
                   'True_class']

# classification report
y_true = true_labels
y_pred = pred_labels
target_names = ['Barley', 'Canola', 'Chickpea', 'Lentils', 'Wheat']
print(classification_report(y_true, y_pred, target_names=target_names, digits=2))
print(confusion_matrix(y_true, y_pred, labels=range(NUM_CLASSES)))
print('Kappa', cohen_kappa_score(y_true, y_pred))

#### end time #####
end_time = time.time()
epoch_mins, epoch_secs = epoch_time(start_time, end_time)
print(f'Execution Time: {epoch_mins}m {epoch_secs}s')

# ### save predictions as csv
# results.to_csv(f"{logdir}/predictions/predictions.csv", index=False)

# ### generate test xy for streamlit
# X_test = scaler.inverse_transform(X_test)
# df_test = pd.DataFrame(X_test)
# df_test.iloc[0:5000, :].to_csv(f"{logdir}/data/test_2019_198.csv", index=False)
            _, test_yhat_, _ = dev_step(x_batch, y_batch)
            test_yhat[(test_step - 1) * config.batch_size:
                      test_step * config.batch_size] = test_yhat_
            test_step += 1
        if test_generator.pointer < test_generator.data_size:
            actual_len, x_batch, y_batch, _ = test_generator.rest_batch(
                config.batch_size)
            _, test_yhat_, _ = dev_step(x_batch, y_batch)
            test_yhat[(test_step - 1) * config.batch_size:
                      test_generator.data_size] = test_yhat_
        test_fscore = f1_score(test_generator.label, test_yhat + 1, average='macro')
        test_acc = accuracy_score(test_generator.label, test_yhat + 1)
        test_kappa = cohen_kappa_score(test_generator.label, test_yhat + 1)
        print("{:g} {:g} {:g}".format(test_acc, test_fscore, test_kappa))
        with open(os.path.join(out_dir, "result_log.txt"), "a") as text_file:
            text_file.write("{:g} {:g} {:g}\n".format(
                test_acc, test_fscore, test_kappa))

        # Reset the file pointer of the data generators
        train_generator.reset_pointer()
        test_generator.reset_pointer()

        # save the latest model here
        checkpoint_name = os.path.join(checkpoint_path, 'best_model')
        save_path = saver.save(sess, checkpoint_name)
def learn(self):
    # seeding
    classes = self.short_df['status'].unique()
    seed_index = []
    for i in classes:
        seed_index.append(
            self.short_df['status'][self.short_df['status'] == i].index[0])

    act_data = self.short_df.copy()
    accuracy_list = []
    f1_total_list = []
    kappa_total_list = []

    # initialising
    train_idx = seed_index
    X_train = self.X[train_idx]
    y_train = self.Y[train_idx]

    # generating the pool
    X_pool = np.delete(self.X, train_idx, axis=0)
    y_pool = np.delete(self.Y, train_idx)
    act_data = act_data.drop(axis=0, index=train_idx)
    act_data.reset_index(drop=True, inplace=True)

    initiated_committee = []
    for learner_idx, model in enumerate(self.learners):
        learner = ActiveLearner(estimator=model, X_training=X_train,
                                y_training=y_train)
        initiated_committee.append(learner)

    # committee creation
    committee = Committee(
        learner_list=initiated_committee,
        # query_strategy=vote_entropy_sampling
    )
    committee.teach(X_train, y_train)

    # pool-based sampling (self.X, not an undefined global X, is the data here)
    n_queries = int(len(self.X) / (100 / self.percent))
    for idx in range(n_queries):
        query_idx = np.random.choice(range(len(X_pool)))
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        act_data = act_data.drop(axis=0, index=query_idx)
        act_data.reset_index(drop=True, inplace=True)
        accuracy_list.append(accuracy_score(committee.predict(X_pool), y_pool))
        model_pred = committee.predict(X_pool)
        f1_total_list.append(
            f1_score(y_pool, model_pred, average="weighted",
                     labels=np.unique(model_pred)))
        kappa_total_list.append(cohen_kappa_score(y_pool, model_pred))
        # print('Accuracy after query no. %d: %f' % (idx + 1, accuracy_score(committee.predict(X_pool), y_pool)))
    # print("By just labelling ", round(n_queries * 100.0 / len(self.X), 2), "% of total data accuracy of ", round(accuracy_score(committee.predict(X_pool), y_pool), 3), " % is achieved on the unseen data")
    return accuracy_list, f1_total_list, kappa_total_list
        prediction = learner.predict(img)
        label = int(prediction[1])
        predictions['actual'].append(curr['value'])
        predictions['predicted'].append(label)
        count += 1
    print()
    print()
    print()
    return predictions


# TODO: find model name
predictions_mapping = {}
for i in list(bone_map):
    if i == 'ELBOW':
        predictions_mapping[i] = run_model_on_predictions(
            i, 'vgg19bn-' + i.lower() + 'sfullval')
    else:
        # i.lower() here fixes an undefined name (`I`) in the original.
        predictions_mapping[i] = run_model_on_predictions(
            i, 'vgg19bn-' + i.lower() + 'fullval')

from sklearn.metrics import accuracy_score, cohen_kappa_score

# calculate kappa and accuracy
for i in list(predictions_mapping):
    print(i)
    actual = predictions_mapping[i]['actual']
    predict = predictions_mapping[i]['predicted']
    print('accuracy ', accuracy_score(actual, predict))
    print('kappa ', cohen_kappa_score(actual, predict))
                               estimator=LogisticRegression())
X_resampled_iht, y_resampled_iht = iht.fit_sample(train_set_1_1, label)
print(sorted(Counter(y_resampled_iht).items()))
x_train_iht, x_test_iht, y_train_iht, y_test_iht = train_test_split(
    X_resampled_iht, y_resampled_iht, random_state=1)
svm_clf.fit(x_train_iht, y_train_iht)
#joblib.dump(svm_clf, '../model/iht_sample_model.pkl')

# IHT evaluation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(svm_clf, x_test_iht, y_test_iht, cv=5)
print('iht_score:', scores)
pred11 = svm_clf.predict(x_test_iht)
print('iht_accuracy_score:', metrics.accuracy_score(y_test_iht, pred11))
print('iht_f1_score:', metrics.f1_score(y_test_iht, pred11, average="micro"))
from sklearn.metrics import cohen_kappa_score  # the kappa coefficient is derived from the confusion matrix
kappa = cohen_kappa_score(y_test_iht, pred11)
print('iht_cohen_kappa_score:', kappa)
from sklearn.metrics import hamming_loss  # Hamming loss
hamloss = hamming_loss(y_test_iht, pred11)
print('iht_hamming_loss', hamloss)
'''
[(1.0, 31727), (2.0, 31728), (3.0, 31727)]
iht_score: [0.48162151 0.48634454 0.47678084 0.48507776 0.48045397]
iht_accuracy_score: 0.4862161707850059
iht_f1_score: 0.4862161707850059
iht_cohen_kappa_score: 0.22873389703406943
iht_hamming_loss 0.5137838292149941
'''
tree_params = {
    'max_depth': range(15, 17),
    'n_estimators': range(75, 77),
    'max_features': range(26, 28),
    'min_samples_leaf': range(28, 30)
}
tree_grid = GridSearchCV(model, tree_params, cv=5, n_jobs=2, verbose=True,
                         scoring=make_scorer(cohen_kappa_score))
tree_grid.fit(x, y)
print(tree_grid.best_params_)

model = RandomForestClassifier(
    max_features=tree_grid.best_params_['max_features'],
    min_samples_leaf=tree_grid.best_params_['min_samples_leaf'],
    n_estimators=tree_grid.best_params_['n_estimators'],
    max_depth=tree_grid.best_params_['max_depth'],
    random_state=17)
model.fit(x, y)

X_test = data_test[columns_transformed]
data_test['target'] = model.predict(X_test)
cohen_kappa_score(data_test['target'], data_test['Response'], weights='quadratic')
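# One inconsistency worth noting: the grid search above optimizes unweighted
# kappa while the final evaluation uses quadratic weights. If that is
# unintended, the weights can be carried through make_scorer (a sketch):
from sklearn.metrics import cohen_kappa_score, make_scorer

qwk_scorer = make_scorer(cohen_kappa_score, weights='quadratic')
# e.g. GridSearchCV(model, tree_params, cv=5, scoring=qwk_scorer)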
from sknn.mlp import Layer
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sknn.mlp import Classifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, cohen_kappa_score

nn = Classifier(
    layers=[
        Layer("Rectifier", units=8000),
        Layer("Softmax")],
    learning_rule='nesterov',
    learning_rate=0.04,
    batch_size=200,
    dropout_rate=0.,
    n_iter=20,
    verbose=1,
    valid_size=0.1,
    n_stable=15,
    debug=False,
    # regularize='L2'
)

pipeline_nn = Pipeline([
    ("scaler", MinMaxScaler(feature_range=(0.0, 1.0))),
    ('neural network', nn)
])

pipeline_nn.fit(X_train, y_train)
predicted_label = pipeline_nn.predict(X_test)
print("GBC - accuracy Score on test_data : ", accuracy_score(y_test, predicted_label))
print("GBC - kappa Score on test_data : ", cohen_kappa_score(y_test, predicted_label))
print("GBC - kappa Score on train data : ",
      cohen_kappa_score(y_train, pipeline_nn.predict(X_train)))
# visualizing losses and accuracy
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Observing the losses, but can be commented out as it's not mandatory
reporter = LossPrettifier(show_percentage=True)
for i in range(numEpochs - 1):
    reporter(epoch=i, LossA=train_loss[i], LossAB=val_loss[i])

# Model evaluation
score, acc = model.evaluate(x_test, y_test_oneHot, batch_size=batch_Size)
print("Accuracy:", acc)
#if acc > 0.675:
model.save_weights(modelPath)
y_pred = model.predict(x_test)
y_pred = y_pred.reshape(len(y_test_oneHot), 5)
y_predict = y_pred.argmax(axis=-1)

# Writing results on file
f = open(resultPath, 'a')
# create classification report
f.write(classification_report(y_test, y_predict))
f.write(str(sklm.cohen_kappa_score(y_test, y_predict)) + "," + str(acc) + "," +
        str(score) + "\n")

# Print class-wise classification metrics
print(classification_report(y_test, y_predict))
# In[145]:

from sklearn.metrics import accuracy_score, confusion_matrix, cohen_kappa_score, \
    roc_curve, jaccard_similarity_score, precision_score

accuracy_score(df_test['benign_malignant'], result)

# In[110]:

confusion_matrix(df_test['benign_malignant'], result)

# In[111]:

cohen_kappa_score(df_test['benign_malignant'], result)

# In[112]:

# Note: jaccard_similarity_score was deprecated and then removed from
# scikit-learn; jaccard_score is its successor on current versions.
jaccard_similarity_score(df_test['benign_malignant'], result)

# ### Predicting path_diagnosis

# In[152]:

forest2 = RandomForestClassifier(n_estimators=100)

# Fit the forest to the training set, using the bag of words as
# features and the sentiment labels as the response variable
# starting on the evaluation set
X_train = X_train_origin[features]
X_test = X_test_origin[features]
i = 0
result = Results()

# getting test results for each classifier
while i < len(clfs):
    clfs[i].fit(X_train, Y_train)
    preds = clfs[i].predict(X_test)
    result.accuracy = metrics.accuracy_score(Y_test, preds)
    result.precision = metrics.precision_score(Y_test, preds)
    result.recall = metrics.recall_score(Y_test, preds)
    result.k_cohen = metrics.cohen_kappa_score(Y_test, preds)
    result.f1_measure = metrics.f1_score(Y_test, preds)
    result.log_loss = metrics.log_loss(Y_test, clfs[i].predict_proba(X_test))
    # write results into file
    printResults(result, clfNames[i], len(features))
    i += 1

featureSize -= 5

# plotting test and train results
dirPath = "Classification/Test/"
plotter = Plotter(clfNames, dirPath)
metricNames = ["Accuracy", "Precision", "Recall", "K_cohen", "F1_measure",
               "Log-loss"]
i = 0
while i < len(metricNames):
    plotter.plotMetric(dirPath + metricNames[i] + ".png", i + 1)
model.add(BatchNormalization(epsilon=0.001, axis=-1, momentum=0.99, weights=None,
                             beta_init='zero', gamma_init='one',
                             gamma_regularizer=None, beta_regularizer=None))
model.add(Dense(nb_classes, init='normal'))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['acc'])

# Train the model
nama_filenya = "weights_" + vartuning + "_.hdf5"
checkpointer = ModelCheckpoint(filepath=nama_filenya, monitor='val_acc', verbose=1,
                               save_best_only=True, save_weights_only=True)
hist = model.fit(train_set_R1, Y_train, validation_data=(test_set_R1, Y_test),
                 batch_size=8, nb_epoch=jumEpoch, shuffle=True, verbose=1,
                 callbacks=[checkpointer])

# Evaluate the model: load the best weights back into the same architecture
# (loading them into a fresh, empty Sequential() would fail).
model.load_weights(nama_filenya)
score = model.evaluate(test_set_R1, Y_test, batch_size=8)
print("Model score:")
print(score[1])
Y_pred = model.predict_classes(test_set_R1, batch_size=8)
grup.append(kcv)
grup.append(score[1])
cohennya = cohen_kappa_score(np.argmax(Y_test, axis=1), Y_pred)
print("cohen kappa:")
print(cohennya)
grup.append(cohennya)
writer.writerows([grup])
def main():
    accuracy1 = accuracy2 = accuracy3 = accuracy4 = 0
    roc_accuracy1 = roc_accuracy2 = roc_accuracy3 = roc_accuracy4 = 0
    kappa1 = 0
    matthews1 = 0
    precision1_0 = precision2_0 = precision3_0 = precision4_0 = 0
    precision1_1 = precision2_1 = precision3_1 = precision4_1 = 0
    recall1_0 = recall2_0 = recall3_0 = recall4_0 = 0
    recall1_1 = recall2_1 = recall3_1 = recall4_1 = 0
    f1measure1_0 = f1measure2_0 = f1measure3_0 = f1measure4_0 = 0
    f1measure1_1 = f1measure2_1 = f1measure3_1 = f1measure4_1 = 0
    K = 10
    for i in range(0, 10):
##        x_te = pd.read_csv("testing_English/test_M" + str(i) + ".csv", delimiter=",", header=None, encoding="utf-8")
##        x_tr = pd.read_csv("testing_English/train_M" + str(i) + ".csv", delimiter=",", header=None, encoding="utf-8")
        for j in range(0, 1):
            x_te = pd.read_csv("testing_baselines/test.csv", delimiter=",",
                               header=None, encoding="utf-8")
            x_tr = pd.read_csv("testing_baselines/train.csv", delimiter=",",
                               header=None, encoding="utf-8")
            x_te_1 = x_te.loc[x_te[9] == 1]
            x_te_0 = x_te.loc[x_te[9] == 0]
            l = round((len(x_te_0.index)) * 0.01)
            rows = random.sample(list(x_te.loc[x_te[9] == 1].index), l)
            x_te_11 = x_te_1.ix[rows]
            frames = [x_te_11, x_te_0]
            x_tef = pd.concat(frames)
            y_te = x_tef[9].values
            y_tr = x_tr[9].values
            del x_tr[9]
            x = x_tr[[1, 2, 7]].values
            X_tr = x[:, 0:10]
            Y_tr = y_tr
            x = x_tef[[1, 2, 7]].values
            x_ter = x_tef[[1, 2, 7, 9]]
            print(x_ter)
            feature = 7
            x_ter_1 = x_ter.loc[x_ter[9] == 1]
            x_ter_0 = x_ter.loc[x_ter[9] == 0]
            l = round(len(x_ter_1) * 0.1)
            print(l)
            rows1 = random.sample(list(x_ter_1.index), l)
            rows0 = random.sample(list(x_ter_0.index), l)
            del x_ter[9]
            x_ter = randomSwap(x_ter, rows1, rows0)
##            for k in [1, 2, 7]:
##                x_ter = robustSwap(x_ter, k, rows1, rows0)
            x = x_ter.values
            X_te = x[:, 0:10]
            Y_te = y_te

            rf = RandomForestClassifier(n_estimators=100, criterion="entropy",
                                        random_state=4294967294)
            rf.fit(X_tr, Y_tr)
            predicted_digits = rf.predict(X_te)
            Y_p1 = rf.predict_proba(X_te)
            Y_p = predicted_digits
            accuracy1 = accuracy1 + accuracy_score(Y_te, Y_p)
            print("random forests")
            print("accuracy", accuracy_score(Y_te, Y_p))
            precision1_0 = precision1_0 + precision_score(Y_te, Y_p, average='binary', pos_label=0)
            precision1_1 = precision1_1 + precision_score(Y_te, Y_p, average='binary', pos_label=1)
            recall1_0 = recall1_0 + recall_score(Y_te, Y_p, average='binary', pos_label=0)
            recall1_1 = recall1_1 + recall_score(Y_te, Y_p, average='binary', pos_label=1)
            print("precision_0", precision_score(Y_te, Y_p, average='binary', pos_label=1))
            print("precision_1", precision_score(Y_te, Y_p, average='binary', pos_label=0))
            print("recall_0", recall_score(Y_te, Y_p, average='binary', pos_label=0))
            print("recall_1", recall_score(Y_te, Y_p, average='binary', pos_label=1))
            f1measure1_0 = f1measure1_0 + f1_score(Y_te, Y_p, average='binary', pos_label=0)
            f1measure1_1 = f1measure1_1 + f1_score(Y_te, Y_p, average='binary', pos_label=1)
            print("f1 score _0", f1_score(Y_te, Y_p, average='binary', pos_label=0))
            print("f1 score _1", f1_score(Y_te, Y_p, average='binary', pos_label=1))
            print(confusion_matrix(Y_te, Y_p))
            print("roc_auc_curve results")
            roc_accuracy1 = roc_accuracy1 + met.roc_auc_score(Y_te, Y_p1[:, 1])
            print(met.roc_auc_score(Y_te, Y_p))
            kappa1 = kappa1 + cohen_kappa_score(Y_te, Y_p)
            matthews1 = matthews1 + met.matthews_corrcoef(Y_te, Y_p)
##            (Large commented-out sections stood here: analogous evaluation
##            blocks for svm.SVC, linear_model.LogisticRegression and
##            KNeighborsClassifier, accumulating accuracy2-4,
##            precision/recall/f1 per class and roc_accuracy3-4 in the same
##            pattern as the random forest above, followed by commented-out
##            per-classifier printing of the metrics averaged over K folds.)

    table = [
##        ["Random Forests", "", "", "", "", "", "", "", "", "", ""],
        [
            "Random Forests", accuracy1 / K, roc_accuracy1 / K,
            precision1_0 / K, precision1_1 / K, recall1_0 / K, recall1_1 / K,
            f1measure1_0 / K, f1measure1_1 / K, kappa1 / K, matthews1 / K
        ]
    ]
##    ["Logistic regression", "", "", "", "", "", "", "", "", "", ""],
##    ["", accuracy3/K, roc_accuracy3/K, precision3_0/K, precision3_1/K, recall3_0/K, recall3_1/K, f1measure3_0/K, f1measure3_1/K, kappa3/K, matthews3/K],
##    ["k-nearest neighbor", "", "", "", "", "", "", "", "", "", ""],
##    ["", accuracy4/K, roc_accuracy4/K, precision4_0/K, precision4_1/K, recall4_0/K, recall4_1/K, f1measure4_0/K, f1measure4_1/K, kappa4/K, matthews4/K]]
    headers = [
        "Classifier", "Accuracy", "roc_auc_curve", "precision_0",
        "precision_1", "recall_0", "recall_1", "f1 score_0", "f1 score_1",
        "kappa Score", "Matthews score"
    ]
    print(tabulate(table, headers=headers))
    f = open('Baselines_100_random.txt', 'w')
    f.write(tabulate(table, headers=headers))
    f.close()
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score

fp = open('C:\\Users\\user\\Desktop\\raptor_fb_done.txt', 'r')
lines = fp.readlines()
y_nik = []
y_mits = []
for line in lines:
    line = line.strip('\n')
    l = line.split('\t')
    y_nik.append(l[0])
    y_mits.append(l[1])
print(cohen_kappa_score(y_nik, y_mits))

# run models with dimitris labels
}, inplace=True)
merge_df = merge_df[['null', 'Sand', 'Gravel', 'Boulders', 'substrate']]
pvt = pd.pivot_table(merge_df, index=['substrate'],
                     values=['null', 'Sand', 'Gravel', 'Boulders'],
                     aggfunc=np.nansum)
del merge_df

# Percentage classification table
class_df = pvt.div(pvt.sum(axis=1), axis=0)

writer = pytablewriter.MarkdownTableWriter()
writer.table_name = "Non-negative Least Squares Classification Results"
writer.header_list = list(class_df.columns.values)
writer.value_matrix = class_df.values.tolist()
writer.write_table()

#df = pd.pivot_table(merge_df, values=['Sand', 'Gravel', 'Boulders'],
#                    index=['substrate'], aggfunc=np.sum)
# ^ merge_df is deleted above and df is reassigned immediately below, so this
#   statement would raise a NameError if left active.
df = pd.read_csv(r"C:\workspace\GLCM\new_output\LSQ_Results\pred_true_lsq.csv",
                 sep=',', names=['pred', 'true'], header=None)
print(classification_report(df['pred'].ravel(), df['true'].ravel()))
print(cohen_kappa_score(df['pred'].ravel(), df['true'].ravel()))
roc = None
try:
    roc = roc_auc_score(Y_test_lb, Y_pred, average='macro')
except ValueError:  # e.g. a class missing from Y_test_lb
    pass
print('AUC score:')
print(roc)

# F1
f1 = f1_score(numpy.argmax(Y_test_lb, axis=1), numpy.argmax(Y_pred, axis=1),
              average='macro')
print('F1 score:')
print(f1)

# Kappa
kappa = cohen_kappa_score(numpy.argmax(Y_test_lb, axis=1),
                          numpy.argmax(Y_pred, axis=1))
print('Kappa score:')
print(kappa)

#import matplotlib
#matplotlib.use('Agg')
#import matplotlib.pyplot as plt
# summarize history for accuracy
#plt.plot(history.history['acc'])
#plt.plot(history.history['val_acc'])
#plt.title('model accuracy')
#plt.ylabel('accuracy')
#plt.xlabel('epoch')
#plt.legend(['train', 'test'], loc='upper left')
#plt.show()
# summarize history for loss
def cohen_kappa(y_true, y_pred):
    # Convert one-hot rows (plain lists) to class indices before scoring.
    y_t_clean = [row.index(1) for row in y_true]
    y_p_clean = [row.index(1) for row in y_pred]
    return cohen_kappa_score(y_t_clean, y_p_clean)
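# Example input for the helper above: one-hot rows as plain Python lists.
# (Nested numpy arrays would need .tolist() or argmax instead, since ndarrays
# have no .index method.)
y_true = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
y_pred = [[1, 0, 0], [0, 0, 1], [0, 0, 1]]
print(cohen_kappa(y_true, y_pred))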
                                 train_loader, criterion, EPOCH,
                                 use_cuda=USE_CUDA,
                                 output_file_path=output_file_path)
with open(output_file_path, 'a+') as f:
    f.write('Epoch {}: Train Loss {}\n'.format(epoch + 1, train_loss))

if epoch >= start_val:
    start = time.time()
    val_loss, probs, truth, _ = validate_epoch(model, val_loader, criterion,
                                               USE_CUDA)
    preds = probs.argmax(1)

    # Validation metrics
    cm = confusion_matrix(truth, preds)
    kappa = np.round(cohen_kappa_score(truth, preds, weights="quadratic"), 4)
    acc = np.round(np.mean(cm.diagonal().astype(float) / cm.sum(axis=1)), 4)
    mse = np.round(mean_squared_error(truth, preds), 4)
    val_time = np.round(time.time() - start, 4)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_mse.append(mse)
    val_acc.append(acc)
    val_kappa.append(kappa)

    with open(output_file_path, 'a+') as f:
        f.write(str(cm.diagonal() / cm.sum(axis=1)) + '\n')
        f.write('Epoch {}: Val Loss {}; Val Acc {}; Val MSE {}; Val Kappa {};\n'
                .format(epoch + 1, val_loss, acc, mse, kappa))

    # Making logs backup
                 batch_size=batch_size,
                 epochs=nb_epoch,
                 shuffle=False,
                 verbose=2,
                 callbacks=[mcp, lr])

model.load_weights(output_file)

# Show each layer's name
for i in range(len(model.layers)):
    print(str(i), model.layers[i].name)

predictions_valid = model.predict(X_test, batch_size=batch_size, verbose=2)
Y_pred = np.argmax(predictions_valid, axis=-1)
Y_test = np.argmax(y_test, axis=-1)  # Convert one-hot to index
print("Accuracy = ", accuracy_score(Y_test, Y_pred))
print(classification_report(Y_test, Y_pred))
print("Kappa accuracy = ", cohen_kappa_score(Y_test, Y_pred))
print(confusion_matrix(Y_test, Y_pred))

fim = time.time()
total = fim - inicio
print("Execution time - ", total, " sec")
roc = None
try:
    roc = roc_auc_score(dummy_y_test, Y_pred, average='macro')
except ValueError:  # e.g. a class missing from dummy_y_test
    pass
print('AUC score:')
print(roc)

# F1
f1 = f1_score(numpy.argmax(dummy_y_test, axis=1), numpy.argmax(Y_pred, axis=1),
              average='macro')
print('F1 score:')
print(f1)

# Kappa
kappa = cohen_kappa_score(numpy.argmax(dummy_y_test, axis=1),
                          numpy.argmax(Y_pred, axis=1))
print('Kappa score:')
print(kappa)

#import matplotlib
#matplotlib.use('Agg')
#import matplotlib.pyplot as plt
# summarize history for accuracy
#plt.plot(history.history['acc'])
#plt.plot(history.history['val_acc'])
#plt.title('model accuracy')
#plt.ylabel('accuracy')
#plt.xlabel('epoch')
#plt.legend(['train', 'test'], loc='upper left')
#plt.show()
# summarize history for loss
writer.add_scalar('Train mode/epoch_losses_sup', epoch_losses_sup, epoch)
writer.add_scalar('Train mode/epoch_losses_unsup', epoch_losses_unsup, epoch)

ssvae.eval()

'''Train'''
xs = train_dataset.trials
ys = torch.topk(train_dataset.labels, 1)[1].squeeze()
if use_cuda:
    xs = xs.cuda()
    ys = ys.cuda()
outputs = ssvae.classifier(xs)
_, y_pred = torch.max(outputs, 1)
train_acc = accuracy_score(ys.data.cpu().numpy(), y_pred.data.cpu().numpy())
train_kappa = cohen_kappa_score(ys.data.cpu().numpy(), y_pred.data.cpu().numpy())

'''Valid'''
xs = valid_dataset.trials
ys = torch.topk(valid_dataset.labels, 1)[1].squeeze()
if use_cuda:
    xs = xs.cuda()
    ys = ys.cuda()
outputs = ssvae.classifier(xs)
_, y_pred = torch.max(outputs, 1)
valid_acc = accuracy_score(ys.data.cpu().numpy(), y_pred.data.cpu().numpy())
valid_kappa = cohen_kappa_score(ys.data.cpu().numpy(), y_pred.data.cpu().numpy())

'''Test'''
xs = test_dataset.trials
ys = torch.topk(test_dataset.labels, 1)[1].squeeze()
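# A possible refinement (a sketch, assuming PyTorch): the evaluation passes
# above build autograd graphs they never use, so wrapping each forward pass in
# torch.no_grad() saves memory without changing the metrics.
import torch

with torch.no_grad():
    outputs = ssvae.classifier(xs)
    _, y_pred = torch.max(outputs, 1)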
def create_analysis_report(model_output, groundtruth, output_path, LABELS_LIST,
                           validation_output=None, validation_groundtruth=None):
    """
    Create a report of all the different evaluation metrics, including
    optimizing the threshold with the validation set if it is passed in the
    parameters.
    """
    # Round the probabilities at 0.5
    model_output_rounded = np.round(model_output)
    model_output_rounded = np.clip(model_output_rounded, 0, 1)

    # Create a dataframe where we keep all the evaluations, starting with
    # prediction accuracy
    accuracies_perclass = sum(model_output_rounded == groundtruth) / len(groundtruth)
    results_df = pd.DataFrame(columns=LABELS_LIST)
    results_df.index.astype(str, copy=False)
    percentage_of_positives_perclass = sum(groundtruth) / len(groundtruth)
    results_df.loc[0] = percentage_of_positives_perclass
    results_df.loc[1] = accuracies_perclass
    results_df.index = ['Ratio of positive samples', 'Model accuracy']

    # plot the accuracies per class
    results_df.T.plot.bar(figsize=(22, 12), fontsize=18)
    plt.title('Model accuracy vs the ratio of positive samples per class')
    plt.xticks(rotation=45)
    plt.savefig(os.path.join(output_path, "accuracies_vs_positiveRate.pdf"),
                format="pdf")
    plt.savefig(os.path.join(output_path, "accuracies_vs_positiveRate.png"))

    # Getting the true positive rate per class
    true_positives_ratio_perclass = sum((model_output_rounded == groundtruth)
                                        * (groundtruth == 1)) / sum(groundtruth)
    results_df.loc[2] = true_positives_ratio_perclass
    # Get true negative ratio
    true_negative_ratio_perclass = sum(
        (model_output_rounded == groundtruth)
        * (groundtruth == 0)) / (len(groundtruth) - sum(groundtruth))
    results_df.loc[3] = true_negative_ratio_perclass

    # compute additional metrics (AUC, f1, recall, precision)
    auc_roc_per_label = roc_auc_score(groundtruth, model_output, average=None)
    precision_perlabel = precision_score(groundtruth, model_output_rounded,
                                         average=None)
    recall_perlabel = recall_score(groundtruth, model_output_rounded, average=None)
    f1_perlabel = f1_score(groundtruth, model_output_rounded, average=None)
    kappa_perlabel = [
        cohen_kappa_score(groundtruth[:, x], model_output_rounded[:, x])
        for x in range(len(LABELS_LIST))
    ]
    results_df = results_df.append(
        pd.DataFrame([auc_roc_per_label, recall_perlabel, precision_perlabel,
                      f1_perlabel, kappa_perlabel], columns=LABELS_LIST))
    results_df.index = [
        'Ratio of positive samples', 'Model accuracy', 'True positives ratio',
        'True negatives ratio', "AUC", "Recall", "Precision", "f1-score",
        "Kappa score"
    ]

    # Creating evaluation plots
    plot_true_poisitve_vs_all_positives(
        model_output_rounded, groundtruth,
        os.path.join(output_path, 'TruePositive_vs_allPositives'), LABELS_LIST)
    plot_output_coocurances(model_output_rounded,
                            os.path.join(output_path, 'output_coocurances'),
                            LABELS_LIST)
    plot_false_netgatives_confusion_matrix(
        model_output_rounded, groundtruth,
        os.path.join(output_path, 'false_negative_coocurances'), LABELS_LIST)

    # Adjusting threshold based on validation set
    if validation_groundtruth is not None and validation_output is not None:
        np.savetxt(os.path.join(output_path, 'validation_predictions.out'),
                   validation_output, delimiter=',')
        np.savetxt(os.path.join(output_path, 'valid_ground_truth_classes.txt'),
                   validation_groundtruth, delimiter=',')
        thresholds = np.arange(0, 1, 0.01)
        f1_array = np.zeros((len(LABELS_LIST), len(thresholds)))
        for idx, label in enumerate(LABELS_LIST):
            f1_array[idx, :] = [
                f1_score(validation_groundtruth[:, idx],
                         np.clip(np.round(validation_output[:, idx]
                                          - threshold + 0.5), 0, 1))
                for threshold in thresholds
            ]
        threshold_arg = np.argmax(f1_array, axis=1)
        threshold_per_class = thresholds[threshold_arg]

        # plot the f1 score across thresholds
        plt.figure(figsize=(20, 20))
        for idx, x in enumerate(LABELS_LIST):
            plt.plot(thresholds, f1_array[idx, :], linewidth=5)
        plt.legend(LABELS_LIST, loc='best')
        plt.title("F1 Score vs different prediction threshold values for each class")
        plt.savefig(os.path.join(output_path, "f1_score_vs_thresholds.pdf"),
                    format="pdf")
        plt.savefig(os.path.join(output_path, "f1_score_vs_thresholds.png"))

        # Applying thresholds optimized per class
        model_output_rounded = np.zeros_like(model_output)
        for idx, label in enumerate(LABELS_LIST):
            model_output_rounded[:, idx] = np.clip(
                np.round(model_output[:, idx] - threshold_per_class[idx] + 0.5),
                0, 1)
        accuracies_perclass = sum(
            model_output_rounded == groundtruth) / len(groundtruth)
        # Getting the true positive rate per class
        true_positives_ratio_perclass = sum(
            (model_output_rounded == groundtruth)
            * (groundtruth == 1)) / sum(groundtruth)
        # Get true negative ratio
        true_negative_ratio_perclass = sum(
            (model_output_rounded == groundtruth)
            * (groundtruth == 0)) / (len(groundtruth) - sum(groundtruth))
        results_df = results_df.append(
            pd.DataFrame([accuracies_perclass, true_positives_ratio_perclass,
                          true_negative_ratio_perclass], columns=LABELS_LIST))
        # compute additional metrics (AUC, f1, recall, precision)
        auc_roc_per_label = roc_auc_score(groundtruth, model_output, average=None)
        precision_perlabel = precision_score(groundtruth, model_output_rounded,
                                             average=None)
        recall_perlabel = recall_score(groundtruth, model_output_rounded,
                                       average=None)
        f1_perlabel = f1_score(groundtruth, model_output_rounded, average=None)
        kappa_perlabel = [
            cohen_kappa_score(groundtruth[:, x], model_output_rounded[:, x])
            for x in range(len(LABELS_LIST))
        ]
        results_df = results_df.append(
            pd.DataFrame([auc_roc_per_label, precision_perlabel, recall_perlabel,
                          f1_perlabel, kappa_perlabel], columns=LABELS_LIST))
        # The unoptimized rows were appended as AUC, recall, precision (in that
        # order), so the labels here must list "Recall" before "Precision" to
        # match; the optimized rows were appended as AUC, precision, recall.
        results_df.index = [
            'Ratio of positive samples', 'Model accuracy',
            'True positives ratio', 'True negatives ratio', "AUC", "Recall",
            "Precision", "f1-score", "Kappa score", 'Optimized model accuracy',
            'Optimized true positives ratio', 'Optimized true negatives ratio',
            "Optimized AUC", "Optimized precision", "Optimized recall",
            "Optimized f1-score", "Optimized Kappa score"
        ]
        # Creating evaluation plots
        plot_true_poisitve_vs_all_positives(
            model_output_rounded, groundtruth,
            os.path.join(output_path, 'TruePositive_vs_allPositives[optimized]'),
            LABELS_LIST)
        plot_output_coocurances(
            model_output_rounded,
            os.path.join(output_path, 'output_coocurances[optimized]'),
            LABELS_LIST)
        plot_false_netgatives_confusion_matrix(
            model_output_rounded, groundtruth,
            os.path.join(output_path, 'false_negative_coocurances[optimized]'),
            LABELS_LIST)

    results_df['average'] = results_df.mean(numeric_only=True, axis=1)
    results_df.T.to_csv(os.path.join(output_path, "results_report.csv"),
                        float_format="%.2f")
    return results_df
def kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')
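# The quadratic-weighted kappa above can be wrapped with make_scorer so it
# plugs into cross-validation; a minimal sketch, assuming a generic classifier
# `clf` and arrays `X`, `y` (hypothetical names).
from sklearn.metrics import cohen_kappa_score, make_scorer
from sklearn.model_selection import cross_val_score

qwk_scorer = make_scorer(cohen_kappa_score, weights='quadratic')
# scores = cross_val_score(clf, X, y, scoring=qwk_scorer, cv=5)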
def run_training():
    # Read the training data
    training_examples = read_im_list('tf_train.csv')
    val_examples = read_im_list('tf_val.csv')
    #np.random.seed(42)  # shuffle the same way each time for consistency
    np.random.shuffle(training_examples)
    fetcher = Fetcher(training_examples)
    #images, labels = fetcher.load_batch_balanced(FLAGS.batch_size)
    #print images.shape, labels.shape
    file_io.create_dir(os.path.join(FLAGS.model_dir))
    file_io.create_dir(os.path.join(FLAGS.train_output_dir))
    #FLAGS.im_size = 256
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels and mark as input.
        x = tf.placeholder(tf.float32, shape=(None, FLAGS.im_size, FLAGS.im_size, 3))
        y_ = tf.placeholder(tf.float32, shape=(None, n_classes))
        # See "Using instance keys": https://cloud.google.com/ml/docs/how-tos/preparing-models
        # for why we have keys_placeholder
        keys_placeholder = tf.placeholder(tf.int64, shape=(None,))
        # IMPORTANT: Do not change the input map
        inputs = {'key': keys_placeholder.name, 'image': x.name}
        tf.add_to_collection('inputs', json.dumps(inputs))
        # Build the network
        #net = network(x)
        #net = alexnet(x)
        net = overfeat(x)
        # Add to the Graph the Ops for loss calculation.
        loss = slim.losses.softmax_cross_entropy(net, y_)
        tf.scalar_summary(loss.op.name, loss)  # keep track of value for TensorBoard
        # To be able to extract the id, we need to add the identity function.
        keys = tf.identity(keys_placeholder)
        # The prediction will be the index in logits with the highest score.
        # We also use a softmax operation to produce a probability distribution
        # over all possible classes.
        # DO NOT REMOVE OR CHANGE VARIABLE NAMES - used when predicting with a model
        prediction = tf.argmax(net, 1)
        scores = tf.nn.softmax(net)
        # Mark the outputs.
        outputs = {'key': keys.name,
                   'prediction': prediction.name,
                   'scores': scores.name}
        tf.add_to_collection('outputs', json.dumps(outputs))
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = tf.train.AdamOptimizer(FLAGS.Adam_lr).minimize(loss)  # lr = 1e-4
        #train_op = tf.train.AdamOptimizer(FLAGS.Adam_lr, beta1=FLAGS.Adam_beta1,
        #                                  beta2=FLAGS.Adam_beta2, epsilon=FLAGS.Adam_eps).minimize(loss)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        # Add the variable initializer Op.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(max_to_keep=20)
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_output_dir, sess.graph)
        # After everything is built, run the Op to initialize the variables.
        sess.run(init)
        lossf = open(os.path.join(FLAGS.model_dir, 'loss_acc.txt'), 'w')
        lossf.write('step, loss\n')
        lossf.close()
        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            #images, labels = fetcher.load_batch(FLAGS.batch_size)
            images, labels = fetcher.load_batch_balanced(FLAGS.batch_size)
            #images, labels = load_cv_batch(x_train, y_train, step, FLAGS.batch_size)
            feed_dict = {x: images, y_: labels}
            # Run one step of the model. The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op. To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            # Write the summaries and print an overview fairly often.
            if (step % 5) == 0:
                # Print status to stdout.
                print('Step %d: loss = %g (%.3f sec)' % (step, loss_value, duration))
                sys.stdout.flush()
                with open(os.path.join(FLAGS.model_dir, 'loss_acc.txt'), 'a') as lossf:
                    lossf.write('%d, %g\n' % (step, loss_value))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            if (step % 25 == 0 and step > 1) or (step == FLAGS.max_steps - 1):
                input_alias_map = json.loads(sess.graph.get_collection('inputs')[0])
                output_alias_map = json.loads(sess.graph.get_collection('outputs')[0])
                aliases, tensor_names = zip(*output_alias_map.items())
                y_true = []
                y_pred = []
                for (label, files) in val_examples:
                    #channels = [misc.imread(file_io.FileIO(f, 'r')) for f in files]
                    #channels = [misc.imresize(channels[j], (im_size, im_size)) for j in xrange(len(channels))]
                    channels = misc.imread(files)
                    #image = np.dstack(channels)
                    image = misc.imresize(channels, [FLAGS.im_size, FLAGS.im_size])
                    feed_dict = {input_alias_map['image']: [image]}
                    predict, scores = sess.run(
                        fetches=[output_alias_map['prediction'], output_alias_map['scores']],
                        feed_dict=feed_dict)
                    y_true.append(np.argmax(label))
                    y_pred.append(predict[0])
                accuracy = metrics.accuracy_score(y_true, y_pred)
                #f1macro = metrics.f1_score(y_true, y_pred, average='macro')
                #f1micro = metrics.f1_score(y_true, y_pred, average='micro')
                #print('Val Accuracy: %g, Val F1macro: %g, Val F1micro: %g\n' % (accuracy, f1macro, f1micro))
                f1 = metrics.f1_score(y_true, y_pred)
                cohen_kappa = metrics.cohen_kappa_score(y_true, y_pred)
                print('Val Accuracy: %g, F1: %g, cohen_kappa: %g\n' % (accuracy, f1, cohen_kappa))
                print('Confusion matrix is')
                print(metrics.confusion_matrix(y_true, y_pred))
                with open(os.path.join(FLAGS.model_dir, 'loss_acc.txt'), 'a') as lossf:
                    lossf.write('%d, %g, %g, %g, %g\n' % (step, loss_value, accuracy, f1, cohen_kappa))
            if (step % 1000 == 0 and step > 1) or (step == FLAGS.max_steps - 1):
                # Export the model so that it can be loaded and used later for predictions.
                file_io.create_dir(os.path.join(FLAGS.model_dir, str(step)))
                saver.save(sess, os.path.join(FLAGS.model_dir, str(step), 'export'))
print('svm p', precision_score(test_label, tes_label_svm, average='weighted'))
print('svm r', recall_score(test_label, tes_label_svm, average='weighted'))
print('knn f1', f1_score(test_label, tes_label_knn, average='weighted'))
print('knn p', precision_score(test_label, tes_label_knn, average='weighted'))
print('knn r', recall_score(test_label, tes_label_knn, average='weighted'))
print('dtc f1', f1_score(test_label, tes_label_dtc, average='weighted'))
print('dtc p', precision_score(test_label, tes_label_dtc, average='weighted'))
print('dtc r', recall_score(test_label, tes_label_dtc, average='weighted'))
print('rf f1', f1_score(test_label, tes_label_rf, average='weighted'))
print('rf p', precision_score(test_label, tes_label_rf, average='weighted'))
print('rf r', recall_score(test_label, tes_label_rf, average='weighted'))
print('svm cohen_kappa_score:', cohen_kappa_score(test_label, tes_label_svm))
print('knn cohen_kappa_score:', cohen_kappa_score(test_label, tes_label_knn))
print('dtc cohen_kappa_score:', cohen_kappa_score(test_label, tes_label_dtc))
print('rf cohen_kappa_score:', cohen_kappa_score(test_label, tes_label_rf))
print('svm ham_distance:', hamming_loss(test_label, tes_label_svm))
print('knn ham_distance:', hamming_loss(test_label, tes_label_knn))
print('dtc ham_distance:', hamming_loss(test_label, tes_label_dtc))
print('rf ham_distance:', hamming_loss(test_label, tes_label_rf))
# Inspect the decision function
'''
print('train_decision_function:\n', classifier.decision_function(train_data))  # (90, 3)
print('predict_result:\n', classifier.predict(train_data))
'''
print('svm rmse:', mean_squared_error(test_label, tes_label_svm, squared=False))
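# The repeated per-model prints above could be driven from a dict; a minimal
# sketch, assuming the same prediction arrays are in scope (the mapping below
# is illustrative, not part of the original code).
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             cohen_kappa_score, hamming_loss)

predictions = {'svm': tes_label_svm, 'knn': tes_label_knn,
               'dtc': tes_label_dtc, 'rf': tes_label_rf}
for name, pred in predictions.items():
    print(name, 'f1', f1_score(test_label, pred, average='weighted'))
    print(name, 'p', precision_score(test_label, pred, average='weighted'))
    print(name, 'r', recall_score(test_label, pred, average='weighted'))
    print(name, 'cohen_kappa_score:', cohen_kappa_score(test_label, pred))
    print(name, 'ham_distance:', hamming_loss(test_label, pred))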
def evaluate_models(y_true, y_pred):
    # The original body referenced y_test; use the function parameters instead.
    print(classification_report(y_true, y_pred))
    print('confusion_matrix(0,1):')
    print(confusion_matrix(y_true, y_pred))
    print('cohen_kappa_score:', cohen_kappa_score(y_true, y_pred))
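# A quick smoke test of evaluate_models with toy labels (hypothetical data,
# purely for illustration):
y_true_demo = [0, 1, 1, 0, 1]
y_pred_demo = [0, 1, 0, 0, 1]
# evaluate_models(y_true_demo, y_pred_demo)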
def testClassifier(clf):
    # XGB tuning - concept, not in use
    param_grid = [{
        'max_depth': range(2, 6, 2),
        'min_child_weight': range(2, 6, 2),
        'n_estimators': range(100, 200, 75),
        'learning_rate': [0.1],
        'gamma': [0, 1, 10],
        'subsample': [0.6, 0.8],
        'colsample_bytree': [0.6, 0.8],
        'reg_alpha': [1, 10],
        'reg_lambda': [1, 10]
    }]
    fit_params = {
        "early_stopping_rounds": 8,
        "eval_metric": "map",
        "eval_set": [[X_test, y_test]],
        "verbose": False
    }
    grid = GridSearchCV(clf, param_grid, fit_params=fit_params, cv=3,
                        verbose=1, n_jobs=-1, scoring='average_precision')
    fitted_classifier = grid.fit(X_train, y_train)
    print(grid.best_score_, grid.best_params_)
    predictions = fitted_classifier.predict(X_test)
    score1 = metrics.accuracy_score(y_test.values, predictions)
    score2 = metrics.roc_auc_score(y_test.values, predictions)
    score3 = metrics.cohen_kappa_score(y_test.values, predictions)
    score4 = metrics.classification_report(y_test.values, predictions)
    print('Accuracy score, ROC AUC, Cohen Kappa')
    print(score1, score2, score3)
    print('Classification Report')
    print(score4)
    print('Normal Fit')
    fitted = clf.fit(X_train, y_train)
    scoresCV = cross_val_score(clf, X_train, y_train, cv=3, verbose=0, n_jobs=-1)
    trainPredictionsCV = cross_val_predict(clf, X_train, y_train, cv=3, verbose=0, n_jobs=-1)
    trainPredictions = clf.predict(X_train)
    testPredictions = clf.predict(X_test)
    #X_test['Predictions'] = testPredictions
    score1 = metrics.accuracy_score(y_test.values, testPredictions)
    score2 = metrics.roc_auc_score(y_test.values, testPredictions)
    score3 = metrics.cohen_kappa_score(y_test.values, testPredictions)
    score4 = metrics.classification_report(y_test.values, testPredictions)
    print('Train score: ', metrics.accuracy_score(y_train.values, trainPredictions))
    print('CV score: ', scoresCV)
    print('Accuracy score, ROC AUC, Cohen Kappa')
    print(score1, score2, score3)
    print('Classification Report')
    print(score4)
    # WITH UNDER-SAMPLING:
    # Low precision in class 1 (~0.28) suggests too many salaries are labeled
    # as >50k when they are <50k - a potential after-effect of under-sampling.
    # High recall in class 1 (~0.90) suggests the classifier finds nearly all
    # positive samples.
    # WITHOUT UNDER-SAMPLING:
    # High precision in class 1 (~0.76) suggests the classifier handles
    # negative samples well.
    # Low recall in class 1 (~0.39) suggests the classifier misses many
    # positive samples.
    return clf
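# The grid search above optimizes average precision; if Cohen's kappa is the
# target metric, it can be passed as the scorer instead. A minimal sketch,
# under the same assumed X_train/y_train globals:
from sklearn.metrics import cohen_kappa_score, make_scorer
from sklearn.model_selection import GridSearchCV

kappa_scoring = make_scorer(cohen_kappa_score)
# grid = GridSearchCV(clf, param_grid, cv=3, scoring=kappa_scoring, n_jobs=-1)
# grid.fit(X_train, y_train)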
recall = mets.recall_score(voted_category, predicted_category, average='weighted', labels=labels)
print("Average (weighted) recall is {0:.2f}".format(recall))
# F1-score (the original mistakenly called mets.recall_score here)
f_score = mets.f1_score(voted_category, predicted_category, average='weighted', labels=labels)
print("Average (weighted) F1-score is {0:.2f}".format(f_score))
# Cohen's Kappa
ckappa = mets.cohen_kappa_score(voted_category, predicted_category, labels=labels)
print("\nCohen's Kappa is {0:.2f}".format(ckappa))
# Fleiss' Kappa
# Create an index for the IDs - IDs are strings and cannot be used directly
# as a dataframe index. The indexes run from 0 to 79.
ID_index = {}
ID_index_counter = 0
for ID in IDs:
    ID_index[ID] = ID_index_counter
    ID_index_counter += 1
# Create an empty dataframe to store the counts
df_fkappa = pd.DataFrame(columns=labels, index=list(range(len(IDs)))).fillna(0)
# For each subject (ID), extract all annotations and count them,
# then insert the counts into the dataframe
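# Once df_fkappa holds the per-subject counts of annotations per category,
# Fleiss' kappa can be computed with statsmodels; a minimal sketch, assuming
# the counts table has one row per subject and one column per category:
from statsmodels.stats.inter_rater import fleiss_kappa

# fkappa = fleiss_kappa(df_fkappa.values, method='fleiss')
# print("Fleiss' Kappa is {0:.2f}".format(fkappa))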
def predict_handler(config, args):
    ensemble_config_file = args.ensemble_config
    # Use an explicit loader; a bare yaml.load() is deprecated in newer PyYAML.
    ensemble_config = yaml.load(ensemble_config_file, Loader=yaml.SafeLoader)
    data_source = ensemble_config["data_source"]
    input_dir = args.input_dir
    output_dir = args.output_dir
    output_prediction_h5 = os.path.join(output_dir, "predictions.h5")
    output_text = args.output_text
    models_config = ensemble_config["models"]
    postprocessings = ensemble_config.get("postprocessing", [])
    params = ensemble_config.get("params", {})
    prediction_formater = ensemble_config.get("prediction_formater", categorical_prediction)
    if data_source.input_source is None:
        data_source.set_input_source(args.input_dir)
    log.info("Using datasource: %s", data_source)
    log.info("Attempting to classify data in %s", data_source.input_source)
    dataset_loader, _ = data_source.get_dataset_loader()
    log.info("Classifying %d datasets", len(dataset_loader))
    if output_dir is not None:
        if not os.path.exists(output_dir):
            log.info("Creating output directory: %s", output_dir)
            os.makedirs(output_dir)
    log.info("Predictions will be stored in: %s", output_prediction_h5)
    h5 = h5py.File(output_prediction_h5, 'w')
    for model_name, model_config in models_config.items():
        model_type = model_config.get("type", "keras")
        model_match_id_pattern = model_config.get('match_id', '.*')
        model_match_id = re.compile(model_match_id_pattern)
        window_size = model_config["window_size"]
        stride_size = model_config["stride_size"]
        swap_axes = model_config["swap_axes"]
        batch_size = model_config["batch_size"]
        tile_merge_strategy = model_config.get("tile_merge_strategy", "average")
        mapping = model_config["mapping"]
        ensemble_options = ensemble_config["ensemble"]
        ensemble_method = ensemble_options.get("method", "average")
        ensemble_handlers = {
            'average': ensemble_avg
        }
        if ensemble_method not in ensemble_handlers:
            raise KeyError("Unknown method")
        ensemble_handler = ensemble_handlers[ensemble_method]
        predicter = None
        model = None
        if model_type == "sklearn":
            model_path = model_config["path"]
            sklearn_model = pickle.load(open(model_path, "rb"))

            class __model_wrapper(object):
                def __init__(self, model):
                    self._model = model

                def predict(self, data, batch_size):
                    # Flatten each tile to per-pixel rows, predict
                    # probabilities, then restore the tile shape.
                    num_sample, width, height, num_chan = data.shape
                    data = data.reshape((num_sample * width * height, num_chan))
                    result = self._model.predict_proba(data)
                    result = result.reshape((num_sample, width, height, 2))
                    return result

            model = __model_wrapper(sklearn_model)
        elif model_type == "keras":
            model_path = model_config["path"]
            model_builder = model_config.get("builder", None)
            custom_objects = {}
            if model_builder:
                _, model_builder_custom_options = import_model_builder(model_builder)
                custom_objects.update(model_builder_custom_options)
            log.info("Loading keras model from %s", model_path)
            model = load_model(model_path, custom_objects=custom_objects)
            log.info("Finished loading")
        else:
            raise NotImplementedError("Unsupported model type: %s" % model_type)
        for scene in dataset_loader:
            scene_id, scene_data = scene
            if model_match_id.match(scene_id) is None:
                continue
            log.info("Classifying %s using %s", scene_id, model_name)
            input_mapping = mapping["inputs"]
            output_mapping = mapping.get("target", {})
            tile_loader = DatasetLoader((scene,),
                                        rasterio_env=dataset_loader.rasterio_env,
                                        _cache_data=dataset_loader._cache_data)
            tile_loader.reset()
            data_generator = DataGenerator(tile_loader,
                                           batch_size=batch_size,
                                           input_mapping=input_mapping,
                                           output_mapping=None,
                                           swap_axes=swap_axes,
                                           loop=False,
                                           default_window_size=window_size,
                                           default_stride_size=stride_size)
            if len(output_mapping) == 1:
                rio_raster = scene_data[output_mapping[0][0]]
                output_window_shape, output_stride_size = adapt_shape_and_stride(
                    rio_raster, data_generator.primary_scene, window_size, stride_size)
            else:
                output_window_shape = model_config.get('output_window_size',
                                                       model_config.get('window_size'))
                output_stride_size = model_config.get('output_stride_size',
                                                      model_config.get('stride_size'))
            output_shape = ensemble_config.get('output_shape', None)
            if output_shape:
                output_width, output_height = output_shape
            else:
                output_width, output_height = data_generator.primary_scene.shape
            image_probs = get_probabilities_from_tiles(
                model,
                data_generator,
                output_width,
                output_height,
                output_window_shape[0],
                output_window_shape[1],
                output_stride_size,
                batch_size,
                merge_strategy=tile_merge_strategy)
            dataset_name = "%s/%s" % (model_name, scene_id)
            log.info("Saving predictions to dataset: %s", dataset_name)
            h5[dataset_name] = image_probs
    models = list(models_config.keys())  # ?
    if output_text:
        log.info("Saving TXT output to: %s", output_text.name)
    annotation_idx = 0
    prediction_count = 0
    global_score_f1 = 0
    global_score_accuracy = 0
    global_score_precision = 0
    global_score_recall = 0
    global_score_roc_auc = 0
    global_score_jaccard_similarity = 0
    global_score_zero_one = 0
    global_score_hamming = 0
    global_score_average = 0
    global_score_kappa = 0
    global_score_iou = 0
    global_score_mean_iou = 0
    global_conf_matrix = 0
    global_score_overall_accuracy = 0
    dataset_loader.reset()
    for scene in dataset_loader:
        scene_id, scene_data = scene
        model_spec = [
            (model_name, models_config[model_name], h5["%s/%s" % (model_name, scene_id)])
            for model_name in models
        ]
        result = ensemble_handler(model_spec)
        log.info("Generating prediction representation")
        reconstructed = prediction_formater(result)
        log.info("Finished generating representation")
        if postprocessings:
            reconstructed, result = run_final_postprocessings(postprocessings, reconstructed, result)
        if output_dir is not None:
            if 'RGB' in scene_data:
                # ToDo: do we need special consideration for RGB?
                _source = scene_data['RGB']
            else:
                _source = list(scene_data.items())[0][1]
            _source_profile = _source.profile
            crs = _source_profile.get('crs', None)
            if crs is None:
                destination_file = os.path.join(output_dir, scene_id + ".png")
            else:
                destination_file = os.path.join(output_dir, scene_id + ".tif")
            log.info("Saving output tile to %s", destination_file)
            num_out_channels = reconstructed.shape[-1]
            if crs is None:
                io.imsave(destination_file, reconstructed.astype(rasterio.uint8))
            else:
                _source_profile.update(dtype=reconstructed.dtype,
                                       count=num_out_channels,
                                       compress='lzw',
                                       nodata=0)
                with rasterio.open(destination_file, 'w', **_source_profile) as dst:
                    for idx in range(0, num_out_channels):
                        dst.write(reconstructed[:, :, idx], idx + 1)
        if args.scoring_gti and args.scoring_gti not in scene_data:
            log.error("Components %s not in available components!", args.scoring_gti)
        if args.scoring_gti and args.scoring_gti in scene_data:
            # We can compute the score
            prediction_count += 1
            bin_gti = scene_data[args.scoring_gti].read(1)
            format_converter = params.get("format_converter", None)
            if format_converter is not None:
                bin_gti = format_converter(bin_gti)
            if params.get("type", "binary") == "binary":
                bin_gti = bin_gti > 0
            bin_gti = bin_gti.reshape(-1)
            flat_reconstructed = reconstructed.reshape(-1)
            log.info("Scoring %s", scene_id)
            metric_params = params.get("metrics", {})
            # Get all metric options
            metric_options = metric_params.get("options", {})
            metric_usage = metric_params.get("usage", {})
            if 'average' in metric_options and metric_options['average'] == 'None':
                metric_options['average'] = None

            # Map each metric name to the keyword arguments it uses
            def get_metric_option(metric_name, metric_options, metric_usage):
                metric_params = metric_usage.get(metric_name, [])
                metric_args = {}
                for m in metric_params:
                    metric_args[m] = metric_options.get(m, None)
                return metric_args

            f1_score_params = get_metric_option("f1_score", metric_options, metric_usage)
            score_f1 = f1_score(bin_gti, flat_reconstructed, **f1_score_params)
            score_accuracy = accuracy_score(bin_gti, flat_reconstructed)
            precision_score_params = get_metric_option("precision_score", metric_options, metric_usage)
            score_precision = precision_score(bin_gti, flat_reconstructed, **precision_score_params)
            recall_score_params = get_metric_option("recall_score", metric_options, metric_usage)
            score_recall = recall_score(bin_gti, flat_reconstructed, **recall_score_params)
            # score_roc_auc = roc_auc_score(bin_gti, flat_reconstructed)
            score_jaccard_similarity = jaccard_similarity_score(bin_gti, flat_reconstructed)
            score_zero_one = zero_one_loss(bin_gti, flat_reconstructed)
            hamming_loss_params = get_metric_option("hamming_loss", metric_options, metric_usage)
            score_hamming = hamming_loss(bin_gti, flat_reconstructed, **hamming_loss_params)
            score_average = 0
            if params.get("type", "binary") == "binary":
                average_precision_score_params = get_metric_option(
                    "average_precision_score", metric_options, metric_usage)
                score_average = average_precision_score(bin_gti, flat_reconstructed,
                                                        **average_precision_score_params)
            cohen_kappa_score_params = get_metric_option("cohen_kappa_score", metric_options, metric_usage)
            score_kappa = cohen_kappa_score(bin_gti, flat_reconstructed, **cohen_kappa_score_params)
            mean_iou_params = get_metric_option("np_mean_iou", metric_options, metric_usage)
            if params.get("type", "binary") == "binary":
                conf_matrix = confusion_matrix(bin_gti, flat_reconstructed)
                score_iou = 0
                score_mean_iou = 0
            else:
                conf_matrix, score_iou, score_mean_iou = np_mean_iou(bin_gti, flat_reconstructed,
                                                                     **mean_iou_params)
            score_overall_accuracy = overall_accuracy(conf_matrix)
            global_score_f1 += score_f1
            global_score_accuracy += score_accuracy
            global_score_precision += score_precision
            global_score_recall += score_recall
            # global_score_roc_auc += score_roc_auc
            global_score_jaccard_similarity += score_jaccard_similarity
            global_score_zero_one += score_zero_one
            global_score_hamming += score_hamming
            global_score_average += score_average
            global_score_kappa += score_kappa
            global_score_iou += score_iou
            global_score_mean_iou += score_mean_iou
            global_score_overall_accuracy += score_overall_accuracy
            log.info("{} Scores: F-1: {}, Accuracy: {}, Precision: {}, Recall: {}, "
                     "Jaccard Similarity: {}, Zero One Loss: {}, Hamming Loss: {}, "
                     "Average Precision Score: {}, Kappa Score: {}, IoU Score: {}, "
                     "Mean IoU: {}, Overall accuracy: {}".format(
                         scene_id, score_f1, score_accuracy, score_precision, score_recall,
                         score_jaccard_similarity, score_zero_one, score_hamming,
                         score_average, score_kappa, score_iou, score_mean_iou,
                         score_overall_accuracy))
        if output_text:
            height, width, _ = result.shape
            dfile = output_text.name + ".geojson"
            features = []
            out_scene_id = scene_id
            polys = list(to_polygons(reconstructed))
            idx = 0
            for poly in polys:
                idx += 1
                gjs = [_source.transform * i for i in poly[0]]
                crs = {
                    "type": "name",
                    "properties": {
                        "name": _source.crs["init"]
                    }
                }
                if len(gjs) < 3:
                    log.warning("Not enough points for assembling polygon")
                    continue
                features.append(Feature(geometry=Polygon(gjs)))
                pstr = "POLYGON (("
                crds = []
                for pairs in poly[0]:
                    crds.append("%.3f %.3f 0" % pairs)
                crds.append(crds[0])
                pstr += ",".join(crds)
                pstr += "))"
                output_text.write("%s,%d" % (out_scene_id, idx))
                output_text.write(',"%s",%d' % (pstr, 1))
                output_text.write("\n")
                output_text.flush()
            feature_collection = FeatureCollection(features, crs=crs)
            jsdump(feature_collection, open(dfile, "w"))
    if prediction_count > 0:
        global_score_f1 = global_score_f1 / prediction_count
        global_score_accuracy = global_score_accuracy / prediction_count
        global_score_precision = global_score_precision / prediction_count
        global_score_recall = global_score_recall / prediction_count
        global_score_roc_auc = global_score_roc_auc / prediction_count
        global_score_jaccard_similarity = global_score_jaccard_similarity / prediction_count
        global_score_zero_one = global_score_zero_one / prediction_count
        global_score_hamming = global_score_hamming / prediction_count
        global_score_average = global_score_average / prediction_count
        global_score_kappa = global_score_kappa / prediction_count
        global_score_iou = global_score_iou / prediction_count
        global_score_mean_iou = global_score_mean_iou / prediction_count
        global_score_overall_accuracy = global_score_overall_accuracy / prediction_count
    else:
        (global_score_f1, global_score_accuracy, global_score_precision, global_score_recall,
         global_score_roc_auc, global_score_jaccard_similarity, global_score_zero_one,
         global_score_hamming, global_score_average, global_score_kappa, global_score_iou,
         global_score_mean_iou, global_score_overall_accuracy) = (
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    log.info("Average Global Score => F-1: {}, Accuracy: {}, Precision: {}, Recall: {}, "
             "Jaccard Similarity: {}, Zero One Loss: {}, Hamming Loss: {}, "
             "Average Precision Score: {}, Kappa Score: {}, IoU Score: {}, "
             "Mean IoU: {}, Overall accuracy: {}".format(
                 global_score_f1, global_score_accuracy, global_score_precision,
                 global_score_recall, global_score_jaccard_similarity, global_score_zero_one,
                 global_score_hamming, global_score_average, global_score_kappa,
                 global_score_iou, global_score_mean_iou, global_score_overall_accuracy))
    log.info("Finished")
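# The 'average' ensemble handler referenced above is not shown in this snippet;
# a plausible minimal sketch that averages the per-model probability maps
# (purely illustrative, not the original implementation):
import numpy as np

def ensemble_avg(model_spec):
    # model_spec is a list of (model_name, model_config, h5_dataset) tuples
    probs = [np.asarray(dataset) for _, _, dataset in model_spec]
    return np.mean(probs, axis=0)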