# Evaluation for DR grading: quadratic-weighted kappa, mean AUC, and accuracy.
from sklearn.metrics import (accuracy_score, cohen_kappa_score as kappa,
                             confusion_matrix, roc_auc_score)


def eval_predictions_multi(y_true, y_pred, y_proba):
    # acc = balanced_accuracy_score(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    if y_proba.shape[1] == 2:
        k = 0
        classes = ['R0', 'R1']
        mean_auc = roc_auc_score(y_true, y_proba[:, 1])
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    elif y_proba.shape[1] == 6:
        k = kappa(y_true, y_pred, weights='quadratic')
        classes = ['DR0', 'DR1', 'DR2', 'DR3', 'DR4', 'DR5']
        # mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovr')
        # ovo should be better, but average is not clear from docs
        # mean_auc = roc_auc_score(y_true, y_proba, average='macro', multi_class='ovo')
        mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovo')
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5])
    else:
        k = kappa(y_true, y_pred, weights='quadratic')
        classes = ['DR0', 'DR1', 'DR2', 'DR3', 'DR4']
        # mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovr')
        # ovo should be better, but average is not clear from docs
        # mean_auc = roc_auc_score(y_true, y_proba, average='macro', multi_class='ovo')
        mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovo')
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3, 4])
    print_cm(cm, classes)  # print_cm is defined elsewhere in the project
    return k, mean_auc, acc
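
# --- Usage sketch (added; not from the original source). A self-contained toy
# run of the metrics eval_predictions_multi combines on 5-class DR data:
# quadratic-weighted kappa credits near-miss grades, and multi_class='ovo'
# averages AUC over all class pairs.
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, roc_auc_score

rng = np.random.default_rng(0)
y_true_demo = rng.integers(0, 5, size=200)
y_proba_demo = rng.dirichlet(np.ones(5), size=200)  # each row sums to 1
y_pred_demo = y_proba_demo.argmax(axis=1)
print('acc  :', accuracy_score(y_true_demo, y_pred_demo))
print('kappa:', cohen_kappa_score(y_true_demo, y_pred_demo, weights='quadratic'))
print('auc  :', roc_auc_score(y_true_demo, y_proba_demo,
                              average='weighted', multi_class='ovo'))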
def calc_exp_obs_pre_posture(data_path, logger=Logger()):
    logger.log('Postural behavior')
    sub_logger = Logger(logger)
    for folder, files in get_csv_files_from_folders(data_path):
        sub_logger.log('Category: "' + os.path.basename(folder) + '"')
        folder_logger = Logger(sub_logger)
        folder_logger.log('Found files')
        # participants maps each participant number onto a
        # tuple with each rater's rating of that participant.
        participants = {}
        # Concatenate records from all .csv files.
        for file in files:
            file_path = os.path.join(folder, file)
            Logger(folder_logger).log(file_path)
            table = get_csv_table(file_path)
            # The first row in table is the header row.
            for row in table[1:]:
                for item in range(1, len(row)):
                    if not is_int(row[item]):
                        row[item] = -1  # Negative integers represent absent data.
                participants.update({int(row[0]): (int(row[1]), int(row[2]))})
        filled_table = [rater for rater in participants.values()
                        if ((rater[0] >= 0) and (rater[1] >= 0))]
        amount_agreed = len([row for row in filled_table if (row[0] == row[1])])
        percent_agreement = amount_agreed / len(filled_table)
        folder_logger.log('Number of participants: ' + str(len(participants)))
        folder_logger.log('Number of participants with both raters: ' + str(len(filled_table)))
        folder_logger.log('Percent Agreement: ' + str(percent_agreement))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # krippendorff.alpha expects reliability_data shaped (raters, units),
            # so the participants-by-raters table is transposed here.
            folder_logger.log("Krippendorff's Alpha: " +
                              str(krippendorff.alpha(np.array(filled_table).T)))
        x_values, y_values = zip(*filled_table)
        folder_logger.log("Cohen's Kappa: " + str(kappa(x_values, y_values)))
        valid_ratings = []  # All valid (non-negative) ratings across raters.
        for participant_values in participants.values():
            for value in participant_values:
                if value >= 0:
                    valid_ratings.append(value)
        folder_logger.log('Frequency of risky behavior: ' + str(np.average(valid_ratings)))
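
# --- Sanity-check sketch (added; not from the original source). The pip
# `krippendorff` package expects reliability_data shaped (raters, units), so a
# participants-by-raters table must be transposed first; the ratings here are
# nominal codes, hence level_of_measurement='nominal' instead of the 'interval'
# default (for binary 0/1 data the two coincide).
import numpy as np
import krippendorff

toy_table = [(1, 1), (0, 0), (1, 0), (0, 0), (1, 1)]  # (rater 1, rater 2) per participant
print("Krippendorff's alpha:",
      krippendorff.alpha(reliability_data=np.array(toy_table).T,
                         level_of_measurement='nominal'))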
import pdb
import traceback

from sklearn.metrics import cohen_kappa_score as kappa


def kappa_calc(file1, file2):
    with open(file1, 'r', encoding='utf8') as f:
        lines = f.readlines()
    with open(file2, 'r', encoding='utf8') as f:
        lines2 = f.readlines()
    lines.append('###')  # sentinel line, kept from the original
    labels1 = []
    labels2 = []
    for l, line in enumerate(lines):
        line = line.strip()
        if not line or '#' in line:
            continue  # blank lines and '#' separators carry no labels
        try:
            # Each annotated line starts with an integer label,
            # separated from the text by a space or a tab.
            try:
                l1 = int(line.split(' ', 1)[0])
            except ValueError:
                l1 = int(line.split('\t', 1)[0])
            try:
                l2 = int(lines2[l].split(' ', 1)[0])
            except ValueError:
                l2 = int(lines2[l].split('\t', 1)[0])
        except Exception:
            traceback.print_exc()
            pdb.set_trace()
        labels1.append(l1)
        labels2.append(l2)
    print('The cohen kappa score is : ', kappa(labels1, labels2))
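
# --- Usage sketch (added; not from the original source). kappa_calc expects
# two parallel annotation files in which each non-blank, non-'#' line starts
# with an integer label separated from the text by a space or tab; lines
# containing '#' act as separators. The file names below are made up.
with open('ann1.txt', 'w', encoding='utf8') as f:
    f.write('# abstract 1\n1 first sentence\n0 second sentence\n1 third sentence\n')
with open('ann2.txt', 'w', encoding='utf8') as f:
    f.write('# abstract 1\n1 first sentence\n1 second sentence\n1 third sentence\n')
kappa_calc('ann1.txt', 'ann2.txt')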
def eval_predictions_multi(y_true, y_pred, y_proba, print_conf=True):
    acc = balanced_accuracy_score(y_true, y_pred)
    k = kappa(y_true, y_pred, weights='quadratic')
    classes = ['DR0', 'DR1', 'DR2', 'DR3', 'DR4']
    # mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovr')
    # ovo should be better, but average is not clear from docs
    # mean_auc = roc_auc_score(y_true, y_proba, average='macro', multi_class='ovo')
    mean_auc = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovo')
    if print_conf:
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3, 4])
        print_cm(cm, classes)
    return k, mean_auc, acc
def kappas(inlist):
    # Pairwise Cohen's kappa between every two rater columns in inlist;
    # items equal to the string 'nan' mark missing ratings and are skipped.
    kaps = []
    for i, itm in enumerate(inlist[:-1]):
        for itk in inlist[i + 1:]:
            a_values = []
            b_values = []
            for pair in zip(itm, itk):
                if 'nan' not in pair:
                    a_values.append(pair[0])
                    b_values.append(pair[1])
            kaps.append([kappa(a_values, b_values), len(a_values)])
    return kaps
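
# --- Usage sketch (added; not from the original source). Each element of
# inlist is one rater's column of labels, with the string 'nan' marking a
# missing rating; kappas() returns one [kappa, n_shared] pair per rater pair.
rater_a = [1, 0, 1, 'nan', 1]
rater_b = [1, 0, 0, 1, 1]
rater_c = [1, 1, 1, 0, 'nan']
for pair_kappa, n_shared in kappas([rater_a, rater_b, rater_c]):
    print('kappa=%.3f over %d shared items' % (pair_kappa, n_shared))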
def test_RCNN9(self, sm, dn, wsc, bsc, time_cnt):
    data1 = scipy.io.loadmat(st.RCNN_separated_class + "A09E.mat")["data"]
    print("Subject 9")
    data1 = np.swapaxes(data1, 0, 2)
    label = data1[..., -1] - 1  # labels are stored in the last feature column
    data1 = data1[..., :-1]
    rolled_data = self.rolling_window(data1, (1, time_cnt))
    # data = normalize(data)  # Gaussian normalization
    for i in range(0, 47):
        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            saver = tf.train.Saver()
            saver.restore(sess, st.RCNN_model_path + "%02d.ckpt" % i)
            w, b = sess.run([wsc, bsc])
            np.save(st.path + "spatialconvweight.npy", w)
            # scipy.io.savemat('/home/a/PycharmProjects/RCNN_BCI/conv1weight.mat', {'w': w})
            # data = np.pad(data, ((0, time_cnt - 1), (0, 0)), mode="edge")
            # rolled_data = self.rolling_window(data, (time_cnt, 1))
            acc = 0
            results4 = np.empty([288, 1])
            mean_prediction = np.empty([288, 189])
            gt1 = np.empty([288, 1])
            for cnt, dat in enumerate(rolled_data):
                results = sess.run(sm, feed_dict={dn: dat[..., None]})
                results1 = np.argmax(results, axis=-1)
                mean_prediction[cnt] = results1
                # Majority vote over the windows of one trial.
                results2 = np.bincount(results1, minlength=4)
                results3 = np.argmax(results2)
                results4[cnt] = results3
                gt = np.unique(label[cnt])
                gt1[cnt] = gt
                # print(gt == results3, gt, results3)
                if gt == results3:
                    acc = acc + 1
            mean_prediction = np.mean(mean_prediction, axis=0)
            # cov_pred = np.cov(mean_prediction)
            # np.save(st.path + 'covdata.npy', cov_data)
            # np.save(st.path + 'covpred.npy', cov_pred)
            acc = acc / 288
            print("%02d th Accuracy: %.3f" % (i, acc))
            # Flatten the (288, 1) columns for sklearn's kappa.
            print("%02d th kappa coefficient: %.3f" % (i, kappa(gt1.ravel(), results4.ravel())))
        tf.reset_default_graph()
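
# --- Helper sketch (added; not from the original source). rolling_window is
# not defined in this snippet; a plausible stand-in built on numpy's
# sliding_window_view, assuming a (1, time_cnt) window slid over the trailing
# two axes of the (trials, channels, samples) EEG array. The original's
# stride and shape conventions may differ.
import numpy as np

def rolling_window_sketch(a, window):
    # One window per unit step over the last two axes.
    return np.lib.stride_tricks.sliding_window_view(a, window, axis=(-2, -1))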
def test_classifiers(X, y, n=7, rname="results.txt"):
    # Each entry: [classifier, accuracies, f1s, recalls, precisions, kappas]
    clfs = {
        # "Bagging KNN": [BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5), [], [], [], []],
        "NN (kNN k=1)": [KNeighborsClassifier(n_neighbors=1), [], [], [], [], []],
        # "NN (kNN k=3)": [KNeighborsClassifier(n_neighbors=3), [], [], [], [], []],
        "NN (kNN k=3 w)": [KNeighborsClassifier(n_neighbors=3, weights='distance'), [], [], [], [], []],
        "NN (kNN k=5 w)": [KNeighborsClassifier(n_neighbors=5, weights='distance'), [], [], [], [], []],
        # "NN (kNN k=7 w)": [KNeighborsClassifier(n_neighbors=7, weights='distance'), [], [], [], []],
        # "SVM - Linear kernel": [svm.SVC(kernel="rbf", probability=True), [], [], [], []],
        # "Naive Bayes": [GaussianNB(), [], [], [], []],
        # "SVM Sigmoide": [svm.SVC(kernel="sigmoid"), [], [], [], []],
        # "ANN": [MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1), [], [], [], []],
    }
    # V = ["Voting KNN", [None, [], [], [], []]]
    skf = kfold(y, n_iter=n, random_state=None, train_size=0.7)
    with open(rname, "w") as output:
        for train, test in skf:
            Xt, Yt = X[train], y[train]
            Xv, Yv = X[test], y[test]
            votes = []
            for k, v in clfs.items():
                v[0].fit(Xt, Yt)
                Yr = v[0].predict(Xv)
                v[1].append(accs(Yv, Yr))
                v[2].append(f1(Yv, Yr, average="macro"))
                v[3].append(recall(Yv, Yr, average="macro"))
                v[4].append(precision(Yv, Yr))
                v[5].append(kappa(Yv, Yr))
                # votes.append(Yr)
            # Yp = predict(votes)
        for k, v in clfs.items():
            fm = "%s | %s| %s | %s | %s\n"
            output.write(fm % (k, "Accuracy", np.mean(v[1]), min(v[1]), max(v[1])))
            # output.write(fm % (k, "Kappa", np.mean(v[5]), min(v[5]), max(v[5])))
            output.write(fm % (k, "F1", np.mean(v[2]), min(v[2]), max(v[2])))
            output.write(fm % (k, "Recall", np.mean(v[3]), min(v[3]), max(v[3])))
            output.write(fm % (k, "Precision", np.mean(v[4]), min(v[4]), max(v[4])))
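
# --- Compatibility sketch (added; not from the original source). The kfold
# call above follows the pre-0.18 sklearn cross_validation API (constructed
# with y, directly iterable). A modern replacement, assuming kfold was
# StratifiedShuffleSplit:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

def kfold_modern(y, n_iter=7, train_size=0.7, random_state=None):
    sss = StratifiedShuffleSplit(n_splits=n_iter, train_size=train_size,
                                 random_state=random_state)
    # split() only inspects the shape of X, so a dummy array suffices here.
    return sss.split(np.zeros(len(y)), y)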
def calc_pre_obs(data_path, logger=Logger()):
    logger.log('pre-obs')
    for criteria, files in get_csv_files_from_folders(data_path):
        logger_criteria = Logger(logger)
        logger_criteria.log('Criteria ' + os.path.basename(criteria) + ':')
        table = get_table(criteria, files, Logger(logger_criteria))
        # Entries rated by both raters.
        filled_table = [list(map(int, row)) for row in table
                        if (('' not in row) and len(row) == 2)]
        Logger(logger_criteria).log('Number of participants: ' + str(len(table)))
        Logger(logger_criteria).log('Number of participants with both raters: ' +
                                    str(len(filled_table)))
        amount_agreed = len([row for row in filled_table if (row[0] == row[1])])
        percent_agreement = amount_agreed / len(filled_table)
        Logger(logger_criteria).log('Percent Agreement: ' + str(percent_agreement))
        # krippendorff.alpha expects reliability_data shaped (raters, units),
        # so the participants-by-raters table is transposed here.
        Logger(logger_criteria).log("Krippendorff's Alpha: " +
                                    str(krippendorff.alpha(np.array(filled_table).T)))
        x_values, y_values = zip(*filled_table)
        Logger(logger_criteria).log("Cohen's Kappa: " + str(kappa(x_values, y_values)))
        ratings = []
        for row in table:
            for rating in row:
                if is_int(rating):
                    ratings.append(int(rating))
        Logger(logger_criteria).log('Frequency of risky behavior: ' +
                                    str(np.mean(ratings, dtype=np.float64)))
train_accuracy = regressor.score(x_train, y_train)

# Step 7
# Building the optimal model using backward elimination
# (backward elimination method using the p-value).

# Import statsmodels for statistical operations. Note: OLS lives in
# statsmodels.api; statsmodels.formula.api no longer re-exports it.
import statsmodels.api as sm

arr = np.ones((len(X), 1))
# Append a column of ones so x0 = 1 carries the intercept (base) weight.
X_opt = np.append(arr, values=X, axis=1)

# Building the optimal model using backward elimination.
X_opt2 = X_opt[:, [1, 3, 4]]
# Ordinary least squares: minimizes the sum of (y - y_hat)^2.
regressor_OLS = sm.OLS(endog=y, exog=X_opt2).fit()
print(regressor_OLS.summary())

from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))

from sklearn.metrics import cohen_kappa_score as kappa
print(kappa(y_test, y_pred))
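
# --- Sketch (added; not from the original source). The column subset [1, 3, 4]
# above was chosen by hand; backward elimination can be automated by repeatedly
# refitting and dropping the predictor with the highest p-value until all
# remaining p-values fall below the significance level. Assumes X_opt and y
# from above.
def backward_elimination(X, y, significance_level=0.05):
    import numpy as np
    import statsmodels.api as sm
    cols = list(range(X.shape[1]))
    while cols:
        model = sm.OLS(endog=y, exog=X[:, cols]).fit()
        worst = int(np.argmax(model.pvalues))
        if model.pvalues[worst] <= significance_level:
            return cols, model  # surviving column indices and the final fit
        del cols[worst]
    return cols, None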
import itertools

import matplotlib.pyplot as plt
import numpy as np


# Header reconstructed from the parameters used in the body; this is the
# standard confusion-matrix plotting example from the sklearn docs.
def plot_confusion_matrix(cm, classes, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


# Review
from sklearn.metrics import confusion_matrix

class_names = np.unique(Y)
cf_matrix = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cf_matrix, class_names)

from sklearn.metrics import cohen_kappa_score as kappa
print(kappa(y_test, y_pred))
print(test_data.shape)

# Preprocess: standardize features using statistics fit on the training set.
scaler = preprocessing.StandardScaler().fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)
# train_data = preprocessing.normalize(train_data, norm='l2')
# test_data = preprocessing.normalize(test_data, norm='l2')

# Time the fit.
start_time = time.time()
model.fit(train_data, train_label)
end_time = time.time()
time_cost = end_time - start_time
print('time cost : {}'.format(time_cost))

# Evaluate. sklearn metrics expect (y_true, y_pred) in that order.
print(model.score(train_data, train_label))
print(model.score(test_data, test_label))
pred = model.predict(test_data)
print('kappa:', kappa(test_label, pred))
print(classification_report(test_label, pred))
print('matrix:\n', confusion_matrix(test_label, pred))

# Save the fitted model.
with open('model.pickle', 'wb') as f:
    pickle.dump(model, f)
# print(model.get_params())
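
# --- Usage sketch (added; not from the original source): reload the pickled
# model later and confirm it scores identically on the same split.
import pickle

with open('model.pickle', 'rb') as f:
    reloaded = pickle.load(f)
print('reloaded test accuracy:', reloaded.score(test_data, test_label))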
def inference(args):
    data, gt = mat2array()
    patch_size = 4
    output = np.zeros((145, 145, 16))
    clear_session()
    model = build_model(args.model, input_shape=(4, 4, 200))
    model.load_weights(args.weights)
    for i in range(0, data.shape[0] - patch_size, patch_size):
        for j in range(0, data.shape[1] - patch_size, patch_size):
            patch = (data[i:i + patch_size, j:j + patch_size, :]).copy()
            patch = np.reshape(patch, (-1, patch_size, patch_size, 200))
            output[i:i + patch_size, j:j + patch_size, :] = model.predict(patch).reshape(
                1, patch_size, patch_size, -1)
    # Semantically segmented output -- making it 1-indexed.
    output_semantic = np.argmax(output, axis=2) + 1
    # Mask of pixels where ground truth is defined.
    gt_mask = np.where(gt > 0, np.ones_like(gt), np.zeros_like(gt))
    # Output after filtering.
    masked_output = output_semantic * gt_mask
    count = 0
    total = 0
    gt_without_test_label, output_without_test_label = [], []
    classwise_correct = [0 for _ in range(16)]
    for i in range(145):
        for j in range(145):
            if gt[i, j] != 0:
                total += 1
                if gt[i, j] == masked_output[i, j]:
                    count += 1
                    classwise_correct[gt[i, j] - 1] += 1
    for i, j in zip(gt.ravel(), masked_output.ravel()):
        if i != 0:
            gt_without_test_label.append(i)
            output_without_test_label.append(j)
    # Index the Counter by class so frequencies line up with classwise_correct;
    # Counter.values() alone follows insertion order, not class order.
    gt_counter = Counter(gt_without_test_label)
    classwise_gt_frequency = np.array([gt_counter[c] for c in range(1, 17)])
    classwise_correct = np.array(classwise_correct)
    classwise_average_accuracy = classwise_correct / classwise_gt_frequency
    overall_acc = count / total
    average_acc = np.mean(classwise_average_accuracy)
    kappa_score = kappa(gt_without_test_label, output_without_test_label)
    formatted_string = '\nOverall Accuracy = %.3f \nAverage Accuracy = %.3f \nKappa Score = %.3f \n'
    print(formatted_string % (overall_acc * 100, average_acc * 100, kappa_score * 100))
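
# --- Cross-check sketch (added; not from the original source). The hand-rolled
# OA/AA/kappa loop in inference() can be verified with sklearn on the same
# label vectors (e.g. gt_without_test_label vs. output_without_test_label);
# average accuracy is the mean of per-class recall, i.e. balanced accuracy.
from sklearn.metrics import accuracy_score, balanced_accuracy_score, cohen_kappa_score

def sklearn_cross_check(y_true, y_pred):
    print('OA=%.3f AA=%.3f kappa=%.3f' % (accuracy_score(y_true, y_pred) * 100,
                                          balanced_accuracy_score(y_true, y_pred) * 100,
                                          cohen_kappa_score(y_true, y_pred) * 100))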
def accuracy_scores_binary(y_true, y_pred):
    '''
    A function to calculate accuracy measures for a binary classification.

    Function written by Osian Roberts.

    Parameters:
    :param y_true: observed binary labels, where 0 is absence and 1 is presence.
    :param y_pred: predicted binary labels, where 0 is absence and 1 is presence.

    :returns: a list containing two numpy.arrays -
              (metrics: name of test metrics, scores: test scores for each metric)

    Reference: See pages 253-255 in:
    Guisan et al. (2017). Habitat suitability and distribution models: with applications in R.
    '''
    import numpy

    # check inputs:
    if not isinstance(y_true, numpy.ndarray):
        y_true = numpy.array(y_true)
    if not isinstance(y_pred, numpy.ndarray):
        y_pred = numpy.array(y_pred)
    if y_true.ndim != 1:
        raise SystemExit('ERROR: the true labels are not in a 1D array.')
    if y_pred.ndim != 1:
        raise SystemExit('ERROR: the predicted labels are not in a 1D array.')
    if y_true.size != y_pred.size:
        raise SystemExit('ERROR: unequal number of binary labels.')

    # ensure that y_true, y_pred contain binary labels (i.e. 0 or 1 values):
    y_true = y_true.astype('uint8')
    y_pred = y_pred.astype('uint8')
    if numpy.min(y_true) != 0 or numpy.max(y_true) != 1:
        raise SystemExit('ERROR: the true labels are not binary (zero or one values).')
    if numpy.min(y_pred) != 0 or numpy.max(y_pred) != 1:
        raise SystemExit('ERROR: the predicted labels are not binary (zero or one values).')

    metrics = numpy.array(['Prevalence', 'Overall Diagnostic Power', 'Correct Classification Rate',
                           'Misclassification Rate', 'Presence Predictive Power',
                           'Absence Predictive Power', 'Accuracy', 'Balanced Accuracy',
                           'Sensitivity', 'Specificity', 'Precision', 'F1 Score',
                           'Matthews Correlation', 'Cohen Kappa',
                           'Normalised Mutual Information', 'Hanssen-Kuiper skill'])
    try:
        n_presence = numpy.where(y_true == 1)[0].size
        n_absence = numpy.where(y_true == 0)[0].size

        # calculate true-presence, true-absence, false-presence and false-absence
        # (comparisons fixed so FP counts predicted-presence/observed-absence and
        # FA the reverse, matching the names used in the formulas below):
        TP = numpy.where((y_true == 1) & (y_pred == 1))[0].size
        TA = numpy.where((y_true == 0) & (y_pred == 0))[0].size
        FP = numpy.where((y_true == 0) & (y_pred == 1))[0].size
        FA = numpy.where((y_true == 1) & (y_pred == 0))[0].size  # aka sweet FA!

        # proportion of presence records:
        prevalence = n_presence / y_true.size
        # proportion of absence records:
        ODP = 1 - prevalence

        # correct classification & misclassification rate:
        CCR = (TP + TA) / y_true.size
        MR = (FP + FA) / y_true.size

        # Sensitivity (aka Recall or True Positive Rate):
        sensitivity = TP / n_presence
        # false presence rate - inverse of sensitivity (redundant?)
        # FPR = 1 - sensitivity

        # Presence and absence predictive power:
        PPP = TP / (TP + FP)
        APP = TA / (TA + FA)

        # Specificity (aka True Negative Rate):
        specificity = TA / n_absence
        # false positive rate - inverse of specificity (redundant?)
        # FPR = 1 - specificity

        # Accuracy scores:
        accuracy = (TP + TA) / (n_presence + n_absence)
        balanced_accuracy = ((TP / n_presence) + (TA / n_absence)) / 2

        # precision:
        precision = TP / (TP + FP)

        # F1 score:
        f1_score = 2 * TP / ((2 * TP) + FP + FA)

        # Matthews Correlation Coefficient:
        MCC = ((TP * TA) - (FP * FA)) / (((TP + FP) * (TP + FA) * (TA + FP) * (TA + FA)) ** 0.5)

        # Hanssen-Kuiper skill (unreliable when TA is very large):
        TSS = sensitivity + specificity - 1
        del n_presence, n_absence, TP, TA, FP, FA

        from sklearn.metrics import normalized_mutual_info_score as nmi_score
        nmi_score = nmi_score(y_true, y_pred)

        # Cohen's Kappa (caution: sensitive to sample size and proportion of presence records):
        from sklearn.metrics import cohen_kappa_score as kappa
        kappa = kappa(y_true, y_pred)

        scores = numpy.array([prevalence, ODP, CCR, MR, PPP, APP, accuracy, balanced_accuracy,
                              sensitivity, specificity, precision, f1_score, MCC, kappa,
                              nmi_score, TSS]).round(decimals=6)
        del prevalence, ODP, CCR, MR, PPP, APP, accuracy, balanced_accuracy, sensitivity
        del specificity, precision, f1_score, MCC, kappa, nmi_score, TSS
    except Exception:
        scores = numpy.zeros(len(metrics))

    if metrics.size == scores.size:
        return [metrics, scores]
    else:
        raise SystemExit('ERROR: unable to calculate accuracy metrics.')
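
# --- Usage sketch (added; not from the original source): toy binary labels,
# small enough that sensitivity, specificity, and kappa can be checked by hand.
import numpy as np

y_obs = np.array([1, 1, 1, 0, 0, 0, 1, 0])
y_hat = np.array([1, 0, 1, 0, 0, 1, 1, 0])
metric_names, metric_scores = accuracy_scores_binary(y_obs, y_hat)
for name, score in zip(metric_names, metric_scores):
    print('%-32s %.4f' % (name, score))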
def print_kappa(a1_tokens, a2_tokens, label, logger):
    # print(a1_tokens, '\n', a2_tokens, '\n')
    # Note: cohen_kappa_score's `labels` argument expects a list of label
    # values to select/order the confusion matrix; here the same `label`
    # value doubles as the printed description.
    logger.info("Kappa coefficient for {0} = {1:6.3f}".format(
        label, kappa(a1_tokens, a2_tokens, labels=label)))
def calc_exp_obs_posture(data_path, logger=Logger()):
    logger.log('Postural behavior')
    sub_logger = Logger(logger)
    for folder, files in get_csv_files_from_folders(data_path):
        sub_logger.log('Criteria: "' + os.path.basename(folder) + '"')
        folder_logger = Logger(sub_logger)
        folder_logger.log('Found files')
        records_from_table = []
        # Concatenate records from all .csv files.
        for file in files:
            file_path = os.path.join(folder, file)
            Logger(folder_logger).log(file_path)
            table = get_csv_table(file_path)
            # The first row in table is the header row.
            for row in table[1:]:
                for item in range(1, 5):
                    if not is_int(row[item]):
                        # Negative integers represent absent data.
                        row[item] = -1
                records_from_table.append((dateparse(row[0]), int(row[1]), int(row[2]),
                                           int(row[3]), int(row[4])))
        # Turn the records into a numpy structured array.
        records_from_table = np.array(records_from_table,
                                      dtype=[('date', datetime.date), ('participant', int),
                                             ('trial', int), ('rater 1', int), ('rater 2', int)])
        # Percent Agreement, Krippendorff's Alpha, and Cohen's Kappa.
        logger_criteria = Logger(logger)
        filled_table = [(record['rater 1'], record['rater 2'])
                        for record in records_from_table
                        if ((record['rater 1'] >= 0) and (record['rater 2'] >= 0))]
        amount_agreed = len([row for row in filled_table if (row[0] == row[1])])
        percent_agreement = amount_agreed / len(filled_table)
        Logger(logger_criteria).log('Percent Agreement: ' + str(percent_agreement))
        # krippendorff.alpha expects reliability_data shaped (raters, units),
        # so the participants-by-raters table is transposed here.
        Logger(logger_criteria).log("Krippendorff's Alpha: " +
                                    str(krippendorff.alpha(np.array(filled_table).T)))
        x_values, y_values = zip(*filled_table)
        Logger(logger_criteria).log("Cohen's Kappa: " + str(kappa(x_values, y_values)))
        # adjusted_records holds the records with each subject's sessions
        # enumerated by day number rather than by date.
        adjusted_records = []
        current_participant = -1
        current_date = datetime.datetime(1, 1, 1)
        day_number = 0
        for record in np.sort(records_from_table, order=['participant', 'date', 'trial']):
            # Reset if on the next participant.
            if current_participant != record['participant']:
                current_participant = record['participant']
                day_number = 0
                current_date = datetime.datetime(1, 1, 1)
            # Advance if on the same participant but the next day.
            if current_date < record['date']:
                current_date = record['date']
                day_number += 1
            adjusted_records.append((day_number, record['participant'], record['trial'],
                                     record['rater 1'], record['rater 2']))
        # Redefine adjusted_records as a numpy structured array.
        adjusted_records = np.array(adjusted_records,
                                    dtype=[('day', int), ('participant', int), ('trial', int),
                                           ('rater 1', int), ('rater 2', int)])
        participants = {}  # maps day numbers onto the list of participants seen that day.
        for record in adjusted_records:
            if record['day'] in participants:
                if record['participant'] not in participants[record['day']]:
                    participants[record['day']].append(record['participant'])
            else:
                participants.update({record['day']: [record['participant']]})
        ratings = {}  # maps day numbers onto the list of ratings for that day.
        for record in adjusted_records:
            if record['day'] not in ratings:
                ratings[record['day']] = []
            if record['rater 1'] >= 0:
                ratings[record['day']].append(record['rater 1'])
            if record['rater 2'] >= 0:
                ratings[record['day']].append(record['rater 2'])
        statistic_logger = Logger(Logger(folder_logger))
        for day in participants:
            Logger(folder_logger).log('Day ' + str(day) + ':')
            statistic_logger.log('Number of participants: ' + str(len(participants[day])))
            statistic_logger.log('Frequency of risky behavior: ' + str(np.average(ratings[day])))