def get_results(results, instance_of_datasets, classifier_name, y_true, y_pred, file_dump):
    tmp_ = {"y_pred": y_pred,
            "y_true": y_true,
            "accuracy": accuracy_score(y_true, y_pred),
            "precision_micro": precision_score(y_true, y_pred, average="micro"),
            "precision_macro": precision_score(y_true, y_pred, average="macro"),
            "recall_micro": recall_score(y_true, y_pred, average="micro"),
            "recall_macro": recall_score(y_true, y_pred, average="macro"),
            "f1_micro": f1_score(y_true, y_pred, average="micro"),
            "f1_macro": f1_score(y_true, y_pred, average="macro")}
    cPickle.dump(tmp_, gzip.open("%s/single_%s_%s_%s.zcp" % (dir_results, file_dump, instance_of_datasets, classifier_name), "wb+"))
    results[instance_of_datasets][classifier_name] = tmp_
    print(classifier_name,
          "accuracy", results[instance_of_datasets][classifier_name]["accuracy"],
          "f1 score_micro", results[instance_of_datasets][classifier_name]["f1_micro"],
          "precision_micro", results[instance_of_datasets][classifier_name]["precision_micro"],
          "recall_micro", results[instance_of_datasets][classifier_name]["recall_micro"],
          "f1 score_macro", results[instance_of_datasets][classifier_name]["f1_macro"],
          "precision_macro", results[instance_of_datasets][classifier_name]["precision_macro"],
          "recall_macro", results[instance_of_datasets][classifier_name]["recall_macro"])
    cPickle.dump(results, gzip.open(dir_results + "/" + file_dump, "wb+"))
    return results
def calc_fit(model, metric, train_x, train_y, test_x, test_y, p):
    # keep only the features selected by the boolean mask `p`
    # (list comprehensions instead of map() so the result is a proper 2D list on Python 3)
    train_x = [list(compress(x, p)) for x in train_x]
    test_x = [list(compress(x, p)) for x in test_x]
    clf = model.fit(train_x, train_y)
    predictions = clf.predict(test_x)
    if metric == 'precision':
        return precision_score(test_y, predictions, labels=[0, 1])
    elif metric == 'recall':
        return recall_score(test_y, predictions, labels=[0, 1])
    elif metric == 'accuracy':
        # accuracy_score takes no labels argument
        return accuracy_score(test_y, predictions)
    return (precision_score(test_y, predictions, labels=[0, 1])
            + recall_score(test_y, predictions, labels=[0, 1])
            + accuracy_score(test_y, predictions))
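# Hedged usage sketch (not part of the original source): driving calc_fit above with a
# boolean feature mask, a hypothetical scikit-learn estimator, and toy data. The mask
# `p_demo` keeps features 0 and 2 only; itertools.compress does the column selection.
from itertools import compress  # used inside calc_fit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score  # used inside calc_fit

train_x_demo = [[1.0, 0.2, 3.1, 0.0], [0.9, 1.5, 2.8, 0.3],
                [0.1, 0.4, 0.2, 1.1], [0.0, 0.3, 0.1, 1.4]]
train_y_demo = [1, 1, 0, 0]
test_x_demo = [[1.1, 0.1, 3.0, 0.1], [0.2, 0.5, 0.3, 1.2]]
test_y_demo = [1, 0]
p_demo = [True, False, True, False]  # keep features 0 and 2
print(calc_fit(LogisticRegression(), 'accuracy',
               train_x_demo, train_y_demo, test_x_demo, test_y_demo, p_demo))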
def get_score(a, b_max):
    a_max = np.argmax(a, axis=-1)
    acc = accuracy_score(a_max, b_max)
    p = precision_score(a_max, b_max, average='macro')
    r = recall_score(a_max, b_max, average='macro')
    f1 = f1_score(a_max, b_max, average='macro')
    return acc, p, r, f1
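# Hedged usage sketch (illustrative values only): get_score above expects `a` as per-class
# scores (e.g. softmax outputs or one-hot rows) and `b_max` as already-argmaxed integer
# labels. Note that the argmax of `a` is passed in the y_true position of the sklearn calls.
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score  # used by get_score

a_demo = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # model outputs
b_demo = np.array([1, 0, 0])                             # integer labels
print(get_score(a_demo, b_demo))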
def metric_permission_based_outlier(scores, marks, target_labels, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics.ranking import roc_auc_score
    from sklearn.metrics.classification import precision_score, recall_score

    for i in range(len(target_labels)):
        label_i = target_labels[i]
        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))

        n = sum(y_true) - 1
        if 0 <= n < len(pk):
            # print(y_true)
            # print(scores_i)
            print('{}@{}/{}'.format(label_i, n, len(scores_i)),
                  pk[n], rk[n], roc_auc_score(y_true, scores_i))
        else:
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), 0.0, 0.0, 0.0)

        if title is not None:
            fp_save = os.path.join('results_weighted', title)
            plot_curve('{}_{}_precision'.format(title, label_i), 'precision',
                       list(range(1, len(y_true))), pk,
                       path_save=fp_save + '_{}_precision.pdf'.format(label_i))
            plot_curve('{}_{}_recall'.format(title, label_i), 'recall',
                       list(range(1, len(y_true))), rk,
                       path_save=fp_save + '_{}_recall.pdf'.format(label_i))
def metric_overall_outlier(scores, weights, marks, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics.ranking import roc_auc_score
    from sklearn.metrics.classification import precision_score, recall_score

    y_true = []
    weighted_scores = []
    for i in range(len(scores)):
        score = 0.0
        for w, s, m in zip(weights[i], scores[i], marks[i]):
            score += w * s
        # print(1 if 'n' in marks[i] else 0, score, scores[i], weights[i], marks[i])
        weighted_scores.append(score)
        y_true.append(1 if 1 in marks[i] else 0)

    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, weighted_scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))

    n = sum(y_true)
    print('overall@{}'.format(n), len(y_true), pk[n], rk[n],
          roc_auc_score(y_true, weighted_scores))

    if title is not None:
        fp_save = os.path.join('results', 'overall_' + title)
        # plot_curve('overall_{}_precision'.format(title), 'precision',
        #            list(range(1, len(y_true))), pk, fp_save=fp_save + '_precision.pdf')
        # plot_curve('overall_{}_recall'.format(title), 'recall',
        #            list(range(1, len(y_true))), rk, fp_save=fp_save + '_recall.pdf')
        plot_precision_recall('', list(range(1, len(y_true))), pk, rk,
                              path_save=fp_save + '.pdf')
def Predict(self, inp, labels, classifier, folds, name, paramdesc):
    X = inp
    y = labels
    X, y = X[y != 2], y[y != 2]
    n_samples, n_features = X.shape

    ###########################################################################
    # Classification and ROC analysis

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(y, n_folds=folds)

    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    _precision = 0.0
    _recall = 0.0
    _accuracy = 0.0
    _f1 = 0.0
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        pred_ = classifier.predict(X[test])
        _precision += precision_score(y[test], pred_)
        _recall += recall_score(y[test], pred_)
        _accuracy += accuracy_score(y[test], pred_)
        _f1 += f1_score(y[test], pred_)
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    _precision /= folds
    _recall /= folds
    _accuracy /= folds
    _f1 /= folds

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic - {0}'.format(name))
    plt.legend(loc="lower right")
    plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
    plt.close()

    result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds),
                               _precision, _recall, _accuracy, _f1)
    Announce(result)
def run():
    paras = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))

    skf = StratifiedKFold(Y, n_folds=10)
    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')

    accs = []
    precs = []
    recs = []
    f1s = []
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        cv = CountVectorizer()
        X_train_counts = cv.fit_transform(X_train)

        tf_transformer = TfidfTransformer(use_idf=True).fit(X_train_counts)
        X_train_tfidf = tf_transformer.transform(X_train_counts)

        clf = DummyClassifier(strategy="most_frequent").fit(X_train_counts, y_train)

        X_test_counts = cv.transform(X_test)
        X_test_tfidf = tf_transformer.transform(X_test_counts)
        y_pred = clf.predict(X_test_counts)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)

        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1

        for para, (y_t, y_p) in zip(X_test, zip(y_test, y_pred)):
            if y_t == y_p:
                f.write('%s\n' % para)
            else:
                f2.write('%s\n' % para)

    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
def metrics(y_true, y_predict):
    logger.info("Computing classification metrics...")
    F_value = f1_score(y_true, y_predict, average="weighted")
    Recall_value = recall_score(y_true, y_predict, average="weighted")
    Precision_value = precision_score(y_true, y_predict, average="weighted")
    return F_value, Recall_value, Precision_value
def metric_permission_based_outlier(scores, marks, target_permissions, title=None):
    """Metric and print permission based outlier scores, i.e., precision/recall and AUC value.

    :param scores: List, scores(i, j) of each widget(i) in each permission(j).
    :param marks: List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
    :param target_permissions: List of string, the `j`th permission name.
    :param title: String, file name used to save the plot, `None` means not to save.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics.ranking import roc_auc_score
    from sklearn.metrics.classification import precision_score, recall_score

    for i in range(len(target_permissions)):
        permission_i = target_permissions[i]

        # sort scores in each permission
        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        # no positive or negative labels
        if sum(y_true) == len(scores_i) or sum(y_true) == 0:
            print('{}({}/{}), error'.format(permission_i, sum(y_true), len(scores_i)))
            continue

        # compute precision, recall curve and auc value
        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))
        auc = roc_auc_score(y_true, scores_i)

        # print top-k precision, recall, and AUC value
        k = sum(y_true)
        print('{}({}/{}), p/r: {}, AUC: {}'.format(
            permission_i, k, len(scores_i), round(pk[k - 1], 4), round(auc, 4)
        ))

        # save plot
        if title is not None:
            path_save = os.path.join('{}-{}.pdf'.format(title, permission_i))
            plot_precision_recall(permission_i, list(range(1, len(y_true))), pk, rk, path_save)
def by_class_evaluation(attack_test_y, target_y, p, attack_test_x, labels=None):
    if labels is None:
        labels = np.unique(target_y)

    precisions = [precision_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                  for c in np.unique(target_y)]
    accuracies = [accuracy_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                  for c in np.unique(target_y)]
    f1_scores = [f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                 for c in np.unique(target_y)]
    recalls = [recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
               for c in np.unique(target_y)]

    c_train_accs = [accuracy_score(
        target_y[np.logical_and(target_y == c, attack_test_y == 1)],
        np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 1)], axis=1)) * 100
        for c in np.unique(target_y)]
    c_test_accs = [accuracy_score(
        target_y[np.logical_and(target_y == c, attack_test_y == 0)],
        np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 0)], axis=1)) * 100
        for c in np.unique(target_y)]

    x = PrettyTable()
    x.float_format = '.2'
    x.add_column("Class", labels)
    x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
    x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
    x.add_column("Attack Precision", np.round(precisions, 2))
    x.add_column("Attack Accuracy", np.round(accuracies, 2))
    x.add_column("Attack Recall", np.round(recalls, 2))
    x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
    x.add_column("Percentage of Data", np.round(
        np.array([len(target_y[target_y == c]) / len(target_y) * 100
                  for c in np.unique(target_y)]), 2))
    print(x.get_string(title='Per Class Evaluation'))
def classifier_evaluation(ytrue, ypred):
    """Compute key performance metrics."""
    from sklearn.metrics.classification import (accuracy_score, precision_score, recall_score)
    return {
        "accuracy_score": accuracy_score(ytrue, ypred),
        "precision_score": precision_score(ytrue, ypred),
        "recall_score": recall_score(ytrue, ypred)
    }
def __evaluate(self, modelFactory, x, y):
    """
    Perform the cross validation
    :param modelFactory: a factory that builds a model
    :param x: the evaluation data
    :param y: the evaluation classes
    """
    # Creating KFold
    kf = KFold(self.folds, shuffle=True, random_state=None)
    print("=============================" + str(self.folds) +
          "-fold Cross-Validation training and testing ============================= \n")
    i = 1

    # If the number of classes is not given, use the classes that we have
    if not self.numClasses:
        self.numClasses = len(set(y))

    # A list of results to be used to see how well the model is doing over the folds
    tableResults = []

    # Loop through the folds separation of data
    for trainIndex, testIndex in kf.split(x):
        # print(type(trainIndex))
        # Build a model adapter using a factory
        model = modelFactory.create()
        # A print to see if it is ok
        print(" ============== Fold ", i, "============")
        trainDocs, testDocs = x[trainIndex], x[testIndex]
        trainCats, testCats = y[trainIndex], y[testIndex]
        # If we want the categories to be represented as a binary array, here is where we do that
        # TODO: Categorical class error representation on evaluating the classes returned by the model

        # Using the adapter to fit our model
        model.fit(trainDocs, trainCats, epochs=self.epochs, batch_size=len(trainIndex))
        # Predicting it
        pred = model.predict(testDocs, testCats)
        print(pred)

        # Getting the scores
        accuracy = accuracy_score(testCats, pred)
        recall = recall_score(testCats, pred, average='weighted')
        precision = precision_score(testCats, pred, average='weighted')
        f1 = f1_score(testCats, pred, average='weighted')

        # Appending it to the result table
        tableResults.append({
            'result': 'result',
            'accuracy': accuracy,
            'recall': recall,
            'precision': precision,
            'f1': f1
        })
        i += 1

    self.tableResults = tableResults
def show_metrics(model, X_enc, y_enc, show_confusion=False):
    pr = model.predict_classes(X_enc)
    yh = y_enc.argmax(2)
    fyh, fpr = decode_results(yh, pr)
    print('Accuracy:', accuracy_score(fyh, fpr))
    print('F1:', f1_score(fyh, fpr, average='weighted'))
    print('Precision (per class: %s)' % labels)
    print(precision_score(fyh, fpr, average=None))
    print('Recall (per class: %s)' % labels)
    print(recall_score(fyh, fpr, average=None))
    if show_confusion:
        print('Confusion matrix:')
        print(confusion_matrix(fyh, fpr))
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test):
    train_start = datetime.now()
    model.fit(X_train, Y_train)
    train_duration_sec = (datetime.now() - train_start).seconds

    test_start = datetime.now()
    Y_pred = model.predict(X_test)
    test_duration_sec = (datetime.now() - test_start).seconds

    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average="weighted")
    recall = recall_score(Y_test, Y_pred, average="weighted")

    return dict(accuracy=float(accuracy),
                precision=float(precision),
                recall=float(recall),
                train_duration_sec=train_duration_sec,
                test_duration_sec=test_duration_sec)
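# Hedged usage sketch (illustrative only, not from the original source): driving
# train_and_evaluate_model above with a standard scikit-learn classifier and split.
from datetime import datetime  # used inside train_and_evaluate_model
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score  # used inside the helper
from sklearn.model_selection import train_test_split

X_demo, y_demo = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.3, random_state=0)
report = train_and_evaluate_model(RandomForestClassifier(n_estimators=50), X_tr, y_tr, X_te, y_te)
print(report)  # dict with accuracy, precision, recall and the two durations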
def myaccuracy(raw_file, result_file):
    df = pd.read_csv(result_file, sep='\t', header=None, names=['pred_0', 'pred_1'])
    test_df = pd.read_csv(raw_file, sep='\t', header=None,
                          names=['idx', 'question', 'relation', 'label'])
    df["pred"] = df.apply(lambda row: func(row["pred_1"], row["pred_0"]), axis=1)

    f1 = f1_score(y_true=test_df.label, y_pred=df.pred)
    acc = accuracy_score(y_true=test_df.label, y_pred=df.pred)
    p = precision_score(y_true=test_df.label, y_pred=df.pred)
    r = recall_score(y_true=test_df.label, y_pred=df.pred)
    # print("accuracy: ", acc)
    # print("precision: ", p)
    # print("recall: ", r)
    # print("f1: ", f1)

    # df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df["idx"] = test_df.idx
    df["group_sort"] = df["pred_1"].groupby(df["idx"]).rank(ascending=0, method="dense")
    df["candidate"] = test_df.relation
    # test_df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df.drop_duplicates(subset=['idx', 'group_sort'], keep='first', inplace=True)

    true_relation = test_df.loc[test_df["label"] == 1]
    pred_relation = df.loc[(df["group_sort"] == 1.0)]
    # print(pred_relation.tail())
    # print(true_relation.tail())
    new_df = pd.merge(true_relation, pred_relation, how="inner")
    new_df["correct"] = new_df.apply(lambda row: row["relation"] == row["candidate"], axis=1)
    c = new_df.loc[new_df["correct"] == True]
    correct = c.idx.count()
    total = new_df.idx.count()
    print("my_accuracy: {}, {}/{}".format(correct / total, correct, total))
def scores(y_test, predictions, pp, clf):
    print()
    if pp == 'Y':
        print('Scores After Preprocessing :')
    else:
        print('Scores Before Preprocessing :')
    print('Classifier = {clf}'.format(clf=clf))
    print('Accuracy score = {accuracy}'.format(accuracy=accuracy_score(y_test, predictions)))
    print('Precision score = {precision}'.format(precision=precision_score(y_test, predictions)))
    print('Recall score = {recall}'.format(recall=recall_score(y_test, predictions)))
    print('F1 Score = {f1score}'.format(f1score=f1_score(y_test, predictions)))
    print('ROC AUC = {roc_auc}'.format(roc_auc=roc_auc_score(y_test, predictions)))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
def metric_overall_outlier(scores, marks, title=None):
    """Metric global outlier results, i.e., precision/recall and AUC value.

    :param scores: List, summed scores of each widget(i).
    :param marks: List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
        If there is one outlier in the related permission, then the widget is outlier.
    :param title: String, file name used to save the plot, `None` means not to save.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics.ranking import roc_auc_score
    from sklearn.metrics.classification import precision_score, recall_score

    # get global outlier mark
    y_true = [1 if 1 in marks[i] else 0 for i in range(len(scores))]

    # compute precision, recall curve and auc value
    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    auc = roc_auc_score(y_true, scores)

    # print top-k precision, recall, and AUC value
    k = sum(y_true)
    print('overall({}/{}), p/r: {}, AUC: {}'.format(
        k, len(y_true), round(pk[k - 1], 4), round(auc, 4)
    ))

    # save plot
    if title is not None:
        path_save = os.path.join('{}.pdf'.format(title))
        plot_precision_recall('Overall', list(range(1, len(y_true))), pk, rk, path_save)
def get_classification_metrics(ground_truth_labels, predicted_labels):
    classification_metric_dict = dict({})
    classification_metric_dict['accuracy'] = accuracy_score(ground_truth_labels, predicted_labels)
    classification_metric_dict['precision'] = precision_score(ground_truth_labels, predicted_labels,
                                                              average='weighted')
    classification_metric_dict['recall'] = recall_score(ground_truth_labels, predicted_labels,
                                                        average='weighted')
    classification_metric_dict['f1_score'] = f1_score(ground_truth_labels, predicted_labels,
                                                      average='weighted')
    classification_metric_dict['brier_score_loss'] = brier_score_loss(ground_truth_labels, predicted_labels)
    classification_metric_dict['matthews_corr_coef'] = matthews_corrcoef(ground_truth_labels, predicted_labels)
    classification_metric_dict['jaccard_score'] = jaccard_score(ground_truth_labels, predicted_labels,
                                                                average='weighted')
    classification_metric_dict['cohen_kappa_score'] = cohen_kappa_score(ground_truth_labels, predicted_labels)
    return classification_metric_dict
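# Hedged usage sketch (illustrative labels): get_classification_metrics above mixes
# weighted-average metrics with binary-only ones (brier_score_loss expects binary labels
# or probabilities), so a binary ground truth is assumed here.
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             brier_score_loss, matthews_corrcoef, jaccard_score,
                             cohen_kappa_score)  # used inside the helper

y_true_demo = [0, 1, 1, 0, 1, 0, 1, 1]
y_pred_demo = [0, 1, 0, 0, 1, 1, 1, 1]
for name, value in get_classification_metrics(y_true_demo, y_pred_demo).items():
    print(name, round(value, 4))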
test_documents, "section", class_map, len(ipc_sections)) print( "=============================== Predicting test data ===============================" ) # Predicting the class for each word vector in the database real = [] pred = [] for doc, ipc in test_embedding_generator: result = model.predict_one(doc) pred.append(class_map[result]) #adding the result to the predicted vector real.append(class_map[numpy.argmax( ipc)]) #Adding the real value to de real class vector #Calculating the metric F1, Precision, Accuracy and Recall accuracy = accuracy_score(real, pred) recall = recall_score(real, pred, average='weighted') precision = precision_score(real, pred, average='weighted') f1 = f1_score(real, pred, average='weighted') print("Accuracy " + str(accuracy), "Recall " + str(recall), "Precision " + str(precision), "F1 " + str(f1)) result_string += "Accuracy " + str(accuracy) + " Recall " + str( recall) + " Precision " + str(precision) + " F1 " + str(f1) + "\n" f = open(result_file_name, "w") f.write("Database: " + training_documents_collection) f.write("embedding matrix: " + str(maxWords) + " " + str(embeddingSize)) f.write("epochs: " + str(epochs)) f.write("layers : " + str(layers)) f.write(result_string) f.close()
sklearn_mnb = sklearn_mnb.fit(X_train, y_train)
sklearn_y_pred = sklearn_mnb.predict(X_test)
assert (my_y_pred == sklearn_y_pred).all()

###### my defined fasttext
train_data, test_data = train_test_split(processed_data[['label', 'item']],
                                         test_size=0.1, random_state=2020)
fasttext = FastText(class_num=3, class_type='multi-class', ngram_range=2)
fasttext.fit(train_data['item'], train_data['label'], epochs=5)
y_pred = fasttext.predict(test_data['item'])
y_true = fasttext.y_encoder.transform(test_data['label'])
macro_f1 = f1_score(y_true, y_pred, average='macro')
macro_precision = precision_score(y_true, y_pred, average='macro')
macro_recall = recall_score(y_true, y_pred, average='macro')

##### textCNN
## multi-class test
train_data, test_data = train_test_split(processed_data[['subject', 'processed_item']],
                                         test_size=0.1, random_state=2020)
text_cnn = TextCNN(class_num=4, class_type='multi-class')
text_cnn.fit(train_data['processed_item'], train_data['subject'],
             validation_data=(test_data['processed_item'], test_data['subject']),
             epochs=2)
# exit()
modelCheckpoint = ModelCheckpoint(parameters['modelCheckpointPath'] + 'fold_' + str(i))
kerasAdapter.fit(dataTrainGenerator, epochs=epochs, batch_size=len(dataTrainGenerator),
                 validationDataGenerator=dataTestGenerator,
                 validationSteps=len(dataTestGenerator),
                 callbacks=[modelCheckpoint, configSaver])
result = kerasAdapter.predict(dataTestGenerator, batch_size=parameters['batchSize'])
testClasses = classes[testIndex]
metrics = dict()
metrics['fscore'] = f1_score(testClasses, result, average='weighted')
metrics['precision'] = precision_score(testClasses, result, average='weighted')
metrics['recall'] = recall_score(testClasses, result, average='weighted')
metrics['auc'] = roc_auc_score(testClasses, result, average='weighted')
metrics['fscore_b'] = f1_score(testClasses, result)
metrics['precision_b'] = precision_score(testClasses, result)
metrics['recall_b'] = recall_score(testClasses, result)
metrics['auc_b'] = roc_auc_score(testClasses, result)
metrics['kappa'] = cohen_kappa_score(testClasses, result)
metrics['accuracy'] = accuracy_score(testClasses, result)
tn, fp, fn, metrics['tp_rate'] = confusion_matrix(testClasses, result).ravel()
img_dir = 'C:/Users/Administrator/Desktop/Normal'
for _ in os.listdir(img_dir):
    res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
    print(res)
    if res['desc'] == 'Not Blurry':
        preds.append(0)
    else:
        preds.append(1)
    gts.append(0)

img_dir = 'C:/Users/Administrator/Desktop/Blur'
for _ in os.listdir(img_dir):
    res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
    print(res)
    if res['desc'] == 'Not Blurry':
        preds.append(0)
    else:
        preds.append(1)
    gts.append(1)

tok = time.time()
print('FPS={}'.format(len(os.listdir(img_dir)) / (tok - tik)))
print(confusion_matrix(gts, preds))
print('Precision = %f' % precision_score(gts, preds))
print('Recall = %f' % recall_score(gts, preds))
print('Accuracy = %f' % accuracy_score(gts, preds))
# real.append(class_map[numpy.argmax(ipc)])  # adding the real value to the real class vector
for doc in test_documents:
    result = model.predict_one(pickle.loads(doc['embedding']))
    pred.append(class_map[result])  # adding the result to the predicted vector
    real.append(doc['ipc_classes'][0][0])
    all_class.append(doc['ipc_classes'])
print(pred)
print(real)

# Calculating the metrics: F1, Precision, Accuracy and Recall
accuracy = accuracy_score(real, pred)
recall = recall_score(real, pred, average='weighted')
recall_per_class = recall_score(real, pred, average=None)
precision = precision_score(real, pred, average='weighted')
precision_per_class = precision_score(real, pred, average=None)
f1 = f1_score(real, pred, average='weighted')
f1_per_class = f1_score(real, pred, average=None)

results_per_class = dict()
for i in range(0, len(recall_per_class)):
    if not class_map[i] in results_per_class.keys():
        results_per_class[class_map[i]] = []
    results_per_class[class_map[i]].append(recall_per_class[i])
    results_per_class[class_map[i]].append(precision_per_class[i])
    results_per_class[class_map[i]].append(f1_per_class[i])

# list.sort() returns None, so sort out of place to actually pass the labels
matrix = confusion_matrix(real, pred, labels=sorted(ipc_sections))

# plotting
x_train = []
for i in train:
    y_train.append(features[i][6])
    tmp = [features[i][0], features[i][1], features[i][2],
           features[i][3], features[i][4], features[i][5]]
    x_train.append(tmp)

y_test = []
x_test = []
for i in test:
    y_test.append(features[i][6])
    tmp = [features[i][0], features[i][1], features[i][2],
           features[i][3], features[i][4], features[i][5]]
    x_test.append(tmp)

lr.fit(x_train, y_train)
lrPredTest = lr.predict(x_test)
lrPrecisionTest = precision_score(y_test, lrPredTest)
lrRecallTest = recall_score(y_test, lrPredTest)
lrF1Test = f1_score(y_test, lrPredTest)
lrAvgPrecision += lrPrecisionTest
lrAvgRecall += lrRecallTest
lrAvgF1 += lrF1Test

print "log reg completed in ", time.time() - start, " s"
print "lr:\n Precision {}\n Recall {}\n F1 {}\n".format(lrAvgPrecision / 5, lrAvgRecall / 5, lrAvgF1 / 5)

start = time.time()

"""RANDOM FOREST"""
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=5)
rfAvgPrecision = 0.0
def evaluate_results(result, target_def, n_shadow):
    attack_test_y = result['attack_test_y']
    attack_test_x = result['attack_test_x']
    preds = result['preds']
    target_y = result['target_y']
    target_x = result['target_x']
    labels = result['labels']

    # INFORMATION ABOUT THE MODEL UNDER ATTACK
    x = PrettyTable(['Model Definition', 'Training Accuracy', 'Testing Accuracy'])
    x.float_format = ".2"
    target_preds = np.argmax(attack_test_x, axis=1)
    train_acc = accuracy_score(target_y[attack_test_y == 1], target_preds[attack_test_y == 1])
    test_acc = accuracy_score(target_y[attack_test_y == 0], target_preds[attack_test_y == 0])
    x.add_row([target_def.split(os.path.dirname(os.getcwd()))[-1], train_acc * 100, test_acc * 100])
    print(x.get_string(title='Target Model'))

    # INFORMATION ABOUT THE OVERALL ATTACK EFFECTIVENESS
    cols = ['Num Shadow', 'Accuracy', 'Precision', 'Recall', 'F-1']
    x = PrettyTable(cols)
    x.float_format = ".2"
    p = np.argmax(preds, axis=1)
    x.add_row([n_shadow,
               accuracy_score(attack_test_y, p) * 100,
               precision_score(attack_test_y, p) * 100,
               recall_score(attack_test_y, p) * 100,
               f1_score(attack_test_y, p) * 100])
    print(x.get_string(title='Attack Aggregate'))

    # noinspection PyShadowingNames
    def by_class_evaluation(attack_test_y, target_y, p, attack_test_x, labels=None):
        if labels is None:
            labels = np.unique(target_y)

        precisions = [precision_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                      for c in np.unique(target_y)]
        accuracies = [accuracy_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                      for c in np.unique(target_y)]
        f1_scores = [f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                     for c in np.unique(target_y)]
        recalls = [recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                   for c in np.unique(target_y)]

        c_train_accs = [accuracy_score(
            target_y[np.logical_and(target_y == c, attack_test_y == 1)],
            np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 1)], axis=1)) * 100
            for c in np.unique(target_y)]
        c_test_accs = [accuracy_score(
            target_y[np.logical_and(target_y == c, attack_test_y == 0)],
            np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 0)], axis=1)) * 100
            for c in np.unique(target_y)]

        x = PrettyTable()
        x.float_format = '.2'
        x.add_column("Class", labels)
        x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
        x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
        x.add_column("Attack Precision", np.round(precisions, 2))
        x.add_column("Attack Accuracy", np.round(accuracies, 2))
        x.add_column("Attack Recall", np.round(recalls, 2))
        x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
        x.add_column("Percentage of Data", np.round(
            np.array([len(target_y[target_y == c]) / len(target_y) * 100
                      for c in np.unique(target_y)]), 2))
        print(x.get_string(title='Per Class Evaluation'))

    by_class_evaluation(attack_test_y, target_y, p, attack_test_x, labels=labels)

    return {
        'attack_test_y': attack_test_y,
        'attack_test_x': attack_test_x,
        'preds': preds,
        'target_y': target_y,
        'target_x': target_x,
        'target_def': target_def,
        'n_shadow': n_shadow,
        'labels': labels
    }
dataframe = pd.read_csv("train.csv", sep=',',header=0,names=column_names,index_col=0,usecols=[0,1,2,3,4,5,6,7,8,10,11] ,nrows =set_sizes[i]) Y = dataframe["Short_or_long"] X = dataframe[["vendor_id","passenger_count","pickup_longitude","pickup_latitude","dropoff_longitude","dropoff_latitude"]] X_train = X.head(int(set_sizes[i]*0.7)) X_test = X.tail(int(set_sizes[i]*0.3)) Y_train = Y.head(int(set_sizes[i]*0.7)) Y_test = Y.tail(int(set_sizes[i]*0.3)) h = .02 # step size in the mesh logreg = linear_model.LogisticRegression(C=1e5) # we create an instance of Neighbours Classifier and fit the data. logreg.fit(X_train, Y_train) pred = logreg.predict(X_test) # The coefficients #print('Coefficients: \n', clf.coef_) # The mean squared error t=accuracy_score(Y_test, pred) tt=precision_score(Y_test, pred, average='weighted') #print(Y_tester_targets.size) #print(pred_test.size) print('Accuracy score: %.2f' % t) print('Precision: %.2f'% tt)
classifier = SupervisedDBNClassification(hidden_layers_structure=[10, 8],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.01,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=5000,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
classifier.fit(X_train, Y_train)

# Test
Y_pred = classifier.predict(X_test)
a = accuracy_score(Y_test, Y_pred)
print('Done.\nAccuracy: %f' % a)
print('Done.\nPrecision: %f' % precision_score(Y_test, Y_pred))
print('Done.\nRecall: %f' % recall_score(Y_test, Y_pred))
print('Done.\nf1 score: %f' % f1_score(Y_test, Y_pred))
# print('Done.\nf1 score: %f' % classification_report(Y_test, Y_pred))
# print('Done.\nf1 score: %f' % confusion_matrix(Y_test, Y_pred))

cm1 = confusion_matrix(Y_test, Y_pred)
print(cm1)
total1 = sum(sum(cm1))
accuracy1 = (cm1[0, 0] + cm1[1, 1]) / total1  # (TN + TP) / total, without the spurious +1 terms
print('Accuracy : ', accuracy1)
sensitivity1 = cm1[0, 0] / (cm1[0, 0] + cm1[0, 1])
print('Sensitivity : ', sensitivity1)
specificity1 = cm1[1, 1] / (cm1[1, 0] + cm1[1, 1])
# test_encode = []
# cont = 0
# while cont < 10:
#     test_encode.append(dataset.letra[cont].split())
#     cont += 1
# teste = label_encoder.transform(test_encode[0])

# Routines to feed the OneHotEncoder
onehot = OneHotEncoder()
int_encoded_fit = int_encoded_fit.reshape(len(int_encoded_fit), 1)
int_encoded_pred = int_encoded_pred.reshape(len(int_encoded_pred), 1)
letra_fit = onehot.fit_transform(int_encoded_fit)
letra_pred = onehot.transform(int_encoded_pred)

# Applying the SVM
clf.fit(letra_fit, label_train)
prediction = clf.predict(letra_pred)

print()
print("Recall {}".format(recall_score(label_test, prediction, average='weighted')))
print("Precision {}".format(precision_score(label_test, prediction, average='weighted')))
print("F1 {}".format(f1_score(label_test, prediction, average='weighted')))
print("Accuracy {}".format(accuracy_score(label_test, prediction)))
def run():
    paras, sents = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    print len(X[0])
    sents = np.array(sents)

    skf = StratifiedKFold(Y, n_folds=10)
    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')

    accs = []
    precs = []
    recs = []
    f1s = []
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        sent_train = sents[train_index]
        sent_test = sents[test_index]

        # cv = CountVectorizer(stop_words="english", ngram_range=(1, 1), min_df=5)
        # sent_train_counts = cv.fit_transform(sent_train)
        #
        # tf_transformer = TfidfTransformer(use_idf=True).fit(sent_train_counts)
        # sent_train_counts = tf_transformer.transform(sent_train_counts)
        #
        # sent_train_counts = sent_train_counts.toarray()
        #
        # print sent_train_counts.shape
        # print X_train.shape
        #
        # new_train = []
        # for i, j in zip(X_train, sent_train_counts):
        #     new_train.append(np.append(i, j))

        # fs = SelectKBest(chi2, k=24)
        # X_train = fs.fit_transform(X_train, y_train)

        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        print clf.coef_

        # sent_test_counts = cv.transform(sent_test)
        # sent_test_counts = tf_transformer.transform(sent_test_counts)
        #
        # sent_test_counts = sent_test_counts.toarray()
        #
        # new_test = []
        # for i, j in zip(X_test, sent_test_counts):
        #     new_test.append(np.append(i, j))

        # X_test = fs.transform(X_test)
        y_pred = clf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)

        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1

        for (index, test), (y_t, y_p) in zip(zip(test_index, X_test), zip(y_test, y_pred)):
            if y_t == y_p:
                # if paras[index]['prev_para']:
                #     f.write('%s\n' % paras[index]['prev_para']['sents'])
                f.write('%s\n' % sents[index])
                f.write('%s\n' % (y_t))
            else:
                # if paras[index]['prev_para']:
                #     f2.write('%s\n' % paras[index]['prev_para']['sents'])
                f2.write('%s\n' % sents[index])
                f2.write('%s\n' % (y_t))

    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
# data = data[np.isfinite(data['pred'])]
print(data)

# first
print("first:")
cnm = confusion_matrix(data['true'], data['pred'])
mat = np.matrix([[cnm[1][1], cnm[0][1]],
                 [cnm[0][0], cnm[1][0]]])
print(mat, "\n")

# second
print("second:")
acc = accuracy_score(data['true'], data['pred'])
print("accuracy: ", round(acc, 2))
per = precision_score(data['true'], data['pred'])
print("precision: ", round(per, 2))
rec = recall_score(data['true'], data['pred'])
print("recall: ", round(rec, 2))
f_m = f1_score(data['true'], data['pred'])
print("f-measure: ", round(f_m, 2), "\n")

# third
print("\nthird:")
from sklearn.metrics import roc_auc_score
data = pd.read_csv("D:/Sai/JavaDoc/Cousera/3/3_4/scores.csv")
y_train, y_test = classes[train_index], classes[test_index]

# model training
print(f'Building model {i}...')
classifier = RandomForestClassifier(n_estimators=10, criterion='gini',
                                    random_state=iteracao).fit(x_train, y_train)

# classifying the test set
y_pred = classifier.predict(x_test)

# performance metrics
aux_accuracy += accuracy_score(y_test, y_pred)
aux_f1_score += f1_score(y_test, y_pred)
aux_precision += precision_score(y_test, y_pred)
aux_recall += recall_score(y_test, y_pred)
conf_matrices += np.asarray(confusion_matrix(y_test, y_pred))

print(f'Model {i} finished and evaluated.')
i += 1

# results
print(f'\nITERATION #{iteracao} -----------------------')
print(f'Accuracy = {aux_accuracy / k_fold.n_splits}')
print(f'F1 Score = {aux_f1_score / k_fold.n_splits}')
print(f'Precision = {aux_precision / k_fold.n_splits}')
print(f'Recall = {aux_recall / k_fold.n_splits}')
print(f'Examples x Attributes = {tf_idf.shape}')
print(f'Confusion Matrix = \n{np.array(list(conf_matrices))}')
classifier_models = [LogisticRegression(random_state=0),
                     KNeighborsClassifier(n_neighbors=10, metric='minkowski', p=2),
                     SVC(kernel='linear', random_state=0),
                     SVC(kernel='rbf', random_state=0),
                     GaussianNB(),
                     DecisionTreeClassifier(criterion="entropy", random_state=0),
                     RandomForestClassifier(n_estimators=40, criterion="entropy", random_state=0)]
y_prdc_results = [model.fit(x_train, y_train).predict(x_test) for model in classifier_models]

# importing confusion matrix
from sklearn.metrics import confusion_matrix, classification

cm_results = [confusion_matrix(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
acc_results = [classification.accuracy_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
prec_results = [classification.precision_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
recal_results = [classification.recall_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
f1_results = [classification.f1_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]

x_label = ["LR", "KNN", "SVC(L)", "SVC(NL)", "NB", "DT", "RF"]

# Visualization of Results
plt.ylim(0, max(acc_results))
plt.bar(x_label, acc_results)
plt.title("Histogram view of accuracy")
plt.xlabel("Classification Models")
plt.ylabel("Accuracy")
plt.show()

plt.ylim(0, max(prec_results))
plt.bar(x_label, prec_results)
plt.title("Histogram view of precision")
labels, features = targetFeatureSplit(data)

### it's all yours from here forward!
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(features_train, labels_train)

numpos = 0
pred = clf.predict(features_test)
for i, item in enumerate(labels_test):
    print(i, item, pred[i])
    if item == 1 and item == pred[i]:
        numpos = numpos + 1

print("Number of true positives: ", numpos)
print("Precision: ", precision_score(labels_test, pred))
print("Recall: ", recall_score(labels_test, pred))
# print(len(labels_test))
# print(clf.score(features_test, labels_test))

predictions = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1]
true_labels = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]
print("Number of true positives: ", numpos)
print("Precision: ", precision_score(true_labels, predictions))
print("Recall: ", recall_score(true_labels, predictions))
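# Quick check of the hard-coded example above: the predictions flag 9 items as positive,
# 6 of them correctly (TP=6, FP=3) and miss 2 true positives (FN=2), so
# precision = 6/9 ≈ 0.667 and recall = 6/8 = 0.75, matching the printed scores.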