def get_results(results, instance_of_datasets, classifier_name, y_true, y_pred, file_dump):
    """Score one classifier run, record it in ``results`` and pickle it to disk.

    Accuracy plus micro- and macro-averaged precision, recall and F1 are
    computed from ``y_true`` / ``y_pred`` and stored, together with the raw
    label vectors, under ``results[instance_of_datasets][classifier_name]``.
    A summary is printed and two gzip-compressed pickles are written below the
    module-level ``dir_results`` directory:

    * ``single_<file_dump>_<instance_of_datasets>_<classifier_name>.zcp``
      holding only this run's metrics;
    * ``<file_dump>`` holding the whole updated ``results`` mapping.

    :param results: nested mapping ``dataset -> classifier -> metrics``;
        ``results[instance_of_datasets]`` must already exist.
    :param instance_of_datasets: key of the dataset the predictions belong to.
    :param classifier_name: key of the classifier that produced ``y_pred``.
    :param y_true: ground-truth labels.
    :param y_pred: predicted labels.
    :param file_dump: file name (relative to ``dir_results``) of the full dump;
        also embedded in the per-run file name.
    :return: ``results``, updated in place.
    """
    tmp_ = {
        "y_pred": y_pred,
        "y_true": y_true,
        "accuracy": accuracy_score(y_true, y_pred),
        "precision_micro": precision_score(y_true, y_pred, average="micro"),
        "precision_macro": precision_score(y_true, y_pred, average="macro"),
        "recall_micro": recall_score(y_true, y_pred, average="micro"),
        "recall_macro": recall_score(y_true, y_pred, average="macro"),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
    }

    # Use context managers so each gzip stream is flushed and closed right
    # away instead of whenever the garbage collector gets to the handle.
    single_path = "%s/single_%s_%s_%s.zcp" % (
        dir_results, file_dump, instance_of_datasets, classifier_name)
    with gzip.open(single_path, "wb+") as handle:
        cPickle.dump(tmp_, handle)

    results[instance_of_datasets][classifier_name] = tmp_
    print(classifier_name,
          "accuracy", tmp_["accuracy"],
          "f1 score_micro", tmp_["f1_micro"],
          "precision_micro", tmp_["precision_micro"],
          "recall_micro", tmp_["recall_micro"],
          "f1 score_macro", tmp_["f1_macro"],
          "precision_macro", tmp_["precision_macro"],
          "recall_macro", tmp_["recall_macro"])

    with gzip.open(dir_results + "/" + file_dump, "wb+") as handle:
        cPickle.dump(results, handle)
    return results
def calc_fit(model, metric, train_x, train_y, test_x, test_y, p):
    """Fit ``model`` on a feature subset and return the requested test metric.

    Every row of ``train_x`` and ``test_x`` is filtered with
    ``itertools.compress(row, p)``, i.e. only the positions where ``p`` is
    truthy are kept. The model is then fitted on the reduced training rows and
    scored on the reduced test rows.

    :param model: estimator whose ``fit`` returns an object with ``predict``.
    :param metric: ``'precision'``, ``'recall'`` or ``'accuracy'``; any other
        value returns the sum of the three.
    :param train_x: iterable of training feature rows.
    :param train_y: training labels.
    :param test_x: iterable of test feature rows.
    :param test_y: test labels.
    :param p: selector mask applied to every feature row.
    :return: the selected score, or precision + recall + accuracy.
    """
    # Build real lists: on Python 3 ``map`` is a one-shot iterator, which
    # estimators cannot consume as a sample matrix.
    train_x = [list(compress(row, p)) for row in train_x]
    test_x = [list(compress(row, p)) for row in test_x]

    clf = model.fit(train_x, train_y)
    predictions = clf.predict(test_x)

    # ``labels`` has to be passed by keyword. ``accuracy_score`` has no
    # ``labels`` parameter at all: the former third positional argument was
    # silently taken as ``normalize`` (truthy, so the value is unchanged).
    labels = [0, 1]
    if metric == 'precision':
        return precision_score(test_y, predictions, labels=labels)
    if metric == 'recall':
        return recall_score(test_y, predictions, labels=labels)
    if metric == 'accuracy':
        return accuracy_score(test_y, predictions)
    return (precision_score(test_y, predictions, labels=labels)
            + recall_score(test_y, predictions, labels=labels)
            + accuracy_score(test_y, predictions))
def metric_overall_outlier(scores, weights, marks, title=None):
    """Print top-n precision/recall and AUC of weighted per-item outlier scores.

    For every item ``i`` the score is ``sum(w * s)`` over the paired entries of
    ``weights[i]`` and ``scores[i]`` (pairs are cut to the shortest of
    ``weights[i]``, ``scores[i]`` and ``marks[i]``). An item is a true outlier
    when any of its marks equals 1. Precision and recall are evaluated for
    every cut-off ``k`` in ``1 .. len(items) - 1`` via pyod's ``get_label_n``.

    :param scores: per-item sequences of scores.
    :param weights: per-item sequences of weights, aligned with ``scores``.
    :param marks: per-item sequences of marks; the value 1 flags an outlier.
    :param title: when not ``None``, the precision/recall curves are saved to
        ``results/overall_<title>.pdf`` via ``plot_precision_recall``.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    # The private modules ``sklearn.metrics.ranking`` and
    # ``sklearn.metrics.classification`` are gone from current scikit-learn;
    # the public package exposes the same functions in every release.
    from sklearn.metrics import precision_score, recall_score, roc_auc_score

    y_true = []
    weighted_scores = []
    for i, scores_i in enumerate(scores):
        weighted_scores.append(
            sum((w * s for w, s, _ in zip(weights[i], scores_i, marks[i])), 0.0))
        y_true.append(1 if 1 in marks[i] else 0)

    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, weighted_scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))

    n = sum(y_true)
    if 1 <= n <= len(pk):
        # pk[j] / rk[j] hold the values for cut-off j + 1, so cut-off n lives
        # at index n - 1 (indexing with n reported n + 1 and could overflow).
        print('overall@{}'.format(n), len(y_true), pk[n - 1], rk[n - 1],
              roc_auc_score(y_true, weighted_scores))
    else:
        # No cut-off equal to the number of outliers was evaluated.
        print('overall@{}'.format(n), len(y_true), 0.0, 0.0, 0.0)

    if title is not None:
        fp_save = os.path.join('results', 'overall_' + title)
        plot_precision_recall(
            '', list(range(1, len(y_true))), pk, rk,
            path_save=fp_save + '.pdf'
        )
def metric_permission_based_outlier(scores, marks, target_labels, title=None):
    """Print per-label precision/recall/AUC of outlier scores and plot curves.

    For label ``i`` only the items with ``marks[j][i] != 0`` are evaluated; an
    item is a true outlier when its mark equals 1. Precision and recall are
    computed for every cut-off ``k`` in ``1 .. len(items) - 1`` via pyod's
    ``get_label_n``.

    :param scores: ``scores[j][i]`` is the score of item ``j`` for label ``i``.
    :param marks: ``marks[j][i]`` is the mark of item ``j`` for label ``i``
        (0 = skip the item, 1 = outlier, anything else = inlier).
    :param target_labels: label names, indexed like the second axis above.
    :param title: when not ``None``, precision and recall curves are saved as
        ``results_weighted/<title>_<label>_{precision,recall}.pdf``.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    # The private modules ``sklearn.metrics.ranking`` and
    # ``sklearn.metrics.classification`` are gone from current scikit-learn;
    # the public package exposes the same functions in every release.
    from sklearn.metrics import precision_score, recall_score, roc_auc_score

    for i, label_i in enumerate(target_labels):
        scores_i, y_true = [], []
        for j, item_scores in enumerate(scores):
            mark = marks[j][i]
            if mark != 0:
                scores_i.append(item_scores[i])
                y_true.append(1 if mark == 1 else 0)

        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))

        # Index of the cut-off that equals the number of true outliers.
        n = sum(y_true) - 1
        if 0 <= n < len(pk):
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), pk[n], rk[n],
                  roc_auc_score(y_true, scores_i))
        else:
            # Reached when there is no outlier or every item is an outlier.
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), 0.0, 0.0, 0.0)

        if title is not None:
            fp_save = os.path.join('results_weighted', title)
            cut_offs = list(range(1, len(y_true)))
            plot_curve('{}_{}_precision'.format(title, label_i), 'precision',
                       cut_offs, pk,
                       path_save=fp_save + '_{}_precision.pdf'.format(label_i))
            plot_curve('{}_{}_recall'.format(title, label_i), 'recall',
                       list(range(1, len(y_true))), rk,
                       path_save=fp_save + '_{}_recall.pdf'.format(label_i))
def Predict(self, inp, labels, classifier, folds, name, paramdesc):
    """Cross-validate ``classifier``, save its ROC plot and announce the scores.

    Samples whose label equals 2 are discarded. The remaining data is split
    with a stratified k-fold; for each fold the classifier is refitted, the
    precision, recall, accuracy and F1 of its predictions are accumulated and
    the fold's ROC curve is drawn. The averaged scores are wrapped with
    ``self.OutputResult`` and handed to ``Announce``. The figure is written to
    ``<self.configObject['outputdir']>/<name>.png``.

    :param inp: feature matrix (2-D array supporting boolean/index selection).
    :param labels: label array aligned with ``inp``.
    :param classifier: estimator providing ``fit``, ``predict`` and
        ``predict_proba``.
    :param folds: number of cross-validation folds.
    :param name: name used in the plot title and the output file name.
    :param paramdesc: parameter description forwarded to ``OutputResult``.
    :return: None
    """
    X, y = inp, labels
    X, y = X[y != 2], y[y != 2]
    # Not used further, but the unpacking fails fast if X is not 2-D.
    n_samples, n_features = X.shape

    cv = StratifiedKFold(y, n_folds=folds)
    mean_fpr = np.linspace(0, 1, 100)
    mean_tpr = 0.0

    # Running sums, in the order precision / recall / accuracy / F1.
    scorers = (precision_score, recall_score, accuracy_score, f1_score)
    totals = [0.0] * len(scorers)

    for i, (train, test) in enumerate(cv):
        fitted = classifier.fit(X[train], y[train])
        probas_ = fitted.predict_proba(X[test])
        pred_ = classifier.predict(X[test])
        for slot, scorer in enumerate(scorers):
            totals[slot] += scorer(y[test], pred_)

        # ROC curve of this fold, from the scores of the second class column.
        fpr, tpr, _ = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    _precision, _recall, _accuracy, _f1 = [total / folds for total in totals]

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic - {0}'.format(name))
    plt.legend(loc="lower right")
    plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
    plt.close()

    result = self.OutputResult(name, paramdesc, len(inp),
                               floor(labels.size / folds),
                               _precision, _recall, _accuracy, _f1)
    Announce(result)
def get_score(a, b_max):
    """Return accuracy and macro precision, recall and F1 for two labelings.

    ``a`` is reduced to class indices with ``argmax`` over its last axis and
    compared against ``b_max``.

    NOTE(review): the argmax of ``a`` is passed in scikit-learn's first
    (``y_true``) slot and ``b_max`` in the second; precision and recall are
    not symmetric, so confirm which argument is the ground truth.

    :param a: array of per-class scores; classes lie on the last axis.
    :param b_max: class indices to compare against.
    :return: tuple ``(accuracy, precision, recall, f1)``.
    """
    a_max = np.argmax(a, axis=-1)
    acc = accuracy_score(a_max, b_max)
    p, r, f1 = [
        scorer(a_max, b_max, average='macro')
        for scorer in (precision_score, recall_score, f1_score)
    ]
    return acc, p, r, f1
def run():
    """Run a 10-fold stratified evaluation of a most-frequent-class baseline.

    Features and labels come from ``create_dataset`` / ``get_features`` /
    ``get_ys``. For each fold the training texts are turned into token counts,
    a ``DummyClassifier(strategy="most_frequent")`` is fitted and accuracy,
    precision, recall and F1 on the test split are printed. Every test item is
    appended to ``results/correct.txt`` when it was classified correctly and
    to ``results/wrong.txt`` otherwise. The fold averages are printed last.

    :return: None
    """
    paras = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    skf = StratifiedKFold(Y, n_folds=10)

    accs, precs, recs, f1s = [], [], [], []

    # ``with`` guarantees both report files are flushed and closed, also when
    # a fold raises.
    with open('results/correct.txt', 'w') as f, \
            open('results/wrong.txt', 'w') as f2:
        for train_index, test_index in skf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]

            # Only raw counts are needed: the tf-idf matrices that used to be
            # computed here were never passed to the classifier.
            cv = CountVectorizer()
            X_train_counts = cv.fit_transform(X_train)
            clf = DummyClassifier(strategy="most_frequent").fit(
                X_train_counts, y_train)

            y_pred = clf.predict(cv.transform(X_test))

            acc = accuracy_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            accs.append(acc)
            precs.append(prec)
            recs.append(rec)
            f1s.append(f1)

            # Single-argument print calls behave the same on Python 2 and 3.
            print('Acc \t %s' % acc)
            print('Prec \t %s' % prec)
            print('Recall \t %s' % rec)
            print('F1 \t %s' % f1)

            for para, y_t, y_p in zip(X_test, y_test, y_pred):
                target = f if y_t == y_p else f2
                target.write('%s\n' % para)

    print('Avg Acc \t %s \t ' % np.mean(accs))
    print('Avg Prec \t %s' % np.mean(precs))
    print('Avg Recall \t %s' % np.mean(recs))
    print('Avg F1 \t %s' % np.mean(f1s))
def metrics(y_true, y_predict):
    """Return the weighted-average F1, recall and precision, in that order.

    An info message is logged before the scores are computed.

    :param y_true: ground-truth labels.
    :param y_predict: predicted labels.
    :return: tuple ``(f1, recall, precision)``.
    """
    logger.info("计算分类指标...")
    averaging = {"average": "weighted"}
    return (
        f1_score(y_true, y_predict, **averaging),
        recall_score(y_true, y_predict, **averaging),
        precision_score(y_true, y_predict, **averaging),
    )
def metric_permission_based_outlier(scores, marks, target_permissions, title=None):
    """Print permission-based outlier metrics: top-k precision and AUC.

    For each permission only the widgets related to it (mark != 0) are
    evaluated. Permissions whose related widgets are all outliers or all
    inliers are reported as ``error`` and skipped. Otherwise precision and
    recall are computed for every cut-off ``k`` in ``1 .. len(widgets) - 1``
    and the precision at ``k = number of outliers`` is printed with the AUC.

    :param scores: List, scores(i, j) of each widget(i) in each permission(j).
    :param marks: List, outlier marks(i, j) of each widget(i) in each
        permission(j). The value could be 0 (not related to the permission),
        1 (outlier), -1 (inlier).
    :param target_permissions: List of string, the `j`th permission name.
    :param title: String, file name prefix used to save the plot as
        ``<title>-<permission>.pdf``; `None` means not to save.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    # The private modules ``sklearn.metrics.ranking`` and
    # ``sklearn.metrics.classification`` are gone from current scikit-learn;
    # the public package exposes the same functions in every release.
    from sklearn.metrics import precision_score, recall_score, roc_auc_score

    for i, permission_i in enumerate(target_permissions):
        # collect the scores and labels of the widgets related to permission i
        scores_i, y_true = [], []
        for j, widget_scores in enumerate(scores):
            mark = marks[j][i]
            if mark != 0:
                scores_i.append(widget_scores[i])
                y_true.append(1 if mark == 1 else 0)

        # no positive or no negative labels: the metrics are undefined
        positives = sum(y_true)
        if positives == len(scores_i) or positives == 0:
            print('{}({}/{}), error'.format(
                permission_i, positives, len(scores_i)
            ))
            continue

        # compute precision, recall curve and auc value
        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))
        auc = roc_auc_score(y_true, scores_i)

        # print top-k precision and AUC value; the guard above ensures
        # 1 <= positives <= len(pk), so index positives - 1 is valid
        print('{}({}/{}), p/r: {}, AUC: {}'.format(
            permission_i, positives, len(scores_i),
            round(pk[positives - 1], 4), round(auc, 4)
        ))

        # save plot
        if title is not None:
            path_save = os.path.join('{}-{}.pdf'.format(title, permission_i))
            plot_precision_recall(
                permission_i, list(range(1, len(y_true))), pk, rk, path_save
            )
def by_class_evaluation(attack_test_y, target_y, p, attack_test_x, labels=None):
    """Print a per-class table of target-model and attack performance.

    For every distinct value ``c`` of ``target_y`` the rows with
    ``target_y == c`` are selected and the table lists, in percent:

    * target accuracy on the rows with ``attack_test_y == 1`` ("Train") and
      with ``attack_test_y == 0`` ("Test"), where the target prediction is the
      argmax of ``attack_test_x`` along axis 1;
    * precision, accuracy, recall and F1 of ``p`` against ``attack_test_y``;
    * the share of rows belonging to the class.

    :param attack_test_y: array of 0/1 flags, aligned with ``target_y``.
    :param target_y: array of class labels.
    :param p: array of attack predictions, aligned with ``attack_test_y``.
    :param attack_test_x: 2-D array whose row-wise argmax is compared with
        ``target_y``.
    :param labels: values shown in the "Class" column; defaults to the
        distinct values of ``target_y``.
    :return: None
    """
    classes = np.unique(target_y)
    if labels is None:
        labels = classes

    def attack_metric(scorer):
        # Score the attack predictions separately inside every class.
        return [
            scorer(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in classes
        ]

    def target_accuracy(flag):
        # Target-model accuracy per class, restricted to rows with this flag.
        values = []
        for c in classes:
            rows = np.logical_and(target_y == c, attack_test_y == flag)
            values.append(
                accuracy_score(target_y[rows],
                               np.argmax(attack_test_x[rows], axis=1)) * 100)
        return values

    precisions = attack_metric(precision_score)
    accuracies = attack_metric(accuracy_score)
    f1_scores = attack_metric(f1_score)
    recalls = attack_metric(recall_score)
    c_train_accs = target_accuracy(1)
    c_test_accs = target_accuracy(0)

    share = np.array([
        len(target_y[target_y == c]) / len(target_y) * 100 for c in classes
    ])

    table = PrettyTable()
    table.float_format = '.2'
    table.add_column("Class", labels)
    table.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
    table.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
    table.add_column("Attack Precision", np.round(precisions, 2))
    table.add_column("Attack Accuracy", np.round(accuracies, 2))
    table.add_column("Attack Recall", np.round(recalls, 2))
    table.add_column("Attack F-1 Score", np.round(f1_scores, 2))
    table.add_column("Percentage of Data", np.round(share, 2))
    print(table.get_string(title='Per Class Evaluation'))
def __evaluate(self, modelFactory, x, y):
    """Run a shuffled k-fold cross-validation and store the per-fold scores.

    A fresh model is created by ``modelFactory`` for every fold, fitted on the
    training split with ``self.epochs`` epochs and one batch covering the
    whole split, and evaluated on the held-out split. Accuracy and the
    weighted recall, precision and F1 of each fold are collected in
    ``self.tableResults``. If ``self.numClasses`` is falsy it is set to the
    number of distinct values in ``y``.

    :param modelFactory: a factory whose ``create()`` builds a model adapter
    :param x: the evaluation data (indexable by an index array)
    :param y: the evaluation classes (indexable by an index array)
    :return: None
    """
    splitter = KFold(self.folds, shuffle=True, random_state=None)
    print(
        "=============================" + str(self.folds) +
        "-fold Cross-Validation training and testing ============================= \n"
    )

    # If the number of classes is not given, use the classes that we have
    if not self.numClasses:
        self.numClasses = len(set(y))

    # One entry per fold, used to see how well the model does over the folds
    tableResults = []
    for i, (trainIndex, testIndex) in enumerate(splitter.split(x), start=1):
        model = modelFactory.create()
        print(" ============== Fold ", i, "============")

        trainDocs, trainCats = x[trainIndex], y[trainIndex]
        testDocs, testCats = x[testIndex], y[testIndex]

        # TODO: Categorical class error representation on evaluating the
        # classes returned by the model
        model.fit(trainDocs, trainCats, epochs=self.epochs,
                  batch_size=len(trainIndex))

        pred = model.predict(testDocs, testCats)
        print(pred)

        fold_scores = {'result': 'result'}
        fold_scores['accuracy'] = accuracy_score(testCats, pred)
        fold_scores['recall'] = recall_score(testCats, pred, average='weighted')
        fold_scores['precision'] = precision_score(testCats, pred,
                                                   average='weighted')
        fold_scores['f1'] = f1_score(testCats, pred, average='weighted')
        tableResults.append(fold_scores)

    self.tableResults = tableResults
def train_and_eval(output, ngram_range=(1, 1), max_features=None, max_df=1.0, C=1.0):
    """Train a tf-idf + logistic-regression newsgroup classifier and score it.

    The 20-newsgroups train split is used for fitting and the test split for
    evaluation. The average prediction time per test document is printed and
    the fitted pipeline is persisted with ``joblib.dump``.

    :param output: destination passed to ``joblib.dump`` for the fitted pipeline
    :param ngram_range: ngram range
    :param max_features: the number of maximum features
    :param max_df: max document frequency ratio
    :param C: Inverse of regularization strength for LogisticRegression
    :return: dict with the keys ``accuracy``, ``recall`` and ``f1`` (recall
        and F1 are weighted averages)
    """
    train_data = fetch_20newsgroups(subset='train')
    test_data = fetch_20newsgroups(subset='test')

    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression(multi_class='auto')),
    ])
    pipeline.set_params(
        tfidf__ngram_range=ngram_range,
        tfidf__max_features=max_features,
        tfidf__max_df=max_df,
        clf__C=C,
    )
    print(pipeline.get_params().keys())

    pipeline.fit(train_data.data, train_data.target)

    # Time the prediction step only.
    start_time = time()
    predictions = pipeline.predict(test_data.data)
    inference_time = time() - start_time
    expected = test_data.target
    avg_inference_time = 1.0 * inference_time / len(expected)
    print("Avg. inference time: {}".format(avg_inference_time))

    metrics = {
        'accuracy': accuracy_score(expected, predictions),
        'recall': recall_score(expected, predictions, average='weighted'),
        'f1': f1_score(expected, predictions, average='weighted'),
    }

    joblib.dump(pipeline, output)
    return metrics
def classifier_evaluation(ytrue, ypred):
    """Compute key performance metrics of a classifier.

    :param ytrue: ground-truth labels.
    :param ypred: predicted labels.
    :return: dict with the keys ``accuracy_score``, ``precision_score`` and
        ``recall_score``; precision and recall use scikit-learn's default
        averaging.
    """
    # Import from the public package: the private module
    # ``sklearn.metrics.classification`` is gone from current scikit-learn.
    from sklearn.metrics import accuracy_score, precision_score, recall_score

    return {
        "accuracy_score": accuracy_score(ytrue, ypred),
        "precision_score": precision_score(ytrue, ypred),
        "recall_score": recall_score(ytrue, ypred),
    }
def show_metrics(model, X_enc, y_enc, show_confusion=False):
    """Print accuracy, weighted F1 and per-class precision/recall of ``model``.

    Predictions come from ``model.predict_classes(X_enc)``; the reference
    classes are the argmax of ``y_enc`` along axis 2. Both are passed through
    ``decode_results`` before scoring. The module-level ``labels`` is only
    used in the printed headings.

    :param model: model exposing ``predict_classes``.
    :param X_enc: encoded inputs.
    :param y_enc: encoded targets with the classes on axis 2.
    :param show_confusion: also print the confusion matrix when true.
    :return: None
    """
    predicted = model.predict_classes(X_enc)
    expected = y_enc.argmax(2)
    fyh, fpr = decode_results(expected, predicted)

    print('Accuracy:', accuracy_score(fyh, fpr))
    print('F1:', f1_score(fyh, fpr, average='weighted'))
    for heading, scorer in (('Precision (per class: %s)', precision_score),
                            ('Recall (per class: %s)', recall_score)):
        print(heading % labels)
        print(scorer(fyh, fpr, average=None))

    if not show_confusion:
        return
    print('Confusion matrix:')
    print(confusion_matrix(fyh, fpr))
def run_grid_search(grid_search, show_evaluation=True):
    """Fit a grid search on the training split and log test-set metrics.

    The data comes from ``split_dataset()``. After fitting, the test split is
    predicted; when ``show_evaluation`` is true the macro recall, the
    per-class precision/recall/F-score/support and the confusion matrix are
    logged at debug level.

    :param grid_search: search object exposing ``fit`` and ``predict``.
    :param show_evaluation: log the evaluation metrics when true.
    :return: None
    """
    X_train, X_test, y_train, y_test = split_dataset()
    grid_search.fit(X_train, y_train)
    predictions = grid_search.predict(X_test)

    if not show_evaluation:
        return

    macro_recall = recall_score(y_test, predictions, average="macro")
    logger.debug("macro_recall: %s", macro_recall)
    logger.debug(precision_recall_fscore_support(y_test, predictions))
    logger.debug(confusion_matrix(y_test, predictions))
def balanced_accuracy_score(y_true, y_pred, sample_weight=None):
    """Compute the balanced accuracy.

    The balanced accuracy is used in binary classification problems to deal
    with imbalanced datasets. It is defined as the arithmetic mean of
    sensitivity (true positive rate) and specificity (true negative rate),
    or the average recall obtained on either class.

    The best value is 1 and the worst value is 0.

    Read more in the :ref:`User Guide <balanced_accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like
        Ground truth (correct) target values.

    y_pred : 1d array-like
        Estimated targets as returned by a classifier.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    balanced_accuracy : float.
        The average of sensitivity and specificity

    Raises
    ------
    ValueError
        If the targets are not of binary type.

    See also
    --------
    recall_score

    References
    ----------
    .. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010).
           The balanced accuracy and its posterior distribution.
           Proceedings of the 20th International Conference on Pattern
           Recognition, 3121-24.

    Examples
    --------
    >>> from decog.metrics import balanced_accuracy_score
    >>> y_true = [0, 1, 0, 0, 1, 0]
    >>> y_pred = [0, 1, 0, 0, 0, 1]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.625

    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)

    if y_type == 'binary':
        # Macro-averaged recall over both classes is the mean of sensitivity
        # and specificity, so the score is a thin wrapper over recall_score.
        return recall_score(y_true, y_pred,
                            pos_label=None,
                            average='macro',
                            sample_weight=sample_weight)

    raise ValueError('Balanced pred is only meaningful '
                     'for binary classification problems.')
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test):
    """Fit ``model``, predict the test set and return scores and timings.

    :param model: estimator exposing ``fit`` and ``predict``.
    :param X_train: training features.
    :param Y_train: training labels.
    :param X_test: test features.
    :param Y_test: test labels.
    :return: dict with ``accuracy``, weighted ``precision`` and ``recall``
        (plain floats) and ``train_duration_sec`` / ``test_duration_sec``
        (elapsed wall-clock time in whole seconds).
    """
    train_start = datetime.now()
    model.fit(X_train, Y_train)
    # ``timedelta.seconds`` is only the sub-day component and would wrap for
    # runs longer than 24 hours; ``total_seconds()`` covers the whole span.
    # ``int()`` keeps the whole-second integer the callers already receive.
    train_duration_sec = int((datetime.now() - train_start).total_seconds())

    test_start = datetime.now()
    Y_pred = model.predict(X_test)
    test_duration_sec = int((datetime.now() - test_start).total_seconds())

    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average="weighted")
    recall = recall_score(Y_test, Y_pred, average="weighted")

    return dict(accuracy=float(accuracy),
                precision=float(precision),
                recall=float(recall),
                train_duration_sec=train_duration_sec,
                test_duration_sec=test_duration_sec)
def myaccuracy(raw_file, result_file):
    """Print how often the top-ranked candidate relation is the labelled one.

    ``result_file`` is a headerless tab-separated file with the columns
    ``pred_0`` and ``pred_1``; ``raw_file`` is a headerless tab-separated file
    with the columns ``idx``, ``question``, ``relation`` and ``label``. The
    two files are combined row by row. Within every ``idx`` group the rows are
    dense-ranked by descending ``pred_1``; the rank-1 candidate of each group
    is joined with the rows labelled 1 and the fraction of joined rows whose
    relation equals the candidate is printed.

    :param raw_file: path of the labelled input file.
    :param result_file: path of the prediction file.
    :return: None
    """
    df = pd.read_csv(result_file, sep='\t', header=None,
                     names=['pred_0', 'pred_1'])
    test_df = pd.read_csv(raw_file, sep='\t', header=None,
                          names=['idx', 'question', 'relation', 'label'])

    df["pred"] = df.apply(
        lambda row: func(row["pred_1"], row["pred_0"]), axis=1)

    # Row-level metrics; computed but not reported.
    gold, guess = test_df.label, df.pred
    f1 = f1_score(y_true=gold, y_pred=guess)
    acc = accuracy_score(y_true=gold, y_pred=guess)
    p = precision_score(y_true=gold, y_pred=guess)
    r = recall_score(y_true=gold, y_pred=guess)

    # Rank the candidates of every question by descending pred_1.
    df["idx"] = test_df.idx
    ranks = df["pred_1"].groupby(df["idx"]).rank(ascending=0, method="dense")
    df["group_sort"] = ranks
    df["candidate"] = test_df.relation
    df.drop_duplicates(subset=['idx', 'group_sort'], keep='first',
                       inplace=True)

    true_relation = test_df.loc[test_df["label"] == 1]
    pred_relation = df.loc[(df["group_sort"] == 1.0)]
    new_df = pd.merge(true_relation, pred_relation, how="inner")
    new_df["correct"] = new_df.apply(
        lambda row: row["relation"] == row["candidate"], axis=1)

    hits = new_df.loc[new_df["correct"] == True]
    correct = hits.idx.count()
    total = new_df.idx.count()
    print("my_accuracy: {}, {}/{}".format(correct / total, correct, total))
def scores(y_test, predictions, pp, clf):
    """Print a classification report block for one classifier.

    The block lists accuracy, precision, recall, F1 and ROC AUC, followed by
    the confusion matrix and scikit-learn's classification report. It is
    framed by an empty line before and after.

    :param y_test: ground-truth labels.
    :param predictions: predicted labels.
    :param pp: ``'Y'`` selects the "After Preprocessing" heading, anything
        else the "Before Preprocessing" heading.
    :param clf: classifier description shown in the heading.
    :return: None
    """
    print()
    heading = ('Scores After Preprocessing :' if pp == 'Y'
               else 'Scores Before Preprocessing :')
    print(heading)
    print('Classifier = {clf}'.format(clf=clf))

    # (template, placeholder name, scorer); each score is computed right
    # before its own line is printed.
    score_lines = (
        ('Accuracy score = {accuracy}', 'accuracy', accuracy_score),
        ('Precision score = {precision}', 'precision', precision_score),
        ('Recall score = {recall}', 'recall', recall_score),
        ('F1 Score = {f1score}', 'f1score', f1_score),
        ('ROC AUC = {roc_auc}', 'roc_auc', roc_auc_score),
    )
    for template, placeholder, scorer in score_lines:
        print(template.format(**{placeholder: scorer(y_test, predictions)}))

    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
def metric_overall_outlier(scores, marks, title=None):
    """Metric global outlier results, i.e., precision/recall and AUC value.

    Precision and recall are computed for every cut-off ``k`` in
    ``1 .. len(widgets) - 1``; the precision at ``k = number of outliers`` is
    printed together with the AUC.

    :param scores: List, summed scores of each widget(i).
    :param marks: List, outlier marks(i, j) of each widget(i) in each
        permission(j). The value could be 0 (not related to the permission),
        1 (outlier), -1 (inlier). If there is one outlier in the related
        permission, then the widget is outlier.
    :param title: String, file name used to save the plot as ``<title>.pdf``,
        `None` means not to save.
    :return: None
    """
    from pyod.utils.utility import get_label_n
    # The private modules ``sklearn.metrics.ranking`` and
    # ``sklearn.metrics.classification`` are gone from current scikit-learn;
    # the public package exposes the same functions in every release.
    from sklearn.metrics import precision_score, recall_score, roc_auc_score

    # get global outlier mark
    y_true = [1 if 1 in marks[i] else 0 for i in range(len(scores))]

    # compute precision, recall curve and auc value
    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    auc = roc_auc_score(y_true, scores)

    # print top-k precision and AUC value; pk[j] belongs to cut-off j + 1
    k = sum(y_true)
    print('overall({}/{}), p/r: {}, AUC: {}'.format(
        k, len(y_true), round(pk[k - 1], 4), round(auc, 4)
    ))

    # save plot
    if title is not None:
        path_save = os.path.join('{}.pdf'.format(title))
        plot_precision_recall(
            'Overall', list(range(1, len(y_true))), pk, rk, path_save
        )
def get_classification_metrics(ground_truth_labels, predicted_labels):
    """Return a dict of classification metrics for the given labelings.

    Keys: ``accuracy``, ``precision``, ``recall``, ``f1_score``,
    ``brier_score_loss``, ``matthews_corr_coef``, ``jaccard_score`` and
    ``cohen_kappa_score``. Precision, recall, F1 and Jaccard are weighted
    averages.

    :param ground_truth_labels: ground-truth labels.
    :param predicted_labels: predicted labels.
    :return: dict mapping metric name to its value.
    """
    truth, guess = ground_truth_labels, predicted_labels
    return {
        'accuracy': accuracy_score(truth, guess),
        'precision': precision_score(truth, guess, average='weighted'),
        'recall': recall_score(truth, guess, average='weighted'),
        'f1_score': f1_score(truth, guess, average='weighted'),
        'brier_score_loss': brier_score_loss(truth, guess),
        'matthews_corr_coef': matthews_corrcoef(truth, guess),
        'jaccard_score': jaccard_score(truth, guess, average='weighted'),
        'cohen_kappa_score': cohen_kappa_score(truth, guess),
    }
def run():
    """Run a 10-fold stratified evaluation of a logistic-regression classifier.

    Features, labels and sentences come from ``create_dataset`` /
    ``get_features`` / ``get_ys``. For each fold a ``LogisticRegression`` is
    fitted, its coefficients are printed and accuracy, precision, recall and
    F1 on the test split are printed. For every test item the sentence and its
    true label are appended to ``results/correct.txt`` when the prediction
    was right and to ``results/wrong.txt`` otherwise. The fold averages are
    printed last.

    :return: None
    """
    paras, sents = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    # Single-argument print calls behave the same on Python 2 and 3.
    print(len(X[0]))
    sents = np.array(sents)
    skf = StratifiedKFold(Y, n_folds=10)

    accs, precs, recs, f1s = [], [], [], []

    # ``with`` guarantees both report files are flushed and closed, also when
    # a fold raises.
    with open('results/correct.txt', 'w') as f, \
            open('results/wrong.txt', 'w') as f2:
        for train_index, test_index in skf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]

            clf = LogisticRegression()
            clf.fit(X_train, y_train)
            print(clf.coef_)

            y_pred = clf.predict(X_test)

            acc = accuracy_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            accs.append(acc)
            precs.append(prec)
            recs.append(rec)
            f1s.append(f1)

            print('Acc \t %s' % acc)
            print('Prec \t %s' % prec)
            print('Recall \t %s' % rec)
            print('F1 \t %s' % f1)

            # test_index, y_test and y_pred all have one entry per test row.
            for index, y_t, y_p in zip(test_index, y_test, y_pred):
                target = f if y_t == y_p else f2
                target.write('%s\n' % sents[index])
                target.write('%s\n' % (y_t))

    print('Avg Acc \t %s \t ' % np.mean(accs))
    print('Avg Prec \t %s' % np.mean(precs))
    print('Avg Recall \t %s' % np.mean(recs))
    print('Avg F1 \t %s' % np.mean(f1s))
y[train_idx], batch_size=cm.bs, epochs=cm.n_ep, verbose=0, callbacks=[cm.custom_stopping(value=cm.loss, verbose=1)], validation_data=(X_train, y[train_idx])) y_pred = model.predict(X_test) y_pred = np.argmax(y_pred, axis=1) y_true = np.argmax(y[test_idx], axis=1) acc_fold = accuracy_score(y_true, y_pred) avg_acc.append(acc_fold) recall_fold = recall_score(y_true, y_pred, average='macro') avg_recall.append(recall_fold) f1_fold = f1_score(y_true, y_pred, average='macro') avg_f1.append(f1_fold) print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format( acc_fold, recall_fold, f1_fold, i)) print('______________________________________________________') del model ic_acc = st.t.interval(0.9, len(avg_acc) - 1, loc=np.mean(avg_acc), scale=st.sem(avg_acc)) ic_recall = st.t.interval(0.9,
test_documents, "section", class_map, len(ipc_sections)) print( "=============================== Predicting test data ===============================" ) # Predicting the class for each word vector in the database real = [] pred = [] for doc, ipc in test_embedding_generator: result = model.predict_one(doc) pred.append(class_map[result]) #adding the result to the predicted vector real.append(class_map[numpy.argmax( ipc)]) #Adding the real value to de real class vector #Calculating the metric F1, Precision, Accuracy and Recall accuracy = accuracy_score(real, pred) recall = recall_score(real, pred, average='weighted') precision = precision_score(real, pred, average='weighted') f1 = f1_score(real, pred, average='weighted') print("Accuracy " + str(accuracy), "Recall " + str(recall), "Precision " + str(precision), "F1 " + str(f1)) result_string += "Accuracy " + str(accuracy) + " Recall " + str( recall) + " Precision " + str(precision) + " F1 " + str(f1) + "\n" f = open(result_file_name, "w") f.write("Database: " + training_documents_collection) f.write("embedding matrix: " + str(maxWords) + " " + str(embeddingSize)) f.write("epochs: " + str(epochs)) f.write("layers : " + str(layers)) f.write(result_string) f.close()
y_train, y_test = classes[train_index], classes[test_index] # treino do modelo print(f'Gerando o Modelo {i}...') classifier = RandomForestClassifier(n_estimators=10, criterion='gini', random_state=iteracao).fit( x_train, y_train) # classificando o conjunto de teste y_pred = classifier.predict(x_test) # metricas de desempenho aux_accuracy += accuracy_score(y_test, y_pred) aux_f1_score += f1_score(y_test, y_pred) aux_precision += precision_score(y_test, y_pred) aux_recall += recall_score(y_test, y_pred) conf_matrices += np.asarray(confusion_matrix(y_test, y_pred)) print(f'Modelo {i} finalizado e avaliado.') i += 1 # resultados print(f'\nITERATION #{iteracao} -----------------------') print(f'Accuracy = {aux_accuracy / k_fold.n_splits}') print(f'F1 Score = {aux_f1_score / k_fold.n_splits}') print(f'Precision = {aux_precision / k_fold.n_splits}') print(f'Recall = {aux_recall / k_fold.n_splits}') print(f'Examples x Attributes = {tf_idf.shape}') print(f'Confusion Matrix = \n{np.array(list(conf_matrices))}')
kerasAdapter.fit(dataTrainGenerator, epochs=epochs, batch_size=len(dataTrainGenerator), validationDataGenerator=dataTestGenerator, validationSteps=len(dataTestGenerator), callbacks=[modelCheckpoint, configSaver]) result = kerasAdapter.predict(dataTestGenerator, batch_size=parameters['batchSize']) testClasses = classes[testIndex] metrics = dict() metrics['fscore'] = f1_score(testClasses, result, average='weighted') metrics['precision'] = precision_score(testClasses, result, average='weighted') metrics['recall'] = recall_score(testClasses, result, average='weighted') metrics['auc'] = roc_auc_score(testClasses, result, average='weighted') metrics['fscore_b'] = f1_score(testClasses, result) metrics['precision_b'] = precision_score(testClasses, result) metrics['recall_b'] = recall_score(testClasses, result) metrics['auc_b'] = roc_auc_score(testClasses, result) metrics['kappa'] = cohen_kappa_score(testClasses, result) metrics['accuracy'] = accuracy_score(testClasses, result) tn, fp, fn, metrics['tp_rate'] = confusion_matrix(testClasses, result).ravel() print(classification_report(testClasses, result)) metrics["fold"] = i if dictWriter is None:
sklearn_y_pred = sklearn_mnb.predict(X_test) assert (my_y_pred == sklearn_y_pred).all() ###### my defined fasttext train_data, test_data = train_test_split(processed_data[['label', 'item']], test_size=0.1, random_state=2020) fasttext = FastText(class_num=3, class_type='multi-class', ngram_range=2) fasttext.fit(train_data['item'], train_data['label'], epochs=5) y_pred = fasttext.predict(test_data['item']) y_true = fasttext.y_encoder.transform(test_data['label']) macro_f1 = f1_score(y_true, y_pred, average='macro') macro_precision = precision_score(y_true, y_pred, average='macro') macro_recall = recall_score(y_true, y_pred, average='macro') ##### textCNN ## multi-class test train_data, test_data = train_test_split( processed_data[['subject', 'processed_item']], test_size=0.1, random_state=2020) text_cnn = TextCNN(class_num=4, class_type='multi-class') text_cnn.fit(train_data['processed_item'], train_data['subject'], validation_data=(test_data['processed_item'], test_data['subject']), epochs=2) y_true = text_cnn.y_encoder.transform(test_data['subject'])
img_dir = 'C:/Users/Administrator/Desktop/Normal' for _ in os.listdir(img_dir): res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _)) print(res) if res['desc'] == 'Not Blurry': preds.append(0) else: preds.append(1) gts.append(0) img_dir = 'C:/Users/Administrator/Desktop/Blur' for _ in os.listdir(img_dir): res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _)) print(res) if res['desc'] == 'Not Blurry': preds.append(0) else: preds.append(1) gts.append(1) tok = time.time() print('FPS={}'.format(len(os.listdir(img_dir)) / (tok - tik))) print(confusion_matrix(gts, preds)) print('Precision = %f' % precision_score(gts, preds)) print('Recall = %f' % recall_score(gts, preds)) print('Accuracy = %f' % accuracy_score(gts, preds))
test_idx = folds[i][1] X_train = X[train_idx] X_test = X[test_idx] X_train = feature_extraction(X_train) X_test = feature_extraction(X_test) clf = train_boosting(X_train, y[train_idx]) tmp = clf.predict(X_test) acc_fold = accuracy_score(y[test_idx], tmp) avg_acc.append(acc_fold) recall_fold = recall_score(y[test_idx], tmp, average='macro') avg_recall.append(recall_fold) f1_fold = f1_score(y[test_idx], tmp, average='macro') avg_f1.append(f1_fold) print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format( acc_fold, recall_fold, f1_fold, i)) print('______________________________________________________') ic_acc = st.t.interval(0.9, len(avg_acc) - 1, loc=np.mean(avg_acc), scale=st.sem(avg_acc)) ic_recall = st.t.interval(0.9, len(avg_recall) - 1,
# result = model.predict_one(doc) # pred.append(class_map[result]) #adding the result to the predicted vector # real.append(class_map[numpy.argmax(ipc)]) #Adding the real value to de real class vector for doc in test_documents: result = model.predict_one(pickle.loads(doc['embedding'])) pred.append(class_map[result]) #adding the result to the predicted vector real.append(doc['ipc_classes'][0][0]) all_class.append(doc['ipc_classes']) print(pred) print(real) #Calculating the metric F1, Precision, Accuracy and Recall accuracy = accuracy_score(real, pred) recall = recall_score(real, pred, average='weighted') recall_per_class = recall_score(real, pred, average=None) precision = precision_score(real, pred, average='weighted') precision_per_class = precision_score(real, pred, average=None) f1 = f1_score(real, pred, average='weighted') f1_per_class = f1_score(real, pred, average=None) results_per_class = dict() for i in range(0, len(recall_per_class)): if not class_map[i] in results_per_class.keys(): results_per_class[class_map[i]] = [] results_per_class[class_map[i]].append(recall_per_class[i]) results_per_class[class_map[i]].append(precision_per_class[i]) results_per_class[class_map[i]].append(f1_per_class[i]) matrix = confusion_matrix(real, pred, labels=ipc_sections.sort())
for i in train: y_train.append(features[i][6]) tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]] x_train.append(tmp) y_test = [] x_test = [] for i in test: y_test.append(features[i][6]) tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]] x_test.append(tmp) lr.fit(x_train, y_train) lrPredTest = lr.predict(x_test) lrPrecisionTest = precision_score(y_test, lrPredTest) lrRecallTest = recall_score(y_test, lrPredTest) lrF1Test = f1_score(y_test, lrPredTest) lrAvgPrecision += lrPrecisionTest lrAvgRecall += lrRecallTest lrAvgF1 += lrF1Test print "log reg completed in ", time.time() - start, " s" print "lr:\n Precision {}\n Recall {}\n F1 {}\n".format(lrAvgPrecision / 5, lrAvgRecall / 5, lrAvgF1 / 5) start = time.time() """RANDOM FOREST""" rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=5) rfAvgPrecision = 0.0 rfAvgRecall = 0.0
# Evaluate model and predict data on TEST print("******Evaluating TEST set*********") rnn_model.load_weights(model_filename) y_test_predict = rnn_model.predict(X_test_, batch_size = BATCH_SIZE) y_test_predict = np.array(y_test_predict) y_test_predict = np.argmax(y_test_predict, axis=1) all_trainable_count = int(np.sum([K.count_params(p) for p in set(rnn_model.trainable_weights)])) MAE = metrics.mean_absolute_error(y_test, y_test_predict, sample_weight=None, multioutput='uniform_average') acc_fold = accuracy_score(y_test, y_test_predict) avg_acc.append(acc_fold) recall_fold = recall_score(y_test, y_test_predict, average='macro') avg_recall.append(recall_fold) f1_fold = f1_score(y_test, y_test_predict, average='macro') avg_f1.append(f1_fold) with open(SAVE_DIR + '/results_model_with_self_attn_' + MODE + '.csv', 'a') as out_stream: out_stream.write(str(SEED) + ', ' + str(DATA_FILE[0:-4]) + ', ' + str(i) + ', ' + str(early_stopping_epoch) + ', ' + str(all_trainable_count) + ', ' + str(acc_fold) + ', ' + str(MAE) + ', ' + str(recall_fold) + ', ' + str(f1_fold) + '\n') print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(acc_fold, recall_fold, f1_fold, i)) print('______________________________________________________') K.clear_session() ic_acc = st.t.interval(0.9, len(avg_acc) - 1, loc=np.mean(avg_acc), scale=st.sem(avg_acc)) ic_recall = st.t.interval(0.9, len(avg_recall) - 1, loc=np.mean(avg_recall), scale=st.sem(avg_recall))
def _print_per_class_table(attack_test_y, target_y, p, attack_test_x,
                           labels=None):
    """Print the 'Per Class Evaluation' table, one row per class in target_y.

    Args:
        attack_test_y: Per-sample flags; rows equal to 1 feed the
            'Target Accuracy Train' column and rows equal to 0 feed
            'Target Accuracy Test'. Also used as ground truth for the
            attack metrics.
        target_y: Per-sample class labels; the table has one row for every
            value returned by ``np.unique(target_y)``.
        p: Per-sample attack predictions, scored against ``attack_test_y``.
        attack_test_x: 2-D array whose row-wise argmax is compared with
            ``target_y`` (presumably the target model's output vectors --
            confirm with the caller).
        labels: Values for the 'Class' column; defaults to the unique
            classes of ``target_y``.
    """
    # The class list, its boolean masks and the membership masks are the same
    # for every column, so they are computed once instead of per comprehension.
    classes = np.unique(target_y)
    if labels is None:
        labels = classes
    class_masks = [target_y == c for c in classes]
    seen_in_training = attack_test_y == 1
    held_out = attack_test_y == 0
    # Row-wise argmax, so indexing it afterwards equals argmax of the subset.
    target_preds = np.argmax(attack_test_x, axis=1)

    # Metric lists are built in the same order as before so that any sklearn
    # warnings are emitted in an unchanged sequence.
    precisions = [
        precision_score(attack_test_y[mask], p[mask]) * 100
        for mask in class_masks
    ]
    accuracies = [
        accuracy_score(attack_test_y[mask], p[mask]) * 100
        for mask in class_masks
    ]
    f1_scores = [
        f1_score(attack_test_y[mask], p[mask]) * 100
        for mask in class_masks
    ]
    recalls = [
        recall_score(attack_test_y[mask], p[mask]) * 100
        for mask in class_masks
    ]

    train_masks = [np.logical_and(mask, seen_in_training)
                   for mask in class_masks]
    test_masks = [np.logical_and(mask, held_out) for mask in class_masks]
    c_train_accs = [
        accuracy_score(target_y[sel], target_preds[sel]) * 100
        for sel in train_masks
    ]
    c_test_accs = [
        accuracy_score(target_y[sel], target_preds[sel]) * 100
        for sel in test_masks
    ]
    class_share = np.array([
        len(target_y[mask]) / len(target_y) * 100 for mask in class_masks
    ])

    table = PrettyTable()
    table.float_format = '.2'
    table.add_column("Class", labels)
    table.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
    table.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
    table.add_column("Attack Precision", np.round(precisions, 2))
    table.add_column("Attack Accuracy", np.round(accuracies, 2))
    table.add_column("Attack Recall", np.round(recalls, 2))
    table.add_column("Attack F-1 Score", np.round(f1_scores, 2))
    table.add_column("Percentage of Data", np.round(class_share, 2))
    print(table.get_string(title='Per Class Evaluation'))


def evaluate_results(result, target_def, n_shadow):
    """Print target-model and attack summary tables, then echo the inputs.

    Three tables are printed in order: 'Target Model' (accuracy of the
    argmax of ``attack_test_x`` against ``target_y``, split by whether
    ``attack_test_y`` is 1 or 0), 'Attack Aggregate' (accuracy, precision,
    recall and F1 of the argmax of ``preds`` against ``attack_test_y``) and
    'Per Class Evaluation'.

    Args:
        result: Mapping with the keys 'attack_test_y', 'attack_test_x',
            'preds', 'target_y', 'target_x' and 'labels'. The values are
            indexed with boolean masks, so they are presumably NumPy
            arrays -- confirm with the producer of ``result``.
        target_def: String identifying the target model; the piece after
            the parent directory of the current working directory is shown
            in the 'Model Definition' column.
        n_shadow: Value shown in the 'Num Shadow' column.

    Returns:
        dict: The six values read from ``result`` plus ``target_def`` and
        ``n_shadow``, under those same names.
    """
    attack_test_y = result['attack_test_y']
    attack_test_x = result['attack_test_x']
    preds = result['preds']
    target_y = result['target_y']
    target_x = result['target_x']
    labels = result['labels']

    # INFORMATION ABOUT THE MODEL UNDER ATTACK
    target_preds = np.argmax(attack_test_x, axis=1)
    seen_in_training = attack_test_y == 1
    held_out = attack_test_y == 0
    train_acc = accuracy_score(target_y[seen_in_training],
                               target_preds[seen_in_training])
    test_acc = accuracy_score(target_y[held_out], target_preds[held_out])

    model_table = PrettyTable(
        ['Model Definition', 'Training Accuracy', 'Testing Accuracy'])
    model_table.float_format = ".2"
    model_table.add_row([
        # Shorten the definition by cutting at the parent of the working dir.
        target_def.split(os.path.dirname(os.getcwd()))[-1],
        train_acc * 100,
        test_acc * 100,
    ])
    print(model_table.get_string(title='Target Model'))

    # INFORMATION ABOUT THE OVERALL ATTACK EFFECTIVENESS
    p = np.argmax(preds, axis=1)
    attack_table = PrettyTable(
        ['Num Shadow', 'Accuracy', 'Precision', 'Recall', 'F-1'])
    attack_table.float_format = ".2"
    attack_table.add_row([
        n_shadow,
        accuracy_score(attack_test_y, p) * 100,
        precision_score(attack_test_y, p) * 100,
        recall_score(attack_test_y, p) * 100,
        f1_score(attack_test_y, p) * 100,
    ])
    print(attack_table.get_string(title='Attack Aggregate'))

    _print_per_class_table(attack_test_y, target_y, p, attack_test_x,
                           labels=labels)

    return {
        'attack_test_y': attack_test_y,
        'attack_test_x': attack_test_x,
        'preds': preds,
        'target_y': target_y,
        'target_x': target_x,
        'target_def': target_def,
        'n_shadow': n_shadow,
        'labels': labels,
    }
# NOTE(review): whitespace-collapsed fragment; the code that defines data, testRec, gyr, X_train, y,
# train_idx, missing_list and finalResult is not visible, and the final loop body is cut off.
# Appends data['classes'] to yRec_list and the concatenation of testRec and gyr along the last axis to
# xRec_list. Fits a Catal classifier on X_train / y[train_idx]; then, for each entry of missing_list,
# predicts on the xRec_list item at the same position and appends accuracy, macro F1 and macro recall
# to finalResult['acc' | 'f1' | 'rec'][miss]. Afterwards Result is initialised with empty 'acc',
# 'recall' and 'f1' dicts and st.t.interval(0.9, ...) intervals are computed per `miss` around the mean
# of the collected scores (assumes `st` is scipy.stats -- verify against the imports).
yRec_list.append(data['classes']) xRec = np.concatenate([testRec, gyr], axis=-1) xRec_list.append(xRec) catal_classifier = Catal() catal_classifier.fit(X_train, y[train_idx]) for i in range(len(missing_list)): miss = missing_list[i] y_pred = catal_classifier.predict(xRec_list[i]) finalResult['acc'][miss].append( accuracy_score(yRec_list[i], y_pred)) finalResult['f1'][miss].append( f1_score(yRec_list[i], y_pred, average='macro')) finalResult['rec'][miss].append( recall_score(yRec_list[i], y_pred, average='macro')) Result = dict() Result['acc'] = dict() Result['recall'] = dict() Result['f1'] = dict() for miss in missing_list: ic_acc = st.t.interval(0.9, len(finalResult['acc'][miss]) - 1, loc=np.mean(finalResult['acc'][miss]), scale=st.sem(finalResult['acc'][miss])) ic_recall = st.t.interval(0.9, len(finalResult['rec'][miss]) - 1, loc=np.mean(finalResult['rec'][miss]), scale=st.sem(finalResult['rec'][miss]))