def _validate_model(self, x: np.ndarray, y: np.ndarray, validation_file_name: str = "validation.json") -> dict:
    logging.info("Creating predictions ...")
    y_predicted_categories = self._model.predict(x, batch_size=self._batch_size)
    gc.collect()
    # note: sklearn.metrics.classification is a removed private module; import from sklearn.metrics instead
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

    y_expected_1dim = self._label_enc.max_category(y)
    y_predicted_1dim = self._label_enc.max_category(y_predicted_categories)

    logging.info("Results:")
    logging.info("{}".format(precision_recall_fscore_support(y_true=y_expected_1dim,
                                                             y_pred=y_predicted_1dim)))
    accuracy = accuracy_score(y_true=y_expected_1dim, y_pred=y_predicted_1dim)
    logging.info("{}".format(accuracy))

    from sklearn.metrics import classification_report
    logging.info("\n{}".format(classification_report(y_true=y_expected_1dim,
                                                      y_pred=y_predicted_1dim,
                                                      target_names=["neg", "pos"])))
    results = classification_report(y_true=y_expected_1dim,
                                    y_pred=y_predicted_1dim,
                                    target_names=["neg", "pos"],
                                    output_dict=True)
    results["accuracy"] = accuracy
    write_text_file(file_path=self._experiment_folder / validation_file_name,
                    text=json.dumps(results))
    return results
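
# Hedged, self-contained sketch of what _validate_model persists above: a
# classification_report dict plus an explicit accuracy entry, serialized to JSON.
# The labels, predictions and output file name here are illustrative assumptions,
# not values from the original code.
import json
from sklearn.metrics import accuracy_score, classification_report

y_expected_demo = [0, 1, 1, 0, 1]
y_predicted_demo = [0, 1, 0, 0, 1]
results_demo = classification_report(y_true=y_expected_demo, y_pred=y_predicted_demo,
                                     target_names=["neg", "pos"], output_dict=True)
results_demo["accuracy"] = accuracy_score(y_true=y_expected_demo, y_pred=y_predicted_demo)
with open("validation_demo.json", "w") as demo_file:
    json.dump(results_demo, demo_file)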
def get_cv_metrics(self, cv):
    fold_avg_p = []
    fold_avg_r = []
    fold_avg_f1 = []
    fold_accuracy = []
    fold_test_support = []
    fold_train_support = []
    for i, (train, test) in enumerate(cv):
        train_df, train_y = self.X.iloc[train], self.y.iloc[train]
        test_df, test_y = self.X.iloc[test], self.y.iloc[test]
        estimator = clone(self.pipeline)
        estimator.fit(train_df, train_y)
        y_pred = estimator.predict(test_df)
        p, r, f1, s = precision_recall_fscore_support(test_y, y_pred)
        accuracy = accuracy_score(test_y, y_pred)
        # support-weighted average of precision, recall and f1 across classes
        avg_p, avg_r, avg_f1 = (np.average(p, weights=s),
                                np.average(r, weights=s),
                                np.average(f1, weights=s))
        test_support = test_y.shape[0]
        train_support = train_y.shape[0]
        fold_avg_p.append(avg_p)
        fold_avg_r.append(avg_r)
        fold_avg_f1.append(avg_f1)
        fold_accuracy.append(accuracy)
        fold_test_support.append(test_support)
        fold_train_support.append(train_support)
    # average the per-fold lists (the original returned the last fold's support by mistake)
    return (np.average(fold_avg_p), np.average(fold_avg_r),
            np.average(fold_avg_f1), np.average(fold_accuracy),
            np.average(fold_test_support), np.average(fold_train_support))
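
# Hedged, self-contained sketch of the fold-averaging scheme used by get_cv_metrics above:
# support-weighted precision/recall/F1 within each fold, then a plain average across folds.
# The dataset, estimator and number of splits are illustrative assumptions only.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import KFold

X_demo, y_demo = load_iris(return_X_y=True)
fold_p, fold_r, fold_f1, fold_acc = [], [], [], []
for train_idx, test_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(X_demo):
    clf_demo = LogisticRegression(max_iter=1000).fit(X_demo[train_idx], y_demo[train_idx])
    y_hat = clf_demo.predict(X_demo[test_idx])
    p, r, f1, s = precision_recall_fscore_support(y_demo[test_idx], y_hat)
    fold_p.append(np.average(p, weights=s))    # support-weighted average across classes
    fold_r.append(np.average(r, weights=s))
    fold_f1.append(np.average(f1, weights=s))
    fold_acc.append(accuracy_score(y_demo[test_idx], y_hat))
print(np.average(fold_p), np.average(fold_r), np.average(fold_f1), np.average(fold_acc))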
def compute(indices, all_truth, all_scores, pdb_ids, all_thresholds, precision, recall):
    print(all_thresholds[indices[0]])
    for t in indices:
        for i in range(len(pdb_ids)):
            p, r, _, _ = precision_recall_fscore_support(
                all_truth[i],
                [copysign(1, x - all_thresholds[t]) for x in all_scores[i]],
                average='binary')
            precision[t] += p
            recall[t] += r
        precision[t] /= len(pdb_ids)
        recall[t] /= len(pdb_ids)
def pandas_classification_report(y_true, y_pred):
    metrics_summary = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred)
    avg = list(precision_recall_fscore_support(y_true=y_true, y_pred=y_pred,
                                               average='weighted'))
    metrics_sum_index = ['precision', 'recall', 'f1-score', 'support']
    class_report_df = pd.DataFrame(list(metrics_summary), index=metrics_sum_index)
    support = class_report_df.loc['support']
    total = support.sum()
    avg[-1] = total  # the weighted call returns None for support; use the total support instead
    class_report_df['avg / total'] = avg
    return class_report_df.T
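
# Hedged usage sketch for pandas_classification_report above: illustrative labels only.
# Assumes pandas (as pd) and precision_recall_fscore_support are imported, as the function
# itself requires; the resulting frame has one row per class plus an 'avg / total' row.
y_true_demo = [0, 0, 1, 1, 1, 0]
y_pred_demo = [0, 1, 1, 1, 0, 0]
report_df_demo = pandas_classification_report(y_true_demo, y_pred_demo)
print(report_df_demo)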
def calculate(self) -> None:
    """
    Calculates all of the metrics (precision, recall, F score and support)
    and stores them in the results dictionary.

    Note: This function may eat up a lot of memory if it's used on a large file.
    :return:
    """
    print('\nCalculating metrics...')
    ftr_all = []
    fpr_all = []
    gen = generate_tuples_from_file(self.fpath,
                                    encodings=self.encodings,
                                    first_layer=self.first_layer,
                                    batch_size=self.batch_size)
    if tqdm:
        for _ in tqdm(range(self.steps)):
            x, y = next(gen)
            y_pred = self.model.predict_classes(x, verbose=0)
            y_true = y.argmax(2)
            ftr, fpr = self._score(y_true, y_pred)
            ftr_all.extend(ftr)
            fpr_all.extend(fpr)
    else:
        print('[!] For progress logging during metrics calculation install tqdm.')
        for _ in range(self.steps):
            x, y = next(gen)
            y_pred = self.model.predict_classes(x, verbose=0)
            y_true = y.argmax(2)
            ftr, fpr = self._score(y_true, y_pred)
            ftr_all.extend(ftr)
            fpr_all.extend(fpr)
    confusion = confusion_matrix(ftr_all, fpr_all)
    p, r, f, s = precision_recall_fscore_support(ftr_all, fpr_all)
    self.results = {
        'confusion_matrix': confusion,
        'precision': p,
        'recall': r,
        'fscore': f,
        'f1mean': np.mean(f),
        'support': s
    }
def evaluate_to_stats(net, testloader):
    net.eval()
    predictions = []
    with torch.no_grad():
        for batch_idx, (inputs, label, filename) in enumerate(testloader):
            if inputs.shape[0] > 1:
                raise NotImplementedError('Please choose a batch size of 1. Saving the results '
                                          'is not compatible with larger batch sizes in this version.')
            inputs = inputs.to(device)
            inputs = Variable(inputs)
            output_1, output_2, output_3, output_concat = net(inputs)
            outputs_com = output_1 + output_2 + output_3 + output_concat
            _, predicted_com = torch.max(outputs_com.data, 1)
            y_pred = predicted_com[0].flatten().cpu().numpy()
            y_fn = filename[0]
            if args.calc_perf:
                predictions.append({'filename': y_fn,
                                    'prediction': y_pred,
                                    'ground truth': label.cpu().numpy()})
            else:
                predictions.append({'filename': y_fn, 'prediction': y_pred})
            if batch_idx % 50 == 0:
                print('Testing image {} from {}'.format(batch_idx, len(testloader)))

    if args.calc_perf:
        y_gt = []
        y_pred = []
        for prediction in predictions:
            y_gt.append(prediction['ground truth'])
            y_pred.append(prediction['prediction'])
        y_gt = np.array(y_gt, dtype=np.uint8)
        y_pred = np.array(y_pred, dtype=np.uint8)
        precision, recall, f1, support = precision_recall_fscore_support(y_gt, y_pred,
                                                                         average='macro')
        confusion_matrix = sklearn.metrics.confusion_matrix(
            y_gt, y_pred, labels=range(len(testloader.dataset.classes)))
        cm_fig = construct_confusion_matrix_image(testloader.dataset.classes, confusion_matrix)
        cm_fig.savefig('result_confusion_matrix.png', dpi=300)
        print('F1 {}, precision, {}, recall, {}'.format(f1, precision, recall))
    return predictions
def run_grid_search(grid_search, show_evaluation=True):
    """ Run the GridSearch algorithm and compute evaluation metrics """
    X_train, X_test, y_train, y_test = split_dataset()
    grid_search.fit(X_train, y_train)
    # for key, value in grid_search.cv_results_.items():
    #     print key, value
    predictions = grid_search.predict(X_test)
    if show_evaluation:
        logger.debug("macro_recall: %s", recall_score(y_test, predictions, average="macro"))
        logger.debug(precision_recall_fscore_support(y_test, predictions))
        logger.debug(confusion_matrix(y_test, predictions))
def compute_scores(o, n_iterations, pdb_ids, ab_truth, ab_coord, ab_X, ab_X_weights,
                   precision, recall):
    print(outlier_fractions[o])
    forest = IsolationForest(contamination=outlier_fractions[o], n_jobs=4)
    for i in range(len(pdb_ids)):
        print(pdb_ids[i])
        current_precision = 0
        current_recall = 0
        for _ in range(n_iterations):
            forest.fit(ab_X[i], sample_weight=ab_X_weights[i])
            patch_pred_no_outliers = forest.predict(ab_coord[i])
            p, r, _, _ = precision_recall_fscore_support(ab_truth[i],
                                                         patch_pred_no_outliers,
                                                         average='binary')
            current_precision += p
            current_recall += r
        current_precision /= n_iterations
        current_recall /= n_iterations
        precision[o] += current_precision
        recall[o] += current_recall
    precision[o] /= len(pdb_ids)
    recall[o] /= len(pdb_ids)
def recall_0(self, y_true, y_pred, labels=None, average='binary', sample_weight=None):
    '''
    Calculate recall for the negative (0) class.

    :param y_true: ground-truth labels
    :param y_pred: predicted labels
    :param labels: label set passed through to precision_recall_fscore_support
    :param average: averaging mode (defaults to 'binary')
    :param sample_weight: per-sample weights
    :return: recall of the class with label 0
    '''
    _, r, _, _ = precision_recall_fscore_support(
        y_true, y_pred, beta=1, labels=labels, pos_label=0,
        average=average, warn_for=('f-score',), sample_weight=sample_weight)
    return r
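
# Hedged, self-contained check of what recall_0 above computes: with pos_label=0 and
# average='binary', precision_recall_fscore_support reports the negative (0) class, so
# the recall here is 2 recovered negatives out of 3 true negatives. Labels are illustrative.
from sklearn.metrics import precision_recall_fscore_support

y_true_demo = [0, 0, 0, 1, 1]
y_pred_demo = [0, 0, 1, 1, 1]
_, r_neg_demo, _, _ = precision_recall_fscore_support(
    y_true_demo, y_pred_demo, beta=1, pos_label=0, average='binary')
print(r_neg_demo)  # 0.666...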
def classification_report_imbalanced(y_true, y_pred, labels=None,
                                     target_names=None, sample_weight=None,
                                     digits=2, alpha=0.1):
    """Build a classification report based on metrics used with imbalanced dataset

    Specific metrics have been proposed to evaluate the classification
    performed on imbalanced dataset. This report compiles the
    state-of-the-art metrics: precision/recall/specificity, geometric
    mean, and index balanced accuracy of the geometric mean.

    Parameters
    ----------
    y_true : ndarray, shape (n_samples, )
        Ground truth (correct) target values.

    y_pred : ndarray, shape (n_samples, )
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average.

    target_names : list of strings, optional
        Optional display names matching the labels (same order).

    sample_weight : ndarray, shape (n_samples, )
        Sample weights.

    digits : int, optional (default=2)
        Number of digits for formatting output floating point values

    alpha : float, optional (default=0.1)
        Weighting factor.

    Returns
    -------
    report : string
        Text summary of the precision, recall, specificity, geometric mean,
        and index balanced accuracy.

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import classification_report_imbalanced
    >>> y_true = [0, 1, 2, 2, 2]
    >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE
    >>> target_names = ['class 0', 'class 1', \
    'class 2'] # doctest : +NORMALIZE_WHITESPACE
    >>> print(classification_report_imbalanced(y_true, y_pred, \
    target_names=target_names))
                       pre       rec       spe        f1       geo       iba\
       sup
    <BLANKLINE>
        class 0       0.50      1.00      0.75      0.67      0.71      0.48\
         1
        class 1       0.00      0.00      0.75      0.00      0.00      0.00\
         1
        class 2       1.00      0.67      1.00      0.80      0.82      0.69\
         3
    <BLANKLINE>
    avg / total       0.70      0.60      0.90      0.61      0.63      0.51\
         5
    <BLANKLINE>

    """
    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'

    if target_names is None:
        target_names = ['%s' % l for l in labels]
    name_width = max(len(cn) for cn in target_names)
    width = max(name_width, len(last_line_heading), digits)

    headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"]
    fmt = '%% %ds' % width  # first column: class name
    fmt += ' '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'

    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_pred, y_true, labels=labels, average=None,
        sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_pred, y_true, labels=labels, average=None,
        sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (precision[i], recall[i], specificity[i], f1[i],
                  geo_mean[i], iba[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(support[i])]
        report += fmt % tuple(values)

    report += '\n'

    # compute averages
    values = [last_line_heading]
    for v in (np.average(precision, weights=support),
              np.average(recall, weights=support),
              np.average(specificity, weights=support),
              np.average(f1, weights=support),
              np.average(geo_mean, weights=support),
              np.average(iba, weights=support)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(support))]
    report += fmt % tuple(values)

    return report
from collections import Counter

import numpy

import Features_manager
import Database_manager
# sklearn.metrics.classification and sklearn.cross_validation were removed; use the public paths
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import KFold

database_manager = Database_manager.make_database_manager()
feature_manager = Features_manager.make_feature_manager()

tweets = numpy.array(database_manager.return_tweets())
stance = numpy.array(feature_manager.get_stance(tweets))

count = Counter(stance)
print(count.most_common())

majority_class = count.most_common()[0][0]
test_predict = [majority_class] * len(stance)

prec, recall, f, support = precision_recall_fscore_support(stance, test_predict, beta=1)
accuracy = accuracy_score(stance, test_predict)

print("f:", (f))
print("p:", (prec))
print("r:", (recall))
print('"MClass"' + '\t' +
      str(((f[1] + f[2]) / 2)) + '\t' +
      str(((f[0] + f[1] + f[2]) / 3)) + '\n')
def main(args):
    """ Process the tweets, returning an output file """
    # read the config file
    cfg = fh.read_config_file(args.cfg)
    file_type = cfg['file_type']

    # check if should produce a report
    report = cfg.get('report_file', None)
    report_f = None
    if report:
        # open the file
        report_f = open(report, 'w')

    # open the TweetsDB connections
    db_train = TweetsDB('sa_train', drop_db=True)
    db_test = TweetsDB('sa_test')

    # read the train file
    print("Processing train file", end=" ")
    train_file_name = cfg['train_file_name']
    train_feat, labels = create_features(train_file_name, file_type, cfg, db_train)
    # save the dictionary info to db
    tr_stats = r.get_lexs_stats()
    r.reset_lexs_stats()
    print("done")
    if report:
        print(r.pd.DataFrame(tr_stats), file=report_f)

    # read the test file
    print("Processing test file", end=" ")
    test_file_name = cfg['test_file_name']
    test_feat, gold = create_features(test_file_name, file_type, cfg, db_test)
    # get the test stats
    ts_stats = r.get_lexs_stats()
    print("done")

    # create the feature vector
    print("Training model", end=" ")
    vec = DictVectorizer()
    X = vec.fit_transform(train_feat)
    y = np.array(labels)
    X_test = vec.transform(test_feat)

    # train the model
    clf = SGDClassifier(penalty='elasticnet', alpha=0.0001, l1_ratio=0.85,
                        n_iter=1000, n_jobs=-1)
    clf.fit(X, y)
    print("done")

    print("Predicting", end=" ")
    pred = clf.predict(X_test)
    print("done")

    # calculate score
    print("Saving information to db", end=" ")
    score = f1_score(gold, pred, labels=[-1, 1], average='macro')
    prfs = classification.precision_recall_fscore_support(gold, pred)
    # save run time
    run = RunWrapper(pred, gold, score, tr_stats, ts_stats, prfs, clf)
    run_info = run.save()
    print("done")

    # save model
    print("Saving model to file", end=" ")
    date = run_info['date'].strftime('%Y%m%d.%H%M%S')
    model = run_info['model'].split('.')[-1]
    file_name = '{}.{}.pkl'.format(date, model)
    joblib.dump(clf, path.join(cfg['model_out_dir'], file_name))
    print("done")

    # save the predicted values
    tr = fh.TweetReader(test_file_name, file_type)
    output_filename = cfg['output']
    print("Saving output to file,", output_filename, end=" ")
    with codecs.open(output_filename, 'w', 'utf8') as out:
        for tweet, label in zip(tr, pred):
            line = '\t'.join([tweet["sid"], tweet["uid"],
                              p.decode_label[label], tweet["text"]]) + '\n'
            out.write(line)
            db_test.save_tweet_pred_sent(tweet["sid"], p.decode_label[label])
    print("done")

    print("\n\nRun the following code on shell")
    print("python scorer.py b %s ../1-Input/twitter-test-GOLD-B.tsv" % output_filename)
def train_and_test(alpha, predictors, predictor_params, x_filename, y_filename, n_users,
                   percTest, featureset_to_use, diff_weighting, phi, force_balanced_classes,
                   do_scaling, optimise_predictors, report, conf_report=None):
    # all_X = numpy.loadtxt(x_filename, delimiter=",")
    all_X = numpy.load(x_filename + ".npy")
    all_y = numpy.loadtxt(y_filename, delimiter=",")
    print("loaded X and y files", x_filename, y_filename)

    # check element-wise for NaNs (the original called .any() before isnan, which never triggers)
    if numpy.isnan(all_X).any():
        print("nan in", x_filename)
        exit()
    if numpy.isnan(all_y).any():
        print("nan in", y_filename)
        exit()

    # print("selecting balanced subsample")
    print("t t split")
    X_train, X_test, y_train, y_test = train_test_split(all_X, all_y,
                                                        test_size=percTest,
                                                        random_state=666)

    # feature extraction
    # test = SelectKBest(score_func=chi2, k=100)
    # kb = test.fit(X_train, y_train)
    # # summarize scores
    # numpy.set_printoptions(precision=3)
    # print(kb.scores_)
    # features = kb.transform(X_train)
    # mask = kb.get_support()
    # # summarize selected features
    # print(features.shape)
    # X_train = X_train[:, mask]
    # X_test = X_test[:, mask]

    scaler = StandardScaler()
    rdim = FeatureAgglomeration(n_clusters=100)
    if do_scaling:
        # input(X_train.shape)
        X_train = rdim.fit_transform(X_train)
        X_test = rdim.transform(X_test)
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        with open('../../../isaac_data_files/qutor_scaler.pkl', 'wb') as output:
            pickle.dump(scaler, output, pickle.HIGHEST_PROTOCOL)
        with open('../../../isaac_data_files/qutor_rdim.pkl', 'wb') as output:
            pickle.dump(rdim, output, pickle.HIGHEST_PROTOCOL)

    # print("feature reduction...")
    # pc = PCA(n_components=100)
    # X_train = pc.fit_transform(X_train)
    # X_test = pc.transform(X_test)

    classes = numpy.unique(y_train)
    sample_weights = None
    if force_balanced_classes:
        X_train, y_train = balanced_subsample(X_train, y_train, 1.0)  # 0.118)

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

    print("tuning classifier ...")
    for ix, p in enumerate(predictors):
        print(type(p))
        print(p.get_params().keys())
        if optimise_predictors and len(predictor_params[ix]) > 1:
            pbest = run_random_search(p, X_train, y_train, predictor_params[ix])
        else:
            pbest = p.fit(X_train, y_train)
        predictors[ix] = pbest

    print("pickling classifier ...")
    for ix, p in enumerate(predictors):
        p_name = predictor_params[ix]['name']
        with open('../../../isaac_data_files/p_{}_{}_{}.pkl'.format(p_name, alpha, phi),
                  'wb') as output:
            pickle.dump(p, output, pickle.HIGHEST_PROTOCOL)
    print("done!")

    # report.write("* ** *** |\| \` | | |) /; `|` / |_| *** ** *\n")
    # report.write("* ** *** | | /_ |^| |) || | \ | | *** ** *\n")
    # report.write("RUNS,P,FB,WGT,ALPHA,PHI,SCL,0p,0r,0F,0supp,1p,1r,1F,1supp,avg_p,avg_r,avg_F,#samples\n")
    for ix, p in enumerate(predictors):
        report.write(",".join(map(str, (all_X.shape[0],
                                        str(p).replace(",", ";").replace("\n", ""),
                                        force_balanced_classes, diff_weighting,
                                        alpha, phi, do_scaling))))

        y_pred_tr = p.predict(X_train)
        y_pred = p.predict(X_test)
        # for x,y,yp in zip(X_train, y_test, y_pred):

        if conf_report:
            conf_report.write(str(p).replace(",", ";").replace("\n", "") + "\n")
            conf_report.write(str(alpha) + "," + str(phi) + "\n")
            conf_report.write(str(confusion_matrix(y_test, y_pred)) + "\n")
            conf_report.write("\n")

        # p = precision_score(y_test, y_pred, average=None, labels=classes)
        # r = recall_score(y_test, y_pred, average=None, labels=classes)
        # F = f1_score(y_test, y_pred, average=None, labels=classes)
        p, r, F, s = precision_recall_fscore_support(y_test, y_pred, labels=classes,
                                                     average=None,
                                                     warn_for=('precision', 'recall', 'f-score'))
        avp, avr, avF, _ = precision_recall_fscore_support(y_test, y_pred, labels=classes,
                                                           average='weighted',
                                                           warn_for=('precision', 'recall', 'f-score'))
        for ix, c in enumerate(classes):
            report.write(",{},{},{},{},{},".format(c, p[ix], r[ix], F[ix], s[ix]))
        report.write("{},{},{},{}\n".format(avp, avr, avF, numpy.sum(s)))
        # report.write(classification_report(y_test, y_pred) + "\n")
        # report.write("------END OF CLASSIFIER------\n")
        report.flush()
    return X_train, X_test, y_pred_tr, y_pred, y_test, scaler
for i in range(0, len(training)):
    for key, clf in clfs.items():
        print(key, label[i])

        tweets_training = training[i]
        tweets_test = test[i]

        stance_training = numpy.array(feature_manager.get_stance(tweets_training))
        stance_test = numpy.array(feature_manager.get_stance(tweets_test))

        prec, recall, f, support = precision_recall_fscore_support(
            stance_test,
            [Counter(stance_training).most_common()[0][0]] * len(stance_test),
            beta=1)
        accuracy = accuracy_score(
            stance_test,
            [Counter(stance_training).most_common()[0][0]] * len(stance_test))

        fmacro = (f[0] + f[1]) / 2

        feature_names = numpy.array(singlefeature[label[i]])
real_values_binary = [1 if value >= 0.5 else 0 for value in real_values]
pred_values_binary = [1 if value >= 0.5 else 0 for value in predicted_values]

number_of_zeros = len([value for value in real_values_binary if value == 0])
number_of_ones = len([value for value in real_values_binary if value == 1])
majority_label = 0 if number_of_zeros > number_of_ones else 1
majority_baseline = [majority_label for i in range(len(real_values))]

acc = accuracy_score(real_values_binary, pred_values_binary)
metrics = precision_recall_fscore_support(real_values_binary, pred_values_binary)
f1 = f1_score(real_values_binary, pred_values_binary, average="weighted")
roc = roc_auc_score(real_values_binary, predicted_values)

acc_maj = accuracy_score(real_values_binary, majority_baseline)
metrics_maj = precision_recall_fscore_support(real_values_binary, majority_baseline)
f1_maj = f1_score(real_values_binary, majority_baseline, average="weighted")
roc_maj = roc_auc_score(real_values_binary, majority_baseline)

b, c = compute_correct_predictions(majority_baseline, pred_values_binary, real_values_binary)
p_value = mcnemar_midp(b, c)
def load_results_from_raw_labels(self, y_true: np.array, y_pred: np.array, conf=None):
    """
    Load results from true labels and predicted labels

    Args:
        y_true: numpy array, true labels
        y_pred: numpy array, predicted labels
        conf: numpy array, confidence scores of predicted labels
    """
    accuracy = accuracy_score(y_pred=y_pred, y_true=y_true)
    auc = None
    self.conf = conf
    if len(self.labels) == 2:
        precision, recall, f_score, true_sum = precision_recall_fscore_support(
            y_true=y_true, y_pred=y_pred, labels=[1])
        if self.conf is not None:
            auc = roc_auc_score(y_true=y_true, y_score=conf)
    else:
        _precision, _recall, _f_score, _ = precision_recall_fscore_support(
            y_true=y_true, y_pred=y_pred, labels=list(range(len(self.labels))))
        class_precision = {
            key: value
            for key, value in list(zip(self.labels, _precision.tolist()))
        }
        precision = {'class': class_precision, 'average': np.mean(_precision)}
        class_recall = {
            key: value
            for key, value in list(zip(self.labels, _recall.tolist()))
        }
        recall = {'class': class_recall, 'average': np.mean(_recall)}
        class_f_score = {
            key: value
            for key, value in list(zip(self.labels, _f_score.tolist()))
        }
        f_score = {'class': class_f_score, 'average': np.mean(_f_score)}
        class_accuracy = {key: '-' for key in self.labels}
        accuracy = {'class': class_accuracy, 'average': accuracy}
        class_auc = {key: '-' for key in self.labels}
        auc = {'class': class_auc, 'average': 'N.A.'}

    if METRIC_PRECISION in self.metric_list:
        self.metric_scores_[METRIC_PRECISION] = precision
    if METRIC_RECALL in self.metric_list:
        self.metric_scores_[METRIC_RECALL] = recall
    if METRIC_F1 in self.metric_list:
        self.metric_scores_[METRIC_F1] = f_score
    if METRIC_ACCURACY in self.metric_list:
        self.metric_scores_[METRIC_ACCURACY] = accuracy
    if METRIC_CM in self.metric_list:
        self.metric_scores_[METRIC_CM] = {
            'labels': self.labels,
            'values': confusion_matrix(y_true=y_true, y_pred=y_pred)
        }
    if METRIC_AUC in self.metric_list:
        self.metric_scores_[METRIC_AUC] = auc
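
# Hedged, self-contained sketch of the binary branch in load_results_from_raw_labels above:
# with labels=[1], precision_recall_fscore_support returns length-one arrays for the
# positive class, and roc_auc_score consumes the confidence scores. Values are illustrative.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score

y_true_demo = np.array([0, 1, 1, 0, 1])
y_pred_demo = np.array([0, 1, 0, 0, 1])
conf_demo = np.array([0.1, 0.9, 0.4, 0.2, 0.8])
precision_demo, recall_demo, f_score_demo, support_demo = precision_recall_fscore_support(
    y_true=y_true_demo, y_pred=y_pred_demo, labels=[1])
auc_demo = roc_auc_score(y_true=y_true_demo, y_score=conf_demo)
print(precision_demo[0], recall_demo[0], f_score_demo[0], support_demo[0], auc_demo)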
    [list(feature_names).index(f) for f in feature_filtered])
feature_index_filtered = numpy.concatenate(
    feature_index_global[list(feature_index_filtered)])
# print(feature_name_global[feature_index_filtered])

X_filter = X[:, feature_index_filtered]
# print(feature_filtered, X.shape, X_filter.shape)

predict = []
golden = []
for index_train, index_test in kf:
    X_train = X_filter[index_train]
    X_test = X_filter[index_test]

    clf = SVC(kernel='linear')
    clf.fit(X_train, stance[index_train])
    test_predict = clf.predict(X_test)

    predict = numpy.concatenate((predict, test_predict))
    golden = numpy.concatenate((golden, stance[index_test]))

prec, recall, f, support = precision_recall_fscore_support(golden, predict, beta=1)
accuracy = accuracy_score(golden, predict)

print('"' + (' '.join(feature_filtered)) + '"' + '\t' +
      str(((f[0] + f[1] + f[2]) / 3)) + '\t' +
      str(((f[1] + f[2]) / 2)) + '\t' +
      str(prec) + '\t' + str(recall) + '\t' + str(f) + '\n')
def classification_report_imbalanced(y_true, y_pred, labels=None,
                                     target_names=None, sample_weight=None,
                                     digits=2, alpha=0.1):
    """Build a classification report based on metrics used with imbalanced dataset

    Specific metrics have been proposed to evaluate the classification
    performed on imbalanced dataset. This report compiles the
    state-of-the-art metrics: precision/recall/specificity, geometric
    mean, and index balanced accuracy of the geometric mean.

    Parameters
    ----------
    y_true : ndarray, shape (n_samples, )
        Ground truth (correct) target values.

    y_pred : ndarray, shape (n_samples, )
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average.

    target_names : list of strings, optional
        Optional display names matching the labels (same order).

    sample_weight : ndarray, shape (n_samples, )
        Sample weights.

    digits : int, optional (default=2)
        Number of digits for formatting output floating point values

    alpha : float, optional (default=0.1)
        Weighting factor.

    Returns
    -------
    report : string
        Text summary of the precision, recall, specificity, geometric mean,
        and index balanced accuracy.

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import classification_report_imbalanced
    >>> y_true = [0, 1, 2, 2, 2]
    >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE
    >>> target_names = ['class 0', 'class 1', \
    'class 2'] # doctest : +NORMALIZE_WHITESPACE
    >>> print(classification_report_imbalanced(y_true, y_pred, \
    target_names=target_names))
                       pre       rec       spe        f1       geo       iba\
       sup
    <BLANKLINE>
        class 0       0.50      1.00      0.75      0.67      0.87      0.77\
         1
        class 1       0.00      0.00      0.75      0.00      0.00      0.00\
         1
        class 2       1.00      0.67      1.00      0.80      0.82      0.64\
         3
    <BLANKLINE>
    avg / total       0.70      0.60      0.90      0.61      0.66      0.54\
         5
    <BLANKLINE>

    """
    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'

    if target_names is None:
        target_names = ['%s' % l for l in labels]
    name_width = max(len(cn) for cn in target_names)
    width = max(name_width, len(last_line_heading), digits)

    headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"]
    fmt = '%% %ds' % width  # first column: class name
    fmt += ' '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'

    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_true, y_pred, labels=labels, average=None,
        sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (precision[i], recall[i], specificity[i], f1[i],
                  geo_mean[i], iba[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(support[i])]
        report += fmt % tuple(values)

    report += '\n'

    # compute averages
    values = [last_line_heading]
    for v in (np.average(precision, weights=support),
              np.average(recall, weights=support),
              np.average(specificity, weights=support),
              np.average(f1, weights=support),
              np.average(geo_mean, weights=support),
              np.average(iba, weights=support)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(support))]
    report += fmt % tuple(values)

    return report
labels_test = numpy.array(feature_manager.get_label(tweets_test))

# feature_type = feature_manager.get_availablefeaturetypes()
feature_type = [
    "numhashtag",
    "puntuactionmarks",
    "length",
]

X, X_test, feature_name, feature_index = feature_manager.create_feature_space(
    tweets_training, feature_type, tweets_test)

print(feature_name)
print("feature space dimension X:", X.shape)
print("feature space dimension X_test:", X_test.shape)

clf = SVC(kernel="linear")
clf.fit(X, labels_training)
test_predict = clf.predict(X_test)

prec, recall, f, support = precision_recall_fscore_support(labels_test, test_predict, beta=1)
accuracy = accuracy_score(test_predict, labels_test)

print(prec, recall, f, support)
print(accuracy)
def classification_report_imbalanced_values(y_true, y_pred, labels, target_names=None,
                                            sample_weight=None, digits=2, alpha=0.1):
    """Copy of imblearn.metrics.classification_report_imbalanced to have access
    to the raw values. The code is mostly the same except the formatting code
    and generation of the report, which have been removed. Copied from version
    0.4.3. The original code is living here:
    https://github.com/scikit-learn-contrib/imbalanced-learn/blob/master/imblearn/metrics/_classification.py#L750
    """
    labels = np.asarray(labels)

    if target_names is None:
        target_names = ["%s" % l for l in labels]

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight)

    result = {"targets": {}}

    for i, label in enumerate(labels):
        result["targets"][target_names[i]] = {
            "precision": precision[i],
            "recall": recall[i],
            "specificity": specificity[i],
            "f1": f1[i],
            "geo_mean": geo_mean[i],
            "iba": iba[i],
            "support": support[i],
        }

    result["average"] = {
        "precision": np.average(precision, weights=support),
        "recall": np.average(recall, weights=support),
        "specificity": np.average(specificity, weights=support),
        "f1": np.average(f1, weights=support),
        "geo_mean": np.average(geo_mean, weights=support),
        "iba": np.average(iba, weights=support),
        "support": np.sum(support),
    }

    return result
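
# Hedged usage sketch for classification_report_imbalanced_values above, reusing the small
# label set from imblearn's own example; `labels` must be passed explicitly here because
# this variant does not infer them. Assumes the imports the function relies on (numpy as np,
# precision_recall_fscore_support and the imblearn metric helpers) are in scope.
y_true_demo = [0, 1, 2, 2, 2]
y_pred_demo = [0, 0, 2, 2, 1]
values_demo = classification_report_imbalanced_values(
    y_true_demo, y_pred_demo, labels=[0, 1, 2],
    target_names=["class 0", "class 1", "class 2"])
print(values_demo["average"]["f1"], values_demo["targets"]["class 2"]["recall"])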
for name_c, c in classifiers.items():
    c.init_run_variables()

for (train, test), color in zip(cv.split(X_tt, y_tt), colors):
    # predicted_ = clf.fit(X_tt[train], y_tt[train]).predict(X_tt[test])
    # print(metrics.classification_report(y[test], predicted_))
    for name_c, c in classifiers.items():
        # model = c.clf.fit(X_tt[train], y_tt[train]).best_estimator_
        model = c.clf.fit(X_tt[train], y_tt[train])
        # print "\n", c.clf.get_params(), "\n"
        probas_ = model.predict_proba(X_tt[test])
        y_test_split = model.predict(X_tt[test])

        # precision, recall, F-measure and support
        precision, recall, f1, support = precision_recall_fscore_support(
            y_tt[test], y_test_split)
        c.negclass_f1_sum += f1[0]
        c.negclass_precision_sum += precision[0]
        c.negclass_recall_sum += recall[0]
        c.posclass_f1_sum += f1[1]
        c.posclass_precision_sum += precision[1]
        c.posclass_recall_sum += recall[1]

        # Evaluating over the test set
        y_test_split = model.predict(X_ts)
        # precision, recall, F-measure and support
        precision, recall, f1, support = precision_recall_fscore_support(
            y_ts, y_test_split)