def evaluate_test(x_eval, y_eval, res_eval, model, config, test=False): total_pred = np.array([], dtype=np.float64) total_y_test = np.array([], dtype=np.int64) if (config['old'] == False): size = 2048 else: size = 1024 transformed_target = np.zeros(shape=(1, size), dtype=np.float64) test_batch_num = int( math.ceil(x_eval.shape[0] / float(config['batch_size']))) cum_acc = [] cum_f1 = [] with torch.no_grad(): for i in range(test_batch_num): begin_index = i * config['batch_size'] end_index = min((i + 1) * config['batch_size'], x_eval.shape[0]) batch_test_x = x_eval[begin_index:end_index] batch_test_y = y_eval[begin_index:end_index] batch_test_res = res_eval[begin_index:end_index] if (config['model_type'] == 'dan' or config['model_type'] == 'dann'): batch_test_x = batch_test_x.reshape(-1, 1, 1, batch_test_x.shape[1]) elif (config['model_type'] == 'jdda' or config['model_type'] == 'jdda_rnn' or config['model_type'] == 'jdda_ff'): batch_test_x = batch_test_x.reshape(-1, 1, batch_test_x.shape[1]) batch_test_x = torch.from_numpy(batch_test_x).float().to( config['device']) batch_test_y = torch.from_numpy(batch_test_y).long().to( config['device']) batch_test_res = torch.from_numpy(batch_test_res).float() output_test, hidden_test = model(batch_test_x) _, predicted = torch.max(output_test.data, 1) # loss = _loss(batch_test_res, batch_test_y, output_test) total_pred = np.concatenate((total_pred, predicted.cpu())) total_y_test = np.concatenate((total_y_test, batch_test_y.cpu())) batch_acc = accuracy_score(batch_test_y.cpu().numpy(), predicted.cpu().numpy()) _, _, batch_f1, _ = prfs(batch_test_y.cpu().numpy(), predicted.cpu().numpy(), average='weighted') cum_acc.append(batch_acc) cum_f1.append(batch_f1) transformed_target = np.concatenate( (transformed_target, hidden_test.cpu().numpy())) if (test == True): print(classification_report(total_y_test, total_pred, digits=4)) acc = accuracy_score(total_y_test, total_pred) cum_acc = np.array(cum_acc) cum_f1 = np.array(cum_f1) print("Testing accuracy", acc) print("Average acc and std ", np.mean(cum_acc), np.std(cum_acc)) print("Average F1 and std ", np.mean(cum_f1), np.std(cum_f1)) _, _, f1, _ = prfs(total_y_test, total_pred, average='weighted') transformed_target = np.delete(transformed_target, (0), axis=0) print("Target - Eval", transformed_target.shape, x_eval.shape) return f1, transformed_target
def _compute_metrics(self, gt_all, pred_all, types, print_results: bool = False): labels = [t.index for t in types] per_type = prfs(gt_all, pred_all, labels=labels, average=None, zero_division=0) micro = prfs(gt_all, pred_all, labels=labels, average='micro', zero_division=0)[:-1] macro = prfs(gt_all, pred_all, labels=labels, average='macro', zero_division=0)[:-1] total_support = sum(per_type[-1]) if print_results: self._print_results(per_type, list(micro) + [total_support], list(macro) + [total_support], types) return [m * 100 for m in micro + macro]
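# Small sketch with synthetic labels (not from the project) of the
# zero_division=0 argument used above: when one of the requested labels is
# never predicted, its precision is reported as 0 instead of triggering
# sklearn's undefined-metric warning.
from sklearn.metrics import precision_recall_fscore_support as prfs

gt = [0, 1, 1]
pred = [0, 0, 0]  # label 1 is never predicted
print(prfs(gt, pred, labels=[0, 1], average=None, zero_division=0))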
def classify_vuamc(all_features, features_to_use, classifier=xgb.XGBClassifier, n_dev=0, seed=0, classifier_args=None): if classifier_args is None: classifier_args = {} print("Extracting features {}".format(', '.join(features_to_use))) F = preprocess(all_features, features_to_use, n_dev=n_dev, seed=seed) print("{} total features".format(F['X_train'].shape[1])) print("{} train, {} dev, {} test".format(F['X_train'].shape[0], F['X_dev'].shape[0], F['X_test'].shape[0])) print("Training {}".format(classifier)) clf = classifier(scale_pos_weight=sum(1 - F['y_train']) / sum(F['y_train']), **classifier_args) clf.fit(F['X_train'], F['y_train']) print("Scoring") y_pred_test = clf.predict(F['X_test']) y_pred_dev = clf.predict(F['X_dev']) p, r, f1, s = prfs(F['y_test'], y_pred_test, average='binary') print("Precision:", p) print("Recall:", r) print("F1 Score:", f1) acc = (F['y_test'] == y_pred_test).mean() print("Accuracy:", acc) n_features = F['X_train'].shape[1] # By genre genres = np.unique(F['test_genre']) for genre in genres: print("Genre: {}".format(genre)) genre_mask = F['test_genre'] == genre y_pred_genre = clf.predict(F['X_test'][genre_mask]) pg, rg, fg, sg = prfs(F['y_test'][genre_mask], y_pred_genre, average='binary') print("Precision:", pg) print("Recall:", rg) print("F1 Score:", fg) accg = (F['y_test'][genre_mask] == y_pred_genre).mean() print("Accuracy:", accg) print() stats = { 'precision': p, 'recall': r, 'f1': f1, 'accuracy': acc, 'n_features': n_features } F['y_pred_test'] = y_pred_test F['y_pred_dev'] = y_pred_dev return F, stats
def evalute_one(self, metadata, predictions, batches): def log_loss(y, _p): eps = 1e-3 p = np.clip(_p, eps, 1. - eps) return np.mean(-(y * np.log(p) + (1 - y) * np.log(1 - p))) outputs = ['y_onsets', 'y_frames', 'y_offsets'] result = dict() for output in outputs: loss = log_loss(batches[output], predictions[output]) y_true = (batches[output] > 0.5) * 1 y_pred = (predictions[output] > 0.5) * 1 p, r, f, _ = prfs(y_true, y_pred, average='micro') result[output] = dict( loss=loss, p=p, r=r, f=f ) result['adsr'] = self.evaluate_adsr(metadata, predictions) return result
def on_epoch_end(self, epoch, logs=None):
    predict = np.argmax(self.model.predict(
        self.validation_data[:2],
        batch_size=self.params['batch_size']), axis=1)
    targ = np.argmax(self.validation_data[2], axis=1)
    prec, rec, f1, _ = prfs(targ, predict, average='macro')
    if self.verbose:
        info("epoch: {} p: {:0.4f} r: {:0.4f} f: {:0.4f}".format(
            epoch, prec, rec, f1))
    if f1 > self.best:
        self.best = f1
        self.best_epoch = epoch
        self.wait = 0
        self.best_weights = self.model.get_weights()
    else:
        if self.wait >= self.patience:
            self.model.stop_training = True
            self.model.set_weights(self.best_weights)
            info("Stopping at epoch {}, best: e {}, f: {}.".format(
                epoch, self.best_epoch, self.best))
        else:
            self.wait += 1
    if epoch == (self.params['epochs'] - 1):
        info("Stopping at final epoch {}, best: e {}, f: {}.".format(
            epoch, self.best_epoch, self.best))
        # Restore the best weights when the final epoch is not the best one.
        if self.best > f1:
            self.model.set_weights(self.best_weights)
def fit_eval(m, trnX, trnY, tstX, tstY, average='micro', **param): m.set_params(**param) m.fit(trnX, trnY, tstX, tstY) pred = m.predict(tstX) prec, rec, f1, _ = prfs(tstY, pred, average=average) acc = accuracy_score(tstY, pred) return (acc, prec, rec, f1)
def get_metrics(pred, pred_prob, y):
    """Calculate AUC_PR, error, precision, recall, and F1 score for the
    positive class from true labels y, predicted labels pred, and
    predicted P(y=1|x) pred_prob.

    Parameters
    ----------
    pred : iterable (list or np.array)
        Predicted labels
    pred_prob : iterable (list or np.array)
        Predicted P(y=1|x)
    y : iterable (list or np.array)
        True labels.
    """
    # prfs returns per-class arrays; transposing and indexing with [1]
    # selects the scores of the positive class. zip() is a lazy iterator
    # in Python 3, so it must be materialized before indexing.
    precision, recall, f1, _ = list(zip(*prfs(y, pred)))[1]
    error = 1 - accuracy_score(y, pred)
    area_under_curve = auc_score(y, pred_prob)
    metrics_dict = {
        "AUC": area_under_curve,
        "Error": error,
        "Precision": precision,
        "Recall": recall,
        "F1 score": f1,
    }
    return metrics_dict
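# Minimal usage sketch for get_metrics on toy binary data (not from the
# original project). It assumes `auc_score` is an alias for a probability
# based metric such as sklearn.metrics.roc_auc_score and that
# accuracy_score and prfs are imported from sklearn.metrics as above.
import numpy as np
from sklearn.metrics import (accuracy_score,
                             precision_recall_fscore_support as prfs,
                             roc_auc_score as auc_score)

y_true = np.array([0, 1, 1, 0, 1])
y_hat = np.array([0, 1, 0, 0, 1])
y_prob = np.array([0.2, 0.9, 0.4, 0.1, 0.8])
print(get_metrics(y_hat, y_prob, y_true))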
def main(): # Reading Data from folders X, y = load_data(data_path='./crop_dataset/crop_dataset/') print(f"Data shape: {X.shape}, Labels: {y.shape}\n") # Displaying random set of images from data display_random_set(data=X, labels=y) # Splitting data into training and testing data, training will consist of 70% of the data and 30% of the remaining # will be testing data. x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True) print( f"Training Data: {x_train.shape}, Training labels: {y_train.shape}\nValidation Data: {x_val.shape}, " f"Validation labels: {y_val.shape}\n") # Adjusting labels to be represented as categorical data. y_train = to_categorical(y=y_train, num_classes=len(np.unique(y))) y_val = to_categorical(y=y_val, num_classes=len(np.unique(y))) # Creating Neural network model. model = build_model(num_classes=len(np.unique(y)), img_dim=x_train[0].shape) # To train the model again change train value to True, change to False to not train. train_model(x=x_train, y=y_train, x_val=x_val, y_val=y_val, model=model, train=True) print("[In progress] Loading H5 model and history file...") classifier = load_model(filepath='traffic_sign_model.h5') hist_loaded = load_history(file_name='traffic_sign.pickle') print("[Done] Loading H5 model and history file...") # Loading data for testing model. x_test, y_test = load_test_data(test_data_dir='./test_data/test_data', test_data_labels_dir='./test_labels.csv') predictions = classifier.predict_classes(x_test) accuracy = np.array([ 1 if predictions[i] == int(y_test[i]) else 0 for i in range(len(predictions)) ]) print(f"Accuracy on test data: {np.mean(accuracy) * 100} %.") # plotting loss and mse curves for training and validation steps plot_curves(hist_loaded) # plotting accuracy bar graph per class labels = np.unique(y) precision, recall, f1, support = prfs(y_true=y_test, y_pred=predictions, average=None) accuracy_per_class(labels, precision, recall, f1)
def _compute_metrics(gt_all, pred_all, labels, labels_str, print_results: bool = False): per_type = prfs(gt_all, pred_all, labels=labels, average=None) micro = prfs(gt_all, pred_all, labels=labels, average='micro')[:-1] macro = prfs(gt_all, pred_all, labels=labels, average='macro')[:-1] total_support = sum(per_type[-1]) if print_results: _print_results(per_type, list(micro) + [total_support], list(macro) + [total_support], labels_str) metrics = [m * 100 for m in micro + macro] return dict(zip(METRIC_LABELS, metrics))
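# Standalone sketch (not part of the original module) of why the code above
# slices off the last element: with average=None, prfs returns four
# per-class arrays (precision, recall, F1, support), whereas the micro and
# macro calls return support=None, which [:-1] drops.
from sklearn.metrics import precision_recall_fscore_support as prfs

gt = [0, 0, 1, 2, 2, 2]
pred = [0, 1, 1, 2, 2, 0]
per_type = prfs(gt, pred, labels=[0, 1, 2], average=None)
print(per_type[-1])  # per-class support: [2 1 3]
print(prfs(gt, pred, labels=[0, 1, 2], average='micro')[:-1])
print(prfs(gt, pred, labels=[0, 1, 2], average='macro')[:-1])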
def get_prfs(causality_truth, causality_pred): assert causality_pred.shape == causality_truth.shape assert len(causality_pred.shape) == 2 from sklearn.metrics import precision_recall_fscore_support as prfs precision, recall, F1, _ = prfs( to_np_array(causality_truth).flatten(), to_np_array(causality_pred).flatten()) return precision[1], recall[1], F1[1]
def score(train, test, opt): from sklearn.metrics import precision_recall_fscore_support as prfs pred = _predict(train, test, opt) gold = test.labels prec, rec, f1, _ = prfs(gold, pred, average='macro') print("Precision {}, Recall: {}, F-score: {}".format( prec, rec, f1))
def evaluate(logger, tag_group, device, recurrent_model, output_model, loaders, global_step): recurrent_model.eval() output_model.eval() losses = defaultdict(list) all_prf = defaultdict(list) with torch.no_grad(): loss_function_bce = nn.BCEWithLogitsLoss(reduction='mean') loss_function_mse = nn.MSELoss(reduction='mean') # each loader is for one sequence for midifilename, loader in loaders: print('evaluate midifilename', midifilename) y_true, y_pred = evaluate_aux(device, recurrent_model, output_model, loader, losses) print('y_true.shape', y_true.shape) print('y_pred.shape', y_pred.shape) y_pred = (y_pred > 0.5) * 1 # import matplotlib.pyplot as plt # fig, axes = plt.subplots(nrows=2, sharex=True, sharey=True) # axes[0].imshow(y_true.T) # axes[1].imshow(y_pred.T) # plt.show() # exit() p, r, f, _ = prfs(y_true, y_pred, average='micro') print('p {:>4.2f} r {:>4.2f} f {:>4.2f}'.format(p, r, f)) all_prf['p'].append(p) all_prf['r'].append(r) all_prf['f'].append(f) to_log = { '{}_prf/p'.format(tag_group): np.mean(all_prf['p']), '{}_prf/r'.format(tag_group): np.mean(all_prf['r']), '{}_prf/f'.format(tag_group): np.mean(all_prf['f']), '{}_losses/mse_frames'.format(tag_group): np.mean(losses['mse_frames']), '{}_losses/mse_velocity'.format(tag_group): np.mean(losses['mse_velocity']), '{}_losses/bce'.format(tag_group): np.mean(losses['bce']) } if logger is not None: for key, value in to_log.items(): logger.add_scalar(key, value, global_step) return to_log
def rfFitScore(clf, dftrain, dftrain_y, dftest, dftest_y):
    '''random forest classifier fit and score.
    clf=RandomForestClassifier, dftrain=train data,
    dftrain_y=train data Y, dftest=test data, dftest_y=test data Y'''

    clfit = clf.fit(dftrain, dftrain_y['Y'])  # clf.fit(X, y)

    imp = clfit.feature_importances_  # ndarray of 562

    # clfit.fit_transform( X, y=None )  # returns X_new
    new_y = clfit.predict(dftest)  # returns predicted Y

    test_score = clfit.score(dftest, dftest_y['Y'])
    print("test score:", test_score)

    # clfit.oob_score_
    if clf.oob_score:
        print("oob score", clfit.oob_score_)

    # calculate test score by other means
    print("predict True %.3f percent, %d out of %d" %
          ((100 * sum(dftest_y['Y'] == new_y) / dftest_y.shape[0]),
           sum(dftest_y['Y'] == new_y), dftest_y.shape[0]))
    print("predict False %.3f percent, %d out of %d" %
          ((100 * sum(dftest_y['Y'] != new_y) / dftest_y.shape[0]),
           sum(dftest_y['Y'] != new_y), dftest_y.shape[0]))

    # new_p = clfit.predict_proba(dftest)
    # # probability of each X variable to predict each y class
    # print("test predict probabilities head:\n", new_p[:5])

    # cross table of variable predictions
    ptab = pd.crosstab(dftest_y['Y'], new_y,
                       rownames=['actual'], colnames=['predicted'])
    print("cross table:\n", ptab)

    # accuracy: percent labeled correctly
    # precision: true positives / (true positives + false positives)
    # recall: true positives / (true positives + false negatives)
    precision, recall, fbeta, support = prfs(dftest_y['Y'], new_y)
    print("precision", precision, "\nrecall", recall,
          "\nfbeta", fbeta, "\nsupport", support)

    if clf.oob_score:
        return test_score, imp, clfit.oob_score_
    else:
        return test_score, imp
def log_metrics(y_true, y_pred, metrics_write_path, average=""): """ Log the precision/recall/f1-score/support :param y_true: (ndarray) True labels :param y_pred: (ndarray) Classifier predicted labels :param metrics_write_path: (str) Place to write metrics :param average: (str) Specifies micro/macro averaging :return: None """ if not average: # Default write_file.write both micro and macro averages with open(metrics_write_path, "a") as write_file: write_file.write("\nModel Report\n") timestamp = str( datetime.fromtimestamp((datetime.timestamp(datetime.now())))) write_file.write("{}\n".format(timestamp)) write_file.write("----------------------------------------\n") # Micro Avg log_metrics(y_true, y_pred, metrics_write_path, average="micro") # Macro Avg log_metrics(y_true, y_pred, metrics_write_path, average="macro") else: metrics = prfs(y_true, y_pred, average=average) with open(metrics_write_path, "a") as write_file: write_file.write("\n") write_file.write("\n- {} averaging -\n".format(average)) write_file.write(classification_report(y_true, y_pred)) write_file.write("\nPrecision: {}\n".format(metrics[0])) write_file.write("Recall: {}\n".format(metrics[1])) write_file.write("F1-Score: {}\n".format(metrics[2])) write_file.write("Support: {}\n".format(metrics[3])) write_file.write("Accuracy: {}\n".format( accuracy_score(y_true, y_pred))) write_file.write("AUC Score ({}): {}\n".format( average, roc_auc_score(y_true, y_pred, average=average))) write_file.write(str(confusion_matrix(y_true, y_pred)) + "\n")
def eval_params(self, nparr): cutoff = int(self.X.shape[0]*.7) C = nparr[0] gamma = nparr[1] if gamma < 0: gamma = 1e-3 if C < 0: C = 1e-3 svm = SVC(C=C, gamma=gamma) X_train, y_train = self.X[:cutoff], self.labels[:cutoff] X_test, y_test = self.X[cutoff:], np.array(self.labels[cutoff:]) svm.fit(X_train, y_train) y_pred = svm.predict(X_test) # fitness = (y_pred == y_test).sum() / (y_test.shape[0]+.0) fitness = prfs(y_test, y_pred, average='macro')[2] print "F-score macro: %.6f achieved with C=%.6f and gamma=%.6f" % (fitness, C, gamma) return fitness
def _score(self, gold, pred, scores={'precision', 'recall', 'f1-score'}, negative_class=None, average=None): """ Return the score for the testset. """ from sklearn.metrics import precision_recall_fscore_support as prfs if average is None: average = 'macro' if negative_class: average = 'binary' scores = [sc \ if ':' in sc or \ sc not in {'precision', 'recall', 'f1-score'}\ else ':'.join((sc, average))\ for sc in scores] scores = {k: None for k in scores} for sc_avg in list(scores): if ':' in sc_avg: sc, avg = sc_avg.split(':') else: sc = sc_avg avg = None if scores[sc_avg] is not None: continue if sc not in {'precision', 'recall', 'f1-score', 'accuracy'}: warning("Skipping unknown score `{}'.".format(sc)) continue if sc in {'precision', 'recall', 'f1-score'}: if avg not in {'binary', 'micro', 'macro'}: warning("Skipping `{}': unknown avgeraging method.".format( sc_avg)) continue p, r, f, _ = prfs(gold, pred, average=avg) scores[':'.join(('precision', avg))] = p scores[':'.join(('recall', avg))] = r scores[':'.join(('f1-score', avg))] = f if sc == 'accuracy': from sklearn.metrics import accuracy_score scores['accuracy'] = accuracy_score(gold, pred) return {k: v for k, v in scores.items()}
def evaluate_test(x_eval, y_eval, res_eval, model, config, test=False): total_pred = np.array([], dtype=np.float64) total_y_test = np.array([], dtype=np.int64) test_batch_num = int( math.ceil(x_eval.shape[0] / float(config['batch_size']))) with torch.no_grad(): for i in range(test_batch_num): begin_index = i * config['batch_size'] end_index = min((i + 1) * config['batch_size'], x_eval.shape[0]) batch_test_x = x_eval[begin_index:end_index] batch_test_y = y_eval[begin_index:end_index] batch_test_res = res_eval[begin_index:end_index] if (config['model_type'] == 'dan' or config['model_type'] == 'dann'): batch_test_x = batch_test_x.reshape(-1, 1, 1, batch_test_x.shape[1]) batch_test_x = torch.from_numpy(batch_test_x).float().to( config['device']) batch_test_y = torch.from_numpy(batch_test_y).long().to( config['device']) batch_test_res = torch.from_numpy(batch_test_res).float() output_test, _ = model(batch_test_x, batch_test_x) _, predicted = torch.max(output_test.data, 1) # loss = _loss(batch_test_res, batch_test_y, output_test) total_pred = np.concatenate((total_pred, predicted.cpu())) total_y_test = np.concatenate((total_y_test, batch_test_y.cpu())) # overall_loss = _loss(res_eval, y_eval, total_pred) # overall_loss = criterion(total_pred, total_y_test) # acc = accuracy_score(total_y_test, total_pred) if (test == True): print(classification_report(total_y_test, total_pred, digits=4)) acc = accuracy_score(total_y_test, total_pred) print("Testing accuracy", acc) _, _, f1, _ = prfs(total_y_test, total_pred, average='weighted') # print ("Overall testing/evaluation F1 is ", f1) return f1
def evaluate_one_loader(cuda, net, loader): net.eval() loss_function = nn.BCELoss(reduction='mean') smoothed_loss = 1. y_true = [] y_pred = [] for x, y in loader: if cuda: x = x.cuda() y = y.cuda() y_hat = net.predict(x) loss = loss_function(y_hat, y) smoothed_loss = smoothed_loss * 0.9 + loss.detach().cpu().item() * 0.1 y_true.append(y.detach().cpu().numpy()) y_pred.append((y_hat.detach().cpu().numpy() > 0.5) * 1) y_true = np.vstack(y_true) y_pred = np.vstack(y_pred) p, r, f, _ = prfs(y_true, y_pred, average='micro') return smoothed_loss, p, r, f
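# Hypothetical check (toy arrays, not from the original code) that
# micro-averaged prfs on stacked multi-label indicator arrays, as built
# above from the y_true/y_pred batches, pools true and false positives over
# all label positions before computing precision, recall, and F1.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs

y_true_demo = np.array([[1, 0, 1],
                        [0, 1, 0]])
y_pred_demo = np.array([[1, 0, 0],
                        [0, 1, 1]])
p, r, f, _ = prfs(y_true_demo, y_pred_demo, average='micro')
print(p, r, f)  # approximately 0.667 for each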
def success_metrics(model):
    """
    Print evaluation of the model
    parameter: Pytorch model, (dataloaders['val'])
    return: precision, recall, f1, support
    """
    model.eval()
    original_labels = []
    pred_lst = []
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # Move tensors back to the CPU so sklearn can consume them.
            original_labels.extend(labels.cpu().numpy())
            pred_lst.extend(preds.cpu().numpy())
    precision, recall, f1, support = prfs(original_labels, pred_lst,
                                          average='weighted')
    print("Precision: {:.2%}\nRecall: {:.2%}\nF1 score: {:.2%}".format(
        precision, recall, f1))
    return precision, recall, f1, support
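# Quick illustration with assumed toy labels (not project data) of what
# average='weighted' means in the call above: per-class scores are averaged
# with weights proportional to each class's support.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs

y_true_demo = [0, 0, 0, 1]
y_pred_demo = [0, 0, 1, 1]
per_class = prfs(y_true_demo, y_pred_demo, average=None)
weighted = prfs(y_true_demo, y_pred_demo, average='weighted')
support = per_class[-1]
# Weighted F1 equals the support-weighted mean of the per-class F1 scores.
assert np.isclose(weighted[2], np.average(per_class[2], weights=support))
print(weighted[:3])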
model1.add(Dense(6, activation='relu', input_dim=11))  # 11 input features
model1.add(Dense(1, activation='sigmoid'))
model1.compile(optimizer='adam', loss='binary_crossentropy',
               metrics=['accuracy'])
model1.fit(X_train, y_train, batch_size=10, epochs=100)

y_pred = model1.predict(X_test)
y_pred = (y_pred > 0.5)
print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

# Logistic regression
from sklearn.linear_model import LogisticRegression as lr

model2 = lr().fit(X_train, y_train)
y_pred = model2.predict(X_test)
y_pred = (y_pred > 0.5)
print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

from sklearn.neighbors import KNeighborsClassifier as knn

model3 = knn().fit(X_train, y_train)
# # best result: epsilon = 2, threshold = 0.46, F1 = 0.515 (P = 0.468, R = 0.573) # # y_pred = neighbourhood_difference(y_dists) # # plot_thresholds(y_true, y_pred, False, 'binary') # Measurements on test data if __name__ == '__main__': data = config.get_seg_data('test') print("Loading the data") y = np.load(data['y']) y_true = np.load(data['y_true_lm']) T = 0.5 y_pred = compute_distance(y, euclidean_distances) > T P, R, F, S = prfs(y_true, y_pred, average='binary') print('euclidean distance: threshold = %.2f' % T) print_measurements(y_true, y_pred) y_dists = compute_distance(y, cosine_distances) T = 0.92 y_pred = y_dists > T print('cosine distance: threshold = %.2f' % T) print_measurements(y_true, y_pred) T = 0.33 y_pred = compute_distance(y, manhattan_distances) > T print('manhattan distance: threshold = %.2f' % T) print_measurements(y_true, y_pred)
cd_loss = criterion(cd_preds, labels) loss = cd_loss loss.backward() optimizer.step() cd_preds = cd_preds[-1] _, cd_preds = torch.max(cd_preds, 1) # Calculate and log other batch metrics cd_corrects = ( 100 * (cd_preds.squeeze().byte() == labels.squeeze().byte()).sum() / (labels.size()[0] * (opt.patch_size**2))) cd_train_report = prfs(labels.data.cpu().numpy().flatten(), cd_preds.data.cpu().numpy().flatten(), average='binary', pos_label=1) train_metrics = set_metrics(train_metrics, cd_loss, cd_corrects, cd_train_report, scheduler.get_lr()) # log the batch mean metrics mean_train_metrics = get_mean_metrics(train_metrics) for k, v in mean_train_metrics.items(): writer.add_scalars(str(k), {'train': v}, total_step) # clear batch variables from memory del batch_img1, batch_img2, labels scheduler.step()
########################################################### ################# ConvNet Evaluation ###################### ########################################################### print "\n fitting cnn model on %d samples..." % x_train.shape[0] # hist = cnn_100.model.fit(x_train, y_train, nb_epoch=10) # cnn_100.save_model(hist) # print 'saved hist: ', str(hist) cnn_200_pred = cnn_200.model.predict(x_test) y_true = np.array([np.argmax(i) for i in y_test]) y_pred = np.array([np.argmax(i) for i in cnn_200_pred]) cnn_acc = accuracy_score(y_true, y_pred) p, r, f, s = prfs(y_true, y_pred, average='macro') print print "cnn_20 stats: precision=%.3f, recall=%.3f, f-score=%.3f, acc: %.3f" % ( p, r, f, cnn_acc) ########################################################### ################# LogReg/SVM Preparation ################## ########################################################### print 'reading text data...' t = time() text_reader = TweetCorpusReader('../data/', text_only=False) t = time() - t print 'time elapsed: %.0fs' % t
def callback(epoch, model): if epoch % 250 != 0: return perf = [] for X, Z, y in ((experiment.X[ts], experiment.Z[ts], experiment.y[ts]), (experiment.X[tr], experiment.Z[tr], experiment.y[tr])): yhat = model.predict(X) y_loss = model.loss_y(X, y) z_loss = model.loss_z(X, Z) y_perf = list(prfs(y, yhat, average='binary')[:3]) perf.extend(y_perf + [y_loss, z_loss]) #print(Z[0]) #print(Z_hat[0]) if args.experiment.startswith('color') and args.record_lime: similarities, dispersions, times_senn, times_lime = [], [], [], [] for i in selection: x = experiment.X[i].reshape(1, -1) z_senn, t_senn = model.explain(x, return_runtime=True) z_senn = z_senn.ravel() z_lime, Z_lime, t_lime = \ experiment.explain_lime(model, tr, i, n_repeats=args.lime_repeats, n_samples=args.lime_samples, n_features=args.lime_features) times_senn.append(t_senn) times_lime.append(t_lime) n_nonzeros = len(np.nonzero(z_lime)[0]) similarities.append( _whatever_at_k(z_senn, z_lime, n_nonzeros)) n_repeats = len(Z_lime) dispersions.append(1 / (n_repeats * (n_repeats - 1)) * \ np.sum(pairwise_distances(Z_lime, Z_lime))) path = basename + '__fold={}__instance={}__epoch={}'.format( k, i, epoch) experiment.dump_explanation(path + '_senn.png', experiment.X[i], experiment.Z[i], z_senn) experiment.dump_explanation(path + '_lime.png', experiment.X[i], experiment.Z[i], z_lime) perf.extend([ np.mean(similarities), np.mean(dispersions), np.mean(times_senn), np.mean(times_lime), ]) print('epoch {} : {}'.format(epoch, perf)) return perf
print '#'*40 print 'NO Twitter Features' print 'SVM - Linear Kernel' print '#'*40 print 'ACC\tPR\tRE\tF1' print '#'*40 i=1 for tr, ts in KFold(n=len(normalized_corpus), n_folds=10): train = X[tr] test = X[ts] clf = LinearSVC() clf.fit(train, labels[tr]) ytrue = labels[ts] ypred = clf.predict(test) acc = (ytrue == ypred).sum() / (len(ypred)+.0) p, r, f, s = prfs(ytrue, ypred, average='binary') accs.append(acc) ps.append(p) rs.append(r) fs.append(f) print "%.2f\t%.2f\t%.2f\t%.2f KFoldRnd%d" % (acc,p,r,f,i) i += 1 print '#'*40 print 'Mean accuracy: %.2f' % (np.mean(accs)) print 'Mean precision: %.2f' % (np.mean(ps)) print 'Mean recal: %.2f' % (np.mean(rs)) print 'Mean f-score: %.2f' % (np.mean(fs)) accs = [] ps = []
print( 'Loading the paragraph trained classifier trained on data processed by' ' threshold_half_max function') classifier = load_pickle(config.classifier_par_half_max) threshold = 0.39 y_true = process_y(data, threshold_half_max) print("Loading x") x = load_sparse_csr(data['x']) else: threshold = 0.3 vectorizer = load_pickle(config.vectorizer) binarizer = load_pickle(config.binarizer) print('Loading the classifier') classifier = load_pickle(config.classifier) corpus, topics = build_corpus_and_topics(config.data['test']) print('Transforming corpus by vectorizer') x = vectorizer.transform(corpus) print('Transforming article topics by binarizer') y_true = binarizer.transform(topics) del vectorizer, binarizer, corpus, topics y_pred = predict_tuned(x, classifier, threshold) P, R, F, S = prfs(y_true, y_pred, average='samples') print('F1 = %.3f (P = %.3f, R = %.3f)' % (F, P, R))
def performanceSummary(trueLabels, predictedLabels):
    accuracy = acu(trueLabels, predictedLabels)
    precision, recall, fscore, support = prfs(trueLabels, predictedLabels)
    return ([accuracy.tolist()] + precision.tolist() + recall.tolist()
            + fscore.tolist() + support.tolist())
# The twitter-specific tokenizer makes the parsing slow, however # the accuracy is much improved with it. X_train, train_feats = pr.process(X_train, verbose=True) # ~7min vectoring phase X_mat = pr.fit_transform(X_train, saveVectorizer=False, saveMatrix=False, verbose=True) X_test, test_feats = pr.process(X_test, verbose=True) X_test = pr.transform(X_test, saveMatrix=False, verbose=True) # Compare the accuracy with and w/o the twitter-specific features. # Must scale the features matrix before concatenating with the ngrams matrix. print '\nTF-IDF Unigrams and Bigrams || Logistic Regression classifier' print '-'*40 clf = LR() # Roughly 3 minutes on training t0 = time.time() print 'Training on %d samples...' % (X_mat.shape[0]) clf.fit(X_mat, y_train) print 'Training time: %.0fs' % ((time.time()-t0)) print 'Testing on %d samples...' % (X_test.shape[0]) y_pred = clf.predict(X_test) acc = (y_pred==y_test).sum()/(len(y_pred)+.0) f1 = prfs(y_test, y_pred, average="macro")[-2] roc_auc = roc_auc_score(y_test, y_pred) print '\nReport\n'+'-'*40 print 'Accuracy: %.4f\nMacro F-1 Score: %.4f\nROC_AUC Score: %.4f' % (acc, f1, roc_auc)
if classPred[bind]==0: voxPred = branch_1[bind] else: voxPred = branch_2[bind] revVoxPred = voxPred[:,:,::-1,:] voxPred = np.maximum(revVoxPred,voxPred) voxPredLabels = voxPred[grid_xyz[:,0],grid_xyz[:,1],grid_xyz[:,2],0] tree = ckdt(grid_xyz_pc) _,inds = tree.query(cloud,k=1) pcPred = voxPredLabels[inds] thresh_pr = [] ; thresh_re = [] for thresh in np.arange(0.0,1.01,0.01): threshPred = np.array(pcPred) threshPred[threshPred >= thresh] = 1 threshPred[threshPred < thresh] = 0 p,r,f,s = prfs(seg.flatten(),threshPred.flatten(),average='binary') thresh_pr.append(p) thresh_re.append(r) prs.append(thresh_pr) recs.append(thresh_re) meanacc += acc if j%5==0: print('BATCH ' + str(j) + ' of ' + str(len(trainInstances)/args['batch_size']),' PREDICTION ACCURACY: ',meanacc/(j+1)) except StopIteration: if not os.path.exists(args['outputDir']): os.makedirs(args['outputDir']) prs = np.array(prs) recs = np.array(recs) pr_mean = np.mean(prs,axis=0) rec_mean = np.mean(recs,axis=0)
def permutations(data, shortest, longest, iterations, kernel, prefix, suffix): #initialize text data vectorizer if (shortest == 1) & (longest == 1): AAs = [ 'a', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'y' ] cv = CountVectorizer(analyzer='char', ngram_range=(shortest, longest), vocabulary=AAs) else: cv = CountVectorizer(analyzer='char', ngram_range=(shortest, longest)) dataVect = cv.transform(data.Sequence) # repeat for each kernel type: for k in kernel: #initialize feature use arrays precisions = pd.DataFrame(columns=data.Genus.unique()) recalls = pd.DataFrame(columns=data.Genus.unique()) fbetas = pd.DataFrame(columns=data.Genus.unique()) micros = pd.DataFrame(columns=['Precision', 'Recall', 'Fbeta']) macros = pd.DataFrame(columns=['Precision', 'Recall', 'Fbeta']) scores = pd.DataFrame(columns=("iteration", "score")) #initialize the genus classification for this ngram for i in data.Genus.unique(): data[i] = 0 #repeat n times for j in range(iterations): randomstate = j + 5342 #make sure the randomstate input is at least 4 digits and repeatable #initialize classifier #uses default settings for the classifier, see scikit-learn documentation sklearn.svm.SVC for details clf = SVC(kernel=k) #build training and test data sets X_train, X_test, y_train, y_test = train_test_split( dataVect, data.Genus, test_size=0.5, stratify=data.Genus, random_state=randomstate) #Scale the data to the training set StSc = StandardScaler(copy=True, with_mean=False) StSc.fit(X_train) X_sc_train = StSc.transform(X_train) X_sc_test = StSc.transform(X_test) X_scaled = StSc.transform(dataVect) #train the classifier clf.fit(X_sc_train, y_train) #make predictions for the original dataset y_pred = clf.predict(X_scaled) score = clf.score(X_sc_test, y_test) scores.loc[j] = [j, score] data['prediction'] = y_pred #increment the Genus column counter for each Genus for c in clf.classes_: data.loc[data.prediction == c, c] += 1 #record simulation data metrics = prfs(data.Genus, y_pred) precisions.loc[j] = metrics[0] recalls.loc[j] = metrics[1] fbetas.loc[j] = metrics[2] metrics = prfs(data.Genus, y_pred, average='micro') micros.loc[j, 'Precision'] = metrics[0] micros.loc[j, 'Recall'] = metrics[1] micros.loc[j, 'Fbeta'] = metrics[2] metrics = prfs(data.Genus, y_pred, average='macro') macros.loc[j, 'Precision'] = metrics[0] macros.loc[j, 'Recall'] = metrics[1] macros.loc[j, 'Fbeta'] = metrics[2] data.to_csv("{0}_SVM_{1}_{2}.csv".format(prefix, k, suffix)) scores.to_csv("{0}_SVM_{1}_{2}.scores".format(prefix, k, suffix)) precisions.to_csv("{0}_SVM_{1}{2}.precision".format(prefix, k, suffix)) recalls.to_csv("{0}_SVM_{1}_{2}.recall".format(prefix, k, suffix)) fbetas.to_csv("{0}_SVM_{1}_{2}.fbeta".format(prefix, k, suffix)) micros.to_csv("{0}_SVM_{1}_{2}.micro".format(prefix, k, suffix)) macros.to_csv("{0}_SVM_{1}_{2}.macro".format(prefix, k, suffix))
def precision_recall(beta_true, beta_pred):
    # Treat nonzero coefficients as the positive class.
    b_true = beta_true != 0
    b_pred = beta_pred != 0
    # average='binary' makes pos_label=1 take effect (it is ignored when no
    # averaging is requested) and returns scalar scores for that class.
    p, r, f, s = prfs(b_true, b_pred, average='binary', pos_label=1)
    return p, r
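# Side-by-side sketch on toy data (not from the original code) contrasting
# average=None, which returns one entry per class, with average='binary',
# which returns scalars for the positive class only.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs

b_true = np.array([True, False, True, True])
b_pred = np.array([True, True, False, True])
print(prfs(b_true, b_pred, average=None))                   # per-class arrays
print(prfs(b_true, b_pred, average='binary', pos_label=1))  # positive-class scalars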
def train_dnn(x_t, y_t, res_t, x_eval, y_eval, res_eval, x_test, y_test, config): # Start training process counter = 0 best_f1 = 0.0 f1_total = 0.0 epoch_no_improve = 0 n_epoch_stop = 3 #momentum = 0.9 #log_interval = 10 #l2_decay = 5e-4 batch_num = int(x_t.shape[0] / config['batch_size']) if (config['model_type'] == 'dan'): training_model = transfer_model.DANNet(config).to(config['device']) else: training_model = transfer_model.CNNModel(config).to(config['device']) loss_class = torch.nn.NLLLoss().to(config['device']) loss_domain = torch.nn.NLLLoss().to(config['device']) for epoch in range(config['num_epochs']): # Start training process if (config['model_type'] == 'dan'): LEARNING_RATE = config['learning_rate'] / math.pow( (1 + 10 * (epoch - 1) / config['num_epochs']), 0.75) print('learning rate{: .4f}'.format(LEARNING_RATE)) optimizer = torch.optim.SGD([ { 'params': training_model.sharedNet.parameters() }, { 'params': training_model.cls_fc.parameters(), 'lr': LEARNING_RATE }, ], lr=LEARNING_RATE / 10, momentum=config['momentum'], weight_decay=config['l2_decay']) elif (config['model_type'] == 'dann'): optimizer = torch.optim.Adam(training_model.parameters(), lr=config['learning_rate']) for batch in range(batch_num): training_model.train() batch_index = generate_batch(x_t.shape[0], config['batch_size']) test_batch_idx = generate_batch(x_test.shape[0], config['batch_size']) batch_x = x_t[batch_index] batch_y = y_t[batch_index] batch_res = res_t[batch_index] batch_x = batch_x.reshape(-1, 1, 1, batch_x.shape[1]) batch_x = torch.from_numpy(batch_x).float().to(config['device']) batch_y = torch.from_numpy(batch_y).long().to(config['device']) batch_res = torch.from_numpy(np.array(batch_res)).float() batch_test_x = x_test[test_batch_idx].reshape( -1, 1, 1, x_test.shape[1]) batch_test_x = torch.from_numpy(batch_test_x).float().to( config['device']) # Forward pass #print ("Shape source and target", batch_x.shape, batch_test_x.shape) alpha = 0.0 if (config['model_type'] == 'dan'): label_pred, loss_mmd = training_model(batch_x, batch_test_x) batch_test_x = x_test[test_batch_idx].reshape( -1, 1, 1, x_test.shape[1]) batch_test_x = torch.from_numpy(batch_test_x).float().to( config['device']) label_pred, loss_mmd = training_model(batch_x, batch_test_x) #loss = criterion(outputs, batch_y) # loss = _loss(batch_res, batch_y, outputs) loss_cls = F.nll_loss(F.log_softmax(label_pred, dim=1), batch_y) gamma = 2 / (1 + math.exp(-10 * (epoch) / config['num_epochs'])) - 1 err = loss_cls + gamma * loss_mmd # Backward and optimize optimizer.zero_grad() err.backward() optimizer.step() #print ("Label prediction", label_pred) tr_pred = np.argmax(label_pred.cpu().detach(), 1) #tr_pred = np.argmax(outputs.cpu().detach(), 1) counter += 1 training_model.eval() _, _, f1, _ = prfs(batch_y.cpu(), tr_pred, average='weighted') f1_total += f1 elif (config['model_type'] == 'dann'): p = float(counter + epoch * x_t.shape[0]) / config['num_epochs'] / x_t.shape[0] alpha = 2. / (1. 
+ np.exp(-10 * p)) - 1 training_model.zero_grad() #batch_x = batch_x.reshape(-1, 1, 1, x_test.shape[1]) #class_label = torch.LongTensor(batch_size) domain_label = torch.zeros(config['batch_size']) domain_label = domain_label.long().to(config['device']) # Using source data class_output, domain_output = training_model(batch_x, alpha) err_s_label = loss_class(class_output, batch_y) err_s_domain = loss_domain(domain_output, domain_label) ### Using target data #x_test_batch = torch.FloatTensor(batch_size, 1, x_test.shape[1], x_test.shape[1]) domain_label = torch.ones(config['batch_size']) domain_label = domain_label.long().to(config['device']) _, domain_output = training_model(batch_test_x, alpha) err_t_domain = loss_domain(domain_output, domain_label) err = err_t_domain + err_s_domain + err_s_label #print ("Error is ", err) err.backward() optimizer.step() training_model.eval() class_output, _ = training_model(batch_x, alpha) tr_pred = class_output.data.cpu().max(1, keepdim=True)[1] _, _, f1, _ = prfs(batch_y.cpu(), tr_pred, average='weighted') f1_total += f1 if (counter + 1) % 100 == 0: print( 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Best F1: {:.4f}' .format(epoch + 1, config['num_epochs'], batch + 1, batch_num, err.item(), f1)) counter += 1 print("------epoch : ", epoch, " Loss: ", err.item(), " Training F1:", round((f1_total / batch_num), 4)) training_model.eval() eval_f1 = evaluate_test(x_eval, y_eval, res_eval, training_model, config, False) f1_total += eval_f1 if (eval_f1 >= best_f1): best_f1 = eval_f1 torch.save(training_model.state_dict(), config['ckpt_path'] + 'best_model.pth') print("Current F1 score (evaluation) ", eval_f1) print("Best F1 score (evaluation) ", best_f1) print("All completed")
concat = Dropout(.5)(concat) output = Dense(2, activation='softmax', name='dense')(concat) model = Model([input1, input2], output, name='deep_lstm') model.compile("adam", "binary_crossentropy", metrics=['accuracy']) model.summary() from keras.utils import plot_model # plot_model(model, to_file='attn-cnn-lstm.png', show_layer_names=True, show_shapes=True) ############################################################ ################ Training and Eval ######################### ############################################################ labels = np.load("labels.npy") x1_train, x1_test, x2_train, x2_test, labels_train, labels_test = train_test_split( data1, data2, labels, test_size=0.15) N_EPOCHS = 10 history = model.fit([x1_train, x2_train], labels_train, batch_size=128, validation_split=.15, epochs=N_EPOCHS, callbacks=[EarlyStopping(patience=2)]) preds = model.predict([x1_test, x2_test]).argmax(axis=-1) y_true = labels_test.argmax(axis=-1) print(prfs(y_true, preds)) print(accuracy_score(y_true, preds))
for m in range(len(mnb.class_count_)): temp = np.argsort(mnb.coef_[m])[-20:] for e in temp[-10:]: if features[e] in aa_top_10.columns: aa_top_10[features[e]][mnb.classes_[m]] += 1 else: aa_top_10[features[e]] = 0 aa_top_10[features[e]][mnb.classes_[m]] = 1 for k in temp[:10]: if features[k] in aa_next_10.columns: aa_next_10[features[k]][mnb.classes_[m]] += 1 else: aa_next_10[features[k]] = 0 aa_next_10[features[k]][mnb.classes_[m]] = 1 metrics = prfs(data.Genus, y_pred) #record simulation data precisions.loc[j] = metrics[0] recalls.loc[j] = metrics[1] fbetas.loc[j] = metrics[2] metrics = prfs(data.Genus, y_pred, average='micro') micros.loc[j, 'Precision'] = metrics[0] micros.loc[j, 'Recall'] = metrics[1] micros.loc[j, 'Fbeta'] = metrics[2] metrics = prfs(data.Genus, y_pred, average='macro') macros.loc[j, 'Precision'] = metrics[0] macros.loc[j, 'Recall'] = metrics[1] macros.loc[j, 'Fbeta'] = metrics[2]
#Model fitting clf = rfc() #Calling feature selection methods fs = feature_selection() #clf,x_train,x_test,x_final_test,y_out = fs.PCASelection(x_train,y_train_binary,x_test,y_test_binary,x_final_test,clf) clf, x_train, x_test, x_final_test, y_out = fs.KBest(x_train, y_train_binary, x_test, y_test_binary, x_final_test, clf) clf.fit(x_train, y_train_binary) y_out = clf.predict(x_test) #Printing scores score = clf.score(x_test, y_test_binary) print "Score : ", score print "Precision recall f-score support : ", prfs(y_test_binary, y_out) #Cross validation folds = 2 print "\nManual ", folds, " fold cross validation score" CV(x_orig_train, y_orig_train_binary, clf, folds) scores = cross_val_score(clf, x_orig_train, y_orig_train_binary, cv=10) #Checking with inbuilt CV function print "\nChecking with inbuilt function" skf = KFold(n_splits=folds, shuffle=False) skfscore = cross_val_score(clf, x_orig_train, y_orig_train_binary, cv=skf) print skfscore #Manual Parameter tuning print "\nManual parameter tuning"