def comet_addtional_info(exp, model, save_path, X_test, y_test, embedding_type, model_type):
    """Log evaluation metrics, tags and dataset info for a trained model to a
    comet.ml experiment, then close the experiment.

    Parameters
    ----------
    exp : comet_ml.Experiment — live experiment to log into.
    model : trained classifier exposing ``predict``.
    save_path : str — path of the saved dataset; the base name is assumed to
        look like ``AID_xxx_endinfo.pkl``.
    X_test, y_test : held-out features / labels.
    embedding_type, model_type : str — descriptive tags for the run.

    NOTE(review): the function name ('addtional') is a typo but is kept so
    existing callers keep working.

    Fixes vs. the original: removed the unused ``to_categorical`` import and
    collapsed an if/else whose two branches were byte-identical.
    """
    import ntpath

    NN_test_preds = model.predict(X_test)
    class_rep = sklearn.metrics.classification_report(y_test, NN_test_preds)
    # Per-class metrics. The original branched on len(set(y_test)) == 2 but
    # both branches called prf(..., average=None) identically, so a single
    # call suffices.
    prec, rec, f_1, supp = prf(y_test, NN_test_preds, average=None)

    # Split base file name at the last '_'; assumes AID_xxx_endinfo.pkl layout.
    _, base = ntpath.split(save_path)
    AID, _, end_info = base.rpartition('_')
    exp.add_tag(AID)
    # Save data location, AID info, and version info.
    exp.log_dataset_info(name=AID, version=end_info, path=save_path)

    # Save per-class metrics report to comet.
    if len(f_1) == 2:
        # Assumes class order is [Active, Inactive] — TODO confirm label encoding.
        for i, name in enumerate(['Active', 'Inactive']):
            exp.log_metric('f1 class ' + name, f_1[i])
            exp.log_metric('Recall class' + name, rec[i])
            exp.log_metric('Precision class' + name, prec[i])
    else:
        for i, name in enumerate(['Active', 'Inconclusive', 'Inactive']):
            exp.log_metric('f1 class ' + str(i), f_1[i])
            exp.log_metric('Recall class' + str(i), rec[i])
            exp.log_metric('Precision class' + str(i), prec[i])
    exp.log_other('Classification Report', class_rep)

    # Save some informative tags.
    tags = [AID, end_info, model_type]
    exp.add_tags(tags)
    # NOTE(review): tagged 'SVM' regardless of model_type — confirm intended.
    exp.add_tag('SVM')
    exp.add_tag(embedding_type)
    # Save whatever figure is currently drawn on the global pyplot state.
    exp.log_figure(figure_name='ROC-Pres/Recall', figure=plt)
    plt.show()
    # Tell comet that the experiment is over.
    exp.end()
def train_LGBM(X_train, X_test, y_train, y_test, split_ID):
    """Fit a LightGBM binary classifier and report per-class metrics.

    Returns (precision, recall, f1, support, mcc) on the test split; the
    classification report is logged to the comet experiment ``exp``, and on
    the first split ('0') the model hyper-parameters are logged as well.
    """
    import lightgbm as lgb

    booster_params = dict(
        boosting_type='gbdt', num_leaves=31, max_depth=-1, learning_rate=0.1,
        n_estimators=500, subsample_for_bin=200000, objective='binary',
        is_unbalance=True, min_split_gain=0.0, min_child_weight=0.001,
        min_child_samples=20, subsample=1.0, subsample_freq=0,
        colsample_bytree=1.0, reg_alpha=0.0, reg_lambda=0.0,
        random_state=None, n_jobs=-1, silent=True, importance_type='split')
    classifier = lgb.LGBMClassifier(**booster_params)

    fitted = classifier.fit(X_train, y_train)
    predictions = fitted.predict(X_test)

    prec, rec, f_1, supp = prf(y_test, predictions, average=None)
    class_rep = sklearn.metrics.classification_report(y_test, predictions)
    exp.log_other('Classification Report' + split_ID, class_rep)
    mcc = sklearn.metrics.matthews_corrcoef(y_test, predictions)

    # Only the first CV iteration reports model parameters to comet.
    if split_ID == '0':
        exp.log_parameters(fitted.get_params())
    return prec, rec, f_1, supp, mcc
def test(self):
    """Evaluate ``self.best_model`` on the held-out test split.

    ``fit_predict`` outputs are thresholded at 0: negative values are treated
    as anomalies (label 1). Prints a summary line, writes the metric dict to
    ``<result_path>result.npy`` and returns
    (accuracy, precision, recall, f_score).

    Fix: ``auc`` was computed but never used; it is now persisted in the
    result dict, consistent with the sibling test methods.
    """
    print("======================TEST MODE======================")
    pred = self.best_model.fit_predict(self.X_test)
    gt = self.y_test.astype(int)
    from sklearn.metrics import (
        precision_recall_fscore_support as prf,
        accuracy_score,
        roc_auc_score,
    )
    # Negated score: lower (more negative) model output means more anomalous.
    auc = roc_auc_score(gt, -pred)
    pred = pred < 0
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}".format(
            accuracy, precision, recall, f_score
        )
    )
    os.makedirs(self.result_path, exist_ok=True)
    np.save(
        self.result_path + "result.npy",
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f_score,
            "auc": auc,
        },
    )
    return accuracy, precision, recall, f_score
def test_all(self):
    """Evaluate ``self.best_model`` on the concatenation of the training and
    test sets; negative decision outputs are treated as anomalies (label 1).

    Side effects: overwrites ``self.X_test`` with the combined matrix, prints
    a summary line and writes the metric dict to ``<result_path>result.npy``.
    Returns (accuracy, precision, recall, f_score, auc).
    """
    print("======================TEST MODE======================")
    # Evaluate over *all* data: stack train on top of test.
    self.X_test = np.concatenate([self.X_train, self.X_test], axis=0)
    raw_pred = self.best_model.predict(self.X_test)
    ground_truth = np.concatenate([self.y_train, self.y_test]).astype(int)
    from sklearn.metrics import (precision_recall_fscore_support as prf,
                                 accuracy_score, roc_auc_score)
    # AUC from the (negated) continuous decision function.
    auc = roc_auc_score(ground_truth,
                        -self.best_model.decision_function(self.X_test))
    binary_pred = raw_pred < 0
    accuracy = accuracy_score(ground_truth, binary_pred)
    precision, recall, f_score, support = prf(ground_truth, binary_pred,
                                              average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC-score: {:0.4f}"
        .format(accuracy, precision, recall, f_score, auc))
    os.makedirs(self.result_path, exist_ok=True)
    metrics = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f_score,
        "auc": auc,
    }
    np.save(self.result_path + "result.npy", metrics)
    return accuracy, precision, recall, f_score, auc
def get_DBSCAN_pca_result(filename, eps, min_samples):
    """Run DBSCAN on a 2-D PCA-reduced, L2-normalized view of the training
    data and print weighted precision/recall/F-beta plus the fit time."""
    # Load training data and labels.
    data, labels = read('data\\' + filename + '_train.data')
    # Project to two dimensions, then normalize.
    reduced_data = normalize(PCA(n_components=2).fit_transform(data))
    start = time()
    clusterer = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean',
                       algorithm='auto', leaf_size=30, p=None,
                       n_jobs=1).fit(reduced_data)
    end = time()
    # Map string labels to numeric ids for scoring.
    labels = change_labels_2_num(labels)
    predict_label = clusterer.labels_
    print("数据降至二维 eps:", eps, "\t min_samples: ", min_samples)
    precision, recall, fbeta_score, support = prf(labels, predict_label,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_socre:', fbeta_score)
    print('所用时间:', end - start)
def evaluate(self, x_train, y_train, x_test, y_test):
    """Score test points by model energy and return binary anomaly metrics.

    An energy is computed for every train and test sample; the anomaly
    threshold is the 80th percentile of the combined energies, and test
    samples above it are predicted anomalous (label 1).

    Returns (accuracy, precision, recall, f_score) on the test set.
    """

    def _compute_energy(X):
        # Run the model's energy op batch-by-batch and concatenate results.
        energy = []
        n_x = len(X)
        max_batches = n_x // self.config.batch_size
        if n_x % self.config.batch_size != 0:
            max_batches += 1  # account for the final partial batch
        for x_batch in tqdm(iter_data(X, size=self.config.batch_size),
                            total=max_batches):
            energy.append(
                self.session.run(
                    self.model.energy,
                    feed_dict=self.model.get_feed_dict(x_batch)))
        return np.concatenate(energy)

    eng_train = _compute_energy(x_train)
    eng_test = _compute_energy(x_test)
    assert len(eng_train) == len(x_train) and len(eng_test) == len(
        x_test), 'double check'
    combined_energy = np.concatenate((eng_train, eng_test))
    # Threshold at the 80th percentile of all energies (top 20% flagged).
    thresh = np.percentile(combined_energy, 100 - 20)
    pred = (eng_test > thresh).astype(int)
    gt = y_test.astype(int)
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average='binary')
    print(
        "Seed : {:3d}, Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}"
        .format(self.config.seed, accuracy, precision, recall, f_score))
    return accuracy, precision, recall, f_score
def train_kSVM(X_train, X_test, y_train, y_test, split_ID):
    """Train an RBF-kernel SVM and return per-class
    (precision, recall, f1, support) plus Matthews correlation on the test
    split; logs the classification report (and, on split '0', the model
    parameters) to the comet experiment ``exp``.

    Fixes vs. the original:
    - removed ``n_jobs`` and ``max_mem_size`` keyword arguments, which
      ``sklearn.svm.SVC`` does not accept (they raised TypeError);
    - ``cache_size`` must be a kernel-cache size in MB, not None — restored
      the library default of 200;
    - parameters are logged from the fitted model, not from the prediction
      array (an ndarray has no ``get_params``).
    """
    kSVM = SVC(kernel='rbf', degree=3, gamma='auto', coef0=0.0, C=1.0,
               tol=0.001, probability=False, class_weight='balanced',
               shrinking=False, cache_size=200, verbose=False, max_iter=-1,
               random_state=None, decision_function_shape='ovo')
    kSVM_model = kSVM.fit(X_train, y_train)
    kSVM_preds = kSVM_model.predict(X_test)
    prec, rec, f_1, supp = prf(y_test, kSVM_preds, average=None)
    class_rep = sklearn.metrics.classification_report(y_test, kSVM_preds)
    exp.log_other('Classification Report' + split_ID, class_rep)
    mcc = sklearn.metrics.matthews_corrcoef(y_test, kSVM_preds)
    # If first iteration, report model parameters to comet.
    if split_ID == '0':
        exp.log_parameters(kSVM_model.get_params())
    return prec, rec, f_1, supp, mcc
def main():
    """Grid-search a feature-selection → PCA → SVM pipeline on the digits
    data, then report macro precision/recall/F1 accumulated over stratified
    CV folds.

    Fix: ``Pipeline`` needs a list of (name, estimator) steps; the original
    passed a ``zip`` object, which on Python 3 is a one-shot iterator and
    breaks the pipeline's step indexing/validation.
    """
    rfc = RFC(n_estimators=100, n_jobs=-1)
    fs = SelectFromModel(rfc)
    pca = PCA()
    svm = SVC()
    estimators = list(zip(["feature_selection", "pca", "svm"], [fs, pca, svm]))
    pl = Pipeline(estimators)
    parameters = {
        "feature_selection__threshold": ["mean", "median"],
        "pca__n_components": [0.8, 0.5],
        "svm__gamma": [0.001, 0.01, 0.05],
        "svm__C": [1, 10],
    }
    gclf = GridSearchCV(pl, parameters, n_jobs=-1, verbose=2)
    digits = load_digits()
    X = digits.data
    y = digits.target
    first_fold = True
    trues = []
    preds = []
    for train_index, test_index in SKF().split(X, y):
        # Run the (expensive) grid search only on the first fold, then reuse
        # the winning estimator (refit per fold) on the remaining folds.
        if first_fold:
            gclf.fit(X[train_index], y[train_index])
            clf = gclf.best_estimator_
            first_fold = False
        clf.fit(X[train_index], y[train_index])
        trues.append(y[test_index])
        preds.append(clf.predict(X[test_index]))
    true_labels = np.hstack(trues)
    pred_labels = np.hstack(preds)
    print("p:{0:.6f} r:{1:.6f} f1:{2:.6f}".format(
        *prf(true_labels, pred_labels, average="macro")))
def get_kmeans_result(filename):
    """Fit k-means (random init) on the training split, map cluster ids back
    to class labels, and print weighted PRF metrics for the test split."""
    # Load train and test splits.
    data, labels = read('data\\' + filename + '_train.data')
    data_test, test_lable = read('data\\' + filename + '_test.data')
    n_samples, n_features = data.shape
    n_digits = len(np.unique(labels))
    print("未使用数据降维,init='random',n_init=10")
    print("n_digits: %d, \t n_samples %d, \t n_features %d" %
          (n_digits, n_samples, n_features))
    t0 = time()
    kmeans = KMeans(init='random', n_clusters=n_digits, n_init=10).fit(data)
    t1 = time()
    predict_label = kmeans.predict(data_test)
    t2 = time()
    # Translate cluster indices into the original class labels.
    labels_dict = get_labels_num(labels)
    predict_label_d = [labels_dict[k] for k in predict_label]
    precision, recall, fbeta_score, support = prf(test_lable, predict_label_d,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_socre:', fbeta_score)
    print('模型训练时间:', t1 - t0)
    print('测试数据预测时间:', t2 - t1)
def perform_testing():
    """Oracle evaluation: compare one composer's annotated labels against
    noisy predictions taken from a training composer, printing binary PRF
    and the percent classification error."""
    print('--- Performing Oracle Evaluation ---')
    testing_data_dict = helpers.csv_to_dict(training=False)
    # Validate on the second composer.
    testing_key = list(testing_data_dict.keys())[0]
    print('Testing on: ' + ' '.join(testing_key))
    # Ground-truth annotations.
    _, y_annotated, _ = helpers.fetch_data(testing_data_dict, testing_key)
    # Noisy predictions come from the third (sorted) training key.
    training_data_dict = helpers.csv_to_dict(training=True)
    training_keys = sorted(list(training_data_dict.keys()))[2]
    print('Using predictions from: ' + " ".join(training_keys))
    _, y_noisy, _ = helpers.fetch_data(training_data_dict, training_keys)
    res = prf(y_annotated, y_noisy, average='binary')
    cls_error = np.sum(
        np.abs(y_annotated - y_noisy)) / np.shape(y_annotated)[0] * 100.
    print('Precision: %2f' % res[0])
    print('Recall: %2f' % res[1])
    print('Fscore: %2f' % res[2])
    print('Error: %2f' % cls_error)
    return None
def train_SVM(X_train, X_test, y_train, y_test, split_ID):
    """Train a linear SVM via SGD (hinge loss) and return per-class
    (precision, recall, f1, support) plus Matthews correlation on the test
    split; logs the classification report (and, on split '0', the model
    parameters) to the comet experiment ``exp``."""
    svm_params = dict(
        loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15,
        fit_intercept=True, max_iter=500000, tol=0.001, shuffle=True,
        verbose=0, epsilon=0.1, n_jobs=-1, random_state=None,
        learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=False,
        validation_fraction=0.1, n_iter_no_change=5,
        class_weight='balanced', warm_start=False, average=False)
    fitted = SGDClassifier(**svm_params).fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    prec, rec, f_1, supp = prf(y_test, predictions, average=None)
    class_rep = sklearn.metrics.classification_report(y_test, predictions)
    exp.log_other('Classification Report' + split_ID, class_rep)
    mcc = sklearn.metrics.matthews_corrcoef(y_test, predictions)
    # Only the first CV split records the hyper-parameters.
    if split_ID == '0':
        exp.log_parameters(fitted.get_params())
    return prec, rec, f_1, supp, mcc
def calc_and_save_metrics(y_true, pred_probs, model_type, embedding_type, AID,
                          metric_dict_list, iter_num, test_train, hist):
    '''Takes in test and train data + labels, computes metrics and saves them
    as a dict inside of the provided list. Returns this list.'''
    # Keep any DNN training history alongside the metrics.
    history = hist
    # Hard class calls at the 0.5 probability threshold (class-1 probs from
    # the categorical-embedding DNN).
    class_preds = [p >= 0.5 for p in pred_probs]
    # All scalar / array metrics.
    prec, rec, f_1, supp = prf(y_true, class_preds, average=None)
    mcc = matthews_corrcoef(y_true, class_preds)
    conf_mat = confusion_matrix(y_true, class_preds)
    prec_array, recall_array, thresh_array = precision_recall_curve(
        y_true, pred_probs)
    auc_PR = auc(recall_array, prec_array)
    results_array = np.concatenate((prec, rec, f_1, supp)).tolist() + [
        mcc, prec_array, recall_array, thresh_array, conf_mat, auc_PR
    ]
    metric_names = [
        'Classifier', 'Embedding', 'AID', 'Iteration Number', 'test_train',
        'prec_Inactive', 'prec_Active', 'rec_Inactive', 'rec_Active',
        'f_1_Inactive', 'f_1_Active', 'supp_Inactive', 'supp_Active', 'mcc',
        'prec_array', 'rec_array', 'thresh_array', 'conf_matrix', 'auc',
        'hist'
    ]
    row = [model_type, embedding_type, AID, iter_num, test_train
           ] + results_array + [history]
    metric_dict_list.append(dict(zip(metric_names, row)))
    return metric_dict_list
def sentPred(trainfile, testfile, result, report):
    """Train LDA, logistic-regression and 1-NN classifiers on ``trainfile``,
    write their test predictions (separated by a sentinel row of 9) to
    ``result`` and the per-class train PRF report plus accuracy summary to
    ``report``.

    Fix: the report file was opened in binary append mode ('ab') while
    ``str`` objects were written, which raises TypeError on Python 3; it is
    now opened in text append mode via a context manager.
    """
    traindata = np.loadtxt(trainfile)
    testdata = np.loadtxt(testfile)
    x_train = traindata[:, 1:]
    y_train = traindata[:, 0]
    # NOTE(review): the *last* column is taken as the "standard" prediction,
    # yet x_train also contains it — confirm the file layout.
    y_pred_stan = traindata[:, -1]
    score_train_stan = ascore(y_train, y_pred_stan)
    rep_train_stan = prf(y_train, y_pred_stan, average=None)

    clf_lda = lda()
    clf_lda.fit(x_train, y_train)
    y_pred_lda = clf_lda.predict(x_train)
    score_train_lda = ascore(y_train, y_pred_lda)
    rep_train_lda = prf(y_train, y_pred_lda, average=None)
    test_pred_lda = clf_lda.predict(testdata)

    clf_log = log()
    clf_log.fit(x_train, y_train)
    y_pred_log = clf_log.predict(x_train)
    score_train_log = ascore(y_train, y_pred_log)
    rep_train_log = prf(y_train, y_pred_log, average=None)
    test_pred_log = clf_log.predict(testdata)

    clf_knn = knn(n_neighbors=1)
    clf_knn.fit(x_train, y_train)
    y_pred_knn = clf_knn.predict(x_train)
    score_train_knn = ascore(y_train, y_pred_knn)
    rep_train_knn = prf(y_train, y_pred_knn, average=None)
    test_pred_knn = clf_knn.predict(testdata)

    # All test predictions, separated by a sentinel value of 9.
    separator = np.array((9, ))
    test_pred = np.concatenate(
        (test_pred_lda, separator, test_pred_log, separator, test_pred_knn))
    np.savetxt(result, test_pred, fmt='%i')
    np.savetxt(report,
               rep_train_stan + rep_train_lda + rep_train_log + rep_train_knn,
               fmt='%10.5f')
    # Append the accuracy summary as text (was: 'ab' + str writes).
    with open(report, 'a') as f:
        f.write('stan: ' + str(score_train_stan) + '\n')
        f.write('lda: ' + str(score_train_lda) + '\n')
        f.write('log: ' + str(score_train_log) + '\n')
        f.write('knn: ' + str(score_train_knn) + '\n')
def test(self): print("======================TEST MODE======================") # self.dagmm.load_stat self.ae.load_state_dict( torch.load(self.model_save_path + "parameter.pth")) self.ae.eval() vae_loss = VAE_LOSS() vae_score = VAE_Outlier_SCORE() if self.data_name == 'optdigits': loss_type = 'BCE' else: loss_type = 'MSE' for _, (x, y, m) in enumerate(self.testing_loader): y = y.data.cpu().numpy() x = x.to(self.device).float() m = m.to(self.device).float() _, _, xhat1, xhat2, mu1, mu2, logvar1, logvar2 = self.ae( x.float(), x.float(), m, m) error1 = vae_score(xhat1, x, mu1, logvar1, loss_type) error2 = vae_score(xhat2, x, mu2, logvar2, loss_type) n_non_missing = m.sum(dim=1) error = (error1 / n_non_missing + error2 / n_non_missing) error = error.data.cpu().numpy() thresh = np.percentile(error, self.data_normaly_ratio * 100) print("Threshold :", thresh) pred = (error > thresh).astype(int) gt = y.astype(int) from sklearn.metrics import ( precision_recall_fscore_support as prf, accuracy_score, roc_auc_score, ) auc = roc_auc_score(gt, error) accuracy = accuracy_score(gt, pred) precision, recall, f_score, support = prf(gt, pred, average="binary") print( "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC : {:0.4f}" .format(accuracy, precision, recall, f_score, auc)) os.makedirs(self.result_path, exist_ok=True) np.save( self.result_path + "result.npy", { "accuracy": accuracy, "precision": precision, "recall": recall, "f1": f_score, "auc": auc, }, ) return accuracy, precision, recall, f_score, auc
def pr(labels, scores, percen):
    """Threshold ``scores`` at the top ``percen`` percent and print binary
    precision/recall/F1 of the flagged samples against ``labels``."""
    cutoff = np.percentile(scores, 100 - percen)
    print("Threshold :", cutoff)
    flagged = (scores >= cutoff).astype(int)
    truth = np.array(labels).astype(int)
    precision, recall, f_score, support = prf(truth, flagged, average='binary')
    print('precision %f , recall %f , f1: %f' % (precision, recall, f_score))
def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
    """Return (accuracy, precision, recall, f1, f0.1) for binary labels.

    ``warn_for=()`` silences sklearn's zero-division warnings; F0.1 is forced
    to 0 when both precision and recall are 0.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # warn_for=() avoids log warnings for any result being zero.
    precision, recall, f_score, _ = prf(y_true, y_pred, average='binary',
                                        warn_for=())
    f01_score = 0 if precision == 0 and recall == 0 else fbeta_score(
        y_true, y_pred, average='binary', beta=0.1)
    return accuracy, precision, recall, f_score, f01_score
def getPRF(fileName):
    """Read (predictions, truths) from ``fileName`` and return micro-averaged
    [precision, recall, F1], each as a percentage rounded to one decimal."""
    pair = readFile(fileName)
    y_pred = pair[0]
    y_true = pair[1]
    scores = prf(y_true, y_pred, average='micro')
    # Drop the trailing support entry; scale to percent.
    return np.array([round(s * 100, 1) for s in scores[:3]])
def sentPred(trainfile, testfile, result, report):
    """Train LDA, logistic-regression and 1-NN classifiers and write test
    predictions plus a train-metrics report to disk (duplicate definition of
    ``sentPred`` earlier in this file).

    Fix: the report file was opened in binary append mode ('ab') while
    ``str`` objects were written — TypeError on Python 3; now opened in text
    append mode via a context manager.
    """
    traindata = np.loadtxt(trainfile)
    testdata = np.loadtxt(testfile)
    x_train = traindata[:, 1:]
    y_train = traindata[:, 0]
    # NOTE(review): last column doubles as the "standard" prediction while
    # still being part of x_train — confirm the file layout.
    y_pred_stan = traindata[:, -1]
    score_train_stan = ascore(y_train, y_pred_stan)
    rep_train_stan = prf(y_train, y_pred_stan, average=None)

    clf_lda = lda()
    clf_lda.fit(x_train, y_train)
    y_pred_lda = clf_lda.predict(x_train)
    score_train_lda = ascore(y_train, y_pred_lda)
    rep_train_lda = prf(y_train, y_pred_lda, average=None)
    test_pred_lda = clf_lda.predict(testdata)

    clf_log = log()
    clf_log.fit(x_train, y_train)
    y_pred_log = clf_log.predict(x_train)
    score_train_log = ascore(y_train, y_pred_log)
    rep_train_log = prf(y_train, y_pred_log, average=None)
    test_pred_log = clf_log.predict(testdata)

    clf_knn = knn(n_neighbors=1)
    clf_knn.fit(x_train, y_train)
    y_pred_knn = clf_knn.predict(x_train)
    score_train_knn = ascore(y_train, y_pred_knn)
    rep_train_knn = prf(y_train, y_pred_knn, average=None)
    test_pred_knn = clf_knn.predict(testdata)

    # All test predictions, separated by a sentinel value of 9.
    separator = np.array((9,))
    test_pred = np.concatenate(
        (test_pred_lda, separator, test_pred_log, separator, test_pred_knn))
    np.savetxt(result, test_pred, fmt='%i')
    np.savetxt(report,
               rep_train_stan + rep_train_lda + rep_train_log + rep_train_knn,
               fmt='%10.5f')
    # Append the accuracy summary as text (was: 'ab' + str writes).
    with open(report, 'a') as f:
        f.write('stan: ' + str(score_train_stan) + '\n')
        f.write('lda: ' + str(score_train_lda) + '\n')
        f.write('log: ' + str(score_train_log) + '\n')
        f.write('knn: ' + str(score_train_knn) + '\n')
def test(self):
    """Monte-Carlo ensemble evaluation: average the reconstruction error of
    1000 stochastic forward passes per sample, threshold at the dataset's
    anomaly-ratio percentile, and print/save accuracy, PRF and AUC.

    Returns (accuracy, precision, recall, f_score, auc).
    """
    print("======================TEST MODE======================")
    # Keep the network in train mode so dropout makes each pass stochastic.
    self.ae.train()
    mse_loss = torch.nn.MSELoss(reduction='none')
    if self.data_name == 'optdigits':
        # Pixel data: score with BCE instead of MSE.
        mse_loss = torch.nn.BCELoss(reduction='none')
    error_list = []
    for _ in range(1000):  # ensemble score over 1000 stochastic feedforward passes
        with torch.no_grad():
            for _, (x, y) in enumerate(self.testing_loader):
                # testing data loader has n_test batchsize, if it is image data, need change this part
                y = y.data.cpu().numpy()
                x = x.to(self.device).float()
                _, _, xhat1, xhat2 = self.ae(x.float(), x.float())
                # Combined reconstruction error of both decoder outputs.
                error = mse_loss(xhat1, x) + mse_loss(xhat2, x)
                error = error.mean(dim=1)
                error = error.data.cpu().numpy()
                error_list.append(error)
    error_list = np.array(error_list)
    # Average error per sample across all stochastic passes.
    error = error_list.mean(axis=0)
    from sklearn.metrics import (
        precision_recall_fscore_support as prf,
        accuracy_score,
        roc_auc_score,
    )
    # Labels leak from the (single-batch) loader loop above.
    gt = y.astype(int)
    thresh = np.percentile(error, self.dataset.__anomalyratio__() * 100)
    print("Threshold :", thresh)
    pred = (error > thresh).astype(int)
    gt = y.astype(int)
    auc = roc_auc_score(gt, error)
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC : {:0.4f}".format(
            accuracy, precision, recall, f_score, auc
        )
    )
    os.makedirs(self.result_path, exist_ok=True)
    np.save(
        self.result_path + "result.npy",
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f_score,
            "auc": auc,
        },
    )
    print("result save to {}".format(self.result_path))
    return accuracy, precision, recall, f_score, auc
def update(self, y_hat, y_test):
    """Fold a new batch of predictions into the running moving-average
    weighted-PRF and accuracy statistics, then bump the batch counter."""
    flat_pred = y_hat.flatten()
    flat_true = y_test.flatten()
    # Drop the trailing support entry; keep precision/recall/f1 only.
    batch_prf = np.array(prf(flat_true, flat_pred, average='weighted'))[:-1]
    batch_acc = accuracy_score(flat_true, flat_pred)
    self.prf = update_moving_average(self.prf, batch_prf, self.n)
    self.acc = update_moving_average(self.acc, batch_acc, self.n)
    # First call initializes the counter to 1; later calls increment it.
    self.n = 1 if self.n is None else self.n + 1
def test(self):
    """Score the test set by model log-density; samples below the
    (1 - normal_ratio) percentile are flagged anomalous. Prints and saves
    accuracy/PRF/AUC to ``<result_path>result.npy`` and returns
    (accuracy, precision, recall, f_score, auc).

    Fix: removed the unused ``clean_index``/``anomaly_index`` locals.
    """
    log_density_test = []
    y_test = []
    self.ae.eval()
    for batch_idx, (x, y, _) in enumerate(self.testing_loader):
        x = to_var(x)
        x = x.float()
        y = y.float()
        log_density = self.ae.log_prob(x)
        y_test.append(y)
        log_density_test.append(log_density)
    log_density_test = torch.cat(log_density_test)
    y_test = torch.cat(y_test)
    y_test = y_test.data.cpu().numpy()
    log_density_test = log_density_test.data.cpu().numpy()
    # Low log-density = unlikely under the model = anomalous.
    thresh = np.percentile(log_density_test,
                           (1 - self.data_normaly_ratio) * 100)
    print("Threshold :", thresh)
    pred = (log_density_test < thresh).astype(int)
    gt = y_test.astype(int)
    auc = roc_auc_score(gt, -log_density_test)
    from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average='binary')
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC:{:0.4f}"
        .format(accuracy, precision, recall, f_score, auc))
    os.makedirs(self.result_path, exist_ok=True)
    np.save(
        self.result_path + "result.npy",
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f_score,
            "auc": auc,
        },
    )
    print("result save to {}".format(self.result_path))
    return accuracy, precision, recall, f_score, auc
def train_test_svm(cla):
    """Fit classifier ``cla`` on the four-type corpus and print per-class
    precision/recall/F1/support for both the train and the test split."""
    preprocessor = Preprocess()
    train_label, train_matrix, test_label, test_matrix = preprocessor.preprocess_fourtype()
    # train svm
    cla.fit(train_matrix, train_label)
    # Sanity-check the fit on the training matrix itself.
    print('\n predict train data')
    predicted_train = cla.predict(train_matrix)
    print(*prf(train_label, predicted_train))
    # Held-out evaluation.
    print('\n predict test data')
    predicted_test = cla.predict(test_matrix)
    print(*prf(test_label, predicted_test))
def test(self):
    """Reload the trained masked autoencoder and evaluate it as an anomaly
    detector: per-sample reconstruction errors above the normal-ratio
    percentile are flagged anomalous. Prints and saves accuracy/PRF/AUC and
    returns (accuracy, precision, recall, f_score, auc).

    NOTE(review): assumes ``self.testing_loader`` yields a single batch —
    ``error`` and ``y`` leak out of the loop from its last iteration;
    confirm against the loader construction.
    """
    print("======================TEST MODE======================")
    self.ae.load_state_dict(torch.load(self.model_save_path + "parameter.pth"))
    self.ae.eval()
    loss = torch.nn.MSELoss(reduction='none')
    if self.data_name == 'optdigits':
        # Pixel data: score with BCE instead of MSE.
        loss = torch.nn.BCELoss(reduction='none')
    for _, (x, y, m) in enumerate(self.testing_loader):
        y = y.data.cpu().numpy()
        x = x.to(self.device).float()
        m = m.to(self.device).float()
        _, _, xhat1, xhat2 = self.ae(x.float(), x.float(), m, m)
        # Combined reconstruction error of both decoder outputs.
        error = loss(xhat1, x) + loss(xhat2, x)
        error = error.sum(dim=1)
        error = error.data.cpu().numpy()
    thresh = np.percentile(error, self.data_normaly_ratio * 100)
    print("Threshold :", thresh)
    pred = (error > thresh).astype(int)
    gt = y.astype(int)
    from sklearn.metrics import (
        precision_recall_fscore_support as prf,
        accuracy_score,
        roc_auc_score
    )
    gt = gt.squeeze()
    auc = roc_auc_score(gt, error)
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}".format(
            accuracy, precision, recall, f_score
        )
    )
    os.makedirs(self.result_path, exist_ok=True)
    np.save(
        self.result_path + "result.npy",
        {
            "auc": auc,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f_score,
        },
    )
    return accuracy, precision, recall, f_score, auc
def train_RF(X_train, X_test, y_train, y_test, split_ID):
    """Train a balanced-subsample random forest and return per-class
    (precision, recall, f1, support) plus Matthews correlation on the test
    split; logs the classification report (and, on split '0', the model
    parameters) to the comet experiment ``exp``."""
    forest = RandomForestClassifier(n_estimators=100,
                                    random_state=2562,
                                    class_weight="balanced_subsample",
                                    n_jobs=-1)
    fitted_forest = forest.fit(X_train, y_train)
    predictions = fitted_forest.predict(X_test)
    prec, rec, f_1, supp = prf(y_test, predictions, average=None)
    class_rep = sklearn.metrics.classification_report(y_test, predictions)
    exp.log_other('Classification Report' + split_ID, class_rep)
    mcc = sklearn.metrics.matthews_corrcoef(y_test, predictions)
    # Only the first split reports hyper-parameters.
    if split_ID == '0':
        exp.log_parameters(fitted_forest.get_params())
    return prec, rec, f_1, supp, mcc
def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
    """
    Input : Actual labels and Predicted labels
    Output : Returns performance metrics
            (accuracy, precision, recall, f1, f0.1)
    """
    accuracy = accuracy_score(y_true, y_pred)
    # warn_for=() suppresses sklearn's zero-division warnings.
    precision, recall, f_score, _ = prf(y_true, y_pred, average='binary',
                                        warn_for=())
    # F0.1 is undefined when both precision and recall are zero.
    no_signal = precision == 0 and recall == 0
    f01_score = 0 if no_signal else fbeta_score(
        y_true, y_pred, average='binary', beta=0.1)
    return accuracy, precision, recall, f_score, f01_score
def test(self):
    """Deep-SVDD-style evaluation: squared distance of the latent code to
    the learned center ``c1`` is the anomaly score; scores above the
    normal-ratio percentile are flagged anomalous. Prints and saves
    accuracy/PRF/AUC and returns (accuracy, precision, recall, f_score, auc).

    NOTE(review): assumes ``self.testing_loader`` yields a single batch —
    ``error`` and ``y`` leak out of the loop from its last iteration;
    confirm against the loader construction.
    """
    print("======================TEST MODE======================")
    self.ae.eval()
    loss = SVMLoss()
    for _, (x, y, m) in enumerate(self.testing_loader):
        y = y.data.cpu().numpy()
        x = x.to(self.device).float()
        m = m.to(self.device).float()
        z1, _, _ = self.ae(x.float(), m)
        # Squared distance to the hypersphere centre.
        error = ((z1 - self.ae.c1)**2)
        error = error.sum(dim=1)
        error = error.data.cpu().numpy()
    thresh = np.percentile(error, self.data_normaly_ratio * 100)
    print("Threshold :", thresh)
    pred = (error > thresh).astype(int)
    gt = y.astype(int)
    from sklearn.metrics import (precision_recall_fscore_support as prf,
                                 accuracy_score, roc_auc_score)
    gt = gt.squeeze()
    auc = roc_auc_score(gt, error)
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, auc: {:0.4f}"
        .format(accuracy, precision, recall, f_score, auc))
    os.makedirs(self.result_path, exist_ok=True)
    np.save(
        self.result_path + "result.npy",
        {
            "auc": auc,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f_score,
        },
    )
    print("result save to {}".format(self.result_path))
    return accuracy, precision, recall, f_score, auc
def eval():
    """Score the dev set with the discriminator ``dis``: negated outputs
    above their 80th percentile are flagged anomalous. Returns binary
    (precision, recall, f_score).

    NOTE: shadows the builtin ``eval``; the name is kept for existing
    callers.
    """
    from sklearn.metrics import precision_recall_fscore_support as prf
    energies = []
    truths = []
    for input_data, labels in dev_loader:
        input_data = input_data.cuda()
        score = dis(input_data)
        energies.append(score.data.cpu().numpy())
        truths.append(labels.numpy())
    # Negate so larger energy = more anomalous.
    test_energy = -np.concatenate(energies, axis=0)
    gt = np.concatenate(truths, axis=0).astype(int)
    thresh = np.percentile(test_energy, 80)
    pred = (test_energy > thresh).astype(int)
    precision, recall, f_score, _ = prf(gt, pred, average='binary')
    return precision, recall, f_score
def get_DBSCAN_result(filename, eps, min_samples):
    """Cluster the raw (un-reduced) training data with DBSCAN and print
    weighted precision/recall/F-beta against the true labels plus timing."""
    # Load training data and labels.
    data, labels = read('data\\' + filename + '_train.data')
    start = time()
    clusterer = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    end = time()
    predict_label = clusterer.labels_
    # Map string labels to numeric ids for scoring.
    labels = change_labels_2_num(labels)
    print("数据未降维 eps:", eps, "\t min_samples: ", min_samples)
    precision, recall, fbeta_score, support = prf(labels, predict_label,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_socre:', fbeta_score)
    print('所用时间:', end - start)
def get_kmeans_pca_result(filename):
    """Fit k-means on 2-D PCA-reduced training data, evaluate on the reduced
    test split (weighted PRF + timings) and scatter-plot the clustering."""
    data, labels = read('data\\' + filename + '_train.data')
    data_test, test_lable = read('data\\' + filename + '_test.data')
    n_samples, n_features = data.shape
    n_digits = len(np.unique(labels))
    print("数据降至二维,init='random',n_init=10")
    print("n_digits: %d, \t n_samples %d, \t n_features %d" %
          (n_digits, n_samples, n_features))
    # Project training data to two dimensions and normalize.
    reduced_data = normalize(PCA(n_components=2).fit_transform(data))
    t0 = time()
    kmeans_pca = KMeans(init='random', n_clusters=n_digits, n_init=10)
    t1 = time()
    kmeans_pca.fit(reduced_data)
    t2 = time()
    # NOTE(review): the test split gets its *own* PCA fit rather than the
    # training projection — preserved from the original; confirm intended.
    predict_data = normalize(PCA(n_components=2).fit_transform(data_test))
    y_kmeans = kmeans_pca.predict(predict_data)
    # Translate cluster indices into the original class labels.
    labels_dict = get_labels_num(labels)
    predict_label_d = [labels_dict[k] for k in y_kmeans]
    precision, recall, fbeta_score, support = prf(test_lable, predict_label_d,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_socre:', fbeta_score)
    print('模型训练时间:', t1 - t0)
    print('测试数据预测时间:', t2 - t1)
    plt.scatter(predict_data[:, 0], predict_data[:, 1], c=y_kmeans, s=50,
                cmap='viridis')
    centers = kmeans_pca.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
    plt.show()
def test(self):
    """Score the test set with the fitted density model: the lowest
    ``data_anomaly_ratio`` fraction of ``score_samples`` outputs are
    predicted anomalous. Prints a summary, saves accuracy/PRF/AUC to
    ``<result_path>result.npy`` and returns
    (accuracy, precision, recall, f_score)."""
    print("======================TEST MODE======================")
    score = self.best_model.score_samples(self.X_test)
    # Lowest-likelihood samples are called anomalies.
    thresh = np.percentile(score, self.data_anomaly_ratio * 100)
    print("Threshold :", thresh)
    pred = (score < thresh).astype(int)
    gt = self.y_test.astype(int)
    from sklearn.metrics import (precision_recall_fscore_support as prf,
                                 accuracy_score, roc_auc_score)
    auc = roc_auc_score(gt,
                        -self.best_model.decision_function(self.X_test))
    accuracy = accuracy_score(gt, pred)
    precision, recall, f_score, support = prf(gt, pred, average="binary")
    print(
        "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC: {:0.4f}"
        .format(accuracy, precision, recall, f_score, auc))
    os.makedirs(self.result_path, exist_ok=True)
    results = {
        "auc": auc,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f_score,
    }
    np.save(self.result_path + "result.npy", results)
    return accuracy, precision, recall, f_score
def perform_testing():
    """Run the trained VAD network over the whole test recording in
    fixed-length chunks, threshold the sigmoid outputs at 0.5, and print
    binary PRF, percent error and voice/non-voice frame percentages.
    Predictions and targets are saved under ``results/<split_name>/``.
    """
    print('--- Performing Evaluation ---')
    nn_list = list(build_model(flag='testing'))
    data_dict = helpers.csv_to_dict(training=False)
    keys = list(data_dict.keys())
    testing_key = keys[0]  # Validate on the second composer
    print('Testing on: ' + ' '.join(testing_key))
    # Get data
    x, y, fs = helpers.fetch_data(data_dict, testing_key)
    # Peak-normalize the waveform to 0.99.
    x *= 0.99 / np.max(np.abs(x))
    sigmoid = torch.nn.Sigmoid()  # Label helper!
    d_p_length_samples = exp_settings['d_p_length'] * exp_settings[
        'fs']  # Length in samples
    number_of_data_points = len(x) // d_p_length_samples
    for data_point in tqdm(range(number_of_data_points)):
        # Generate data: slice one chunk of audio and labels.
        x_d_p = x[data_point * d_p_length_samples:(data_point + 1) *
                  d_p_length_samples]
        y_d_p = y[data_point * d_p_length_samples:(data_point + 1) *
                  d_p_length_samples]
        # Reshape data to (1, chunk_length).
        x_d_p = x_d_p.reshape(1, d_p_length_samples)
        y_d_p = y_d_p.reshape(1, d_p_length_samples)
        x_cuda = torch.autograd.Variable(torch.from_numpy(x_d_p),
                                         requires_grad=False).float().detach()
        y_cuda = torch.autograd.Variable(torch.from_numpy(y_d_p),
                                         requires_grad=False).float().detach()
        if torch.has_cudnn:
            x_cuda = x_cuda.cuda()
            y_cuda = y_cuda.cuda()
        # Forward analysis pass: Input data
        x_real, x_imag = nn_list[0].forward(x_cuda)
        # Magnitude computation
        mag = torch.norm(torch.cat((x_real, x_imag), 0), 2,
                         dim=0).unsqueeze(0)
        # Mel analysis (no gradient tracking needed at test time).
        mel_mag = torch.autograd.Variable(nn_list[1].forward(mag).data,
                                          requires_grad=False)
        # Learned normalization
        mel_mag_pr = nn_list[2].forward(mel_mag)
        # GRUs: encoder then decoder.
        h_enc = nn_list[3].forward(mel_mag_pr)
        h_dec = nn_list[4].forward(h_enc)
        # Classifier: binarize sigmoid probabilities at 0.5.
        _, vad_prob = nn_list[5].forward(h_dec, mel_mag_pr)
        vad_prob = sigmoid(vad_prob).gt(0.50).float().data.cpu().numpy()[0, :, 0]
        # Up-sample the labels to the time-domain
        # Target data preparation
        vad_true = nn_list[6].forward(y_cuda).gt(
            0.50).float().data.cpu().numpy()[0, :, 0]
        # Accumulate chunk outputs into full-length arrays.
        if data_point == 0:
            out_prob = vad_prob
            out_true_prob = vad_true
        else:
            out_prob = np.hstack((out_prob, vad_prob))
            out_true_prob = np.hstack((out_true_prob, vad_true))
    res = prf(out_true_prob, out_prob, average='binary')
    cls_error = np.sum(
        np.abs(out_true_prob - out_prob)) / np.shape(out_true_prob)[0] * 100.
    voice_regions_percentage = (len(
        np.where(out_true_prob == 1)[0])) / np.shape(out_true_prob)[0] * 100.
    non_voice_regions_percentage = (len(
        np.where(out_true_prob == 0)[0])) / np.shape(out_true_prob)[0] * 100.
    print('Precision: %2f' % res[0])
    print('Recall: %2f' % res[1])
    print('Fscore: %2f' % res[2])
    print('Error: %2f' % cls_error)
    print('Singing voice frames percentage %2f' % voice_regions_percentage)
    print('Non-singing voice frames percentage %2f' %
          non_voice_regions_percentage)
    print('-- Saving Results --')
    np.save(
        os.path.join('results', exp_settings['split_name'],
                     'lr_pcen_results.npy'), out_prob)
    np.save(
        os.path.join('results', exp_settings['split_name'],
                     'vad_true_targets.npy'), out_true_prob)
    return None
def perform_validation(nn_list):
    """Run the VAD network over the validation composer in batches,
    threshold the sigmoid outputs at 0.51, print binary PRF, and return the
    percent classification error (used for model selection)."""
    print('--- Performing Validation ---')
    d_p_length_samples = exp_settings['d_p_length'] * exp_settings['fs']
    # Get data dictionary
    data_dict = helpers.csv_to_dict(training=True)
    keys = sorted(list(data_dict.keys()))
    validation_key = keys[exp_settings['split_validation_indx']]
    print('Validating on: ' + " ".join(validation_key))
    # Get data
    x, y, _ = helpers.fetch_data(data_dict, validation_key)
    # Peak-normalize the waveform to 0.99.
    x *= 0.99 / np.max(np.abs(x))
    sigmoid = torch.nn.Sigmoid()  # Label helper!
    # Constructing batches
    number_of_data_points = len(x) // d_p_length_samples
    available_batches = number_of_data_points // exp_settings['batch_size']
    data_points = np.arange(0, number_of_data_points)
    for batch in tqdm(range(available_batches)):
        x_d_p, y_d_p = helpers.gimme_batches(batch, data_points, x, y)
        x_cuda = torch.autograd.Variable(torch.from_numpy(x_d_p).cuda(),
                                         requires_grad=False).float().detach()
        y_cuda = torch.autograd.Variable(torch.from_numpy(y_d_p).cuda(),
                                         requires_grad=False).float().detach()
        # Forward analysis pass: Input data
        x_real, x_imag = nn_list[0].forward(x_cuda)
        # Magnitude computation
        mag = torch.sqrt(x_real.pow(2) + x_imag.pow(2))
        # Mel analysis
        mel_mag = torch.autograd.Variable(nn_list[1].forward(mag).data,
                                          requires_grad=True)
        # Learned normalization
        mel_mag_pr = nn_list[2].forward(mel_mag)
        # GRUs: encoder then decoder.
        h_enc = nn_list[3].forward(mel_mag_pr)
        h_dec = nn_list[4].forward(h_enc)
        # Classifier: binarize sigmoid probabilities at 0.51.
        _, vad_prob = nn_list[5].forward(h_dec, mel_mag_pr)
        vad_prob = sigmoid(vad_prob)
        vad_prob = vad_prob.gt(0.51).float().data.cpu().numpy()[:, :, 0]\
            .reshape(exp_settings['batch_size']*exp_settings['T'], 1)
        # Target data preparation
        y_true = nn_list[6].forward(y_cuda).detach()[:, :, 0]
        vad_true = y_true.gt(0.51).float().data.cpu().numpy().reshape(
            exp_settings['batch_size'] * exp_settings['T'], 1)
        # Accumulate batch outputs into full-length column vectors.
        if batch == 0:
            out_prob = vad_prob
            out_true_prob = vad_true
        else:
            out_prob = np.vstack((out_prob, vad_prob))
            out_true_prob = np.vstack((out_true_prob, vad_true))
    res = prf(out_true_prob, out_prob, average='binary')
    cls_error = np.sum(
        np.abs(out_true_prob - out_prob)) / len(out_true_prob) * 100.
    print('Precision: %2f' % res[0])
    print('Recall: %2f' % res[1])
    print('Fscore: %2f' % res[2])
    print('Error: %2f' % cls_error)
    return cls_error