def classify(self, modelName):
    self.modelName = modelName
    # Classes to be predicted
    classes = np.array(['Extrasystole', 'Murmur', 'Normal'])
    # Classify the heart sound with the selected model
    if self.modelName == 'ANN':
        result = md.ANN().predict_proba(self.featureVector)
    elif self.modelName == 'XGB':
        result = md.XGB().predict_proba(self.featureVector)
    elif self.modelName == 'SVM':
        result = md.SVM().predict_proba(self.featureVector)
    else:
        raise ValueError("Unknown model: {}".format(modelName))
    # Pick the class with the highest predicted probability
    confidence = max(result[0])
    class_ = classes[np.argmax(result[0])]
    return class_, confidence * 100
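# A minimal standalone sketch of the argmax/confidence step above, using a
# made-up probability row rather than a real predict_proba output.
import numpy as np

classes = np.array(['Extrasystole', 'Murmur', 'Normal'])
probs = np.array([[0.1, 0.7, 0.2]])        # shape (1, 3), like predict_proba
idx = np.argmax(probs[0])
print(classes[idx], probs[0][idx] * 100)   # -> Murmur 70.0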
def test():
    """Test Perceptron, SVM and LDA accuracy as a function of the training-set size m."""
    tested_models = [
        TestedModel('Perceptron', models.Perceptron()),
        TestedModel('SVM', models.SVM()),
        TestedModel('LDA', models.LDA()),
    ]
    k = 10000         # test-set size
    iterations = 500
    for m in MS:
        for _ in range(iterations):
            X, y = sample_d(m)
            X_t, y_t = sample_d(k)
            for tested in tested_models:
                tested.model.fit(X, y)
                score = tested.model.score(X_t, y_t)
                tested.add_accuracy(m, score['accuracy'])
    plt.figure()
    for tested in tested_models:
        plt.plot(MS, [tested.accuracy[m] for m in MS], marker='.', label=tested.name)
    plt.legend()
    plt.title('Training batch size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()
    for tested in tested_models:
        print(tested.name, tested.accuracy)
def models_compare(x, y):
    """Fit several regressors on one split and overlay predicted vs. actual ratings."""
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)
    # (model name, model function, plot color)
    model_specs = [
        ('SVM', models.SVM, 'red'),
        ('TREE', models.TREE, 'green'),
        ('RIDGE', models.RIDGE, 'orange'),
        ('KNN', models.KNN, 'yellow'),
        ('LR', models.LR, 'blue'),
        ('RFR', models.RFR, 'black'),
    ]
    for name, model_fn, color in model_specs:
        predictions = model_fn(X_train, y_train, X_test)
        sns.regplot(x=predictions, y=y_test, color=color, label=name)
        Evaluationmatrix(y_test, predictions, name)
    plt.title('Models Comparison')
    plt.xlabel('Predicted Ratings')
    plt.ylabel('Actual Ratings')
    plt.legend()
    plt.show()
def PCA_please(enable_PCA_cache, submit_test_prediction):
    tr_identity, tr_labels, tr_images, valid_identity, valid_labels, valid_images = \
        preprocessor.load_train_and_valid(is_gabor=False)
    unlabeled_images = preprocessor.load_unlabeled(is_gabor=False)

    if enable_PCA_cache:
        pca_filename = 'pca.pkl'
        if os.path.isfile(pca_filename):
            pca = joblib.load(pca_filename)
        else:
            # fit PCA on the unlabeled data and cache it to disk
            pca = models.PCA()
            pca.fit(unlabeled_images)
            joblib.dump(pca, pca_filename)
    else:
        pca = models.PCA()
        pca.fit(unlabeled_images)

    pca_tr_images = pca.transform(tr_images)
    pca_valid_images = pca.transform(valid_images)

    model = models.SVM()
    model.fit(pca_tr_images, tr_labels)
    train_predictions = model.predict(pca_tr_images)
    valid_predictions = model.predict(pca_valid_images)
    printClassificationRate(model, valid_labels, valid_predictions,
                            tr_labels, train_predictions)

    if submit_test_prediction:
        test_images = preprocessor.load_test(is_gabor=False)
        pca_test_images = pca.transform(test_images)
        test_predictions = model.predict(pca_test_images)
        submission.output(test_predictions)
def cross_validate_svm_tree_model(validation_data):
    gamma_parameters = [10**1, 10**0, 10**-1, 10**-2, 10**-3, 10**-4]
    C_parameters = gamma_parameters
    params = {"gamma": gamma_parameters, "C": C_parameters}
    model = md.SVM()
    print("Cross validation SVM Model")
    model.cross_validate_model_trees(params, validation_data, 5)
    print()
    return model
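# md.SVM's cross_validate_model_trees is not shown in the source; a rough
# scikit-learn equivalent of the same grid search (assuming validation_data
# is an (X, y) pair, which the source does not confirm) could look like:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

def grid_search_svm(X, y):
    param_grid = {"gamma": [10.0**e for e in range(1, -5, -1)],
                  "C": [10.0**e for e in range(1, -5, -1)]}
    search = GridSearchCV(SVC(), param_grid, cv=5)   # 5-fold CV, as above
    search.fit(X, y)
    print("best params:", search.best_params_)
    return search.best_estimator_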
def execute_with_algorithm(alg, X, y, fname, headers, out_dir, record_id,
                           feature_selection, oversampling, survival, undersampling):
    '''Execute a learning task using the specified algorithm.'''
    # number of features to keep; earlier experiments varied k per setting
    # (150 with aggregation, 220 without, for both survival settings)
    k = 220

    # perform feature selection
    new_X, best_features, headers = fs.pearson_fs(X, y, headers, k,
                                                  feature_selection, survival)

    # execute algorithm
    if alg == 'DT':
        results, model = ML.CART(new_X, y, best_features,
                                 out_dir + "{}.dot".format(fname),
                                 headers, oversampling, undersampling)
    elif alg == 'RF':
        results, features, model = ML.RF(new_X, y, best_features,
                                         oversampling, undersampling,
                                         n_estimators=200)
    elif alg == 'RFsmall':
        results, features, model = ML.RF(new_X, y, best_features,
                                         oversampling, undersampling,
                                         n_estimators=100)
    elif alg == 'SVM':
        results, model = ML.SVM(new_X, y, best_features,
                                oversampling, undersampling)
    elif alg == 'LR':
        results, features, model = ML.LR(new_X, y, best_features,
                                         oversampling, undersampling)
    elif alg == 'XGBoost':
        results, features, model = ML.XGBoost(new_X, y, best_features,
                                              oversampling, undersampling)
    elif alg == 'COX':
        results, features, model = ML.COX(new_X, y, best_features,
                                          oversampling, undersampling)
    elif alg == 'survSVM':
        results, features, model = ML.survSVM(new_X, y, best_features,
                                              oversampling, undersampling)
    elif alg == 'GBS':
        results, features, model = ML.GradientBoostingSurvival(
            new_X, y, best_features, oversampling, undersampling)

    if not results:
        return

    if not survival:
        in_out.save_results(out_dir + fname + '.csv',
                            ["fpr", "tpr", "auc", "cm"], results,
                            [sum(y), len(y)])
    # (an earlier version saved a concordance index ("CI") for survival models)

    if 'features' in locals():
        features = features.flatten()
        in_out.save_features(out_dir + "features_" + fname + '.csv',
                             zip(headers[1:-1], features))

    return model, best_features, [fname] + results[0:3]
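# fs.pearson_fs is not defined here; a hypothetical sketch of top-k selection
# by absolute Pearson correlation with the label (names and return shape are
# assumptions, not the project's actual implementation):
import numpy as np

def pearson_topk(X, y, headers, k):
    # absolute Pearson correlation of each feature column with y
    corrs = np.array([abs(np.corrcoef(X[:, j], y)[0, 1]) for j in range(X.shape[1])])
    best = np.argsort(corrs)[::-1][:k]   # indices of the k strongest features
    return X[:, best], best, [headers[j] for j in best]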
def please(submit_test_prediction):
    tr_identity, tr_labels, tr_images, valid_identity, valid_labels, valid_images = \
        preprocessor.load_train_and_valid(is_gabor=False)

    model = models.SVM()
    model.fit(tr_images, tr_labels)
    train_predictions = model.predict(tr_images)
    valid_predictions = model.predict(valid_images)
    printClassificationRate(model, valid_labels, valid_predictions,
                            tr_labels, train_predictions)

    if submit_test_prediction:
        test_images = preprocessor.load_test(is_gabor=False)
        test_predictions = model.predict(test_images)
        submission.output(test_predictions)
def gabor_please(enable_PCA, use_all, submit_test_prediction):
    print("Using Gabor please, PCA: %s, use_all: %s, submit_test_prediction: %s"
          % (enable_PCA, use_all, submit_test_prediction))
    tr_identity, tr_labels, tr_images, valid_identity, valid_labels, valid_images = \
        preprocessor.load_train_and_valid(is_gabor=True)
    test_images = preprocessor.load_test(is_gabor=True)

    # 4 frequencies x 8 orientations = 32 Gabor kernels
    kernels = create_gabor_filters([1.0 / x for x in range(3, 15, 3)],
                                   [x * np.pi * 0.125 for x in range(8)],
                                   np.pi, np.pi)
    model = models.SVM()

    # each 32x32 image yields one response map per kernel: 32 * 32 * 32 values
    train_features = compute_all_filter_responses(tr_images, kernels)
    train_features = train_features.reshape(train_features.shape[0], 32 * 32 * 32)
    valid_features = compute_all_filter_responses(valid_images, kernels)
    valid_features = valid_features.reshape(valid_features.shape[0], 32 * 32 * 32)
    test_features = compute_all_filter_responses(test_images, kernels)
    test_features = test_features.reshape(test_features.shape[0], 32 * 32 * 32)

    if use_all:
        # train on train + valid combined
        all_features = compute_all_filter_responses(
            np.concatenate((tr_images, valid_images)), kernels)
        all_features = all_features.reshape(all_features.shape[0], 32 * 32 * 32)
        if enable_PCA:
            all_features, train_features, valid_features, test_features = PCA_Preprocess(
                all_features, train_features, valid_features, test_features)
        model.fit(all_features, np.concatenate((tr_labels, valid_labels)))
    else:
        if enable_PCA:
            _, train_features, valid_features, test_features = PCA_Preprocess(
                train_features, train_features, valid_features, test_features)
        model.fit(train_features, tr_labels)

    train_predictions = model.predict(train_features)
    valid_predictions = model.predict(valid_features)
    printClassificationRate(model, valid_labels, valid_predictions,
                            tr_labels, train_predictions)

    if submit_test_prediction:
        test_predictions = model.predict(test_features)
        submission.output(test_predictions)
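# create_gabor_filters is not shown in the source; one plausible sketch using
# scikit-image (the real helper may construct its kernels differently):
import numpy as np
from skimage.filters import gabor_kernel

def create_gabor_filters(frequencies, thetas, sigma_x, sigma_y):
    # one real-valued kernel per (frequency, orientation) pair
    return [np.real(gabor_kernel(f, theta=t, sigma_x=sigma_x, sigma_y=sigma_y))
            for f in frequencies for t in thetas]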
def plot():
    """Plot the hyperplanes learned by each model for m samples drawn from the distribution."""
    ROWS = 2
    COLS = 3
    fig, axs = plt.subplots(ROWS, COLS, figsize=(15, 10))
    scatter = None
    for i, m in enumerate(MS):
        X, y = draw_points(m)
        x_lim = np.array([min(X[:, 0]), max(X[:, 0])])
        ax = axs[i // COLS, i % COLS]

        perceptron = models.Perceptron()
        perceptron.fit(X, y)
        svm = models.SVM()
        svm.fit(X, y)

        scatter = ax.scatter(X[:, 0], X[:, 1], c=y)
        ax.plot(x_lim, hyperplane_line(x_lim, W, BIAS), label='f')
        ax.plot(x_lim, hyperplane_line(x_lim, perceptron.get_w(), perceptron.get_b()),
                label='Perceptron')
        ax.plot(x_lim, hyperplane_line(x_lim, svm.get_w(), svm.get_b()), label='SVM')
        ax.set(xlabel='x', ylabel='y')
        ax.set_title(f"Data for m={m}")
        ax.legend()
    plt.legend(handles=scatter.legend_elements()[0],
               labels=('Negative', 'Positive'), loc=4)
    axs[-1, -1].axis('off')
    fig.tight_layout()
    plt.show()
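# hyperplane_line is referenced above but not defined in this snippet; for a
# 2-D separating hyperplane w[0]*x + w[1]*y + b = 0 it reduces to solving for
# y along x_lim, as in this sketch:
def hyperplane_line(x_lim, w, b):
    return -(w[0] * x_lim + b) / w[1]   # assumes w[1] != 0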
import config
import models
import numpy as np


def huber_approx_obj(preds, dtrain):
    '''xgboost custom objective approximating mean absolute error (pseudo-Huber loss)'''
    d = preds - dtrain  # 'dtrain' is the label array; use dtrain.get_label() with xgb.train()
    h = 1               # h is the delta parameter of the pseudo-Huber loss
    scale = 1 + (d / h)**2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess


# NOTE: this dict rebinds the name `models`, shadowing the module imported above.
models = {
    "dt": models.DecisionTree(),
    "rf": models.RandomForest(),
    "lr": models.LR(),
    "xgb": models.XGBoost(),
    "svm": models.SVM(),
    "lgb": models.LGB(),
    # "mlp": models.MLP(),
    "lstm": models.LSTM(),
}

# Final accuracy is reported as the mean over runs, and mean absolute error
# is expressed as a percentage so the performance is easy to read off.
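# A minimal usage sketch for the custom objective with xgb.train on synthetic
# data; the parameters below are illustrative and do not come from the source.
import xgboost as xgb

def huber_obj_for_train(preds, dtrain):
    # adapt huber_approx_obj to xgb.train's (preds, DMatrix) convention
    return huber_approx_obj(preds, dtrain.get_label())

X = np.random.rand(100, 4)
y = X @ np.array([1.0, -2.0, 0.5, 3.0])
booster = xgb.train({"max_depth": 3}, xgb.DMatrix(X, label=y),
                    num_boost_round=50, obj=huber_obj_for_train)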
# model = models.LogisticRegression(X_train.shape[1])
# model.setOptimizer('Langevin', 0.08, 7000, 1)
# # model.setOptimizer('SGD', 0.001, 1500)
# model.fit(X_train, y_train)
# print("Test on test data:")
# test_loss = model.lossFunction(X_test, y_test)
# test_acc = model.evaluation(X_test, y_test)
# print("loss: {}\nacc: {}".format(test_loss, test_acc))
# model.save('data/logistic_v3.pkl')

# print('LDA classification')
# model = models.LDA(X_train.shape[1], 30, 0)
# model.fit(X_train, y_train)
# print("Test on test data:")
# test_acc = model.evaluation(X_test, y_test)
# print("Test acc: {}".format(test_acc))

print('SVM classification')
kernel = 'rbf'
model = models.SVM(kernel)
best_model = model.fit(X_train, y_train)
acc = model.evaluation(X_test, y_test)
print(best_model.support_vectors_.shape)
print(best_model.support_.shape)
print("acc is {}".format(acc))

print('save model...')
dump(best_model, 'data/{}_SVM_v2.joblib'.format(kernel))

# svm = load('data/rbf_SVM.joblib')
# preds = svm.predict(X_test[:100])
# print(preds)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--feature_path', default='data/features/', type=str)
    parser.add_argument('--train_num', default=5, type=int)
    parser.add_argument('--val_num', default=0, type=int)
    parser.add_argument('--test_num', default=5, type=int)
    parser.add_argument('--model', default='bp', type=str)
    opt = parser.parse_args()

    ms = [50, 100, 150, 200, 250, 300, 350, 10304]   # PCA feature sizes
    # gammas = list(np.arange(1.0, 10.0, 0.2))
    gammas = [5.6, 3.4, 2.6, 2.4, 2.2, 2.8, 2.8, 1.4]  # one gamma per m
    cs = [0.001]  # also tried: 0.002, 0.003, 0.005, 0.007, 0.01, 0.02, 0.03,
                  # 0.05, 0.07, 0.1, 0.2, 0.3, 0.5, 0.7, 1, 2, 3, 5, 7, 10
    Hs = [25, 50, 100, 200, 300, 400, 500, 750, 1000]  # hidden sizes, BP net
    R_h = list(range(6, 30))                           # hidden sizes, RBF net
    svm_acc = []
    rbf_acc = []  # bug fix: was used below without being initialized

    if opt.model == 'bp':
        config = BP_config()
    elif opt.model == 'rbf':
        config = RBF_config()
    elif opt.model == 'svm':
        config = SVM_config()

    for m in ms:
        # PCA features precomputed at dimensionality m
        features = np.load(opt.feature_path + 'feature_' + str(m) + '.npy')
        features_ = get_sets(features, opt.train_num, opt.val_num, opt.test_num)

        if opt.model == 'bp':
            config.change_input_size(m)  # adapt the net to input size m
            for H in Hs:
                config.change_hideen_size(H)
                print('H:%d m:%d' % (H, m))
                model = models.BpNet(config)
                loss_h, acc_h = model.train(features_['x_train'], features_['d_train'],
                                            features_['x_test'], features_['d_test'])
                y_test = np.argmax(model.predict(features_['x_test']), axis=1)
                acc = np.sum(y_test == features_['d_test']) / features_['d_test'].shape[0]
                print('acc: %f' % acc)

        elif opt.model == 'svm':
            svm_1 = []
            for gamma in gammas:
                svm_2 = []
                config.change_gamma(gamma)
                for C in cs:
                    config.change_C(C)
                    model = models.SVM(config)
                    model.train(features_['x_train'], features_['d_train'])
                    y_test = model.predict(features_['x_test'])
                    acc = np.sum(y_test == features_['d_test']) / features_['d_test'].shape[0]
                    print('m:%d gamma:%.1f C:%.3f acc:%f' % (m, gamma, C, acc), end='\r')
                    svm_2.append(acc)
                svm_1.append(svm_2)
            svm_acc.append(svm_1)

        elif opt.model == 'rbf':
            config.change_input_size(m)
            for h in R_h:
                config.change_hideen_size(h)
                # model = models.RBF(m, 20, 40)
                # hidden size: 25 for func_3, 20 for func_2, 22 for func_1
                model = models.RBF_bp(config, features_['x_train'])
                model.train(features_['x_train'], features_['d_train'])
                y_test = np.argmax(model.predict(features_['x_test']), axis=1)
                acc = np.sum(y_test == features_['d_test']) / features_['d_test'].shape[0]
                print('m:%d h:%d' % (m, h))
                print(acc)
                rbf_acc.append(acc)

    svm_acc_np = np.array(svm_acc)
    np.save('svm_acc.npy', svm_acc_np)
    print(svm_acc)
    print(svm_acc_np.shape)
plt.title('ROC: Random Forest')
plt.show()

cvsc = np.mean(cross_val_score(rf.clf, data, labels, cv=10))
print('Random Forest Accuracy: ' + str(sc[0]))
print('Random Forest Cross Validation Score: ' + str(cvsc))

# SVM Model
# =========
# name_list is passed to svm.train separately, so only data and labels are split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)
svm = models.SVM()
svm.train(X_train, y_train, name_list)
svm_sc, svm_prec = svm.test(X_test, y_test)

# construct ROC curve and margin plot for SVM
svm.roc_auc(data, labels)
svm.plot_margin(data, labels)

svm_cvsc = cross_val_score(svm.clf, data, labels, cv=5)
print('SVM Accuracy: ' + str(svm_sc))
print('SVM Precision: ' + str(svm_prec))
print('SVM Cross Validation Scores: ' + str(svm_cvsc))
print('Mean SVM Cross Validation Score: ' + str(np.mean(svm_cvsc)))

# ConvNet Model
models = {
    'MLP': {
        'build_fn': m.build_MLP((24,)),
        'params': param_grid_MLP,
    },
    'Decision_tree': {
        'build_fn': m.DecisionTreeModel(train=False),
        'params': param_grid_Dt,
    },
    'Random_forest': {
        'build_fn': m.RandomForest(train=False),
        'params': param_grid_random_forest,
    },
    'svm': {
        'build_fn': m.SVM(train=False),
        'params': param_grid_svm,
    },
}

# Find the best parameters for the selected model; a parameter grid must be provided.
if finetune:
    print("Finetuning ...")
    model = models[model_name]['build_fn']
    param_grid = models[model_name]['params']
    gs, fitted_model, pred = search_pipeline(X_train, X_test, y_train, model,
                                             param_grid, scoring_fit='accuracy')
# Confusion Matrix
models.plot_confusion_matrix(df_test_new_target, df_test_new_pred, names)
models.plot_learning_curve(model.clf, "Boosting",
                           df_test, df_test_target, cv=5,
                           train_sizes=np.arange(2000, 5000, 1000))
print(metrics.classification_report(df_test_new_target, df_test_new_pred,
                                    target_names=names))

if model_name.upper() == "S":
    model = models.SVM()
    # scale features (without centering) before fitting the SVM
    df_test = StandardScaler(with_mean=False).fit_transform(df_test)
    df_test_new = StandardScaler(with_mean=False).fit_transform(df_test_new)
    models.plot_learning_curve(model.clf, "SVM",
                               df_test, df_test_target, cv=5,
                               train_sizes=np.arange(2000, 5000, 1000))
    start_time = time.time()
    model.clf.fit(df_test, df_test_target)
    df_test_new_pred = model.clf.predict(df_test_new)
    print("--- %s seconds ---" % (time.time() - start_time))

    # Confusion Matrix
    models.plot_confusion_matrix(df_test_new_target, df_test_new_pred, names)
print('training')
if args.model == 'knn':
    model = models.KNN(args.k, args.f, test_data.write, args.seed, args.num_seed)
    model.train_model(train_data, args.no_valid)
elif args.model == 'lr':
    model = models.RidgeRegression(args.lamb, args.f, test_data.write,
                                   args.seed, args.num_seed)
    model.train_model(train_data, args.no_valid)
elif args.model == 'nb':
    model = models.NaiveBayes(args.model_type, args.f, test_data.write,
                              args.seed, args.num_seed)
    model.train_model(train_data, args.no_valid)
elif args.model == 'svm':
    model = models.SVM(args.kernel, args.c, args.f, test_data.write,
                       args.seed, args.num_seed)
    model.train_model(train_data, args.no_valid)
elif args.model == 'mlp':
    model = models.MLP(train_data.feat_dim, train_data.cat_num, optim,
                       args.ac_fn, args.dr, args.lr, args.gpu,
                       args.max_epoch_num, args.bs, args.lamb, args.f,
                       test_data.write,