Exemple #1
0
    def classify(self, modelName):
        """Classify the heart sound described by ``self.featureVector``.

        The requested model wrapper is instantiated from ``md``, asked for
        class probabilities, and the most probable class is reported.

        Args:
            modelName: Name of the model to use; one of ``'ANN'``, ``'XGB'``
                or ``'SVM'``. It is also stored on ``self.modelName``.

        Returns:
            A ``(class_, confidence)`` tuple where ``class_`` is one of
            ``'Extrasystole'``, ``'Murmur'`` or ``'Normal'`` and
            ``confidence`` is the winning probability multiplied by 100.

        Raises:
            ValueError: If ``modelName`` is not a supported model name.
        """
        supported_models = ('ANN', 'XGB', 'SVM')
        if modelName not in supported_models:
            # Without this guard an unknown name fell through every branch
            # and crashed later with an UnboundLocalError on ``result``.
            raise ValueError(
                "Unsupported model %r; expected one of %s"
                % (modelName, ', '.join(supported_models)))

        self.modelName = modelName

        # Classes to be predicted, in the order of the probability columns
        # (assumes the models were trained with this label order - TODO confirm).
        classes = np.array(['Extrasystole', 'Murmur', 'Normal'])

        # The wrapper classes in ``md`` carry the same names as the model keys.
        model = getattr(md, modelName)()
        result = model.predict_proba(self.featureVector)

        # Only the first row of the probability matrix is used.
        probabilities = result[0]
        best = int(np.argmax(probabilities))
        confidence = probabilities[best]
        class_ = classes[best]

        return class_, confidence * 100
Exemple #2
0
def test():
    """
    Measure Perceptron, SVM and LDA accuracy for every training size in MS.

    For each size m the experiment is repeated a fixed number of times: a
    training set of m points and a test set are drawn with sample_d, every
    model is fitted on the training set and its 'accuracy' score on the test
    set is recorded. The recorded accuracies are then plotted against m and
    printed per model.
    """
    n_test_points = 10000
    n_repeats = 500

    candidates = [
        TestedModel('Perceptron', models.Perceptron()),
        TestedModel('SVM', models.SVM()),
        TestedModel('LDA', models.LDA()),
    ]

    for m in MS:
        for _ in range(n_repeats):
            # Fresh train / test draws for every repetition.
            X, y = sample_d(m)
            X_t, y_t = sample_d(n_test_points)

            for candidate in candidates:
                estimator = candidate.model
                estimator.fit(X, y)
                report = estimator.score(X_t, y_t)
                candidate.add_accuracy(m, report['accuracy'])

    plt.figure()
    for candidate in candidates:
        accuracies = [candidate.accuracy[m] for m in MS]
        plt.plot(MS, accuracies, marker='.', label=candidate.name)
    plt.legend()
    plt.title('Training batch size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()

    for candidate in candidates:
        print(candidate.name, candidate.accuracy)
def models_compare(x, y):
    """Run every model on one train/test split and plot the results together.

    The data is split 70/30. Each model function in ``models`` is called
    with ``(X_train, y_train, X_test)``; its return value (presumably the
    predictions for ``X_test`` - verify against ``models``) is drawn as a
    regression plot against ``y_test`` and passed to ``Evaluationmatrix``.

    Args:
        x: Feature matrix.
        y: Target values.
    """
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)

    # (name of the function in ``models`` / legend label, line colour),
    # listed in plotting order.
    model_colors = (
        ('SVM', 'red'),
        ('TREE', 'green'),
        ('RIDGE', 'orange'),
        ('KNN', 'yellow'),
        ('LR', 'blue'),
        ('RFR', 'black'),
    )

    for label, color in model_colors:
        results = getattr(models, label)(X_train, y_train, X_test)
        # seaborn >= 0.12 accepts only ``data`` positionally, so the two
        # arrays must be passed as keywords; this form also works on older
        # releases.
        sns.regplot(x=results, y=y_test, color=color, label=label)
        Evaluationmatrix(y_test, results, label)

    plt.title('Models Comparison')
    plt.xlabel('Predicted Ratings')
    plt.ylabel('Actual Ratings')
    plt.legend()
    plt.show()
Exemple #4
0
def PCA_please(enable_PCA_cache, submit_test_prediction):
    """Train an SVM on PCA-projected images and report classification rates.

    The PCA is fitted on the unlabeled images. With ``enable_PCA_cache`` a
    previously saved ``pca.pkl`` is loaded instead when it exists, and a
    freshly fitted PCA is saved to that file otherwise.

    Args:
        enable_PCA_cache: Whether to load/save the fitted PCA from/to disk.
        submit_test_prediction: Whether to also predict the test images and
            hand the predictions to ``submission.output``.
    """
    (tr_identity, tr_labels, tr_images,
     valid_identity, valid_labels, valid_images) = \
        preprocessor.load_train_and_valid(is_gabor=False)
    unlabeled_images = preprocessor.load_unlabeled(is_gabor=False)

    pca_filename = 'pca.pkl'
    use_cached = enable_PCA_cache and os.path.isfile(pca_filename)
    if use_cached:
        pca = joblib.load(pca_filename)
    else:
        pca = models.PCA()
        pca.fit(unlabeled_images)
        if enable_PCA_cache:
            # save the PCA
            joblib.dump(pca, pca_filename)

    projected_train = pca.transform(tr_images)
    projected_valid = pca.transform(valid_images)

    classifier = models.SVM()
    classifier.fit(projected_train, tr_labels)
    predicted_train = classifier.predict(projected_train)
    predicted_valid = classifier.predict(projected_valid)

    printClassificationRate(classifier, valid_labels, predicted_valid,
                            tr_labels, predicted_train)

    if not submit_test_prediction:
        return

    projected_test = pca.transform(preprocessor.load_test(is_gabor=False))
    submission.output(classifier.predict(projected_test))
Exemple #5
0
def cross_validate_svm_tree_model(validation_data):
    """Create an SVM model and cross-validate it over a gamma/C grid.

    Args:
        validation_data: Data handed to ``cross_validate_model_trees``.

    Returns:
        The ``md.SVM`` instance after cross validation was run on it.
    """
    # Powers of ten from 10 down to 1e-4. One list object serves as both the
    # gamma and the C candidates.
    grid_values = [10**exponent for exponent in range(1, -5, -1)]
    search_space = {"gamma": grid_values, "C": grid_values}

    svm_model = md.SVM()
    print("Cross validation SVM Model")
    svm_model.cross_validate_model_trees(search_space, validation_data, 5)
    print()
    return svm_model
Exemple #6
0
def execute_with_algorithm(alg, X, y, fname, headers, out_dir, record_id, feature_selection, oversampling, survival, undersampling):
	'''Execute a learning task using the specified algorithm.

	Feature selection (``fs.pearson_fs`` with k=220) runs first, then the
	algorithm named by ``alg`` is run through the matching ``ML`` function.
	For non-survival runs the results are saved to ``out_dir + fname + '.csv'``.
	When the algorithm reports features, they are saved to
	``out_dir + 'features_' + fname + '.csv'``.

	Args:
		alg: one of 'DT', 'RF', 'RFsmall', 'SVM', 'LR', 'XGBoost', 'COX',
			'survSVM' or 'GBS'.
		X, y: data and targets passed to feature selection and the learner.
		fname: base name for the output files.
		headers: column headers; replaced by those returned from feature selection.
		out_dir: output path prefix (concatenated as-is, no separator is added).
		record_id: unused by this function.
		feature_selection, survival: forwarded to ``fs.pearson_fs``;
			``survival`` also disables saving the result file.
		oversampling, undersampling: forwarded to the learner.

	Returns:
		``(model, best_features, [fname] + results[0:3])``, or None when the
		learner returned empty results.

	Raises:
		ValueError: if ``alg`` is not a supported algorithm name.
	'''
	known_algorithms = ('DT', 'RF', 'RFsmall', 'SVM', 'LR', 'XGBoost', 'COX', 'survSVM', 'GBS')
	if alg not in known_algorithms:
		# An unknown name used to fall through every branch and crash on the
		# unbound ``results`` below; reject it before doing any work.
		raise ValueError("Unknown algorithm %r; expected one of %s" % (alg, ', '.join(known_algorithms)))

	# number of features to keep (the per-setting values that used to be
	# chosen from survival/aggregation were 150 and 220)
	k = 220

	# perform feature selection
	new_X, best_features, headers = fs.pearson_fs(X, y, headers, k, feature_selection, survival)

	# 'DT' and 'SVM' do not report features; every other learner does
	features = None

	# execute algorithm
	if alg == 'DT':
		results, model = ML.CART(new_X, y, best_features, out_dir+"{}.dot".format(fname), headers, oversampling, undersampling)
	elif alg == 'RF':
		results, features, model = ML.RF(new_X, y, best_features, oversampling, undersampling, n_estimators=200)
	elif alg == 'RFsmall':
		results, features, model = ML.RF(new_X, y, best_features, oversampling, undersampling, n_estimators=100)
	elif alg == 'SVM':
		results, model = ML.SVM(new_X, y, best_features, oversampling, undersampling)
	elif alg == 'LR':
		results, features, model = ML.LR(new_X, y, best_features, oversampling, undersampling)
	elif alg == 'XGBoost':
		results, features, model = ML.XGBoost(new_X, y, best_features, oversampling, undersampling)
	elif alg == 'COX':
		results, features, model = ML.COX(new_X, y, best_features, oversampling, undersampling)
	elif alg == 'survSVM':
		results, features, model = ML.survSVM(new_X, y, best_features, oversampling, undersampling)
	elif alg == 'GBS':
		results, features, model = ML.GradientBoostingSurvival(new_X, y, best_features, oversampling, undersampling)

	if not results:
		return None

	# survival runs do not produce the fpr/tpr/auc/cm columns, so nothing is saved
	if not survival:
		in_out.save_results(out_dir+fname+'.csv', ["fpr", "tpr", "auc", "cm"], results, [sum(y),len(y)])

	if features is not None:
		features = features.flatten()
		in_out.save_features(out_dir+"features_" + fname + '.csv', zip(headers[1:-1], features))

	return model, best_features, [fname] + results[0:3]
Exemple #7
0
def please(submit_test_prediction):
    """Train an SVM on the raw (non-Gabor) images and report its rates.

    Args:
        submit_test_prediction: When truthy, the test images are loaded,
            predicted, and the predictions passed to ``submission.output``.
    """
    (tr_identity, tr_labels, tr_images,
     valid_identity, valid_labels, valid_images) = \
        preprocessor.load_train_and_valid(is_gabor=False)

    classifier = models.SVM()
    classifier.fit(tr_images, tr_labels)
    predicted_train = classifier.predict(tr_images)
    predicted_valid = classifier.predict(valid_images)

    printClassificationRate(classifier, valid_labels, predicted_valid,
                            tr_labels, predicted_train)

    if not submit_test_prediction:
        return

    held_out_images = preprocessor.load_test(is_gabor=False)
    submission.output(classifier.predict(held_out_images))
Exemple #8
0
def gabor_please(enable_PCA, use_all, submit_test_prediction):
    """Train an SVM on Gabor filter responses and report classification rates.

    Args:
        enable_PCA: When truthy, all feature sets are passed through
            ``PCA_Preprocess`` before fitting.
        use_all: When truthy, the model is fitted on training and validation
            images together; otherwise on the training images only.
        submit_test_prediction: When truthy, the test features are predicted
            and the predictions passed to ``submission.output``.
    """
    # Parenthesised call with a single pre-formatted string: valid on
    # Python 3 and prints the same text on Python 2.
    print("Using Gabor please, PCA: %s, use_all:%s, submit_test_prediction: %s" % (
        str(enable_PCA), str(use_all), str(submit_test_prediction)))
    tr_identity, tr_labels, tr_images, valid_identity, valid_labels, valid_images = preprocessor.load_train_and_valid(
        is_gabor=True)
    test_images = preprocessor.load_test(is_gabor=True)

    kernels = create_gabor_filters([1.0 / x for x in range(3, 15, 3)],
                                   [x * np.pi * 0.125 for x in range(8)],
                                   np.pi, np.pi)

    model = models.SVM()

    # Length of one flattened response vector.
    feature_length = 32 * 32 * 32

    def flattened_responses(images):
        """Return the filter responses of ``images``, one flat row per image."""
        responses = compute_all_filter_responses(images, kernels)
        return responses.reshape(responses.shape[0], feature_length)

    train_features = flattened_responses(tr_images)
    valid_features = flattened_responses(valid_images)
    test_features = flattened_responses(test_images)

    if use_all:
        # Fit on train + valid. The validation rate printed below is then
        # measured on data the model has already seen.
        all_features = flattened_responses(
            np.concatenate((tr_images, valid_images)))
        if enable_PCA:
            all_features, train_features, valid_features, test_features = PCA_Preprocess(
                all_features, train_features, valid_features, test_features)
        model.fit(all_features, np.concatenate((tr_labels, valid_labels)))
    else:
        if enable_PCA:
            # The first argument and first return value are the training
            # features again; that first result is not needed.
            _train_features, train_features, valid_features, test_features = PCA_Preprocess(
                train_features, train_features, valid_features, test_features)
        model.fit(train_features, tr_labels)

    train_predictions = model.predict(train_features)
    valid_predictions = model.predict(valid_features)

    printClassificationRate(model, valid_labels, valid_predictions, tr_labels,
                            train_predictions)

    if submit_test_prediction:
        test_predictions = model.predict(test_features)
        submission.output(test_predictions)
Exemple #9
0
def plot():
    """
    Draw, for every sample size m in MS, the sampled points together with the
    line for (W, BIAS) and the lines found by the Perceptron and the SVM.
    """
    ROWS, COLS = 2, 3

    fig, axs = plt.subplots(ROWS, COLS, figsize=(15, 10))
    scatter = None
    for i, m in enumerate(MS):
        X, y = draw_points(m)
        first_coord = X[:, 0]
        x_lim = np.array([min(first_coord), max(first_coord)])
        # Fill the grid row by row.
        row, col = divmod(i, COLS)
        ax = axs[row, col]

        perceptron = models.Perceptron()
        perceptron.fit(X, y)
        svm = models.SVM()
        svm.fit(X, y)

        scatter = ax.scatter(X[:, 0], X[:, 1], c=y)

        true_line = hyperplane_line(x_lim, W, BIAS)
        ax.plot(x_lim, true_line, label='f')

        perceptron_line = hyperplane_line(x_lim, perceptron.get_w(),
                                          perceptron.get_b())
        ax.plot(x_lim, perceptron_line, label='Perceptron')

        svm_line = hyperplane_line(x_lim, svm.get_w(), svm.get_b())
        ax.plot(x_lim, svm_line, label='SVM')

        ax.set(xlabel='x', ylabel='y')
        ax.set_title(f"Data for m={m}")
        ax.legend()

    # Class legend built from the last scatter plot drawn.
    class_handles = scatter.legend_elements()[0]
    plt.legend(handles=class_handles,
               labels=('Negative', 'Positive'),
               loc=4)
    axs[-1, -1].axis('off')
    fig.tight_layout()
    plt.show()
import config
import models


def huber_approx_obj(preds, dtrain, delta=1):
    '''
    xgboost objective: gradient and Hessian of the pseudo-Huber loss.

    The pseudo-Huber loss delta**2 * (sqrt(1 + (d / delta)**2) - 1), with
    d = preds - dtrain, is a smooth stand-in for the absolute error.

    preds  -- predicted values
    dtrain -- true labels (use dtrain.get_label() when called from xgb.train())
    delta  -- residual scale at which the loss turns from quadratic to
              roughly linear; must be positive. The default of 1 matches
              the previously hard-coded value.

    Returns the tuple (grad, hess):
        grad = d / sqrt(1 + (d / delta)**2)
        hess = (1 + (d / delta)**2) ** -1.5

    Raises ValueError if delta is not positive.
    '''
    if delta <= 0:
        raise ValueError("delta must be positive, got %r" % (delta,))

    d = preds - dtrain
    scale = 1 + (d / delta)**2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess


# Registry of model instances keyed by short name.
# NOTE: this assignment rebinds the name ``models``. The right-hand side is
# evaluated first, so the attribute lookups still hit the imported module,
# but afterwards ``models`` refers to this dict.
models = dict(
    dt=models.DecisionTree(),
    rf=models.RandomForest(),
    lr=models.LR(),
    xgb=models.XGBoost(),
    svm=models.SVM(),
    lgb=models.LGB(),
    # mlp=models.MLP(),
    lstm=models.LSTM(),
)

# To get the final accuracy, take the mean of the scores. The mean absolute error should be
# reported as a percentage, since the performance is what needs to be shown.
Exemple #11
0
    # model = models.LogisticRegression(X_train.shape[1])
    # model.setOptimizer('Langevin', 0.08, 7000, 1)
    # # model.setOptimizer('SGD', 0.001, 1500)
    # model.fit(X_train, y_train)
    # print("Test on test data:")
    # test_loss = model.lossFunction(X_test, y_test)
    # test_acc = model.evaluation(X_test, y_test)
    # print("loss: {} \nacc: {}".format(test_loss, test_acc))
    # model.save('data/logistic_v3.pkl')

    # print('LDA classification')
    # model = models.LDA(X_train.shape[1], 30, 0)
    # model.fit(X_train, y_train)
    # print("Test on test data:")
    # test_acc = model.evaluation(X_test, y_test)
    # print("Test acc: {}".format(test_acc))

    print('svm classification')
    kernel = 'rbf'
    model = models.SVM(kernel)
    best_model = model.fit(X_train, y_train)
    acc = model.evaluation(X_test, y_test)
    print(best_model.support_vectors_.shape)
    print(best_model.support_.shape)
    print("acc is {}".format(acc))
    print('save model...')
    dump(best_model, 'data/{}_SVM_v2.joblib'.format(kernel))

    # svm = load('data/rbf_SVM.joblib')
    # preds = svm.predict(X_test[:100])
    # print(preds)
Exemple #12
0
def main():
    """Train and evaluate the selected model on feature files of several sizes.

    For every feature size ``m`` the file ``<feature_path>feature_<m>.npy``
    is loaded and split with ``get_sets``. The model chosen by ``--model``
    is then trained and its test accuracy printed:

    * ``bp``  - one ``models.BpNet`` per hidden size in ``Hs``
    * ``svm`` - one ``models.SVM`` per (gamma, C) pair
    * ``rbf`` - one ``models.RBF_bp`` per hidden size in ``R_h``

    The SVM accuracies are saved to ``svm_acc.npy``. The array is empty
    unless ``--model svm`` was selected.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--feature_path', default='data/features/', type=str)
    parser.add_argument('--train_num', default=5, type=int)
    parser.add_argument('--val_num', default=0, type=int)
    parser.add_argument('--test_num', default=5, type=int)
    # Only these three models are handled below. Any other value used to run
    # the whole loop without training anything.
    parser.add_argument('--model',
                        default='bp',
                        type=str,
                        choices=['bp', 'rbf', 'svm'])
    opt = parser.parse_args()
    ms = [50, 100, 150, 200, 250, 300, 350, 10304]  # 10304,
    # gammas = list(np.arange(1.0, 10.0, 0.2))
    gammas = [5.6, 3.4, 2.6, 2.4, 2.2, 2.8, 2.8, 1.4]
    cs = [
        0.001
    ]  #, 0.002, 0.003, 0.005, 0.007, 0.01, 0.02, 0.03, 0.05, 0.07, 0.1, 0.2, 0.3, 0.5, 0.7, 1, 2, 3, 5, 7, 10]
    Hs = [25, 50, 100, 200, 300, 400, 500, 750, 1000]
    R_h = list(range(6, 30))
    svm_acc = []
    # Was appended to without ever being created, so ``--model rbf`` died
    # with a NameError after the first training run.
    rbf_acc = []

    def accuracy(predicted, expected):
        """Return the fraction of entries of ``predicted`` equal to ``expected``."""
        return np.sum(predicted == expected) / expected.shape[0]

    if opt.model == 'bp':
        config = BP_config()
    elif opt.model == 'rbf':
        config = RBF_config()
    elif opt.model == 'svm':
        config = SVM_config()
    for m in ms:
        features = np.load(opt.feature_path + 'feature_' + str(m) +
                           '.npy')  # pca features
        features_ = get_sets(features, opt.train_num, opt.val_num,
                             opt.test_num)
        if opt.model == 'bp':
            config.change_input_size(m)  # to suit different size of input m
            for H in Hs:
                config.change_hideen_size(H)
                print('H:%d m:%d' % (H, m))
                model = models.BpNet(config)
                loss_h, acc_h = model.train(features_['x_train'],
                                            features_['d_train'],
                                            features_['x_test'],
                                            features_['d_test'])
                y_test = np.argmax(model.predict(features_['x_test']), axis=1)
                acc = accuracy(y_test, features_['d_test'])
                print('acc: %f' % acc)
        elif opt.model == 'svm':
            # svm_acc ends up indexed as [m][gamma][C].
            svm_1 = []
            for gamma in gammas:
                svm_2 = []
                config.change_gamma(gamma)
                for C in cs:
                    config.change_C(C)
                    model = models.SVM(config)
                    model.train(features_['x_train'], features_['d_train'])
                    y_test = model.predict(features_['x_test'])
                    acc = accuracy(y_test, features_['d_test'])
                    print('m:%d gamma:%1f C: % 3f acc:%f' % (m, gamma, C, acc),
                          end='\r')
                    svm_2.append(acc)
                svm_1.append(svm_2)
            svm_acc.append(svm_1)
        elif opt.model == 'rbf':
            config.change_input_size(m)
            for h in R_h:
                config.change_hideen_size(h)
                # model = models.RBF(m, 20, 40)
                model = models.RBF_bp(
                    config, features_['x_train']
                )  # 25 for fun_3 20 for func_2 22 for func_1
                model.train(features_['x_train'], features_['d_train'])
                y_test = np.argmax(model.predict(features_['x_test']), axis=1)
                acc = accuracy(y_test, features_['d_test'])
                print('m:%d h:%d' % (m, h))
                print(acc)
                rbf_acc.append(acc)
    svm_acc_np = np.array(svm_acc)

    np.save('svm_acc.npy', svm_acc_np)
    print(svm_acc)
    print(svm_acc_np.shape)
Exemple #13
0
plt.title('ROC: Random Forest')
plt.show()

# Mean of the 10-fold cross-validation scores of the random forest.
cvsc = np.mean(cross_val_score(rf.clf, data, labels, cv=10))

print('Random Forest Accuracy: ' + str(sc[0]))
print('Random Forest Cross Validation Score: ' + str(cvsc))

# SVM Model
#=======
# Only ``data`` and ``labels`` are split. train_test_split returns two
# outputs per input array, so also passing ``name_list`` produced six values
# and the four-name unpacking raised a ValueError. ``name_list`` is used
# whole below, so it does not need to be split.
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    labels,
                                                    test_size=0.2)

svm = models.SVM()
svm.train(X_train, y_train, name_list)
svm_sc, svm_prec = svm.test(X_test, y_test)

# construct ROC curve for SVM
svm.roc_auc(data, labels)
svm.plot_margin(data, labels)

# Per-fold scores of a 5-fold cross validation on the full data set.
svm_cvsc = cross_val_score(svm.clf, data, labels, cv=5)

print('SVM Accuracy: ' + str(svm_sc))
print('SVM Precision: ' + str(svm_prec))
print('SVM Cross Validation Scores: ' + str(svm_cvsc))
print('Mean SVM Cross Validation Scores: ' + str(np.mean(svm_cvsc)))

# ConvNet Model
# Registry of tunable models keyed by model name. Each entry holds:
#   'build_fn' - the value returned by calling the builder in ``m`` at the
#                time this dict is created (the call happens here, not later)
#   'params'   - the parameter grid to search for that model
models = {
    'MLP': {
        'build_fn': m.build_MLP((24, )),
        'params': param_grid_MLP
    },
    'Decision_tree': {
        'build_fn': m.DecisionTreeModel(train=False),
        'params': param_grid_Dt
    },
    'Random_forest': {
        'build_fn': m.RandomForest(train=False),
        'params': param_grid_random_forest
    },
    'svm': {
        'build_fn': m.SVM(train=False),
        'params': param_grid_svm
    }
}
# Find the best parameters of the model selected by ``model_name``. The
# registry entry must provide a parameter grid.
if finetune:
    print("Finetuning ...")

    # Look up the pre-built model and its grid; an unknown ``model_name``
    # raises KeyError here.
    model = models[model_name]['build_fn']
    param_grid = models[model_name]['params']
    # search_pipeline returns three values, unpacked as gs, fitted_model and
    # pred (presumably the search object, the fitted model and its
    # predictions - verify against search_pipeline).
    gs, fitted_model, pred = search_pipeline(X_train,
                                             X_test,
                                             y_train,
                                             model,
                                             param_grid,
                                             scoring_fit='accuracy')
Exemple #15
0
        # Confusion Matrix
        models.plot_confusion_matrix(df_test_new_target, df_test_new_pred,
                                     names)


        models.plot_learning_curve(model.clf, "Boosting", \
            df_test, df_test_target, cv=5, train_sizes=np.arange(2000,5000,1000))

        print(
            metrics.classification_report(df_test_new_target,
                                          df_test_new_pred,
                                          target_names=names))

    if (model_name.upper() == "S"):
        model = models.SVM()
        df_test = StandardScaler(with_mean=False).fit_transform(df_test)
        df_test_new = StandardScaler(
            with_mean=False).fit_transform(df_test_new)

        models.plot_learning_curve(model.clf, "SVM", \
            df_test, df_test_target, cv=5, train_sizes=np.arange(2000,5000,1000))

        start_time = time.time()
        model.clf.fit(df_test, df_test_target)
        df_test_new_pred = model.clf.predict(df_test_new)
        print("--- %s seconds ---" % (time.time() - start_time))

        # Confusion Matrix
        models.plot_confusion_matrix(df_test_new_target, df_test_new_pred,
                                     names)
Exemple #16
0
    print('training')
    if args.model == 'knn':
        model = models.KNN(args.k, args.f, test_data.write, args.seed,
                           args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'lr':
        model = models.RidgeRegression(args.lamb, args.f, test_data.write,
                                       args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'nb':
        model = models.NaiveBayes(args.model_type, args.f, test_data.write,
                                  args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'svm':
        model = models.SVM(args.kernel, args.c, args.f, test_data.write,
                           args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'mlp':
        model = models.MLP(
            train_data.feat_dim,
            train_data.cat_num,
            optim,
            args.ac_fn,
            args.dr,
            args.lr,
            args.gpu,
            args.max_epoch_num,
            args.bs,
            args.lamb,
            args.f,
            test_data.write,