Beispiel #1
0
def get_acc_auc_kfold(X, Y, k=5):
    """Return the mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``; ``len(Y)`` is used as the
            sample count.
        k: Number of folds. Defaults to 5, the value that used to be
            hard-coded.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` averaged over all folds.
    """
    # NOTE(review): a positional sample count plus ``n_folds`` looks like the
    # legacy ``sklearn.cross_validation.KFold`` signature -- confirm against
    # the file's imports before porting.
    kf = KFold(len(Y), n_folds=k, random_state=RANDOM_STATE)
    accuracy = []
    auc = []
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train,
                                                       X_test)
        # Score each fold once; entries 0 and 1 are accuracy and AUC.
        metrics = models_partc.classification_metrics(Y_pred, Y_test)
        accuracy.append(metrics[0])
        auc.append(metrics[1])
    return mean(accuracy), mean(auc)
Beispiel #2
0
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; ``X.shape[0]`` is passed as the sample count and
            rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits to draw.
        test_percent: Value forwarded as ``test_size`` to the splitter.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    # NOTE(review): positional sample count + ``n_iter`` looks like the legacy
    # ``sklearn.cross_validation.ShuffleSplit`` signature -- confirm.
    splitter = ShuffleSplit(X.shape[0],
                            n_iter=iterNo,
                            test_size=test_percent,
                            random_state=545510477)

    accuracies, aucs = [], []
    for train_rows, test_rows in splitter:
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        # Only the first two of the five returned metrics are reported.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        accuracies.append(fold_acc)
        aucs.append(fold_auc)

    return mean(accuracies), mean(aucs)
Beispiel #3
0
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k folds.

    Args:
        X: Feature matrix; ``X.shape[0]`` is passed as the sample count and
            rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # NOTE(review): positional sample count + ``n_folds`` looks like the
    # legacy ``sklearn.cross_validation.KFold`` signature -- confirm.
    folds = KFold(X.shape[0], n_folds=k, random_state=545510477)

    accuracies, aucs = [], []
    for train_rows, test_rows in folds:
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        # Only the first two of the five returned metrics are reported.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        accuracies.append(fold_acc)
        aucs.append(fold_auc)

    return mean(accuracies), mean(aucs)
Beispiel #4
0
def main():
    """Run a 5-fold evaluation and print the averaged metrics.

    Loads the data found at ``LSTM_DATA_PATH``, shuffles it, trains and
    scores once per fold, then prints ``getAverage`` of the per-fold
    ``[acc, auc, precision, recall, f1score]`` rows.
    """
    n_folds = 5
    features, labels = get_data(LSTM_DATA_PATH)
    features, labels = shuffle_data(features, labels)
    fold_splits = k_fold_split_indices(features, n_folds)

    fold_scores = []
    for fold in range(n_folds):
        X_train, X_test, Y_train, Y_test = getTrainTest(
            fold_splits[fold], features, labels)
        predictions = train(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = classification_metrics(
            predictions, Y_test)
        fold_scores.append([acc, auc_, precision, recall, f1score])

    print('-------FINAL-------')
    print(getAverage(fold_scores))
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k unshuffled folds.

    Args:
        X: 2-D feature matrix; rows are selected with ``X[idx, :]``.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # ``random_state`` is intentionally not passed: it has no effect when
    # ``shuffle=False``, and scikit-learn >= 0.24 raises ValueError for that
    # combination. The fold assignment is unchanged.
    kfolds = KFold(n_splits=k, shuffle=False)
    acc_auc_array = []
    for train, test in kfolds.split(X):
        X_train, Y_train = (X[train, :], Y[train])
        X_test, Y_test = (X[test, :], Y[test])
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train,
                                                       X_test)
        acc, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        acc_auc_array.append([acc, auc])
    # One row per fold: column 0 is accuracy, column 1 is AUC.
    acc_auc_array = np.array(acc_auc_array)
    return np.mean(acc_auc_array[:, 0]), np.mean(acc_auc_array[:, 1])
Beispiel #6
0
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo,
                            test_size=test_percent,
                            random_state=RANDOM_STATE)
    accuracies = []
    aucs = []

    for train_rows, test_rows in splitter.split(X, Y):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        # Only the first two of the five returned metrics are reported.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        accuracies.append(fold_acc)
        aucs.append(fold_auc)

    return mean(accuracies), mean(aucs)
Beispiel #7
0
def get_acc_auc_kfold(X,Y,k=5):
    """Average accuracy and AUC of logistic regression over k unshuffled folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # ``random_state`` is intentionally not passed: without shuffling it has
    # no effect, and scikit-learn >= 0.24 raises ValueError when it is set
    # while ``shuffle`` is False. The fold assignment is unchanged.
    k_fold = KFold(n_splits=k)
    acc_folds = []
    auc_folds = []

    for train, test in k_fold.split(X, Y):
        pred = models_partc.logistic_regression_pred(X[train], Y[train], X[test])
        acc, auc_, _, _, _ = models_partc.classification_metrics(pred, Y[test])
        acc_folds.append(acc)
        auc_folds.append(auc_)

    return mean(acc_folds), mean(auc_folds)
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k unshuffled folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    acc = 0
    auc_ = 0
    # ``random_state`` is intentionally not passed: without shuffling it has
    # no effect, and scikit-learn >= 0.24 raises ValueError when it is set
    # while ``shuffle`` is False.
    kf = KFold(n_splits=k)
    for train_index, test_index in kf.split(X):
        Y_pred = models_partc.logistic_regression_pred(X[train_index],
                                                       Y[train_index],
                                                       X[test_index])
        acc_, auc__, _, _, _ = models_partc.classification_metrics(
            Y_pred, Y[test_index])
        acc += acc_
        auc_ += auc__
    # Both sums span k folds, so both are divided by k (the AUC used to be
    # divided by a hard-coded 5, which was wrong for any k != 5).
    return acc / k, auc_ / k
Beispiel #9
0
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k unshuffled folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # ``random_state`` is intentionally not passed: without shuffling it has
    # no effect, and scikit-learn >= 0.24 raises ValueError when it is set
    # while ``shuffle`` is False.
    kf = KFold(n_splits=k)
    accuracyList = []
    aucList = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train,
                                                       X_test)
        # The first two returned metrics are accuracy and AUC.
        accuracy, auc = models_partc.classification_metrics(Y_pred, Y_test)[:2]
        accuracyList.append(accuracy)
        aucList.append(auc)
    return mean(accuracyList), mean(aucList)
Beispiel #10
0
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of a default LogisticRegression over k folds.

    A single ``LogisticRegression()`` instance is refitted on the training
    rows of every fold and scored on that fold's held-out rows.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    folds = KFold(n_splits=k)
    classifier = LogisticRegression()
    per_fold = []
    for train_rows, test_rows in folds.split(X):
        classifier.fit(X[train_rows], Y[train_rows])
        predicted = classifier.predict(X[test_rows])
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predicted, Y[test_rows])
        per_fold.append([fold_acc, fold_auc])
    # Column 0 holds the accuracies, column 1 the AUCs.
    table = pd.DataFrame(per_fold)
    mean_acc = table[0].mean()
    mean_auc = table[1].mean()
    return mean_acc, mean_auc
Beispiel #11
0
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo,
                            test_size=test_percent,
                            random_state=RANDOM_STATE)
    per_split = []

    for train_rows, test_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        per_split.append([split_acc, split_auc])

    # Column 0 holds the accuracies, column 1 the AUCs.
    table = pd.DataFrame(per_split)
    mean_acc = table[0].mean()
    mean_auc = table[1].mean()
    return mean_acc, mean_auc
Beispiel #12
0
def get_acc_auc_kfold(X,Y,k=5):
    """Average accuracy and AUC of logistic regression over k unshuffled folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # ``random_state`` is intentionally not passed: without shuffling it has
    # no effect, and scikit-learn >= 0.24 raises ValueError when it is set
    # while ``shuffle`` is False.
    kf = KFold(n_splits=k)
    scores = []

    for i_train, i_test in kf.split(X):
        X_train, X_test = X[i_train], X[i_test]
        Y_train, Y_test = Y[i_train], Y[i_test]

        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        accuracy, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        scores.append([accuracy, auc])

    # Column 0 holds the accuracies, column 1 the AUCs.
    scores_df = pd.DataFrame(scores)
    return scores_df[0].mean(), scores_df[1].mean()
Beispiel #13
0
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of a default LogisticRegression over random splits.

    A single ``LogisticRegression()`` instance is refitted on the training
    rows of every split and scored on that split's held-out rows. No
    ``random_state`` is passed to the splitter, so the splits are not seeded
    here.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    # Honour ``iterNo``; the split count used to be hard-coded to 5, which
    # silently ignored the caller's argument.
    sf = ShuffleSplit(n_splits=iterNo, test_size=test_percent)
    log_regressor = LogisticRegression()
    scores = []
    for train_index, test_index in sf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        log_regressor.fit(X_train, y_train)
        y_predict = log_regressor.predict(X_test)
        acc, auc_, _, _, _ = models_partc.classification_metrics(
            y_predict, y_test)
        scores.append([acc, auc_])
    # Column 0 holds the accuracies, column 1 the AUCs.
    scores_frame = pd.DataFrame(scores)
    return scores_frame[0].mean(), scores_frame[1].mean()
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k folds.

    Args:
        X: Feature matrix; ``X.shape[0]`` is passed as the sample count and
            rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds; also the divisor used for the averages.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` as Python floats.
    """
    # NOTE(review): positional sample count + ``n_folds`` looks like the
    # legacy (deprecated) ``sklearn.cross_validation.KFold`` signature.
    folds = KFold(X.shape[0], n_folds=k, random_state=RANDOM_STATE)

    total_acc = 0
    total_auc = 0
    for train_rows, test_rows in folds:
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        total_acc += fold_acc
        total_auc += fold_auc

    mean_acc = float(total_acc) / k
    mean_auc = float(total_auc) / k
    return mean_acc, mean_auc
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random splits; also the divisor for the averages.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo,
                            test_size=test_percent,
                            random_state=RANDOM_STATE)
    total_acc = 0
    total_auc = 0
    for train_rows, test_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        total_acc += split_acc
        total_auc += split_auc

    mean_acc = total_acc / iterNo
    mean_auc = total_auc / iterNo
    return mean_acc, mean_auc
Beispiel #16
0
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo,
                            test_size=test_percent,
                            random_state=RANDOM_STATE)
    accuracies, aucs = [], []
    for train_rows, test_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        # Keep only the two leading metrics (accuracy, AUC).
        leading = models_partc.classification_metrics(
            predictions, Y[test_rows])[:2]
        split_acc, split_auc = leading
        accuracies.append(split_acc)
        aucs.append(split_auc)
    return mean(accuracies), mean(aucs)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_size=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    No ``random_state`` is passed to the splitter, so the splits are not
    seeded here.

    Args:
        X: 2-D feature matrix; rows are selected with ``X[idx, :]``.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_size: Forwarded unchanged to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_size)
    per_split = []
    for train_rows, test_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows, :], Y[train_rows], X[test_rows, :])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        per_split.append([split_acc, split_auc])

    # One row per split: column 0 is accuracy, column 1 is AUC.
    table = np.array(per_split)
    mean_acc = np.mean(table[:, 0])
    mean_auc = np.mean(table[:, 1])
    return mean_acc, mean_auc
Beispiel #18
0
def get_acc_auc_kfold(X,Y,k=5):
    """Average accuracy and AUC of ``my_classifier_predictions`` over k folds.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    # ``random_state`` is intentionally not passed: without shuffling it has
    # no effect, and scikit-learn >= 0.24 raises ValueError when it is set
    # while ``shuffle`` is False.
    kf = KFold(n_splits=k)
    # Plain lists avoid reallocating an array on every fold (np.append copies).
    accuracies = []
    areas_under_curve = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        y_pred = my_classifier_predictions(X_train, y_train, X_test)
        accuracy, area_under_curve, _, _, _ = models_partc.classification_metrics(
            Y_pred=y_pred, Y_true=y_test)
        accuracies.append(accuracy)
        areas_under_curve.append(area_under_curve)
    return np.mean(accuracies), np.mean(areas_under_curve)
Beispiel #19
0
def get_acc_auc_kfold(X, Y, algo="logistic regression", k=5, n_components=110):
    """Cross-validate one of the ``models_partc`` classifiers over k folds.

    Args:
        X: Feature matrix; ``X.shape[0]`` is passed as the sample count and
            rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        algo: Name of the classifier to evaluate; must be one of the keys of
            the ``predictors`` table below.
        k: Number of folds.
        n_components: Currently unused; it was only consumed by the PCA
            preprocessing step, which is disabled.

    Returns:
        Tuple ``(mean_accuracy, mean_auc, mean_precision, mean_recall)``
        over all folds.

    Raises:
        ValueError: If ``algo`` is not a recognised classifier name
            (previously this surfaced as an UnboundLocalError on ``Y_pred``).
    """
    # Classifier name -> attribute of ``models_partc`` implementing it. The
    # attribute is resolved lazily so only the selected predictor must exist.
    predictors = {
        "logistic regression": "logistic_regression_pred",
        "linear_svm": "svm_pred",
        "decision_tree": "decisionTree_pred",
        "ada boost": "ada_boost_pred",
        "bagging logistic": "bagging_log_pred",
        "bagging_svm": "bagging_SVC_pred",
        "neural_network": "neural_network",
    }
    if algo not in predictors:
        raise ValueError("unknown algo %r; expected one of %s" %
                         (algo, sorted(predictors)))
    predict = getattr(models_partc, predictors[algo])

    # NOTE(review): ``n=`` / ``n_folds=`` looks like the legacy
    # ``sklearn.cross_validation.KFold`` signature -- confirm.
    kf = KFold(n=X.shape[0], n_folds=k)
    acc_list = []
    auc_list = []
    precision_list = []
    recall_list = []

    for train_idx, test_idx in kf:
        # PCA preprocessing (PC_analysis) is disabled; raw features are used.
        X_train, X_test = X[train_idx], X[test_idx]
        Y_pred = predict(X_train, Y[train_idx], X_test)
        acc, auc_, precision, recall, _ = models_partc.classification_metrics(
            Y_pred, Y[test_idx])
        acc_list.append(acc)
        auc_list.append(auc_)
        precision_list.append(precision)
        recall_list.append(recall)

    return (mean(acc_list), mean(auc_list), mean(precision_list),
            mean(recall_list))
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; ``X.shape[0]`` is passed as the sample count and
            rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random splits; also the divisor for the averages.
        test_percent: Value forwarded as ``test_size`` to the splitter.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` as Python floats.
    """
    # NOTE(review): positional sample count + ``n_iter`` looks like the legacy
    # ``sklearn.cross_validation.ShuffleSplit`` signature -- confirm.
    splitter = ShuffleSplit(X.shape[0],
                            n_iter=iterNo,
                            test_size=test_percent,
                            random_state=RANDOM_STATE)
    total_acc = 0
    total_auc = 0
    for train_rows, test_rows in splitter:
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        total_acc += split_acc
        total_auc += split_auc

    mean_acc = float(total_acc) / iterNo
    mean_auc = float(total_auc) / iterNo
    return mean_acc, mean_auc
Beispiel #21
0
def get_acc_auc_kfold(X, Y, k=5):
    """Average accuracy and AUC of logistic regression over k folds.

    Args:
        X: 2-D feature matrix; rows are selected with ``X[idx, :]``.
        Y: Labels aligned with the rows of ``X``.
        k: Number of folds, passed positionally to ``KFold``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all folds.
    """
    folds = KFold(k)
    per_fold = []
    for train_rows, test_rows in folds.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows, :], Y[train_rows], X[test_rows, :])
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        per_fold.append([fold_acc, fold_auc])

    # One row per fold: column 0 is accuracy, column 1 is AUC.
    table = np.array(per_fold)
    mean_acc = np.mean(table[:, 0])
    mean_auc = np.mean(table[:, 1])
    return mean_acc, mean_auc
Beispiel #22
0
def get_acc_auc_randomisedCV(X, Y):
    """Average accuracy and AUC of logistic regression over 5 random splits.

    Each split holds out ``test_size=0.2`` of the samples.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``; ``len(Y)`` is passed as the
            sample count.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    # NOTE(review): positional sample count + ``n_iter`` looks like the legacy
    # ``sklearn.cross_validation.ShuffleSplit`` signature -- confirm.
    splitter = ShuffleSplit(len(Y),
                            n_iter=5,
                            test_size=0.2,
                            random_state=RANDOM_STATE)
    accuracies, aucs = [], []
    for train_rows, test_rows in splitter:
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        accuracies.append(split_acc)
        aucs.append(split_auc)

    return mean(accuracies), mean(aucs)
Beispiel #23
0
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: Feature matrix; rows are selected with arrays of indices.
        Y: Labels aligned with the rows of ``X``.
        iterNo: Number of random train/test splits.
        test_percent: Value forwarded as ``test_size`` to ``ShuffleSplit``.

    Returns:
        Tuple ``(mean_accuracy, mean_auc)`` over all splits.
    """
    splitter = ShuffleSplit(n_splits=iterNo,
                            random_state=RANDOM_STATE,
                            test_size=test_percent)
    accuracies = []
    aucs = []

    for train_rows, test_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            Y_pred=predictions, Y_true=Y[test_rows])
        accuracies.append(split_acc)
        aucs.append(split_auc)

    return np.mean(np.array(accuracies)), np.mean(np.array(aucs))