Example #1
from sklearn.cross_validation import KFold  # pre-0.18 scikit-learn API
import models, my_model  # project-local modules providing the classifier and the metrics

def get_acc_auc_kfold(X, Y, k=10):
    # TODO: First get the train indices and test indices for each iteration
    # Then train the classifier accordingly
    # Report the mean accuracy and mean auc of all the folds
    accuracy = 0
    auc = 0
    kf_total = KFold(Y.size, n_folds=k)
    for train_indices, test_indices in kf_total:
        lr = my_model.my_classifier_predictions(X[train_indices], Y[train_indices], X[test_indices])
        # classification_metrics is assumed to return (acc, auc, precision, recall, f1)
        acc, auc_, p, r, f = models.classification_metrics(lr, Y[test_indices])
        accuracy += acc
        auc += auc_
    return accuracy / k, auc / k
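
Every example in this listing delegates metric computation to a project-local models.classification_metrics helper. A minimal sketch of what such a helper might look like, assuming it returns (accuracy, AUC, precision, recall, F1) for binary predictions; the original project's implementation may differ:

# Hypothetical sketch of the classification_metrics helper assumed by these
# snippets; not the original authors' code.
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)

def classification_metrics(Y_pred, Y_true):
    """Return (accuracy, auc, precision, recall, f1) for binary predictions."""
    acc = accuracy_score(Y_true, Y_pred)
    auc = roc_auc_score(Y_true, Y_pred)
    precision = precision_score(Y_true, Y_pred)
    recall = recall_score(Y_true, Y_pred)
    f1 = f1_score(Y_true, Y_pred)
    return acc, auc, precision, recall, f1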
Example #2
from sklearn.cross_validation import ShuffleSplit  # pre-0.18 scikit-learn API
import models, my_model  # project-local modules providing the classifier and the metrics

def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # TODO: First get the train indices and test indices for each iteration
    # Then train the classifier accordingly
    # Report the mean accuracy and mean auc of all the iterations
    accuracy = 0
    auc = 0
    rs = ShuffleSplit(Y.size, n_iter=iterNo, test_size=test_percent)
    for train_indices, test_indices in rs:
        lr = my_model.my_classifier_predictions(X[train_indices], Y[train_indices], X[test_indices])
        # classification_metrics is assumed to return (acc, auc, precision, recall, f1)
        acc, auc_, p, r, f = models.classification_metrics(lr, Y[test_indices])
        accuracy += acc
        auc += auc_
    return accuracy / iterNo, auc / iterNo
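
For context, a minimal usage sketch for the two functions above, with synthetic data standing in for the project's feature matrix and labels (the project-local my_model and models helpers are assumed to be importable):

# Illustrative only: random data replaces the real feature matrix and labels.
import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix(np.random.rand(100, 20))   # 100 samples, 20 features
Y = np.random.randint(0, 2, size=100)     # binary labels

mean_acc_kf, mean_auc_kf = get_acc_auc_kfold(X, Y, k=5)
mean_acc_cv, mean_auc_cv = get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2)
print("k-fold:     accuracy=%.3f auc=%.3f" % (mean_acc_kf, mean_auc_kf))
print("randomised: accuracy=%.3f auc=%.3f" % (mean_acc_cv, mean_auc_cv))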
Example #3
from sklearn.cross_validation import KFold  # pre-0.18 scikit-learn API
import models  # project-local module providing the classifier and the metrics

def get_acc_auc_kfold(X, Y, k=5):
    # TODO: First get the train indices and test indices for each iteration
    # Then train the classifier accordingly
    # Report the mean accuracy and mean auc of all the folds
    accuracy_arr = []
    auc_arr = []

    kfold = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    for train_i, test_i in kfold:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
Example #4
from sklearn.cross_validation import ShuffleSplit  # pre-0.18 scikit-learn API
import models  # project-local module providing the classifier and the metrics

def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # TODO: First get the train indices and test indices for each iteration
    # Then train the classifier accordingly
    # Report the mean accuracy and mean auc of all the iterations
    accuracy_arr = []
    auc_arr = []

    # Use the function parameters rather than hard-coded values
    shuffle_split = ShuffleSplit(n=X.get_shape()[0], n_iter=iterNo, test_size=test_percent, random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)

    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
Example #5
from numpy import mean
from sklearn.cross_validation import ShuffleSplit  # pre-0.18 scikit-learn API
import models  # project-local module providing the classifier and the metrics

def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):

    # Get the train/test indices for each randomised iteration. Materialise the
    # splits once so the predictions and the true labels come from the same
    # splits (an unseeded ShuffleSplit can yield different splits each time it
    # is iterated).
    splits = list(ShuffleSplit(n=len(Y), n_iter=iterNo, test_size=test_percent))

    # Train the logistic regression classifier on each iteration's train set
    lr_pred = [models.logistic_regression_pred(X[train_index], Y[train_index], X[test_index]) for train_index, test_index in splits]

    # True Y values for each iteration's test set
    Y_true_values = [Y[test_index] for train_index, test_index in splits]

    # Classification metrics for the model fitted on each iteration
    metrics = [models.classification_metrics(Y_pred, Y_true) for Y_pred, Y_true in zip(lr_pred, Y_true_values)]

    # Mean accuracy and mean AUC across all iterations
    mean_accuracy = mean([elt[0] for elt in metrics])
    mean_auc = mean([elt[1] for elt in metrics])

    return mean_accuracy, mean_auc
Example #6
from numpy import mean
from sklearn.cross_validation import KFold  # pre-0.18 scikit-learn API
import models  # project-local module providing the classifier and the metrics

def get_acc_auc_kfold(X, Y, k=5):

    # Get the train indices and test indices for each k-fold iteration
    kfold = KFold(n=len(Y), n_folds=k)

    # Train the logistic regression classifier on each fold's train set
    lr_pred = [models.logistic_regression_pred(X[train_index], Y[train_index], X[test_index]) for train_index, test_index in kfold]

    # Use the kfold object to get the true Y values for each fold's test set
    # (KFold without shuffling is deterministic, so re-iterating it yields the
    # same splits)
    Y_true_values = [Y[test_index] for train_index, test_index in kfold]

    # Classification metrics for the model fitted on each fold
    metrics = [models.classification_metrics(Y_pred, Y_true) for Y_pred, Y_true in zip(lr_pred, Y_true_values)]

    # Mean accuracy and mean AUC across all folds
    mean_accuracy = mean([elt[0] for elt in metrics])
    mean_auc = mean([elt[1] for elt in metrics])

    return mean_accuracy, mean_auc
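
Note that KFold(n, n_folds=...) and ShuffleSplit(n, n_iter=...) in all of the examples above come from the pre-0.18 sklearn.cross_validation module, which was removed in scikit-learn 0.20. A rough equivalent of the k-fold variant using the current sklearn.model_selection API might look like this; it is a sketch mirroring the examples above, not any one author's code:

# Sketch of the same k-fold evaluation on scikit-learn >= 0.18.
import numpy as np
from sklearn.model_selection import KFold
import models  # project-local helpers, as assumed by the examples above

def get_acc_auc_kfold(X, Y, k=5):
    accuracy_arr, auc_arr = [], []
    # random_state only has an effect when shuffle=True in the new API
    kfold = KFold(n_splits=k, shuffle=True, random_state=545510477)
    for train_i, test_i in kfold.split(X):
        Y_pred = models.logistic_regression_pred(X[train_i], Y[train_i], X[test_i])
        acc, auc_, precision, recall, f1 = models.classification_metrics(Y_pred, Y[test_i])
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return np.mean(accuracy_arr), np.mean(auc_arr)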