def k_fold(n):
    """Run ``n``-fold cross validation on ``data.csv`` and print the metrics.

    The cleaned data is loaded through ``dataCollector.getCleanedData`` and
    passed to ``cross_validation`` together with the fold count.  Each of the
    five returned metrics (accuracy, error, recall, precision, specificity)
    is printed exactly as returned, followed by the ``np.mean`` of each.

    Args:
        n: Number of folds; forwarded to ``cross_validation`` unchanged.

    Returns:
        None.  All results are written to standard output.
    """
    # A plain assignment cannot raise IndexError, so no try/except (and no
    # command-line usage message) is needed here: ``n`` is a parameter.
    folds = n

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = cross_validation(X, y, folds)
    metrics = (
        ('accuracy', acc),
        ('error', err),
        ('recall', recall),
        ('precision', precision),
        ('specificity', specificity),
    )

    # Raw values first (presumably one entry per fold -- confirm against
    # cross_validation), then the mean of each metric.
    for label, values in metrics:
        print(label)
        print(values)
    for label, values in metrics:
        print('mean ' + label)
        print(np.mean(values))
def main():
    """Run k-fold cross validation from the command line and save the results.

    Usage: ``python cv.py <folds>``

    Reads the fold count from ``sys.argv[1]``, loads the cleaned data from
    ``data.csv``, runs ``cross_validation`` and prints the mean of each
    metric.  The metrics are then written to ``kfold<folds>.csv`` in the
    order accuracy, error, recall, precision, specificity.

    Exits with status 1 after printing a usage message when the fold count
    is missing or is not an integer.
    """
    try:
        folds = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: no argument given; ValueError: argument is not an int.
        print('Please list the number of folds for cross validation')
        print('as a command line argument, for example : python cv.py 10')
        # Non-zero status so shells and calling scripts can detect the error.
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = cross_validation(X, y, folds)
    output = [acc, err, recall, precision, specificity]
    labels = ('accuracy', 'error', 'recall', 'precision', 'specificity')

    for label, values in zip(labels, output):
        print('mean ' + label)
        print(np.mean(values))

    # pandas is only needed for the CSV export, so it is imported here,
    # after the means have already been printed.
    import pandas as pd
    df = pd.DataFrame(output)
    df.to_csv("kfold{}.csv".format(folds))
def main():
    """Run ``loocv`` on ``data.csv``, print the mean metrics and save them.

    Loads the cleaned data through ``dataCollector.getCleanedData``, passes
    it to ``loocv`` and prints the ``np.mean`` of each returned metric.  The
    metrics are then written to ``loocv.csv`` in the order accuracy, error,
    recall, precision, specificity.
    """
    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = loocv(X, y)
    output = [acc, err, recall, precision, specificity]
    labels = ('accuracy', 'error', 'recall', 'precision', 'specificity')

    for label, values in zip(labels, output):
        print('mean ' + label)
        print(np.mean(values))

    # pandas is only needed for the CSV export, so it is imported here,
    # after the means have already been printed.
    import pandas as pd
    df = pd.DataFrame(output)
    df.to_csv("loocv.csv")
def main():
    """Run bootstrap evaluation from the command line and save the results.

    Usage: ``python bootstrap.py <B>``

    Reads the number of bootstraps ``B`` from ``sys.argv[1]``, loads the
    cleaned data from ``data.csv``, runs ``bootstrapping`` and prints the
    mean of each metric.  The metrics are then written to
    ``bootstrap<B>.csv`` in the order accuracy, error, recall, precision,
    specificity.

    Exits with status 1 after printing a usage message when ``B`` is missing
    or is not an integer.
    """
    try:
        B = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: no argument given; ValueError: argument is not an int.
        print('Please list the number of bootstraps as a cmd line arg')
        print('for example : python bootstrap.py 10')
        # Non-zero status so shells and calling scripts can detect the error.
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    # evaluate over B bootstraps
    acc, err, recall, precision, specificity = bootstrapping(B, X, y)
    output = [acc, err, recall, precision, specificity]
    labels = ('accuracy', 'error', 'recall', 'precision', 'specificity')

    for label, values in zip(labels, output):
        print('mean ' + label)
        print(np.mean(values))

    # pandas is only needed for the CSV export, so it is imported here,
    # after the means have already been printed.
    import pandas as pd
    df = pd.DataFrame(output)
    df.to_csv("bootstrap{}.csv".format(B))
def main():
    """Run bootstrap evaluation for a given ``B`` and ``C`` and print results.

    Usage: ``python bootstrap.py <B> <C>``

    Reads the number of bootstraps ``B`` from ``sys.argv[1]`` and the integer
    ``C`` from ``sys.argv[2]`` (forwarded to ``bootstrapping`` as its fourth
    argument; presumably a model hyper-parameter -- confirm there).  Prints
    each returned metric followed by the mean of each metric.

    Exits with status 1 after printing a usage message when either argument
    is missing or is not an integer.
    """
    try:
        B = int(sys.argv[1])
        C = int(sys.argv[2])
    except (IndexError, ValueError):
        # IndexError: an argument is missing; ValueError: not an integer.
        # The message names both required arguments, so following the
        # example does not trigger this error again.
        print('Please list the number of bootstraps and C as cmd line args')
        print('for example : python bootstrap.py 10 1')
        # Non-zero status so shells and calling scripts can detect the error.
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    # evaluate over B bootstraps with the given C
    acc, err, recall, precision, specificity = bootstrapping(B, X, y, C)

    # One pre-formatted string gives the same output as the original
    # space-separated, multi-item print under both Python 2 and Python 3.
    print("Using {} bootstaps, and C = {} \n".format(B, C))

    metrics = (
        ('accuracy', acc),
        ('error', err),
        ('recall', recall),
        ('precision', precision),
        ('specificity', specificity),
    )
    for label, values in metrics:
        print(label)
        print(values)
    for label, values in metrics:
        print('mean ' + label)
        print(np.mean(values))
    print("\n")
# Baseline class-balance check.
# The original dataset labels malignant as 1 and benign as -1; benign labels
# are remapped to 0 below so that, assuming those are the only two label
# values, summing the labels counts the malignant samples.
import read_clean

X, y = read_clean.getCleanedData("data.csv")
y = [x if x != -1 else 0 for x in y]

total = len(y)
malignant = sum(y)
benign = total - malignant
print(total, malignant, benign)

# Classifier that predicts Benign always
# NOTE(review): this value is the malignant fraction, i.e. the share of
# samples an always-benign classifier gets wrong -- confirm whether the
# error rate or the accuracy was intended here.
malignant_fraction = float(malignant) / total
print(malignant_fraction)

# Classifier that predicts malignant always
# NOTE(review): likewise, this is the benign fraction, i.e. the share of
# samples an always-malignant classifier gets wrong.
benign_fraction = float(benign) / total
print(benign_fraction)