def k_fold(n):
    """Run cross validation with ``n`` folds on ``data.csv`` and print the metrics.

    The data is loaded through ``dataCollector.getCleanedData`` and passed to
    ``cross_validation`` together with ``n``. The five returned metric
    collections (accuracy, error, recall, precision, specificity) are printed
    exactly as returned, followed by the ``np.mean`` of each one.

    Args:
        n: number of folds; forwarded unchanged to ``cross_validation``.
    """
    # NOTE: a plain assignment of ``n`` cannot raise IndexError, so no
    # try/except (and no command-line usage text) is needed here.

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")
    acc, err, recall, precision, specificity = cross_validation(X, y, n)

    metrics = (
        ('accuracy', acc),
        ('error', err),
        ('recall', recall),
        ('precision', precision),
        ('specificity', specificity),
    )

    # Single-argument print(...) produces the same output on Python 2 and 3.
    # Raw values first, in the order the metrics are returned ...
    for name, values in metrics:
        print(name)
        print(values)

    # ... then the mean of each metric.
    for name, values in metrics:
        print('mean ' + name)
        print(np.mean(values))
Beispiel #2
0
def main():
    """Run k-fold cross validation from the command line and save the metrics.

    Reads the number of folds from ``sys.argv[1]``, loads ``data.csv`` through
    ``dataCollector.getCleanedData``, calls ``cross_validation`` and prints the
    ``np.mean`` of each returned metric (accuracy, error, recall, precision,
    specificity). All metrics are then written to ``kfold<folds>.csv``.

    Exits with status 1 after printing a usage message when the argument is
    missing or is not an integer.
    """
    try:
        folds = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: no argument given; ValueError: argument is not an int.
        print('Please list the number of folds for cross validation')
        print('as a command line argument, for example : python cv.py 10')
        # sys.exit rather than quit(): quit() is only installed by the site
        # module, and a usage error should not report success (status 0).
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")
    acc, err, recall, precision, specificity = cross_validation(X, y, folds)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for name, values in (
        ('mean accuracy', acc),
        ('mean error', err),
        ('mean recall', recall),
        ('mean precision', precision),
        ('mean specificity', specificity),
    ):
        print(name)
        print(np.mean(values))

    # pandas is imported locally so it is only required when results are saved.
    import pandas as pd
    # Each metric is passed as one element of the list, in the order
    # accuracy, error, recall, precision, specificity.
    df = pd.DataFrame([acc, err, recall, precision, specificity])
    df.to_csv("kfold{}.csv".format(folds))
Beispiel #3
0
def main():
    """Evaluate ``loocv`` on ``data.csv``, print mean metrics and save them.

    Loads the data through ``dataCollector.getCleanedData``, calls
    ``loocv(X, y)`` (presumably leave-one-out cross validation; confirm against
    its definition) and prints the ``np.mean`` of each returned metric
    (accuracy, error, recall, precision, specificity). All metrics are then
    written to ``loocv.csv``.
    """
    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = loocv(X, y)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for name, values in (
        ('mean accuracy', acc),
        ('mean error', err),
        ('mean recall', recall),
        ('mean precision', precision),
        ('mean specificity', specificity),
    ):
        print(name)
        print(np.mean(values))

    # pandas is imported locally so it is only required when results are saved.
    import pandas as pd
    # Each metric is passed as one element of the list, in the order
    # accuracy, error, recall, precision, specificity.
    df = pd.DataFrame([acc, err, recall, precision, specificity])
    df.to_csv("loocv.csv")
def main():
    """Run ``bootstrapping`` from the command line and save the metrics.

    Reads the number of bootstraps ``B`` from ``sys.argv[1]``, loads
    ``data.csv`` through ``dataCollector.getCleanedData``, calls
    ``bootstrapping(B, X, y)`` and prints the ``np.mean`` of each returned
    metric (accuracy, error, recall, precision, specificity). All metrics are
    then written to ``bootstrap<B>.csv``.

    Exits with status 1 after printing a usage message when the argument is
    missing or is not an integer.
    """
    try:
        B = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: no argument given; ValueError: argument is not an int.
        print('Please list the number of bootstraps as a cmd line arg')
        print('for example : python bootstrap.py 10')
        # sys.exit rather than quit(): quit() is only installed by the site
        # module, and a usage error should not report success (status 0).
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = bootstrapping(B, X, y)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for name, values in (
        ('mean accuracy', acc),
        ('mean error', err),
        ('mean recall', recall),
        ('mean precision', precision),
        ('mean specificity', specificity),
    ):
        print(name)
        print(np.mean(values))

    # pandas is imported locally so it is only required when results are saved.
    import pandas as pd
    # Each metric is passed as one element of the list, in the order
    # accuracy, error, recall, precision, specificity.
    df = pd.DataFrame([acc, err, recall, precision, specificity])
    df.to_csv("bootstrap{}.csv".format(B))
def main():
    """Run ``bootstrapping`` with a ``C`` parameter and print all metrics.

    Reads two integers from the command line: the number of bootstraps ``B``
    (``sys.argv[1]``) and ``C`` (``sys.argv[2]``), which is forwarded to
    ``bootstrapping`` as its fourth argument. Loads ``data.csv`` through
    ``dataCollector.getCleanedData``, calls ``bootstrapping(B, X, y, C)`` and
    prints every returned metric (accuracy, error, recall, precision,
    specificity) followed by the ``np.mean`` of each.

    Exits with status 1 after printing a usage message when either argument is
    missing or is not an integer.
    """
    try:
        B = int(sys.argv[1])
        C = int(sys.argv[2])
    except (IndexError, ValueError):
        # IndexError: an argument is missing; ValueError: not an integer.
        # The usage text names both required arguments; the one-argument
        # example would itself fail on sys.argv[2].
        print('Please list the number of bootstraps and C as cmd line args')
        print('for example : python bootstrap.py 10 1')
        # sys.exit rather than quit(): quit() is only installed by the site
        # module, and a usage error should not report success (status 0).
        sys.exit(1)

    # extract the data and the labels
    X, y = dataCollector.getCleanedData("data.csv")

    acc, err, recall, precision, specificity = bootstrapping(B, X, y, C)

    # Built as one string so the output (including the trailing space and the
    # extra blank line) is the same on Python 2 and 3.
    print("Using {} bootstaps, and C = {} \n".format(B, C))

    metrics = (
        ('accuracy', acc),
        ('error', err),
        ('recall', recall),
        ('precision', precision),
        ('specificity', specificity),
    )

    # Raw values first, in the order the metrics are returned ...
    for name, values in metrics:
        print(name)
        print(values)

    # ... then the mean of each metric.
    for name, values in metrics:
        print('mean ' + name)
        print(np.mean(values))
    print("\n")
Beispiel #6
0
# In the original dataset malignant samples are labelled 1 and benign ones -1.
# Benign labels are remapped to 0 below; with only those two labels present,
# sum(y) is then the number of malignant samples.

import read_clean

X, y = read_clean.getCleanedData("data.csv")
y = [x if x != -1 else 0 for x in y]

total = len(y)
malignant = sum(y)
benign = total - malignant

print(total, malignant, benign)

# Classifier that always predicts benign: the malignant share printed here
# is the fraction of samples it would get wrong.
print(float(malignant) / total)

# Classifier that always predicts malignant: the benign share printed here
# is the fraction of samples it would get wrong.
print(float(benign) / total)