def testDriver():
  """Benchmark driver: CoCoMo baselines, untuned/weighted testRig learners,
  and TEAK (leave-one-out), ranked together via sk.rdivDemo.

  Prints a progress dot per TEAK fold, then the dataset size summary and the
  Scott-Knott table. Uses a fixed seed(0) so runs are repeatable.
  """
  seed(0)
  skData = []
  split = "median"
  dataset = MODEL(split=split)
  if dataset._isCocomo:
    # COCOMO baselines only apply to COCOMO-format datasets.
    scores = testCoCoMo(dataset)
    for key, n in scores.items():
      skData.append([key + ". ."] + n.cache.all)
  # Untuned learners on a fresh model instance.
  scores = testRig(dataset=MODEL(split=split),
                   doCART=True, doKNN=True, doLinRg=True)
  for key, n in scores.items():
    if key in ("clstr", "lRgCl"):
      skData.append([key + "(no tuning)"] + n.cache.all)
    else:
      skData.append([key + ". ."] + n.cache.all)
  # kNN again with feature weighting enabled.
  scores = testRig(dataset=MODEL(split=split, weighFeature=True), doKNN=True)
  for key, n in scores.items():
    skData.append([key + "(sdiv_wt^1)"] + n.cache.all)
  # TEAK: leave-one-out, rebuilding the TEAK tree for every training split.
  scores = dict(TEAK=N())
  for score in scores.values():
    score.go = True
  dataset = MODEL(split=split)
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    tree = teak(dataset, rows=train)
    n = scores["TEAK"]
    n.go and clusterk1(n, dataset, tree, test, desired_effort, leafTeak)
  for key, n in scores.items():
    skData.append([key + ". ."] + n.cache.all)
  print("")
  print(str(len(dataset._rows)) + " data points, " +
        str(len(dataset.indep)) + " attributes")
  print("")
  sk.rdivDemo(skData)
def testDriver():
  """Benchmark driver: CoCoMo baselines plus testRig learners (untuned and
  feature-weighted kNN), ranked via sk.rdivDemo.

  NOTE(review): this redefines testDriver (another variant exists earlier in
  the file); in Python the later definition wins at import time — confirm
  which one is intended to be live.
  """
  seed(0)
  skData = []
  dataset = MODEL()
  if dataset._isCocomo:
    # COCOMO baselines only apply to COCOMO-format datasets.
    scores = testCoCoMo(dataset)
    for key, n in scores.items():
      skData.append([key + ". ."] + n.cache.all)
  # Untuned learners on a fresh model instance.
  scores = testRig(dataset=MODEL(), doCART=True, doKNN=True, doLinRg=True)
  for key, n in scores.items():
    if key in ("clstr", "lRgCl"):
      skData.append([key + "(no tuning)"] + n.cache.all)
    else:
      skData.append([key + ". ."] + n.cache.all)
  # kNN with feature weighting enabled.
  scores = testRig(dataset=MODEL(weighFeature=True), doKNN=True)
  for key, n in scores.items():
    skData.append([key + "(sdiv_wt^1)"] + n.cache.all)
  print("")
  print(str(len(dataset._rows)) + " data points, " +
        str(len(dataset.indep)) + " attributes")
  print("")
  sk.rdivDemo(skData)
def untuned_runner(model=MODEL, cross_val=21):
  """Cross-validated comparison of untuned Peek, TEAK and CART.

  Each learner is wrapped in a DE harness but built with settings=None
  (i.e. no differential-evolution tuning); the per-fold median MRE is
  accumulated and ranked with rdivDemo using the cliffs-delta test.

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  errors = {
    "Peek": N(),
    "TEAK": N(),
    "CART": N(),
  }
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    # tune is unpacked for clarity but unused: these runs are untuned.
    train, tune, test = inp
    de = DE(model(), launchWhere2, predictPEEKING, peekSettings(), inp)
    classifier = de.builder(de.model, settings=None, rows=train)
    mre = MRE(de.model, test, classifier, de.predictor)
    errors["Peek"] += mre.cache.has().median
    de = DE(model(), launchTeak, predictTeak, teakSettings(), inp)
    classifier = de.builder(de.model, settings=None, rows=train)
    mre = MRE(de.model, test, classifier, de.predictor)
    errors["TEAK"] += mre.cache.has().median
    de = DE(model(), launchCART, predictCART, cartSettings(), inp)
    classifier = de.builder(de.model, settings=None, rows=train)
    mre = MRE(de.model, test, classifier, de.predictor)
    errors["CART"] += mre.cache.has().median
  skData = []
  for key, n in errors.items():
    skData.append([key] + n.cache.all)
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
def testKLOCTuneDriver():
  """Sweep the KLOC weighting ratio from 0.90 to 1.20 in 0.01 steps,
  scoring each setting with testRig and ranking every run via sk.rdivDemo.
  """
  tuneRatio = 0.9
  skData = []
  while tuneRatio <= 1.2:
    dataset = MODEL(doTune=True, weighKLOC=False, klocWt=tuneRatio)
    scores = testRig(dataset=dataset)
    for key, n in scores.items():
      skData.append([key + "( " + str(tuneRatio) + " )"] + n.cache.all)
    # round() prevents binary floating-point drift (e.g. 0.9200000000000002)
    # from leaking into labels and from making the <= 1.2 bound fuzzy.
    tuneRatio = round(tuneRatio + 0.01, 2)
  print("")
  sk.rdivDemo(skData)
def testSmote():
  """Compare SMOTE-augmented kNN against CART, plain weighted kNN and plain
  unweighted kNN (all leave-one-out), ranking everything via sk.rdivDemo.

  Three passes: (1) weighted dataset with SMOTEd training clones,
  (2) weighted dataset with raw training rows, (3) unweighted dataset with
  raw training rows.
  """
  dataset = MODEL(split="variance", weighFeature=True)
  launchWhere2(dataset, verbose=False)
  skData = []
  # Pass 1: kNN on SMOTEd clones of the training rows, plus CART baseline.
  scores = dict(sm_knn_1_w=N(), sm_knn_3_w=N(), CART=N())
  for score in scores.values():
    score.go = True
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    clones = smotify(dataset, train, k=5, factor=100)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
    n = scores["sm_knn_1_w"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, clones)
    n = scores["sm_knn_3_w"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 3, clones)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  if dataset._isCocomo:
    for key, n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)
  # Pass 2: plain kNN on the feature-weighted dataset.
  # BUGFIX: the original built dict(knn_1=..., knn_3=...) here but indexed
  # scores["knn_1_w"] / scores["knn_3_w"] below (KeyError); the two dict
  # constructors were swapped between pass 2 and pass 3.
  scores = dict(knn_1_w=N(), knn_3_w=N())
  dataset = MODEL(split="variance", weighFeature=True)
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    n = scores["knn_1_w"]
    kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["knn_3_w"]
    kNearestNeighbor(n, dataset, test, desired_effort, 3, train)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  # Pass 3: plain kNN on the unweighted dataset.
  scores = dict(knn_1=N(), knn_3=N())
  dataset = MODEL(split="variance")
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    n = scores["knn_1"]
    kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["knn_3"]
    kNearestNeighbor(n, dataset, test, desired_effort, 3, train)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
def run_model(model=MODEL, cross_val=3):
  """Cross-validated comparison of tuned vs untuned Peek/TEAK/CART/SVM/knn.

  Each *_DE helper returns (tuned_error, untuned_error, evals); errors are
  collected per learner (with a t_ prefix for the tuned variant), ranked via
  rdivDemo with cliffs-delta, and the average evaluation counts printed.

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  errors = {
    "Peek": N(), "t_Peek": N(),
    "TEAK": N(), "t_TEAK": N(),
    "CART": N(), "t_CART": N(),
    "SVM": N(), "t_SVM": N(),
    "knn": N(), "t_knn": N(),
  }
  runs = {"Peek": N(), "TEAK": N(), "SVM": N(), "CART": N(), "knn": N()}
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    t_err, err, evals = TEAK_DE(model, inp)
    errors["TEAK"] += err
    errors["t_TEAK"] += t_err
    runs["TEAK"] += evals
    t_err, err, evals = PEEKING_DE(model, inp)
    errors["Peek"] += err
    errors["t_Peek"] += t_err
    runs["Peek"] += evals
    t_err, err, evals = CART_DE(model, inp)
    errors["CART"] += err
    errors["t_CART"] += t_err
    runs["CART"] += evals
    t_err, err, evals = SVM_DE(model, inp)
    errors["SVM"] += err
    errors["t_SVM"] += t_err
    runs["SVM"] += evals
    t_err, err, evals = KNN_DE(model, inp)
    errors["knn"] += err
    errors["t_knn"] += t_err
    runs["knn"] += evals
  skData = []
  for key, n in errors.items():
    skData.append([key] + n.cache.all)
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
  for key, n in runs.items():
    print("#### Average evals for " + key + " " + str(mean(n.cache.all)))
def run_model(model=MODEL, cross_val=3):
  """Cross-validated comparison of tuned vs untuned Peek/TEAK/CART/SVM/knn.

  Each *_DE helper returns (tuned_error, untuned_error, evals); results are
  ranked via rdivDemo (cliffs-delta) and mean evaluation counts printed.

  NOTE(review): this redefines run_model (another copy exists earlier in the
  file); the later definition wins at import time — confirm intent.

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  learners = (
    ("TEAK", TEAK_DE),
    ("Peek", PEEKING_DE),
    ("CART", CART_DE),
    ("SVM", SVM_DE),
    ("knn", KNN_DE),
  )
  errors = {
    "Peek": N(), "t_Peek": N(),
    "TEAK": N(), "t_TEAK": N(),
    "CART": N(), "t_CART": N(),
    "SVM": N(), "t_SVM": N(),
    "knn": N(), "t_knn": N(),
  }
  runs = {"Peek": N(), "TEAK": N(), "SVM": N(), "CART": N(), "knn": N()}
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    # Same sequence of learner calls as the straight-line version, folded
    # into a table-driven loop ("t_" prefix = tuned variant's error).
    for name, learner in learners:
      t_err, err, evals = learner(model, inp)
      errors[name] += err
      errors["t_" + name] += t_err
      runs[name] += evals
  skData = []
  for key, n in errors.items():
    skData.append([key] + n.cache.all)
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
  for key, n in runs.items():
    print("#### Average evals for " + key + " " + str(mean(n.cache.all)))
def testRunner(model=MODEL, cross_val=21):
  """Cross-validated run of SVM_DE, ranking tuned vs untuned error with
  rdivDemo (cliffs-delta).

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  # NOTE(review): results from SVM_DE are accumulated under "knn"/"t_knn"
  # labels — confirm whether the labels or the learner is the intended one.
  errors = {
    "knn": N(),
    "t_knn": N(),
  }
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  print("```")
  all_rows = mdl._rows
  for inp in split_data(all_rows, cross_val):
    say(".")
    # Unpacked for clarity only; SVM_DE consumes the whole inp tuple.
    train, tune, test = inp
    t_err, err = SVM_DE(model, inp)
    errors["knn"] += err
    errors["t_knn"] += t_err
  skData = []
  for key, n in errors.items():
    skData.append([key] + n.cache.all)
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
def testForPaper(model=MODEL):
  """Benchmark the learners reported in the paper: LSR, CART, kNN(1), kNN(3)
  on the plain dataset and clusterWeightedMean2 on the feature-weighted one
  (all leave-one-out), ranked via sk.rdivDemo.

  :param model: model class to instantiate (default MODEL)
  """
  split = "median"
  print(model.__name__.upper())
  dataset = model(split=split, weighFeature=False)
  print(str(len(dataset._rows)) + " data points, " +
        str(len(dataset.indep)) + " attributes")
  dataset_weighted = model(split=split, weighFeature=True)
  launchWhere2(dataset, verbose=False)
  skData = []
  if dataset._isCocomo:
    for key, n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)
  scores = dict(CART=N(), knn_1=N(), knn_3=N(), LSR=N(), wt_clstr_wdMn2=N())
  for score in scores.values():
    score.go = True
  for test, train in loo(dataset._rows):
    desired_effort = effort(dataset, test)
    # NOTE(review): tree is never read below; kept for any side effects of
    # launchWhere2 on the dataset — confirm it can be dropped.
    tree = launchWhere2(dataset, rows=train, verbose=False)
    n = scores["LSR"]
    n.go and linearRegression(n, dataset, train, test, desired_effort)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
    n = scores["knn_1"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["knn_3"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 3, train)
  for test, train in loo(dataset_weighted._rows):
    # BUGFIX: the original computed effort(dataset, test) here even though
    # this loop iterates the weighted rows; testEverything uses the weighted
    # dataset in the analogous loop.
    desired_effort = effort(dataset_weighted, test)
    tree_weighted, leafFunc = (
        launchWhere2(dataset_weighted, rows=train, verbose=False), leaf)
    n = scores["wt_clstr_wdMn2"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test,
                                  desired_effort, leafFunc)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
  print("")
  print("")
def testOverfit(dataset=None):
  """Probe overfitting: compare linRegressCluster on where2 trees grown with
  minimum leaf sizes 2, 4 and 8 (leave-one-out), ranked via sk.rdivDemo.

  :param dataset: model instance to test; defaults to MODEL(split="median")
  """
  # BUGFIX: the default used to be dataset=MODEL(split="median") in the def
  # line, which is evaluated once at import time and shares one mutable
  # MODEL instance across every call; build it lazily instead.
  if dataset is None:
    dataset = MODEL(split="median")
  skData = []
  scores = dict(splitSize_2=N(), splitSize_4=N(), splitSize_8=N())
  for score in scores.values():
    score.go = True
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    # One tree per candidate minimum-leaf size, scored on the same fold.
    tree = launchWhere2(dataset, rows=train, verbose=False, minSize=2)
    n = scores["splitSize_2"]
    n.go and linRegressCluster(n, dataset, tree, test, desired_effort)
    tree = launchWhere2(dataset, rows=train, verbose=False, minSize=4)
    n = scores["splitSize_4"]
    n.go and linRegressCluster(n, dataset, tree, test, desired_effort)
    tree = launchWhere2(dataset, rows=train, verbose=False, minSize=8)
    n = scores["splitSize_8"]
    n.go and linRegressCluster(n, dataset, tree, test, desired_effort)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
def testRunner(model=MODEL, cross_val=21):
  """Cross-validated run of SVM_DE, ranking tuned vs untuned error with
  rdivDemo (cliffs-delta).

  NOTE(review): this redefines testRunner (another copy exists earlier in
  the file); the later definition wins at import time — confirm intent.
  NOTE(review): SVM_DE results are stored under "knn"/"t_knn" labels —
  confirm whether the labels or the learner is the intended one.

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  errors = {"knn": N(), "t_knn": N()}
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  print("```")
  for fold in split_data(mdl._rows, cross_val):
    say(".")
    train, tune, test = fold  # unpacked for clarity; SVM_DE takes the tuple
    t_err, err = SVM_DE(model, fold)
    errors["knn"] += err
    errors["t_knn"] += t_err
  skData = [[key] + n.cache.all for key, n in errors.items()]
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
def untuned_runner(model=MODEL, cross_val=21):
  """Cross-validated comparison of untuned Peek, TEAK and CART.

  Each learner is wrapped in the DE harness but built with settings=None
  (no tuning); per-fold median MRE is accumulated and ranked with rdivDemo
  using the cliffs-delta test.

  NOTE(review): this redefines untuned_runner (another copy exists earlier
  in the file); the later definition wins at import time — confirm intent.

  :param model: model class to instantiate (default MODEL)
  :param cross_val: number of cross-validation splits
  """
  errors = {"Peek": N(), "TEAK": N(), "CART": N()}
  mdl = model()
  print('###' + model.__name__.upper())
  print('####' + str(len(mdl._rows)) + " data points, " +
        str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    train, tune, test = inp  # tune unused: these runs are untuned
    # Table of (label, builder, predictor, settings); the settings factories
    # are invoked inside the fold loop, exactly as the straight-line version
    # called them once per fold.
    for name, builder, predictor, settings in (
        ("Peek", launchWhere2, predictPEEKING, peekSettings()),
        ("TEAK", launchTeak, predictTeak, teakSettings()),
        ("CART", launchCART, predictCART, cartSettings())):
      de = DE(model(), builder, predictor, settings, inp)
      classifier = de.builder(de.model, settings=None, rows=train)
      mre = MRE(de.model, test, classifier, de.predictor)
      errors[name] += mre.cache.has().median
  skData = [[key] + n.cache.all for key, n in errors.items()]
  rdivDemo(skData, "cliffs")
  print("```")
  print("")
def testEverything(model=MODEL):
  """Run the full learner zoo over one model: TEAK, linear regression, CART,
  kNN and the where2 cluster variants, on both the plain and the
  feature-weighted dataset (all leave-one-out), ranked via sk.rdivDemo.

  Score keys: *_wt = feature-weighted dataset; t_* = TEAK-tree variant.

  :param model: model class to instantiate (default MODEL)
  """
  split = "median"
  print(model.__name__.upper())
  dataset = model(split=split, weighFeature=False)
  print(str(len(dataset._rows)) + " data points, " +
        str(len(dataset.indep)) + " attributes")
  dataset_weighted = model(split=split, weighFeature=True)
  launchWhere2(dataset, verbose=False)
  skData = []
  scores = dict(TEAK=N(), linear_reg=N(), CART=N(), linRgCl_wt=N(),
                clstr_whr_wt=N(), linRgCl=N(), clstr_whr=N(),
                t_linRgCl_wt=N(), t_clstr_whr_wt=N(), knn_1=N(),
                knn_1_wt=N(), clstrMn2=N(), clstrMn2_wt=N(),
                t_clstrMn2_wt=N(), clstrWdMn2=N(), clstrWdMn2_wt=N(),
                t_clstrWdMn2_wt=N())
  for score in scores.values():
    score.go = True
  # Pass 1: plain dataset.
  for test, train in loo(dataset._rows):
    desired_effort = effort(dataset, test)
    tree = launchWhere2(dataset, rows=train, verbose=False)
    tree_teak = teak(dataset, rows=train)
    n = scores["TEAK"]
    n.go and clusterk1(n, dataset, tree_teak, test, desired_effort, leafTeak)
    n = scores["linear_reg"]
    n.go and linearRegression(n, dataset, train, test, desired_effort)
    n = scores["clstr_whr"]
    n.go and clusterk1(n, dataset, tree, test, desired_effort, leaf)
    n = scores["linRgCl"]
    n.go and linRegressCluster(n, dataset, tree, test, desired_effort, leaf)
    n = scores["knn_1"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["clstrMn2"]
    n.go and clustermean2(n, dataset, tree, test, desired_effort, leaf)
    n = scores["clstrWdMn2"]
    n.go and clusterWeightedMean2(n, dataset, tree, test, desired_effort, leaf)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
  # Pass 2: feature-weighted dataset, with both where2 and TEAK trees.
  for test, train in loo(dataset_weighted._rows):
    desired_effort = effort(dataset_weighted, test)
    tree_weighted, leafFunc = (
        launchWhere2(dataset_weighted, rows=train, verbose=False), leaf)
    n = scores["clstr_whr_wt"]
    n.go and clusterk1(n, dataset_weighted, tree_weighted, test,
                       desired_effort, leafFunc)
    n = scores["linRgCl_wt"]
    n.go and linRegressCluster(n, dataset_weighted, tree_weighted, test,
                               desired_effort, leafFunc=leafFunc)
    n = scores["clstrMn2_wt"]
    n.go and clustermean2(n, dataset_weighted, tree_weighted, test,
                          desired_effort, leafFunc)
    n = scores["clstrWdMn2_wt"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test,
                                  desired_effort, leafFunc)
    # Rebind to the TEAK tree for the t_* variants.
    tree_weighted, leafFunc = (
        teak(dataset_weighted, rows=train, verbose=False), leafTeak)
    n = scores["t_clstr_whr_wt"]
    n.go and clusterk1(n, dataset_weighted, tree_weighted, test,
                       desired_effort, leafFunc)
    n = scores["t_linRgCl_wt"]
    n.go and linRegressCluster(n, dataset_weighted, tree_weighted, test,
                               desired_effort, leafFunc=leafFunc)
    n = scores["knn_1_wt"]
    n.go and kNearestNeighbor(n, dataset_weighted, test, desired_effort, 1,
                              train)
    n = scores["t_clstrMn2_wt"]
    n.go and clustermean2(n, dataset_weighted, tree_weighted, test,
                          desired_effort, leafFunc)
    n = scores["t_clstrWdMn2_wt"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test,
                                  desired_effort, leafFunc)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  if dataset._isCocomo:
    for key, n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
  print("")
  print("")