예제 #1
0
def testDriver():
  """Run the full benchmark suite once and print a Scott-Knott ranking.

  Seeds the RNG for repeatability, optionally adds COCOMO baselines, runs
  the standard rig (CART/KNN/LinReg), a feature-weighted KNN pass, and a
  leave-one-out TEAK evaluation; all MRE caches go into one ranked table.
  Labels are padded with dots/spaces so the ranked rows line up visually.
  """
  seed(0)
  skData = []
  split = "median"
  dataset=MODEL(split=split)
  if  dataset._isCocomo:
    # COCOMO-style models also get the analytic baseline scores.
    scores = testCoCoMo(dataset)
    for key, n in scores.items():
      skData.append([key+".       ."] + n.cache.all)
  scores = testRig(dataset=MODEL(split=split),doCART = True, doKNN=True, doLinRg=True)
  for key,n in scores.items():
    # "clstr" and "lRgCl" run without tuning here; tag them accordingly.
    if (key == "clstr" or key == "lRgCl"):
      skData.append([key+"(no tuning)"] + n.cache.all)
    else:
      skData.append([key+".         ."] + n.cache.all)

  # KNN again, this time with feature weighting enabled on the model.
  scores = testRig(dataset=MODEL(split=split, weighFeature = True), doKNN=True)
  for key,n in scores.items():
      skData.append([key+"(sdiv_wt^1)"] + n.cache.all)
  # TEAK is evaluated manually via leave-one-out over a fresh model.
  scores = dict(TEAK=N())
  for score in scores.values():
    score.go=True
  dataset=MODEL(split=split)
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    tree = teak(dataset, rows = train)
    n = scores["TEAK"]
    # `n.go and f(...)` is a guard idiom: only run when the score is enabled.
    n.go and clusterk1(n, dataset, tree, test, desired_effort, leafTeak)
  for key,n in scores.items():
      skData.append([key+".          ."] + n.cache.all)
  print("")
  print(str(len(dataset._rows)) + " data points,  " + str(len(dataset.indep)) + " attributes")
  print("")
  sk.rdivDemo(skData)
예제 #2
0
def testDriver():
    """Run the benchmark rigs once and print a Scott-Knott ranking.

    Seeds the RNG for repeatability, optionally scores the COCOMO
    baselines, runs the standard rig (CART/KNN/LinReg) and a
    feature-weighted KNN pass, then ranks every method's MRE cache.
    Labels are padded so the ranked table columns line up.
    """
    seed(0)
    results = []

    baseline = MODEL()
    if baseline._isCocomo:
        # COCOMO-style models also get the analytic baseline scores.
        for name, num in testCoCoMo(baseline).items():
            results.append([name + ".       ."] + num.cache.all)

    # Standard rig on a fresh model: CART, KNN and linear regression.
    rig_scores = testRig(dataset=MODEL(), doCART=True, doKNN=True, doLinRg=True)
    for name, num in rig_scores.items():
        # The clstr/lRgCl pair runs without tuning here; tag it as such.
        untuned = name == "clstr" or name == "lRgCl"
        label = name + ("(no tuning)" if untuned else ".         .")
        results.append([label] + num.cache.all)

    # KNN again, this time with feature weighting switched on.
    weighted_scores = testRig(dataset=MODEL(weighFeature=True), doKNN=True)
    for name, num in weighted_scores.items():
        results.append([name + "(sdiv_wt^1)"] + num.cache.all)

    print("")
    print(
        str(len(baseline._rows)) + " data points,  " + str(len(baseline.indep)) +
        " attributes")
    print("")
    sk.rdivDemo(results)
예제 #3
0
File: runner.py  Project: ai-se/x-effort
def untuned_runner(model=MODEL, cross_val=21):
  """Cross-validate the un-tuned learners (PEEKING, TEAK, CART) on `model`.

  For each of `cross_val` train/tune/test splits every learner is built on
  the raw training rows (no DE tuning: settings=None) and its median MRE
  on the test rows is accumulated; results are ranked with Scott-Knott
  using the cliffs-delta effect-size test. The three copy-pasted DE/MRE
  stanzas of the original are folded into one helper.
  """
  def _median_mre(builder, predictor, settings, train, test, inp):
    # Build the learner on the training rows only and score it on `test`;
    # DE serves purely as a (model, builder, predictor) holder here.
    de = DE(model(), builder, predictor, settings, inp)
    classifier = de.builder(de.model, settings=None, rows=train)
    return MRE(de.model, test, classifier, de.predictor).cache.has().median

  errors = {
    "Peek" : N(),
    "TEAK" : N(),
    "CART" : N(),
  }
  mdl = model()
  print('###'+model.__name__.upper())
  print('####'+str(len(mdl._rows)) + " data points,  " + str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    train, tune, test = inp
    # Same learner order per fold as before: Peek, TEAK, CART.
    errors["Peek"] += _median_mre(launchWhere2, predictPEEKING, peekSettings(), train, test, inp)
    errors["TEAK"] += _median_mre(launchTeak, predictTeak, teakSettings(), train, test, inp)
    errors["CART"] += _median_mre(launchCART, predictCART, cartSettings(), train, test, inp)
  skData = []
  for key, n in errors.items():
    skData.append([key] + n.cache.all)
  rdivDemo(skData, "cliffs")
  print("```"); print("")
예제 #4
0
def testKLOCTuneDriver():
  """Sweep the KLOC weight ratio from 0.90 to 1.20 and rank the results.

  Uses an integer step counter instead of repeated float addition: the old
  `tuneRatio += 0.01` loop accumulated floating-point error, which could
  drop the final step of the `<= 1.2` sweep and produced noisy labels like
  "0.9299999999999999". This version deterministically covers all 31
  ratios (0.90, 0.91, ..., 1.20) with clean two-decimal labels.
  """
  skData = []
  for step in range(31):
    tuneRatio = round(0.9 + step * 0.01, 2)
    dataset = MODEL(doTune=True, weighKLOC=False, klocWt=tuneRatio)
    scores = testRig(dataset=dataset)
    for key, n in scores.items():
      skData.append([key + "( " + str(tuneRatio) + " )"] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
예제 #5
0
def testSmote():
  """Compare SMOTE-augmented kNN/CART against plain kNN (weighted and not).

  BUG FIX: the two plain-kNN sections had their score-dict keys swapped —
  the dict was built as ``dict(knn_1=N(), knn_3=N())`` while the loop body
  looked up ``scores["knn_1_w"]``/``scores["knn_3_w"]`` (and vice versa in
  the following section), raising KeyError on the first iteration. Keys
  now match their loops: the feature-weighted dataset scores under the
  ``*_w`` names, the unweighted one under the bare names.
  """
  dataset = MODEL(split="variance", weighFeature=True)
  launchWhere2(dataset, verbose=False)
  skData = []
  scores = dict(sm_knn_1_w=N(), sm_knn_3_w=N(), CART=N())
  for score in scores.values():
    score.go = True

  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    # Oversample the training rows before the SMOTE'd learners run.
    clones = smotify(dataset, train, k=5, factor=100)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
    n = scores["sm_knn_1_w"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, clones)
    n = scores["sm_knn_3_w"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 3, clones)

  for key, n in scores.items():
    skData.append([key] + n.cache.all)
  if dataset._isCocomo:
    for key, n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)

  # Plain kNN on the feature-weighted dataset (keys carry the _w suffix).
  scores = dict(knn_1_w=N(), knn_3_w=N())
  dataset = MODEL(split="variance", weighFeature=True)
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    kNearestNeighbor(scores["knn_1_w"], dataset, test, desired_effort, 1, train)
    kNearestNeighbor(scores["knn_3_w"], dataset, test, desired_effort, 3, train)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)

  # Plain kNN on the unweighted dataset.
  scores = dict(knn_1=N(), knn_3=N())
  dataset = MODEL(split="variance")
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    kNearestNeighbor(scores["knn_1"], dataset, test, desired_effort, 1, train)
    kNearestNeighbor(scores["knn_3"], dataset, test, desired_effort, 3, train)
  for key, n in scores.items():
    skData.append([key] + n.cache.all)

  print("")
  sk.rdivDemo(skData)
예제 #6
0
def run_model(model=MODEL, cross_val=3):
    """Cross-validate five DE-tuned learners; report errors and eval counts.

    Each ``*_DE`` helper returns (tuned error, untuned error, evaluation
    count); both error flavours accumulate per learner and the evaluation
    budget is tracked separately. Errors are ranked with Scott-Knott using
    the cliffs-delta effect-size test; average evals print at the end.
    """
    errors = {
        "Peek": N(),
        "t_Peek": N(),
        "TEAK": N(),
        "t_TEAK": N(),
        "CART": N(),
        "t_CART": N(),
        "SVM": N(),
        "t_SVM": N(),
        "knn": N(),
        "t_knn": N()
    }
    runs = {"Peek": N(), "TEAK": N(), "SVM": N(), "CART": N(), "knn": N()}
    mdl = model()
    print('###' + model.__name__.upper())
    print('####' + str(len(mdl._rows)) + " data points,  " +
          str(len(mdl.indep)) + " attributes")
    all_rows = mdl._rows
    print("```")
    # One (label, tuner) pair per learner, in the same per-fold order as
    # the original inline sequence: TEAK, Peek, CART, SVM, knn.
    learners = (("TEAK", TEAK_DE), ("Peek", PEEKING_DE), ("CART", CART_DE),
                ("SVM", SVM_DE), ("knn", KNN_DE))
    for inp in split_data(all_rows, cross_val):
        say(".")
        for label, tuner in learners:
            t_err, err, evals = tuner(model, inp)
            errors[label] += err
            errors["t_" + label] += t_err
            runs[label] += evals
    skData = []
    for label, num in errors.items():
        skData.append([label] + num.cache.all)
    rdivDemo(skData, "cliffs")
    print("```")
    print("")
    for label, num in runs.items():
        print("#### Average evals for " + label + " " + str(mean(num.cache.all)))
예제 #7
0
File: runner.py  Project: ai-se/x-effort
def run_model(model=MODEL, cross_val=3):
  """Cross-validate five DE-tuned learners; report errors and eval counts.

  Each ``*_DE`` helper returns (tuned error, untuned error, evaluation
  count); both error flavours accumulate per learner and the evaluation
  budget is tracked separately. Errors are ranked with Scott-Knott using
  the cliffs-delta effect-size test; average evals print at the end.
  """
  errors = {
    "Peek" : N(),
    "t_Peek" : N(),
    "TEAK" : N(),
    "t_TEAK" : N(),
    "CART" : N(),
    "t_CART"  : N(),
    "SVM" : N(),
    "t_SVM" : N(),
    "knn" : N(),
    "t_knn" : N()
  }
  runs = {
    "Peek" : N(),
    "TEAK" : N(),
    "SVM" : N(),
    "CART" : N(),
    "knn" : N()
  }
  mdl=model()
  print('###'+model.__name__.upper())
  print('####'+str(len(mdl._rows)) + " data points,  " + str(len(mdl.indep)) + " attributes")
  all_rows = mdl._rows
  print("```")
  for inp in split_data(all_rows, cross_val):
    say(".")
    # Per fold: run each learner's DE tuner, accumulating untuned error,
    # tuned error (t_*) and the number of evaluations the tuner spent.
    t_err, err, evals = TEAK_DE(model, inp)
    errors["TEAK"] += err; errors["t_TEAK"] += t_err; runs["TEAK"] += evals
    t_err, err, evals = PEEKING_DE(model, inp)
    errors["Peek"] += err; errors["t_Peek"] += t_err; runs["Peek"] += evals
    t_err, err, evals = CART_DE(model, inp)
    errors["CART"] += err; errors["t_CART"] += t_err; runs["CART"] += evals
    t_err, err, evals = SVM_DE(model, inp)
    errors["SVM"] += err; errors["t_SVM"] += t_err; runs["SVM"] += evals
    t_err, err, evals = KNN_DE(model, inp)
    errors["knn"] += err; errors["t_knn"] += t_err; runs["knn"] += evals
  skData=[]
  for key, n in errors.items():
    skData.append([key]+n.cache.all)
  rdivDemo(skData,"cliffs")
  print("```");print("")
  for key, n in runs.items():
    print("#### Average evals for "+key + " " + str(mean(n.cache.all)))
예제 #8
0
File: runner.py  Project: ai-se/x-effort
def testRunner(model=MODEL, cross_val=21):
  """Smoke-test one DE-tuned learner across `cross_val` data splits.

  NOTE(review): the results are keyed "knn"/"t_knn" but the learner
  invoked is SVM_DE — confirm whether the key or the call is the intended
  one.
  """
  errors = {
    "knn" : N(),
    "t_knn" : N(),
  }
  mdl=model()
  print('###'+model.__name__.upper())
  print('####'+str(len(mdl._rows)) + " data points,  " + str(len(mdl.indep)) + " attributes")
  print("```")
  for inp in split_data(mdl._rows, cross_val):
    say(".")
    # `inp` is consumed whole by SVM_DE; the old `train,tune,test = inp`
    # unpacking was dead code and has been removed.
    t_err, err = SVM_DE(model, inp)
    errors["knn"] += err; errors["t_knn"] += t_err
  skData=[]
  for key, n in errors.items():
    skData.append([key]+n.cache.all)
  rdivDemo(skData,"cliffs")
  print("```");print("")
예제 #9
0
def testForPaper(model=MODEL):
  """Leave-one-out comparison of the paper's learners on one model.

  Runs LSR, CART and kNN(1/3) on the unweighted dataset, then the
  weighted-mean cluster method on the feature-weighted dataset, and ranks
  every MRE cache (plus COCOMO baselines when applicable).

  BUG FIX: the second loop iterates `dataset_weighted._rows` but computed
  `effort(dataset, test)` from the unweighted dataset; it now uses
  `dataset_weighted`, consistent with the weighted loop in testEverything.
  """
  split="median"
  print(model.__name__.upper())
  dataset=model(split=split, weighFeature=False)
  print(str(len(dataset._rows)) + " data points,  " + str(len(dataset.indep)) + " attributes")
  dataset_weighted = model(split=split, weighFeature=True)
  launchWhere2(dataset, verbose=False)
  skData = []
  if dataset._isCocomo:
    for key,n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)
  scores = dict(CART = N(), knn_1 = N(),
                knn_3 = N(),LSR = N(),
                wt_clstr_wdMn2=N())
  for score in scores.values():
    score.go=True
  for test, train in loo(dataset._rows):
    desired_effort = effort(dataset, test)
    # Return value discarded — presumably run for its side effects on the
    # model; none of the learners below consume the tree. TODO confirm.
    launchWhere2(dataset, rows=train, verbose=False)
    n = scores["LSR"]
    n.go and linearRegression(n, dataset, train, test, desired_effort)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
    n = scores["knn_1"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["knn_3"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 3, train)

  for test, train in loo(dataset_weighted._rows):
    # Desired effort must come from the dataset whose rows we iterate.
    desired_effort = effort(dataset_weighted, test)
    tree_weighted, leafFunc = launchWhere2(dataset_weighted, rows=train, verbose=False), leaf
    n = scores["wt_clstr_wdMn2"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)

  for key,n in scores.items():
    skData.append([key] + n.cache.all)

  print("")
  sk.rdivDemo(skData)
  print("");print("")
예제 #10
0
def testOverfit(dataset=None):
  """Probe overfitting by linear-regressing clusters at minSize 2, 4, 8.

  `dataset` defaults to a fresh MODEL(split="median") built per call; the
  old default of ``dataset=MODEL(split="median")`` was evaluated once at
  import time, so every call shared (and mutated) the same instance and
  the model was constructed as a module-import side effect.
  """
  if dataset is None:
    dataset = MODEL(split="median")
  skData = []
  scores = dict(splitSize_2=N(), splitSize_4=N(), splitSize_8=N())
  for score in scores.values():
    score.go = True
  for test, train in loo(dataset._rows):
    say(".")
    desired_effort = effort(dataset, test)
    # One where2 tree per leaf-size setting; larger minSize = coarser leaves.
    for size in (2, 4, 8):
      tree = launchWhere2(dataset, rows=train, verbose=False, minSize=size)
      n = scores["splitSize_%d" % size]
      n.go and linRegressCluster(n, dataset, tree, test, desired_effort)

  for key, n in scores.items():
      skData.append([key] + n.cache.all)
  print("")
  sk.rdivDemo(skData)
예제 #11
0
def testRunner(model=MODEL, cross_val=21):
    """Smoke-test one DE-tuned learner across `cross_val` data splits.

    NOTE(review): the results are keyed "knn"/"t_knn" but the learner
    invoked is SVM_DE — confirm whether the key or the call is the
    intended one.
    """
    errors = {
        "knn": N(),
        "t_knn": N(),
    }
    mdl = model()
    print('###' + model.__name__.upper())
    print('####' + str(len(mdl._rows)) + " data points,  " +
          str(len(mdl.indep)) + " attributes")
    print("```")
    for inp in split_data(mdl._rows, cross_val):
        say(".")
        # `inp` is consumed whole by SVM_DE; the old `train, tune, test =
        # inp` unpacking was dead code and has been removed.
        t_err, err = SVM_DE(model, inp)
        errors["knn"] += err
        errors["t_knn"] += t_err
    skData = []
    for key, n in errors.items():
        skData.append([key] + n.cache.all)
    rdivDemo(skData, "cliffs")
    print("```")
    print("")
예제 #12
0
def untuned_runner(model=MODEL, cross_val=21):
    """Cross-validate the un-tuned learners (PEEKING, TEAK, CART).

    For each of `cross_val` train/tune/test splits, each learner is built
    on the raw training rows (settings=None, i.e. no DE tuning) and its
    median MRE on the test rows is accumulated; the results are ranked
    with Scott-Knott using the cliffs-delta effect-size test.
    """
    errors = {
        "Peek": N(),
        "TEAK": N(),
        "CART": N(),
    }
    mdl = model()
    print('###' + model.__name__.upper())
    print('####' + str(len(mdl._rows)) + " data points,  " +
          str(len(mdl.indep)) + " attributes")
    all_rows = mdl._rows
    print("```")
    for inp in split_data(all_rows, cross_val):
        say(".")
        train, tune, test = inp

        # DE serves as a (model, builder, predictor) holder; each learner
        # is trained on `train` only and scored by its median MRE on `test`.
        de = DE(model(), launchWhere2, predictPEEKING, peekSettings(), inp)
        classifier = de.builder(de.model, settings=None, rows=train)
        mre = MRE(de.model, test, classifier, de.predictor)
        errors["Peek"] += mre.cache.has().median

        de = DE(model(), launchTeak, predictTeak, teakSettings(), inp)
        classifier = de.builder(de.model, settings=None, rows=train)
        mre = MRE(de.model, test, classifier, de.predictor)
        errors["TEAK"] += mre.cache.has().median

        de = DE(model(), launchCART, predictCART, cartSettings(), inp)
        classifier = de.builder(de.model, settings=None, rows=train)
        mre = MRE(de.model, test, classifier, de.predictor)
        errors["CART"] += mre.cache.has().median
    skData = []
    for key, n in errors.items():
        skData.append([key] + n.cache.all)
    rdivDemo(skData, "cliffs")
    print("```")
    print("")
예제 #13
0
def testEverything(model = MODEL):
  """Leave-one-out benchmark of every clustering/regression variant.

  Runs TEAK, linear regression, CART, kNN(1) and the where2-cluster
  family on the unweighted dataset, then the same cluster family (over
  both where2 and TEAK trees) on the feature-weighted dataset, and ranks
  every MRE cache with Scott-Knott (plus COCOMO baselines if supported).
  """
  split="median"
  print(model.__name__.upper())
  dataset=model(split=split, weighFeature=False)
  print(str(len(dataset._rows)) + " data points,  " + str(len(dataset.indep)) + " attributes")
  dataset_weighted = model(split=split, weighFeature=True)
  # Return value discarded — presumably run for its side effects on the
  # model before the per-fold trees are built. TODO confirm.
  launchWhere2(dataset, verbose=False)
  skData = [];
  scores= dict(TEAK=N(), linear_reg=N(), CART=N(),
               linRgCl_wt=N(), clstr_whr_wt=N(),
               linRgCl=N(), clstr_whr=N(),
               t_linRgCl_wt=N(), t_clstr_whr_wt=N(),
               knn_1=N(), knn_1_wt=N(), 
               clstrMn2=N(), clstrMn2_wt=N(), t_clstrMn2_wt=N(),
               clstrWdMn2=N(), clstrWdMn2_wt=N(), t_clstrWdMn2_wt=N())
  #scores= dict(TEAK=N(), linear_reg=N(), linRgCl=N())
  for score in scores.values():
    score.go=True
  # Pass 1: every learner on the unweighted dataset, leave-one-out.
  # `n.go and f(...)` is a guard idiom: only run when the score is enabled.
  for test, train in loo(dataset._rows):
    #say(".")
    desired_effort = effort(dataset, test)
    tree = launchWhere2(dataset, rows=train, verbose=False)
    tree_teak = teak(dataset, rows = train)
    n = scores["TEAK"]
    n.go and clusterk1(n, dataset, tree_teak, test, desired_effort, leafTeak)
    n = scores["linear_reg"]
    n.go and linearRegression(n, dataset, train, test, desired_effort)
    n = scores["clstr_whr"]
    n.go and clusterk1(n, dataset, tree, test, desired_effort, leaf)
    n = scores["linRgCl"]
    n.go and linRegressCluster(n, dataset, tree, test, desired_effort, leaf)
    n = scores["knn_1"]
    n.go and kNearestNeighbor(n, dataset, test, desired_effort, 1, train)
    n = scores["clstrMn2"]
    n.go and clustermean2(n, dataset, tree, test, desired_effort, leaf)
    n = scores["clstrWdMn2"]
    n.go and clusterWeightedMean2(n, dataset, tree, test, desired_effort, leaf)
    n = scores["CART"]
    n.go and CART(dataset, scores["CART"], train, test, desired_effort)
    
  # Pass 2: the cluster family again on the feature-weighted dataset,
  # first over a where2 tree (*_wt scores), then over a TEAK tree (t_*_wt).
  for test, train in loo(dataset_weighted._rows):
    #say(".")
    desired_effort = effort(dataset_weighted, test)
    
    tree_weighted, leafFunc = launchWhere2(dataset_weighted, rows=train, verbose=False), leaf
    n = scores["clstr_whr_wt"]
    n.go and clusterk1(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    n = scores["linRgCl_wt"]
    n.go and linRegressCluster(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc=leafFunc)
    n = scores["clstrMn2_wt"]
    n.go and clustermean2(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    n = scores["clstrWdMn2_wt"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    
    tree_weighted, leafFunc = teak(dataset_weighted, rows=train, verbose=False),leafTeak
    n = scores["t_clstr_whr_wt"]
    n.go and clusterk1(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    n = scores["t_linRgCl_wt"]
    n.go and linRegressCluster(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc=leafFunc)
    n = scores["knn_1_wt"]
    n.go and kNearestNeighbor(n, dataset_weighted, test, desired_effort, 1, train)
    n = scores["t_clstrMn2_wt"]
    n.go and clustermean2(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    n = scores["t_clstrWdMn2_wt"]
    n.go and clusterWeightedMean2(n, dataset_weighted, tree_weighted, test, desired_effort, leafFunc)
    
  for key,n in scores.items():
    skData.append([key] + n.cache.all)
  if dataset._isCocomo:
    for key,n in testCoCoMo(dataset).items():
      skData.append([key] + n.cache.all)  
  print("")
  sk.rdivDemo(skData)
  print("");print("")