def task(self):
    """Build a line-chart spec of training- vs. test-set accuracy per round.

    For every round count from 1 through 31, and for each of the two
    settings ("Training" -> True, "Test" -> False), a fold generator is
    obtained from ``self.build_fold_gen(cRounds, fUseTraining)`` and applied
    to the clean instances with 10 folds; the folds are scored with
    ``dtree.cv_score``.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries.  Each series holds a "name", its "data" list of scores and a
    "pointStart" equal to the first round count.
    """
    cFold = 10
    cFirstRound = 1
    cLastRound = 31
    listInst = get_clean_insts()
    listSeries = []
    for sNamePref, fUseTraining in [("Training", True), ("Test", False)]:
        listData = []
        for cRounds in xrange(cFirstRound, cLastRound + 1):
            fxnGen = self.build_fold_gen(cRounds, fUseTraining)
            listData.append(dtree.cv_score(fxnGen(listInst, cFold)))
        listSeries.append({
            "name": sNamePref + " Set Accuracy",
            "data": listData,
            # The data list has no explicit x values, so the first score
            # (1 round) must be anchored at x = 1 to line up with the
            # "Rounds" axis instead of being drawn at x = 0.
            # NOTE(review): the option names look like a Highcharts config;
            # confirm the renderer honours the series "pointStart" option.
            "pointStart": cFirstRound,
        })
    return {
        "chart": {"defaultSeriesType": "line"},
        "title": {"text": "Training- vs. Test-Set Accuracy"},
        "xAxis": {
            "min": 0,
            "max": cLastRound + 1,
            "title": {"text": "Rounds"},
        },
        "yAxis": {"title": {"text": "Accuracy"}},
        "series": listSeries,
    }
def test_cv_score(self):
    """Check dtree.cv_score against a hand-computed weighted score.

    Two instance lists are generated (the second with every attribute
    shifted by ``cValues + 1``), paired into two TreeFolds with the roles
    swapped, and the score is compared with the weight of the instances in
    each list whose label equals the other list's majority label, divided
    by the total weight of both lists.
    """
    def label_weight(listInst, fLabel):
        """Sum dblWeight over the instances whose fLabel equals fLabel."""
        dblSum = 0.0
        for inst in listInst:
            if inst.fLabel == fLabel:
                dblSum += inst.dblWeight
        return dblSum

    cValues = 4
    fxnGen = build_consistent_generator(
        cValues=cValues, fxnGenWeight=random.random)
    cInst = random.randint(30, 60)

    listLeft = fxnGen(cInst)
    listRight = []
    for inst in fxnGen(cInst):
        listShifted = [cAttr + cValues + 1 for cAttr in inst.listAttrs]
        listRight.append(dtree.Instance(listShifted, inst.fLabel))

    fMajL = dtree.majority_label(listLeft)
    fMajR = dtree.majority_label(listRight)

    foldLR = dtree.TreeFold(listLeft, listRight)
    foldRL = dtree.TreeFold(listRight, listLeft)
    dblScore = dtree.cv_score([foldLR, foldRL])

    # Each list is judged against the majority label of the opposite list.
    dblL = label_weight(listRight, fMajL)
    dblR = label_weight(listLeft, fMajR)
    dblTotalWeight = sum([inst.dblWeight for inst in listRight + listLeft])
    dblExpected = (dblL + dblR) / dblTotalWeight
    self.assertAlmostEqual(dblExpected, dblScore)
def task(self):
    """Build a column-chart spec comparing four boosting configurations.

    Each configuration (depth 1 or 2, 10 or 30 rounds) gets a fold generator
    from ``self.build_fold_generator(cMaxLevel, cMaxRounds)``; the generator
    is applied to the clean and then the noisy instances with 10 folds and
    scored with ``dtree.cv_score``.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries, one series per configuration.
    """
    listInstClean = get_clean_insts()
    listInstNoisy = get_noisy_insts()
    cFold = 10
    listConfig = [
        ("Depth 1, 10 Rounds", 1, 10),
        ("Depth 2, 10 Rounds", 2, 10),
        ("Depth 1, 30 Rounds", 1, 30),
        ("Depth 2, 30 Rounds", 2, 30),
    ]
    listSeries = []
    for sName, cMaxLevel, cMaxRounds in listConfig:
        fxnGen = self.build_fold_generator(cMaxLevel, cMaxRounds)
        # Clean first, then noisy, matching the x-axis category order.
        dblClean = dtree.cv_score(fxnGen(listInstClean, cFold))
        dblNoisy = dtree.cv_score(fxnGen(listInstNoisy, cFold))
        listSeries.append({"name": sName, "data": [dblClean, dblNoisy]})
    sTitle = "Classification Accuracy For Different Boosting Parameters"
    dictYAxis = {
        "title": {"text": "Fraction Correct"},
        "min": 0.0,
        "max": 1.0,
    }
    return {
        "chart": {"defaultSeriesType": "column"},
        "title": {"text": sTitle},
        "xAxis": {"categories": ["Clean", "Noisy"]},
        "yAxis": dictYAxis,
        "series": listSeries,
    }
def task(self):
    """Build a column-chart spec of clean vs. noisy classification scores.

    Loads "data.csv" and "noisy.dat" through ``datadir`` and
    ``dtree.load_csv_dataset``, then scores both datasets with
    ``dtree.cv_score`` for each of five fold generators using 10 folds.
    A generator that raises NotImplementedError yields a series with an
    empty data list and " (not implemented)" appended to its name.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries.
    """
    listInstClean = dtree.load_csv_dataset(datadir("data.csv"))
    listInstNoisy = dtree.load_csv_dataset(datadir("noisy.dat"))
    cFold = 10
    # Built before the loop, so an error raised while constructing one of
    # the depth-limited generators is not caught by the handler below.
    listFoldGens = [
        ("Unpruned", dtree.yield_cv_folds),
        ("Pruned", dtree.yield_cv_folds_with_validation),
        ("Boosted", dtree.yield_boosted_folds),
        ("Stumps", self.build_depth_yield(1)),
        ("Depth-2", self.build_depth_yield(2)),
    ]
    listSeries = []
    for sLbl, fxn in listFoldGens:
        try:
            dblClean = dtree.cv_score(fxn(listInstClean, cFold))
            dblNoisy = dtree.cv_score(fxn(listInstNoisy, cFold))
        except NotImplementedError:
            # we can forget about un-implemented functionality
            dictSeries = {"name": sLbl + " (not implemented)", "data": []}
        else:
            dictSeries = {"name": sLbl, "data": [dblClean, dblNoisy]}
        listSeries.append(dictSeries)
    dictYAxis = {
        "title": {"text": "Fraction Correct"},
        "min": 0.0,
        "max": 1.0,
    }
    return {
        "chart": {"defaultSeriesType": "column"},
        "title": {"text": "Clean vs. Noisy Classification"},
        "xAxis": {"categories": ["Clean", "Noisy"]},
        "yAxis": dictYAxis,
        "series": listSeries,
    }
def dt_list_acc(maxRounds, listInst):
    """Return the cross-validation score for each boosting round count.

    For every round count from 1 through ``maxRounds`` a fold generator is
    obtained from ``dt_build_fold_gen(cRounds, False)``, applied to
    ``listInst`` with 10 folds and scored with ``dtree.cv_score``.  Each
    score is printed as it is computed and collected in round order.
    """
    cFold = 10
    listTestAcc = []
    for cRounds in xrange(1, maxRounds + 1):
        iterFolds = dt_build_fold_gen(cRounds, False)(listInst, cFold)
        dblScore = dtree.cv_score(iterFolds)
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement.
        print("%d rounds, %f score" % (cRounds, dblScore))
        listTestAcc.append(dblScore)
    return listTestAcc
def dt_list_acc(maxRounds, listInst):
    """Collect one cross-validation score per number of boosting rounds.

    Round counts run from 1 to ``maxRounds`` inclusive.  Each count builds a
    fold generator via ``dt_build_fold_gen(cRounds, False)``, which is run
    over ``listInst`` with 10 folds; ``dtree.cv_score`` of those folds is
    printed and appended to the returned list.
    """
    cFold = 10
    listScores = []
    for cRounds in xrange(1, maxRounds + 1):
        fxnFolds = dt_build_fold_gen(cRounds, False)
        dblAcc = dtree.cv_score(fxnFolds(listInst, cFold))
        sLine = "%d rounds, %f score" % (cRounds, dblAcc)
        print(sLine)
        listScores.append(dblAcc)
    return listScores
def task(self):
    """Build a line-chart spec of training- vs. test-set accuracy per round.

    For every round count from 1 through 15, and for each of the two
    settings ("Training" -> True, "Test" -> False), a fold generator is
    obtained from ``self.build_fold_gen(cRounds, fUseTraining)`` and applied
    to the clean instances with 10 folds; the folds are scored with
    ``dtree.cv_score``.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries.  Each series holds a "name", its "data" list of scores and a
    "pointStart" equal to the first round count.
    """
    cFold = 10
    cFirstRound = 1
    cLastRound = 15
    listInst = get_clean_insts()
    listSeries = []
    for sNamePref, fUseTraining in [("Training", True), ("Test", False)]:
        listData = []
        for cRounds in xrange(cFirstRound, cLastRound + 1):
            fxnGen = self.build_fold_gen(cRounds, fUseTraining)
            listData.append(dtree.cv_score(fxnGen(listInst, cFold)))
        listSeries.append({
            "name": sNamePref + " Set Accuracy",
            "data": listData,
            # The data list has no explicit x values, so the first score
            # (1 round) must be anchored at x = 1 to line up with the
            # "Rounds" axis instead of being drawn at x = 0.
            # NOTE(review): the option names look like a Highcharts config;
            # confirm the renderer honours the series "pointStart" option.
            "pointStart": cFirstRound,
        })
    return {
        "chart": {"defaultSeriesType": "line"},
        "title": {"text": "Training- vs. Test-Set Accuracy"},
        "xAxis": {
            "min": 0,
            "max": cLastRound + 1,
            "title": {"text": "Rounds"},
        },
        "yAxis": {"title": {"text": "Accuracy"}},
        "series": listSeries,
    }
def test_cv_score(self):
    """Check dtree.cv_score on a two-fold split of two generated lists.

    Two instance lists of equal size are generated (the second with every
    attribute shifted by ``cValues + 1``), concatenated and split with
    ``dtree.yield_cv_folds(..., 2)``.  The score is compared with the weight
    of the instances in each list whose label equals the other list's
    majority label, divided by ``2.0 * cInst``.

    NOTE(review): the expected value presumably relies on the two folds
    being exactly the two lists and on every instance weighing 1 - confirm
    against dtree.
    """
    def label_weight(listInst, fLabel):
        """Sum dblWeight over the instances whose fLabel equals fLabel."""
        dblSum = 0.0
        for inst in listInst:
            if inst.fLabel == fLabel:
                dblSum += inst.dblWeight
        return dblSum

    cValues = 4
    fxnGen = build_consistent_generator(cValues=cValues)
    cInst = random.randint(30, 60)

    listLeft = fxnGen(cInst)
    listRight = []
    for inst in fxnGen(cInst):
        listShifted = [cAttr + cValues + 1 for cAttr in inst.listAttrs]
        listRight.append(dtree.Instance(listShifted, inst.fLabel))

    fMajL = dtree.majority_label(listLeft)
    fMajR = dtree.majority_label(listRight)

    listAll = listLeft + listRight
    dblScore = dtree.cv_score(dtree.yield_cv_folds(listAll, 2))

    # Each list is judged against the majority label of the opposite list.
    dblL = label_weight(listRight, fMajL)
    dblR = label_weight(listLeft, fMajR)
    dblExpected = (dblL + dblR) / (2.0 * cInst)
    self.assertAlmostEqual(dblExpected, dblScore)
def task(self):
    """Build a column-chart spec of accuracy for four boosting settings.

    The settings pair a maximum depth (1 or 2) with a round count (10 or
    30).  For each one, ``self.build_fold_generator(cMaxLevel, cMaxRounds)``
    supplies a fold generator that is run over the clean and the noisy
    instances with 10 folds, and ``dtree.cv_score`` scores the folds.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries, one series per setting.
    """
    listInstClean = get_clean_insts()
    listInstNoisy = get_noisy_insts()
    cFold = 10
    listSeries = []
    for sName, cMaxLevel, cMaxRounds in [
            ("Depth 1, 10 Rounds", 1, 10),
            ("Depth 2, 10 Rounds", 2, 10),
            ("Depth 1, 30 Rounds", 1, 30),
            ("Depth 2, 30 Rounds", 2, 30)]:
        fxnGen = self.build_fold_generator(cMaxLevel, cMaxRounds)

        def score(listInst):
            """Score listInst with the current setting's fold generator."""
            return dtree.cv_score(fxnGen(listInst, cFold))

        # Called right away, so the closure always sees this iteration's
        # generator.
        listData = [score(listInstClean), score(listInstNoisy)]
        listSeries.append({"name": sName, "data": listData})
    sTitle = "Classification Accuracy For Different Boosting Parameters"
    dictChart = {}
    dictChart["chart"] = {"defaultSeriesType": "column"}
    dictChart["title"] = {"text": sTitle}
    dictChart["xAxis"] = {"categories": ["Clean", "Noisy"]}
    dictChart["yAxis"] = {
        "title": {"text": "Fraction Correct"},
        "min": 0.0,
        "max": 1.0,
    }
    dictChart["series"] = listSeries
    return dictChart
def task(self):
    """Build a column-chart spec comparing five fold generators.

    The clean ("data.csv") and noisy ("noisy.dat") datasets are loaded with
    ``dtree.load_csv_dataset(datadir(...))``.  Each fold generator is run
    over both datasets with 10 folds and scored with ``dtree.cv_score``.
    If scoring raises NotImplementedError, the series is emitted with no
    data and " (not implemented)" appended to its name.

    Returns a dict with "chart", "title", "xAxis", "yAxis" and "series"
    entries.
    """
    listInstClean = dtree.load_csv_dataset(datadir("data.csv"))
    listInstNoisy = dtree.load_csv_dataset(datadir("noisy.dat"))
    cFold = 10
    # Evaluated up front: building the depth-limited generators happens
    # outside the try block below.
    listLabelled = [
        ("Unpruned", dtree.yield_cv_folds),
        ("Pruned", dtree.yield_cv_folds_with_validation),
        ("Boosted", dtree.yield_boosted_folds),
        ("Stumps", self.build_depth_yield(1)),
        ("Depth-2", self.build_depth_yield(2)),
    ]
    listSeries = []
    for sLbl, fxn in listLabelled:

        def score(listInst):
            """Score listInst with the current fold generator."""
            return dtree.cv_score(fxn(listInst, cFold))

        try:
            listData = [score(listInstClean), score(listInstNoisy)]
        except NotImplementedError:
            # we can forget about un-implemented functionality
            listSeries.append(
                {"name": sLbl + " (not implemented)", "data": []})
        else:
            listSeries.append({"name": sLbl, "data": listData})
    dictChart = {}
    dictChart["chart"] = {"defaultSeriesType": "column"}
    dictChart["title"] = {"text": "Clean vs. Noisy Classification"}
    dictChart["xAxis"] = {"categories": ["Clean", "Noisy"]}
    dictChart["yAxis"] = {
        "title": {"text": "Fraction Correct"},
        "min": 0.0,
        "max": 1.0,
    }
    dictChart["series"] = listSeries
    return dictChart