Beispiel #1
0
 def task(self):
     """Build a line-chart description of training- vs. test-set accuracy.

     For every round count from 1 through 31 a fold generator is obtained
     from ``self.build_fold_gen(cRounds, fUseTraining)``, applied to the
     instances returned by ``get_clean_insts()`` together with ``cFold``
     (10), and the result is scored with ``dtree.cv_score``.  This is done
     once with ``fUseTraining`` True ("Training") and once with it False
     ("Test").

     Returns:
         A dict with "chart", "title", "xAxis", "yAxis" and "series" keys.
         "series" holds one entry per data set, each with a "name", a
         "pointStart" and a "data" list of scores ordered by round count.
     """
     listInst = get_clean_insts()
     cFold = 10
     cFirstRound = 1
     cLastRound = 31
     listSeries = []
     for sNamePref, fUseTraining in [("Training", True), ("Test", False)]:
         listData = []
         # range() instead of xrange(): identical iteration here, and it
         # also exists on Python 3.
         for cRounds in range(cFirstRound, cLastRound + 1):
             fxnGen = self.build_fold_gen(cRounds, fUseTraining)
             listData.append(dtree.cv_score(fxnGen(listInst, cFold)))
         listSeries.append({
             "name": sNamePref + " Set Accuracy",
             # "data" is a bare list of y values.  NOTE(review): the option
             # names match Highcharts, where such a list is plotted at
             # x = 0, 1, 2, ... unless "pointStart" is given.  Starting at
             # the first round count puts each score above its own value
             # on the "Rounds" axis instead of one position to the left.
             "pointStart": cFirstRound,
             "data": listData
         })
     return {
         "chart": {
             "defaultSeriesType": "line"
         },
         "title": {
             "text": "Training- vs. Test-Set Accuracy"
         },
         "xAxis": {
             "min": 0,
             "max": 32,
             "title": {
                 "text": "Rounds"
             }
         },
         "yAxis": {
             "title": {
                 "text": "Accuracy"
             }
         },
         "series": listSeries
     }
Beispiel #2
0
    def test_cv_score(self):
        """Compare ``dtree.cv_score`` with a hand-computed weighted ratio.

        Two instance lists are generated; the attributes of the second are
        shifted by ``cValues + 1``.  Two folds are built by swapping the
        lists between the two ``dtree.TreeFold`` argument positions.  The
        expected score is the weight of the instances in each list whose
        label equals the other list's majority label, divided by the total
        weight of both lists.
        """
        def label_weight(listInst, fLabel):
            """Return the summed ``dblWeight`` of instances labelled fLabel."""
            # Explicit 0.0 start keeps the result a float even when no
            # instance matches.
            return sum((inst.dblWeight for inst in listInst
                        if inst.fLabel == fLabel), 0.0)

        cValues = 4
        cShift = cValues + 1
        fxnGen = build_consistent_generator(fxnGenWeight=random.random,
                                            cValues=cValues)
        cInst = random.randint(30, 60)
        listLeft = fxnGen(cInst)

        # Second, independently generated list with shifted attributes.
        listRight = []
        for inst in fxnGen(cInst):
            listShifted = [cAttr + cShift for cAttr in inst.listAttrs]
            listRight.append(dtree.Instance(listShifted, inst.fLabel))

        fMajL = dtree.majority_label(listLeft)
        fMajR = dtree.majority_label(listRight)

        foldLeftFirst = dtree.TreeFold(listLeft, listRight)
        foldRightFirst = dtree.TreeFold(listRight, listLeft)
        dblScore = dtree.cv_score([foldLeftFirst, foldRightFirst])

        dblL = label_weight(listRight, fMajL)
        dblR = label_weight(listLeft, fMajR)
        dblTotalWeight = sum(inst.dblWeight for inst in listRight + listLeft)
        dblExpected = (dblL + dblR) / dblTotalWeight
        self.assertAlmostEqual(dblExpected, dblScore)
Beispiel #3
0
    def task(self):
        """Build a column-chart description for four boosting settings.

        Each setting is a (series name, max level, max rounds) triple.  The
        fold generator from ``self.build_fold_generator`` is applied first
        to the clean and then to the noisy instances, each time with
        ``cFold`` (10), and scored with ``dtree.cv_score``.

        Returns:
            A dict with "chart", "title", "xAxis", "yAxis" and "series"
            keys; every series holds [clean score, noisy score].
        """
        listInstClean = get_clean_insts()
        listInstNoisy = get_noisy_insts()
        cFold = 10
        listConfigs = [
            ("Depth 1, 10 Rounds", 1, 10),
            ("Depth 2, 10 Rounds", 2, 10),
            ("Depth 1, 30 Rounds", 1, 30),
            ("Depth 2, 30 Rounds", 2, 30),
        ]

        listSeries = []
        for sName, cMaxLevel, cMaxRounds in listConfigs:
            fxnGen = self.build_fold_generator(cMaxLevel, cMaxRounds)
            # Clean first, then noisy, matching the "categories" order below.
            dblClean = dtree.cv_score(fxnGen(listInstClean, cFold))
            dblNoisy = dtree.cv_score(fxnGen(listInstNoisy, cFold))
            listSeries.append({"name": sName, "data": [dblClean, dblNoisy]})

        dictTitle = {
            "text": "Classification Accuracy For Different Boosting Parameters"
        }
        dictYAxis = {
            "title": {"text": "Fraction Correct"},
            "min": 0.0,
            "max": 1.0,
        }
        return {
            "chart": {"defaultSeriesType": "column"},
            "title": dictTitle,
            "xAxis": {"categories": ["Clean", "Noisy"]},
            "yAxis": dictYAxis,
            "series": listSeries,
        }
Beispiel #4
0
    def task(self):
        """Build a column-chart description of clean vs. noisy scores.

        Both data sets are loaded with ``dtree.load_csv_dataset``.  Each
        labelled fold-yielding function is applied to the clean and then to
        the noisy instances with ``cFold`` (10) and scored with
        ``dtree.cv_score``.  If scoring raises ``NotImplementedError`` the
        series is kept with an empty data list and a " (not implemented)"
        suffix on its name.

        Returns:
            A dict with "chart", "title", "xAxis", "yAxis" and "series" keys.
        """
        listInstClean = dtree.load_csv_dataset(datadir("data.csv"))
        listInstNoisy = dtree.load_csv_dataset(datadir("noisy.dat"))
        cFold = 10
        listYielders = [
            ("Unpruned", dtree.yield_cv_folds),
            ("Pruned", dtree.yield_cv_folds_with_validation),
            ("Boosted", dtree.yield_boosted_folds),
            ("Stumps", self.build_depth_yield(1)),
            ("Depth-2", self.build_depth_yield(2)),
        ]

        listSeries = []
        for sLbl, fxn in listYielders:
            try:
                dblClean = dtree.cv_score(fxn(listInstClean, cFold))
                dblNoisy = dtree.cv_score(fxn(listInstNoisy, cFold))
            except NotImplementedError:
                # we can forget about un-implemented functionality
                listSeries.append({"name": sLbl + " (not implemented)",
                                   "data": []})
            else:
                listSeries.append({"name": sLbl,
                                   "data": [dblClean, dblNoisy]})

        dictYAxis = {"title": {"text": "Fraction Correct"}}
        dictYAxis["min"] = 0.0
        dictYAxis["max"] = 1.0
        return {
            "chart": {"defaultSeriesType": "column"},
            "title": {"text": "Clean vs. Noisy Classification"},
            "xAxis": {"categories": ["Clean", "Noisy"]},
            "yAxis": dictYAxis,
            "series": listSeries,
        }
Beispiel #5
0
def dt_list_acc(maxRounds, listInst):
    """Get a list of testing accuracies for each round of boosting.

    Args:
        maxRounds: Largest round count to evaluate.  Round counts
            1..maxRounds (inclusive) are scored; a value below 1 yields an
            empty list.
        listInst: Instances handed to the fold generator.

    Returns:
        The ``dtree.cv_score`` results, one per round count, in increasing
        round order.  Each score is also printed as it is computed.
    """
    cFold = 10
    listTestAcc = []
    # range() and the parenthesised single-argument print below behave the
    # same on Python 2 and also run on Python 3.
    for cRounds in range(1, maxRounds + 1):
        # NOTE(review): the False flag presumably selects test-set (not
        # training-set) scoring -- confirm against dt_build_fold_gen.
        fxnGen = dt_build_fold_gen(cRounds, False)
        score = dtree.cv_score(fxnGen(listInst, cFold))
        print("%d rounds, %f score" % (cRounds, score))
        listTestAcc.append(score)
    return listTestAcc
Beispiel #6
0
def dt_list_acc(maxRounds, listInst):
    """Get a list of testing accuracies for each round of boosting.

    Args:
        maxRounds: Highest number of rounds to score.  Every round count
            from 1 up to and including maxRounds is evaluated; nothing is
            evaluated when maxRounds is below 1.
        listInst: Instances passed on to the fold generator.

    Returns:
        A list of ``dtree.cv_score`` values ordered by round count.  A
        progress line is printed for every score.
    """
    cFold = 10
    listTestAcc = []
    # Written with range() and print(...) taking one pre-formatted string so
    # the function is valid on both Python 2 and Python 3.
    for cRounds in range(1, maxRounds + 1):
        # NOTE(review): second argument looks like a "use training set"
        # flag (False here) -- verify in dt_build_fold_gen.
        fxnGen = dt_build_fold_gen(cRounds, False)
        score = dtree.cv_score(fxnGen(listInst, cFold))
        print("%d rounds, %f score" % (cRounds, score))
        listTestAcc.append(score)
    return listTestAcc
 def task(self):
     """Build a line-chart description of training- vs. test-set accuracy.

     Round counts 1 through 15 are evaluated.  For each one the fold
     generator returned by ``self.build_fold_gen(cRounds, fUseTraining)``
     is applied to the instances from ``get_clean_insts()`` with ``cFold``
     (10) and scored by ``dtree.cv_score``; once with ``fUseTraining``
     True ("Training") and once with it False ("Test").

     Returns:
         A dict with "chart", "title", "xAxis", "yAxis" and "series" keys.
         Each series carries a "name", a "pointStart" and a "data" list of
         scores ordered by round count.
     """
     listInst = get_clean_insts()
     cFold = 10
     cFirstRound = 1
     cLastRound = 15
     listSeries = []
     for sNamePref, fUseTraining in [("Training", True), ("Test", False)]:
         listData = []
         # range() iterates identically to xrange() here and is also
         # available on Python 3.
         for cRounds in range(cFirstRound, cLastRound + 1):
             fxnGen = self.build_fold_gen(cRounds, fUseTraining)
             listData.append(dtree.cv_score(fxnGen(listInst, cFold)))
         # NOTE(review): option names match Highcharts, which plots a bare
         # list of y values at x = 0, 1, 2, ... by default.  "pointStart"
         # moves the first score to x = 1 so it lines up with its round
         # count on the "Rounds" axis.
         listSeries.append({"name": sNamePref + " Set Accuracy",
                            "pointStart": cFirstRound,
                            "data": listData})
     return {"chart": {"defaultSeriesType": "line"},
             "title": {"text": "Training- vs. Test-Set Accuracy"},
             "xAxis": {"min": 0, "max": 16, "title": {"text": "Rounds"}},
             "yAxis": {"title": {"text": "Accuracy"}},
             "series": listSeries}
Beispiel #8
0
 def test_cv_score(self):
     """Compare ``dtree.cv_score`` on two folds with a hand-computed ratio.

     Two instance lists of ``cInst`` items each are generated; the second
     has its attributes shifted by ``cValues + 1``.  Their concatenation
     is split by ``dtree.yield_cv_folds(..., 2)``.  The expected score is
     the weight of instances in each list labelled with the other list's
     majority label, divided by ``2.0 * cInst``.
     """
     def label_weight(listInst, fLabel):
         """Return the summed ``dblWeight`` of instances labelled fLabel."""
         # Explicit 0.0 start keeps the result a float even when nothing
         # matches.
         return sum((inst.dblWeight for inst in listInst
                     if inst.fLabel == fLabel), 0.0)

     cValues = 4
     cShift = cValues + 1
     fxnGen = build_consistent_generator(cValues=cValues)
     cInst = random.randint(30, 60)
     listLeft = fxnGen(cInst)

     # Second, independently generated list with shifted attributes.
     listRight = []
     for inst in fxnGen(cInst):
         listShifted = [cAttr + cShift for cAttr in inst.listAttrs]
         listRight.append(dtree.Instance(listShifted, inst.fLabel))

     fMajL = dtree.majority_label(listLeft)
     fMajR = dtree.majority_label(listRight)

     listAll = listLeft + listRight
     dblScore = dtree.cv_score(dtree.yield_cv_folds(listAll, 2))

     dblL = label_weight(listRight, fMajL)
     dblR = label_weight(listLeft, fMajR)
     dblExpected = (dblL + dblR) / (2.0 * cInst)
     self.assertAlmostEqual(dblExpected, dblScore)
    def task(self):
        """Build a column-chart description for four boosting settings.

        For each (name, max level, max rounds) setting, the generator from
        ``self.build_fold_generator`` is run on the clean and then the
        noisy instances with ``cFold`` (10), and each result is scored by
        ``dtree.cv_score``.

        Returns:
            A dict with "chart", "title", "xAxis", "yAxis" and "series"
            keys; each series holds [clean score, noisy score].
        """
        cFold = 10
        listInstClean = get_clean_insts()
        listInstNoisy = get_noisy_insts()

        def score_with(fxnFolds, listInst):
            """Score listInst using the folds produced by fxnFolds."""
            return dtree.cv_score(fxnFolds(listInst, cFold))

        listSeries = []
        for sName, cMaxLevel, cMaxRounds in (
                ("Depth 1, 10 Rounds", 1, 10),
                ("Depth 2, 10 Rounds", 2, 10),
                ("Depth 1, 30 Rounds", 1, 30),
                ("Depth 2, 30 Rounds", 2, 30)):
            fxnGen = self.build_fold_generator(cMaxLevel, cMaxRounds)
            # Order matters: clean first, noisy second, as in "categories".
            listData = [score_with(fxnGen, listInstClean),
                        score_with(fxnGen, listInstNoisy)]
            listSeries.append({"name": sName, "data": listData})

        dictChart = {}
        dictChart["chart"] = {"defaultSeriesType": "column"}
        dictChart["title"] = {
            "text": "Classification Accuracy For Different Boosting Parameters"
        }
        dictChart["xAxis"] = {"categories": ["Clean", "Noisy"]}
        dictChart["yAxis"] = {"title": {"text": "Fraction Correct"},
                              "min": 0.0, "max": 1.0}
        dictChart["series"] = listSeries
        return dictChart
Beispiel #10
0
    def task(self):
        """Build a column-chart description of clean vs. noisy scores.

        The clean and noisy data sets are read with
        ``dtree.load_csv_dataset``.  Every labelled fold-yielding function
        is applied to each data set with ``cFold`` (10) and scored by
        ``dtree.cv_score``.  A ``NotImplementedError`` during scoring gives
        a series with empty data whose name ends in " (not implemented)".

        Returns:
            A dict with "chart", "title", "xAxis", "yAxis" and "series" keys.
        """
        listInstClean = dtree.load_csv_dataset(datadir("data.csv"))
        listInstNoisy = dtree.load_csv_dataset(datadir("noisy.dat"))
        cFold = 10

        listVariants = [
            ("Unpruned", dtree.yield_cv_folds),
            ("Pruned", dtree.yield_cv_folds_with_validation),
            ("Boosted", dtree.yield_boosted_folds),
            ("Stumps", self.build_depth_yield(1)),
            ("Depth-2", self.build_depth_yield(2)),
        ]

        listSeries = []
        for sLbl, fxn in listVariants:
            sName = sLbl
            listData = []
            try:
                # Clean first, then noisy, matching the "categories" order.
                for listInst in (listInstClean, listInstNoisy):
                    listData.append(dtree.cv_score(fxn(listInst, cFold)))
            except NotImplementedError:
                # we can forget about un-implemented functionality
                sName = sLbl + " (not implemented)"
                listData = []
            listSeries.append({"name": sName, "data": listData})

        return {
            "chart": {"defaultSeriesType": "column"},
            "title": {"text": "Clean vs. Noisy Classification"},
            "xAxis": {"categories": ["Clean", "Noisy"]},
            "yAxis": {
                "title": {"text": "Fraction Correct"},
                "min": 0.0,
                "max": 1.0,
            },
            "series": listSeries,
        }