コード例 #1
0
    def test_set_params(self):
        """Override the tree's target through the pipeline, not the estimator.

        The override is supplied in two ways: with ``Pipeline.set_params``
        and as an extra argument to ``Pipeline.fit``.  In both cases the
        'Target' entry of the score output is expected to be the override.
        """
        data = self.table

        def scored_target(pipeline, *fit_args):
            # Fit on the test table, score on the same table, and return
            # the 'Target' entry of the score output.
            return pipeline.fit(data, *fit_args).score(data).loc['Target']

        impute_mean = Imputer(Imputer.MEAN)
        impute_mode = Imputer(Imputer.MODE)
        tree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

        # Without any override, the target given to the estimator is used.
        pipeline = Pipeline([impute_mean, impute_mode, tree])
        self.assertEqual(scored_target(pipeline), 'Origin')

        # Extra parameters are set on the Pipeline (not on the estimator),
        # so the estimator itself must still report its original target.
        pipeline.set_params({tree.target: 'MSRP'})
        self.assertEqual(tree.target, 'Origin')

        self.assertEqual(scored_target(pipeline), 'MSRP')

        # Start over with a new pipeline and pass the parameters during fit.
        pipeline = Pipeline([impute_mean, impute_mode, tree])

        self.assertEqual(scored_target(pipeline), 'Origin')

        self.assertEqual(scored_target(pipeline, {tree.target: 'MSRP'}),
                         'MSRP')
コード例 #2
0
    def test_basic(self):
        """Fit and score a three-stage pipeline and check its basic shape.

        Verifies the class name and stage count of the fitted model, the
        set of index labels in the score output, and that constructing a
        Pipeline with a plain string as a stage raises ``TypeError``.
        """
        tbl = self.table

        mean_imp = Imputer(Imputer.MEAN)
        mode_imp = Imputer(Imputer.MODE)
        dtree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

        pipe = Pipeline([mean_imp, mode_imp, dtree])

        model = pipe.fit(tbl)
        self.assertEqual(model.__class__.__name__, 'PipelineModel')
        self.assertEqual(len(model.stages), 3)
        # The imputer stages are expected to be the very same objects that
        # were given to the Pipeline; assertIs shows both operands on failure.
        self.assertIs(model[0], mean_imp)
        self.assertIs(model[1], mode_imp)
        # The last stage is compared by class name only.
        self.assertEqual(model[2].__class__.__name__, 'DecisionTreeModel')

        out = model.score(tbl)

        # Only the set of labels matters here, not their order.
        self.assertEqual(
            set(out.index),
            {
                'Target', 'Level', 'Var', 'NBins', 'NObsUsed', 'TargetCount',
                'TargetMiss', 'PredCount', 'PredMiss', 'Event', 'EventCount',
                'NonEventCount', 'EventMiss', 'AreaUnderROCCurve', 'CRCut',
                'ClassificationCutOff', 'KS', 'KSCutOff',
                'MisClassificationRate',
            })

        # Bad item type: a string in the stage list must be rejected.
        with self.assertRaises(TypeError):
            Pipeline([mean_imp, mode_imp, 'foo', dtree])
コード例 #3
0
    def test_regression_score(self):
        """Score a pipeline whose decision tree has the target 'MSRP'.

        Checks the identifying entries of the returned ``pd.Series`` and
        that each listed error metric is reported as a ``float``.
        """
        tbl = self.table

        mean_imp = Imputer(Imputer.MEAN)
        mode_imp = Imputer(Imputer.MODE)
        dtree = DecisionTree(target='MSRP', nominals=nominals, inputs=inputs)

        pipe = Pipeline([mean_imp, mode_imp, dtree])

        model = pipe.fit(tbl)
        score = model.score(tbl)

        self.assertIsInstance(score, pd.Series)
        self.assertEqual(score.loc['Target'], 'MSRP')
        self.assertEqual(score.loc['Level'], 'INTERVAL')
        self.assertEqual(score.loc['NBins'], 100)
        self.assertEqual(score.loc['NObsUsed'], 428)

        # assertIsInstance reports the offending value on failure; the
        # metric name is passed as the message so the entry is identified.
        for metric in (
                'AverageSquaredError',
                'AverageAbsoluteError',
                'AverageSquaredLogarithmicError',
                'RootAverageSquaredError',
                'RootAverageAbsoluteError',
                'RootAverageSquaredLogarithmicError',
        ):
            self.assertIsInstance(score.loc[metric], float, metric)
コード例 #4
0
    def test_multiple_estimators(self):
        """Fit and score a pipeline that contains two decision trees.

        Verifies the class name and stage count of the fitted model and
        that the score output is indexed by 'DecisionTree' and
        'DecisionTree1'.
        """
        tbl = self.table

        mean_imp = Imputer(Imputer.MEAN)
        mode_imp = Imputer(Imputer.MODE)
        dtree1 = DecisionTree(target='Origin',
                              nominals=nominals,
                              inputs=inputs)
        dtree2 = DecisionTree(target='Origin',
                              nominals=nominals,
                              inputs=inputs)

        pipe = Pipeline([mean_imp, mode_imp, dtree1, dtree2])

        model = pipe.fit(tbl)
        self.assertEqual(model.__class__.__name__, 'PipelineModel')
        self.assertEqual(len(model.stages), 4)
        # The imputer stages are expected to be the very same objects that
        # were given to the Pipeline; assertIs shows both operands on failure.
        self.assertIs(model[0], mean_imp)
        self.assertIs(model[1], mode_imp)
        # Both estimator stages are compared by class name only.
        self.assertEqual(model[2].__class__.__name__, 'DecisionTreeModel')
        self.assertEqual(model[3].__class__.__name__, 'DecisionTreeModel')

        out = model.score(tbl)
        # Only the set of index labels matters here, not their order.
        self.assertEqual(set(out.index),
                         {'DecisionTree', 'DecisionTree1'})
コード例 #5
0
    def test_unload(self):
        """Check that ``unload`` on a fitted pipeline removes the last stage's table.

        The table behind the final stage is expected to report
        ``exists == 1`` after fitting and ``exists == 0`` after ``unload``.
        """
        impute_mean = Imputer(Imputer.MEAN)
        impute_mode = Imputer(Imputer.MODE)
        tree = DecisionTree(target='MSRP', nominals=nominals, inputs=inputs)

        pipeline = Pipeline([impute_mean, impute_mode, tree])

        fitted = pipeline.fit(self.table)

        exists_before = fitted[-1].data.table.tableexists().exists
        self.assertEqual(exists_before, 1)

        fitted.unload()

        # Look the table up again through the model rather than reusing a
        # handle obtained before the unload.
        exists_after = fitted[-1].data.table.tableexists().exists
        self.assertEqual(exists_after, 0)
コード例 #6
0
    def test_model_transform(self):
        """Transform a table with a fitted pipeline and compare missing counts.

        The input table's largest ``nmiss()`` value is expected to be 2 both
        before and after the transform, while the returned ``CASTable`` is
        expected to have a largest ``nmiss()`` value of 0.
        """
        data = self.table

        impute_mode = Imputer(Imputer.MODE)
        tree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

        pipeline = Pipeline([impute_mode, tree])

        missing_before = data.nmiss().max()
        self.assertEqual(missing_before, 2)

        fitted = pipeline.fit(data)
        transformed = fitted.transform(data)

        self.assertEqual(transformed.__class__.__name__, 'CASTable')

        # The source table must be unchanged by the transform.
        missing_after = data.nmiss().max()
        self.assertEqual(missing_after, 2)

        missing_in_output = transformed.nmiss().max()
        self.assertEqual(missing_in_output, 0)
コード例 #7
0
    def test_classification_score(self):
        """Score a pipeline whose decision tree has the target 'Origin'.

        Checks the identifying entries of the returned ``pd.Series`` and
        that each listed metric is reported as a ``float``.
        """
        tbl = self.table

        mean_imp = Imputer(Imputer.MEAN)
        mode_imp = Imputer(Imputer.MODE)
        dtree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

        pipe = Pipeline([mean_imp, mode_imp, dtree])

        model = pipe.fit(tbl)
        score = model.score(tbl)

        self.assertIsInstance(score, pd.Series)
        self.assertEqual(score.loc['Target'], 'Origin')
        self.assertEqual(score.loc['Level'], 'CLASS')
        self.assertEqual(score.loc['Event'], 'USA')
        self.assertEqual(score.loc['NBins'], 100)
        self.assertEqual(score.loc['NObsUsed'], 428)

        # assertIsInstance reports the offending value on failure; the
        # metric name is passed as the message so the entry is identified.
        for metric in (
                'AreaUnderROCCurve',
                'CRCut',
                'KS',
                'KSCutOff',
                'MisClassificationRate',
        ):
            self.assertIsInstance(score.loc[metric], float, metric)