def test_set_params(self):
    """Check that a target override can be supplied through the pipeline.

    Two routes are exercised: ``Pipeline.set_params`` and the parameter
    mapping passed as the second argument to ``Pipeline.fit``.  In both
    cases the scored ``Target`` is expected to change from ``'Origin'``
    to ``'MSRP'``.
    """
    data = self.table

    impute_mean = Imputer(Imputer.MEAN)
    impute_mode = Imputer(Imputer.MODE)
    tree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

    # Baseline: with no override the estimator's own target is reported.
    pipeline = Pipeline([impute_mean, impute_mode, tree])
    fitted = pipeline.fit(data)
    result = fitted.score(data)
    self.assertEqual(result.loc['Target'], 'Origin')

    # Set extra parameters on Pipeline (not on estimator)
    pipeline.set_params({tree.target: 'MSRP'})
    # The estimator object itself must still report its original target.
    self.assertEqual(tree.target, 'Origin')

    fitted = pipeline.fit(data)
    result = fitted.score(data)
    self.assertEqual(result.loc['Target'], 'MSRP')

    # Set parameters during fit
    pipeline = Pipeline([impute_mean, impute_mode, tree])
    fitted = pipeline.fit(data)
    result = fitted.score(data)
    self.assertEqual(result.loc['Target'], 'Origin')

    fitted = pipeline.fit(data, {tree.target: 'MSRP'})
    result = fitted.score(data)
    self.assertEqual(result.loc['Target'], 'MSRP')
def test_basic(self):
    """Fit a three-stage pipeline and check the model and its score index.

    Asserts that ``fit`` returns an object whose class is named
    ``PipelineModel`` with three stages, that the two imputers appear in
    the model as the very same objects, that the last stage's class is
    named ``DecisionTreeModel``, and that the score's index holds exactly
    the expected labels.  Finally asserts that building a ``Pipeline``
    containing a plain string raises ``TypeError``.
    """
    tbl = self.table

    mean_imp = Imputer(Imputer.MEAN)
    mode_imp = Imputer(Imputer.MODE)
    dtree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

    pipe = Pipeline([mean_imp, mode_imp, dtree])
    model = pipe.fit(tbl)

    self.assertEqual(model.__class__.__name__, 'PipelineModel')
    self.assertEqual(len(model.stages), 3)
    # assertIs reports both operands on failure, unlike assertTrue(a is b).
    self.assertIs(model[0], mean_imp)
    self.assertIs(model[1], mode_imp)
    self.assertEqual(model[2].__class__.__name__, 'DecisionTreeModel')

    out = model.score(tbl)

    expected_index = {
        'Target', 'Level', 'Var', 'NBins', 'NObsUsed',
        'TargetCount', 'TargetMiss', 'PredCount', 'PredMiss',
        'Event', 'EventCount', 'NonEventCount', 'EventMiss',
        'AreaUnderROCCurve', 'CRCut', 'ClassificationCutOff',
        'KS', 'KSCutOff', 'MisClassificationRate',
    }
    self.assertEqual(set(out.index), expected_index)

    # Bad item type
    with self.assertRaises(TypeError):
        Pipeline([mean_imp, mode_imp, 'foo', dtree])
def test_regression_score(self):
    """Score a pipeline whose tree targets ``'MSRP'`` and check the result.

    Asserts that the score is a ``pd.Series``, that its descriptive
    entries (``Target``, ``Level``, ``NBins``, ``NObsUsed``) have the
    expected values, and that each error metric entry is a ``float``.
    """
    tbl = self.table

    mean_imp = Imputer(Imputer.MEAN)
    mode_imp = Imputer(Imputer.MODE)
    dtree = DecisionTree(target='MSRP', nominals=nominals, inputs=inputs)

    pipe = Pipeline([mean_imp, mode_imp, dtree])
    model = pipe.fit(tbl)
    score = model.score(tbl)

    # assertIsInstance shows the offending object and type on failure.
    self.assertIsInstance(score, pd.Series)

    self.assertEqual(score.loc['Target'], 'MSRP')
    self.assertEqual(score.loc['Level'], 'INTERVAL')
    self.assertEqual(score.loc['NBins'], 100)
    self.assertEqual(score.loc['NObsUsed'], 428)

    float_metrics = (
        'AverageSquaredError',
        'AverageAbsoluteError',
        'AverageSquaredLogarithmicError',
        'RootAverageSquaredError',
        'RootAverageAbsoluteError',
        'RootAverageSquaredLogarithmicError',
    )
    for metric in float_metrics:
        # The metric name is passed as the message so a failure says
        # which entry had the wrong type.
        self.assertIsInstance(score.loc[metric], float, metric)
def test_multiple_estimators(self):
    """Fit a pipeline that ends in two decision trees and check its score.

    Asserts that the fitted model's class is named ``PipelineModel`` with
    four stages, that the two imputers appear in the model as the very
    same objects, that both trailing stages have a class named
    ``DecisionTreeModel``, and that the score's index consists of exactly
    ``'DecisionTree'`` and ``'DecisionTree1'``.
    """
    tbl = self.table

    mean_imp = Imputer(Imputer.MEAN)
    mode_imp = Imputer(Imputer.MODE)
    dtree1 = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)
    dtree2 = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

    pipe = Pipeline([mean_imp, mode_imp, dtree1, dtree2])
    model = pipe.fit(tbl)

    self.assertEqual(model.__class__.__name__, 'PipelineModel')
    self.assertEqual(len(model.stages), 4)
    # assertIs reports both operands on failure, unlike assertTrue(a is b).
    self.assertIs(model[0], mean_imp)
    self.assertIs(model[1], mode_imp)
    self.assertEqual(model[2].__class__.__name__, 'DecisionTreeModel')
    self.assertEqual(model[3].__class__.__name__, 'DecisionTreeModel')

    out = model.score(tbl)
    self.assertEqual(set(out.index), {'DecisionTree', 'DecisionTree1'})
def test_unload(self):
    """Check the effect of ``unload`` on the last stage's table.

    The ``exists`` field of ``tableexists()`` for the final stage's data
    table is asserted to be 1 after fitting and 0 once ``model.unload()``
    has been called.
    """
    impute_mean = Imputer(Imputer.MEAN)
    impute_mode = Imputer(Imputer.MODE)
    tree = DecisionTree(target='MSRP', nominals=nominals, inputs=inputs)

    pipeline = Pipeline([impute_mean, impute_mode, tree])
    fitted = pipeline.fit(self.table)

    # Probe before unloading.
    exists_before = fitted[-1].data.table.tableexists().exists
    self.assertEqual(exists_before, 1)

    fitted.unload()

    # Probe again; the lookup is repeated from the model on purpose.
    exists_after = fitted[-1].data.table.tableexists().exists
    self.assertEqual(exists_after, 0)
def test_model_transform(self):
    """Check ``transform`` on a fitted mode-imputer + decision-tree model.

    Asserts that the input table's maximum ``nmiss()`` is 2 both before
    fitting and after transforming, that the transform result's class is
    named ``CASTable``, and that the result's maximum ``nmiss()`` is 0.
    """
    source = self.table

    impute_mode = Imputer(Imputer.MODE)
    tree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)
    pipeline = Pipeline([impute_mode, tree])

    missing_before = source.nmiss().max()
    self.assertEqual(missing_before, 2)

    fitted = pipeline.fit(source)
    transformed = fitted.transform(source)

    result_class = transformed.__class__.__name__
    self.assertEqual(result_class, 'CASTable')

    # The input table is re-queried after the transform.
    missing_after = source.nmiss().max()
    self.assertEqual(missing_after, 2)

    missing_in_result = transformed.nmiss().max()
    self.assertEqual(missing_in_result, 0)
def test_classification_score(self):
    """Score a pipeline whose tree targets ``'Origin'`` and check the result.

    Asserts that the score is a ``pd.Series``, that its descriptive
    entries (``Target``, ``Level``, ``Event``, ``NBins``, ``NObsUsed``)
    have the expected values, and that each classification metric entry
    is a ``float``.
    """
    tbl = self.table

    mean_imp = Imputer(Imputer.MEAN)
    mode_imp = Imputer(Imputer.MODE)
    dtree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)

    pipe = Pipeline([mean_imp, mode_imp, dtree])
    model = pipe.fit(tbl)
    score = model.score(tbl)

    # assertIsInstance shows the offending object and type on failure.
    self.assertIsInstance(score, pd.Series)

    self.assertEqual(score.loc['Target'], 'Origin')
    self.assertEqual(score.loc['Level'], 'CLASS')
    self.assertEqual(score.loc['Event'], 'USA')
    self.assertEqual(score.loc['NBins'], 100)
    self.assertEqual(score.loc['NObsUsed'], 428)

    float_metrics = (
        'AreaUnderROCCurve',
        'CRCut',
        'KS',
        'KSCutOff',
        'MisClassificationRate',
    )
    for metric in float_metrics:
        # The metric name is passed as the message so a failure says
        # which entry had the wrong type.
        self.assertIsInstance(score.loc[metric], float, metric)