def test_fit(self):
    """Fit a Cylinders decision tree and inspect the returned model.

    Checks the class names of the model, its data and its diagnostics,
    compares the model parameters with the defaults overlaid with the
    constructor arguments, and then repeats the fit with an empty
    ``nominals`` list.
    """
    table = self.table

    def spec():
        # Build new list objects on every call so the expected dict and
        # the estimator never share mutable state.
        return dict(target='Cylinders',
                    nominals=['Make', 'Model'],
                    inputs=['Make', 'Model', 'Horsepower'])

    expected = dtree_defaults.copy()
    expected.update(spec())

    fitted = DecisionTree(**spec()).fit(table)

    self.assertEqual(fitted.__class__.__name__, 'DecisionTreeModel')
    self.assertEqual(fitted.data.__class__.__name__, 'CASTable')
    self.assertEqual(fitted.params, expected)
    self.assertEqual(fitted.diagnostics.__class__.__name__, 'CASResults')
    diagnostic_names = sorted(fitted.diagnostics.keys())
    self.assertEqual(diagnostic_names, ['ModelInfo', 'OutputCasTables'])

    # Have nominals set automatically: an empty list is passed in, and
    # the parameter recorded on the model is expected to remain [].
    auto_spec = spec()
    auto_spec['nominals'] = []
    fitted = DecisionTree(**auto_spec).fit(table)
    self.assertEqual(fitted.params['nominals'], [])
def test_unload_model(self):
    """Check that a model tracked by a ResourceManager has its table removed.

    The model table must exist right after fitting and must no longer
    exist once the ``ResourceManager`` context has been left.
    """
    estimator = DecisionTree(target='Cylinders',
                             inputs=['MSRP', 'Horsepower'])
    fitted = estimator.fit(self.table)

    def table_exists():
        # Ask the server each time; the answer changes during the test.
        return fitted.data.table.tableexists().exists

    self.assertEqual(table_exists(), 1)

    with ResourceManager() as manager:
        manager.track_model(fitted)

    # NOTE(review): this check is assumed to sit after the ``with`` block,
    # since it expects the table to be gone - confirm against the file.
    self.assertEqual(table_exists(), 0)
def test_unload(self):
    """Check that ``model.unload()`` removes the fitted model's table."""
    estimator = DecisionTree(target='Cylinders',
                             nominals=['Make', 'Model'],
                             inputs=['Make', 'Model', 'Horsepower'])
    fitted = estimator.fit(self.table)

    def table_exists():
        # Query the server on every call rather than caching the result.
        return fitted.data.table.tableexists().exists

    self.assertEqual(table_exists(), 1)
    fitted.unload()
    self.assertEqual(table_exists(), 0)
def test_fit(self):
    """Fit an Origin decision tree and inspect the returned model.

    Uses the ``nominals`` and ``inputs`` names defined outside this
    method, and checks the class names of the model, its data and its
    diagnostics as well as the recorded parameters.
    """
    table = self.table

    expected = dtree_defaults.copy()
    expected.update(target='Origin', nominals=nominals, inputs=inputs)

    estimator = DecisionTree(target='Origin',
                             nominals=nominals,
                             inputs=inputs)
    fitted = estimator.fit(table)

    self.assertEqual(fitted.__class__.__name__, 'DecisionTreeModel')
    self.assertEqual(fitted.data.__class__.__name__, 'CASTable')
    self.assertEqual(fitted.params, expected)
    self.assertEqual(fitted.diagnostics.__class__.__name__, 'CASResults')
    diagnostic_names = sorted(fitted.diagnostics.keys())
    self.assertEqual(diagnostic_names, ['ModelInfo', 'OutputCasTables'])
def test_params(self):
    """Check DecisionTree parameter handling at construction and fit time.

    Covers, in order: the default parameters, parameters given to the
    constructor, invalid constructor parameters, parameter overrides
    passed to ``fit``, and invalid ``fit`` overrides.
    """
    table = self.table

    def expected_with(**overrides):
        # Defaults overlaid with the model spec used throughout this test
        # plus any extra overrides; fresh lists are built on every call.
        merged = dtree_defaults.copy()
        merged.update(target='Cylinders',
                      nominals=['Make', 'Model'],
                      inputs=['Make', 'Model', 'Horsepower'])
        merged.update(overrides)
        return merged

    # Check defaults
    estimator = DecisionTree()
    self.assertEqual(estimator.params.to_dict(), dtree_defaults)

    # Check constructor parameters
    expected = expected_with(prune=True)
    estimator = DecisionTree(prune=True,
                             target='Cylinders',
                             nominals=['Make', 'Model'],
                             inputs=['Make', 'Model', 'Horsepower'])
    self.assertEqual(estimator.params.to_dict(), expected)

    fitted = estimator.fit(table)
    self.assertEqual(fitted.__class__.__name__, 'DecisionTreeModel')
    self.assertEqual(fitted.params, expected)

    # Check constructor parameter error
    with self.assertRaises(ValueError):
        DecisionTree(prune=True,
                     criterion='foo',
                     target='Cylinders',
                     nominals=['Make', 'Model'],
                     inputs=['Make', 'Model', 'Horsepower'])

    with self.assertRaises(TypeError):
        DecisionTree(foo='bar')

    # Check fit parameter overrides
    expected = expected_with(max_depth=7, leaf_size=5)
    fitted = estimator.fit(table, prune=False, max_depth=7)
    self.assertEqual(fitted.__class__.__name__, 'DecisionTreeModel')
    self.assertEqual(fitted.params, expected)

    # Check parameter overrides error
    with self.assertRaises(TypeError):
        estimator.fit(table, prune='foo', max_depth=7)

    with self.assertRaises(KeyError):
        estimator.fit(table, foo='bar')
def test_score(self):
    """Score a fitted Cylinders decision tree against the table it was fit on.

    Checks that ``model.score`` returns a ``pandas.Series`` and that the
    ``MeanSquaredError``, ``NObsUsed`` and ``NObsRead`` entries match the
    values pinned below.
    """
    tbl = self.table

    dtree = DecisionTree(target='Cylinders',
                         nominals=['Make', 'Model'],
                         inputs=['Make', 'Model', 'Horsepower'])
    model = dtree.fit(tbl)
    score = model.score(tbl)

    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(score, pd.Series)
    self.assertAlmostEqual(score.loc['MeanSquaredError'], 0.4423817642)
    self.assertEqual(score.loc['NObsUsed'], 426)
    self.assertEqual(score.loc['NObsRead'], 428)
def test_regression_score(self):
    """Score a decision tree fitted with ``MSRP`` as the target.

    Checks that ``model.score`` returns a ``pandas.Series``, that its
    descriptive entries (``Target``, ``Level``, ``NBins``, ``NObsUsed``)
    match the values pinned below, and that each error metric is a float.
    """
    tbl = self.table

    dtree = DecisionTree(target='MSRP', nominals=nominals, inputs=inputs)
    model = dtree.fit(tbl)
    score = model.score(tbl)

    self.assertIsInstance(score, pd.Series)
    self.assertEqual(score.loc['Target'], 'MSRP')
    self.assertEqual(score.loc['Level'], 'INTERVAL')
    self.assertEqual(score.loc['NBins'], 100)
    self.assertEqual(score.loc['NObsUsed'], 428)

    # Only the type of each metric is checked, not its value. The metric
    # name is passed as the failure message so a failure says which entry
    # had the wrong type.
    float_metrics = (
        'AverageSquaredError',
        'AverageAbsoluteError',
        'AverageSquaredLogarithmicError',
        'RootAverageSquaredError',
        'RootAverageAbsoluteError',
        'RootAverageSquaredLogarithmicError',
    )
    for metric in float_metrics:
        self.assertIsInstance(score.loc[metric], float, metric)
def test_classification_score(self):
    """Score a decision tree fitted with ``Origin`` as the target.

    Checks that ``model.score`` returns a ``pandas.Series``, that its
    descriptive entries (``Target``, ``Level``, ``Event``, ``NBins``,
    ``NObsUsed``) match the values pinned below, and that each
    classification metric is a float.
    """
    tbl = self.table

    dtree = DecisionTree(target='Origin', nominals=nominals, inputs=inputs)
    model = dtree.fit(tbl)
    score = model.score(tbl)

    self.assertIsInstance(score, pd.Series)
    self.assertEqual(score.loc['Target'], 'Origin')
    self.assertEqual(score.loc['Level'], 'CLASS')
    self.assertEqual(score.loc['Event'], 'USA')
    self.assertEqual(score.loc['NBins'], 100)
    self.assertEqual(score.loc['NObsUsed'], 428)

    # Only the type of each metric is checked, not its value. The metric
    # name is passed as the failure message so a failure says which entry
    # had the wrong type.
    float_metrics = (
        'AreaUnderROCCurve',
        'CRCut',
        'KS',
        'KSCutOff',
        'MisClassificationRate',
    )
    for metric in float_metrics:
        self.assertIsInstance(score.loc[metric], float, metric)
def test_unload(self):
    """Check that ``model.unload()`` removes the fitted Origin model's table."""
    estimator = DecisionTree(target='Origin',
                             nominals=nominals,
                             inputs=inputs)
    fitted = estimator.fit(self.table)

    def table_exists():
        # Query the server on every call rather than caching the result.
        return fitted.data.table.tableexists().exists

    self.assertEqual(table_exists(), 1)
    fitted.unload()
    self.assertEqual(table_exists(), 0)