def run(fast=False):
    """Run the Titanic PyTrust example from CSV load to report plots.

    Reads ``titanic-train.csv`` from ``HOME_DIR/resources/datasets``, builds a
    train/test pair of ``DMD`` objects from it, fits an imputer followed by a
    3-tree random forest, and hands the fitted pipeline to ``PyTrust``. The
    insights are printed and three reports are plotted.

    Args:
        fast: Not read anywhere in this function.
    """
    # read data
    csv_path = os.path.join(HOME_DIR, 'resources', 'datasets', 'titanic-train.csv')
    titanic_df = pandas.read_csv(csv_path)

    numerical = FeatureTypes.numerical
    categorical = FeatureTypes.categorical
    # One entry per feature, in the order expected by DMD.from_df.
    column_types = [
        numerical, categorical, categorical, categorical,
        numerical, numerical, numerical, categorical,
        numerical, categorical, categorical,
    ]

    # No separate test frame is supplied; presumably split_ratio makes
    # DMD.from_df carve the test set out of df_train -- TODO confirm.
    train_dmd, test_dmd = DMD.from_df(
        df_train=titanic_df,
        df_test=None,
        is_classification=True,
        target_name='Survived',
        feature_types=column_types,
        categorical_encoding=True,
        nan_list=['?'],
        split_ratio=0.2,
    )

    pipeline_steps = [
        ('Imputer', SimpleImputer()),
        ('Estimator', RandomForestClassifier(n_estimators=3)),
    ]
    estimator = Pipeline(steps=pipeline_steps)
    estimator.fit(train_dmd.values, train_dmd.target)

    trust = PyTrust(
        model=estimator,
        xtrain=train_dmd,
        xtest=test_dmd,
        metric='recall',
    )

    # some analysis
    insights_text = '\n'.join(trust.insights)
    print(insights_text)

    trust.dataset_analysis_report.plot()
    trust.scoring_report.plot()
    trust.sensitivity_report.plot()
def test_dummy(self):
    """Check DMDPipeline wiring with the dummy transformer and estimator.

    Builds a pipeline from ``TransformerDummy`` and ``EstimatorDummy`` with one
    hyperparameter dict each, asserts the resulting attributes, then fits on
    an all-zero train set and checks that ``predict`` returns a ``DMD``.
    """
    train_data = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))
    test_data = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))

    print('init model')
    pipeline = DMDPipeline(
        transformers=[TransformerDummy],
        estimator=EstimatorDummy,
        hyperparameters=[{'name': 'Dummy'}, {'n_estimators': 10}],
    )

    # Basic attribute types.
    self.assertIsInstance(pipeline.name, str)
    self.assertIsInstance(pipeline.transformers, list)

    # Exactly one transformer, instantiated and named from its hyperparameters.
    self.assertEqual(len(pipeline.transformers), 1)
    self.assertIsInstance(pipeline.transformers[0], TransformerBase)
    self.assertEqual(pipeline.transformers[0].name, 'Dummy')

    # The estimator is instantiated and received its keyword arguments.
    self.assertIsInstance(pipeline.estimator, EstimatorBase)
    self.assertEqual(pipeline.estimator.kwargs['n_estimators'], 10)

    # Hyperparameters are exposed as (component name, params) pairs.
    self.assertEqual(len(pipeline.hyperparameters), 2)
    expected_transformer_entry = (pipeline.transformers[0].name, {'name': 'Dummy'})
    self.assertEqual(expected_transformer_entry, pipeline.hyperparameters[0])
    expected_estimator_entry = (pipeline.estimator.name, {'n_estimators': 10})
    self.assertEqual(expected_estimator_entry, pipeline.hyperparameters[1])

    print('fit model')
    pipeline.fit(train_data)

    print('model predict')
    predicted = pipeline.predict(test_data)
    self.assertIsInstance(predicted, DMD)
def predict_proba(self, dmd: DMD, **kwargs):
    """Log the call and return a DMD holding a two-column slice of the input.

    Args:
        dmd: Input data; only ``dmd.values`` is read.
        **kwargs: Accepted but not used.

    Returns:
        A ``DMD`` whose ``x`` is ``dmd.values[:, 0:2]``.
    """
    n_estimators = self.kwargs.get('n_estimators')
    print('EstimatorDummy', 'predict_proba', 'n_estimators', n_estimators)

    leading_columns = dmd.values[:, 0:2]
    return DMD(x=leading_columns)
# NOTE(review): the line below is a whitespace-collapsed fragment. Its original
# line breaks and indentation are lost, so the nesting described here is an
# assumption to confirm against the real file.
#
# It contains three methods -- fit, predict and predict_proba -- that each print
# the tag 'EstimatorDummy', their own name and self.kwargs.get('n_estimators').
#   * fit does nothing beyond the print.
#   * predict returns DMD(x=dmd.values[:, 0:1]).
#   * predict_proba returns DMD(x=dmd.values[:, 0:2]).
#
# The trailing statements build two DMDs from all-zero arrays (x is 3x3, y is
# 3x1), construct a DMDPipeline from TransformerDummy / EstimatorDummy with two
# hyperparameter dicts, call model.fit(dmd_train) and print 'model predict'.
# Presumably they belong to an enclosing test function rather than to the body
# of predict_proba, and the snippet looks cut off after the last print
# -- TODO confirm both.
def fit(self, dmd: DMD, **kwargs): print('EstimatorDummy', 'fit', 'n_estimators', self.kwargs.get('n_estimators')) pass def predict(self, dmd: DMD, **kwargs): print('EstimatorDummy', 'predict', 'n_estimators', self.kwargs.get('n_estimators')) return DMD(x=dmd.values[:, 0:1]) def predict_proba(self, dmd: DMD, **kwargs): print('EstimatorDummy', 'predict_proba', 'n_estimators', self.kwargs.get('n_estimators')) return DMD(x=dmd.values[:, 0:2]) dmd_train = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1))) dmd_test = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1))) print('init model') model = DMDPipeline(transformers=[TransformerDummy], estimator=EstimatorDummy, hyperparameters=[{ 'name': 'Dummy' }, { 'n_estimators': 10 }]) print('fit model') model.fit(dmd_train) print('model predict')