Exemple #1
0
def run(fast=False):
    """Fit a small random-forest pipeline on the Titanic CSV and show PyTrust output.

    The training CSV is read from
    ``HOME_DIR/resources/datasets/titanic-train.csv`` and passed to
    ``DMD.from_df`` with ``Survived`` as the target and a split ratio of 0.2.
    An imputer + random-forest pipeline is fitted on the resulting train
    object, wrapped in ``PyTrust`` with the ``'recall'`` metric, and then the
    insights are printed and three reports are plotted.

    Args:
        fast: Accepted for the caller's convenience; not read in this function.
    """
    csv_location = os.path.join(HOME_DIR, 'resources', 'datasets', 'titanic-train.csv')
    titanic_frame = pandas.read_csv(csv_location)

    # One feature type per entry; presumably aligned with the CSV's feature
    # columns -- confirm against DMD.from_df.
    numeric = FeatureTypes.numerical
    categorical = FeatureTypes.categorical
    column_kinds = [
        numeric, categorical, categorical, categorical,
        numeric, numeric, numeric, categorical,
        numeric, categorical, categorical,
    ]

    # No explicit test frame is supplied; a split ratio is passed instead.
    train_set, test_set = DMD.from_df(
        df_train=titanic_frame,
        df_test=None,
        is_classification=True,
        target_name='Survived',
        feature_types=column_kinds,
        categorical_encoding=True,
        nan_list=['?'],
        split_ratio=0.2,
    )

    imputation_step = ('Imputer', SimpleImputer())
    estimator_step = ('Estimator', RandomForestClassifier(n_estimators=3))
    fitted_pipeline = Pipeline(steps=[imputation_step, estimator_step])
    fitted_pipeline.fit(train_set.values, train_set.target)

    trust = PyTrust(
        model=fitted_pipeline,
        xtrain=train_set,
        xtest=test_set,
        metric='recall',
    )

    # Print every insight on its own line.
    insight_text = '\n'.join(trust.insights)
    print(insight_text)

    # Each report attribute is read immediately before it is plotted, in this order.
    for report_attr in ('dataset_analysis_report', 'scoring_report', 'sensitivity_report'):
        getattr(trust, report_attr).plot()
Exemple #2
0
    def test_dummy(self):
        """Build a DMDPipeline from the dummy components, check it, then fit and predict.

        Verifies the pipeline's name, transformer list, estimator and
        hyperparameter entries, and that ``predict`` returns a ``DMD``.
        """
        train_data = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))
        test_data = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))

        print('init model')
        # Two hyperparameter dicts are supplied: the assertions below expect the
        # first to pair with the transformer and the second with the estimator.
        per_component_params = [{'name': 'Dummy'}, {'n_estimators': 10}]
        pipeline = DMDPipeline(
            transformers=[TransformerDummy],
            estimator=EstimatorDummy,
            hyperparameters=per_component_params,
        )

        self.assertIsInstance(pipeline.name, str)

        # Transformer classes are expected to have been instantiated.
        self.assertIsInstance(pipeline.transformers, list)
        self.assertEqual(len(pipeline.transformers), 1)
        self.assertIsInstance(pipeline.transformers[0], TransformerBase)
        self.assertEqual(pipeline.transformers[0].name, 'Dummy')

        # Likewise for the estimator, which keeps its kwargs.
        self.assertIsInstance(pipeline.estimator, EstimatorBase)
        self.assertEqual(pipeline.estimator.kwargs['n_estimators'], 10)

        # Hyperparameters are expected as (component name, params) pairs.
        self.assertEqual(len(pipeline.hyperparameters), 2)
        transformer_entry = (pipeline.transformers[0].name, {'name': 'Dummy'})
        self.assertEqual(transformer_entry, pipeline.hyperparameters[0])
        estimator_entry = (pipeline.estimator.name, {'n_estimators': 10})
        self.assertEqual(estimator_entry, pipeline.hyperparameters[1])

        print('fit model')
        pipeline.fit(train_data)

        print('model predict')
        predicted = pipeline.predict(test_data)
        self.assertIsInstance(predicted, DMD)
Exemple #3
0
 def predict_proba(self, dmd: DMD, **kwargs):
     """Print a trace line and return a DMD built from ``dmd.values[:, 0:2]``.

     Args:
         dmd: Input whose ``values`` attribute is sliced.
         **kwargs: Accepted but not read here.
     """
     configured_estimators = self.kwargs.get('n_estimators')
     print('EstimatorDummy', 'predict_proba', 'n_estimators', configured_estimators)
     leading_pair = dmd.values[:, 0:2]
     return DMD(x=leading_pair)
Exemple #4
0
        def fit(self, dmd: DMD, **kwargs):
            """Print a trace line; no fitting is performed and nothing is returned.

            Args:
                dmd: Accepted but not read here.
                **kwargs: Accepted but not read here.
            """
            trace = ('EstimatorDummy', 'fit', 'n_estimators',
                     self.kwargs.get('n_estimators'))
            print(*trace)

        def predict(self, dmd: DMD, **kwargs):
            """Print a trace line and return a DMD built from ``dmd.values[:, 0:1]``.

            Args:
                dmd: Input whose ``values`` attribute is sliced.
                **kwargs: Accepted but not read here.
            """
            trace = ('EstimatorDummy', 'predict', 'n_estimators',
                     self.kwargs.get('n_estimators'))
            print(*trace)
            leading_column = dmd.values[:, 0:1]
            return DMD(x=leading_column)

        def predict_proba(self, dmd: DMD, **kwargs):
            """Print a trace line and return a DMD built from ``dmd.values[:, 0:2]``.

            Args:
                dmd: Input whose ``values`` attribute is sliced.
                **kwargs: Accepted but not read here.
            """
            trace = ('EstimatorDummy', 'predict_proba', 'n_estimators',
                     self.kwargs.get('n_estimators'))
            print(*trace)
            leading_pair = dmd.values[:, 0:2]
            return DMD(x=leading_pair)

    # Two separate all-zero inputs: x is a 3x3 array, y is a 3x1 array.
    dmd_train = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))
    dmd_test = DMD(x=numpy.zeros((3, 3)), y=numpy.zeros((3, 1)))

    print('init model')
    # Pipeline built from the dummy transformer and estimator classes.
    # Two hyperparameter dicts are supplied; presumably the first belongs to
    # the transformer and the second to the estimator -- confirm against
    # DMDPipeline.
    model = DMDPipeline(transformers=[TransformerDummy],
                        estimator=EstimatorDummy,
                        hyperparameters=[{
                            'name': 'Dummy'
                        }, {
                            'n_estimators': 10
                        }])

    print('fit model')
    # Fit on the training object only; dmd_test is not used up to this point.
    model.fit(dmd_train)

    print('model predict')