Esempio n. 1
0
    def test_calculate_ceteris_paribus(self):
        """Check the return structure of ``utils.calculate_ceteris_paribus``.

        For each variable selection the splits are rebuilt with
        ``utils.calculate_variable_split`` and the profile of the first
        observation is computed.  The result must be a tuple whose first two
        elements are pandas DataFrames.
        """
        # Each case is (variables, size argument given to
        # calculate_variable_split); they run in the listed order.
        cases = (
            (['age', 'country'], 121),
            (['country'], 5),
            (self.X.columns, 15),
        )

        for variables, size in cases:
            variable_splits = utils.calculate_variable_split(
                self.X, variables, size)

            # A copy of the first row is handed over -- presumably so the
            # call cannot modify self.X; confirm against the utils module.
            result = utils.calculate_ceteris_paribus(
                self.exp,
                self.X.iloc[[0], :].copy(),
                variable_splits,
                self.y.iloc[0],
            )

            self.assertIsInstance(result, tuple)
            for position in (0, 1):
                self.assertIsInstance(result[position], pd.DataFrame)
    def test_calculate_variable_profile(self):
        """Check that ``utils.calculate_variable_profile`` returns a DataFrame.

        The profile of the first observation is computed for several
        variable selections, each with freshly calculated splits.
        """
        # Each case is (variables, size argument given to
        # calculate_variable_split); they run in the listed order.
        cases = (
            (['age', 'gender'], 121),
            (['gender'], 5),
            (self.X.columns, 15),
        )

        for variables, size in cases:
            variable_splits = utils.calculate_variable_split(
                self.X, variables, size)
            profile = utils.calculate_variable_profile(
                self.exp, self.X.iloc[[0], :], variable_splits)
            self.assertIsInstance(profile, pd.DataFrame)
    def test_calculate_variable_split(self):
        """Check the type and size of ``utils.calculate_variable_split`` output.

        The result must be a dict, and every value in it must hold no more
        entries than the bound attached to the case: the requested size for
        the first two cases, the number of distinct ``gender`` values in
        ``self.X`` for the last one.
        """
        # Each case is (variables, requested size, bound).  The bound is a
        # callable so it is only evaluated when a value is actually checked.
        cases = (
            (self.X.columns, 101, lambda: 101),
            (['age', 'fare'], 121, lambda: 121),
            (['gender'], 5,
             lambda: np.unique(self.X.loc[:, 'gender']).shape[0]),
        )

        for variables, size, upper_bound in cases:
            variable_splits = utils.calculate_variable_split(
                self.X, variables, size)
            self.assertIsInstance(variable_splits, dict)
            for split_values in variable_splits.values():
                self.assertLessEqual(len(split_values), upper_bound())
Esempio n. 4
0
    def test_single_variable_profile(self):
        """Check the frames produced by ``utils.single_variable_profile``.

        Profiles of the first observation are built for ``age`` and
        ``embarked``.  Each must be a DataFrame with at most 101 rows that
        contains the ``_yhat_``, ``_vname_`` and ``_ids_`` columns; the
        ``age`` column of the age profile must additionally be numeric.
        """
        variable_splits = utils.calculate_variable_split(
            self.X, self.X.columns, 101)

        # Build both profiles first, then run each kind of check over them.
        profiles = {
            variable: utils.single_variable_profile(
                self.exp.predict_function,
                self.exp.model,
                self.X.iloc[[0], :],
                variable,
                variable_splits[variable],
            )
            for variable in ('age', 'embarked')
        }

        for profile in profiles.values():
            self.assertIsInstance(profile, pd.DataFrame)

        for profile in profiles.values():
            self.assertLessEqual(profile.shape[0], 101)

        required_columns = np.array(['_yhat_', '_vname_', '_ids_'])
        for profile in profiles.values():
            self.assertTrue(
                np.isin(required_columns, profile.columns).all())

        age_column = profiles['age'].loc[:, 'age']
        self.assertTrue(pd.api.types.is_numeric_dtype(age_column))
Esempio n. 5
0
    def test_calculate_ceteris_paribus(self):
        """Check the return structure of ``utils.calculate_ceteris_paribus``.

        For each variable selection the splits are rebuilt and the profile of
        the first observation is computed with ``processes=1`` and
        ``verbose=False``.  The result must be a tuple whose first two
        elements are pandas DataFrames.
        """
        # Each case is (variables, size argument given to
        # calculate_variable_split); they run in the listed order.
        cases = (
            (['age', 'gender'], 121),
            (['embarked'], 5),
            (self.X.columns, 15),
        )

        for variables, size in cases:
            variable_splits = utils.calculate_variable_split(
                self.X, variables, size)

            # A copy of the first row is handed over -- presumably so the
            # call cannot modify self.X; confirm against the utils module.
            result = utils.calculate_ceteris_paribus(
                self.exp,
                self.X.iloc[[0], :].copy(),
                variable_splits,
                self.y.iloc[0],
                processes=1,
                verbose=False,
            )

            self.assertIsInstance(result, tuple)
            for position in (0, 1):
                self.assertIsInstance(result[position], pd.DataFrame)
Esempio n. 6
0
    def test_single_variable_profile(self):
        """Check the frames produced by ``utils.single_variable_profile``.

        Profiles of the first observation are built for ``age`` and
        ``country``.  Each must be a DataFrame with at most 101 rows that
        contains the ``_yhat_``, ``_vname_`` and ``_ids_`` columns; the
        ``age`` column of the age profile must additionally have a floating
        point dtype.
        """
        splits = utils.calculate_variable_split(self.X, self.X.columns, 101)
        new_data_age = utils.single_variable_profile(self.exp,
                                                     self.X.iloc[[0], :],
                                                     'age', splits['age'])

        new_data_country = utils.single_variable_profile(
            self.exp, self.X.iloc[[0], :], 'country', splits['country'])

        self.assertIsInstance(new_data_age, pd.DataFrame)
        self.assertIsInstance(new_data_country, pd.DataFrame)

        self.assertLessEqual(new_data_age.shape[0], 101)
        self.assertLessEqual(new_data_country.shape[0], 101)

        required_columns = np.array(['_yhat_', '_vname_', '_ids_'])
        self.assertTrue(
            np.isin(required_columns, new_data_age.columns).all())
        self.assertTrue(
            np.isin(required_columns, new_data_country.columns).all())

        # np.issubdtype takes dtype-like arguments, so pass the column's
        # dtype explicitly instead of relying on the Series being coerced.
        self.assertTrue(
            np.issubdtype(new_data_age.loc[:, 'age'].dtype, np.floating))