예제 #1
0
 def test_grunfeld(self):
     """Build a ModelFrame from ``sm.datasets.grunfeld`` and check its layout.

     The loader function is looked up by name through ``self.load_method``.
     The frame must be 220 x 5, use ``invest`` as target name, and expose
     the dataset's ``exog_name`` entries as its data columns.
     """
     loader = getattr(sm.datasets.grunfeld, self.load_method)
     dataset = loader()
     frame = pdml.ModelFrame(dataset)

     self.assertEqual(frame.shape, (220, 5))
     self.assertEqual(frame.target_name, 'invest')
     expected_columns = pd.Index(dataset.exog_name)
     tm.assert_index_equal(frame.data.columns, expected_columns)
예제 #2
0
 def test_objectmapper(self):
     """``ModelFrame.semi_supervised`` must expose the same objects as ``ss``."""
     frame = pdml.ModelFrame([])
     accessor = frame.semi_supervised
     for attr in ('LabelPropagation', 'LabelSpreading'):
         # Identity, not equality: the accessor must not wrap or copy.
         self.assertIs(getattr(accessor, attr), getattr(ss, attr))
예제 #3
0
    def test_frame_data_proparty(self):
        """Exercise assignment to and deletion of ``ModelFrame.data``.

        Steps covered, in order:

        * replace the data with a plain ``pd.DataFrame``;
        * replace the data with a ``ModelFrame`` that has no target;
        * assigning a frame containing a ``.target`` column raises
          ``ValueError``;
        * assigning a ``ModelFrame`` that carries a target raises
          ``ValueError``;
        * ``del mdf.data`` leaves only the target column and makes
          ``mdf.data`` return ``None``.

        The target series must be preserved unchanged throughout.
        """
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')

        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)

        new = pd.DataFrame({
            'X': [1, 2, 3],
            'Y': [4, 5, 6]
        },
                           index=['a', 'b', 'c'],
                           columns=['X', 'Y'])
        # set data property from a plain DataFrame
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'X', 'Y']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        new = pdml.ModelFrame({
            'M': [1, 2, 3],
            'N': [4, 5, 6]
        },
                              index=['a', 'b', 'c'],
                              columns=['M', 'N'])

        # set data property from a ModelFrame without target
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'M', 'N']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        new = pd.DataFrame({
            '.target': [1, 2, 3],
            'K': [4, 5, 6]
        },
                           index=['a', 'b', 'c'])

        # unable to set data if passed value has the same column as the target
        msg = "Passed data has the same column name as the target '.target'"
        with pytest.raises(ValueError, match=msg):
            mdf.data = new

        # unable to set ModelFrame with target attribute
        msg = "Cannot update with ModelFrame which has target attribute"
        with pytest.raises(ValueError, match=msg):
            mdf.data = mdf

        # delete the data property: only the target column must remain
        del mdf.data
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 1))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
        # assertIsNone reports the actual value on failure, unlike
        # assertTrue(... is None), which only says "False is not true".
        self.assertIsNone(mdf.data)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
예제 #4
0
 def test_plot_partial_dependence(self):
     df = pdml.ModelFrame(datasets.load_iris())
     clf = df.ensemble.GradientBoostingRegressor(n_estimators=10)
     df.fit(clf)
     """
예제 #5
0
 def test_objectmapper(self):
     """``ModelFrame.multioutput`` must expose the ``multioutput`` estimators as-is."""
     frame = pdml.ModelFrame([])
     accessor = frame.multioutput
     for attr in ('MultiOutputRegressor', 'MultiOutputClassifier'):
         # Identity check: the accessor must return the original classes.
         self.assertIs(getattr(accessor, attr), getattr(multioutput, attr))
예제 #6
0
 def test_objectmapper(self):
     """``ModelFrame.xgboost`` must expose the ``xgb`` estimator classes as-is."""
     frame = pdml.ModelFrame([])
     accessor = frame.xgboost
     for attr in ('XGBRegressor', 'XGBClassifier'):
         self.assertIs(getattr(accessor, attr), getattr(xgb, attr))
예제 #7
0
 def test_objectmapper(self):
     """``ModelFrame.grid_search`` must expose the same objects as ``gs``."""
     frame = pdml.ModelFrame([])
     accessor = frame.grid_search
     mapped_names = (
         'GridSearchCV',
         'ParameterGrid',
         'ParameterSampler',
         'RandomizedSearchCV',
     )
     for attr in mapped_names:
         self.assertIs(getattr(accessor, attr), getattr(gs, attr))
예제 #8
0
 def test_objectmapper(self):
     """``ModelFrame.random_projection`` must expose the same objects as ``rp``."""
     frame = pdml.ModelFrame([])
     accessor = frame.random_projection
     mapped_names = (
         'GaussianRandomProjection',
         'SparseRandomProjection',
         'johnson_lindenstrauss_min_dim',
     )
     for attr in mapped_names:
         self.assertIs(getattr(accessor, attr), getattr(rp, attr))
예제 #9
0
    def test_check_cv(self):
        """``cross_validation.check_cv(cv=5)`` on the iris frame returns a ``cv.KFold``."""
        frame = pdml.ModelFrame(datasets.load_iris())
        splitter = frame.cross_validation.check_cv(cv=5)
        self.assertIsInstance(splitter, cv.KFold)
예제 #10
0
# One-hot encode each categorical column and drop one level per feature.
# ``browser_dummies`` is created before this excerpt, so it is still
# modified in place.
browser_dummies.drop(['Browser_other'], axis=1, inplace=True)
adex_dummies = get_dummies(
    ads['AdExchange'], prefix='AdExchange').drop(['AdExchange_1'], axis=1)
advi_dummies = get_dummies(
    ads['Adslotvisibility'],
    prefix='Adslotvisibility').drop(['Adslotvisibility_Na'], axis=1)
adfo_dummies = get_dummies(
    ads['Adslotformat'],
    prefix='Adslotformat').drop(['Adslotformat_Na'], axis=1)
os_dummies = get_dummies(ads['OS'], prefix='OS').drop(['OS_other'], axis=1)

# Attach the dummy columns, then discard the raw categorical columns.
ads = ads.join(
    [browser_dummies, adex_dummies, advi_dummies, adfo_dummies,
     os_dummies]).drop(
         ['Browser', 'AdExchange', 'Adslotvisibility', 'Adslotformat', 'OS'],
         axis=1)

# Oversample through the ModelFrame's imbalance accessor, with 'click'
# as the target column.
ads_ml = pdml.ModelFrame(ads, target='click')
sampler = ads_ml.imbalance.over_sampling.RandomOverSampler()
ads = ads_ml.fit_sample(sampler)

# Branch on ``mode`` (defined outside this excerpt) to pick the analysis.
if mode == 'rfe':
    # Recursive feature elimination with 5-fold cross-validation, wrapped
    # around a LogisticRegression with default settings.
    from sklearn.feature_selection import RFECV
    from sklearn.linear_model import LogisticRegression
    logit_rfe = LogisticRegression()
    rfe = RFECV(logit_rfe, cv=5)
    # Fit on every column except 'click'; 'click' is the response.
    rfe = rfe.fit(ads.drop(['click'], axis=1), ads['click'])
    print(rfe.support_)
    result = rfe
elif mode == 'stat':
    import statsmodels.api as sm
    # Constant column of ones -- presumably the intercept term for a
    # statsmodels model fitted after this excerpt; TODO confirm.
    ads['intercept'] = 1.0
    # Cast every column, including the new intercept, to int.
    ads = ads.astype(int)
예제 #11
0
 def test_objectmapper(self):
     """``ModelFrame.neural_network`` must expose the same classes as ``nn``."""
     frame = pdml.ModelFrame([])
     accessor = frame.neural_network
     for attr in ('BernoulliRBM', 'MLPClassifier', 'MLPRegressor'):
         self.assertIs(getattr(accessor, attr), getattr(nn, attr))
예제 #12
0
 def test_nile(self):
     """Building a ModelFrame from ``sm.datasets.nile`` must raise ``ValueError``.

     The loader is looked up by name through ``self.load_method``; the
     error text must mention that a dataset without exog cannot be read.
     """
     data = getattr(sm.datasets.nile, self.load_method)()
     msg = "Unable to read statsmodels Dataset without exog"
     # ``assertRaisesRegexp`` is a deprecated alias that was removed in
     # Python 3.12.  The message is a plain literal, so a substring check
     # on the caught exception is equivalent and works on 2.7 and 3.x.
     with self.assertRaises(ValueError) as caught:
         pdml.ModelFrame(data)
     self.assertIn(msg, str(caught.exception))
예제 #13
0
 def test_modechoice(self):
     """Build a ModelFrame from ``sm.datasets.modechoice`` and check its layout.

     Expected: shape 840 x 7, target name ``choice``, and data columns
     equal to the dataset's ``exog_name`` entries.
     """
     loader = getattr(sm.datasets.modechoice, self.load_method)
     dataset = loader()
     frame = pdml.ModelFrame(dataset)

     self.assertEqual(frame.shape, (840, 7))
     self.assertEqual(frame.target_name, 'choice')
     expected_columns = pd.Index(dataset.exog_name)
     tm.assert_index_equal(frame.data.columns, expected_columns)
예제 #14
0
 def test_longley(self):
     """Build a ModelFrame from ``sm.datasets.longley`` and check its layout.

     Expected: shape 16 x 7, target name ``TOTEMP``, and data columns
     equal to the dataset's ``exog_name`` entries.
     """
     loader = getattr(sm.datasets.longley, self.load_method)
     dataset = loader()
     frame = pdml.ModelFrame(dataset)

     self.assertEqual(frame.shape, (16, 7))
     self.assertEqual(frame.target_name, 'TOTEMP')
     expected_columns = pd.Index(dataset.exog_name)
     tm.assert_index_equal(frame.data.columns, expected_columns)
예제 #15
0
    def test_frame_init_dict_list(self):
        """Check ``ModelFrame`` construction from DataFrame/dict data and list targets.

        Four constructions are verified:

        * a ``pd.DataFrame`` with a list target (target stored as ``.target``);
        * a plain ``dict`` with a list target;
        * a plain ``dict`` with an existing column name (``'A'``) as target;
        * a ``dict`` with explicit ``index``/``columns`` and no target.
        """
        # initialization by dataframe and list
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = [1, 2, 3]
        mdf = pdml.ModelFrame(df, target=s)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        expected = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')
        self.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # initialization by dict and list
        df = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
        s = [1, 2, 3]
        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        expected = pd.DataFrame(df)
        self.assert_frame_equal(mdf.data, expected)
        expected = pd.Series([1, 2, 3], index=[0, 1, 2], name='.target')
        self.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # initialization by dict with an existing column named as target
        mdf = pdml.ModelFrame(df, target='A')
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        self.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        self.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        expected = pd.DataFrame(df)
        self.assert_frame_equal(mdf.data, expected[['B', 'C']])
        self.assert_series_equal(mdf.target, expected['A'])
        self.assertEqual(mdf.target.name, 'A')
        self.assertEqual(mdf.target_name, 'A')

        # initialization by dict with explicit index/columns and no target
        mdf = pdml.ModelFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                              index=['a', 'b', 'c'],
                              columns=['A', 'B', 'C'])
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, mdf)
        self.assertEqual(mdf.target_name, '.target')
예제 #16
0
    elif aresults.loc[i, "Payment Typology 1"] == "Medicare":
        aresults.loc[i, "Payment Typology 1"] = 1
    elif aresults.loc[i, "Payment Typology 1"] == "Blue Cross/Blue Shield":
        aresults.loc[i, "Payment Typology 1"] = 2
    else:
        aresults.loc[i, "Payment Typology 1"] = 3
# Parenthesised form prints the same text on Python 2 and is valid Python 3.
print("aresults")
#print df

# Replace the "120 +" marker with the number 120 before scaling.
test1 = test1.replace("120 +", 120)
test2 = test2.replace("120 +", 120)
test3 = test3.replace("120 +", 120)
test4 = test4.replace("120 +", 120)
test5 = test5.replace("120 +", 120)
scaler = MinMaxScaler()

# ``fit_transform`` re-fits the shared scaler on each frame separately.
# The result is wrapped in a ModelFrame with the original column labels.
test1 = pdml.ModelFrame(scaler.fit_transform(test1), columns=test1.columns)
test2 = pdml.ModelFrame(scaler.fit_transform(test2), columns=test2.columns)
test3 = pdml.ModelFrame(scaler.fit_transform(test3), columns=test3.columns)
test4 = pdml.ModelFrame(scaler.fit_transform(test4), columns=test4.columns)
test5 = pdml.ModelFrame(scaler.fit_transform(test5), columns=test5.columns)

# NOTE(review): this substitutes the *string* "120", whereas the test
# frames above use the integer 120 -- confirm that this is intentional.
bresults = aresults.replace("120 +", "120")

bresults = pdml.ModelFrame(scaler.fit_transform(bresults),
                           columns=bresults.columns)

import matplotlib.pyplot as plt
plt.rc("font", size=14)
from sklearn.linear_model import LogisticRegression
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
# ``model_selection`` and fall back only on releases that predate it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
import seaborn as sns
예제 #17
0
 def test_objectmapper(self):
     """``ModelFrame.naive_bayes`` must expose the same classes as ``nb``."""
     frame = pdml.ModelFrame([])
     accessor = frame.naive_bayes
     for attr in ('GaussianNB', 'MultinomialNB', 'BernoulliNB'):
         self.assertIs(getattr(accessor, attr), getattr(nb, attr))
예제 #18
0
 def test_diabetes(self):
     """A ModelFrame built from ``datasets.load_diabetes()`` must be 442 x 11."""
     frame = pdml.ModelFrame(datasets.load_diabetes())
     expected_shape = (442, 11)
     self.assertEqual(frame.shape, expected_shape)
예제 #19
0
 def test_objectmapper(self):
     """``ModelFrame.dummy`` must expose the same classes as the ``dummy`` module."""
     frame = pdml.ModelFrame([])
     accessor = frame.dummy
     for attr in ('DummyClassifier', 'DummyRegressor'):
         self.assertIs(getattr(accessor, attr), getattr(dummy, attr))
예제 #20
0
 def test_digits(self):
     """A ModelFrame built from ``datasets.load_digits()`` must be 1797 x 65."""
     frame = pdml.ModelFrame(datasets.load_digits())
     expected_shape = (1797, 65)
     self.assertEqual(frame.shape, expected_shape)
예제 #21
0
 def test_objectmapper(self):
     """``ModelFrame.metrics.make_scorer`` must be ``metrics.make_scorer`` itself."""
     frame = pdml.ModelFrame([])
     exposed = frame.metrics.make_scorer
     self.assertIs(exposed, metrics.make_scorer)
예제 #22
0
 def test_iris(self):
     """A ModelFrame built from ``datasets.load_iris()`` must be 150 x 5."""
     frame = pdml.ModelFrame(datasets.load_iris())
     expected_shape = (150, 5)
     self.assertEqual(frame.shape, expected_shape)
예제 #23
0
# Target labels; the feature matrix ``X`` is defined before this excerpt.
y = df['Class']

df.head()

from pandas_ml import ConfusionMatrix

from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

# Standardise the features, then keep the first 10 principal components.
data = scale(X)
pca = PCA(n_components=10)
X = pca.fit_transform(data)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Oversample the training split with SMOTE through the ModelFrame accessor.
df = pdml.ModelFrame(X_train, target=y_train)
sampler = df.imbalance.over_sampling.SMOTE()
oversampled = df.fit_sample(sampler)
# Columns 1..10 are taken as the features and 'Class' as the labels.
X2, y2 = oversampled.iloc[:, 1:11], oversampled['Class']
print(X2)
print(y2)
# ``as_matrix()`` was deprecated in pandas 0.23 and removed in 1.0;
# ``.values`` yields the same ndarray on every pandas version.
X2 = X2.values
y2 = y2.values

# Feed-forward binary classifier: 10 inputs -> 27 -> 20 -> 15 -> 1 (sigmoid).
model = Sequential()
model.add(Dense(27, input_dim=10, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adagrad',
예제 #24
0
 def test_boston(self):
     """A ModelFrame built from ``datasets.load_boston()`` must be 506 x 14."""
     frame = pdml.ModelFrame(datasets.load_boston())
     expected_shape = (506, 14)
     self.assertEqual(frame.shape, expected_shape)
    def test_check_cv(self):
        """``model_selection.check_cv(cv=5)`` on the iris frame returns an ``ms.KFold``."""
        frame = pdml.ModelFrame(datasets.load_iris())
        splitter = frame.model_selection.check_cv(cv=5)
        self.assertIsInstance(splitter, ms.KFold)
예제 #26
0
    def test_frame_target_proparty(self):
        """Exercise assignment to ``ModelFrame.target``.

        Steps covered, in order:

        * assign a Series already named ``.target``;
        * assign a Series with another name: a ``UserWarning`` is expected
          and the target keeps the name ``.target``;
        * assign a Series with a mismatching index: ``ValueError``;
        * assign a plain list of matching length;
        * assign a list of wrong length: ``ValueError``;
        * assign ``None``: the target column is removed while
          ``target_name`` stays ``.target``.
        """
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=s)

        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
        # set target property
        mdf.target = new

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assert_series_equal(mdf.target, new)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        with tm.assert_produces_warning(UserWarning):
            new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
            # set target property
            mdf.target = new

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
            self.assert_index_equal(mdf.columns,
                                    pd.Index(['.target', 'A', 'B', 'C']))
            self.assert_frame_equal(mdf.data, df)

            exp_target = pd.Series(new, name='.target')
            self.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        new = pd.Series([4, 5, 6], name='.target')
        # ``assertRaisesRegexp`` is a deprecated alias removed in Python
        # 3.12.  The expected messages are plain literals, so a substring
        # check on the caught exception is equivalent and portable.
        with self.assertRaises(ValueError) as caught:
            mdf.target = new
        self.assertIn('data and target must have equal index',
                      str(caught.exception))

        # set target property
        mdf.target = [7, 8, 9]

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
        self.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        with self.assertRaises(ValueError) as caught:
            mdf.target = [1, 2]
        self.assertIn('Wrong number of items passed 2, placement implies 3',
                      str(caught.exception))

        # set target property
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
예제 #27
0
 def test_objectmapper(self):
     """``ModelFrame.mixture`` must expose the same classes as the ``mixture`` module."""
     frame = pdml.ModelFrame([])
     accessor = frame.mixture
     for attr in ('GMM', 'DPGMM', 'VBGMM'):
         self.assertIs(getattr(accessor, attr), getattr(mixture, attr))
예제 #28
0
    def test_frame_init_df_target_setter(self):
        """Exercise assigning a multi-column DataFrame to ``ModelFrame.target``.

        Steps covered, in order:

        * a frame built without target reports ``has_target()`` as false;
        * assigning a two-column DataFrame makes it the (multi) target;
        * assigning another two-column DataFrame emits a ``UserWarning``
          and its columns are renamed to the existing target names;
        * assigning a three-column DataFrame replaces the target entirely.
        """
        # initialization by dataframe and dataframe
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        mdf = pdml.ModelFrame(df)
        self.assertFalse(mdf.has_target())
        target = pd.DataFrame({
            't1': [10, 11, 12],
            't2': [13, 14, 15]
        },
                              index=['a', 'b', 'c'])
        mdf.target = target

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 5))
        expected = pd.DataFrame(
            {
                't1': [10, 11, 12],
                't2': [13, 14, 15],
                'A': [1, 2, 3],
                'B': [4, 5, 6],
                'C': [7, 8, 9]
            },
            index=['a', 'b', 'c'],
            columns=['t1', 't2', 'A', 'B', 'C'])
        self.assert_frame_equal(mdf, expected)
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['t1', 't2', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assert_frame_equal(mdf.target, target)
        self.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
        self.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
        self.assertTrue(mdf.has_multi_targets())

        target = pd.DataFrame({
            'x1': [20, 21, 22],
            'x2': [23, 24, 25]
        },
                              index=['a', 'b', 'c'])

        with tm.assert_produces_warning(UserWarning):
            # when the target has the same length as the target_name,
            # is renamed to existing target ['t1', 't2']
            mdf.target = target

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 5))
        expected = pd.DataFrame(
            {
                't1': [20, 21, 22],
                't2': [23, 24, 25],
                'A': [1, 2, 3],
                'B': [4, 5, 6],
                'C': [7, 8, 9]
            },
            index=['a', 'b', 'c'],
            columns=['t1', 't2', 'A', 'B', 'C'])
        self.assert_frame_equal(mdf, expected)
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['t1', 't2', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        expected = pd.DataFrame({
            't1': [20, 21, 22],
            't2': [23, 24, 25]
        },
                                index=['a', 'b', 'c'])
        self.assert_frame_equal(mdf.target, expected)
        self.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
        self.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
        self.assertTrue(mdf.has_multi_targets())

        target = pd.DataFrame(
            {
                'x1': [20, 21, 22],
                'x2': [23, 24, 25],
                'x3': [25, 26, 27]
            },
            index=['a', 'b', 'c'])

        # when the target has the different length as the target_name,
        # target is being replaced
        mdf.target = target

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 6))
        expected = pd.DataFrame(
            {
                'x1': [20, 21, 22],
                'x2': [23, 24, 25],
                'x3': [25, 26, 27],
                'A': [1, 2, 3],
                'B': [4, 5, 6],
                'C': [7, 8, 9]
            },
            index=['a', 'b', 'c'],
            columns=['x1', 'x2', 'x3', 'A', 'B', 'C'])
        self.assert_frame_equal(mdf, expected)
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['x1', 'x2', 'x3', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assert_frame_equal(mdf.target, target)
        self.assert_index_equal(mdf.target.columns,
                                pd.Index(['x1', 'x2', 'x3']))
        self.assert_index_equal(mdf.target_name, pd.Index(['x1', 'x2', 'x3']))
        self.assertTrue(mdf.has_multi_targets())
예제 #29
0
    def test_frame_target_proparty(self):
        """Verify the behaviour of the ``ModelFrame.target`` setter.

        Assigns, in turn: a Series named ``.target``, a Series with another
        name (``UserWarning`` expected, name coerced to ``.target``), a
        Series with a mismatching index (``ValueError``), a list of the
        right length, a list of the wrong length (``ValueError``), and
        ``None`` (target column removed, ``target_name`` unchanged).
        """
        row_labels = ['a', 'b', 'c']
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=row_labels,
                          columns=['A', 'B', 'C'])
        initial_target = pd.Series([1, 2, 3], index=row_labels)
        mdf = pdml.ModelFrame(df, target=initial_target)

        def check_with_target(expected_target):
            # Shared assertions for every state in which a target is set.
            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(row_labels))
            tm.assert_index_equal(mdf.columns,
                                  pd.Index(['.target', 'A', 'B', 'C']))
            tm.assert_frame_equal(mdf.data, df)
            tm.assert_series_equal(mdf.target, expected_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        # Series already carrying the target name.
        named = pd.Series([4, 5, 6], index=row_labels, name='.target')
        mdf.target = named
        check_with_target(named)

        # Series with a foreign name: warns, then is stored as '.target'.
        with tm.assert_produces_warning(UserWarning):
            renamed = pd.Series([4, 5, 6], index=row_labels, name='xxx')
            mdf.target = renamed
            check_with_target(pd.Series(renamed, name='.target'))

        # Series whose index does not match the data is rejected.
        misaligned = pd.Series([4, 5, 6], name='.target')
        with pytest.raises(ValueError,
                           match='data and target must have equal index'):
            mdf.target = misaligned

        # Plain list of matching length.
        mdf.target = [7, 8, 9]
        check_with_target(
            pd.Series([7, 8, 9], index=row_labels, name='.target'))

        # The wording of the length-mismatch error depends on the pandas
        # version flag exposed by pdml.compat.
        msg = ('Length of passed values is 2, index implies 3'
               if pdml.compat._PANDAS_ge_023 else
               'Wrong number of items passed 2, placement implies 3')
        with pytest.raises(ValueError, match=msg):
            mdf.target = [1, 2]

        # Assigning None drops the target column but keeps the target name.
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(row_labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
예제 #30
0
 def test_engel(self):
     """Build a ModelFrame from ``sm.datasets.engel`` and check its layout.

     Expected: shape 235 x 2, target name ``income``, and data columns
     equal to the dataset's ``exog_name`` entries.
     """
     loader = getattr(sm.datasets.engel, self.load_method)
     dataset = loader()
     frame = pdml.ModelFrame(dataset)

     self.assertEqual(frame.shape, (235, 2))
     self.assertEqual(frame.target_name, 'income')
     expected_columns = pd.Index(dataset.exog_name)
     tm.assert_index_equal(frame.data.columns, expected_columns)