Ejemplo n.º 1
0
    def test_hyper_dt(self):
        """Smoke-test HyperDT end to end on a small synthetic binary dataset.

        Builds a random searcher over ``mini_dt_space``, runs a three-trial
        search for one epoch on a 100-row frame (also used as the validation
        set), final-trains the best space sample, and checks the prediction
        count, the evaluation result and the wrapped model type.
        """
        searcher = RandomSearcher(
            mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        fixed_dnn_params = {
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        }
        hyper_model = HyperDT(
            searcher,
            callbacks=[SummaryCallback()],
            reward_metric='accuracy',
            dnn_params=fixed_dnn_params,
            cache_preprocessed_data=True,
            cache_home=homedir + '/cache',
        )

        # Draw order (x1, x2, x3, x4, then the target) is kept fixed so the
        # sequence of calls on NumPy's global generator is unchanged.
        n_rows = 100
        columns = {
            'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
            'x2': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x3': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x4': np.random.normal(0.0, 1.0, size=n_rows),
        }
        target = np.random.randint(0, 2, size=n_rows, dtype='int')
        frame = pd.DataFrame(columns)

        hyper_model.search(frame, target, frame, target, max_trails=3, epochs=1)
        winner = hyper_model.get_best_trail()
        estimator = hyper_model.final_train(
            winner.space_sample, frame, target, epochs=1)

        predictions = estimator.predict(frame)
        metrics = estimator.evaluate(frame, target)

        assert len(predictions) == n_rows
        assert metrics
        assert isinstance(estimator.model, DeepTable)
Ejemplo n.º 2
0
    def test_boston(self):
        """Run a short HyperDT search on the Boston housing regression data.

        Loads the dataset, holds out 20% of the rows for validation, searches
        ``mini_dt_space`` for three trials with ``RootMeanSquaredError`` as the
        reward metric, final-trains the best space sample and checks the
        resulting estimator.
        """
        # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2 --
        # confirm the scikit-learn version this suite runs against still
        # provides it.
        print("Loading datasets...")
        boston_dataset = load_boston()

        df_train = pd.DataFrame(boston_dataset.data)
        df_train.columns = boston_dataset.feature_names
        self.y = pd.Series(boston_dataset.target)
        self.X = df_train

        self.X_train, \
        self.X_test, \
        self.y_train, \
        self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

        # RMSE is an error measure (smaller is better), so the searcher must
        # minimise the reward; maximising it would rank the worst trial first.
        rs = RandomSearcher(
            mini_dt_space,
            optimize_direction=OptimizeDirection.Minimize,
        )
        hdt = HyperDT(
            rs,
            callbacks=[
                SummaryCallback(),
                FileLoggingCallback(rs, output_dir='hotexamples_com/hyn_logs')
            ],
            reward_metric='RootMeanSquaredError',
            dnn_params={
                'hidden_units': ((256, 0, False), (256, 0, False)),
                'dnn_activation': 'relu',
            },
        )
        hdt.search(self.X_train,
                   self.y_train,
                   self.X_test,
                   self.y_test,
                   max_trails=3)

        best_trial = hdt.get_best_trail()

        # The final fit uses the full frame, which includes the X_test rows,
        # so the evaluation below is not a held-out estimate; only its
        # truthiness is asserted.
        estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
        score = estimator.predict(self.X_test)
        result = estimator.evaluate(self.X_test, self.y_test)
        # One prediction is expected per evaluated row.
        assert len(score) == len(self.X_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Ejemplo n.º 3
0
    def bankdata(self):
        """Run a three-trial HyperDT search on the bank dataset and check the result.

        Splits the bank frame 80/20, searches ``mini_dt_space`` with accuracy
        as the reward metric, final-trains the best space sample on the
        training split and verifies predictions and evaluation on the
        hold-out split.

        NOTE(review): the name has no ``test_`` prefix, so default pytest
        discovery will not collect this method -- presumably deliberate;
        confirm.
        """
        rs = RandomSearcher(
            mini_dt_space,
            optimize_direction=OptimizeDirection.Maximize,
        )
        hdt = HyperDT(
            rs,
            callbacks=[
                SummaryCallback(),
                FileLoggingCallback(rs, output_dir='hotexamples_com/hyn_logs')
            ],
            reward_metric='accuracy',
            dnn_params={
                'hidden_units': ((256, 0, False), (256, 0, False)),
                'dnn_activation': 'relu',
            },
        )

        df = dsutils.load_bank()
        df.drop(['id'], axis=1, inplace=True)
        df_train, df_test = train_test_split(df,
                                             test_size=0.2,
                                             random_state=42)
        y = df_train.pop('y')
        y_test = df_test.pop('y')

        # The trial budget is a search argument, passed the same way as in
        # the sibling tests, rather than a HyperDT constructor option.
        hdt.search(df_train, y, df_test, y_test, max_trails=3)
        assert hdt.best_model
        best_trial = hdt.get_best_trail()

        estimator = hdt.final_train(best_trial.space_sample, df_train, y)
        # Score the hold-out split: ``df`` still carries the 'y' column and
        # has more rows than ``y`` (the training-split target), so pairing
        # them mismatches both features and lengths.
        score = estimator.predict(df_test)
        result = estimator.evaluate(df_test, y_test)
        assert len(score) == len(df_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Ejemplo n.º 4
0
# Script fragment: split the bank frame, run a one-trial HyperDT search,
# final-train the best sample, then report timing, metrics and a KS value.
# ``df`` and ``hdt`` are created earlier in the script.
print("data shape: ")
print(df.shape)

# Drop the identifier column and separate the target from the features.
df.drop(['id'], axis=1, inplace=True)
y = df.pop("y")
X = df

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=12)

# Time the search together with the final training run.
t1 = time.time()
hdt.search(X_train, y_train, X_test, y_test, max_trails=1)

best_trial = hdt.get_best_trail()
estimator = hdt.final_train(best_trial.space_sample, X_train, y_train)

print("elapsed: ")
print(time.time() - t1)

r = estimator.evaluate(X_test, y_test, metrics=['accuracy', 'auc'])
print(r)

# NOTE(review): roc_curve expects one score per sample; confirm the shape
# predict_proba returns for this binary task is accepted as-is.
y_score = estimator.predict_proba(X_test)

from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(y_test, y_score, pos_label='yes')
# Largest gap between true-positive and false-positive rate over all thresholds.
ks = max(tpr - fpr)
print(ks)