Example #1
    def H2OBuildModel(self):
        weather = "serm.csv"
        weather_df = h2o.import_file(path=weather)
        global model
        global test
        train, test, valid = weather_df.split_frame(ratios=[0.7, 0.15])

        estimator_index = self.tabWidget_PM_Estimator.currentIndex()
        
        if estimator_index == 0:
            _distribution = self.comboBox_PM_distribution.currentText()
            _activation = self.comboBox_PM_activation.currentText()
            # Combo boxes return strings; convert them to the types H2O expects
            # (requires `import ast`; assumes the hidden combo holds a list literal such as "[10, 10]")
            _hidden = ast.literal_eval(self.comboBox_PM_hidden.currentText())
            _epochs = self.spinBox_PM_epochs.value()
            _sparse = self.comboBox_PM_sparse.currentText() == "True"
            _shuffle = self.comboBox_PM_shuffle.currentText() == "True"
            model = H2ODeepLearningEstimator(distribution=_distribution,
                                             activation=_activation,
                                             hidden=_hidden,
                                             shuffle=_shuffle,
                                             sparse=_sparse,
                                             epochs=_epochs)
                
        self.completed = 0

        # Cosmetic progress fill; QProgressBar.setValue() expects an int
        while self.completed < 100:
            self.completed += 0.0001
            self.progressBar.setValue(int(self.completed))
            
        model.train(y="risk", x=["datetime","ffwi","smoke","temperature", "humidity", "windspeed"], training_frame=train)
        metrics = model.model_performance()
        self.lineEdit_PM_MSE.setText(str(round(metrics.mse(), 5)))
        self.lineEdit_PM_RMSE.setText(str(round(metrics.rmse(), 5)))
        self.lineEdit_PM_MAE.setText(str(round(metrics.mae(), 5)))
        self.lineEdit_PM_MRD.setText(str(round(metrics.mean_residual_deviance(), 5)))
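Example #2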
 def bake(self) -> H2ODeepLearningEstimator:
     fr = titanic_frame()
     fr["parch"] = fr["parch"].asfactor()
     model = H2ODeepLearningEstimator(epochs=50, reproducible=True)
     model.train(y="parch",
                 training_frame=fr,
                 ignored_columns=["name", "ticket", "boat", "home.dest"])
     return model
Example #3
 def bake(self) -> H2ODeepLearningEstimator:
     fr = names_frame()
     fr = fr[:5000, :]
     fr["name"] = fr["name"].ascharacter().asfactor()  # trim nlevels()
     assert 256 < fr["name"].nlevels()[0] < 500
     model = H2ODeepLearningEstimator(epochs=100, reproducible=True)
     model.train(y="sex", training_frame=fr)
     return model
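Example #4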
 def bake(self) -> H2ODeepLearningEstimator:
     fr = stars_frame()
     assert fr.type("distance") == "int"
     model = H2ODeepLearningEstimator(epochs=100, reproducible=True)
     model.train(y="distance",
                 training_frame=fr,
                 ignored_columns=["name1", "name2"])
     return model
Example #5
    def demo_body(go):
        """
        Demo of H2O's Deep Learning model.

        This demo uploads a dataset to h2o, parses it, and shows a description.
        Then it divides the dataset into training and test sets, builds a GLM
        from the training set, and makes predictions for the test set.
        Finally, default performance metrics are displayed.
        """
        go()
        # Connect to H2O
        h2o.init()

        go()
        # Upload the prostate dataset that comes included in the h2o python package
        prostate = h2o.upload_file(data_file("h2o_data/prostate.csv"))

        go()
        # Print a description of the prostate data
        prostate.summary()

        go()
        # Randomly split the dataset into ~70/30, training/test sets
        r = prostate[0].runif()
        train = prostate[r < 0.70]
        test = prostate[r >= 0.70]

        go()
        # Convert the response columns to factors (for binary classification problems)
        train["CAPSULE"] = train["CAPSULE"].asfactor()
        test["CAPSULE"] = test["CAPSULE"].asfactor()

        go()
        # Build a (classification) deep learning model
        from h2o.estimators import H2ODeepLearningEstimator
        prostate_dl = H2ODeepLearningEstimator(activation="Tanh",
                                               hidden=[10, 10, 10],
                                               epochs=10000)
        prostate_dl.train(x=list(set(prostate.col_names) - {"ID", "CAPSULE"}),
                          y="CAPSULE",
                          training_frame=train)

        go()
        # Show the model
        prostate_dl.show()

        go()
        # Predict on the test set and show the first ten predictions
        predictions = prostate_dl.predict(test)
        predictions.show()

        go()
        # Show default performance metrics
        performance = prostate_dl.model_performance(test)
        performance.show()
Example #6
 def final_train(self, train: pd.DataFrame, valid: pd.DataFrame):
     train_hex = h2o.H2OFrame(train)
     valid_hex = h2o.H2OFrame(valid)
     self.listCheckpointsNN = list()
     counter = 1
     for model in self.listNNModels:
         name = str(model.model_id) + str(counter)
         # Resume training from the existing model via the checkpoint mechanism
         model_chkp = H2ODeepLearningEstimator(
             checkpoint=model.model_id,
             model_id=name,
             activation=model.actual_params.get("activation"),
             training_frame=train_hex,
             validation_frame=valid_hex,
             stopping_tolerance=1e-4,
             stopping_rounds=3,
             mini_batch_size=24,
             epochs=1e6,
             hidden=model.actual_params.get("hidden"),
             rate=model.actual_params.get("rate"),
             rate_annealing=model.actual_params.get("rate_annealing"),
             distribution=model.actual_params.get("distribution"),
             categorical_encoding=model.actual_params.get(
                 "categorical_encoding"),
             standardize=model.actual_params.get("standardize"),
             adaptive_rate=model.actual_params.get("adaptive_rate"),
             nesterov_accelerated_gradient=model.actual_params.get(
                 "nesterov_accelerated_gradient"),
             shuffle_training_data=model.actual_params.get(
                 "shuffle_training_data"),
             stopping_metric=model.actual_params.get("stopping_metric"),
             train_samples_per_iteration=0,
             score_validation_samples=0,  # downsample validation set for faster scoring
             score_duty_cycle=0.025,  # don't score more than 2.5% of the wall time
             max_w2=model.actual_params.get("max_w2")  # can help improve stability for Rectifier
         )
         model_chkp.train(x=self.predictors,
                          y=self.response,
                          training_frame=train_hex,
                          validation_frame=valid_hex)
         counter += 1
         self.listCheckpointsNN.append(model_chkp)
     self.listNNModels = self.listCheckpointsNN
     NNH2o.print_model_params(self.listNNModels, False)
def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    dl = H2ODeepLearningEstimator(epochs=1)
    dl.train(x=x, y=y, training_frame=airlines, validation_frame=airlines)
    print(dl)
    with Capturing() as original_output:
        dl.show()

    original_model_filename = tempfile.mkdtemp()
    original_model_filename = dl.download_mojo(original_model_filename)

    key = h2o.lazy_import(original_model_filename)
    fr = h2o.get_frame(key[0])
    generic_mojo_model = H2OGenericEstimator(model_key=fr)
    generic_mojo_model.train()
    compare_params(dl, generic_mojo_model)
    print(generic_mojo_model)
    with Capturing() as generic_output:
        generic_mojo_model.show()

    output_test(str(original_output), str(generic_output), strip_part,
                algo_name, generic_algo_name)

    predictions = generic_mojo_model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421
    assert generic_mojo_model._model_json["output"][
        "model_summary"] is not None
    assert len(generic_mojo_model._model_json["output"]
               ["model_summary"]._cell_values) > 0

    # Test constructor generating the model from existing MOJO file
    generic_mojo_model_from_file = H2OGenericEstimator.from_file(
        original_model_filename)
    assert generic_mojo_model_from_file is not None
    predictions = generic_mojo_model_from_file.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421
    assert generic_mojo_model_from_file._model_json["output"][
        "model_summary"] is not None
    assert len(generic_mojo_model_from_file._model_json["output"]
               ["model_summary"]._cell_values) > 0

    generic_mojo_filename = tempfile.mkdtemp("zip", "genericMojo")
    generic_mojo_filename = generic_mojo_model_from_file.download_mojo(
        path=generic_mojo_filename)
    assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
        original_model_filename)
# Set mapper
df_mapper = DataFrameMapper([(training_columns, None),
                             (response_column, None)])

# Test data - pandas to sklearn
test_tmp = df_mapper.fit_transform(testing_frame)

# [row : column]
column_count = len(test_tmp[0, :])

# ground truth
tY = np.array(testing_frame['RUL'])

# Building model
model1 = H2ODeepLearningEstimator(hidden=[200, 200],
                                  score_each_iteration=False,
                                  variable_importances=True)
model2 = H2ODeepLearningEstimator(hidden=[200, 200],
                                  score_each_iteration=False,
                                  variable_importances=True)
model3 = H2ODeepLearningEstimator(hidden=[200, 200],
                                  score_each_iteration=False,
                                  variable_importances=True)
model4 = H2ODeepLearningEstimator(hidden=[200, 200],
                                  score_each_iteration=False,
                                  variable_importances=True)
model5 = H2ODeepLearningEstimator(hidden=[200, 200],
                                  score_each_iteration=False,
                                  variable_importances=True)

# Define training columns
training_columns.remove(response_column)
training_columns.remove("UnitNumber")
training_columns.remove("Time")

# split frames
train, validate = hTrain.split_frame([_validation_ratio])
test = hTest
ground_truth = np.array(pTest['RUL'])

# Building model
model_arr = list(range(_nmodels))

print("Building models")
print("---------------")
for i in range(_nmodels):
    model_arr[i] = H2ODeepLearningEstimator(hidden=[200, 200], score_each_iteration=True, variable_importances=True)
print("Build model complete...\n")

print("Train models")
print("------------")
for i in range(_nmodels):
    print("Train : " + str(i + 1) + "/" + str(_nmodels))
    model_arr[i].train(x=training_columns, y=response_column, training_frame=train)
print("Train model complete...\n")

print("Validate models")
print("---------------")
mse_val = np.zeros(shape=_nmodels)
for i in range(_nmodels):
    mse_val[i] = model_arr[i].model_performance(test_data=validate).mse()
print("Validation model complete...\n")
Example #10
# Remove anomalies
p_filtered = p_train.drop(p_train.index[rm_index])

# Convert pandas to H2OFrame
h_data = h2o.H2OFrame(p_filtered)
h_data.set_names(list(p_data.columns))

# DeepLearning model training and validation
h_train, h_validate = h_data.split_frame(ratios=[_vr_model])

# Extract ground truth data
ground_truth_data = np.array(p_test[response_column])

# Define columns
dl_train_columns = list(p_filtered.columns)
rm_columns = ['RUL', 'UnitNumber', 'Time']
for column in rm_columns:
    dl_train_columns.remove(column)

model = H2ODeepLearningEstimator(epochs=100,
                                 loss='Automatic',
                                 activation='RectifierWithDropout',
                                 distribution='poisson',
                                 hidden=[512])
model.train(x=dl_train_columns,
            y=response_column,
            training_frame=h_train,
            validation_frame=h_validate)
performance = model.model_performance(test_data=h_test)
print(performance)
Example #11
import h2o
from h2o.estimators import H2ODeepLearningEstimator

h2o.init()

train = h2o.import_file('dataset/train.csv')
test = h2o.import_file('dataset/test.csv')

# define columns
response_column = 'SalePrice'
training_columns = train.col_names
training_columns.remove(response_column)

#train[training_columns].describe()
#OverallQual, OverallCond, GrLivArea

model = H2ODeepLearningEstimator(nfolds=10, epochs=100, hidden=[500, 500])
model.train(x=training_columns, y=response_column, training_frame=train)

h2o.export_file(frame=model.predict(test_data=test), path='prediction.csv', force=True)

print(model.model_performance())
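Example #12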
training_columns = list(pData.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')

response_column = 'RUL'

hyper_parameters = {
    'activation': [
        'tanh', 'tanh_with_dropout', 'rectifier', 'rectifier_with_dropout',
        'maxout', 'maxout_with_dropout'
    ],
    'distribution': [
        'auto', 'bernoulli', 'multinomial', 'gaussian', 'poisson', 'gamma',
        'tweedie', 'laplace', 'quantile', 'huber'
    ],
    'epochs': [100],
    'hidden': [[512]],  # each grid candidate for hidden is itself a layer-size list
    'loss': ['automatic']
}

grid_search = H2OGridSearch(H2ODeepLearningEstimator(nfolds=10),
                            hyper_params=hyper_parameters)
grid_search.train(x=training_columns,
                  y='RUL',
                  training_frame=hTrain,
                  validation_frame=hValidate)
grid_search.show()
models = grid_search.get_grid(sort_by="mse")
print(models)
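A short follow-up sketch for pulling the single best model out of the sorted grid (ascending "mse", so the first entry wins):

# The sorted grid lists models best-first for an ascending metric
best_model = models.models[0]
print(best_model.model_performance(valid=True))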
Example #13
    def binary_class(self, type, target, duplicated, sep, exclude,
                     max_runtime_secs):

        img = plt.figure()
        self.write_image(img, 'blank', width=600, height=500)

        self.gstep(0, "Reading Dataset")

        buffer = io.StringIO()
        self.dfo.columns = [c.replace(' ', '_') for c in self.dfo.columns]

        self.gstep(1, "Verify if duplicated")
        self.insert_text(
            "shape",
            str(self.dfo.shape[0]) + ' / ' + str(self.dfo.shape[1]))
        self.get_classes(self.dfo, target)
        self.insert_text("nclasses", str(self.nclasses))
        self.insert_text("allclasses", str(self.allclasses))
        shape_before = self.dfo.shape[0]
        shape_after = shape_before
        if duplicated:
            self.dfo = self.dfo.drop_duplicates(self.dfo.columns)
            shape_after = self.dfo.shape[0]
        if shape_before == shape_after:
            self.insert_text("duplicated", "none")
        else:
            self.insert_text("duplicated", str(shape_after - shape_before))

        if exclude != 'none':
            self.dfo.drop(columns=exclude, inplace=True)

        self.gstep(1, "Detecting hi frequency features")
        exclude = self.hi_freq(self.dfo)
        self.dfo.drop(columns=exclude['Feature'], inplace=True)

        hi_freq = self.w_table(data=exclude,
                               border=0,
                               align='left',
                               collapse='collapse',
                               color='black',
                               foot=False)
        self.insert_text("excluded", hi_freq)

        self.gstep(1, "Encoding as sort_by_response")
        self.dfo_encode = self.encode(self.dfo.copy())

        self.gstep(1, "Basic Informations")

        df_info = pd.DataFrame()
        for column in self.dfo.columns:
            not_null = int(self.dfo.shape[0] -
                           int(self.dfo[column].isna().sum()))
            dtype = self.dfo[column].dtypes
            df_info = df_info.append(
                {
                    'column': column,
                    'not_null': not_null,
                    'dtype': dtype
                },
                ignore_index=True)
        df_info['not_null'] = df_info['not_null'].apply(lambda x: int(x))
        df_info['percent'] = df_info['not_null'].apply(
            lambda x: float("{:.4f}".format(1 - (x / self.dfo.shape[0]))))
        info_dataset = self.w_table(data=df_info,
                                    border=0,
                                    align='left',
                                    collapse='collapse',
                                    color='black',
                                    foot=False)
        self.insert_text("info_dataset", info_dataset)

        self.gstep(1, "Computing Regression")

        Y = self.dfo_encode[target]
        dfo_num = self.dfo_encode[self.dfo_encode._get_numeric_data().columns]
        X = dfo_num.drop(columns=[target])

        # Create the train and test splits
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.3,
                                                            random_state=42)

        cols = X.columns
        formule = " + ".join(map(str, cols))
        formule = target + " ~ " + formule
        reg = smf.ols(formule, data=dfo_num)
        res = reg.fit()
        self.insert_text('regression', str(res.summary()))

        self.gstep(1, "Unbalance Classes")

        temp = self.dfo[target].value_counts()
        df = pd.DataFrame({target: temp.index, 'values': temp.values})
        plt.figure(figsize=(6, 6))
        plt.title('Data Set - target value - data unbalance\n (' + target +
                  ')')
        sns.set_color_codes("pastel")
        sns.barplot(x=target, y="values", data=df)
        locs, labels = plt.xticks()
        self.write_image(plt, "unbalance", width=500, height=350, crop=True)

        self.gstep(1, "Correlation")

        plt.clf()
        corr = self.dfo_encode.corr()
        mask = np.zeros_like(corr, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
        cmap = sns.diverging_palette(230, 20, as_cmap=True)
        plt.figure(figsize=(8, 8))
        # Draw the heatmap with the mask and correct aspect ratio
        sns.heatmap(corr,
                    mask=mask,
                    cmap=cmap,
                    vmax=1,
                    vmin=-1,
                    center=0,
                    annot=True,
                    square=True,
                    linewidths=1.5,
                    cbar_kws={"shrink": .5})
        self.write_image(plt, "corr", width=0, height=0, crop=True)

        self.gstep(1, "Detecting Multicollinearity with VIF")

        y = self.dfo_encode[target]
        y = y.apply(lambda x: 1 if x == 'yes' else 0)
        X = self.dfo_encode.drop(target, axis=1)
        X = X[X._get_numeric_data().columns]
        X = X.fillna(0)
        X = X.dropna()
        vif = [
            variance_inflation_factor(X.values, i) for i in range(X.shape[1])
        ]
        cols = X.columns
        cols = cols[cols != target]
        df_m = pd.DataFrame({'cols': cols, 'vif': vif})
        df_m['significant'] = ''
        df_m['significant'] = df_m['vif'].apply(self.parse_values)
        m_vif = self.w_table(data=df_m,
                             border=0,
                             align='left',
                             collapse='collapse',
                             color='black',
                             foot=False)
        self.insert_text("vif", str(m_vif))

        i = 2
        text = ''
        text2 = ''
        for column in self.dfo.columns:
            feature = self.dfo[column].describe()
            text = text + '<option value="' + str(
                i) + '"> ' + column + ' </option>n\t\t\t\t\t\t\t\t'
            text2 = text2 + "\n\t\t\t\t\t\t\t\t\t\t} else if (selectedValue == '" + str(
                i
            ) + "') {\n\t\t\t\t\t\t\t\tdivElement.innerHTML = '" + pd.DataFrame(
                feature).to_html().replace('\n', '') + "';\n\t\t\t\t\t\t\t\t"
            i = i + 1
        text2 = text2 + '\n\t\t\t\t\t\t\t\t};'
        self.insert_text('vif_desc_option', text)
        self.insert_text('vif_desc_table', text2)

        self.gstep(1, "Residual Analisys")

        plt.clf()
        model = Ridge()
        visualizer = ResidualsPlot(model, hist=False, qqplot=True)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        self.write_image(plt, "residual1", width=500, height=350, crop=True)
        plt.clf()
        visualizer = ResidualsPlot(model, hist=True, qqplot=False)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        self.write_image(plt, "residual2", width=500, height=350, crop=True)

        self.gstep(1, "Initializing H2O")
        h2o.init()
        self.gstep(1, "Parsing Data Frame")
        df = h2o.H2OFrame(self.dfo_encode)
        self.gstep(1, "Trainning Auto Machine Learning")
        train, valid, test = df.split_frame(ratios=[0.7, 0.2], seed=1234)
        x = train.columns
        y = target
        x.remove(y)
        train[y] = train[y].asfactor()
        test[y] = test[y].asfactor()
        aml = H2OAutoML(max_models=20,
                        max_runtime_secs=max_runtime_secs,
                        seed=1,
                        include_algos=[
                            "GLM", "DeepLearning", "DRF", "XGBoost",
                            "StackedEnsemble"
                        ],
                        balance_classes=True)
        aml.train(x=x, y=y, training_frame=train)

        lb = h2o.automl.get_leaderboard(aml, extra_columns='ALL')
        lb = lb.as_data_frame()
        lb = lb.drop(columns=['rmse', 'mse', 'predict_time_per_row_ms'])
        text = self.w_table(lb)
        self.insert_text('auto_ml_results', text)
        self.write_image(aml.varimp_heatmap(),
                         'var_imp_model',
                         width=450,
                         height=400,
                         crop=True)

        self.gstep(1, "AML - Partial Dependence")

        i = 101
        text = ''
        text2 = ''
        for column in tqdm(self.dfo.columns):
            feature = self.dfo[column].describe()
            text = text + '<option value="' + str(
                i) + '"> ' + column + ' </option>n\t\t\t\t\t\t\t\t'
            text2 = text2 + "\n\t\t\t\t\t\t\t\t\t\t} else if (selectedValue2 == '" + str(
                i
            ) + "'){\n\t\t\t\t\t\t\t\tdivElement2.innerHTML = '<img src=\"images/img_aml_pd_" + str(
                i) + ".png\">';\n\t\t\t\t\t\t\t\t"
            self.write_image(aml.pd_multi_plot(valid, column),
                             'aml_pd_' + str(i),
                             width=600,
                             height=500)
            i = i + 1
        text2 = text2 + '\n\t\t\t\t\t\t\t\t};'
        self.insert_text('aml_pd_option', text)
        self.insert_text('aml_pd_image', text2)

        self.gstep(1, "Trainning (GLM) Gradient Linear Model to Ensemble")

        nfolds = 5
        family = "binomial"

        amlr_glm = H2OGeneralizedLinearEstimator(
            family=family,
            nfolds=nfolds,
            lambda_=0,
            max_runtime_secs=max_runtime_secs,
            balance_classes=True,
            fold_assignment="Modulo",
            compute_p_values=True,
            keep_cross_validation_predictions=True,
            remove_collinear_columns=True)
        amlr_glm.train(x, y, training_frame=train)

        self.gstep(1, "Trainning (DRF) Dynamic Random Forest to Ensemble")
        amlr_rf = H2ORandomForestEstimator(
            ntrees=50,
            nfolds=nfolds,
            fold_assignment="Modulo",
            max_runtime_secs=max_runtime_secs,
            balance_classes=True,
            keep_cross_validation_predictions=True,
            seed=1)
        amlr_rf.train(x=x, y=y, training_frame=train)

        self.gstep(
            1, "Training (GBM) Gradient Boosting Machine for the Ensemble")
        amlr_gbm = H2OGradientBoostingEstimator(
            nfolds=nfolds,
            seed=1111,
            balance_classes=True,
            fold_assignment="Modulo",
            max_runtime_secs=max_runtime_secs,
            keep_cross_validation_predictions=True)
        amlr_gbm.train(x=x, y=y, training_frame=train)

        self.gstep(1, "Trainning xGBoost Model to Ensemble")
        amlr_xgb = H2OXGBoostEstimator(booster='dart',
                                       nfolds=nfolds,
                                       normalize_type="tree",
                                       fold_assignment="Modulo",
                                       max_runtime_secs=max_runtime_secs,
                                       keep_cross_validation_predictions=True,
                                       seed=1234)
        amlr_xgb.train(x=x, y=y, training_frame=train, validation_frame=valid)

        self.gstep(1, "Trainning Deep Learning Model to Ensemble")

        family = "bernoulli"
        dl_model = H2ODeepLearningEstimator(distribution=family,
                                            hidden=[1],
                                            epochs=1000,
                                            train_samples_per_iteration=-1,
                                            reproducible=True,
                                            activation="Tanh",
                                            single_node_mode=False,
                                            balance_classes=True,
                                            force_load_balance=False,
                                            seed=23123,
                                            tweedie_power=1.5,
                                            max_runtime_secs=max_runtime_secs,
                                            score_training_samples=0,
                                            score_validation_samples=0,
                                            stopping_rounds=0)
        dl_model.train(x=x, y=y, training_frame=train)

        self.gstep(1, "Trainning Ensemble")
        ensemble = H2OStackedEnsembleEstimator(
            model_id="amlr_ensemble",
            base_models=[amlr_gbm, amlr_rf, amlr_xgb, amlr_glm])
        ensemble.train(x=x, y=y, training_frame=train)

        i = 201
        text = ''
        text2 = ''
        self.gstep(1, "Ensamble - (ICE) Individual Condition Expectation")
        for column in tqdm(self.dfo.columns):
            feature = self.dfo[column].describe()
            text = text + '<option value="' + str(
                i) + '"> ' + column + ' </option>n\t\t\t\t\t\t\t\t'
            text2 = text2 + "\n\t\t\t\t\t\t\t\t\t\t} else if (selectedValue3 == '" + str(
                i
            ) + "'){\n\t\t\t\t\t\t\t\tdivElement3.innerHTML = '<img src=\"images/img_ice_pd_" + str(
                i) + ".png\">';\n\t\t\t\t\t\t\t\t"
            self.write_image(ensemble.ice_plot(valid, column),
                             'ice_pd_' + str(i),
                             width=600,
                             height=500)
            i = i + 1
        text2 = text2 + '\n\t\t\t\t\t\t\t\t};'
        self.insert_text('ice_pd_option', text)
        self.insert_text('ice_pd_image', text2)

        self.gstep(1, "AMLR - Correlation by Model")
        self.write_image(aml.model_correlation_heatmap(test),
                         'aml_correlation_models')

        self.gstep(1, "Processing Models Performance")

        i = 0
        dfp = pd.DataFrame({'Algo': []})
        outcome = list(valid[target].as_data_frame()[target])
        for algo in [
                'GLM', 'Random Forest', 'GBM', 'xGBoost', 'Deep Learning'
        ]:
            plt.clf()
            if algo == 'GLM':
                predict = list(
                    amlr_glm.predict(valid).as_data_frame()['predict'])
                cf_table = 'cf_glm'
                cm_glm = ConfusionMatrix(outcome, predict)
                glm_var_imp = amlr_glm._model_json['output'][
                    'variable_importances'].as_data_frame()
                x = glm_var_imp['percentage']
                x.index = glm_var_imp['variable']
                x.sort_values().plot(kind='barh')
                plt.xlabel('Percentage')
                fig = plt.gcf()
                self.write_image(fig, 'fi_glm', width=450, height=450)

            if algo == 'Random Forest':
                predict = list(
                    amlr_rf.predict(valid).as_data_frame()['predict'])
                cf_table = 'cf_rf'
                cm_rf = ConfusionMatrix(outcome, predict)
                rf_var_imp = amlr_rf._model_json['output'][
                    'variable_importances'].as_data_frame()
                x = rf_var_imp['percentage']
                x.index = rf_var_imp['variable']
                x.sort_values().plot(kind='barh')
                plt.xlabel('Percentage')
                fig = plt.gcf()
                self.write_image(fig, 'fi_rf', width=450, height=450)
            if algo == 'GBM':
                predict = list(
                    amlr_gbm.predict(valid).as_data_frame()['predict'])
                cf_table = 'cf_gbm'
                cm_gbm = ConfusionMatrix(outcome, predict)
                gbm_var_imp = amlr_gbm._model_json['output'][
                    'variable_importances'].as_data_frame()
                x = gbm_var_imp['percentage']
                x.index = gbm_var_imp['variable']
                x.sort_values().plot(kind='barh')
                plt.xlabel('Percentage')
                fig = plt.gcf()
                self.write_image(fig, 'fi_gbm', width=450, height=450)
            if algo == 'xGBoost':
                predict = list(
                    amlr_xgb.predict(valid).as_data_frame()['predict'])
                cf_table = 'cf_xgb'
                cm_xgb = ConfusionMatrix(outcome, predict)
                xgb_var_imp = amlr_xgb._model_json['output'][
                    'variable_importances'].as_data_frame()
                x = xgb_var_imp['percentage']
                x.index = xgb_var_imp['variable']
                x.sort_values().plot(kind='barh')
                plt.xlabel('Percentage')
                fig = plt.gcf()
                self.write_image(fig, 'fi_xgb', width=450, height=450)
            if algo == 'Deep Learning':
                predict = list(
                    dl_model.predict(valid).as_data_frame()['predict'])
                cf_table = 'cf_dl'
                cm_dl = ConfusionMatrix(outcome, predict)
                dl_var_imp = dl_model._model_json['output'][
                    'variable_importances'].as_data_frame()
                x = dl_var_imp['percentage']
                x.index = dl_var_imp['variable']
                x.sort_values().plot(kind='barh')
                plt.xlabel('Percentage')
                fig = plt.gcf()
                self.write_image(fig, 'fi_dl', width=450, height=450)
            # Classification report for all models
            cr = classification_report(outcome,
                                       predict,
                                       target_names=self.allclasses,
                                       output_dict=True)
            table_cr = pd.DataFrame(cr).transpose().round(4)
            table_cr.reset_index(level=0, inplace=True)
            table_cr = table_cr.rename(columns={'index': 'Description'})
            table_model = self.w_table(data=table_cr,
                                       border=0,
                                       align='left',
                                       collapse='collapse',
                                       color='black',
                                       foot=False)
            self.insert_text(cf_table, str(table_model))

            # Statistics for all metrics
            cm = ConfusionMatrix(outcome, predict)
            dfp = pd.concat([dfp, pd.DataFrame(cm.overall_stat)[1:]],
                            ignore_index=True)
            dfp.loc[i:, ['Algo']] = algo
            i = i + 1
        dfp = dfp.round(4)

        cp = Compare({
            'RF': cm_rf,
            'GLM': cm_glm,
            'GBM': cm_gbm,
            'XGB': cm_xgb,
            'DL': cm_dl
        })
        cp_best_name = cp.best_name
        cp = pd.DataFrame(cp.scores)
        cp.reset_index(level=0, inplace=True)
        cp = cp.rename(columns={'index': 'Description'})
        table_cp = self.w_table(data=cp,
                                border=0,
                                align='left',
                                collapse='collapse',
                                color='black',
                                foot=False)
        if str(cp_best_name) == 'None':
            cp_best_name = 'Confusion matrices are too close and the best one can not be recognized.'
            max_v = cp.loc[0][1:].max()
            i = 0
            list_max = list()
            for column in cp.columns:
                if i > 0:
                    if cp[column][0] >= max_v:
                        list_max.append(column)
                i = i + 1
            self.insert_text(
                "the_best_name",
                "Winners: " + ' - '.join(list_max) + '<br>' + cp_best_name)

        else:
            self.insert_text("the_best_name", str(cp_best_name))

        self.insert_text("best_algorithms", str(table_cp))

        table_model = self.w_table(data=dfp,
                                   border=0,
                                   align='left',
                                   collapse='collapse',
                                   color='black',
                                   foot=False)
        self.insert_text("table_performance", str(table_model))

        self.gstep(1, "Closing!! All works are done!!")
        # write report
        self.write_report(self.index_html)
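Example #14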
            iq_testing_frame[col] = iq_testing_frame[col].asfactor()

# Training parameters
input_columns = list(sj_train.columns)
response_column = 'total_cases'
input_columns.remove(response_column)

# Models
sj_min_mae = 1000
sj_best_model = None

iq_min_mae = 1000
iq_best_model = None

for i in range(n_iterations):
    model_sj = H2ODeepLearningEstimator(nfolds=10, hidden=[512, 512])
    model_sj.train(x=input_columns,
                   y=response_column,
                   training_frame=sj_training_frame)

    model_iq = H2ODeepLearningEstimator(nfolds=10, hidden=[512, 512])
    model_iq.train(x=input_columns,
                   y=response_column,
                   training_frame=iq_training_frame)

    if model_sj.mae() < sj_min_mae:
        sj_min_mae = model_sj.mae()
        sj_best_model = model_sj

    if model_iq.mae() < iq_min_mae:
        iq_min_mae = model_iq.mae()
        iq_best_model = model_iq
Example #15
del p_filter['Sensor10']
del p_filter['Sensor16']
del p_filter['Sensor18']
del p_filter['Sensor19']

h_filter = h2o.H2OFrame(p_filter)
h_filter.set_names(list(p_filter.columns))

h_test = h2o.H2OFrame(p_test)
h_test.set_names(list(p_test.columns))

training_columns = list(p_filter.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')
training_columns.remove('BIN')

h_filter['BIN'] = h_filter['BIN'].asfactor()
h_test['BIN'] = h_test['BIN'].asfactor()

model = H2ODeepLearningEstimator(epochs=100, nfolds=10, balance_classes=True)
model.train(x=training_columns, y='BIN', training_frame=h_filter)

predict = model.predict(test_data=h_test)
predict = DataFrameParser.h2oToList(predict['predict'])
actual = DataFrameParser.h2oToList(h_test['BIN'])

Measures.confusion_matrix(actual, predict)
print(predict)
print(actual)
Example #16
# Extract ground truth data
ground_truth_data = np.array(p_test[response_column])

# Define columns
dl_train_columns = list(p_filtered.columns)
rm_columns = ['RUL', 'UnitNumber', 'Time']
for column in rm_columns:
    dl_train_columns.remove(column)

# Building multiple models
print("Building Models")
print("---------------")
model_array = list(range(_nmodels))
for i in range(_nmodels):
    # nfolds is an estimator parameter, not a train() argument
    model_array[i] = H2ODeepLearningEstimator(nfolds=10)

# Training models
print("Training Models")
print("---------------")
for i in range(_nmodels):
    model_array[i].train(x=dl_train_columns,
                         y=response_column,
                         training_frame=h_train)

# Validate models and assign weights
print "Validating Models"
print "-----------------"
rmse_vals = np.zeros(
    shape=_nmodels)  # Store root mean squared error of each model
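The snippet ends before the errors are filled in; a minimal sketch of the validation-and-weighting step it sets up, where h_validate stands in for a held-out frame and is an assumption:

for i in range(_nmodels):
    # Root mean squared error of each model on the held-out frame
    rmse_vals[i] = model_array[i].model_performance(test_data=h_validate).rmse()

# Invert the errors so better models receive larger weights, then normalize
weights = (1.0 / rmse_vals) / np.sum(1.0 / rmse_vals)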
Example #17
q25 = np.percentile(err_list, 25)
q75 = np.percentile(err_list, 75)
iqr = q75 - q25

rm_index = [] # Stores row numbers which have anomalies
for i in range(h_train.nrow):
    if abs(err_list[i] - q75) > 3 * iqr:
        rm_index.append(i)

# Remove anomalies
p_filtered = p_train.drop(p_train.index[rm_index])

# Convert pandas to H2OFrame
h_data = h2o.H2OFrame(p_filtered)
h_data.set_names(list(p_data.columns))

# Define columns
dl_train_columns = list(p_filtered.columns)
rm_columns = ['RUL', 'UnitNumber', 'Time']
for column in rm_columns:
    dl_train_columns.remove(column)

#model = H2ODeepLearningEstimator(epochs=100, loss='Automatic', activation='RectifierWithDropout', distribution='poisson', hidden=[512], nfolds=10)
model = H2ODeepLearningEstimator(epochs=100, hidden=[512], nfolds=10)
model.train(x=dl_train_columns, y=response_column, training_frame=h_data)
performance = model.model_performance(test_data=h_test)
print(performance)
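Example #18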

pValidate = pd.read_csv("hValidateMy.csv")
pTest = pd.read_csv("hTestingMy.csv")

hTrain = h2o.H2OFrame(pTrain)
hTrain.set_names(list(pTrain.columns))

hValidate = h2o.H2OFrame(pValidate)
hValidate.set_names(list(pValidate.columns))

hTest = h2o.H2OFrame(pTest)
hTest.set_names(list(pTest.columns))

training_columns = list(pTrain.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')

response_column = 'RUL'
print("OK")

model = H2ODeepLearningEstimator(hidden=[1024],
                                 activation='Maxout',
                                 epochs=100)
#model = H2ORandomForestEstimator(ntrees=50, max_depth=20, nbins=100, seed=12345)
model.train(x=training_columns,
            y=response_column,
            training_frame=hTrain,
            validation_frame=hValidate)

print(model.model_performance(test_data=hTest))
Example #19
def function():
    # AutoEncoder anomaly removal process
    p_train = ProcessData.trainData(moving_median_centered_average=True,
                                    standard_deviation=True,
                                    probability_distribution=True,
                                    bin_classification=True)
    p_test = ProcessData.testData(moving_median_centered_average=True,
                                  standard_deviation=True,
                                  probability_from_file=True,
                                  bin_classification=True)

    # Converting to h2o frames
    h_test = h2o.H2OFrame(p_test)
    h_test.set_names(list(p_test.columns))

    h_train = h2o.H2OFrame(p_train)
    h_train.set_names(list(p_train.columns))

    # Define autoencoder
    anomaly_model = H2OAutoEncoderEstimator(activation="Rectifier",
                                            hidden=[25, 12, 25],
                                            sparse=True,
                                            l1=1e-4,
                                            epochs=100)

    # Select relevant features
    anomaly_train_columns = list(p_train.columns)
    print(anomaly_train_columns)
    anomaly_train_columns.remove('RUL')
    anomaly_train_columns.remove('BIN')
    anomaly_train_columns.remove('UnitNumber')
    anomaly_train_columns.remove('Time')
    anomaly_train_columns.remove('Setting1')
    anomaly_train_columns.remove('Setting2')
    anomaly_train_columns.remove('Setting3')

    # Train model
    anomaly_model.train(x=anomaly_train_columns, training_frame=h_train)

    # Get reconstruction error
    reconstruction_error = anomaly_model.anomaly(test_data=h_train,
                                                 per_feature=False)
    error_str = reconstruction_error.get_frame_data()
    err_list = list(map(float, error_str.split("\n")[1:-1]))
    err_list = np.array(err_list)

    # Threshold
    threshold = np.amax(err_list) * 0.97

    print("Max Reconstruction Error       :", reconstruction_error.max())
    print("Threshold Reconstruction Error :", threshold)

    # Filter anomalies based on reconstruction error
    p_filter = Filter.filterDataAutoEncoder(panda_frame=p_train,
                                            reconstruction_error=err_list,
                                            threshold=threshold)

    # Drop features
    del p_filter['Setting3']
    del p_filter['Sensor1']
    del p_filter['Sensor5']
    del p_filter['Sensor10']
    del p_filter['Sensor16']
    del p_filter['Sensor18']
    del p_filter['Sensor19']

    h_filter = h2o.H2OFrame(p_filter)
    h_filter.set_names(list(p_filter.columns))

    h_test = h2o.H2OFrame(p_test)
    h_test.set_names(list(p_test.columns))

    training_columns = list(p_filter.columns)
    training_columns.remove('UnitNumber')
    training_columns.remove('Time')
    training_columns.remove('RUL')
    training_columns.remove('BIN')

    h_filter['BIN'] = h_filter['BIN'].asfactor()
    h_test['BIN'] = h_test['BIN'].asfactor()

    # nfolds is an estimator parameter, not a train() argument
    model = H2ODeepLearningEstimator(variable_importances=True, nfolds=10)
    model.train(x=training_columns,
                y='BIN',
                training_frame=h_filter)

    predict = model.predict(test_data=h_test)
    predict = DataFrameParser.h2oToList(predict['predict'])
    actual = DataFrameParser.h2oToList(h_test['BIN'])

    Measures.confusion_matrix(actual, predict)
    print(predict)
    print(actual)
Example #20
data = h2o.H2OFrame(bands_list)

splits = data.split_frame(ratios=[0.7, 0.15], seed=1)

train = splits[0]
valid = splits[1]
test = splits[2]

nfolds = 10
fold_assignment = 'Random'

model = H2ODeepLearningEstimator(distribution='Gaussian',
                                 standardize=True,
                                 activation='Rectifier',
                                 hidden=[200, 200, 200],
                                 l1=1e-5,
                                 l2=1e-5,
                                 epochs=10,
                                 nfolds=nfolds,
                                 fold_assignment=fold_assignment,
                                 keep_cross_validation_predictions=True)

model.train(y="THERMAL",
            x=[
                'BLUE', 'GREEN', 'RED', 'SEQGREEN', 'SEQRED', 'SEQREDEDGE',
                'NIR', 'GNDVI', 'NVDI', 'RENVDI', 'NDSM', 'SLOPE', 'TPI',
                'ROUGHNESS'
            ],
            training_frame=train,
            validation_frame=valid)  # score against the validation split, not the test split

metrics = model.model_performance()
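metrics is collected but never inspected above; a brief sketch of reading it back, together with the per-fold summary that the 10-fold cross-validation makes available:

# Training metrics gathered by model_performance()
print(metrics.rmse())

# Per-fold metrics from the 10-fold cross-validation
print(model.cross_validation_metrics_summary())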
Example #21
training_columns.remove('UnitNumber')
training_columns.remove('RUL')
training_columns.remove('Time')

#filter_train = Process.filterData(panda_frame=train, columns=sustain, removal_method='iqr', threshold=4)
filter_train = train

feature_engineered_train = ProcessData.trainDataToFrame(
    training_frame=filter_train,
    moving_k_closest_average=True,
    standard_deviation=True)
feature_engineered_test = ProcessData.trainDataToFrame(
    training_frame=test,
    moving_k_closest_average=True,
    standard_deviation=True,
    rul=True)

h_train = h2o.H2OFrame(feature_engineered_train)

h_train.set_names(list(feature_engineered_train.columns))

h_test = h2o.H2OFrame(feature_engineered_test)
h_test.set_names(list(feature_engineered_test.columns))

model = H2ODeepLearningEstimator(epochs=100,
                                 hidden=[200, 200],
                                 score_each_iteration=True)
model.train(x=training_columns, y='RUL', training_frame=h_train)

print(model.model_performance(test_data=h_test))
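Example #22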
import h2o
from h2o.estimators import H2ODeepLearningEstimator

h2o.init()

train = h2o.import_file('dataset/train.csv')
test = h2o.import_file('dataset/test.csv')

# define columns
response_column = 'SalePrice'
training_columns = [
    'SaleType', 'Condition1', 'LandContour', 'Condition2', 'RoofMatl',
    'BsmtExposure', 'ExterQual', 'Neighborhood', 'SaleCondition', 'LotConfig',
    'OverallQual', 'LotShape', 'PoolQC', 'Heating', 'Functional', 'Street',
    'OverallCond', 'RoofStyle', 'GrLivArea', 'CentralAir'
]

#train[training_columns].describe()
#OverallQual, OverallCond, GrLivArea

model = H2ODeepLearningEstimator(nfolds=10, epochs=100)
model.train(x=training_columns, y=response_column, training_frame=train)

h2o.export_file(frame=model.predict(test_data=test),
                path='prediction.csv',
                force=True)

print(model.model_performance())
Example #23
pTest = ProcessData.testData(moving_k_closest_average=True,
                             standard_deviation=True,
                             probability_from_file=True)

# Training model
print "\nTraining Model"
print "----------------------------------------------------------------------------------------------------------------"
training_columns = list(pData.columns)
training_columns.remove(response_column)
training_columns.remove('UnitNumber')
training_columns.remove('Time')

# Create h2o frame using filtered pandas frame
hTrain = h2o.H2OFrame(pData)
hTrain.set_names(list(pData.columns))

hTest = h2o.H2OFrame(pTest)
hTest.set_names(list(pTest.columns))

model = H2ODeepLearningEstimator(hidden=[64, 64, 64],
                                 score_each_iteration=True,
                                 variable_importances=True,
                                 epochs=100,
                                 activation='Tanh')
model.train(x=training_columns, y=response_column, training_frame=hTrain)

print "\nModel Performance"
print "----------------------------------------------------------------------------------------------------------------"
# Evaluate model
print model.model_performance(test_data=hTest)
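Example #24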
# Initialize server
h2o.init()

# Load training data set from csv
train = h2o.import_file('dataset/train.csv')

# define columns
training_columns = [
    'Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
    'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
    'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
    'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
    'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
    'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
    'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2',
    'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir',
    'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
    'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr',
    'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',
    'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars',
    'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF',
    'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
    'PoolQC', 'Fence', 'MiscFeature', 'MiscVal', 'MoSold', 'YrSold',
    'SaleType', 'SaleCondition'
]
response_column = 'SalePrice'

model = H2ODeepLearningEstimator(nfolds=10, variable_importances=True)
for i in range(10):
    model.train(x=training_columns, y=response_column, training_frame=train)
    model.varimp(use_pandas=True).to_csv('varimp' + str(i) + '.csv')
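Example #25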
testing_frame = ProcessData.testData(standard_deviation=True, moving_k_closest_average=True, probability_from_file=True)

# create h2o frames
train = h2o.H2OFrame(training_frame)
test = h2o.H2OFrame(testing_frame)
train.set_names(list(training_frame.columns))
test.set_names(list(testing_frame.columns))

# Feature selection
training_columns = list(training_frame.columns)
training_columns.remove(response_column)
training_columns.remove("UnitNumber")
training_columns.remove("Time")

# Build model
model1 = H2ODeepLearningEstimator()

# Train model
model1.train(x=training_columns, y=response_column, training_frame=train)

# End : Deep Learning
# ----------------------------------------------------------------------------------------------------------------------


# Begin : Random Forest Regression
# ----------------------------------------------------------------------------------------------------------------------
# MKA, SD, PROB
_model_name_2 = "Random Forest Regression"
print "Model : " + _model_name_2
print "-------------------------"
Example #26
                                                probability_distribution=True)
p_featured_test = ProcessData.testDataToFrame(testing_frame=p_test,
                                              selected_column_names=columns,
                                              probability_from_file=True)

h_filter = h2o.H2OFrame(p_featured_train)
h_filter.set_names(list(p_featured_train.columns))

h_test = h2o.H2OFrame(p_featured_test)
h_test.set_names(list(p_featured_test.columns))

training_columns = list(p_featured_train.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')

# nfolds is an estimator parameter, not a train() argument; train on the
# filtered column list built above rather than the raw `columns`
model = H2ODeepLearningEstimator(variable_importances=True, nfolds=10)
model.train(x=training_columns, y='RUL', training_frame=h_filter)

print(model.model_performance(test_data=h_test))

predict = DataFrameParser.h2oToNumpyArray(model.predict(test_data=h_test))
actual = DataFrameParser.h2oToNumpyArray(h_test['RUL'])
# var_imp = model.varimp()
# for detail in var_imp:
#     print detail[0]

Chart.residual_histogram(actual, predict)
Chart.residual_vs_estimated(actual, predict)
Chart.acutal_and_predict(actual, predict)
Example #27
hData = h2o.H2OFrame(data_frame)
hData.set_names(list(data_frame.columns))

hTesting = h2o.H2OFrame(testing_frame)
hTesting.set_names(list(testing_frame.columns))

# Split data into training and validation
hTrain, hValidate = hData.split_frame(ratios=[0.8])
h2o.export_file(hTrain, "hTrainMy.csv", force=True)
h2o.export_file(hValidate, "hValidateMy.csv", force=True)

training_columns = list(pData.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')

response_column = 'RUL'
print("OK")

model = H2ODeepLearningEstimator(hidden=[500, 500], score_each_iteration=True, variable_importances=True, epochs=100)
#model = H2ORandomForestEstimator(ntrees=50, max_depth=20, nbins=100, seed=12345)
model.train(x=training_columns, y=response_column, training_frame=hTrain, validation_frame=hValidate)

print(model.model_performance(test_data=hTesting))
Example #28
# Set factors
insurance["offset"] = insurance["Holders"].log()
insurance["Group"] = insurance["Group"].asfactor()
insurance["Age"] = insurance["Age"].asfactor()
insurance["District"] = insurance["District"].asfactor()


# Train model
model = H2ODeepLearningEstimator(
    distribution="tweedie",
    hidden=[1],
    epochs=1000,
    train_samples_per_iteration=-1,
    reproducible=True,
    activation="Tanh",
    single_node_mode=False,
    balance_classes=False,
    force_load_balance=False,
    seed=23123,
    tweedie_power=1.5,
    score_training_samples=0,
    score_validation_samples=0,
    stopping_rounds=0,
)

model.train(x=list(range(3)), y="Claims", training_frame=insurance)


# Predict
input = {"District": [1], "Group": "1-1.5l", "Age": ">35", "Holders": [3582]}
df = pd.DataFrame(input)
hf = h2o.H2OFrame(df)
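The snippet stops after building the single-row frame; a minimal sketch of the scoring step it is preparing, using the model and hf defined above:

# Score the new observation with the trained tweedie model
predictions = model.predict(hf)
print(predictions)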
Example #29
# Load train and test data as H2O frames
train = h2o.import_file('processed-data/A1Benchmark_train.csv')
test = h2o.import_file('processed-data/A1Benchmark_test.csv')

# Define input and response columns
response_column = 'is_anomaly'
input_columns = train.col_names
input_columns.remove(response_column)
input_columns.remove('timestamp')

print('Input columns   :', input_columns)
print('Response column :', response_column)

# Explicitly mark the response column as a factor (label data)
train[response_column] = train[response_column].asfactor()
test[response_column] = test[response_column].asfactor()

# Define model and train model
model = H2ODeepLearningEstimator(hidden=[20, 20], nfolds=10, epochs=100)
model.train(x=input_columns, y=response_column, training_frame=train)

# Test model
performance = model.model_performance(test_data=test)
print(performance)
'''
Sample Result
-------------

'''
Example #30
 def bake(self) -> H2ODeepLearningEstimator:
     fr = eyestate_frame()
     model = H2ODeepLearningEstimator(epochs=100, reproducible=True)
     model.train(y="eyeDetection", training_frame=fr)
     return model