Python H2OFrameCreator Examples

Programming Language: Python

Namespace/Package Name: sklearn2pmml.preprocessing.h2o

Class/Type: H2OFrameCreator

Examples at hotexamples.com: 4

Python H2OFrameCreator - 4 examples found. These are the top-rated real-world Python examples of sklearn2pmml.preprocessing.h2o.H2OFrameCreator, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

H2OFrameCreator(5)

Frequently Used Methods

H2OFrameCreator (5)

Example #1

Show file

def build_auto_h2o(regressor, name):
    """Fit an H2O regressor inside a PMML pipeline and store its artifacts.

    The pipeline declares a domain for each input column, uploads the
    transformed data as an H2O frame with explicit column names and types,
    and then trains ``regressor`` on the module-level ``auto_X`` / ``auto_y``
    data.

    Args:
        regressor: The H2O estimator used as the final pipeline step.
        name: Base name for the outputs; ``".zip"``, ``".pkl"`` and ``".csv"``
            are appended for the MOJO, the pickled pipeline and the
            predictions respectively.
    """
    categorical_columns = ["cylinders", "model_year", "origin"]
    continuous_columns = ["displacement", "horsepower", "weight",
                          "acceleration"]

    # One (name, domain, [column]) entry per input column, categorical first.
    domain_steps = []
    for column in categorical_columns:
        domain_steps.append((column, CategoricalDomain(), [column]))
    for column in continuous_columns:
        domain_steps.append((column, ContinuousDomain(), [column]))
    transformer = ColumnTransformer(domain_steps)

    # The frame columns follow the transformer output order:
    # categorical ("enum") columns first, then continuous ("numeric") ones.
    uploader = H2OFrameCreator(
        column_names=categorical_columns + continuous_columns,
        column_types=(["enum"] * len(categorical_columns) +
                      ["numeric"] * len(continuous_columns)))

    steps = [
        ("transformer", transformer),
        ("uploader", uploader),
        ("regressor", regressor),
    ]
    pipeline = PMMLPipeline(steps)

    target_frame = H2OFrame(auto_y.to_frame())
    pipeline.fit(auto_X, target_frame)

    # Verify on a fixed-seed 5% sample of the training inputs.
    verification_sample = auto_X.sample(frac=0.05, random_state=13)
    pipeline.verify(verification_sample)

    fitted_regressor = pipeline._final_estimator
    store_mojo(fitted_regressor, name + ".zip")
    store_pkl(pipeline, name + ".pkl")

    predictions = pipeline.predict(auto_X)
    # The return value is not used; the call is relied on to rename the
    # prediction column on the frame itself.
    predictions.set_names(["mpg"])
    store_csv(predictions.as_data_frame(), name + ".csv")

Example #2

Show file

def build_audit_h2o(classifier, name):
	"""Fit an H2O classifier inside a PMML pipeline and store its artifacts.

	The pipeline declares a domain for each input column, uploads the mapped
	data as an H2O frame and trains ``classifier`` on the module-level
	``audit_X`` / ``audit_y`` data.

	Args:
		classifier: The H2O estimator used as the final pipeline step.
		name: Name passed unchanged to ``store_mojo``, ``store_pkl`` and
			``store_csv``.
	"""
	continuous_columns = ["Age", "Hours", "Income"]
	categorical_columns = ["Employment", "Education", "Marital", "Occupation", "Gender", "Deductions"]

	# One ([column], domain) entry per input column, continuous first.
	features = []
	for column in continuous_columns:
		features.append(([column], ContinuousDomain()))
	for column in categorical_columns:
		features.append(([column], CategoricalDomain()))
	mapper = DataFrameMapper(features)

	steps = [
		("mapper", mapper),
		("uploader", H2OFrameCreator()),
		("classifier", classifier),
	]
	pipeline = PMMLPipeline(steps)

	# The label column is uploaded with an explicit "categorical" column type.
	label_frame = H2OFrame(audit_y.to_frame(), column_types = ["categorical"])
	pipeline.fit(audit_X, label_frame)

	# Verify on a fixed-seed 5% sample of the training inputs.
	verification_sample = audit_X.sample(frac = 0.05, random_state = 13)
	pipeline.verify(verification_sample)

	fitted_classifier = pipeline._final_estimator
	store_mojo(fitted_classifier, name)
	store_pkl(pipeline, name)

	predictions = pipeline.predict(audit_X)
	# The return value is not used; the call is relied on to rename the
	# three prediction columns on the frame itself.
	predictions.set_names(["h2o(Adjusted)", "probability(0)", "probability(1)"])
	store_csv(predictions.as_data_frame(), name)

Example #3

Show file

File: RandomForestAudit.py Project: openscoring/papis.io

                          ("Income", ContinuousDomain()),
                          (["Hours", "Income"],
                           Alias(ExpressionTransformer("X[1] / (X[0] * 52)"),
                                 "Hourly_Income"))])
# H2O random forest configured with 17 trees.
classifier = H2ORandomForestEstimator(ntrees=17)

# Post-processing applied to the pipeline's predict_proba output (it is passed
# to PMMLPipeline below as `predict_proba_transformer`):
#   1. "expression" selects X[1] -- presumably the probability of the second
#      class; confirm against the label encoding.
#   2. "cut" bins that value using the edges 0.0 / 0.75 / 0.90 / 1.0 into the
#      labels "no" / "maybe" / "yes"; the result is aliased as "Decision".
predict_proba_transformer = Pipeline([
    ("expression", ExpressionTransformer("X[1]")),
    ("cut",
     Alias(CutTransformer(bins=[0.0, 0.75, 0.90, 1.0],
                          labels=["no", "maybe", "yes"]),
           "Decision",
           prefit=True))
])

# Three-step pipeline: the mapper, the H2O frame upload, then the H2O
# classifier.
pipeline = PMMLPipeline([("local_mapper", mapper),
                         ("uploader", H2OFrameCreator()),
                         ("remote_classifier", classifier)],
                        predict_proba_transformer=predict_proba_transformer)
# The label column is uploaded as an H2O frame with a "categorical" column type.
pipeline.fit(audit_X, H2OFrame(audit_y.to_frame(),
                               column_types=["categorical"]))

# Verify on 100 sampled rows.
# NOTE(review): no random_state is passed, so the sample presumably differs
# between runs -- confirm whether reproducible verification data is wanted.
pipeline.verify(audit_X.sample(100))

# Export the fitted pipeline to a PMML file.
sklearn2pmml(pipeline, "pmml/RandomForestAudit.pmml")

# Optional deployment step, enabled by passing "--deploy" on the command line.
if "--deploy" in sys.argv:
    # Imported here so the openscoring client is only needed when deploying.
    from openscoring import Openscoring

    # NOTE(review): the name `os` would shadow the standard-library `os`
    # module if this file imports it -- confirm, and consider renaming.
    os = Openscoring("http://localhost:8080/openscoring")
    os.deployFile("RandomForestAudit", "pmml/RandomForestAudit.pmml")

Example #4

Show file

          Pipeline([('extract', ColumnExtractor(['user_id', 'ip_address'])),
                    ('groupby_count', AddGroupByCount())])),
         ('numerics',
          Pipeline([('extract', ColumnExtractor(NUM_FEATS)),
                    ('zero_fill', ZeroFillTransformer()),
                    ('log', Log1pTransformer())]))
     ]))
])
##############################
# Modeling + Tuning
##############################
from h2o.cross_validation import H2OKFold
# Join the training features and target column-wise into a single frame.
dataset = pd.concat([X_train, y_train], axis=1)
# 5-fold H2O cross-validation splitter over that frame, with a fixed seed.
cv = H2OKFold(dataset, n_folds=5, seed=42)
# H2O approach
# NOTE(review): as written, the two tuples below are bare expression
# statements (built and then discarded). They look like pipeline steps whose
# enclosing list was lost -- confirm against the original file.
("H2OCreator", H2OFrameCreator()),
# ('standardize', H2OScaler()),
# ('pca', H2OPCA()),
('rf', H2ORandomForestEstimator(ntrees=20))

# something new to try
# from scipy.stats import randint
# params = {
#           # "standardize__center":    [True, False],
#           # "standardize__scale":     [True, False],
#           "pca__k":  2,
#               # randint(2, X_train[1:].shape[1]),
#           "rf__ntrees": 20
# # randint(50,60),
#           # "rf__max_depth":          randint(4,8),
#           # "rf__min_rows":           randint(5,10),