Example #1
0
def mojo_predict_pandas(dataframe, mojo_zip_path, genmodel_jar_path=None, classpath=None, java_options=None, verbose=False):
    """
    MOJO scoring function to take a Pandas frame and use MOJO model as zip file to score.

    The frame is written to a CSV file in a temporary directory, scored with ``mojo_predict_csv`` and
    the resulting prediction CSV is read back with pandas. The temporary directory is always removed.

    :param dataframe: Pandas frame to score.
    :param mojo_zip_path: Path to MOJO zip downloaded from H2O.
    :param genmodel_jar_path: Optional, path to genmodel jar file. If None (default) then the h2o-genmodel.jar in the same
    folder as the MOJO zip will be used.
    :param classpath: Optional, specifies custom user defined classpath which will be used when scoring. If None
    (default) then the default classpath for this MOJO model will be used.
    :param java_options: Optional, custom user defined options for Java. By default '-Xmx4g' is used.
    :param verbose: Optional, if True, then additional debug information will be printed. False by default.
    :return: Pandas frame with predictions
    :raises RuntimeError: if pandas cannot be imported.
    """
    # Check for pandas before touching the file system so no temporary directory is created needlessly.
    if not can_use_pandas():
        # `RuntimeException` does not exist in Python; the built-in is RuntimeError.
        raise RuntimeError('Cannot import pandas')
    import pandas
    assert_is_type(dataframe, pandas.DataFrame)
    tmp_dir = tempfile.mkdtemp()
    try:
        input_csv_path = os.path.join(tmp_dir, 'input.csv')
        prediction_csv_path = os.path.join(tmp_dir, 'prediction.csv')
        dataframe.to_csv(input_csv_path)
        mojo_predict_csv(input_csv_path=input_csv_path, mojo_zip_path=mojo_zip_path,
                         output_csv_path=prediction_csv_path, genmodel_jar_path=genmodel_jar_path,
                         classpath=classpath, java_options=java_options, verbose=verbose)
        return pandas.read_csv(prediction_csv_path)
    finally:
        shutil.rmtree(tmp_dir)
def h2oshow_progress():
    """
    Python API test: h2o.show_progress()

    Standard output is redirected into an in-memory buffer while a binomial GLM is trained; the
    captured text must contain the word "progress" and "100%".
    If anything inside the try block raises, the exception is required to be an AttributeError whose
    first argument mentions "encoding" (the Python 2 case), and only the argument list of
    h2o.show_progress() is verified.

    NOTE(review): the handler catches Exception, so a failed assertion at the end of the try block
    also ends up in the handler and is then checked against AttributeError -- confirm this is intended.
    """
    try:    # expected to work only with Python 3.
        s = StringIO()
        sys.stdout = s   # redirect output into the buffer
        h2o.show_progress()   # true by default.
        training_data = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
        Y = 3
        X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
        model.train(x=X, y=Y, training_frame=training_data)
        sys.stdout=sys.__stdout__       # restore old stdout
        # make sure the word progress is found and % is found.  That is how progress is displayed.
        assert ("progress" in s.getvalue()) and ("100%" in s.getvalue()), "h2o.show_progress() command is not working."
    except Exception as e:  # will get error for python 2
        sys.stdout=sys.__stdout__       # restore old stdout before reporting anything
        assert_is_type(e, AttributeError)   # error for using python 2
        assert "encoding" in e.args[0], "h2o.show_progress() command is not working."
        # fall back to checking the signature only: the function must take no arguments
        allargs = inspect.getargspec(h2o.show_progress)
        assert len(allargs.args)==0, "h2o.show_progress() should have no arguments!"
def h2ogrid_checkpoints():
    """
    Python API test: H2OGridSearch with export_checkpoints_dir

    Copy from pyunit_gbm_random_grid.py

    Trains a GBM grid over two ``ntrees`` values with checkpoints exported into a temporary
    directory, then checks that two files were exported and that the grid holds the same number of
    models. The temporary directory is removed even if training raises.
    """
    air_hex = h2o.import_file(path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"), destination_frame="air.hex")
    myX = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {
        'ntrees': [5, 10]
    }

    search_crit = {'strategy': "RandomDiscrete",
                   'max_models': 5,
                   'seed': 1234,
                   'stopping_rounds' : 3,
                   'stopping_metric' : "AUTO",
                   'stopping_tolerance': 1e-2
                   }
    checkpoints_dir = tempfile.mkdtemp()
    try:
        air_grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
        air_grid.train(x=myX, y="IsDepDelayed", training_frame=air_hex, distribution="bernoulli",
                       learn_rate=0.1,
                       max_depth=3,
                       export_checkpoints_dir=checkpoints_dir)

        num_files = len(listdir(checkpoints_dir))
    finally:
        # Always clean up, otherwise a failed training run leaves the directory behind.
        shutil.rmtree(checkpoints_dir)

    assert_is_type(air_grid, H2OGridSearch)
    assert num_files == 2, "Expected 2 exported checkpoint files from the grid search, found {0}.".format(num_files)
    assert len(air_grid.get_grid()) == num_files, "Number of models in grid does not match count of files in checkpoints dir."
def h2o_H2OFrame_num_valid_substrings():
    """
    Python API test: h2o.frame.H2OFrame.num_valid_substrings(i)

    Writes a two-word dictionary file into the "results" directory and counts valid substrings of the
    fifth iris column against it. When the failure message contains 'File not found', only the
    argument list of the method is checked; any other failure fails the test.
    """
    try:
        # the full test needs a results directory to write the word list into
        words_path = os.path.join(pyunit_utils.locate("results"), "test_num_valid_substrings.txt")
        with open(words_path, "w") as words_file:
            for piece in ("setosa", '\n', "virginica"):
                words_file.write(piece)
        iris_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader_NA_2.csv"))
        counts = iris_frame[4].num_valid_substrings(path_to_words=words_path)
        assert_is_type(counts, H2OFrame)
        assert counts.sum().flatten()==100, "h2o.H2OFrame.num_valid_substrings command is not working."
    except Exception as e:
        if 'File not found' not in e.args[0]:
            assert False, "h2o.H2OFrame.num_valid_substrings() contains only one argument, path_to_words!"
        else:
            print("Directory is not writable.  h2o.H2OFrame.num_valid_substrings is tested for number of argument "
                  "and argument name only.")
            signature = inspect.getargspec(h2o.H2OFrame.num_valid_substrings)
            assert len(signature.args)==2 and signature.args[1]=='path_to_words', \
                "h2o.H2OFrame.num_valid_substrings() contains only one argument, path_to_words!"
Example #5
0
    def __init__(self, service_principal, mech_oid=kerberos.GSS_MECH_OID_SPNEGO):
        """
        Store the service principal and mechanism OID and precompile the header-matching regex.

        :param service_principal: service principal name; must be a string.
        :param mech_oid: mechanism OID to use, ``kerberos.GSS_MECH_OID_SPNEGO`` by default.
        """
        assert_is_type(service_principal, str)

        # Case-insensitive match of a "Negotiate" item, optionally preceded by other comma-separated
        # items; the group captures the text following it up to the next comma.
        # A raw string is required: `\s` in a normal literal is an invalid escape sequence.
        self._header_regex = re.compile(r'(?:.*,)*\s*Negotiate\s*([^,]*),?', re.I)

        self._service_principal = service_principal
        self._mech_oid = mech_oid
Example #6
0
 def confusion_matrix(self, data):
     """
     Return the confusion matrix table reported by the Predictions endpoint for a dataset.

     :param data: an H2OFrame; its ``frame_id`` is sent together with this model's id.
     :returns: the ``cm.table`` entry of the first element of ``model_metrics`` in the response.
     """
     assert_is_type(data, H2OFrame)
     endpoint = "POST /3/Predictions/models/%s/frames/%s" % (self._id, data.frame_id)
     response = h2o.api(endpoint)
     first_metrics = response["model_metrics"][0]
     return first_metrics["cm"]["table"]
Example #7
0
def h2oget_grid():
    """
    Python API test: h2o.get_grid(grid_id)

    Copy from pyunit_gbm_random_grid.py

    Trains a random-discrete GBM grid capped at five models, fetches it again by id and checks that
    the fetched grid is an H2OGridSearch holding the same number of models.
    """
    airlines = h2o.import_file(path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
                               destination_frame="air.hex")
    predictors = ["DayofMonth","DayOfWeek"]

    grid_space = {'learn_rate':[0.1,0.2], 'max_depth':[2,3], 'ntrees':[5,10]}
    criteria = {
        'strategy': "RandomDiscrete",
        'max_models': 5,
        'seed' : 1234,
        'stopping_rounds' : 3,
        'stopping_metric' : "AUTO",
        'stopping_tolerance': 1e-2,
    }

    trained_grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=grid_space, search_criteria=criteria)
    trained_grid.train(x=predictors, y="IsDepDelayed", training_frame=airlines, distribution="bernoulli")

    refetched_grid = h2o.get_grid(str(trained_grid.grid_id))
    assert_is_type(refetched_grid, H2OGridSearch)
    assert len(trained_grid.get_grid())==5, "h2o.get_grid() is command not working.  " \
                                            "It returned the wrong number of models."
    assert len(trained_grid.get_grid())==len(refetched_grid.get_grid()), "h2o.get_grid() is command not working."
Example #8
0
def h2oconnection():
    """
    Python API test: h2o.connection()

    Calls h2o.connection() without arguments and checks that the result is an H2OConnection.
    """
    current_connection = h2o.connection()
    assert_is_type(current_connection, H2OConnection)
Example #9
0
def h2oinitname():
    """
    Python API test for h2o.init

    Runs four h2o.init() scenarios in sequence, each with a different cluster name / port
    combination. H2OConnectionError is tolerated (and printed) in the first, third and fourth
    scenario; the second scenario must fail with H2OServerError.
    :return:
    """
    # Scenario 1: initialize a cluster named "test", then connect to it again.
    try:
        h2o.init(strict_version_check=False, name="test")  # Should initialize
        h2o.init(strict_version_check=False, name="test")  # Should just connect
        assert h2o.cluster().cloud_name == "test"
    except H2OConnectionError as e:  # some errors are okay like version mismatch
        print("error message type is {0} and the error message is {1}\n".format(e.__class__.__name__, e.args[0]))

    # Scenario 2: requesting exactly port 54321 (as_port=True) under another name must fail.
    try:
        h2o.init(strict_version_check=False, port=54321, name="test2", as_port=True)
        assert False, "Should fail to connect and the port should be used by previous invocation."
    except H2OServerError as e:
        print("error message type is {0} and the error message is {1}\n".format(e.__class__.__name__, e.args[0]))

    # Scenario 3: same port without as_port.
    try:
        h2o.init(strict_version_check=False, port=54321, name="test2")  # Should bump the port to next one
        assert h2o.cluster().cloud_name == "test2"
    except H2OConnectionError as e:
        print("error message type is {0} and the error message is {1}\n".format(e.__class__.__name__, e.args[0]))

    # Scenario 4: explicit port 60000 with as_port=True and a third name.
    try:
        h2o.init(strict_version_check=False, port=60000, name="test3", as_port=True)
        assert h2o.cluster().cloud_name == "test3"
    except H2OConnectionError as e:
        print("error message type is {0} and the error message is {1}\n".format(e.__class__.__name__, e.args[0]))
        assert_is_type(e, H2OConnectionError)
        # NOTE(review): the shutdown only happens on this error path -- confirm that is intended.
        h2o.cluster().shutdown()
Example #10
0
def get_human_readable_bytes(size):
    """
    Render a byte count in human readable form using binary prefixes (k, M, G, T, P).

    Zero yields "0" and None yields an empty string. Any other value must be a non-negative integer.
    The largest prefix whose unit fits into `size` is used; exact multiples are printed as integers,
    other values get up to three decimals depending on the magnitude of the leading part.

    :param size: integer representing byte size of something
    :return: string representation of the size, in human-readable form
    """
    if size == 0:
        return "0"
    if size is None:
        return ""
    assert_is_type(size, int)
    assert size >= 0, "`size` cannot be negative, got %d" % size
    # Largest unit first; the trailing empty prefix stands for plain bytes.
    prefixes = ("P", "T", "G", "M", "k", "")
    for position, prefix in enumerate(prefixes):
        shift = (len(prefixes) - 1 - position) * 10
        whole_units = size >> shift
        if whole_units == 0:
            continue
        # First candidate precision (3, then 2, then 1) whose threshold exceeds `size`; 0 if none.
        precision = next((nd for nd in (3, 2, 1) if size >> (shift + 12 - nd * 3) == 0), 0)
        if precision == 0 or size == whole_units << shift:
            number = str(whole_units)
        else:
            number = "%.*f" % (precision, size / (1 << shift))
        return "%s %sb" % (number, prefix)
Example #11
0
def h2omake_metrics():
    """
    Python API test: h2o.make_metrics(predicted, actual, domain=None, distribution=None)

    Copied from pyunit_make_metrics.py

    Trains a multinomial GBM on the prostate data with RACE as response and compares the training
    metrics of the model with metrics rebuilt by h2o.make_metrics(), with and without an explicit
    domain.
    """
    prostate = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    for column in ("CAPSULE", "RACE"):
        prostate[column] = prostate[column].asfactor()

    response = "RACE"
    predictors = list(set(prostate.names) - {"ID", response})
    gbm = H2OGradientBoostingEstimator(distribution="multinomial", ntrees=2, max_depth=3, min_rows=1,
                                       learn_rate=0.01, nbins=20)
    gbm.train(x=predictors, y=response, training_frame=prostate)
    predicted = h2o.assign(gbm.predict(prostate)[1:], "pred")
    actual = h2o.assign(prostate[response].asfactor(), "act")
    domain = prostate[response].levels()[0]

    from_model = gbm.model_performance(train=True)
    with_domain = h2o.make_metrics(predicted, actual, domain=domain)
    without_domain = h2o.make_metrics(predicted, actual)
    assert_is_type(with_domain, H2OMultinomialModelMetrics)
    assert_is_type(without_domain, H2OMultinomialModelMetrics)

    metric_names = ("mse", "rmse", "logloss", "mean_per_class_error")
    # model metrics versus metrics rebuilt with an explicit domain
    for metric in metric_names:
        assert abs(getattr(from_model, metric)() - getattr(with_domain, metric)()) < 1e-5
    # rebuilt metrics must not depend on whether the domain was passed
    for metric in metric_names:
        assert abs(getattr(without_domain, metric)() - getattr(with_domain, metric)()) < 1e-5
Example #12
0
def h2oget_frame():
    """
    Python API test: h2o.get_frame(frame_id)

    Imports a frame, looks it up again through its frame_id and checks the result is an H2OFrame.
    """
    imported = h2o.import_file(pyunit_utils.locate("smalldata/jira/hexdev_29.csv"))
    looked_up = h2o.get_frame(imported.frame_id)
    assert_is_type(looked_up, H2OFrame)
Example #13
0
 def base_models(self, base_models):
     """
     Store the base models in the parameter map.

     A list of H2OEstimator objects is replaced by the list of their ``model_id`` values; anything
     else must be None or a list of strings and is stored unchanged.
     """
     if is_type(base_models, [H2OEstimator]):
         base_models = [estimator.model_id for estimator in base_models]
     else:
         assert_is_type(base_models, None, [str])
     self._parms["base_models"] = base_models
Example #14
0
    def plot(self, type="roc", server=False):
        """
        Produce the desired metric plot.

        Prints a message and returns without plotting when matplotlib cannot be imported.

        :param type: the type of metric plot (currently, only ROC supported).
        :param server: if True, switch matplotlib to the "Agg" backend and do not call ``plt.show()``.
        :returns: None
        """
        # TODO: add more types (i.e. cutoffs)
        assert_is_type(type, "roc")
        # check for matplotlib. exit if absent.
        # A plain import raises ImportError by itself, so no separate module lookup via the `imp`
        # module (removed in Python 3.12) is needed.
        try:
            import matplotlib
            if server:
                try:
                    matplotlib.use('Agg', warn=False)
                except TypeError:
                    # Newer Matplotlib releases no longer accept the `warn` keyword.
                    matplotlib.use('Agg')
            import matplotlib.pyplot as plt
        except ImportError:
            print("matplotlib is required for this function!")
            return

        if type == "roc":
            plt.xlabel('False Positive Rate (FPR)')
            plt.ylabel('True Positive Rate (TPR)')
            plt.title('ROC Curve')
            plt.text(0.5, 0.5, r'AUC={0:.4f}'.format(self._metric_json["AUC"]))
            plt.plot(self.fprs, self.tprs, 'b--')
            plt.axis([0, 1, 0, 1])
            if not server: plt.show()
Example #15
0
    def transform(self, frame=None, holdout_type=None, noise=-1, seed=-1):
        """
        Apply target encoding to `te_columns` using the encoding maps stored by `TargetEncoder.fit()`.

        The encodings are kept on the instance after `.fit()`, so they are not passed in here.

        :param frame frame: the frame the target encoding transformation is applied to.
        :param str holdout_type: one of

                1) "kfold" - encodings for a fold are generated based on out-of-fold data.
                2) "loo" - leave one out. Current row's response value is subtracted from the pre-calculated per-level frequencies.
                3) "none" - we do not holdout anything. Using whole frame for training

        :param float noise: the amount of random noise added to the target encoding; the signature default is -1.
        :param int seed: a random seed used when generating the random noise. Defaults to -1.
        :returns: the H2OFrame built from the "target.encoder.transform" expression.
        """
        assert_is_type(holdout_type, "kfold", "loo", "none")

        # The encoding map keys must be in the same order as the encoded columns.
        map_keys = self._encodingMap.map_keys['string']
        assert map_keys == self._teColumns
        frame_keys = [entry['key']['name'] for entry in self._encodingMap.frames]
        expression = ExprNode("target.encoder.transform", map_keys, frame_keys, frame, self._teColumns,
                              holdout_type, self._responseColumnName, self._foldColumnName,
                              self._blending, self._inflectionPoint, self._smoothing,
                              noise, seed)
        return H2OFrame._expr(expr=expression)
Example #16
0
    def train(self, x=None, y=None, training_frame=None, blending_frame=None, **kwargs):
        """
        Train via the parent ``_train``, passing along an optional blending frame.

        :param blending_frame: None or an H2OFrame; when given it is added to the training
            parameters under the key 'blending_frame'.
        """
        assert_is_type(blending_frame, None, H2OFrame)

        def _add_blending_frame(parms):
            # Hook handed to _train(); leaves the parameters untouched without a blending frame.
            if blending_frame is None:
                return
            parms['blending_frame'] = blending_frame

        super(self.__class__, self)._train(x, y, training_frame, extend_parms_fn=_add_blending_frame, **kwargs)
Example #17
0
    def confusion_matrix(self, data):
        """
        Return the confusion matrix table reported by the Predictions endpoint for a dataset.

        :param H2OFrame data: the frame for which the confusion matrix should be extracted.
        :returns: the ``cm.table`` entry of the first element of ``model_metrics`` in the response.
        """
        assert_is_type(data, H2OFrame)
        endpoint = "POST /3/Predictions/models/%s/frames/%s" % (self._id, data.frame_id)
        response = h2o.api(endpoint)
        first_metrics = response["model_metrics"][0]
        return first_metrics["cm"]["table"]
def h2olist_timezones():
    """
    Python API test: h2o.list_timezones()
    Deprecated, use h2o.cluster().list_timezones().

    Expects a single-column H2OFrame with 468 rows.
    """
    zones = h2o.list_timezones()
    assert_is_type(zones, H2OFrame)
    expected_rows, expected_cols = 468, 1
    assert zones.nrow==expected_rows, "h2o.get_timezone() returns frame with wrong row number."
    assert zones.ncol==expected_cols, "h2o.get_timezone() returns frame with wrong column number."
Example #19
0
 def metalearner_params(self, metalearner_params):
     """
     Store the metalearner parameters as a JSON string in the parameter map.

     Every value whose string form does not contain both "[" and "]" is wrapped into a one-element
     list before serialization. The dictionary passed in is not modified.

     :param metalearner_params: None or a dict of parameter name -> value.
     """
     assert_is_type(metalearner_params, None, dict)
     if metalearner_params is None:
         self._parms["metalearner_params"] = None
         return
     wrapped = {}
     for key, value in metalearner_params.items():
         text = str(value)
         # The original test `("[" and "]") not in text` only ever looked for "]".
         looks_like_list = "[" in text and "]" in text
         wrapped[key] = value if looks_like_list else [value]
     self._parms["metalearner_params"] = str(json.dumps(wrapped))
def h2o_H2OFrame_skewness():
    """
    Python API test: h2o.frame.H2OFrame.skewness(na_rm=False)

    Builds a two-column frame of uniform random numbers and expects a list with one entry per column.
    """
    samples = np.random.uniform(-1,1, (10000,2))
    frame = h2o.H2OFrame(python_obj=samples)
    skew_values = frame.skewness()
    assert_is_type(skew_values, list)
    assert len(skew_values)==2, "h2o.H2OFrame.skewness() command is not working."
def h2o_H2OFrame_stratified_kfold_column():
    """
    Python API test: h2o.frame.H2OFrame.stratified_kfold_column(n_folds=3, seed=-1)

    Every row must be assigned to one of the folds 0, 1 or 2.
    """
    samples = np.random.randint(-3,3, (10000,2))
    factor_frame = h2o.H2OFrame(python_obj=samples).asfactor()
    folds = factor_frame[1].stratified_kfold_column(n_folds=3, seed=-1)
    assert_is_type(folds, H2OFrame)
    in_fold0 = (folds==0).sum()
    in_fold1 = (folds==1).sum()
    in_fold2 = (folds==2).sum()
    assert (in_fold0+in_fold1+in_fold2)==factor_frame.nrow, \
        "h2o.H2OFrame.stratified_kfold_column() command is not working."
def h2o_H2OFrame_na_omit():
    """
    Python API test: h2o.frame.H2OFrame.na_omit()

    Copied from runit_lstrip.R

    The frame returned by na_omit() must have exactly ten rows fewer than the imported iris frame.
    """
    iris_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader_NA_2.csv"))
    without_nas = iris_frame.na_omit()
    assert_is_type(without_nas, H2OFrame)
    # check return result
    assert without_nas.nrow==iris_frame.nrow-10, "h2o.H2OFrame.na_omit() command is not working."
Example #23
0
 def assert_error(*args, **kwargs):
     """
     Check that assert_is_type() with the given arguments raises H2OTypeError.

     :raises RuntimeError: if assert_is_type() returns without raising.
     """
     try:
         assert_is_type(*args, **kwargs)
     except H2OTypeError as e:
         # Check whether the message can stringify properly
         message = str(e)
         assert len(message) < 1000
         return
     # Only reached when no H2OTypeError was raised; the former trailing raise was unreachable.
     raise RuntimeError("Failed to throw an exception")
Example #24
0
    def set_mode(self, mode):
        """
        Tell the widget whether it will be rendered in tty or in file mode.

        Only relevant for widgets that support both rendering modes.

        :param mode: either "tty" or "file".
        """
        assert_is_type(mode, "tty", "file")
        renders_to_file = (mode == "file")
        self._file_mode = renders_to_file
Example #25
0
 def assert_error(*args, **kwargs):
     """Verify that assert_is_type() called with these arguments raises H2OTypeError."""
     try:
         assert_is_type(*args, **kwargs)
     except H2OTypeError as err:
         # The error must be convertible to a reasonably short string.
         assert len(str(err)) < 1000
     else:
         raise RuntimeError("Failed to throw an exception")
def h2o_H2OFrame_names():
    """
    Python API test: h2o.frame.H2OFrame.names

    Copied from runit_lstrip.R

    The names attribute must be a list with one entry per column.
    """
    iris_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader_NA_2.csv"))
    column_names = iris_frame.names
    assert_is_type(column_names, list)
    # check return result
    assert len(column_names)==iris_frame.ncol,  "h2o.H2OFrame.names command is not working."
def h2o_H2OFrame_nlevels():
    """
    Python API test: h2o.frame.H2OFrame.nlevels()

    Two enum columns filled with random integers from [-2, 2) must each report four levels.
    """
    samples = np.random.randint(-2,2, (10000,2))
    enum_frame = h2o.H2OFrame(python_obj=samples, column_types=['enum', 'enum'])
    level_counts = enum_frame.nlevels()

    # check return type
    assert_is_type(level_counts, list)
    assert len(level_counts)==2 and max(level_counts)==min(level_counts)==4, \
        "h2o.H2OFrame.nlevels() command is not working."
def verifyOps(opers, shapeS, threshold_name, threshold_val, groupByCommand):
    """
    Check the frame produced by a GroupBy operation against an expected shape and expected values.

    :param opers: the GroupBy object under test.
    :param shapeS: expected shape of the frame returned by ``opers.get_frame()``.
    :param threshold_name: column selector for each row to check, indexed like ``threshold_val``.
    :param threshold_val: expected values; entry i is compared with row i of the result frame.
    :param groupByCommand: command name used in the assertion messages.
    """
    assert_is_type(opers, GroupBy)
    result_frame = opers.get_frame()
    assert_is_type(result_frame, H2OFrame)

    assert result_frame.shape == shapeS, "{0} command is not working.".format(groupByCommand)

    for row, expected in enumerate(threshold_val):
        observed = result_frame[row, threshold_name[row]]
        assert abs(observed - expected) < 1e-6, \
            "{0} command is not working.".format(groupByCommand)
Example #29
0
def rapids(expr):
    """
    Run a Rapids expression through ``ExprNode.rapids``.

    :param expr: The rapids expression (ascii string).

    :returns: The JSON response (as a python dictionary) of the Rapids execution
    """
    assert_is_type(expr, str)
    response = ExprNode.rapids(expr)
    return response
def h2o_H2OFrame_modulo_kfold_column():
    """
    Python API test: h2o.frame.H2OFrame.modulo_kfold_column(n_folds=3)

    Requests a random number of folds k in [2, 10) and checks that the returned fold column is an
    H2OFrame with exactly k distinct levels.
    """
    python_lists = np.random.randint(-5,5, (1000, 2))
    k = randrange(2,10)
    h2oframe = h2o.H2OFrame(python_obj=python_lists)
    # Exercise the method this test is named after (it previously called kfold_column()).
    clist = h2oframe.modulo_kfold_column(n_folds=k)

    assert_is_type(clist, H2OFrame)     # check return type
    assert clist.asfactor().nlevels()[0]==k, "h2o.H2OFrame.modulo_kfold_column() command is not working."
Example #31
0
 def ignore_const_cols(self, ignore_const_cols):
     """Type-check ``ignore_const_cols`` (None or bool) and store it in the parameter map."""
     assert_is_type(ignore_const_cols, None, bool)
     self._parms["ignore_const_cols"] = ignore_const_cols
Example #32
0
 def weights_column(self, weights_column):
     """Type-check ``weights_column`` (None or str) and store it in the parameter map."""
     assert_is_type(weights_column, None, str)
     self._parms["weights_column"] = weights_column
Example #33
0
 def score_each_iteration(self, score_each_iteration):
     """Type-check ``score_each_iteration`` (None or bool) and store it in the parameter map."""
     assert_is_type(score_each_iteration, None, bool)
     self._parms["score_each_iteration"] = score_each_iteration
Example #34
0
 def classification_stop(self, classification_stop):
     """Type-check ``classification_stop`` (None or numeric) and store it in the parameter map."""
     assert_is_type(classification_stop, None, numeric)
     self._parms["classification_stop"] = classification_stop
Example #35
0
 def score_training_samples(self, score_training_samples):
     """Type-check ``score_training_samples`` (None or int) and store it in the parameter map."""
     assert_is_type(score_training_samples, None, int)
     self._parms["score_training_samples"] = score_training_samples
Example #36
0
 def validation_frame(self, validation_frame):
     """Type-check ``validation_frame`` (None or H2OFrame) and store it in the parameter map."""
     assert_is_type(validation_frame, None, H2OFrame)
     self._parms["validation_frame"] = validation_frame
Example #37
0
 def regression_stop(self, regression_stop):
     """Type-check ``regression_stop`` (None or numeric) and store it in the parameter map."""
     assert_is_type(regression_stop, None, numeric)
     self._parms["regression_stop"] = regression_stop
Example #38
0
 def elastic_averaging(self, elastic_averaging):
     """Type-check ``elastic_averaging`` (None or bool) and store it in the parameter map."""
     assert_is_type(elastic_averaging, None, bool)
     self._parms["elastic_averaging"] = elastic_averaging
Example #39
0
 def score_duty_cycle(self, score_duty_cycle):
     """Type-check ``score_duty_cycle`` (None or numeric) and store it in the parameter map."""
     assert_is_type(score_duty_cycle, None, numeric)
     self._parms["score_duty_cycle"] = score_duty_cycle
Example #40
0
 def elastic_averaging_regularization(self, elastic_averaging_regularization):
     """Type-check ``elastic_averaging_regularization`` (None or numeric) and store it in the parameter map."""
     assert_is_type(elastic_averaging_regularization, None, numeric)
     key = "elastic_averaging_regularization"
     self._parms[key] = elastic_averaging_regularization
Example #41
0
 def elastic_averaging_moving_rate(self, elastic_averaging_moving_rate):
     """Type-check ``elastic_averaging_moving_rate`` (None or numeric) and store it in the parameter map."""
     assert_is_type(elastic_averaging_moving_rate, None, numeric)
     key = "elastic_averaging_moving_rate"
     self._parms[key] = elastic_averaging_moving_rate
Example #42
0
 def nfolds(self, nfolds):
     """Type-check ``nfolds`` (None or int) and store it in the parameter map."""
     assert_is_type(nfolds, None, int)
     self._parms["nfolds"] = nfolds
Example #43
0
 def balance_classes(self, balance_classes):
     """Type-check ``balance_classes`` (None or bool) and store it in the parameter map."""
     assert_is_type(balance_classes, None, bool)
     self._parms["balance_classes"] = balance_classes
Example #44
0
 def keep_cross_validation_predictions(self, keep_cross_validation_predictions):
     """Type-check ``keep_cross_validation_predictions`` (None or bool) and store it in the parameter map."""
     assert_is_type(keep_cross_validation_predictions, None, bool)
     key = "keep_cross_validation_predictions"
     self._parms[key] = keep_cross_validation_predictions
Example #45
0
 def export_weights_and_biases(self, export_weights_and_biases):
     """Type-check ``export_weights_and_biases`` (None or bool) and store it in the parameter map."""
     assert_is_type(export_weights_and_biases, None, bool)
     self._parms["export_weights_and_biases"] = export_weights_and_biases
Example #46
0
 def ignored_columns(self, ignored_columns):
     """Type-check ``ignored_columns`` (None or a list of str) and store it in the parameter map."""
     assert_is_type(ignored_columns, None, [str])
     self._parms["ignored_columns"] = ignored_columns
Example #47
0
 def reproducible(self, reproducible):
     """Type-check ``reproducible`` (None or bool) and store it in the parameter map."""
     assert_is_type(reproducible, None, bool)
     self._parms["reproducible"] = reproducible
Example #48
0
 def categorical_encoding(self, categorical_encoding):
     """Type-check ``categorical_encoding`` (None or one of the listed encodings) and store it in the parameter map."""
     allowed = Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen")
     assert_is_type(categorical_encoding, None, allowed)
     self._parms["categorical_encoding"] = categorical_encoding
Example #49
0
 def stopping_rounds(self, stopping_rounds):
     """Type-check ``stopping_rounds`` (None or int) and store it in the parameter map."""
     assert_is_type(stopping_rounds, None, int)
     self._parms["stopping_rounds"] = stopping_rounds
Example #50
0
 def mini_batch_size(self, mini_batch_size):
     """Type-check ``mini_batch_size`` (None or int) and store it in the parameter map."""
     assert_is_type(mini_batch_size, None, int)
     self._parms["mini_batch_size"] = mini_batch_size
Example #51
0
 def training_frame(self, training_frame):
     """Type-check ``training_frame`` (None or H2OFrame) and store it in the parameter map."""
     assert_is_type(training_frame, None, H2OFrame)
     self._parms["training_frame"] = training_frame
Example #52
0
 def offset_column(self, offset_column):
     """Type-check ``offset_column`` (None or str) and store it in the parameter map."""
     assert_is_type(offset_column, None, str)
     self._parms["offset_column"] = offset_column
Example #53
0
 def fold_column(self, fold_column):
     """Type-check ``fold_column`` (None or str) and store it in the parameter map."""
     assert_is_type(fold_column, None, str)
     self._parms["fold_column"] = fold_column
Example #54
0
 def keep_cross_validation_fold_assignment(self, keep_cross_validation_fold_assignment):
     """Type-check ``keep_cross_validation_fold_assignment`` (None or bool) and store it in the parameter map."""
     assert_is_type(keep_cross_validation_fold_assignment, None, bool)
     key = "keep_cross_validation_fold_assignment"
     self._parms[key] = keep_cross_validation_fold_assignment
Example #55
0
 def score_validation_samples(self, score_validation_samples):
     """Type-check ``score_validation_samples`` (None or int) and store it in the parameter map."""
     assert_is_type(score_validation_samples, None, int)
     self._parms["score_validation_samples"] = score_validation_samples
Example #56
0
 def response_column(self, response_column):
     """Type-check ``response_column`` (None or str) and store it in the parameter map."""
     assert_is_type(response_column, None, str)
     self._parms["response_column"] = response_column
Example #57
0
 def max_after_balance_size(self, max_after_balance_size):
     """Type-check ``max_after_balance_size`` (None or float) and store it in the parameter map."""
     assert_is_type(max_after_balance_size, None, float)
     self._parms["max_after_balance_size"] = max_after_balance_size
Example #58
0
 def fold_assignment(self, fold_assignment):
     """Type-check ``fold_assignment`` (None or one of the listed schemes) and store it in the parameter map."""
     allowed = Enum("auto", "random", "modulo", "stratified")
     assert_is_type(fold_assignment, None, allowed)
     self._parms["fold_assignment"] = fold_assignment
Example #59
0
 def class_sampling_factors(self, class_sampling_factors):
     """Type-check ``class_sampling_factors`` (None or a list of float) and store it in the parameter map."""
     assert_is_type(class_sampling_factors, None, [float])
     self._parms["class_sampling_factors"] = class_sampling_factors
Example #60
0
 def score_interval(self, score_interval):
     """Type-check ``score_interval`` (None or numeric) and store it in the parameter map."""
     assert_is_type(score_interval, None, numeric)
     self._parms["score_interval"] = score_interval