Esempio n. 1
0
 def read_cms(cms=None, domains=None):
     """
     Wrap every raw confusion matrix in ``cms`` in a ``ConfusionMatrix``.

     :param cms: A ``list`` whose entries each satisfy ``is_listlike``.
     :param domains: Passed unchanged as the second argument to every
         ``ConfusionMatrix`` that is built.
     :returns: A list with one ``ConfusionMatrix(cm, domains)`` per entry of
         ``cms``, in the same order.
     :raises ValueError: If ``cms`` is ``None``, is not a ``list``, or holds an
         entry that is not list-like.
     """
     if cms is None:
         raise ValueError("Missing data, no `cms`.")
     if not isinstance(cms, list):
         raise ValueError("`cms` must be a list of lists")
     # Stop at the first offending row, exactly as a short-circuiting all().
     for row in cms:
         if not is_listlike(row):
             raise ValueError("`cms` must be a list of lists")
     matrices = []
     for row in cms:
         matrices.append(ConfusionMatrix(row, domains))
     return matrices
Esempio n. 2
0
def lazy_import(path):
    """
    Import one data file, or every file in a collection of paths.

    :param path: Either a single path string or a list-like of paths. A single
        path is validated with ``assert_is_str`` before being imported.
    :returns: For a single path, whatever ``_import(path)`` returns. For a
        list-like, a list holding the first element of each ``_import`` result.
    """
    if not is_listlike(path):
        # Single-path case: validate, then return the raw import result.
        assert_is_str(path)
        return _import(path)
    # Collection case: keep only the first item of each individual import.
    return [_import(single)[0] for single in path]
Esempio n. 3
0
def lazy_import(path):
    """
    Import the file(s) designated by ``path``.

    :param path: A path to a data file, or a list-like of such paths.
    :returns: ``_import(path)`` for a single path; for a list-like, a list
        built from element ``[0]`` of ``_import`` applied to each path.
    """
    if is_listlike(path):
        first_results = []
        for entry in path:
            imported = _import(entry)
            first_results.append(imported[0])
        return first_results

    # Not a collection: it has to be a single string path.
    assert_is_str(path)
    return _import(path)
Esempio n. 4
0
    def __init__(self, fr, by):
        """
        Set up the group-by state and normalise ``by`` into a list.

        :param fr: Source object; its ``names`` sequence is used to translate
            column names into positions.
        :param by: A column name, a list-like of column names and/or other
            values, or any single value. Strings are replaced by their
            position in ``fr.names``; everything else is stored unchanged.
        """
        self._fr = fr  # input: source of column names
        # A lone string or a non-list-like value becomes a one-element list so
        # that one resolution step covers all three input shapes.
        candidates = [by] if is_str(by) or not is_listlike(by) else by
        self._by = [  # input: grouping columns, names resolved to indices
            self._fr.names.index(item) if is_str(item) else item
            for item in candidates
        ]
        self._aggs = {}  # input: registered aggregations
        self._res = None  # output: not computed yet
Esempio n. 5
0
 def _add_agg(self, op, col, na):
     """
     Register aggregation ``op`` for one or more columns.

     :param op: Aggregation name. For ``"nrow"`` the ``col`` argument is
         ignored and column 0 is used.
     :param col: ``None`` (every column whose index is not in ``self._by``),
         a column name, a column index, or a list-like of any of these.
     :param na: Stored as the third element of the aggregation entry.
     :returns: ``self``.
     :raises ValueError: If ``col`` is none of the accepted kinds.
     """
     target = 0 if op == "nrow" else col

     if target is None:
         # No column given: register the op on every non-grouping column.
         for position in range(self._fr.ncol):
             if position in self._by:
                 continue
             self._add_agg(op, position, na)
         return self

     if is_str(target):
         cidx = self._fr.names.index(target)
     elif is_int(target):
         cidx = target
     elif is_listlike(target):
         # Several columns: register each one through a recursive call.
         for member in target:
             self._add_agg(op, member, na)
         return self
     else:
         raise ValueError("col must be a column name or index.")

     # Entries are keyed "<op>_<column name>"; the same key overwrites.
     label = "{}_{}".format(op, self._fr.names[cidx])
     self._aggs[label] = [op, cidx, na]
     return self
Esempio n. 6
0
 def train(self,
           x,
           y=None,
           training_frame=None,
           offset_column=None,
           fold_column=None,
           weights_column=None,
           validation_frame=None,
           **params):
     """
     Assemble the grid-search parameter dict and pass it to ``build_model``.

     The dict is layered in this order, later layers overriding earlier ones:
     a copy of ``self._parms``; the named arguments of this call (including
     those left at ``None``); ``search_criteria`` and ``hyper_parameters``;
     the non-``None`` entries of ``self.model._parms``; and finally
     ``**params``.

     :param x: Stored in the parameter dict under ``"x"``.
     :param y: Response column reference, or a one-element list-like holding
         it. ``None`` skips the estimator-type detection.
     :param training_frame: Required; indexed with ``y`` to decide between
         ``"classifier"`` and ``"regressor"``.
     :param offset_column: Stored in the parameter dict under its own name.
     :param fold_column: Stored in the parameter dict under its own name.
     :param weights_column: Stored in the parameter dict under its own name.
     :param validation_frame: Stored in the parameter dict under its own name.
     :param params: Extra keyword arguments merged last into the dict.
     :raises ValueError: If ``training_frame`` is ``None`` or ``y`` is a
         list-like whose length is not 1.

     NOTE(review): when ``y`` is a one-element list-like, only ``parms["y"]``
     is unwrapped; ``tframe[y]`` below still receives the list. Confirm this
     is intended.
     """
     # same api as estimator_base train
     # Must stay the first statement: locals() is captured before any other
     # local exists, so it holds the call arguments (and, judging by the FIXME
     # below, possibly `__class__` on Python 3).
     algo_params = locals()
     parms = self._parms.copy()
     # Copy every named argument except the bookkeeping names.
     parms.update({
         k: v
         for k, v in algo_params.items()
         if k not in ["self", "params", "algo_params", "parms"]
     })
     parms["search_criteria"] = self.search_criteria
     parms["hyper_parameters"] = self.hyper_params  # unique to grid search
     # Parameters explicitly set on the wrapped model override the call arguments.
     parms.update({
         k: v
         for k, v in list(self.model._parms.items()) if v is not None
     })  # unique to grid search
     # Caller-supplied extras win over everything gathered so far.
     parms.update(params)
     if '__class__' in parms:  # FIXME: hack for PY3
         del parms['__class__']
     y = algo_params["y"]
     tframe = algo_params["training_frame"]
     if tframe is None: raise ValueError("Missing training_frame")
     if y is not None:
         if is_listlike(y):
             if len(y) == 1:
                 # Unwrap a single-element list for the parameter dict.
                 parms["y"] = y[0]
             else:
                 raise ValueError('y must be a single column reference')
         self._estimator_type = "classifier" if tframe[y].isfactor(
         ) else "regressor"
     self.build_model(parms)
Esempio n. 7
0
    def _model_build(self, x, y, tframe, vframe, kwargs):
        """
        Post the grid build request, wait for it, and collect the models.

        :param x: Predictor column(s): one value or a list-like of names or
            integer indices (indices are translated through ``tframe.names``).
        :param y: Response column name or integer index, or ``None``.
        :param tframe: Training frame; also the source of column names.
        :param vframe: Validation frame, or ``None``.
        :param kwargs: Build parameters. Must already contain the keys
            ``"offset_column"``, ``"fold_column"`` and ``"weights_column"``.
            It is mutated in place up to and including ``"ignored_columns"``;
            the request is then sent from a filtered copy.
        :returns: ``None``. With ``self._future`` set, the job is stored in
            ``self._job`` and the method returns without polling.
        :raises ValueError: If the finished grid reports no models.
        """
        kwargs['training_frame'] = tframe
        if vframe is not None:
            kwargs["validation_frame"] = vframe
        if is_int(y):
            y = tframe.names[y]
        if y is not None:
            kwargs['response_column'] = y
        if not is_listlike(x):
            x = [x]
        if is_int(x[0]):
            x = [tframe.names[idx] for idx in x]

        # Every frame column that is not a predictor, the response, or one of
        # the special columns is reported as ignored.
        used_columns = set(x + [
            y,
            kwargs["offset_column"],
            kwargs["fold_column"],
            kwargs["weights_column"],
        ])
        unused_columns = list(set(tframe.names) - used_columns)
        if unused_columns:
            kwargs["ignored_columns"] = [quoted(name) for name in unused_columns]
        else:
            kwargs["ignored_columns"] = None

        # Drop unset parameters and replace frames by their ids; from here on
        # `kwargs` is a new dict and the caller's dict is no longer touched.
        kwargs = {
            key: (value.frame_id if isinstance(value, H2OFrame) else value)
            for key, value in kwargs.items()
            if value is not None
        }
        algo = self.model._compute_algo()  # unique to grid search
        if self.grid_id is not None:
            kwargs["grid_id"] = self.grid_id
        rest_ver = kwargs.pop("_rest_version", None)

        request = h2o.api("POST /99/Grid/%s" % algo, data=kwargs)
        grid = H2OJob(request, job_type=(algo + " Grid Build"))

        if self._future:
            self._job = grid
            return

        grid.poll()
        grid_json = h2o.api("GET /99/Grids/%s" % grid.dest_key)

        # Failure details are only printed when a REST version was requested.
        if rest_ver is not None and len(grid_json["failure_details"]) > 0:
            print("Errors/Warnings building gridsearch model\n")

            for error_index, error_message in enumerate(
                    grid_json["failure_details"]):
                failed = grid_json["failed_params"][error_index]
                if isinstance(failed, dict):
                    for h_name in grid_json['hyper_names']:
                        print("Hyper-parameter: {0}, {1}".format(
                            h_name, failed[h_name]))

                traces = grid_json["failure_stack_traces"]
                if len(traces) > error_index:
                    print("failure_details: {0}\nfailure_stack_traces: {1}\n"
                          .format(error_message, traces[error_index]))

        model_ids = grid_json['model_ids']
        self.models = [h2o.get_model(entry['name']) for entry in model_ids]

        # A grid can come back empty, e.g. when the user supplied bad
        # parameter values.
        if len(model_ids) == 0:
            raise ValueError(
                "Gridsearch returns no model due to bad parameter values or other reasons...."
            )

        # The first model of the grid is fetched to learn the model class
        # (binomial, multinomial, etc).
        model_url = "GET /%d/Models/%s" % (rest_ver or 3, model_ids[0]['name'])
        first_model_json = h2o.api(model_url)['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
Esempio n. 8
0
 def read_cms(cms=None, domains=None):
     """
     Turn a list of raw confusion matrices into ``ConfusionMatrix`` objects.

     :param cms: A ``list`` in which every entry satisfies ``is_listlike``.
     :param domains: Handed to each ``ConfusionMatrix`` as its second argument.
     :returns: ``[ConfusionMatrix(cm, domains), ...]`` in the order of ``cms``.
     :raises ValueError: If ``cms`` is ``None``, is not a ``list``, or has an
         entry that is not list-like.
     """
     if cms is None:
         raise ValueError("Missing data, no `cms`.")

     # Both shape problems (wrong container, wrong row type) share one message.
     if not isinstance(cms, list):
         raise ValueError("`cms` must be a list of lists")
     has_bad_row = any(not is_listlike(row) for row in cms)
     if has_bad_row:
         raise ValueError("`cms` must be a list of lists")

     return [ConfusionMatrix(matrix, domains) for matrix in cms]