def _process_response(response, save_to): """ Given a response object, prepare it to be handed over to the external caller. Preparation steps include: * detect if the response has error status, and convert it to an appropriate exception; * detect Content-Type, and based on that either parse the response as JSON or return as plain text. """ status_code = response.status_code if status_code == 200 and save_to: if save_to.startswith("~"): save_to = os.path.expanduser(save_to) if os.path.isdir(save_to) or save_to.endswith(os.path.sep): dirname = os.path.abspath(save_to) filename = H2OConnection._find_file_name(response) else: dirname, filename = os.path.split(os.path.abspath(save_to)) fullname = os.path.join(dirname, filename) try: if not os.path.exists(dirname): os.makedirs(dirname) with open(fullname, "wb") as f: for chunk in response.iter_content(chunk_size=65536): if chunk: # Empty chunks may occasionally happen f.write(chunk) except OSError as e: raise H2OValueError("Cannot write to file %s: %s" % (fullname, e)) return fullname content_type = response.headers.get("Content-Type", "") if ";" in content_type: # Remove a ";charset=..." part content_type = content_type[:content_type.index(";")] # Auto-detect response type by its content-type. Decode JSON, all other responses pass as-is. if content_type == "application/json": try: data = response.json(object_pairs_hook=H2OResponse) except (JSONDecodeError, requests.exceptions.ContentDecodingError) as e: raise H2OServerError("Malformed JSON from server (%s):\n%s" % (str(e), response.text)) else: data = response.text # Success (200 = "Ok", 201 = "Created", 202 = "Accepted", 204 = "No Content") if status_code in {200, 201, 202, 204}: return data # Client errors (400 = "Bad Request", 404 = "Not Found", 412 = "Precondition Failed") if status_code in {400, 404, 412} and isinstance( data, (H2OErrorV3, H2OModelBuilderErrorV3)): raise H2OResponseError(data) # Server errors (notably 500 = "Server Error") # Note that it is possible to receive valid H2OErrorV3 object in this case, however it merely means the server # did not provide the correct status code. raise H2OServerError("HTTP %d %s:\n%r" % (status_code, response.reason, data))
def _process_response(response): """ Given a response object, prepare it to be handed over to the external caller. Preparation steps include: * detect if the response has error status, and convert it to an appropriate exception; * detect Content-Type, and based on that either parse the response as JSON or return as plain text. """ content_type = response.headers[ "Content-Type"] if "Content-Type" in response.headers else "" if ";" in content_type: # Remove a ";charset=..." part content_type = content_type[:content_type.index(";")] status_code = response.status_code # Auto-detect response type by its content-type. Decode JSON, all other responses pass as-is. if content_type == "application/json": try: data = response.json(object_pairs_hook=H2OResponse) except (JSONDecodeError, requests.exceptions.ContentDecodingError) as e: raise H2OServerError("Malformed JSON from server (%s):\n%s" % (str(e), response.text)) else: data = response.text # Success (200 = "Ok", 201 = "Created", 202 = "Accepted", 204 = "No Content") if status_code in {200, 201, 202, 204}: return data # Client errors (400 = "Bad Request", 404 = "Not Found", 412 = "Precondition Failed") if status_code in {400, 404, 412} and isinstance( data, (H2OErrorV3, H2OModelBuilderErrorV3)): raise H2OResponseError(data) # Server errors (notably 500 = "Server Error") # Note that it is possible to receive valid H2OErrorV3 object in this case, however it merely means the server # did not provide the correct status code. raise H2OServerError("HTTP %d %s:\n%r" % (status_code, response.reason, data))
def train(self, x=None, y=None, training_frame=None, blending_frame=None, verbose=False, **kwargs): has_training_frame = training_frame is not None or self.training_frame is not None blending_frame = H2OFrame._validate(blending_frame, 'blending_frame', required=not has_training_frame) if not has_training_frame: training_frame = blending_frame # used to bypass default checks in super class and backend and to guarantee default metrics sup = super(self.__class__, self) def extend_parms(parms): if blending_frame is not None: parms['blending_frame'] = blending_frame if self.metalearner_fold_column is not None: parms['ignored_columns'].remove( quoted(self.metalearner_fold_column)) parms = sup._make_parms(x, y, training_frame, extend_parms_fn=extend_parms, **kwargs) sup._train(parms, verbose=verbose) if self.metalearner() is None: raise H2OResponseError( "Meta learner didn't get to be trained in time. " "Try increasing max_runtime_secs or setting it to 0 (unlimited)." )
def train(self, x=None, y=None, training_frame=None, fold_column=None, weights_column=None, validation_frame=None, leaderboard_frame=None, blending_frame=None): """ Begins an AutoML task, a background task that automatically builds a number of models with various algorithms and tracks their performance in a leaderboard. At any point in the process you may use H2O's performance or prediction functions on the resulting models. :param x: A list of column names or indices indicating the predictor columns. :param y: An index or a column name indicating the response column. :param fold_column: The name or index of the column in training_frame that holds per-row fold assignments. :param weights_column: The name or index of the column in training_frame that holds per-row weights. :param training_frame: The H2OFrame having the columns indicated by x and y (as well as any additional columns specified by fold_column or weights_column). :param validation_frame: H2OFrame with validation data. This argument is ignored unless the user sets nfolds = 0. If cross-validation is turned off, then a validation frame can be specified and used for early stopping of individual models and early stopping of the grid searches. By default and when nfolds > 1, cross-validation metrics will be used for early stopping and thus validation_frame will be ignored. :param leaderboard_frame: H2OFrame with test data for scoring the leaderboard. This is optional and if this is set to None (the default), then cross-validation metrics will be used to generate the leaderboard rankings instead. :param blending_frame: H2OFrame used to train the the metalearning algorithm in Stacked Ensembles (instead of relying on cross-validated predicted values). This is optional, but when provided, it is also recommended to disable cross validation by setting `nfolds=0` and to provide a leaderboard frame for scoring purposes. :returns: An H2OAutoML object. :examples: >>> # Set up an H2OAutoML object >>> aml = H2OAutoML(max_runtime_secs=30) >>> # Launch an AutoML run >>> aml.train(y=y, training_frame=train) """ # Minimal required arguments are training_frame and y (response) self.training_frame = training_frame ncols = self.training_frame.ncols names = self.training_frame.names if y is None and self.response_column is None: raise H2OValueError( 'The response column (y) is not set; please set it to the name of the column that you are trying to predict in your data.' ) elif y is not None: assert_is_type(y, int, str) if is_type(y, int): if not (-ncols <= y < ncols): raise H2OValueError( "Column %d does not exist in the training frame" % y) y = names[y] else: if y not in names: raise H2OValueError( "Column %s does not exist in the training frame" % y) self.response_column = y self.fold_column = fold_column self.weights_column = weights_column self.validation_frame = validation_frame self.leaderboard_frame = leaderboard_frame self.blending_frame = blending_frame if x is not None: assert_is_type(x, list) xset = set() if is_type(x, int, str): x = [x] for xi in x: if is_type(xi, int): if not (-ncols <= xi < ncols): raise H2OValueError( "Column %d does not exist in the training frame" % xi) xset.add(names[xi]) else: if xi not in names: raise H2OValueError( "Column %s not in the training frame" % xi) xset.add(xi) ignored_columns = set(names) - xset for col in [y, fold_column, weights_column]: if col is not None and col in ignored_columns: ignored_columns.remove(col) if ignored_columns is not None: self.input_spec['ignored_columns'] = list(ignored_columns) def clean_params(params): return ({ k: clean_params(v) for k, v in params.items() if v is not None } if isinstance(params, dict) else H2OEstimator._keyify(params)) automl_build_params = clean_params( dict( build_control=self.build_control, build_models=self.build_models, input_spec=self.input_spec, )) resp = self._build_resp = h2o.api('POST /99/AutoMLBuilder', json=automl_build_params) if 'job' not in resp: raise H2OResponseError( "Backend failed to build the AutoML job: {}".format(resp)) if not self.project_name: self.project_name = resp['build_control']['project_name'] self.__frozen = True self._job = H2OJob(resp['job'], "AutoML") poll_updates = ft.partial(self._poll_training_updates, verbosity=self._verbosity, state={}) try: self._job.poll(poll_updates=poll_updates) finally: poll_updates(self._job, 1) self._fetch() return self.leader