def get_automl(project_name):
    """
    Retrieve information about an AutoML instance.

    :param str project_name: A string indicating the project_name of the automl instance to retrieve.
    :returns: A dictionary with the keys ``'project_name'``, ``"leader"`` and ``"leaderboard"``.
        ``"leader"`` is ``None`` when the leaderboard does not contain any model.
    """
    automl_json = h2o.api("GET /99/AutoML/%s" % project_name)
    project_name = automl_json["project_name"]
    model_ids = [key["name"] for key in automl_json['leaderboard']['models']]

    # The first leaderboard entry is treated as the leader. With an empty leaderboard there is
    # no model id to look up, so skip the lookup instead of calling h2o.get_model(None).
    leader = h2o.get_model(model_ids[0]) if model_ids else None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # If any failure happens, revert back to user's original setting for progress and let the error propagate.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard = h2o.H2OFrame(
            automl_json["leaderboard_table"].cell_values,
            column_names=automl_json["leaderboard_table"].col_header)
    finally:
        # try/finally (no except clause) keeps the original traceback intact on failure.
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): the slice presumably drops the table's leading index column - confirm
    # against H2OFrame slicing semantics.
    leaderboard = leaderboard[1:]

    return {'project_name': project_name, "leader": leader, "leaderboard": leaderboard}
def _fetch(self):
    """
    Refresh the cached leader id and leaderboard of this AutoML run.

    Issues ``GET /99/AutoML/<self._automl_key>``, stores the name of the first leaderboard model in
    ``self._leader_id`` (``None`` if the leaderboard has no models) and stores the parsed leaderboard
    table in ``self._leaderboard``.

    :returns: True if a leader model id was found, False otherwise.
    """
    res = h2o.api("GET /99/AutoML/" + self._automl_key)
    model_ids = [key["name"] for key in res['leaderboard']['models']]
    self._leader_id = model_ids[0] if model_ids else None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # If any failure happens, revert back to user's original setting for progress and let the error propagate.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard = h2o.H2OFrame(
            res["leaderboard_table"].cell_values,
            column_names=res["leaderboard_table"].col_header)
    finally:
        # try/finally (no except clause) keeps the original traceback intact on failure.
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): the slice presumably drops the table's leading index column - confirm
    # against H2OFrame slicing semantics.
    self._leaderboard = leaderboard[1:]
    return self._leader_id is not None
def _fetch(self):
    """
    Refresh the cached leader id and leaderboard of this AutoML run.

    Issues ``GET /99/AutoML/<self.project_name>``, stores the name of the first leaderboard model in
    ``self._leader_id`` (``None`` if the leaderboard has no models) and stores the parsed leaderboard
    table in ``self._leaderboard``.

    :returns: True if a leader model id was found, False otherwise.
    """
    res = h2o.api("GET /99/AutoML/" + self.project_name)
    model_ids = [key["name"] for key in res['leaderboard']['models']]
    self._leader_id = model_ids[0] if model_ids else None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # If any failure happens, revert back to user's original setting for progress and let the error propagate.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard = h2o.H2OFrame(
            res["leaderboard_table"].cell_values,
            column_names=res["leaderboard_table"].col_header)
    finally:
        # try/finally (no except clause) keeps the original traceback intact on failure.
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): the slice presumably drops the table's leading index column - confirm
    # against H2OFrame slicing semantics.
    self._leaderboard = leaderboard[1:]
    return self._leader_id is not None
def h2oshow_progress():
    """
    Python API test: h2o.show_progress()

    Trains a small GLM with stdout redirected into a buffer and checks that the captured text
    contains the progress bar output ("progress" and "100%"). If the run itself raises, the only
    accepted failure is the AttributeError mentioning "encoding" that the original test expects
    under Python 2. Finally verifies that h2o.show_progress() takes no arguments.
    """
    captured = StringIO()
    failure = None
    sys.stdout = captured   # redirect output so the progress bar text can be inspected
    try:
        h2o.show_progress()    # true by default.
        training_data = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
        Y = 3
        X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
        model.train(x=X, y=Y, training_frame=training_data)
    except Exception as e:    # will get error for python 2
        failure = e    # keep a reference: Python 3 unbinds `e` when the handler exits
    finally:
        sys.stdout = sys.__stdout__     # always restore old stdout, whatever happened above

    if failure is None:
        # make sure the word progress is found and % is found.  That is how progress is displayed.
        # Checked outside the try block so a failing assertion is reported as-is instead of being
        # caught and re-reported as a type mismatch by the Python 2 branch below.
        output = captured.getvalue()
        assert ("progress" in output) and ("100%" in output), "h2o.show_progress() command is not working."
    else:
        assert_is_type(failure, AttributeError)   # error for using python 2
        assert "encoding" in failure.args[0], "h2o.show_progress() command is not working."

    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec and fall back to the
    # legacy function only where getfullargspec does not exist (Python 2).
    get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    allargs = get_spec(h2o.show_progress)
    assert len(allargs.args) == 0, "h2o.show_progress() should have no arguments!"
def h2oshow_progress():
    """
    Python API test: h2o.show_progress()

    Trains a small GLM with stdout redirected into a buffer and checks that the captured text
    contains the progress bar output ("progress" and "100%"). If the run itself raises, the only
    accepted failure is the AttributeError mentioning "encoding" that the original test expects
    under Python 2. Finally verifies that h2o.show_progress() takes no arguments.
    """
    captured = StringIO()
    failure = None
    sys.stdout = captured   # redirect output so the progress bar text can be inspected
    try:
        h2o.show_progress()    # true by default.
        training_data = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
        Y = 3
        X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
        model.train(x=X, y=Y, training_frame=training_data)
    except Exception as e:    # will get error for python 2
        failure = e    # keep a reference: Python 3 unbinds `e` when the handler exits
    finally:
        sys.stdout = sys.__stdout__     # always restore old stdout, whatever happened above

    if failure is None:
        # make sure the word progress is found and % is found.  That is how progress is displayed.
        # Checked outside the try block so a failing assertion is reported as-is instead of being
        # caught and re-reported as a type mismatch by the Python 2 branch below.
        output = captured.getvalue()
        assert ("progress" in output) and ("100%" in output), "h2o.show_progress() command is not working."
    else:
        assert_is_type(failure, AttributeError)   # error for using python 2
        assert "encoding" in failure.args[0], "h2o.show_progress() command is not working."

    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec and fall back to the
    # legacy function only where getfullargspec does not exist (Python 2).
    get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    allargs = get_spec(h2o.show_progress)
    assert len(allargs.args) == 0, "h2o.show_progress() should have no arguments!"
# Script fragment: finish renaming the ordered variable list, then import the H2O
# modelling stack and obtain an H2OContext for the Spark session.

# Replace, in place, every entry that also appears in `cats` with the same name plus
# the '_Tencode' suffix (presumably the name of its encoded counterpart - confirm).
for i, v in enumerate(MOST_IMPORTANT_VARS_ORDERD):
    if v in cats:
        MOST_IMPORTANT_VARS_ORDERD[i] = v + '_Tencode'

# Disabled feature-combination step, kept for reference:
#
# print('Combining features....')
# (train, valid, test) = feature_combiner(train, test, MOST_IMPORTANT_VARS_ORDERD, valid_frame = valid, frame_type='spark')
# print('Done combining features.')
#
# encoded_combined_nums, cats = get_type_lists(frame=train,rejects=[ID_VAR,Y],frame_type='spark')

################################################################################
#                 DONE WITH PREPROCESSING - START TRAINING                     #
################################################################################
import h2o
h2o.show_progress() # turn on progress bars
from h2o.estimators.glm import H2OGeneralizedLinearEstimator # import GLM models
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch # grid search
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
import xgboost as xgb
import matplotlib
matplotlib.use('Agg') # Needed when running matplotlib on a server without a display
# NOTE(review): H2OConf and H2OContext below are presumably provided by this wildcard import.
from pysparkling import *

# Build the configuration from the `spark` session (defined outside this fragment)
# and get or create the H2OContext bound to it.
conf = H2OConf(spark=spark)
conf.nthreads = -1
hc = H2OContext.getOrCreate(spark, conf)
import seaborn as sns
import h2o
from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator # import GLM models
# NOTE(review): H2OGridSearch is already imported a few lines above; this repeat is redundant.
from h2o.grid.grid_search import H2OGridSearch
#import xgboost as xgb
h2o.init(max_mem_size='12G') # initialise h2o, passing a 12G maximum memory size
h2o.show_progress() # turn on h2o progress bars

# Definitions
# Format floats with three decimal places when displayed.
# NOTE(review): `pd` is not imported in this fragment; presumably pandas is imported earlier in the file.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
#%matplotlib inline
#njobs = 4

def get_type_lists(frame, rejects):
    """Creates lists of numeric and categorical variables.

    :param frame: The frame from which to determine types.
    :param rejects: Variable names not to be included in returned lists.
    :return: Tuple of lists for numeric and categorical variables in the frame.

    """
    nums, cats = [], []