def getLambdaModel(ip, port):
    """Check that fetching a GLM sub-model for a given lambda is repeatable.

    Imports the prostate dataset, runs a GLM lambda search with a randomly
    chosen family ("gaussian" or "binomial"), prints the lambda values
    reported by the search, and then ten times picks one of those lambdas
    and retrieves a model for it twice, asserting both retrievals are equal.

    Args:
        ip: Not used by this function; kept so existing callers that pass
            it keep working.
        port: Not used by this function; kept for the same reason.

    Raises:
        AssertionError: If the two models fetched for the same lambda differ.
    """
    print("Read data")
    prostate = h2o.import_file(path=h2o.locate("smalldata/logreg/prostate.csv"))

    myX = ["AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"]
    myY = "CAPSULE"
    family = random.choice(["gaussian", "binomial"])
    print(family)

    print("Do lambda search and build models")
    # Only the response differs between the two families: the binomial run
    # passes the response column through asfactor(), the gaussian run uses
    # it as-is.
    response = prostate[myY] if family == "gaussian" else prostate[myY].asfactor()
    model = h2o.glm(x=prostate[myX], y=response, family=family,
                    standardize=True, use_all_factor_levels=True,
                    lambda_search=True)

    print("the models were built over the following lambda values: ")
    all_lambdas = model.models(1).lambda_all()
    print(all_lambdas)

    def _random_submodel_for(lambda_value):
        # len() must wrap the collection itself; randint includes both end
        # points, so the last valid index is len - 1.
        idx = random.randint(0, len(model.models()) - 1)
        # The lambda belongs to getGLMLambdaModel (model, lambda), not to
        # the sub-model lookup.
        return h2o.getGLMLambdaModel(model.models(idx), lambda_value)

    for _ in range(10):
        # choice() yields a single lambda value; sample(..., 1) would wrap
        # it in a one-element list.
        chosen_lambda = random.choice(all_lambdas)
        print("For Lambda we get this model:")
        m1 = _random_submodel_for(chosen_lambda)
        m1.show()
        print("this model should be same as the one above:")
        m2 = _random_submodel_for(chosen_lambda)
        m2.show()
        assert m1 == m2, "expected models to be equal"
def getLambdaModel():
    """Check that fetching a GLM sub-model for a given lambda is repeatable.

    Imports the prostate dataset, runs a GLM lambda search with a randomly
    chosen family ("gaussian" or "binomial"), prints the lambda values
    reported by the search, and then ten times picks one of those lambdas
    and retrieves a model for it twice, asserting both retrievals are equal.

    Raises:
        AssertionError: If the two models fetched for the same lambda differ.
    """
    print("Read data")
    prostate = h2o.import_file(path=tests.locate("smalldata/logreg/prostate.csv"))

    myX = ["AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"]
    myY = "CAPSULE"
    family = random.choice(["gaussian", "binomial"])
    print(family)

    print("Do lambda search and build models")
    # Only the response differs between the two families: the binomial run
    # passes the response column through asfactor(), the gaussian run uses
    # it as-is.
    response = prostate[myY] if family == "gaussian" else prostate[myY].asfactor()
    model = h2o.glm(x=prostate[myX], y=response, family=family,
                    standardize=True, use_all_factor_levels=True,
                    lambda_search=True)

    print("the models were built over the following lambda values: ")
    all_lambdas = model.models(1).lambda_all()
    print(all_lambdas)

    def _random_submodel_for(lambda_value):
        # len() must wrap the collection itself; randint includes both end
        # points, so the last valid index is len - 1.
        idx = random.randint(0, len(model.models()) - 1)
        # The lambda belongs to getGLMLambdaModel (model, lambda), not to
        # the sub-model lookup.
        return h2o.getGLMLambdaModel(model.models(idx), lambda_value)

    for _ in range(10):
        # choice() yields a single lambda value; sample(..., 1) would wrap
        # it in a one-element list.
        chosen_lambda = random.choice(all_lambdas)
        print("For Lambda we get this model:")
        m1 = _random_submodel_for(chosen_lambda)
        m1.show()
        print("this model should be same as the one above:")
        m2 = _random_submodel_for(chosen_lambda)
        m2.show()
        assert m1 == m2, "expected models to be equal"
def grid_lambda_search():
    """Exercise GLM lambda search and per-lambda model lookup on prostate.csv.

    Trains a binomial GLM with lambda search (nlambdas=5, n_folds=2), picks
    sub-model 0 or 1 at random, and checks that:

    * at most 5 lambdas are reported,
    * the model fetched for a randomly chosen lambda reports that lambda,
    * the model fetched for the best lambda equals the chosen sub-model.

    It then trains a second binomial GLM with alpha=[0.25, 0.5] and checks
    that its best lambda matches the first run's and that it reports at
    most 20 lambdas.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    data = h2o.import_file(
        path=pyunit_utils.locate("smalldata/logreg/prostate.csv")
    )

    # First run: binomial GLM with lambda search, no explicit alpha.
    glm = H2OGeneralizedLinearEstimator(
        family="binomial",
        nlambdas=5,
        lambda_search=True,
        n_folds=2,
    )
    glm.train(x=list(range(2, 9)), y=1, training_frame=data)

    # Coin flip between the first and second sub-model.
    model_idx = 0 if random.random() < 0.5 else 1

    chosen_submodel = glm.models(model_idx)
    search_params = glm.params()

    assert len(search_params.lambdas()) <= 5, "expected 5 or less lambdas"

    random_lambda = random.choice(search_params.lambdas())
    print("RANDOM LAMBDA")
    print(random_lambda)

    # Look up the model for the random lambda; it should report that lambda.
    random_model = glm.getGLMLambdaModel(chosen_submodel, random_lambda)
    print(random_model.Lambda())
    print(random_lambda)
    assert random_model.Lambda() == random_lambda, "expected lambdas to be equal"

    # Look up the model for the best lambda; it should equal the sub-model.
    best_model = h2o.getGLMLambdaModel(
        chosen_submodel, search_params.lambda_best()
    )
    assert best_model.model() == chosen_submodel.model(), "expected models to be equal"

    # Second run: same settings plus an alpha grid of [0.25, 0.5].
    alpha_glm = H2OGeneralizedLinearEstimator(
        family="binomial",
        alpha=[0.25, 0.5],
        nlambdas=5,
        lambda_search=True,
        n_folds=2,
    )
    alpha_glm.train(x=list(range(2, 9)), y=1, training_frame=data)

    alpha_submodel = alpha_glm.models(model_idx)
    alpha_best = alpha_submodel.models(alpha_submodel.best_model())
    alpha_best_params = alpha_best.params()

    assert search_params.lambda_best() == alpha_best_params.lambda_best(), "expected lambdas to be equal"
    assert len(alpha_best_params.lambda_all()) <= 20, "expected 20 or fewer lambdas"
def grid_lambda_search(ip, port):
    """Exercise GLM lambda search and per-lambda model lookup on prostate.csv.

    Connects to H2O at the given address, builds a binomial GLM with lambda
    search (nlambdas=5, n_folds=2), picks sub-model 0 or 1 at random, and
    checks that:

    * at most 5 lambdas are reported,
    * the model fetched for a randomly chosen lambda reports that lambda,
    * the model fetched for the best lambda equals the chosen sub-model.

    It then builds a second binomial GLM with alpha=[0.25, 0.5] and checks
    that its best lambda matches the first run's and that it reports at
    most 20 lambdas.

    Args:
        ip: Passed to h2o.init as its first argument.
        port: Passed to h2o.init as its second argument.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    # Connect to h2o
    h2o.init(ip, port)

    data = h2o.import_frame(
        path=h2o.locate("smalldata/logreg/prostate.csv")
    )

    # First run: binomial GLM with lambda search, no explicit alpha.
    glm = h2o.glm(
        x=data[2:9],
        y=data[1],
        family="binomial",
        nlambdas=5,
        lambda_search=True,
        n_folds=2,
    )

    # Coin flip between the first and second sub-model.
    model_idx = 0 if random.random() < 0.5 else 1

    chosen_submodel = glm.models(model_idx)
    search_params = glm.params()

    assert len(search_params.lambdas()) <= 5, "expected 5 or less lambdas"

    random_lambda = random.choice(search_params.lambdas())
    print("RANDOM LAMBDA")
    print(random_lambda)

    # Look up the model for the random lambda; it should report that lambda.
    random_model = glm.getGLMLambdaModel(chosen_submodel, random_lambda)
    print(random_model.Lambda())
    print(random_lambda)
    assert random_model.Lambda() == random_lambda, "expected lambdas to be equal"

    # Look up the model for the best lambda; it should equal the sub-model.
    best_model = h2o.getGLMLambdaModel(
        chosen_submodel, search_params.lambda_best()
    )
    assert best_model.model() == chosen_submodel.model(), "expected models to be equal"

    # Second run: same settings plus an alpha grid of [0.25, 0.5].
    alpha_glm = h2o.glm(
        x=data[2:9],
        y=data[1],
        family="binomial",
        alpha=[0.25, 0.5],
        nlambdas=5,
        lambda_search=True,
        n_folds=2,
    )

    alpha_submodel = alpha_glm.models(model_idx)
    alpha_best = alpha_submodel.models(alpha_submodel.best_model())
    alpha_best_params = alpha_best.params()

    assert search_params.lambda_best() == alpha_best_params.lambda_best(), "expected lambdas to be equal"
    assert len(alpha_best_params.lambda_all()) <= 20, "expected 20 or fewer lambdas"