# Amazon fine-food reviews demo: load the dataset, split it 80/20, and
# launch an NLP classification experiment against the `PositiveReview` label.
# Requires `h2oai` (Driverless AI client), `dataPath`, and `basename` to be
# defined earlier in the script.
target = 'PositiveReview'
ratio = 0.8
reviews_data = h2oai.create_dataset_sync(dataPath)

# Split the data
reviews_split_data = h2oai.make_dataset_split(
    dataset_key=reviews_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,
)
train_key = h2oai.get_dataset_split_job(reviews_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(reviews_split_data).entity[1]

# Reviews Default: drop identifier/leakage columns so the model trains on
# the review text rather than metadata.
dropped = [
    'UserID', 'ProductId', 'Id', 'Summary', 'Score',
    'HelpfulnessDenominator', 'HelpfulnessNumerator', 'ProfileName', 'Time',
]
knobs = [8, 2, 7]  # accuracy, time, interpretability dials
reviews1 = h2oai.start_experiment_sync(
    experiment_name="Reviews NLP Big",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=True,
    accuracy=knobs[0],
    # NOTE(review): the original text was truncated after `accuracy=...`;
    # the remaining arguments are reconstructed from the sibling experiment
    # calls in this file — confirm against the original script.
    time=knobs[1],
    interpretability=knobs[2],
    enable_gpus=True,
    cols_to_drop=dropped,
)
# Credit-card default demo: load the dataset, split it, and launch a
# classification experiment.
# NOTE(review): this fragment reuses `target`, `ratio`, and `dropped` from
# the preceding script section — verify those values are the ones intended
# for the card-default dataset.
card_data = h2oai.create_dataset_sync(dataPath)

# Split the data
card_split_data = h2oai.make_dataset_split(
    dataset_key=card_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,
)
train_key = h2oai.get_dataset_split_job(card_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(card_split_data).entity[1]

# Card Default
knobs = [6, 4, 6]  # accuracy, time, interpretability dials
card_default = h2oai.start_experiment_sync(
    experiment_name="Card Default",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=True,
    accuracy=knobs[0],
    time=knobs[1],
    interpretability=knobs[2],
    enable_gpus=True,
    cols_to_drop=dropped,
)  # fix: the original call was missing its closing parenthesis
# Diabetes demo: load the dataset, carve out an 80/20 train/test split,
# and kick off a classification experiment with no dropped columns.
# NOTE(review): `target` is inherited from an earlier script section —
# confirm it matches the diabetes dataset's label column.
ratio = 0.8
diabetes_data = h2oai.create_dataset_sync(dataPath)

# Split the data
diabetes_split_data = h2oai.make_dataset_split(
    dataset_key=diabetes_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,
)
train_key = h2oai.get_dataset_split_job(diabetes_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(diabetes_split_data).entity[1]

dropped = []  # keep every column for this run

# Diabetes Default
knobs = [8, 2, 8]  # accuracy, time, interpretability dials
diabetes1 = h2oai.start_experiment_sync(
    experiment_name="Diabetes",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=True,
    accuracy=knobs[0],
    time=knobs[1],
    interpretability=knobs[2],
    enable_gpus=True,
    cols_to_drop=dropped,
)
# Boston-housing demo: load the dataset, split 80/20, and launch a
# REGRESSION experiment (is_classification=False, RMSE scorer) predicting
# the `VALUE` column.
target = 'VALUE'
ratio = 0.8
boston_data = h2oai.create_dataset_sync(dataPath)

# Split the data
boston_split_data = h2oai.make_dataset_split(
    dataset_key=boston_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,
)
train_key = h2oai.get_dataset_split_job(boston_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(boston_split_data).entity[1]

dropped = []  # keep every column for this run

# Housing Experiment #1
knobs = [7, 2, 8]  # accuracy, time, interpretability dials
housing1 = h2oai.start_experiment_sync(
    experiment_name="Housing",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=False,
    accuracy=knobs[0],
    time=knobs[1],
    interpretability=knobs[2],
    scorer='RMSE',
    enable_gpus=True,
    # NOTE(review): the original text was truncated after `enable_gpus=True,`;
    # the closing argument is reconstructed from the sibling experiment calls
    # in this file — confirm against the original script.
    cols_to_drop=dropped,
)
# Titanic demo: load the dataset, split 80/20, and launch a classification
# experiment predicting `survived`, dropping outcome-leaking columns
# (boat/body/home.dest reveal survival after the fact).
target = 'survived'
ratio = 0.8
titanic_data = h2oai.create_dataset_sync(dataPath)

# Split the data
titanic_split_data = h2oai.make_dataset_split(
    dataset_key=titanic_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,
)
train_key = h2oai.get_dataset_split_job(titanic_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(titanic_split_data).entity[1]

knobs = [8, 2, 8]  # accuracy, time, interpretability dials

# Titanic Default
dropped = ['no.title', 'cabin', 'embarked', 'boat', 'body', 'home.dest']
titanic1 = h2oai.start_experiment_sync(
    experiment_name="Titanic",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=True,
    accuracy=knobs[0],
    time=knobs[1],
    interpretability=knobs[2],
    # NOTE(review): the original text was truncated after
    # `interpretability=...`; the remaining arguments are reconstructed from
    # the sibling experiment calls in this file — confirm against the
    # original script.
    enable_gpus=True,
    cols_to_drop=dropped,
)