예제 #1
0
# Name of the target column handed to the split and to the experiment below.
target = 'PositiveReview'
# Passed through as the split `ratio`; presumably the training fraction
# (NOTE(review): confirm against the client documentation).
ratio = 0.8

# `dataPath` and `basename` are expected to be defined earlier in the script.
reviews_data = h2oai.create_dataset_sync(dataPath)

# Split the data
reviews_split_data = h2oai.make_dataset_split(
    dataset_key=reviews_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,  # fixed seed passed to the split call
)

# Fetch the split job a single time and read both keys from the same object,
# instead of issuing one client call per key.
# NOTE(review): this assumes the job is already finished when fetched, exactly
# as the previous two-call form did -- confirm whether polling is required.
reviews_split_job = h2oai.get_dataset_split_job(reviews_split_data)
train_key = reviews_split_job.entity[0]
test_key = reviews_split_job.entity[1]

# Reviews Default

# Columns handed to the experiment as columns to drop (presumably via
# `cols_to_drop`; the experiment call is truncated in this view).
dropped = [
    'UserID', 'ProductId', 'Id', 'Summary', 'Score', 'HelpfulnessDenominator',
    'HelpfulnessNumerator', 'ProfileName', 'Time'
]
# knobs[0] is used as `accuracy` by the experiment call that follows; the
# remaining entries are presumably time and interpretability -- TODO confirm.
knobs = [8, 2, 7]
reviews1 = h2oai.start_experiment_sync(experiment_name="Reviews NLP Big",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=True,
                                       accuracy=knobs[0],
예제 #2
0
# `dataPath`, `basename`, `target` and `ratio` are expected to be defined
# earlier in the script; they are not assigned in this section.
card_data = h2oai.create_dataset_sync(dataPath)

# Split the data
card_split_data = h2oai.make_dataset_split(
    dataset_key=card_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,  # fixed seed passed to the split call
)

# Fetch the split job a single time and read both keys from the same object,
# instead of issuing one client call per key.
# NOTE(review): this assumes the job is already finished when fetched, exactly
# as the previous two-call form did -- confirm whether polling is required.
card_split_job = h2oai.get_dataset_split_job(card_split_data)
train_key = card_split_job.entity[0]
test_key = card_split_job.entity[1]

# Card Default
# Experiment settings, consumed below as accuracy=knobs[0], time=knobs[1],
# interpretability=knobs[2].
knobs = [6, 4, 6]
card_default = h2oai.start_experiment_sync(
      experiment_name = "Card Default"
    , dataset_key = train_key
    , testset_key = test_key
    , target_col = target
    , is_classification = True
    , accuracy = knobs[0]
    , time = knobs[1]
    , interpretability = knobs[2]
    , enable_gpus = True
    , cols_to_drop = dropped
# Passed through as the split `ratio`; presumably the training fraction
# (NOTE(review): confirm against the client documentation).
ratio = 0.8

# `dataPath`, `basename` and `target` are expected to be defined earlier in
# the script; they are not assigned in this section.
diabetes_data = h2oai.create_dataset_sync(dataPath)

# Split the data
diabetes_split_data = h2oai.make_dataset_split(
    dataset_key=diabetes_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,  # fixed seed passed to the split call
)

# Fetch the split job a single time and read both keys from the same object,
# instead of issuing one client call per key.
# NOTE(review): this assumes the job is already finished when fetched, exactly
# as the previous two-call form did -- confirm whether polling is required.
diabetes_split_job = h2oai.get_dataset_split_job(diabetes_split_data)
train_key = diabetes_split_job.entity[0]
test_key = diabetes_split_job.entity[1]
# No columns are dropped for this experiment.
dropped = []

# Diabetes Default
# Experiment settings: [accuracy, time, interpretability], indexed below.
knobs = [8, 2, 8]
diabetes1 = h2oai.start_experiment_sync(
    experiment_name="Diabetes",
    dataset_key=train_key,
    testset_key=test_key,
    target_col=target,
    is_classification=True,
    accuracy=knobs[0],
    time=knobs[1],
    interpretability=knobs[2],
    enable_gpus=True,
    cols_to_drop=dropped,
)
# Name of the target column handed to the split and to the experiment below.
target = 'VALUE'
# Passed through as the split `ratio`; presumably the training fraction
# (NOTE(review): confirm against the client documentation).
ratio = 0.8

# `dataPath` and `basename` are expected to be defined earlier in the script.
boston_data = h2oai.create_dataset_sync(dataPath)

# Split the data
boston_split_data = h2oai.make_dataset_split(
    dataset_key=boston_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,  # fixed seed passed to the split call
)

# Fetch the split job a single time and read both keys from the same object,
# instead of issuing one client call per key.
# NOTE(review): this assumes the job is already finished when fetched, exactly
# as the previous two-call form did -- confirm whether polling is required.
boston_split_job = h2oai.get_dataset_split_job(boston_split_data)
train_key = boston_split_job.entity[0]
test_key = boston_split_job.entity[1]
# No columns are dropped for this experiment.
dropped = []

# Housing Experiment #1
# Experiment settings, consumed below as accuracy=knobs[0], time=knobs[1],
# interpretability=knobs[2].
knobs = [7, 2, 8]
housing1 = h2oai.start_experiment_sync(experiment_name="Housing",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=False,
                                       accuracy=knobs[0],
                                       time=knobs[1],
                                       interpretability=knobs[2],
                                       scorer='RMSE',
                                       enable_gpus=True,
예제 #5
0
# Name of the target column handed to the split and to the experiment below.
target = 'survived'
# Passed through as the split `ratio`; presumably the training fraction
# (NOTE(review): confirm against the client documentation).
ratio = 0.8

# `dataPath` and `basename` are expected to be defined earlier in the script.
titanic_data = h2oai.create_dataset_sync(dataPath)

# Split the data
titanic_split_data = h2oai.make_dataset_split(
    dataset_key=titanic_data.key,
    output_name1=basename + "_train",
    output_name2=basename + "_test",
    target=target,
    fold_col="",
    time_col="",
    ratio=ratio,
    seed=1234,  # fixed seed passed to the split call
)

# Fetch the split job a single time and read both keys from the same object,
# instead of issuing one client call per key.
# NOTE(review): this assumes the job is already finished when fetched, exactly
# as the previous two-call form did -- confirm whether polling is required.
titanic_split_job = h2oai.get_dataset_split_job(titanic_split_data)
train_key = titanic_split_job.entity[0]
test_key = titanic_split_job.entity[1]

# Experiment settings, consumed below as accuracy=knobs[0], time=knobs[1],
# interpretability=knobs[2].
knobs = [8, 2, 8]

# Titanic Default

# Columns handed to the experiment as columns to drop (presumably via
# `cols_to_drop`; the experiment call is truncated in this view).
dropped = ['no.title', 'cabin', 'embarked', 'boat', 'body', 'home.dest']
titanic1 = h2oai.start_experiment_sync(experiment_name="Titanic",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=True,
                                       accuracy=knobs[0],
                                       time=knobs[1],
                                       interpretability=knobs[2],