Reviews3 = makeTrainingModel(ReviewsKey, target, knobs, drop=dropped)

dropped = ['Score', 'ProfileName', 'Time', 'Description']
knobs = [6, 2, 7]
Reviews4 = makeTrainingModel(ReviewsKey, target, knobs, drop=dropped)

dropped = ['Summary', 'Score', 'ProfileName']
knobs = [6, 2, 7]
Reviews5 = makeTrainingModel(ReviewsKey, target, knobs, drop=dropped)


### Cannabis Time Series ##
## 4/2/6, seed=212507590, 120 days, 0 gap: 7, 7, 7 minutes
## 4/2/6, 28 days, 1 gap: 7 minutes

experiment = h2oai.start_experiment_sync(dataset_key=train_dai.key,
                                         testset_key=test_dai.key,
                                         target_col="Weekly_Sales",
                                         is_classification=False,
                                         cols_to_drop=["sample_weight"],
                                         accuracy=5,
                                         time=3,
                                         interpretability=1,
                                         scorer="RMSE",
                                         enable_gpus=True,
                                         seed=1234,
                                         time_col="Date",
                                         time_groups_columns=["Store", "Dept"],
                                         num_prediction_periods=1,
                                         num_gap_periods=0)
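# Note: train_dai and test_dai above are assumed to be dataset handles created
# earlier in the script (they are not defined in this snippet). A minimal sketch,
# with placeholder CSV paths, of how they could be created before the call above
# using create_dataset_sync, as in the other examples in this document:
#
#     train_dai = h2oai.create_dataset_sync('/data/weekly_sales_train.csv')
#     test_dai = h2oai.create_dataset_sync('/data/weekly_sales_test.csv')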
train_key = h2oai.get_dataset_split_job(reviews_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(reviews_split_data).entity[1]

# Reviews Default
dropped = ['UserID', 'ProductId', 'Id', 'Summary', 'Score',
           'HelpfulnessDenominator', 'HelpfulnessNumerator',
           'ProfileName', 'Time']
knobs = [8, 2, 7]
reviews1 = h2oai.start_experiment_sync(experiment_name="Reviews NLP Big",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=True,
                                       accuracy=knobs[0],
                                       time=knobs[1],
                                       interpretability=knobs[2],
                                       enable_gpus=True,
                                       cols_to_drop=dropped)

reviews1a = h2oai.start_experiment_sync(experiment_name="Reviews NLP Big TF",
                                        dataset_key=train_key,
                                        testset_key=test_key,
                                        target_col=target,
                                        is_classification=True,
                                        accuracy=knobs[0],
                                        time=knobs[1],
                                        interpretability=knobs[2],
                                        enable_gpus=True,
                                        cols_to_drop=dropped,
def test_debug_pyclient():
    import os
    import sys
    import shutil
    import zipfile
    import pandas as pd
    from h2oai_client import Client

    pd.set_option('display.max_rows', 50)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Login info
    dai_url = "http://****:12345"
    dai_user = "******"
    dai_pwd = "****"

    # Data information
    data_file_name = "****.csv"
    y = "****"

    # Transformer information
    transformer_file_name = "****.py"

    # All official transformers
    transformers_noncustom = [
        'CVCatNumEncode', 'CVTargetEncode', 'CatOriginalTransformer',
        'ClusterDistTransformer', 'ClusterIdTransformer', 'ClusterTETransformer',
        'DatesTransformer', 'EwmaLagsTransformer', 'FrequentTransformer',
        'InteractionsTransformer', 'IsHolidayTransformer', 'LagsAggregatesTransformer',
        'LagsInteractionTransformer', 'LagsTransformer', 'LexiLabelEncoder',
        'NumCatTETransformer', 'NumToCatTETransformer', 'NumToCatWoEMonotonicTransformer',
        'NumToCatWoETransformer', 'OneHotEncodingTransformer', 'OriginalTransformer',
        'SortedLETransformer', 'StrFeatureTransformer', 'TextClustDistTransformer',
        'TextClustTETransformer', 'TextLinModelTransformer', 'TextTransformer',
        'TruncSVDNumTransformer', 'WeightOfEvidenceTransformer']

    # Any installed custom transformers you don't want to test
    transformers_custom_nontesting = ['MyLogTransformer']

    all_nontest_transformers = transformers_noncustom + transformers_custom_nontesting

    # STEP ZERO: Connect to Driverless AI
    h2oai = Client(dai_url, dai_user, dai_pwd)

    # STEP ONE: Load data set (and related tasks)

    # view all data sets in DAI
    all_data_sets = h2oai.list_datasets(0, 100)
    all_data_sets = pd.DataFrame({
        'key': list(map(lambda x: x.key, all_data_sets)),
        'name': list(map(lambda x: x.name, all_data_sets))})

    print("PRE-LOADED DATASETS:")
    print(all_data_sets)

    # check if data was pre-loaded - if so, use that data set - if not, load it
    if data_file_name in all_data_sets['name'].values:
        print("\nData already loaded ", data_file_name)
        data_key = all_data_sets.loc[all_data_sets["name"] == data_file_name, "key"].iloc[0]
        data_load_job = h2oai.get_dataset_job(data_key).entity
    else:
        print("\nLoading file ", data_file_name)
        data_load_job = h2oai.upload_dataset_sync(data_file_name)
        data_key = data_load_job.key

    # STEP TWO: Load custom transformer (and related tasks)
    # probably not good to just upload every time
    # no function to delete from python, only by ssh-ing in:
    #   rm tmp/contrib/transformers/[function]_randomletters_content.py
    print("\nUploading Transformer ", transformer_file_name)
    my_transformer = h2oai.upload_custom_recipe_sync(transformer_file_name)

    # returns True or False - exit if it fails - check the DAI UI for the error message
    if my_transformer:
        print("\nTransformer uploaded successfully\n")
    else:
        print("\nTransformer upload failed, exiting program.\n")
        sys.exit()

    # STEP THREE: Run experiment (and related tasks)
    print("\nStarting Experiment\n")
    experiment = h2oai.start_experiment_sync(
        dataset_key=data_key,
        target_col=y,
        is_classification=True,
        accuracy=1,
        time=1,
        interpretability=10,
        scorer="F1",
        score_f_name=None,
        config_overrides="""
            feature_brain_level=0
            exclude_transformers={dont_use}
        """.format(dont_use=all_nontest_transformers)
    )
    # experiment = h2oai.get_model_job("lomotare").entity

    # STEP FOUR: Check the transformation was used

    # Download summary
    summary_path = h2oai.download(src_path=experiment.summary_path, dest_dir=".")
    dir_path = "h2oai_experiment_summary_" + experiment.key
    with zipfile.ZipFile(summary_path, 'r') as z:
        z.extractall(dir_path)

    # View features
    features = pd.read_table(dir_path + "/features.txt", sep=',', skipinitialspace=True)
    print(features)

    # STEP FIVE: Transform data and ensure it looks as expected
    transform = h2oai.fit_transform_batch_sync(model_key=experiment.key,
                                               training_dataset_key=data_key,
                                               validation_dataset_key=None,
                                               test_dataset_key=None,
                                               validation_split_fraction=0.25,
                                               seed=1234,
                                               fold_column=None)

    # Download the training and validation transformed data
    transform_train_path = h2oai.download(src_path=transform.training_output_csv_path, dest_dir=".")
    transform_validate_path = h2oai.download(src_path=transform.validation_output_csv_path, dest_dir=".")

    transform_train = pd.read_table(transform_train_path, sep=',', skipinitialspace=True)
    transform_validate = pd.read_table(transform_validate_path, sep=',', skipinitialspace=True)

    print(transform_train.head())
    print(transform_validate.head())

    # STEP 1000: Clean up
    os.remove(summary_path)
    os.remove(transform_train_path)
    os.remove(transform_validate_path)
    shutil.rmtree(dir_path)
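# Usage sketch (assumption, not from the source): the check above can be collected
# by pytest, or run directly as a stand-alone script once the login, data, and
# transformer placeholders have been filled in.
if __name__ == "__main__":
    test_debug_pyclient()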
import h2oai_client
from h2oai_client import Client

h2oai = Client(address='http://129.213.63.69:12345', username='******', password='******')

train = h2oai.create_dataset_sync('/train.csv')
test = h2oai.create_dataset_sync('/test.csv')

experiment = h2oai.start_experiment_sync(dataset_key=train.key,
                                         testset_key=test.key,
                                         accuracy=10,
                                         time=10,
                                         interpretability=1,
                                         is_classification=True,
                                         target_col='LABEL',
                                         is_timeseries=True,
                                         time_col='DATE',
                                         num_gap_periods=1,
                                         num_prediction_periods=1)

print("Final Model Score on Validation Data: " + str(round(experiment.valid_score, 3)))
print("Final Model Score on Test Data: " + str(round(experiment.test_score, 3)))
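# A possible follow-up sketch: download the held-out test-set predictions once the
# experiment finishes. Assumption: the finished experiment object exposes a
# test_predictions_path artifact, as in other h2oai_client examples; verify the
# attribute name against your client version before relying on it.
preds_path = h2oai.download(src_path=experiment.test_predictions_path, dest_dir=".")
print("Test predictions downloaded to: " + preds_path)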
, time_col = "" , ratio = ratio , seed = 1234 ) train_key = h2oai.get_dataset_split_job(card_split_data).entity[0] test_key = h2oai.get_dataset_split_job(card_split_data).entity[1] # Card Default knobs = [6, 4, 6] card_default = h2oai.start_experiment_sync( experiment_name = "Card Default" , dataset_key = train_key , testset_key = test_key , target_col = target , is_classification = True , accuracy = knobs[0] , time = knobs[1] , interpretability = knobs[2] , enable_gpus = True , cols_to_drop = dropped ) # Card Monotonic knobs = [6, 4, 7] card_monotonic = h2oai.start_experiment_sync( experiment_name = "Card Monotonic" , dataset_key = train_key , testset_key = test_key , target_col = target , is_classification = True , accuracy = knobs[0]
time_col="", ratio=ratio, seed=1234) train_key = h2oai.get_dataset_split_job(boston_split_data).entity[0] test_key = h2oai.get_dataset_split_job(boston_split_data).entity[1] dropped = [] # Housing Experiment #1 knobs = [7, 2, 8] housing1 = h2oai.start_experiment_sync(experiment_name="Housing", dataset_key=train_key, testset_key=test_key, target_col=target, is_classification=False, accuracy=knobs[0], time=knobs[1], interpretability=knobs[2], scorer='RMSE', enable_gpus=True, cols_to_drop=dropped) # Housing Experiment #2 knobs = [4, 2, 8] housing2 = h2oai.start_experiment_sync(experiment_name="Housing Quick", dataset_key=train_key, testset_key=test_key, target_col=target, is_classification=False, accuracy=knobs[0], time=knobs[1],
fold_col="", time_col="", ratio=ratio, seed=1234) train_key = h2oai.get_dataset_split_job(diabetes_split_data).entity[0] test_key = h2oai.get_dataset_split_job(diabetes_split_data).entity[1] dropped = [] # Diabetes Default knobs = [8, 2, 8] diabetes1 = h2oai.start_experiment_sync(experiment_name="Diabetes", dataset_key=train_key, testset_key=test_key, target_col=target, is_classification=True, accuracy=knobs[0], time=knobs[1], interpretability=knobs[2], enable_gpus=True, cols_to_drop=dropped) # Diabetes GLM diabetes2 = h2oai.start_experiment_sync( experiment_name="Diabetes GLM", dataset_key=train_key, testset_key=test_key, target_col=target, is_classification=True, accuracy=knobs[0], time=knobs[1], interpretability=knobs[2],
                                             ratio=ratio,
                                             seed=1234)
train_key = h2oai.get_dataset_split_job(titanic_split_data).entity[0]
test_key = h2oai.get_dataset_split_job(titanic_split_data).entity[1]

knobs = [8, 2, 8]

# Titanic Default
dropped = ['no.title', 'cabin', 'embarked', 'boat', 'body', 'home.dest']
titanic1 = h2oai.start_experiment_sync(experiment_name="Titanic",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=True,
                                       accuracy=knobs[0],
                                       time=knobs[1],
                                       interpretability=knobs[2],
                                       enable_gpus=True,
                                       cols_to_drop=dropped)

# Titanic No Name
dropped = ['name', 'no.title', 'cabin', 'embarked', 'boat', 'body', 'home.dest']
titanic2 = h2oai.start_experiment_sync(experiment_name="Titanic no Name",
                                       dataset_key=train_key,
                                       testset_key=test_key,
                                       target_col=target,
                                       is_classification=True,
                                       accuracy=knobs[0],