def load_titanic():
    '''
    Invokes ``load`` for the Titanic CSV hosted on web.stanford.edu.

    ``load`` receives ``data/titanic_train.csv`` and ``data/titanic_val.csv``
    as its train and validation output paths. Nothing is returned.
    '''
    source_url = 'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv'
    load(
        dataset_url=source_url,
        out_train_dataset_path='data/titanic_train.csv',
        out_val_dataset_path='data/titanic_val.csv',
    )
def load_body_fat():
    '''
    Invokes ``load`` for the body-fat CSV hosted on course1.winona.edu.

    The keyword arguments are collected in a mapping first and then expanded
    into the ``load`` call. Nothing is returned.
    '''
    load_kwargs = {
        'dataset_url': 'https://course1.winona.edu/bdeppa/Stat%20425/Data/bodyfat.csv',
        'out_train_dataset_path': 'data/bodyfat_train.csv',
        'out_val_dataset_path': 'data/bodyfat_val.csv',
    }
    load(**load_kwargs)
def load_boston_housing():
    '''
    Invokes ``load`` for the Boston housing CSV hosted on course1.winona.edu.

    ``load`` receives ``data/boston_train.csv`` and ``data/boston_val.csv``
    as its train and validation output paths. Nothing is returned.
    '''
    # NOTE(review): this URL is plain http, whereas the body-fat dataset on
    # the same host is fetched over https -- confirm whether that is intended.
    source_url = 'http://course1.winona.edu/bdeppa/Stat%20425/Data/Boston_Housing.csv'
    train_csv = 'data/boston_train.csv'
    val_csv = 'data/boston_val.csv'
    load(
        dataset_url=source_url,
        out_train_dataset_path=train_csv,
        out_val_dataset_path=val_csv,
    )
def run_tabular_regression(client, csv_file_url, gpus, hours, features=None, target=None, queries=None):
    '''
    Runs a sample full train-inference flow for the task ``TABULAR_REGRESSION``.

    The flow is: preprocess the CSV with ``load``, upload the train/validation
    datasets, register the ``XgbReg`` model, run a train job, start an
    inference job, optionally make predictions, and stop the inference job.

    :param client: Client object used for every dataset/model/job call below
    :param csv_file_url: Dataset location handed to ``load`` as its first argument
    :param gpus: Value for ``BudgetOption.GPU_COUNT`` in the train job budget
    :param hours: Value for ``BudgetOption.TIME_HOURS`` in the train job budget
    :param features: Forwarded as ``train_args['features']``
    :param target: Forwarded as ``train_args['target']``
    :param queries: If not ``None``, passed to ``make_predictions`` once the
        inference job is running
    :raises Exception: If no predictor host could be obtained for the
        inference job
    '''
    task = 'TABULAR_REGRESSION'

    # Randomly generate app & model names to avoid naming conflicts
    app_id = gen_id()
    app = 'tabular_regression_app_{}'.format(app_id)
    xgb_model_name = 'XgbReg_{}'.format(app_id)
    train_dataset_path = 'data/{}_train.csv'.format(app)
    val_dataset_path = 'data/{}_val.csv'.format(app)

    print('Preprocessing dataset...')
    load(csv_file_url, train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task, train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task, val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" to SINGA-Auto...'.format(xgb_model_name))
    xgb_model = client.create_model(xgb_model_name, task, 'examples/models/tabular_regression/XgbReg.py',
                                    'XgbReg', dependencies={ ModelDependency.XGBOOST: '0.90' })
    pprint(xgb_model)

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app, task, train_dataset['id'], val_dataset['id'],
                                        budget, models=[xgb_model['id']],
                                        train_args={ 'features': features, 'target': target })
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    pprint(client.get_best_trials_of_train_job(app))

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    pprint(client.create_inference_job(app))
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        raise Exception('Inference job has errored or stopped')
    print('Inference job is running!')

    # The inference job is live from here on. Stop it even when making
    # predictions fails, so a failed query does not leave the job running.
    try:
        if queries is not None:
            print('Making predictions for queries:')
            print(queries)
            predictions = make_predictions(client, predictor_host, queries)
            print('Predictions are:')
            print(predictions)
    finally:
        print('Stopping inference job...')
        pprint(client.stop_inference_job(app))
def load_diabetes():
    '''
    Invokes ``load`` for the diabetes CSV from the plotly/datasets GitHub repo.

    ``load`` receives ``data/diabetes_train.csv`` and ``data/diabetes_val.csv``
    as its train and validation output paths. Nothing is returned.
    '''
    source_url = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
    train_csv = 'data/diabetes_train.csv'
    val_csv = 'data/diabetes_val.csv'
    load(
        dataset_url=source_url,
        out_train_dataset_path=train_csv,
        out_val_dataset_path=val_csv,
    )
def load_heart():
    '''
    Invokes ``load`` for the heart dataset.

    ``load`` receives ``data/heart_train.csv`` and ``data/heart_val.csv``
    as its train and validation output paths. Nothing is returned.
    '''
    # NOTE(review): unlike the sibling loaders, ``dataset_url`` here is a
    # relative local path, not a remote URL -- presumably the file must
    # already exist locally; verify that ``load`` accepts local paths.
    load_kwargs = {
        'dataset_url': 'data/heart.csv',
        'out_train_dataset_path': 'data/heart_train.csv',
        'out_val_dataset_path': 'data/heart_val.csv',
    }
    load(**load_kwargs)