def benchmark(args, dataset_folder, dataset):
    """Run one or more GBM benchmark algorithms against a dataset.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``algorithm`` (comma-separated backend names, or
        ``"all"``) and ``nrows`` (row limit forwarded to the dataset
        loader); the whole namespace is also forwarded to each runner's
        ``fit``.
    dataset_folder : str
        Root folder handed to ``prepare_dataset``.
    dataset : str
        Dataset name handed to ``prepare_dataset``.

    Returns
    -------
    dict
        Maps each algorithm name to
        ``{"train_time": <fit seconds>, "accuracy": <metrics>}``.
    """
    data = prepare_dataset(dataset_folder, dataset, args.nrows)
    results = {}
    # Expand "all" into every known backend.  Use a local instead of
    # reassigning args.algorithm so the caller's namespace is not mutated
    # as a side effect of running the benchmark.
    algorithm_spec = args.algorithm
    if algorithm_spec == "all":
        algorithm_spec = "xgb-gpu,xgb-cpu,lgbm-cpu,lgbm-gpu,cat-cpu,cat-gpu"
    for alg in algorithm_spec.split(","):
        print("Running '%s' ..." % alg)
        runner = algorithms.Algorithm.create(alg)
        # Each runner is a context manager so backend resources (GPU
        # memory, native handles) are released even if fit/test raises.
        with runner:
            train_time = runner.fit(data, args)
            pred = runner.test(data)
            results[alg] = {
                "train_time": train_time,
                "accuracy": get_metrics(data, pred),
            }
    return results
# --- Third-party and project imports (RAPIDS + Ray Tune benchmark) ------
import cudf
import numpy as np
import pandas as pd
import pickle
from datasets import prepare_dataset
from sklearn.metrics import accuracy_score
from cuml.ensemble import RandomForestClassifier as GPURandomForestClassifier
import ray
from ray import tune
from ray.tune.utils import pin_in_object_store, get_pinned_object

# Load the "airline" dataset from /data; the third argument (nrows) is
# None, i.e. no row limit is applied by the loader.
data = prepare_dataset("/data", "airline", None)
X_train, X_test, y_train, y_test = data.X_train, data.X_test, data.y_train, data.y_test
# Cast labels to int32 — presumably what the cuML classifier expects;
# verify against RandomForestClassifier's label dtype requirements.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# NOTE(review): despite the name, this is one THIRD of the rows
# (len // 3), not a quarter; the first third of the training split is
# dropped below to shrink the working set.
QUARTER = len(X_train) // 3
X_train = X_train[QUARTER:]
y_train = y_train[QUARTER:]
# Disabled: pinning the splits in the Ray object store so Tune workers
# could share them without re-loading.
# ray.init()
# data_id = pin_in_object_store([X_train, X_test, y_train, y_test])
import os
from filelock import FileLock


# Ray Tune Trainable wrapping a cuML model.  NOTE(review): the body of
# _setup (and the rest of the class) continues beyond this chunk and is
# not visible here — truncated.
class CUMLTrainable(tune.Trainable):
    def _setup(self, config):
except: start_epoch, epoch_iter = 1, 0 print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) else: start_epoch, epoch_iter = 1, 0 if opt.debug: opt.display_freq = 1 opt.print_freq = 1 opt.niter = 1 opt.niter_decay = 0 opt.max_dataset_size = 10 dataurl = opt.dataurl opt.dataroot = str(prepare_dataset(dataurl, opt.dataroot) / 'subject4' / 'train') data_loader = CreateDataLoader(opt) dataset = data_loader.load_data() dataset_size = len(data_loader) print('#training images = %d' % dataset_size) """ new residual model """ model = create_model_fullts(opt) visualizer = Visualizer(opt) total_steps = (start_epoch-1) * dataset_size + epoch_iter for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1): epoch_start_time = time.time() if epoch != start_epoch:
from datasets import prepare_dataset
import os

# Expand "~" exactly once so the directory we create and the directory the
# dataset loader receives are the same path.  Previously the literal string
# "~/data" was forwarded to prepare_dataset while os.makedirs got the
# expanded form, which silently diverges if the loader does not expand it.
data_dir = os.path.expanduser("~/data")
os.makedirs(data_dir, exist_ok=True)
# nrows=None -> prepare the full "airline" dataset (no row limit).
prepare_dataset(data_dir, "airline", None)