def testWithParameters(self):
    class Data:
        def __init__(self):
            self.data = [0] * 500_000

    data = Data()
    data.data[100] = 1

    class TestTrainable(Trainable):
        def setup(self, config, data):
            self.data = data.data
            self.data[101] = 2  # Changes are local

        def step(self):
            return dict(
                metric=len(self.data), hundred=self.data[100], done=True)

    trial_1, trial_2 = tune.run(
        tune.with_parameters(TestTrainable, data=data),
        num_samples=2).trials

    self.assertEqual(data.data[101], 0)
    self.assertEqual(trial_1.last_result["metric"], 500_000)
    self.assertEqual(trial_1.last_result["hundred"], 1)
    self.assertEqual(trial_2.last_result["metric"], 500_000)
    self.assertEqual(trial_2.last_result["hundred"], 1)
    self.assertTrue(str(trial_1).startswith("TestTrainable"))
def main(cpus_per_actor, num_actors, num_samples):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    ray_params = RayParams(
        max_actor_restarts=1,
        gpus_per_actor=0,
        cpus_per_actor=cpus_per_actor,
        num_actors=num_actors)

    analysis = tune.run(
        tune.with_parameters(train_breast_cancer, ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=num_samples,
        metric="eval-error",
        mode="min")

    # Load the best model checkpoint.
    best_bst = xgboost_ray.tune.load_model(
        os.path.join(analysis.best_logdir, "tuned.xgb"))
    best_bst.save_model("best_model.xgb")

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
def testWithParametersTwoRuns2(self):
    # Makes sure two runs in the same script
    # pass (https://github.com/ray-project/ray/issues/16609)
    def train_fn(config, extra=4):
        tune.report(metric=extra)

    def train_fn_2(config, extra=5):
        tune.report(metric=extra)

    trainable1 = tune.with_parameters(train_fn, extra=8)
    trainable2 = tune.with_parameters(train_fn_2, extra=9)

    out1 = tune.run(trainable1, metric="metric", mode="max")
    out2 = tune.run(trainable2, metric="metric", mode="max")
    self.assertEqual(out1.best_result["metric"], 8)
    self.assertEqual(out2.best_result["metric"], 9)
def run_tuning_procedure(config, expname, ntrials, ncpus, ngpus, NetClass, dataset="hchs"):
    trainable = tune.with_parameters(hyper_tuner, NetClass=NetClass, dataset=dataset)

    analysis = tune.run(trainable,
                        resources_per_trial={
                            "cpu": ncpus,
                            "gpu": ngpus
                        },
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=ntrials,
                        name=expname)

    print("Best Parameters:", analysis.best_config)

    analysis.best_result_df.to_csv("best_parameters_exp%s_trials%d.csv" % (expname, ntrials))
    analysis.results_df.to_csv("all_results_exp%s_trials%d.csv" % (expname, ntrials))
    print("Best 5 results")
    print(analysis.results_df.sort_values(by="mcc", ascending=False).head(5))
def main_tune(base_args):
    # ray.init(log_to_driver=False)
    tune_config = {
        "learning_rate": tune.loguniform(5e-6, 1e-3),
        "weight_decay": tune.choice([0.0, 1e-3, 1e-2, 0.1]),
        "batch_size": tune.choice([16, 32, 64, 128]),
        "latent_dim": tune.choice([2, 3, 8, 16, 32, 128, 256, 512])
    }
    scheduler = ASHAScheduler(max_t=base_args.max_tune_epoches,
                              grace_period=3,
                              reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=[
            "learning_rate", "weight_decay", "batch_size", "latent_dim"
        ],
        metric_columns=[
            "val_lossR", "loss", "Reconstruction_Loss", "training_iteration"
        ])
    analysis = tune.run(tune.with_parameters(tune_train, base_arg=base_args),
                        resources_per_trial={
                            "cpu": 12,
                            "gpu": 1.0,
                        },
                        metric="val_lossR",
                        mode="min",
                        config=tune_config,
                        num_samples=10,
                        scheduler=scheduler,
                        progress_reporter=reporter,
                        name="tune_vae_chol")

    print("Best hyperparameters found were: ", analysis.best_config)
def main(cpus_per_actor, num_actors, num_samples):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    analysis = tune.run(
        tune.with_parameters(
            train_breast_cancer,
            cpus_per_actor=cpus_per_actor,
            num_actors=num_actors),
        # extra_cpu is used if the trainable creates additional remote actors.
        # https://docs.ray.io/en/master/tune/api_docs/trainable.html#advanced-resource-allocation
        resources_per_trial={
            "cpu": 1,
            "extra_cpu": cpus_per_actor * num_actors
        },
        config=config,
        num_samples=num_samples,
        metric="eval-error",
        mode="min")

    # Load the best model checkpoint
    best_bst = xgb.Booster()
    best_bst.load_model(os.path.join(analysis.best_logdir, "simple.xgb"))

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpus")
    parser.add_argument("--gpus-per-trial", type=float)
    parser.add_argument("--num-epochs", type=int)
    parser.add_argument("--num-samples", type=int)
    parser.add_argument("--w2v", type=str)
    args = parser.parse_args()

    w2v_sd = torch.load(args.w2v)
    gpus_per_trial = args.gpus_per_trial

    trainable = tune.with_parameters(
        train_model,
        gpus=args.gpus,
        w2v=w2v_sd,
        num_epochs=args.num_epochs,
    )

    algo = AxSearch(max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(trainable,
                        resources_per_trial={
                            "cpu": 4,
                            "gpu": gpus_per_trial
                        },
                        metric="acc",
                        mode="max",
                        search_alg=algo,
                        scheduler=scheduler,
                        config=config,
                        num_samples=args.num_samples,
                        name="tune_w2v_lr")

    print(analysis.best_config)
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16])
    }
    scheduler = ASHAScheduler(
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    result = tune.run(
        tune.with_parameters(train_cifar),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        metric="loss",
        mode="min",
        num_samples=num_samples,
        scheduler=scheduler
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    if ray.util.client.ray.is_connected():
        # If using Ray Client, we want to make sure checkpoint access
        # happens on the server. So we wrap `test_best_model` in a Ray task.
        ray.get(ray.remote(test_best_model).remote(best_trial))
    else:
        test_best_model(best_trial)
def tune_mnist(data_dir,
               num_samples=10,
               num_epochs=10,
               num_workers=1,
               use_gpu=False):
    config = {
        "layer_1": tune.choice([32, 64, 128]),
        "layer_2": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    # Add Tune callback.
    metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
    callbacks = [TuneReportCallback(metrics, on="validation_end")]
    trainable = tune.with_parameters(train_mnist,
                                     data_dir=data_dir,
                                     num_epochs=num_epochs,
                                     num_workers=num_workers,
                                     use_gpu=use_gpu,
                                     callbacks=callbacks)
    analysis = tune.run(trainable,
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=num_samples,
                        resources_per_trial={
                            "cpu": 1,
                            "gpu": int(use_gpu),
                            "extra_cpu": num_workers,
                            "extra_gpu": num_workers * int(use_gpu)
                        },
                        name="tune_mnist")

    print("Best hyperparameters found were: ", analysis.best_config)
def tune_mnist_mxnet(num_samples=10, num_epochs=10):
    logger.info("Downloading MNIST data...")
    mnist_data = mx.test_utils.get_mnist()
    logger.info("Got MNIST data, starting Ray Tune.")

    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-3, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    scheduler = ASHAScheduler(max_t=num_epochs, grace_period=1, reduction_factor=2)

    analysis = tune.run(
        tune.with_parameters(
            train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs
        ),
        resources_per_trial={
            "cpu": 1,
        },
        metric="mean_accuracy",
        mode="max",
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        name="tune_mnist_mxnet",
    )
    return analysis
def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
    data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
    # Download data
    MNISTDataModule(data_dir=data_dir).prepare_data()

    config = {
        "layer_1": tune.choice([32, 64, 128]),
        "layer_2": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    trainable = tune.with_parameters(train_mnist_tune,
                                     data_dir=data_dir,
                                     num_epochs=num_epochs,
                                     num_gpus=gpus_per_trial)
    analysis = tune.run(trainable,
                        resources_per_trial={
                            "cpu": 1,
                            "gpu": gpus_per_trial
                        },
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=num_samples,
                        name="tune_mnist")

    print("Best hyperparameters found were: ", analysis.best_config)
def main(num_samples=10, max_num_epochs=15):
    # data_dir = os.path.abspath("./data")
    configs = load_configs("configs/training/smg_configs_template.yaml")
    # configs["processor"]["torch_model_dict"]["hidden_sizes"] = a = tune.choice(
    #     [[90, 45, 25, 10], [45, 25], [45, 25], [90, 45, 25, 10]])
    dataloaders, amplification, data_info_dict = load_data(
        configs)  # Download data for all trials before starting the run
    scheduler = ASHAScheduler(max_t=max_num_epochs,
                              grace_period=1,
                              reduction_factor=2)
    result = tune.run(
        tune.with_parameters(generate_surrogate_model, configs=configs),
        resources_per_trial={
            "cpu": 8,
            "gpu": 1
        },
        config=configs,
        metric="loss",
        mode="min",
        num_samples=num_samples,
        scheduler=scheduler,
    )
    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))
def grid_search(hparams):
    scheduler = ASHAScheduler(max_t=hparams['n_epochs'],
                              grace_period=1,
                              reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=hparams['param_cols'],
        metric_columns=['valid_acc', 'valid_f1', 'valid_loss'])
    rdm = RetinalDataModule()
    analysis = tune.run(tune.with_parameters(train_tune, rdm=rdm),
                        resources_per_trial={
                            "cpu": 1,
                            "gpu": 1
                        },
                        metric="valid_loss",
                        mode="min",
                        config=hparams,
                        local_dir=Path(hparams['output_dir'], 'ray_tune'),
                        num_samples=5,
                        scheduler=scheduler,
                        progress_reporter=reporter,
                        name=f"tune_{hparams['model']}_DRIVE")
    print("Best hyperparameters found were: ", analysis.best_config)
def tune_xgboost(train_df, test_df, target_column):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    ray_params = RayParams(
        max_actor_restarts=1, gpus_per_actor=0, cpus_per_actor=4, num_actors=4)

    analysis = tune.run(
        tune.with_parameters(
            train_xgboost,
            train_df=train_df,
            test_df=test_df,
            target_column=target_column,
            ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=1,
        metric="eval-error",
        mode="min",
        verbose=1)

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")

    return analysis.best_config
def hypertune(num_samples, num_epochs, sym01, sym02, period):
    config = {
        "seq_len": tune.choice([5, 10]),
        "hidden_size": tune.choice([10, 50, 100]),
        "batch_size": tune.choice([30, 60]),
        "dropout": tune.choice([0.1, 0.2]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "num_layers": tune.choice([2, 3, 4])
    }

    trainable = tune.with_parameters(
        myTrain,
        num_epochs=num_epochs,
        sym01=sym01,
        sym02=sym02,
        period=period,
    )

    analysis = tune.run(trainable,
                        resources_per_trial={
                            "cpu": 1,
                        },
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=num_samples,
                        name="tune_lstm")

    print("tuning finished")
    return analysis.best_config
def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
    config = {
        "layer_1": tune.choice([32, 64, 128]),
        "layer_2": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    trainable = tune.with_parameters(train_mnist_tune,
                                     num_epochs=num_epochs,
                                     num_gpus=gpus_per_trial)
    analysis = tune.run(
        trainable,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=num_samples,
        name="tune_mnist",
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def tune_mnist_asha(num_samples=10,
                    num_epochs=10,
                    gpus_per_trial=0,
                    data_dir="~/data"):
    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    scheduler = ASHAScheduler(
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
        metric_columns=["loss", "mean_accuracy", "training_iteration"])

    train_fn_with_parameters = tune.with_parameters(train_mnist_tune,
                                                    num_epochs=num_epochs,
                                                    num_gpus=gpus_per_trial,
                                                    data_dir=data_dir)
    resources_per_trial = {"cpu": 1, "gpu": gpus_per_trial}

    analysis = tune.run(train_fn_with_parameters,
                        resources_per_trial=resources_per_trial,
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=num_samples,
                        scheduler=scheduler,
                        progress_reporter=reporter,
                        name="tune_mnist_asha")

    print("Best hyperparameters found were: ", analysis.best_config)
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join(
        'reports',
        backtest_config['exchange'],
        backtest_config['symbol'],
        f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
    n_particles = 10
    if 'n_particles' in backtest_config:
        n_particles = backtest_config['n_particles']
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles,
                                      omega=omega, phip=phi1, phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(backtest, ticks=ticks),
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=session_dirpath,
                        progress_reporter=LogReporter(
                            metric_columns=['daily_gain', 'closest_liquidation', 'objective'],
                            parameter_columns=[k for k in backtest_config['ranges']]))

    ray.shutdown()

    df = analysis.results_df
    df.reset_index(inplace=True)
    df.drop(columns=['trial_id', 'time_this_iter_s', 'done', 'timesteps_total',
                     'episodes_total', 'training_iteration', 'experiment_id', 'date',
                     'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip',
                     'time_since_restore', 'timesteps_since_restore',
                     'iterations_since_restore', 'experiment_tag'],
            inplace=True)
    df.to_csv(os.path.join(backtest_config['session_dirpath'], 'results.csv'), index=False)
    print('Best candidate found:')
    pprint.pprint(analysis.best_config)
    plot_wrap(backtest_config, ticks, clean_result_config(analysis.best_config))
    return analysis
def main():
    name = "large xgboost sweep"

    ray.init(address="auto")

    num_samples = 31  # So that we fit on 1024 CPUs with 1 head bundle
    num_actors_per_sample = 32

    max_runtime = 3500

    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": 4,
    }

    ray_params = RayParams(
        max_actor_restarts=1,
        gpus_per_actor=0,
        cpus_per_actor=1,
        num_actors=num_actors_per_sample,
    )

    start_time = time.monotonic()
    analysis = tune.run(
        tune.with_parameters(xgboost_train, ray_params=ray_params, num_boost_round=100),
        config=config,
        num_samples=num_samples,
        resources_per_trial=ray_params.get_tune_resources(),
    )
    time_taken = time.monotonic() - start_time

    result = {
        "time_taken": time_taken,
        "trial_states": dict(Counter([trial.status for trial in analysis.trials])),
        "last_update": time.time(),
    }

    test_output_json = os.environ.get("TEST_OUTPUT_JSON", "/tmp/tune_test.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)

    if time_taken > max_runtime:
        print(f"The {name} test took {time_taken:.2f} seconds, but should not "
              f"have exceeded {max_runtime:.2f} seconds. Test failed. \n\n"
              f"--- FAILED: {name.upper()} ::: "
              f"{time_taken:.2f} > {max_runtime:.2f} ---")
    else:
        print(f"The {name} test took {time_taken:.2f} seconds, which "
              f"is below the budget of {max_runtime:.2f} seconds. "
              f"Test successful. \n\n"
              f"--- PASSED: {name.upper()} ::: "
              f"{time_taken:.2f} <= {max_runtime:.2f} ---")
def optimize_hyperparameters(
    train_model,
    create_model,
    data_train,
    data_test,
    search_space,
    model_kwargs_str,
    callbacks,
    hyperparams_file_name,
    random_seed,
    model_path,
    epochs,
    n_steps,
    num_samples_optim,
):
    tmp_dir = tempfile.TemporaryDirectory(dir=os.getcwd())

    ray.shutdown()
    ray.init(log_to_driver=False, local_mode=True)

    search_alg = HyperOptSearch(random_state_seed=random_seed)
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1)
    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration", grace_period=10)

    analysis = tune.run(
        tune.with_parameters(
            train_model,
            data_train=data_train,
            data_test=data_test,
            create_model=create_model,
            model_kwargs_str=model_kwargs_str,
            callbacks=callbacks,
            epochs=epochs,
            n_steps=n_steps,
        ),
        verbose=1,
        config=search_space,
        search_alg=search_alg,
        scheduler=scheduler,
        resources_per_trial={
            "cpu": os.cpu_count(),
            "gpu": 0
        },
        metric="val_loss",
        mode="min",
        name="ray_tune_keras_hyperopt_gru",
        local_dir=tmp_dir.name,
        num_samples=num_samples_optim,
    )

    # Remove the temporary results directory once the search is done.
    tmp_dir.cleanup()

    best_params = analysis.get_best_config(metric="val_loss", mode="min")
    with open(os.path.join(model_path, hyperparams_file_name), "w") as f:
        json.dump(best_params, f)
def backtest_tune(ohlc: np.ndarray, backtest_config: dict):
    config = create_config(backtest_config)

    if not os.path.isdir(os.path.join('reports', backtest_config['symbol'])):
        os.makedirs(os.path.join('reports', backtest_config['symbol']), exist_ok=True)
    report_path = os.path.join('reports', backtest_config['symbol'])

    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print(
            'Parameter iters should be defined in the configuration. Defaulting to 10.'
        )
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print(
            'Parameter num_cpus should be defined in the configuration. Defaulting to 2.'
        )
    initial_points = max(1, min(int(iters / 10), 20))

    ray.init(num_cpus=num_cpus)  # , logging_level=logging.FATAL, log_to_driver=False)

    algo = HyperOptSearch(n_initial_points=initial_points)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(backtest, ohlc=ohlc),
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=report_path)

    ray.shutdown()

    session_path = os.path.join(
        os.path.join('sessions', backtest_config['symbol']),
        backtest_config['session_name'])
    if not os.path.isdir(session_path):
        os.makedirs(session_path, exist_ok=True)

    print('Best candidate found is: ', analysis.best_config)
    json.dump(analysis.best_config,
              open(os.path.join(session_path, 'best_config.json'), 'w'),
              indent=4)

    result = backtest(analysis.best_config, ohlc, True)
    result.to_csv(os.path.join(session_path, 'best_trades.csv'), index=False)
    return analysis
def tune4_withLabel(
        model,
        train_set: Dataset,
        val_set: Dataset,
        dims: list,
        config: dict,
        EPOCHS: int = 300,
        extra_feature_len: int = 0,
        extra_feature_len2: int = 0,
        n_gpu=1,
        n_samples=20,
        model_name="model",
):
    dim1, dim2, dim3, dim4 = dims[0], dims[1], dims[2], dims[3]

    scheduler = ASHAScheduler(max_t=EPOCHS, grace_period=1, reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=["k", "lr", "batch_size", "hidden_dim"],
        metric_columns=["loss", "training_iteration"],
        max_error_rows=5,
        max_progress_rows=5,
        max_report_frequency=10)

    analysis = tune.run(tune.with_parameters(
        train4_withLabel,
        model=model,
        dim1=dim1,
        dim2=dim2,
        dim3=dim3,
        dim4=dim4,
        extra_feature_len=extra_feature_len,
        extra_feature_len2=extra_feature_len2,
        train_set=train_set,
        val_set=val_set,
        num_epochs=EPOCHS,
        num_gpus=n_gpu,
        model_name=model_name),
        resources_per_trial={
            "cpu": 1,
            "gpu": n_gpu
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=n_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name=model_name,
        verbose=False)

    print("-" * 70)
    print("Done")
    print("Best hyperparameters found were: ", analysis.best_config)
    print("Best achieved loss was: ", analysis.best_result)
    print("-" * 70)
def testWithParametersTwoRuns1(self):
    # Makes sure two runs in the same script but different ray sessions
    # pass (https://github.com/ray-project/ray/issues/16609)
    def train_fn(config, extra=4):
        tune.report(metric=extra)

    trainable = tune.with_parameters(train_fn, extra=8)
    out = tune.run(trainable, metric="metric", mode="max")
    self.assertEqual(out.best_result["metric"], 8)

    self.tearDown()
    self.setUp()

    def train_fn_2(config, extra=5):
        tune.report(metric=extra)

    trainable = tune.with_parameters(train_fn_2, extra=9)
    out = tune.run(trainable, metric="metric", mode="max")
    self.assertEqual(out.best_result["metric"], 9)
def start_training(name):
    Epochs = 1000
    Samples = 50
    ModelName = name

    pose_autoencoder = MLP_withLabel.load_checkpoint(
        "/home/nuoc/Documents/MEX/models/MLP4_withLabel_best/M3/0.00324857.512.pbz2")
    # pose_autoencoder = MLP_withLabel.load_checkpoint("/home/nuoc/Documents/MEX/models/MLP_withLabel/0.0013522337.512.pbz2")

    pose_encoder_out_dim = pose_autoencoder.dimensions[-1]

    scheduler = ASHAScheduler(max_t=Epochs, grace_period=15, reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=["k", "lr", "batch_size", "loss_fn"],
        metric_columns=["loss", "training_iteration"],
        max_error_rows=5,
        max_progress_rows=5,
        max_report_frequency=1)

    analysis = tune.run(tune.with_parameters(
        tuning,
        MODEL=MotionGenerationModel,
        pose_autoencoder=pose_autoencoder,
        cost_dim=cost_dim,
        phase_dim=phase_dim,
        input_slices=[phase_dim, pose_dim, cost_dim],
        output_slices=[phase_dim, phase_dim, pose_encoder_out_dim],
        train_set=train_set,
        val_set=val_set,
        num_epochs=Epochs,
        model_name=ModelName),
        resources_per_trial={
            "cpu": 2,
            "gpu": 1
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=Samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name=ModelName,
        verbose=False)

    print("-" * 70)
    print("Done")
    print("Best hyperparameters found were: ", analysis.best_config)
    print("Best achieved loss was: ", analysis.best_result)
    print("-" * 70)

    ray.shutdown()
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join(
        'reports',
        backtest_config['exchange'],
        backtest_config['symbol'],
        f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
    n_particles = 10
    if 'n_particles' in backtest_config:
        n_particles = backtest_config['n_particles']
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles,
                                      omega=omega, phip=phi1, phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(wrap_backtest, ticks=ticks),
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=session_dirpath,
                        progress_reporter=LogReporter(
                            metric_columns=['daily_gain',
                                            'closest_liquidation',
                                            'max_hours_between_fills',
                                            'objective'],
                            parameter_columns=[k for k in backtest_config['ranges']
                                               if type(config[k]) == ray.tune.sample.Float
                                               or type(config[k]) == ray.tune.sample.Integer]))

    ray.shutdown()
    return analysis
def make_trainable(*, num_epochs, gpus_per_trial, dataset, init_config,
                   init_state_dict, processor):
    return tune.with_parameters(
        clip_fine_tune,
        num_epochs=num_epochs,
        num_gpus=gpus_per_trial,
        dataset=dataset,
        init_config=init_config,
        init_state_dict=init_state_dict,
        processor=processor,
    )
def main(distributed: bool,
         num_samples: int = 5,
         batch_size: int = 512,
         num_epochs: int = 10) -> None:
    init_logging("main.log")
    logger.info("Running main ...")
    if distributed:
        ray.init(address="localhost:6379",
                 _redis_password=os.getenv("RAY_REDIS_PWD"),
                 ignore_reinit_error=True)
    else:
        ray.init(ignore_reinit_error=True)

    X, y = make_data(NUM_ROW)
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2)

    # NOTE: Hyperopt config
    metric = "loss"
    mode = "min"
    hp_search = HyperOptSearch(metric=metric, mode=mode)

    # NOTE: Like functools.partial, but stores data in object store
    objective = tune.with_parameters(fit,
                                     X_tr=X_tr,
                                     X_val=X_val,
                                     y_tr=y_tr,
                                     y_val=y_val,
                                     batch_size=batch_size,
                                     num_epochs=num_epochs)

    # NOTE: Define the support of the parameters we're optimizing over
    param_space = {
        "width": tune.choice((2**np.arange(5, 11)).astype(int)),
        "depth": tune.choice(range(1, 5)),
        "lr": tune.loguniform(1e-4, 5e-2)
    }

    logger.info("Starting hyperparameter search ...")
    analysis = tune.run(objective,
                        num_samples=num_samples,
                        config=param_space,
                        search_alg=hp_search,
                        resources_per_trial={
                            "cpu": 2,
                            "gpu": 0.5
                        },
                        metric=metric,
                        mode=mode)

    best_config = analysis.get_best_config(metric=metric, mode=mode)
    logger.info("Best config:\n%s", best_config)

    with open("/tmp/analysis.p", "wb") as f:
        pickle.dump(analysis, f)

    logger.info("Best results %s", pformat(analysis.results))
    analysis.results_df.to_parquet(RESULTS_PATH)
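# The `fit` objective wrapped by `tune.with_parameters` above is not defined in this
# snippet. A minimal sketch of the calling convention it assumes is given below
# (hypothetical body; only the signature is the point): Tune passes the trial `config`
# as the first positional argument, and the objects handed to `with_parameters` arrive
# as keyword arguments resolved from the Ray object store.
from ray import tune


def fit(config, X_tr=None, X_val=None, y_tr=None, y_val=None,
        batch_size=512, num_epochs=10):
    # Build a model from config["width"], config["depth"], config["lr"],
    # train it on (X_tr, y_tr), evaluate on (X_val, y_val), then report to Tune.
    val_loss = 0.0  # placeholder; real code would compute the validation loss
    tune.report(loss=val_loss)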
def testWithParameters2(self):
    class Data:
        def __init__(self):
            import numpy as np
            self.data = np.random.rand((2 * 1024 * 1024))

    def train(config, data=None):
        tune.report(metric=len(data.data))

    trainable = tune.with_parameters(train, data=Data())
    dumped = cloudpickle.dumps(trainable)
    assert sys.getsizeof(dumped) < 100 * 1024
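# Why the size assertion above holds: `tune.with_parameters` places the ~16 MB array
# in the Ray object store and pickles only a reference to it, so the dumped trainable
# stays small. A self-contained sketch of the contrast (not part of the test above;
# assumes a local Ray session and standard library imports as shown):
import functools
import sys

import cloudpickle
import numpy as np
import ray
from ray import tune


def train(config, data=None):
    tune.report(metric=len(data))


ray.init(ignore_reinit_error=True)
big_array = np.random.rand(2 * 1024 * 1024)  # ~16 MB of float64

naive = functools.partial(train, data=big_array)       # captures the array directly
wrapped = tune.with_parameters(train, data=big_array)  # stores the array in the object store

print(sys.getsizeof(cloudpickle.dumps(naive)))    # on the order of the array size
print(sys.getsizeof(cloudpickle.dumps(wrapped)))  # small: only a reference is pickled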
def tune_mnist(
        num_samples=10,
        num_epochs=10,
        gpus_per_trial=0,
        tracking_uri=None,
        experiment_name="ptl_autologging_example",
):
    data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
    # Download data
    MNISTDataModule(data_dir=data_dir).prepare_data()

    # Set the MLflow experiment, or create it if it does not exist.
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)

    config = {
        "layer_1": tune.choice([32, 64, 128]),
        "layer_2": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
        "mlflow": {
            "experiment_name": experiment_name,
            "tracking_uri": mlflow.get_tracking_uri(),
        },
        "data_dir": os.path.join(tempfile.gettempdir(), "mnist_data_"),
        "num_epochs": num_epochs,
    }

    trainable = tune.with_parameters(
        train_mnist_tune,
        data_dir=data_dir,
        num_epochs=num_epochs,
        num_gpus=gpus_per_trial,
    )
    analysis = tune.run(
        trainable,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=num_samples,
        name="tune_mnist",
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def main():
    logging.basicConfig(level=logging.INFO)

    # Raylib parameters
    num_samples = 10
    envname = 'AdversarialAntBulletEnv-v0'
    trainingconfig = Path.cwd() / 'trainingconfig.json'
    evaluate_mean_n = 1000  # Number of timesteps over which to evaluate the mean reward
    name_fmt = 'million-bucks_{adv_force}'

    config = {
        # TODO: sample from control once, then different adversarial strengths
        # Range is centered on the force that achieves the closest reward to the control (7.5)
        "adv_force": tune.qrandn(7.5, 2.5, 0.1),
    }

    # https://docs.ray.io/en/master/tune/tutorials/overview.html#which-search-algorithm-scheduler-should-i-choose
    # Use BOHB for larger problems with a small number of hyperparameters
    # search = TuneBOHB(max_concurrent=4, metric="mean_loss", mode="min")
    # sched = HyperBandForBOHB(
    #     time_attr="training_iteration",
    #     max_t=100,
    # )

    # Implicitly use random search if search algo is not specified
    sched = ASHAScheduler(
        time_attr='training_iteration',
        max_t=100,
        grace_period=1,  # Unit is iterations, not timesteps. TODO configure
    )

    # Pass in a Trainable class or function to tune.run.
    local_dir = str(Path.cwd() / "ray")
    logging.info(f'{local_dir=}')
    anal = tune.run(tune.with_parameters(trainable,
                                         envname=envname,
                                         trainingconfig=trainingconfig,
                                         evaluate_mean_n=evaluate_mean_n,
                                         name_fmt=name_fmt),
                    config=config,
                    num_samples=num_samples,
                    scheduler=sched,
                    local_dir=local_dir,
                    metric="robustness",
                    mode="max",
                    log_to_file=True)

    logging.info(f'best config: {anal.best_config}')
    logging.info(f'best result: {anal.best_result}')