def testFetchPinned(self):
    """Check that a trainable can look up an object pinned by the driver.

    The string "hello" is pinned before the experiment starts. The
    registered trainable fetches it and then reports one finished result
    of 100 timesteps. The test passes when the single resulting trial is
    TERMINATED and its last result carries those 100 timesteps.
    """
    pinned_ref = pin_in_object_store("hello")

    def train(config, reporter):
        # Only the lookup itself is exercised; the fetched value is unused.
        get_pinned_object(pinned_ref)
        reporter(timesteps_total=100, done=True)

    register_trainable("f1", train)

    experiment_spec = {"foo": {"run": "f1"}}
    # Unpacking enforces that exactly one trial came back.
    (trial,) = run_experiments(experiment_spec)

    self.assertEqual(trial.status, Trial.TERMINATED)
    self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], 100)
"max_depth": max_depth_range, "max_features": max_features_range, } ray.init(memory=11000 * 1024 * 1024, object_store_memory=500 * 1024 * 1024, driver_object_store_memory=100 * 1024 * 1024, local_mode=False, num_gpus=1) max_concurrent = cupy.cuda.runtime.getDeviceCount() cdf = prepare_dataset() #prepare dataset # for shared access across processes data_id = pin_in_object_store(cdf) search = build_search_alg(search_alg, hpo_ranges) sched = select_sched_alg(sched_alg) exp_name = None if exp_name is not None: exp_name += exp_name else: exp_name = "" exp_name += "{}_{}_CV-{}_{}M_SAMP-{}".format("RF", compute, CV_folds, int(len(cdf) / 1000000), num_samples)
def optimize(self, trial_amount, search_algo="bayesopt", max_workers=8, verbose=1):
    """
    Description:
        Goes through all labeling methods in self.auto_labeling_methods and optimizes their
        parameters for maximal specific profit. It is built with ray tune.
    Arguments:
        - trial_amount (int): the amount of parameter-combinations that are going to be tested
        - search_algo (string): the search algorithm used for the optimization [hyperopt, bayesopt]
        - max_workers (int): how many trials the search algorithm may run concurrently
        - verbose (int): the level of logging in the console (0, 1, 2)
    Return:
        - nothing
        - updates the alm_optimal dictionary if the new best config is better than the old one
        - for new optimizations to be saved, you need to save the object with the .save() method
    Raises:
        - ValueError: if search_algo is neither "bayesopt" nor "hyperopt" (raised once the first
          labeling method is processed)
    """
    # The close prices do not depend on the labeling method, so they are
    # assembled once, with a single concat, instead of once per method.
    close_frames = [
        pd.DataFrame({"close": data["close"].copy()}) for data in self.raw_data
    ]
    if close_frames:
        price_array = pd.concat(close_frames, axis=0).to_numpy()
    else:
        # no raw data: keep the empty array an empty DataFrame produces
        price_array = pd.DataFrame().to_numpy()

    # Read the fee up front so the objective closure below only references
    # plain values instead of `self`.
    trading_fee = self.trading_fee

    for labeling_method in self.auto_labeling_methods:
        # get the labeling method
        labeler = getattr(self, f"_{labeling_method}_labeling")

        # Start ray only when it is not running yet: calling ray.init() on an
        # already initialized runtime raises, which used to break every
        # labeling method after the first one.
        if not ray.is_initialized():
            ray.init()

        # store the needed objects in the ray tune main process
        labeler_id = pin_in_object_store(labeler)
        price_array_id = pin_in_object_store(price_array)
        calculate_profit_id = pin_in_object_store(calculate_profit)

        # function that gets passed to the search algorithm
        def objective(config):
            # get objects from central tune storage
            pinned_labeler = get_pinned_object(labeler_id)
            prices = get_pinned_object(price_array_id)
            profit_fn = get_pinned_object(calculate_profit_id)

            # get the labels from the specific method
            label_list = pinned_labeler(config)

            # create the array that gets passed to the profit calculator:
            # the price column(s) with the labels appended as a last column
            label_array = pd.concat(label_list, axis=0).to_numpy()
            label_array = np.expand_dims(label_array, axis=1)
            array = np.concatenate([prices, label_array], axis=1)

            specific_profit, _ = profit_fn(array, trading_fee)
            tune.report(specific_profit=specific_profit)
            time.sleep(0.1)

        # set up the search algorithm
        if search_algo == "bayesopt":
            search_alg = BayesOptSearch(random_search_steps=trial_amount/10)
        elif search_algo == "hyperopt":
            # seed the search with the current best params, if there are any
            best_config = None
            if labeling_method in self.alm_optimal:
                best_config = [self.alm_optimal[labeling_method]["parameters"]]
            search_alg = HyperOptSearch(n_initial_points=trial_amount/10, points_to_evaluate=best_config)
        else:
            # ValueError is still an Exception, so existing handlers keep working
            raise ValueError("You chose a Search Algorithm that is not available, please choose from this list: bayesopt, hyperopt")
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=max_workers)

        # run the optimization
        result = tune.run(
            objective,
            config=self.alm_range[labeling_method],
            metric="specific_profit",
            search_alg=search_alg,
            mode="max",
            num_samples=trial_amount,
            verbose=verbose,
        )

        # save the best config in self.alm_optimal if it beats the stored one
        best_profit = result.best_result["specific_profit"]
        best_params = result.get_best_config()
        if (
            labeling_method not in self.alm_optimal
            or self.alm_optimal[labeling_method]["specific_profit"] < best_profit
        ):
            self.alm_optimal[labeling_method] = {
                "parameters": best_params,
                "specific_profit": best_profit,
            }

        print(best_params)
        print(best_profit)
# Dataset caching/loading, Ray start-up and configuration-space aliases.
# NOTE(review): the line breaks of this section were lost, so everything after
# the first `#` on the line below currently parses as a comment — restore the
# original layout before relying on it. Read as separate statements, it does
# the following, in order of appearance:
#   1. Creates a SparseEventDataset, sets its value columns to "wf_AE_loss"
#      and "wf_integral", calls prune(dtype) and limit_event_t_size(limit),
#      and saves it with save(cache_dir, dtype).
#   2. Loads a SparseEventDataset from cache_dir and splits it with
#      get_train_val_test(CPU_WORKERS_PER_TRAIL) into train/val/test parts.
#   3. Calls ray.init with num_cpus=12, num_gpus=1, memory=29000000000 and
#      pins the train and validation parts with pin_in_object_store (the test
#      part is not pinned here); data_train/data_val are rebound to the values
#      pin_in_object_store returns.
#   4. Creates a CS.ConfigurationSpace (`CS` is presumably the ConfigSpace
#      package — confirm against the imports) and a class `_` that only holds
#      short aliases for config_space methods and CS hyperparameter/condition
#      types. Inside that class body the names `int` and `float` shadow the
#      builtins.
# NOTE(review): `# cache_data()` and `#ray.init(local_mode=True)` look like
# commented-out calls from the original layout — confirm whether step 1 was
# the body of a `cache_data` function rather than top-level code.
print("Preparing and caching data (this may take a while)…") ds = SparseEventDataset() ds.value_columns = ["wf_AE_loss", "wf_integral"] ds.prune(dtype) ds.limit_event_t_size(limit) ds.save(cache_dir, dtype) # cache_data() ds = SparseEventDataset.load(cache_dir) data_train, data_val, data_test = ds.get_train_val_test(CPU_WORKERS_PER_TRAIL) ray.init(num_cpus=12, num_gpus=1, memory=29000000000) #ray.init(local_mode=True) data_train = pin_in_object_store(data_train) data_val = pin_in_object_store(data_val) config_space = CS.ConfigurationSpace() class _: add = config_space.add_hyperparameter extend = config_space.add_hyperparameters int = CS.UniformIntegerHyperparameter float = CS.UniformFloatHyperparameter cat = CS.CategoricalHyperparameter cond = config_space.add_condition eq = CS.EqualsCondition
n_classes=2) X = pd.DataFrame(X.astype(data_type)) # cuML Random Forest Classifier requires the labels to be integers y = pd.Series(y.astype(np.int32)) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) from ray import tune from ray.tune.utils import pin_in_object_store, get_pinned_object data_id = pin_in_object_store([X_train, X_test, y_train, y_test]) class CUMLTrainable(tune.Trainable): def _setup(self, config): [X_train, X_test, y_train, y_test] = get_pinned_object(data_id) self.cuml_model = curfc(n_estimators=config.get("estimators", 40), max_depth=config.get("depth", 16), max_features=1.0) self.X_cudf_train = cudf.DataFrame.from_pandas(X_train) self.X_cudf_test = cudf.DataFrame.from_pandas(X_test) self.y_cudf_train = cudf.Series(y_train.values) self.y_test = y_test def _train(self):