def _setup(self, config):
    # make trial-local copies of the shared (ray object store) objects so
    # this trial can mutate them without affecting other trials
    global_ft = ray.get(ft_id)
    self.trial_ft = deepcopy(global_ft)
    self.trial_model = TimeSequenceModel(check_optional_config=False,
                                         future_seq_len=future_seq_len)

    # handle input data
    global_input_df = ray.get(input_df_id)
    trial_input_df = deepcopy(global_input_df)
    self.config = convert_bayes_configs(config).copy()
    (self.x_train, self.y_train) = self.trial_ft.fit_transform(trial_input_df,
                                                               **self.config)

    # handle validation data
    self.validation_data = None
    if is_val_df_valid:
        global_validation_df = ray.get(validation_df_id)
        trial_validation_df = deepcopy(global_validation_df)
        self.validation_data = self.trial_ft.transform(trial_validation_df)

    # no need to call build() here: it is called the first time fit_eval is called
    self.best_reward_m = -999
    self.reward_m = -999
    self.ckpt_name = "pipeline.ckpt"
def _hp_search(self,
               input_df,
               validation_df,
               metric,
               recipe,
               mc,
               resources_per_trial,
               remote_dir):
    ft = TimeSequenceFeatureTransformer(self.future_seq_len,
                                        self.dt_col,
                                        self.target_col,
                                        self.extra_features_col,
                                        self.drop_missing)
    if isinstance(input_df, list):
        feature_list = ft.get_feature_list(input_df[0])
    else:
        feature_list = ft.get_feature_list(input_df)

    model = TimeSequenceModel(check_optional_config=False,
                              future_seq_len=self.future_seq_len)

    # prepare parameters for the search engine
    search_space = recipe.search_space(feature_list)
    runtime_params = recipe.runtime_params()
    num_samples = runtime_params['num_samples']
    stop = dict(runtime_params)
    search_algorithm_params = recipe.search_algorithm_params()
    search_algorithm = recipe.search_algorithm()
    fixed_params = recipe.fixed_params()
    # 'num_samples' is consumed here; the remaining runtime params
    # become the Tune stop criteria
    del stop['num_samples']

    searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                   resources_per_trial=resources_per_trial,
                                   name=self.name,
                                   remote_dir=remote_dir)
    searcher.compile(input_df,
                     search_space=search_space,
                     stop=stop,
                     search_algorithm_params=search_algorithm_params,
                     search_algorithm=search_algorithm,
                     fixed_params=fixed_params,
                     feature_transformers=ft,
                     future_seq_len=self.future_seq_len,
                     validation_df=validation_df,
                     metric=metric,
                     mc=mc,
                     num_samples=num_samples)
    searcher.run()

    # get the best trial; later this could be the top n
    best = searcher.get_best_trials(k=1)[0]
    pipeline = self._make_pipeline(best,
                                   feature_transformers=ft,
                                   model=model,
                                   remote_dir=remote_dir)
    return pipeline
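# For orientation, a sketch of the recipe contract that _hp_search relies on.
# The class name and return values are hypothetical; only the method names
# and how they are consumed come from the calls in _hp_search above.
class ExampleRecipe:
    def search_space(self, all_available_features):
        # hyper-parameter search space, keyed by config name
        return {"selected_features": all_available_features}

    def runtime_params(self):
        # must include 'num_samples'; every other entry is used
        # as a Tune stop criterion
        return {"num_samples": 5, "training_iteration": 10}

    def search_algorithm(self):
        return None  # None falls back to the engine's default search

    def search_algorithm_params(self):
        return None

    def fixed_params(self):
        return None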
def train_func(config, tune_reporter):
    # make trial-local copies of the shared (ray object store) objects so
    # this trial can mutate them without affecting other trials
    global_ft = ray.get(ft_id)
    trial_ft = deepcopy(global_ft)
    trial_model = TimeSequenceModel(check_optional_config=False,
                                    future_seq_len=future_seq_len)

    # handle input data
    global_input_df = ray.get(input_df_id)
    trial_input_df = deepcopy(global_input_df)
    config = convert_bayes_configs(config).copy()
    (x_train, y_train) = trial_ft.fit_transform(trial_input_df, **config)

    # handle validation data
    validation_data = None
    if is_val_df_valid:
        global_validation_df = ray.get(validation_df_id)
        trial_validation_df = deepcopy(global_validation_df)
        validation_data = trial_ft.transform(trial_validation_df)

    # no need to call build() here: it is called the first time fit_eval is called
    # train for up to 100 iterations, checkpointing whenever the reward improves
    best_reward_m = -999
    reward_m = -999
    ckpt_name = "best.ckpt"
    for i in range(1, 101):
        result = trial_model.fit_eval(x_train,
                                      y_train,
                                      validation_data=validation_data,
                                      mc=mc,
                                      **config)
        # metric_op flips the sign for metrics that should be minimized
        # (e.g. mean_squared_error), so a larger reward is always better
        reward_m = metric_op * result
        if reward_m > best_reward_m:
            best_reward_m = reward_m
            save_zip(ckpt_name, trial_ft, trial_model, config)
            if remote_dir is not None:
                upload_ppl_hdfs(remote_dir, ckpt_name)
        tune_reporter(
            training_iteration=i,
            reward_metric=reward_m,
            checkpoint=ckpt_name
        )
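# A sketch of how metric_op (captured from the enclosing scope above) is
# assumed to be derived, consistent with the metric handling this loop
# replaced: mean_squared_error is minimized, r_square is maximized.
def _metric_to_op(metric):
    # returns +1 for metrics to maximize, -1 for metrics to minimize
    return 1 if metric == "r_square" else -1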
def load_ts_pipeline(file):
    # rebuild an empty transformer/model pair and restore their state
    # (plus the full config) from the saved zip file
    feature_transformers = TimeSequenceFeatureTransformer()
    model = TimeSequenceModel(check_optional_config=False)
    all_config = restore_zip(file, feature_transformers, model)
    ts_pipeline = TimeSequencePipeline(feature_transformers=feature_transformers,
                                       model=model,
                                       config=all_config)
    print("Restored pipeline from", file)
    return ts_pipeline
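# Hedged usage sketch (not from the source): restore a saved pipeline and run
# inference. The checkpoint path is illustrative, the column names assume the
# transformer defaults, and predict() is assumed to be available on the
# restored TimeSequencePipeline.
import pandas as pd

test_df = pd.DataFrame({"datetime": pd.date_range("2020-01-01", periods=100, freq="H"),
                        "value": range(100)})
ts_pipeline = load_ts_pipeline("pipeline.ckpt")
result_df = ts_pipeline.predict(test_df)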
def fit_with_fixed_configs(self, input_df, validation_df=None, mc=False, **user_configs):
    """
    Fit the pipeline with fixed configs. The model is trained from scratch
    with the hyper-parameters specified in configs. The configs contain both
    identity configs (e.g. "future_seq_len", "dt_col", "target_col", "metric")
    and AutoML tunable configs (e.g. "past_seq_len", "batch_size"). We
    recommend calling get_default_configs to see the names and default values
    of the configs you can specify.

    :param input_df: one data frame or a list of data frames
    :param validation_df: one data frame or a list of data frames
    :param mc: whether to use Monte Carlo (dropout) sampling when fitting
    :param user_configs: you can overwrite or add more configs with
        user_configs, e.g. "epochs"
    :return:
    """
    if self.config is None:
        self.config = self.get_default_configs()
    if user_configs is not None:
        self.config.update(user_configs)

    # split the configs into those identifying the feature transformer
    # and those identifying the model
    ft_id_config_set = {'future_seq_len', 'dt_col', 'target_col',
                        'extra_features_col', 'drop_missing'}
    ft_id_configs = {a: self.config[a] for a in ft_id_config_set}
    self.feature_transformers = TimeSequenceFeatureTransformer(**ft_id_configs)
    model_id_config_set = {'future_seq_len'}
    model_id_configs = {a: self.config[a] for a in model_id_config_set}
    self.model = TimeSequenceModel(check_optional_config=False,
                                   **model_id_configs)

    all_available_features = self.feature_transformers.get_feature_list(input_df)
    self.config.update({"selected_features": all_available_features})
    (x_train, y_train) = self.feature_transformers.fit_transform(input_df,
                                                                 **self.config)
    if self._is_val_df_valid(validation_df):
        validation_data = self.feature_transformers.transform(validation_df)
    else:
        validation_data = None

    self.model.fit_eval(x_train, y_train,
                        validation_data=validation_data,
                        mc=mc,
                        verbose=1,
                        **self.config)
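# Hedged usage sketch: retrain a restored pipeline with fixed configs instead
# of running a new search. "epochs" is passed through **user_configs;
# train_df and val_df are hypothetical data frames in the same format as the
# test_df example above.
ts_pipeline = load_ts_pipeline("pipeline.ckpt")
ts_pipeline.fit_with_fixed_configs(input_df=train_df,
                                   validation_df=val_df,
                                   epochs=10)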
def model_create_func():
    model = TimeSequenceModel(
        check_optional_config=False,
        future_seq_len=self.future_seq_len)
    return model