Code example #1
            def _setup(self, config):
                # print("config in set up is", config)
                global_ft = ray.get(ft_id)
                # global_model = ray.get(model_id)
                self.trial_ft = deepcopy(global_ft)
                self.trial_model = TimeSequenceModel(check_optional_config=False,
                                                     future_seq_len=future_seq_len)

                # handling input
                global_input_df = ray.get(input_df_id)
                trial_input_df = deepcopy(global_input_df)
                self.config = convert_bayes_configs(config).copy()
                (self.x_train, self.y_train) = self.trial_ft.fit_transform(trial_input_df,
                                                                           **self.config)
                # trial_ft.fit(trial_input_df, **config)

                # handling validation data
                self.validation_data = None
                if is_val_df_valid:
                    global_validation_df = ray.get(validation_df_id)
                    trial_validation_df = deepcopy(global_validation_df)
                    self.validation_data = self.trial_ft.transform(trial_validation_df)

                # no need to call build since it is called the first time fit_eval is called.
                # callbacks = [TuneCallback(tune_reporter)]
                # fit model
                self.best_reward_m = -999
                self.reward_m = -999
                self.ckpt_name = "pipeline.ckpt"
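
Example #1 is the _setup hook of a class-based Ray Tune trainable. The pattern to note is how large, read-only inputs (the feature transformer behind ft_id, the input data frame behind input_df_id) are shared through the Ray object store and deep-copied per trial before any mutation. A minimal, self-contained sketch of that sharing pattern, with an illustrative config dict standing in for the real objects:

import ray
from copy import deepcopy

ray.init()

# Put the shared, read-only object into the object store once on the driver...
shared_config = {"past_seq_len": 2, "batch_size": 64}
config_id = ray.put(shared_config)

# ...and let each trial fetch a handle and take its own mutable copy,
# exactly as _setup does with ft_id and input_df_id above.
trial_config = deepcopy(ray.get(config_id))
trial_config["batch_size"] = 128  # safe: other trials are unaffected
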
Code example #2
    def _hp_search(self,
                   input_df,
                   validation_df,
                   metric,
                   recipe,
                   mc,
                   resources_per_trial,
                   remote_dir):

        ft = TimeSequenceFeatureTransformer(self.future_seq_len,
                                            self.dt_col,
                                            self.target_col,
                                            self.extra_features_col,
                                            self.drop_missing)
        if isinstance(input_df, list):
            feature_list = ft.get_feature_list(input_df[0])
        else:
            feature_list = ft.get_feature_list(input_df)

        # model = VanillaLSTM(check_optional_config=False)
        model = TimeSequenceModel(check_optional_config=False, future_seq_len=self.future_seq_len)

        # prepare parameters for search engine
        search_space = recipe.search_space(feature_list)
        runtime_params = recipe.runtime_params()
        num_samples = runtime_params['num_samples']
        stop = dict(runtime_params)
        search_algorithm_params = recipe.search_algorithm_params()
        search_algorithm = recipe.search_algorithm()
        fixed_params = recipe.fixed_params()
        del stop['num_samples']

        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       resources_per_trial=resources_per_trial,
                                       name=self.name,
                                       remote_dir=remote_dir,
                                       )
        searcher.compile(input_df,
                         search_space=search_space,
                         stop=stop,
                         search_algorithm_params=search_algorithm_params,
                         search_algorithm=search_algorithm,
                         fixed_params=fixed_params,
                         # feature_transformers=TimeSequenceFeatures,
                         feature_transformers=ft,
                         # model=model,
                         future_seq_len=self.future_seq_len,
                         validation_df=validation_df,
                         metric=metric,
                         mc=mc,
                         num_samples=num_samples)
        # searcher.test_run()
        searcher.run()

        best = searcher.get_best_trials(k=1)[0]  # take the single best trial; could later return top-n
        pipeline = self._make_pipeline(best,
                                       feature_transformers=ft,
                                       model=model,
                                       remote_dir=remote_dir)
        return pipeline
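
_hp_search treats the recipe as the single source of search configuration: it asks it for a search space, runtime parameters (from which num_samples is split off, leaving the Tune stop criteria), search-algorithm settings, and fixed parameters. Below is a hypothetical minimal recipe satisfying exactly the interface called above; the method names come from _hp_search, while the bodies and values are illustrative only:

class MinimalRecipe:
    def search_space(self, feature_list):
        # Tune-style search space; fixed values keep the sketch short
        return {"selected_features": feature_list, "past_seq_len": 2}

    def runtime_params(self):
        # 'num_samples' is deleted by _hp_search; the rest becomes `stop`
        return {"num_samples": 5, "training_iteration": 10}

    def search_algorithm_params(self):
        return None  # assumption: None falls back to the engine default

    def search_algorithm(self):
        return None  # assumption: None falls back to the engine default

    def fixed_params(self):
        return None
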
Code example #3
        def train_func(config, tune_reporter):
            # make a copy from global variables for trial to make changes
            global_ft = ray.get(ft_id)
            # global_model = ray.get(model_id)
            trial_ft = deepcopy(global_ft)
            # trial_model = deepcopy(global_model)
            trial_model = TimeSequenceModel(check_optional_config=False,
                                            future_seq_len=future_seq_len)

            # handling input
            global_input_df = ray.get(input_df_id)
            trial_input_df = deepcopy(global_input_df)
            config = convert_bayes_configs(config).copy()
            # print("config is ", config)
            (x_train, y_train) = trial_ft.fit_transform(trial_input_df, **config)
            # trial_ft.fit(trial_input_df, **config)

            # handling validation data
            validation_data = None
            if is_val_df_valid:
                global_validation_df = ray.get(validation_df_id)
                trial_validation_df = deepcopy(global_validation_df)
                validation_data = trial_ft.transform(trial_validation_df)

            # no need to call build since it is called the first time fit_eval is called.
            # callbacks = [TuneCallback(tune_reporter)]
            # fit model
            best_reward_m = -999  # sentinel so the first evaluation always wins and checkpoints
            reward_m = -999
            for i in range(1, 101):
                result = trial_model.fit_eval(x_train,
                                              y_train,
                                              validation_data=validation_data,
                                              mc=mc,
                                              # verbose=1,
                                              **config)
                reward_m = metric_op * result
                # if metric == "mean_squared_error":
                #     reward_m = (-1) * result
                #     # print("running iteration: ",i)
                # elif metric == "r_square":
                #     reward_m = result
                # else:
                #     raise ValueError("metric can only be \"mean_squared_error\" or \"r_square\"")
                ckpt_name = "best.ckpt"
                if reward_m > best_reward_m:
                    best_reward_m = reward_m
                    save_zip(ckpt_name, trial_ft, trial_model, config)
                    if remote_dir is not None:
                        upload_ppl_hdfs(remote_dir, ckpt_name)

                tune_reporter(
                    training_iteration=i,
                    reward_metric=reward_m,
                    checkpoint="best.ckpt"
                )
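
The commented-out branch above survives as metric_op: a sign factor that turns any supported metric into a reward to maximize, so the checkpointing logic only ever compares with ">". A small sketch of that convention, with a hypothetical helper name:

def metric_to_op(metric):
    # mirrors the commented-out branch: negate losses, keep scores
    if metric == "mean_squared_error":
        return -1  # lower error is better, so flip the sign
    elif metric == "r_square":
        return 1   # higher is already better
    raise ValueError('metric can only be "mean_squared_error" or "r_square"')

reward_m = metric_to_op("mean_squared_error") * 0.25  # -> -0.25; larger is better
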
Code example #4
def load_ts_pipeline(file):
    feature_transformers = TimeSequenceFeatureTransformer()
    model = TimeSequenceModel(check_optional_config=False)

    all_config = restore_zip(file, feature_transformers, model)
    ts_pipeline = TimeSequencePipeline(feature_transformers=feature_transformers,
                                       model=model,
                                       config=all_config)
    print("Restore pipeline from", file)
    return ts_pipeline
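
load_ts_pipeline reverses the save_zip call from example #3: it rebuilds empty transformer and model objects, restores their state plus the config from the zip file, and wraps everything in a pipeline. A hypothetical round trip, assuming a checkpoint written during training and a predict method on the pipeline:

# "pipeline.ckpt" matches the name used in example #1; test_df and
# predict() are assumptions about the surrounding pipeline API.
ts_pipeline = load_ts_pipeline("pipeline.ckpt")
predictions = ts_pipeline.predict(test_df)
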
Code example #5
    def fit_with_fixed_configs(self,
                               input_df,
                               validation_df=None,
                               mc=False,
                               **user_configs):
        """
        Fit pipeline with fixed configs. The model will be trained from initialization
        with the hyper-parameter specified in configs. The configs contain both identity configs
        (Eg. "future_seq_len", "dt_col", "target_col", "metric") and automl tunable configs
        (Eg. "past_seq_len", "batch_size").
        We recommend calling get_default_configs to see the name and default values of configs you
        you can specify.
        :param input_df: one data frame or a list of data frames
        :param validation_df: one data frame or a list of data frames
        :param user_configs: you can overwrite or add more configs with user_configs. Eg. "epochs"
        :return:
        """
        # self._check_configs()
        if self.config is None:
            self.config = self.get_default_configs()
        if user_configs:  # **user_configs is a dict, possibly empty, never None
            self.config.update(user_configs)
        ft_id_config_set = {
            'future_seq_len', 'dt_col', 'target_col', 'extra_features_col',
            'drop_missing'
        }
        ft_id_configs = {a: self.config[a] for a in ft_id_config_set}
        self.feature_transformers = TimeSequenceFeatureTransformer(
            **ft_id_configs)
        model_id_config_set = {'future_seq_len'}
        model_id_configs = {a: self.config[a] for a in model_id_config_set}
        self.model = TimeSequenceModel(check_optional_config=False,
                                       **model_id_configs)
        all_available_features = self.feature_transformers.get_feature_list(
            input_df)
        self.config.update({"selected_features": all_available_features})
        (x_train, y_train) = self.feature_transformers.fit_transform(
            input_df, **self.config)
        if self._is_val_df_valid(validation_df):
            validation_data = self.feature_transformers.transform(
                validation_df)
        else:
            validation_data = None

        self.model.fit_eval(x_train,
                            y_train,
                            validation_data=validation_data,
                            mc=mc,
                            verbose=1,
                            **self.config)
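
An illustrative call, assuming ts_pipeline was restored as in example #4 and that train_df/val_df are time-series data frames; per the docstring, any keyword such as epochs overrides the corresponding default config:

ts_pipeline.fit_with_fixed_configs(train_df,
                                   validation_df=val_df,
                                   epochs=10)  # overrides the default "epochs" config
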
Code example #6
def model_create_func():
    # model = VanillaLSTM(check_optional_config=False)
    model = TimeSequenceModel(
        check_optional_config=False,
        future_seq_len=self.future_seq_len)
    return model
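
model_create_func is a zero-argument factory: it closes over self.future_seq_len and returns a fresh, independently trainable model on every call, which is what a search engine needs when each trial must own its model. The same idea in miniature, with a hypothetical wrapper name:

def make_model_factory(future_seq_len):
    def model_create_func():
        # each call yields an independent model configured identically
        return TimeSequenceModel(check_optional_config=False,
                                 future_seq_len=future_seq_len)
    return model_create_func

create = make_model_factory(future_seq_len=1)
model_a, model_b = create(), create()  # two separate instances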