Example #1
    def optimize_params(self,
                        trial,
                        n_prune_evals_per_trial: int = 4,
                        n_tests_per_eval: int = 1,
                        speedup_factor: int = 10):
        env_params = self.optimize_env_params(trial)

        # Despite its name, test_set_percentage marks the training cut-off
        # used by train() and test() below; rows past it are the held-out
        # test set.
        full_train_len = int(self.test_set_percentage * len(self.feature_df))
        optimize_train_len = int(self.validation_set_percentage *
                                 full_train_len)

        # Train on only the last 1/speedup_factor of the optimization slice
        # to keep each trial cheap.
        train_len = int(optimize_train_len / speedup_factor)
        train_start = optimize_train_len - train_len

        train_df = self.feature_df[train_start:optimize_train_len]
        # Validate on the remainder of the training portion only, so the
        # held-out test set is never touched during hyper-parameter search.
        validation_df = self.feature_df[optimize_train_len:full_train_len]

        train_env = DummyVecEnv(
            [lambda: BitcoinTradingEnv(train_df, **env_params)])
        validation_env = DummyVecEnv(
            [lambda: BitcoinTradingEnv(validation_df, **env_params)])

        model_params = self.optimize_agent_params(trial)
        model = self.model(self.policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.nminibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        last_reward = -np.finfo(np.float16).max
        evaluation_interval = int(train_len / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            model.learn(evaluation_interval)

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, _ = model.predict(obs)
                obs, reward, done, _ = validation_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward
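optimize_params is shaped as an Optuna objective: it takes a trial and returns a value to minimize. Below is a minimal sketch of how it could be handed to the study created in initialize_optuna (Example #4); the optimize method name and its arguments are assumptions, not part of the excerpt:

    def optimize(self, n_trials: int = 10):
        # Hypothetical driver: Optuna calls optimize_params(trial) once per
        # trial, pruning unpromising ones via trial.should_prune() above.
        self.initialize_optuna()

        try:
            self.optuna_study.optimize(self.optimize_params,
                                       n_trials=n_trials,
                                       n_jobs=1)
        except KeyboardInterrupt:
            pass

        return self.optuna_study.best_trial.params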
Example #2
    def test(self, model_epoch: int = 0, should_render: bool = True):
        env_params = self.get_env_params()

        train_len = int(self.test_set_percentage * len(self.feature_df))
        test_df = self.feature_df[train_len:]

        test_env = DummyVecEnv(
            [lambda: BitcoinTradingEnv(test_df, **env_params)])

        model_path = path.join('data', 'agents',
                               f'{self.study_name}__{model_epoch}.pkl')
        model = self.model.load(model_path, env=test_env)

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        obs, done, reward_sum = test_env.reset(), False, 0
        while not done:
            action, _states = model.predict(obs)
            obs, reward, done, _ = test_env.step(action)

            # DummyVecEnv returns rewards as arrays of shape (1,); keep the
            # running total as a scalar.
            reward_sum += reward[0]

            if should_render:
                test_env.render(mode='human')

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): '
            f'${reward_sum:.2f}')
Example #3
    def train(self,
              n_epochs: int = 1,
              iters_per_epoch: int = 1,
              test_trained_model: bool = False,
              render_trained_model: bool = False):
        self.initialize_optuna()

        env_params = self.get_env_params()

        train_len = int(self.test_set_percentage * len(self.feature_df))
        train_df = self.feature_df[:train_len]

        train_env = DummyVecEnv(
            [lambda: BitcoinTradingEnv(train_df, **env_params)])

        model_params = self.get_model_params()

        model = self.model(self.policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.nminibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        n_timesteps = len(train_df) * iters_per_epoch

        for model_epoch in range(n_epochs):
            self.logger.info(
                f'[{model_epoch}] Training for: {n_timesteps} time steps')

            model.learn(total_timesteps=n_timesteps)

            model_path = path.join('data', 'agents',
                                   f'{self.study_name}__{model_epoch}.pkl')
            model.save(model_path)

            if test_trained_model:
                self.test(model_epoch, should_render=render_trained_model)

        self.logger.info(f'Trained {n_epochs} models')
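A short usage sketch for the two methods above; the trainer class name and constructor argument are hypothetical, only train()/test() and the saved-agent path come from the excerpt:

trainer = RLTrader(reward_strategy='sortino')  # hypothetical class/constructor

# Train five epochs; each epoch saves data/agents/<study_name>__<epoch>.pkl
# and, because test_trained_model=True, immediately evaluates it on the
# held-out test slice without rendering.
trainer.train(n_epochs=5,
              test_trained_model=True,
              render_trained_model=False)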
Example #4
    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv(
                [lambda: BitcoinTradingEnv(self.feature_df)])
            model = self.model(self.policy, train_env, nminibatches=1)
            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}__{self.reward_strategy}'
        except Exception:
            self.study_name = f'UnknownModel__UnknownPolicy__{self.reward_strategy}'

        self.optuna_study = optuna.create_study(study_name=self.study_name,
                                                storage=self.params_db_path,
                                                load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}'
            )
        except ValueError:
            self.logger.debug('No trials have been finished yet.')
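Examples #2 and #3 call self.get_env_params() and self.get_model_params(), which are not shown. A minimal sketch of what such a helper might look like, assuming the environment parameters follow the forecast_steps/forecast_alpha naming used in Example #5:

    def get_env_params(self):
        # Hypothetical helper: reuse the best environment parameters found by
        # Optuna, falling back to defaults when no trial has completed yet.
        try:
            params = self.optuna_study.best_trial.params
        except ValueError:
            return {}

        return {
            'reward_func': self.reward_strategy,
            'forecast_steps': int(params['forecast_steps']),
            'forecast_alpha': params['forecast_alpha'],
        }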
Example #5
import pandas as pd
import optuna

from stable_baselines.common.vec_env import DummyVecEnv

# Assumed location of the environment class; adjust to wherever
# BitcoinTradingEnv lives in this project.
from lib.env.BitcoinTradingEnv import BitcoinTradingEnv
from lib.util.indicators import add_indicators


df = pd.read_csv('./data/coinbase_hourly.csv')
df = df.drop(['Symbol'], axis=1)
df = df.sort_values(['Date'])
df = add_indicators(df.reset_index())

test_len = int(len(df) * 0.2)
train_len = len(df) - test_len

test_df = df[train_len:]

profit_study = optuna.load_study(study_name='ppo2_profit',
                                 storage='sqlite:///params.db')
profit_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(profit_study.best_trial.params['forecast_steps']),
    forecast_alpha=profit_study.best_trial.params['forecast_alpha'])])

sortino_study = optuna.load_study(study_name='ppo2_sortino',
                                  storage='sqlite:///params.db')
sortino_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(sortino_study.best_trial.params['forecast_steps']),
    forecast_alpha=sortino_study.best_trial.params['forecast_alpha'])])

# calmar_study = optuna.load_study(study_name='ppo2_calmar',
#                                  storage='sqlite:///params.db')
# calmar_env = DummyVecEnv([lambda: BitcoinTradingEnv(
#     test_df,
#     reward_func="profit",
#     forecast_steps=int(calmar_study.best_trial.params['forecast_steps']),
#     forecast_alpha=calmar_study.best_trial.params['forecast_alpha'])])

omega_study = optuna.load_study(study_name='ppo2_omega',
                                storage='sqlite:///params.db')
omega_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(omega_study.best_trial.params['forecast_steps']),
    forecast_alpha=omega_study.best_trial.params['forecast_alpha'])])