Example #1
    def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_eval: int = 1):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)
        train_provider, validation_provider = train_provider.split_data_train_test(
            self.train_split_percentage)

        del test_provider

        train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
        validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])

        model_params = self.optimize_agent_params(trial)
        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=1,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        last_reward = -np.finfo(np.float16).max
        n_steps_per_eval = int(
            len(train_provider.data_frame) / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            model.learn(n_steps_per_eval)

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            trades = train_env.get_attr('trades')

            if len(trades[0]) < 1:
                self.logger.info(
                    f'Pruning trial for not making any trades: {eval_idx}')
                raise optuna.structs.TrialPruned()

            state = None
            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, state = model.predict(obs, state=state)
                obs, reward, done, _ = validation_env.step([action])

                reward_sum += reward[0]

                if all(done):
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward
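
Because optimize_params returns the negative mean validation reward, it can be handed directly to a (by default, minimizing) Optuna study. Below is a minimal sketch of that wiring, assuming a `trader` instance of this class; the trial count and pruner choice are illustrative assumptions, not taken from the snippet above (in the project itself the study is created in initialize_optuna(), see Example #4):

import optuna

# Hypothetical driver code; study name, pruner and n_trials are placeholders.
study = optuna.create_study(study_name='rl_trader_params',
                            pruner=optuna.pruners.MedianPruner())
study.optimize(trader.optimize_params, n_trials=20)

print(study.best_params)   # best hyperparameter set found so far
print(-study.best_value)   # best mean validation reward (sign flipped back)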
Example #2
    def test(self, model_epoch: int = 0, should_render: bool = True):
        train_provider, test_provider = self.data_provider.split_provider_train_test(
            self.train_split_percentage)

        del train_provider

        test_env = DummyVecEnv([lambda: TradingEnv(test_provider)])

        model_path = path.join('data', 'agents',
                               f'{self.study_name}__{model_epoch}.pkl')
        model = self.Model.load(model_path, env=test_env)

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        state = None
        obs, done, rewards = test_env.reset(), False, []
        while not done:
            action, state = model.predict(obs, state=state)
            obs, reward, done, _ = test_env.step(action)

            rewards.append(reward)

            if should_render:
                test_env.render(mode='human')

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): ${np.mean(rewards):.2f}')
Example #3
    def train(self, n_epochs: int = 10, test_trained_model: bool = False, render_trained_model: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches,
                           tensorboard_log=self.tensorboard_path, **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
            model.save(model_path)

            if test_trained_model:
                self.test(model_epoch, should_render=render_trained_model)

        self.logger.info(f'Trained {n_epochs} models')
Example #4
File: RLTrader.py  Project: cholox/RLTrader
    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)
            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}'
        except Exception:
            self.study_name = 'UnknownModel__UnknownPolicy'

        self.optuna_study = optuna.create_study(
            study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
        except Exception:
            self.logger.debug('No trials have been finished yet.')
Example #5
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)
        return env
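
This closure is the per-worker factory pattern that stable-baselines vectorized environments expect. A minimal sketch of the enclosing helper and its use follows, assuming the outer function is named make_env and that four in-process workers are wanted (both are assumptions; the snippet does not show the surrounding code):

from stable_baselines.common.vec_env import DummyVecEnv

# TradingEnv and train_provider come from the project code shown in the examples above.
def make_env(data_provider, rank: int, seed: int = 0):
    # Assumed enclosing helper: returns a thunk that builds one seeded TradingEnv per worker.
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)
        return env
    return _init

# Each callable builds its own TradingEnv; DummyVecEnv steps them sequentially in-process.
train_env = DummyVecEnv([make_env(train_provider, rank=i) for i in range(4)])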