def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_eval: int = 1):
    train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
    train_provider, validation_provider = train_provider.split_data_train_test(self.train_split_percentage)

    del test_provider

    train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
    validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])

    model_params = self.optimize_agent_params(trial)
    model = self.Model(self.Policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=1,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    last_reward = -np.finfo(np.float16).max
    n_steps_per_eval = int(len(train_provider.data_frame) / n_prune_evals_per_trial)

    for eval_idx in range(n_prune_evals_per_trial):
        try:
            model.learn(n_steps_per_eval)
        except AssertionError:
            raise

        rewards = []
        n_episodes, reward_sum = 0, 0.0

        trades = train_env.get_attr('trades')

        if len(trades[0]) < 1:
            self.logger.info(f'Pruning trial for not making any trades: {eval_idx}')
            raise optuna.structs.TrialPruned()

        state = None
        obs = validation_env.reset()
        while n_episodes < n_tests_per_eval:
            action, state = model.predict(obs, state=state)
            obs, reward, done, _ = validation_env.step([action])

            reward_sum += reward[0]

            if all(done):
                rewards.append(reward_sum)
                reward_sum = 0.0
                n_episodes += 1
                obs = validation_env.reset()

        last_reward = np.mean(rewards)
        trial.report(-1 * last_reward, eval_idx)

        if trial.should_prune(eval_idx):
            raise optuna.structs.TrialPruned()

    return -1 * last_reward
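
# A minimal driver sketch for the objective above, assuming the same class also
# defines initialize_optuna() (below) and therefore self.optuna_study. The
# optimize() method name, its n_trials default, and the KeyboardInterrupt
# handling are illustrative assumptions, not code from the original section.
def optimize(self, n_trials: int = 20):
    self.initialize_optuna()

    try:
        # Optuna calls optimize_params once per trial and minimizes the returned
        # value, i.e. -1 * the mean validation reward reported above.
        self.optuna_study.optimize(self.optimize_params, n_trials=n_trials)
    except KeyboardInterrupt:
        pass

    self.logger.info(f'Finished {len(self.optuna_study.trials)} trials')

    return self.optuna_study.best_params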
def test(self, model_epoch: int = 0, should_render: bool = True):
    train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

    del train_provider

    test_env = DummyVecEnv([lambda: TradingEnv(test_provider)])

    model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
    model = self.Model.load(model_path, env=test_env)

    self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

    state = None
    obs, done, rewards = test_env.reset(), False, []
    while not done:
        action, state = model.predict(obs, state=state)
        obs, reward, done, _ = test_env.step(action)

        rewards.append(reward)

        if should_render:
            test_env.render(mode='human')

    self.logger.info(f'Finished testing model ({self.study_name}__{model_epoch}): ${np.mean(rewards):.2f}')
def train(self, n_epochs: int = 10, test_trained_model: bool = False, render_trained_model: bool = False):
    train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

    del test_provider

    train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])

    model_params = self.get_model_params()
    model = self.Model(self.Policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=self.nminibatches,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    self.logger.info(f'Training for {n_epochs} epochs')

    steps_per_epoch = len(train_provider.data_frame)

    for model_epoch in range(n_epochs):
        self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

        model.learn(total_timesteps=steps_per_epoch)

        model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
        model.save(model_path)

        if test_trained_model:
            self.test(model_epoch, should_render=render_trained_model)

    self.logger.info(f'Trained {n_epochs} models')
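
# Hedged usage sketch: `trader` stands for an instance of the class these methods
# belong to (its name and constructor are not shown in this section, so they are
# left as an assumption). train() writes one checkpoint per epoch to
# data/agents/<study_name>__<epoch>.pkl, which test() then reloads by epoch index.
# trader.train(n_epochs=10, test_trained_model=True, render_trained_model=False)
# trader.test(model_epoch=9, should_render=True)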
def initialize_optuna(self):
    try:
        train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
        model = self.Model(self.Policy, train_env, nminibatches=1)
        self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}'
    except Exception:
        self.study_name = 'UnknownModel__UnknownPolicy'

    self.optuna_study = optuna.create_study(
        study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

    self.logger.debug('Initialized Optuna:')

    try:
        self.logger.debug(
            f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
    except ValueError:
        self.logger.debug('No trials have been finished yet.')
def _init():
    env = TradingEnv(data_provider)
    env.seed(seed + rank)
    return env
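
# _init above is an environment-factory closure; in the usual stable-baselines
# pattern it is returned from an enclosing make_env(data_provider, rank, seed)
# helper so each parallel worker builds its own seeded TradingEnv. The sketch
# below assumes that pattern and the stable-baselines 2 API; the make_env name
# and the four-worker example are assumptions, not part of the original section.
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv


def make_env(data_provider, rank: int, seed: int = 0):
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


# Example usage: four parallel training environments for a single provider.
# train_env = SubprocVecEnv([make_env(train_provider, rank=i) for i in range(4)])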