def initialize_data(self, load_raw_data, features_to_add):
    self.logger.debug('Initializing data:')

    if self.data_provider == 'static':
        if not os.path.isfile(self.data_path):
            class_dir = os.path.dirname(__file__)
            self.data_path = os.path.realpath(
                os.path.join(class_dir, "../{}".format(self.data_path)))

        # data_columns = {'Date': 'Date',
        #                 'Open': 'Open', 'High': 'High', 'Low': 'Low', 'Close': 'Close', 'Volume': 'Volume',
        #                 'OpenVIX': 'OpenVIX', 'HighVIX': 'HighVIX', 'LowVIX': 'LowVIX', 'CloseVIX': 'CloseVIX',
        #                 'SKEW': 'SKEW'}  # for *v2.csv files; 'Volume' may be named 'VolumeFrom'

        if load_raw_data:
            self.logger.info(f'Loading raw data from disk: {self.data_path}')
            df = None
        else:
            d = self.data_path.replace('.csv', '_with_features.csv')
            self.logger.info(f'Loading prepared data from disk: {d}')
            df = pd.read_csv(d)

        self.data_provider = StaticDataProvider(date_format=self.date_format,
                                                csv_data_path=self.data_path,
                                                df=df,
                                                do_prepare_data=load_raw_data,
                                                features_to_add=features_to_add)
    elif self.data_provider == 'exchange':
        self.data_provider = ExchangeDataProvider(**self.exchange_args)

    self.logger.debug(
        f'Successfully initialized data.\nFeature list={self.data_provider.columns}')
def initialize_data(self):
    if self.data_provider == 'static':
        if not os.path.isfile(self.input_data_path):
            class_dir = os.path.dirname(__file__)
            self.input_data_path = os.path.realpath(
                os.path.join(class_dir, "../{}".format(self.input_data_path)))

        data_columns = {
            'Date': 'Date',
            'Open': 'Open',
            'High': 'High',
            'Low': 'Low',
            'Close': 'Close',
            'Volume': 'VolumeFrom'
        }

        self.data_provider = StaticDataProvider(
            date_format=self.date_format,
            csv_data_path=self.input_data_path,
            data_columns=data_columns)
    elif self.data_provider == 'exchange':
        self.data_provider = ExchangeDataProvider(**self.exchange_args)

    self.logger.debug(f'Initialized Features: {self.data_provider.columns}')
def initialize_data(self, provider, input_data_path):
    if provider == 'static':
        if self.input_data_path is None:
            class_dir = os.path.dirname(__file__)
            self.input_data_path = os.path.realpath(
                os.path.join(class_dir, "../{}".format(input_data_path)))

        data_columns = {
            'Date': 'Date',
            'Open': 'Open',
            'High': 'High',
            'Low': 'Low',
            'Close': 'Close',
            'Volume': 'VolumeFrom'
        }

        self.data_provider = StaticDataProvider(
            date_format=self.date_format,
            csv_data_path=self.input_data_path,
            data_columns=data_columns)
    else:
        self.data_provider = ExchangeDataProvider()

    self.logger.debug(f'Initialized Features: {self.data_provider.columns}')
def initialize_data(self):
    if self.data_provider == 'static':
        if not os.path.isfile(self.input_data_path):
            class_dir = os.path.dirname(__file__)
            self.input_data_path = os.path.realpath(
                os.path.join(class_dir, "../{}".format(self.input_data_path)))

        data_columns = {
            'Date': 'Date',
            'Open': 'Open',
            'High': 'High',
            'Low': 'Low',
            'Close': 'Close',
            'Volume': 'VolumeFrom'
        }

        self.data_provider = StaticDataProvider(
            date_format=self.date_format,
            csv_data_path=self.input_data_path,
            data_columns=data_columns)
    elif self.data_provider == 'exchange':
        self.data_provider = ExchangeDataProvider(**self.exchange_args)  # edited
    elif self.data_provider == 'vb':
        variety = 'RB'
        in_columns = [
            'Date', 'open', 'close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit'
        ] + vb_factor_columns
        columns = [
            'Date', 'open', 'Close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit'
        ] + vb_factor_columns
        data_columns = dict(zip(columns, in_columns))

        self.data_provider = VbDataProvider(data_columns=data_columns,
                                            variety='RB')

    self.logger.debug(f'Initialized Features: {self.data_provider.columns}')
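
# For reference, the dict(zip(columns, in_columns)) call in the 'vb' branch above maps each
# canonical column name to the corresponding raw-file column name; only 'Close' differs here
# (the raw file uses lowercase 'close'). A standalone illustration, with vb_factor_columns
# assumed empty for the sake of the example:
vb_factor_columns = []
in_columns = ['Date', 'open', 'close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit'] + vb_factor_columns
columns = ['Date', 'open', 'Close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit'] + vb_factor_columns
assert dict(zip(columns, in_columns))['Close'] == 'close'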
class RLTrader:
    data_provider = None
    study_name = None

    def __init__(self,
                 model: BaseRLModel = PPO2,
                 policy: BasePolicy = MlpLnLstmPolicy,
                 reward_strategy: BaseRewardStrategy = IncrementalProfit,
                 exchange_args: Dict = {},
                 **kwargs):
        self.logger = kwargs.get(
            'logger',
            init_logger(__name__, show_debug=kwargs.get('show_debug', True)))

        self.Model = model
        self.Policy = policy
        self.Reward_Strategy = reward_strategy
        self.exchange_args = exchange_args
        self.tensorboard_path = kwargs.get('tensorboard_path', None)
        self.input_data_path = kwargs.get('input_data_path',
                                          'data/input/EURUSD60.csv')
        self.params_db_path = kwargs.get('params_db_path',
                                         'sqlite:///data/params.db')
        self.date_format = kwargs.get('date_format',
                                      ProviderDateFormat.DATETIME_HOUR_24)

        self.model_verbose = kwargs.get('model_verbose', 1)
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.train_split_percentage = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')

        self.initialize_data()
        self.initialize_optuna()

        self.logger.debug(f'Initialize RLTrader: {self.study_name}')

    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(
                    os.path.join(class_dir,
                                 "../{}".format(self.input_data_path)))

            data_columns = {
                'Date': 'Date',
                'Open': 'Open',
                'High': 'High',
                'Low': 'Low',
                'Close': 'Close',
                'Volume': 'VolumeFrom'
            }

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.input_data_path,
                data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Initialized Features: {self.data_provider.columns}')

    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)
            strategy = self.Reward_Strategy()

            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}__{strategy.__class__.__name__}'
        except:
            self.study_name = 'UnknownModel__UnknownPolicy__UnknownStrategy'

        self.optuna_study = optuna.create_study(study_name=self.study_name,
                                                storage=self.params_db_path,
                                                load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}'
            )
        except:
            self.logger.debug('No trials have been finished yet.')

    def get_model_params(self):
        params = self.optuna_study.best_trial.params
        return {
            'n_steps': int(params['n_steps']),
            'gamma': params['gamma'],
            'learning_rate': params['learning_rate'],
            'ent_coef': params['ent_coef'],
            'cliprange': params['cliprange'],
            'noptepochs': int(params['noptepochs']),
            'lam': params['lam'],
        }

    def optimize_agent_params(self, trial):
        if self.Model != PPO2:
            return {
                'learning_rate':
                trial.suggest_loguniform('learning_rate', 1e-5, 1.)
            }

        return {
            'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
            'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
            'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
            'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
            'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
            'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
            'lam': trial.suggest_uniform('lam', 0.8, 1.)
        }

    def optimize_params(self,
                        trial,
                        n_prune_evals_per_trial: int = 2,
                        n_tests_per_eval: int = 1):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)
        train_provider, validation_provider = train_provider.split_data_train_test(
            self.train_split_percentage)

        del test_provider

        train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
        validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])

        model_params = self.optimize_agent_params(trial)
        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=1,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        last_reward = -np.finfo(np.float16).max
        n_steps_per_eval = int(
            len(train_provider.data_frame) / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            try:
                model.learn(n_steps_per_eval)
            except AssertionError:
                raise

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            trades = train_env.get_attr('trades')

            if len(trades[0]) < 1:
                self.logger.info(
                    f'Pruning trial for not making any trades: {eval_idx}')
                raise optuna.structs.TrialPruned()

            state = None
            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, state = model.predict(obs, state=state)
                obs, reward, done, _ = validation_env.step([action])

                reward_sum += reward[0]

                if all(done):
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward

    def optimize(self, n_trials: int = 20):
        try:
            self.optuna_study.optimize(self.optimize_params,
                                       n_trials=n_trials,
                                       n_jobs=1)
        except KeyboardInterrupt:
            pass

        self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}')
        self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')
        self.logger.info('Params: ')

        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f' {key}: {value}')

        return self.optuna_study.trials_dataframe()

    def train(self,
              n_epochs: int = 10,
              save_every: int = 1,
              test_trained_model: bool = True,
              render_test_env: bool = False,
              render_report: bool = True,
              save_report: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv(
            [make_env(train_provider, i) for i in range(self.n_envs)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.n_minibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(
                f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            if model_epoch % save_every == 0:
                model_path = path.join(
                    'data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
                model.save(model_path)

                if test_trained_model:
                    self.test(model_epoch,
                              render_env=render_test_env,
                              render_report=render_report,
                              save_report=save_report)

        self.logger.info(f'Trained {n_epochs} models')

    def test(self,
             model_epoch: int = 0,
             render_env: bool = True,
             render_report: bool = True,
             save_report: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)

        del train_provider

        init_envs = DummyVecEnv(
            [make_env(test_provider) for _ in range(self.n_envs)])

        model_path = path.join('data', 'agents',
                               f'{self.study_name}__{model_epoch}.pkl')
        model = self.Model.load(model_path, env=init_envs)

        test_env = DummyVecEnv([make_env(test_provider) for _ in range(1)])

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        zero_completed_obs = np.zeros((self.n_envs, ) +
                                      init_envs.observation_space.shape)
        zero_completed_obs[0, :] = test_env.reset()

        state = None
        rewards = []

        for _ in range(len(test_provider.data_frame)):
            action, state = model.predict(zero_completed_obs, state=state)
            obs, reward, done, info = test_env.step([action[0]])

            zero_completed_obs[0, :] = obs

            rewards.append(reward)

            if render_env:
                test_env.render(mode='human')

            if done:
                net_worths = pd.DataFrame({
                    'Date': info[0]['timestamps'],
                    'Balance': info[0]['net_worths'],
                })

                net_worths.set_index('Date', drop=True, inplace=True)
                returns = net_worths.pct_change()[1:]

                if render_report:
                    qs.plots.snapshot(returns.Balance,
                                      title='RL Trader Performance')

                if save_report:
                    reports_path = path.join(
                        'data', 'reports',
                        f'{self.study_name}__{model_epoch}.html')
                    qs.reports.html(returns.Balance, file=reports_path)

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}'
        )
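
# A minimal usage sketch for the RLTrader class above; this is an illustration, not part of
# the original module. It assumes the default CSV at data/input/EURUSD60.csv exists and that
# stable-baselines, optuna, quantstats, and the project's own modules are importable.
if __name__ == '__main__':
    trader = RLTrader(n_envs=4, n_minibatches=4)
    trader.optimize(n_trials=20)  # hyperparameter search; trials are stored in sqlite:///data/params.db
    trader.train(n_epochs=10)     # trains with the best trial's params and saves checkpoints to data/agents/
    trader.test(model_epoch=0, render_env=False)  # evaluates a saved checkpoint on the held-out split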
class Optuna:
    study_name = None

    def __init__(
            self,
            model_actor: BaseRLModel = PPO2,
            policy: BasePolicy = MlpLnLstmPolicy,  # (ActorCriticPolicy or str) The policy model to use (MlpPolicy, CnnPolicy, CnnLstmPolicy, ...)
            reward_strategy: Reward_Strategy_BASE = RewardPnL,  # IncrementalProfit
            exchange_args: Dict = {},
            **kwargs):
        self.model_actor: BaseRLModel = model_actor
        self.policy = policy
        self.Reward_Strategy = reward_strategy
        self.exchange_args = exchange_args
        self.logger = kwargs.get(
            'logger',
            init_logger(__name__, show_debug=kwargs.get('show_debug', True)))

        self.db_path = kwargs.get('db_path', 'sqlite:///data/params.db')
        self.date_format = kwargs.get('date_format',
                                      ProviderDateFormat.DATETIME_HOUR_24)
        self.data_path = kwargs.get('data_path',
                                    'data/input/coinbase-1h-btc-usd.csv')
        self.data_train_split_pct = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')
        # self.columns_map = kwargs.get('columns_map', {})
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.model_logs_tb = kwargs.get('tensorboard_path',
                                        os.path.join('data', 'logs_tb'))
        self.model_verbose = kwargs.get('model_verbose', 1)
        self.do_load_raw_data: bool = kwargs.get('do_load_raw_data', True)
        self.features_to_add: str = kwargs.get('features_to_add', 'none')

        self.initialize_data(self.do_load_raw_data, self.features_to_add)
        self.initialize_db_optuna()  # Optuna study for the hyperparameter search

        self.logger.info(
            f'Successfully initialized RLTrader study {self.study_name}. '
            f'To monitor, open a terminal, run: tensorboard --logdir={self.model_logs_tb}, '
            f'then browse to http://localhost:6006/')

    def initialize_data(self, load_raw_data, features_to_add):
        self.logger.debug('Initializing data:')

        if self.data_provider == 'static':
            if not os.path.isfile(self.data_path):
                class_dir = os.path.dirname(__file__)
                self.data_path = os.path.realpath(
                    os.path.join(class_dir, "../{}".format(self.data_path)))

            # data_columns = {'Date': 'Date',
            #                 'Open': 'Open', 'High': 'High', 'Low': 'Low', 'Close': 'Close', 'Volume': 'Volume',
            #                 'OpenVIX': 'OpenVIX', 'HighVIX': 'HighVIX', 'LowVIX': 'LowVIX', 'CloseVIX': 'CloseVIX',
            #                 'SKEW': 'SKEW'}  # for *v2.csv files; 'Volume' may be named 'VolumeFrom'

            if load_raw_data:
                self.logger.info(f'Loading raw data from disk: {self.data_path}')
                df = None
            else:
                d = self.data_path.replace('.csv', '_with_features.csv')
                self.logger.info(f'Loading prepared data from disk: {d}')
                df = pd.read_csv(d)

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.data_path,
                df=df,
                do_prepare_data=load_raw_data,
                features_to_add=features_to_add)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Successfully initialized data.\nFeature list={self.data_provider.columns}')

    def initialize_db_optuna(self):
        self.logger.debug('Initializing Optuna and loading the best model from the db')
        try:
            mlp = MLPClassifier(
                random_state=5,
                hidden_layer_sizes=(250, 150, 100, 50, 20, 10, 5),
                shuffle=False,
                activation='relu',
                solver='adam',
                batch_size=100,
                max_iter=200,
                learning_rate_init=0.001)  # 50.86 %
            # activation in ('identity', 'logistic', 'tanh', 'relu'); solver in {'lbfgs', 'sgd', 'adam'}, default 'adam'
            # MLPClassifier has no act_model attribute (that came from the stable-baselines
            # version of this code), so the study name is built from the classifier alone.
            self.study_name = mlp.__class__.__name__
        except:
            self.study_name = f'ErrorModel__ErrorPolicy__ErrorStrategy {mlp}'

        # Now we can load the study from the sqlite database we told Optuna to create.
        self.optuna_study = optuna.create_study(study_name=self.study_name,
                                                storage=self.db_path,
                                                direction='minimize',
                                                load_if_exists=True)
        self.logger.debug(
            f'Successfully initialized Optuna, study_name={self.study_name}')
        try:
            self.logger.debug(
                f'Found {len(self.optuna_study.trials)} trials in db, '
                f'best value (minimum)={self.optuna_study.best_value}, '
                f'params={self.optuna_study.best_params.items()}')
            # or {self.optuna_study.best_trial.params.items()}
        except:
            self.logger.debug('Error: No trials have been finished yet.')

    def optuna_get_model_params(self):
        # Return the model params of the best trial that Optuna found.
        params = self.optuna_study.best_trial.params
        return {
            'batch_size': int(params['batch_size']),  # (int) samples per gradient update
            'hidden_size': params['hidden_size'],  # width of the hidden layer(s)
            'learning_rate': params['learning_rate'],  # (float) initial learning rate
            'epoch': params['epoch'],  # number of training epochs
            'dropout': params['dropout'],  # dropout / regularization rate
        }

    def optimize_agent_params(self, trial):
        # Defining the parameter search spaces.
        return {
            'batch_size': int(trial.suggest_loguniform('batch_size', 8, 512)),  # log-uniform between 8 and 512
            'hidden_size': trial.suggest_loguniform('hidden_size', 20, 1000),  # log-uniform between 20 and 1000
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.00001, 0.01),
            'epoch': trial.suggest_loguniform('epoch', 10, 600),
            'dropout': trial.suggest_uniform('dropout', 0.0, 0.4),  # uniform between 0.0 and 0.4
        }

    '''
    Earlier grid-search notes:
    activations = ['sigmoid', 'softmax']  # also tried 'softplus', 'softsign', 'tanh', 'hard_sigmoid', 'linear', 'relu'; best: 'softmax', 'softplus', 'softsign'
    inits = ['glorot_uniform']  # also 'zero', 'uniform', 'normal', 'lecun_uniform', 'he_uniform', 'he_normal'; all the same except he_normal, which was worse
    optimizers = ['RMSprop', 'SGD']  # also 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'; same for all
    losses = ['categorical_crossentropy']  # also 'mse', 'mae'; better = binary_crossentropy
    epochs = [300, 800]  # also 100, 150; default epochs=1, better=100
    batch_size = [12, 128]  # also 150, 200, 10, 20; default = None, best = 32
    size_hiddens = [200, 600]  # also 5, 10, 20; best = 100
    # Best score: 0.525712 using params {'batch_size': 128, 'dropout': 0.2, 'epochs': 100, 'loss': 'binary_crossentropy', 'size_hidden': 100}
    lrs = [0.01, 0.001, 0.00001]  # also 0.03, 0.05, 0.07, 0.0001, 1, 0.1; best: 0.01, 0.001, 0.0001
    dropout = [0.2]  # also 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    '''

    def optimize(self, n_trials: int = 10):
        self.logger.info(f'Start optimizing: {n_trials} trials')
        try:
            self.optuna_study.optimize(
                self.optimize_params,  # the callable where the learning actually occurs
                n_trials=n_trials,  # if None, there is no limit on the number of trials
                # timeout=100100100,  # seconds to run
                n_jobs=1)  # with n_jobs=1 trials do not run in parallel
        except KeyboardInterrupt:
            pass

        self.logger.info(
            f'Finished optimizing. Trials in db: {len(self.optuna_study.trials)}')
        # self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')
        self.logger.info('Params: ')
        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f' {key}: {value}')
        # self.optuna_study._storage.set_trial_state(trial_id, structs.TrialState.COMPLETE)
        # optuna.visualization.plot_intermediate_values(self.optuna_study)
        df = self.optuna_study.trials_dataframe()
        return df

    def optimize_params(
            self,
            trial,
            n_epochs: int = 2,  # 2 is enough for the optimization loop; full training needs millions of steps
            n_tests_per_eval: int = 1):
        # We must not hyper-tune the model on the 20% test split, so we split the train set again 80/20.
        x_train, x_test = self.data_provider.split_data_train_test(
            self.data_train_split_pct)  # 0.8
        x_train, x_valid = x_train.split_data_train_test(
            self.data_train_split_pct)  # 0.64
        del x_test

        model_params = self.optimize_agent_params(trial)
        mlp = MLPClassifier(
            random_state=5,
            hidden_layer_sizes=(250, 150, 100, 50, 20, 10, 5),
            shuffle=False,
            activation='relu',
            solver='adam',
            batch_size=100,
            max_iter=200,
            learning_rate_init=0.001)  # 50.86 %
        # activation in ('identity', 'logistic', 'tanh', 'relu'); solver in {'lbfgs', 'sgd', 'adam'}, default 'adam'

        error_last = -np.finfo(np.float16).max
        n_samples = len(x_train.df)
        steps = int(n_samples / n_epochs)
        attempt = 0

        for epoch in range(1, n_epochs + 1):
            self.logger.info(
                f'{epoch}/{n_epochs} epochs. Training on a small sample of {steps} time steps')
            try:
                # learn
                mlp.fit(self.x_train, self.y_train)
            except AssertionError:
                raise

            if trades_s < (steps * 0.05):
                self.logger.info(
                    f'Setting status of trial #{epoch} to TrialState.PRUNED due to the small number of shorts ({trades_s}).')
                raise optuna.structs.TrialPruned()

            # predict on the validation split (x_test was deleted above)
            n_episodes, errorsum = 0, 0.0
            while n_episodes < n_tests_per_eval:
                error = mlp.predict(x_valid)
                errorsum += error[0]
                lll = len(error)
                error_last = np.mean(error)
                attempt += 1
                n_episodes += 1
                self.logger.info(
                    f'Found a setup. Mean of {lll} rewards = {-1 * error_last}$. '
                    f'Inserting attempt #{attempt} into the Optuna db')
                # mean reward 5.39998984336853$

            # If step is None, the value is stored as the final value of the trial;
            # otherwise it is saved as an intermediate value.
            trial.report(value=-1 * error_last, step=epoch)
            if trial.should_prune(epoch):  # prune unpromising trials
                raise optuna.structs.TrialPruned()

        # Multiply by -1 because Optuna interprets a lower return value as a better trial.
        return -1 * error_last
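
# Note that optimize_params above samples model_params from the trial but then builds the
# MLPClassifier with hard-coded values. A hedged sketch of how the sampled parameters could
# be wired into the classifier (an assumption about the intent, not the original code):
def build_mlp_from_trial_params(model_params):
    from sklearn.neural_network import MLPClassifier
    # 'dropout' has no direct MLPClassifier equivalent; alpha (L2) is the closest built-in regularizer.
    return MLPClassifier(
        random_state=5,
        hidden_layer_sizes=(int(model_params['hidden_size']),),  # one hidden layer of the sampled width
        batch_size=int(model_params['batch_size']),
        max_iter=int(model_params['epoch']),
        learning_rate_init=model_params['learning_rate'],
        activation='relu',
        solver='adam')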
class RLTrader:
    data_provider = None
    study_name = None

    def __init__(self, modelClass: BaseRLModel = PPO2, policyClass: BasePolicy = MlpPolicy, exchange_args: Dict = {}, **kwargs):
        self.logger = kwargs.get('logger', init_logger(__name__, show_debug=kwargs.get('show_debug', True)))

        self.Model = modelClass
        self.Policy = policyClass
        self.exchange_args = exchange_args
        self.tensorboard_path = kwargs.get('tensorboard_path', None)
        self.input_data_path = kwargs.get('input_data_path', 'data/input/coinbase-1h-btc-usd.csv')
        self.params_db_path = kwargs.get('params_db_path', 'sqlite:///data/params.db')

        self.date_format = kwargs.get('date_format', ProviderDateFormat.DATETIME_HOUR_24)

        self.model_verbose = kwargs.get('model_verbose', 1)
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.train_split_percentage = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')

        self.initialize_data()
        self.initialize_optuna()

        self.logger.debug(f'Initialize RLTrader: {self.study_name}')

    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(os.path.join(class_dir, "../{}".format(self.input_data_path)))

            data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',
                            'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}

            self.data_provider = StaticDataProvider(date_format=self.date_format,
                                                    csv_data_path=self.input_data_path,
                                                    data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(f'Initialized Features: {self.data_provider.columns}')

    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)

            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}'
        except:
            self.study_name = 'UnknownModel__UnknownPolicy'

        self.optuna_study = optuna.create_study(
            study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
        except:
            self.logger.debug('No trials have been finished yet.')

    def get_model_params(self):
        params = self.optuna_study.best_trial.params
        return {
            'n_steps': int(params['n_steps']),
            'gamma': params['gamma'],
            'learning_rate': params['learning_rate'],
            'ent_coef': params['ent_coef'],
            'cliprange': params['cliprange'],
            'noptepochs': int(params['noptepochs']),
            'lam': params['lam'],
        }

    def optimize_agent_params(self, trial):
        if self.Model != PPO2:
            return {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.)}

        return {
            'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
            'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
            'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
            'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
            'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
            'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
            'lam': trial.suggest_uniform('lam', 0.8, 1.)
        }

    def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_eval: int = 1):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
        train_provider, validation_provider = train_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(1)])
        validation_env = SubprocVecEnv([make_env(validation_provider, i) for i in range(1)])

        model_params = self.optimize_agent_params(trial)
        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=1,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        last_reward = -np.finfo(np.float16).max
        n_steps_per_eval = int(len(train_provider.data_frame) / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            try:
                model.learn(n_steps_per_eval)
            except AssertionError:
                raise

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            state = None
            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, state = model.predict(obs, state=state)
                obs, reward, done, _ = validation_env.step(action)

                reward_sum += reward

                if all(done):
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward

    def optimize(self, n_trials: int = 100, n_parallel_jobs: int = 1, *optimize_params):
        try:
            self.optuna_study.optimize(
                self.optimize_params, n_trials=n_trials, n_jobs=n_parallel_jobs, *optimize_params)
        except KeyboardInterrupt:
            pass

        self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}')
        self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')
        self.logger.info('Params: ')

        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f' {key}: {value}')

        return self.optuna_study.trials_dataframe()

    def train(self, n_epochs: int = 100, save_every: int = 10, test_trained_model: bool = False, render_trained_model: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(self.n_envs)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.n_minibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            if model_epoch % save_every == 0:
                model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
                model.save(model_path)

                if test_trained_model:
                    self.test(model_epoch, should_render=render_trained_model)

        self.logger.info(f'Trained {n_epochs} models')

    def test(self, model_epoch: int = 0, should_render: bool = True):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del train_provider

        test_env = SubprocVecEnv([make_env(test_provider, i) for i in range(self.n_envs)])

        model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
        model = self.Model.load(model_path, env=test_env)

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        state = None
        obs, done, rewards = test_env.reset(), [False], []
        while not all(done):
            action, state = model.predict(obs, state=state)
            obs, reward, done, _ = test_env.step(action)

            rewards.append(reward)

            if should_render and self.n_envs == 1:
                test_env.render(mode='human')

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}')
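
# make_env is referenced throughout the classes above but not defined in this listing. Below
# is a minimal sketch of a compatible factory for SubprocVecEnv/DummyVecEnv; it assumes only
# the TradingEnv(data_provider) constructor used above and is an illustration rather than the
# project's actual helper.
def make_env(data_provider, rank: int = 0, seed: int = 0):
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)  # assumes TradingEnv exposes gym.Env's seed()
        return env
    return _init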