Example #1
    def initialize_data(self, load_raw_data, features_to_add):
        self.logger.debug('Initializing data:')
        if self.data_provider == 'static':
            if not os.path.isfile(self.data_path):
                class_dir = os.path.dirname(__file__)
                self.data_path = os.path.realpath(
                    os.path.join(class_dir, "../{}".format(self.data_path)))

            # data_columns = {
            #     'Date': 'Date', 'Open': 'Open', 'High': 'High',
            #     'Low': 'Low', 'Close': 'Close', 'Volume': 'Volume',  # VolumeFrom
            #     # for *v2.csv files:
            #     'OpenVIX': 'OpenVIX', 'HighVIX': 'HighVIX', 'LowVIX': 'LowVIX',
            #     'CloseVIX': 'CloseVIX', 'SKEW': 'SKEW',
            # }

            if load_raw_data:
                self.logger.info(
                    f'Loading raw data from disk: {self.data_path}')
                df = None
            else:
                d = self.data_path.replace('.csv', '_with_features.csv')
                self.logger.info(f'Loading prepared data from disk: {d}')
                df = pd.read_csv(d)

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.data_path,
                df=df,
                do_prepare_data=load_raw_data,
                features_to_add=features_to_add)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Successfully initialized data.\nFeature list: {self.data_provider.columns}'
        )
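
The load_raw_data flag above switches between reading the raw CSV and a prepared companion file. A small sketch of the path convention that branch relies on (the file name is taken from the default data_path in the later snippets):

# Path convention used by the prepared-data branch of initialize_data:
data_path = 'data/input/coinbase-1h-btc-usd.csv'            # raw OHLCV file
prepared_path = data_path.replace('.csv', '_with_features.csv')
# -> 'data/input/coinbase-1h-btc-usd_with_features.csv'     # read when load_raw_data is False
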
Example #2
    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(
                    os.path.join(class_dir,
                                 "../{}".format(self.input_data_path)))

            data_columns = {
                'Date': 'Date',
                'Open': 'Open',
                'High': 'High',
                'Low': 'Low',
                'Close': 'Close',
                'Volume': 'VolumeFrom'
            }

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.input_data_path,
                data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Initialized Features: {self.data_provider.columns}')
Example #3
    def initialize_data(self, provider, input_data_path):
        if 'static' == provider:
            if self.input_data_path is None:
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(
                    os.path.join(class_dir, "../{}".format(input_data_path)))

            data_columns = {
                'Date': 'Date',
                'Open': 'Open',
                'High': 'High',
                'Low': 'Low',
                'Close': 'Close',
                'Volume': 'VolumeFrom'
            }

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.input_data_path,
                data_columns=data_columns)
        else:
            self.data_provider = ExchangeDataProvider()

        self.logger.debug(
            f'Initialized Features: {self.data_provider.columns}')
Example #4
    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(
                    os.path.join(class_dir,
                                 "../{}".format(self.input_data_path)))

            data_columns = {
                'Date': 'Date',
                'Open': 'Open',
                'High': 'High',
                'Low': 'Low',
                'Close': 'Close',
                'Volume': 'VolumeFrom'
            }

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.input_data_path,
                data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)
        # edited
        elif self.data_provider == 'vb':
            variety = 'RB'
            in_columns = [
                'Date', 'open', 'close', 'a1', 'a1v', 'b1', 'b1v',
                'up_down_limit'
            ] + vb_factor_columns
            columns = [
                'Date', 'open', 'Close', 'a1', 'a1v', 'b1', 'b1v',
                'up_down_limit'
            ] + vb_factor_columns
            data_columns = dict(zip(columns, in_columns))
            self.data_provider = VbDataProvider(data_columns=data_columns,
                                                variety=variety)

        self.logger.debug(
            f'Initialized Features: {self.data_provider.columns}')
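
In the 'vb' branch above, dict(zip(columns, in_columns)) pairs each canonical column name with the raw input column at the same position. An illustration using the lists from the snippet (vb_factor_columns omitted):

columns = ['Date', 'open', 'Close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit']
in_columns = ['Date', 'open', 'close', 'a1', 'a1v', 'b1', 'b1v', 'up_down_limit']
print(dict(zip(columns, in_columns)))
# {'Date': 'Date', 'open': 'open', 'Close': 'close', 'a1': 'a1', 'a1v': 'a1v',
#  'b1': 'b1', 'b1v': 'b1v', 'up_down_limit': 'up_down_limit'}
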
Example #5
class RLTrader:
    data_provider = None
    study_name = None

    def __init__(self,
                 model: BaseRLModel = PPO2,
                 policy: BasePolicy = MlpLnLstmPolicy,
                 reward_strategy: BaseRewardStrategy = IncrementalProfit,
                 exchange_args: Dict = {},
                 **kwargs):
        self.logger = kwargs.get(
            'logger',
            init_logger(__name__, show_debug=kwargs.get('show_debug', True)))

        self.Model = model
        self.Policy = policy
        self.Reward_Strategy = reward_strategy
        self.exchange_args = exchange_args
        self.tensorboard_path = kwargs.get('tensorboard_path', None)
        self.input_data_path = kwargs.get('input_data_path',
                                          'data/input/EURUSD60.csv')
        self.params_db_path = kwargs.get('params_db_path',
                                         'sqlite:///data/params.db')

        self.date_format = kwargs.get('date_format',
                                      ProviderDateFormat.DATETIME_HOUR_24)

        self.model_verbose = kwargs.get('model_verbose', 1)
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.train_split_percentage = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')

        self.initialize_data()
        self.initialize_optuna()

        self.logger.debug(f'Initialize RLTrader: {self.study_name}')

    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(
                    os.path.join(class_dir,
                                 "../{}".format(self.input_data_path)))

            data_columns = {
                'Date': 'Date',
                'Open': 'Open',
                'High': 'High',
                'Low': 'Low',
                'Close': 'Close',
                'Volume': 'VolumeFrom'
            }

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.input_data_path,
                data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Initialized Features: {self.data_provider.columns}')

    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)
            strategy = self.Reward_Strategy()

            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}__{strategy.__class__.__name__}'
        except Exception:
            self.study_name = 'UnknownModel__UnknownPolicy__UnknownStrategy'

        self.optuna_study = optuna.create_study(study_name=self.study_name,
                                                storage=self.params_db_path,
                                                load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}'
            )
        except Exception:
            self.logger.debug('No trials have been finished yet.')

    def get_model_params(self):
        params = self.optuna_study.best_trial.params
        return {
            'n_steps': int(params['n_steps']),
            'gamma': params['gamma'],
            'learning_rate': params['learning_rate'],
            'ent_coef': params['ent_coef'],
            'cliprange': params['cliprange'],
            'noptepochs': int(params['noptepochs']),
            'lam': params['lam'],
        }

    def optimize_agent_params(self, trial):
        if self.Model != PPO2:
            return {
                'learning_rate':
                trial.suggest_loguniform('learning_rate', 1e-5, 1.)
            }

        return {
            'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
            'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
            'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5,
                                                      1.),
            'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
            'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
            'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
            'lam': trial.suggest_uniform('lam', 0.8, 1.)
        }

    def optimize_params(self,
                        trial,
                        n_prune_evals_per_trial: int = 2,
                        n_tests_per_eval: int = 1):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)
        train_provider, validation_provider = train_provider.split_data_train_test(
            self.train_split_percentage)

        del test_provider

        train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
        validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])

        model_params = self.optimize_agent_params(trial)
        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=1,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        last_reward = -np.finfo(np.float16).max
        n_steps_per_eval = int(
            len(train_provider.data_frame) / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            try:
                model.learn(n_steps_per_eval)
            except AssertionError:
                raise

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            trades = train_env.get_attr('trades')

            if len(trades[0]) < 1:
                self.logger.info(
                    f'Pruning trial for not making any trades: {eval_idx}')
                raise optuna.structs.TrialPruned()

            state = None
            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, state = model.predict(obs, state=state)
                obs, reward, done, _ = validation_env.step([action])

                reward_sum += reward[0]

                if all(done):
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward

    def optimize(self, n_trials: int = 20):
        try:
            self.optuna_study.optimize(self.optimize_params,
                                       n_trials=n_trials,
                                       n_jobs=1)
        except KeyboardInterrupt:
            pass

        self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}')

        self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')

        self.logger.info('Params: ')
        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f'    {key}: {value}')

        return self.optuna_study.trials_dataframe()

    def train(self,
              n_epochs: int = 10,
              save_every: int = 1,
              test_trained_model: bool = True,
              render_test_env: bool = False,
              render_report: bool = True,
              save_report: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv(
            [make_env(train_provider, i) for i in range(self.n_envs)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.n_minibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(
                f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            if model_epoch % save_every == 0:
                model_path = path.join(
                    'data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
                model.save(model_path)

                if test_trained_model:
                    self.test(model_epoch,
                              render_env=render_test_env,
                              render_report=render_report,
                              save_report=save_report)

        self.logger.info(f'Trained {n_epochs} models')

    def test(self,
             model_epoch: int = 0,
             render_env: bool = True,
             render_report: bool = True,
             save_report: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(
            self.train_split_percentage)

        del train_provider

        init_envs = DummyVecEnv(
            [make_env(test_provider) for _ in range(self.n_envs)])

        model_path = path.join('data', 'agents',
                               f'{self.study_name}__{model_epoch}.pkl')
        model = self.Model.load(model_path, env=init_envs)

        test_env = DummyVecEnv([make_env(test_provider) for _ in range(1)])

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        zero_completed_obs = np.zeros((self.n_envs, ) +
                                      init_envs.observation_space.shape)
        zero_completed_obs[0, :] = test_env.reset()

        state = None
        rewards = []

        for _ in range(len(test_provider.data_frame)):
            action, state = model.predict(zero_completed_obs, state=state)
            obs, reward, done, info = test_env.step([action[0]])

            zero_completed_obs[0, :] = obs

            rewards.append(reward)

            if render_env:
                test_env.render(mode='human')

            if done:
                net_worths = pd.DataFrame({
                    'Date': info[0]['timestamps'],
                    'Balance': info[0]['net_worths'],
                })

                net_worths.set_index('Date', drop=True, inplace=True)
                returns = net_worths.pct_change()[1:]

                if render_report:
                    qs.plots.snapshot(returns.Balance,
                                      title='RL Trader Performance')

                if save_report:
                    reports_path = path.join(
                        'data', 'reports',
                        f'{self.study_name}__{model_epoch}.html')
                    qs.reports.html(returns.Balance, file=reports_path)

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): ${np.sum(rewards):.2f}'
        )
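
The train and test methods above rely on a make_env helper that is not shown. A plausible sketch (an assumption, not the project's actual code) of what such a helper looks like, given that SubprocVecEnv and DummyVecEnv expect zero-argument callables that each build one environment:

from lib.env.TradingEnv import TradingEnv  # assumed import path

def make_env(data_provider, rank: int = 0, seed: int = 0):
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)  # offset the seed per worker so parallel envs differ
        return env
    return _init
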
Example #6
class Optuna:

    study_name = None

    def __init__(self,
                 model_actor: BaseRLModel = PPO2,
                 # (ActorCriticPolicy or str) the policy model to use (MlpPolicy, CnnPolicy, CnnLstmPolicy, ...)
                 policy: BasePolicy = MlpLnLstmPolicy,
                 reward_strategy: Reward_Strategy_BASE = RewardPnL,  # e.g. IncrementalProfit
                 exchange_args: Dict = {},
                 **kwargs):
        self.model_actor: BaseRLModel = model_actor
        self.policy = policy
        self.Reward_Strategy = reward_strategy
        self.exchange_args = exchange_args
        self.logger = kwargs.get(
            'logger',
            init_logger(__name__, show_debug=kwargs.get('show_debug', True)))
        self.db_path = kwargs.get('db_path', 'sqlite:///data/params.db')
        self.date_format = kwargs.get('date_format',
                                      ProviderDateFormat.DATETIME_HOUR_24)
        self.data_path = kwargs.get('data_path',
                                    'data/input/coinbase-1h-btc-usd.csv')
        self.data_train_split_pct = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')
        #self.columns_map            = kwargs.get('columns_map', {})
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.model_logs_tb = kwargs.get('tensorboard_path',
                                        os.path.join('data', 'logs_tb'))
        self.model_verbose = kwargs.get('model_verbose', 1)
        self.do_load_raw_data: bool = kwargs.get('do_load_raw_data', True)
        self.features_to_add: str = kwargs.get('features_to_add', 'none')
        self.initialize_data(self.do_load_raw_data, self.features_to_add)
        self.initialize_db_optuna()  #optimization for hyper param search

        self.logger.info(
            f'Successfully initialized RLTrader study {self.study_name}. '
            f'To monitor, run "tensorboard --logdir={self.model_logs_tb}" and open http://localhost:6006/'
        )

    def initialize_data(self, load_raw_data, features_to_add):
        self.logger.debug('Initializing data:')
        if self.data_provider == 'static':
            if not os.path.isfile(self.data_path):
                class_dir = os.path.dirname(__file__)
                self.data_path = os.path.realpath(
                    os.path.join(class_dir, "../{}".format(self.data_path)))

            # data_columns = {
            #     'Date': 'Date', 'Open': 'Open', 'High': 'High',
            #     'Low': 'Low', 'Close': 'Close', 'Volume': 'Volume',  # VolumeFrom
            #     # for *v2.csv files:
            #     'OpenVIX': 'OpenVIX', 'HighVIX': 'HighVIX', 'LowVIX': 'LowVIX',
            #     'CloseVIX': 'CloseVIX', 'SKEW': 'SKEW',
            # }

            if load_raw_data:
                self.logger.info(
                    f'Loading raw data from disk: {self.data_path}')
                df = None
            else:
                d = self.data_path.replace('.csv', '_with_features.csv')
                self.logger.info(f'Loading prepared data from disk: {d}')
                df = pd.read_csv(d)

            self.data_provider = StaticDataProvider(
                date_format=self.date_format,
                csv_data_path=self.data_path,
                df=df,
                do_prepare_data=load_raw_data,
                features_to_add=features_to_add)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(
            f'Successfully initialized data.\nFeature list: {self.data_provider.columns}'
        )

    def initialize_db_optuna(self):
        self.logger.debug('Initializing Optuna and loading the best model from the DB')
        try:
            # 50.86%; activation in ('identity', 'logistic', 'tanh', 'relu'),
            # solver in ('lbfgs', 'sgd', 'adam'), default 'adam'
            mlp = MLPClassifier(random_state=5,
                                hidden_layer_sizes=(250, 150, 100, 50, 20, 10, 5),
                                shuffle=False,
                                activation='relu',
                                solver='adam',
                                batch_size=100,
                                max_iter=200,
                                learning_rate_init=0.001)
            self.study_name = (f'{mlp.__class__.__name__}'
                               f'__{self.policy.__name__}'
                               f'__{self.Reward_Strategy.__name__}')
        except Exception:
            self.study_name = 'ErrorModel__ErrorPolicy__ErrorStrategy'
        # Load (or create) the study from the sqlite database we told Optuna to use.
        self.optuna_study = optuna.create_study(study_name=self.study_name,
                                                storage=self.db_path,
                                                direction='minimize',
                                                load_if_exists=True)

        self.logger.debug(
            f'Successfully Initialized Optuna , study_name={self.study_name}')

        try:
            self.logger.debug(
                f'Found {len(self.optuna_study.trials)} trials in the DB. '
                f'Best value (minimum): {self.optuna_study.best_value}, '
                f'params: {self.optuna_study.best_params}')
        except Exception:
            self.logger.debug('No trials have been finished yet.')

    def optuna_get_model_params(self):  # best model params found by Optuna
        params = self.optuna_study.best_trial.params
        return {
            'batch_size': int(params['batch_size']),   # (int) samples per gradient update
            'hidden_size': params['hidden_size'],      # width of the hidden layer(s)
            'learning_rate': params['learning_rate'],  # (float) initial learning rate
            'epoch': params['epoch'],                  # number of passes over the training data
            'dropout': params['dropout'],              # dropout rate (regularization)
        }

    def optimize_agent_params(self, trial):
        # Define the hyperparameter search space.
        return {
            'batch_size': int(trial.suggest_loguniform('batch_size', 8, 512)),  # sampled log-uniformly
            'hidden_size': trial.suggest_loguniform('hidden_size', 20, 1000),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.00001, 0.01),
            'epoch': trial.suggest_loguniform('epoch', 10, 600),
            'dropout': trial.suggest_uniform('dropout', 0.0, 0.4),  # sampled uniformly in [0.0, 0.4]
        }

    '''
      activations = [ 'sigmoid', 'softmax']#, 'softplus', 'softsign', 'sigmoid',  'tanh', 'hard_sigmoid', 'linear', 'relu']#best 'softmax', 'softplus', 'softsign'
      inits       = ['glorot_uniform']#, 'zero', 'uniform', 'normal', 'lecun_uniform',  'glorot_uniform',  'he_uniform', 'he_normal']#all same except he_normal worse
      optimizers  = ['RMSprop', 'SGD']#, 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'] # same for all
      losses =      ['categorical_crossentropy']#, 'categorical_crossentropy']#['mse', 'mae']  #better=binary_crossentropy
      epochs =      [300, 800]  # , 100, 150] # default epochs=1,  better=100
      batch_size = [12,128]#],150,200]  # , 10, 20]   #  default = none best=32
      size_hiddens = [ 200, 600]  # 5, 10, 20] best = 100 0.524993 Best score: 0.525712 using params {'batch_size': 128, 'dropout': 0.2, 'epochs': 100, 'loss': 'binary_crossentropy', 'size_hidden': 100}
      lrs     =      [0.01, 0.001, 0.00001]#,0.03, 0.05, 0.07]#,0.001,0.0001,1,0.1,0.00001]#best 0.01 0.001 0.0001
      dropout =     [0.2]#, 0.2, 0.3, 0.4]  # , 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    '''

    def optimize(self, n_trials: int = 10):
        self.logger.info(f'Starting optimization: {n_trials} trials')

        try:
            # optimize_params is the objective callable where the learning happens.
            # n_trials=None would mean no limit on the number of trials; a timeout
            # (in seconds) could also be passed; n_jobs=1 disables parallel execution.
            self.optuna_study.optimize(self.optimize_params,
                                       n_trials=n_trials,
                                       n_jobs=1)
        except KeyboardInterrupt:
            pass

        self.logger.info(
            f'Finished optimizing. Trials in DB: {len(self.optuna_study.trials)}')
        #self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')
        self.logger.info('Params: ')

        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f'    {key}: {value}')
        #self.optuna_study._storage.set_trial_state(trial_id, structs.TrialState.COMPLETE)
        #optuna.visualization.plot_intermediate_values(self.optuna_study)
        df = self.optuna_study.trials_dataframe()
        return df

    def optimize_params(
            self,
            trial,
            n_epochs: int = 2,  # 2 is enough for the optimization run; real training needs far more
            n_tests_per_eval: int = 1):
        # Never tune hyperparameters on the 20% test split: split the training
        # data again, so fitting uses 64% of the data and validation the rest.
        x_train, x_test = self.data_provider.split_data_train_test(
            self.data_train_split_pct)  # 0.8
        x_train, x_valid = x_train.split_data_train_test(
            self.data_train_split_pct)  # 0.8 * 0.8 = 0.64
        del x_test

        model_params = self.optimize_agent_params(trial)
        # Build the classifier from the trial's suggested hyperparameters.
        # 'hidden_size' is used here as the width of a single hidden layer, and
        # 'dropout' has no MLPClassifier equivalent, so it is left unused.
        # activation in ('identity', 'logistic', 'tanh', 'relu'); solver in ('lbfgs', 'sgd', 'adam').
        mlp = MLPClassifier(random_state=5,
                            hidden_layer_sizes=(int(model_params['hidden_size']),),
                            shuffle=False,
                            activation='relu',
                            solver='adam',
                            batch_size=model_params['batch_size'],
                            max_iter=int(model_params['epoch']),
                            learning_rate_init=model_params['learning_rate'])

        error_last = -np.finfo(np.float16).max
        n_samples = len(x_train.df)
        steps = int(n_samples / n_epochs)
        attempt = 0
        for epoch in range(1, n_epochs + 1):
            self.logger.info(
                f'{epoch}/{n_epochs} epochs. Training on a small sample of {steps} time steps'
            )
            # Fit on the training features/labels (assumed here to have been
            # prepared elsewhere on the instance as self.x_train / self.y_train).
            mlp.fit(self.x_train, self.y_train)

            # Prune trials whose fitted model barely produces short signals
            # (assumption: class 0 predictions correspond to shorts).
            trades_s = int(np.sum(mlp.predict(self.x_train) == 0))
            if trades_s < (steps * 0.05):
                self.logger.info(
                    f'Setting trial status at epoch {epoch} to TrialState.PRUNED: '
                    f'too few shorts ({trades_s}).'
                )
                raise optuna.structs.TrialPruned()

            # Evaluate on the validation split (assumption: the provider's frame
            # holds the validation features).
            errors = []
            n_episodes = 0
            while n_episodes < n_tests_per_eval:
                error = mlp.predict(x_valid.df)
                errors.append(np.mean(error))
                n_episodes += 1

            error_last = np.mean(errors)
            attempt += 1
            self.logger.info(
                f'Found a setup. Mean of {len(errors)} evaluations = {-1 * error_last}. '
                f'Reporting attempt #{attempt} to the Optuna DB.'
            )
            # If step is None the value is stored as the trial's final value,
            # otherwise as an intermediate value.
            trial.report(value=-1 * error_last, step=epoch)

            if trial.should_prune(epoch):  # prune unpromising trials
                raise optuna.structs.TrialPruned()

        # Optuna interprets a lower return value as a better trial, so negate.
        return -1 * error_last
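
Stripped of the data providers, the core of the example above is an Optuna study whose objective builds an MLPClassifier from trial-suggested hyperparameters, fits it, and returns a value to minimize. A minimal self-contained sketch of that pattern on synthetic data (make_classification stands in for the real features and is not from the source), using the same suggest_* calls as the snippet:

import optuna
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=2000, n_features=20, random_state=5)
x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=5)

def objective(trial):
    # Suggest hyperparameters over the same ranges the example uses.
    mlp = MLPClassifier(
        random_state=5,
        hidden_layer_sizes=(int(trial.suggest_loguniform('hidden_size', 20, 1000)),),
        batch_size=int(trial.suggest_loguniform('batch_size', 8, 512)),
        learning_rate_init=trial.suggest_loguniform('learning_rate', 0.00001, 0.01),
        max_iter=int(trial.suggest_loguniform('epoch', 10, 600)))
    mlp.fit(x_train, y_train)
    return 1.0 - mlp.score(x_valid, y_valid)  # validation error; Optuna minimizes this

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10, n_jobs=1)
print(study.best_params)
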
Example #7
class RLTrader:
    data_provider = None
    study_name = None

    def __init__(self, modelClass: BaseRLModel = PPO2, policyClass: BasePolicy = MlpPolicy, exchange_args: Dict = {}, **kwargs):
        self.logger = kwargs.get('logger', init_logger(__name__, show_debug=kwargs.get('show_debug', True)))

        self.Model = modelClass
        self.Policy = policyClass
        self.exchange_args = exchange_args
        self.tensorboard_path = kwargs.get('tensorboard_path', None)
        self.input_data_path = kwargs.get('input_data_path', 'data/input/coinbase-1h-btc-usd.csv')
        self.params_db_path = kwargs.get('params_db_path', 'sqlite:///data/params.db')

        self.date_format = kwargs.get('date_format', ProviderDateFormat.DATETIME_HOUR_24)

        self.model_verbose = kwargs.get('model_verbose', 1)
        self.n_envs = kwargs.get('n_envs', os.cpu_count())
        self.n_minibatches = kwargs.get('n_minibatches', self.n_envs)
        self.train_split_percentage = kwargs.get('train_split_percentage', 0.8)
        self.data_provider = kwargs.get('data_provider', 'static')

        self.initialize_data()
        self.initialize_optuna()

        self.logger.debug(f'Initialize RLTrader: {self.study_name}')

    def initialize_data(self):
        if self.data_provider == 'static':
            if not os.path.isfile(self.input_data_path):
                class_dir = os.path.dirname(__file__)
                self.input_data_path = os.path.realpath(os.path.join(class_dir, "../{}".format(self.input_data_path)))

            data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',
                            'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}

            self.data_provider = StaticDataProvider(date_format=self.date_format,
                                                    csv_data_path=self.input_data_path,
                                                    data_columns=data_columns)
        elif self.data_provider == 'exchange':
            self.data_provider = ExchangeDataProvider(**self.exchange_args)

        self.logger.debug(f'Initialized Features: {self.data_provider.columns}')

    def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)
            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}'
        except Exception:
            self.study_name = 'UnknownModel__UnknownPolicy'

        self.optuna_study = optuna.create_study(
            study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
        except Exception:
            self.logger.debug('No trials have been finished yet.')

    def get_model_params(self):
        params = self.optuna_study.best_trial.params
        return {
            'n_steps': int(params['n_steps']),
            'gamma': params['gamma'],
            'learning_rate': params['learning_rate'],
            'ent_coef': params['ent_coef'],
            'cliprange': params['cliprange'],
            'noptepochs': int(params['noptepochs']),
            'lam': params['lam'],
        }

    def optimize_agent_params(self, trial):
        if self.Model != PPO2:
            return {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.)}

        return {
            'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
            'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
            'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
            'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
            'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
            'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
            'lam': trial.suggest_uniform('lam', 0.8, 1.)
        }

    def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_eval: int = 1):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
        train_provider, validation_provider = train_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(1)])
        validation_env = SubprocVecEnv([make_env(validation_provider, i) for i in range(1)])

        model_params = self.optimize_agent_params(trial)
        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=1,
                           tensorboard_log=self.tensorboard_path, **model_params)

        last_reward = -np.finfo(np.float16).max
        n_steps_per_eval = int(len(train_provider.data_frame) / n_prune_evals_per_trial)

        for eval_idx in range(n_prune_evals_per_trial):
            try:
                model.learn(n_steps_per_eval)
            except AssertionError:
                raise

            rewards = []
            n_episodes, reward_sum = 0, 0.0

            state = None
            obs = validation_env.reset()
            while n_episodes < n_tests_per_eval:
                action, state = model.predict(obs, state=state)
                obs, reward, done, _ = validation_env.step(action)
                reward_sum += reward

                if all(done):
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = validation_env.reset()

            last_reward = np.mean(rewards)
            trial.report(-1 * last_reward, eval_idx)

            if trial.should_prune(eval_idx):
                raise optuna.structs.TrialPruned()

        return -1 * last_reward

    def optimize(self, n_trials: int = 100, n_parallel_jobs: int = 1, *optimize_params):
        try:
            self.optuna_study.optimize(
                self.optimize_params, n_trials=n_trials, n_jobs=n_parallel_jobs, *optimize_params)
        except KeyboardInterrupt:
            pass

        self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}')

        self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}')

        self.logger.info('Params: ')
        for key, value in self.optuna_study.best_trial.params.items():
            self.logger.info(f'    {key}: {value}')

        return self.optuna_study.trials_dataframe()

    def train(self, n_epochs: int = 100, save_every: int = 10, test_trained_model: bool = False, render_trained_model: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(self.n_envs)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=self.n_minibatches,
                           tensorboard_log=self.tensorboard_path, **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            if model_epoch % save_every == 0:
                model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
                model.save(model_path)

                if test_trained_model:
                    self.test(model_epoch, should_render=render_trained_model)

        self.logger.info(f'Trained {n_epochs} models')

    def test(self, model_epoch: int = 0, should_render: bool = True):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del train_provider

        test_env = SubprocVecEnv([make_env(test_provider, i) for i in range(self.n_envs)])

        model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
        model = self.Model.load(model_path, env=test_env)

        self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

        state = None
        obs, done, rewards = test_env.reset(), [False], []
        while not all(done):
            action, state = model.predict(obs, state=state)
            obs, reward, done, _ = test_env.step(action)

            rewards.append(reward)

            if should_render and self.n_envs == 1:
                test_env.render(mode='human')

        self.logger.info(
            f'Finished testing model ({self.study_name}__{model_epoch}): ${np.sum(rewards):.2f}')
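
A hedged usage sketch of the class above (argument values are illustrative, not from the source): run the hyperparameter search first so get_model_params can read a best trial from the params DB, then train and test.

trader = RLTrader(data_provider='static',
                  input_data_path='data/input/coinbase-1h-btc-usd.csv')
trader.optimize(n_trials=20)                      # writes trials to sqlite:///data/params.db
trader.train(n_epochs=10, save_every=1,
             test_trained_model=True, render_trained_model=False)
trader.test(model_epoch=0, should_render=False)   # loads data/agents/<study_name>__0.pkl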