Example #1
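# Imports assumed by this rlkit-style SAC training example; helper functions
# such as get_env, get_sac_model, get_trainer, my_eval_policy, load_dataset
# and plot_ma are defined elsewhere in the project.
import os
from datetime import datetime

import gtimer as gt
import gym
import matplotlib.pyplot as plt
import pandas as pd

import rlkit.torch.pytorch_util as ptu
from rlkit.data_management.env_replay_buffer import EnvReplayBuffer
from rlkit.envs.wrappers import NormalizedBoxEnv
from rlkit.launchers.launcher_util import setup_logger
from rlkit.samplers.data_collector import MdpPathCollector
from rlkit.torch.sac.policies import MakeDeterministic
from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
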
def experiment(variant):
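    """Build exploration/evaluation envs, a SAC trainer, path collectors and a
    replay buffer, then train while logging per-epoch evaluation results to CSV."""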

    expl_env = get_env()
    eval_env = get_env()

    M = variant['layer_size']
    trainer = get_sac_model(env=eval_env, hidden_sizes=[M, M])
    policy = trainer.policy
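    # Evaluate with a deterministic (mean-action) version of the stochastic policy.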
    eval_policy = MakeDeterministic(policy)
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        policy,
    )
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs'])
    columns = ['Epoch', 'mean', 'std']
    eval_result = pd.DataFrame(columns=columns)
    eval_output_csv = os.path.join(variant['log_dir'], 'eval_result.csv')

    def post_epoch_func(self, epoch):
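        """Evaluate the current policy on a fresh environment after each epoch
        and append the result to the evaluation CSV."""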
        nonlocal eval_result
        nonlocal policy
        print('-------------post_epoch_func start-------------')
        eval_result = my_eval_policy(
            env=get_env(),
            algorithm=self,
            epoch=epoch,
            eval_result=eval_result,
            output_csv=eval_output_csv,
        )
        print('-------------post_epoch_func done-------------')

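    # Register the hook so the algorithm runs it at the end of every epoch.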
    algorithm.post_epoch_funcs = [
        post_epoch_func,
    ]
    algorithm.to(ptu.device)
    algorithm.train()
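

# A minimal sketch of the `variant` dict that experiment() expects; every value
# below is an illustrative assumption, not a setting from the original project.
example_experiment_variant = dict(
    layer_size=256,
    replay_buffer_size=int(1e6),
    log_dir='./output/example_run/',
    algorithm_kwargs=dict(
        num_epochs=100,
        num_eval_steps_per_epoch=1000,
        num_expl_steps_per_train_loop=1000,
        num_trains_per_train_loop=1000,
        min_num_steps_before_training=1000,
        max_path_length=200,
        batch_size=256,
    ),
)
# experiment(example_experiment_variant)

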
def train_model(variant):
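    """Load the return/feature dataset, build train/validation MarketEnv
    environments, train SAC, and plot per-epoch KPIs from rlkit's progress.csv."""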
    gt.reset_root()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_dir = f"./output/train_out_{timestamp}/"

    setup_logger('name-of-experiment',
                 variant=variant,
                 snapshot_mode='gap_and_last',
                 snapshot_gap=20,
                 log_dir=log_dir)

    expl_env_kwargs = variant['expl_env_kwargs']
    eval_env_kwargs = variant['eval_env_kwargs']
    trainer_kwargs = variant['trainer_kwargs']

    df_ret_train, df_ret_val, df_feature = load_dataset()
    df_ret_train.to_csv(os.path.join(log_dir, 'df_ret_train.csv'))
    df_ret_val.to_csv(os.path.join(log_dir, 'df_ret_val.csv'))
    df_feature.to_csv(os.path.join(log_dir, 'df_feature.csv'))
    expl_env = NormalizedBoxEnv(
        gym.make('MarketEnv-v0',
                 returns=df_ret_train,
                 features=df_feature,
                 **expl_env_kwargs))

    eval_env = NormalizedBoxEnv(
        gym.make('MarketEnv-v0',
                 returns=df_ret_val,
                 features=df_feature,
                 **eval_env_kwargs))

    def post_epoch_func(self, epoch):
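        """Plot moving averages of the evaluation/exploration KPI columns
        recorded in progress.csv after each epoch."""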
        progress_csv = os.path.join(log_dir, 'progress.csv')
        df = pd.read_csv(progress_csv)
        kpis = ['cagr', 'dd', 'mdd', 'wealths', 'std']
        srcs = ['evaluation', 'exploration']
        n = 50
        for kpi in kpis:
            series = [df[f'{s}/env_infos/final/{kpi} Mean'] for s in srcs]
            plot_ma(series=series, labels=srcs, title=kpi, n=n)
            plt.savefig(os.path.join(log_dir, f'{kpi}.png'))
            plt.close()

    trainer = get_trainer(env=eval_env, **trainer_kwargs)
    policy = trainer.policy
    eval_policy = MakeDeterministic(policy)
    #eval_policy = policy
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        policy,
    )
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs'])
    algorithm.post_epoch_funcs = [
        post_epoch_func,
    ]
    algorithm.to(ptu.device)
    algorithm.train()
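

# A minimal sketch of the `variant` dict that train_model() expects; the keys
# mirror what the function reads above, and all values are illustrative
# assumptions rather than settings from the original project.
example_train_variant = dict(
    expl_env_kwargs=dict(),
    eval_env_kwargs=dict(),
    trainer_kwargs=dict(
        discount=0.99,
        soft_target_tau=5e-3,
        policy_lr=3e-4,
        qf_lr=3e-4,
    ),
    replay_buffer_size=int(1e6),
    algorithm_kwargs=dict(
        num_epochs=200,
        num_eval_steps_per_epoch=1000,
        num_expl_steps_per_train_loop=1000,
        num_trains_per_train_loop=1000,
        min_num_steps_before_training=1000,
        max_path_length=200,
        batch_size=256,
    ),
)
# train_model(example_train_variant)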