Example #1
File: explore.py Project: rlcyf/tbase-1
def env_eval(env, policy, print_actions):
    rewards = []
    daily_returns = []
    portfolios = []
    state = env.reset()
    n_days = 0
    while True:
        state_var = torch.tensor(state).unsqueeze(0).permute(1, 0, 2).to(
            torch.float)
        with torch.no_grad():
            action = policy.action(state_var)
            # np.float was removed in NumPy >= 1.24; np.float64 is the equivalent alias
            action = action.detach().cpu()[0].numpy().astype(np.float64)
        if print_actions:
            print("tbase.agents.ddpg.agent action:" + str(action))
        next_state, reward, done, info, _ = env.step(action)
        n_days += 1
        rewards.append(reward)
        daily_returns.append(info["daily_pnl"] / env.investment)
        portfolios.append(info["portfolio_value"])
        if done:
            state = env.reset()
            break
        state = next_state
    mdd = max_drawdown(portfolios)
    sharpe_r = sharpe_ratio(daily_returns)
    annualized_return_ = annualized_return(portfolios[-1], n_days)
    logger.info("=" * 38 + "eval" + "=" * 38)
    logger.info("portfolio: %.3f" % portfolios[-1])
    logger.info("max_drawdown: %.3f" % mdd)
    logger.info("sharpe_ratio: %.3f" % sharpe_r)
    logger.info("annualized_return: %.3f" % annualized_return_)
    return mdd, sharpe_r, annualized_return_, portfolios
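The metric helpers max_drawdown, sharpe_ratio, and annualized_return used above are imported from elsewhere in tbase and are not shown on this page. Below is a minimal sketch of how such helpers are commonly implemented, assuming 252 trading days per year and a zero risk-free rate; the actual tbase implementations may differ.

import numpy as np


def max_drawdown(portfolios):
    # largest peak-to-trough decline of the portfolio value series
    values = np.asarray(portfolios, dtype=np.float64)
    running_max = np.maximum.accumulate(values)
    return ((running_max - values) / running_max).max()


def sharpe_ratio(daily_returns, trading_days=252):
    # annualized Sharpe ratio of daily returns (zero risk-free rate assumed)
    returns = np.asarray(daily_returns, dtype=np.float64)
    if returns.std() == 0:
        return 0.0
    return np.sqrt(trading_days) * returns.mean() / returns.std()


def annualized_return(final_portfolio, n_days, trading_days=252):
    # geometric annualization of the total return achieved over n_days
    return final_portfolio ** (trading_days / n_days) - 1.0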
Example #2
File: explore.py Project: rlcyf/tbase-1
def buy_and_hold(env):
    """
    Buy equally split positions on the first trading day of the backtest and hold them
    until the backtest ends; used as the baseline strategy.
    """
    rewards = []
    daily_returns = []
    portfolios = [1.0]
    env.reset()
    action = env.get_buy_close_action(env.current_date)

    n_days = 0
    while True:
        if n_days < 1:
            _, reward, done, info, _ = env.step(action)
        _, reward, done, info, _ = env.step(action, only_update=True)
        n_days += 1
        rewards.append(reward)
        daily_returns.append(info["daily_pnl"] / env.investment)
        portfolios.append(info["portfolio_value"])
        if done:
            break
    mdd = max_drawdown(portfolios)
    sharpe_r = sharpe_ratio(daily_returns)
    annualized_return_ = annualized_return(portfolios[-1], n_days)
    logger.info("=" * 34 + "buy_and_hold" + "=" * 34)
    logger.info("portfolio: %.3f" % portfolios[-1])
    logger.info("max_drawdown: %.3f" % mdd)
    logger.info("sharpe_ratio: %.3f" % sharpe_r)
    logger.info("annualized_return: %.3f" % annualized_return_)
    return annualized_return_, portfolios
Example #3
def main():
    args = common_arg_parser()
    if args.debug:
        import logging
        logger.setLevel(logging.DEBUG)
    set_global_seeds(args.seed)
    logger.info("tbase.run set global_seeds: %s" % str(args.seed))
    if torch.cuda.is_available() and args.num_env > 1 and args.device != 'cpu':
        set_start_method('spawn')
    env = make_env(args=args)
    print("\n" + "*" * 80)
    logger.info("Initializing agent by parameters:")
    logger.info(str(args))
    agent = get_agent(env, args)
    if not args.eval and not args.infer:
        logger.info("Training agent")
        agent.learn()
        logger.info("Finished, check details by run tensorboard --logdir=%s" %
                    args.tensorboard_dir)
    # eval models
    if args.eval:
        eval_env = make_eval_env(args=args)
        agent.eval(eval_env, args)

    # infer actions
    if args.infer:
        infer_env = make_infer_env(args=args)
        agent.infer(infer_env)
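common_arg_parser, make_env, make_eval_env, make_infer_env, and get_agent all come from tbase and are not defined on this page. As a rough sketch of the flags main() relies on, a hypothetical minimal parser could look like the following; the defaults are illustrative, not tbase's actual ones.

import argparse


def common_arg_parser():
    # hypothetical minimal parser covering only the flags used in main()
    parser = argparse.ArgumentParser(description="tbase runner (sketch)")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--num_env", type=int, default=1)
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--eval", action="store_true")
    parser.add_argument("--infer", action="store_true")
    parser.add_argument("--tensorboard_dir", type=str, default="./tensorboard")
    return parser.parse_args()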
Example #4
File: ac_agent.py Project: iminders/tbase
 def warm_up(self):
     logger.info("warmming up: explore %d days in enviroment" %
                 self.args.warm_up)
     if self.num_env > 1:
         self.explore(self.args.warm_up, self.args.sample_size)
     else:
         self.simple_explore(self.args.warm_up, self.args.sample_size)
     logger.info("warm up: finished")
Example #5
File: ac_agent.py Project: iminders/tbase
 def eval(self, env, args):
     self.load(self.model_dir)
     _, _, annualized_return, portfolios = env_eval(env, self.policy,
                                                    args.print_action)
     bh_annualized_return, bh_portfolios = buy_and_hold(env)
     for i in range(len(portfolios)):
         self.writer.add_scalars('backtesting', {
             self.args.alg: portfolios[i],
             "buy&hold": bh_portfolios[i]
         }, i)
     excess_return = portfolios[-1] - bh_portfolios[-1]
     logger.info("excess_return: %.3f" % excess_return)
     annual_excess_return = annualized_return - bh_annualized_return
     logger.info("annualized excess_return: %.3f" % annual_excess_return)
Example #6
    def learn(self):
        logger.info("learning started")
        i = 0
        current_portfolio = 1.0
        t_start = time.time()
        for i_iter in range(self.args.max_iter_num):
            [avg_reward, e_t, ports] = [None] * 3
            if self.args.num_env == 1:
                avg_reward, e_t, ports = self.simple_explore()
            else:
                avg_reward, e_t, ports = self.explore()
            # NOTE: this agent does not update parameters here
            for p in ports:
                i += 1
                self.writer.add_scalar('reward/portfolio', p, i)
                current_portfolio = p
                if current_portfolio > self.best_portfolio:
                    self.best_portfolio = current_portfolio
                    logger.info("iter: %d, new best portfolio: %.3f" %
                                (i_iter + 1, self.best_portfolio))
            self.writer.add_scalar('time/explore', e_t, i_iter)

            self.writer.add_scalar('reward/policy', torch.tensor(avg_reward),
                                   i_iter)

            if (i_iter + 1) % self.args.log_interval == 0:
                msg = "total update time: %.1f secs" % (time.time() - t_start)
                msg += ", iter=%d, avg_reward=%.3f" % (i_iter + 1, avg_reward)
                msg += ", current_portfolio: %.3f" % current_portfolio
                logger.info(msg)

        self.writer.close()
        logger.info("Final best portfolio: %.3f" % self.best_portfolio)
        self.save_best_portofolio(self.model_dir)
Example #7
File: ac_agent.py Project: rlcyf/tbase-1
 def infer(self, env):
     self.load(self.model_dir)
     state = env.reset(infer=True)
     state_var = torch.tensor(state).unsqueeze(0).permute(1, 0,
                                                          2).to(torch.float)
     with torch.no_grad():
         action = self.policy.action(state_var)
         # np.float was removed in NumPy >= 1.24; np.float64 is the equivalent alias
         action = action.detach().cpu()[0].numpy().astype(np.float64)
         logger.info("infer %s result %s: " %
                     (self.args.infer_date, str(action)))
         actions = env.parse_infer_action(action)
         with open(self.args.infer_result_path, "w") as f:
             for act in actions:
                 linestr = ",".join([str(v) for v in act])
                 f.write(linestr + "\n")
         with open(self.args.progress_bar_path, "w") as progress_file:
             progress_file.write("%d,%d\n" % (1, 1))
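infer() writes one comma-separated line per parsed action to args.infer_result_path. A small sketch for reading those results back; the column layout depends on env.parse_infer_action, so no field names are assumed here.

import csv


def read_infer_results(path):
    # each row is one action tuple as written by infer(), split on commas
    with open(path, newline="") as f:
        return [row for row in csv.reader(f)]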
Example #8
    def learn(self):
        if self.args.num_env > 1:
            self.policy.share_memory()
        self.warm_up()
        logger.info("learning started")
        i = 0
        current_portfolio = 1.0
        t_start = time.time()
        for i_iter in range(self.args.max_iter_num):
            with open(self.args.progress_bar_path, "w") as progress_file:
                progress_file.write("%d,%d" % (i_iter, self.args.max_iter_num))
            obs, act, rew, obs_t, done, avg_reward, e_t, ports = [None] * 8
            if self.args.num_env == 1:
                obs, act, rew, obs_t, done, avg_reward, e_t, ports = \
                    self.simple_explore()
            else:
                obs, act, rew, obs_t, done, avg_reward, e_t, ports = \
                    self.explore()
            for p in ports:
                i += 1
                self.writer.add_scalar('reward/portfolio', p, i)
                current_portfolio = p
                if current_portfolio > self.best_portfolio:
                    self.best_portfolio = current_portfolio
                    logger.info("iter: %d, new best portfolio: %.3f" %
                                (i_iter + 1, self.best_portfolio))
                    self.save(self.model_dir)
            self.writer.add_scalar('time/explore', e_t, i_iter)
            try:
                v_loss, p_loss, p_reg, act_reg, u_t = self.update_params(
                    obs, act, rew, obs_t, done)
            except Exception as error:
                print(error)
                # skip the rest of this iteration: the losses logged below are
                # undefined when update_params fails
                continue
            self.writer.add_scalar('time/update', u_t, i_iter)
            self.writer.add_scalar('loss/value', v_loss, i_iter)
            self.writer.add_scalar('loss/policy', p_loss, i_iter)
            self.writer.add_scalar('reg/action', act_reg, i_iter)
            self.writer.add_scalar('reg/policy', p_reg, i_iter)
            self.writer.add_scalar('reward/policy', torch.tensor(avg_reward),
                                   i_iter)

            if (i_iter + 1) % self.args.log_interval == 0:
                msg = "total update time: %.1f secs" % (time.time() - t_start)
                msg += ", iter=%d, avg_reward=%.3f" % (i_iter + 1, avg_reward)
                msg += ", current_portfolio: %.3f" % current_portfolio
                logger.info(msg)
            clear_memory()

        logger.info("Final best portfolio: %.3f" % self.best_portfolio)
        self.save_best_portofolio(self.model_dir)
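clear_memory() is called at the end of every iteration but is not defined on this page. A plausible sketch, assuming it simply releases unreferenced Python objects and cached CUDA allocations; the real tbase helper may do more.

import gc

import torch


def clear_memory():
    # free unreferenced Python objects and return cached CUDA blocks to the driver
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()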
Example #9
File: ac_agent.py Project: rlcyf/tbase-1
 def eval(self, env, args):
     self.load(self.model_dir)
     mdd, sharpe_ratio, annualized_return, portfolios = env_eval(
         env, self.policy, args.print_action)
     # base buy and hold strategy performance
     bh_annualized_return, bh_portfolios = buy_and_hold(env)
     for i in range(len(portfolios)):
         self.writer.add_scalars('backtesting', {
             self.args.alg: portfolios[i],
             "buy&hold": bh_portfolios[i]
         }, i)
     excess_return = portfolios[-1] - bh_portfolios[-1]
     logger.info("excess_return: %.3f" % excess_return)
     annual_excess_return = annualized_return - bh_annualized_return
     logger.info("annualized excess_return: %.3f" % annual_excess_return)
     # save eval results
     absolute_return = portfolios[-1]
     ex_base_code = self.args.codes
     ex_strategy = "buy&hold"
     self.save_eval(absolute_return, annualized_return, mdd, sharpe_ratio,
                    ex_base_code, ex_strategy, bh_portfolios[-1],
                    bh_annualized_return)
Example #10
    def learn(self):
        logger.info("learning started")
        i = 0
        current_portfolio = 1.0
        t_start = time.time()
        state = self.envs[0].reset()
        for i_iter in range(self.args.max_iter_num):
            with open(self.args.progress_bar_path, "w") as progress_file:
                progress_file.write("%d,%d" % (i_iter, self.args.max_iter_num))
            obs, act, rew, obs_t, done, ports, e_t = \
                self.explore(
                    self.envs[0],
                    state,
                    self.args.t_max,
                    self.args.print_action)
            state = obs[-1]
            for p in ports:
                i += 1
                self.writer.add_scalar('reward/portfolio', p, i)
                current_portfolio = p
                if current_portfolio > self.best_portfolio:
                    self.best_portfolio = current_portfolio
                    logger.info("iter: %d, new best portfolio: %.3f" %
                                (i_iter + 1, self.best_portfolio))
                    self.save(self.model_dir)
            self.writer.add_scalar('time/explore', e_t, i_iter)
            self.writer.add_scalar('reward/policy', np.mean(rew), i_iter)

            self.update_params(obs, act, rew, obs_t, done, i_iter)

            if (i_iter + 1) % self.args.log_interval == 0:
                msg = "total update time: %.1f secs" % (time.time() - t_start)
                msg += ", current_portfolio: %.3f" % current_portfolio
                logger.info(msg)
            clear_memory()

        logger.info("Final best portfolio: %.3f" % self.best_portfolio)
        self.save_best_portofolio(self.model_dir)
Example #11
File: tbase.py Project: tradingAI/runner
 def __init__(self):
     self.name = "tbase_runner"
     logger.info("tenvs version: %s" % tenvs.__version__)
     logger.info("tbase version: %s" % tbase.__version__)