def env_eval(env, policy, print_actions):
    rewards = []
    daily_returns = []
    portfolios = []
    state = env.reset()
    n_days = 0
    while True:
        state_var = torch.tensor(state).unsqueeze(0).permute(1, 0, 2).to(
            torch.float)
        with torch.no_grad():
            action = policy.action(state_var)
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        action = action.detach().cpu()[0].numpy().astype(float)
        if print_actions:
            print("tbase.agents.ddpg.agent action:" + str(action))
        next_state, reward, done, info, _ = env.step(action)
        n_days += 1
        rewards.append(reward)
        daily_returns.append(info["daily_pnl"] / env.investment)
        portfolios.append(info["portfolio_value"])
        if done:
            state = env.reset()
            break
        state = next_state
    mdd = max_drawdown(portfolios)
    sharpe_r = sharpe_ratio(daily_returns)
    annualized_return_ = annualized_return(portfolios[-1], n_days)
    logger.info("=" * 38 + "eval" + "=" * 38)
    logger.info("portfolio: %.3f" % portfolios[-1])
    logger.info("max_drawdown: %.3f" % mdd)
    logger.info("sharpe_ratio: %.3f" % sharpe_r)
    logger.info("annualized_return: %.3f" % annualized_return_)
    return mdd, sharpe_r, annualized_return_, portfolios

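# ---------------------------------------------------------------------------
# NOTE: env_eval and buy_and_hold rely on three metric helpers imported from
# elsewhere in the repo. The sketch below is a minimal, illustrative guess at
# what they compute, assuming daily data and a 252-trading-day year; it is
# not the repo's actual implementation.
# ---------------------------------------------------------------------------
import numpy as np


def max_drawdown(portfolios):
    # Largest peak-to-trough decline of the portfolio value curve.
    values = np.asarray(portfolios, dtype=float)
    peaks = np.maximum.accumulate(values)
    return float(np.max((peaks - values) / peaks))


def sharpe_ratio(daily_returns, risk_free=0.0):
    # Annualized Sharpe ratio computed from daily excess returns.
    r = np.asarray(daily_returns, dtype=float) - risk_free
    if r.std() == 0:
        return 0.0
    return float(np.sqrt(252) * r.mean() / r.std())


def annualized_return(final_portfolio, n_days):
    # Geometric annualization of the total return factor over n_days.
    return float(final_portfolio ** (252.0 / n_days) - 1)
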
def buy_and_hold(env):
    """
    Buy with evenly split positions on the first trading day of the backtest
    and hold until the backtest ends; used as the baseline strategy.
    """
    rewards = []
    daily_returns = []
    portfolios = [1.0]
    env.reset()
    action = env.get_buy_close_action(env.current_date)
    n_days = 0
    while True:
        # Buy on the first day, then only mark the portfolio to market.
        if n_days < 1:
            _, reward, done, info, _ = env.step(action)
        else:
            _, reward, done, info, _ = env.step(action, only_update=True)
        n_days += 1
        rewards.append(reward)
        daily_returns.append(info["daily_pnl"] / env.investment)
        portfolios.append(info["portfolio_value"])
        if done:
            break
    mdd = max_drawdown(portfolios)
    sharpe_r = sharpe_ratio(daily_returns)
    annualized_return_ = annualized_return(portfolios[-1], n_days)
    logger.info("=" * 34 + "buy_and_hold" + "=" * 34)
    logger.info("portfolio: %.3f" % portfolios[-1])
    logger.info("max_drawdown: %.3f" % mdd)
    logger.info("sharpe_ratio: %.3f" % sharpe_r)
    logger.info("annualized_return: %.3f" % annualized_return_)
    return annualized_return_, portfolios

def main():
    args = common_arg_parser()
    if args.debug:
        import logging
        logger.setLevel(logging.DEBUG)
    set_global_seeds(args.seed)
    logger.info("tbase.run set global_seeds: %s" % str(args.seed))
    if torch.cuda.is_available() and args.num_env > 1 and args.device != 'cpu':
        set_start_method('spawn')
    env = make_env(args=args)
    print("\n" + "*" * 80)
    logger.info("Initializing agent with parameters:")
    logger.info(str(args))
    agent = get_agent(env, args)
    if not args.eval and not args.infer:
        logger.info("Training agent")
        agent.learn()
        logger.info("Finished, check details by running: "
                    "tensorboard --logdir=%s" % args.tensorboard_dir)
    # evaluate models
    if args.eval:
        eval_env = make_eval_env(args=args)
        agent.eval(eval_env, args)
    # infer actions
    if args.infer:
        infer_env = make_infer_env(args=args)
        agent.infer(infer_env)

def warm_up(self):
    logger.info("warming up: explore %d days in environment" %
                self.args.warm_up)
    if self.num_env > 1:
        self.explore(self.args.warm_up, self.args.sample_size)
    else:
        self.simple_explore(self.args.warm_up, self.args.sample_size)
    logger.info("warm up: finished")

def eval(self, env, args):
    self.load(self.model_dir)
    _, _, annualized_return, portfolios = env_eval(env, self.policy,
                                                   args.print_action)
    bh_annualized_return, bh_portfolios = buy_and_hold(env)
    for i in range(len(portfolios)):
        self.writer.add_scalars('backtesting', {
            self.args.alg: portfolios[i],
            "buy&hold": bh_portfolios[i]
        }, i)
    excess_return = portfolios[-1] - bh_portfolios[-1]
    logger.info("excess_return: %.3f" % excess_return)
    annual_excess_return = annualized_return - bh_annualized_return
    logger.info("annualized excess_return: %.3f" % annual_excess_return)

def learn(self): logger.info("learning started") i = 0 current_portfolio = 1.0 t_start = time.time() for i_iter in range(self.args.max_iter_num): [avg_reward, e_t, ports] = [None] * 3 if self.args.num_env == 1: avg_reward, e_t, ports = self.simple_explore() else: avg_reward, e_t, ports = self.explore() # NOTE: Don't need update parameters for p in ports: i += 1 self.writer.add_scalar('reward/portfolio', p, i) current_portfolio = p if current_portfolio > self.best_portfolio: self.best_portfolio = current_portfolio logger.info("iter: %d, new best portfolio: %.3f" % (i_iter + 1, self.best_portfolio)) self.writer.add_scalar('time/explore', e_t, i_iter) self.writer.add_scalar('reward/policy', torch.tensor(avg_reward), i_iter) if (i_iter + 1) % self.args.log_interval == 0: msg = "total update time: %.1f secs" % (time.time() - t_start) msg += ", iter=%d, avg_reward=%.3f" % (i_iter + 1, avg_reward) msg += ", current_portfolio: %.3f" % current_portfolio logger.info(msg) self.writer.close() logger.info("Final best portfolio: %.3f" % self.best_portfolio) self.save_best_portofolio(self.model_dir)
def infer(self, env):
    self.load(self.model_dir)
    state = env.reset(infer=True)
    state_var = torch.tensor(state).unsqueeze(0).permute(1, 0, 2).to(
        torch.float)
    with torch.no_grad():
        action = self.policy.action(state_var)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent.
    action = action.detach().cpu()[0].numpy().astype(float)
    logger.info("infer %s result: %s" % (self.args.infer_date, str(action)))
    actions = env.parse_infer_action(action)
    with open(self.args.infer_result_path, "w") as f:
        for act in actions:
            linestr = ",".join([str(v) for v in act])
            f.write(linestr + "\n")
    with open(self.args.progress_bar_path, "w") as progress_file:
        progress_file.write("%d,%d\n" % (1, 1))

def learn(self):
    if self.args.num_env > 1:
        self.policy.share_memory()
    self.warm_up()
    logger.info("learning started")
    i = 0
    current_portfolio = 1.0
    t_start = time.time()
    for i_iter in range(self.args.max_iter_num):
        with open(self.args.progress_bar_path, "w") as progress_file:
            progress_file.write("%d,%d" % (i_iter, self.args.max_iter_num))
        obs, act, rew, obs_t, done, avg_reward, e_t, ports = [None] * 8
        if self.args.num_env == 1:
            obs, act, rew, obs_t, done, avg_reward, e_t, ports = \
                self.simple_explore()
        else:
            obs, act, rew, obs_t, done, avg_reward, e_t, ports = \
                self.explore()
        for p in ports:
            i += 1
            self.writer.add_scalar('reward/portfolio', p, i)
            current_portfolio = p
        if current_portfolio > self.best_portfolio:
            self.best_portfolio = current_portfolio
            logger.info("iter: %d, new best portfolio: %.3f" % (
                i_iter + 1, self.best_portfolio))
            self.save(self.model_dir)
        self.writer.add_scalar('time/explore', e_t, i_iter)
        try:
            v_loss, p_loss, p_reg, act_reg, u_t = self.update_params(
                obs, act, rew, obs_t, done)
        except Exception as error:
            # Skip this iteration on a failed update; otherwise the loss
            # variables below would be unbound.
            logger.error(error)
            continue
        self.writer.add_scalar('time/update', u_t, i_iter)
        self.writer.add_scalar('loss/value', v_loss, i_iter)
        self.writer.add_scalar('loss/policy', p_loss, i_iter)
        self.writer.add_scalar('reg/action', act_reg, i_iter)
        self.writer.add_scalar('reg/policy', p_reg, i_iter)
        self.writer.add_scalar('reward/policy',
                               torch.tensor(avg_reward), i_iter)
        if (i_iter + 1) % self.args.log_interval == 0:
            msg = "total update time: %.1f secs" % (time.time() - t_start)
            msg += ", iter=%d, avg_reward=%.3f" % (i_iter + 1, avg_reward)
            msg += ", current_portfolio: %.3f" % current_portfolio
            logger.info(msg)
        clear_memory()
    logger.info("Final best portfolio: %.3f" % self.best_portfolio)
    self.save_best_portofolio(self.model_dir)

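# ---------------------------------------------------------------------------
# NOTE: update_params above is defined elsewhere in the DDPG agent. The
# sketch below shows a standard DDPG update under common assumptions
# (target networks policy_t/value_t, soft-update rate tau, discount gamma).
# All names are illustrative, not the repo's actual interface.
# ---------------------------------------------------------------------------
import torch
import torch.nn.functional as F


def ddpg_update_sketch(policy, policy_t, value, value_t, policy_opt,
                       value_opt, obs, act, rew, obs_t, done,
                       gamma=0.99, tau=0.005):
    # Critic: one-step TD target bootstrapped from the target networks.
    with torch.no_grad():
        q_next = value_t(obs_t, policy_t.action(obs_t))
        target = rew + gamma * (1.0 - done) * q_next
    v_loss = F.mse_loss(value(obs, act), target)
    value_opt.zero_grad()
    v_loss.backward()
    value_opt.step()
    # Actor: ascend Q(s, pi(s)) by minimizing its negation.
    p_loss = -value(obs, policy.action(obs)).mean()
    policy_opt.zero_grad()
    p_loss.backward()
    policy_opt.step()
    # Polyak (soft) update of both target networks.
    for src, dst in ((policy, policy_t), (value, value_t)):
        for s, t in zip(src.parameters(), dst.parameters()):
            t.data.mul_(1.0 - tau).add_(tau * s.data)
    return v_loss.item(), p_loss.item()
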
def eval(self, env, args):
    self.load(self.model_dir)
    mdd, sharpe_ratio, annualized_return, portfolios = env_eval(
        env, self.policy, args.print_action)
    # baseline: buy-and-hold strategy performance
    bh_annualized_return, bh_portfolios = buy_and_hold(env)
    for i in range(len(portfolios)):
        self.writer.add_scalars('backtesting', {
            self.args.alg: portfolios[i],
            "buy&hold": bh_portfolios[i]
        }, i)
    excess_return = portfolios[-1] - bh_portfolios[-1]
    logger.info("excess_return: %.3f" % excess_return)
    annual_excess_return = annualized_return - bh_annualized_return
    logger.info("annualized excess_return: %.3f" % annual_excess_return)
    # save eval results
    absolute_return = portfolios[-1]
    ex_base_code = self.args.codes
    ex_strategy = "buy&hold"
    self.save_eval(absolute_return, annualized_return, mdd, sharpe_ratio,
                   ex_base_code, ex_strategy, bh_portfolios[-1],
                   bh_annualized_return)

def learn(self): logger.info("learning started") i = 0 current_portfolio = 1.0 t_start = time.time() state = self.envs[0].reset() for i_iter in range(self.args.max_iter_num): with open(self.args.progress_bar_path, "w") as progress_file: progress_file.write("%d,%d" % (i_iter, self.args.max_iter_num)) obs, act, rew, obs_t, done, ports, e_t = \ self.explore( self.envs[0], state, self.args.t_max, self.args.print_action) state = obs[-1] for p in ports: i += 1 self.writer.add_scalar('reward/portfolio', p, i) current_portfolio = p if current_portfolio > self.best_portfolio: self.best_portfolio = current_portfolio logger.info("iter: %d, new best portfolio: %.3f" % (i_iter + 1, self.best_portfolio)) self.save(self.model_dir) self.writer.add_scalar('time/explore', e_t, i_iter) self.writer.add_scalar('reward/policy', np.mean(rew), i_iter) self.update_params(obs, act, rew, obs_t, done, i_iter) if (i_iter + 1) % self.args.log_interval == 0: msg = "total update time: %.1f secs" % (time.time() - t_start) msg += ", current_portfolio: %.3f" % current_portfolio logger.info(msg) clear_memory() logger.info("Final best portfolio: %.3f" % self.best_portfolio) self.save_best_portofolio(self.model_dir)
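# ---------------------------------------------------------------------------
# NOTE: this agent also delegates its gradient step to an update_params
# defined elsewhere. For orientation only, a minimal advantage actor-critic
# style update consistent with the (obs, act, rew, obs_t, done) rollout
# interface above; policy.log_prob and the value net are assumptions, not
# the repo's actual interface.
# ---------------------------------------------------------------------------
import torch
import torch.nn.functional as F


def actor_critic_update_sketch(policy, value, optimizer, obs, act, rew,
                               done, last_obs, gamma=0.99):
    # Discounted returns, bootstrapped from the value of the last state.
    R = value(last_obs).detach()
    returns = []
    for r, d in zip(reversed(rew), reversed(done)):
        R = r + gamma * R * (1.0 - d)
        returns.insert(0, R)
    returns = torch.stack(returns)
    values = value(obs)
    advantage = (returns - values).detach()
    # Policy gradient weighted by the advantage, plus value regression.
    p_loss = -(policy.log_prob(obs, act) * advantage).mean()
    v_loss = F.mse_loss(values, returns)
    optimizer.zero_grad()
    (p_loss + 0.5 * v_loss).backward()
    optimizer.step()
    return p_loss.item(), v_loss.item()
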
def __init__(self):
    self.name = "tbase_runner"
    logger.info("tenvs version: %s" % tenvs.__version__)
    logger.info("tbase version: %s" % tbase.__version__)