parser.add_argument("-r", "--run", required=True, help="Run name") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") saves_path = SAVE_DIR / f"simple-{args.run}" saves_path.mkdir(parents=True, exist_ok=True) data_path = pathlib.Path(args.data) val_path = pathlib.Path(args.val) if args.year is not None or data_path.is_file(): if args.year is not None: stock_data = data.load_year_data(args.year) else: stock_data = {"YNDX": data.load_relative(data_path)} env = environ.StockEnv(stock_data, bars_count=BARS_COUNT) env_tst = environ.StockEnv(stock_data, bars_count=BARS_COUNT) elif data_path.is_dir(): env = environ.StockEnv.from_dir(data_path, bars_count=BARS_COUNT) env_tst = environ.StockEnv.from_dir(data_path, bars_count=BARS_COUNT) else: raise RuntimeError("No data to train on") env = gym.wrappers.TimeLimit(env, max_episode_steps=1000) val_data = {"YNDX": data.load_relative(val_path)} net = models.SimpleFFDQN(env.observation_space.shape[0], env.action_space.n).to(device) tgt_net = drl.agent.TargetNet(net) selector = drl.actions.EpsilonGreedySelector(epsilon=EPS_START) eps_tracker = drl.tracker.EpsilonTracker(selector, EPS_START, EPS_END, EPS_STEPS)
                    action='store_true', help='enable cuda')
args = parser.parse_args()
device = 'cuda' if args.cuda else 'cpu'

try:
    from lib import data
    play_data = data.read_csv(file_name='data/000001_%d.csv' % args.year)
except ModuleNotFoundError:
    play_data = (pd.read_csv('data/prices_%d.csv' % args.year, index_col=0),
                 pd.read_csv('data/factors_%d.csv' % args.year, index_col=0))

env = environ.StockEnv(play_data, bars_count=BARS_COUNT, commission=args.commission,
                       reset_on_sell=False, random_ofs_on_reset=False)
net = models.DQNConv1d(env.observation_space.shape, env.action_space.n)

datestr = date(2019, 2, 2).strftime('%Y-%m-%d')
save_path = os.path.join('saves', datestr)
state_dict = torch.load(os.path.join(save_path, 'best_mean_val.pth'),
                        map_location=lambda storage, loc: storage)
net.load_state_dict(state_dict)

obs = env.reset()
start_price = env.state._close()
total_reward = 0.0
frame_idx = 0
rewards = []
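# A minimal sketch of the greedy play loop that follows: feed the observation
# through the Q-network, take the highest-valued action, and accumulate the
# reward. The tensor wrapping and the 4-tuple step() return value assume the
# classic gym API used elsewhere in this code.
while True:
    obs_v = torch.tensor([obs])
    q_v = net(obs_v)
    action = int(torch.argmax(q_v, dim=1).item())
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    rewards.append(total_reward)
    frame_idx += 1
    if done:
        break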
args = parser.parse_args()
device = torch.device('cuda' if args.cuda else 'cpu')

try:
    from lib import data
    train_data = data.load_data(year=args.year)
except ModuleNotFoundError:
    # workaround for Colab, where TA-Lib cannot be installed
    train_data = (pd.read_csv('data/000001_prices_%d.csv' % args.year, index_col=0),
                  pd.read_csv('data/000001_factors_%d.csv' % args.year, index_col=0))

env = environ.StockEnv(train_data, bars_count=BARS_COUNT, commission=0.0,
                       reset_on_sell=False)
# env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

net = models.A2CConv1d(env.observation_space.shape, env.action_space.n).to(device)
# use a distinct name so the agent module is not shadowed
agt = agent.ProbabilityAgent(lambda x: net(x)[0], apply_softmax=True, device=device)
exp_source = experience.ExperienceSource(env, agt, GAMMA, steps_count=REWARD_STEPS)
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)
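# A sketch of the on-policy update loop this setup typically feeds, reusing
# helper.a2c_loss() in the same way as the A3C worker further down;
# BATCH_SIZE, CLIP_GRAD and the nn_utils import are assumed to be available
# in this script as well.
batch = []
for step_idx, exp in enumerate(exp_source):
    batch.append(exp)
    if len(batch) < BATCH_SIZE:
        continue
    optimizer.zero_grad()
    loss_val_v, loss_policy_v, loss_entropy_v = helper.a2c_loss(
        batch, net, GAMMA ** REWARD_STEPS, ENTROPY_BETA, device)
    batch.clear()
    loss_v = loss_val_v + loss_policy_v + loss_entropy_v
    loss_v.backward()
    nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
    optimizer.step()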
args = parser.parse_args()
device = torch.device('cuda' if args.cuda else 'cpu')

try:
    from lib import data
    train_data = data.read_csv(file_name='data/000001_2017.csv')
    val_data = data.read_csv(file_name='data/000001_2018.csv')
except ModuleNotFoundError:
    train_data = (pd.read_csv('data/prices_2017.csv', index_col=0),
                  pd.read_csv('data/factors_2017.csv', index_col=0))
    val_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                pd.read_csv('data/factors_2018.csv', index_col=0))

env = environ.StockEnv(train_data, bars_count=BARS_COUNT, reset_on_sell=True)
env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
env_test = environ.StockEnv(train_data, bars_count=BARS_COUNT, reset_on_sell=True)
env_test = gym.wrappers.TimeLimit(env_test, max_episode_steps=1000)
env_val = environ.StockEnv(val_data, bars_count=BARS_COUNT, reset_on_sell=True)
env_val = gym.wrappers.TimeLimit(env_val, max_episode_steps=1000)

net = models.DQNConv1d(env.observation_space.shape, env.action_space.n).to(device)
tgt_net = models.DQNConv1d(env.observation_space.shape, env.action_space.n).to(device)
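# Because tgt_net is an independent DQNConv1d instance rather than a wrapper,
# it has to be synchronised with the online network by hand during training;
# a plain PyTorch sketch, with TARGET_NET_SYNC as an assumed hyperparameter.
def sync_target_net(step_idx):
    # Copy the online network's weights into the target network every
    # TARGET_NET_SYNC frames.
    if step_idx % TARGET_NET_SYNC == 0:
        tgt_net.load_state_dict(net.state_dict())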
def worker(net, device, train_queue, proc_idx, save_path):
    try:
        from lib import data
        train_data = data.read_csv(file_name='data/000001_2018.csv')
    except ModuleNotFoundError:
        train_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                      pd.read_csv('data/factors_2018.csv', index_col=0))

    env = environ.StockEnv(train_data, bars_count=BARS_COUNT, reset_on_sell=True)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
    agt = agent.ProbabilityAgent(lambda x: net(x)[0], apply_softmax=True, device=device)
    exp_source = experience.ExperienceSource(env, agt, GAMMA, steps_count=REWARD_STEPS)

    batch = []
    total_reward = []
    total_steps = []
    reward_buf = []
    steps_buf = []
    frame_idx = 0
    frame_prev = 0
    ts = time.time()
    best_mean_reward = None
    stats = collections.defaultdict(list)

    file_name = os.path.splitext(os.path.basename(__file__))[0]
    file_name = file_name.split('_')[-1]
    proc_name = 'worker_%d' % proc_idx
    writer = SummaryWriter(os.path.join('runs', file_name, proc_name))
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s:%(message)s',
                        handlers=[
                            logging.FileHandler(os.path.join(save_path, 'console.log')),
                            logging.StreamHandler()
                        ])

    for exp in exp_source:
        frame_idx += 1
        batch.append(exp)
        if len(batch) < GRAD_BATCH:
            continue

        net.zero_grad()
        loss_val_v, loss_policy_v, loss_entropy_v = helper.a2c_loss(
            batch, net, GAMMA ** REWARD_STEPS, ENTROPY_BETA, device)
        batch.clear()
        loss_v = loss_entropy_v + loss_val_v + loss_policy_v
        loss_v.backward()
        nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
        grads = [
            param.grad.data.cpu().numpy() if param.grad is not None else None
            for param in net.parameters()
        ]
        train_queue.put(grads)

        # store detached copies so the computation graphs are not kept alive
        stats['loss_value'].append(loss_val_v.detach())
        stats['loss_policy'].append(loss_policy_v.detach())
        stats['loss_entropy'].append(loss_entropy_v.detach())
        stats['loss_total'].append(loss_v.detach())
        for stat in stats:
            if len(stats[stat]) >= STATS_GROUPS:
                writer.add_scalar(stat, torch.mean(torch.stack(stats[stat])).item(),
                                  frame_idx)
                stats[stat].clear()

        ep_reward, ep_steps = exp_source.pop_episode_result()
        if ep_reward:
            print('Worker_%d: %d done, Episode reward: %.4f, Episode step: %d' %
                  (proc_idx, frame_idx, ep_reward, ep_steps))
            reward_buf.append(ep_reward)
            steps_buf.append(ep_steps)
            if len(reward_buf) == REWARD_GROUPS:
                reward = np.mean(reward_buf)
                steps = np.mean(steps_buf)
                reward_buf.clear()
                steps_buf.clear()
                total_reward.append(reward)
                total_steps.append(steps)
                speed = (frame_idx - frame_prev) / (time.time() - ts)
                frame_prev = frame_idx
                ts = time.time()
                mean_reward = np.mean(total_reward[-100:])
                mean_step = np.mean(total_steps[-100:])
                logging.info('%d done, mean reward %.3f, mean step %d, speed %d f/s' %
                             (frame_idx, mean_reward, mean_step, speed))
                writer.add_scalar('speed', speed, frame_idx)
                writer.add_scalar('reward', reward, frame_idx)
                writer.add_scalar('reward_100', mean_reward, frame_idx)
                writer.add_scalar('steps', steps, frame_idx)
                writer.add_scalar('steps_100', mean_step, frame_idx)
                if best_mean_reward is None or best_mean_reward < mean_reward:
                    torch.save(net.state_dict(),
                               os.path.join(save_path,
                                            'best_mean_reward-%.3f.pth' % mean_reward))
                    if best_mean_reward is not None:
                        logging.info('Worker_%d: Best mean reward updated %.3f -> %.3f' %
                                     (proc_idx, best_mean_reward, mean_reward))
                    best_mean_reward = mean_reward
    writer.close()
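# On the other side of train_queue, the parent process owns the shared network
# and optimizer (set up below) and applies the gradient lists pushed by the
# workers. A minimal sketch under those assumptions:
def apply_worker_grads(grads, net, optimizer, device):
    # Write the worker's gradients into the shared model's parameters and
    # take a single optimizer step.
    for param, grad in zip(net.parameters(), grads):
        if grad is not None:
            param.grad = torch.from_numpy(grad).to(device)
    optimizer.step()
    optimizer.zero_grad()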
                    default=False, action='store_true',
                    help='enable colab hosted runtime')
args = parser.parse_args()
device = torch.device('cuda' if args.cuda else 'cpu')

try:
    from lib import data
    train_data = data.read_csv(file_name='data/000001_2018.csv')
except ModuleNotFoundError:
    train_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                  pd.read_csv('data/factors_2018.csv', index_col=0))

env = environ.StockEnv(train_data, bars_count=BARS_COUNT, reset_on_sell=True)
env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

net = models.A2CConv1d(env.observation_space.shape, env.action_space.n).to(device)
net.share_memory()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

file_name = os.path.splitext(os.path.basename(__file__))[0]
file_name = file_name.split('_')[-1]
save_path = os.path.join('saves', file_name)
os.makedirs(save_path, exist_ok=True)
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s', handlers=[