def infer(self, train, episode):
    if not train:
        episode_length = self.env.datacontainer.train_length - 1 - self.env.history_length
    tsm = TradingStateModel(
        datacontainer=self.env.datacontainer,
        episode_length=episode_length,
        history_length=self.env.history_length,
        is_training=True,
        commission_percentage=self.env.commission_percentage)
    state = tsm.reset()
    prices = [state.price]  # [episode_length]
    rewards = [0]  # [episode_length]
    allocations = [state.portfolio_allocation]  # [episode_length]
    for _ in range(episode_length):
        batch_asset_features, batch_portfolio = convert_features(
            features=np.array([state.features]),
            asset_features_shape=self.actor.asset_features_shape,
            portfolio_features_shape=[self.actor.a_dim])
        action = self.actor.predict_target(
            asset_inputs=batch_asset_features,
            portfolio_inputs=batch_portfolio)[0]
        trans_state, reward, terminal, info = tsm.step(action)
        prices.append(trans_state.price)
        rewards.append(reward)
        allocations.append(trans_state.portfolio_allocation)
        state = trans_state
    prices = np.array(prices)
    rewards = np.array(rewards)
    allocations = np.array(allocations)

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].set_ylabel('Price')
    for ind in range(self.env.datacontainer.num_assets):
        axarr[0].plot(prices[:, ind])
    axarr[1].set_ylabel('Cumulative Reward')
    axarr[1].plot(np.cumsum(rewards))
    axarr[2].set_ylabel('Action')
    for ind in range(self.env.datacontainer.num_assets):
        axarr[2].plot(allocations[:, ind])
    dataset = 'Train' if train else 'Test'
    title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
    f.suptitle(title)  # title was computed but never attached to the figure
    plt.savefig(
        os.path.join(self.infer_directory, str(episode) + ".png"))
    plt.close(f)  # avoid accumulating open figures across repeated calls
def infer(self, train, episode):
    if not train:
        episode_length = self.datacontainer.test_length - 1
    tsm = TradingStateModel(
        datacontainer=self.datacontainer,
        episode_length=episode_length,
        is_training=train,
        commission_percentage=self.tsm.commission_percentage,
        coin_boundary=self.tsm.coin_boundary)
    state, reward = tsm.initialize()
    prices = [state.price]  # [episode_length]
    rewards = [reward]  # [episode_length]
    coins = [state.coins]  # [episode_length]
    for _ in tqdm(range(episode_length)):
        action = self.actor_target.select_action(
            inputs=np.array([state.features]))[0][0]
        # action = self.random_action()
        trans_state, reward = tsm.step(action)
        prices.append(trans_state.price)
        rewards.append(reward)
        coins.append(trans_state.coins)
        state = trans_state
    prices = np.array(prices)
    rewards = np.array(rewards)
    coins = np.array(coins)

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].set_ylabel('Price')
    axarr[0].plot(prices)
    axarr[1].set_ylabel('Cumulative Reward')
    axarr[1].plot(np.cumsum(rewards))
    axarr[2].set_ylabel('Action')
    axarr[2].plot(coins)
    dataset = 'Train' if train else 'Test'
    title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
    f.suptitle(title)  # title was computed but never attached to the figure
    plt.savefig("./infer" + str(episode) + ".png")
    plt.close(f)
def infer(self, train):
    if not train:
        episode_length = self.datacontainer.test_length - 1
    tsm = TradingStateModel(datacontainer=self.datacontainer,
                            episode_length=episode_length,
                            is_training=train,
                            commission_percentage=0.0)
    state, reward = tsm.initialize()
    prices = [state.prices]  # [episode_length, num_assets]
    rewards = [reward]  # [episode_length]
    allocations = [state.portfolio_allocation]  # [episode_length, num_assets]
    for _ in tqdm(range(episode_length)):
        # action = self.actor_target.select_action(inputs=np.array([state.features]))[0]
        action = DDPG.random_action(
            num_dimensions=self.datacontainer.num_assets)
        trans_state, reward = tsm.step(action)
        prices.append(trans_state.prices)
        rewards.append(reward)
        allocations.append(action)
        state = trans_state
    prices = np.array(prices)
    rewards = np.array(rewards)
    allocations = np.array(allocations)

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].set_ylabel('Price')
    for ind in range(prices.shape[1]):
        axarr[0].plot(prices[:, ind])
    axarr[1].set_ylabel('Cumulative Reward')
    axarr[1].plot(np.cumsum(rewards))
    axarr[2].set_ylabel('Action')
    for ind in range(allocations.shape[1]):
        axarr[2].plot(allocations[:, ind])
    dataset = 'Train' if train else 'Test'
    title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
    f.suptitle(title)  # title was computed but never attached to the figure
    plt.show()
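# DDPG.random_action is defined elsewhere in this codebase. Purely as a hedged
# illustration (not the project's implementation), one way to draw a random
# portfolio allocation -- assuming allocations are non-negative weights that
# sum to 1 -- is a Dirichlet sample:
import numpy as np

def random_allocation_sketch(num_dimensions):
    """Hypothetical stand-in for DDPG.random_action: random non-negative
    weights over num_dimensions assets that sum to 1."""
    return np.random.default_rng().dirichlet(np.ones(num_dimensions))

# Example: 3 assets -> something like array([0.21, 0.47, 0.32]).
print(random_allocation_sketch(num_dimensions=3))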
def infer(env, agent, train, episode, infer_directory):
    print("INFERRING episode:", episode)
    episode_length = env.datacontainer.train_length - 40
    tsm = TradingStateModel(datacontainer=env.datacontainer,
                            episode_length=episode_length,
                            is_training=True,
                            commission_percentage=env.commission_percentage)
    state = tsm.reset()
    prices = [state.price]  # [episode_length]
    rewards = [0]  # [episode_length]
    allocations = [state.portfolio_allocation]  # [episode_length]
    for _ in range(episode_length):
        action, _ = agent.pi(state.features, apply_noise=False, compute_Q=False)
        trans_state, reward, terminal, info = tsm.step(action)
        prices.append(trans_state.price)
        rewards.append(reward)
        allocations.append(trans_state.portfolio_allocation)
        state = trans_state
    prices = np.array(prices)
    rewards = np.array(rewards)
    allocations = np.array(allocations)

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].set_ylabel('Price')
    for ind in range(env.datacontainer.num_assets):
        axarr[0].plot(prices[:, ind])
    axarr[1].set_ylabel('Cumulative Reward')
    axarr[1].plot(np.cumsum(rewards))
    axarr[2].set_ylabel('Action')
    for ind in range(env.datacontainer.num_assets):
        axarr[2].plot(allocations[:, ind])
    dataset = 'Train' if train else 'Test'
    title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
    f.suptitle(title)  # title was computed but never attached to the figure
    plt.savefig(os.path.join(infer_directory, str(episode) + ".png"))
    plt.close(f)
TAU = 0.001
COMMISSION_PERCENTAGE = 0.0

# env = gym.make('Pendulum-v0')
# state_dim = env.observation_space.shape[0]
# action_dim = env.action_space.shape[0]
# boundary = env.action_space.high[0]

# dc = TestContainer(num_assets=3,
#                    num_samples=2000)
dc = EasyContainer(num_samples=2000)
# dc = DataContainer(hdf_file_name='../data/hdfs/poloniex_30m.hf')
# dc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
env = TradingStateModel(datacontainer=dc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        history_length=HISTORY_LENGTH,
                        commission_percentage=COMMISSION_PERCENTAGE)
asset_features_shape = [dc.num_assets, HISTORY_LENGTH, dc.num_asset_features]
action_dim = dc.num_assets

actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

# rpb = ReplayBuffer(buffer_size=BUFFER_SIZE)
# conf = {
#     'size': BUFFER_SIZE,
#     'batch_size': BATCH_SIZE,
#     'learn_start': 1000,
#     'steps': NUM_EPISODES * EPISODE_LENGTH
# }
# rpb = Experience(conf)
rpb = PrioritizedReplayBuffer(size=BUFFER_SIZE, alpha=0.6)
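# OrnsteinUhlenbeckActionNoise comes from the surrounding codebase (the same
# helper used in OpenAI baselines). As a hedged reference only, a minimal
# standalone sketch of the Ornstein-Uhlenbeck process it models, with theta,
# sigma, and dt as assumed defaults rather than the exact values used here:
import numpy as np

class OUNoiseSketch:
    """Mean-reverting, temporally correlated noise:
    x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1).
    Added to the actor's deterministic action for exploration in DDPG."""

    def __init__(self, mu, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.x = np.copy(mu)

    def __call__(self):
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.mu.shape))
        return self.x

ou = OUNoiseSketch(mu=np.zeros(3))
exploration_sample = ou()  # add to the deterministic action during training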
from tradingstatemodel import TradingStateModel

NUM_EPISODES = 1000
EPISODE_LENGTH = 50
COMMISSION_PERCENTAGE = 0.0
BATCH_SIZE = 32
BATCH_NORM = True
BUFFER_SIZE = 1000000
COIN_BOUNDARY = 5

# tc = TestContainer(num_assets=1, num_samples=5000)
tc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
tc.plot_prices(train=True)
tsm = TradingStateModel(datacontainer=tc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        commission_percentage=COMMISSION_PERCENTAGE,
                        coin_boundary=COIN_BOUNDARY)
print("FLATTENED:", tc.num_flattened_features)

sess = tf.Session()
actor_target = ActorNetwork(sess=sess,
                            batch_size=BATCH_SIZE,
                            batch_norm=BATCH_NORM,
                            dropout=0.5,
                            history_length=50,
                            datacontainer=tc,
                            epochs=50,
                            is_target=True,
                            coin_boundary=COIN_BOUNDARY)
actor_trainer = ActorNetwork(sess=sess,
NUM_EPISODES = 10000
EPISODE_LENGTH = 250
GAMMA = 0.99
TAU = 0.001

# env = gym.make('Pendulum-v0')
# state_dim = env.observation_space.shape[0]
# action_dim = env.action_space.shape[0]
# boundary = env.action_space.high[0]

# dc = TestContainer(num_assets=1,
#                    num_samples=2000)
dc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
env = TradingStateModel(datacontainer=dc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        commission_percentage=0,
                        coin_boundary=5)
state_dim = dc.num_flattened_features
action_dim = 1
boundary = env.coin_boundary

actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

# rpb = ReplayBuffer(buffer_size=BUFFER_SIZE)
# conf = {
#     'size': BUFFER_SIZE,
#     'batch_size': BATCH_SIZE,
#     'learn_start': 1000,
#     'steps': NUM_EPISODES * EPISODE_LENGTH
# }
# rpb = Experience(conf)
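# TAU above is the DDPG soft-update rate. As context (a generic DDPG detail,
# not code from this repository), a minimal numpy sketch of how the target
# networks track the online networks via
# theta_target <- tau * theta + (1 - tau) * theta_target:
import numpy as np

def soft_update_sketch(online_params, target_params, tau=0.001):
    """Blend each online weight tensor into its target counterpart.
    With tau = 0.001 the targets move slowly, stabilising the critic's
    bootstrapped TD targets."""
    return [tau * w + (1.0 - tau) * w_t
            for w, w_t in zip(online_params, target_params)]

# Toy example with two "layers" of weights.
online = [np.ones((4, 4)), np.ones(4)]
target = [np.zeros((4, 4)), np.zeros(4)]
target = soft_update_sketch(online, target, tau=0.001)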
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    # env = gym.make(env_id)
    # env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))
        # NOTE: `env` is not defined yet at this point because the gym env
        # creation above is commented out; this branch would raise a NameError.
        env = bench.Monitor(env, None)
    else:
        eval_env = None

    # dc = TestContainer(num_assets=3, num_samples=20000)
    dc = BitcoinTestContainer(csv_file_name='../../../data/csvs/output.csv')
    env = TradingStateModel(datacontainer=dc,
                            episode_length=kwargs['nb_rollout_steps'],
                            is_training=True,
                            commission_percentage=COMMISSION_PERCENTAGE)

    # Parse noise_type.
    action_noise = None
    param_noise = None
    # nb_actions = env.action_space.shape[-1]
    nb_actions = env.datacontainer.num_assets
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(num_asset_features=env.datacontainer.total_asset_features,
                    num_actions=env.datacontainer.num_assets,
                    asset_features_shape=env.asset_features_shape,
                    portfolio_features_shape=env.portfolio_features_shape,
                    layer_norm=layer_norm)
    actor = Actor(nb_actions,
                  num_asset_features=env.datacontainer.total_asset_features,
                  num_actions=env.datacontainer.num_assets,
                  asset_features_shape=env.asset_features_shape,
                  portfolio_features_shape=env.portfolio_features_shape,
                  layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    # env.seed(seed)
    # if eval_env is not None:
    #     eval_env.seed(seed)

    # Disable logging for rank != 0 to avoid noise.
    if rank == 0:
        start_time = time.time()
    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   tensorboard_directory='./tensorboard_' + str(COMMISSION_PERCENTAGE),
                   infer_directory='./infer_ims_' + str(COMMISSION_PERCENTAGE),
                   **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
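# AdaptiveParamNoiseSpec (from baselines) rescales parameter-space noise so
# that the induced change in actions matches desired_action_stddev. A rough,
# hedged sketch of that adaptation rule -- not the baselines implementation,
# and the 1.01 growth factor is an assumption:
class AdaptiveParamNoiseSketch:
    """Grow the perturbation when the perturbed policy's actions stay too
    close to the unperturbed policy's; shrink it when they drift too far."""

    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, coeff=1.01):
        self.current_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.coeff = coeff  # assumed adaptation factor

    def adapt(self, action_distance):
        # action_distance: mean distance between perturbed and unperturbed
        # actions, measured over a batch of states.
        if action_distance > self.desired_action_stddev:
            self.current_stddev /= self.coeff
        else:
            self.current_stddev *= self.coeff

spec = AdaptiveParamNoiseSketch(initial_stddev=0.2, desired_action_stddev=0.2)
spec.adapt(action_distance=0.35)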
from data.datacontainer import BitcoinTestContainer
from tradingstatemodel import TradingStateModel, QApproximator, ReplayBuffer

csv_file_name = './data/csvs/output.csv'
max_coins = 4

btc = BitcoinTestContainer(csv_file_name=csv_file_name)
rpb = ReplayBuffer()
q_approximator = QApproximator(num_features=btc.num_features + max_coins + 1,
                               num_actions=max_coins + 1)
tsm = TradingStateModel(bitcoin_container=btc,
                        model=q_approximator,
                        episode_length=2000,
                        gamma=0.95,
                        starting_coins=0,
                        max_coins=max_coins,
                        epochs=100,
                        replay_buffer=rpb,
                        batch_size=10)
tsm.train()
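# tsm.train() fits the QApproximator from replayed transitions. For context
# only -- a generic Q-learning detail, not this repository's code -- a minimal
# numpy sketch of the Bellman targets such a setup implies (gamma = 0.95,
# max_coins + 1 = 5 discrete actions):
import numpy as np

def q_targets_sketch(q_next, rewards, terminals, gamma=0.95):
    """y = r + gamma * max_a' Q(s', a'), with bootstrapping cut at terminals.
    q_next: [batch, num_actions] Q-values of the next states."""
    return rewards + gamma * (1.0 - terminals) * q_next.max(axis=1)

# Toy batch of 3 transitions over 5 actions (0..max_coins coins held).
q_next = np.random.rand(3, 5)
rewards = np.array([0.1, -0.05, 0.0])
terminals = np.array([0.0, 0.0, 1.0])
print(q_targets_sketch(q_next, rewards, terminals))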