def __init__(self, wid):
    """Set up a worker with its own CSV-driven trading environment.

    Args:
        wid: Identifier stored on the instance as ``self.wid``.
    """
    # A SpreadTrading environment fed from a CSV file is used here in place
    # of the earlier ``gym.make(GAME).unwrapped`` environment.
    price_feed = get_CSV_data(filename="./test_6.csv")
    self.env = SpreadTrading(
        spread_coefficients=[1],
        data_generator=price_feed,
        trading_fee=0.007,
        time_fee=0.0073,
        history_length=1,
    )
    self.wid = wid
    # Reference to the module-level PPO object.
    self.ppo = GLOBAL_PPO
class TestSpreadTrading(object):
    """Checks for a SpreadTrading environment fed by a WavySignal generator.

    The generator and the environment are class attributes, so every test
    method operates on the same environment instance.
    """

    data_generator = WavySignal(period_1=3, period_2=1, epsilon=0)
    st = SpreadTrading(
        data_generator=data_generator,
        spread_coefficients=[1],
        episode_length=1000,
        trading_fee=0.2,
        time_fee=0.1,
        history_length=1,
    )

    def test_init(self):
        """The constructor arguments end up on the private attributes."""
        env = self.st
        assert env._data_generator == self.data_generator
        assert env._spread_coefficients == [1]
        assert env._first_render
        assert env._trading_fee == 0.2
        assert env._time_fee == 0.1
        assert env._episode_length == 1000
        assert env.n_actions == 3
        assert env._history_length == 1
        assert len(env._prices_history) == 1

    def test_step(self):
        """Send buy, hold and sell in turn and inspect the resulting state.

        Only the first element returned by ``step`` is inspected; its last
        three entries are compared against the expected vectors below
        (presumably a one-hot position indicator -- confirm against the
        environment).
        """
        env = self.st
        after_buy = np.array([0, 1, 0])
        after_sell = np.array([1, 0, 0])

        # Buy
        observation = env.step(np.array([0, 1, 0]))[0]
        assert observation[0] == observation[1]
        assert (observation[-3:] == after_buy).all()
        assert env._entry_price != 0
        assert env._exit_price == 0

        # Hold: the trailing entries and both prices stay as they were.
        observation = env.step(np.array([1, 0, 0]))[0]
        assert (observation[-3:] == after_buy).all()
        assert env._entry_price != 0
        assert env._exit_price == 0

        # Sell
        observation = env.step(np.array([0, 0, 1]))[0]
        assert (observation[-3:] == after_sell).all()
        assert env._entry_price == 0
        assert env._exit_price != 0

    def test_reset(self):
        """Placeholder: nothing about ``reset`` is checked yet."""
class TestSpreadTrading(object):
    """Checks for a SpreadTrading environment fed by a CSVStreamer.

    The generator and the environment are class attributes, so every test
    method operates on the same environment instance.
    """

    # Alternative generator kept for reference:
    # data_generator = AR1(a=0.1, ba_spread=0.1)
    data_generator = CSVStreamer(filename='../../data/AMZN-L1.csv')
    st = SpreadTrading(
        data_generator=data_generator,
        spread_coefficients=[1],
        trading_fee=0.2,
        time_fee=0.1,
        history_length=2,
    )

    def test_init(self):
        """The constructor arguments end up on the private attributes."""
        assert self.st._data_generator == self.data_generator
        assert self.st._spread_coefficients == [1]
        assert self.st._first_render
        assert self.st._trading_fee == 0.2
        assert self.st._time_fee == 0.1
        # No episode_length is passed above, so this checks the
        # environment's own default (presumably 1000 -- confirm).
        assert self.st._episode_length == 1000
        assert self.st.n_actions == 3
        assert self.st._history_length == 2
        assert len(self.st._prices_history) == 2

    def test_step(self):
        """Send buy, hold and sell in turn and inspect the resulting state.

        Only the first element returned by ``step`` is inspected; its last
        three entries are compared against the expected vectors.
        ``np.array_equal`` is used so that a shape mismatch fails the
        assertion instead of raising from the builtin ``all``.
        """
        # Buy
        state = self.st.step(np.array([0, 1, 0]))
        # Disabled check: assert state[0][0] == state[0][1]
        assert np.array_equal(state[0][-3:], np.array([0, 1, 0]))
        assert self.st._entry_price != 0
        assert self.st._exit_price == 0

        # Hold
        state = self.st.step(np.array([1, 0, 0]))
        assert np.array_equal(state[0][-3:], np.array([0, 1, 0]))
        assert self.st._entry_price != 0
        assert self.st._exit_price == 0

        # Sell
        state = self.st.step(np.array([0, 0, 1]))
        assert np.array_equal(state[0][-3:], np.array([1, 0, 0]))
        assert self.st._entry_price == 0
        assert self.st._exit_price != 0

    def test_reset(self):
        """``reset`` runs and hands back a state.

        The state is asserted on rather than returned: pytest expects test
        functions to return ``None``.
        """
        state = self.st.reset()
        assert state is not None
import numpy as np
from tgym.core import DataGenerator
from tgym.envs import SpreadTrading
from tgym.gens.deterministic import WavySignal

# `raw_input` exists only on Python 2; on Python 3 the same function is called
# `input`. Resolve the right one once so the prompt works on both.
try:
    read_action = raw_input
except NameError:
    read_action = input

generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5)

game_length = 200
trading_fee = 0.2
time_fee = 0
# history_length number of historical states in the observation vector.
history_length = 2

environment = SpreadTrading(
    spread_coefficients=[1],
    data_generator=generator,
    trading_fee=trading_fee,
    time_fee=time_fee,
    history_length=history_length,
    game_length=game_length,
)

environment.render()
# Manual trading loop: read one key per step, translate it into an action
# vector, apply it and redraw. The loop has no exit condition of its own.
while True:
    action = read_action("Action: Buy (b) / Sell (s) / Hold (enter): ")
    if action == 'b':
        action = [0, 1, 0]
    elif action == 's':
        action = [0, 0, 1]
    else:
        # Anything else, including a bare Enter, means hold.
        action = [1, 0, 0]
    environment.step(action)
    environment.render()
class Worker(object):
    """Collects rollouts from a trading environment for the global PPO."""

    def __init__(self, wid):
        """Set up a worker with its own CSV-driven trading environment.

        Args:
            wid: Identifier stored on the instance and shown in the
                progress line printed after each episode.
        """
        # A SpreadTrading environment fed from a CSV file is used here in
        # place of the earlier ``gym.make(GAME).unwrapped`` environment.
        price_feed = get_CSV_data(filename="./test_6.csv")
        self.env = SpreadTrading(
            spread_coefficients=[1],
            data_generator=price_feed,
            trading_fee=0.007,
            time_fee=0.0073,
            history_length=1,
        )
        self.wid = wid
        self.ppo = GLOBAL_PPO

    def work(self):
        """Run episodes until the coordinator asks to stop.

        Each episode steps the environment up to ``EP_LEN`` times with
        actions chosen by the PPO object. Transitions are buffered and, at
        the last step of the episode or once ``GLOBAL_UPDATE_COUNTER``
        reaches ``MIN_BATCH_SIZE``, turned into rows of
        ``[state, action, discounted return]`` and put on ``QUEUE``.
        After every episode the running reward list and episode counter
        are updated and a progress line is printed.
        """
        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
        while not COORD.should_stop():
            state = self.env.reset()
            episode_reward = 0
            states, actions, rewards = [], [], []
            for step in range(EP_LEN):
                if not ROLLING_EVENT.is_set():
                    # The global PPO is updating: wait for it to finish,
                    # then drop what was collected so that only data from
                    # the new policy is used.
                    ROLLING_EVENT.wait()
                    states, actions, rewards = [], [], []

                action = self.ppo.choose_action(state)
                next_state, reward, done, _ = self.env.step(action)
                states.append(state)
                actions.append(action)
                # Normalised reward (found to be useful by the author).
                # NOTE(review): the constants 8 look inherited from another
                # environment -- confirm they suit this one.
                rewards.append((reward + 8) / 8)
                state = next_state
                episode_reward += reward

                # Count towards the minimum batch size; there is no need to
                # wait for the other workers.
                GLOBAL_UPDATE_COUNTER += 1
                if (step == EP_LEN - 1
                        or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE):
                    # Discounted returns, bootstrapped from the value of
                    # the latest state and accumulated back to front.
                    running_return = self.ppo.get_v(next_state)
                    discounted = []
                    for stored_reward in reversed(rewards):
                        running_return = stored_reward + GAMMA * running_return
                        discounted.append(running_return)
                    discounted.reverse()

                    columns = (
                        np.vstack(states),
                        np.vstack(actions),
                        np.array(discounted)[:, np.newaxis],
                    )
                    states, actions, rewards = [], [], []
                    QUEUE.put(np.hstack(columns))  # hand the batch over

                    # The counter is read again on purpose: it is a module
                    # global and may have changed since the check above.
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                        ROLLING_EVENT.clear()  # stop collecting data
                        UPDATE_EVENT.set()  # trigger the global PPO update

                    if GLOBAL_EP >= EP_MAX:  # stop training
                        COORD.request_stop()
                        break

            # Record the reward trend for plotting later: the first episode
            # is stored as is, later ones as a 0.9 / 0.1 moving average.
            if len(GLOBAL_RUNNING_R) > 0:
                smoothed = GLOBAL_RUNNING_R[-1] * 0.9 + episode_reward * 0.1
            else:
                smoothed = episode_reward
            GLOBAL_RUNNING_R.append(smoothed)
            GLOBAL_EP += 1

            progress = '{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100)
            worker_tag = '|W%i' % self.wid
            reward_tag = '|Ep_r: %.2f' % episode_reward
            print(progress, worker_tag, reward_tag)
# Start one training thread per worker.
threads = []
for worker in workers:
    worker_thread = threading.Thread(target=worker.work, args=())
    worker_thread.start()
    threads.append(worker_thread)

# One extra thread runs the PPO update loop.
updater = threading.Thread(target=GLOBAL_PPO.update)
threads.append(updater)
updater.start()
COORD.join(threads)

# Plot how the moving reward changed over the episodes.
episode_index = np.arange(len(GLOBAL_RUNNING_R))
plt.plot(episode_index, GLOBAL_RUNNING_R)
plt.xlabel('Episode')
plt.ylabel('Moving reward')
plt.ion()
plt.show()

# Replay the trained policy on a fresh trading environment (this replaced
# the earlier ``gym.make('Pendulum-v0')`` environment).
trading_fee = 0.007
# NOTE(review): the Worker snippet uses a time fee of .0073 -- confirm the
# difference is intended.
time_fee = 0.00724
history_length = 1
generator = get_CSV_data(filename="./test_6.csv")
env = SpreadTrading(
    spread_coefficients=[1],
    data_generator=generator,
    trading_fee=trading_fee,
    time_fee=time_fee,
    history_length=history_length,
)

# Endless demonstration loop: reset, then render and act for 3455 steps.
while True:
    s = env.reset()
    for _ in range(3455):
        env.render()
        chosen_action = GLOBAL_PPO.choose_action(s)
        s = env.step(chosen_action)[0]
buy = np.array([0, 1, 0]) sell = np.array([0, 0, 1]) possible_actions = [hold, buy, sell] #Classes and variables generator = CSVStreamer(filename='/Users/tawehbeysolow/Downloads/amazon_order_book_data2.csv') #generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5) memory = Memory(max_size=memory_size) generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5) environment = SpreadTrading(spread_coefficients=[1], data_generator=generator, trading_fee=trading_fee, time_fee=time_fee, history_length=history_length) state_size = len(environment.reset()) def baseline_model(n_actions, info, random=False): if random == True: action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), 3)) action = possible_actions[action] else: if len(info) == 0:
import numpy as np from generator import CSVStreamer market = sys.argv[1] # from generators.tickergenerator import TickerGenerator # Instantiating the environmnent generator = CSVStreamer(filename="data/" + market + "-history.csv") episodes = 7600 episode_length = 200 trading_fee = .2 time_fee = 0 history_length = 5 environment = SpreadTrading(spread_coefficients=[1], data_generator=generator, trading_fee=trading_fee, time_fee=time_fee, history_length=history_length) state = environment.reset() # Instantiating the agent memory_size = 3000 state_size = len(state) gamma = 0.96 epsilon_min = 0.01 batch_size = 64 action_size = len(SpreadTrading._actions) train_interval = 10 learning_rate = 0.001 if not os.path.isfile("./model." + market + ".h5"):
max_grad_norm = 0.5 log_interval = 10 hold = np.array([1, 0, 0]) buy = np.array([0, 1, 0]) sell = np.array([0, 0, 1]) possible_actions = [hold, buy, sell] #Classes and variables generator = CSVStreamer(filename='/Users/tawehbeysolow/Downloads/amazon_order_book_data2.csv') #generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5) memory = Memory(max_size=memory_size) environment = SpreadTrading(spread_coefficients=[1], data_generator=generator, trading_fee=trading_fee, time_fee=time_fee, history_length=history_length) state_size = len(environment.reset()) def baseline_model(n_actions, info, random=False): if random == True: action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), 3)) action = possible_actions[action] else: if len(info) == 0:
from tgym.gens.csvstream import CSVStreamer
from test import get_CSV_data

var = 2.0

# Data source. Alternatives that were tried and are currently disabled:
#   CSVStreamer(filename='./test_4.csv')
#   CSVStreamer(filename='./test_5.csv')   (as a second data set)
#   WavySignal(period_1=25, period_2=50, epsilon=-0.5)
#   (needs: from tgym.gens.deterministic import WavySignal)
generator = get_CSV_data(filename='./test_4.csv')

trading_fee = 0.005
time_fee = 0
history_length = 1

environment = SpreadTrading(
    spread_coefficients=[1],
    data_generator=generator,
    trading_fee=trading_fee,
    time_fee=time_fee,
    history_length=history_length,
)

# The two sizes are hard-coded. They used to be derived with
# ``len(environment.reset())`` and ``len(SpreadTrading._actions)``.
state_size = 6
action_size = 3
from tgym.envs import SpreadTrading
from tgym.gens import CSVStreamer

# `raw_input` exists only on Python 2; on Python 3 the same function is called
# `input`. Resolve the right one once so the prompt works on both.
try:
    read_action = raw_input
except NameError:
    read_action = input

generator = CSVStreamer(filename='./examples/price_2.csv')

episode_length = 200

environment = SpreadTrading(
    spread_coefficients=[2, -1],
    data_generator=generator,
    episode_length=episode_length,
)

environment.render()
# Manual trading loop: read one key per step, translate it into an action
# vector, apply it and redraw. The loop has no exit condition of its own.
while True:
    action = read_action("Action: Buy (b) / Sell (s) / Hold (enter): ")
    if action == 'b':
        action = [0, 1, 0]
    elif action == 's':
        action = [0, 0, 1]
    else:
        # Anything else, including a bare Enter, means hold.
        action = [1, 0, 0]
    environment.step(action)
    environment.render()