def __init__(self, config):
    """Set up training schedules, market-data queues, and the replay buffer.

    Args:
        config: experiment configuration object; must expose eps_begin,
            eps_end, nsteps, lr_begin, lr_end, lr_nsteps, order_path and
            message_path attributes (plus whatever build() needs).
    """
    self._config = config
    # Linearly anneal exploration epsilon and learning rate over training.
    self._eps_schedule = LinearSchedule(self._config.eps_begin,
                                        self._config.eps_end,
                                        self._config.nsteps)
    self._lr_schedule = LinearSchedule(self._config.lr_begin,
                                       self._config.lr_end,
                                       self._config.lr_nsteps)
    # Historical order/message streams used to replay the limit order book.
    self._oq = Order_Queue(self._config.order_path)
    self._mq = Message_Queue(self._config.message_path)
    # Experience replay buffer with a fixed capacity of 1M transitions.
    self._bf = ReplayBuffer(1000000, config)
    # Policy callable used when simulating episodes.
    self._action_fn = self.get_action_fn()
    self.build()
def main():
    """Evaluate a trained Neural_DQN policy in test mode and print the mean reward."""
    config = Config()
    config.mode = 'test'
    config.dropout = 1.0  # disable dropout at evaluation time
    model = Neural_DQN(config)
    # model = DQN(config)
    model.initialize()
    oq = Order_Queue(config.order_path)
    mq = Message_Queue(config.message_path)
    rewards = evaluate_policy(model, oq, mq)
    print(np.mean(rewards))
class model(object):
    """Base class for order-execution agents trained on limit-order-book replays.

    Subclasses implement build/initialize and the action-selection methods;
    this base provides epsilon-greedy action choice, state preprocessing,
    episode simulation against replayed market messages, and replay-buffer
    filling.
    """

    def __init__(self, config):
        """Set up training schedules, market-data queues, and the replay buffer."""
        self._config = config
        # Linearly anneal exploration epsilon and learning rate over training.
        self._eps_schedule = LinearSchedule(self._config.eps_begin,
                                            self._config.eps_end,
                                            self._config.nsteps)
        self._lr_schedule = LinearSchedule(self._config.lr_begin,
                                           self._config.lr_end,
                                           self._config.lr_nsteps)
        # Historical order/message streams used to replay the limit order book.
        self._oq = Order_Queue(self._config.order_path)
        self._mq = Message_Queue(self._config.message_path)
        self._bf = ReplayBuffer(1000000, config)
        self._action_fn = self.get_action_fn()
        self.build()

    def build(self):
        """Construct the model graph. Overridden by subclasses."""
        pass

    def initialize(self):
        """Initialize model parameters / session. Overridden by subclasses."""
        pass

    def get_random_action(self, state):
        """Return (action, q_value) chosen uniformly at random. Overridden by subclasses."""
        pass

    def get_best_action(self, state):
        """Return (action, q_value) for the greedy action. Overridden by subclasses."""
        ### return action, q value
        pass

    def get_action(self, state):
        """Epsilon-greedy action selection using the current epsilon schedule."""
        if np.random.random() < self._eps_schedule.get_epsilon():
            return self.get_random_action(state)[0]
        else:
            return self.get_best_action(state)[0]

    def get_random_action_fn(self):
        """Return a policy callable that picks a uniformly random limit price."""
        def random_action_fn(t, amount, state, mid_price):
            action = np.random.randint(
                self._config.L)  # action = L for market order
            # Map the discrete action to a price offset around the mid price.
            price = (action - self._config.L // 2) * self._config.base_point + mid_price
            return (price, action)
        return random_action_fn

    def get_action_fn(self):
        """Return a policy callable that prices orders via get_action()."""
        def action_fn(t, amount, state, mid_price):
            action = self.get_action(state)
            price = (action - self._config.L // 2) * self._config.base_point + mid_price
            return (price, action)
        return action_fn

    def pad_state(self, states, state_history):
        """Stack the recent (book, indicator) states and left-pad to a fixed depth.

        Args:
            states: list of (book_array, indicator_array) tuples, most recent last.
            state_history: target size of the stacked last axis.
        Returns:
            ([stacked_book], [latest_indicator]) where stacked_book has shape
            book.shape + (state_history,), zero-padded on the oldest side.
        """
        tmp_states, tmp_its = zip(*states)
        tmp_state = np.concatenate(
            [np.expand_dims(state, -1) for state in tmp_states], axis=-1)
        tmp_state = np.pad(tmp_state,
                           ((0, 0), (0, 0),
                            (state_history - tmp_state.shape[-1], 0)),
                           'constant', constant_values=0)
        tmp_it = tmp_its[-1]
        return ([tmp_state], [tmp_it])

    def simulate_an_episode(self, amount, T, H, start_time, order_direction,
                            action_fn, depth):
        """Replay one execution episode of `amount` shares over horizon H.

        The horizon is split into T decision steps of dH each; at every step
        the policy places a limit order, market messages are replayed, and the
        incremental reward is recorded. Any remainder at the end is submitted
        as a market order (action index L).

        Returns:
            (states, rewards, actions, done_mask[1:]) trajectory lists.
        """
        dH = H // T
        self._mq.reset()
        lob_data = self._oq.create_orderbook_time(start_time, self._mq)
        lob = Limit_Order_book(**lob_data,
                               own_amount_to_trade=0,
                               own_init_price=-order_direction *
                               Limit_Order_book._DUMMY_VARIABLE,
                               own_trade_type=order_direction)
        rewards = []
        states = []
        actions = []
        done_mask = []
        amount_remain = amount
        cum_reward = 0
        for t in range(start_time, start_time + H - dH, dH):
            tmp1 = 1.0 * amount_remain / amount  # amount remain
            tmp2 = 1.0 * (start_time + H - t) / H  # time remain
            state = (lob.display_book(depth),
                     np.array([tmp1, tmp2], dtype=float))
            state = self.process_state(state)
            states.append(state)
            mid_price = lob.get_mid_price()
            state_input = self.pad_state(states[-self._config.state_history:],
                                         self._config.state_history)
            price, action = action_fn(start_time + H - t, amount_remain,
                                      state_input, mid_price)
            actions.append(action)
            done_mask.append(False)
            lob.update_own_order(price, amount_remain)
            # Replay market messages until the next decision time; stop early
            # if our order is fully executed.
            for idx, message in self._mq.pop_to_next_time(t + dH):
                lob.process(**message)
                if lob.own_amount_to_trade == 0:
                    done_mask.append(True)
                    state = (lob.display_book(depth),
                             np.array([
                                 0, 1.0 * (start_time + H - self._mq._time) / H
                             ], dtype=float))
                    state = self.process_state(state)
                    states.append(state)
                    rewards.append(lob.own_reward - cum_reward)
                    break
            if done_mask[-1]:
                break
            else:
                # What is going on over here?
                # Not filled yet: record the incremental reward for this step
                # and carry the remaining quantity into the next decision.
                rewards.append(lob.own_reward - cum_reward)
                cum_reward = lob.own_reward
                amount_remain = lob.own_amount_to_trade
        if not done_mask[-1]:
            # Horizon exhausted with inventory left: force a market order.
            tmp1 = 1.0 * amount_remain / amount
            tmp2 = 1.0 * (start_time + H - t - dH) / H
            state = (lob.display_book(depth),
                     np.array([tmp1, tmp2], dtype=float))
            state = self.process_state(state)
            states.append(state)
            done_mask.append(False)
            lob.update_own_order(lob.own_trade_type *
                                 Limit_Order_book._DUMMY_VARIABLE)
            if lob.own_amount_to_trade == 0:
                rewards.append(lob.own_reward - cum_reward)
            else:
                # Could not liquidate even at the dummy (market) price:
                # assign a large penalty.
                rewards.append(-Limit_Order_book._DUMMY_VARIABLE)
            tmp1 = 1.0 * lob.own_amount_to_trade / amount
            state = (lob.display_book(depth),
                     np.array([tmp1, 0], dtype=float))
            state = self.process_state(state)
            states.append(state)
            actions.append(self._config.L)  # action L == market order
            done_mask.append(True)
        return (states, rewards, actions, done_mask[1:])

    def sampling_buffer(self):
        """Simulate episodes over the training window and store them in the buffer."""
        for start_time in range(self._config.train_start,
                                self._config.train_end, self._config.H):
            states, rewards, actions, done_mask = self.simulate_an_episode(
                self._config.I, self._config.T, self._config.H, start_time,
                self._config.direction, self._action_fn, self._config.depth)
            self._bf.store(states, actions, rewards, done_mask)

    def process_state(self, state):
        """Normalize a (book, indicator) state: prices by 1e6, sizes by 1e2."""
        state_book, state_it = state
        state_book = state_book.astype('float32')
        state_book[:, 0] /= 1.e6  # bid price
        state_book[:, 1] /= 1.e2  # bid size
        state_book[:, 2] /= 1.e6  # ask price
        state_book[:, 3] /= 1.e2  # ask size
        return (state_book, state_it)
mq.reset() mq.jump_to_time(time) lob_copy = copy.deepcopy(lob) lob_copy.update_own_order(a_price, amount) for idx, message in mq.pop_to_next_time(next_time): lob_copy.process(**message) if lob_copy.own_amount_to_trade == 0: break return [lob_copy.own_amount_to_trade, lob_copy.own_reward] path_target = '../data/%s_Q_dp_%s.npy' % (args.tic,args.V) oq = Order_Queue(file_order) mq = Message_Queue(file_msg) if args.mode == 'train': np.save(path_target, Calculate_Q(args.V, args.H, args.T, args.I, args.L,oq,mq)) elif args.mode == 'test': Q = np.load(path_target) Optimal_Q = Optimal_strategy(Q) rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, Optimal_action) print(rewards) print(np.mean(rewards))
help='Buy 1, Sell -1', type=int) parser.add_argument('--start', default=34200, help='Start Time', type=float) parser.add_argument('--end', default=34500, help='End Time', type=float) parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) parser.add_argument('--tol', default=1e-8, help='Remaining Time To Submit Market Order', type=float) parser.add_argument('--base_point', default=100, help='Base Point', type=int) args = parser.parse_args() mq = Message_Queue(args.file_msg) lob = Limit_Order_book(own_amount_to_trade=args.order_size, own_init_price=-args.order_direction * Limit_Order_book._DUMMY_VARIABLE, own_trade_type=args.order_direction) for idx, message in mq.pop_to_next_time(args.start): lob.process(**message) def optimal(time, lob, mq): if time == ( args.end - args.tol ): # This code force that (args.end-args.start) is a multiple of args.tol if lob.own_amount_to_trade == 0: return lob.own_reward else:
print('Add Buy Order %s' % status) elif idx in [41]: print('Add Sell Order %s' % status) elif idx in [5]: print('Execute Hidden Order %s' % status) elif idx in [46]: print('Delete Buy Order %s' % status) elif idx in [47]: print('Delete Sell Order %s' % status) if status == '[FAIL]': print('ERROR! idx %d msg %s' % (idx, str(msg))) message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' mq = Message_Queue(message_path) book_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' df_book = pd.read_csv(book_path, header=None) level = 10 ask_book = df_book[np.arange(level) * 4].values ask_size_book = df_book[1 + np.arange(level) * 4].values bid_book = df_book[2 + np.arange(level) * 4].values bid_size_book = df_book[3 + np.arange(level) * 4].values book = np.concatenate([ tmp[:, :, np.newaxis] for tmp in [bid_book, bid_size_book, ask_book, ask_size_book] ], axis=2) for idx, message in mq.iterate_queue():
default=1, help='Buy 1, Sell -1', type=int) parser.add_argument('--start', default=34200, help='Start Time', type=float) parser.add_argument('--end', default=34500, help='End Time', type=float) parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) parser.add_argument('--tol', default=1e-8, help='Remaining Time To Submit Market Order', type=float) args = parser.parse_args() mq = Message_Queue(args.file_msg) lob = Limit_Order_book(own_amount_to_trade=args.order_size, own_init_price=-args.order_direction * Limit_Order_book._DUMMY_VARIABLE, own_trade_type=args.order_direction) for idx, message in mq.pop_to_next_time(args.start): lob.process(**message) lob.update_own_order(args.order_direction * Limit_Order_book._DUMMY_VARIABLE) current_time = args.start while lob.own_amount_to_trade > 0 and not mq.finished(): current_time += args.adj_freq for idx, message in mq.pop_to_next_time(current_time): lob.process(**message) if lob.own_amount_to_trade == 0:
help='Adjustment Frequency', type=float) parser.add_argument('--tol', default=100, help='Remaining Time To Submit Market Order', type=float) parser.add_argument('--num', default=10, help='The number of base points to go', type=int) args = parser.parse_args() # Use the train_start and train_end to find the best num. H: the total amount of time to execute the orders. file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % ( args.tic) mq = Message_Queue(file_msg) lob = Limit_Order_book(own_amount_to_trade=args.order_size, own_init_price=-args.order_direction * Limit_Order_book._DUMMY_VARIABLE, own_trade_type=args.order_direction) for idx, message in mq.pop_to_next_time(args.train_start): lob.process(**message) current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 init_price = np.arange(current_mid_price - args.num * args.base_point, current_mid_price + args.num * args.base_point, args.base_point) init_price = init_price[init_price > 0] reward = np.zeros(init_price.shape)