def approximation():
    """Replay the saved linear-model policy on the sample paths and export stats.

    For every horizon ``K`` in ``[50, 100, 200]`` the model pickled at
    ``approximation/<K>/linear_models.pkl`` is loaded, each path returned by
    ``get_paths(K)`` is simulated from the initial state, and one
    ``calculate_stats`` row per path is written to
    ``approximation/results/<K>.csv``.
    """
    # Best effort: a failure to create the directory is printed, not raised.
    try:
        os.mkdir("approximation/results/")
    except Exception as err:
        print(err)

    for K in [50, 100, 200]:
        rows = []
        sample_paths = get_paths(K)
        with open(f"approximation/{K}/linear_models.pkl", "rb") as handle:
            linear_models = joblib.load(handle)

        for path_name in sample_paths:
            current = get_initial_state()
            trajectory = [current]
            step_costs = [0]  # the initial state carries a cost of 0
            for step, tick in enumerate(sample_paths[path_name]):
                # The helpers always receive copies of the current state.
                chosen = get_linear_parametrized_action(
                    linear_models, current.copy(), step
                )
                current, incurred = transition_and_cost(
                    current.copy(), chosen, tick
                )
                trajectory.append(current)
                step_costs.append(incurred)
            rows.append(calculate_stats(trajectory, step_costs, path_name))

        table = pd.DataFrame(rows, columns=COLS)
        table.to_csv(f"approximation/results/{K}.csv", index=False)
def back_recursion():
    """Replay the saved back-recursion policy on the sample paths and export stats.

    For every horizon ``K`` in ``[50, 100, 200]`` the table pickled at
    ``back_recursion/<K>/back_recursion.pkl`` is loaded. At step ``i`` the
    action is read from ``model[i]['U']`` keyed by the current state as a
    tuple. One ``calculate_stats`` row per path is written to
    ``back_recursion/results/<K>.csv``.
    """
    # Best effort: a failure to create the directory is printed, not raised.
    try:
        os.mkdir("back_recursion/results/")
    except Exception as err:
        print(err)

    for K in [50, 100, 200]:
        rows = []
        sample_paths = get_paths(K)
        with open(f"back_recursion/{K}/back_recursion.pkl", "rb") as handle:
            policy_table = joblib.load(handle)

        for path_name in sample_paths:
            current = get_initial_state()
            trajectory = [current]
            step_costs = [0]  # the initial state carries a cost of 0
            for step, tick in enumerate(sample_paths[path_name]):
                working_copy = current.copy()
                chosen = policy_table[step]['U'][tuple(current)]
                current, incurred = transition_and_cost(working_copy, chosen, tick)
                trajectory.append(current)
                step_costs.append(incurred)
            rows.append(calculate_stats(trajectory, step_costs, path_name))

        table = pd.DataFrame(rows, columns=COLS)
        table.to_csv(f"back_recursion/results/{K}.csv", index=False)
def compute_policy(get_action, K, path_name):
    """Run a policy along a stored path and log two snapshots per step.

    The path is loaded from ``paths/<K>/<path_name>.npy``. For each of the
    ``K`` steps ``get_action(i, state, K)`` is queried and a snapshot is
    logged before the tick is applied (price increment 0, realized PnL 0).
    The state is then advanced with ``transition_and_cost`` and a second
    snapshot is logged with the tick as price increment and the returned
    cost as realized PnL.

    Args:
        get_action: Callable invoked as ``get_action(i, state, K)``; its
            result is indexed at positions 0..3 (logged as bid price offset,
            bid volume, ask price offset, ask volume).
        K: Number of steps to simulate; also selects the path directory.
        path_name: Base name of the ``.npy`` path file.

    Returns:
        Tuple ``(bid_prices, bid_volumes, ask_prices, ask_volumes,
        unrealized_pnls, realized_pnls, net_position, cprices)``.
        ``realized_pnls`` and ``cprices`` are cumulative sums (numpy arrays);
        the others are lists with two entries per step.
    """
    bid_prices, bid_volumes = [], []
    ask_prices, ask_volumes = [], []
    unrealized_pnls, realized_pnls, net_position = [], [], []
    moves = []  # price increments: a 0 is inserted ahead of every tick

    ticks = np.load(f"paths/{K}/{path_name}.npy")
    current = get_initial_state()

    for step in range(K):
        print("Step", step)
        quote = get_action(step, current, K)

        ## Snapshot before the jump: price level unchanged, nothing realized
        moves.append(0)
        level = np.cumsum(moves)[-1]
        bid_prices.append(quote[0] + level)
        bid_volumes.append(quote[1])
        ask_prices.append(quote[2] + level)
        ask_volumes.append(quote[3])
        unrealized_pnls.append(current[1])
        realized_pnls.append(0)
        net_position.append(current[0])

        jump = ticks[step]
        current, booked = transition_and_cost(current, quote, jump)

        ## Snapshot after the jump: quotes stay at their pre-jump level
        moves.append(jump)
        level = np.cumsum(moves)[-1]
        bid_prices.append(quote[0] + level - jump)
        bid_volumes.append(quote[1])
        ask_prices.append(quote[2] + level - jump)
        ask_volumes.append(quote[3])
        unrealized_pnls.append(current[1])
        realized_pnls.append(booked)
        net_position.append(current[0])

    realized_pnls = np.cumsum(realized_pnls)
    cprices = np.cumsum(moves)
    return (
        bid_prices,
        bid_volumes,
        ask_prices,
        ask_volumes,
        unrealized_pnls,
        realized_pnls,
        net_position,
        cprices,
    )
def rollout_policy(K, path_name):
    """Turn a recorded rollout into the same per-step log as a live policy run.

    The path is loaded from ``paths/<K>/<path_name>.npy`` and zipped with the
    values of the module-level ``rollout`` object. Each step logs one
    snapshot before the jump (price increment 0, realized PnL 0) and one
    after it (price increment ``jump``, realized PnL ``cost``).

    Args:
        K: Selects the path directory.
        path_name: Base name of the ``.npy`` path file.

    Returns:
        Tuple ``(bid_prices, bid_volumes, ask_prices, ask_volumes,
        unrealized_pnls, realized_pnls, net_position, cprices)``.
        ``realized_pnls`` and ``cprices`` are cumulative sums (numpy arrays);
        the others are lists with two entries per step.
    """
    bid_prices, bid_volumes = [], []
    ask_prices, ask_volumes = [], []
    unrealized_pnls, realized_pnls, net_position = [], [], []
    moves = []  # price increments: a 0 is inserted ahead of every jump

    jumps = np.load(f"paths/{K}/{path_name}.npy")
    # The result is not used: the loop below supplies every state it needs.
    get_initial_state()

    # NOTE(review): assumes `rollout` is a mapping whose values iterate as
    # ([state, next_state] pairs, actions, costs), in that order - confirm
    # where `rollout` is bound.
    recorded = zip(*rollout.values(), jumps)
    for step, (pair, action, cost, jump) in enumerate(recorded):
        print("Step", step)
        before, after = pair

        ## Snapshot before the jump: price level unchanged, nothing realized
        moves.append(0)
        level = np.cumsum(moves)[-1]
        bid_prices.append(action[0] + level)
        bid_volumes.append(action[1])
        ask_prices.append(action[2] + level)
        ask_volumes.append(action[3])
        unrealized_pnls.append(before[1])
        realized_pnls.append(0)
        net_position.append(before[0])

        ## Snapshot after the jump: quotes stay at their pre-jump level
        moves.append(jump)
        level = np.cumsum(moves)[-1]
        bid_prices.append(action[0] + level - jump)
        bid_volumes.append(action[1])
        ask_prices.append(action[2] + level - jump)
        ask_volumes.append(action[3])
        unrealized_pnls.append(after[1])
        realized_pnls.append(cost)
        net_position.append(after[0])

    realized_pnls = np.cumsum(realized_pnls)
    cprices = np.cumsum(moves)
    return (
        bid_prices,
        bid_volumes,
        ask_prices,
        ask_volumes,
        unrealized_pnls,
        realized_pnls,
        net_position,
        cprices,
    )
def rollout(K_, path_name):
    """Build a one-step-lookahead rollout policy along a stored path and save it.

    At each of the ``K_`` stages every action from ``get_possible_actions`` is
    scored. For each tick in ``TICKS`` the immediate cost plus
    ``np.mean(costs)`` (after calling ``deeper`` on the successor state) is
    weighted by the ``coocc`` entry for (last state component, tick) and
    summed. The action with the largest score is applied using the realised
    tick ``path[k]``.

    Side effects:
        * creates ``rollout/<K_>/`` (errors are printed, not raised);
        * writes ``rollout/<K_>/<path_name>_policy.pkl`` with the keys
          ``states`` ([state, next_state] pairs), ``policy`` and ``rewards``;
        * stores the elapsed wall time under ``["rollout"][K_]`` in
          ``timers/timer_dict.pkl``.

    Args:
        K_: Number of stages; also selects the path and output directories.
        path_name: Base name of the ``.npy`` path file.
    """
    # `deeper` presumably fills this module-level list - TODO confirm.
    global costs

    try:
        os.mkdir(f"rollout/{K_}/")
    except Exception as err:
        print(err)

    path = np.load(f"paths/{K_}/{path_name}.npy")

    ###################################################################################################
    start = time.time()

    current = get_initial_state()
    visited = []
    chosen_actions = []
    collected = []

    ## Only 1 step ahead
    for stage in range(K_):
        print("Stage", stage)
        candidates = get_possible_actions(current)

        scores = []
        for candidate in candidates:
            expected = 0
            for tick in TICKS:
                successor, immediate = transition_and_cost(
                    current.copy(), candidate, tick
                )
                weight = coocc[current[-1] + TICK_LIMIT, tick + TICK_LIMIT]

                ## Approximate Cost To Go Function
                costs = []
                deeper(successor, 0, 1, 0)
                expected += (np.mean(costs) + immediate) * weight
            scores.append(expected)

        best_action = candidates[np.argmax(scores)]
        successor, reward = transition_and_cost(
            current.copy(), best_action, path[stage]
        )

        visited.append([current, successor])
        chosen_actions.append(best_action)
        collected.append(reward)
        current = successor

    end = time.time()
    ###################################################################################################

    elapsed = end - start
    result = {"states": visited, "policy": chosen_actions, "rewards": collected}
    with open(f'rollout/{K_}/{path_name}_policy.pkl', 'wb') as handle:
        joblib.dump(result, handle)

    try:
        with open("timers/timer_dict.pkl", "rb") as handle:
            timer_dict = joblib.load(handle)
        if not timer_dict.get("rollout", None):
            timer_dict["rollout"] = {}
        timer_dict["rollout"][K_] = elapsed
        with open("timers/timer_dict.pkl", "wb") as handle:
            joblib.dump(timer_dict, handle)
    except Exception as err:
        # Any failure above (e.g. no timer file yet) starts a fresh timer dict.
        print(err)
        with open("timers/timer_dict.pkl", "wb") as handle:
            joblib.dump({"rollout": {K_: elapsed}}, handle)
import sys, os import pickle import joblib import time ################################################################################################### coocc = pd.read_csv('data/cooccurrence_matrix.csv', index_col=0) coocc = (coocc.T / coocc.sum(axis=1)).T.values paths = np.load('paths/sample_paths.npy') ################################################################################################### ## Get some set of actions sorted by prority to reduce the number of calculations during the rollout. actions = get_possible_actions(get_initial_state()) score = [abs(action[0]) + abs(action[2]) for action in actions] idc = np.argsort(score) SORTED_ACTIONS = [actions[idx] for idx in idc] A_PROBS = [5] * 20 + [2] * 4 A_PROBS = np.array(A_PROBS) / sum(A_PROBS) A_RANGE = np.arange(24) ################################################################################################### def get_action_subset(state): actions = get_possible_actions(state) if len(actions) == 5:
import sys, os import joblib import time ################################################################################################### argparser = ArgumentParser() argparser.add_argument("K") args = argparser.parse_args() coocc = pd.read_csv('data/cooccurrence_matrix.csv', index_col=0) coocc = (coocc.T / coocc.sum(axis=1)).T.values with open(f'states/states_10000.pkl', 'rb') as file: states = joblib.load(file) states[0] = {tuple(get_initial_state()): 10_000} assert int(args.K) <= len(states) ################################################################################################### def approx(K_): try: os.mkdir(f"approximation/{K_}") except Exception as e: print(e) start = time.time() np.random.seed(72) models = {}
# Count, for every step of every sampled path, how often each successor state
# is produced by applying each possible action to the current state.
np.random.seed(72)
K = 1
LENGTH = 201
NUM_PATHS = 10_000
paths = np.load("data/sample_paths.npy")[:NUM_PATHS, :LENGTH]

# state_at_step[t] maps a state (as a tuple) to its occurrence count at step t.
state_at_step = {i: {} for i in range(LENGTH)}

start = time.time()
for path_idx, path in enumerate(paths):
    print(f"Progress: {(path_idx + 1) / len(paths) * 100}%")
    state = get_initial_state()
    # NOTE(review): `state` is not advanced anywhere in the visible code, so
    # every step expands the initial state - confirm against the full script.
    for i, v in enumerate(path):
        if i + 1 == LENGTH:
            break
        actions = get_possible_actions(state)
        counts = state_at_step[i + 1]
        for action in actions:
            next_state, cost = transition_and_cost(state.copy(), action, v)
            next_state = tuple(next_state)
            # dict.get replaces the bare `except:`, which also swallowed
            # unrelated errors (even KeyboardInterrupt) and reset the count.
            counts[next_state] = counts.get(next_state, 0) + 1