import copy
import math

import numpy as np

# the helper functions compute_trade and argmax_index are assumed to be defined
# elsewhere in this code base.


def portfolio_ac(eval_episodes, environment, agent):
    """
    Simulates a portfolio strategy suggested by a trained actor-critic agent in a
    given environment.

    Parameters
    ----------
    :param eval_episodes : int
        Number of episodes to simulate.
    :param environment : Env instance
        Environment in which to simulate the portfolio strategy.
    :param agent : AgentAC instance
        Actor-critic agent (preferably trained in the same environment as is
        simulated).

    Returns
    -------
    :returns final_u : ndarray
        Array containing utility of terminal wealth for each simulated episode.
    :returns alloc_to_risk : ndarray
        Array containing the share of wealth invested into the risky asset in each
        period for all simulated episodes.
    :returns ret : ndarray
        Array containing the simple gross returns realized in each period for all
        simulated episodes.
    """
    print("Simulating actor-critic portfolio strategy.")
    env = copy.deepcopy(environment)
    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []
    np.random.seed(111)
    for episode in range(eval_episodes):
        env.reset()
        while not env.done:
            s = env.get_state()
            a = agent.choose_action(s)
            trade = compute_trade(env.p, a, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), a)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            sgr = env.update()
            ret.append(sgr)
        final_u.append(env.get_utility())
    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)
    return final_u, alloc_to_risk, ret
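

# Illustrative usage sketch: `my_env` and `my_agent` below are hypothetical
# placeholders for an Env instance and a trained AgentAC built elsewhere, and
# _example_summarize is a hypothetical helper, shown only to document the shape
# and meaning of the returned arrays.
def _example_summarize(final_u, alloc_to_risk, ret):
    """Print simple summary statistics for the arrays returned by a simulator."""
    print("mean terminal utility:       ", np.mean(final_u))
    print("std of terminal utility:     ", np.std(final_u))
    print("mean risky share per period: ", np.mean(alloc_to_risk, axis=0))
    print("mean simple gross return:    ", np.mean(ret))

# Example call:
#     final_u, alloc_to_risk, ret = portfolio_ac(1000, my_env, my_agent)
#     _example_summarize(final_u, alloc_to_risk, ret)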


def portfolio_dqn(eval_episodes, environment, agent):
    """
    Simulates a portfolio strategy suggested by a trained DQN agent in a given
    environment.

    Parameters
    ----------
    :param eval_episodes : int
        Number of episodes to simulate.
    :param environment : Env instance
        Environment in which to simulate the portfolio strategy.
    :param agent : AgentDQN instance
        DQN agent (preferably trained in the same environment as is simulated).

    Returns
    -------
    :returns final_u : ndarray
        Array containing utility of terminal wealth for each simulated episode.
    :returns alloc_to_risk : ndarray
        Array containing the share of wealth invested into the risky asset in each
        period for all simulated episodes.
    :returns ret : ndarray
        Array containing the simple gross returns realized in each period for all
        simulated episodes.
    """
    print("Simulating DQN portfolio strategy.")
    env = copy.deepcopy(environment)
    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []
    np.random.seed(111)
    for episode in range(eval_episodes):
        env.reset()
        while not env.done:
            np.random.rand()  # random context
            state = env.get_state()
            q_pred = np.squeeze(agent.qnn.predict(state))
            action = agent.action_space[argmax_index(q_pred)]
            trade = compute_trade(env.p, action, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), action)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            sgr = env.update()
            ret.append(sgr)
        final_u.append(env.get_utility())
    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)
    return final_u, alloc_to_risk, ret
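

# The DQN strategy above is greedy in the predicted Q-values: the network outputs
# one value per discrete allocation in agent.action_space, and the allocation with
# the largest value is chosen. A minimal stand-alone sketch of that step, assuming
# argmax_index returns the index of the maximal entry of a 1-D array (np.argmax is
# used as a stand-in here); _greedy_allocation is a hypothetical helper.
def _greedy_allocation(agent, state):
    """Return the allocation in agent.action_space with the highest predicted Q-value."""
    q_pred = np.squeeze(agent.qnn.predict(state))
    return agent.action_space[int(np.argmax(q_pred))]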


def portfolio_risky(eval_episodes, environment):
    """
    Simulates a full-risk portfolio strategy in a given environment.

    Parameters
    ----------
    :param eval_episodes : int
        Number of episodes to simulate.
    :param environment : Env instance
        Environment in which to simulate the portfolio strategy.

    Returns
    -------
    :returns final_u : ndarray
        Array containing utility of terminal wealth for each simulated episode.
    :returns alloc_to_risk : ndarray
        Array containing the share of wealth invested into the risky asset in each
        period for all simulated episodes.
    :returns ret : ndarray
        Array containing the simple gross returns realized in each period for all
        simulated episodes.
    """
    print("Simulating risky portfolio strategy.")
    env = copy.deepcopy(environment)
    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []
    np.random.seed(111)
    for episode in range(eval_episodes):
        env.reset()
        while not env.done:
            np.random.rand()  # random context
            action = 1.
            trade = compute_trade(env.p, action, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), action)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            sgr = env.update()
            ret.append(sgr)
        final_u.append(env.get_utility())
    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)
    return final_u, alloc_to_risk, ret
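

# Illustrative comparison sketch: all three simulators seed NumPy with 111, so a
# side-by-side evaluation on the same environment is reproducible across runs.
# _example_compare is a hypothetical helper; the agents are assumed to be already
# trained.
def _example_compare(environment, ac_agent, dqn_agent, episodes=1000):
    """Print the mean terminal utility of the actor-critic, DQN, and full-risk strategies."""
    u_ac, _, _ = portfolio_ac(episodes, environment, ac_agent)
    u_dqn, _, _ = portfolio_dqn(episodes, environment, dqn_agent)
    u_risky, _, _ = portfolio_risky(episodes, environment)
    print("actor-critic:", np.mean(u_ac))
    print("DQN:         ", np.mean(u_dqn))
    print("full risk:   ", np.mean(u_risky))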


def take_action(self, action):
    """
    Takes the agent's action as an input and simulates the state transition.
    Returns the reward, the next state, a done flag, and the simple gross returns
    that have been realized on the assets in this step.

    Parameters
    ----------
    :param action : float
        The action the agent wants to take in terms of the desired share of the
        risky asset in the portfolio.

    Returns
    -------
    :returns r : float
        The reward from taking the desired action.
    :returns ss : ndarray
        The state the agent is in after taking the action.
    :returns self.done : bool
        Flag whether the environment is in a terminal state.
    :returns sgr : ndarray
        Simple gross returns realized on the portfolio holdings in this period.
    """
    # compute trade vector to implement the desired allocation action:
    trade = compute_trade(self.p, action, self.tcost)
    # make the trade:
    self.trade(trade)
    # assert that the trade gives the desired allocation:
    assert math.isclose(self.p[1] / np.sum(self.p), action)
    # realize returns for this timestep:
    sgr = self.update()
    # compute reward: utility of terminal wealth minus initial utility at the end
    # of the episode, zero in all earlier periods:
    if self.done:
        r = self.get_utility() - self.init_u
    else:
        r = 0
    # observe new state:
    ss = self.get_state()
    return r, ss, self.done, sgr
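

# Illustrative interaction-loop sketch: how an agent might step through a single
# episode via take_action. `env` is an Env instance and `agent` is assumed to
# expose choose_action as in the simulators above; _example_episode is a
# hypothetical helper.
def _example_episode(env, agent):
    """Roll out one episode and return the cumulative reward (terminal utility gain)."""
    env.reset()
    total_reward = 0.0
    done = False
    while not done:
        state = env.get_state()
        action = agent.choose_action(state)
        r, _, done, _ = env.take_action(action)
        total_reward += r
    return total_reward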