Example 1
import copy
import math

import numpy as np


def portfolio_ac(eval_episodes, environment, agent):
    """
    Simulates a portfolio strategy suggested by a trained actor-critic agent
    in a given environment.

    Parameters
    ----------
    eval_episodes : int
        Number of episodes to simulate.
    environment : Env instance
        Environment in which to simulate the portfolio strategy.
    agent : AgentAC instance
        Actor-critic agent, preferably trained in the same environment that
        is simulated here.

    Returns
    -------
    final_u : ndarray
        Utility of terminal wealth for each simulated episode.
    alloc_to_risk : ndarray
        Share of wealth invested in the risky asset in each period, for all
        simulated episodes.
    ret : ndarray
        Simple gross returns realized in each period, for all simulated
        episodes.
    """

    print("Simulating actor-critic portfolio strategy.")

    env = copy.deepcopy(environment)

    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []

    np.random.seed(111)

    for episode in range(eval_episodes):

        env.reset()

        while not env.done:

            s = env.get_state()
            # the agent proposes the desired share of wealth in the risky asset:
            a = agent.choose_action(s)
            # rebalance the portfolio to that share, net of transaction costs:
            trade = compute_trade(env.p, a, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), a)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            # realize this period's asset returns:
            sgr = env.update()
            ret.append(sgr)

        final_u.append(env.get_utility())

    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)

    return final_u, alloc_to_risk, ret
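The trade vector above comes from the project helper compute_trade, which is
not shown in these excerpts. A minimal sketch of what such a helper might
compute, assuming a two-asset portfolio p = [risk_free, risky] and a
proportional cost tcost charged on the traded amount and paid out of the
risk-free position (the project's actual cost model may differ):

import numpy as np


def compute_trade_sketch(p, target_share, tcost):
    """Hypothetical stand-in for the project's compute_trade helper.

    Solves for the amount delta of the risky asset to buy (or sell) so that,
    after paying the proportional cost tcost * |delta| out of the risk-free
    position, the risky asset makes up target_share of the remaining wealth:
    (p[1] + delta) = target_share * (wealth - tcost * |delta|).
    """
    wealth = np.sum(p)
    if target_share * wealth >= p[1]:
        # buying the risky asset: the cost shrinks post-trade wealth
        delta = (target_share * wealth - p[1]) / (1.0 + target_share * tcost)
    else:
        # selling the risky asset
        delta = (target_share * wealth - p[1]) / (1.0 - target_share * tcost)
    # the risk-free leg finances the risky leg plus the transaction cost
    return np.array([-delta - tcost * abs(delta), delta])

With tcost = 0 this reduces to the familiar delta = target_share * wealth - p[1],
i.e. trading straight to the target allocation.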
Example 2
def portfolio_dqn(eval_episodes, environment, agent):
    """
    Simulates a portfolio strategy suggested by a trained DQN
    agent in a given environment.

    Parameters
    ----------
    eval_episodes : int
        Number of episodes to simulate.
    environment : Env instance
        Environment in which to simulate the portfolio strategy.
    agent : AgentDQN instance
        DQN agent, preferably trained in the same environment that is
        simulated here.

    Returns
    -------
    final_u : ndarray
        Utility of terminal wealth for each simulated episode.
    alloc_to_risk : ndarray
        Share of wealth invested in the risky asset in each period, for all
        simulated episodes.
    ret : ndarray
        Simple gross returns realized in each period, for all simulated
        episodes.
    """

    print("Simulating DQN portfolio strategy.")

    env = copy.deepcopy(environment)

    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []

    np.random.seed(111)

    for episode in range(eval_episodes):
        env.reset()
        while not env.done:
            # dummy draw: keeps the random number stream aligned with the
            # other strategy simulations
            np.random.rand()
            state = env.get_state()
            # greedy action: the allocation with the highest predicted Q-value
            q_pred = np.squeeze(agent.qnn.predict(state))
            action = agent.action_space[argmax_index(q_pred)]
            trade = compute_trade(env.p, action, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), action)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            sgr = env.update()
            ret.append(sgr)
        final_u.append(env.get_utility())

    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)

    return final_u, alloc_to_risk, ret
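argmax_index is another project helper that is not shown here; judging from
its use above, it maps the vector of predicted Q-values to the index of the
greedy action. A minimal sketch, assuming it simply returns the position of
the first maximum:

import numpy as np


def argmax_index_sketch(q_values):
    """Hypothetical stand-in for the project's argmax_index helper: index of
    the (first) largest predicted Q-value, i.e. the greedy action."""
    return int(np.argmax(q_values))

For example, argmax_index_sketch([0.1, 0.4, 0.25]) returns 1, so the agent
would pick agent.action_space[1] as its allocation for that step.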
Example 3
def portfolio_risky(eval_episodes, environment):
    """
    Simulates a full-risk portfolio strategy in a given environment.

    Parameters
    ----------
    eval_episodes : int
        Number of episodes to simulate.
    environment : Env instance
        Environment in which to simulate the portfolio strategy.

    Returns
    -------
    final_u : ndarray
        Utility of terminal wealth for each simulated episode.
    alloc_to_risk : ndarray
        Share of wealth invested in the risky asset in each period, for all
        simulated episodes.
    ret : ndarray
        Simple gross returns realized in each period, for all simulated
        episodes.
    """

    print("Simulating risky portfolio strategy.")

    env = copy.deepcopy(environment)

    final_u = []
    alloc_to_risk = [[] for _ in range(eval_episodes)]
    ret = []

    np.random.seed(111)

    for episode in range(eval_episodes):
        env.reset()
        while not env.done:
            # dummy draw: keeps the random number stream aligned with the
            # other strategy simulations
            np.random.rand()
            # full-risk strategy: always allocate all wealth to the risky asset
            action = 1.
            trade = compute_trade(env.p, action, env.tcost)
            env.trade(trade)
            assert math.isclose(env.p[1] / np.sum(env.p), action)
            alloc_to_risk[episode].append(env.p[1] / np.sum(env.p))
            sgr = env.update()
            ret.append(sgr)
        final_u.append(env.get_utility())

    final_u = np.array(final_u)
    alloc_to_risk = np.array(alloc_to_risk)
    ret = np.array(ret)

    return final_u, alloc_to_risk, ret
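All three simulators use the same seed and return the same three arrays, so
their outputs can be compared directly. A hypothetical driver, assuming an
environment env and trained agents ac_agent and dqn_agent have been
constructed elsewhere in the project:

eval_episodes = 1000

u_ac, alloc_ac, ret_ac = portfolio_ac(eval_episodes, env, ac_agent)
u_dqn, alloc_dqn, ret_dqn = portfolio_dqn(eval_episodes, env, dqn_agent)
u_risky, alloc_risky, ret_risky = portfolio_risky(eval_episodes, env)

# mean terminal utility as a simple summary statistic per strategy
for name, u in [("actor-critic", u_ac), ("DQN", u_dqn), ("full-risk", u_risky)]:
    print(f"{name:>12}: mean terminal utility {u.mean():.4f} (std {u.std():.4f})")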
Example 4
    def take_action(self, action):
        """
        Takes the agent's action as input and simulates the state transition.
        Returns the reward, the next state, a done flag, and the simple gross
        returns realized on the assets in this step.

        Parameters
        ----------
        action : float
            The action the agent wants to take, expressed as the desired
            share of the risky asset in the portfolio.

        Returns
        -------
        r : float
            The reward from taking the desired action.
        ss : ndarray
            The state the agent is in after taking the action.
        done : bool
            Flag indicating whether the environment is in a terminal state.
        sgr : ndarray
            Simple gross returns realized on the portfolio holdings in this
            period.
        """

        # compute trade vector to implement the desired allocation action:
        trade = compute_trade(self.p, action, self.tcost)

        # make the trade:
        self.trade(trade)

        # assert that the trade gives desired allocation:
        assert math.isclose(self.p[1] / np.sum(self.p), action)

        # realize returns for this timestep:
        sgr = self.update()

        # compute reward: the reward is sparse, paid only in the terminal
        # period as the gain in utility over the initial utility level:
        if self.done:
            r = self.get_utility() - self.init_u
        else:
            r = 0.

        # observe new state:
        ss = self.get_state()

        return r, ss, self.done, sgr
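Because take_action bundles trading, return realization, and the sparse
terminal reward into a single step, a training loop only needs the usual
reset/step pattern. A minimal sketch of such a loop, assuming an Env instance
env, an agent exposing choose_action, and a hypothetical agent.learn update
method:

n_episodes = 10_000  # assumed training budget

for episode in range(n_episodes):
    env.reset()
    s = env.get_state()
    while not env.done:
        a = agent.choose_action(s)              # desired risky-asset share
        r, ss, done, sgr = env.take_action(a)   # r is non-zero only when done
        agent.learn(s, a, r, ss, done)          # hypothetical update step
        s = ss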