예제 #1
0
class TradingEnvironment(Environment):
    """
    A simple environment of financial trading.

    State: float list (last 5 close prices, SMA10, position value, cash value)

    Action: (1, 0, -1) as BUY, HOLD, SELL

    Reward: return of total value (position value + cash value)
    """

    def __init__(self, **kwargs):
        """
        Build the market data feeds, execution simulator, portfolio and OMS.

        kwargs: {
                sym: symbol handed to the data provider and the simulator,
                start: start bound handed to the data provider and simulator,
                end: end bound handed to the data provider and simulator,
                execution: only 'single_stock' is handled here,
                portfolio: only 'basic' is handled here,
                valuer: only 'market' is handled here,
                }

        Raises:
            KeyError: if one of the keys above is missing.
            ValueError: if execution, portfolio or valuer names an option
                this class does not handle.
        """

        ### common variables
        self.__sym = kwargs['sym']
        self.__start = kwargs['start']
        self.__end = kwargs['end']

        ### market data providers
        self.__provider = MarketDataProvider('quandl', self.__sym,
                                             self.__start, self.__end)
        self.__OHLCV = self.__provider.getMarketData('OHLCV')
        # NOTE(review): the attribute is named sma10 and the class docstring
        # mentions SMA10, yet period=1 is requested -- confirm whether
        # period=10 was intended.
        self.__sma10 = self.__provider.getMarketData('close_sma', period=1)

        # Fail early on an unknown option instead of leaving the attribute
        # unset and hitting an AttributeError when the OMS is built below.
        execution = kwargs['execution']
        if execution == 'single_stock':
            self.__executionservice = SingleStockExecutionSimulator(
                self.__sym, self.__start, self.__end, self.__OHLCV)
        else:
            raise ValueError(
                'unsupported execution: {!r}'.format(execution))

        self.__initial_value = 100000
        self.__current_value = self.__initial_value
        portfolio = kwargs['portfolio']
        if portfolio == 'basic':
            self.__portfolio = Portfolio(self.__current_value)
        else:
            raise ValueError(
                'unsupported portfolio: {!r}'.format(portfolio))

        valuer = kwargs['valuer']
        if valuer == 'market':
            self.__portfolio_valuer = MarketValuer()
        else:
            raise ValueError('unsupported valuer: {!r}'.format(valuer))

        self.__oms = OMS(self.__executionservice, self.__portfolio,
                         self.__portfolio_valuer)

    @property
    def states(self):
        """
        Return the state space. Might include subdicts if multiple states are available simultaneously.
        Returns: dict of state properties (shape and type).
        """
        return {'type': 'float', 'shape': (8, )}

    @property
    def actions(self):
        """
        Return the action space. Might include subdicts if multiple actions are available simultaneously.

        Returns: dict of action properties (continuous, number of actions)

        """
        return {'num_actions': 3, 'continuous': False}

    def execute(self, action):
        """
        Executes action, observes next state and reward.

        Args:
            action: Indexable whose first element selects the operation:
                > 0 - BUY, 0 - HOLD, < 0 - SELL

        Returns: flat tuple (current price, SMA value, position value,
            cash, reward, terminate).

        NOTE(review): this is a flat 6-tuple with 4 state values, while
        `states` declares shape (8, ) -- confirm what callers expect.
        """

        # The symbol is stored name-mangled as self.__sym; no `sym`
        # attribute is defined by this class.
        if action[0] > 0:
            self.__oms.placeMarket(self.__sym, 1,
                                   self.__portfolio.cash_value())
        elif action[0] < 0:
            self.__oms.placeMarket(self.__sym, -1,
                                   self.__portfolio.position_value(self.__sym))
        else:
            self.__oms.no_operation(self.__sym)

        ### the order has been placed; time rolls to next period, e.g. next day
        self.__OHLCV.next()
        self.__sma10.next()

        # Only the fourth of the five fields of the current bar is used.
        _, _, _, self.current_price, _ = self.__OHLCV.current()

        self.position_value = self.__portfolio.position(
            self.__sym) * self.current_price
        self.market_value = self.position_value + self.__portfolio.cash
        # Reward is the one-period relative change of total market value.
        self.reward = (self.market_value -
                       self.__current_value) / self.__current_value
        self.__current_value = self.market_value

        # Flag termination once total value drops below 80% of the start.
        self.terminate = bool(
            self.market_value < self.__initial_value * 0.8)

        # current() is called here just like on the OHLCV feed above, so the
        # tuple carries the value rather than a bound method.
        return (self.current_price, self.__sma10.current(),
                self.position_value, self.__portfolio.cash, self.reward,
                self.terminate)

    def reset(self):
        """
        Reset environment and setup for new episode.

        Currently a no-op: nothing is reset and None is returned.

        Returns: initial state of resetted environment (not implemented).
        """
        pass

    def close(self):
        """
        Close environment. No other method calls possible afterwards.

        Currently a no-op.
        """
        pass

    def __str__(self):
        """Return the fixed display name of this environment."""
        return 'TradingEnvironment'