def render(self, savefig=False, filename='myfig'):
    """Matplotlib rendering of each step.

    Args:
        savefig (bool): Whether to save the figure as an image or not.
        filename (str): Name of the image file.
    """
    if self._first_render:
        # One subplot per product, plus one extra subplot for the combined
        # spread when more than one product is traded.
        self._f, self._ax = plt.subplots(
            len(self._spread_coefficients) + int(len(self._spread_coefficients) > 1),
            sharex=True
        )
        if len(self._spread_coefficients) == 1:
            # plt.subplots returns a bare Axes (not a list) when only one
            # subplot is created; normalize so indexing below always works.
            self._ax = [self._ax]
        self._f.set_size_inches(12, 6)
        self._first_render = False
        # Track window-close events so step() can end the episode.
        self._f.canvas.mpl_connect('close_event', self._handle_close)
    if len(self._spread_coefficients) > 1:
        # TODO: To be checked
        # Draw the latest bid/ask segment for each individual product.
        # Prices are interleaved per product: index 2*i is the bid,
        # 2*i + 1 is the ask.
        for prod_i in range(len(self._spread_coefficients)):
            bid = self._prices_history[-1][2 * prod_i]
            ask = self._prices_history[-1][2 * prod_i + 1]
            self._ax[prod_i].plot([self._iteration, self._iteration + 1],
                                  [bid, bid], color='white')
            self._ax[prod_i].plot([self._iteration, self._iteration + 1],
                                  [ask, ask], color='white')
            self._ax[prod_i].set_title('Product {} (spread coef {})'.format(
                prod_i, str(self._spread_coefficients[prod_i])))
    # Spread price
    prices = self._prices_history[-1]
    bid, ask = calc_spread(prices, self._spread_coefficients)
    self._ax[-1].plot([self._iteration, self._iteration + 1],
                      [bid, bid], color='white')
    self._ax[-1].plot([self._iteration, self._iteration + 1],
                      [ask, ask], color='white')
    # Place buy/sell markers slightly inside the current y-range so they
    # stay visible regardless of the plot's scale.
    ymin, ymax = self._ax[-1].get_ylim()
    yrange = ymax - ymin
    if (self._action == self._actions['sell']).all():
        self._ax[-1].scatter(self._iteration + 0.5, bid + 0.03 * yrange,
                             color='orangered', marker='v')
    elif (self._action == self._actions['buy']).all():
        self._ax[-1].scatter(self._iteration + 0.5, ask - 0.03 * yrange,
                             color='lawngreen', marker='^')
    # NOTE(review): self._position is assumed one-hot over
    # (flat, long, short) — .index(1) raises ValueError otherwise; confirm
    # against the class's _positions definition.
    plt.suptitle('Cumulated Reward: ' + "%.2f" % self._total_reward + ' ~ ' +
                 'Cumulated PnL: ' + "%.2f" % self._total_pnl + ' ~ ' +
                 'Position: ' + ['flat', 'long', 'short'][list(self._position).index(1)] + ' ~ ' +
                 'Entry Price: ' + "%.2f" % self._entry_price)
    self._f.tight_layout()
    plt.xticks(range(self._iteration)[::5])
    # Keep a sliding window of at most ~80 iterations visible.
    plt.xlim([max(0, self._iteration - 80.5), self._iteration + 0.5])
    plt.subplots_adjust(top=0.85)
    plt.pause(0.01)
    if savefig:
        plt.savefig(filename)
def test_calc_spread():
    """Check calc_spread on a two-product example with a negative coefficient."""
    # Two legs: +1x of product 0, -0.1x of product 1.
    coefficients = [1, -0.1]
    # Quotes are interleaved per product: [bid0, ask0, bid1, ask1].
    quotes = np.array([1, 2, 10, 20])
    # For this input: spread bid = 1*1 - 0.1*20 = -1,
    #                 spread ask = 1*2 - 0.1*10 = 1.
    expected = (-1, 1)
    assert calc_spread(quotes, coefficients) == expected
def step(self, action):
    """Take an action (buy/sell/hold) and computes the immediate reward.

    Args:
        action (numpy.array): Action to be taken, one-hot encoded.

    Returns:
        tuple:
        - observation (numpy.array): Agent's observation of the current
          environment.
        - reward (float): Amount of reward returned after previous action.
        - done (bool): Whether the episode has ended, in which case further
          step() calls will return undefined results.
        - info (dict): Contains auxiliary diagnostic information (helpful
          for debugging, and sometimes learning).
    """
    # NOTE(review): `assert` is stripped under `python -O`; input validation
    # would survive optimization as an explicit raise, but changing it would
    # alter the exception type callers may rely on.
    assert any([(action == x).all() for x in self._actions.values()])
    self._action = action
    self._iteration += 1
    done = False
    instant_pnl = 0
    info = {}
    # Every step costs the time fee, whatever the action.
    reward = -self._time_fee
    if all(action == self._actions['buy']):
        reward -= self._trading_fee
        if all(self._position == self._positions['flat']):
            # Flat -> long: open at the spread's ask price.
            self._position = self._positions['long']
            self._entry_price = calc_spread(
                self._prices_history[-1], self._spread_coefficients)[1]  # Ask
        elif all(self._position == self._positions['short']):
            # Short -> flat: buying back closes the short; realize the PnL.
            self._exit_price = calc_spread(
                self._prices_history[-1], self._spread_coefficients)[1]  # Ask
            instant_pnl = self._entry_price - self._exit_price
            self._position = self._positions['flat']
            self._entry_price = 0
    elif all(action == self._actions['sell']):
        reward -= self._trading_fee
        if all(self._position == self._positions['flat']):
            # Flat -> short: open at the spread's bid price.
            self._position = self._positions['short']
            self._entry_price = calc_spread(
                self._prices_history[-1], self._spread_coefficients)[0]  # Bid
        elif all(self._position == self._positions['long']):
            # Long -> flat: selling closes the long; realize the PnL.
            self._exit_price = calc_spread(
                self._prices_history[-1], self._spread_coefficients)[0]  # Bid
            instant_pnl = self._exit_price - self._entry_price
            self._position = self._positions['flat']
            self._entry_price = 0

    # Realized PnL feeds both the step reward and the running totals.
    reward += instant_pnl
    self._total_pnl += instant_pnl
    self._total_reward += reward

    # Game over logic
    # NOTE(review): `.next()` is the Python 2 iterator protocol — presumably
    # the data generator defines a `next()` method; verify against its class.
    try:
        self._prices_history.append(self._data_generator.next())
    except StopIteration:
        done = True
        info['status'] = 'No more data.'
    if self._iteration >= self._episode_length:
        done = True
        info['status'] = 'Time out.'
    if self._closed_plot:
        info['status'] = 'Closed plot'

    observation = self._get_observation()
    return observation, reward, done, info