def run_live(self, agent, test=True):
    """Entry point for running `agent` live; currently an unimplemented stub.

    Neither `agent` nor `test` is used: the method unconditionally calls
    raise_refactor() (presumably signalling that this path still needs to be
    rewritten -- confirm against its definition).
    """
    raise_refactor()
def xform_data(self, df):
    """Stub for the data-transformation step; `df` is currently unused.

    The method unconditionally calls raise_refactor().
    """
    # TODO here was autoencoder, talib indicators, price-anchoring
    raise_refactor()
def execute(self, action):
    """Apply one discrete trading action and advance the simulation a step.

    `action` selects the trade size: 0 sells 2% of the held value, 1 holds,
    and 2 buys with 2% of the available cash. A trade is only executed when
    both the source balance and the trade amount reach `self.min_trade`; the
    exchange fee is deducted from the side that receives the proceeds.
    After the trade, the step's target change from `self.data` is applied to
    the held value and to a parallel "buy and hold" baseline, and both
    totals are recorded.

    Returns:
        A `(next_state, terminal, reward)` tuple. `terminal` is 0/1 based on
        the episode length, or `True` when cash or value went negative.
        `reward` is 0 on every step except a terminal step in TRAIN/TEST
        mode, where it is `self.get_return()`, or a penalty of
        `-(start_cash + start_value)` if every recorded signal is identical.

    Raises:
        KeyError: if `action` is not 0, 1 or 2, or if `EXCHANGE` has no
            entry in the fee table.
    """
    acc = self.acc[self.mode.value]
    totals = acc.step.totals

    # Fraction of the relevant balance to trade for each discrete action.
    act_pct = {0: -.02, 1: 0, 2: .02}[action]
    # Buys are sized from cash; sells (and the zero-sized hold) from value.
    act_btc = act_pct * (acc.step.cash if act_pct > 0 else acc.step.value)

    fee = {
        # https://support.cbpro.com/customer/en/portal/articles/2425097-what-are-the-fees-on-cbpro-
        Exchange.cbpro: 0.0025,
        # https://www.kraken.com/en-us/help/fees
        Exchange.KRAKEN: 0.0026,
    }[EXCHANGE]

    # Perform the trade. Regardless of mode, a trade the account cannot
    # afford is not executed: if the source balance is below min_trade the
    # recorded signal becomes a large negative marker, and if only the trade
    # amount is below min_trade the signal is clipped to 0.
    if act_pct > 0:
        if acc.step.cash < self.min_trade:
            act_btc = -(self.start_cash + self.start_value)
        elif act_btc < self.min_trade:
            act_btc = 0
        else:
            acc.step.value += act_btc - act_btc * fee
            acc.step.cash -= act_btc
    elif act_pct < 0:
        if acc.step.value < self.min_trade:
            act_btc = -(self.start_cash + self.start_value)
        elif abs(act_btc) < self.min_trade:
            act_btc = 0
        else:
            acc.step.cash += abs(act_btc) - abs(act_btc) * fee
            acc.step.value -= abs(act_btc)

    acc.step.signals.append(float(act_btc))  # clipped signal

    # Next delta. [1,2,2].pct_change() == [NaN, 1, 0]
    _, y = self.data.get_data(acc.ep.i, acc.step.i)  # TODO verify
    pct_change = y[self.data.target]

    acc.step.value += pct_change * acc.step.value
    total_now = acc.step.value + acc.step.cash
    totals.trade.append(total_now)

    # Track what the portfolio would be worth "if I held", so the actual
    # return can be compared against its _advantage_ over holding.
    hold_before = acc.step.hold_value
    acc.step.hold_value += pct_change * hold_before
    totals.hold.append(acc.step.hold_value + self.start_cash)

    reward = 0

    acc.step.i += 1

    # Publish the balances, relative to their starting amounts, for the new
    # step index before building the next state.
    self.data.set_cash_val(
        acc.ep.i,
        acc.step.i,
        acc.step.cash / self.start_cash,
        acc.step.value / self.start_value,
    )
    next_state = self.get_next_state()

    # NOTE: the flag is deliberately left as int (0/1) here and bool (True)
    # below, exactly as before, so callers see unchanged return values.
    terminal = int(acc.step.i + 1 >= self.EPISODE_LEN)
    if acc.step.value < 0 or acc.step.cash < 0:
        terminal = True

    if terminal and self.mode in (Mode.TRAIN, Mode.TEST):
        # We're done.
        acc.step.signals.append(0)  # Add one last signal (to match length)
        reward = self.get_return()
        # A 0 was just appended, so a single unique value means every
        # recorded signal equals 0: slam if you don't do anything.
        if np.unique(acc.step.signals).shape[0] == 1:
            reward = -(self.start_cash + self.start_value)

    if terminal and self.mode in (Mode.LIVE, Mode.TEST_LIVE):
        raise_refactor()

    return next_state, terminal, reward
def fetch_more(self):
    """Stub for fetching additional data; unconditionally calls raise_refactor()."""
    raise_refactor()