Ejemplo n.º 1
0
 def run_live(self, agent, test=True):
     """Run the given agent against a live (or paper-trading, when ``test`` is
     True) exchange session.

     Not implemented: the body is a ``raise_refactor()`` stub — presumably a
     project helper that raises to mark code pending a refactor (TODO confirm
     its exact behavior; it is defined elsewhere in the project).
     """
     raise_refactor()
Ejemplo n.º 2
0
 def xform_data(self, df):
     """Transform the raw input DataFrame into model-ready features.

     Not implemented: per the TODO below, the removed implementation applied
     an autoencoder, TA-Lib indicators, and price anchoring. The current body
     is a ``raise_refactor()`` stub (project helper, defined elsewhere) that
     marks this as pending refactor.
     """
     # TODO here was autoencoder, talib indicators, price-anchoring
     raise_refactor()
Ejemplo n.º 3
0
    def execute(self, action):
        """Apply one discrete trading action and advance the environment one step.

        Args:
            action: discrete action index in {0, 1, 2}, mapped below to a
                trade of -2% (sell), 0% (hold), or +2% (buy).

        Returns:
            (next_state, terminal, reward) — ``next_state`` comes from
            ``self.get_next_state()``; ``terminal`` is 1/True at episode end or
            when cash/value goes negative; ``reward`` is 0 on non-terminal
            steps and the episode return (or a punishment) on terminal ones.
        """
        # Accumulator for the current mode (train/test/live); tracks per-step
        # portfolio state and per-episode totals.
        acc = self.acc[self.mode.value]
        totals = acc.step.totals
        # NOTE(review): `h` is never used within this method — dead local here.
        h = self.hypers

        # Discrete action -> trade fraction: 0=sell 2%, 1=hold, 2=buy 2%.
        act_pct = {0: -.02, 1: 0, 2: .02}[action]
        # Trade amount: a buy is a fraction of available cash; a sell (negative
        # act_pct) is a fraction of current position value.
        act_btc = act_pct * (acc.step.cash if act_pct > 0 else acc.step.value)

        # Per-exchange taker fee rate, selected by the module-level EXCHANGE.
        fee = {
            Exchange.cbpro:
            0.0025,  # https://support.cbpro.com/customer/en/portal/articles/2425097-what-are-the-fees-on-cbpro-
            Exchange.KRAKEN: 0.0026  # https://www.kraken.com/en-us/help/fees
        }[EXCHANGE]

        # Perform the trade. In training mode, we'll let it dip into negative here, but then kill and punish below.
        # In testing/live, we'll just block the trade if they can't afford it
        if act_pct > 0:  # BUY
            if acc.step.cash < self.min_trade:
                # Can't afford any trade at all: set act_btc to a large negative
                # punishment sentinel (it gets recorded in signals below).
                act_btc = -(self.start_cash + self.start_value)
            elif act_btc < self.min_trade:
                # Trade too small to execute: treat as a no-op.
                act_btc = 0
            else:
                # Position grows by the buy amount minus the exchange fee.
                acc.step.value += act_btc - act_btc * fee
            # NOTE(review): this runs for ALL three cases above — in the
            # punishment case act_btc is negative, so cash INCREASES by
            # start_cash + start_value here. Looks intentional as reward
            # shaping, but confirm against the original project.
            acc.step.cash -= act_btc

        elif act_pct < 0:  # SELL (mirror of the buy branch; act_btc is negative)
            if acc.step.value < self.min_trade:
                act_btc = -(self.start_cash + self.start_value)
            elif abs(act_btc) < self.min_trade:
                act_btc = 0
            else:
                # Cash grows by the sale proceeds minus the exchange fee.
                acc.step.cash += abs(act_btc) - abs(act_btc) * fee
            # NOTE(review): also runs in the punishment case, shrinking value
            # by start_cash + start_value — confirm intended.
            acc.step.value -= abs(act_btc)

        acc.step.signals.append(float(act_btc))  # clipped signal
        # acc.step.signals.append(np.sign(act_pct))  # indicates an attempted trade

        # next delta. [1,2,2].pct_change() == [NaN, 1, 0]
        # pct_change = self.prices_diff[acc.step.i + 1]
        _, y = self.data.get_data(acc.ep.i, acc.step.i)  # TODO verify
        # Presumably y is indexable by the target column name and holds the
        # next-step percent change of the target price — verify against
        # self.data.get_data's contract.
        pct_change = y[self.data.target]

        # Mark the position to market: compound the held value by the price move.
        acc.step.value += pct_change * acc.step.value
        total_now = acc.step.value + acc.step.cash
        totals.trade.append(total_now)

        # calculate what the reward would be "if I held", to calculate the actual reward's _advantage_ over holding
        hold_before = acc.step.hold_value
        acc.step.hold_value += pct_change * hold_before
        totals.hold.append(acc.step.hold_value + self.start_cash)

        # Reward is withheld until the episode terminates (sparse reward).
        reward = 0

        acc.step.i += 1

        # Write normalized cash/value back into the dataset so they can be fed
        # to the agent as state features.
        self.data.set_cash_val(acc.ep.i, acc.step.i,
                               acc.step.cash / self.start_cash,
                               acc.step.value / self.start_value)
        next_state = self.get_next_state()

        # Episode ends one step before EPISODE_LEN, or early on bankruptcy.
        # NOTE(review): `terminal` is an int here but becomes bool below —
        # callers should treat it as truthy, not compare types.
        terminal = int(acc.step.i + 1 >= self.EPISODE_LEN)
        if acc.step.value < 0 or acc.step.cash < 0:
            terminal = True
        if terminal and self.mode in (Mode.TRAIN, Mode.TEST):
            # We're done.
            acc.step.signals.append(0)  # Add one last signal (to match length)
            reward = self.get_return()
            # Only one unique signal over the whole episode means the agent
            # never traded — punish hard so "do nothing" is never optimal.
            if np.unique(acc.step.signals).shape[0] == 1:
                reward = -(self.start_cash + self.start_value
                           )  # slam if you don't do anything
        # Live / paper-trading termination path is not implemented yet.
        if terminal and self.mode in (Mode.LIVE, Mode.TEST_LIVE):
            raise_refactor()

        # if acc.step.value <= 0 or acc.step.cash <= 0: terminal = 1
        return next_state, terminal, reward
Ejemplo n.º 4
0
 def fetch_more(self):
     """Fetch additional data (presumably newer market rows) for the dataset.

     Not implemented: the body is a ``raise_refactor()`` stub (project helper,
     defined elsewhere) marking this as pending refactor.
     """
     raise_refactor()