Exemple #1
0
    def use_dataset(self, mode, full_set=False):
        """Select the window of price data to work with for `mode`. Make sure to call this before reset()!

        Live modes (Mode.LIVE, Mode.TEST_LIVE) open a connection on the live engine and load a large batch of rows
        into `self.df`. Every other mode counts the stored rows and treats the first 90% as training data:
        Mode.TEST takes a fixed-size window starting right after the training rows, anything else takes a random
        `EPISODE_LEN`-long window from inside the training rows.

        Args:
            mode: Mode member to run in; stored on `self.mode`.
            full_set: Only consulted for Mode.TEST. When true the test window is 40000 rows instead of 10000.

        Side effects:
            Sets `self.mode`, `self.offset`, `self.limit`, `self.prices` and `self.prices_diff` (the last two are
            slices of `self.all_prices` / `self.all_prices_diff`). Live modes additionally set `self.conn`,
            `self.df` and `self.last_timestamp`; the other modes read an already-assigned `self.conn`.

        Raises:
            ValueError: From `random.randint` when the training rows cannot hold one episode, i.e. when
                `n_train - EPISODE_LEN` is smaller than the starting offset.
        """
        self.mode = mode
        if mode in (Mode.LIVE, Mode.TEST_LIVE):
            self.conn = data.engine_live.connect()
            # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM)
            # Offset=0 data.py currently pulls recent-to-oldest, then reverses
            # FIXME temporarily using big number (instead of 6000) to build up Scaler (since it's not saved)
            rampup = int(1e5)
            limit, offset = rampup, 0  # if not self.conv2d else (self.hypers.step_window + 1, 0)
            # save away for now so we can keep transforming it as we add new data (find a more efficient way)
            self.df, self.last_timestamp = data.db_to_dataframe(
                self.conn,
                limit=limit,
                offset=offset,
                arbitrage=self.hypers.arbitrage,
                last_timestamp=True)
        else:
            row_ct = data.count_rows(self.conn, arbitrage=self.hypers.arbitrage)
            split = .9  # Using 90% training data.
            n_train = int(row_ct * split)
            # Compare against the Mode class itself (as the live check above does) rather than reaching TEST
            # through the `mode` argument, which only works if the argument happens to expose a TEST attribute.
            if mode == Mode.TEST:
                offset = n_train
                # Should be the whole test split (the remaining ~10% of rows) in full_set, getting idx errors
                limit = 40000 if full_set else 10000
            else:
                # Grab a random window from the 90% training data. The random bit is important so the agent
                # sees a variety of data. The window-size bit is a hack: as long as the agent doesn't die (doesn't
                # cause `terminal=True`), PPO's MemoryModel can keep filling up until it crashes TensorFlow. This
                # ensures there's a stopping point (limit). I'd rather see how far he can get w/o dying, figure
                # out a solution.
                limit = self.EPISODE_LEN
                offset_start = self.hypers.step_window + 1 if self.conv2d else 0
                offset = random.randint(offset_start, n_train - self.EPISODE_LEN)

        self.offset, self.limit = offset, limit
        window = slice(offset, offset + limit)
        self.prices = self.all_prices[window]
        self.prices_diff = self.all_prices_diff[window]
    def use_dataset(self, mode, no_kill=False):
        """Fetches, transforms, and stores the portion of data you'll be working with (ie, 90% train data, 10% test
        data, or the live database). Make sure to call this before reset()!

        Live modes (Mode.LIVE, Mode.TEST_LIVE) open a connection on the live engine and load `rampup` rows starting
        at offset 0, keeping the raw frame on `self.df`. Every other mode counts the stored rows, splits them
        90/10, and loads either the test rows (Mode.TEST: `n_test` rows starting at `n_train`) or the training
        rows (`n_train` rows starting at 0) through the already-assigned `self.conn`.

        Args:
            mode: Mode member to run in; stored on `self.mode`.
            no_kill: Stored verbatim on `self.no_kill`; not otherwise used in this part of the method.

        Side effects:
            Sets `self.mode`, `self.no_kill`, `self.observations`, `self.prices` and `self.prices_diff`. Live modes
            also set `self.conn`, `self.df` and `self.last_timestamp`; the other modes set `self.row_ct`.
        """
        before_time = time.time()
        self.mode = mode
        self.no_kill = no_kill
        if mode in (Mode.LIVE, Mode.TEST_LIVE):
            self.conn = data.engine_live.connect()
            # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM)
            # Offset=0 data.py currently pulls recent-to-oldest, then reverses
            rampup = int(
                3e4
            )  # 6000  # FIXME temporarily using big number to build up Scaler (since it's not saved)
            limit, offset = (
                rampup, 0
            )  # if not self.conv2d else (self.hypers.step_window + 1, 0)
            df, self.last_timestamp = data.db_to_dataframe(
                self.conn,
                limit=limit,
                offset=offset,
                arbitrage=self.hypers.arbitrage,
                last_timestamp=True)
            # save away for now so we can keep transforming it as we add new data (find a more efficient way)
            self.df = df
        else:
            self.row_ct = data.count_rows(self.conn,
                                          arbitrage=self.hypers.arbitrage)
            split = .9  # Using 90% training data.
            n_train, n_test = int(self.row_ct * split), int(self.row_ct *
                                                            (1 - split))
            # Compare against the Mode class itself (as the live check above does) rather than reaching TEST
            # through the `mode` argument, which only works if the argument happens to expose a TEST attribute.
            limit, offset = (n_test,
                             n_train) if mode == Mode.TEST else (n_train, 0)
            df = data.db_to_dataframe(self.conn,
                                      limit=limit,
                                      offset=offset,
                                      arbitrage=self.hypers.arbitrage)

        # Turn the raw frame into model inputs plus the price series, then derive the percent price changes.
        self.observations, self.prices = self._xform_data(df)
        self.prices_diff = self._diff(self.prices, percent=True)
        # Whole seconds elapsed since this method started.
        after_time = round(time.time() - before_time)