def use_dataset(self, mode, full_set=False):
    """Select the window of price data the environment will work with.

    Make sure to call this before reset()!

    * ``Mode.LIVE`` / ``Mode.TEST_LIVE``: opens a connection on the live
      engine, loads up to ``rampup`` rows through ``data.db_to_dataframe``
      and keeps the resulting frame on ``self.df``.
    * ``Mode.TEST``: window starts where the 90% training portion ends.
    * any other mode: a random ``self.EPISODE_LEN``-sized window inside
      the 90% training portion.

    Args:
        mode: The ``Mode`` member describing which data to use.
        full_set: Only consulted for ``Mode.TEST``; picks a 40000-row
            window instead of the default 10000-row one.

    Raises:
        ValueError: From ``random.randint`` when the training portion is
            too small to fit an ``EPISODE_LEN`` window after the start
            offset.

    Side effects:
        Sets ``self.mode``, ``self.offset``, ``self.limit``,
        ``self.prices`` and ``self.prices_diff``. The live modes also set
        ``self.conn``, ``self.last_timestamp`` and ``self.df``.
    """
    self.mode = mode
    if mode in (Mode.LIVE, Mode.TEST_LIVE):
        self.conn = data.engine_live.connect()
        # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM).
        # Offset=0: data.py currently pulls recent-to-oldest, then reverses.
        # FIXME temporarily using a big number (instead of 6000) to build up the Scaler, since it's not saved.
        rampup = int(1e5)
        # Possible conv2d variant: (self.hypers.step_window + 1, 0)
        limit, offset = rampup, 0
        df, self.last_timestamp = data.db_to_dataframe(
            self.conn,
            limit=limit,
            offset=offset,
            arbitrage=self.hypers.arbitrage,
            last_timestamp=True,
        )
        # Save away for now so we can keep transforming it as we add new data (find a more efficient way).
        self.df = df
    else:
        # NOTE: this branch relies on self.conn having been set before this call.
        row_ct = data.count_rows(self.conn, arbitrage=self.hypers.arbitrage)
        split = .9  # Using 90% training data; the remaining ~10% is the test portion.
        n_train = int(row_ct * split)
        # Compare against the Mode class (as the live check above does) rather than
        # reaching for the member through the `mode` argument.
        if mode == Mode.TEST:
            offset = n_train
            # Should be the full test portion (int(row_ct * (1 - split))) when full_set,
            # but that was giving index errors.
            limit = 40000 if full_set else 10000
        else:
            # Grab a random window from the 90% training data. The random bit is important so the agent
            # sees a variety of data. The window-size bit is a hack: as long as the agent doesn't die
            # (doesn't cause `terminal=True`), PPO's MemoryModel can keep filling up until it crashes
            # TensorFlow. This ensures there's a stopping point (limit). I'd rather see how far he can
            # get w/o dying, figure out a solution.
            limit = self.EPISODE_LEN
            offset_start = 0 if not self.conv2d else self.hypers.step_window + 1
            offset = random.randint(offset_start, n_train - self.EPISODE_LEN)

    # Both branches bind `offset` and `limit`; remember them and slice the
    # already-loaded price series down to the chosen window.
    self.offset, self.limit = offset, limit
    self.prices = self.all_prices[offset:offset + limit]
    self.prices_diff = self.all_prices_diff[offset:offset + limit]
def use_dataset(self, mode, no_kill=False):
    """Fetch, transform, and store the portion of data to work with.

    Make sure to call this before reset()!

    * ``Mode.LIVE`` / ``Mode.TEST_LIVE``: opens a connection on the live
      engine and loads up to ``rampup`` rows; the raw frame is kept on
      ``self.df``.
    * ``Mode.TEST``: loads the last ~10% of the counted rows.
    * any other mode: loads the first 90% of the counted rows.

    The loaded frame is passed through ``self._xform_data`` and
    ``self._diff`` to populate the observation and price attributes.

    Args:
        mode: The ``Mode`` member describing which data to use.
        no_kill: Stored unchanged on ``self.no_kill``; not otherwise used
            in this method.

    Side effects:
        Sets ``self.mode``, ``self.no_kill``, ``self.observations``,
        ``self.prices`` and ``self.prices_diff``. The live modes also set
        ``self.conn``, ``self.last_timestamp`` and ``self.df``; the other
        modes set ``self.row_ct``.
    """
    before_time = time.time()
    self.mode = mode
    self.no_kill = no_kill
    if mode in (Mode.LIVE, Mode.TEST_LIVE):
        self.conn = data.engine_live.connect()
        # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM).
        # Offset=0: data.py currently pulls recent-to-oldest, then reverses.
        # FIXME temporarily using a big number (instead of 6000) to build up the Scaler, since it's not saved.
        rampup = int(3e4)
        # Possible conv2d variant: (self.hypers.step_window + 1, 0)
        limit, offset = rampup, 0
        df, self.last_timestamp = data.db_to_dataframe(
            self.conn,
            limit=limit,
            offset=offset,
            arbitrage=self.hypers.arbitrage,
            last_timestamp=True,
        )
        # Save away for now so we can keep transforming it as we add new data (find a more efficient way).
        self.df = df
    else:
        # NOTE: this branch relies on self.conn having been set before this call.
        self.row_ct = data.count_rows(self.conn, arbitrage=self.hypers.arbitrage)
        split = .9  # Using 90% training data.
        n_train, n_test = int(self.row_ct * split), int(self.row_ct * (1 - split))
        # Compare against the Mode class (as the live check above does) rather than
        # reaching for the member through the `mode` argument.
        if mode == Mode.TEST:
            limit, offset = n_test, n_train
        else:
            limit, offset = n_train, 0
        df = data.db_to_dataframe(
            self.conn, limit=limit, offset=offset, arbitrage=self.hypers.arbitrage)

    self.observations, self.prices = self._xform_data(df)
    self.prices_diff = self._diff(self.prices, percent=True)
    # Whole seconds spent loading/transforming; not consumed by the code visible here.
    after_time = round(time.time() - before_time)