def preprocess(self):
    """Load the raw price CSV and split it into blocks of contiguous minutes.

    Reads ``self.dataset_path``, drops rows with missing values, and groups
    rows into "blocks" of consecutive one-minute timestamps.  Blocks long
    enough to contain at least one full episode
    (``history_length + horizon`` rows) are kept in ``self._data_blocks``.

    Side effects:
        * Sets ``self._data_blocks`` to the list of usable DataFrame blocks.
        * Logs column names, block counts, and the distinct-episode count
          via the module's logging helpers.
    """
    data = pd.read_csv(self.dataset_path)
    message = 'Columns found in the dataset {}'.format(data.columns)
    print_and_log_message(message, self.logger)

    data = data.dropna()
    # dropna() preserves the original index, so row 0 may no longer exist
    # as a label — use positional access to get the first remaining row.
    start_time_stamp = data['Timestamp'].iloc[0]

    # Convert absolute timestamps (seconds) to minute offsets from the start.
    timestamps = data['Timestamp'].apply(
        lambda x: (x - start_time_stamp) / 60)
    # For a gap-free minute series, offset minus row position is constant;
    # every jump in the data starts a new constant value, i.e. a new block.
    timestamps = timestamps - range(timestamps.shape[0])
    data.insert(0, 'blocks', timestamps)
    blocks = data.groupby('blocks')

    message = 'Number of blocks of continuous prices found are {}'.format(
        len(blocks))
    print_and_log_message(message, self.logger)

    self._data_blocks = []
    distinct_episodes = 0
    for name, indices in blocks.indices.items():
        # A block is usable only if it can hold at least one episode of
        # history_length + horizon consecutive prices.
        if len(indices) > (self.history_length + self.horizon):
            self._data_blocks.append(blocks.get_group(name))
            # Number of distinct episode start positions in this block.
            distinct_episodes = distinct_episodes + (
                len(indices) - (self.history_length + self.horizon) + 1)

    data = None  # release the full DataFrame; only the blocks are kept

    message_list = [
        'Number of usable blocks obtained from the dataset are {}'.format(
            len(self._data_blocks))
    ]
    message_list.append(
        'Number of distinct episodes for the current configuration are {}'.
        format(distinct_episodes))
    print_and_log_message_list(message_list, self.logger)
def preprocess(self):
    """Load the raw price CSV and split it into blocks of contiguous minutes.

    Reads ``self.dataset_path``, drops rows with missing values, and groups
    rows into "blocks" of consecutive one-minute timestamps.  A block must
    hold ``history_length + horizon + 1`` rows to be usable — the extra row
    is needed to normalize each price by the previous timestamp.

    Side effects:
        * Sets ``self._data_blocks`` to the list of usable DataFrame blocks.
        * Logs column names, block counts, and the distinct-episode count
          via the module's logging helpers.
    """
    data = pd.read_csv(self.dataset_path)
    message = 'Columns found in the dataset {}'.format(data.columns)
    print_and_log_message(message, self.logger)

    data = data.dropna()
    # dropna() preserves the original index, so row 0 may no longer exist
    # as a label — use positional access to get the first remaining row.
    start_time_stamp = data['Timestamp'].iloc[0]

    # Convert absolute timestamps (seconds) to minute offsets from the start.
    timestamps = data['Timestamp'].apply(lambda x: (x - start_time_stamp) / 60)
    # For a gap-free minute series, offset minus row position is constant;
    # every jump in the data starts a new constant value, i.e. a new block.
    timestamps = timestamps - range(timestamps.shape[0])
    data.insert(0, 'blocks', timestamps)
    blocks = data.groupby('blocks')

    message = 'Number of blocks of continuous prices found are {}'.format(len(blocks))
    print_and_log_message(message, self.logger)

    self._data_blocks = []
    distinct_episodes = 0
    for name, indices in blocks.indices.items():
        # Length of the block must exceed history_length + horizon by 1.
        # The extra 1 is required to normalize each price block by the
        # previous timestamp.
        if len(indices) > (self.history_length + self.horizon + 1):
            self._data_blocks.append(blocks.get_group(name))
            # Each episode occupies (history_length + horizon + 1) rows, so
            # the number of distinct start positions in this block is
            # len - (history_length + horizon + 1) + 1.  (The previous
            # "+ 1 + 1" over-counted by 2 despite the comment saying the
            # extra row should be subtracted.)
            distinct_episodes = distinct_episodes + (
                len(indices) - (self.history_length + self.horizon + 1) + 1)

    data = None  # release the full DataFrame; only the blocks are kept

    message_list = ['Number of usable blocks obtained from the dataset are {}'.format(len(self._data_blocks))]
    message_list.append('Number of distinct episodes for the current configuration are {}'.format(distinct_episodes))
    print_and_log_message_list(message_list, self.logger)
def new_random_episode(self, history):
    """Reset episode state and pick a random price window for a new episode.

    Chooses a random price block, then a random starting index inside it,
    and seeds ``history`` with the ``history_length`` prices preceding that
    index.  All selections are logged in one batch at the end.

    TODO: episode selection is not purely uniform — a block is picked first
    and then an index within it, so short blocks are over-represented.
    Indexing every episode globally and drawing one random index would fix
    this.
    """
    log_lines = []

    self.episode_number = self.episode_number + 1
    log_lines.append("Starting a new episode numbered {}".format(
        self.episode_number))

    # Reset per-episode portfolio state and the step counter.
    self.liquid, self.borrow, self.long, self.short = 0., 0., 0, 0
    self.timesteps = 0

    # Draw a block, then a start index within it (randint is inclusive).
    chosen_block = random.randint(0, len(self.price_blocks) - 1)
    log_lines.append(
        "Block index selected for episode number {} is {}".format(
            self.episode_number, chosen_block))
    self.historical_prices = self.price_blocks[chosen_block]

    self.current = random.randint(
        self.history_length,
        len(self.historical_prices) - self.horizon)
    log_lines.append(
        "Starting index and timestamp point selected for episode number {} is {}:{}"
        .format(self.episode_number, self.current,
                self.timestamp_blocks[chosen_block][self.current]))

    # Seed the rolling history with the window just before the start index.
    window_start = self.current - self.history_length
    history.set_history(self.historical_prices[window_start:self.current])

    print_and_log_message_list(log_lines, self.logger)
def save_model(self, step=None):
    """Persist the current TensorFlow session to the model directory.

    Args:
        step: optional global step appended to the checkpoint filename
            by the saver.
    """
    target = join(self._model_dir, self.__name__)
    messages = ["Saving model to {}".format(target)]
    # The saver returns the actual path written (may include the step).
    target = self._saver.save(self.sess, target, global_step=step)
    messages.append("Model saved to {}".format(target))
    print_and_log_message_list(messages, self.logger)
def load_model(self):
    """Restore the latest checkpoint from the model directory, if any.

    Returns:
        True when a checkpoint was found and restored into ``self.sess``,
        False when no usable checkpoint exists.
    """
    messages = ["Loading checkpoints from {}".format(self._model_dir)]
    ckpt = tf.train.get_checkpoint_state(self._model_dir)

    # Guard clause: bail out early when no checkpoint state is available.
    if not (ckpt and ckpt.model_checkpoint_path):
        messages.append("Model could not be loaded from {}".format(self._model_dir))
        print_and_log_message_list(messages, self.logger)
        return False

    ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    fname = join(self._model_dir, ckpt_name)
    self._saver.restore(self.sess, fname)
    messages.append("Model successfully loaded from {}".format(fname))
    print_and_log_message_list(messages, self.logger)
    return True