Esempio n. 1
0
    def sample(self):
        """Draw a random minibatch of transitions from the replay memory.

        Indices are rejection-sampled: a candidate is discarded when its
        history window straddles the write pointer ``self.current`` or when
        any of the ``history_length`` entries preceding it is flagged terminal.

        Returns:
            ``(prestates, actions, rewards, poststates, terminals)``.
            ``prestates`` and ``poststates`` are the instance's own buffers,
            overwritten in place on every call. When
            ``self.count <= self.history_length`` a message is logged and
            ``None`` is returned instead.
        """
        if self.count <= self.history_length:
            print_and_log_message(REPLAY_MEMORY_INSUFFICIENT, self.logger)
            return None

        span = self.history_length
        chosen = []
        for slot in range(self.batch_size):
            # keep drawing until a candidate passes both validity checks
            while True:
                candidate = random.randint(span, self.count - 1)
                window_start = candidate - span
                # the window must not wrap over the current write pointer
                crosses_pointer = window_start < self.current <= candidate
                # the window must not wrap over an episode end; the poststate
                # (the candidate itself) is allowed to be terminal
                if (not crosses_pointer
                        and not self.terminals[window_start:candidate].any()):
                    break

            # batch index first is fastest in C-order matrices
            self.prestates[slot, ...] = self.getState(candidate - 1)
            self.poststates[slot, ...] = self.getState(candidate)
            chosen.append(candidate)

        return (
            self.prestates,
            self.actions[chosen],
            self.rewards[chosen],
            self.poststates,
            self.terminals[chosen],
        )
Esempio n. 2
0
 def add(self, screen, reward, action, terminal):
     """Write one timestep into the circular buffers at the write pointer.

     A screen whose ``shape`` differs from ``self.dims`` is rejected: a
     message is logged and nothing is stored. Otherwise the entry is written
     at ``self.current``, ``self.count`` grows until the buffer has been
     filled once, and the pointer advances modulo ``self.memory_size``.
     """
     if screen.shape != self.dims:
         print_and_log_message(INVALID_TIMESTEP, self.logger)
         return

     slot = self.current
     self.actions[slot] = action
     self.rewards[slot] = reward
     self.screens[slot, ...] = screen
     self.terminals[slot] = terminal

     # count saturates at the highest slot ever written; the pointer wraps
     self.count = max(self.count, slot + 1)
     self.current = (slot + 1) % self.memory_size
Esempio n. 3
0
 def getState(self, index):
     """Return the ``history_length`` screens ending at ``index`` (inclusive).

     ``index`` is first reduced modulo ``self.count``. When the window fits
     without wrapping, a contiguous slice of ``self.screens`` is returned;
     otherwise the rows are gathered through an explicit index list that
     wraps around modulo ``self.count``. When the memory is empty a message
     is logged and ``None`` is returned.
     """
     if self.count == 0:
         print_and_log_message(REPLAY_MEMORY_ZERO, self.logger)
         return None

     last = index % self.count
     span = self.history_length
     first = last - (span - 1)
     if first >= 0:
         # window lies entirely inside the buffer: plain slicing suffices
         return self.screens[first:last + 1, ...]

     # window wraps past the start: gather oldest-to-newest row by row
     wrapped = [(last - offset) % self.count
                for offset in range(span - 1, -1, -1)]
     return self.screens[wrapped, ...]
Esempio n. 4
0
    def save(self):
        """Write every replay-memory buffer to ``self._model_dir``.

        Each buffer is saved through ``save_npy`` under the file name given
        by its matching constant (``ACTIONS``, ``REWARDS``, ``SCREENS``,
        ``TERMINALS``, ``PRESTATES``, ``POSTSTATES``). A message is logged
        before and after the whole operation.
        """
        message = "Saving replay memory to {}".format(self._model_dir)
        print_and_log_message(message, self.logger)

        buffers = (
            (ACTIONS, self.actions),
            (REWARDS, self.rewards),
            (SCREENS, self.screens),
            (TERMINALS, self.terminals),
            (PRESTATES, self.prestates),
            (POSTSTATES, self.poststates),
        )
        for name, array in buffers:
            # save_npy takes (obj, path, logger); the logger is required
            save_npy(array, join(self._model_dir, name), self.logger)

        message = "Replay memory successfully saved to {}".format(
            self._model_dir)
        print_and_log_message(message, self.logger)
Esempio n. 5
0
    def preprocess(self):
        """Load the dataset and split it into blocks of continuous rows.

        The CSV at ``self.dataset_path`` is read and rows containing missing
        values are dropped. Each remaining row gets a ``blocks`` id equal to
        ``(Timestamp - first Timestamp) / 60`` minus the row's ordinal
        position, so consecutive rows whose ``Timestamp`` advances by exactly
        60 share an id, and any gap starts a new one.
        (Presumably ``Timestamp`` is in seconds and rows are one minute
        apart - confirm against the dataset.)

        Blocks with more than ``history_length + horizon`` rows are stored in
        ``self._data_blocks``; the number of distinct episodes they provide is
        logged.
        """
        data = pd.read_csv(self.dataset_path)
        message = 'Columns found in the dataset {}'.format(data.columns)
        print_and_log_message(message, self.logger)
        data = data.dropna()

        # dropna() keeps the original index labels, so label 0 may no longer
        # exist; take the first remaining row by position instead
        start_time_stamp = data['Timestamp'].iloc[0]
        timestamps = (data['Timestamp'] - start_time_stamp) / 60
        # subtracting the row ordinal makes the value constant along a gap-free run
        timestamps = timestamps - range(timestamps.shape[0])
        data.insert(0, 'blocks', timestamps)
        blocks = data.groupby('blocks')
        message = 'Number of blocks of continuous prices found are {}'.format(len(blocks))
        print_and_log_message(message, self.logger)

        min_length = self.history_length + self.horizon
        self._data_blocks = []
        distinct_episodes = 0
        for name, indices in blocks.indices.items():
            if len(indices) > min_length:
                self._data_blocks.append(blocks.get_group(name))
                distinct_episodes += len(indices) - min_length + 1

        # drop the reference to the full frame; only the kept groups are needed
        data = None
        message_list = [
            'Number of usable blocks obtained from the dataset are {}'.format(len(self._data_blocks)),
            'Number of distinct episodes for the current configuration are {}'.format(distinct_episodes),
        ]
        print_and_log_message_list(message_list, self.logger)
Esempio n. 6
0
    def load_model(self):
        """Restore the session from the checkpoint in ``self.checkpoint_dir``.

        Returns:
            ``True`` when a checkpoint state with a checkpoint path was found
            and ``self.saver.restore`` was called on it, ``False`` otherwise.
            Both outcomes are logged.
        """
        print_and_log_message(
            "Loading checkpoint from {}".format(self.checkpoint_dir),
            self.logger)

        state = tf.train.get_checkpoint_state(self.checkpoint_dir)
        if not (state and state.model_checkpoint_path):
            print_and_log_message(
                "Checkpoint could not be loaded from {}".format(
                    self.checkpoint_dir),
                self.logger)
            return False

        # rebuild the path from the directory plus the recorded file name
        checkpoint_file = os.path.join(
            self.checkpoint_dir,
            os.path.basename(state.model_checkpoint_path))
        self.saver.restore(self.sess, checkpoint_file)
        print_and_log_message(
            "Checkpoint successfully loaded from {}".format(checkpoint_file),
            self.logger)
        return True
Esempio n. 7
0
def load_npy(path, logger):
    """Load the object stored at ``path`` with ``np.load``, log it, return it."""
    loaded = np.load(path)
    print_and_log_message("  [*] loaded from {}".format(path), logger)
    return loaded
Esempio n. 8
0
def save_npy(obj, path, logger):
    """Save ``obj`` to ``path`` with ``np.save`` and log where it was written."""
    np.save(path, obj)
    print_and_log_message("  [*] saved at {}".format(path), logger)
Esempio n. 9
0
 def save_model(self, step=None):
     """Log the target location, then save the session through ``self.saver``.

     ``step`` is forwarded to the saver as ``global_step``.
     """
     print_and_log_message(
         "Saving checkpoint to {}".format(self.checkpoint_dir),
         self.logger)
     # NOTE(review): checkpoint_dir is passed as the save path itself -
     # confirm it is the intended checkpoint prefix.
     self.saver.save(self.sess, self.checkpoint_dir, global_step=step)