Example 1
 def add_experience(self, state, action, reward, next_state, done):
     '''Interface helper method for update() to add experience to memory'''
     self.most_recent = (state, action, reward, next_state, done)
     for idx, k in enumerate(self.data_keys):
         self.cur_epi_data[k].append(self.most_recent[idx])
     # If episode ended, add to memory and clear cur_epi_data
     if util.epi_done(done):
         for k in self.data_keys:
             getattr(self, k).append(self.cur_epi_data[k])
         self.cur_epi_data = {k: [] for k in self.data_keys}
         # If agent has collected the desired number of episodes, it is ready to train
         # length is num of epis due to nested structure
         if len(self.states) == self.body.agent.algorithm.training_frequency:
             self.body.agent.algorithm.to_train = 1
     # Track memory size and num experiences
     self.size += 1
     self.seen_size += 1
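
add_experience appends each new transition to the lists for the current episode and, when the episode ends, moves those lists into memory and checks whether enough episodes have accumulated to flag the algorithm for training. Below is a minimal, standalone sketch of that episodic-buffer pattern; the class name EpisodicBuffer and its flat constructor are illustrative assumptions, not SLM Lab's actual classes.

# Illustrative sketch only: a simplified episodic buffer mirroring the pattern
# above; EpisodicBuffer, training_frequency, and memory are assumed names.
class EpisodicBuffer:
    data_keys = ('states', 'actions', 'rewards', 'next_states', 'dones')

    def __init__(self, training_frequency):
        self.training_frequency = training_frequency  # episodes per training step
        self.to_train = 0
        self.size = 0
        self.seen_size = 0
        # completed episodes: one nested list per data key
        self.memory = {k: [] for k in self.data_keys}
        # transitions of the episode currently being collected
        self.cur_epi_data = {k: [] for k in self.data_keys}

    def add_experience(self, state, action, reward, next_state, done):
        '''Append one transition; roll the episode into memory when it ends.'''
        for k, v in zip(self.data_keys, (state, action, reward, next_state, done)):
            self.cur_epi_data[k].append(v)
        if done:
            for k in self.data_keys:
                self.memory[k].append(self.cur_epi_data[k])
            self.cur_epi_data = {k: [] for k in self.data_keys}
            # nested structure: len() counts completed episodes, not steps
            if len(self.memory['states']) == self.training_frequency:
                self.to_train = 1
        self.size += 1
        self.seen_size += 1

With training_frequency=2, for example, to_train flips to 1 only after the second episode's done flag arrives, because the length check counts whole episodes rather than individual steps.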
Example 2
 def run_rl(self):
     '''Run the main RL loop until clock.max_frame'''
     logger.info(
         f'Running RL loop for trial {self.spec["meta"]["trial"]} session {self.index}'
     )
     clock = self.env.clock
     state = self.env.reset()
     done = False
     while True:
         if util.epi_done(done):  # before starting another episode
             self.try_ckpt(self.agent, self.env)
             if clock.get() < clock.max_frame:  # reset and continue
                 clock.tick('epi')
                 state = self.env.reset()
                 done = False
         self.try_ckpt(self.agent, self.env)
         if clock.get() >= clock.max_frame:  # finish
             break
         clock.tick('t')
         action = self.agent.act(state)
         next_state, reward, done, info = self.env.step(action)
         self.agent.update(state, action, reward, next_state, done)
         state = next_state
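
run_rl drives the act, step, update cycle: it ticks the clock once per timestep, resets the environment between episodes, and checkpoints via try_ckpt until clock.max_frame is reached. A stripped-down sketch of the same loop against a generic environment follows; the function name run_rl_sketch and the plain frame counter standing in for SLM Lab's clock are assumptions for illustration.

# Illustrative sketch only: the same act -> step -> update loop as run_rl above,
# written against a generic env whose step(action) returns
# (next_state, reward, done, info) and an agent with act()/update()
# as in the example. max_frame caps the total number of environment steps.
def run_rl_sketch(agent, env, max_frame):
    frame = 0
    state = env.reset()
    done = False
    while frame < max_frame:
        if done:  # episode ended: start a new one before acting
            state = env.reset()
            done = False
        frame += 1
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        agent.update(state, action, reward, next_state, done)
        state = next_state

The original additionally ticks an 'epi' counter and calls try_ckpt around episode boundaries to handle checkpointing and episode counting, which the sketch omits.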