Example #1
 def write_test_results(self):
     helper.write_stats_file(
         self.run_stats_file, self.current_episode, sum(self.test_steps),
         np.mean(self.test_steps), sum(self.test_rewards),
         np.mean(self.test_rewards),
         float("{0:.5f}".format(self.learner.last_epsilon)), self.run_steps)
     self._write_test_results()
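
Every example in this listing writes its output through helper.write_stats_file, which is not shown here. Judging by the call sites, it appends one comma-separated row per call and accepts either individual values or a ready-made list. A minimal sketch under that assumption (not the project's actual helper):

    import csv

    def write_stats_file(path, *values):
        # Hypothetical sketch of helper.write_stats_file: flatten any list or
        # tuple arguments into a single row and append it to the CSV file.
        row = []
        for value in values:
            if isinstance(value, (list, tuple)):
                row.extend(value)
            else:
                row.append(value)
        with open(path, 'a', newline='') as stats_file:
            csv.writer(stats_file).writerow(row)
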
Example #2
 def _init_run(self):
     self.learner.init_Q(states=self.env.get_all_states(), how='zero')
     self.run_lib_probs_file = os.path.join(self.run_dir,
                                            'stats_policy_probs.csv')
     self.run_lib_absolute_file = os.path.join(self.run_dir,
                                               'stats_policy_absolute.csv')
     policy_usage_header = self.task_policies[:]
     policy_usage_header.insert(0, 'episode')
     helper.write_stats_file(self.run_lib_probs_file, policy_usage_header)
     helper.write_stats_file(self.run_lib_absolute_file,
                             policy_usage_header)
     self.tau_policy = self.params['tau_policy']
     self.importance_limit = self.params['policy_importance_limit']
     for policy_name in self.current_library:
         self.current_library[policy_name]['active'] = True
     self.active_policies = len(self.task_policies)
     epsilon = (self.params['epsilon'] * (
         (1 + self.params['task_library_size'] - len(self.current_library))
         / self.params['task_library_size']))
     self.learner.set_epsilon(epsilon)
     self.evaluate_current_library()
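
The epsilon computed in Example #2 shrinks as the current library fills up: epsilon = epsilon_0 * (1 + task_library_size - len(current_library)) / task_library_size. With illustrative values (not taken from the examples) of epsilon_0 = 1.0, task_library_size = 4 and two policies already in current_library, the learner starts at 1.0 * (1 + 4 - 2) / 4 = 0.75, and at 1.0 * (1 + 4 - 4) / 4 = 0.25 once the library holds all four policies.
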
Example #3
 def init_run(self):
     _logger.info("..... Starting run %s" % str(self.current_run))
     run_dir = os.path.join(self.task_dir, 'run_' + str(self.current_run))
     self.run_dir = helper.create_dir(run_dir)
     # Create run stats file: run_stats.csv
     self.run_stats_file = os.path.join(self.run_dir, 'stats_run.csv')
     self.run_steps = 0
     helper.write_stats_file(self.run_stats_file, 'episode', 'steps_total',
                             'steps_mean', 'reward_total', 'reward_mean',
                             'epsilon', 'step_count')
     self._init_run()
Example #4
 def _init_run(self):
     self.learner.init_Q(states=self.env.get_all_states(),
                         how='zero')
     self.learner.set_epsilon(self.params['epsilon'])
     self.run_lib_W_file = os.path.join(self.run_dir,
                                        'stats_policy_W.csv')
     self.run_lib_W_mean_file = os.path.join(self.run_dir,
                                             'stats_policy_W_mean.csv')
     self.run_lib_U_file = os.path.join(self.run_dir,
                                        'stats_policy_U.csv')
     self.run_lib_P_file = os.path.join(self.run_dir,
                                        'stats_policy_P.csv')
     policy_usage_header = ['episode'] + list(self.library)
     helper.write_stats_file(self.run_lib_W_file,
                             policy_usage_header)
     helper.write_stats_file(self.run_lib_W_mean_file,
                             ['episode', 'W_mean'])
     helper.write_stats_file(self.run_lib_U_file,
                             policy_usage_header)
     helper.write_stats_file(self.run_lib_P_file,
                             policy_usage_header)
     self.tau_policy = self.params['tau_policy']
Example #5
 def save_best_episode(self):
     df = pd.read_csv(os.path.join(self.run_dir, 'stats_run.csv'))
     least_steps_row = df.loc[df['steps_mean'].idxmin()]
     run_best_file = os.path.join(self.run_dir, 'stats_run_best.csv')
     headers = ['run']
     content = [int(self.current_run)]
     for column in df:
         headers.append(str(column))
         content.append(least_steps_row[column])
     helper.write_stats_file(run_best_file, headers)
     helper.write_stats_file(run_best_file, content)
     helper.copy_file(
         os.path.join(self.run_dir,
                      'episode_' + str(int(least_steps_row['episode'])),
                      'Qs.npy'), os.path.join(self.run_dir, 'best_Qs.npy'))
Example #6
 def evaluate_current_library(self):
     self.set_status('policy_eval')
     removed_policy = True
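     # Re-run the whole evaluation whenever a policy is deactivated, so the
     # remaining active policies are re-weighted and re-normalized from scratch.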
     while removed_policy:
         removed_policy = False
         divider = 0.0
         absolute = [self.current_episode]
         for policy_name in self.current_library:
             if self.current_library[policy_name]['active']:
                 if self.active_policies > 1:
                     policy_results_mean = self.get_mean_test_results(
                         policy_name)
                     self.current_library[policy_name]['weight'] = \
                         self.get_policy_weight(policy_results_mean)
                     absolute.append(policy_results_mean)
                 else:
                     absolute.append(0.0)
             else:
                 self.current_library[policy_name]['weight'] = 0.0
                 absolute.append(0.0)
             divider += self.current_library[policy_name]['weight']
         weights = [self.current_episode]
         for policy_name in self.current_library:
             if self.current_library[policy_name]['active']:
                 if self.active_policies > 1:
                     self.current_library[policy_name]['weight'] /= divider
                     if self.current_library[policy_name]['weight'] < \
                             self.importance_limit and not\
                             policy_name == self.current_task['name']:
                         self.current_library[policy_name]['active'] = False
                         self.active_policies -= 1
                         _logger.debug(
                             "Episode %s: Deactivated policy %s" %
                             (str(self.current_episode), policy_name))
                         removed_policy = True
                         break
                 else:
                     self.current_library[policy_name]['weight'] = 1.0
             weights.append(self.current_library[policy_name]['weight'])
             _logger.debug(
                 "%s: weight = %s" %
                 (policy_name,
                  str(self.current_library[policy_name]['weight'])))
     helper.write_stats_file(self.run_lib_absolute_file, absolute)
     helper.write_stats_file(self.run_lib_probs_file, weights)
     self.update_train_settings()
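
Example #6 relies on get_mean_test_results and get_policy_weight, neither of which is shown in this listing. Given that tau_policy is kept as a temperature and nudged each episode in Example #8, one plausible (assumed, not confirmed) weighting is a Boltzmann-style transform of the mean test results, sketched here as a standalone function:

    import numpy as np

    def get_policy_weight(policy_results_mean, tau_policy):
        # Hypothetical sketch: map a policy's mean test result to an
        # unnormalized weight; tau_policy acts as a temperature that controls
        # how strongly the weights are peaked around the best policy.
        return float(np.exp(policy_results_mean / tau_policy))

The loop in Example #6 then divides every weight by the accumulated divider, so only the relative magnitudes matter, and any policy whose normalized weight falls below policy_importance_limit (other than the current task's own policy) is deactivated.
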
Example #7
 def save_best_run(self):
     # Save best Q-table for current task
     df = pd.read_csv(
         os.path.join(self.task_dir, 'run_' + str(1), 'stats_run_best.csv'))
     # Gather the best-episode rows from the remaining runs into one frame
     for i in range(2, self.params['runs']):
         df = df.append(
             pd.read_csv(os.path.join(self.task_dir, 'run_' + str(i),
                                      'stats_run_best.csv')),
             ignore_index=True)
     least_steps_row = df.loc[df['steps_mean'].idxmin()]
     task_best_file = os.path.join(self.task_dir, 'stats_task_best.csv')
     headers = ['task']
     content = [str(self.current_task['name'])]
     for column in df:
         headers.append(str(column))
         content.append(least_steps_row[column])
     helper.write_stats_file(task_best_file, headers)
     helper.write_stats_file(task_best_file, content)
     helper.copy_file(
         os.path.join(self.task_dir,
                      'run_' + str(int(least_steps_row['run'])),
                      'best_Qs.npy'),
         os.path.join(self.task_dir, 'best_Qs.npy'))
Example #8
 def _cleanup_episode(self):
     if self.status == 'training':
         # if self.learner.epsilon > -1 * self.learner.epsilon_change:
         #    self.learner.set_epsilon(self.learner.epsilon +
         #                             self.learner.epsilon_change)
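         # Discounted return of the episode just finished; folded below into
         # a running mean (W) for the policy that was followed.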
         self.current_W = \
             ((self.params['gamma'] ** self.steps_in_episode) *
              self.reward_in_episode)
         self.library[self.current_policy]['W'] = \
             (((self.library[self.current_policy]['W'] *
                self.library[self.current_policy]['U']) +
               self.current_W) /
              (self.library[self.current_policy]['U'] + 1))
         self.library[self.current_policy]['U'] += 1
         self.tau_policy += self.params['tau_policy_delta']
         _logger.debug('%s: current_W=%s, W=%s, U=%s, tau=%s' %
                       (str(self.current_policy),
                        str(self.current_W),
                        str(self.library[self.current_policy]['W']),
                        str(self.library[self.current_policy]['U']),
                        str(self.tau_policy)))
         Ws = [self.current_episode]
         W_mean = [self.current_episode]
         Us = [self.current_episode]
         Ps = [self.current_episode]
         W_sum = 0
         for policy in self.library:
             W_sum += self.library[policy]['W']
             Ws.append(self.library[policy]['W'])
             Us.append(self.library[policy]['U'])
             Ps.append(self.library[policy]['P'])
         W_mean.append(W_sum / len(self.library))
         helper.write_stats_file(self.run_lib_W_file, Ws)
         helper.write_stats_file(self.run_lib_W_mean_file, W_mean)
         helper.write_stats_file(self.run_lib_U_file, Us)
         helper.write_stats_file(self.run_lib_P_file, Ps)
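
The W update in Example #8 keeps a running mean of the discounted episode returns: current_W = gamma ** steps_in_episode * reward_in_episode is folded in as W = (W * U + current_W) / (U + 1) before U is incremented. With illustrative numbers, a policy with W = 0.5 over U = 3 counted episodes that earns current_W = 0.9 moves to (0.5 * 3 + 0.9) / 4 = 0.6.
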