def reset(self):
     """Reset the step/episode counters and restore the initial Q-function.

     When ``self.custom_q_init`` is truthy, ``self.q_func`` becomes a deep
     copy of it; otherwise ``self.q_func`` becomes an empty two-level
     ``defaultdict`` whose missing entries evaluate to ``self.default_q``.
     ``Agent.reset(self)`` is invoked last.
     """
     # Function-scope import: the file-level import block is not visible
     # from this method, so the dependency is brought in locally.
     import copy

     self.step_number = 0
     self.episode_number = 0
     if self.custom_q_init:
         # Copy instead of aliasing. Binding q_func directly to
         # custom_q_init would let any in-place update of q_func also
         # rewrite the initial values, so a later reset could no longer
         # restore them.
         # NOTE(review): if the constructor also assigns custom_q_init
         # directly to q_func, it needs the same treatment -- confirm.
         self.q_func = copy.deepcopy(self.custom_q_init)
     else:
         # self.default_q is read lazily, when a missing entry is accessed.
         self.q_func = defaultdict(lambda: defaultdict(lambda: self.default_q))
     Agent.reset(self)
Esempio n. 2
0
 def reset(self, mdp=None):
     """Zero the counters, rebuild ``q_func`` and advance ``task_number``.

     Args:
         mdp: Optional. When it is not ``None`` it is forwarded to
             ``self.update_init_q_function`` before ``q_func`` is rebuilt.

     While ``self.task_number`` is below ``self.num_sample_tasks`` the
     Q-function is an empty two-level ``defaultdict`` whose missing entries
     evaluate to ``self.default_q``; from then on it is a deep copy of
     ``self.default_q_func``. ``self.task_number`` is incremented on every
     call and ``Agent.reset(self)`` runs last.
     """
     self.step_number, self.episode_number = 0, 0

     if mdp is not None:
         self.update_init_q_function(mdp)

     # The comparison uses the task counter *before* it is incremented below.
     still_sampling = self.task_number < self.num_sample_tasks
     if still_sampling:
         self.q_func = defaultdict(lambda: defaultdict(lambda: self.default_q))
     else:
         self.q_func = copy.deepcopy(self.default_q_func)

     self.task_number += 1
     Agent.reset(self)
 def reset(self):
     """Zero the counters and reload ``q_func`` from ``init_q_func``.

     ``self.update_init_q_function()`` is called first, then ``self.q_func``
     is replaced by a deep copy of ``self.init_q_func``, and finally
     ``Agent.reset(self)`` is invoked.
     """
     self.step_number = self.episode_number = 0

     # Must run before the copy is taken.
     # NOTE(review): presumably this refreshes self.init_q_func -- confirm.
     self.update_init_q_function()

     # Deep copy, so q_func and init_q_func do not share nested containers.
     fresh_q = copy.deepcopy(self.init_q_func)
     self.q_func = fresh_q

     Agent.reset(self)
Esempio n. 4
0
 def reset(self, mdp=None):
     """Zero the counters, rebuild ``q_func`` and advance ``task_number``.

     Args:
         mdp: Optional. When it is not ``None`` it is forwarded to
             ``self.update_init_q_function`` before ``q_func`` is rebuilt.

     While ``self.task_number`` is below ``self.num_sample_tasks`` the
     Q-function is an empty two-level ``defaultdict`` whose missing entries
     evaluate to ``self.default_q``. Afterwards every entry of
     ``self.init_q_func`` is asserted to be at least ``-0.001`` and
     ``q_func`` becomes a deep copy of ``self.init_q_func``.
     ``self.task_number`` is then incremented, ``self.q_agent.reset()`` is
     called when ``self.sample_with_q`` is truthy, and ``Agent.reset(self)``
     runs last.

     Raises:
         AssertionError: If an entry of ``self.init_q_func`` is below
             ``-0.001`` on the branch that copies it.
     """
     self.step_number = self.episode_number = 0

     if mdp is not None:
         self.update_init_q_function(mdp)

     # The comparison uses the task counter *before* it is incremented below.
     if self.task_number < self.num_sample_tasks:
         self.q_func = defaultdict(lambda: defaultdict(lambda: self.default_q))
     else:
         # Sanity check on the stored initial values before they are copied.
         assert all(
             self.init_q_func[outer][inner] >= -0.001
             for outer in self.init_q_func
             for inner in self.init_q_func[outer]
         )
         self.q_func = copy.deepcopy(self.init_q_func)

     self.task_number += 1

     if self.sample_with_q:
         self.q_agent.reset()

     Agent.reset(self)
 def reset(self):
     """Zero the counters and replace ``q_func`` with an empty table.

     The new ``q_func`` is a two-level ``defaultdict``; a missing inner
     entry evaluates to ``self.default_q`` at the time it is first accessed.
     ``Agent.reset(self)`` is invoked last.
     """
     def _default_value():
         # Read lazily so the current self.default_q is used on each miss.
         return self.default_q

     def _empty_inner_table():
         return defaultdict(_default_value)

     self.step_number = self.episode_number = 0
     self.q_func = defaultdict(_empty_inner_table)
     Agent.reset(self)
Esempio n. 6
0
 def reset(self):
     """Start over: counters back to zero and a blank ``q_func``.

     ``q_func`` is rebuilt as a nested ``defaultdict`` (two levels) whose
     missing leaves evaluate to ``self.default_q``. The base-class
     ``Agent.reset`` is called afterwards.
     """
     self.step_number, self.episode_number = 0, 0

     def _new_inner():
         # Inner level: missing keys resolve to the current self.default_q.
         return defaultdict(lambda: self.default_q)

     self.q_func = defaultdict(_new_inner)
     Agent.reset(self)
Esempio n. 7
0
 def reset(self):
     """Zero the counters and reload ``q_func`` from ``default_q_func``.

     ``self.q_func`` is replaced by a deep copy of ``self.default_q_func``,
     then ``Agent.reset(self)`` is invoked.
     """
     self.step_number = self.episode_number = 0

     # Deep copy, so q_func and default_q_func do not share nested containers.
     q_template_copy = copy.deepcopy(self.default_q_func)
     self.q_func = q_template_copy

     Agent.reset(self)
Esempio n. 8
0
 def reset(self):
     """Zero the counters and rebuild both Q-tables in ``q_funcs``.

     ``self.q_funcs`` becomes a dict with the keys ``"A"`` and ``"B"``, each
     mapped to its own empty two-level ``defaultdict`` whose missing entries
     evaluate to ``self.default_q``. ``Agent.reset(self)`` is invoked last.
     """
     self.step_number = self.episode_number = 0

     # One independent table per label; the comprehension builds a new
     # defaultdict on each iteration, so "A" and "B" never share state.
     self.q_funcs = {
         label: defaultdict(lambda: defaultdict(lambda: self.default_q))
         for label in ("A", "B")
     }

     Agent.reset(self)
Esempio n. 9
0
 def reset(self):
     """Reset counters and give ``q_funcs`` two fresh, empty Q-tables.

     After the call ``self.q_funcs`` holds the keys ``"A"`` and ``"B"``;
     each value is a separate nested ``defaultdict`` (two levels) whose
     missing leaves evaluate to ``self.default_q``. The base-class
     ``Agent.reset`` is called afterwards.
     """
     def _blank_table():
         # A new object on every call, so the two tables stay independent.
         return defaultdict(lambda: defaultdict(lambda: self.default_q))

     self.step_number, self.episode_number = 0, 0

     tables = {}
     tables["A"] = _blank_table()
     tables["B"] = _blank_table()
     self.q_funcs = tables

     Agent.reset(self)