def punishLastAction(self, howmuch):
     """Forward the punishment to the parent class and report it on the info screen.

     ``howmuch`` is handed unchanged to ``super().punishLastAction``. When
     ``self.containers.showscreen`` is truthy, the negated magnitude of
     ``howmuch`` and the current UTC time (HH:MM:SS) are passed to
     ``infoscreen.print`` under the window name "Last big punish".
     """
     super().punishLastAction(howmuch)
     if not self.containers.showscreen:
         return
     # Displayed as a non-positive number regardless of the sign passed in.
     penalty_text = str(-abs(howmuch))
     timestamp = time.strftime("%H:%M:%S", time.gmtime())
     infoscreen.print(penalty_text,
                      timestamp,
                      containers=self.containers,
                      wname="Last big punish")
 def learnANN(self):
     """Run the parent's ``learnANN`` and report ``self.model.step()``.

     Returns:
         Whatever ``super().learnANN()`` returned, unchanged.
     """
     result = super().learnANN()
     # ``print`` is called with a ``level`` keyword, so it is presumably a
     # project-level wrapper rather than the builtin -- confirm.
     print("ReinfLearnSteps:", self.model.step(), level=3)
     if not self.containers.showscreen:
         return result
     steps = self.model.step()
     iterations = "Iterations: >{!s}".format(self.model.run_inferences())
     infoscreen.print(steps,
                      iterations,
                      containers=self.containers,
                      wname="ReinfLearnSteps")
     return result
Beispiel #3
0
 def policyAction(self, agentState):
     """Query the model for ``agentState`` and return the resulting command.

     The first inference result is dediscretized into throttle, brake and
     steer; the first row of the second result is handed to
     ``self.showqvals``. When ``self.containers.showscreen`` is truthy the
     command is passed to ``infoscreen.print`` ("Last command"), and whenever
     ``self.model.run_inferences()`` is a multiple of 100 the
     "ReinfLearnSteps" window is updated as well.

     Returns:
         A 2-tuple ``(toUse, toSave)``: the command formatted as the string
         ``"[throttle, brake, steer]"`` and the same three values as a tuple.
     """
     # Per the original author: the former is the argmax, the latter are the
     # individual q-values.
     action, qvals = self.model.inference(self.makeInferenceUsable(agentState))
     throttle, brake, steer = self.dediscretize(action[0])
     command = "[{!s}, {!s}, {!s}]".format(throttle, brake, steer)
     self.showqvals(qvals[0])
     if self.containers.showscreen:
         infoscreen.print(command,
                          containers=self.containers,
                          wname="Last command")
         refresh_due = self.model.run_inferences() % 100 == 0
         if refresh_due:
             steps = self.model.step()
             iterations = "Iterations: >{!s}".format(self.model.run_inferences())
             infoscreen.print(steps,
                              iterations,
                              containers=self.containers,
                              wname="ReinfLearnSteps")
     # Always returns (toUse, toSave).
     return command, (throttle, brake, steer)
Beispiel #4
0
 def showqvals(self, qvals):
     """Pair every one-hot action with its entry in ``qvals`` and report them.

     The number of actions is ``steering_steps * 4`` when
     ``self.conf.INCLUDE_ACCPLUSBREAK`` is truthy, otherwise
     ``steering_steps * 3``. Each one-hot vector of that length is run
     through ``self.dediscretize`` and its string form is zipped with
     ``qvals``. The pairs are printed at level 3 and, when
     ``self.containers.showscreen`` is truthy, shown one per line in the
     "Current Q Vals" window.
     """
     factor = 4 if self.conf.INCLUDE_ACCPLUSBREAK else 3
     n_actions = self.conf.steering_steps * factor
     labels = [
         str(self.dediscretize([1 if pos == hot else 0 for pos in range(n_actions)]))
         for hot in range(n_actions)
     ]
     pairs = list(zip(labels, qvals))
     # [1:-1] drops the first and last character of each label (presumably
     # the surrounding brackets of the dediscretized action -- confirm).
     lines = ["{}: {!s}".format(label[1:-1], qval) for label, qval in pairs]
     text = "\n".join(lines)
     print(pairs, level=3)
     if self.containers.showscreen:
         infoscreen.print(text,
                          containers=self.containers,
                          wname="Current Q Vals")
Beispiel #5
0
 def addToMemory(self, gameState, pastState):
     """Store the transition via the parent class and report it on the info screen.

     ``super().addToMemory`` is expected to return five values; four of them
     (action, reward, q-value and a change string) are passed to
     ``infoscreen.print`` under "Last memory" when
     ``self.containers.showscreen`` is truthy, with reward and q-value
     rounded to two decimals. Whenever ``len(self.memory)`` is a multiple of
     20 the "Memorysize" window is updated too.

     NOTE(review): the parent's result is not returned to the caller --
     confirm that this is intended.
     """
     action, reward, qval, _count, change_text = super().addToMemory(
         gameState, pastState)
     if not self.containers.showscreen:
         return
     infoscreen.print(action,
                      round(reward, 2),
                      round(qval, 2),
                      change_text,
                      containers=self.containers,
                      wname="Last memory")
     if len(self.memory) % 20 != 0:
         return
     infoscreen.print(">{!s}".format(len(self.memory)),
                      containers=self.containers,
                      wname="Memorysize")
Beispiel #6
0
 def policyAction(self, agentState):
     """Query the model for ``agentState``, add noise, and return the command.

     The first row of the model's first inference result is passed through
     ``self.make_noisy`` and every component is rounded to three decimals.
     When ``self.containers.showscreen`` is truthy the command is shown under
     "Last command"; whenever ``self.model.run_inferences()`` is a multiple
     of 100 the "ReinfLearnSteps" and "Epsilon" windows are updated as well.

     Returns:
         A 2-tuple: the first three components formatted as the string
         ``"[a, b, c]"`` and the full list of rounded components.
     """
     raw_action, _ = self.model.inference(self.makeInferenceUsable(agentState))
     noisy_action = self.make_noisy(raw_action[0])
     rounded = [round(component, 3) for component in noisy_action]
     command = "[{!s}, {!s}, {!s}]".format(rounded[0], rounded[1], rounded[2])
     if self.containers.showscreen:
         infoscreen.print(command,
                          containers=self.containers,
                          wname="Last command")
         refresh_due = self.model.run_inferences() % 100 == 0
         if refresh_due:
             steps = self.model.step()
             iterations = "Iterations: >{!s}".format(self.model.run_inferences())
             infoscreen.print(steps,
                              iterations,
                              containers=self.containers,
                              wname="ReinfLearnSteps")
             infoscreen.print(self.epsilon,
                              containers=self.containers,
                              wname="Epsilon")
     return command, rounded
Beispiel #7
0
 def eval_episodeVals(self, mem_epi_slice, gameState, endReason):
     """Evaluate the episode via the parent class and show its result.

     All three arguments are forwarded unchanged to
     ``super().eval_episodeVals``; its return value is passed to
     ``infoscreen.print`` under "Last Epsd" when
     ``self.containers.showscreen`` is truthy.

     NOTE(review): the parent's result is not returned to the caller --
     confirm that this is intended.
     """
     summary = super().eval_episodeVals(mem_epi_slice, gameState, endReason)
     if not self.containers.showscreen:
         return
     infoscreen.print(summary,
                      containers=self.containers,
                      wname="Last Epsd")
Beispiel #8
0
 def randomAction(self, agentState):
     """Obtain a random action from the parent class and report it.

     When ``self.containers.showscreen`` is truthy the command is shown under
     "Last command" together with the marker "(random)", and ``self.epsilon``
     is shown under "Epsilon".

     Returns:
         The ``(toUse, toSave)`` pair produced by ``super().randomAction``.
     """
     command, record = super().randomAction(agentState)
     if not self.containers.showscreen:
         return command, record
     infoscreen.print(command,
                      "(random)",
                      containers=self.containers,
                      wname="Last command")
     infoscreen.print(self.epsilon,
                      containers=self.containers,
                      wname="Epsilon")
     return command, record