class MountainCarEnv(Env):
    """Env adapter around a MountainCar simulation for a DDPG agent.

    Adds exploration noise to actions via a decaying stochastic process and
    tracks per-episode step counts and cumulative reward in a ``result_log``.
    """

    # Episodes between progress printouts. Not read inside this class;
    # presumably consumed by an external training loop -- TODO confirm.
    print_interval = 100

    def __init__(self):
        self.env = MountainCar()   # underlying simulation (project class)
        self.noiseRange = 1.0      # scale of exploration noise this episode
        self.om = 0                # current value of the noise process
        self.alpha = 0.6           # noise decay coefficient
        self.beta = 0.4            # noise diffusion coefficient
        self.t = 0                 # steps taken in the current episode
        self.totStep = 0           # total steps across all episodes
        self.r = 0                 # cumulative reward in the current episode
        self.ep = 0                # episode counter
        self.perfs = result_log(algo="DDPG", l1=20, l2=10)  # performance log
        self.actif = True          # latched to False once an episode finishes

    def state(self):
        """Return the current observation wrapped in a batch-of-one list."""
        return [self.env.getObservation()]

    def act(self, action):
        """Apply ``action`` plus exploration noise to the simulation.

        Returns the noisy action and the reward wrapped in a one-element list.
        Also advances the step counter and cumulative reward.
        """
        noisy = action + self.noise_func()
        self.env.performAction(noisy[0])
        reward = self.env.getReward()
        self.t += 1
        self.r += reward
        return noisy, [reward]

    def reset(self, noise=True):
        """Start a new episode, logging the finished episode's statistics.

        When ``noise`` is False, exploration noise is disabled for the new
        episode; otherwise its scale is redrawn uniformly from [0, 1).
        """
        self.actif = True
        self.env.reset()
        self.om = 0
        self.totStep += self.t
        # Skip logging before any steps have been taken (very first reset).
        if self.totStep != 0:
            self.perfs.addData(self.totStep, self.t, self.r)
        self.t = 0
        self.r = 0
        self.ep += 1
        self.noiseRange = random.uniform(0.0, 1.0) if noise else 0.0

    def noise_func(self):
        """Advance and return the Ornstein-Uhlenbeck-style noise process."""
        self.om += -self.alpha * self.om \
            + self.beta * random.gauss(0, 1) * self.noiseRange
        return self.om

    def isFinished(self):
        """Return True once the episode has ended, latching ``actif`` False."""
        if self.actif and not self.env.isFinished():
            return False
        self.actif = False
        return True

    def getActionSize(self):
        """Dimensionality of the action space (scalar throttle)."""
        return 1

    def getStateSize(self):
        """Dimensionality of the observation (position, velocity)."""
        return 2

    def getActionBounds(self):
        # NOTE(review): returned as [upper, lower]? -- verify against callers.
        return [[1.2], [-1.2]]

    def printEpisode(self):
        """Print a one-line timestamped summary of the current episode."""
        # Fixed: Python 2 print statement -> print() function. The
        # comma-separated arguments produce the same space-joined output.
        print(time.strftime("[%H:%M:%S]"), " Episode : ", self.ep,
              " steps : ", self.t, " reward : ", self.r)

    def performances(self):
        """Placeholder for plotting hooks (previous plot code was dead)."""
        pass