import numpy as np


class Environment:
    def __init__(self, g=10.0, d=100.0, H=10.0, m=10.0, F=3.0):
        """Instantiate a new environment in its initial state."""
        self.mc = MountainCar(g=g, d=d, H=H, m=m, F=F, R=50.0, T=0.0)

    def reset(self):
        self.mc.reset()
        # place the car at a random position near the bottom of the valley
        self.mc.x = np.random.uniform(-self.mc.d * 1.3, -self.mc.d * 0.7)

    def get_range(self):
        return [-1.5 * self.mc.d, 0.0]

    def observe(self):
        """Return the current observation (position, velocity) that the
        agent can make of the environment, if applicable."""
        return (self.mc.x, self.mc.vx)

    def act(self, action):
        """Perform the given agent action on the environment and return the
        reward, together with a "victory" flag on success."""
        reward = self.mc.act(action)
        return (reward, "victory" if reward > 0.0 else None)
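# A minimal usage sketch for the Environment wrapper above, assuming the
# MountainCar dynamics class is defined alongside it. The bang-bang policy
# (push in the direction of motion) is illustrative only, not part of the
# original code.
if __name__ == "__main__":
    env = Environment()
    env.reset()
    for step in range(1000):
        x, vx = env.observe()
        action = 3.0 if vx >= 0.0 else -3.0   # pump energy along the velocity
        reward, status = env.act(action)
        if status == "victory":
            print("reached the goal after", step + 1, "steps")
            break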
import random
import time


class MountainCarEnv(Env):
    print_interval = 100

    def __init__(self):
        self.env = MountainCar()
        self.noiseRange = 1.0
        self.om = 0.0          # state of the exploration-noise process
        self.alpha = 0.6       # noise decay rate
        self.beta = 0.4        # noise diffusion coefficient
        self.t = 0             # steps in the current episode
        self.totStep = 0       # total steps across all episodes
        self.r = 0.0           # cumulative reward of the current episode
        self.ep = 0            # episode counter
        self.perfs = result_log(algo="DDPG", l1=20, l2=10)
        self.actif = True
        # self.plot = result_plot()

    def state(self):
        return [self.env.getObservation()]

    def act(self, action):
        # add temporally correlated exploration noise to the agent's action
        actNoise = action + self.noise_func()
        self.env.performAction(actNoise[0])
        r = self.env.getReward()
        self.t += 1
        self.r += r
        return actNoise, [r]

    def reset(self, noise=True):
        self.actif = True
        self.env.reset()
        self.om = 0.0
        self.totStep += self.t
        if self.totStep != 0:
            self.perfs.addData(self.totStep, self.t, self.r)
        self.t = 0
        self.r = 0.0
        self.ep += 1
        # draw a fresh noise magnitude for the new episode
        self.noiseRange = random.uniform(0.0, 1.0) if noise else 0.0

    def noise_func(self):
        # decay toward zero plus Gaussian kicks, scaled by the episode's range
        self.om = self.om - self.alpha * self.om \
            + self.beta * random.gauss(0, 1) * self.noiseRange
        return self.om

    def isFinished(self):
        if self.actif and not self.env.isFinished():
            return False
        self.actif = False
        return True

    def getActionSize(self):
        return 1

    def getStateSize(self):
        return 2

    def getActionBounds(self):
        return [[1.2], [-1.2]]

    def printEpisode(self):
        print(time.strftime("[%H:%M:%S]"), "Episode:", self.ep,
              "steps:", self.t, "reward:", self.r)

    def performances(self):
        pass
        # self.plot.clear()
        # self.plot.add_row(self.perfs)
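# noise_func above is an Ornstein-Uhlenbeck-style process, the usual
# exploration noise for DDPG: the value decays toward zero at rate alpha
# while receiving Gaussian kicks scaled by beta. A self-contained sketch of
# the same update (ou_noise is an illustrative helper, not part of the
# original code):
def ou_noise(n, alpha=0.6, beta=0.4, scale=1.0):
    """Generate n temporally correlated noise samples."""
    om, samples = 0.0, []
    for _ in range(n):
        om = om - alpha * om + beta * random.gauss(0, 1) * scale
        samples.append(om)
    return samples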
    # (tail of getValueFn(valuefn): evaluates the value function on a
    # 100x100 grid and returns the coordinates and values for plotting)
    return X, Y, val.reshape((100, 100)).T

render_value_fn = True
file_path = './'
if render_value_fn:
    plt.contourf(*getValueFn(valuefn))
    plt.title('initial')
    plt.savefig(file_path + '0.png')

num_episodes = 500
k = 1
for i in range(num_episodes):
    r_t, s_t = domain.reset()
    agent.reset()
    count = 0
    cumulative_reward = 0
    while s_t is not None:
        # apply an action from the agent;
        # the domain will return a 'None' state when terminating
        r_t, s_t = domain.step(agent.step(r_t, s_t))
        count += 1
        cumulative_reward += r_t
    # final update step for the agent
    agent.step(r_t, s_t)
    if i % 2 == 0:
        # periodically re-render the value function, mirroring the
        # initial plot above
        if render_value_fn:
            plt.contourf(*getValueFn(valuefn))
            plt.title(str(i))
            plt.savefig(file_path + str(i) + '.png')
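# The training loop above assumes a specific domain/agent contract:
# domain.step(action) returns (reward, state) with state == None on
# termination, and the agent receives one final step(r_t, None) for its
# terminal update. A minimal stub pair satisfying that contract (both
# classes are illustrative, not part of the original code):
import random

class StubDomain:
    def reset(self):
        self.steps_left = 10
        return 0.0, (0.0, 0.0)              # initial (reward, state)

    def step(self, action):
        self.steps_left -= 1
        s = None if self.steps_left == 0 else (0.0, 0.0)
        return -1.0, s                      # per-step cost, next state

class StubAgent:
    def reset(self):
        pass

    def step(self, reward, state):
        if state is None:
            return None                     # terminal update, no action
        return random.uniform(-1.0, 1.0)    # random action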