Example #1
import numpy as np

class Environment:
    def __init__(self, g=10.0, d=100.0, H=10.0, m=10.0, F=3.0):
        """Instantiate a new environment in its initial state."""
        self.mc = MountainCar(g=g, d=d, H=H, m=m, F=F, R=50.0, T=0.0)

    def reset(self):
        self.mc.reset()
        # place the car at a random place near the bottom
        self.mc.x = np.random.uniform(-self.mc.d*1.3, -self.mc.d*0.7)

    def get_range(self):
        return [-1.5*self.mc.d, 0.0]

    def observe(self):
        """Returns the current observation that the agent can make
        of the environment, if applicable.
        """
        return (self.mc.x, self.mc.vx)

    def act(self, action):
        """Perform the given agent action on the environment
        and return a reward.
        """
        reward = self.mc.act(action)
        return (reward, "victory" if reward > 0.0 else None)
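
A minimal interaction loop for this wrapper, assuming the MountainCar dynamics above and a hypothetical agent object exposing a get_action(observation) method, might look like this:

env = Environment()
env.reset()
for step in range(1000):
    observation = env.observe()             # (x, vx)
    action = agent.get_action(observation)  # hypothetical agent interface
    reward, status = env.act(action)
    if status == "victory":
        break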
Example #2
import random
import time

class MountainCarEnv(Env):
    print_interval = 100

    def __init__(self):
        self.env = MountainCar()
        self.noiseRange = 1.0
        self.om = 0
        self.alpha = 0.6
        self.beta = 0.4
        self.t = 0
        self.totStep = 0
        self.r = 0
        self.ep = 0
        self.perfs = result_log(algo="DDPG", l1=20, l2=10)
        self.actif = True
        #self.plot = result_plot()
    
    def state(self):
        return [self.env.getObservation()]
    def act(self, action):
        actNoise = action + self.noise_func()
        self.env.performAction(actNoise[0])
        r = self.env.getReward()
        self.t += 1
        self.r += r
        return actNoise, [r]
    def reset(self, noise=True):
        self.actif = True
        self.env.reset()
        self.om = 0
        self.totStep += self.t
        if self.totStep != 0:
            self.perfs.addData(self.totStep, self.t, self.r)
        self.t = 0
        self.r = 0
        self.ep += 1
        if not noise:
            self.noiseRange = 0.0
        else:
            self.noiseRange = random.uniform(0.,1.0)
    def noise_func(self):
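        # AR(1) / Ornstein-Uhlenbeck-style exploration noise:
        # decay the previous value by alpha and add scaled Gaussian noise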
        self.om = self.om-self.alpha*self.om + self.beta*random.gauss(0,1)*self.noiseRange
        return self.om
    def isFinished(self):
        if self.actif and not self.env.isFinished():
            return False
        else:
            self.actif = False
            return True
    def getActionSize(self):
        return 1
    def getStateSize(self):
        return 2
    def getActionBounds(self):
        return [[1.2], [-1.2]]
    def printEpisode(self):
        print time.strftime("[%H:%M:%S]"), "Episode:", self.ep, "steps:", self.t, "reward:", self.r
    def performances(self):
        # self.plot.clear()
        # self.plot.add_row(self.perfs)
        pass
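
A rough episode loop over this wrapper, assuming a hypothetical policy object with a select_action(state) method (e.g. a DDPG actor, not shown in the snippet) that returns a one-element numpy array, since act() adds scalar noise and indexes element 0, could look like this:

env = MountainCarEnv()
for episode in range(10000):
    env.reset(noise=True)
    while not env.isFinished():
        state = env.state()
        action = policy.select_action(state)    # hypothetical actor call, returns np.array([a])
        noisy_action, reward = env.act(action)  # reward is returned as a one-element list
    if episode % MountainCarEnv.print_interval == 0:
        env.printEpisode()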
Example #3
    return X, Y, val.reshape((100,100)).T

render_value_fn = True

file_path = './'

if render_value_fn:
    plt.contourf(*getValueFn(valuefn))
    plt.title('initial')
    plt.savefig(file_path + '0.png')

num_episodes = 500
k = 1
for i in xrange(num_episodes):

    r_t, s_t = domain.reset()
    agent.reset()
    count = 0
    cumulative_reward = 0

    while s_t is not None:
        # apply an action from the agent
        # the domain will return a 'None' state when terminating
        r_t, s_t = domain.step(agent.step(r_t, s_t))
        count += 1
        cumulative_reward += r_t

    # final update step for the agent
    agent.step(r_t, s_t)

    if i % 2 == 0: