Ejemplo n.º 1
0
def problemA():
    print("PROBLEM A...")
    episodes = 10000
    arr = np.zeros(episodes)
    G = Gridworld()
    G.gamma = 0.9
    for e in range(episodes):  # number of episodes loop
        G.timeStep = 0
        # print("episode %d" % (e+1))
        while (not G.isEnd):
            # print(G.currentState)
            G.step(G.action)
        arr[e] = G.reward
        G.reset()

    opt_disc_returns = np.amax(arr)
    opt_episode = np.argmax(arr) + 1
    mean = np.mean(arr)
    variance = np.var(arr)
    std_dev = np.std(arr)
    min = np.amin(arr)
    print("Highest observed discounted returns is %f achieved in"
          " episode number %d" % (opt_disc_returns, opt_episode))
    print("The mean of discounted returns is %f, variance is %f"
          " and standard deviation is %f" % (mean, variance, std_dev))
    print("Max is %f and min is %f" % (opt_disc_returns, min))
    return arr
Ejemplo n.º 2
0
def problemE():
    print("PROBLEM E...")
    episodes = 10000
    count = 0
    G = Gridworld(startState=19)
    G.gamma = 0.9
    for e in range(episodes):
        G.timeStep = 0
        while ((G.timeStep < 11) and (not G.isEnd)):
            G.step(G.action)
        if G.state == 22:
            count = count + 1
        G.reset()
    print("The empirical probability of S19 = 21 given S8 = 18 is %f" %
          (count / episodes))
Ejemplo n.º 3
0
def problemE():
    env = Gridworld(startState=18)
    env.gamma = 0.9
    episode = 0
    hit = 0
    total_try = 100000
    while episode < total_try:
        episode += 1
        env.timeStep = 8
        while env.timeStep < 19:
            act = env.action
            state, reward, isEnd = Gridworld.step(env, act)
            if isEnd:
                break
        if env.currentState == 21:
            hit += 1
    print('P is {}'.format(hit / total_try))