Example #1
import matplotlib.pyplot as plt


def main(MazeEnv):
    env = MazeEnv()
    env.render()
    plt.pause(2)
    n_epochs = 10000
    robot_loc = []
    steps = 0
    rewards = 0.0
    for i in range(n_epochs):
        steps += 1
        # next_action = np.random.randint(4, size=1)  # random baseline (needs numpy)
        next_action, robot_loc = env.expert(robot_loc)  # expert policy picks the move
        state_img, reward, done, _ = env.step(next_action)
        rewards += reward
        env.render()
        print('Step = %d, rewards = %.1f, reward = %.1f, done = %d' %
              (steps, rewards, reward, done),
              end='\r')
        if done:  # episode over: report, reset counters and the environment
            print('\n')
            steps = 0
            rewards = 0.0
            plt.pause(2)
            env.reset()
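
Example #1 never shows how main() is invoked. A minimal driver (a sketch, not from the source, assuming the class lives in a MazeEnv module as in Examples #2 and #3) could look like:

from MazeEnv import MazeEnv  # assumed module layout

if __name__ == '__main__':
    main(MazeEnv)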
Example #2
import numpy as np
from MazeEnv import *
import copy
from collections import defaultdict

world_size = np.array([5, 5])
env = MazeEnv(world_size=world_size,
              gold_pos=np.array([[1, 2]]),
              bad_pos=np.array([[3, 4]]),
              max_ite=50)
env.reset()
DISCOUNT = 0.9
epsilon = 0.1
Q = np.zeros((world_size[0] * world_size[1], 4))
Q_old = copy.deepcopy(Q)
PI = np.zeros((world_size[0] * world_size[1]), dtype=int)  # 0, 1, 2, 3 stand for the four actions
Return = defaultdict(list)  # sampled returns per encoded (state, action) pair

ite = 0
while True:  # episode loop
    ite += 1
    Q_old = copy.deepcopy(Q)
    s = env.reset()
    s_a_his = []
    r_his = []
    while True:  # step loop
        # a = PI[s]  # pure greedy policy (no exploration)
        a = PI[s] if (np.random.rand() > epsilon) else env.random_action()  # epsilon-greedy
        s_, r, d = env.step(a)
        s_a_his.append(env.encode_s_a(s, a))
        r_his.append(r)
        s = s_
        if d:
            break
    # Every-visit Monte Carlo update: walk the episode backwards,
    # accumulating the discounted return G for each visited (s, a).
    # NOTE: env.decode_s_a is assumed here as the inverse of
    # env.encode_s_a; it is not shown in this snippet.
    G = 0.0
    for sa, r in zip(reversed(s_a_his), reversed(r_his)):
        G = DISCOUNT * G + r
        Return[sa].append(G)
        s_t, a_t = env.decode_s_a(sa)
        Q[s_t, a_t] = np.mean(Return[sa])
    PI = np.argmax(Q, axis=1)  # greedy policy improvement
    if ite > 10 and np.abs(Q - Q_old).max() < 1e-4:
        break  # Q has stopped changing between episodes: treat as converged
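
Once training exits, a quick greedy rollout (a sketch, reusing only the reset()/step() calls already shown above) makes the learned policy visible:

print(PI.reshape(world_size))  # learned action index per cell
s = env.reset()
total = 0.0
while True:
    s, r, d = env.step(PI[s])  # always follow the greedy policy
    total += r
    if d:
        break
print('greedy-rollout return:', total)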
Example #3
import numpy as np
from MazeEnv import *

world_size = np.array([5, 5])
env = MazeEnv(world_size=world_size,
              gold_pos=np.array([[1, 2]]),
              bad_pos=np.array([[3, 4]]),
              max_ite=100)
env.reset()
# each of the four actions is taken with probability 1/4
ACTION_PROB = 0.25
DISCOUNT = 0.9

# Direct policy evaluation: solve the Bellman expectation equation
# (I - DISCOUNT * P) V = R as a linear system, written here as
# (A1 + A2) V = -b with A1 = -I, A2 = DISCOUNT * P, b = R.
A1 = -1 * np.eye(world_size[0] * world_size[1])
A2 = np.zeros((world_size[0] * world_size[1], world_size[0] * world_size[1]))
b = np.zeros(world_size[0] * world_size[1])

for s in env.feasible_states:
    for a in env.feasible_actions:
        s_, r, d = env.step_state(s, a)  # deterministic transition model
        A2[s, s_] += ACTION_PROB * DISCOUNT
        b[s] += ACTION_PROB * r

V = np.linalg.solve(A1 + A2, -b).reshape(world_size)
print(V)
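
As a sanity check (a sketch, not in the original), iterative policy evaluation with the same transition sweep should converge to the same V as the direct linear solve; the sweep cap and tolerance are illustrative choices:

V_iter = np.zeros(world_size[0] * world_size[1])
for _ in range(1000):  # repeat the Bellman backup until V stops changing
    V_new = np.zeros_like(V_iter)
    for s in env.feasible_states:
        for a in env.feasible_actions:
            s_, r, d = env.step_state(s, a)
            V_new[s] += ACTION_PROB * (r + DISCOUNT * V_iter[s_])
    converged = np.abs(V_new - V_iter).max() < 1e-6
    V_iter = V_new
    if converged:
        break
print(np.allclose(V_iter.reshape(world_size), V, atol=1e-4))  # expect True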