Beispiel #1
0
def p_loop(EPISODE, GAMMA, LAMBDA, ALPHA, path):
    """
    Train the linear weight vector ``w`` with an eligibility-trace TD update.

    Each episode creates a fresh ``ENV()``, lets the actor pick actions from
    the current weights, and applies an update of the SARSA(lambda) form::

        delta = r + GAMMA * q(s', a') - q(s, a)
        et    = GAMMA * LAMBDA * et + features(s, a)
        w    += ALPHA * delta * et

    where ``q(s, a)`` is ``features(s, a).T @ w``.

    Args:
        EPISODE: Number of training episodes to run.
        GAMMA: Discount factor applied to the next state-action value.
        LAMBDA: Decay factor of the eligibility trace.
        ALPHA: Step size of the weight update.
        path: File used to load the initial weights and to store the trained
            ones. It should end with ``.npy``: ``np.save`` appends that
            extension when it is missing, whereas ``np.load`` opens the path
            exactly as given, so a suffix-less path would never be reloaded.

    Returns:
        None. Progress is printed and ``w`` is written to ``path`` after
        every 10th episode and once more at the end of training.
    """
    # Load existing weights, or start from zeros when they cannot be read.
    # Only load failures are caught so Ctrl-C and programming errors still
    # propagate.
    try:
        w = np.load(path)
    except (OSError, ValueError, EOFError):
        # NOTE(review): presumably 12 bins for each of the 4 state components
        # times 4 actions -- confirm against FEATURE_ENCODER.
        w = np.zeros((12 * 12 * 12 * 12 * 4, 1))
        print("Initialize Value")
        print("-" * 30)
    else:
        print("Load {}".format(path))
        print("-" * 30)

    # Feature encoder and (training-mode) actor.
    encoder = FEATURE_ENCODER(ACTION)
    actor = ACTOR(encoder, ACTION, is_train=True)

    # Number of environment steps between two action decisions.
    # NOTE(review): this is a float, and `t % step_a == 0` only fires on exact
    # multiples -- assumes INTERVAL_A is an integer multiple of INTERVAL_ENV.
    step_a = INTERVAL_A / INTERVAL_ENV

    for ep in range(EPISODE):
        # Per-episode logs: |delta| and reward at every action update.
        w_hist = []
        r_hist = []

        # The eligibility trace is reset at the start of every episode.
        et = np.zeros_like(w)

        # Fresh environment (randomly initialised, per the original comment).
        e = ENV()

        # First action a_t, its feature vector, and the first simulation step.
        a = actor.act([e.c.dx, e.c.dy, e.c.vx, e.c.vy], w)
        en = encoder.encode([e.c.dx, e.c.dy, e.c.vx, e.c.vy], a)
        e.update(a)

        for t in range(int(T / INTERVAL_ENV)):
            if t % step_a == 0:
                # Next action a_{t+1} and the features of (s_{t+1}, a_{t+1}).
                a_new = actor.act([e.c.dx, e.c.dy, e.c.vx, e.c.vy], w)
                en_new = encoder.encode([e.c.dx, e.c.dy, e.c.vx, e.c.vy],
                                        a_new)
                # TD error between the bootstrapped target and the old value.
                delta = e.r + GAMMA * np.matmul(en_new.T, w) - np.matmul(
                    en.T, w)
                # Decay the trace and add the features that were just used.
                et = GAMMA * LAMBDA * et + en
                # In-place weight update along the trace.
                w += ALPHA * delta * et

                a = a_new
                en = en_new

                w_hist.append(np.sum(np.abs(delta)))
                r_hist.append(e.r)

            # Advance the simulation with the currently held action.
            e.update(a)

        # Episode summary; -e.r is reported as the final distance.
        w_hist = np.array(w_hist)
        r_hist = np.array(r_hist)
        print(
            "EP{}:  delta_w:{:.2f}  total_r:{:.2f}  final_dist:{:.2f}  Vx:{:.2f}  Vy:{:.2f}"
            .format(ep + 1, np.sum(w_hist), np.sum(r_hist), -e.r, e.c.vx,
                    e.c.vy))

        # Checkpoint the weights every 10 episodes.
        if (ep + 1) % 10 == 0:
            np.save(path, w)
            print("Saved in {}".format(path))
            print("-" * 30)

    # Persist the episodes trained after the last periodic checkpoint;
    # otherwise their updates (or the whole run when EPISODE < 10) are lost.
    if EPISODE > 0 and EPISODE % 10 != 0:
        np.save(path, w)
        print("Saved in {}".format(path))
        print("-" * 30)
Beispiel #2
0
def main(EP, VIS, path, FAST):
    """
    Run evaluation episodes with stored weights and plot the trajectories.

    Args:
        EP: Number of episodes to simulate.
        VIS: If truthy, draw the trajectory live while simulating; otherwise
            record the track and show one plot per episode when it ends.
        path: File from which the weight matrix is loaded with ``np.load``.
        FAST: In live mode, only every ``FAST``-th simulation step is drawn.

    Returns:
        0 if the weights could not be loaded, otherwise None.
    """
    # Number of environment steps between two action decisions.
    # NOTE(review): this is a float, and `t % step_a == 0` only fires on exact
    # multiples -- assumes INTERVAL_A is an integer multiple of INTERVAL_ENV.
    step_a = INTERVAL_A / INTERVAL_ENV
    # Feature encoder and (evaluation-mode) actor.
    encoder = FEATURE_ENCODER(ACTION)
    actor = ACTOR(encoder, ACTION, is_train=False)
    # Load the weight matrix; only load failures are treated as "not found"
    # so Ctrl-C and programming errors still propagate.
    try:
        w = np.load(path)
    except (OSError, ValueError, EOFError):
        print("Could not find {}".format(path))
        return 0
    print("Load {}".format(path))
    print("-" * 30)

    # Interactive figure for live visualisation.
    if VIS:
        plt.ion()
        plt.figure(figsize=(5, 5))
        plt.axis([0, 100, 0, 100])

    # One character of this word is printed per 300 steps as a progress bar.
    progress = "processing"

    for ep in range(EP):
        sys.stdout.write("EP:{} ".format(ep + 1))
        # New 100 x 100 environment for this episode.
        e = ENV(w=100, h=100)

        if VIS:
            plt.scatter(e.target[0], e.target[1], s=30, c='red')
        else:
            track_x = []
            track_y = []

        for t in range(int(T / INTERVAL_ENV)):

            # Pick a new action on control steps; t == 0 always qualifies, so
            # `a` is defined before its first use.
            if t % step_a == 0:
                a = actor.act([e.c.dx, e.c.dy, e.c.vx, e.c.vy], w)

            e.update(a)

            if VIS and t % FAST == 0:
                # Live status line (1-based episode, like the other messages).
                sys.stdout.write(
                    "Ep:{}-{}  Vx:{:.2f}  Vy:{:.2f}  Action:{}        \r".
                    format(ep + 1, t + 1, e.c.vx, e.c.vy, a))
                sys.stdout.flush()

                plt.scatter(e.c.x, e.c.y, s=10, c='blue', alpha=0.2)
                plt.scatter(e.target[0], e.target[1], s=30, c='red')
                plt.pause(0.01)
            elif not VIS:
                track_x.append(e.c.x)
                track_y.append(e.c.y)
                if (t + 1) % 300 == 0:
                    # Wrap the index so episodes with 3300 or more steps do
                    # not run off the end of the progress word.
                    sys.stdout.write(
                        progress[((t + 1) // 300 - 1) % len(progress)])
                    sys.stdout.flush()

        # -e.r is reported as the final distance.
        print(
            "  Final_distance:{:.2f}                            ".format(-e.r))

        if VIS:
            plt.scatter(e.c.x, e.c.y, s=30, c='orange')
            plt.text(e.c.x, e.c.y - 1, "EP{} Dist:{:.2f}".format(ep + 1, -e.r))
            plt.pause(5)
        else:
            plt.scatter(track_x, track_y, s=5, c='blue', alpha=0.2)
            plt.scatter(e.target[0], e.target[1], s=30, c='red')
            # The final position equals the last recorded track point, and
            # reading it from the environment also works for an empty track.
            plt.scatter(e.c.x, e.c.y, s=30, c='orange')
            plt.text(e.c.x, e.c.y - 1, "Dist:{:.2f}".format(-e.r))
            plt.axis([0, 100, 0, 100])
            plt.show()