Code example #1
0
VAR = 3                     # variance of the random (exploration) component of the stochastic policy
KESI = 0.9995               # per-step decay factor applied to that exploration variance
                            # NOTE(review): VAR/KESI are not used in the visible lines of this
                            # snippet — presumably consumed further down in the training loop.

RENDER = True               # whether to render the environment every step
# endregion

env = gym.make('Pendulum-v0')  # continuous-control task (old gym API: step() returns a 4-tuple)

s_dim = env.observation_space.shape[0]                  # state-space dimensionality
a_dim = env.action_space.shape[0]                       # action-space dimensionality
a_bound = env.action_space.low, env.action_space.high   # (lower, upper) action-bound tuple

# MEMORY_CAPACITY, BATCH_SIZE, GAMMA, ALPHA_A, ALPHA_C and TAO are assumed to be
# defined earlier in the file, outside this excerpt.
ddpg = DDPG(s_dim, a_dim, a_bound,
            MEMORY_CAPACITY, BATCH_SIZE,
            GAMMA, ALPHA_A, ALPHA_C, TAO)
# Restore/initialize the networks from the given checkpoint path.
# NOTE(review): 'initail_net' looks like a typo for 'initial_net', but it must
# match the method name as declared on the DDPG class — confirm before renaming.
ddpg.initail_net('./result.ckpt')

# Training loop: MAX_EPISODES episodes of at most MAX_EP_STEPS steps each.
# (MAX_EPISODES / MAX_EP_STEPS are assumed to be defined earlier in the file.)
# NOTE(review): the step body is truncated in this excerpt — reward bookkeeping
# and transition storage presumably follow env.step() beyond the visible lines.
for each_episode in range(MAX_EPISODES):

    ep_reward = 0      # accumulator for this episode's total reward
    s = env.reset()    # initial observation for the episode
    for each_step in range(MAX_EP_STEPS):

        if RENDER:

            env.render()   # visualize the current state

        # Add a batch axis (shape (1, s_dim)) for the actor network, then take [0]
        # to unbatch, since env.step() accepts a single action at a time.
        a = ddpg.choose_action(s[np.newaxis, :])[0]
        print(a)   # debug output of the chosen action

        s_, r, done, _ = env.step(a)   # next state, reward, terminal flag
Code example #2
0
File: run_this.py  Project: Xzavier0214/DDPG
VAR = 3  # variance of the random (exploration) component of the stochastic policy
KESI = .99995  # per-step decay factor for the exploration variance
               # NOTE(review): 0.99995 here vs 0.9995 in example #1 — the two
               # snippets deliberately(?) use different decay rates; confirm.

RENDER = True  # whether to render the environment every step
# endregion

env = gym.make('Pendulum-v0')
env.seed(1)  # fix the environment RNG for reproducibility (old gym API)

s_dim = env.observation_space.shape[0]  # state-space dimensionality
a_dim = env.action_space.shape[0]  # action-space dimensionality
a_bound = env.action_space.low, env.action_space.high  # (lower, upper) action-bound tuple

# MEMORY_CAPACITY, BATCH_SIZE, GAMMA, ALPHA_A, ALPHA_C and TAO are assumed to be
# defined earlier in the file, outside this excerpt.
ddpg = DDPG(s_dim, a_dim, a_bound, MEMORY_CAPACITY, BATCH_SIZE, GAMMA, ALPHA_A,
            ALPHA_C, TAO)
# Initialize networks from scratch (no checkpoint path, unlike example #1).
# NOTE(review): 'initail_net' looks like a typo for 'initial_net', but it must
# match the method name as declared on the DDPG class — confirm before renaming.
ddpg.initail_net()

var = VAR  # working copy of the exploration variance — presumably decayed by KESI later
# Training loop: MAX_EPISODES episodes of at most MAX_EP_STEPS steps each.
# NOTE(review): the step body is truncated in this excerpt — env.step(),
# exploration noise and reward bookkeeping lie beyond the visible lines.
for each_episode in range(MAX_EPISODES):

    ep_reward = 0      # accumulator for this episode's total reward
    s = env.reset()    # initial observation for the episode
    for each_step in range(MAX_EP_STEPS):

        if RENDER:

            env.render()   # visualize the current state

        # Choose an action for the current state, with random exploration added.
        # The [0] index is required because env.step() accepts only a single
        # action, not a batch (state is batched via np.newaxis for the network).
        a = ddpg.choose_action(s[np.newaxis, :])[0]