def main():
    """Train a DDPG agent on the take-off-and-hover quadcopter task.

    Opens a TF session, builds the task (start at rest on the ground,
    hover target 10 m straight up), and hands both to train().
    """
    # Start state: all-zero pose, linear and angular velocities.
    start_pose = [0.0] * 6
    start_vel = [0.0] * 3
    start_ang_vel = [0.0] * 3
    episode_seconds = 5
    hover_target = [0.0, 0.0, 10.0]

    with tf.Session() as sess:
        task = Task(start_pose, start_vel, start_ang_vel,
                    episode_seconds, hover_target)
        train(sess, agent.DDPG(task))
def main():
    """Train a DDPG agent on Gym's Pendulum-v0 with fully seeded RNGs."""
    with tf.Session() as sess:
        env = gym.make('Pendulum-v0')

        # Seed NumPy, TensorFlow, and the environment identically so
        # repeated runs are reproducible.
        seed = 1234
        np.random.seed(seed)
        tf.set_random_seed(seed)
        env.seed(seed)

        # Derive dimensions and the symmetric action range from the env spec.
        obs_dim = env.observation_space.shape[0]
        act_dim = env.action_space.shape[0]
        act_high = env.action_space.high
        act_low = -act_high

        ddpg = agent.DDPG(env, obs_dim, act_dim, act_low, act_high)
        train(sess, env, ddpg)
def main():
    """Train DDPG on the take-off-and-hover task in a fresh, seeded graph.

    Resets any default graph left over from earlier runs, then builds and
    seeds a new one before opening the session.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Graph-level seed so TF-side randomness is repeatable.
        tf.set_random_seed(1234)
        with tf.Session() as sess:
            task = Task(
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # initial pose (x, y, z, roll, pitch, yaw)
                [0.0, 0.0, 0.0],                 # initial linear velocities
                [0.0, 0.0, 0.0],                 # initial angular velocities
                5,                               # episode run time
                [0.0, 0.0, 10.0],                # target position: hover at z = 10
            )
            reward_all = train(sess, agent.DDPG(task))
def main(max_episodes):
    """Train DDPG on take-off-and-hover for up to ``max_episodes`` episodes.

    Returns whatever train() returns (the collected per-episode rewards).
    """
    with tf.Session() as sess:
        # Take-off and hover: start at rest on the ground, aim for 10 m altitude.
        pose0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        vel0 = [0.0, 0.0, 0.0]
        ang_vel0 = [0.0, 0.0, 0.0]
        flight_seconds = 20
        goal = [0.0, 0.0, 10.0]

        # Fix both RNG sources (distinct seeds) for repeatable training runs.
        np.random.seed(1234)
        tf.set_random_seed(2345)

        task = Task(pose0, vel0, ang_vel0, flight_seconds, goal)
        reward_all = train(sess, agent.DDPG(task), max_episodes)
        print('done')
        return reward_all
import sys # Task: take-off and hover init_pose = [0.0, 0.0, 100.0, 0.0, 0.0, 0.0] init_velocities = [0.0, 0.0, 0.0] init_angle_velocities = [0.0, 0.0, 0.0] run_time = 10 target_pos = [0.0, 0.0, 100.0] num_episodes = 20 #1000 best_score = -np.inf np.random.seed(1234) task = Task(init_pose, init_velocities, init_angle_velocities, run_time, target_pos) ddpg = agent.DDPG(task) reward_all = np.array([], dtype=float) for i_episode in range(1, num_episodes + 1): state = ddpg.reset_episode() # start a new episode count = 0 total_reward = 0.0 while True: action = ddpg.act(state) next_state, reward, done = task.step(action) ddpg.step(action, reward, next_state, done)