コード例 #1
0
ファイル: main.py プロジェクト: suvoganguli/DeepLearning
def main():
    """Train a DDPG agent on the take-off-and-hover task.

    Builds a ``Task`` from fixed all-zero initial conditions and a target
    position of ``[0.0, 0.0, 10.0]``, wraps it in ``agent.DDPG`` and hands
    the agent to ``train`` together with a freshly opened TensorFlow session.
    The session is closed when the ``with`` block exits.
    """
    # Task: take-off and hover
    start_pose = [0.0] * 6
    start_velocities = [0.0] * 3
    start_angle_velocities = [0.0] * 3
    duration = 5  # NOTE(review): units not visible here — confirm in Task
    goal_position = [0.0, 0.0, 10.0]

    with tf.Session() as session:
        hover_task = Task(start_pose, start_velocities,
                          start_angle_velocities, duration, goal_position)
        learner = agent.DDPG(hover_task)

        train(session, learner)
コード例 #2
0
def main():
    """Train a DDPG agent on the Gym ``Pendulum-v0`` environment.

    Seeds NumPy, TensorFlow and the environment with the same value (1234),
    reads the state/action dimensions and the action bounds from the
    environment's spaces, builds the agent and passes it to ``train`` along
    with the session and the environment.
    """
    seed = 1234

    with tf.Session() as session:
        environment = gym.make('Pendulum-v0')

        # Same seed for every source of randomness used here.
        np.random.seed(seed)
        tf.set_random_seed(seed)
        environment.seed(seed)

        observation_space = environment.observation_space
        action_space = environment.action_space

        n_state = observation_space.shape[0]
        n_action = action_space.shape[0]

        # The lower bound is taken as the negated upper bound rather than
        # read from ``action_space.low``.
        upper = action_space.high
        lower = -upper

        learner = agent.DDPG(environment, n_state, n_action, lower, upper)

        train(session, environment, learner)
コード例 #3
0
ファイル: main.py プロジェクト: suvoganguli/DeepLearning
def main():
    """Train a DDPG agent on the take-off-and-hover task in a fresh graph.

    Resets TensorFlow's default graph, then builds a new ``tf.Graph`` with
    its graph-level random seed set to 1234, opens a session inside it,
    constructs the task and the agent, and runs ``train``.

    Returns:
        The value returned by ``train(sess, ddpg)``. It used to be bound to
        a local and dropped; returning it lets callers inspect the training
        result. Callers that ignore the return value are unaffected.
    """

    # ===========================
    # Task: take-off and hover
    # ===========================

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Seed the graph before any ops are created so that they pick up
        # the graph-level seed.
        tf.set_random_seed(1234)

        with tf.Session() as sess:

            init_pose = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            init_velocities = [0.0, 0.0, 0.0]
            init_angle_velocities = [0.0, 0.0, 0.0]
            run_time = 5
            target_pos = [0.0, 0.0, 10.0]

            ddpg = agent.DDPG(
                Task(init_pose, init_velocities, init_angle_velocities,
                     run_time, target_pos))

            reward_all = train(sess, ddpg)

    return reward_all
コード例 #4
0
ファイル: main.py プロジェクト: suvoganguli/DeepLearning
def main(max_episodes):
    """Train a DDPG agent on the take-off-and-hover task.

    Args:
        max_episodes: Passed unchanged to ``train`` as its third argument.

    Returns:
        Whatever ``train`` returns for this run.
    """
    with tf.Session() as session:
        # Seed NumPy and TensorFlow (with different values) before the task
        # and the agent are constructed.
        np.random.seed(1234)
        tf.set_random_seed(2345)

        # Task: take-off and hover
        start_pose = [0.0] * 6
        start_velocities = [0.0] * 3
        start_angle_velocities = [0.0] * 3
        duration = 20
        goal_position = [0.0, 0.0, 10.0]

        hover_task = Task(start_pose, start_velocities,
                          start_angle_velocities, duration, goal_position)
        learner = agent.DDPG(hover_task)

        rewards = train(session, learner, max_episodes)

        print('done')

        return rewards
コード例 #5
0
ファイル: main.py プロジェクト: suvoganguli/DeepLearning
import sys

# Task: take-off and hover
# Module-level configuration for the training loop that follows.
# NOTE(review): target_pos equals the first three components of init_pose,
# so the target coincides with the start — presumably a hover-in-place
# setup; confirm the meaning of the pose components against Task.
init_pose = [0.0, 0.0, 100.0, 0.0, 0.0, 0.0]
init_velocities = [0.0, 0.0, 0.0]
init_angle_velocities = [0.0, 0.0, 0.0]
run_time = 10  # NOTE(review): units not visible here — confirm in Task
target_pos = [0.0, 0.0, 100.0]
num_episodes = 20  # NOTE(review): "1000" was noted here, presumably the full-length setting — confirm
best_score = -np.inf  # starts at negative infinity so any finite score compares greater

# Fixed NumPy seed, set before the task and agent are constructed.
np.random.seed(1234)

# The same Task instance is kept in `task` and handed to the agent.
task = Task(init_pose, init_velocities, init_angle_velocities, run_time,
            target_pos)
ddpg = agent.DDPG(task)

# Starts as an empty float array; presumably filled by the training loop
# below — verify where it is appended to.
reward_all = np.array([], dtype=float)

for i_episode in range(1, num_episodes + 1):

    state = ddpg.reset_episode()  # start a new episode

    count = 0
    total_reward = 0.0

    while True:

        action = ddpg.act(state)
        next_state, reward, done = task.step(action)
        ddpg.step(action, reward, next_state, done)