def main():
    """Train a DDPG agent on the take-off-and-hover quadcopter task.

    Opens a TF session, builds the task (start at rest on the ground,
    hover target 10 m straight up), and hands both to train().
    """
    # Start state: all-zero pose, linear and angular velocities.
    start_pose = [0.0] * 6
    start_vel = [0.0] * 3
    start_ang_vel = [0.0] * 3
    episode_seconds = 5
    hover_target = [0.0, 0.0, 10.0]

    with tf.Session() as sess:
        task = Task(start_pose, start_vel, start_ang_vel,
                    episode_seconds, hover_target)
        train(sess, agent.DDPG(task))
def main():
    """Train a DDPG agent on Gym's Pendulum-v0 with fully seeded RNGs."""
    with tf.Session() as sess:
        env = gym.make('Pendulum-v0')

        # Seed NumPy, TensorFlow, and the environment identically so
        # repeated runs are reproducible.
        seed = 1234
        np.random.seed(seed)
        tf.set_random_seed(seed)
        env.seed(seed)

        # Derive dimensions and the symmetric action range from the env spec.
        obs_dim = env.observation_space.shape[0]
        act_dim = env.action_space.shape[0]
        act_high = env.action_space.high
        act_low = -act_high

        ddpg = agent.DDPG(env, obs_dim, act_dim, act_low, act_high)
        train(sess, env, ddpg)
def main():
    """Train DDPG on the take-off-and-hover task in a fresh, seeded graph.

    Resets any default graph left over from earlier runs, then builds and
    seeds a new one before opening the session.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Graph-level seed so TF-side randomness is repeatable.
        tf.set_random_seed(1234)
        with tf.Session() as sess:
            task = Task(
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # initial pose (x, y, z, roll, pitch, yaw)
                [0.0, 0.0, 0.0],                 # initial linear velocities
                [0.0, 0.0, 0.0],                 # initial angular velocities
                5,                               # episode run time
                [0.0, 0.0, 10.0],                # target position: hover at z = 10
            )
            reward_all = train(sess, agent.DDPG(task))
def main(max_episodes):
    """Train DDPG on take-off-and-hover for up to ``max_episodes`` episodes.

    Returns whatever train() returns (the collected per-episode rewards).
    """
    with tf.Session() as sess:
        # Take-off and hover: start at rest on the ground, aim for 10 m altitude.
        pose0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        vel0 = [0.0, 0.0, 0.0]
        ang_vel0 = [0.0, 0.0, 0.0]
        flight_seconds = 20
        goal = [0.0, 0.0, 10.0]

        # Fix both RNG sources (distinct seeds) for repeatable training runs.
        np.random.seed(1234)
        tf.set_random_seed(2345)

        task = Task(pose0, vel0, ang_vel0, flight_seconds, goal)
        reward_all = train(sess, agent.DDPG(task), max_episodes)
        print('done')
        return reward_all
import sys # Task: take-off and hover init_pose = [0.0, 0.0, 100.0, 0.0, 0.0, 0.0] init_velocities = [0.0, 0.0, 0.0] init_angle_velocities = [0.0, 0.0, 0.0] run_time = 10 target_pos = [0.0, 0.0, 100.0] num_episodes = 20 #1000 best_score = -np.inf np.random.seed(1234) task = Task(init_pose, init_velocities, init_angle_velocities, run_time, target_pos) ddpg = agent.DDPG(task) reward_all = np.array([], dtype=float) for i_episode in range(1, num_episodes + 1): state = ddpg.reset_episode() # start a new episode count = 0 total_reward = 0.0 while True: action = ddpg.act(state) next_state, reward, done = task.step(action) ddpg.step(action, reward, next_state, done)