# Imports this script needs (project-local module paths are assumptions):
import os

import matplotlib.pyplot as plt
import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from tg_lib.tg_policy import TGPolicy  # assumed path


def main():
    """Run the open-loop trajectory generator (no learned policy) on the
    Minitaur and plot the resulting leg extension commands."""
    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 2000
    file_name = "mini_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True, on_rack=False)
    dt = env._time_step

    # Gait selection for the trajectory generator
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.5,
                  amplitude_lift=0.2)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    # # Initialize Normalizer
    # normalizer = Normalizer(state_dim)
    # # Initialize Policy
    # policy = Policy(state_dim, action_dim)
    # # Initialize Agent with normalizer, policy and gym env
    # agent = ARSAgent(normalizer, policy, env)
    # agent_num = input("Policy Number: ")
    # if os.path.exists(models_path + "/" + file_name + str(agent_num) +
    #                   "_policy"):
    #     print("Loading Existing agent")
    #     agent.load(models_path + "/" + file_name + str(agent_num))
    #     agent.policy.episode_steps = 3000
    #     policy = agent.policy

    env.reset()
    print("STARTED MINITAUR TEST SCRIPT")

    # Sample once just to get an array with the correct action shape
    action = env.action_space.sample()

    # TG modulation parameters that a learned policy would normally provide
    f_tg = 4.0
    Beta = 3.0
    h_tg = 0.0
    alpha_tg = 0.5
    intensity = 1.0

    # Record extensions for the plot
    LF_ext = []
    LB_ext = []
    RF_ext = []
    RB_ext = []

    LF_tp = []
    LB_tp = []
    RF_tp = []
    RB_tp = []

    t = 0
    while t < int(max_timesteps):
        action[:] = 0.0
        # Get action from the TG alone (no learned policy here)
        action = TG.get_utg(action, alpha_tg, h_tg, intensity,
                            env.minitaur.num_motors)
        # The second half of the action vector holds the per-leg extensions;
        # use integer division so the indices stay ints under Python 3.
        half = env.minitaur.num_motors // 2
        LF_ext.append(action[half])
        LB_ext.append(action[1 + half])
        RF_ext.append(action[2 + half])
        RB_ext.append(action[3 + half])
        # Perform action
        next_state, reward, done, _ = env.step(action)
        obs = TG.get_TG_state()
        # LF_tp.append(obs[0])
        # LB_tp.append(obs[1])
        # RF_tp.append(obs[2])
        # RB_tp.append(obs[3])
        # Increment phase
        TG.increment(dt, f_tg, Beta)
        # time.sleep(1.0)
        t += 1

    plt.plot(LF_ext, label="LF")
    plt.plot(LB_ext, label="LB")
    plt.plot(RF_ext, label="RF")
    plt.plot(RB_ext, label="RB")
    plt.xlabel("t")
    plt.ylabel("EXT")
    plt.title("Leg Extensions")
    plt.legend()
    plt.show()

    env.close()
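# For intuition about what TGPolicy.get_utg() / TG.increment() are doing, here
# is a minimal, self-contained sketch of a phase-driven trajectory generator
# in the spirit of "Policies Modulating Trajectory Generators" (Iscen et al.).
# Everything below is a hypothetical illustration, not TGPolicy's actual
# internals: the function names, the stance/swing split, and the plain
# sinusoid are all assumptions.
import numpy as np


def sketch_tg_extension(phase, amplitude_extension=0.5, amplitude_lift=0.2):
    """Toy extension signal: push down during stance, retract during swing."""
    if phase < np.pi:
        return amplitude_extension * np.sin(phase)  # stance half: extend
    return amplitude_lift * np.sin(phase)  # swing half: sin < 0, so retract


def sketch_tg_rollout(dt=0.01, f_tg=4.0, steps=200):
    """Advance the phase at frequency f_tg and record the extension trace."""
    phase, trace = 0.0, []
    for _ in range(steps):
        trace.append(sketch_tg_extension(phase))
        phase = (phase + 2.0 * np.pi * f_tg * dt) % (2.0 * np.pi)
    return trace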
# Imports this script needs (project-local module paths are assumptions):
import os
import multiprocessing as mp
from multiprocessing import Pipe

import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from tg_lib.tg_policy import TGPolicy  # assumed path
from ars_lib.ars import (ARSAgent, Normalizer, Policy,  # assumed path
                         ParallelWorker, _CLOSE)


def main():
    """Train an ARS agent that modulates the trajectory generator, with
    rollouts evaluated in parallel worker processes."""
    # Support for frozen (e.g. Windows) executables that use multiprocessing
    mp.freeze_support()

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    eval_freq = 1e1
    save_model = True
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=False)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.6,
                  amplitude_lift=0.3)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, alpha_tg, h_tg, Beta, intensity

    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)
    # Initialize Policy
    policy = Policy(state_dim, action_dim)
    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    agent_num = 0
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))

    # Init list for episode-reward storage
    evaluations = []

    env.reset(agent.desired_velocity, agent.desired_rate)
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    # MULTIPROCESSING: one worker (and pipe) per ARS perturbation direction
    num_processes = policy.num_deltas
    processes = []
    childPipes = []
    parentPipes = []
    # Create and store mp pipes
    for pr in range(num_processes):
        parentPipe, childPipe = Pipe()
        parentPipes.append(parentPipe)
        childPipes.append(childPipe)
    # Start worker processes
    for proc_num in range(num_processes):
        p = mp.Process(target=ParallelWorker,
                       args=(childPipes[proc_num], env, state_dim))
        p.start()
        processes.append(p)

    print("STARTED MINITAUR ARS")

    t = 0
    while t < int(max_timesteps):
        # Each iteration runs one full rollout per perturbation direction
        t += policy.episode_steps
        episode_timesteps += 1
        episode_reward = agent.train_parallel(parentPipes)
        # episode_reward = agent.train()

        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}, "
              ">400: {}".format(t, episode_num, policy.episode_steps,
                                episode_reward, agent.successes))

        # Reset reward bookkeeping for the next episode
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0

        # Periodically save results and (optionally) the model
        if (episode_num + 1) % eval_freq == 0:
            # evaluate_agent(agent, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                agent.save(models_path + "/" + str(file_name) +
                           str(episode_num))
                # replay_buffer.save(t)

        episode_num += 1

    # Close pipes and hence envs
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])
    for p in processes:
        p.join()
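# The training loop above delegates rollouts to ParallelWorker processes and
# shuts them down with a _CLOSE message, but the worker itself lives in the
# ARS library. For orientation, here is a hypothetical sketch of the pipe
# protocol this implies -- the message tags, payload layout, and the
# linear-policy rollout are illustrative assumptions, not the library's code.
import numpy as np

_SKETCH_ROLLOUT, _SKETCH_CLOSE = 0, 1  # hypothetical message tags


def sketch_rollout_reward(env, weights, max_steps=1000):
    """Hypothetical helper: score one episode of a linear policy a = W @ s."""
    state, total = env.reset(), 0.0
    for _ in range(max_steps):
        state, reward, done, _ = env.step(weights.dot(state))
        total += reward
        if done:
            break
    return total


def sketch_parallel_worker(childPipe, env, state_dim):
    """Block on the pipe, run one rollout per request, exit on close.

    state_dim is kept only to mirror the real worker's signature.
    """
    while True:
        tag, payload = childPipe.recv()
        if tag == _SKETCH_CLOSE:
            childPipe.close()  # closing the pipe lets the process exit
            break
        if tag == _SKETCH_ROLLOUT:
            # payload would carry the perturbed policy weights; the worker
            # answers with the episode reward for that perturbation.
            childPipe.send(sketch_rollout_reward(env, payload))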
# Imports this script needs (project-local module paths are assumptions):
import os

import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from td3_lib.td3 import TD3Agent, ReplayBuffer  # assumed path


def main():
    """Evaluate a trained TD3 policy on the Minitaur with rendering."""
    print("STARTING MINITAUR TD3")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "mini_td3_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    policy = TD3Agent(state_dim, action_dim, max_action)
    policy_num = 2239999
    if os.path.exists(models_path + "/" + file_name + str(policy_num) +
                      "_critic"):
        print("Loading Existing Policy")
        policy.load(models_path + "/" + file_name + str(policy_num))

    replay_buffer = ReplayBuffer()
    # Optionally load an existing replay buffer by setting its number
    buffer_number = 0  # BY DEFAULT WILL LOAD NOTHING, CHANGE THIS
    if os.path.exists(replay_buffer.buffer_path + "/" + "replay_buffer_" +
                      str(buffer_number) + ".data"):
        print("Loading Replay Buffer " + str(buffer_number))
        replay_buffer.load(buffer_number)
        print(replay_buffer.storage)

    # Init list for episode-reward storage
    evaluations = []
    state = env.reset()
    done = False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    for t in range(int(max_timesteps)):
        episode_timesteps += 1

        # Deterministic policy action, clipped to the action bounds
        action = np.clip(policy.select_action(np.array(state)), -max_action,
                         max_action)
        # rospy.logdebug("Selected Action: {}".format(action))

        # Perform action
        next_state, reward, done, _ = env.step(action)
        state = next_state
        episode_reward += reward
        # print("DT REWARD: {}".format(reward))

        if done:
            # +1 to account for 0 indexing.
            # +0 on ep_timesteps since it will increment +1 even if done=True
            print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".
                  format(t + 1, episode_num, episode_timesteps,
                         episode_reward))
            # Reset environment
            state, done = env.reset(), False
            evaluations.append(episode_reward)
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1
# Imports this script needs (project-local module paths are assumptions):
import os

import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from ars_lib.ars import ARSAgent, Normalizer, Policy  # assumed path


def main():
    """Evaluate (deploy) a trained ARS agent on the Minitaur with rendering."""
    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "mini_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)
    # Initialize Policy
    policy = Policy(state_dim, action_dim)
    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env)
    # raw_input() is Python 2 only; input() is its Python 3 equivalent
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 3000
        policy = agent.policy

    # Init list for episode-reward storage
    evaluations = []
    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < int(max_timesteps):
        # Each deploy() call runs one full rollout of episode_steps steps
        t += policy.episode_steps
        episode_timesteps += 1
        episode_reward = agent.deploy()
        # episode_reward = agent.train()

        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))

        # Reset reward bookkeeping for the next episode
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    env.close()
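# agent.deploy() above is the ARS library's evaluation entry point.
# Conceptually, an ARS deployment rollout runs the learned *linear* policy on
# mean/std-normalized observations; here is a sketch under that assumption
# (the function, its arguments, and the normalization details are
# illustrative, not the library's actual code).
import numpy as np


def sketch_ars_deploy(env, theta, obs_mean, obs_std, episode_steps=3000):
    """One evaluation rollout of a linear ARS policy a = theta @ norm(s)."""
    state, total = env.reset(), 0.0
    for _ in range(int(episode_steps)):
        norm_state = (state - obs_mean) / (obs_std + 1e-8)  # normalize obs
        state, reward, done, _ = env.step(theta.dot(norm_state))
        total += reward
        if done:
            break
    return total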
# Imports this script needs (project-local module paths are assumptions):
import os

import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from td3_lib.td3 import TD3Agent, ReplayBuffer  # assumed path


def main():
    """Train a TD3 agent on the Minitaur (headless)."""
    print("STARTING MINITAUR TD3")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    start_timesteps = 1e4  # 1e3 for testing purposes, use 1e4 for real
    expl_noise = 0.1
    batch_size = 100
    eval_freq = 1e4
    save_model = True
    file_name = "mini_td3_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=False)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    policy = TD3Agent(state_dim, action_dim, max_action)
    policy_num = 0
    if os.path.exists(models_path + "/" + file_name + str(policy_num) +
                      "_critic"):
        print("Loading Existing Policy")
        policy.load(models_path + "/" + file_name + str(policy_num))

    replay_buffer = ReplayBuffer()
    # Optionally load an existing replay buffer by setting its number
    buffer_number = 0  # BY DEFAULT WILL LOAD NOTHING, CHANGE THIS
    if os.path.exists(replay_buffer.buffer_path + "/" + "replay_buffer_" +
                      str(buffer_number) + ".data"):
        print("Loading Replay Buffer " + str(buffer_number))
        replay_buffer.load(buffer_number)
        # print(replay_buffer.storage)

    # Init list for episode-reward storage
    evaluations = []
    state = env.reset()
    done = False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TD3")

    for t in range(int(max_timesteps)):
        episode_timesteps += 1

        # Select action randomly or according to policy
        if t < start_timesteps:
            # Pure exploration: no training yet, just fill the buffer
            action = env.action_space.sample()
            # rospy.logdebug("Sampled Action")
        else:
            # Policy action plus exploration noise, clipped to the bounds.
            # NOTE (from the original comment): a Gazebo version clipped at
            # +/-0.99 of the limit because actions at the exact position
            # limit broke the model there; here the clip is at full
            # +/-max_action.
            action = np.clip(
                policy.select_action(np.array(state)) + np.random.normal(
                    0, max_action * expl_noise, size=action_dim),
                -max_action, max_action)
            # rospy.logdebug("Selected Action: {}".format(action))

        # Perform action
        next_state, reward, done, _ = env.step(action)
        done_bool = float(done)

        # Store data in replay buffer
        replay_buffer.add((state, action, next_state, reward, done_bool))

        state = next_state
        episode_reward += reward

        # Train agent after collecting sufficient data for the buffer
        if t >= start_timesteps:
            policy.train(replay_buffer, batch_size)

        if done:
            # +1 to account for 0 indexing.
            # +0 on ep_timesteps since it will increment +1 even if done=True
            print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".
                  format(t + 1, episode_num, episode_timesteps,
                         episode_reward))
            # Reset environment
            state, done = env.reset(), False
            evaluations.append(episode_reward)
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Periodically save results and (optionally) the model
        if (t + 1) % eval_freq == 0:
            # evaluate_policy(policy, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                policy.save(models_path + "/" + str(file_name) + str(t))
                # replay_buffer.save(t)

    env.close()
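# policy.train(replay_buffer, batch_size) above hides the actual TD3 update.
# As a reference, here is a sketch of the three ingredients that define TD3
# (Fujimoto et al., 2018): target-policy smoothing, clipped double-Q targets,
# and delayed actor/target updates. This is generic TD3, not this repo's
# TD3Agent internals; the batch layout, the critic returning twin Q-values,
# and the Q1() head are interface assumptions.
import torch
import torch.nn.functional as F


def sketch_td3_update(batch, actor, actor_target, critic, critic_target,
                      actor_opt, critic_opt, it, max_action, discount=0.99,
                      tau=0.005, policy_noise=0.2, noise_clip=0.5,
                      policy_freq=2):
    # Assumed batch layout: tensors of shape (B, ...) sampled from the buffer
    state, action, next_state, reward, not_done = batch

    with torch.no_grad():
        # (1) Target-policy smoothing: clipped noise on the target action
        noise = (torch.randn_like(action) * policy_noise).clamp(
            -noise_clip, noise_clip)
        next_action = (actor_target(next_state) + noise).clamp(
            -max_action, max_action)
        # (2) Clipped double-Q: bootstrap from the min of the twin critics
        q1, q2 = critic_target(next_state, next_action)
        target_q = reward + not_done * discount * torch.min(q1, q2)

    current_q1, current_q2 = critic(state, action)
    critic_loss = F.mse_loss(current_q1, target_q) + F.mse_loss(
        current_q2, target_q)
    critic_opt.zero_grad()
    critic_loss.backward()
    critic_opt.step()

    # (3) Delayed updates: refresh actor and targets every policy_freq steps
    if it % policy_freq == 0:
        actor_loss = -critic.Q1(state, actor(state)).mean()
        actor_opt.zero_grad()
        actor_loss.backward()
        actor_opt.step()
        # Polyak-average the target networks toward the live networks
        for p, tp in zip(critic.parameters(), critic_target.parameters()):
            tp.data.mul_(1 - tau).add_(tau * p.data)
        for p, tp in zip(actor.parameters(), actor_target.parameters()):
            tp.data.mul_(1 - tau).add_(tau * p.data)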
# Imports this script needs (project-local module paths are assumptions):
import os

import matplotlib.pyplot as plt
import numpy as np
import torch

from mini_bullet.minitaur_gym_env import MinitaurBulletEnv  # assumed path
from tg_lib.tg_policy import TGPolicy  # assumed path
from ars_lib.ars import ARSAgent, Normalizer, Policy  # assumed path


def main():
    """Evaluate (deploy) a trained TG+ARS agent on the Minitaur with
    rendering; optionally plot trajectory-generator state traces."""
    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 1e6
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True, on_rack=False)
    dt = env._time_step

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.2,
                  amplitude_lift=0.4)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, Beta, alpha_tg, h_tg, intensity

    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])
    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)
    # Initialize Policy
    policy = Policy(state_dim, action_dim)
    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    # raw_input() is Python 2 only; input() is its Python 3 equivalent
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 1000
        policy = agent.policy

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Sample once just to get an array with the correct action shape
    action = env.action_space.sample()

    # Record TG state for the plot. NOTE: the appends below are commented
    # out, so the plot stays empty unless they are re-enabled.
    # LF_ext = []
    # LB_ext = []
    # RF_ext = []
    # RB_ext = []
    LF_tp = []
    LB_tp = []
    RF_tp = []
    RB_tp = []

    t = 0
    while t < int(max_timesteps):
        action[:] = 0.0

        # # Get action from the TG alone (no learned policy)
        # action = TG.get_utg(action, alpha_tg, h_tg, intensity,
        #                     env.minitaur.num_motors)
        # half = env.minitaur.num_motors // 2
        # LF_ext.append(action[half])
        # LB_ext.append(action[1 + half])
        # RF_ext.append(action[2 + half])
        # RB_ext.append(action[3 + half])
        # # Perform action
        # next_state, reward, done, _ = env.step(action)

        obs = agent.TGP.get_TG_state()
        # LF_tp.append(obs[0])
        # LB_tp.append(obs[1])
        # RF_tp.append(obs[2])
        # RB_tp.append(obs[3])

        # # Increment phase
        # TG.increment(dt, f_tg, Beta)
        # # time.sleep(1.0)
        # t += 1

        # Each deployTG() call runs one full rollout of episode_steps steps
        t += policy.episode_steps
        episode_timesteps += 1
        episode_reward = agent.deployTG()
        # episode_reward = agent.train()

        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))

        # Reset reward bookkeeping for the next episode
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    plt.plot(LF_tp, label="LF")
    plt.plot(LB_tp, label="LB")
    plt.plot(RF_tp, label="RF")
    plt.plot(RB_tp, label="RB")
    plt.xlabel("t")
    plt.ylabel("EXT")
    plt.title("Leg Extensions")
    plt.legend()
    plt.show()

    env.close()