def main(): """ The main() function. """ print("STARTING SPOT TEST ENV") seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotGymEnv(render=True, on_rack=False) # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() g_u_i = GUI() spot = SpotModel() T_bf = spot.WorldToFoot print("STARTED SPOT TEST ENV") t = 0 while t < (int(max_timesteps)): # GUI: x, y, z | r, p , y pos, orn, _, _, _, _ = g_u_i.UserInput() # Get Roll, Pitch, Yaw joint_angles = spot.IK(orn, pos, T_bf) action = joint_angles.reshape(-1) action = env.action_space.sample() next_state, reward, done, _ = env.step(action) # time.sleep(1.0) t += 1 env.close() print(joint_angles)
def main(): """ The main() function. """ print("STARTING SPOT TEST ENV") seed = 0 max_timesteps = 4e6 # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=True, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path) # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) state = env.reset() g_u_i = GUI(env.spot.quadruped) spot = SpotModel() T_bf0 = spot.WorldToFoot T_bf = copy.deepcopy(T_bf0) bzg = BezierGait(dt=env._time_step) bz_step = BezierStepper(dt=env._time_step, mode=0) action = env.action_space.sample() FL_phases = [] FR_phases = [] BL_phases = [] BR_phases = [] yaw = 0.0 print("STARTED SPOT TEST ENV") t = 0 while t < (int(max_timesteps)): bz_step.ramp_up() pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = bz_step.StateMachine( ) pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = g_u_i.UserInput( ) yaw = env.return_yaw() P_yaw = 5.0 if ARGS.AutoYaw: YawRate += -yaw * P_yaw # print("YAW RATE: {}".format(YawRate)) # TEMP bz_step.StepLength = StepLength bz_step.LateralFraction = LateralFraction bz_step.YawRate = YawRate bz_step.StepVelocity = StepVelocity contacts = state[-4:] FL_phases.append(env.spot.LegPhases[0]) FR_phases.append(env.spot.LegPhases[1]) BL_phases.append(env.spot.LegPhases[2]) BR_phases.append(env.spot.LegPhases[3]) # Get Desired Foot Poses T_bf = bzg.GenerateTrajectory(StepLength, LateralFraction, YawRate, StepVelocity, T_bf0, T_bf, ClearanceHeight, PenetrationDepth, contacts) joint_angles = spot.IK(orn, pos, T_bf) env.pass_joint_angles(joint_angles.reshape(-1)) # Get External Observations env.spot.GetExternalObservations(bzg, bz_step) # Step state, reward, done, _ = env.step(action) if done: print("DONE") if ARGS.AutoReset: env.reset() # plt.plot() # plt.plot(FL_phases, label="FL") # plt.plot(FR_phases, label="FR") # plt.plot(BL_phases, label="BL") # plt.plot(BR_phases, label="BR") # plt.xlabel("dt") # plt.ylabel("value") # plt.title("Leg Phases") # plt.legend() # plt.show() # time.sleep(1.0) t += 1 env.close() print(joint_angles)
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 if ARGS.Seed: seed = ARGS.Seed max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 # Plot Policy Output if ARGS.PlotPolicy or ARGS.TrueAction or ARGS.SaveData: if ARGS.TrueAction: action_name = "robot_act" action = np.array(agent.true_action_history) else: action_name = "agent_act" action = np.array(agent.action_history) if ARGS.SaveData: if height_field: terrain_name = "rough_" else: terrain_name = "flat_" np.save( results_path + "/" + "policy_out_" + terrain_name + action_name, action) print("SAVED DATA") ClearHeight_act = action[:, 0] BodyHeight_act = action[:, 1] Residuals_act = action[:, 2:] plt.plot(ClearHeight_act, label='Clearance Height Mod', color='black') plt.plot(BodyHeight_act, label='Body Height Mod', color='darkviolet') # FL plt.plot(Residuals_act[:, 0], label='Residual: FL (x)', color='limegreen') plt.plot(Residuals_act[:, 1], label='Residual: FL (y)', color='lime') plt.plot(Residuals_act[:, 2], label='Residual: FL (z)', color='green') # FR plt.plot(Residuals_act[:, 3], label='Residual: FR (x)', color='lightskyblue') plt.plot(Residuals_act[:, 4], label='Residual: FR (y)', color='dodgerblue') plt.plot(Residuals_act[:, 5], label='Residual: FR (z)', color='blue') # BL plt.plot(Residuals_act[:, 6], label='Residual: BL (x)', color='firebrick') plt.plot(Residuals_act[:, 7], label='Residual: BL (y)', color='crimson') plt.plot(Residuals_act[:, 8], label='Residual: BL (z)', color='red') # BR plt.plot(Residuals_act[:, 9], label='Residual: BR (x)', color='gold') plt.plot(Residuals_act[:, 10], label='Residual: BR (y)', color='orange') plt.plot(Residuals_act[:, 11], label='Residual: BR (z)', color='coral') plt.xlabel("Epoch Iteration") plt.ylabel("Action Value") plt.title("Policy Output") plt.legend() plt.show() env.close()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy # Evaluate untrained agent and init list for storage evaluations = [] env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 env.close()
def main(): """ The main() function. """ # Hold mp pipes mp.freeze_support() print("STARTING SPOT TRAINING ENV") seed = 0 max_timesteps = 4e6 eval_freq = 1e1 save_model = True file_name = "spot_ars_" if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() g_u_i = GUI(env.spot.quadruped) spot = SpotModel() T_bf = spot.WorldToFoot bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot) agent_num = 0 if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) env.reset(agent.desired_velocity, agent.desired_rate) episode_reward = 0 episode_timesteps = 0 episode_num = 0 # Create mp pipes num_processes = policy.num_deltas processes = [] childPipes = [] parentPipes = [] # Store mp pipes for pr in range(num_processes): parentPipe, childPipe = Pipe() parentPipes.append(parentPipe) childPipes.append(childPipe) # Start multiprocessing # Start multiprocessing for proc_num in range(num_processes): p = mp.Process(target=ParallelWorker, args=(childPipes[proc_num], env, state_dim)) p.start() processes.append(p) print("STARTED SPOT TRAINING ENV") t = 0 while t < (int(max_timesteps)): # Maximum timesteps per rollout episode_reward, episode_timesteps = agent.train_parallel(parentPipes) t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print( "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}" .format(t + 1, episode_num, episode_timesteps, episode_reward, episode_reward / float(episode_timesteps))) # Evaluate episode if (episode_num + 1) % eval_freq == 0: if save_model: agent.save(models_path + "/" + str(file_name) + str(episode_num)) # replay_buffer.save(t) episode_num += 1 # Close pipes and hence envs for parentPipe in parentPipes: parentPipe.send([_CLOSE, "pay2"]) for p in processes: p.join()
def __init__(self, pybullet_client, urdf_root=pybullet_data.getDataPath(), time_step=0.01, action_repeat=1, self_collision_enabled=False, motor_velocity_limit=9.7, pd_control_enabled=False, accurate_motor_model_enabled=False, remove_default_joint_damping=False, max_force=100.0, motor_kp=1.0, motor_kd=0.02, pd_latency=0.0, control_latency=0.0, observation_noise_stdev=SENSOR_NOISE_STDDEV, torque_control_enabled=False, motor_overheat_protection=False, on_rack=False, kd_for_pd_controllers=0.3, pose_id='stand', np_random=np.random, contacts=True): """Constructs a spot and reset it to the initial states. Args: pybullet_client: The instance of BulletClient to manage different simulations. urdf_root: The path to the urdf folder. time_step: The time step of the simulation. action_repeat: The number of ApplyAction() for each control step. self_collision_enabled: Whether to enable self collision. motor_velocity_limit: The upper limit of the motor velocity. pd_control_enabled: Whether to use PD control for the motors. accurate_motor_model_enabled: Whether to use the accurate DC motor model. remove_default_joint_damping: Whether to remove the default joint damping. motor_kp: proportional gain for the accurate motor model. motor_kd: derivative gain for the accurate motor model. pd_latency: The latency of the observations (in seconds) used to calculate PD control. On the real hardware, it is the latency between the microcontroller and the motor controller. control_latency: The latency of the observations (in second) used to calculate action. On the real hardware, it is the latency from the motor controller, the microcontroller to the host (Nvidia TX2). observation_noise_stdev: The standard deviation of a Gaussian noise model for the sensor. It should be an array for separate sensors in the following order [motor_angle, motor_velocity, motor_torque, base_roll_pitch_yaw, base_angular_velocity] torque_control_enabled: Whether to use the torque control, if set to False, pose control will be used. motor_overheat_protection: Whether to shutdown the motor that has exerted large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in spot.py for more details. on_rack: Whether to place the spot on rack. This is only used to debug the walking gait. In this mode, the spot's base is hanged midair so that its walking gait is clearer to visualize. """ # SPOT MODEL self.spot = SpotModel() # Whether to include contact sensing self.contacts = contacts # Control Inputs self.StepLength = 0.0 self.StepVelocity = 0.0 self.LateralFraction = 0.0 self.YawRate = 0.0 # Leg Phases self.LegPhases = [0.0, 0.0, 0.0, 0.0] # used to calculate minitaur acceleration self.init_leg = INIT_LEG_POS self.init_foot = INIT_FOOT_POS self.prev_ang_twist = np.array([0, 0, 0]) self.prev_lin_twist = np.array([0, 0, 0]) self.prev_lin_acc = np.array([0, 0, 0]) self.num_motors = 12 self.num_legs = int(self.num_motors / 3) self._pybullet_client = pybullet_client self._action_repeat = action_repeat self._urdf_root = urdf_root self._self_collision_enabled = self_collision_enabled self._motor_velocity_limit = motor_velocity_limit self._pd_control_enabled = pd_control_enabled self._motor_direction = np.ones(self.num_motors) self._observed_motor_torques = np.zeros(self.num_motors) self._applied_motor_torques = np.zeros(self.num_motors) self._max_force = max_force self._pd_latency = pd_latency self._control_latency = control_latency self._observation_noise_stdev = observation_noise_stdev self._accurate_motor_model_enabled = accurate_motor_model_enabled self._remove_default_joint_damping = remove_default_joint_damping self._observation_history = collections.deque(maxlen=100) self._control_observation = [] self._chassis_link_ids = [-1] self._leg_link_ids = [] self._motor_link_ids = [] self._foot_link_ids = [] self._torque_control_enabled = torque_control_enabled self._motor_overheat_protection = motor_overheat_protection self._on_rack = on_rack self._pose_id = pose_id self.np_random = np_random if self._accurate_motor_model_enabled: self._kp = motor_kp self._kd = motor_kd self._motor_model = motor.MotorModel( torque_control_enabled=self._torque_control_enabled, kp=self._kp, kd=self._kd) elif self._pd_control_enabled: self._kp = 8 self._kd = kd_for_pd_controllers else: self._kp = 1 self._kd = 1 self.time_step = time_step self._step_counter = 0 # reset_time=-1.0 means skipping the reset motion. # See Reset for more details. self.Reset(reset_time=-1) self.init_on_rack_position = INIT_RACK_POSITION self.init_position = INIT_POSITION self.initial_pose = self.INIT_POSES[pose_id]
def main(): """ The main() function. """ print("STARTING SPOT SAC") # TRAINING PARAMETERS seed = 0 max_timesteps = 4e6 batch_size = 256 eval_freq = 1e4 save_model = True file_name = "spot_sac_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=False, on_rack=False, height_field=False, draw_foot_path=False) env = NormalizedActions(env) # Set seeds env.seed(seed) torch.manual_seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) print("RECORDED MAX ACTION: {}".format(max_action)) hidden_dim = 256 policy = PolicyNetwork(state_dim, action_dim, hidden_dim) replay_buffer_size = 1000000 replay_buffer = ReplayBuffer(replay_buffer_size) sac = SoftActorCritic(policy=policy, state_dim=state_dim, action_dim=action_dim, replay_buffer=replay_buffer) policy_num = 0 if os.path.exists(models_path + "/" + file_name + str(policy_num) + "_critic"): print("Loading Existing Policy") sac.load(models_path + "/" + file_name + str(policy_num)) policy = sac.policy_net # Evaluate untrained policy and init list for storage evaluations = [] state = env.reset() done = False episode_reward = 0 episode_timesteps = 0 episode_num = 0 max_t_per_ep = 5000 # State Machine for Random Controller Commands bz_step = BezierStepper(dt=0.01) # Bezier Gait Generator bzg = BezierGait(dt=0.01) # Spot Model spot = SpotModel() T_bf0 = spot.WorldToFoot T_bf = copy.deepcopy(T_bf0) BaseClearanceHeight = bz_step.ClearanceHeight BasePenetrationDepth = bz_step.PenetrationDepth print("STARTED SPOT SAC") for t in range(int(max_timesteps)): pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = bz_step.StateMachine( ) env.spot.GetExternalObservations(bzg, bz_step) # Read UPDATED state based on controls and phase state = env.return_state() action = sac.policy_net.get_action(state) # Bezier params specced by action CD_SCALE = 0.002 SLV_SCALE = 0.01 StepLength += action[0] * CD_SCALE StepVelocity += action[1] * SLV_SCALE LateralFraction += action[2] * SLV_SCALE YawRate = action[3] ClearanceHeight += action[4] * CD_SCALE PenetrationDepth += action[5] * CD_SCALE # CLIP EVERYTHING StepLength = np.clip(StepLength, bz_step.StepLength_LIMITS[0], bz_step.StepLength_LIMITS[1]) StepVelocity = np.clip(StepVelocity, bz_step.StepVelocity_LIMITS[0], bz_step.StepVelocity_LIMITS[1]) LateralFraction = np.clip(LateralFraction, bz_step.LateralFraction_LIMITS[0], bz_step.LateralFraction_LIMITS[1]) YawRate = np.clip(YawRate, bz_step.YawRate_LIMITS[0], bz_step.YawRate_LIMITS[1]) ClearanceHeight = np.clip(ClearanceHeight, bz_step.ClearanceHeight_LIMITS[0], bz_step.ClearanceHeight_LIMITS[1]) PenetrationDepth = np.clip(PenetrationDepth, bz_step.PenetrationDepth_LIMITS[0], bz_step.PenetrationDepth_LIMITS[1]) contacts = state[-4:] # Get Desired Foot Poses T_bf = bzg.GenerateTrajectory(StepLength, LateralFraction, YawRate, StepVelocity, T_bf0, T_bf, ClearanceHeight, PenetrationDepth, contacts) # Add DELTA to XYZ Foot Poses RESIDUALS_SCALE = 0.05 # T_bf["FL"][3, :3] += action[6:9] * RESIDUALS_SCALE # T_bf["FR"][3, :3] += action[9:12] * RESIDUALS_SCALE # T_bf["BL"][3, :3] += action[12:15] * RESIDUALS_SCALE # T_bf["BR"][3, :3] += action[15:18] * RESIDUALS_SCALE T_bf["FL"][3, 2] += action[6] * RESIDUALS_SCALE T_bf["FR"][3, 2] += action[7] * RESIDUALS_SCALE T_bf["BL"][3, 2] += action[8] * RESIDUALS_SCALE T_bf["BR"][3, 2] += action[9] * RESIDUALS_SCALE joint_angles = spot.IK(orn, pos, T_bf) # Pass Joint Angles env.pass_joint_angles(joint_angles.reshape(-1)) # Perform action next_state, reward, done, _ = env.step(action) done_bool = float(done) episode_timesteps += 1 # Store data in replay buffer replay_buffer.push(state, action, reward, next_state, done_bool) state = next_state episode_reward += reward # Train agent after collecting sufficient data for buffer if len(replay_buffer) > batch_size: sac.soft_q_update(batch_size) if episode_timesteps > max_t_per_ep: done = True if done: # Reshuffle State Machine bzg.reset() bz_step.reshuffle() bz_step.ClearanceHeight = BaseClearanceHeight bz_step.PenetrationDepth = BasePenetrationDepth # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print( "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}" .format(t + 1, episode_num, episode_timesteps, episode_reward, episode_reward / float(episode_timesteps))) # Reset environment state, done = env.reset(), False evaluations.append(episode_reward) episode_reward = 0 episode_timesteps = 0 episode_num += 1 # Evaluate episode if (t + 1) % eval_freq == 0: # evaluate_policy(policy, env_name, seed, np.save(results_path + "/" + str(file_name), evaluations) if save_model: sac.save(models_path + "/" + str(file_name) + str(t)) # replay_buffer.save(t) env.close()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 9 still_going = True print("Loading and Saving") while still_going: if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent: {}".format(agent_num)) # Load Class agent.load(models_path + "/" + file_name + str(agent_num)) # Save np array agent.save(models_path + "/" + file_name + str(agent_num)) else: still_going = False agent_num += 10
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_episodes = 1000 if ARGS.NumberOfEpisodes: max_episodes = ARGS.NumberOfEpisodes file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim, episode_steps=np.inf) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False) use_agent = False agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum use_agent = True if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 50000 policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Used to create gaussian distribution of survival surv_dt = [] while episode_num < (int(max_episodes)): episode_reward, episode_timesteps = agent.deployTG() episode_num += 1 # Store dt and frequency for prob distribution surv_dt.append(episode_timesteps) print("Episode Num: {} Episode T: {} Reward: {}".format( episode_num, episode_timesteps, episode_reward)) env.close() print("---------------------------------------") # Store results if use_agent: # Store _agent agt = "agent" else: # Store _vanilla agt = "vanilla" with open( results_path + "/" + str(file_name) + agt + '_survival_' + str(max_episodes), 'wb') as filehandle: pickle.dump(surv_dt, filehandle)
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_episodes = 1000 if ARGS.NumberOfEpisodes: max_episodes = ARGS.NumberOfEpisodes if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim, episode_steps=np.inf) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False) use_agent = False agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum use_agent = True if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 50000 policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Used to create gaussian distribution of survival distance surv_pos = [] # Reset every 200 episodes (pb client doesn't like running for long) reset_ep = 200 while episode_num < (int(max_episodes)): episode_reward, episode_timesteps = agent.deployTG() # We only care about x/y pos travelled_pos = list(agent.returnPose()) # NOTE: FORMAT: X, Y, TIMESTEPS - # tells us if robobt was just stuck forever. didn't actually fall. travelled_pos[-1] = episode_timesteps episode_num += 1 # Store dt and frequency for prob distribution surv_pos.append(travelled_pos) print("Episode Num: {} Episode T: {} Reward: {}".format( episode_num, episode_timesteps, episode_reward)) print("Survival Pos: {}".format(surv_pos[-1])) # Reset every X episodes (pb client doesn't like running for long) if episode_num % reset_ep == 0: env.close() env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) agent.env = env env.close() print("---------------------------------------") # Store results if use_agent: # Store _agent agt = "agent_" + str(agent_num) else: # Store _vanilla agt = "vanilla" with open( results_path + "/" + str(file_name) + agt + '_survival_' + str(max_episodes), 'wb') as filehandle: pickle.dump(surv_pos, filehandle)