def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 if ARGS.Seed: seed = ARGS.Seed max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. 
# +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 # Plot Policy Output if ARGS.PlotPolicy or ARGS.TrueAction or ARGS.SaveData: if ARGS.TrueAction: action_name = "robot_act" action = np.array(agent.true_action_history) else: action_name = "agent_act" action = np.array(agent.action_history) if ARGS.SaveData: if height_field: terrain_name = "rough_" else: terrain_name = "flat_" np.save( results_path + "/" + "policy_out_" + terrain_name + action_name, action) print("SAVED DATA") ClearHeight_act = action[:, 0] BodyHeight_act = action[:, 1] Residuals_act = action[:, 2:] plt.plot(ClearHeight_act, label='Clearance Height Mod', color='black') plt.plot(BodyHeight_act, label='Body Height Mod', color='darkviolet') # FL plt.plot(Residuals_act[:, 0], label='Residual: FL (x)', color='limegreen') plt.plot(Residuals_act[:, 1], label='Residual: FL (y)', color='lime') plt.plot(Residuals_act[:, 2], label='Residual: FL (z)', color='green') # FR plt.plot(Residuals_act[:, 3], label='Residual: FR (x)', color='lightskyblue') plt.plot(Residuals_act[:, 4], label='Residual: FR (y)', color='dodgerblue') plt.plot(Residuals_act[:, 5], label='Residual: FR (z)', color='blue') # BL plt.plot(Residuals_act[:, 6], label='Residual: BL (x)', color='firebrick') plt.plot(Residuals_act[:, 7], label='Residual: BL (y)', color='crimson') plt.plot(Residuals_act[:, 8], label='Residual: BL (z)', color='red') # BR plt.plot(Residuals_act[:, 9], label='Residual: BR (x)', color='gold') plt.plot(Residuals_act[:, 10], label='Residual: BR (y)', color='orange') plt.plot(Residuals_act[:, 11], label='Residual: BR (z)', color='coral') plt.xlabel("Epoch Iteration") plt.ylabel("Action Value") plt.title("Policy Output") plt.legend() plt.show() env.close()
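# --- Hedged sketch (not from the original source): the evaluation script above
# reads its options from a module-level ARGS object. A minimal argparse setup
# providing the flags referenced above might look like this; the flag names are
# taken from the usages in the script, but the help strings and defaults are
# assumptions.
import argparse

parser = argparse.ArgumentParser(description="Spot ARS evaluation options (assumed)")
parser.add_argument("--DebugRack", action="store_true", help="hang spot on a rack to debug the gait")
parser.add_argument("--DebugPath", action="store_true", help="draw the foot path")
parser.add_argument("--HeightField", action="store_true", help="use rough terrain")
parser.add_argument("--NoContactSensing", action="store_true", help="disable contact sensing")
parser.add_argument("--DontRender", action="store_true", help="disable GUI rendering")
parser.add_argument("--DontRandomize", action="store_true", help="disable domain randomization")
parser.add_argument("--GUI", action="store_true", help="show the parameter GUI")
parser.add_argument("--PlotPolicy", action="store_true", help="plot the policy output")
parser.add_argument("--TrueAction", action="store_true", help="plot the robot's true action history")
parser.add_argument("--SaveData", action="store_true", help="save the policy output to ../results")
parser.add_argument("--Seed", type=int, help="RNG seed")
parser.add_argument("--AgentNum", type=int, help="suffix of the saved agent to load")
ARGS = parser.parse_args()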
def __init__(self,
             distance_weight=1.0, rotation_weight=1.0, energy_weight=0.0005,
             shake_weight=0.005, drift_weight=2.0, rp_weight=0.1,
             rate_weight=0.1, urdf_root=pybullet_data.getDataPath(),
             urdf_version=None, distance_limit=float("inf"),
             observation_noise_stdev=SENSOR_NOISE_STDDEV,
             self_collision_enabled=True, motor_velocity_limit=np.inf,
             pd_control_enabled=False, leg_model_enabled=False,
             accurate_motor_model_enabled=False,
             remove_default_joint_damping=False, motor_kp=2.0, motor_kd=0.03,
             control_latency=0.0, pd_latency=0.0, torque_control_enabled=False,
             motor_overheat_protection=False, hard_reset=False, on_rack=False,
             render=True, num_steps_to_log=1000, action_repeat=1,
             control_time_step=None, env_randomizer=SpotEnvRandomizer(),
             forward_reward_cap=float("inf"), reflection=True, log_path=None,
             desired_velocity=0.5, desired_rate=0.0, lateral=False,
             draw_foot_path=False, height_field=False, height_field_iters=2,
             AutoStepper=False, contacts=True):
    """Initialize the spot gym environment.

    Args:
      urdf_root: The path to the urdf data folder.
      urdf_version: [DEFAULT_URDF_VERSION] are allowable versions. If None,
        DEFAULT_URDF_VERSION is used.
      distance_weight: The weight of the distance term in the reward.
      energy_weight: The weight of the energy term in the reward.
      shake_weight: The weight of the vertical shakiness term in the reward.
      drift_weight: The weight of the sideways drift term in the reward.
      distance_limit: The maximum distance to terminate the episode.
      observation_noise_stdev: The standard deviation of observation noise.
      self_collision_enabled: Whether to enable self collision in the sim.
      motor_velocity_limit: The velocity limit of each motor.
      pd_control_enabled: Whether to use a PD controller for each motor.
      leg_model_enabled: Whether to use a leg motor to reparameterize the
        action space.
      accurate_motor_model_enabled: Whether to use the accurate DC motor
        model.
      remove_default_joint_damping: Whether to remove the default joint
        damping.
      motor_kp: proportional gain for the accurate motor model.
      motor_kd: derivative gain for the accurate motor model.
      control_latency: The delay in the controller between when an
        observation is made and when that reading is reported back to the
        neural network.
      pd_latency: latency of the PD controller loop. PD calculates PWM
        based on the motor angle and velocity. The latency measures the
        time between when the motor angle and velocity are observed on the
        microcontroller and when the true state happens on the motor. It is
        typically (0.001 - 0.002 s).
      torque_control_enabled: Whether to use torque control. If set to
        False, pose control will be used.
      motor_overheat_protection: Whether to shut down a motor that has
        exerted a large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended
        amount of time (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in
        spot.py for more details.
      hard_reset: Whether to wipe the simulation and load everything when
        reset is called. If set to False, reset just places spot back at
        the start position and sets its pose to the initial configuration.
      on_rack: Whether to place spot on a rack. This is only used to debug
        the walking gait. In this mode, spot's base is hung in midair so
        that its walking gait is clearer to visualize.
      render: Whether to render the simulation.
      num_steps_to_log: The max number of control steps in one episode that
        will be logged. If the number of steps is more than
        num_steps_to_log, the environment will still be running, but only
        the first num_steps_to_log will be recorded in logging.
      action_repeat: The number of simulation steps before actions are
        applied.
      control_time_step: The time step between two successive control
        signals.
      env_randomizer: An instance (or a list) of EnvRandomizer(s). An
        EnvRandomizer may randomize the physical property of spot, change
        the terrain during reset(), or add perturbation forces during
        step().
      forward_reward_cap: The maximum value that forward reward is capped
        at. Disabled (Inf) by default.
      log_path: The path to write out logs. For the details of logging,
        refer to spot_logging.proto.

    Raises:
      ValueError: If the urdf_version is not supported.
    """
    # Sense Contacts
    self.contacts = contacts
    # Enable Auto Stepper State Machine
    self.AutoStepper = AutoStepper
    # Enable Rough Terrain or Not
    self.height_field = height_field
    self.draw_foot_path = draw_foot_path

    # DRAWING FEET PATH
    self.prev_feet_path = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                                    [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])

    # CONTROL METRICS
    self.desired_velocity = desired_velocity
    self.desired_rate = desired_rate
    self.lateral = lateral

    # Set up logging.
    self._log_path = log_path
    # @TODO fix logging

    # NUM ITERS
    self._time_step = 0.01
    self._action_repeat = action_repeat
    self._num_bullet_solver_iterations = 300
    self.logging = None

    if pd_control_enabled or accurate_motor_model_enabled:
        self._time_step /= NUM_SUBSTEPS
        self._num_bullet_solver_iterations /= NUM_SUBSTEPS
        self._action_repeat *= NUM_SUBSTEPS

    # PD control needs a smaller time step for stability.
    if control_time_step is not None:
        self.control_time_step = control_time_step
    else:
        # Get Control Timestep
        self.control_time_step = self._time_step * self._action_repeat
    # TODO: Fix the value of self._num_bullet_solver_iterations.
    self._num_bullet_solver_iterations = int(
        NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)

    # URDF
    self._urdf_root = urdf_root
    self._self_collision_enabled = self_collision_enabled
    self._motor_velocity_limit = motor_velocity_limit
    self._observation = []
    self._true_observation = []
    self._objectives = []
    self._objective_weights = [
        distance_weight, energy_weight, drift_weight, shake_weight
    ]
    self._env_step_counter = 0
    self._num_steps_to_log = num_steps_to_log
    self._is_render = render
    self._last_base_position = [0, 0, 0]
    self._last_base_orientation = [0, 0, 0, 1]
    self._distance_weight = distance_weight
    self._rotation_weight = rotation_weight
    self._energy_weight = energy_weight
    self._drift_weight = drift_weight
    self._shake_weight = shake_weight
    self._rp_weight = rp_weight
    self._rate_weight = rate_weight
    self._distance_limit = distance_limit
    self._observation_noise_stdev = observation_noise_stdev
    self._action_bound = 1
    self._pd_control_enabled = pd_control_enabled
    self._leg_model_enabled = leg_model_enabled
    self._accurate_motor_model_enabled = accurate_motor_model_enabled
    self._remove_default_joint_damping = remove_default_joint_damping
    self._motor_kp = motor_kp
    self._motor_kd = motor_kd
    self._torque_control_enabled = torque_control_enabled
    self._motor_overheat_protection = motor_overheat_protection
    self._on_rack = on_rack
    self._cam_dist = 1.0
    self._cam_yaw = 0
    self._cam_pitch = -30
    self._forward_reward_cap = forward_reward_cap
    self._hard_reset = True
    self._last_frame_time = 0.0
    self._control_latency = control_latency
    self._pd_latency = pd_latency
    self._urdf_version = urdf_version
    self._ground_id = None
    self._reflection = reflection
    self._env_randomizer = env_randomizer
    # @TODO fix logging
    self._episode_proto = None

    if self._is_render:
        self._pybullet_client = bullet_client.BulletClient(
            connection_mode=pybullet.GUI)
    else:
        self._pybullet_client = bullet_client.BulletClient()

    if self._urdf_version is None:
        self._urdf_version = DEFAULT_URDF_VERSION
    self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
    self.seed()

    # Only update after HF has been generated
    self.height_field = False
    self.reset()

    observation_high = (self.spot.GetObservationUpperBound() +
                        OBSERVATION_EPS)
    observation_low = (self.spot.GetObservationLowerBound() -
                       OBSERVATION_EPS)
    action_dim = NUM_MOTORS
    action_high = np.array([self._action_bound] * action_dim)
    self.action_space = spaces.Box(-action_high, action_high)
    self.observation_space = spaces.Box(observation_low, observation_high)

    self.viewer = None
    self._hard_reset = hard_reset  # This assignment needs to be after reset()

    self.goal_reached = False

    # Generate HeightField or not
    self.height_field = height_field
    self.hf = HeightField()
    if self.height_field:
        # Run multiple passes for extra roughness
        for i in range(height_field_iters):
            self.hf._generate_field(self)
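# --- Hedged sketch (illustration only): how the constructor above derives its
# control timestep when PD control or the accurate motor model is enabled.
# NUM_SUBSTEPS = 5 is an assumed example value; the real constant is defined
# alongside the environment class.
NUM_SUBSTEPS = 5

time_step = 0.01
action_repeat = 1

pd_control_enabled = True
if pd_control_enabled:
    # PD control needs a smaller simulation step for stability, so the step is
    # subdivided and the action repeat is scaled up by the same factor.
    time_step /= NUM_SUBSTEPS      # 0.002
    action_repeat *= NUM_SUBSTEPS  # 5

# The effective control period is unchanged:
control_time_step = time_step * action_repeat
print(control_time_step)           # 0.01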
def __init__(self,
             distance_weight=1.0, rotation_weight=0.0, energy_weight=0.000,
             shake_weight=0.00, drift_weight=0.0, rp_weight=10.0,
             rate_weight=.03, urdf_root=pybullet_data.getDataPath(),
             urdf_version=None, distance_limit=float("inf"),
             observation_noise_stdev=SENSOR_NOISE_STDDEV,
             self_collision_enabled=True, motor_velocity_limit=np.inf,
             pd_control_enabled=False, leg_model_enabled=False,
             accurate_motor_model_enabled=False,
             remove_default_joint_damping=False, motor_kp=2.0, motor_kd=0.03,
             control_latency=0.0, pd_latency=0.0, torque_control_enabled=False,
             motor_overheat_protection=False, hard_reset=False, on_rack=False,
             render=True, num_steps_to_log=1000, action_repeat=1,
             control_time_step=None, env_randomizer=SpotEnvRandomizer(),
             forward_reward_cap=float("inf"), reflection=True, log_path=None,
             desired_velocity=0.5, desired_rate=0.0, lateral=False,
             draw_foot_path=False, height_field=False, AutoStepper=True,
             action_dim=14, contacts=True):

    super(spotBezierEnv, self).__init__(
        distance_weight=distance_weight, rotation_weight=rotation_weight,
        energy_weight=energy_weight, shake_weight=shake_weight,
        drift_weight=drift_weight, rp_weight=rp_weight,
        rate_weight=rate_weight, urdf_root=urdf_root,
        urdf_version=urdf_version, distance_limit=distance_limit,
        observation_noise_stdev=observation_noise_stdev,
        self_collision_enabled=self_collision_enabled,
        motor_velocity_limit=motor_velocity_limit,
        pd_control_enabled=pd_control_enabled,
        leg_model_enabled=leg_model_enabled,
        accurate_motor_model_enabled=accurate_motor_model_enabled,
        remove_default_joint_damping=remove_default_joint_damping,
        motor_kp=motor_kp, motor_kd=motor_kd, control_latency=control_latency,
        pd_latency=pd_latency, torque_control_enabled=torque_control_enabled,
        motor_overheat_protection=motor_overheat_protection,
        hard_reset=hard_reset, on_rack=on_rack, render=render,
        num_steps_to_log=num_steps_to_log, action_repeat=action_repeat,
        control_time_step=control_time_step, env_randomizer=env_randomizer,
        forward_reward_cap=forward_reward_cap, reflection=reflection,
        log_path=log_path, desired_velocity=desired_velocity,
        desired_rate=desired_rate, lateral=lateral,
        draw_foot_path=draw_foot_path, height_field=height_field,
        AutoStepper=AutoStepper, contacts=contacts)

    # Residuals + Clearance Height + Penetration Depth
    action_high = np.array([self._action_bound] * action_dim)
    self.action_space = spaces.Box(-action_high, action_high)
    print("Action SPACE: {}".format(self.action_space))

    self.prev_pos = np.array([0.0, 0.0, 0.0])
    self.yaw = 0.0
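# --- Hedged usage sketch (assumes spotBezierEnv and SpotEnvRandomizer are
# importable as in the scripts in this section). It only exercises the
# gym-style API shown above: seed, reset, action_space.sample, step, close.
import numpy as np

env = spotBezierEnv(render=False,
                    on_rack=False,
                    height_field=False,
                    draw_foot_path=False,
                    contacts=True,
                    env_randomizer=SpotEnvRandomizer())
env.seed(0)
np.random.seed(0)

state = env.reset()
for _ in range(100):
    # 14-dim action: foot residuals + clearance height + penetration depth
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    if done:
        state = env.reset()
env.close()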
def main(): """ The main() function. """ # Hold mp pipes mp.freeze_support() print("STARTING SPOT TRAINING ENV") seed = 0 max_timesteps = 4e6 eval_freq = 1e1 save_model = True file_name = "spot_ars_" if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() g_u_i = GUI(env.spot.quadruped) spot = SpotModel() T_bf = spot.WorldToFoot bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot) agent_num = 0 if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) env.reset(agent.desired_velocity, agent.desired_rate) episode_reward = 0 episode_timesteps = 0 episode_num = 0 # Create mp pipes num_processes = policy.num_deltas processes = [] childPipes = [] parentPipes = [] # Store mp pipes for pr in range(num_processes): parentPipe, childPipe = Pipe() parentPipes.append(parentPipe) childPipes.append(childPipe) # Start multiprocessing # Start multiprocessing for proc_num in range(num_processes): p = mp.Process(target=ParallelWorker, args=(childPipes[proc_num], env, state_dim)) p.start() processes.append(p) print("STARTED SPOT TRAINING ENV") t = 0 while t < (int(max_timesteps)): # Maximum timesteps per rollout episode_reward, episode_timesteps = agent.train_parallel(parentPipes) t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. 
# +0 on ep_timesteps since it will increment +1 even if done=True print( "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}" .format(t + 1, episode_num, episode_timesteps, episode_reward, episode_reward / float(episode_timesteps))) # Store Results (concat) if episode_num == 0: res = np.array( [[episode_reward, episode_reward / float(episode_timesteps)]]) else: new_res = np.array( [[episode_reward, episode_reward / float(episode_timesteps)]]) res = np.concatenate((res, new_res)) # Also Save Results So Far (Overwrite) # Results contain 2D numpy array of total reward for each ep # and reward per timestep for each ep np.save(results_path + "/" + str(file_name), res) # Evaluate episode if (episode_num + 1) % eval_freq == 0: if save_model: agent.save(models_path + "/" + str(file_name) + str(episode_num)) episode_num += 1 # Close pipes and hence envs for parentPipe in parentPipes: parentPipe.send([_CLOSE, "pay2"]) for p in processes: p.join()
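# --- Hedged sketch (not part of the training script): reading back the results
# array saved above. np.save appends ".npy"; each row is
# [total episode reward, reward per timestep]. Paths mirror the script above.
import os

import matplotlib.pyplot as plt
import numpy as np

my_path = os.path.abspath(os.path.dirname(__file__))
results_path = os.path.join(my_path, "../results")
file_name = "spot_ars_"

res = np.load(os.path.join(results_path, file_name + ".npy"))
plt.plot(res[:, 0], label="Episode Reward")
plt.plot(res[:, 1], label="Reward per Timestep")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.legend()
plt.show()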
def main(): """ The main() function. """ print("STARTING SPOT TEST ENV") seed = 0 max_timesteps = 4e6 # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") models_path = os.path.join(my_path, "../models") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() env = spotBezierEnv(render=True, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) state = env.reset() g_u_i = GUI(env.spot.quadruped) spot = SpotModel() T_bf0 = spot.WorldToFoot T_bf = copy.deepcopy(T_bf0) bzg = BezierGait(dt=env._time_step) bz_step = BezierStepper(dt=env._time_step, mode=0) action = env.action_space.sample() FL_phases = [] FR_phases = [] BL_phases = [] BR_phases = [] FL_Elbow = [] yaw = 0.0 print("STARTED SPOT TEST ENV") t = 0 while t < (int(max_timesteps)): bz_step.ramp_up() pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = bz_step.StateMachine( ) pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth, SwingPeriod = g_u_i.UserInput( ) # Update Swing Period bzg.Tswing = SwingPeriod yaw = env.return_yaw() P_yaw = 5.0 if ARGS.AutoYaw: YawRate += -yaw * P_yaw # print("YAW RATE: {}".format(YawRate)) # TEMP bz_step.StepLength = StepLength bz_step.LateralFraction = LateralFraction bz_step.YawRate = YawRate bz_step.StepVelocity = StepVelocity contacts = state[-4:] FL_phases.append(env.spot.LegPhases[0]) FR_phases.append(env.spot.LegPhases[1]) BL_phases.append(env.spot.LegPhases[2]) BR_phases.append(env.spot.LegPhases[3]) # Get Desired Foot Poses T_bf = bzg.GenerateTrajectory(StepLength, LateralFraction, YawRate, StepVelocity, T_bf0, T_bf, ClearanceHeight, PenetrationDepth, contacts) joint_angles = spot.IK(orn, pos, T_bf) FL_Elbow.append(np.degrees(joint_angles[0][-1])) for i, (key, Tbf_in) in enumerate(T_bf.items()): print("{}: \t Angle: {}".format(key, np.degrees(joint_angles[i]))) print("-------------------------") env.pass_joint_angles(joint_angles.reshape(-1)) # Get External Observations env.spot.GetExternalObservations(bzg, bz_step) # Step state, reward, done, _ = env.step(action) # print("IMU Roll: {}".format(state[0])) # print("IMU Pitch: {}".format(state[1])) # print("IMU GX: {}".format(state[2])) # print("IMU GY: {}".format(state[3])) # print("IMU GZ: {}".format(state[4])) # print("IMU AX: {}".format(state[5])) # print("IMU AY: {}".format(state[6])) # print("IMU AZ: {}".format(state[7])) # print("-------------------------") if done: print("DONE") if ARGS.AutoReset: env.reset() # plt.plot() # # plt.plot(FL_phases, label="FL") # # plt.plot(FR_phases, label="FR") # # plt.plot(BL_phases, label="BL") # # plt.plot(BR_phases, label="BR") # plt.plot(FL_Elbow, label="FL ELbow (Deg)") # plt.xlabel("dt") # plt.ylabel("value") # plt.title("Leg Phases") # plt.legend() # plt.show() # time.sleep(1.0) t += 1 
env.close() print(joint_angles)
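# --- Hedged helper (adapted from the commented-out diagnostic block in the
# test loop above; the function name is ours). It plots the recorded leg
# phases and FL elbow angle collected by the loop.
import matplotlib.pyplot as plt


def plot_leg_diagnostics(FL_phases, FR_phases, BL_phases, BR_phases, FL_Elbow):
    """Plot per-step leg phase signals and the FL elbow angle (degrees)."""
    plt.plot(FL_phases, label="FL")
    plt.plot(FR_phases, label="FR")
    plt.plot(BL_phases, label="BL")
    plt.plot(BR_phases, label="BR")
    plt.plot(FL_Elbow, label="FL Elbow (Deg)")
    plt.xlabel("dt")
    plt.ylabel("value")
    plt.title("Leg Phases")
    plt.legend()
    plt.show()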
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_timesteps = 4e6 file_name = "spot_ars_" if ARGS.DebugRack: on_rack = True else: on_rack = False if ARGS.DebugPath: draw_foot_path = True else: draw_foot_path = False if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRender: render = False else: render = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) env = spotBezierEnv(render=render, on_rack=on_rack, height_field=height_field, draw_foot_path=draw_foot_path, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim) # to GUI or not to GUI if ARGS.GUI: gui = True else: gui = False # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui) agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = np.inf policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") t = 0 while t < (int(max_timesteps)): episode_reward, episode_timesteps = agent.deployTG() t += episode_timesteps # episode_reward = agent.train() # +1 to account for 0 indexing. # +0 on ep_timesteps since it will increment +1 even if done=True print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format( t, episode_num, episode_timesteps, episode_reward)) episode_num += 1 env.close()
def main(): """ The main() function. """ print("STARTING MINITAUR ARS") # TRAINING PARAMETERS # env_name = "MinitaurBulletEnv-v0" seed = 0 max_episodes = 1000 if ARGS.NumberOfEpisodes: max_episodes = ARGS.NumberOfEpisodes if ARGS.HeightField: height_field = True else: height_field = False if ARGS.NoContactSensing: contacts = False else: contacts = True if ARGS.DontRandomize: env_randomizer = None else: env_randomizer = SpotEnvRandomizer() file_name = "spot_ars_" # Find abs path to this file my_path = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(my_path, "../results") if contacts: models_path = os.path.join(my_path, "../models/contact") else: models_path = os.path.join(my_path, "../models/no_contact") if not os.path.exists(results_path): os.makedirs(results_path) if not os.path.exists(models_path): os.makedirs(models_path) if ARGS.HeightField: height_field = True else: height_field = False env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) np.random.seed(seed) state_dim = env.observation_space.shape[0] print("STATE DIM: {}".format(state_dim)) action_dim = env.action_space.shape[0] print("ACTION DIM: {}".format(action_dim)) max_action = float(env.action_space.high[0]) env.reset() spot = SpotModel() bz_step = BezierStepper(dt=env._time_step) bzg = BezierGait(dt=env._time_step) # Initialize Normalizer normalizer = Normalizer(state_dim) # Initialize Policy policy = Policy(state_dim, action_dim, episode_steps=np.inf) # Initialize Agent with normalizer, policy and gym env agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False) use_agent = False agent_num = 0 if ARGS.AgentNum: agent_num = ARGS.AgentNum use_agent = True if os.path.exists(models_path + "/" + file_name + str(agent_num) + "_policy"): print("Loading Existing agent") agent.load(models_path + "/" + file_name + str(agent_num)) agent.policy.episode_steps = 50000 policy = agent.policy env.reset() episode_reward = 0 episode_timesteps = 0 episode_num = 0 print("STARTED MINITAUR TEST SCRIPT") # Used to create gaussian distribution of survival distance surv_pos = [] # Reset every 200 episodes (pb client doesn't like running for long) reset_ep = 200 while episode_num < (int(max_episodes)): episode_reward, episode_timesteps = agent.deployTG() # We only care about x/y pos travelled_pos = list(agent.returnPose()) # NOTE: FORMAT: X, Y, TIMESTEPS - # tells us if robobt was just stuck forever. didn't actually fall. travelled_pos[-1] = episode_timesteps episode_num += 1 # Store dt and frequency for prob distribution surv_pos.append(travelled_pos) print("Episode Num: {} Episode T: {} Reward: {}".format( episode_num, episode_timesteps, episode_reward)) print("Survival Pos: {}".format(surv_pos[-1])) # Reset every X episodes (pb client doesn't like running for long) if episode_num % reset_ep == 0: env.close() env = spotBezierEnv(render=False, on_rack=False, height_field=height_field, draw_foot_path=False, contacts=contacts, env_randomizer=env_randomizer) # Set seeds env.seed(seed) agent.env = env env.close() print("---------------------------------------") # Store results if use_agent: # Store _agent agt = "agent_" + str(agent_num) else: # Store _vanilla agt = "vanilla" with open( results_path + "/" + str(file_name) + agt + '_survival_' + str(max_episodes), 'wb') as filehandle: pickle.dump(surv_pos, filehandle)