class MonopedEnv(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.monoped_state_object = MonopedState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5) self.monoped_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.monoped_joint_pubisher_object = JointPub() """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_monoped_joint_controllers...") self.controllers_object.reset_monoped_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose...") self.monoped_joint_pubisher_object.set_init_pose() # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.monoped_state_object.check_all_systems_ready() rospy.logdebug("get_observations...") observation = self.monoped_state_object.get_observations() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state def _step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented next_action_position = self.monoped_state_object.get_action_to_position( action) # We move it to that pos self.gazebo.unpauseSim() self.monoped_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.monoped_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward, done = self.monoped_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.monoped_state_object.get_state_as_string(observation)
time.sleep(wait_time * 10) rospy.loginfo("END CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK BEFORE RESET") debug_object.get_clock_time() rospy.loginfo("SETTING INITIAL POSE TO AVOID") debug_object.set_init_pose() time.sleep(wait_time * 2.0) rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo( "We deactivate gravity to check any reasidual effect of reseting the simulation" ) gazebo.change_gravity(0.0, 0.0, 0.0) rospy.loginfo("RESETING SIMULATION") gazebo.pauseSim() gazebo.resetSim() gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") debug_object.get_clock_time() rospy.loginfo( "RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") controllers_object.reset_cartpole_joint_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN") debug_object.get_clock_time()
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # list for recording the minimal value from the laser self.mini_laser = [] self.moco = model_control() self.pipe_angle = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # reset the controllers self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready( ) # this step is very important!!!! the system will be easily failed if it is removed # delete the old target self.moco.delete_model('target') # spawn the new target target_pos = self.change_target_pos(self.moco, self.pipe_angle) # set the target point for the gym env self.set_target_position([target_pos[0], target_pos[1], 0.0]) # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend( [0.0, 0.0, 0.0, 0.0]) # assume the previous action is 0 for the first step self.reset_controller_pub.publish(Bool(False)) avg_laser = 0.0 # average the recorded minimal distance from the end-effector to the pipe if len(self.mini_laser) > 0: avg_laser = np.mean(self.mini_laser) self.mini_laser = [] return observation, avg_laser def _step(self, action): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(action) start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state # record the minimal value of laser data every time step laser_data = observation[8:18] self.mini_laser.append(min(laser_data)) if end - start > 1.0: stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_target_pos(self, moco, angle): # how far will be the target away from the turning center approximately dis_target_left = [0.23, 0.26] dis_target = random.uniform(dis_target_left[0], dis_target_left[1]) target_pos = [ 0.06 + dis_target * cos((angle - 90.0) / 180 * pi), -0.34 - dis_target * sin((angle - 90.0) / 180 * pi) ] # add some noise to vertical position target_pos[1] -= random.uniform(0.04, 0.07) target_name = 'target' # the model name and namespace targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro' # visulize the target in Gazebo moco.spawn_model(targetdir, target_name, target_name, [target_pos[0], target_pos[1], 0.0], [0, 0, 0]) return target_pos
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.desired_target.x = rospy.get_param("/desired_target_point/x") self.desired_target.y = rospy.get_param("/desired_target_point/y") self.desired_target.z = rospy.get_param("/desired_target_point/z") self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_state_object.set_desired_target_point(self.desired_target.x, self.desired_target.y, self.desired_target.z) self.mrm_joint_pubisher_object = JointPub() self._seed() self.passed_episode = 0 self.total_episode = rospy.get_param("/total_episode") # list for recording the minimal value from the laser self.mini_laser = [] self.obstacle_penalty = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): self.passed_episode += 1 # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # change the weights of rewards to enable curriculum learning self.weight_r3 **= self.inr_r3 #self.weight_r3 = 1 - np.exp(-90 / (self.total_episode - self.passed_episode)) self.mrm_state_object.change_weights(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready( ) # this step is very important!!!! the system will be easily failed if it is removed # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend( [0.0, 0.0, 0.0, 0.0]) # assume the previous action is 0 for the first step return observation def _step(self, action): # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while # gazebo is paused self.mrm_joint_pubisher_object.apply_joints_effort( action, self.running_step) # unpasue the simulation and start applying the effort self.gazebo.unpauseSim() rospy.sleep(self.running_step) # pause the simelation again to process data self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state # record the minimal value of laser data every time step laser_data = observation[8:18] self.mini_laser.append(min(laser_data)) return observation, reward, done, reach_target, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])
class MonopedEnv(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.list_of_observations = rospy.get_param("/list_of_observations") haa_max = rospy.get_param("/joint_limits_array/haa_max") haa_min = rospy.get_param("/joint_limits_array/haa_min") hfe_max = rospy.get_param("/joint_limits_array/hfe_max") hfe_min = rospy.get_param("/joint_limits_array/hfe_min") kfe_max = rospy.get_param("/joint_limits_array/kfe_max") kfe_min = rospy.get_param("/joint_limits_array/kfe_min") self.joint_limits = { "haa_max": haa_max, "haa_min": haa_min, "hfe_max": hfe_max, "hfe_min": hfe_min, "kfe_max": kfe_max, "kfe_min": kfe_min } self.discrete_division = rospy.get_param("/discrete_division") self.maximum_base_linear_acceleration = rospy.get_param( "/maximum_base_linear_acceleration") self.maximum_base_angular_velocity = rospy.get_param( "/maximum_base_angular_velocity") self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") haa_init_value = rospy.get_param("/init_joint_pose/haa") hfe_init_value = rospy.get_param("/init_joint_pose/hfe") kfe_init_value = rospy.get_param("/init_joint_pose/kfe") self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # Jump Increment Value in Radians self.jump_increment = rospy.get_param("/jump_increment") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.monoped_state_object = MonopedState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, list_of_observations=self.list_of_observations, joint_limits=self.joint_limits, episode_done_criteria=self.episode_done_criteria, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5, discrete_division=self.discrete_division, maximum_base_linear_acceleration=self. maximum_base_linear_acceleration, maximum_base_angular_velocity=self.maximum_base_angular_velocity, maximum_joint_effort=self.maximum_joint_effort, jump_increment=self.jump_increment) self.monoped_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.monoped_joint_pubisher_object = JointPub() """ For this version, we consider 5 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5) Dont Move 6) Perform One Jump """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_monoped_joint_controllers...") self.controllers_object.reset_monoped_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose)) # We save that position as the current joint desired position init_pos = self.monoped_state_object.init_joints_pose( self.init_joint_pose) # 4th: We Set the init pose to the jump topic so that the jump control can update rospy.logdebug("Publish init_pose for Jump Control...>>>" + str(init_pos)) # We check the jump publisher has connection self.monoped_joint_pubisher_object.check_publishers_connection() # We move the joints to position, no jump do_jump = False self.monoped_joint_pubisher_object.move_joints_jump(init_pos, do_jump) # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.monoped_state_object.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.monoped_state_object.get_observations() state = self.get_state(observation) return state def _step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponds to which joint is incremented next_action_position, do_jump = self.monoped_state_object.get_action_to_position( action) # We move it to that pos self.gazebo.unpauseSim() self.monoped_joint_pubisher_object.move_joints_jump( next_action_position, do_jump) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.monoped_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward, done = self.monoped_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.monoped_state_object.get_state_as_string(observation)
class multi_UAV_Env(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment self.set_link = rospy.ServiceProxy('/gazebo/set_link_state', SetLinkState) rospy.wait_for_service('/gazebo/set_link_state') # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param( "/desired_pose/position/x") self.desired_pose.position.y = rospy.get_param( "/desired_pose/position/y") self.desired_pose.position.z = rospy.get_param( "/desired_pose/position/z") self.desired_pose.orientation.x = rospy.get_param( "/desired_pose/orientation/x") self.desired_pose.orientation.y = rospy.get_param( "/desired_pose/orientation/y") self.desired_pose.orientation.z = rospy.get_param( "/desired_pose/orientation/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_vel = rospy.get_param("/max_vel") self.min_vel = rospy.get_param("/min_vel") self.max_acc = rospy.get_param("/max_acc") self.min_acc = rospy.get_param("/min_acc") self.max_jerk = rospy.get_param("/max_jerk") self.min_jerk = rospy.get_param("/min_jerk") self.max_snap = rospy.get_param("/max_snap") self.min_snap = rospy.get_param("/min_snap") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.num_action_space = 15 high = np.array([1. for _ in range(self.num_action_space)]) low = np.array([-1. for _ in range(self.num_action_space)]) self.action_space = spaces.Box(low=low, high=high) # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.multi_uav_state_object = MultiUavState( max_vel=self.max_vel, min_vel=self.min_vel, max_acc=self.max_acc, min_acc=self.min_acc, max_jerk=self.max_jerk, min_jerk=self.min_jerk, max_snap=self.max_snap, min_snap=self.min_snap, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, done_reward=self.done_reward, alive_reward=self.alive_reward) self.multi_uav_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z, self.desired_pose.orientation.x, self.desired_pose.orientation.y, self.desired_pose.orientation.z) self.mellingers = [ MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35), MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35), MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35), MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35) ] """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() print("end of init...") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator print("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values print("Reset SIM...") self.gazebo.resetSim() self.gazebo.unpauseSim() # model_name = "four_quad_payload" # rospy.wait_for_service('/gazebo/delete_model') # del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel) # del_model_prox(model_name) # model_xml = rospy.get_param("robot_description") # pose = Pose() # pose.orientation.w = 1.0 # spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # req = SpawnModelRequest() # req.model_name = model_name # req.model_xml = model_xml # # should this be unique so ros_control can use each individually? # req.robot_namespace = "/foo" # req.initial_pose = pose # resp = spawn_model(req) self.gazebo.resetWorld() # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch') # initial_pose = Pose() # initial_pose.position.x = 0 # initial_pose.position.y = 0 # initial_pose.position.z = 0 # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro') # urdf = f.read() # rospy.wait_for_service('gazebo/spawn_urdf_model') # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel) # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world") # for i in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_imu_name = 'hummingbird_' + str(i) + '/imugt_link' # self.set_link(LinkState(link_name=link_imu_name)) # for j in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j) # print link_name # self.set_link(LinkState(link_name=link_name)) # rospy.wait_for_service('/gazebo/set_link_state') # link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link' # self.set_link(LinkState(link_name=link_odom_name)) # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation print("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.8) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems # rospy.logdebug("reset_monoped_joint_controllers...") # self.controllers_object.reset_monoped_joint_controllers() # 3rd: resets the robot to initial conditions # print("set_init_pose...") # for controller in self.mellingers: # motor_speed = np.zeros((4, 1)) # controller.set_desired_motor_speed(motor_speed) # controller.send_motor_command() # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints print("check_all_systems_ready...") self.multi_uav_state_object.check_all_systems_ready() # rospy.logdebug("get_observations...") # observation = self.multi_uav_state_object.get_states() # 6th: We restore the gravity to original print("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation print("Pause SIM...") self.gazebo.pauseSim() # Get the State Discrete Stringuified version of the observations state = self.get_state() return state def _step(self, actions): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented # next_action_position = self.multi_uav_state_object.get_action_to_position(action) # update the reference in the mellinger controller from the action for i, action in enumerate(actions): self.mellingers[i].set_ref_from_action(action) # We move it to that pos self.gazebo.unpauseSim() # action to the gazebo environment for i_mellinger in self.mellingers: i_mellinger.publish_err() i_mellinger.update_current_state() i_mellinger.update_desired_values() i_mellinger.motorSpeedFromU() i_mellinger.send_motor_command() # self.monoped_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations # observation = self.multi_uav_state_object.get_observations() # finally we get an evaluation based on what happened in the sim # reward,done = self.multi_uav_state_object.process_data() reward, done = self.multi_uav_state_object.calculate_reward_payload_orientation( ) # Get the State Discrete Stringuified version of the observations # state = self.get_state(observation) state = self.get_state() return state, reward, done def get_state(self): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ # return self.multi_uav_state_object.get_state_as_string(observation) return self.multi_uav_state_object.get_states()