class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # close the controller # self.controllers_object.shut_down_controller() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # 4th: probably add the function to change the target position is the target is reached # reset the controllers self.reset_controller_pub.publish(Bool(True)) # self.controllers_object.reset_mrm_joint_controllers() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() rospy.logdebug("get_observations...") observation = self.mrm_state_object.get_observations() self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, joint_targets): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(joint_targets) # Then we send the command to the robot and let it go for running_step seconds start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() return observation, reward, done, reach_target, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_joints_init(self, diameter): joints_inits = { 80: [0.28, -0.78, -0.7, 0], 90: [0.3, -0.8, -0.75, 0], 100: [0.4, -1.05, -0.9, 0], 110: [0.51, -1.27, -0.95, 0], 120: [0.53, -1.29, -0.98, 0] } self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter] def get_joints_init(self): return self.mrm_joint_pubisher_object.init_pos
class multi_UAV_Env(gym.Env): def __init__(self): # os.system('rosparam load /home/lucasyu/catkin_ws/src/collaborative_transportation/rotors_gazebo/scripts/collaborative/MARL/config/MADDPG_params.yaml') # We assume that a ROS node has already been created # before initialising the environment self.set_link = rospy.ServiceProxy('/gazebo/set_link_state', SetLinkState) rospy.wait_for_service('/gazebo/set_link_state') # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param( "/desired_pose/position/x") self.desired_pose.position.y = rospy.get_param( "/desired_pose/position/y") self.desired_pose.position.z = rospy.get_param( "/desired_pose/position/z") self.desired_pose.orientation.x = rospy.get_param( "/desired_pose/orientation/x") self.desired_pose.orientation.y = rospy.get_param( "/desired_pose/orientation/y") self.desired_pose.orientation.z = rospy.get_param( "/desired_pose/orientation/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_vel = rospy.get_param("/max_vel") self.min_vel = rospy.get_param("/min_vel") self.max_acc = rospy.get_param("/max_acc") self.min_acc = rospy.get_param("/min_acc") self.max_jerk = rospy.get_param("/max_jerk") self.min_jerk = rospy.get_param("/min_jerk") self.max_snap = rospy.get_param("/max_snap") self.min_snap = rospy.get_param("/min_snap") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.sub_clock = rospy.Subscriber('/rosout', Log, callback_log) self.num_agents = 4 self.num_action_space = 12 self.rate = rospy.Rate(50.0) high = np.array([1. for _ in range(self.num_action_space)]) low = np.array([0. for _ in range(self.num_action_space)]) self.action_space = spaces.Box(low=low, high=high) # stablishes connection with simulator self.gazebo = GazeboConnection() # self.controllers_object = ControllersConnection(namespace="monoped") self.multi_uav_state_object = MultiUavState( max_vel=self.max_vel, min_vel=self.min_vel, max_acc=self.max_acc, min_acc=self.min_acc, max_jerk=self.max_jerk, min_jerk=self.min_jerk, max_snap=self.max_snap, min_snap=self.min_snap, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, done_reward=self.done_reward, alive_reward=self.alive_reward) self.multi_uav_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z, self.desired_pose.orientation.x, self.desired_pose.orientation.y, self.desired_pose.orientation.z) self.mellingers = [ Mellinger_Agent(mav_name='hummingbird', index=0, num_agents=4, c=0.4, x=0.95, y=0.0, z=0.35, dim=3), Mellinger_Agent('hummingbird', index=1, num_agents=4, c=0.1, x=0.0, y=0.95, z=0.35, dim=3), Mellinger_Agent('hummingbird', index=2, num_agents=4, c=0.15, x=-0.95, y=0.0, z=0.35, dim=3), Mellinger_Agent('hummingbird', index=3, num_agents=4, c=0.35, x=0.0, y=-0.95, z=0.35, dim=3) ] self.goal_position = np.zeros((3, 1)) # print "clock_flag: ", clock_flag # timer_0 = rospy.Time.now() # while not clock_flag: # print "No clock message received!" # try: # rospy.wait_for_message("/clock", Clock, timeout=5.0) # timer_1 = rospy.Time.now() # time_spend_waiting = (timer_1 - timer_0).to_sec() # print ("time_spend_waiting: ", time_spend_waiting) # except: # print "core dumped... kill" # f_done = open(done_file,'w') # f_done.write('1') # f_done.close() # break # if time_spend_waiting > 5.0: # print "core dumped... kill" # f_done = open(done_file,'w') # f_done.write('1') # f_done.close() """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ #self.action_space = spaces.Discrete(6) #self.reward_range = (-np.inf, np.inf) #self._seed() print("end of init...") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # print("Pausing SIM...") # self.gazebo.pauseSim() # del self.mellingers # self.mellingers = [ # Mellinger_Agent( # mav_name='hummingbird', # index=0, # num_agents=4, # c=0.45, # x=0.95, y=0.0, z=0.26, # dim=3 # ), # Mellinger_Agent( # 'hummingbird', # index=1, # num_agents=4, # c=0.05, # x=0.0, y=0.95, z=0.26, # dim=3 # ), # Mellinger_Agent( # 'hummingbird', # index=2, # num_agents=4, # c=0.25, # x=-0.95, y=0.0, z=0.26, # dim=3 # ), # Mellinger_Agent( # 'hummingbird', # index=3, # num_agents=4, # c=0.25, # x=0.0, y=-0.95, z=0.26, # dim=3 # ) # ] # print "pppppppppp" # 0st: We pause the Simulator # 1st: resets the simulation to initial values print("Reset SIM...") model_xml = rospy.get_param("robot_description") model_name = "four_quad_payload" rospy.wait_for_service('/gazebo/delete_model') del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel) del_model_prox(model_name) # pose = Pose() # pose.orientation.w = 1.0 spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) req = SpawnModelRequest() req.model_name = model_name req.model_xml = model_xml # should this be unique so ros_control can use each individually? # req.robot_namespace = "/foo" # req.initial_pose = pose resp = spawn_model(req) time.sleep(2) self.gazebo.resetWorld() time.sleep(1) # self.gazebo.resetSim() # check_publishers_connection() # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch') # initial_pose = Pose() # initial_pose.position.x = 0 # initial_pose.position.y = 0 # initial_pose.position.z = 0 # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro') # urdf = f.read() # rospy.wait_for_service('gazebo/spawn_urdf_model') # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel) # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world") # for i in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_imu_name = 'hummingbird_' + str(i) + '/imugt_link' # self.set_link(LinkState(link_name=link_imu_name)) # for j in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j) # print link_name # self.set_link(LinkState(link_name=link_name)) # rospy.wait_for_service('/gazebo/set_link_state') # link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link' # self.set_link(LinkState(link_name=link_odom_name)) # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation # print("Remove Gravity...") # self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation print("Pause SIM...") self.gazebo.pauseSim() time.sleep(0.2) # Get the State Discrete Stringuified version of the observations state = self.get_init_state() return state def _step(self, actions): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented # next_action_position = self.multi_uav_state_object.get_action_to_position(action) # update the reference in the mellinger controller from the action # for i, action in enumerate(actions): # self.mellingers[i].set_ref_from_action(action) # We move it to that pos self.gazebo.unpauseSim() # action to the gazebo environment # hzyu # self.update_c_from_action(actions) reward_multi = [0, 0, 0, 0] count = 0 for i_mellinger in self.mellingers: # reward_multi[count] = -i_mellinger.reward_ft reward_multi[count] = -i_mellinger.reward_payload count = count + 1 now = time.time() i_mellinger.publish_poly3d_trj() i_mellinger.publish_err() i_mellinger.update_current_state() i_mellinger.update_des_distributor() i_mellinger.motorSpeedFromU() i_mellinger.send_motor_command() # self.rate.sleep() # hzyu # time.sleep(0.02) # self.monoped_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations # observation = self.multi_uav_state_object.get_observations() # finally we get an evaluation based on what happened in the sim # reward,done = self.multi_uav_state_object.process_data() # Get the State Discrete Stringuified version of the observations # state = self.get_state(observation) state = self.get_step_state() return state, reward_multi def show_accumulated_force(self): for i_mellinger in self.mellingers: print "self.sum_force: ", i_mellinger.sum_force def compute_reward_arrive(self): self.mellingers[0].reward_arrive_goal(self.goal_position) print "reward arrive: ", self.mellingers[0].reward_arrive return self.mellingers[0].reward_arrive def set_goal_position_vel(self, goal_position, goal_velocity): for controller in self.mellingers: controller.NL_planner.setVerticesPosVel(goal_position, goal_velocity) def compute_trj(self, goal_position, goal_velocity): self.goal_position = np.array(goal_position)[-1, :].reshape((3, 1)) print "ssssssssssssssss self.goal_position: ", self.goal_position self.set_goal_position_vel(goal_position, goal_velocity) for controller in self.mellingers: controller.optimize() controller.getPlanUpToSnap(frequency=50.0) def load_payload_references(self): for controller in self.mellingers: controller.load_ref_trj_payload() def hover_and_load_trj(self, dimension='xyz'): is_all_good = True count = 0 while not rospy.is_shutdown(): for i_mellinger in self.mellingers: if i_mellinger.hover_duration < 1.5: is_all_good = False break # print "distributor c: ", i_mellinger.c, "; distributor M0: ", i_mellinger .get_M0() # print "distributor c_M0: ", i_mellinger.c_M0 for i_mellinger in self.mellingers: #now = time.time() if i_mellinger.hover_duration < 1.5: i_mellinger.set_hover_des(target_height=1.5) if is_all_good: i_mellinger.update_offset_xyz( i_mellinger.payload_position[0, 0], i_mellinger.payload_position[1, 0], i_mellinger.payload_position[2, 0]) i_mellinger.load_ref_trj_payload(dimension=dimension) i_mellinger.offset_added = False if not i_mellinger.offset_added: print "hovering finished, going into the next phase..." print "i_mellinger.positions_quads[2, 0]: ", i_mellinger.positions_quads[ 2, 0] i_mellinger.update_offset_xyz( i_mellinger.positions_quads[0, 0], i_mellinger.positions_quads[1, 0], i_mellinger.positions_quads[2, 0]) i_mellinger.load_trj_lists(dimension=dimension) # i_mellinger.update_offset() # i_mellinger.load_trj_lists(dimension=dimension) print "offset added: ", i_mellinger.name count = count + 1 # i_mellinger.publish_poly3d_trj() if count == 4: # print "ssssssssssssss offset goal_position: ", self.goal_position # print "ssssssssssssss self.mellingers[0].payload_position.reshape((3, 1)): ", self.mellingers[0].payload_position.reshape((3, 1)) self.goal_position = self.goal_position + self.mellingers[ 0].payload_position.reshape((3, 1)) # print "ssssssssssssss offset goal_position: ", self.goal_position return True i_mellinger.publish_err() i_mellinger.update_current_state() i_mellinger.update_des_distributor() i_mellinger.motorSpeedFromU() i_mellinger.send_motor_command() self.rate.sleep() is_all_good = True def update_c_from_action(self, action): for i in range(self.num_agents): c = list(action[i * 3:i * 3 + 3]) # print "c for ", i ,'th mellinger: ', c # self.mellingers[i].publish_poly3d_trj() self.mellingers[i].update_estimated_c(c) self.mellingers[i].update_current_state() # self.mellingers[i].update_des_distributor() def get_init_state(self): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ # return self.multi_uav_state_object.get_state_as_string(observation) return self.multi_uav_state_object.get_init_states() def get_step_state(self): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ # return self.multi_uav_state_object.get_state_as_string(observation) return self.multi_uav_state_object.get_step_states()
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) self.moco = model_control() self.pipe_angle = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # 4th: probably add the function to change the target position is the target is reached self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # delete the old target self.moco.delete_model('target') # spawn the new target target_pos = self.change_target_pos(self.moco, self.pipe_angle) # set the target point for the gym env self.set_target_position([target_pos[0], target_pos[1], 0.0]) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() rospy.logdebug("get_observations...") observation = self.mrm_state_object.get_observations() self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, joint_targets): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(joint_targets) start = time.time() # Then we send the command to the robot and let it go for running_step seconds rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() if end - start > 1.6: # if the real time spent on one step is too long, it means the robot is heavily stuck in the pipe, we should discard this episode stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_target_pos(self, moco, angle): # how far will be the target away from the turning center approximately dis_target_left = [0.22, 0.23] b = 0.07 + random.uniform(0.0, 0.018) dis_target = np.asarray( random.uniform(dis_target_left[0], dis_target_left[1])) dis_target += (-0.018 + (angle - 60) / 10 * 0.006) target_pos = [ 0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin( (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin( (angle - 90.0) / 180 * pi) - b * cos( (angle - 90.0) / 180 * pi) ] target_name = 'target' # the model name and namespace targetdir = '/home/joshua/high_fidelity_exp/src/mrm_training/urdf/target.xacro' # visulize the target in Gazebo moco.spawn_model(targetdir, target_name, target_name, [target_pos[0], target_pos[1], 0.0], [0, 0, 0]) return target_pos def change_joints_init(self, diameter): joints_inits = { 80: [0.28, -0.78, -0.7, 0], 90: [0.3, -0.8, -0.75, 0], 100: [0.4, -1.05, -0.9, 0], 110: [0.51, -1.27, -0.95, 0], 120: [0.51, -1.27, -0.95, 0] } self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.desired_target.x = rospy.get_param("/desired_target_point/x") self.desired_target.y = rospy.get_param("/desired_target_point/y") self.desired_target.z = rospy.get_param("/desired_target_point/z") self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState( weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_state_object.set_desired_target_point(self.desired_target.x, self.desired_target.y, self.desired_target.z) self.mrm_joint_pubisher_object = JointPub() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) self._seed() # list for recording the minimal value from the laser self.mini_laser = [] # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # reset the controllers self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # this step is very important!!!! the system will be easily failed if it is removed # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend([0.0,0.0,0.0,0.0]) # assume the previous action is 0 for the first step self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, action): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(action) # Then we send the command to the robot and let it go for running_step seconds start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state if end - start > 1.0: stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # list for recording the minimal value from the laser self.mini_laser = [] self.moco = model_control() self.pipe_angle = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # reset the controllers self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready( ) # this step is very important!!!! the system will be easily failed if it is removed # delete the old target self.moco.delete_model('target') # spawn the new target target_pos = self.change_target_pos(self.moco, self.pipe_angle) # set the target point for the gym env self.set_target_position([target_pos[0], target_pos[1], 0.0]) # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend( [0.0, 0.0, 0.0, 0.0]) # assume the previous action is 0 for the first step self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, action): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(action) start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state # record the minimal value of laser data every time step # laser_data = observation[8:18] # self.mini_laser.append(min(laser_data)) if end - start > 1.6: stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_target_pos(self, moco, angle): # how far will be the target away from the turning center approximately dis_target_left = [0.22, 0.23] b = 0.07 + random.uniform(0.0, 0.018) dis_target = np.asarray( random.uniform(dis_target_left[0], dis_target_left[1])) dis_target += (-0.018 + (angle - 60) / 10 * 0.006) target_pos = [ 0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin( (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin( (angle - 90.0) / 180 * pi) - b * cos( (angle - 90.0) / 180 * pi) ] # add some noise to vertical position # target_pos[1] -= random.uniform(0.04, 0.07) target_name = 'target' # the model name and namespace targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro' # visulize the target in Gazebo moco.spawn_model(targetdir, target_name, target_name, [target_pos[0], target_pos[1], 0.0], [0, 0, 0]) return target_pos
class RobotGazeboEnv(gym.GoalEnv): def __init__(self, robot_name_space, controllers_list, reset_controls): # To reset Simulations print("Entered Gazebo Env") self.gazebo = GazeboConnection(start_init_physics_parameters=False, reset_world_or_sim="WORLD") self.controllers_object = ControllersConnection( namespace=robot_name_space, controllers_list=controllers_list) self.reset_controls = reset_controls print(self.reset_controls) self.seed() # Set up ROS related variables self.episode_num = 0 self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1) # Env methods def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Function executed each time step. Here we get the action execute it in a time step and retrieve the observations generated by that action. :param action: :return: obs, reward, done, info """ """ Here we should convert the action num to movement action, execute the action in the simulation and get the observations result of performing that action. """ print("Entered step") print("Unpause sim") self.gazebo.unpauseSim() print("Set action") print("Action:") print(action) self._set_action(action) print("Get Obs") obs = self._get_obs() print("Is done") done = self._is_done(obs) info = {} reward = self._compute_reward(obs, done) self._publish_reward_topic(reward, self.episode_num) return obs, reward, done, info def reset(self): rospy.logdebug("Reseting RobotGazeboEnvironment") print("Entered reset") self._reset_sim() self._init_env_variables() self._init_obj_pose() self._update_episode() obs = self._get_obs() return obs def close(self): """ Function executed when closing the environment. Use it for closing GUIS and other systems that need closing. :return: """ rospy.logdebug("Closing RobotGazeboEnvironment") rospy.signal_shutdown("Closing RobotGazeboEnvironment") def _update_episode(self): """ Increases the episode number by one :return: """ self.episode_num += 1 def _publish_reward_topic(self, reward, episode_number=1): """ This function publishes the given reward in the reward topic for easy access from ROS infrastructure. :param reward: :param episode_number: :return: """ reward_msg = RLExperimentInfo() reward_msg.episode_number = episode_number reward_msg.episode_reward = reward self.reward_pub.publish(reward_msg) # Extension methods # ---------------------------- def _reset_sim(self): """Resets a simulation """ if self.reset_controls: self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self._check_all_systems_ready() self._set_init_pose() self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self._check_all_systems_ready() self.gazebo.pauseSim() else: self.gazebo.unpauseSim() self._check_all_systems_ready() self._set_init_pose() self.gazebo.resetWorld() self._check_all_systems_ready() return True def _check_all_systems_ready(self): """ Checks that all the sensors, publishers and other simulation systems are operational. """ raise NotImplementedError() def _get_obs(self): """Returns the observation. """ raise NotImplementedError() def _init_env_variables(self): """Inits variables needed to be initialised each time we reset at the start of an episode. """ raise NotImplementedError() def _init_obj_pose(self): """Inits object pose for arrangement at reset time """ raise NotImplementedError() def _set_init_pose(self): """Sets the Robot in its init pose """ raise NotImplementedError() def _set_action(self, action): """Applies the given action to the simulation. """ raise NotImplementedError() def _is_done(self, observations): """Indicates whether or not the episode is done ( the robot has fallen for example). """ raise NotImplementedError() def _compute_reward(self, observations, done): """Calculates the reward to give based on the observations given. """ raise NotImplementedError() def _env_setup(self, initial_qpos): """Initial configuration of the environment. Can be used to configure initial state and extract information from the simulation. """ raise NotImplementedError()
class multi_UAV_Env(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment self.set_link = rospy.ServiceProxy('/gazebo/set_link_state', SetLinkState) rospy.wait_for_service('/gazebo/set_link_state') # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param( "/desired_pose/position/x") self.desired_pose.position.y = rospy.get_param( "/desired_pose/position/y") self.desired_pose.position.z = rospy.get_param( "/desired_pose/position/z") self.desired_pose.orientation.x = rospy.get_param( "/desired_pose/orientation/x") self.desired_pose.orientation.y = rospy.get_param( "/desired_pose/orientation/y") self.desired_pose.orientation.z = rospy.get_param( "/desired_pose/orientation/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_vel = rospy.get_param("/max_vel") self.min_vel = rospy.get_param("/min_vel") self.max_acc = rospy.get_param("/max_acc") self.min_acc = rospy.get_param("/min_acc") self.max_jerk = rospy.get_param("/max_jerk") self.min_jerk = rospy.get_param("/min_jerk") self.max_snap = rospy.get_param("/max_snap") self.min_snap = rospy.get_param("/min_snap") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.num_action_space = 15 high = np.array([1. for _ in range(self.num_action_space)]) low = np.array([-1. for _ in range(self.num_action_space)]) self.action_space = spaces.Box(low=low, high=high) # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.multi_uav_state_object = MultiUavState( max_vel=self.max_vel, min_vel=self.min_vel, max_acc=self.max_acc, min_acc=self.min_acc, max_jerk=self.max_jerk, min_jerk=self.min_jerk, max_snap=self.max_snap, min_snap=self.min_snap, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, done_reward=self.done_reward, alive_reward=self.alive_reward) self.multi_uav_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z, self.desired_pose.orientation.x, self.desired_pose.orientation.y, self.desired_pose.orientation.z) self.mellingers = [ MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35), MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35), MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35), MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35) ] """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() print("end of init...") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator print("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values print("Reset SIM...") self.gazebo.resetSim() self.gazebo.unpauseSim() # model_name = "four_quad_payload" # rospy.wait_for_service('/gazebo/delete_model') # del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel) # del_model_prox(model_name) # model_xml = rospy.get_param("robot_description") # pose = Pose() # pose.orientation.w = 1.0 # spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel) # req = SpawnModelRequest() # req.model_name = model_name # req.model_xml = model_xml # # should this be unique so ros_control can use each individually? # req.robot_namespace = "/foo" # req.initial_pose = pose # resp = spawn_model(req) self.gazebo.resetWorld() # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch') # initial_pose = Pose() # initial_pose.position.x = 0 # initial_pose.position.y = 0 # initial_pose.position.z = 0 # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro') # urdf = f.read() # rospy.wait_for_service('gazebo/spawn_urdf_model') # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel) # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world") # for i in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_imu_name = 'hummingbird_' + str(i) + '/imugt_link' # self.set_link(LinkState(link_name=link_imu_name)) # for j in range(4): # rospy.wait_for_service('/gazebo/set_link_state') # link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j) # print link_name # self.set_link(LinkState(link_name=link_name)) # rospy.wait_for_service('/gazebo/set_link_state') # link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link' # self.set_link(LinkState(link_name=link_odom_name)) # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation print("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.8) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems # rospy.logdebug("reset_monoped_joint_controllers...") # self.controllers_object.reset_monoped_joint_controllers() # 3rd: resets the robot to initial conditions # print("set_init_pose...") # for controller in self.mellingers: # motor_speed = np.zeros((4, 1)) # controller.set_desired_motor_speed(motor_speed) # controller.send_motor_command() # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints print("check_all_systems_ready...") self.multi_uav_state_object.check_all_systems_ready() # rospy.logdebug("get_observations...") # observation = self.multi_uav_state_object.get_states() # 6th: We restore the gravity to original print("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation print("Pause SIM...") self.gazebo.pauseSim() # Get the State Discrete Stringuified version of the observations state = self.get_state() return state def _step(self, actions): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented # next_action_position = self.multi_uav_state_object.get_action_to_position(action) # update the reference in the mellinger controller from the action for i, action in enumerate(actions): self.mellingers[i].set_ref_from_action(action) # We move it to that pos self.gazebo.unpauseSim() # action to the gazebo environment for i_mellinger in self.mellingers: i_mellinger.publish_err() i_mellinger.update_current_state() i_mellinger.update_desired_values() i_mellinger.motorSpeedFromU() i_mellinger.send_motor_command() # self.monoped_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations # observation = self.multi_uav_state_object.get_observations() # finally we get an evaluation based on what happened in the sim # reward,done = self.multi_uav_state_object.process_data() reward, done = self.multi_uav_state_object.calculate_reward_payload_orientation( ) # Get the State Discrete Stringuified version of the observations # state = self.get_state(observation) state = self.get_state() return state, reward, done def get_state(self): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ # return self.multi_uav_state_object.get_state_as_string(observation) return self.multi_uav_state_object.get_states()