import math
import os
import random
import time

import cv2
import gym
import numpy as np
from gym import spaces

# UnrealCv, run_docker, INPUT_WIDTH and INPUT_HEIGHT are assumed to be provided
# elsewhere in this package; their import paths are project-specific.


class Rlbot_env_loco(gym.Env):
    def __init__(self,
                 setting_file,
                 test,              # if True, use the test_xy as the start point
                 timeDependent,
                 action_type,       # 'discrete', 'continuous'
                 observation_type,  # 'color', 'depth', 'rgbd'
                 reward_type        # 'distance', 'bbox', 'bbox_distance'
                 ):
        self.show = False
        self.collision = False
        self.timeDependent = timeDependent
        self.target = None
        # self.dis2target_now = 100
        # self.isTargetChanged = False
        print("Reading setting file: ", setting_file)
        self.test = test
        self.action_type = action_type
        self.observation_type = observation_type
        self.reward_type = reward_type
        assert self.action_type == 'discrete' or self.action_type == 'continuous'
        assert self.observation_type == 'depth'
        setting = self.load_env_setting(setting_file)

        # connect UnrealCV
        self.unrealcv = UnrealCv(9000, self.cam_id, self.env_ip, self.message_handler)
        print("Camera_id: ", self.cam_id)
        print("Environment ip: ", self.env_ip)
        print("Observation type: ", observation_type)
        print("action_type: ", action_type)
        print("Reward type: ", reward_type)
        print("timeDependent: ", str(self.timeDependent))
        self.unrealcv.notify_connection()

        self.count_steps = 0

        # action
        if self.action_type == 'discrete':
            self.action_space = spaces.Discrete(len(self.discrete_angular))
        elif self.action_type == 'continuous':
            self.action_space = spaces.Box(
                low=np.array(self.continous_actions['low']),
                high=np.array(self.continous_actions['high']))

        # get start position
        self.startPoint = self.unrealcv.get_pose()
        self.unrealcv.get_target_point()
        # wait until the message handler receives the first target point
        while self.target is None:
            time.sleep(0.5)

        # for reset point generation
        self.trajectory = []
        self.start_id = 0

    def _step(self, actions):
        info = dict(
            Collision=False,
            Arrival=False,
            Done=False,
            Maxstep=False,
            Reward=0.0,
            Action=actions,
            Pose=[],
            Trajectory=self.trajectory,
            Steps=self.count_steps,
            Target=[],
            Distance=0.0,
            Direction=0.0,
            Color=None,
            Depth=None,
        )
        action_angular = actions[0]

        ########################### first time ignore ###########################
        if action_angular == -8:
            if self.observation_type == 'depth':
                state = info['Depth'] = self.unrealcv.read_depth_npy()
            resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
            resized_state = np.expand_dims(resized_state, axis=2)
            info['Done'] = True
            return resized_state, 0, info['Done'], info

        ########################### return state immediately ###########################
        if action_angular == -9:
            if self.observation_type == 'depth':
                state = info['Depth'] = self.unrealcv.read_depth_npy()
            resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
            resized_state = np.expand_dims(resized_state, axis=2)
            return resized_state, 0, False, info

        ############################ disconnect UE ###########################
        elif action_angular == -99:
            if self.observation_type == 'depth':
                state = info['Depth'] = self.unrealcv.read_depth_npy()
            resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
            resized_state = np.expand_dims(resized_state, axis=2)
            self.unrealcv.notify_disconnection()
            return resized_state, 0, False, info

        ############################## Start ###################################
        if self.action_type == 'discrete':
            angle = self.discrete_angular[action_angular]
        else:
            angle = None
        info['Done'] = False

        # take action
        distance_travelled = self.unrealcv.move(10, angle, self.timeDependent)

        info['Pose'] = self.unrealcv.get_pose()
        # info['Distance'] = self.get_distance(info['Pose'][:3], self.targets_pos)
        if self.reward_type == 'distance':
            # info['Reward'] = self.reward_distance(info['Pose'])
            # info['Reward'] = distance_travelled / 100
            if angle == 0:
                info['Reward'] = 0.01
            else:
                info['Reward'] = 0
        # info['Direction'] = self.get_direction(info['Pose'], self.targets_pos)
        info['Collision'] = self.isCollided()
        # info['Arrival'] = self.unrealcv.get_arrival()
        # self.unrealcv.reset_arrival()
        if info['Collision']:
            info['Reward'] = -1
            info['Done'] = True

        # print("R: ", str(round(info['Reward'], 3)), " D: ", str(round(info['Distance'], 3)), " A: ", str(round(info['Direction'], 3)))

        self.count_steps += 1
        # limit the max steps of every episode
        if self.count_steps > self.max_steps:
            info['Done'] = True
            info['Maxstep'] = True
            print('@@@ Reach Max Steps')

        # save the trajectory
        self.trajectory.append(info['Pose'])
        info['Trajectory'] = self.trajectory

        # update observation
        if self.observation_type == 'depth':
            state = info['Depth'] = self.unrealcv.read_depth_npy()
            if self.show:
                state = cv2.resize(state, (432, 243))
                cv2.imshow('state', state / np.max(state))
                cv2.waitKey(1)

        resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
        resized_state = np.expand_dims(resized_state, axis=2)
        # return np.array([state, np.array([info['Distance']])]), info['Reward'], info['Done'], info
        return resized_state, info['Reward'], info['Done'], info

    def _reset(self):
        self.unrealcv.reset_env()
        if self.test:
            current_pose = self.reset_from_testpoint()
        else:
            # current_pose = self.unrealcv.get_pose()
            current_pose = self.reset_from_startPoint()
        if self.observation_type == 'depth':
            state = self.unrealcv.read_depth_npy()
        self.trajectory = []
        self.trajectory.append(current_pose)
        self.count_steps = 0
        # angle = self.get_direction(current_pose, self.targets_pos)
        resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
        resized_state = np.expand_dims(resized_state, axis=2)
        return resized_state

    def _close(self):
        pass

    def message_handler(self, message):
        if message == 'collision':
            self.collision = True
        if message.startswith('nextTarget'):
            tmp = message.split(':')[1].split(',')
            self.target = [float(tmp[0]), float(tmp[1])]
            self.isTargetChanged = True

    def isCollided(self):
        # return the latched collision flag and clear it
        collided = self.collision
        self.collision = False
        return collided

    # functions for starting point module
    # def reset_from_testpoint(self):
    #     x, y = self.testpoints[self.start_id]
    #     z = self.height
    #     # noise
    #     x += random.uniform(-500, 2000)
    #     y += random.uniform(-80, 80)
    #     yaw = random.uniform(-45, 45)
    #
    #     self.unrealcv.set_position(x, y, z)
    #     self.unrealcv.set_rotation(0, yaw, 0)
    #     return [x, y, z, yaw]

    def reset_from_startPoint(self):
        x, y, z, yaw = self.startPoint
        # noise
        # x += random.uniform(-500, 2000)
        # y += random.uniform(-80, 80)
        # yaw = random.uniform(-45, 45)
        self.unrealcv.set_position(x, y, z)
        self.unrealcv.set_rotation(0, yaw, 0)
        return [x, y, z, yaw]

    def get_distance(self, current, target):
        dis = abs(np.array(target)[:2] - np.array(current)[:2])  # only x and y
        distance = math.sqrt(sum(dis * dis))
        return distance

    # def get_direction(self, current_pose, target_pose):
    #     y_delt = target_pose[1] - current_pose[1]
    #     x_delt = target_pose[0] - current_pose[0]
    #     if x_delt == 0:
    #         x_delt = 0.00001
    #
    #     target_angle = np.arctan(y_delt / x_delt) / 3.1415926 * 180
    #
    #     if y_delt > 0 and x_delt < 0:
    #         target_angle += 180
    #     if y_delt > 0 and x_delt > 0:
    #         target_angle = 360 - target_angle
    #     if y_delt < 0 and x_delt > 0:
    #         target_angle += 360
    #
    #     difference = target_angle - current_pose[-1]
    #
    #     if difference > 180:
    #         difference -= 360
    #
    #     # The angle difference: negative means the agent should rotate left,
    #     # positive means it should rotate right; range is (-180, 180].
    #     return difference

    def reward_distance(self, current_pose):
        if self.isTargetChanged:
            self.dis2target_now = self.get_distance(current_pose, self.target)
            self.isTargetChanged = False
            reward = 0.05
        else:
            distance = self.get_distance(current_pose, self.target)
            reward = (self.dis2target_now - distance) / 100
            self.dis2target_now = distance
        return reward

    def load_env_setting(self, filename):
        ext = os.path.splitext(filename)[1]
        with open(self.get_settingpath(filename)) as f:
            if ext == '.json':
                import json
                setting = json.load(f)
            elif ext == '.yaml':
                import yaml
                setting = yaml.safe_load(f)
            else:
                print('unknown type')
        print(setting)

        self.cam_id = setting['cam_id']
        self.env_ip = setting['env_ip']
        self.max_steps = setting['max_steps']
        self.height = setting['height']
        # self.testpoints = setting['test_xy']
        self.discrete_angular = setting['discrete_angular']
        self.continous_actions = setting['continous_actions']
        return setting

    def get_settingpath(self, filename):
        import gym_rlbot
        gympath = os.path.dirname(gym_rlbot.__file__)
        return os.path.join(gympath, 'envs/setting', filename)
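
# A minimal driver loop for Rlbot_env_loco (an illustrative sketch, not part of
# the environment itself). It assumes a hypothetical setting file 'loco.json'
# under envs/setting and an already-running UE4 instance. The sentinel action
# values mirror the handling in _step above: [-9] only fetches the current
# state, [-99] asks the server to disconnect.
def _demo_rlbot_env_loco():
    env = Rlbot_env_loco('loco.json', test=False, timeDependent=False,
                         action_type='discrete', observation_type='depth',
                         reward_type='distance')
    state = env._reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random index into discrete_angular
        state, reward, done, info = env._step([action])
    env._step([-99])  # notify UE that the client is disconnecting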
class RLBOT_base(gym.Env):
    def __init__(self,
                 setting_file,
                 test,              # if True, use the test_xy as the start point
                 action_type,       # 'discrete', 'continuous'
                 observation_type,  # 'color', 'depth', 'rgbd'
                 reward_type        # 'distance', 'bbox', 'bbox_distance'
                 ):
        self.show = True
        print("Reading setting file: ", setting_file)
        self.test = test
        self.action_type = action_type
        self.observation_type = observation_type
        self.reward_type = reward_type
        assert self.action_type == 'discrete' or self.action_type == 'continuous'
        assert self.observation_type == 'depth'
        setting = self.load_env_setting(setting_file)

        # connect UnrealCV
        self.unrealcv = UnrealCv(9000, self.cam_id, self.env_ip)
        print("Camera_id: ", self.cam_id)
        print("Environment ip: ", self.env_ip)
        print("Observation type: ", observation_type)
        print("action_type: ", action_type)
        print("Reward type: ", reward_type)
        self.unrealcv.notify_connection()
        self.unrealcv.declare_step_by_step()

        self.count_steps = 0
        # self.targets_pos = self.target_points[0]
        # print("Target points: ", self.target_points)
        # print("The target pose: ", self.targets_pos)

        # action
        if self.action_type == 'discrete':
            self.action_space = spaces.Discrete(len(self.discrete_actions))
        elif self.action_type == 'continuous':
            self.action_space = spaces.Box(
                low=np.array(self.continous_actions['low']),
                high=np.array(self.continous_actions['high']))

        # get start position
        current_pose = self.unrealcv.get_pose()
        # self.dis2target_now = self.get_distance(current_pose, self.targets_pos)

        # for reset point generation
        self.trajectory = []
        self.start_id = 0

    def _step(self, action):
        info = dict(
            Collision=False,
            Arrival=False,
            Done=False,
            Maxstep=False,
            Reward=0.0,
            Action=action,
            Pose=[],
            Trajectory=self.trajectory,
            Steps=self.count_steps,
            Target=[],
            Distance=0.0,
            Direction=0.0,
            Color=None,
            Depth=None,
        )

        # return state immediately
        if action == -1:
            if self.observation_type == 'depth':
                state = info['Depth'] = self.unrealcv.read_depth_npy()
            resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
            resized_state = np.expand_dims(resized_state, axis=2)
            return resized_state, 0, False, info

        # disconnect UE
        elif action == -2:
            if self.observation_type == 'depth':
                state = info['Depth'] = self.unrealcv.read_depth_npy()
            resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
            resized_state = np.expand_dims(resized_state, axis=2)
            self.unrealcv.notify_disconnection()
            return resized_state, 0, False, info

        if self.action_type == 'discrete':
            (velocity, angle) = self.discrete_actions[action]
        else:
            (velocity, angle) = action
        info['Done'] = False

        # take action
        distance_travelled = self.unrealcv.move(velocity, angle)
        # ready = self.unrealcv.get_ready()
        # while (not ready):
        #     ready = self.unrealcv.get_ready()
        # self.unrealcv.reset_ready()

        info['Pose'] = self.unrealcv.get_pose()
        # info['Distance'] = self.get_distance(info['Pose'][:3], self.targets_pos)
        if self.reward_type == 'distance':
            info['Reward'] = self.reward_distance(info['Pose'])
        else:
            info['Reward'] = distance_travelled / 1000
        # info['Direction'] = self.get_direction(info['Pose'], self.targets_pos)
        info['Collision'] = self.unrealcv.get_collision()
        self.unrealcv.reset_collision()
        # info['Arrival'] = self.unrealcv.get_arrival()
        # self.unrealcv.reset_arrival()
        if info['Collision']:
            info['Reward'] = -5
            info['Done'] = True
        # elif info['Arrival']:
        #     info['Reward'] = 1
        #     info['Done'] = True

        # print("R: ", str(round(info['Reward'], 3)), " D: ", str(round(info['Distance'], 3)), " A: ", str(round(info['Direction'], 3)))

        self.count_steps += 1
        # limit the max steps of every episode
        if self.count_steps > self.max_steps:
            info['Done'] = True
            info['Maxstep'] = True
            print('@@@ Reach Max Steps')

        # save the trajectory
        self.trajectory.append(info['Pose'])
        info['Trajectory'] = self.trajectory

        # update observation
        if self.observation_type == 'depth':
            state = info['Depth'] = self.unrealcv.read_depth_npy()
            if self.show:
                cv2.imshow('state', state / np.max(state))
                cv2.waitKey(1)

        resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
        resized_state = np.expand_dims(resized_state, axis=2)
        # return np.array([state, np.array([info['Distance']])]), info['Reward'], info['Done'], info
        return resized_state, info['Reward'], info['Done'], info

    def _reset(self):
        self.unrealcv.reset_env()
        # ready = self.unrealcv.get_ready()
        # while (not ready):
        #     ready = self.unrealcv.get_ready()
        # self.unrealcv.reset_ready()
        if self.test:
            current_pose = self.reset_from_testpoint()
        else:
            current_pose = self.unrealcv.get_pose()
        if self.observation_type == 'depth':
            state = self.unrealcv.read_depth_npy()
        self.trajectory = []
        self.trajectory.append(current_pose)
        self.count_steps = 0
        # self.dis2target_now = self.get_distance(current_pose, self.targets_pos)
        # angle = self.get_direction(current_pose, self.targets_pos)
        resized_state = cv2.resize(state, (INPUT_WIDTH, INPUT_HEIGHT))
        resized_state = np.expand_dims(resized_state, axis=2)
        return resized_state

    def _close(self):
        pass

    # def _get_action_size(self):
    #     return len(self.action)

    # functions for starting point module
    def reset_from_testpoint(self):
        x, y = self.testpoints[self.start_id]
        z = self.height
        # noise
        x += random.uniform(-500, 2000)
        y += random.uniform(-80, 80)
        yaw = random.uniform(-45, 45)

        self.unrealcv.set_position(x, y, z)
        self.unrealcv.set_rotation(0, yaw, 0)
        return [x, y, z, yaw]

    # def get_distance(self, current, target):
    #     # error = abs(np.array(target)[:2] - np.array(current)[:2])  # only x and y
    #     # distance = math.sqrt(sum(error * error))
    #     distance = target[0] - current[0]
    #     return distance
    #
    # def get_direction(self, current_pose, target_pose):
    #     y_delt = target_pose[1] - current_pose[1]
    #     x_delt = target_pose[0] - current_pose[0]
    #     if x_delt == 0:
    #         x_delt = 0.00001
    #
    #     target_angle = np.arctan(y_delt / x_delt) / 3.1415926 * 180
    #
    #     if y_delt > 0 and x_delt < 0:
    #         target_angle += 180
    #     if y_delt > 0 and x_delt > 0:
    #         target_angle = 360 - target_angle
    #     if y_delt < 0 and x_delt > 0:
    #         target_angle += 360
    #
    #     difference = target_angle - current_pose[-1]
    #
    #     if difference > 180:
    #         difference -= 360
    #
    #     # The angle difference: negative means the agent should rotate left,
    #     # positive means it should rotate right; range is (-180, 180].
    #     return difference
    #
    # def reward_distance(self, dis2target_now):
    #     reward = (self.dis2target_now - dis2target_now[0]) / max(self.dis2target_now, 100)
    #     self.dis2target_now = dis2target_now[0]
    #
    #     return reward

    def load_env_setting(self, filename):
        ext = os.path.splitext(filename)[1]
        with open(self.get_settingpath(filename)) as f:
            if ext == '.json':
                import json
                setting = json.load(f)
            elif ext == '.yaml':
                import yaml
                setting = yaml.safe_load(f)
            else:
                print('unknown type')
        print(setting)

        self.cam_id = setting['cam_id']
        self.env_ip = setting['env_ip']
        # self.target_points = setting['target_points']
        self.max_steps = setting['max_steps']
        self.height = setting['height']
        self.testpoints = setting['test_xy']
        self.discrete_actions = setting['discrete_actions']
        self.continous_actions = setting['continous_actions']
        return setting

    def get_settingpath(self, filename):
        import gym_rlbot
        gympath = os.path.dirname(gym_rlbot.__file__)
        return os.path.join(gympath, 'envs/setting', filename)
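
# load_env_setting above accepts a JSON or YAML file under envs/setting and
# expects at least the keys read there. A hypothetical minimal JSON example
# (key names match the code; all values are illustrative only):
#
# {
#     "cam_id": 0,
#     "env_ip": "127.0.0.1",
#     "max_steps": 100,
#     "height": 40,
#     "test_xy": [[0, 0], [100, 200]],
#     "discrete_actions": [[20, 0], [20, 15], [20, -15]],
#     "continous_actions": {"low": [0, -30], "high": [40, 30]}
# }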
class UnrealCvSimple(gym.Env):
    # init the Unreal Gym environment
    def __init__(self, ENV_NAME='RealisticRendering'):
        self.cam_id = 0
        # run the virtual environment in a docker container
        self.docker = run_docker.RunDocker()
        env_ip, env_dir = self.docker.start(ENV_NAME=ENV_NAME)
        # connect the unrealcv client to the server
        self.unrealcv = UnrealCv(self.cam_id, ip=env_ip, env=env_dir)
        self.startpose = self.unrealcv.get_pose()
        # ACTION: (linear velocity, angular velocity)
        self.ACTION_LIST = [
            (20, 0),
            (20, 15),
            (20, -15),
            (20, 30),
            (20, -30),
        ]
        self.count_steps = 0
        self.max_steps = 100
        self.target_pos = (-60, 0, 50)
        self.action_space = gym.spaces.Discrete(len(self.ACTION_LIST))
        state = self.unrealcv.read_image(self.cam_id, 'lit')
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=state.shape)

    # update the environment step by step
    def _step(self, action=0):
        (velocity, angle) = self.ACTION_LIST[action]
        self.count_steps += 1
        collision = self.unrealcv.move(self.cam_id, angle, velocity)
        reward, done = self.reward(collision)
        state = self.unrealcv.read_image(self.cam_id, 'lit')

        # limit max steps per episode
        if self.count_steps > self.max_steps:
            done = True
            print('Reach Max Steps')
        return state, reward, done, {}

    # reset the environment
    def _reset(self):
        x, y, z, yaw = self.startpose
        self.unrealcv.set_position(self.cam_id, x, y, z)
        self.unrealcv.set_rotation(self.cam_id, 0, yaw, 0)
        state = self.unrealcv.read_image(self.cam_id, 'lit')
        self.count_steps = 0
        return state

    # close docker when closing the OpenAI Gym env
    def _close(self):
        self.docker.close()

    # calculate reward according to your task
    def reward(self, collision):
        done = False
        reward = -0.01
        if collision:
            done = True
            reward = -1
            print('Collision Detected!!')
        else:
            distance = self.calculate_distance(self.target_pos, self.unrealcv.get_pos())
            if distance < 50:
                reward = 10
                done = True
                print('Get Target Place!')
        return reward, done

    # calculate the 2D distance between the target and the camera
    def calculate_distance(self, target, current):
        error = abs(np.array(target) - np.array(current))[:2]  # only x and y
        distance = math.sqrt(sum(error * error))
        return distance
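
# A minimal random-agent loop for UnrealCvSimple (an illustrative sketch; it
# assumes the docker image for the 'RealisticRendering' binary is available to
# run_docker.RunDocker on this machine).
def _demo_unrealcv_simple():
    env = UnrealCvSimple(ENV_NAME='RealisticRendering')
    state = env._reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random index into ACTION_LIST
        state, reward, done, _ = env._step(action)
    env._close()  # stops the docker container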
class UnrealCvSearch(gym.Env):
    def __init__(self,
                 TARGETS=['SM_Plant_7', 'SM_Plant_8'],  # you can change it for other targets
                 DOCKER=True,
                 ENV_NAME='RealisticRendering',
                 cam_id=0,
                 MAX_STEPS=100):
        self.cam_id = cam_id
        self.target_list = TARGETS
        if DOCKER:
            self.docker = run_docker.RunDocker()
            env_ip, env_dir = self.docker.start(ENV_NAME=ENV_NAME)
            self.unrealcv = UnrealCv(self.cam_id, ip=env_ip, targets=self.target_list, env=env_dir)
        else:
            self.docker = False
            # you need to run the environment beforehand
            env_ip = '127.0.0.1'
            self.unrealcv = UnrealCv(self.cam_id, ip=env_ip, targets=self.target_list)
        print(env_ip)

        self.reward_th = 0.2
        self.trigger_th = 0.9
        self.origin = [
            (-106.195,  437.424, 40),
            (  27.897, -162.437, 40),
            ( -66.601,   -4.503, 40),
            (  10.832,  135.126, 40),
            (  67.903,   26.995, 40),
            ( -23.558, -187.354, 40),
            ( -80.312,  254.579, 40),
        ]
        self.count_steps = 0
        self.max_steps = MAX_STEPS
        self.action = (0, 0, 0)  # (velocity, angle, trigger)
        self.targets_pos = self.unrealcv.get_objects_pos(self.target_list)
        self.trajectory = []
        state = self.unrealcv.read_image(self.cam_id, 'lit')
        # self.action_space = spaces.Discrete(len(self.ACTION_LIST))
        # 3-dim continuous action: (velocity, angle, trigger)
        self.action_space = spaces.Box(low=0, high=100, shape=(len(self.action),))
        self.observation_space = spaces.Box(low=0, high=255, shape=state.shape)
        self.trigger_count = 0

    def _step(self, action=(0, 0, 0), show=False):
        info = dict(
            Collision=False,
            Done=False,
            Trigger=0.0,
            Maxstep=False,
            Reward=0.0,
            Action=action,
            Bbox=[],
            Pose=[],
            Trajectory=[],
            Steps=self.count_steps,
            Target=self.targets_pos[0],
        )
        (velocity, angle, info['Trigger']) = action
        self.count_steps += 1
        info['Done'] = False

        # When the agent believes it has found the target object, it fires the
        # trigger: the episode ends and the reward is computed from the bounding
        # box size. Only three false triggers are allowed per episode.
        if info['Trigger'] > self.trigger_th:
            state = self.unrealcv.read_image(self.cam_id, 'lit', show=False)
            self.trigger_count += 1
            info['Reward'], info['Bbox'] = self.reward_bbox()
            if info['Reward'] > 0 or self.trigger_count > 3:
                info['Done'] = True
                print('Trigger Terminal!')
        # if a collision occurs, the episode is done and the reward is -1
        else:
            info['Collision'] = self.unrealcv.move(self.cam_id, angle, velocity)
            state = self.unrealcv.read_image(self.cam_id, 'lit', show=False)
            info['Reward'] = 0
            if info['Collision']:
                info['Reward'] = -1
                info['Done'] = True
                print('Collision!!')

        # limit the max steps of every episode
        if self.count_steps > self.max_steps:
            info['Done'] = True
            info['Maxstep'] = True
            print('Reach Max Steps')

        # save the trajectory
        info['Pose'] = self.unrealcv.get_pose()
        self.trajectory.append(info['Pose'])
        info['Trajectory'] = self.trajectory

        if show:
            self.unrealcv.show_img(state, 'state')

        return state, info['Reward'], info['Done'], info

    def _reset(self):
        # set a random start point from the origin list
        self.count_steps = 0
        start_point = random.randint(0, len(self.origin) - 1)
        x, y, z = self.origin[start_point]
        self.unrealcv.set_position(self.cam_id, x, y, z)
        self.unrealcv.set_rotation(self.cam_id, 0, random.randint(0, 360), 0)
        state = self.unrealcv.read_image(self.cam_id, 'lit')
        current_pos = self.unrealcv.get_pos()
        self.trajectory = []
        self.trajectory.append(current_pos)
        self.trigger_count = 0
        return state

    def _close(self):
        if self.docker:
            self.docker.close()

    def _get_action_size(self):
        return len(self.action)

    def reward_bbox(self):
        object_mask = self.unrealcv.read_image(self.cam_id, 'object_mask')
        # mask, box = self.unrealcv.get_bbox(object_mask, self.target_list[0])
        boxes = self.unrealcv.get_bboxes(object_mask, self.target_list)
        reward = 0
        for box in boxes:
            reward += self.calculate_bbox_reward(box)
        if reward > self.reward_th:
            reward = reward * 10
            print('Get ideal Target!!!')
        elif reward == 0:
            reward = -1
            print('Get Nothing')
        else:
            reward = 0
            print('Get small Target!!!')
        return reward, boxes

    def calculate_bbox_reward(self, box):
        (xmin, ymin), (xmax, ymax) = box
        boxsize = (ymax - ymin) * (xmax - xmin)
        x_c = (xmax + xmin) / 2.0
        x_bias = x_c - 0.5
        discount = max(0, 1 - x_bias ** 2)
        reward = discount * boxsize
        return reward
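
# Worked example for calculate_bbox_reward (an illustrative sketch that
# re-derives the numbers by hand; the x_bias = x_c - 0.5 centering term implies
# box coordinates normalized to [0, 1]).
def _demo_bbox_reward():
    (xmin, ymin), (xmax, ymax) = (0.4, 0.35), (0.6, 0.65)  # centered 20% x 30% box
    boxsize = (ymax - ymin) * (xmax - xmin)  # 0.3 * 0.2 = 0.06
    x_bias = (xmax + xmin) / 2.0 - 0.5       # 0.0 for a centered box
    discount = max(0, 1 - x_bias ** 2)       # 1.0
    reward = discount * boxsize              # 0.06, below reward_th (0.2)
    # reward_bbox would therefore print 'Get small Target!!!' and return 0
    return reward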