def observation(self):
    """Build the five-element observation vector for this environment.

    Reads the road origin, the road reference points and the player vertex
    from the image-processing module ``ip``, combines them with the
    module-level ``Env1_action`` and scales them into features.

    Side effects:
        Stores ``self.way_width`` and ``self.way_player_diff`` for later use.

    Returns:
        ``np.array([speed, curved, middle_diff, reverse, e1_act])`` where

        * ``speed`` is ``ip.getSpeed() / 250`` capped at 1,
        * ``curved`` is the summed x offset of point pairs (0, 2) and (1, 3),
          capped at 1 from above only,
        * ``middle_diff`` is the x distance between the road origin and the
          player divided by 80, capped at 1,
        * ``reverse`` is 1 when ``ip.getReverse()`` is truthy, else 0,
        * ``e1_act`` is ``Env1_action / 2``.
    """
    # Value taken from Env1; it is only read here.
    global Env1_action

    # Fetch the raw measurements from image processing.
    origin = ip.getOrigin()
    points = ip.getPoints()
    player = ip.getPlayerVertex()
    player_x = player[0]

    # Turn the raw measurements into features.
    speed_ratio = min(ip.getSpeed() / 250, 1)
    offset_02 = points[0][0] - points[2][0]
    offset_13 = points[1][0] - points[3][0]
    curvature = min(offset_02 + offset_13, 1)
    centre_offset = min(abs(origin[0] - player_x) / 80, 1)
    reverse_flag = int(bool(ip.getReverse()))
    env1_feature = Env1_action / 2

    # Extra values that have to be kept on the instance.
    x2 = points[2][0]
    x3 = points[3][0]
    self.way_width = abs(x2 - x3)
    self.way_player_diff = max(abs(x2 - player_x), abs(x3 - player_x))

    features = [speed_ratio, curvature, centre_offset, reverse_flag,
                env1_feature]
    return np.array(features)
def step(self, action):
    """Apply ``action`` to the environment and advance one control step.

    The method updates the steering direction, samples the current road and
    player state from the image-processing module ``ip``, computes the
    reward terms and builds the observation.

    Args:
        action: Index of the action to apply. It is passed to
            ``change_direction`` and used as a key into ``printLoc``.

    Returns:
        A 4-tuple ``(observation, reward, done, info)``:

        * ``observation`` is ``np.array([diff, cur_speed, road_diff,
          car_shifted, road_shifted]) / 200`` (element-wise).
        * ``(observation, -20, True, {})`` when the first two entries of
          ``self.speed_queue`` are both 0.
        * ``(observation, 1000, True, {})`` when ``ip.isLap2()`` is truthy
          and ``self.continuos`` is falsy; a ``success_<ddHHMMSS>.txt`` log
          file is written in that case.
        * otherwise ``(observation, reward_diff + reward_speed_diff +
          reward_backward, False, {'direction': printLoc[action]})``.

    Side effects:
        Updates ``self.pre_direction`` and ``self.speed_queue``; sets
        ``self.pre_speed`` on the non-terminal path; prints progress to
        stdout; may write a log file.
    """
    # Minimum wall time one non-terminal step has to take.
    min_step_seconds = 0.025
    # perf_counter is the clock intended for measuring short durations.
    step_started = time.perf_counter()

    self.pre_direction = change_direction(self.pre_direction, action)

    road_center = ip.getOrigin()
    roadtop_center = ip.getOrigin2()
    road_points = ip.getPoints()
    player_pos = ip.getPlayerVertex()
    road_diff = self.get_road_diff(road_points)
    reverse = ip.getReverse()
    cur_speed = ip.getSpeed()
    car_edge = ip.getPlayerEdge()

    # Both shift values are scaled by 100, offset by 100 and clamped to
    # the integer range [0, 200].
    car_shifted = func.get_shifted(
        func.get_player_detailed_pos(car_edge[0], player_pos))
    car_shifted = int(max(0, min(car_shifted * 100 + 100, 200)))
    road_shifted = func.get_reverse_gradient(road_center, roadtop_center)
    road_shifted = int(max(0, min(road_shifted * 100 + 100, 200)))

    # Slide the speed window: drop the oldest sample, add the newest.
    self.speed_queue.popleft()
    self.speed_queue.append(cur_speed)

    # Reward terms: distance from the road centre (reward_diff), speed
    # (reward_speed_diff) and a penalty for driving backwards
    # (reward_backward). They are evaluated before the termination checks,
    # exactly as before, in case the reward helpers keep internal state.
    reward_diff, diff = self.reward_player_reddot_diff(
        road_center, player_pos, road_points, road_diff)
    reward_speed_diff = self.reward_speed_diff()
    reward_backward = self.reward_going_back(reverse)

    # The observation has five elements: centre offset, speed, road
    # curvature, car shift and road shift, each divided by 200.
    observation = np.array([
        diff / 200, cur_speed / 200, road_diff / 200, car_shifted / 200,
        road_shifted / 200
    ])

    # Two consecutive zero-speed samples end the episode with a penalty.
    if self.speed_queue[0] == 0 and self.speed_queue[1] == 0:
        print("Episode Ended, with return state True")
        return observation, -20, True, {}

    # Reaching the second lap ends the game and leaves a log file.
    if ip.isLap2() and not self.continuos:
        cur_time = datetime.datetime.now()
        print("한바퀴 돌기 성공!")
        log_name = "success_" + cur_time.strftime("%d%H%M%S") + ".txt"
        with open(log_name, "w", encoding="utf8") as file:
            # write() is the right call for a single string; writelines()
            # would iterate over it character by character.
            file.write("한 바퀴 주행 성공!\n")
            file.write(str(cur_time - self.time1))
        return observation, 1000, True, {}

    # Pace the loop: sleep for whatever is left of the minimum step time
    # instead of spinning on the clock, which would keep a core busy.
    remaining = min_step_seconds - (time.perf_counter() - step_started)
    if remaining > 0:
        time.sleep(remaining)

    self.pre_speed = cur_speed

    reward = reward_diff + reward_speed_diff + reward_backward
    info = {'direction': printLoc[action]}
    print(observation, reward, False, info)
    return observation, reward, False, info
def observation(self):
    """Collect the raw observation from the image-processing module ``ip``.

    Returns:
        A 4-tuple ``(minimap, speed, reverse, finished)``:

        * ``minimap`` is ``ip.getSimpleMap() / 255``,
        * ``speed`` is the unscaled value of ``ip.getSpeed()`` (the original
          note assumes its maximum is 250),
        * ``reverse`` is the value of ``ip.getReverse()``,
        * ``finished`` is the value of ``ip.isLap2()``; the original note
          marks it as a map-completed flag that should ideally be a bool.
    """
    scaled_map = ip.getSimpleMap() / 255

    # Values are passed through unchanged; the call order is kept.
    lap_done = ip.isLap2()
    raw_speed = ip.getSpeed()
    going_backwards = ip.getReverse()

    return (scaled_map, raw_speed, going_backwards, lap_done)
def observation(self):
    """Collect the minimap plus scaled driving features from ``ip``.

    Side effects:
        Stores ``self.way_width`` and ``self.way_player_diff`` for later use.

    Returns:
        A 4-tuple ``(minimap, speed, middle_diff, reverse)``:

        * ``minimap`` is ``ip.getSimpleMap() / 255``,
        * ``speed`` is ``ip.getSpeed() / 250`` capped at 1,
        * ``middle_diff`` is the x distance between the road origin and the
          player divided by 80, capped at 1,
        * ``reverse`` is the unmodified value of ``ip.getReverse()``.
    """
    scaled_map = ip.getSimpleMap() / 255

    # Fetch the raw measurements from image processing.
    origin = ip.getOrigin()
    points = ip.getPoints()
    player_x = ip.getPlayerVertex()[0]

    # Turn the raw measurements into features.
    speed_ratio = min(ip.getSpeed() / 250, 1)
    centre_offset = min(abs(origin[0] - player_x) / 80, 1)
    going_backwards = ip.getReverse()

    # Extra values that have to be kept on the instance.
    x2 = points[2][0]
    x3 = points[3][0]
    self.way_width = abs(x2 - x3)
    self.way_player_diff = max(abs(x2 - player_x), abs(x3 - player_x))

    return (scaled_map, speed_ratio, centre_offset, going_backwards)
def observation(self):
    """Build a flat float32 observation from the minimap and road state.

    The minimap returned by ``ip.getSimpleMap()`` is flattened in C order
    and followed by four scalars: speed, distance from the road origin,
    curvature and a reverse flag.

    Returns:
        A 5-tuple ``(result, speed, reverse, middle_diff, way_down_length)``:

        * ``result`` is the concatenated feature vector as ``np.float32``,
        * ``speed``, ``reverse`` and ``middle_diff`` are one-element arrays
          holding the unscaled values that were appended to ``result``,
        * ``way_down_length`` is the absolute x distance between road
          points 2 and 3.
    """
    # Flatten the complete minimap.
    flat_map = np.ravel(list(ip.getSimpleMap()), order='C')

    origin = ip.getOrigin()
    points = ip.getPoints()
    player = ip.getPlayerVertex()

    # Each scalar is wrapped in a one-element array so it can be
    # concatenated with the flattened minimap.
    speed = np.array([ip.getSpeed()])
    reverse = np.array([1 if ip.getReverse() else 0])

    offset_02 = points[0][0] - points[2][0]
    offset_13 = points[1][0] - points[3][0]
    curved = np.array([offset_02 + offset_13])

    middle_diff = np.array([abs(origin[0] - player[0])])
    way_down_length = abs(points[2][0] - points[3][0])

    parts = (flat_map, speed, middle_diff, curved, reverse)
    result = np.concatenate(parts).astype(np.float32)
    return result, speed, reverse, middle_diff, way_down_length