def observation(self):
    """Build the 5-element observation vector for this environment.

    Geometry, speed and the reverse flag are read from the ``ip``
    (image processing) module; the most recent action of the first
    environment is read from the module-level ``Env1_action``.

    Side effects:
        Updates ``self.way_width`` and ``self.way_player_diff``.

    Returns:
        ``np.ndarray`` laid out as
        ``[speed, curved, middle_diff, reverse, e1_act]``.
    """
    # Raw values from image processing.
    origin = ip.getOrigin()
    dots = ip.getPoints()
    player = ip.getPlayerVertex()

    # Derived features. Each ``min(..., 1)`` only caps the value from above.
    speed = min(ip.getSpeed() / 250, 1)
    x_shift_02 = dots[0][0] - dots[2][0]
    x_shift_13 = dots[1][0] - dots[3][0]
    curved = min(x_shift_02 + x_shift_13, 1)
    middle_diff = min(abs(origin[0] - player[0]) / 80, 1)
    reverse = int(bool(ip.getReverse()))
    # Env1_action is a module-level value that is only read here.
    e1_act = Env1_action / 2

    # Extra values kept on the instance for later use.
    player_x = player[0]
    dist_to_dot2 = abs(dots[2][0] - player_x)
    dist_to_dot3 = abs(dots[3][0] - player_x)
    self.way_width = abs(dots[2][0] - dots[3][0])
    self.way_player_diff = max(dist_to_dot2, dist_to_dot3)

    return np.array([speed, curved, middle_diff, reverse, e1_act])
def reset(self):
    """Start a new episode and return its first observation.

    Clears the speed history, releases inputs via ``func.release_all()``,
    triggers ``reset_env.manualReset()`` and ``ip.ipCountdown()``, then
    samples the current road/player geometry from ``ip``.

    Side effects:
        Resets ``self.speed_queue`` and ``self.pre_direction`` and records
        the episode start time in ``self.time1``.

    Returns:
        ``np.ndarray`` of five features in the same order and on the same
        ``/ 200`` scale that ``step()`` uses:
        ``[diff, speed, road_diff, car_shifted, road_shifted]``.
    """
    print("reset called")

    # Seed the speed history with a non-zero sentinel: step() ends the
    # episode when both stored speeds are exactly 0.
    self.speed_queue.clear()
    self.speed_queue.append(-10)
    self.speed_queue.append(-10)

    func.release_all()
    reset_env.manualReset()
    ip.ipCountdown()

    road_center = ip.getOrigin()
    road_points = ip.getPoints()
    player_pos = ip.getPlayerVertex()
    road_diff = self.get_road_diff(road_points)
    self.pre_direction = 4

    # Only the centre offset is needed here; the reward half is discarded.
    _, diff = self.reward_player_reddot_diff(
        road_center, player_pos, road_points, road_diff)

    # step() divides every feature by 200; apply the same scaling so the
    # first observation of an episode matches all the following ones.
    obs_scale = 200
    observation = np.array([diff, -10, road_diff, 0, 100]) / obs_scale

    # Start timing the episode once the reset has completed.
    self.time1 = datetime.datetime.now()
    return observation
def step(self, action):
    """Apply ``action`` and return ``(observation, reward, done, info)``.

    The observation holds five features, each divided by 200:
    ``[diff, cur_speed, road_diff, car_shifted, road_shifted]``.
    The regular reward is the sum of the centre-offset reward, the
    speed reward and the going-backwards reward.

    The episode ends early in two cases:
      * both entries of ``self.speed_queue`` are 0 -> reward ``-20``;
      * ``ip.isLap2()`` is true and ``self.continuos`` is falsy ->
        reward ``1000`` and a ``success_<timestamp>.txt`` log is written.

    Otherwise the call is padded so that it lasts at least 25 ms.

    Args:
        action: index understood by ``change_direction`` and ``printLoc``.

    Returns:
        Tuple ``(observation, reward, done, info)``.
    """
    # Monotonic clock: immune to wall-clock adjustments while pacing.
    start_step = time.monotonic()
    self.pre_direction = change_direction(self.pre_direction, action)

    road_center = ip.getOrigin()
    roadtop_center = ip.getOrigin2()
    road_points = ip.getPoints()
    player_pos = ip.getPlayerVertex()
    road_diff = self.get_road_diff(road_points)
    reverse = ip.getReverse()
    cur_speed = ip.getSpeed()
    car_edge = ip.getPlayerEdge()

    # Map both shift values onto the integer range 0..200.
    car_shifted = func.get_shifted(
        func.get_player_detailed_pos(car_edge[0], player_pos))
    car_shifted = int(max(0, min(car_shifted * 100 + 100, 200)))
    road_shifted = func.get_reverse_gradient(road_center, roadtop_center)
    road_shifted = int(max(0, min(road_shifted * 100 + 100, 200)))

    # Slide the speed history forward by one sample.
    self.speed_queue.popleft()
    self.speed_queue.append(cur_speed)

    reward_diff, diff = self.reward_player_reddot_diff(
        road_center, player_pos, road_points, road_diff)
    reward_speed_diff = self.reward_speed_diff()
    reward_backward = self.reward_going_back(reverse)

    observation = np.array([
        diff / 200,
        cur_speed / 200,
        road_diff / 200,
        car_shifted / 200,
        road_shifted / 200,
    ])

    # Both stored speeds are zero: treat the episode as failed.
    if self.speed_queue[0] == 0 and self.speed_queue[1] == 0:
        print("Episode Ended, with return state True")
        return observation, -20, True, {}

    # Second lap reached: finish the episode and leave a log file.
    if ip.isLap2() and not self.continuos:
        cur_time = datetime.datetime.now()
        print("한바퀴 돌기 성공!")
        log_name = "success_" + cur_time.strftime("%d%H%M%S") + ".txt"
        with open(log_name, "w", encoding="utf8") as file:
            file.write("한 바퀴 주행 성공!\n")
            file.write(str(cur_time - self.time1))
        return observation, 1000, True, {}

    # Pad the step to the 25 ms budget. Sleeping (instead of spinning on
    # the clock) keeps the CPU free for other work in the process.
    remaining = 0.025 - (time.monotonic() - start_step)
    if remaining > 0:
        time.sleep(remaining)

    self.pre_speed = cur_speed
    reward = reward_diff + reward_speed_diff + reward_backward
    info = {'direction': printLoc[action]}
    print(observation, reward, False, info)
    return observation, reward, False, info
def observation(self):
    """Collect the minimap plus scalar features from image processing.

    Side effects:
        Updates ``self.way_width`` and ``self.way_player_diff``.

    Returns:
        Tuple ``(minimap, speed, middle_diff, reverse)`` where ``minimap``
        is ``ip.getSimpleMap() / 255``, ``speed`` and ``middle_diff`` are
        capped at 1 from above, and ``reverse`` is the raw value of
        ``ip.getReverse()``.
    """
    minimap = ip.getSimpleMap() / 255

    # Raw values from image processing.
    origin = ip.getOrigin()
    dots = ip.getPoints()
    player = ip.getPlayerVertex()

    # Derived features.
    speed = min(ip.getSpeed() / 250, 1)
    middle_diff = min(abs(origin[0] - player[0]) / 80, 1)
    reverse = ip.getReverse()

    # Extra values kept on the instance for later use.
    player_x = player[0]
    dot2_x, dot3_x = dots[2][0], dots[3][0]
    self.way_width = abs(dot2_x - dot3_x)
    self.way_player_diff = max(abs(dot2_x - player_x), abs(dot3_x - player_x))

    return minimap, speed, middle_diff, reverse
def drive_v2():
    """Sample the driving state and derive a speed-dependent time factor.

    Prints the value of ``func.get_reverse_gradient`` for the two road
    origins. Returns ``None``.

    NOTE(review): in the visible body ``diff``, ``car_shifted``,
    ``way_width`` and ``speed_time_val`` are computed but never used;
    the steering logic described in the comments below appears to be
    missing -- confirm against the full file.
    """
    diff = getXdiff()
    speed = getSpeed()
    points = getPoints()
    edge = getPlayerEdge()

    detailed_pos = func.get_player_detailed_pos(edge[0], getPlayerVertex())
    car_shifted = func.get_shifted(detailed_pos)
    way_width = points[3][0] - points[2][0]

    o1 = getOrigin()
    o2 = getOrigin2()
    print(func.get_reverse_gradient(o1, o2))

    # Negative diff: the car is on the left; positive diff: on the right.
    # diff negative and car_shifted positive (car angled to the left):
    #   steer to the right.
    # diff positive and car_shifted negative (car angled to the right):
    #   steer to the left.

    # Factor is 1 below speed 100 and shrinks as 100 / speed above it.
    speed_time_val = 1 if speed < 100 else 100 / speed
def observation(self):
    """Build a flat float32 observation from the minimap and scalar features.

    Returns:
        Tuple ``(result, speed, reverse, middle_diff, way_down_length)``:
          * ``result`` -- float32 array: flattened minimap followed by
            speed, middle_diff, curved and reverse;
          * ``speed``, ``reverse``, ``middle_diff`` -- one-element arrays;
          * ``way_down_length`` -- absolute x distance between points 2
            and 3 of ``ip.getPoints()``.
    """
    # Flatten the minimap in row-major (C) order.
    flat_map = np.ravel(list(ip.getSimpleMap()), order='C')

    origin = ip.getOrigin()
    dots = ip.getPoints()
    player = ip.getPlayerVertex()

    speed = np.array([ip.getSpeed()])
    reverse = np.array([1 if ip.getReverse() else 0])

    x_shift_02 = dots[0][0] - dots[2][0]
    x_shift_13 = dots[1][0] - dots[3][0]
    curved = np.array([x_shift_02 + x_shift_13])
    middle_diff = np.array([abs(origin[0] - player[0])])
    way_down_length = abs(dots[2][0] - dots[3][0])

    pieces = (flat_map, speed, middle_diff, curved, reverse)
    result = np.concatenate(pieces).astype(np.float32)
    return result, speed, reverse, middle_diff, way_down_length
def getXdiff():
    """Return the x offset of the player vertex relative to the origin.

    The result is ``player_x - origin_x``, where the player vertex is the
    green dot and the origin is the red dot.
    """
    player_x, _ = getPlayerVertex()  # green dot
    origin_x, _ = getOrigin()  # red dot
    return player_x - origin_x