def start_evolve(self):
    """Play evaluation games for the current evolution target until ROS stops.

    Each pass of the outer loop blocks on ``self.game_start``, picks one
    controller per robot (the target robot through ``select_player(True)``,
    every other robot through ``select_player()``), steps the gym
    environment until it reports ``done`` or the game is cut short, hands
    the resulting fitness to ``self.target.set_fitness`` and then advances
    to the next evolution target.

    Every controller receives a 5-element input: two image features
    (``[0.0, -1.0]`` until an opponent is detected in the camera image)
    followed by raw observation entries 2, 5 and 7.
    """
    if REAL:
        env = gym.make('gym_robobo_predator_prey_real-v0')
    else:
        env = gym.make('gym_robobo_predator_prey-v0')
    self.set_next_evolution_target()

    while not rospy.is_shutdown():
        self.game_start.wait()
        if DEBUG_THREAD:
            print("start game")
        self.game_start.clear()

        # One controller per robot: predators first, prey after them.
        play_net = []
        for i in range(NUM_PREDATORS):
            if self.predators[i].name == self.target.name:
                play_net.append(self.predators[i].select_player(True))
            else:
                play_net.append(self.predators[i].select_player())
        for i in range(NUM_PREY):
            if self.prey[i].name == self.target.name:
                play_net.append(self.prey[i].select_player(True))
            else:
                play_net.append(self.prey[i].select_player())

        done = False
        step = 0
        observations = env.reset()
        # Number of steps in which the target robot had its opponent in view.
        look_at_fitness = 0.0
        info = None
        reward = None

        while not done and not rospy.is_shutdown():
            action = np.zeros((NUM_PREDATORS + NUM_PREY, 2), dtype=int)
            step += 1

            # Early abort (only possible once a step has produced ``info``):
            # after 3 time units stop if the target is still below y = -1.4
            # before time 5, or if it has hardly ever seen its opponent.
            if info is not None and info["time"] > 3 and (
                    (info["time"] < 5
                     and info[self.target.name + "_position"].y < -1.4)
                    or look_at_fitness < 0.2):
                break

            for predator_idx, obs in enumerate(observations.predator):
                if obs is None:
                    continue
                obs_img = obs[-1]
                obs = np.array([obs[2], obs[5], obs[7]])
                obs_img_feature = np.array([0.0, -1.0])
                obs = np.concatenate((obs_img_feature, obs))

                if obs_img is not None:
                    # NOTE(review): ``real=True`` is hard-coded here even
                    # when REAL is false - confirm this is intended.
                    rects, detect_img = robobo_detection.detect_robobo(
                        obs_img, real=True)
                    if DEBUG_IMAGE:
                        window_name = ("predator" + str(predator_idx + 1)
                                       + "_image")
                        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                        cv2.moveWindow(window_name, 0,
                                       30 + predator_idx * 350)
                        cv2.resizeWindow(window_name, 300, 300)
                        cv2.imshow(window_name, detect_img)
                        cv2.waitKey(1)
                    detect_img_width = detect_img.shape[1]
                    for rect in rects:
                        area = rect[2] * rect[3]
                        # Small detections are ignored; the last qualifying
                        # "Prey" rectangle wins.
                        if rect[4] == "Prey" and area > 200:
                            obs[0] = rect[0] / (detect_img_width) - 0.5
                            obs[1] = area / AGENT_IMAGE_MAX

                # obs[1] stays at -1.0 unless a prey rectangle was accepted.
                if (self.predators[predator_idx].name == self.target.name
                        and obs[1] > 0):
                    look_at_fitness += 1.0

                # Activate exactly once per step and reuse the result for
                # the debug print, so a stateful network is not advanced
                # twice when DEBUG_INOUT is on.
                output = np.array(play_net[predator_idx].activate(obs),
                                  dtype=float)
                if DEBUG_MANUAL:
                    action[predator_idx, :] = output * 0
                else:
                    action[predator_idx, :] = output * MAX_SPEED
                if DEBUG_INOUT:
                    print("predator input:", predator_idx, obs)
                    print("predator output:", predator_idx,
                          output * MAX_SPEED)

            for prey_idx, obs in enumerate(observations.prey):
                if obs is None:
                    continue
                obs_img = obs[-1]
                obs = np.array([obs[2], obs[5], obs[7]])
                obs_img_feature = np.array([0.0, -1.0])
                obs = np.concatenate((obs_img_feature, obs))

                if obs_img is not None:
                    rects, detect_img = robobo_detection.detect_robobo(
                        obs_img, real=True)
                    if DEBUG_IMAGE:
                        window_name = "prey" + str(prey_idx + 1) + "_image"
                        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                        cv2.moveWindow(window_name, 390, 30 + prey_idx * 350)
                        cv2.resizeWindow(window_name, 300, 300)
                        cv2.imshow(window_name, detect_img)
                        cv2.waitKey(1)
                    detect_img_width = detect_img.shape[1]

                    offsets = []
                    areas = []
                    for rect in rects:
                        if rect[4] == "Predator":
                            area = rect[2] * rect[3]
                            offsets.append(
                                rect[0] / (detect_img_width / 2) - 1.0)
                            areas.append(area / AGENT_IMAGE_MAX)
                    if areas:
                        # Only the largest predator detection is fed to the
                        # controller (first one wins on ties).
                        biggest = max(range(len(areas)),
                                      key=areas.__getitem__)
                        obs[0] = offsets[biggest]
                        obs[1] = areas[biggest]

                if (self.prey[prey_idx].name == self.target.name
                        and obs[1] > 0):
                    look_at_fitness += 1.0

                net_idx = prey_idx + NUM_PREDATORS
                output = np.array(play_net[net_idx].activate(obs),
                                  dtype=float)
                if DEBUG_MANUAL:
                    action[net_idx, :] = output * 0
                else:
                    action[net_idx, :] = output * MAX_SPEED
                if DEBUG_INOUT:
                    print("prey input:", obs)
                    print("prey output:", output * MAX_SPEED)

            observations, reward, done, info = env.step(action)

        if step == 0:
            # ROS went down before a single step was taken: there is no
            # reward/info to score and dividing by ``step`` would fail.
            break

        look_at_fitness = look_at_fitness / step
        if self.target.name == self.prey[0].name:
            print("Prey fitness!!!")
            fitness = reward['prey']
        else:
            print("Predator fitness", reward['predators'],
                  info["distances"][self.target.idx],
                  info["arena_length"] - info["distances"][self.target.idx])
            fitness = reward['predators'] * (
                info["arena_length"] - info["distances"][self.target.idx])
        # Scale by the fraction of steps in which the opponent was in view.
        fitness = fitness * look_at_fitness

        if DEBUG_FITNESS:
            print("look_at_fitness:", look_at_fitness)
            print("fitnessfitness", fitness)
        self.target.set_fitness(fitness)
        print("Total step:", step)
        if DEBUG_THREAD:
            print("end game")
        self.set_next_evolution_target()
# NOTE(review): headerless fragment - no enclosing ``def`` is visible and the
# text stops in the middle of the image-handling branch. ``done`` and
# ``observations`` are used without being bound here, so they must come from
# code that is not shown. Looks like the interior of a predator-only game
# loop - confirm before relying on it.
fitnesses = [0.0] * 3
step = 0
while not done and not rospy.is_shutdown():
    # One (left, right) integer command per predator for this step.
    action = np.zeros((NUM_PREDATORS, 2), dtype=int)
    step += 1
    for predator_idx, obs in enumerate(observations):
        # Robots without an observation are skipped for this step.
        if obs == None:
            continue
        # The last entry of the raw observation is treated as the image.
        obs_img = obs[-1]
        #obs = obs[0:8]
        # The raw observation is replaced by a fixed 3-element vector.
        obs = np.array([0.0, 0.0, -1.0])
        #obs = np.concatenate((obs, np.array([0.0, 0.0, -1.0])), axis=None)
        if type(obs_img) != type(None):
            rects, detect_img = robobo_detection.detect_robobo(obs_img)
            # Show the annotated image in a per-predator window, stacked
            # vertically (350 px apart).
            window_name = "predator" + str(predator_idx + 1) + "_image"
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.moveWindow(window_name, 0, 30 + predator_idx * 350)
            cv2.resizeWindow(window_name, 300, 300)
            cv2.imshow(window_name, detect_img)
            cv2.waitKey(1)
            detect_img_height = detect_img.shape[0]
            detect_img_width = detect_img.shape[1]
            detect_img_area = detect_img_height * detect_img_width
            #print(detect_img.shape)
            #cv2.imwrite('images/' + str(time.time()) + ".png", obs_img)
def start_evolve(self):
    """Evaluate the current evolution target over several games per round.

    After ``self.game_start`` fires, up to ``NUM_EVALUATION`` games are
    played (never more than ``len(self.prey[0].best_nets) + 1``). The first
    game uses the controllers returned by ``select_player``; later games
    pit the target against randomly chosen stored ``best_nets`` of the
    opposing side. The mean fitness over the games is passed to
    ``self.target.set_fitness`` and the next evolution target is selected.

    Predator controllers receive ``[rect offset, rect area, obs[2]]``; the
    prey receives an 8-element vector built from simulator state (own
    position, distance to each predator, yaw difference to each predator)
    or, when ``EVOLVE_PREY`` is false, is driven by
    ``self.prey[i].fixed_strategy``.
    """
    if REAL:
        env = gym.make('gym_robobo_predator_prey_real-v0')
    else:
        env = gym.make('gym_robobo_predator_prey-v0')
    self.set_next_evolution_target()

    # Predators whose games may be cut short when they stall.
    predator_names = ('predator0', 'predator1', 'predator2')

    while not rospy.is_shutdown():
        self.game_start.wait()
        if DEBUG_THREAD:
            print("start game")
        self.game_start.clear()

        count_evalution = 0
        average_fitness = 0.0
        while (count_evalution < NUM_EVALUATION
               and count_evalution <= len(self.prey[0].best_nets)):
            target_is_prey = self.prey[0].name == self.target.name

            play_net = []
            if count_evalution == 0 or len(self.prey[0].best_nets) == 0:
                # First game: every robot uses its own current selection.
                print("LOADNEAT")
                for i in range(NUM_PREDATORS):
                    if self.predators[i].name == self.target.name:
                        play_net.append(self.predators[i].select_player(True))
                    else:
                        play_net.append(self.predators[i].select_player())
                for i in range(NUM_PREY):
                    if self.prey[i].name == self.target.name:
                        play_net.append(self.prey[i].select_player(True))
                    else:
                        play_net.append(self.prey[i].select_player())
            else:
                # Later games: the opposing side plays a random stored best.
                print("LOADBEST")
                if target_is_prey:
                    for i in range(NUM_PREDATORS):
                        play_net.append(
                            random.choice(self.predators[i].best_nets))
                    for i in range(NUM_PREY):
                        play_net.append(self.prey[i].select_player(True))
                else:
                    for i in range(NUM_PREDATORS):
                        if self.predators[i].name == self.target.name:
                            play_net.append(
                                self.predators[i].select_player(True))
                        else:
                            play_net.append(
                                self.predators[i].select_player())
                    for i in range(NUM_PREY):
                        play_net.append(random.choice(self.prey[i].best_nets))

            count_evalution += 1
            print('count_evalution:', count_evalution)

            step = 0
            observations, reward, done, info = env.reset()
            look_at_fitness = 0.0
            # Last seen prey offset per robot; fed back as obs[0] when the
            # prey is not detected in the current frame.
            previous_position = np.array([0.0] * (NUM_PREDATORS + NUM_PREY))
            # Arguments forwarded unchanged to ``fixed_strategy``.
            sigma_x_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
            sigma_y_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
            w_x_list = [2.0, -2.0, 0.0, 0.0]
            w_y_list = [0.0, 0.0, 2.0, -2.0]
            w_sigma_x_list = np.array([1.0, 1.0, 0.0, 0.0]) * WALL_SIGMA
            w_sigma_y_list = np.array([0.0, 0.0, 1.0, 1.0]) * WALL_SIGMA
            # Running speed sums, divided by the step count when needed.
            speed_sum = dict((name, 0.0) for name in predator_names)
            prey_speed_sum = 0.0
            # True when the target predator's game was cut short below.
            stalled = False

            while not done and not rospy.is_shutdown():
                action = np.zeros((NUM_PREDATORS + NUM_PREY, 2), dtype=int)
                step += 1

                if info is not None and 'time' in info and not EVALUATION:
                    name = self.target.name
                    # After 3 time units, abort a target predator that is
                    # barely moving or rarely has the prey in view.
                    if name in speed_sum and info["time"] > 3 and (
                            speed_sum[name] / step < 0.002
                            or look_at_fitness / step < 0.2):
                        stalled = True
                        break

                for k, name in enumerate(predator_names):
                    speed_sum[name] += self.compute_speed(env.predators[k])
                prey_speed_sum += self.compute_speed(env.prey[0])

                # Prey input built from simulator state.
                x = env.prey[0].position.x / 2.0
                y = env.prey[0].position.y / 2.0
                d0p = self.compute_distance(
                    env.predators[0], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                d1p = self.compute_distance(
                    env.predators[1], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                d2p = self.compute_distance(
                    env.predators[2], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                yp0 = self.compute_face_yaw_diff(
                    env.prey[0], env.predators[0]) / np.pi
                yp1 = self.compute_face_yaw_diff(
                    env.prey[0], env.predators[1]) / np.pi
                yp2 = self.compute_face_yaw_diff(
                    env.prey[0], env.predators[2]) / np.pi
                obsp = np.array([x, y, d0p, d1p, d2p, yp0, yp1, yp2])

                if not EVOLVE_PREY:
                    x_list = [
                        info['predator0_position'].x,
                        info['predator1_position'].x,
                        info['predator2_position'].x,
                    ]
                    y_list = [
                        info['predator0_position'].y,
                        info['predator1_position'].y,
                        info['predator2_position'].y,
                    ]
                    prey_x = info['prey_position'].x
                    prey_y = info['prey_position'].y
                    prey_orientation = info['prey_orientation']

                for predator_idx, obs in enumerate(observations.predator):
                    if obs is None:
                        continue
                    obs_img = obs[-1]
                    obs = np.array([obs[2]])
                    obs_img_feature = np.array(
                        [previous_position[predator_idx], -1.0])
                    obs = np.concatenate((obs_img_feature, obs))

                    if obs_img is not None:
                        rects, detect_img = robobo_detection.detect_robobo(
                            obs_img, real=REAL)
                        if DEBUG_IMAGE:
                            window_name = ("predator" + str(predator_idx + 1)
                                           + "_image")
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.moveWindow(window_name, 0,
                                           30 + predator_idx * 350)
                            cv2.resizeWindow(window_name, 300, 300)
                            cv2.imshow(window_name, detect_img)
                            cv2.waitKey(1)
                        detect_img_width = detect_img.shape[1]
                        if SAVE_IMAGE:
                            cv2.imwrite(
                                'images/' + str(time.time()) + ".png",
                                detect_img)
                        # Keep only the largest "Prey" rectangle above the
                        # minimum area.
                        largest_area = 0
                        for rect in rects:
                            area = rect[2] * rect[3]
                            if (rect[4] == "Prey" and area > 50
                                    and area > largest_area):
                                obs[0] = rect[0] / (detect_img_width) - 0.5
                                obs[1] = area / AGENT_IMAGE_MAX
                                previous_position[predator_idx] = obs[0]
                                largest_area = area

                    # obs[1] stays at -1.0 unless the prey was detected.
                    if (self.predators[predator_idx].name == self.target.name
                            and obs[1] > 0):
                        look_at_fitness += 1.0

                    # Activate once and reuse the result for the debug
                    # print, so a stateful network is not advanced twice.
                    output = np.array(play_net[predator_idx].activate(obs),
                                      dtype=float)
                    if DEBUG_MANUAL:
                        action[predator_idx, :] = output * 0
                    else:
                        action[predator_idx, :] = output * MAX_SPEED
                    if DEBUG_INOUT:
                        print("predator input:", predator_idx, obs)
                        print("predator output:", predator_idx,
                              output * MAX_SPEED)

                for prey_idx, obs in enumerate(observations.prey):
                    if obs is None:
                        continue
                    obs = obsp
                    net_idx = prey_idx + NUM_PREDATORS
                    if EVOLVE_PREY:
                        output = np.array(play_net[net_idx].activate(obs),
                                          dtype=float)
                        if DEBUG_MANUAL:
                            action[net_idx, :] = output * 0
                        else:
                            action[net_idx, :] = output * MAX_SPEED
                        commanded = output * MAX_SPEED
                    else:
                        # The fixed strategy drives at full speed whether
                        # or not DEBUG_MANUAL is set.
                        commanded = np.array(
                            self.prey[prey_idx].fixed_strategy(
                                x_list, y_list, sigma_x_list, sigma_y_list,
                                w_x_list, w_y_list, w_sigma_x_list,
                                w_sigma_y_list, prey_x, prey_y,
                                prey_orientation, PW_RATIO)) * MAX_SPEED
                        action[net_idx, :] = commanded
                    if DEBUG_INOUT:
                        # Print the command that was actually produced
                        # instead of activating the network a second time.
                        print("prey input:", obs)
                        print("prey output:", commanded)

                observations, reward, done, info = env.step(action)
                # NOTE(review): nesting reconstructed - placed inside the
                # step loop so a non-evolving prey target is skipped after
                # one step; confirm.
                if not EVOLVE_PREY and self.target.name == "prey":
                    break

            # Avoid dividing by zero when the game ended before any step.
            steps = max(step, 1)
            for k, name in enumerate(predator_names):
                print('s%d_avg' % k, speed_sum[name] / steps)
            print('sp_avg', prey_speed_sum / steps)

            if self.target.name == self.prey[0].name:
                print("Prey fitness!!!")
                fitness = reward['prey']
            else:
                print("Predator fitness", reward['predators'],
                      info["distances"][self.target.idx],
                      info["arena_length"] * np.sqrt(2)
                      - info["distances"][self.target.idx])
                # Closer to the prey at the end of the game is better.
                fitness = 1 / info["distances"][self.target.idx]
                print("look_at_fitness:", look_at_fitness / steps)
                if stalled:
                    # Penalise games that were aborted for stalling.
                    fitness = fitness * 0.3
            average_fitness += fitness

        print("FINAL count_evalution:", count_evalution)
        average_fitness /= count_evalution
        if DEBUG_FITNESS:
            print("average_fitness", average_fitness)
        self.target.set_fitness(average_fitness)
        print("Total step:", step)
        if DEBUG_THREAD:
            print("end game")
        self.set_next_evolution_target()
def start_evolve(self):
    """Cross-play stored best controllers and record the results.

    Without ``HUMAN`` the stored predator controllers of generations 0..99
    are played against the stored prey controllers of generations 0..99;
    with ``HUMAN`` only generation ``PREDATOR_START`` is played, against
    prey generations 0..7. For every pairing one game is run and its end
    time (capped at 30) is stored in ``end_time``; in simulation the
    predator fitness, prey fitness, caught flag (1 / -1) and a pickled
    per-step pose track are recorded as well. When ``SAVE`` is set the
    result arrays are written with ``np.save``.

    A game also ends as soon as the module-level ``END_GAME`` flag is found
    true after a step (presumably set from another thread or callback -
    TODO confirm).
    """
    global END_GAME

    def pose(state, robot):
        # (x, y, yaw) of ``robot`` taken from the env info dict.
        position = state[robot + '_position']
        yaw, _, _ = self.orientation_quaternion_to_euler(
            state[robot + '_orientation'])
        return (position.x, position.y, yaw)

    if REAL:
        env = gym.make('gym_robobo_predator_prey_real-v0')
    else:
        env = gym.make('gym_robobo_predator_prey-v0')

    interval = 1
    n_predator_gens = len(range(0, len(self.predators[0].best_nets), interval))
    n_prey_gens = len(range(0, len(self.prey[0].best_nets), interval))
    # Result matrices are indexed [predator row, prey column].
    prey_fitnesses = np.zeros((n_predator_gens, n_predator_gens))
    fitnesses = np.zeros((n_predator_gens, n_predator_gens))
    # NOTE(review): sized from the prey generations on both axes, unlike
    # the other matrices - confirm the two counts are always equal.
    caught_matrix = np.zeros((n_prey_gens, n_prey_gens), dtype=int)
    end_time = np.zeros((n_predator_gens, n_predator_gens), dtype=float)

    try:
        if not rospy.is_shutdown():
            if not HUMAN:
                predator_start = 0
                predator_end = 100
                num_prey_controller = 100
            else:
                predator_start = PREDATOR_START
                predator_end = PREDATOR_START + 1
                num_prey_controller = 8

            for save_predator_idx, predator_generation_idx in enumerate(
                    range(predator_start, predator_end, interval)):
                for save_prey_idx, prey_generation_idx in enumerate(
                        range(0, num_prey_controller, interval)):
                    print("Round " + str(prey_generation_idx) + ":")

                    # One controller per robot: predators first, then prey.
                    play_net = []
                    for i in range(NUM_PREDATORS):
                        play_net.append(
                            self.predators[i].best_nets[predator_generation_idx])
                    for i in range(NUM_PREY):
                        play_net.append(
                            self.prey[i].best_nets[prey_generation_idx])
                        self.prey[i].human_left = 0.0
                        self.prey[i].human_right = 0.0

                    step = 0
                    observations, reward, done, info = env.reset()
                    END_GAME = False
                    # Last seen prey offset per robot; fed back as obs[0]
                    # when the prey is not detected in the current frame.
                    previous_position = np.array(
                        [0.0] * (NUM_PREDATORS + NUM_PREY))
                    # Arguments forwarded unchanged to ``fixed_strategy``.
                    sigma_x_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
                    sigma_y_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
                    w_x_list = [2.0, -2.0, 0.0, 0.0]
                    w_y_list = [0.0, 0.0, 2.0, -2.0]
                    w_sigma_x_list = np.array([1.0, 1.0, 0.0, 0.0]) * WALL_SIGMA
                    w_sigma_y_list = np.array([0.0, 0.0, 1.0, 1.0]) * WALL_SIGMA
                    tracking = []

                    while not done and not rospy.is_shutdown():
                        if TIME:
                            print("start", time.time())
                        action = np.zeros((NUM_PREDATORS + NUM_PREY, 2),
                                          dtype=int)
                        step += 1

                        # NOTE(review): nesting reconstructed - the zero
                        # defaults are taken to belong to the REAL case,
                        # where no simulator positions are read; confirm.
                        if not REAL:
                            # Results are unused here; the calls are kept in
                            # case compute_speed tracks state between steps
                            # - TODO confirm.
                            self.compute_speed(env.predators[0])
                            self.compute_speed(env.predators[1])
                            self.compute_speed(env.predators[2])
                            self.compute_speed(env.prey[0])

                            # Prey input built from simulator state.
                            x = env.prey[0].position.x / 2.0
                            y = env.prey[0].position.y / 2.0
                            d0p = self.compute_distance(env.predators[0], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                            d1p = self.compute_distance(env.predators[1], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                            d2p = self.compute_distance(env.predators[2], env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                            yp0 = self.compute_face_yaw_diff(env.prey[0], env.predators[0]) / np.pi
                            yp1 = self.compute_face_yaw_diff(env.prey[0], env.predators[1]) / np.pi
                            yp2 = self.compute_face_yaw_diff(env.prey[0], env.predators[2]) / np.pi
                            obsp = np.array([x, y, d0p, d1p, d2p, yp0, yp1, yp2])

                            if not EVOLVE_PREY:
                                x_list = [info['predator0_position'].x,
                                          info['predator1_position'].x,
                                          info['predator2_position'].x]
                                y_list = [info['predator0_position'].y,
                                          info['predator1_position'].y,
                                          info['predator2_position'].y]
                                prey_x = info['prey_position'].x
                                prey_y = info['prey_position'].y
                                prey_orientation = info['prey_orientation']
                        else:
                            x_list = [0.0, 0.0, 0.0]
                            y_list = [0.0, 0.0, 0.0]
                            prey_x = 0.0
                            prey_y = 0.0
                            prey_orientation = 0.0

                        for predator_idx, obs in enumerate(observations.predator):
                            if obs is None:
                                continue
                            obs_img = obs[-1]
                            obs = np.array([obs[2]])
                            obs_img_feature = np.array(
                                [previous_position[predator_idx], -1.0])
                            obs = np.concatenate((obs_img_feature, obs))

                            if obs_img is not None:
                                if TIME:
                                    print("robobo_detection start", time.time())
                                rects, detect_img = robobo_detection.detect_robobo(
                                    obs_img, real=REAL)
                                if TIME:
                                    print("robobo_detection end", time.time())
                                if DEBUG_IMAGE:
                                    window_name = ("predator"
                                                   + str(predator_idx + 1)
                                                   + "_image")
                                    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                                    cv2.moveWindow(window_name, 100,
                                                   30 + predator_idx * 350)
                                    cv2.resizeWindow(window_name, 300, 300)
                                    cv2.imshow(window_name, detect_img)
                                    cv2.waitKey(1)
                                detect_img_width = detect_img.shape[1]
                                if SAVE_IMAGE:
                                    cv2.imwrite('images/' + str(time.time()) + ".png", detect_img)
                                # Keep only the largest "Prey" rectangle
                                # above the minimum area.
                                largest_area = 0
                                for rect in rects:
                                    area = rect[2] * rect[3]
                                    if (rect[4] == "Prey" and area > 50
                                            and area > largest_area):
                                        obs[0] = rect[0] / (detect_img_width) - 0.5
                                        obs[1] = area / AGENT_IMAGE_MAX
                                        previous_position[predator_idx] = obs[0]
                                        largest_area = area

                            # Activate once and reuse the result for the
                            # debug print, so a stateful network is not
                            # advanced twice.
                            output = np.array(
                                play_net[predator_idx].activate(obs),
                                dtype=float)
                            if DEBUG_MANUAL:
                                action[predator_idx, :] = output * 0
                            else:
                                action[predator_idx, :] = output * MAX_SPEED
                            if DEBUG_INOUT:
                                print("predator input:", predator_idx, obs)
                                print("predator output:", predator_idx,
                                      output * MAX_SPEED)

                        for prey_idx, obs in enumerate(observations.prey):
                            if obs is None:
                                continue
                            if not REAL:
                                obs = obsp
                            net_idx = prey_idx + NUM_PREDATORS
                            if EVOLVE_PREY:
                                output = np.array(
                                    play_net[net_idx].activate(obs),
                                    dtype=float)
                                if DEBUG_MANUAL:
                                    action[net_idx, :] = output * 0
                                else:
                                    action[net_idx, :] = output * MAX_SPEED
                            else:
                                command = np.array(
                                    self.prey[prey_idx].fixed_strategy(
                                        x_list, y_list, sigma_x_list,
                                        sigma_y_list, w_x_list, w_y_list,
                                        w_sigma_x_list, w_sigma_y_list,
                                        prey_x, prey_y, prey_orientation,
                                        PW_RATIO))
                                if DEBUG_MANUAL:
                                    action[net_idx, :] = command * 0
                                else:
                                    action[net_idx, :] = command * MAX_SPEED * EVOVLED_PREY_SPEED_FACTOR

                        # NOTE(review): nesting reconstructed - one track
                        # sample per step, taken before the step is applied;
                        # confirm.
                        if not REAL:
                            tracking.append((
                                pose(info, 'prey'),
                                (pose(info, 'predator0'),
                                 pose(info, 'predator1'),
                                 pose(info, 'predator2')),
                            ))

                        if TIME:
                            print("step start", time.time())
                        observations, reward, done, info = env.step(action)
                        if TIME:
                            print("step end", time.time())
                            print("end", time.time())
                        if END_GAME:
                            break

                    if not REAL:
                        prey_fitness = reward['prey']
                        # Mean inverse final distance of the three predators.
                        fitness = (1 / info["distances"][0] + 1 / info["distances"][1] + 1 / info["distances"][2]) / 3
                        if info["caught"]:
                            caught_matrix[save_predator_idx, save_prey_idx] = 1
                        else:
                            caught_matrix[save_predator_idx, save_prey_idx] = -1
                        fitnesses[save_predator_idx, save_prey_idx] = fitness
                        prey_fitnesses[save_predator_idx, save_prey_idx] = prey_fitness
                        if info["time"] > 30:
                            info["time"] = 30.0
                        print("END TIME:", info["time"])
                        print("-----------------------------------------------------------------------------------")
                        end_time[save_predator_idx, save_prey_idx] = info["time"]
                        # ``with`` closes the file even if pickling fails.
                        with open("output/tracking" + str(save_predator_idx) + "_" + str(save_prey_idx), 'wb') as output:
                            pickle.dump(tracking, output)
                    else:
                        if info["time"] > 30:
                            info["time"] = 30.0
                        end_time[save_predator_idx, save_prey_idx] = info["time"]
                        print("-----------------------------------------------------------------------------------")

                    # NOTE(review): nesting reconstructed - results are
                    # checkpointed after every game; confirm.
                    timestamp = time.time()
                    if SAVE:
                        if not REAL:
                            np.save("output/fitnesses", fitnesses)
                            np.save("output/prey_fitnesses", prey_fitnesses)
                            np.save("output/caught_matrix", caught_matrix)
                            np.save("output/end_time", end_time)
                            if HUMAN:
                                np.save("output/fitnesses_" + str(int(timestamp)), fitnesses)
                                np.save("output/prey_fitnesses_" + str(int(timestamp)), prey_fitnesses)
                                np.save("output/caught_matrix_" + str(int(timestamp)), caught_matrix)
                                np.save("output/end_time_" + str(int(timestamp)), end_time)
                        else:
                            np.save("output/end_time" + "_real_" + str(int(timestamp)), end_time)

            print("Avg: " + str(end_time.sum().sum() / 7))
            if SAVE:
                if not REAL:
                    np.save("output/fitnesses", fitnesses)
                    np.save("output/prey_fitnesses", prey_fitnesses)
                    np.save("output/caught_matrix", caught_matrix)
                    np.save("output/end_time", end_time)
                    if HUMAN:
                        np.save("human/sim/fitnesses_" + str(int(timestamp)), fitnesses)
                        np.save("human/sim/prey_fitnesses_" + str(int(timestamp)), prey_fitnesses)
                        np.save("human/sim/caught_matrix_" + str(int(timestamp)), caught_matrix)
                        np.save("human/sim/end_time_" + str(int(timestamp)), end_time)
                else:
                    np.save("human/real/end_time" + "_real_" + str(int(timestamp)), end_time)
    except KeyboardInterrupt:
        # Stopping the run by hand stays quiet, as before.
        pass
    except Exception:
        # Still best-effort (the caller is not interrupted), but a failed
        # run - e.g. fewer stored generations than the hard-coded 100 - is
        # now reported instead of being silently discarded.
        import traceback
        traceback.print_exc()