コード例 #1
0
    def start_evolve(self):
        """Play one episode per evolution target and report its fitness.

        Creates the predator/prey gym environment (the ``-real`` variant when
        ``REAL`` is set) and then, until ROS shuts down, repeats:

        1. wait for ``self.game_start`` to be set, then clear it;
        2. pick one network per agent (predators first, then prey);
        3. step the environment, feeding every agent's network a 5-value
           input built from its raw observation and its camera detections;
        4. turn the last ``reward``/``info`` returned by ``env.step`` into a
           fitness, scale it by the fraction of steps in which the target
           detected its opponent, and pass it to ``self.target.set_fitness``;
        5. move on with ``self.set_next_evolution_target``.

        The loop exits without scoring if shutdown is requested before the
        first environment step of an episode.
        """
        env_name = ('gym_robobo_predator_prey_real-v0'
                    if REAL else 'gym_robobo_predator_prey-v0')
        env = gym.make(env_name)

        self.set_next_evolution_target()

        while not rospy.is_shutdown():

            self.game_start.wait()
            if DEBUG_THREAD:
                print("start game")
            self.game_start.clear()

            play_net = self._select_play_nets()

            done = False
            step = 0
            observations = env.reset()
            look_at_fitness = 0.0  # number of steps the target saw its opponent
            info = None

            while not done and not rospy.is_shutdown():
                # Integer array: the scaled float outputs assigned below are
                # truncated to whole numbers by NumPy.
                action = np.zeros((NUM_PREDATORS + NUM_PREY, 2), dtype=int)
                step += 1

                # Early termination, only possible once a step has produced
                # an ``info`` dict and more than 3 time units have elapsed.
                if info is not None and info["time"] > 3:
                    low_y_early = (
                        info["time"] < 5 and
                        info[self.target.name + "_position"].y < -1.4)
                    # NOTE(review): look_at_fitness is a raw step count here,
                    # so "< 0.2" only holds while it is still 0 - confirm
                    # whether a per-step ratio was intended.
                    if low_y_early or look_at_fitness < 0.2:
                        break

                for predator_idx, raw_obs in enumerate(observations.predator):
                    if raw_obs is None:
                        continue

                    obs_img = raw_obs[-1]
                    # Network input: two detection slots (defaults 0.0, -1.0
                    # meaning "nothing detected") followed by three raw
                    # observation entries.
                    obs = np.concatenate(
                        (np.array([0.0, -1.0]),
                         np.array([raw_obs[2], raw_obs[5], raw_obs[7]])))

                    if obs_img is not None:
                        # NOTE(review): detection always runs with real=True,
                        # even when REAL is false - confirm this is intended.
                        rects, detect_img = robobo_detection.detect_robobo(
                            obs_img, real=True)

                        if DEBUG_IMAGE:
                            self._show_debug_image(
                                "predator" + str(predator_idx + 1) + "_image",
                                detect_img, 0, predator_idx)

                        detect_img_width = detect_img.shape[1]

                        # presumably rect is (x, y, w, h, label) - TODO confirm
                        for rect in rects:
                            area = rect[2] * rect[3]
                            if rect[4] == "Prey" and area > 200:
                                # Scaled by the full width and shifted by 0.5
                                # (the prey loop below uses a different
                                # scaling). The last qualifying rect wins.
                                obs[0] = rect[0] / (detect_img_width) - 0.5
                                obs[1] = area / AGENT_IMAGE_MAX

                        if (self.predators[predator_idx].name ==
                                self.target.name and obs[1] > 0):
                            look_at_fitness += 1.0

                    # Activate once and reuse the result: a second activate()
                    # call made only for the debug print would step a
                    # stateful (e.g. recurrent) network twice per iteration.
                    output = np.array(play_net[predator_idx].activate(obs),
                                      dtype=float)
                    action[predator_idx, :] = output * (
                        0 if DEBUG_MANUAL else MAX_SPEED)

                    if DEBUG_INOUT:
                        print("predator input:", predator_idx, obs)
                        print("predator output:", predator_idx,
                              output * MAX_SPEED)

                for prey_idx, raw_obs in enumerate(observations.prey):
                    if raw_obs is None:
                        continue

                    obs_img = raw_obs[-1]
                    # Same input layout as for the predators.
                    obs = np.concatenate(
                        (np.array([0.0, -1.0]),
                         np.array([raw_obs[2], raw_obs[5], raw_obs[7]])))

                    if obs_img is not None:
                        rects, detect_img = robobo_detection.detect_robobo(
                            obs_img, real=True)

                        if DEBUG_IMAGE:
                            self._show_debug_image(
                                "prey" + str(prey_idx + 1) + "_image",
                                detect_img, 390, prey_idx)

                        detect_img_width = detect_img.shape[1]

                        # Keep only the largest "Predator" detection; on a
                        # tie the earliest one in ``rects`` is kept.
                        best = None
                        for rect in rects:
                            if rect[4] != "Predator":
                                continue
                            area = rect[2] * rect[3]
                            size = area / AGENT_IMAGE_MAX
                            if best is None or size > best[1]:
                                best = (rect[0] / (detect_img_width / 2) - 1.0,
                                        size)

                        if best is not None:
                            obs[0], obs[1] = best

                    if (self.prey[prey_idx].name == self.target.name
                            and obs[1] > 0):
                        look_at_fitness += 1.0

                    agent_row = prey_idx + NUM_PREDATORS
                    # Single activation, reused for the debug print (see the
                    # predator loop above for the rationale).
                    output = np.array(play_net[agent_row].activate(obs),
                                      dtype=float)
                    action[agent_row, :] = output * (
                        0 if DEBUG_MANUAL else MAX_SPEED)

                    if DEBUG_INOUT:
                        print("prey input:", obs)
                        print("prey output:", output * MAX_SPEED)

                observations, reward, done, info = env.step(action)

            if step == 0:
                # Shutdown was requested before the first env.step(): there
                # is no reward/info to score and step would be a zero divisor.
                break

            if self.target.name == self.prey[0].name:
                print("Prey fitness!!!")
                fitness = reward['prey']
            else:
                target_distance = info["distances"][self.target.idx]
                closeness = info["arena_length"] - target_distance
                print("Predator fitness", reward['predators'],
                      target_distance, closeness)
                fitness = reward['predators'] * closeness

            # Fraction of steps in which the target detected its opponent.
            look_at_fitness = look_at_fitness / step
            fitness = fitness * look_at_fitness

            if DEBUG_FITNESS:
                print("look_at_fitness:", look_at_fitness)
                print("fitnessfitness", fitness)

            self.target.set_fitness(fitness)

            print("Total step:", step)

            if DEBUG_THREAD:
                print("end game")
            self.set_next_evolution_target()

    def _select_play_nets(self):
        """Return one network per agent, predators first and then prey.

        The agent whose name equals ``self.target.name`` is asked for its
        network with ``select_player(True)``; all others use
        ``select_player()``.
        """
        nets = []
        for group, count in ((self.predators, NUM_PREDATORS),
                             (self.prey, NUM_PREY)):
            for i in range(count):
                agent = group[i]
                if agent.name == self.target.name:
                    nets.append(agent.select_player(True))
                else:
                    nets.append(agent.select_player())
        return nets

    def _show_debug_image(self, window_name, image, x, row):
        """Show ``image`` in a 300x300 OpenCV window at screen position
        (``x``, ``30 + row * 350``), so windows of successive agents stack
        vertically."""
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.moveWindow(window_name, x, 30 + row * 350)
        cv2.resizeWindow(window_name, 300, 300)
        cv2.imshow(window_name, image)
        cv2.waitKey(1)
コード例 #2
0
    fitnesses = [0.0] * 3
    step = 0

    while not done and not rospy.is_shutdown():
        action = np.zeros((NUM_PREDATORS, 2), dtype=int)
        step += 1
        for predator_idx, obs in enumerate(observations):
            if obs == None:
                continue
            obs_img = obs[-1]
            #obs = obs[0:8]
            obs = np.array([0.0, 0.0, -1.0])
            #obs = np.concatenate((obs, np.array([0.0, 0.0, -1.0])), axis=None)

            if type(obs_img) != type(None):
                rects, detect_img = robobo_detection.detect_robobo(obs_img)

                window_name = "predator" + str(predator_idx + 1) + "_image"
                cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                cv2.moveWindow(window_name, 0, 30 + predator_idx * 350)
                cv2.resizeWindow(window_name, 300, 300)
                cv2.imshow(window_name, detect_img)
                cv2.waitKey(1)

                detect_img_height = detect_img.shape[0]
                detect_img_width = detect_img.shape[1]
                detect_img_area = detect_img_height * detect_img_width

                #print(detect_img.shape)
                #cv2.imwrite('images/' + str(time.time()) + ".png", obs_img)
    def start_evolve(self):

        if REAL:
            env = gym.make('gym_robobo_predator_prey_real-v0')
        else:
            env = gym.make('gym_robobo_predator_prey-v0')

        self.set_next_evolution_target()

        while not rospy.is_shutdown():

            self.game_start.wait()
            if DEBUG_THREAD:
                print("start game")
            self.game_start.clear()

            count_evalution = 0
            average_fitness = 0.0
            while count_evalution < NUM_EVALUATION and count_evalution <= len(
                    self.prey[0].best_nets):

                prey_bool = False
                if self.prey[0].name == self.target.name:
                    prey_bool = True

                if count_evalution == 0 or len(self.prey[0].best_nets) == 0:

                    print("LOADNEAT")

                    play_net = []

                    for i in range(NUM_PREDATORS):
                        if self.predators[i].name == self.target.name:
                            play_net += [self.predators[i].select_player(True)]
                        else:
                            play_net += [self.predators[i].select_player()]

                    for i in range(NUM_PREY):
                        if self.prey[i].name == self.target.name:
                            play_net += [self.prey[i].select_player(True)]
                        else:
                            play_net += [self.prey[i].select_player()]

                else:

                    print("LOADBEST")

                    play_net = []

                    if prey_bool:

                        for i in range(NUM_PREDATORS):
                            play_net += [
                                random.choice(self.predators[i].best_nets)
                            ]

                        for i in range(NUM_PREY):
                            play_net += [self.prey[i].select_player(True)]

                    else:

                        for i in range(NUM_PREDATORS):
                            if self.predators[i].name == self.target.name:
                                play_net += [
                                    self.predators[i].select_player(True)
                                ]
                            else:
                                play_net += [self.predators[i].select_player()]

                        for i in range(NUM_PREY):
                            play_net += [random.choice(self.prey[i].best_nets)]

                count_evalution += 1
                print('count_evalution:', count_evalution)
                done = False
                step = 0
                observations, reward, done, info = env.reset()
                fitness = 0.0
                look_at_fitness = 0.0
                init_distance = None

                previous_position = np.array([0.0] *
                                             (NUM_PREDATORS + NUM_PREY))

                sigma_x_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
                sigma_y_list = np.array([1.0] * NUM_PREDATORS) * PREDATOR_SIGMA
                w_x_list = [2.0, -2.0, 0.0, 0.0]
                w_y_list = [0.0, 0.0, 2.0, -2.0]
                w_sigma_x_list = np.array([1.0, 1.0, 0.0, 0.0]) * WALL_SIGMA
                w_sigma_y_list = np.array([0.0, 0.0, 1.0, 1.0]) * WALL_SIGMA

                s0_avg = 0.0
                s1_avg = 0.0
                s2_avg = 0.0
                sp_avg = 0.0

                prey_break = False
                predator0_break = False
                predator1_break = False
                predator2_break = False

                while not done and not rospy.is_shutdown():

                    action = np.zeros((NUM_PREDATORS + NUM_PREY, 2), dtype=int)
                    step += 1

                    prey_input = []

                    if type(info) != type(
                            None) and 'time' in info and not EVALUATION:
                        #print(look_at_fitness/step)
                        if self.target.name != 'prey':
                            #if (info["time"] > 3 and info["time"] < 5 and info[self.target.name + "_position"].y < -1.4) or (
                            #    info["time"] > 3 and look_at_fitness / step < 0.2):
                            #print("LLLL:", look_at_fitness/ step)
                            #    break
                            pass
                            if self.target.name == 'predator0':
                                if (info["time"] > 3 and s0_avg / step < 0.002
                                    ) or (info["time"] > 3
                                          and look_at_fitness / step < 0.2):
                                    predator0_break = True
                                    break

                            if self.target.name == 'predator1':
                                if (info["time"] > 3 and s1_avg / step < 0.002
                                    ) or (info["time"] > 3
                                          and look_at_fitness / step < 0.2):
                                    predator1_break = True
                                    break

                            if self.target.name == 'predator2':
                                if (info["time"] > 3 and s2_avg / step < 0.002
                                    ) or (info["time"] > 3
                                          and look_at_fitness / step < 0.2):
                                    predator2_break = True
                                    break

                        else:
                            pass
                            #if info["time"] > 3 and sp_avg / step < 0.004:
                            #    prey_break = True
                            #    break

                    s0 = self.compute_speed(env.predators[0])
                    s1 = self.compute_speed(env.predators[1])
                    s2 = self.compute_speed(env.predators[2])
                    sp = self.compute_speed(env.prey[0])

                    s0_avg += s0
                    s1_avg += s1
                    s2_avg += s2
                    sp_avg += sp

                    x = env.prey[0].position.x / 2.0
                    y = env.prey[0].position.y / 2.0

                    d0p = self.compute_distance(
                        env.predators[0],
                        env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                    d1p = self.compute_distance(
                        env.predators[1],
                        env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))
                    d2p = self.compute_distance(
                        env.predators[2],
                        env.prey[0]) / (ARENA_LENGTH * np.sqrt(2))

                    yp0 = self.compute_face_yaw_diff(env.prey[0],
                                                     env.predators[0]) / np.pi
                    yp1 = self.compute_face_yaw_diff(env.prey[0],
                                                     env.predators[1]) / np.pi
                    yp2 = self.compute_face_yaw_diff(env.prey[0],
                                                     env.predators[2]) / np.pi

                    obsp = np.array([x, y, d0p, d1p, d2p, yp0, yp1, yp2])

                    #print("????????????????", info, step)

                    if not EVOLVE_PREY:
                        x_list = [
                            info['predator0_position'].x,
                            info['predator1_position'].x,
                            info['predator2_position'].x
                        ]
                        y_list = [
                            info['predator0_position'].y,
                            info['predator1_position'].y,
                            info['predator2_position'].y
                        ]
                        prey_x = info['prey_position'].x
                        prey_y = info['prey_position'].y
                        prey_orientation = info['prey_orientation']

                    for predator_idx, obs in enumerate(observations.predator):

                        if obs == None:
                            continue

                        obs_img = obs[-1]
                        #print("obs", obs)
                        #obs = np.array([obs[2], obs[5], obs[7], obs[8], obs[9]])
                        #obs_img_feature = np.array([0.0, -1.0, 0.0, -1.0, 0.0, -1.0])
                        obs = np.array([obs[2]])
                        obs_img_feature = np.array(
                            [previous_position[predator_idx], -1.0])
                        #obs = np.array([0.0, -1.0])
                        obs = np.concatenate((obs_img_feature, obs))
                        #obs = np.concatenate((obs, np.array([0.0, 0.0, -1.0])), axis=None)

                        if type(obs_img) != type(None):
                            rects, detect_img = robobo_detection.detect_robobo(
                                obs_img, real=REAL)

                            window_name = "predator" + str(predator_idx +
                                                           1) + "_image"

                            if DEBUG_IMAGE:
                                cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                                cv2.moveWindow(window_name, 0,
                                               30 + predator_idx * 350)
                                cv2.resizeWindow(window_name, 300, 300)
                                cv2.imshow(window_name, detect_img)
                                cv2.waitKey(1)

                            detect_img_height = detect_img.shape[0]
                            detect_img_width = detect_img.shape[1]
                            detect_img_area = detect_img_height * detect_img_width

                            #print(detect_img.shape)
                            #cv2.imwrite('images/' + str(time.time()) + ".png", obs_img)

                            count_predators = 0
                            feature1 = []
                            feature2 = []

                            if SAVE_IMAGE:
                                cv2.imwrite(
                                    'images/' + str(time.time()) + ".png",
                                    detect_img)

                            largest_area = 0
                            for rect in rects:
                                #print("FFFFFFFFFFFFFFFFFFFFFFFFFFF", predator_idx)
                                target_name = rect[4]
                                area = rect[2] * rect[3]

                                if target_name == "Prey" and area > 50 and area > largest_area:
                                    #print('area:', area, predator_idx)

                                    #if self.predators[predator_idx].name == self.target.name:

                                    #    fitness += area / (abs(rect[0] - (detect_img_width / 2)) + detect_img_area)

                                    #if DEBUG_FITNESS:
                                    #    print('FFFF', area / (abs(rect[0] - (detect_img_width / 2)) + 1.0) ** 2)

                                    obs[0] = rect[0] / (
                                        detect_img_width) - 0.5  # -1.0 - 1.0
                                    #obs[-2] = rect[1] / (detect_img_height / 2) - 1.0
                                    obs[1] = area / AGENT_IMAGE_MAX

                                    previous_position[predator_idx] = obs[0]

                                    largest_area = area

                                elif target_name == "Predator" and area > 50:

                                    feature1 += [
                                        rect[0] / (detect_img_width / 2) - 1.0
                                    ]
                                    feature2 += [area / AGENT_IMAGE_MAX]

                            if self.predators[
                                    predator_idx].name == self.target.name:
                                if obs[1] > 0:
                                    look_at_fitness += 1.0

                            feature2_sorted_idx = sorted(
                                range(len(feature2)),
                                key=lambda k: feature2[k],
                                reverse=True)
                            '''
                            
                            for idx in feature2_sorted_idx:
                                count_predators += 1
                                obs[count_predators * 2] = feature1[idx]
                                obs[count_predators * 2 + 1] = feature2[idx]
                                
                                if count_predators == 2:
                                    break
                            
                            '''
                        '''if self.predators[predator_idx].name == self.target.name:
                            play_net = self.predators[predator_idx].select_player(True)
                        else:
                            play_net = self.predators[predator_idx].select_player()'''

                        if DEBUG_MANUAL:
                            action[predator_idx, :] = np.array(
                                play_net[predator_idx].activate(obs),
                                dtype=float) * 0
                            #action[predator_idx,:] = np.array([1.0, 1.0]) * MAX_SPEED
                        else:
                            action[predator_idx, :] = np.array(
                                play_net[predator_idx].activate(obs),
                                dtype=float) * MAX_SPEED
                        #if predator_idx == 1:
                        #    print("predator input:", obs)
                        #print("predator output:", np.array(play_net.activate(obs), dtype=float) * MAX_SPEED)
                        '''if EVO_FLAG == 0:    
                            action[predator_idx,:] = nets[predator_idx](torch.FloatTensor(obs)).numpy()
                        else:
                            action[predator_idx,:] = best_nets[predator_idx](torch.FloatTensor(obs)).numpy()'''

                        if DEBUG_INOUT:
                            print("predator input:", predator_idx, obs)
                            print(
                                "predator output:", predator_idx,
                                np.array(play_net[predator_idx].activate(obs),
                                         dtype=float) * MAX_SPEED)
                    '''for prey_idx, obs in enumerate(observations.prey):
                        if obs == None:
                            continue
                            
                        obs_img = obs[-1]
                        
                        #obs = np.array([obs[2], obs[5], obs[7], obs[8], obs[9]])
                        #obs_img_feature = np.array([0.0, -1.0, 0.0, -1.0, 0.0, -1.0])
                        obs_img_feature = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
                        obs = np.array([obs[2]])
                        #obs_img_feature = np.array([0.0, -1.0])
                        obs = np.concatenate((obs_img_feature, obs))
                        #obs = np.array([0.0, -1.0])
                        
                        count_predators = 0
                        feature1 = []
                        feature2 = []
                        
                        if type(obs_img) != type(None) and EVOLVE_PREY:
                            rects, detect_img = robobo_detection.detect_robobo(obs_img, real=REAL)
                                    
                            window_name = "prey" + str(prey_idx + 1) + "_image"
                            
                            if DEBUG_IMAGE:
                                cv2.namedWindow(window_name,cv2.WINDOW_NORMAL)
                                cv2.moveWindow(window_name, 390, 30 + prey_idx * 350)                
                                cv2.resizeWindow(window_name, 300,300)
                                cv2.imshow(window_name, detect_img)
                                cv2.waitKey(1)                                            
                            
                            detect_img_height = detect_img.shape[0]
                            detect_img_width = detect_img.shape[1]
                            detect_img_area = detect_img_height * detect_img_width
                            
                            #print(detect_img.shape)
                            
                            
                            
                            
                            
                            for rect in rects:
                                target_name = rect[4]
                                area = rect[2] * rect[3]
                                if target_name == "Predator":
        
                                    #if self.prey[prey_idx].name == self.target.name:                                       
                                    #    fitness -= area / detect_img_area
        
                                    feature1 += [rect[0] / (detect_img_width / 2) - 1.0]
                                    feature2 += [area / AGENT_IMAGE_MAX]
                                        
                                    #obs[count_predators * 2] = rect[0] / (detect_img_width / 2) - 1.0                              
                                    #obs[count_predators * 2 + 1] = area / MAX_SPEED000                                                                
                                
                            # big to small                                   
                            feature2_sorted_idx = sorted(range(len(feature2)), key=lambda k: feature2[k], reverse=True)
                            
                            
                            for idx in feature2_sorted_idx:
                                obs[count_predators * 2] = feature1[idx]
                                obs[count_predators * 2 + 1] = feature2[idx]
                                count_predators += 1
                                if count_predators == NUM_PREDATORS:
                                    break
                                                        
                                              
                        if self.prey[prey_idx].name == self.target.name:
                            if obs[1] > 0:
                                look_at_fitness += 1.0
                                              
                        #action[prey_idx + NUM_PREDATORS,:] = np.array(play_net.activate(obs), dtype=float) * MAX_SPEED
                    '''
                    for prey_idx, obs in enumerate(observations.prey):
                        if obs == None:
                            continue

                        #obs = np.concatenate((np.array([obs[2]]), obsp))
                        obs = obsp

                        if DEBUG_MANUAL:
                            if EVOLVE_PREY:
                                action[prey_idx + NUM_PREDATORS, :] = np.array(
                                    play_net[prey_idx +
                                             NUM_PREDATORS].activate(obs),
                                    dtype=float) * 0
                            else:
                                action[prey_idx + NUM_PREDATORS, :] = np.array(
                                    self.prey[prey_idx].fixed_strategy(
                                        x_list, y_list, sigma_x_list,
                                        sigma_y_list, w_x_list, w_y_list,
                                        w_sigma_x_list, w_sigma_y_list, prey_x,
                                        prey_y, prey_orientation,
                                        PW_RATIO)) * MAX_SPEED
                        else:
                            if EVOLVE_PREY:
                                action[prey_idx + NUM_PREDATORS, :] = np.array(
                                    play_net[prey_idx +
                                             NUM_PREDATORS].activate(obs),
                                    dtype=float) * MAX_SPEED
                            else:
                                action[prey_idx + NUM_PREDATORS, :] = np.array(
                                    self.prey[prey_idx].fixed_strategy(
                                        x_list, y_list, sigma_x_list,
                                        sigma_y_list, w_x_list, w_y_list,
                                        w_sigma_x_list, w_sigma_y_list, prey_x,
                                        prey_y, prey_orientation,
                                        PW_RATIO)) * MAX_SPEED

                        if DEBUG_INOUT:
                            print("prey input:", obs)
                            print(
                                "prey output:",
                                np.array(play_net[prey_idx +
                                                  NUM_PREDATORS].activate(obs),
                                         dtype=float) * MAX_SPEED)

                    observations, reward, done, info = env.step(action)

                    if not EVOLVE_PREY and self.target.name == "prey":
                        break

                s0_avg = s0_avg / step
                s1_avg = s1_avg / step
                s2_avg = s2_avg / step
                sp_avg = sp_avg / step

                print('s0_avg', s0_avg)
                print('s1_avg', s1_avg)
                print('s2_avg', s2_avg)
                print('sp_avg', sp_avg)

                if self.target.name == self.prey[0].name:
                    if not prey_break:
                        print("Prey fitness!!!")
                        fitness = reward['prey']
                        look_at_fitness = look_at_fitness / step
                        fitness = fitness  # * look_at_fitness
                    else:
                        print("Prey break")
                        fitness = fitness
                else:
                    print(
                        "Predator fitness", reward['predators'],
                        info["distances"][self.target.idx],
                        info["arena_length"] * np.sqrt(2) -
                        info["distances"][self.target.idx])
                    #fitness = #reward['predators'] * (info["arena_length"] * np.sqrt(2) - info["distances"][self.target.idx])

                    fitness = 1 / info["distances"][self.target.idx]

                    look_at_fitness = look_at_fitness / step
                    print("look_at_fitness:", look_at_fitness)

                    if self.target.name == 'predator0' and predator0_break:
                        fitness = fitness * 0.3
                    if self.target.name == 'predator1' and predator1_break:
                        fitness = fitness * 0.3
                    if self.target.name == 'predator2' and predator2_break:
                        fitness = fitness * 0.3

                average_fitness += fitness

            print("FINAL count_evalution:", count_evalution)
            average_fitness /= count_evalution

            if DEBUG_FITNESS:
                print("average_fitness", average_fitness)

            self.target.set_fitness(average_fitness)

            print("Total step:", step)

            if DEBUG_THREAD:
                print("end game")
            self.set_next_evolution_target()
コード例 #4
0
    def start_evolve(self):
        """Play saved predator controllers against saved prey controllers.

        One episode is run in the gym environment for every pairing of a
        predator generation index with a prey generation index, using the
        matching entries of each agent's ``best_nets``.  The
        ``gym_robobo_predator_prey_real-v0`` environment is used when ``REAL``
        is set, ``gym_robobo_predator_prey-v0`` otherwise.

        Without ``HUMAN`` the sweep pairs generation indices 0-99 of both
        sides; with ``HUMAN`` only predator generation ``PREDATOR_START`` is
        played, against prey indices 0-7.

        Per pairing the method stores the episode end time (capped at 30.0)
        and, when ``REAL`` is not set, the predator fitness (mean inverse
        final distance), the prey reward, a caught flag (1 / -1) and a pickled
        per-step trace in ``output/tracking<predator>_<prey>``.  When ``SAVE``
        is set the result matrices are written with ``np.save`` after every
        round and once more after the sweep.

        Takes no arguments and returns ``None``.  Any ``Exception`` raised
        during the sweep is caught and reported with a traceback rather than
        propagated; ``KeyboardInterrupt`` and ``SystemExit`` are not caught.
        """
        global END_GAME

        if REAL:
            env = gym.make('gym_robobo_predator_prey_real-v0')
        else:
            env = gym.make('gym_robobo_predator_prey-v0')

        interval = 1
        predator_slots = len(range(0, len(self.predators[0].best_nets), interval))
        prey_slots = len(range(0, len(self.prey[0].best_nets), interval))

        # Result matrices, indexed [save_predator_idx, save_prey_idx].
        # NOTE(review): every matrix is square, and only caught_matrix is
        # sized from the prey's best_nets -- confirm both populations store the
        # same number of generations and that it covers the ranges used below.
        prey_fitnesses = np.zeros((predator_slots, predator_slots))
        fitnesses = np.zeros((predator_slots, predator_slots))
        caught_matrix = np.zeros((prey_slots, prey_slots), dtype=int)
        end_time = np.zeros((predator_slots, predator_slots), dtype=float)

        # Bound before the sweep so the final save can never hit an unbound
        # name; it is refreshed after every round.
        timestamp = time.time()

        try:
            if not rospy.is_shutdown():

                if not HUMAN:
                    predator_start = 0
                    predator_end = 100
                    num_prey_controller = 100
                else:
                    predator_start = PREDATOR_START
                    predator_end = PREDATOR_START + 1
                    num_prey_controller = 8

                predator_generations = range(predator_start, predator_end, interval)
                prey_generations = range(0, num_prey_controller, interval)

                for save_predator_idx, predator_generation_idx in enumerate(predator_generations):
                    for save_prey_idx, prey_generation_idx in enumerate(prey_generations):

                        print("Round " + str(prey_generation_idx) + ":")

                        # Controllers for this round: predators first, then
                        # prey, matching the row order of the action matrix.
                        play_net = [
                            self.predators[i].best_nets[predator_generation_idx]
                            for i in range(NUM_PREDATORS)
                        ]
                        for i in range(NUM_PREY):
                            play_net += [self.prey[i].best_nets[prey_generation_idx]]
                            self.prey[i].human_left = 0.0
                            self.prey[i].human_right = 0.0

                        observations, reward, done, info = env.reset()

                        # END_GAME is a module-level flag; it is cleared here
                        # and polled after every environment step.
                        END_GAME = False

                        # Last horizontal prey position written by each
                        # predator's camera branch; reused as that predator's
                        # first input on later steps.
                        previous_position = np.zeros(NUM_PREDATORS + NUM_PREY)

                        # Arguments forwarded unchanged to the prey's
                        # fixed_strategy when EVOLVE_PREY is not set.
                        sigma_x_list = np.ones(NUM_PREDATORS) * PREDATOR_SIGMA
                        sigma_y_list = np.ones(NUM_PREDATORS) * PREDATOR_SIGMA
                        w_x_list = [2.0, -2.0, 0.0, 0.0]
                        w_y_list = [0.0, 0.0, 2.0, -2.0]
                        w_sigma_x_list = np.array([1.0, 1.0, 0.0, 0.0]) * WALL_SIGMA
                        w_sigma_y_list = np.array([0.0, 0.0, 1.0, 1.0]) * WALL_SIGMA

                        # Running speed sums; they are not read again in this
                        # method.
                        s0_avg = 0.0
                        s1_avg = 0.0
                        s2_avg = 0.0
                        sp_avg = 0.0

                        tracking = []

                        while not done and not rospy.is_shutdown():
                            if TIME:
                                print("start", time.time())

                            # Integer dtype: the float wheel speeds assigned
                            # below are stored as integers.
                            action = np.zeros((NUM_PREDATORS + NUM_PREY, 2), dtype=int)

                            if not REAL:
                                # NOTE(review): compute_speed is not visible
                                # here and may keep state between calls, so the
                                # calls are kept although the sums are unused.
                                s0_avg += self.compute_speed(env.predators[0])
                                s1_avg += self.compute_speed(env.predators[1])
                                s2_avg += self.compute_speed(env.predators[2])
                                sp_avg += self.compute_speed(env.prey[0])

                                x = env.prey[0].position.x / 2.0
                                y = env.prey[0].position.y / 2.0

                                arena_diagonal = ARENA_LENGTH * np.sqrt(2)
                                d0p = self.compute_distance(env.predators[0], env.prey[0]) / arena_diagonal
                                d1p = self.compute_distance(env.predators[1], env.prey[0]) / arena_diagonal
                                d2p = self.compute_distance(env.predators[2], env.prey[0]) / arena_diagonal

                                yp0 = self.compute_face_yaw_diff(env.prey[0], env.predators[0]) / np.pi
                                yp1 = self.compute_face_yaw_diff(env.prey[0], env.predators[1]) / np.pi
                                yp2 = self.compute_face_yaw_diff(env.prey[0], env.predators[2]) / np.pi

                                # Prey network input used when REAL is not set.
                                obsp = np.array([x, y, d0p, d1p, d2p, yp0, yp1, yp2])

                                if not EVOLVE_PREY:
                                    x_list = [info['predator0_position'].x,
                                              info['predator1_position'].x,
                                              info['predator2_position'].x]
                                    y_list = [info['predator0_position'].y,
                                              info['predator1_position'].y,
                                              info['predator2_position'].y]
                                    prey_x = info['prey_position'].x
                                    prey_y = info['prey_position'].y
                                    prey_orientation = info['prey_orientation']
                            else:
                                # With REAL set the fixed strategy is fed
                                # all-zero positions and orientation.
                                x_list = [0.0, 0.0, 0.0]
                                y_list = [0.0, 0.0, 0.0]
                                prey_x = 0.0
                                prey_y = 0.0
                                prey_orientation = 0.0

                            for predator_idx, raw_obs in enumerate(observations.predator):
                                if raw_obs is None:
                                    continue

                                camera_img = raw_obs[-1]

                                # Network input: [prey position in image,
                                # prey area feature, raw_obs[2]].  The first
                                # two slots default to the last seen position
                                # and -1.0 and are overwritten on a detection.
                                net_input = np.concatenate((
                                    np.array([previous_position[predator_idx], -1.0]),
                                    np.array([raw_obs[2]]),
                                ))

                                if camera_img is not None:
                                    if TIME:
                                        print("robobo_detection start", time.time())

                                    rects, detect_img = robobo_detection.detect_robobo(camera_img, real=REAL)
                                    if TIME:
                                        print("robobo_detection end", time.time())

                                    if DEBUG_IMAGE:
                                        window_name = "predator" + str(predator_idx + 1) + "_image"
                                        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                                        cv2.moveWindow(window_name, 100, 30 + predator_idx * 350)
                                        cv2.resizeWindow(window_name, 300, 300)
                                        cv2.imshow(window_name, detect_img)
                                        cv2.waitKey(1)

                                    detect_img_width = detect_img.shape[1]

                                    if SAVE_IMAGE:
                                        cv2.imwrite('images/' + str(time.time()) + ".png", detect_img)

                                    # Keep the largest "Prey" detection whose
                                    # area exceeds 50.
                                    # NOTE(review): rect is presumably
                                    # (x, y, w, h, label) -- confirm against
                                    # robobo_detection.detect_robobo.
                                    largest_area = 0
                                    for rect in rects:
                                        target_name = rect[4]
                                        area = rect[2] * rect[3]
                                        if target_name == "Prey" and area > 50 and area > largest_area:
                                            net_input[0] = rect[0] / (detect_img_width) - 0.5
                                            net_input[1] = area / AGENT_IMAGE_MAX
                                            previous_position[predator_idx] = net_input[0]
                                            largest_area = area

                                # Activate once and reuse the result for both
                                # the action and the debug print, so enabling
                                # DEBUG_INOUT does not trigger an extra
                                # activation of the controller.
                                net_output = np.array(play_net[predator_idx].activate(net_input), dtype=float)
                                if DEBUG_MANUAL:
                                    action[predator_idx, :] = net_output * 0
                                else:
                                    action[predator_idx, :] = net_output * MAX_SPEED

                                if DEBUG_INOUT:
                                    print("predator input:", predator_idx, net_input)
                                    print("predator output:", predator_idx, net_output * MAX_SPEED)

                            for prey_idx, prey_obs in enumerate(observations.prey):
                                if prey_obs is None:
                                    continue

                                if not REAL:
                                    prey_obs = obsp

                                prey_row = prey_idx + NUM_PREDATORS
                                if EVOLVE_PREY:
                                    prey_output = np.array(play_net[prey_row].activate(prey_obs), dtype=float)
                                    if DEBUG_MANUAL:
                                        action[prey_row, :] = prey_output * 0
                                    else:
                                        action[prey_row, :] = prey_output * MAX_SPEED
                                else:
                                    prey_output = np.array(self.prey[prey_idx].fixed_strategy(
                                        x_list, y_list, sigma_x_list, sigma_y_list,
                                        w_x_list, w_y_list, w_sigma_x_list, w_sigma_y_list,
                                        prey_x, prey_y, prey_orientation, PW_RATIO))
                                    if DEBUG_MANUAL:
                                        action[prey_row, :] = prey_output * 0
                                    else:
                                        action[prey_row, :] = prey_output * MAX_SPEED * EVOVLED_PREY_SPEED_FACTOR

                            if not REAL:
                                # Trace entry built from the info returned by
                                # the previous step (or by reset): prey
                                # (x, y, yaw) followed by one such triple per
                                # predator.
                                yaw_p, _, _ = self.orientation_quaternion_to_euler(info['prey_orientation'])
                                xp_info = (info['prey_position'].x, info['prey_position'].y, yaw_p)

                                predator_triples = []
                                for agent in ('predator0', 'predator1', 'predator2'):
                                    yaw, _, _ = self.orientation_quaternion_to_euler(info[agent + '_orientation'])
                                    predator_triples.append(
                                        (info[agent + '_position'].x, info[agent + '_position'].y, yaw))
                                predators_info = tuple(predator_triples)

                                tracking += [(xp_info, predators_info)]

                            if TIME:
                                print("step start", time.time())
                            observations, reward, done, info = env.step(action)
                            if TIME:
                                print("step end", time.time())
                                print("end", time.time())

                            if END_GAME:
                                break

                        if not REAL:
                            prey_fitness = reward['prey']
                            # Predator fitness: mean inverse final distance
                            # over the three entries of info["distances"].
                            fitness = (1 / info["distances"][0]
                                       + 1 / info["distances"][1]
                                       + 1 / info["distances"][2]) / 3

                            if info["caught"]:
                                caught_matrix[save_predator_idx, save_prey_idx] = 1
                            else:
                                caught_matrix[save_predator_idx, save_prey_idx] = -1

                            fitnesses[save_predator_idx, save_prey_idx] = fitness
                            prey_fitnesses[save_predator_idx, save_prey_idx] = prey_fitness

                            if info["time"] > 30:
                                info["time"] = 30.0

                            print("END TIME:", info["time"])
                            print("-----------------------------------------------------------------------------------")

                            end_time[save_predator_idx, save_prey_idx] = info["time"]

                            # Context manager closes the file even if
                            # pickling fails.
                            tracking_path = "output/tracking" + str(save_predator_idx) + "_" + str(save_prey_idx)
                            with open(tracking_path, 'wb') as output:
                                pickle.dump(tracking, output)
                        else:
                            if info["time"] > 30:
                                info["time"] = 30.0

                            end_time[save_predator_idx, save_prey_idx] = info["time"]
                            print("-----------------------------------------------------------------------------------")

                        # Checkpoint the matrices after every round.
                        timestamp = time.time()
                        if SAVE:
                            if not REAL:
                                np.save("output/fitnesses", fitnesses)
                                np.save("output/prey_fitnesses", prey_fitnesses)
                                np.save("output/caught_matrix", caught_matrix)
                                np.save("output/end_time", end_time)

                                if HUMAN:
                                    np.save("output/fitnesses_" + str(int(timestamp)), fitnesses)
                                    np.save("output/prey_fitnesses_" + str(int(timestamp)), prey_fitnesses)
                                    np.save("output/caught_matrix_" + str(int(timestamp)), caught_matrix)
                                    np.save("output/end_time_" + str(int(timestamp)), end_time)
                            else:
                                np.save("output/end_time" + "_real_" + str(int(timestamp)), end_time)

                # NOTE(review): the divisor 7 is hard-coded; confirm it matches
                # the number of rounds actually played.
                print("Avg: " + str(end_time.sum().sum() / 7))
                if SAVE:
                    if not REAL:
                        np.save("output/fitnesses", fitnesses)
                        np.save("output/prey_fitnesses", prey_fitnesses)
                        np.save("output/caught_matrix", caught_matrix)
                        np.save("output/end_time", end_time)

                        if HUMAN:
                            np.save("human/sim/fitnesses_" + str(int(timestamp)), fitnesses)
                            np.save("human/sim/prey_fitnesses_" + str(int(timestamp)), prey_fitnesses)
                            np.save("human/sim/caught_matrix_" + str(int(timestamp)), caught_matrix)
                            np.save("human/sim/end_time_" + str(int(timestamp)), end_time)
                    else:
                        np.save("human/real/end_time" + "_real_" + str(int(timestamp)), end_time)
        except Exception:
            # Best effort: a failed sweep must not raise to the caller, but the
            # failure is reported instead of being discarded silently.
            import traceback
            traceback.print_exc()