Example 1
def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()

    rtype = p.reward_type
    rd.inital_world_setup()

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA and NN for the new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture

        for gen in range(p.generations):
            # print("Gen: %i" % gen)
            cc.select_policy_teams()

            # Each policy in the CCEA is tested as part of a team
            for team_number in range(cc.total_pop_size):
                rd.reset_to_init()  # Resets rovers to initial configuration
                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()

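                # Roll out one full episode with the selected team of policies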
                while not done:
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id,
                                                          team_number])
                        nn.run_neural_network(joint_state[rover_id],
                                              cc.pops[rover_id,
                                                      policy_id], rover_id)
                    joint_state, done = rd.step(nn.out_layer)

                # Update fitness of policies using reward information
                global_reward = homr.calc_global(rd.rover_path, rd.poi_values,
                                                 rd.poi_pos)
                if rtype == "Global":
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id,
                                                          team_number])
                        cc.fitness[rover_id, policy_id] = global_reward
                elif rtype == "Difference":
                    d_reward = homr.calc_difference(rd.rover_path,
                                                    rd.poi_values, rd.poi_pos,
                                                    global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id,
                                                          team_number])
                        cc.fitness[rover_id, policy_id] = d_reward[rover_id]
                elif rtype == "DPP":
                    dpp_reward = homr.calc_dpp(rd.rover_path, rd.poi_values,
                                               rd.poi_pos, global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id,
                                                          team_number])
                        cc.fitness[rover_id, policy_id] = dpp_reward[rover_id]
                elif rtype == "SDPP":
                    sdpp_reward = homr.calc_sdpp(rd.rover_path, rd.poi_values,
                                                 rd.poi_pos, global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id,
                                                          team_number])
                        cc.fitness[rover_id, policy_id] = sdpp_reward[rover_id]
                else:
                    sys.exit('Incorrect Reward Type')

            # Testing Phase (test best policies found so far)
            rd.reset_to_init()  # Reset rovers to initial positions
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
            while not done:
                for rover_id in range(rd.num_agents):
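                    # Greedily deploy each rover's highest-fitness policy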
                    pol_index = np.argmax(cc.fitness[rover_id])
                    nn.run_neural_network(joint_state[rover_id],
                                          cc.pops[rover_id,
                                                  pol_index], rover_id)
                joint_state, done = rd.step(nn.out_layer)

            global_reward = homr.calc_global(rd.rover_path, rd.poi_values,
                                             rd.poi_pos)
            reward_history.append(global_reward)

            # Save the rover path at the end of the final generation
            if gen == (p.generations - 1):
                save_rover_path(rd.rover_path)

            # Choose new parents and create the new offspring population
            cc.down_select()

        if rtype == "Global":
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == "Difference":
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == 'DPP':
            save_reward_history(reward_history, "DPP_Reward.csv")
        if rtype == "SDPP":
            save_reward_history(reward_history, "SDPP_Reward.csv")
Example 2
def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()

    rtype = p.reward_type

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA, NN, and world for new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture
        rd.reset_world()  # Re-initialize world

        save_world_configuration(rd.rover_initial_pos, rd.poi_pos,
                                 rd.poi_values)

        for gen in range(p.generations):
            print("Gen: %i" % gen)
            # Select which policies will be grouped into which teams
            cc.select_policy_teams()

            # Each policy in the CCEA is tested as part of a team
            for team_number in range(cc.population_size):
                rd.reset_to_init()  # Resets rovers to initial configuration
                global_reward = 0.0

                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()
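                # Roll out one episode; fitness is accumulated step by step below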
                while not done:
                    for rover_id in range(rd.num_agents):
                        policy_id = int(
                            cc.team_selection[rover_id][team_number])
                        nn.run_neural_network(joint_state[rover_id],
                                              cc.pops[rover_id,
                                                      policy_id], rover_id)
                    joint_state, done, global_reward = rd.step(nn.out_layer)

                    # Update fitness of policies using reward information
                    if rtype == "Global":
                        for rover_id in range(rd.num_agents):
                            policy_id = int(
                                cc.team_selection[rover_id][team_number])
                            cc.fitness[rover_id, policy_id] += global_reward
                    elif rtype == "Difference":
                        d_reward = homr.calc_difference(
                            rd.rover_path, rd.poi_values, rd.poi_pos,
                            global_reward, rd.istep)
                        for rover_id in range(p.num_rovers):
                            policy_id = int(
                                cc.team_selection[rover_id][team_number])
                            cc.fitness[rover_id,
                                       policy_id] += d_reward[rover_id]
                    elif rtype == "DPP":
                        dpp_reward = homr.calc_dpp(rd.rover_path,
                                                   rd.poi_values, rd.poi_pos,
                                                   global_reward, rd.istep)
                        for rover_id in range(p.num_rovers):
                            policy_id = int(
                                cc.team_selection[rover_id][team_number])
                            cc.fitness[rover_id,
                                       policy_id] += dpp_reward[rover_id]
                    elif rtype == "SDPP":
                        sdpp_reward = homr.calc_sdpp(rd.rover_path,
                                                     rd.poi_values, rd.poi_pos,
                                                     global_reward, rd.istep)
                        for rover_id in range(p.num_rovers):
                            policy_id = int(
                                cc.team_selection[rover_id][team_number])
                            cc.fitness[rover_id,
                                       policy_id] += sdpp_reward[rover_id]
                    else:
                        sys.exit('Incorrect Reward Type for Homogeneous Teams')

            # Perform down-selection after every policy has been evaluated
            cc.down_select()

            # Testing Phase
            rd.reset_to_init()  # Reset rovers to initial positions
            global_reward = 0.0
            global_max = 0.0
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
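            # Test policy 0 of each population (assumed champion after down-selection)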
            while not done:
                for rover_id in range(rd.num_agents):
                    nn.run_neural_network(joint_state[rover_id],
                                          cc.pops[rover_id, 0], rover_id)
                joint_state, done, global_reward = rd.step(nn.out_layer)

                global_max += global_reward

            reward_history.append(global_max)

            # Save the rover path at the end of the final generation
            if gen == (p.generations - 1):
                save_rover_path(rd.rover_path)
                if p.visualizer_on:
                    visualize(rd, global_max)

        if rtype == "Global":
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == "Difference":
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == 'DPP':
            save_reward_history(reward_history, "DPP_Reward.csv")
        if rtype == "SDPP":
            save_reward_history(reward_history, "SDPP_Reward.csv")
Example 3
def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()

    rtype = p.reward_type

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA, NN, and world for new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture
        rd.reset()  # Re-initialize world

        save_world_configuration(rd.rover_initial_pos, rd.poi_pos,
                                 rd.poi_value)

        for gen in range(p.generations):
            # print("Gen: %i" % gen)
            # Select which policies will be grouped into which teams
            cc.select_policy_teams()
            # Each policy in the CCEA is tested as part of a team
            for team_number in range(cc.population_size):
                rd.reset_to_init()  # Resets rovers to initial configuration

                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()
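                # Roll out one episode with this team of policies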
                while not done:
                    for rover_id in range(rd.num_agents):
                        # Get the policy selected from the previous generation
                        policy_id = int(
                            cc.team_selection[rover_id][team_number])
                        nn.run_neural_network(joint_state[rover_id],
                                              cc.pops[rover_id,
                                                      policy_id], rover_id)
                    joint_state, done = rd.step(nn.out_layer)

                    # Log the joint state at each step for offline analysis
                    temp_state = numpy.array(joint_state)
                    log_name = ("CCEA_data_%d_%d_%d.txt" %
                                (p.num_rovers, p.num_pois, p.angle_resolution))
                    with open(log_name, "a") as state_log:
                        state_log.write(str(temp_state) + "\n")

                # Update fitness of policies using reward information
                if rtype == 0:
                    reward, poi_status = homr.calc_global(
                        rd.rover_path, rd.poi_value, rd.poi_pos)
                    rd.poi_status = poi_status

                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward

                elif rtype == 1:
                    reward = homr.calc_difference(rd.rover_path, rd.poi_value,
                                                  rd.poi_pos)
                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward[pop_id]
                    print("Generation", gen, "out of", p.generations,
                          "Difference Reward:", reward)

                elif rtype == 2:
                    reward = homr.calc_dpp(rd.rover_path, rd.poi_value,
                                           rd.poi_pos)
                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward[pop_id]
                    print("Generation", gen, "out of", p.generations,
                          "D++ Reward:", reward)

                else:
                    sys.exit('Incorrect Reward Type for Homogeneous Teams')

            # Perform down-selection after every policy has been evaluated
            cc.down_select()

            if rtype == 0:
                print("Generation", gen, "out of", p.generations,
                      "Global Reward:", reward)
            elif rtype == 1:
                print("Generation", gen, "out of", p.generations,
                      "Difference Reward:", reward)
            elif rtype == 2:
                print("Generation", gen, "out of", p.generations,
                      "D++ Reward:", reward)
            else:
                sys.exit('Incorrect Reward Type for Homogeneous Teams')

            # Testing Phase
            rd.reset_to_init()  # Reset rovers to initial positions
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
            while not done:
                for rover_id in range(rd.num_agents):
                    nn.run_neural_network(joint_state[rover_id],
                                          cc.pops[rover_id, 0], rover_id)
                joint_state, done = rd.step(nn.out_layer)

            reward, poi_status = homr.calc_global(rd.rover_path, rd.poi_value,
                                                  rd.poi_pos)
            reward_history.append(reward)
            print("Global Reward", reward)

            if gen % 1 == 0:  # Save path every generation (gen % 1 is always 0)
                save_rover_path(rd.rover_path)
                if p.visualizer_on:
                    visualize(rd, reward)

        if rtype == 0:
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == 1:
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == 2:
            save_reward_history(reward_history, "DPP_Reward.csv")