import sys
import numpy as np

# Project-local modules, inferred from the call sites below; actual module
# paths may differ in the repo.
import ccea
import neural_network
import homogeneous_rewards as homr
from rover_domain import RoverDomain
from parameters import Parameters as p


def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()
    rtype = p.reward_type

    rd.inital_world_setup()

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA and NN for new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture

        for gen in range(p.generations):
            # print("Gen: %i" % gen)
            cc.select_policy_teams()
            for team_number in range(cc.total_pop_size):  # Each policy in CCEA is tested in teams
                rd.reset_to_init()  # Resets rovers to initial configuration
                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()
                while not done:
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id, team_number])
                        nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, policy_id], rover_id)
                    joint_state, done = rd.step(nn.out_layer)

                # Update fitness of policies using reward information
                global_reward = homr.calc_global(rd.rover_path, rd.poi_values, rd.poi_pos)
                if rtype == "Global":
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id, team_number])
                        cc.fitness[rover_id, policy_id] = global_reward
                elif rtype == "Difference":
                    d_reward = homr.calc_difference(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id, team_number])
                        cc.fitness[rover_id, policy_id] = d_reward[rover_id]
                elif rtype == "DPP":
                    dpp_reward = homr.calc_dpp(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id, team_number])
                        cc.fitness[rover_id, policy_id] = dpp_reward[rover_id]
                elif rtype == "SDPP":
                    sdpp_reward = homr.calc_sdpp(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id, team_number])
                        cc.fitness[rover_id, policy_id] = sdpp_reward[rover_id]
                else:
                    sys.exit('Incorrect Reward Type')

            # Testing Phase (test best policies found so far)
            rd.reset_to_init()  # Reset rovers to initial positions
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
            while not done:
                for rover_id in range(rd.num_agents):
                    pol_index = np.argmax(cc.fitness[rover_id])
                    nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, pol_index], rover_id)
                joint_state, done = rd.step(nn.out_layer)
            global_reward = homr.calc_global(rd.rover_path, rd.poi_values, rd.poi_pos)
            reward_history.append(global_reward)

            if gen == (p.generations - 1):  # Save path at end of final generation
                save_rover_path(rd.rover_path)

            cc.down_select()  # Choose new parents and create new offspring population

        if rtype == "Global":
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == "Difference":
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == "DPP":
            save_reward_history(reward_history, "DPP_Reward.csv")
        if rtype == "SDPP":
            save_reward_history(reward_history, "SDPP_Reward.csv")
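
# The helpers called above (save_reward_history, save_rover_path,
# save_world_configuration) are not defined in this file. Below is a minimal
# sketch of save_reward_history, assuming it appends one CSV row of
# per-generation rewards per statistical run into an Output_Data directory;
# the directory name and row layout are assumptions, and the repo's actual
# implementation may differ.
import csv
import os


def save_reward_history(reward_history, file_name):
    # Append this run's per-generation rewards as a single CSV row.
    dir_name = 'Output_Data/'  # assumed output directory
    os.makedirs(dir_name, exist_ok=True)
    save_file_name = os.path.join(dir_name, file_name)
    with open(save_file_name, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Performance'] + list(reward_history))
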
def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()
    rtype = p.reward_type

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA, NN, and world for new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture
        rd.reset_world()  # Re-initialize world
        save_world_configuration(rd.rover_initial_pos, rd.poi_pos, rd.poi_values)

        for gen in range(p.generations):
            print("Gen: %i" % gen)
            cc.select_policy_teams()  # Selects which policies will be grouped into which teams
            for team_number in range(cc.population_size):  # Each policy in CCEA is tested in teams
                rd.reset_to_init()  # Resets rovers to initial configuration
                global_reward = 0.0
                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()
                while not done:
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id][team_number])
                        nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, policy_id], rover_id)
                    joint_state, done, global_reward = rd.step(nn.out_layer)

                # Update fitness of policies using reward information
                if rtype == "Global":
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id][team_number])
                        cc.fitness[rover_id, policy_id] += global_reward
                elif rtype == "Difference":
                    d_reward = homr.calc_difference(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward, rd.istep)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id][team_number])
                        cc.fitness[rover_id, policy_id] += d_reward[rover_id]
                elif rtype == "DPP":
                    dpp_reward = homr.calc_dpp(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward, rd.istep)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id][team_number])
                        cc.fitness[rover_id, policy_id] += dpp_reward[rover_id]
                elif rtype == "SDPP":
                    sdpp_reward = homr.calc_sdpp(rd.rover_path, rd.poi_values, rd.poi_pos, global_reward, rd.istep)
                    for rover_id in range(p.num_rovers):
                        policy_id = int(cc.team_selection[rover_id][team_number])
                        cc.fitness[rover_id, policy_id] += sdpp_reward[rover_id]
                else:
                    sys.exit('Incorrect Reward Type for Homogeneous Teams')

            cc.down_select()  # Perform down_selection after each policy has been evaluated

            # Testing Phase
            rd.reset_to_init()  # Reset rovers to initial positions
            global_reward = 0.0
            global_max = 0.0
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
            while not done:
                for rover_id in range(rd.num_agents):
                    nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, 0], rover_id)
                joint_state, done, global_reward = rd.step(nn.out_layer)
                global_max += global_reward
            reward_history.append(global_max)

            if gen == (p.generations - 1):  # Save path at end of final generation
                save_rover_path(rd.rover_path)
                if p.visualizer_on:
                    visualize(rd, global_max)

        if rtype == "Global":
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == "Difference":
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == "DPP":
            save_reward_history(reward_history, "DPP_Reward.csv")
        if rtype == "SDPP":
            save_reward_history(reward_history, "SDPP_Reward.csv")
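
# The four-way if/elif chain over rtype in the variant above repeats the same
# fitness-assignment loop for each reward signal. A hedged refactoring sketch
# using a dispatch table; the calc_* signatures are copied from the call sites
# above, and `cc`, `rd`, `p`, `homr`, and `sys` are assumed to be in scope as
# in the function. This is an illustration of the pattern, not the repo's code.
def assign_team_fitness(rtype, cc, rd, team_number, global_reward):
    reward_fns = {
        "Difference": homr.calc_difference,
        "DPP": homr.calc_dpp,
        "SDPP": homr.calc_sdpp,
    }
    if rtype == "Global":
        shaped = [global_reward] * p.num_rovers  # every rover receives G
    elif rtype in reward_fns:
        shaped = reward_fns[rtype](rd.rover_path, rd.poi_values, rd.poi_pos, global_reward, rd.istep)
    else:
        sys.exit('Incorrect Reward Type for Homogeneous Teams')
    for rover_id in range(p.num_rovers):
        policy_id = int(cc.team_selection[rover_id][team_number])
        cc.fitness[rover_id, policy_id] += shaped[rover_id]
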
def run_homogeneous_rovers():
    cc = ccea.Ccea()
    nn = neural_network.NeuralNetwork()
    rd = RoverDomain()
    rtype = p.reward_type

    for srun in range(p.stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)
        reward_history = []

        # Reset CCEA, NN, and world for new stat run
        cc.reset_populations()  # Randomly initialize ccea populations
        nn.reset_nn()  # Initialize NN architecture
        rd.reset()  # Re-initialize world
        save_world_configuration(rd.rover_initial_pos, rd.poi_pos, rd.poi_value)

        for gen in range(p.generations):
            # print("Gen: %i" % gen)
            cc.select_policy_teams()  # Selects which policies will be grouped into which teams
            for team_number in range(cc.population_size):  # Each policy in CCEA is tested in teams
                rd.reset_to_init()  # Resets rovers to initial configuration
                done = False
                rd.istep = 0
                joint_state = rd.get_joint_state()
                while not done:
                    for rover_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[rover_id][team_number])  # Policy selected from previous generation
                        nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, policy_id], rover_id)
                    joint_state, done = rd.step(nn.out_layer)

                    # Log the joint state at each step
                    temp_state = np.array(joint_state)
                    with open("CCEA_data_%d_%d_%d.txt" % (p.num_rovers, p.num_pois, p.angle_resolution), "a") as file:
                        file.write(str(temp_state) + "\n")

                # Update fitness of policies using reward information
                if rtype == 0:
                    reward, poi_status = homr.calc_global(rd.rover_path, rd.poi_value, rd.poi_pos)
                    rd.poi_status = poi_status
                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward
                elif rtype == 1:
                    reward = homr.calc_difference(rd.rover_path, rd.poi_value, rd.poi_pos)
                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward[pop_id]
                    print("Generation", gen, "out of", p.generations, "Difference Reward:", reward)
                elif rtype == 2:
                    reward = homr.calc_dpp(rd.rover_path, rd.poi_value, rd.poi_pos)
                    for pop_id in range(rd.num_agents):
                        policy_id = int(cc.team_selection[pop_id][team_number])
                        cc.fitness[pop_id, policy_id] = reward[pop_id]
                    print("Generation", gen, "out of", p.generations, "D++ Reward:", reward)
                else:
                    sys.exit('Incorrect Reward Type for Homogeneous Teams')

            cc.down_select()  # Perform down_selection after each policy has been evaluated

            if rtype == 0:
                print("Generation", gen, "out of", p.generations, "Global Reward:", reward)
            elif rtype == 1:
                print("Generation", gen, "out of", p.generations, "Difference Reward:", reward)
            elif rtype == 2:
                print("Generation", gen, "out of", p.generations, "D++ Reward:", reward)
            else:
                sys.exit('Incorrect Reward Type for Homogeneous Teams')

            # Testing Phase
            rd.reset_to_init()  # Reset rovers to initial positions
            done = False
            rd.istep = 0
            joint_state = rd.get_joint_state()
            while not done:
                for rover_id in range(rd.num_agents):
                    nn.run_neural_network(joint_state[rover_id], cc.pops[rover_id, 0], rover_id)
                joint_state, done = rd.step(nn.out_layer)
            reward, poi_status = homr.calc_global(rd.rover_path, rd.poi_value, rd.poi_pos)
            reward_history.append(reward)
            print("Global Reward", reward)

            if gen % 1 == 0:  # Save path every generation (gen % 1 == 0 is always True)
                save_rover_path(rd.rover_path)
                if p.visualizer_on:
                    visualize(rd, reward)

        if rtype == 0:
            save_reward_history(reward_history, "Global_Reward.csv")
        if rtype == 1:
            save_reward_history(reward_history, "Difference_Reward.csv")
        if rtype == 2:
            save_reward_history(reward_history, "DPP_Reward.csv")
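
# A hedged usage sketch: assuming `p` is the shared parameters module used
# throughout, a driver might select a reward signal and launch the experiment
# like this. The reward codes follow the integer variant directly above
# (0 = Global, 1 = Difference, 2 = D++); the string variants earlier in this
# file use "Global" / "Difference" / "DPP" / "SDPP" instead.
if __name__ == "__main__":
    p.reward_type = 2  # D++ reward
    run_homogeneous_rovers()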