def evaluate(genome, config_file_path, driving_agent, normalise_obs,
             domain_params_in_obs, num_trials):
    """Fitness function: run *genome* as the driving agent and return the
    mean episodic reward over a number of trials.

    Args:
        genome: flat sequence of neural-network weights for the agent.
        config_file_path: path to the game configuration file.
        driving_agent: which side the genome controls, 'panther' or 'pelican'.
        normalise_obs: whether the environment normalises observations.
        domain_params_in_obs: whether domain parameters are appended to the
            observation vector.
        num_trials: number of episodes to average the reward over.

    Returns:
        Single-element list containing the mean reward (fitness-tuple
        format expected by the EA toolbox, see the ``evaluate``
        registration elsewhere in this project).
    """
    # Instantiate the sparse-reward, non-image environment.
    env = PlarkEnvSparse(config_file_path=config_file_path,
                         image_based=False,
                         driving_agent=driving_agent,
                         normalise=normalise_obs,
                         domain_params_in_obs=domain_params_in_obs)

    # Network topology: no hidden layers (a linear policy); the input size
    # is taken from the environment's observation vector.
    num_inputs = len(env._observation())
    num_hidden_layers = 0
    neurons_per_hidden_layer = 0

    # BUG FIX: the original tested the undefined name `trained_agent`
    # (a NameError unless a global of that name happened to exist).  The
    # parameter that selects the side is `driving_agent` — callers register
    # this function with driving_agent=trained_agent.
    if driving_agent == 'panther':
        agent = PantherNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)
    else:
        agent = PelicanNN(num_inputs=num_inputs,
                          num_hidden_layers=num_hidden_layers,
                          neurons_per_hidden_layer=neurons_per_hidden_layer)

    agent.set_weights(genome)

    # Accumulate the episodic reward over the requested number of trials.
    reward = 0
    for _ in range(num_trials):
        env.reset()
        obs = env._observation()

        trial_reward = 0
        while True:
            action = agent.getAction(obs)
            obs, r, done, info = env.step(action)
            trial_reward += r
            if done:
                break

        reward += trial_reward

    # Average trial reward.
    reward /= num_trials

    return [reward]
image_based=False, driving_agent='panther', normalise=normalise_obs) #Neural net variables num_inputs = len(dummy_env._observation()) num_hidden_layers = 0 neurons_per_hidden_layer = 0 panther_dummy_agent = PantherNN( num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, neurons_per_hidden_layer=neurons_per_hidden_layer) #I need to figure out how to get rid of the 139 magic number pelican_dummy_agent = PelicanNN( num_inputs=139, num_hidden_layers=num_hidden_layers, neurons_per_hidden_layer=neurons_per_hidden_layer) #num_panther_weights = panther_dummy_agent.get_num_weights() #num_pelican_weights = pelican_dummy_agent.get_num_weights() #Let's try instantiating with dummy agents and setting the agents competing against each #other dummy_env.reset() max_num_steps = 200 reward = 0 obs = dummy_env._observation() for step_num in range(max_num_steps): action = panther_dummy_agent.getAction(obs)
#Neural net variables num_inputs = len(dummy_env._observation()) num_hidden_layers = 0 neurons_per_hidden_layer = 0 if trained_agent == 'panther': dummy_agent = PantherNN( num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, neurons_per_hidden_layer=neurons_per_hidden_layer, stochastic_actions=stochastic_actions) else: dummy_agent = PelicanNN( num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, neurons_per_hidden_layer=neurons_per_hidden_layer, stochastic_actions=stochastic_actions) num_weights = dummy_agent.get_num_weights() creator.create("FitnessMax", base.Fitness, weights=(1.0, )) creator.create("Individual", list, fitness=creator.FitnessMax) toolbox = base.Toolbox() toolbox.register("evaluate", evaluate, config_file_path=config_file_path, driving_agent=trained_agent, normalise_obs=normalise_obs, domain_params_in_obs=domain_params_in_obs,
driving_agent=driving_agent, normalise=normalise_obs, domain_params_in_obs=domain_params_in_obs) game = dummy_env.env.activeGames[len(dummy_env.env.activeGames) - 1] #Neural net variables num_inputs = len(dummy_env._observation()) num_hidden_layers = 0 neurons_per_hidden_layer = 0 panther_dummy_agent = PantherNN( num_inputs=num_inputs, num_hidden_layers=num_hidden_layers, neurons_per_hidden_layer=neurons_per_hidden_layer) pelican_dummy_agent = PelicanNN(file_dir_name='pelican_20210301_193714', game=game) #Set non-driving agent game.pelicanAgent = pelican_dummy_agent dummy_env.reset() max_num_steps = 200 reward = 0 obs = dummy_env._observation() for step_num in range(max_num_steps): action = panther_dummy_agent.getAction(obs) obs, r, done, info = dummy_env.step(action) reward += r if done:
def load_combatant(agent_path, agent_name, basic_agents_path, game=None, **kwargs):
    """Load an agent as a tournament combatant.

    Dispatches on *agent_path*:
      * contains ".py"        -> plain Python agent via ``load_agent``
      * contains "evo_models" -> NNAgent-based agent trained by an
                                 Evolutionary Algorithm
      * otherwise             -> a directory of ".zip" model files with an
                                 accompanying ``metadata.json``

    Args:
        agent_path: file or directory the agent lives in.
        agent_name: agent name; for evolved agents it selects pelican vs
            panther.
        basic_agents_path: path to the basic agents.
        game: game instance the agent is attached to.
        kwargs: forwarded to ``load_agent``.

    Returns:
        The loaded agent, or None if nothing loadable was found.
    """
    if ".py" in agent_path:
        return load_agent(agent_path, agent_name, basic_agents_path, game,
                          in_tournament=False, **kwargs)

    # Load an agent trained via an Evolutionary Algorithm - they use the
    # NNAgent class.
    elif "evo_models" in agent_path:
        from plark_game.agents.basic.panther_nn import PantherNN
        from plark_game.agents.basic.pelican_nn import PelicanNN

        if 'pelican' in agent_name:
            return PelicanNN(file_dir_name='pelican', game=game,
                             in_tournament=True)
        else:
            return PantherNN(file_dir_name='panther', game=game,
                             in_tournament=True)

    else:
        for fname in os.listdir(agent_path):
            # Ignore non agent files.
            if ".zip" not in fname:
                continue

            # Load the model's metadata.  FIX: the original reused `f` as
            # both the loop variable and the metadata file handle, shadowing
            # the filename mid-iteration; use a distinct name for the handle.
            metadata_filepath = os.path.join(agent_path, "metadata.json")
            agent_filepath = os.path.join(agent_path, fname)
            with open(metadata_filepath) as meta_file:
                metadata = json.load(meta_file)

            # NON-IMAGE-BASED
            if ("image_based" in metadata
                    and metadata["image_based"] is False):
                return load_agent(agent_path, agent_name, basic_agents_path,
                                  game, in_tournament=True, **kwargs)

            # IMAGE-BASED - NOT SUPPORTED FOR DSG TOURNAMENT
            observation = None
            image_based = True
            algorithm = metadata["algorithm"]
            print("algorithm: ", algorithm)

            if metadata["agentplayer"] == "pelican":
                return Pelican_Agent_Load_Agent(agent_filepath, algorithm,
                                                observation, image_based,
                                                in_tournament=True)
            elif metadata["agentplayer"] == "panther":
                return Panther_Agent_Load_Agent(agent_filepath, algorithm,
                                                observation, image_based,
                                                in_tournament=True)

    return None
domain_params_in_obs=domain_params_in_obs, random_panther_start_position=random_panther_start_position, random_pelican_start_position=random_pelican_start_position) game = dummy_env.env.activeGames[len(dummy_env.env.activeGames) - 1] #Neural net variables num_inputs = len(dummy_env._observation()) num_hidden_layers = 0 neurons_per_hidden_layer = 0 #panther_dummy_agent = PantherNN(num_inputs=num_inputs, # num_hidden_layers=num_hidden_layers, # neurons_per_hidden_layer=neurons_per_hidden_layer) pelican_dummy_agent = PelicanNN( file_dir_name='pelican_20210309_100850_gen_2', game=game, driving_agent=True) #Set agent #game.pelicanAgent = pelican_dummy_agent dummy_env.reset() ''' max_num_steps = 1 reward = 0 obs = dummy_env._observation() for step_num in range(max_num_steps): action = panther_dummy_agent.getAction(obs) obs, r, done, info = dummy_env.step(action) reward += r