#NOTE: import block reconstructed for readability. The plark_game import paths
#below are assumptions about the surrounding project layout, not taken verbatim
#from the original file.
import csv
import datetime
import json
import logging
import math
import os
import random

import gym
import numpy as np
import torch
from gym import spaces

from plark_game.classes.agent import Agent              #assumed path
from plark_game.classes.environment import Environment  #assumed path
from plark_game.classes.observation import Observation  #assumed path

logger = logging.getLogger(__name__)


class NNAgent(Agent):

    def __init__(self, num_inputs, num_outputs,
                 num_hidden_layers=0, neurons_per_hidden_layer=0,
                 file_dir_name=None, agent_type=None, game=None,
                 stochastic_actions=False, driving_agent=True,
                 in_tournament=False):

        self.agent_type = agent_type
        self.stochastic_actions = stochastic_actions
        self.driving_agent = driving_agent
        self.in_tournament = in_tournament

        #For reading and writing models
        self.base_path = '/data/agents/evo_models/'

        #Number of outputs is always given via the subclass call to this constructor
        self.num_outputs = num_outputs

        #If a file directory name is given, read the agent from file
        if file_dir_name is not None:
            metadata, genotype = self._read_agent_from_file(file_dir_name)

            self.num_inputs = metadata['num_inputs']
            self.num_hidden_layers = metadata['num_hidden_layers']
            self.neurons_per_hidden_layer = metadata['neurons_per_hidden_layer']
            self.stochastic_actions = metadata['stochastic_actions']

            obs_kwargs = {}
            obs_kwargs['driving_agent'] = self.agent_type
            obs_kwargs['normalise'] = metadata['normalise']
            obs_kwargs['domain_params_in_obs'] = metadata['domain_params_in_obs']
            self.obs_kwargs = obs_kwargs

            if not self.in_tournament:
                assert game is not None, "Need to hand NewGame object to NNAgent " \
                                         "constructor in order to build the Observation class"
                self.observation = Observation(game, **obs_kwargs)

            #Build neural net
            self._build_nn()

            #Set read genotype as weights
            self.set_weights(genotype)

        else:
            #Check that num_inputs is not None
            if num_inputs is None:
                print('One needs to give either a number of inputs or a file directory'
                      ' name to build an NNAgent')
                exit()

            self.num_inputs = num_inputs
            self.num_hidden_layers = num_hidden_layers
            self.neurons_per_hidden_layer = neurons_per_hidden_layer
            self.observation = None

            #Build neural net
            self._build_nn()

    def _build_nn(self):

        layers = []

        if self.num_hidden_layers == 0:
            layers.append(torch.nn.Linear(self.num_inputs, self.num_outputs))

        else:
            layers.append(torch.nn.Linear(self.num_inputs,
                                          self.neurons_per_hidden_layer))
            #Hidden layers have ReLU activation
            layers.append(torch.nn.ReLU())

            for i in range(self.num_hidden_layers-1):
                layers.append(torch.nn.Linear(self.neurons_per_hidden_layer,
                                              self.neurons_per_hidden_layer))
                layers.append(torch.nn.ReLU())

            layers.append(torch.nn.Linear(self.neurons_per_hidden_layer,
                                          self.num_outputs))

        #Final layer goes through a softmax
        layers.append(torch.nn.Softmax(dim=0))

        self.nn = torch.nn.Sequential(*layers).double()

    #Takes a list, passes it through the network and returns a list
    def _forward_pass(self, x):
        x = torch.tensor(x, dtype=torch.float64)
        net_out = self.nn.forward(x)
        return net_out.tolist()

    #Randomly sample an action from the network output probability distribution
    def _sample_action(self, net_out):
        action_nums = list(range(len(net_out)))
        return np.random.choice(action_nums, p=net_out)

    #Get the most probable action from the network output probability distribution
    def _get_most_probable_action(self, net_out):
        return np.argmax(net_out)

    def getAction(self, state):

        #If a state dictionary comes through, convert it to a numpy array.
        #This happens when the NNAgent is the non-driving agent.
        if not self.driving_agent:
            state = self.observation.get_observation(state)

        assert len(state) == self.num_inputs, \
            "State length: {}, num inputs: {}".format(len(state), self.num_inputs)

        #Push state through network
        net_out = self._forward_pass(state)

        #Get action from network output
        if self.stochastic_actions:
            action = self._sample_action(net_out)
        else:
            action = self._get_most_probable_action(net_out)

        if (not self.driving_agent) or self.in_tournament:
            action = self.action_lookup(action)

        return action

    #Stitch together state from tournament
    def getTournamentAction(self, obs, obs_normalised, domain_parameters,
                            domain_parameters_normalised, state):

        if self.obs_kwargs['normalise']:
            stitched_obs = obs_normalised
        else:
            stitched_obs = obs

        if self.obs_kwargs['domain_params_in_obs']:
            if self.obs_kwargs['normalise']:
                stitched_obs = np.concatenate((stitched_obs,
                                               domain_parameters_normalised))
            else:
                stitched_obs = np.concatenate((stitched_obs, domain_parameters))

        return self.getAction(stitched_obs)

    #Returns the number of weights
    def get_num_weights(self):
        num_weights = 0
        for layer in self.nn:
            for params in layer.parameters():
                num_weights += params.numel()
        return num_weights

    def print_weights(self):
        for layer in self.nn:
            for params in layer.parameters():
                print(params)

    def _set_weights_err_msg(self, weights_len, num_weights_required):
        return "Trying to set {} weights to an NN that requires {} weights" \
               .format(weights_len, num_weights_required)

    #Sets a list of weights
    def set_weights(self, new_weights):

        #Check new weights is of correct size
        num_weights_required = self.get_num_weights()
        assert num_weights_required == len(new_weights), \
            self._set_weights_err_msg(len(new_weights), num_weights_required)

        weight_index = 0
        for layer in self.nn:
            for params in layer.parameters():

                #Slice out new weights
                p_weights = new_weights[weight_index : weight_index + params.numel()]
                weight_index += params.numel()

                #Resize and set new weights
                params.data = torch.tensor(np.reshape(p_weights, params.size()),
                                           dtype=torch.float64)

    #Return weights as a 1d list
    def get_weights(self):
        weights = []
        for layer in self.nn:
            for params in layer.parameters():
                weights += params.flatten().tolist()
        return weights

    def _save_metadata(self, dir_path, player_type, obs_normalise,
                       domain_params_in_obs):

        metadata = {}
        metadata['playertype'] = player_type
        metadata['normalise'] = obs_normalise
        metadata['domain_params_in_obs'] = domain_params_in_obs
        metadata['stochastic_actions'] = self.stochastic_actions
        metadata['num_inputs'] = self.num_inputs
        metadata['num_hidden_layers'] = self.num_hidden_layers
        metadata['neurons_per_hidden_layer'] = self.neurons_per_hidden_layer

        file_path = dir_path + '/metadata.json'
        with open(file_path, 'w') as outfile:
            json.dump(metadata, outfile)

    def _save_genotype(self, dir_path):

        #Save genotype as a csv - it is just a list
        file_path = dir_path + '/genotype.csv'
        with open(file_path, 'w') as outfile:
            csv_writer = csv.writer(outfile)
            csv_writer.writerow(self.get_weights())

    def _save_agent_to_file(self, player_type, obs_normalise, domain_params_in_obs,
                            file_name_suffix=''):

        #Construct full directory path
        date_time = str(datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
        dir_name = player_type + '_' + date_time + file_name_suffix
        dir_path = self.base_path + dir_name

        #Create directory for model
        os.makedirs(dir_path, exist_ok=True)
        os.chmod(dir_path, 0o777)

        #Save metadata
        self._save_metadata(dir_path, player_type, obs_normalise,
                            domain_params_in_obs)

        #Save genotype
        self._save_genotype(dir_path)

    def _read_metadata(self, metadata_filepath):
        with open(metadata_filepath, 'r') as metadata_file:
            metadata = json.load(metadata_file)
        return metadata

    def _read_genotype(self, genotype_filepath):
        with open(genotype_filepath, 'r') as genotype_file:
            reader = csv.reader(genotype_file)
            genotype = list(map(float, list(reader)[0]))
        return genotype

    def _read_agent_from_file(self, dir_name):

        dir_path = self.base_path + dir_name + '/'
        if self.in_tournament:
            dir_path = "/plark_ai_public" + dir_path

        #Read metadata
        metadata = self._read_metadata(dir_path + 'metadata.json')

        #Read genotype
        genotype = self._read_genotype(dir_path + 'genotype.csv')

        return metadata, genotype

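
#--------------------------------------------------------------------------
#Illustrative usage sketch (not part of the original module): shows how an
#NNAgent's flat weight vector round-trips through get_weights()/set_weights(),
#which is the interface an evolutionary loop would perturb. Constructing
#NNAgent directly with toy dimensions is an assumption for this example only;
#in the codebase the constructor is normally reached via a subclass.
#--------------------------------------------------------------------------
def _example_weight_round_trip():
    #Build a small network directly from dimensions (no file, no game needed)
    agent = NNAgent(num_inputs=4, num_outputs=3,
                    num_hidden_layers=1, neurons_per_hidden_layer=8)

    #Perturb the flat genotype and write it back into the torch layers
    genotype = np.array(agent.get_weights())
    genotype += np.random.normal(0.0, 0.1, size=genotype.shape)
    agent.set_weights(genotype.tolist())

    #The network outputs a softmax distribution over the action indices
    net_out = agent._forward_pass([0.1, 0.2, 0.3, 0.4])
    assert abs(sum(net_out) - 1.0) < 1e-6
    return net_out
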
class PlarkEnv(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self, config_file_path=None, verbose=False, **kwargs):
        self.kwargs = kwargs

        self.random_panther_start_position = kwargs.get('random_panther_start_position', False)
        self.random_pelican_start_position = kwargs.get('random_pelican_start_position', False)

        self.render_height = kwargs.get('render_height', None)
        if self.render_height is None:
            self.render_height = 250
            self.kwargs['render_height'] = self.render_height

        self.render_width = kwargs.get('render_width', None)
        if self.render_width is None:
            self.render_width = 310
            self.kwargs['render_width'] = self.render_width

        self.driving_agent = kwargs.get('driving_agent', None)
        if self.driving_agent is None:
            self.driving_agent = 'pelican'
            self.kwargs['driving_agent'] = self.driving_agent

        #logger.info('plark.kwargs :'+ str(self.kwargs))

        self.verbose = verbose
        self.viewer = None
        self.server_process = None
        self.server_port = None

        self.image_based = kwargs.get('image_based', False)
        #logger.info('self.image_based :'+ str(self.image_based))

        self.env = Environment()
        self.config_file_path = config_file_path

        self.illegal_move_reward = -0.1
        self.buoy_too_close_reward = -0.2
        self.buoy_far_apart_reward = 0.5

        #1 UP
        #2 UP RIGHT
        #3 DOWN RIGHT
        #4 DOWN
        #5 DOWN LEFT
        #6 UP LEFT
        if self.driving_agent == 'panther':
            self.view = 'PANTHER'
            self.ACTION_LOOKUP = {
                0 : '1',
                1 : '2',
                2 : '3',
                3 : '4',
                4 : '5',
                5 : '6',
                6 : 'end'
            }
        elif self.driving_agent == 'pelican':
            self.view = 'PELICAN'
            self.ACTION_LOOKUP = {
                0 : '1',
                1 : '2',
                2 : '3',
                3 : '4',
                4 : '5',
                5 : '6',
                6 : 'drop_buoy',
                7 : 'drop_torpedo',
                8 : 'end'
            }
        else:
            raise ValueError('driving_agent not set correctly')

        # Inverse action lookup for looking up specific actions
        self.action_index = dict((val, key) for key, val in self.ACTION_LOOKUP.items())

        if self.image_based:
            self.reset()
            self.game = self.env.activeGames[len(self.env.activeGames)-1]

            N_CHANNELS = 3
            logger.info('observation space: Height:' + str(self.render_height) +
                        ', Width:' + str(self.render_width) +
                        ', Channels:' + str(N_CHANNELS))
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(self.render_height,
                                                       self.render_width,
                                                       N_CHANNELS),
                                                dtype=np.uint8)
            logger.info('Image observations created')

            self.normalise = None
            self.domain_params_in_obs = None

        else:
            if len(self.env.activeGames) > 0:
                self.env.activeGames[len(self.env.activeGames)-1].reset_game()
            else:
                if self.config_file_path:
                    #logger.info('config filepath: ' +str(self.config_file_path))
                    self.env.createNewGame(config_file_path=self.config_file_path,
                                           **self.kwargs)
                else:
                    self.env.createNewGame(**self.kwargs)

            self.game = self.env.activeGames[len(self.env.activeGames)-1]

            self.observation = Observation(self.game, **kwargs)
            self.observation_space = self.observation.get_observation_space()
            self.normalise = self.observation.normalise
            self.domain_params_in_obs = self.observation.domain_params_in_obs
            #logger.info('Non image observations created')

        if self.driving_agent == 'panther':
            self.action_space = spaces.Discrete(7)
        elif self.driving_agent == 'pelican':
            self.action_space = spaces.Discrete(9)

        self.status = self.env.activeGames[len(self.env.activeGames)-1].gameState

    def close(self):
        self.env.stopAllGames()

    def _get_location(self, board, item):
        for col in range(board.cols):
            for row in range(board.rows):
                if board.is_item_type_in_cell(item, col, row):
                    return (col, row)
        raise ValueError("Could not find {}".format(item))

    def _observation(self):
        if self.image_based:
            pil_image = self.env.activeGames[len(self.env.activeGames)-1].render(
                            self.render_width, self.render_height, self.view)
            np_image = np.array(pil_image, dtype=np.uint8)
            return np_image
        else:
            obs = self.observation.get_observation(
                      self.env.activeGames[len(self.env.activeGames)-1]._state(self.view))
            #return obs
            #np.float is deprecated; plain float gives float64
            return np.array(obs, dtype=float)

    def step(self, action):
        action = self.ACTION_LOOKUP[action]
        game = self.env.activeGames[len(self.env.activeGames)-1]
        if self.verbose:
            logger.info('Action:' + action)

        gameState, uioutput = game.game_step(action)
        self.status = gameState
        self.uioutput = uioutput

        ob = self._observation()
        #print(self.driving_agent)
        #print(ob)

        reward = 0
        done = False

        _info = {
            'turn': game.turn_count
        }

        if self.driving_agent == 'pelican':
            illegal_move = game.illegal_pelican_move
        else:
            illegal_move = game.illegal_panther_move
        _info['illegal_move'] = illegal_move

        if illegal_move == True:
            reward = reward + self.illegal_move_reward

        if self.driving_agent == 'pelican':
            #Reward for dropping a sonobuoy, if it wasn't an illegal move
            if action == 'drop_buoy' and illegal_move == False:
                self.globalSonobuoys = game.globalSonobuoys
                if len(self.globalSonobuoys) > 1:
                    sonobuoy = self.globalSonobuoys[-1]
                    sbs_in_range = game.gameBoard.searchRadius(sonobuoy.col,
                                                               sonobuoy.row,
                                                               sonobuoy.range,
                                                               "SONOBUOY")
                    sbs_in_range.remove(sonobuoy)  # remove itself from search results
                    if len(sbs_in_range) > 0:
                        reward = reward + self.buoy_too_close_reward
                    else:
                        reward = reward + self.buoy_far_apart_reward
                else:
                    reward = reward + self.buoy_far_apart_reward

        # PELICANWIN = Pelican has won
        # ESCAPE     = Panther has won
        # BINGO      = Panther has won; the Pelican has reached its turn limit and run out of fuel
        # WINCHESTER = Panther has won; all torpedoes dropped and stopped running, so the Panther can't be stopped
        if self.status == "PELICANWIN" or self.status == "BINGO" or self.status == "WINCHESTER" or self.status == "ESCAPE":
            done = True
            if self.verbose:
                logger.info("GAME STATE IS " + self.status)

            if self.status in ["ESCAPE", "BINGO", "WINCHESTER"]:
                if self.driving_agent == 'pelican':
                    reward = -1
                    _info['result'] = "LOSE"
                elif self.driving_agent == 'panther':
                    reward = 1
                    _info['result'] = "WIN"
                else:
                    raise ValueError('driving_agent not set correctly')

            if self.status == "PELICANWIN":
                if self.driving_agent == 'pelican':
                    reward = 1
                    _info['result'] = "WIN"
                elif self.driving_agent == 'panther':
                    reward = -1
                    _info['result'] = "LOSE"
                else:
                    raise ValueError('driving_agent not set correctly')

        return ob, reward, done, _info

    def set_pelican(self, pelican):
        self.env.activeGames[len(self.env.activeGames)-1].set_pelican(pelican)

    def set_panther(self, panther):
        self.env.activeGames[len(self.env.activeGames)-1].set_panther(panther)

    def set_pelican_using_path(self, pelican):
        self.env.activeGames[len(self.env.activeGames)-1].load_pelican_using_path(pelican)

    def set_panther_using_path(self, panther):
        self.env.activeGames[len(self.env.activeGames)-1].load_panther_using_path(panther)

    def clear_memory(self):
        self.env.activeGames[len(self.env.activeGames)-1].clear_memory()

    def wrapper_test_function(self):
        print("hello there!")

    def reset(self):
        #If a game already exists, reset it
        if len(self.env.activeGames) > 0:

            #On reset, randomly place the panther in a different location
            if self.random_panther_start_position:
                map_width = self.env.activeGames[len(self.env.activeGames)-1].map_width
                map_height = self.env.activeGames[len(self.env.activeGames)-1].map_height

                #Taken out of domain_parameters.py (Simon's bounds)
                panther_start_col_lb = int(math.floor(0.33 * map_width))
                panther_start_col_ub = int(math.floor(0.66 * map_width))
                panther_start_row_lb = int(math.floor(0.8 * map_height))
                panther_start_row_ub = map_height - 1

                panther_start_col = random.choice(range(panther_start_col_lb,
                                                        panther_start_col_ub+1))
                panther_start_row = random.choice(range(panther_start_row_lb,
                                                        panther_start_row_ub+1))

                self.env.activeGames[len(self.env.activeGames)-1].panther_start_col = panther_start_col
                self.env.activeGames[len(self.env.activeGames)-1].panther_start_row = panther_start_row

            #On reset, randomly place the pelican in a different location
            if self.random_pelican_start_position:
                map_width = self.env.activeGames[len(self.env.activeGames)-1].map_width
                map_height = self.env.activeGames[len(self.env.activeGames)-1].map_height

                #Taken out of domain_parameters.py (Simon's bounds)
                pelican_start_col_lb = 0
                pelican_start_col_ub = int(math.floor(0.33 * map_width))
                pelican_start_row_lb = 0
                pelican_start_row_ub = int(math.floor(0.2 * map_height))

                pelican_start_col = random.choice(range(pelican_start_col_lb,
                                                        pelican_start_col_ub+1))
                pelican_start_row = random.choice(range(pelican_start_row_lb,
                                                        pelican_start_row_ub+1))

                self.env.activeGames[len(self.env.activeGames)-1].pelican_start_col = pelican_start_col
                self.env.activeGames[len(self.env.activeGames)-1].pelican_start_row = pelican_start_row

            self.env.activeGames[len(self.env.activeGames)-1].reset_game()

        else:
            if self.config_file_path:
                logger.info('config filepath: ' + str(self.config_file_path))
                self.env.createNewGame(config_file_path=self.config_file_path,
                                       **self.kwargs)
            else:
                self.env.createNewGame(**self.kwargs)

        self.game = self.env.activeGames[len(self.env.activeGames)-1]

        if self.driving_agent == 'pelican':
            self.render_width = self.game.pelican_parameters['render_width']
            self.render_height = self.game.pelican_parameters['render_height']
        elif self.driving_agent == 'panther':
            self.render_width = self.game.panther_parameters['render_width']
            self.render_height = self.game.panther_parameters['render_height']

        return self._observation()

    def render(self, mode='human', close=False, view=None):
        if view is None:
            view = self.view
        pil_image = self.env.activeGames[len(self.env.activeGames)-1].render(
                        self.render_width, self.render_height, view)
        return pil_image
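

#--------------------------------------------------------------------------
#Illustrative usage sketch (not part of the original module): a minimal
#random-rollout loop over PlarkEnv via the standard gym interface
#(reset/step/close). The kwargs shown are ones PlarkEnv reads above; whether
#createNewGame() can build a game without an explicit config_file_path depends
#on the surrounding plark_game defaults, so treat the call below as an
#assumption rather than a guaranteed entry point.
#--------------------------------------------------------------------------
def _example_random_rollout(config_file_path=None, max_steps=200):
    env = PlarkEnv(config_file_path=config_file_path,
                   driving_agent='pelican',
                   image_based=False,
                   random_panther_start_position=True)

    ob = env.reset()
    total_reward = 0.0
    info = {}
    for _ in range(max_steps):
        #Discrete(9) for the pelican: six moves, drop_buoy, drop_torpedo, end
        action = env.action_space.sample()
        ob, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            break

    env.close()
    return total_reward, info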