Code Example #1
File: nn_agent.py Project: vakker/plark_ai_public
#Imports used by this snippet (the original file has its own import block)
import os
import csv
import json
import datetime

import numpy as np
import torch

#Agent and Observation are classes from the plark_ai_public project and are
#assumed to be importable in this module.

class NNAgent(Agent):

    def __init__(self, num_inputs, num_outputs,
                 num_hidden_layers=0, neurons_per_hidden_layer=0,
                 file_dir_name=None, agent_type=None, game=None,
                 stochastic_actions=False, driving_agent=True,
                 in_tournament=False):

        self.agent_type = agent_type
        self.stochastic_actions = stochastic_actions
        self.driving_agent = driving_agent

        self.in_tournament = in_tournament

        #For reading and writing models
        self.base_path = '/data/agents/evo_models/'

        #Number of outputs is always given via the subclass call to this constructor
        self.num_outputs = num_outputs

        #If file directory name is given, read from file
        if file_dir_name is not None:
            metadata, genotype = self._read_agent_from_file(file_dir_name)

            self.num_inputs = metadata['num_inputs']
            self.num_hidden_layers = metadata['num_hidden_layers']
            self.neurons_per_hidden_layer = metadata['neurons_per_hidden_layer']
            self.stochastic_actions = metadata['stochastic_actions']

            obs_kwargs = {}
            obs_kwargs['driving_agent'] = self.agent_type
            obs_kwargs['normalise'] = metadata['normalise']
            obs_kwargs['domain_params_in_obs'] = metadata['domain_params_in_obs']
            self.obs_kwargs = obs_kwargs

            if not self.in_tournament:
                assert game is not None, "Need to hand NewGame object to NNAgent " \
                    "constructor in order to build the Observation class"

                self.observation = Observation(game, **obs_kwargs)

            #Build neural net
            self._build_nn()

            #Set read genotype as weights
            self.set_weights(genotype)

        else:
            #Check that num_inputs is not None
            if num_inputs is None:
                print('One needs to give either a number of inputs or a file directory'
                      ' name to build an NNAgent')
                exit()

            self.num_inputs = num_inputs
            self.num_hidden_layers = num_hidden_layers
            self.neurons_per_hidden_layer = neurons_per_hidden_layer

            self.observation = None

            #Build neural net
            self._build_nn()


    def _build_nn(self):
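        #Fully connected network: input -> (hidden layers with ReLU) -> output,
        #followed by a softmax that turns the output into a probability
        #distribution over actions.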

        layers = []
        if self.num_hidden_layers == 0:
            layers.append(torch.nn.Linear(self.num_inputs, self.num_outputs))

        else:
            layers.append(torch.nn.Linear(self.num_inputs, self.neurons_per_hidden_layer))
            #Hidden layers have ReLU activation
            layers.append(torch.nn.ReLU())

            for i in range(self.num_hidden_layers-1):
                layers.append(torch.nn.Linear(self.neurons_per_hidden_layer,
                                              self.neurons_per_hidden_layer))
                layers.append(torch.nn.ReLU())

            layers.append(torch.nn.Linear(self.neurons_per_hidden_layer, self.num_outputs))

        #Final layer goes through a softmax
        layers.append(torch.nn.Softmax(dim=0))

        self.nn = torch.nn.Sequential(*layers).double()

    #Takes a list, passes through the network and returns a list
    def _forward_pass(self, x):
        x = torch.tensor(x, dtype=torch.float64)
        net_out = self.nn.forward(x)
        return net_out.tolist()

    #Randomly sample action from network output probability distribution
    def _sample_action(self, net_out):
        action_nums = list(range(len(net_out)))
        return np.random.choice(action_nums, p=net_out)

    #Get the most probable action from the network output probability distribution
    def _get_most_probable_action(self, net_out):
        return np.argmax(net_out)

    def getAction(self, state):

        #If a state dictionary comes through, convert it to an observation vector.
        #This happens when the NNAgent is the non-driving agent.
        if not self.driving_agent:
            state = self.observation.get_observation(state)

        assert len(state) == self.num_inputs, "State length: {}, num inputs: {}" \
            .format(len(state), self.num_inputs)

        #Push state through network
        net_out = self._forward_pass(state)

        #Get action from network output
        if self.stochastic_actions:
            action = self._sample_action(net_out)
        else:
            action = self._get_most_probable_action(net_out)

        if (not self.driving_agent) or self.in_tournament:
            action = self.action_lookup(action)

        return action

    #Stitch together state from tournament
    def getTournamentAction(self, obs, obs_normalised, domain_parameters,
                            domain_parameters_normalised, state):

        if self.obs_kwargs['normalise']:
            stitched_obs = obs_normalised
        else:
            stitched_obs = obs

        if self.obs_kwargs['domain_params_in_obs']:
            if self.obs_kwargs['normalise']:
                stitched_obs = np.concatenate((stitched_obs, domain_parameters_normalised))
            else:
                stitched_obs = np.concatenate((stitched_obs, domain_parameters))

        return self.getAction(stitched_obs)

    #Returns the number of weights
    def get_num_weights(self):
        num_weights = 0
        for layer in self.nn:
            for params in layer.parameters():
                num_weights += params.numel()
        return num_weights

    def print_weights(self):
        for layer in self.nn:
            for params in layer.parameters():
                print(params)

    def _set_weights_err_msg(self, weights_len, num_weights_required):
        return "Trying to set {} weights to an NN that requires {} weights" \
            .format(weights_len, num_weights_required)

    #Sets a list of weights
    def set_weights(self, new_weights):

        #Check new weights is of correct size
        num_weights_required = self.get_num_weights()
        assert num_weights_required == len(new_weights), \
                                       self._set_weights_err_msg(len(new_weights), \
                                                                 num_weights_required)

        weight_index = 0
        for layer in self.nn:
            for params in layer.parameters():

                #Slice out new weights
                p_weights = new_weights[weight_index : weight_index + params.numel()]
                weight_index += params.numel()

                #Resize and set new weights
                params.data = torch.tensor(np.reshape(p_weights, params.size()), \
                                           dtype=torch.float64)

    #Return weights as a 1d list
    def get_weights(self):
        weights = []
        for layer in self.nn:
            for params in layer.parameters():
                weights += params.flatten().tolist()
        return weights

    def _save_metadata(self, dir_path, player_type, obs_normalise, domain_params_in_obs):
        metadata = {}

        metadata['playertype'] = player_type
        metadata['normalise'] = obs_normalise
        metadata['domain_params_in_obs'] = domain_params_in_obs
        metadata['stochastic_actions'] = self.stochastic_actions

        metadata['num_inputs'] = self.num_inputs
        metadata['num_hidden_layers'] = self.num_hidden_layers
        metadata['neurons_per_hidden_layer'] = self.neurons_per_hidden_layer

        file_path = dir_path + '/metadata.json'

        with open(file_path, 'w') as outfile:
            json.dump(metadata, outfile)

    def _save_genotype(self, dir_path):
        #Save genotype as a csv - it is just a list
        file_path = dir_path + '/genotype.csv'

        with open(file_path, 'w') as outfile:
            csv_writer = csv.writer(outfile)
            csv_writer.writerow(self.get_weights())

    def _save_agent_to_file(self, player_type, obs_normalise, domain_params_in_obs,
                            file_name_suffix=''):

        #Construct full directory path
        date_time = str(datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
        dir_name = player_type + '_' + date_time + file_name_suffix
        dir_path = self.base_path + dir_name

        #Create directory for model
        os.makedirs(dir_path, exist_ok=True)
        os.chmod(dir_path, 0o777)

        #Save metadata
        self._save_metadata(dir_path, player_type, obs_normalise, domain_params_in_obs)

        #Save genotype
        self._save_genotype(dir_path)

    def _read_metadata(self, metadata_filepath):
        with open(metadata_filepath, 'r') as metadata_file:
            metadata = json.load(metadata_file)
        return metadata

    def _read_genotype(self, genotype_filepath):
        with open(genotype_filepath, 'r') as genotype_file:
            reader = csv.reader(genotype_file)
            genotype = list(map(float, list(reader)[0]))
        return genotype

    def _read_agent_from_file(self, dir_name):
        dir_path = self.base_path + dir_name + '/'

        if self.in_tournament:
            dir_path = "/plark_ai_public" + dir_path

        #Read metadata
        metadata = self._read_metadata(dir_path + 'metadata.json')

        #Read genotype
        genotype = self._read_genotype(dir_path + 'genotype.csv')

        return metadata, genotype
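
Usage sketch (not part of the project file above): a minimal, hypothetical example of constructing an NNAgent directly from hyperparameters, setting a flat weight vector, and requesting an action. The input size, layer sizes, agent_type value and the observation below are illustrative assumptions; in the project this constructor is normally reached via a subclass, as the comment on num_outputs notes.

#Hypothetical usage sketch; the sizes and values below are illustrative assumptions
agent = NNAgent(num_inputs=4, num_outputs=3,
                num_hidden_layers=1, neurons_per_hidden_layer=8,
                agent_type='pelican', stochastic_actions=False,
                driving_agent=True)

#A genotype is just a flat list of weights, e.g. produced by an evolutionary algorithm
genotype = [0.0] * agent.get_num_weights()
agent.set_weights(genotype)

#With driving_agent=True the agent expects a flat observation of length num_inputs
action = agent.getAction([0.0, 0.5, 1.0, 0.25])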
Code Example #2
#Imports used by this snippet (the original file has its own import block)
import math
import random
import logging

import gym
from gym import spaces
import numpy as np

#Environment and Observation are classes from the plark_ai_public project and are
#assumed to be importable in this module. The logger below stands in for the
#project's configured logger.
logger = logging.getLogger(__name__)

class PlarkEnv(gym.Env):
        metadata = {'render.modes': ['human']}

        def __init__(self,config_file_path=None,verbose=False, **kwargs):
                self.kwargs = kwargs


                self.random_panther_start_position = kwargs.get('random_panther_start_position', False)
                self.random_pelican_start_position = kwargs.get('random_pelican_start_position', False)

                self.render_height = kwargs.get('render_height', None)
                if self.render_height is None:
                        self.render_height = 250
                        self.kwargs['render_height'] = self.render_height
                self.render_width = kwargs.get('render_width', None)
                if self.render_width is None:
                        self.render_width = 310
                        self.kwargs['render_width'] = self.render_width

                self.driving_agent = kwargs.get('driving_agent', None)
                if self.driving_agent is None:
                        self.driving_agent = 'pelican'
                        self.kwargs['driving_agent'] = self.driving_agent

                #logger.info('plark.kwargs :'+ str(self.kwargs))

                self.verbose = verbose
                self.viewer = None
                self.server_process = None
                self.server_port = None

                self.image_based = kwargs.get('image_based', False)

                #logger.info('self.image_based :'+ str(self.image_based))
                self.env = Environment()
                self.config_file_path = config_file_path

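                #Reward shaping constants: penalise illegal moves and buoys
                #dropped too close together; reward well-spaced buoys.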
                self.illegal_move_reward = -0.1
                self.buoy_too_close_reward = -0.2
                self.buoy_far_apart_reward = 0.5

                #1 UP
                #2 UP RIGHT
                #3 DOWN RIGHT
                #4 DOWN
                #5 DOWN LEFT
                #6 UP LEFT

                if self.driving_agent == 'panther':
                        self.view = 'PANTHER'
                        self.ACTION_LOOKUP = {
                                0 : '1',
                                1 : '2',
                                2 : '3',
                                3 : '4', 
                                4 : '5',  
                                5 : '6',  
                                6 : 'end'  
                        }
                elif self.driving_agent == 'pelican':
                        self.view = 'PELICAN'
                        self.ACTION_LOOKUP = {
                                0 : '1',
                                1 : '2',
                                2 : '3',
                                3 : '4', 
                                4 : '5',  
                                5 : '6',  
                                6 : 'drop_buoy',  
                                7 : 'drop_torpedo',  
                                8 : 'end'  
                        }
                else:
                        raise ValueError('driving_agent not set correctly')

                # Inverse action lookup for looking up specific actions
                self.action_index = dict((val, key) for key, val in self.ACTION_LOOKUP.items())

                

                if self.image_based:
                        self.reset()
                        self.game = self.env.activeGames[len(self.env.activeGames)-1]
                        N_CHANNELS = 3
                        logger.info('observation space: Height:'+str(self.render_height)+', Width:'+str(self.render_width)+', Channels:'+str(N_CHANNELS))
                        self.observation_space = spaces.Box(low=0, high=255,
                                                                                        shape=(self.render_height, self.render_width, N_CHANNELS), dtype=np.uint8)
                        logger.info('Image observations created')                                                               
                        self.normalise = None
                        self.domain_params_in_obs = None
                else:
                        if len(self.env.activeGames) > 0:
                                self.env.activeGames[len(self.env.activeGames)-1].reset_game()
                        else:    
                                if self.config_file_path:
                                        #logger.info('config filepath: ' +str(self.config_file_path))
                                        self.env.createNewGame(config_file_path=self.config_file_path, **self.kwargs)
                                else:
                                        self.env.createNewGame(**self.kwargs)
                        self.game = self.env.activeGames[len(self.env.activeGames)-1]           
                        self.observation = Observation(self.game, **kwargs)
                        self.observation_space = self.observation.get_observation_space() 

                        self.normalise = self.observation.normalise
                        self.domain_params_in_obs = self.observation.domain_params_in_obs
                        #logger.info('Non image observations created')

                        
                        
                if self.driving_agent == 'panther':
                        self.action_space = spaces.Discrete(7)
                elif self.driving_agent == 'pelican':    
                        self.action_space = spaces.Discrete(9)
                self.status = self.env.activeGames[len(self.env.activeGames)-1].gameState

        def close(self):
                self.env.stopAllGames()

        def _get_location(self, board, item):
                for col in range(board.cols):
                        for row in range(board.rows):
                                if board.is_item_type_in_cell(item, col, row):
                                        return (col, row)
                raise ValueError("Could not find {}".format(item))


        def _observation(self):
                if self.image_based:    
                        pil_image = self.env.activeGames[len(self.env.activeGames)-1].render(self.render_width,self.render_height,self.view)
                        np_image = np.array(pil_image, dtype=np.uint8)
                        return np_image
                else:
                        obs = self.observation.get_observation(self.env.activeGames[len(self.env.activeGames)-1]._state(self.view))
                        #return obs
                        return np.array(obs, dtype=np.float64)


        def step(self, action):
                action = self.ACTION_LOOKUP[action]
                game = self.env.activeGames[len(self.env.activeGames)-1]

                if self.verbose:
                        logger.info('Action:'+action)
                gameState,uioutput = game.game_step(action)
                self.status = gameState
                self.uioutput = uioutput 
                
                ob = self._observation()

                #print(self.driving_agent)
                #print(ob)

                reward = 0
                done = False
                _info = { 'turn': game.turn_count }

                if self.driving_agent == 'pelican':
                        illegal_move = game.illegal_pelican_move
                else:
                        illegal_move = game.illegal_panther_move
                _info['illegal_move'] = illegal_move

                if illegal_move:
                        reward = reward + self.illegal_move_reward
                if self.driving_agent == 'pelican':
                        ## Reward for dropping a sonobuoy (only applies if the move was legal)
                        if action == 'drop_buoy' and not illegal_move:
                                self.globalSonobuoys = game.globalSonobuoys
                                if len(self.globalSonobuoys)>1: 
                                        sonobuoy = self.globalSonobuoys[-1]
                                        sbs_in_range = game.gameBoard.searchRadius(sonobuoy.col, sonobuoy.row, sonobuoy.range, "SONOBUOY")
                                        sbs_in_range.remove(sonobuoy) # remove itself from search results
                                        if len(sbs_in_range) > 0:
                                                reward = reward + self.buoy_too_close_reward
                                        else:
                                                reward = reward + self.buoy_far_apart_reward 
                                else:
                                        reward = reward + self.buoy_far_apart_reward        

                #  PELICANWIN = Pelican has won
                #  ESCAPE     = Panther has won
                #  BINGO      = Panther has won; the Pelican has reached its turn limit and run out of fuel
                #  WINCHESTER = Panther has won; all torpedoes have been dropped and stopped running, so the Panther cannot be stopped
                if self.status in ("PELICANWIN", "BINGO", "WINCHESTER", "ESCAPE"):
                        done = True
                        if self.verbose:
                                logger.info("GAME STATE IS " + self.status)    
                        if self.status in ["ESCAPE","BINGO","WINCHESTER"]:
                                if self.driving_agent == 'pelican':
                                        reward = -1 
                                        _info['result'] = "LOSE"
                                elif self.driving_agent == 'panther':  
                                        reward = 1 
                                        _info['result'] = "WIN"
                                else:
                                        raise ValueError('driving_agent not set correctly')
                        if self.status == "PELICANWIN":
                                if self.driving_agent == 'pelican':
                                        reward = 1 
                                        _info['result'] = "WIN"
                                elif self.driving_agent == 'panther':  
                                        reward = -1 
                                        _info['result'] = "LOSE"
                                else:
                                        raise ValueError('driving_agent not set correctly')
                
                return ob, reward, done, _info

        def set_pelican(self, pelican):
            self.env.activeGames[len(self.env.activeGames)-1].set_pelican(pelican)

        def set_panther(self, panther):
            self.env.activeGames[len(self.env.activeGames)-1].set_panther(panther)

        def set_pelican_using_path(self, pelican):
            self.env.activeGames[len(self.env.activeGames)-1].load_pelican_using_path(pelican)

        def set_panther_using_path(self, panther):
            self.env.activeGames[len(self.env.activeGames)-1].load_panther_using_path(panther)

        def clear_memory(self):
            self.env.activeGames[len(self.env.activeGames)-1].clear_memory()


        def wrapper_test_function(self):
            print("hello there!")

        def reset(self):
                #If a game already exists, reset it
                if len(self.env.activeGames) > 0:
                        #On reset randomly places panther in a different location.
                        if self.random_panther_start_position: 
                                map_width = self.env.activeGames[len(self.env.activeGames)-1].map_width
                                map_height = self.env.activeGames[len(self.env.activeGames)-1].map_height
                                #Taken out of domain_parameters.py (Simon's bounds)
                                panther_start_col_lb = int(math.floor(0.33 * map_width))
                                panther_start_col_ub = int(math.floor(0.66 * map_width))
                                panther_start_row_lb = int(math.floor(0.8 * map_height))
                                panther_start_row_ub = map_height-1

                                panther_start_col = random.choice(range(panther_start_col_lb,
                                                                        panther_start_col_ub+1))
                                panther_start_row = random.choice(range(panther_start_row_lb,
                                                                        panther_start_row_ub+1)) 
                                self.env.activeGames[len(self.env.activeGames)-1].panther_start_col = panther_start_col
                                self.env.activeGames[len(self.env.activeGames)-1].panther_start_row = panther_start_row

                        #On reset randomly places pelican in a different location.
                        if self.random_pelican_start_position: 
                                map_width = self.env.activeGames[len(self.env.activeGames)-1].map_width
                                map_height = self.env.activeGames[len(self.env.activeGames)-1].map_height
                                #Taken out of domain_parameters.py (Simon's bounds)
                                pelican_start_col_lb = 0 
                                pelican_start_col_ub = int(math.floor(0.33 * map_width))
                                pelican_start_row_lb = 0
                                pelican_start_row_ub = int(math.floor(0.2 * map_height))

                                pelican_start_col = random.choice(range(pelican_start_col_lb,
                                                                        pelican_start_col_ub+1))
                                pelican_start_row = random.choice(range(pelican_start_row_lb,
                                                                        pelican_start_row_ub+1))
                                self.env.activeGames[len(self.env.activeGames)-1].pelican_start_col = pelican_start_col
                                self.env.activeGames[len(self.env.activeGames)-1].pelican_start_row = pelican_start_row

                        self.env.activeGames[len(self.env.activeGames)-1].reset_game()
                else:    
                        if self.config_file_path:
                                logger.info('config filepath: ' +str(self.config_file_path))
                                self.env.createNewGame(config_file_path=self.config_file_path, **self.kwargs)
                        else:
                                self.env.createNewGame(**self.kwargs)   

                self.game = self.env.activeGames[len(self.env.activeGames)-1]           

                if self.driving_agent == 'pelican':
                        self.render_width = self.game.pelican_parameters['render_width']
                        self.render_height = self.game.pelican_parameters['render_height']

                elif self.driving_agent == 'panther':
                        self.render_width = self.game.panther_parameters['render_width']
                        self.render_height = self.game.panther_parameters['render_height']

                return self._observation()


        def render(self, mode='human', close=False, view=None):
                if view is None:
                        view = self.view
                pil_image = self.env.activeGames[len(self.env.activeGames)-1].render(self.render_width,self.render_height,view)
                return pil_image
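
Usage sketch (not part of the project file above): a minimal, hypothetical episode loop that drives PlarkEnv with random actions. The config file path and keyword arguments are illustrative assumptions; which kwargs the project's Observation class actually accepts is not shown in this snippet.

#Hypothetical usage sketch; the config path and kwargs below are illustrative assumptions
env = PlarkEnv(config_file_path='/path/to/game_config.json',
               driving_agent='pelican', verbose=False)

obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()   #random policy, for illustration only
    obs, reward, done, info = env.step(action)
env.close()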