def testSimpleAgent():
    """Smoke test: build a small hand-crafted game state and let a SimpleAgent act on it."""
    game_type = constants.GameType(4)
    # Board cells use the pommerman Item encoding: 0 passage, 1 rigid wall,
    # 2 wood, 3 bomb, 8 kick power-up, 10/11 agents 0/1
    board = [[0, 0, 2, 1, 1, 1],
             [0, 0, 0, 0, 0, 0],
             [2, 8, 0, 1, 0, 1],
             [1, 0, 1, 0, 10, 1],
             [1, 0, 3, 0, 0, 1],
             [1, 11, 1, 1, 1, 0]]
    bomb_info = [(0, 1, 2, None)]
    game_state = my_utility.get_gamestate(board, bomb_info)
    game_data = my_utility.get_gamedata(game_state, game_type)

    fm = forward_model.ForwardModel()
    obs = fm.get_observations(game_data.board, game_data.agents,
                              game_data.bombs, game_data.flames, False, None,
                              game_data.game_type, None)

    simple_agent = SimpleAgent()
    print(simple_agent.act(obs[1], spaces.Discrete(6)))
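# Pommerman's discrete action space (constants.Action) is: 0 = Stop, 1 = Up,
# 2 = Down, 3 = Left, 4 = Right, 5 = Bomb -- hence spaces.Discrete(6) above.
# Illustrative entry point for running the smoke test directly:
if __name__ == '__main__':
    testSimpleAgent()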
class EvaluatorAgent(BaseAgent):
    def __init__(
            self,
            n_actions,
            character,
            evaluation_model=None,
            evaluation_model_path=None,
            # Agent properties used to preprocess observations
            use_history=True,  # Use previous observations for predictions
            use_2d=True,  # Use 2d convolutions
            patient=True,  # Wait to collect initial observations (only needed with history)
            center_view=True,  # Center the view on the agent
            original_view=False,  # Use the 11x11 board; if False, use 21x21
            verbose=False  # Print the chosen actions
    ):
        super(EvaluatorAgent, self).__init__(character=character)

        # Properties
        self.use_history = use_history
        self.use_2d = use_2d
        self.patient = patient
        self.center_view = center_view
        self.original_view = original_view
        self.verbose = verbose

        # Acting history for the evaluation
        self.actions_history = []
        self.observations_history = []
        self.episode_count = 0
        self.steps = 0
        self.n_actions = n_actions
        self.simple_agent = SimpleAgent(character=character)

        # Load any custom model
        self.evaluation_model = None
        if evaluation_model:
            self.evaluation_model = evaluation_model
            if evaluation_model_path:
                try:
                    self.evaluation_model.load_weights(evaluation_model_path)
                except Exception:
                    print('Weights load failed')
        elif evaluation_model_path:
            try:
                self.evaluation_model = load_model(evaluation_model_path)
            except Exception:
                print('Model load failed')
        else:
            print('Use SimpleAgent')

    # Featurization
    def featurize(self, obs):
        return featurize(obs, center=self.center_view, crop=self.original_view)

    # Acting
    def act(self, obs, action_space=None):
        # Initialize a new episode
        if self.steps == 0:
            self.actions_history.append([])

        # Create the observation; it is merged with its predecessors below
        obs_f = self.featurize(obs)

        # A patient agent spends the first history_length - 1 steps standing
        # still and collecting observations
        if self.patient and len(
                self.observations_history) < history_length - 1:
            self.observations_history.append(obs_f)
            self.actions_history[self.episode_count].append(0)
            self.steps += 1  # Keep steps in sync with the stored history
            return 0

        if self.use_history:
            obs_history = self.make_observation(obs_f, self.steps, self.use_2d)
        else:
            obs_history = obs_f
        self.observations_history.append(
            obs_f)  # Append the current observation after the merge

        # Predict the action
        if self.evaluation_model is not None:
            res = self.evaluation_model.predict(
                obs_history.reshape((1, ) + obs_history.shape))[0]
            res = np.argmax(res)
        else:
            res = self.simple_agent.act(obs, action_space)

        if self.verbose:
            print(res, end='; ')

        # # In the dueling DQN the first output relates to the advantage
        # if len(res) > self.n_actions:
        #     res = res[1:]

        self.actions_history[self.episode_count].append(res)
        if self.verbose:
            print(ACTIONS[res])
        self.steps += 1
        return res

    def make_observation(self, obs, i, use_2d=True):
        if i == 0:
            # First observation: repeat it to fill the whole history
            res = np.array([obs for _ in range(history_length)])
        elif i < history_length - 1:
            # Fewer than history_length - 1 observations stored so far
            n_first = history_length - 1 - i
            res = np.concatenate(
                [
                    np.array([
                        self.observations_history[0] for _ in range(n_first)
                    ]),  # Repeat the first observation
                    np.array(self.observations_history[:i]).reshape(
                        i, view_size, view_size,
                        n_channels),  # Add the stored observations
                    obs.reshape(1, view_size, view_size,
                                n_channels)  # Current observation
                ],
                axis=0)
        else:
            res = np.concatenate(
                [
                    np.array(self.observations_history[
                        i - history_length + 1:i]).reshape(
                            history_length - 1, view_size, view_size,
                            n_channels),  # Add the last observations
                    obs.reshape(1, view_size, view_size,
                                n_channels)  # Current observation
                ],
                axis=0)
        if use_2d:
            # Stack the history along the channel axis for 2d convolutions
            res = np.concatenate(res, axis=-1)
        return res

    # Evaluation
    def end_episode(self):
        self.steps = 0
        self.episode_count += 1
        self.observations_history = []

    def reset_run(self):
        self.actions_history = []
        self.episode_count = 0
        self.steps = 0

    def close(self):
        pass

    def run_episode(self, config, env):
        return run_episode(self, config, env, self.agent_id)

    def plot_statistics(self, info, selected_labels):
        return plot_statistics(self, info, selected_labels)

    def evaluate_agent(self, selected_labels, iterations=100, plot=True):
        return evaluate_agent(self, selected_labels, self.agent_id, iterations,
                              plot)
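# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original pipeline): wiring an
# EvaluatorAgent into a standard Pommerman FFA match. The environment id and
# the opponent line-up are assumptions; pass evaluation_model_path to load a
# saved Keras model instead of falling back to SimpleAgent.
# ---------------------------------------------------------------------------
def example_match():
    import pommerman
    from pommerman import agents as pm_agents, characters

    evaluator = EvaluatorAgent(n_actions=6,
                               character=characters.Bomber,
                               verbose=True)  # no model -> SimpleAgent fallback
    agent_list = [
        evaluator,
        pm_agents.SimpleAgent(),
        pm_agents.SimpleAgent(),
        pm_agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    state = env.reset()
    done = False
    while not done:
        actions = env.act(state)  # queries every agent, including ours
        state, reward, done, info = env.step(actions)
    evaluator.end_episode()  # reset per-episode counters, keep action history
    env.close()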