def get_neighborlist(self):
    # Logic retrieved from tutor code:
    # https://gist.github.com/tttor/826be15b99bb4b33a50787d7eb7b5fda
    neighborlist = []
    for action in self.moves:
        # Deep-copy the grid so applying the move does not mutate this state.
        data = [x[:] for x in self.lasertank.grid_data]
        temp = LaserTankMap(self.lasertank.x_size, self.lasertank.y_size, data,
                            self.lasertank.player_x, self.lasertank.player_y,
                            self.lasertank.player_heading)
        temp.apply_move(action)
        # Note: every action yields a neighbor here; failed moves are not filtered.
        neighbor = LaserTankState(temp, 1, self.flag_pos)
        neighborlist.append((neighbor, action))
    return neighborlist
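# A minimal usage sketch (not from the original repo) showing how
# get_neighborlist() plugs into a uniform-cost search. The is_goal() method
# on LaserTankState and the unit step cost are assumptions for illustration.
import heapq
import itertools

def uniform_cost_search(initial_state):
    counter = itertools.count()  # tie-breaker so heapq never compares states
    frontier = [(0, next(counter), initial_state, [])]
    visited = set()
    while frontier:
        cost, _, state, path = heapq.heappop(frontier)
        if state.is_goal():  # assumed goal test on LaserTankState
            return path
        key = hash(state)
        if key in visited:
            continue
        visited.add(key)
        for neighbor, action in state.get_neighborlist():
            heapq.heappush(frontier,
                           (cost + 1, next(counter), neighbor, path + [action]))
    return None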
import time

import pandas as pd

def train_q_learning(self, simulator: LaserTankMap):
    """
    Train the agent using Q-learning, building up a table of Q-values.
    :param simulator: A simulator for collecting episode data (LaserTankMap instance)
    """
    print('q-learning')

    # Q(s, a) table
    # format: key = hash(state), value = dict mapping actions to values
    q_values = {}

    # Up to [simulator.time_limit] seconds of compute time are allowed;
    # keep training until that limit is reached.
    start = time.time()
    reward_list = []
    episode_reward = []
    while time.time() - start < simulator.time_limit:
        s = simulator.__hash__()
        a = self.choose_action(simulator, q_values)
        if s not in q_values:
            q_values[s] = {}
        old_q = q_values[s].get(a, 0.0)

        # Take the action and observe the reward and resulting state.
        r, episode_finished = simulator.apply_move(a)
        reward_list.append(r)
        next_s = simulator.__hash__()
        if next_s not in q_values:
            q_values[next_s] = {}

        # Best Q-value over all actions in the next state (0.0 if unseen).
        next_s_q = {}
        for action in simulator.MOVES:
            next_s_q[action] = q_values[next_s].get(action, 0.0)
        best_next_q = next_s_q[dict_argmax(next_s_q)]

        # TD update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        td = r + (simulator.gamma * best_next_q) - old_q
        q_values[s][a] = old_q + (self.learning_rate * td)

        if episode_finished:
            episode_reward.append(sum(reward_list))
            reward_list = []
            simulator.reset_to_start()

    # Per-episode rewards, kept for optional logging/analysis.
    df = pd.DataFrame(episode_reward)
    # df.to_csv('episode.csv', index=False)

    # store the computed Q-values
    self.q_values = q_values
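# The training loop above relies on two helpers that are not shown in this
# snippet: dict_argmax and choose_action. Below is a minimal sketch of
# plausible implementations, assuming an epsilon-greedy policy; the
# exploration_rate attribute is an assumption, not part of the original code.
import random

def dict_argmax(d):
    # Return the key with the highest value; ties resolved arbitrarily.
    return max(d, key=d.get)

def choose_action(self, simulator, q_values):
    # Epsilon-greedy: explore with probability self.exploration_rate
    # (assumed attribute), otherwise exploit the best known Q-value.
    s = simulator.__hash__()
    if s not in q_values or not q_values[s] or random.random() < self.exploration_rate:
        return random.choice(simulator.MOVES)
    return dict_argmax(q_values[s])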
def action_move(self, action):
    new_grid = [row[:] for row in self.grid]
    new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                              self.coord_x, self.coord_y, self.player_heading)

    # Map WASD input onto the simulator's move codes.
    key_to_move = {'W': 'f',   # move forward
                   'D': 'r',   # turn clockwise
                   'A': 'l',   # turn counter-clockwise
                   'S': 's'}   # shoot laser
    if action not in key_to_move:
        print("No/Wrong Action Input")
        return 0

    path_to_take = key_to_move[action]
    result = new_player.apply_move(path_to_take)

    if result == 0:
        # Move succeeded: build the successor state with incremented cost.
        new_state = PlayerTank(new_player.grid_data, self.cost + 1,
                               new_player.player_x, new_player.player_y,
                               action, self.path + [path_to_take],
                               self.x_size, self.y_size,
                               new_player.player_heading)
    else:
        # Collision (1) or game over (2): no successor state.
        new_state = 0
    return new_state
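# Hypothetical caller sketch (not from the original repo): action_move
# returns 0 rather than a state when a move collides or ends the game, so
# successors must be filtered. The current and frontier names are assumptions.
for key in ('W', 'A', 'S', 'D'):
    successor = current.action_move(key)
    if successor != 0:
        frontier.append(successor)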
def get_successor(self):
    next_states = []
    for move in self.moves:
        # Copy the grid so apply_move does not mutate this state's map.
        new_data = [row[:] for row in self.game_map.grid_data]
        new_map = LaserTankMap(self.game_map.x_size, self.game_map.y_size, new_data,
                               player_x=self.game_map.player_x,
                               player_y=self.game_map.player_y,
                               player_heading=self.game_map.player_heading)
        # new_state = deepcopy(self.get_map())
        new_parents = [row[:] for row in self.parents]
        # new_parents = deepcopy(self.parents)
        # Only successful moves produce successors; the action path is
        # extended with the move that led to the new state.
        if new_map.apply_move(move) == LaserTankMap.SUCCESS:
            new_parents.append(move)
            next_state = State(new_map, 1, new_parents)
            next_states.append((next_state, move))
    return next_states