def __init__(self, problem, steps):
    self.original_problem = deepcopy(problem)
    start_state, special_things = checker.problem_to_state(problem)
    self.steps = steps
    self.current_state_of_board, self.current_special_things = checker.problem_to_state(problem)
    self.eval = checker.Evaluator(0, problem, steps)
    self.act_list = ACT_LIST

    # Enumerate the abstract state space and build the transition and reward
    # dictionaries for the MDP.
    all_states, trans_dict, rewards = self.compute_states()
    print(all_states)
    print(rewards)

    mdp.MDP.__init__(self, init=start_state, actlist=["U", "D", "R", "L"],
                     terminals=[], transitions=trans_dict,
                     states=all_states, gamma=0.01)
    self.reward = rewards  # mdp rewards dictionary

    # Solve the MDP once up front and keep the resulting policy.
    self.U = mdp.value_iteration(self)
    self.pi = mdp.best_policy(self, self.U)
    # print(mdp.best_policy(self, self.U))
    print("end of initialization\n\n\n\n")
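# Illustration only (not part of the agent): a self-contained sketch of the
# computation that mdp.value_iteration and mdp.best_policy are expected to
# perform, assuming trans_dict maps state -> action -> [(probability,
# next_state), ...] and rewards maps state -> float, as above. The toy states
# and values below are made up.
_ACTIONS = ["U", "D", "R", "L"]

_toy_transitions = {
    "s0": {a: [(1.0, "s1" if a == "R" else "s0")] for a in _ACTIONS},
    "s1": {a: [(1.0, "s1")] for a in _ACTIONS},
}
_toy_rewards = {"s0": 0.0, "s1": 1.0}

def _value_iteration(transitions, rewards, gamma=0.01, epsilon=1e-6):
    # Repeated Bellman backups until the utilities stop changing.
    U = {s: 0.0 for s in transitions}
    while True:
        delta = 0.0
        for s in transitions:
            best = max(sum(p * U[s2] for p, s2 in transitions[s][a]) for a in _ACTIONS)
            new_u = rewards[s] + gamma * best
            delta = max(delta, abs(new_u - U[s]))
            U[s] = new_u
        if delta < epsilon:
            return U

def _best_policy(transitions, U):
    # Greedy one-step lookahead with respect to the converged utilities.
    return {s: max(_ACTIONS, key=lambda a: sum(p * U[s2] for p, s2 in transitions[s][a]))
            for s in transitions}

_U = _value_iteration(_toy_transitions, _toy_rewards)
print(_best_policy(_toy_transitions, _U))  # e.g. {'s0': 'R', 's1': 'U'}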
def __init__(self, board_state, last_pacman_action="reset", e1=0, e2=0, e3=0):
    # board
    self.state, self.special_things = checker.problem_to_state(board_state)
    self.size_of_board = len(self.state)
    self.last_pacman_action = last_pacman_action
    self.last_type_eaten = 0
    self.score = 0
    self.h_val = 0
    self.future_h_val = 0
    self.numb_of_ghosts = self.get_num_ghosts()

    # Expected values for each type of dot
    self.e1 = e1
    self.e2 = e2
    self.e3 = e3

    self.numb_of_dots_within_three = self.get_dot_neighbors()
    self.numb_of_dots_div_board_size = self.numb_of_dots_within_three / self.size_of_board

    # Initialize the location of all the dots
    self.list_of_dots = {}
    for number_of_row, row in enumerate(board_state):
        for number_of_column, cell in enumerate(row):
            if cell % 10 in (1, 2, 3):
                self.list_of_dots[(number_of_row, number_of_column)] = cell
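# Illustration only: the dot-collection rule used in the loop above, run on a
# tiny hand-made board. The real cell encoding is defined by the checker module
# and is not shown in this section; the values below only exercise the
# "cell % 10 in (1, 2, 3)" test.
_toy_board = [
    [10, 11, 99],   # 11 -> dot of type 1
    [12, 10, 13],   # 12, 13 -> dots of type 2 and 3
]
_toy_dots = {}
for _r, _row in enumerate(_toy_board):
    for _c, _cell in enumerate(_row):
        if _cell % 10 in (1, 2, 3):
            _toy_dots[(_r, _c)] = _cell
print(_toy_dots)  # {(0, 1): 11, (1, 0): 12, (1, 2): 13}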
def __init__(self, initial, goal=None):
    """The constructor specifies the initial state, and possibly a goal state,
    if there is a unique goal. Your subclass's constructor can add other arguments."""
    self.original_problem = deepcopy(initial)
    self.initial, self.special_things = checker.problem_to_state(initial)
    self.accumulated_reward = 0
    # self.initial = initial
    self.goal = goal
def __init__(self, board_state, last_pacman_action="reset", e1=0, e2=0, e3=0):
    # board
    self.state, self.special_things = checker.problem_to_state(board_state)
    self.size_of_board = len(self.state)
    self.last_pacman_action = last_pacman_action
    self.last_type_eaten = 0
    self.score = 0
    self.h_val = 0
    self.future_h_val = 0
    self.numb_of_ghosts = self.get_num_ghosts()

    # Expected values for each type of dot
    self.e1 = e1
    self.e2 = e2
    self.e3 = e3

    self.numb_of_dots_within_three = self.get_dot_neighbors()
    self.numb_of_dots_div_board_size = self.numb_of_dots_within_three / self.size_of_board
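# Hypothetical sketch of the dots-within-three feature computed by
# get_dot_neighbors(); the real helper is not shown in this section, so the
# radius of 3, the Manhattan metric and the (pacman_pos, dots) inputs are
# assumptions made for illustration.
def _count_dots_within_three(pacman_pos, dots):
    pr, pc = pacman_pos
    return sum(1 for (r, c) in dots if abs(r - pr) + abs(c - pc) <= 3)

# With Pacman at (1, 1) and the toy dots from the earlier sketch, all three
# dots lie within Manhattan distance 3.
print(_count_dots_within_three((1, 1), {(0, 1): 11, (1, 0): 12, (1, 2): 13}))  # 3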
def choose_next_action(self, state):
    state_of_board, special_things = checker.problem_to_state(state)
    eval_state = checker.Evaluator(0, state, 1)

    # Check if Pacman is still in the game.
    if "pacman" not in special_things:
        return "reset"

    # If Pacman is still in the game, choose the best next step.
    s = self.eval_state_to_ab_state_plus_md(eval_state)
    if s in self.pi:
        new_min_md = 0
        # Check if we need to update R based on ghost location.
        min_md = self.find_min_md_from_ghosts(eval_state)
        # Check whether there are ghosts on the board and whether any is very close.
        if min_md != -100 and min_md <= 2:
            print("performing update to R")
            # Scan the four moves for a position farther from the ghosts.
            for action in ["U", "L", "R", "D"]:
                child_eval = deepcopy(eval_state)
                checker.Evaluator.change_state_after_action(child_eval, action)
                temp_new_md = self.find_min_md_from_ghosts(child_eval)
                if temp_new_md != -100 and temp_new_md > new_min_md:
                    new_min_md = temp_new_md
                    next_state_md = self.eval_state_to_ab_state_plus_md(child_eval)
                    # Raise the reward of the safer state in the MDP reward
                    # dictionary set in __init__, then re-solve below.
                    self.reward[next_state_md] = self.reward[next_state_md] + 10 * new_min_md
                    # TODO: we might be yielding a state that didn't exist before
            self.U = mdp.value_iteration(self)
            self.pi = mdp.best_policy(self, self.U)
        return self.pi[s]
    else:
        # Maybe here we should go into a simple DFS to find the rest of the
        # route to finish the board? @meir
        print("random chosen")
        return random.choice(["U", "D", "L", "R"])
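# Illustration only: the ghost-avoidance idea from choose_next_action, detached
# from the checker/mdp machinery. The positions, the _MOVES table and
# _safest_action are made up for this sketch; the real code instead raises the
# reward of the safer abstract state and re-runs value iteration.
_MOVES = {"U": (-1, 0), "D": (1, 0), "L": (0, -1), "R": (0, 1)}

def _min_ghost_distance(pos, ghosts):
    # Mirrors find_min_md_from_ghosts: -100 is the "no ghosts" sentinel used above.
    if not ghosts:
        return -100
    return min(abs(pos[0] - g[0]) + abs(pos[1] - g[1]) for g in ghosts)

def _safest_action(pacman_pos, ghosts):
    min_md = _min_ghost_distance(pacman_pos, ghosts)
    if min_md == -100 or min_md > 2:
        return None  # no ghosts, or none close enough to override the policy
    def _dist_after(action):
        dr, dc = _MOVES[action]
        return _min_ghost_distance((pacman_pos[0] + dr, pacman_pos[1] + dc), ghosts)
    return max(["U", "L", "R", "D"], key=_dist_after)

print(_safest_action((2, 2), [(2, 4), (0, 2)]))  # ghosts to the right and above -> "L"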