Example #1
    def __init__(self, problem, steps):
        self.original_problem = deepcopy(problem)
        start_state, special_things = checker.problem_to_state(problem)
        self.steps = steps
        self.current_state_of_board, self.current_special_things = checker.problem_to_state(
            problem)
        self.eval = checker.Evaluator(0, problem, steps)
        self.act_list = ACT_LIST
        all_states, trans_dict, rewards = self.compute_states()  # enumerate the abstract state space
        print(all_states)
        print(rewards)
        mdp.MDP.__init__(self,
                         init=start_state,
                         actlist=["U", "D", "R", "L"],
                         terminals=[],
                         transitions=trans_dict,
                         states=all_states,
                         gamma=0.01)

        self.reward = rewards  # MDP reward dictionary, keyed by state

        self.U = mdp.value_iteration(self)  # utility of every abstract state

        self.pi = mdp.best_policy(self, self.U)  # greedy policy derived from U

        # print(mdp.best_policy(self, self.U))
        print("end of initialization\n\n\n\n")
        return
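The constructor above leans on compute_states (not shown) to hand mdp.MDP three things: the list of abstract states, a nested transition dictionary, and a reward dictionary. The toy snippet below is a minimal sketch of those shapes, assuming the same aima-python-style mdp module the constructor imports (one whose MDP.T(s, a) reads the supplied transitions table); the two board states, transition probabilities, and rewards are invented purely for illustration.

import mdp

# Two invented abstract states; each maps every action to a list of
# (probability, next_state) pairs, the shape mdp.MDP.T() is assumed to return.
states = ["s0", "s1"]
transitions = {
    "s0": {"U": [(1.0, "s1")], "D": [(1.0, "s0")], "R": [(1.0, "s0")], "L": [(1.0, "s0")]},
    "s1": {"U": [(1.0, "s1")], "D": [(1.0, "s0")], "R": [(1.0, "s1")], "L": [(1.0, "s1")]},
}
rewards = {"s0": 0, "s1": 1}

toy = mdp.MDP(init="s0", actlist=["U", "D", "R", "L"], terminals=[],
              transitions=transitions, states=states, gamma=0.9)
toy.reward = rewards                 # same pattern as the constructor above

U = mdp.value_iteration(toy)         # dict: state -> utility
pi = mdp.best_policy(toy, U)         # dict: state -> best action
print(pi)                            # e.g. {'s0': 'U', 's1': 'U'}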
Example #2
 def __init__(self,
              board_state,
              last_pacman_action="reset",
              e1=0,
              e2=0,
              e3=0):
     # board
     self.state, self.special_things = checker.problem_to_state(board_state)
     self.size_of_board = len(self.state)
     self.last_pacman_action = last_pacman_action
     self.last_type_eaten = 0
     self.score = 0
     self.h_val = 0
     self.future_h_val = 0
     self.numb_of_ghosts = self.get_num_ghosts()
     # Expected Values for each type of dot
     self.e1 = e1
     self.e2 = e2
     self.e3 = e3
     self.numb_of_dots_within_three = self.get_dot_neighbors()
     self.numb_of_dots_div_board_size = self.numb_of_dots_within_three / self.size_of_board
     # initialize location of all the dots
     self.list_of_dots = {}
     for number_of_row, row in enumerate(board_state):
         for number_of_column, cell in enumerate(row):
             if cell % 10 in (1, 2, 3):  # ones digit marks a dot of type 1, 2 or 3
                 self.list_of_dots[(number_of_row, number_of_column)] = cell
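The loop above relies on an encoding detail of the board produced by checker.problem_to_state: the ones digit of a cell value marks a dot of type 1, 2 or 3. The snippet below is a small self-contained illustration of that assumption; the sample cell values are invented, and the real encoding lives in the checker module.

def collect_dots(board_state):
    """Map (row, column) -> cell value for every cell whose ones digit is 1, 2 or 3."""
    dots = {}
    for row_idx, row in enumerate(board_state):
        for col_idx, cell in enumerate(row):
            if cell % 10 in (1, 2, 3):      # ones digit encodes the dot type
                dots[(row_idx, col_idx)] = cell
    return dots

sample_board = [
    [10, 11, 12],   # 11 -> type-1 dot, 12 -> type-2 dot (10 assumed empty)
    [13, 10, 10],   # 13 -> type-3 dot
]
print(collect_dots(sample_board))   # {(0, 1): 11, (0, 2): 12, (1, 0): 13}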
Example #3
 def __init__(self, initial, goal=None):
     """The constructor specifies the initial state, and possibly a goal
     state, if there is a unique goal.  Your subclass's constructor can add
     other arguments."""
     self.original_problem = deepcopy(initial)
     self.initial, self.special_things = checker.problem_to_state(initial)
     self.accumulated_reward = 0
     #self.initial = initial
     self.goal = goal
Example #4
 def __init__(self, board_state, last_pacman_action="reset", e1=0, e2=0, e3=0):
     # board
     self.state, self.special_things = checker.problem_to_state(board_state)
     self.size_of_board = len(self.state)
     self.last_pacman_action = last_pacman_action
     self.last_type_eaten = 0
     self.score = 0
     self.h_val = 0
     self.future_h_val = 0
     self.numb_of_ghosts = self.get_num_ghosts()
     # Expected Values for each type of dot
     self.e1 = e1
     self.e2 = e2
     self.e3 = e3
     self.numb_of_dots_within_three = self.get_dot_neighbors()
     self.numb_of_dots_div_board_size = self.numb_of_dots_within_three / self.size_of_board
Example #5
 def choose_next_action(self, state):
     state_of_board, special_things = checker.problem_to_state(state)
     eval_state = checker.Evaluator(0, state, 1)
     if not "pacman" in special_things:
         # check if PACMAN is still in the game
         return "reset"
     # if pacman is still in the game, then, choose best next step.
     s = self.eval_state_to_ab_state_plus_md(eval_state)
     if s in self.pi:
         new_min_md = 0
         # check if we need to update R based on Ghost location:
         min_md = self.find_min_md_from_ghosts(eval_state)
         # we check whether there are any ghosts on the board and whether they are very close.
         if min_md != -100 and min_md <= 2:
             print("performing update to R")
             # start scanning for a better position
             for action in ["U", "L", "R", "D"]:
                 child_eval = deepcopy(eval_state)
                 checker.Evaluator.change_state_after_action(
                     child_eval, action)
                 temp_new_md = self.find_min_md_from_ghosts(child_eval)
                 if temp_new_md != -100 and temp_new_md > new_min_md:
                     new_min_md = temp_new_md
                     next_state_md = self.eval_state_to_ab_state_plus_md(
                         child_eval)
                     self.rewards[next_state_md] = self.rewards[
                         next_state_md] + 10 * new_min_md
             # TODO: we might be yielding a state that didn't exist before
             self.U = mdp.value_iteration(self)
             self.pi = mdp.best_policy(self, self.U)
         return self.pi[s]
     else:
         a = ["U", "D", "L", "R"]
         print("random chosen")
         # maybe we should run a simple DFS here to find the rest of the route to finish the board? @meir
         return random.choice(a)
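choose_next_action depends on find_min_md_from_ghosts, which is not shown above. Judging from how it is used, it returns -100 as a sentinel when no ghosts exist and otherwise the smallest Manhattan distance between pacman and any ghost. The function below is a hypothetical reconstruction under that assumption; the attribute names on the evaluator object are guesses, not the project's actual API.

def find_min_md_from_ghosts(eval_state):
    # Hypothetical reconstruction: eval_state.special_things is assumed to map
    # piece names ("pacman", "ghost1", ...) to (row, column) positions.
    special = eval_state.special_things
    if "pacman" not in special:
        return -100
    px, py = special["pacman"]
    ghost_positions = [pos for name, pos in special.items() if name.startswith("ghost")]
    if not ghost_positions:
        return -100                      # sentinel: no ghosts on the board
    return min(abs(px - gx) + abs(py - gy) for gx, gy in ghost_positions)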