def max_reward(self, state, action):
    # Return the best one-step reward obtainable from the successor state,
    # caching the per-action rewards for each state in self.R.
    new_state = move(state, action)
    if new_state not in self.R:
        self.R[new_state] = [self.reward(new_state, a) for a in self.actions]
    return max(self.R[new_state])
Example #2
def generate_next_states(puzzle):
    # Apply every legal move to the given puzzle and collect the resulting
    # successor states.
    next_states = []
    moves = find_possible_moves(puzzle)

    for m in moves:
        state = move(puzzle, m)
        next_states.append(state)

    return next_states
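
generate_next_states is the expansion step of a state-space search. A minimal breadth-first sketch that uses it is below; the is_goal callable and the assumption that puzzle states are hashable are illustrative, not part of the original module.

from collections import deque

def bfs_solve(start, is_goal):
    # Breadth-first search over puzzle states (sketch, assuming hashable
    # states and an is_goal(state) predicate for the solved puzzle).
    frontier = deque([start])
    seen = {start}
    while frontier:
        state = frontier.popleft()
        if is_goal(state):
            return state
        for successor in generate_next_states(state):
            if successor not in seen:
                seen.add(successor)
                frontier.append(successor)
    return None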
def makeNearGoal():
    # Build a cube that is one quarter-turn away from the solved state,
    # then report which action reaches the goal.
    cube = State()
    cube.set_top([['W', 'W', 'W'], ['W', 'W', 'W'], ['W', 'W', 'W']])
    cube.set_bottom([['B', 'B', 'B'], ['B', 'B', 'B'], ['B', 'B', 'B']])
    cube.set_left([['O', 'O', 'O'], ['G', 'G', 'G'], ['G', 'G', 'G']])
    cube.set_right([['G', 'G', 'G'], ['O', 'O', 'O'], ['O', 'O', 'O']])
    cube.set_front([['R', 'R', 'R'], ['Y', 'Y', 'Y'], ['Y', 'Y', 'Y']])
    cube.set_back([['Y', 'Y', 'Y'], ['R', 'R', 'R'], ['R', 'R', 'R']])
    for action in cube.actions:
        new_s = move(cube, action)
        print(action)
        if new_s.isGoalState():
            print("executing the " + action +
                  " action resulted in the below goal state " + str(new_s))
def reward(self, state, action):
    # This reward function is a function approximation built from a small
    # set of features, in decreasing order of priority:
    #   1. number of solved sides
    #   2. number of pieces on their correct side
    # Each feature compares the successor state against the current one,
    # e.g. if num_solved_sides(next_state) > num_solved_sides(state) the
    # solved-sides feature is 1, otherwise 0.
    next_state = move(state, action)
    if next_state.isGoalState():
        print(state)
        print(next_state)
        print("REWARD IS GOAL")
        return 100
    reward = -0.1
    # Feature weights: 2 for gaining a solved side, 0.5 for getting more
    # pieces onto their correct side, per the priorities listed above.
    solved_sides = 2 * (num_solved_sides(next_state) > num_solved_sides(state))
    solved_pieces = 0.5 * (num_pieces_correct_side(next_state) > num_pieces_correct_side(state))
    # Small penalty for revisiting a state-action pair already in the Q-table.
    if (hash(next_state), action) in self.QV:
        reward -= 0.2
    reward += solved_sides
    reward += solved_pieces
    return reward
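
In a tabular Q-learning agent this shaping reward would feed a backup like the sketch below; update_q, alpha, and gamma are illustrative assumptions rather than part of the original class, and max_reward (defined earlier) stands in for the usual max-over-Q bootstrap term.

def update_q(self, state, action, alpha=0.1, gamma=0.9):
    # Hypothetical helper, not in the original class: one Q-learning style
    # backup driven by reward() and max_reward() above.
    r = self.reward(state, action)
    key = (hash(state), action)
    old = self.QV.get(key, 0.0)
    # max_reward() gives the best one-step reward from the successor state,
    # used here as a cheap stand-in for the max over Q(s', a').
    target = r + gamma * self.max_reward(state, action)
    self.QV[key] = old + alpha * (target - old)
    return move(state, action)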
def register_patterns(self):
    # Seed the Q-table for states up to six moves away from the goal.
    # For each reached state, the action used to reach it gets a positive
    # value and every other action the matching negative value; the
    # magnitudes shrink as the distance from the goal grows.
    levels = [
        (self.one_away, 10),   # goal successors
        (self.two_away, 6),    # successors of goal successors
        (self.three_away, 5),
        (self.four_away, 4),
        (self.five_away, 3),
        (self.six_away, 1),
    ]
    frontier = [State()]
    for states, value in levels:
        for s in frontier:
            for action in self.actions:
                s_ = move(s, action)
                states.append(s_)
                for action_ in self.actions:
                    self.QV[(hash(s_), action_)] = value if action_ == action else -value
        frontier = states
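
Once register_patterns has seeded self.QV, a greedy policy can be read straight from the table. The choose_action helper below is a hypothetical sketch that assumes the same QV and actions attributes.

def choose_action(self, state, default=0.0):
    # Hypothetical helper, not in the original class: pick the action with
    # the highest stored Q-value for this state, scoring unseen
    # state-action pairs with `default`.
    return max(self.actions,
               key=lambda a: self.QV.get((hash(state), a), default))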