def max_reward(self, state, action):
    # Return the best immediate reward obtainable from the state reached by
    # applying `action`. Results are cached in self.R, keyed by the new state.
    new_state = move(state, action)
    if new_state not in self.R:
        self.R[new_state] = []
        for a in self.actions:  # use `a`, not `action`, to avoid shadowing the parameter
            self.R[new_state].append(self.reward(new_state, a))
    return max(self.R[new_state])
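# Hedged sketch (not part of the original file): one plausible way max_reward
# could serve as the lookahead term in a temporal-difference update. The
# hyperparameters `alpha` and `gamma`, and the (hash, action) keying of self.QV,
# are assumptions based on the surrounding code; the project's actual update
# rule may differ.
def q_update_sketch(self, state, action, alpha=0.1, gamma=0.9):
    key = (state.__hash__(), action)
    old = self.QV.get(key, 0)
    # Target = immediate reward plus discounted best reward reachable from the successor.
    target = self.reward(state, action) + gamma * self.max_reward(state, action)
    self.QV[key] = old + alpha * (target - old)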
def generate_next_states(puzzle):
    # Apply every legal move to `puzzle` and collect the resulting states.
    next_states = []
    moves = find_possible_moves(puzzle)
    for m in moves:
        state = move(puzzle, m)
        next_states.append(state)
    return next_states
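# Hedged usage sketch (assumption, not original code): generate_next_states can
# drive a plain breadth-first search for cubes known to be only a few moves from
# solved, such as the fixture produced by makeNearGoal below. Assumes State
# objects are hashable and expose isGoalState().
from collections import deque

def bfs_to_goal_sketch(start, max_depth=3):
    frontier = deque([(start, 0)])
    seen = {start}
    while frontier:
        current, depth = frontier.popleft()
        if current.isGoalState():
            return current
        if depth >= max_depth:
            continue
        for nxt in generate_next_states(current):
            if nxt not in seen:
                seen.add(nxt)
                frontier.append((nxt, depth + 1))
    return None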
def makeNearGoal():
    # Build a cube that is one quarter-turn away from solved, then report
    # which action takes it to the goal state.
    cube = State()
    cube.set_top([['W', 'W', 'W'], ['W', 'W', 'W'], ['W', 'W', 'W']])
    cube.set_bottom([['B', 'B', 'B'], ['B', 'B', 'B'], ['B', 'B', 'B']])
    cube.set_left([['O', 'O', 'O'], ['G', 'G', 'G'], ['G', 'G', 'G']])
    cube.set_right([['G', 'G', 'G'], ['O', 'O', 'O'], ['O', 'O', 'O']])
    cube.set_front([['R', 'R', 'R'], ['Y', 'Y', 'Y'], ['Y', 'Y', 'Y']])
    cube.set_back([['Y', 'Y', 'Y'], ['R', 'R', 'R'], ['R', 'R', 'R']])
    for action in cube.actions:
        new_s = move(cube, action)
        print(action)
        if new_s.isGoalState():
            print("executing the " + action + " action resulted in the below goal state " + str(new_s))
def reward(self, state, action):
    # Function-approximation-style reward built from features of the successor
    # state, compared against the current state:
    #   * reaching the goal returns a large positive reward immediately
    #   * every other move starts from a small step cost of -0.1
    #   * losing solved sides relative to `state` costs an extra 2.0
    #   * losing correctly placed pieces relative to `state` costs an extra 0.5
    #   * revisiting a (state, action) pair already in the Q-table costs an extra 0.2
    next_state = move(state, action)
    if next_state.isGoalState():
        print(state)
        print(next_state)
        print("REWARD IS GOAL")
        return 100
    reward = -0.1
    solved_sides = 2 * (num_solved_sides(next_state) < num_solved_sides(state))
    solved_pieces = 0.5 * (num_pieces_correct_side(next_state) < num_pieces_correct_side(state))
    if (next_state.__hash__(), action) in self.QV:
        reward -= 0.2
    reward -= solved_sides
    reward -= solved_pieces
    return reward
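# Worked example of the reward range above (illustration, not original code):
#   goal reached                              -> 100
#   neutral move                              -> -0.1
#   move that loses a solved side             -> -0.1 - 2.0 = -2.1
#   move that loses correctly placed pieces   -> -0.1 - 0.5 = -0.6
#   (state, action) already in the Q-table    -> extra -0.2
#   worst single move                         -> -0.1 - 2.0 - 0.5 - 0.2 = -2.8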
def register_patterns(self):
    # Seed Q-values for states up to six moves from the solved cube. Starting
    # from the goal, expand the frontier one move at a time; for each state in
    # a frontier, the action that produced it gets a positive value and every
    # other action gets the negative of that value. Values shrink with distance.
    frontiers = [
        (self.one_away, 10),
        (self.two_away, 6),
        (self.three_away, 5),
        (self.four_away, 4),
        (self.five_away, 3),
        (self.six_away, 1),
    ]
    current = [State()]
    for next_frontier, value in frontiers:
        for s in current:
            for action in self.actions:
                s_ = move(s, action)
                next_frontier.append(s_)
                for action_ in self.actions:
                    self.QV[(s_.__hash__(), action_)] = value if action_ == action else -value
        current = next_frontier
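# Hedged sketch (assumption, not original code): one way the pattern-seeded
# Q-values could be consulted during play, via epsilon-greedy selection over
# self.QV, with unseen (state, action) pairs defaulting to 0. The project's
# actual policy may differ.
import random

def choose_action_sketch(self, state, epsilon=0.1):
    if random.random() < epsilon:
        return random.choice(self.actions)
    return max(self.actions, key=lambda a: self.QV.get((state.__hash__(), a), 0))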