Example 1
import numpy as np

# GridWorld, BOARD_ROWS and BOARD_COLS are assumed to be defined by the
# accompanying grid-world environment code.


class Agent:

    def __init__(self):
        self.states = []  # record position and action taken at the position
        self.actions = ["up", "down", "left", "right"]
        self.grid_world = GridWorld()
        self.isEnd = self.grid_world.isEnd
        self.lr = 0.2
        self.exp_rate = 0.3
        self.decay_gamma = 0.9

        # initial Q values
        self.Q_values = {}
        for i in range(BOARD_ROWS):
            for j in range(BOARD_COLS):
                self.Q_values[(i, j)] = {}
                for a in self.actions:
                    self.Q_values[(i, j)][a] = 0  # Q value is a dict of dict

    def chooseAction(self):
        # epsilon-greedy: explore with probability exp_rate, otherwise take
        # the action with the highest Q value in the current state
        mx_nxt_reward = -np.inf  # start below any reachable Q value so negative values are handled
        action = ""

        if np.random.uniform(0, 1) <= self.exp_rate:
            action = np.random.choice(self.actions)
        else:
            # greedy action
            for a in self.actions:
                current_position = self.grid_world.state
                nxt_reward = self.Q_values[current_position][a]
                if nxt_reward >= mx_nxt_reward:
                    action = a
                    mx_nxt_reward = nxt_reward
            # print("current pos: {}, greedy aciton: {}".format(self.grid_world.state, action))
        return action

    def takeAction(self, action):
        position = self.grid_world.nxtPosition(action)
        # update GridWorld
        return GridWorld(state=position)

    def reset(self):
        self.states = []
        self.grid_world = GridWorld()
        self.isEnd = self.grid_world.isEnd

    def play(self, rounds=10):
        i = 0
        while i < rounds:
            # at the end of an episode, propagate the reward back through the visited states
            if self.grid_world.isEnd:
                # back propagate
                reward = self.grid_world.giveReward()
                for a in self.actions:
                    self.Q_values[self.grid_world.state][a] = reward
                print("Game End Reward", reward)
                for s in reversed(self.states):
                    current_q_value = self.Q_values[s[0]][s[1]]
                    # update toward the discounted propagated value:
                    # Q(s, a) <- Q(s, a) + lr * (gamma * reward - Q(s, a))
                    reward = current_q_value + self.lr * (self.decay_gamma * reward - current_q_value)
                    self.Q_values[s[0]][s[1]] = round(reward, 3)
                self.reset()
                i += 1
            else:
                action = self.chooseAction()
                # append trace
                self.states.append([(self.grid_world.state), action])
                print("current position {} action {}".format(self.grid_world.state, action))
                # by taking the action, it reaches the next state
                self.grid_world = self.takeAction(action)
                # mark whether the new state is terminal
                self.grid_world.isEndFunc()
                print("nxt state", self.grid_world.state)
                print("---------------------")
                self.isEnd = self.grid_world.isEnd
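
A minimal usage sketch for Example 1, assuming the GridWorld class and the BOARD_ROWS / BOARD_COLS constants are provided by the accompanying grid-world environment code; the round count and the inspected cell (0, 0) are arbitrary choices for illustration.

if __name__ == "__main__":
    agent = Agent()
    agent.play(rounds=50)  # run 50 training episodes
    # inspect the learned Q values for one cell, e.g. (0, 0)
    print(agent.Q_values[(0, 0)])
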
Example 2
import numpy as np

# GridWorld, BOARD_ROWS and BOARD_COLS are assumed to be defined by the
# accompanying grid-world environment code.


class Agent:

    def __init__(self):
        self.states = []  # record position and action taken at the position
        self.actions = ["up", "down", "left", "right"]
        self.grid_world = GridWorld()
        self.isEnd = self.grid_world.isEnd
        self.lr = 0.2
        self.exp_rate = 0.3
        self.decay_gamma = 0.9

        # initial Q values
        self.Q_values = {}
        for i in range(BOARD_ROWS):
            for j in range(BOARD_COLS):
                self.Q_values[(i, j)] = {}
                for a in self.actions:
                    self.Q_values[(i, j)][a] = 0  # Q value is a dict of dict
        self.state_values_vec = np.zeros((len(self.Q_values)))
        self.policy_list = list()

    def chooseAction(self):
        # epsilon-greedy: explore with probability exp_rate, otherwise take
        # the action with the highest Q value in the current state
        mx_nxt_reward = -np.inf  # start below any reachable Q value so negative values are handled
        action = ""

        if np.random.uniform(0, 1) <= self.exp_rate:
            action = np.random.choice(self.actions)
        else:
            # greedy action
            for a in self.actions:
                current_position = self.grid_world.state
                nxt_reward = self.Q_values[current_position][a]
                if nxt_reward >= mx_nxt_reward:
                    action = a
                    mx_nxt_reward = nxt_reward
            # print("current pos: {}, greedy aciton: {}".format(self.grid_world.state, action))
        return action

    def takeAction(self, action):
        position = self.grid_world.nxtPosition(action)
        # update GridWorld
        return GridWorld(state=position)

    def reset(self):
        self.states = []
        self.grid_world = GridWorld()
        self.isEnd = self.grid_world.isEnd

    def play(self, rounds=10):
        i = 0
        histories = list()
        data_sample = 0
        while i < rounds:
            # at the end of an episode, propagate the reward back through the visited states
            if self.grid_world.isEnd:
                # back propagate
                reward = self.grid_world.giveReward()
                for a in self.actions:
                    self.Q_values[self.grid_world.state][a] = reward
                #print("Game End Reward", reward)
                # seed the backward pass with the terminal state; every action there
                # was just set to the same terminal reward, so 'right' is an arbitrary choice
                next_state = (self.grid_world.state, 'right')
                for s in reversed(self.states):
                    next_q_value = self.Q_values[next_state[0]][next_state[1]]
                    current_q_value = self.Q_values[s[0]][s[1]]
                    # update toward the discounted Q value of the next (state, action) actually taken:
                    # Q(s, a) <- Q(s, a) + lr * (gamma * Q(s', a') - Q(s, a))
                    reward = current_q_value + self.lr * (self.decay_gamma * next_q_value - current_q_value)
                    self.Q_values[s[0]][s[1]] = round(reward, 3)
                    next_state = s

                self.policy_list.append(self.make_policy_table_from_q())
                self.reset()

                i += 1
                # flatten the Q table into a (n_states, n_actions) array and take the
                # max over actions to obtain a state-value vector
                q_table = np.array([list(action_values.values())
                                    for action_values in self.Q_values.values()], dtype=float)
                q_value_numpy = np.max(q_table, axis=1)
                # convergence measure: change in the state-value vector since the previous episode
                diff = np.linalg.norm(self.state_values_vec - q_value_numpy)
                self.state_values_vec = q_value_numpy
                #print("iter {0}".format(i))
                #print("diff {0}".format(diff))
                #print("qvalue {0}".format(np.linalg.norm(q_value_numpy[:])))
                print("{0}".format(data_sample))
                value_scalar= np.linalg.norm(q_value_numpy[:])
                histories.append((value_scalar, diff, int(data_sample)))

                data_sample = 0
            else:
                action = self.chooseAction()
                # append trace
                self.states.append([(self.grid_world.state), action]) #s, a
                #print("current position {} action {}".format(self.grid_world.state, action))
                # by taking the action, it reaches the next state
                self.grid_world = self.takeAction(action)
                # mark whether the new state is terminal
                self.grid_world.isEndFunc()
                #print("nxt state", self.grid_world.state)
                #print("---------------------{0}".format(i))
                self.isEnd = self.grid_world.isEnd
                data_sample += 1

        return histories, self.policy_list

    def make_policy_table_from_q(self):
        # greedy policy snapshot: for each state, the index of the best action
        # in self.actions according to the current Q values
        convert_list = list()
        for action_values in self.Q_values.values():
            convert_list.append(np.argmax(np.array(list(action_values.values()), dtype=float)))
        return convert_list
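
A minimal usage sketch for Example 2 under the same assumptions. play() now returns the per-episode histories (norm of the state-value vector, its change since the previous episode, and the number of samples collected) together with a greedy-policy snapshot per episode, which can be used to monitor convergence; the round count is again arbitrary.

if __name__ == "__main__":
    agent = Agent()
    histories, policies = agent.play(rounds=50)
    # a shrinking diff indicates the state values are converging
    for value_norm, diff, n_samples in histories:
        print("|V| = {0:.3f}  diff = {1:.3f}  samples = {2}".format(value_norm, diff, n_samples))
    print("final greedy policy (action index per state):", policies[-1])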