Example #1
def execute_policy_iteration_test(test, output='console'):
    """
    Description
    -----------
    Function used to run the policy_iteration for
    the given test.

    Parameters
    ----------
    test: Test() \\
        -- A Test() instance with the information
        needed to run the policy_iteration.

    output: str \\
        -- A string that tells the function where
        its output is expected.

    Returns
    -------
    PolicyIteration() \\
        -- If no recognizable outputs is informed,
        returns the instance of the policy_iteration used.
    """

    policy_iteration = PolicyIteration(test)

    policy_iteration.run()

    if output in ['console', 'file']:
        output_processing(output, test, policy_iteration, 'PolicyIteration')

    return policy_iteration
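
A minimal usage sketch; how Test() is constructed is hypothetical here and depends on the surrounding project:

test = Test()  # hypothetical: build the Test() however the project expects
policy_iteration = execute_policy_iteration_test(test, output='console')  # prints the results
policy_iteration = execute_policy_iteration_test(test, output='')         # skips output, just returns the solver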
Example #2
    def __init__(self):
        super(GraphicDisplay, self).__init__()
        self.title('Policy Iteration')
        self.geometry('{0}x{1}'.format(HEIGHT * UNIT, HEIGHT * UNIT + 50))
        self.texts = []
        self.arrows = []
        self.util = Util()
        self.agent = PolicyIteration(self.util)
        self._build_env()
Example #3
    def clear(self):
        for i in self.texts:
            self.canvas.delete(i)

        for i in self.arrows:
            self.canvas.delete(i)

        self.canvas.delete(self.rectangle)
        self.rectangle = self.canvas.create_image(50,
                                                  50,
                                                  image=self.rectangle_image)
        self.agent = PolicyIteration(self.util)
Example #4

    def clear(self):
        if self.is_moving == 0:
            self.evaluation_count = 0
            self.improvement_count = 0
            for i in self.texts:
                self.canvas.delete(i)

            for i in self.arrows:
                self.canvas.delete(i)

            self.canvas.delete(self.rectangle)
            self.rectangle = self.canvas.create_image(
                50, 50, image=self.rectangle_image)
            self.agent = PolicyIteration(self.util)
Example #5
def main(args):
    # resolve path to world map definition
    if not args.world:
        world_map_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'world_map.txt')
    else:
        world_map_path = args.world

    print("Reading world from %s" % world_map_path)
    if not os.path.exists(world_map_path):
        raise IOError(
            "World map definition not found at its expected path: %s" %
            world_map_path)

    world = World(world_map_path)
    visualizer = Visualizer(world)

    # Value Iteration
    value_iteration = ValueIteration(world,
                                     one_step_cost_v1,
                                     discount_factor=args.gamma,
                                     eps=10e-10)
    value_iteration.execute()
    optimal_policy = value_iteration.extract_policy()

    fig_vi = plt.figure()
    visualizer.draw(fig_vi, optimal_policy, value_iteration.value_fn,
                    "Value Iteration (gamma = %.2f)" % args.gamma)

    # Policy Iteration
    policy_iteration = PolicyIteration(world,
                                       one_step_cost_v1,
                                       discount_factor=args.gamma)
    value_fn = policy_iteration.execute()

    fig_pi = plt.figure()
    visualizer.draw(fig_pi, policy_iteration.policy, value_fn,
                    "Policy Iteration (gamma = %.2f)" % args.gamma)

    plt.show()
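
main(args) only reads args.world and args.gamma, so a small argparse entry point along these lines would drive it; the flag names below are assumptions, not part of the example:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Grid-world value/policy iteration demo')
    parser.add_argument('--world', default=None,
                        help='path to a world map definition (defaults to world_map.txt next to the script)')
    parser.add_argument('--gamma', type=float, default=0.9, help='discount factor')
    main(parser.parse_args())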
Example #6
def main():
    aiType = 3
    worldSize = 6
    game = Game(aiType, worldSize)

    agent = Agent()
    pc = None
    policy = None
    if aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 3:
        policy = qLearningAgent()
        pc = PolicyConfiguration(inpRewards=[0, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="QLValues.p",
                                 inpTrainingLimit=1000)
    elif aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
                                 inpDiscounts=[0.9, .2, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="AQLWeights.json",
                                 inpTrainingLimit=500)
    else:
        policy = ValueIteration()
        pc = PolicyConfiguration()
    policy.config = pc

    agent.policy = policy

    game.agent = agent

    game.mainLoop()
Example #7
    def __init__(self, can, direction, inpAIType):
        self.flag = 1
        self.can = can
        self.direction = direction
        self.aiType = inpAIType
        self.agent = Agent()
        pc = None
        policy = None
        # inpRewards = [food reward, hazard reward, living reward,
        #               good-location reward, bad-location reward]
        #   The good/bad-location rewards are only used for Q-learning; they
        #   were an attempt to encourage graph searching, are not really
        #   used, and can give wonky results.
        # inpDiscounts = [gamma discount, alpha discount, epsilon explore chance]
        # inpStochastic = [forward action [forward, left, right chances],
        #                  left action    [forward, left, right chances],
        #                  right action   [forward, left, right chances]]
        # inpFile = file for weights or Q-values
        if self.aiType == 1:
            policy = ValueIteration()
            pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                     inpDiscounts=[1, .1, .1],
                                     inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                    [0, 0, 100]])
        elif self.aiType == 2:
            policy = PolicyIteration()
            pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                     inpDiscounts=[1, .1, .1],
                                     inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                    [0, 0, 100]])
        elif self.aiType == 3:
            policy = qLearningAgent()
            # Risk aversion (rarely leaving the best path) seems to work best.
            # This configuration also seemed to work:
            # pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
            #                          inpDiscounts=[0.9, .2, .1],
            #                          inpStochastic=[[100, 0, 0], [0, 100, 0],
            #                                         [0, 0, 100]])
            pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, 0],
                                     inpDiscounts=[0.9, .2, .1],
                                     inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                    [0, 0, 100]],
                                     inpFile=None,
                                     inpTrainingLimit=20000)
        elif self.aiType == 4:
            policy = approximateQLearning()
            pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
                                     inpDiscounts=[0.9, .2, .1],
                                     inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                    [0, 0, 100]],
                                     inpFile=None,
                                     inpTrainingLimit=5000)
        else:
            policy = ValueIteration()
            pc = PolicyConfiguration()
        policy.config = pc

        self.agent.policy = policy
Example #8
    for i in range(size):
        for j in range(size):
            cell_type = cell_matrix[j][i]

            if cell_type == CellType.WHOOPING:
                is_terminal = True
                reward = -10
            elif cell_type == CellType.KFC:
                is_terminal = True
                reward = 10
            else:
                is_terminal = False
                reward = -1
            cell = CellState((i, j), reward, cell_type, is_terminal)
            env.place_cell(i, j, cell)
            states.append(cell)
    return env, states


env, states = create_game_env()
agent = Agent("policy_eval", (0, 0))

policy_iter_algo = PolicyIteration(states)
policy = policy_iter_algo.run()

game = Game(Config, Controller, env, agent, policy)
# initiate env
game.draw_env()
pygame.display.update()
game.start()
Example #9
            if i < row_num - 2:
                trans_probs[i * col_num + j][down][(i + 1) * col_num + j] = p
            else:
                trans_probs[i * col_num + j][down][i * col_num + j] = p
    # Zero out all outgoing transitions for states 0, 15, 16, 18 and 19
    trans_probs[0][:] = 0
    trans_probs[15][:] = 0
    trans_probs[16][:] = 0
    trans_probs[18][:] = 0
    trans_probs[19][:] = 0

    # Hand-wire the transitions of state 17
    trans_probs[17][up][13] = 1
    trans_probs[17][left][12] = 1
    trans_probs[17][left][16] = 0
    trans_probs[17][right][14] = 1
    trans_probs[17][right][18] = 0
    trans_probs[17][down][17] = 1

    trans_probs[13][down][17] = 1
    trans_probs[13][down][13] = 0

    policy_iteration = PolicyIteration(trans_probs, rewards, action_probs,
                                       0.00001, states_num, actions_num, 1)
    policy_iteration.policy_evaluation()
    for i in range(row_num):
        for j in range(col_num):
            print("%.2f" % policy_iteration.v[i * col_num + j], end=" ")
        print()
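
The policy_evaluation() call above sweeps the Bellman expectation backup over all states until the value table stops changing. A self-contained sketch of that inner loop, written against plain NumPy arrays and assuming rewards are indexed by the successor state (the reward convention of the PolicyIteration class used above is not shown):

import numpy as np

def evaluate_policy(trans_probs, rewards, action_probs, gamma, theta=1e-5):
    # trans_probs[s][a][s2] = P(s2 | s, a), action_probs[s][a] = pi(a | s)
    n_states, n_actions = action_probs.shape
    v = np.zeros(n_states)
    while True:
        delta = 0.0
        for s in range(n_states):
            new_v = sum(action_probs[s][a] *
                        sum(trans_probs[s][a][s2] * (rewards[s2] + gamma * v[s2])
                            for s2 in range(n_states))
                        for a in range(n_actions))
            delta = max(delta, abs(new_v - v[s]))
            v[s] = new_v
        if delta < theta:
            return v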
Example #10
from config import Config
from policy_iteration import PolicyIteration

if __name__ == "__main__":
    conf = Config()
    pi = PolicyIteration(conf, "pi", True)
Example #11
# Sanity check
transitions = grid_mdp.T(grid_mdp.initial_state, action=1)
print(transitions)

# Run value iteration
# vi = ValueIteration(grid_mdp)
# print('Running value iteration')
# vi.run()
# vi.plot_learning_curve()
#
# print('\nFinal V table:')
# grid_mdp.print_values(vi.V)
#
# # Print the optimal policy
# policy = vi.get_best_policy()
# print('\nBest policy:')
# grid_mdp.print_policy(policy)

# Run policy iteration
pi = PolicyIteration(grid_mdp)
print('Running policy iteration')
pi.run()

# Print the optimal policy
print('\nBest policy:')
grid_mdp.print_policy(pi.policy)

# Save policy to file
with open('results/policy.h5', 'wb') as file:
    pickle.dump(pi.policy, file)
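
Loading the saved policy back later is the mirror operation (assuming the same file path):

with open('results/policy.h5', 'rb') as file:
    policy = pickle.load(file)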
Example #12
    t = L2norm(muE - mu_i_1)

    print("===== t = {} =====".format(t))

    if t <= epsilon:
        break

    #R = np.zeros(x_size*y_size)

    # Compute R
    R = Rcalc(w)

    #print("===== R: =====")
    #print(R)
    #print("")
    np.savetxt('R.csv', R, delimiter=',')

    # Obtain pi_selected from R via reinforcement learning (policy iteration)
    state = np.zeros(11)
    for hoge in range(11):
        state[hoge] = hoge
    pi_selected = PolicyIteration(state, P, R, gamma)
    # print(pi_selected)
    # Compute mu(pi_selected)
    mu_old = Mu(pi_selected)
    mu_i_2 = mu_i_1
    # Increment i and continue the loop
    i = i + 1
print(R)
##########################################################################
Example #13
class GraphicDisplay(tk.Tk):
    def __init__(self):
        super(GraphicDisplay, self).__init__()
        self.title('Policy Iteration')
        self.geometry('{0}x{1}'.format(HEIGHT * UNIT, HEIGHT * UNIT + 50))
        self.texts = []
        self.arrows = []
        self.util = Util()
        self.agent = PolicyIteration(self.util)
        self._build_env()

    def _build_env(self):
        self.canvas = tk.Canvas(self,
                                bg='white',
                                height=HEIGHT * UNIT,
                                width=WIDTH * UNIT)

        # Buttons
        iteration_button = tk.Button(self,
                                     text="Evaluation",
                                     command=self.policy_evaluation)
        iteration_button.configure(width=10, activebackground="#33B5E5")
        self.canvas.create_window(WIDTH * UNIT * 0.13, (HEIGHT * UNIT) + 10,
                                  window=iteration_button)

        policy_button = tk.Button(self,
                                  text="Improvement",
                                  command=self.policy_improvement)
        policy_button.configure(width=10, activebackground="#33B5E5")
        self.canvas.create_window(WIDTH * UNIT * 0.37, (HEIGHT * UNIT) + 10,
                                  window=policy_button)

        policy_button = tk.Button(self,
                                  text="move",
                                  command=self.move_by_policy)
        policy_button.configure(width=10, activebackground="#33B5E5")
        self.canvas.create_window(WIDTH * UNIT * 0.62, (HEIGHT * UNIT) + 10,
                                  window=policy_button)

        policy_button = tk.Button(self, text="clear", command=self.clear)
        policy_button.configure(width=10, activebackground="#33B5E5")
        self.canvas.create_window(WIDTH * UNIT * 0.87, (HEIGHT * UNIT) + 10,
                                  window=policy_button)

        # create grids
        for col in range(0, WIDTH * UNIT, UNIT):  # 0~400 by 80
            x0, y0, x1, y1 = col, 0, col, HEIGHT * UNIT
            self.canvas.create_line(x0, y0, x1, y1)
        for row in range(0, HEIGHT * UNIT, UNIT):  # 0~400 by 80
            x0, y0, x1, y1 = 0, row, HEIGHT * UNIT, row
            self.canvas.create_line(x0, y0, x1, y1)

        # image_load
        self.up_image = ImageTk.PhotoImage(
            Image.open("../resources/up.png").resize((13, 13)))
        self.right_image = ImageTk.PhotoImage(
            Image.open("../resources/right.png").resize((13, 13)))
        self.left_image = ImageTk.PhotoImage(
            Image.open("../resources/left.png").resize((13, 13)))
        self.down_image = ImageTk.PhotoImage(
            Image.open("../resources/down.png").resize((13, 13)))
        self.rectangle_image = ImageTk.PhotoImage(
            Image.open("../resources/rectangle.png").resize((65, 65),
                                                            Image.ANTIALIAS))
        self.triangle_image = ImageTk.PhotoImage(
            Image.open("../resources/triangle.png").resize((65, 65)))
        self.circle_image = ImageTk.PhotoImage(
            Image.open("../resources/circle.png").resize((65, 65)))

        # add image to canvas
        self.rectangle = self.canvas.create_image(50,
                                                  50,
                                                  image=self.rectangle_image)
        self.triangle1 = self.canvas.create_image(250,
                                                  150,
                                                  image=self.triangle_image)
        self.triangle2 = self.canvas.create_image(150,
                                                  250,
                                                  image=self.triangle_image)
        self.circle = self.canvas.create_image(250,
                                               250,
                                               image=self.circle_image)

        # add reward text
        self.text_reward(2, 2, "R : 1.0")
        self.text_reward(1, 2, "R : -1.0")
        self.text_reward(2, 1, "R : -1.0")

        # pack all
        self.canvas.pack()

    def clear(self):
        for i in self.texts:
            self.canvas.delete(i)

        for i in self.arrows:
            self.canvas.delete(i)

        self.canvas.delete(self.rectangle)
        self.rectangle = self.canvas.create_image(50,
                                                  50,
                                                  image=self.rectangle_image)
        self.agent = PolicyIteration(self.util)

    def text_value(self,
                   row,
                   col,
                   contents,
                   font='Helvetica',
                   size=10,
                   style='normal',
                   anchor="nw"):
        origin_x, origin_y = 85, 70
        x, y = origin_y + (UNIT * col), origin_x + (UNIT * row)
        font = (font, str(size), style)
        return self.texts.append(
            self.canvas.create_text(x,
                                    y,
                                    fill="black",
                                    text=contents,
                                    font=font,
                                    anchor=anchor))

    def text_reward(self,
                    row,
                    col,
                    contents,
                    font='Helvetica',
                    size=10,
                    style='normal',
                    anchor="nw"):
        origin_x, origin_y = 5, 5
        x, y = origin_y + (UNIT * col), origin_x + (UNIT * row)
        font = (font, str(size), style)
        return self.canvas.create_text(x,
                                       y,
                                       fill="black",
                                       text=contents,
                                       font=font,
                                       anchor=anchor)

    def rectangle_move(self, action):
        base_action = np.array([0, 0])
        self.render()
        if action[0] == 1:  # down
            base_action[1] += UNIT
        elif action[0] == -1:  # up
            base_action[1] -= UNIT
        elif action[1] == 1:  # right
            base_action[0] += UNIT
        elif action[1] == -1:  # left
            base_action[0] -= UNIT

        self.canvas.move(self.rectangle, base_action[0],
                         base_action[1])  # move agent

    def rectangle_location(self):
        temp = self.canvas.coords(self.rectangle)
        x = (temp[0] / 100) - 0.5
        y = (temp[1] / 100) - 0.5
        return int(y), int(x)

    def move_by_policy(self):
        self.canvas.delete(self.rectangle)
        self.rectangle = self.canvas.create_image(50,
                                                  50,
                                                  image=self.rectangle_image)
        while len(self.agent.get_policy_table()[self.rectangle_location()[0]][
                self.rectangle_location()[1]]) != 0:
            self.after(
                100,
                self.rectangle_move(
                    self.agent.get_action([
                        self.rectangle_location()[0],
                        self.rectangle_location()[1]
                    ])))

    def draw_one_arrow(self, col, row, action):

        if col == 2 and row == 2:
            return

        if action[0] > 0:  # up
            origin_x, origin_y = 50 + (UNIT * row), 10 + (UNIT * col)
            self.arrows.append(
                self.canvas.create_image(origin_x,
                                         origin_y,
                                         image=self.up_image))

        if action[1] > 0:  # down
            origin_x, origin_y = 50 + (UNIT * row), 90 + (UNIT * col)
            self.arrows.append(
                self.canvas.create_image(origin_x,
                                         origin_y,
                                         image=self.down_image))

        if action[2] > 0:  # left
            origin_x, origin_y = 10 + (UNIT * row), 50 + (UNIT * col)
            self.arrows.append(
                self.canvas.create_image(origin_x,
                                         origin_y,
                                         image=self.left_image))

        if action[3] > 0:  # right
            origin_x, origin_y = 90 + (UNIT * row), 50 + (UNIT * col)
            self.arrows.append(
                self.canvas.create_image(origin_x,
                                         origin_y,
                                         image=self.right_image))

    def draw_from_policy(self, policy_table):
        for i in range(HEIGHT):
            for j in range(WIDTH):
                self.draw_one_arrow(i, j, policy_table[i][j])

    def print_value_table(self, value_table):
        for i in range(WIDTH):
            for j in range(HEIGHT):
                self.text_value(i, j, value_table[i][j])

    def render(self):
        time.sleep(0.1)
        self.canvas.tag_raise(self.rectangle)
        self.update()

    def policy_evaluation(self):
        for i in self.texts:
            self.canvas.delete(i)
        self.agent.policy_evaluation()
        self.print_value_table(self.agent.get_value_table())

    def policy_improvement(self):
        for i in self.arrows:
            self.canvas.delete(i)
        self.agent.policy_improvement()
        self.draw_from_policy(self.agent.get_policy_table())
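
The class is usually driven from a small entry point; a sketch, assuming Util and PolicyIteration are importable from the surrounding project:

if __name__ == "__main__":
    grid_world = GraphicDisplay()  # builds the canvas, buttons, and agent
    grid_world.mainloop()          # hand control to the Tk event loop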