Code example #1
def execute_policy_iteration_test(test, output='console'):
    """
    Description
    -----------
    Function used to run the policy_iteration for
    the given test.

    Parameters
    ----------
    test: Test() \\
        -- A Test() instance with the information
        needed to run the policy_iteration.

    output: str \\
        -- A string that tells the function where
        its output is expected.

    Returns
    -------
    PolicyIteration() \\
        -- If no recognizable outputs is informed,
        returns the instance of the policy_iteration used.
    """

    policy_iteration = PolicyIteration(test)

    policy_iteration.run()

    if output in ['console', 'file']:
        output_processing(output, test, policy_iteration, 'PolicyIteration')

    return policy_iteration
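
The PolicyIteration class itself is not included in any of these snippets. As a rough point of reference only, tabular policy iteration alternates policy evaluation and greedy policy improvement until the policy stops changing; the minimal sketch below illustrates that loop under assumed inputs (the names P, R, gamma and theta are hypothetical and are not taken from the projects shown here).

import numpy as np

def policy_iteration(P, R, gamma=0.9, theta=1e-8):
    """Minimal tabular policy iteration (illustrative sketch).

    P : (S, A, S) array of transition probabilities.
    R : (S, A) array of expected immediate rewards.
    Returns the greedy policy and its value function.
    """
    n_states, n_actions, _ = P.shape
    policy = np.zeros(n_states, dtype=int)
    V = np.zeros(n_states)
    while True:
        # Policy evaluation: sweep the Bellman expectation backup until convergence.
        while True:
            delta = 0.0
            for s in range(n_states):
                v_new = R[s, policy[s]] + gamma * P[s, policy[s]] @ V
                delta = max(delta, abs(v_new - V[s]))
                V[s] = v_new
            if delta < theta:
                break
        # Policy improvement: act greedily with respect to the current values.
        stable = True
        for s in range(n_states):
            q = R[s] + gamma * P[s] @ V
            best = int(np.argmax(q))
            stable = stable and best == policy[s]
            policy[s] = best
        if stable:
            return policy, V
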
Code example #2
 def __init__(self):
     super(GraphicDisplay, self).__init__()
     self.title('Policy Iteration')
     self.geometry('{0}x{1}'.format(HEIGHT * UNIT, HEIGHT * UNIT + 50))
     self.texts = []
     self.arrows = []
     self.util = Util()
     self.agent = PolicyIteration(self.util)
     self._build_env()
Code example #3
    def clear(self):
        for i in self.texts:
            self.canvas.delete(i)

        for i in self.arrows:
            self.canvas.delete(i)

        self.canvas.delete(self.rectangle)
        self.rectangle = self.canvas.create_image(50,
                                                  50,
                                                  image=self.rectangle_image)
        self.agent = PolicyIteration(self.util)
Code example #4
    def clear(self):

        if self.is_moving == 0:
            self.evaluation_count = 0
            self.improvement_count = 0
            for i in self.texts:
                self.canvas.delete(i)

            for i in self.arrows:
                self.canvas.delete(i)

            self.canvas.delete(self.rectangle)
            self.rectangle = self.canvas.create_image(
                50, 50, image=self.rectangle_image)
            self.agent = PolicyIteration(self.util)
Code example #5
File: mainGame.py  Project: harkaranbrar7/AI-Snake
def main():
    aiType = 3
    worldSize = 6
    game = Game(aiType, worldSize)

    agent = Agent()
    pc = None
    policy = None
    if aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 3:
        policy = qLearningAgent()
        pc = PolicyConfiguration(inpRewards=[0, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="QLValues.p",
                                 inpTrainingLimit=1000)
    elif aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
                                 inpDiscounts=[0.9, .2, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="AQLWeights.json",
                                 inpTrainingLimit=500)
    else:
        policy = ValueIteration()
        pc = PolicyConfiguration()
    policy.config = pc

    agent.policy = policy

    game.agent = agent

    game.mainLoop()
Code example #6
File: main.py  Project: Ali2500/exact-VI-PI
def main(args):
    # resolve path to world map definition
    if not args.world:
        world_map_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'world_map.txt')
    else:
        world_map_path = args.world

    print("Reading world from %s" % world_map_path)
    if not os.path.exists(world_map_path):
        raise IOError(
            "World map definition not found at its expected path: %s" %
            world_map_path)

    world = World(world_map_path)
    visualizer = Visualizer(world)

    # Value Iteration
    value_iteration = ValueIteration(world,
                                     one_step_cost_v1,
                                     discount_factor=args.gamma,
                                     eps=10e-10)
    value_iteration.execute()
    optimal_policy = value_iteration.extract_policy()

    fig_vi = plt.figure()
    visualizer.draw(fig_vi, optimal_policy, value_iteration.value_fn,
                    "Value Iteration (gamma = %.2f)" % args.gamma)

    # Policy Iteration
    policy_iteration = PolicyIteration(world,
                                       one_step_cost_v1,
                                       discount_factor=args.gamma)
    value_fn = policy_iteration.execute()

    fig_pi = plt.figure()
    visualizer.draw(fig_pi, policy_iteration.policy, value_fn,
                    "Policy Iteration (gamma = %.2f)" % args.gamma)

    plt.show()
Code example #7
 def __init__(self, can, direction, inpAIType):
     self.flag = 1
     self.can = can
     self.direction = direction
     self.aiType = inpAIType
     self.agent = Agent()
     pc = None
     policy = None
     # inpRewards = [food reward, hazard reward, living reward, good location reward, bad location reward]
     #   (the good/bad location rewards are only used for Q-learning; they were an attempt
     #   to encourage graph searching, are not really used, and can give wonky results)
     # inpDiscounts = [gamma discount, alpha discount, epsilon explore chance]
     # inpStochastic = [forward action [forward chance, left chance, right chance],
     #                  left action    [forward chance, left chance, right chance],
     #                  right action   [forward chance, left chance, right chance]]
     # inpFile = file for weights or Q-values
     if self.aiType == 1:
         policy = ValueIteration()
         pc = PolicyConfiguration(inpRewards = [1,-1,0,10,-1], inpDiscounts = [1,.1,.1], inpStochastic = [[100,0,0],[0,100,0],[0,0,100]])
     elif self.aiType == 2:
         policy = PolicyIteration()
         pc = PolicyConfiguration(inpRewards = [1,-1,0,10,-1], inpDiscounts = [1,.1,.1], inpStochastic = [[100,0,0],[0,100,0],[0,0,100]])
     elif self.aiType == 3:
         policy = qLearningAgent()
         # risk aversion (i.e. rarely going off the best path) seems to work best
         # this configuration also worked:
         # pc = PolicyConfiguration(inpRewards = [2,-1,0,0,-1], inpDiscounts = [0.9,.2,.1], inpStochastic = [[100,0,0],[0,100,0],[0,0,100]])
         pc = PolicyConfiguration(inpRewards = [2,-1,0,0,0], inpDiscounts = [0.9,.2,.1], inpStochastic = [[100,0,0],[0,100,0],[0,0,100]], inpFile = None, inpTrainingLimit = 20000)
     elif self.aiType == 4:
         policy = approximateQLearning()
         pc = PolicyConfiguration(inpRewards = [2,-1,0,0,-1], inpDiscounts = [0.9,.2,.1], inpStochastic = [[100,0,0],[0,100,0],[0,0,100]], inpFile = None, inpTrainingLimit = 5000)
     else:
         policy = ValueIteration()
         pc = PolicyConfiguration()
     policy.config = pc
         
     self.agent.policy = policy
Code example #8
    for i in range(size):
        for j in range(size):
            cell_type = cell_matrix[j][i]

            if cell_type == CellType.WHOOPING:
                is_terminal = True
                reward = -10
            elif cell_type == CellType.KFC:
                is_terminal = True
                reward = 10
            else:
                is_terminal = False
                reward = -1
            cell = CellState((i, j), reward, cell_type, is_terminal)
            env.place_cell(i, j, cell)
            states.append(cell)
    return env, states


env, states = create_game_env()
agent = Agent("policy_eval", (0, 0))

policy_iter_algo = PolicyIteration(states)
policy = policy_iter_algo.run()

game = Game(Config, Controller, env, agent, policy)
# initiate env
game.draw_env()
pygame.display.update()
game.start()
Code example #9
            if i < row_num - 2:
                trans_probs[i * col_num + j][down][(i + 1) * col_num + j] = p
            else:
                trans_probs[i * col_num + j][down][i * col_num + j] = p
    # states 0, 15, 16, 18 and 19 get all of their outgoing transition probabilities set to zero
    trans_probs[0][:] = 0
    trans_probs[15][:] = 0
    trans_probs[16][:] = 0
    trans_probs[18][:] = 0
    trans_probs[19][:] = 0
    trans_probs[17][up][13] = 1
    trans_probs[17][left][12] = 1
    trans_probs[17][left][16] = 0
    trans_probs[17][right][14] = 1
    trans_probs[17][right][18] = 0
    trans_probs[17][down][17] = 1

    trans_probs[13][down][17] = 1
    trans_probs[13][down][13] = 0

    policy_iteration = PolicyIteration(trans_probs, rewards, action_probs,
                                       0.00001, states_num, actions_num, 1)
    policy_iteration.policy_evaluation()
    for i in range(row_num):
        for j in range(col_num):
            print("%.2f" % policy_iteration.v[i * col_num + j], end=" ")
        print()
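
The policy_evaluation() call above is not defined in the snippet; presumably it performs iterative policy evaluation of the stochastic policy described by action_probs, sweeping until the value change falls below the threshold passed to the constructor (0.00001 here). A minimal sketch of that backup, assuming the shapes (S, A, S) for trans_probs, (S,) for rewards and (S, A) for action_probs:

import numpy as np

def evaluate_policy(trans_probs, rewards, action_probs, theta=1e-5, gamma=1.0):
    """Iterative policy evaluation for a fixed stochastic policy (illustrative sketch)."""
    n_states = trans_probs.shape[0]
    v = np.zeros(n_states)
    while True:
        # v_new[s] = r(s) + gamma * sum_a pi(a|s) * sum_s' P(s'|s,a) * v[s']
        v_new = rewards + gamma * np.einsum('sa,sap,p->s', action_probs, trans_probs, v)
        if np.max(np.abs(v_new - v)) < theta:
            return v_new
        v = v_new
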
Code example #10
from config import Config
from policy_iteration import PolicyIteration

if __name__ == "__main__":
    conf = Config()
    pi = PolicyIteration(conf, "pi", True)
Code example #11
# Sanity check
transitions = grid_mdp.T(grid_mdp.initial_state, action=1)
print(transitions)

# Run value iteration
# vi = ValueIteration(grid_mdp)
# print('Running value iteration')
# vi.run()
# vi.plot_learning_curve()
#
# print('\nFinal V table:')
# grid_mdp.print_values(vi.V)
#
# # Print the optimal policy
# policy = vi.get_best_policy()
# print('\nBest policy:')
# grid_mdp.print_policy(policy)

# Run policy iteration
pi = PolicyIteration(grid_mdp)
print('Running policy iteration')
pi.run()

# Print the optimal policy
print('\nBest policy:')
grid_mdp.print_policy(pi.policy)

# Save policy to file
with open('results/policy.h5', 'wb') as file:
    pickle.dump(pi.policy, file)
Code example #12
File: markov_irl.py  Project: d-km/markov_irl
    t = L2norm(muE - mu_i_1)

    print("===== t = {} =====".format(t))

    if t <= epsilon:
        break

    #R = np.zeros(x_size*y_size)

    # compute R
    R = Rcalc(w)

    #print("===== R: =====")
    #print(R)
    #print("")
    np.savetxt('R.csv', R, delimiter=',')

    # obtain pi_selected from R via reinforcement learning (policy iteration)
    state = np.arange(11, dtype=float)  # state indices 0..10
    pi_selected = PolicyIteration(state, P, R, gamma)
    # print(pi_selected)
    # compute mu(pi_selected)
    mu_old = Mu(pi_selected)
    mu_i_2 = mu_i_1
    # increment i and continue the loop
    i = i + 1
print(R)
##########################################################################