Ejemplo n.º 1
0
def q3():
    """Run one simulation driven by a ``RandomPolicy``.

    Builds an ``Environment`` with source (0, 0) and target (10, 10),
    wraps it together with a fresh ``RandomPolicy`` in a ``Simulation``
    and calls ``simulate()`` on it once.

    Returns:
        None.
    """
    start, goal = (0, 0), (10, 10)
    grid = environment.Environment(start, goal)
    simulation = Simulation.Simulation(grid, randomPolicy.RandomPolicy())
    simulation.simulate()
Ejemplo n.º 2
0
def q4():
    """Simulate the random, worse and better policies on one shared grid.

    The environment uses source (0, 0) and a fixed target (10, 10).
    ``BetterPolicy`` is the only policy that is handed the target.
    Simulations are run in the order random -> worse -> better.

    Returns:
        None.
    """
    start = (0, 0)
    goal = (10, 10)  # fixed target
    grid = environment.Environment(start, goal)

    # Policies are instantiated up front, in the same order as before
    # (worse, random, better), independent of the order they are run in.
    worse = worsePolicy.WorsePolicy()
    rand = randomPolicy.RandomPolicy()
    better = betterPolicy.BetterPolicy(goal)

    for policy in (rand, worse, better):
        Simulation.Simulation(grid, policy).simulate()
Ejemplo n.º 3
0
def q2():
    """Step through the environment using actions from a ``ManualPolicy``.

    Starting from a ``State`` at the source (0, 0) of an environment whose
    target is (10, 10), repeatedly asks the manual policy for an action,
    applies it through ``Simulation.transition`` and prints the resulting
    state coordinates and reward.

    NOTE: the loop has no exit condition, so this function only stops when
    an exception (e.g. ``KeyboardInterrupt``) propagates out of it.
    """
    start, goal = (0, 0), (10, 10)
    grid = environment.Environment(start, goal)
    state = State.State(*start, grid)
    policy = manualPolicy.ManualPolicy()
    simulation = Simulation.Simulation(grid, policy)

    while True:
        chosen = Action.Action(policy.get_action(state))
        state, reward = simulation.transition(state, chosen)
        print("Next state : ", state.x, " ", state.y)
        print("Reward obtained : ", reward)
Ejemplo n.º 4
0
def q1():
    """Apply a single user-chosen action to a user-chosen state.

    Reads two integers (x, then y) and an action string from standard
    input, performs one ``Simulation.transition`` on an environment with
    source (0, 0) and target (10, 10), and prints the resulting state
    coordinates and the reward.

    Returns:
        None.

    Raises:
        ValueError: if either coordinate typed by the user is not an
            integer literal (raised by ``int``).
    """
    grid = environment.Environment((0, 0), (10, 10))

    print("Enter current state : ")
    x_coord = int(input())
    y_coord = int(input())
    state = State.State(x_coord, y_coord, grid)

    print("Enter desired action : ")
    chosen = Action.Action(input())

    # The simulation is built with a RandomPolicy, but only transition()
    # is called here, with the action typed in by the user.
    simulation = Simulation.Simulation(grid, randomPolicy.RandomPolicy())
    next_state, reward = simulation.transition(state, chosen)

    print("Next state : ", next_state.x, " ", next_state.y)
    print("Reward obtained : ", reward)
Ejemplo n.º 5
0
def q5():
    """Simulate four policies against a randomly placed target.

    The source is (0, 0); each target coordinate is drawn with
    ``randint(0, 10)``. Simulations are run in the order
    random -> learned -> worse -> better, after which the learned
    policy's ``print_lookup()`` is called.

    Returns:
        None.
    """
    source = (0, 0)
    # random target
    target = (randint(0, 10), randint(0, 10))
    my_grid = environment.Environment(source, target)

    worse_policy = worsePolicy.WorsePolicy()
    random_policy = randomPolicy.RandomPolicy()
    better_policy = betterPolicy.BetterPolicy(target)
    learned_policy = learnedPolicy.LearnedPolicy()

    simulation_object_random = Simulation.Simulation(my_grid, random_policy)
    simulation_object_random.simulate()

    simulation_object_learned = Simulation.Simulation(my_grid, learned_policy)
    simulation_object_learned.simulate()

    simulation_object_worse = Simulation.Simulation(my_grid, worse_policy)
    simulation_object_worse.simulate()

    # Bound to its own name: this run uses better_policy, and previously it
    # overwrote the handle to the learned-policy simulation above.
    simulation_object_better = Simulation.Simulation(my_grid, better_policy)
    simulation_object_better.simulate()

    learned_policy.print_lookup()