Example #1
def setup_predator():
    # Relies on i, pred_locs, pred_plearners, pred_plearner_params, field and
    # predators from the enclosing scope (see the loop sketch below).
    predator = Predator("Predator%d" % i, pred_locs[i])
    predator.plearner = pred_plearners[i](field=field, agent=predator,
                                          **pred_plearner_params[i])  # TODO: check whether *array works here
    field.add_player(predator)
    predators.append(predator)
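
The snippet above is a fragment: i, pred_locs, pred_plearners, pred_plearner_params, field and predators must already exist in the enclosing scope. A minimal sketch of that surrounding setup loop, with placeholder locations, learner factories and parameter dicts standing in for the project's actual configuration:

# Hypothetical context for setup_predator(); the locations, learner factories
# and parameter dicts below are placeholders, not the project's real values.
field = Field(11, 11)
predators = []
pred_locs = [(0, 0), (10, 10), (0, 10)]
pred_plearners = [Wolf_phc.create_greedy_plearner] * len(pred_locs)
pred_plearner_params = [{} for _ in pred_locs]

for i in range(len(pred_locs)):
    setup_predator()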
Example #2
def run(n_episodes=1000, gui=False):
    """
    Runs a simulation with two predators and one prey; the learners are selected
    below (WoLF-PHC by default, with several alternatives left commented out).
    Assumes the project's Field, Predator, Prey, plearner and plotting helpers,
    plus matplotlib.pyplot (plt) and time, are imported at module level.
    :return: None
    """

    #initialize the environment
    field = Field(11, 11)
    num_episodes = n_episodes

    pred1loc = (0, 0)
    pred2loc = (10, 10)
    pred3loc = (0, 10)
    preyloc = (5, 5)

    #initialize the predators
    predator1 = Predator(id="Plato", location=pred1loc)
    predator2 = Predator(id="Pythagoras", location=pred2loc)
    # predator3 = Predator(pred3loc)

    #probabilistic
    # predator1.plearner = ProbabilisticPlearner(field=field, agent=predator1)
    # predator2.plearner = ProbabilisticPlearner(field=field, agent=predator2)
    # predator3.plearner = ProbabilisticPlearner(field=field, agent=predator3)

    #greedy Q
    #predator1.plearner = SarsaPlearner.create_greedy_plearner(field=field, agent=predator1, value_init=0,epsilon=0.01)
    # predator2.plearner = SarsaPlearner.create_greedy_plearner(field=field, agent=predator2, value_init=0,epsilon=0.01)
    # predator1.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator1, value_init=0)
    # predator2.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator2, value_init=0)
    # predator3.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator3)

    # wolf
    predator1.plearner = Wolf_phc.create_greedy_plearner(field=field,
                                                         agent=predator1)
    predator2.plearner = Wolf_phc.create_greedy_plearner(field=field,
                                                         agent=predator2)
    # predator3.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator3)

    #softmax Q
    #predator1.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator1)
    #predator2.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator2)
    # predator3.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator3)

    #minimax q
    # predator1.plearner = MiniMaxQPlearner(field=field,agent=predator1,end_alpha=0.01,num_episodes=num_episodes)

    field.add_player(predator1)
    field.add_player(predator2)
    # field.add_player(predator3)
    #initialize the prey
    chip = Prey(id="Kant", location=preyloc)

    # chip.plearner = ProbabilisticPlearner(field=field, agent=chip)
    chip.plearner = Wolf_phc.create_greedy_plearner(field=field,
                                                    agent=chip,
                                                    epsilon=0.01)
    #chip.plearner = QPlearner.create_softmax_plearner(field=field, agent=chip)
    # chip.plearner = MiniMaxQPlearner(field=field,agent=chip,end_alpha=0.01,num_episodes=num_episodes)

    field.add_player(chip)

    field.init_players()

    # set GUI
    if gui:
        GUI = GameFrame(field=field)

    num_steps = []
    pred_win = []

    for i in range(0, n_episodes):
        predator1.location = pred1loc
        predator2.location = pred2loc
        #predator3.location = pred3loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0
        #run the simulation
        while not field.is_ended():
            field.run_step()
            if gui and i == n_episodes - 1:
                GUI.update()
                time.sleep(0.2)

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())

        # print progress every 100 episodes
        if i % 100 == 0:
            print i

    step = 50
    plot_steps(num_steps,
               pred_win,
               window_size=step,
               title="moving average over " + str(step) + " episodes")
def run_wolf(n_episodes=1000):
    # initialize the environment
    field = Field(3, 3)

    """
    initial state:
    | | | |
    |X|O|X|
    | | | |
    """
    pred1loc = (0, 1)
    pred2loc = (2, 1)
    preyloc = (1, 1)

    predator1 = Predator(id="Plato", location=pred1loc)
    predator2 = Predator(id="Pythagoras", location=pred2loc)

    # WoLF
    predator1.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator1)
    predator2.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator2)
    field.add_player(predator1)
    field.add_player(predator2)

    chip = Prey(id="Kant", location=preyloc)
    chip.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=chip, epsilon=0.01)
    field.add_player(chip)
    field.init_players()

    plot_state = State.state_from_field(field)

    num_steps = []
    pred_win = []
    value_of_pred1 = []
    value_of_pred2 = []
    value_of_prey = []

    for i in range(0, n_episodes):
        predator1.location = pred1loc
        predator2.location = pred2loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0
        # run the simulation
        while not field.is_ended():
            field.run_step()

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())
        value_of_pred1.append(predator1.plearner.policy.get_probability_mapping(plot_state))
        value_of_pred2.append(predator2.plearner.policy.get_probability_mapping(plot_state))
        value_of_prey.append(chip.plearner.policy.get_probability_mapping(plot_state))

        # print progress every 10%
        if n_episodes > 10 and i % (n_episodes / 10) == 0:
            print int(1.0 * i / n_episodes * 100), "%"

    # some list wrangling to get one value series per action for each agent (see the toy example after this function)
    vp1 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred1)]
    vp2 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred2)]
    vpc = [[val[0] for val in sublist] for sublist in zip(*value_of_prey)]


    # create plots
    colors = ["r", "b", "g", "k", "m"]
    actions = {
        (0, 0): "stay",
        (-1, 0): "left",
        (1, 0): "right",
        (0, -1): "up",
        (0, 1): "down"
    }
    plt.figure(figsize=(15, 15))

    s = plt.subplot(3, 1, 1)
    s.set_yscale("log")
    for index, action in enumerate(predator1.actions):
        plt.plot(vp1[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 1")
    plt.legend(loc="upper right")

    s = plt.subplot(3, 1, 2)
    s.set_yscale("log")
    for index, action in enumerate(predator2.actions):
        plt.plot(vp2[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 2")
    # plt.legend(loc="upper left")

    s = plt.subplot(3, 1, 3)
    s.set_yscale("log")
    for index, action in enumerate(chip.actions):
        plt.plot(vpc[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for prey")

    plt.suptitle(str(n_episodes) + " episodes")
    plt.savefig(get_output_path() + "policychange-wolf-" + str(n_episodes) + ".pdf")
def run_minimax(n_episodes=1000):
    # initialize the environment
    field = Field(5, 5)

    """
    initial state:
    | | | |
    |X|O| |
    | | | |
    """
    pred1loc = (0, 0)
    preyloc = (2, 2)

    predator1 = Predator(id="Plato", location=pred1loc)

    # minimax-Q
    predator1.plearner = MiniMaxQPlearner(field=field, agent=predator1, end_alpha=0.1, num_episodes=n_episodes, epsilon=0.1)
    field.add_player(predator1)

    chip = Prey(id="Kant", location=preyloc, tripping_prob=0.2)
    chip.plearner = MiniMaxQPlearner(field=field, agent=chip, end_alpha=0.1, num_episodes=n_episodes, epsilon=0.1)
    field.add_player(chip)
    field.init_players()

    plot_state = State([(1, 0)])

    num_steps = []
    pred_win = []
    value_of_pred1 = []
    value_of_prey = []

    for i in range(0, n_episodes):
        predator1.location = pred1loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0
        # run the simulation
        while not field.is_ended():
            field.run_step()
            # print field.state

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())
        value_of_pred1.append(predator1.plearner.policy.get_probability_mapping(plot_state))
        # print predator1.plearner.policy.get_probability_mapping(plot_state)
        value_of_prey.append(chip.plearner.policy.get_probability_mapping(plot_state))

        # print progress every 10%
        if n_episodes >= 10 and i % (n_episodes / 10) == 0:
            print int(1.0 * i / n_episodes * 100), "%:", field.steps, "steps"

    # some list wrangling to get a list of 5 action lists with values for each predator
    vp1 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred1)]
    vpc = [[val[0] for val in sublist] for sublist in zip(*value_of_prey)]


    # create plots
    colors = ["r", "b", "g", "k", "m"]
    actions = {
        (0, 0): "stay",
        (-1, 0): "left",
        (1, 0): "right",
        (0, -1): "up",
        (0, 1): "down"
    }
    plt.figure(figsize=(15, 15))

    s = plt.subplot(2, 1, 1)
    # s.set_yscale("log")
    plt.ylim([-0.1, 1.1])
    for index, action in enumerate(predator1.actions):
        plt.plot(vp1[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 1")
    plt.legend(loc="upper right")

    s = plt.subplot(2, 1, 2)
    #s.set_yscale("log")
    plt.ylim([-0.1, 1.1])
    for index, action in enumerate(chip.actions):
        plt.plot(vpc[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for prey")

    plt.suptitle(str(n_episodes) + " episodes")
    plt.savefig(get_output_path() + "policychange-minimax-" + str(n_episodes) + ".pdf")