def setup_predator():
    predator = Predator("Predator%d" % i, pred_locs[i])
    predator.plearner = pred_plearners[i](field=field, agent=predator, **pred_plearner_params[i])  # TODO: check whether *array works
    field.add_player(predator)
    predators.append(predator)
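# setup_predator() above relies on `i`, `field`, `pred_locs`, `pred_plearners`,
# `pred_plearner_params` and `predators` being defined by its caller. The driver below is
# a minimal sketch of how it could be invoked; the start locations, plearner factories and
# parameter dicts are illustrative assumptions, not the configuration used in the experiments.
def setup_predators_example(field):
    pred_locs = [(0, 0), (10, 10), (0, 10)]                  # assumed start locations
    pred_plearners = [Wolf_phc.create_greedy_plearner] * 3   # assumed plearner factory per predator
    pred_plearner_params = [{}, {}, {}]                      # assumed extra keyword arguments
    predators = []
    for i in range(len(pred_locs)):
        predator = Predator("Predator%d" % i, pred_locs[i])
        predator.plearner = pred_plearners[i](field=field, agent=predator, **pred_plearner_params[i])
        field.add_player(predator)
        predators.append(predator)
    return predators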
def run(n_episodes=1000, gui=False):
    """
    Runs a pursuit simulation with two predators and one prey (a third predator is commented out).
    The plearner assignments below select the learning algorithm; WoLF-PHC is active by default.
    """
    # initialize the environment
    field = Field(11, 11)
    num_episodes = n_episodes
    pred1loc = (0, 0)
    pred2loc = (10, 10)
    pred3loc = (0, 10)
    preyloc = (5, 5)

    # initialize the predators
    predator1 = Predator(id="Plato", location=pred1loc)
    predator2 = Predator(id="Pythagoras", location=pred2loc)
    # predator3 = Predator(pred3loc)

    # probabilistic
    # predator1.plearner = ProbabilisticPlearner(field=field, agent=predator1)
    # predator2.plearner = ProbabilisticPlearner(field=field, agent=predator2)
    # predator3.plearner = ProbabilisticPlearner(field=field, agent=predator3)

    # greedy Q
    # predator1.plearner = SarsaPlearner.create_greedy_plearner(field=field, agent=predator1, value_init=0, epsilon=0.01)
    # predator2.plearner = SarsaPlearner.create_greedy_plearner(field=field, agent=predator2, value_init=0, epsilon=0.01)
    # predator1.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator1, value_init=0)
    # predator2.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator2, value_init=0)
    # predator3.plearner = QPlearner.create_greedy_plearner(field=field, agent=predator3)

    # WoLF
    predator1.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator1)
    predator2.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator2)
    # predator3.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator3)

    # softmax Q
    # predator1.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator1)
    # predator2.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator2)
    # predator3.plearner = QPlearner.create_softmax_plearner(field=field, agent=predator3)

    # minimax Q
    # predator1.plearner = MiniMaxQPlearner(field=field, agent=predator1, end_alpha=0.01, num_episodes=num_episodes)

    field.add_player(predator1)
    field.add_player(predator2)
    # field.add_player(predator3)

    # initialize the prey
    chip = Prey(id="Kant", location=preyloc)
    # chip.plearner = ProbabilisticPlearner(field=field, agent=chip)
    chip.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=chip, epsilon=0.01)
    # chip.plearner = QPlearner.create_softmax_plearner(field=field, agent=chip)
    # chip.plearner = MiniMaxQPlearner(field=field, agent=chip, end_alpha=0.01, num_episodes=num_episodes)
    field.add_player(chip)

    field.init_players()

    # set up the GUI
    if gui:
        GUI = GameFrame(field=field)

    num_steps = []
    pred_win = []
    for i in range(0, n_episodes):
        # reset all players to their starting positions
        predator1.location = pred1loc
        predator2.location = pred2loc
        # predator3.location = pred3loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0

        # run the simulation; only the final episode is visualised
        while not field.is_ended():
            field.run_step()
            if gui and i == n_episodes - 1:
                GUI.update()
                time.sleep(0.2)

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())

        # breakpoint
        # if i > 900:
        #     pass
        # print State.state_from_field(field)

        # print progress every 100 episodes
        if i % 100 == 0:
            print i

    # print State.state_from_field(field), field.steps, field.state.prey_is_caught()
    # print [str(state) + ": " + str([predator1.plearner.policy.value[State([state]), action] for action in predator1.get_actions()]) for state in itertools.product([-1, 0, 1], repeat=2)]
    # for action in chip.get_actions():
    #     print '1', action, predator1.plearner.policy.get_value(State([(0, -1), (0, 1)]), action)
    #     print '2', action, predator2.plearner.policy.get_value(State([(0, -1), (0, 1)]), action)

    step = 50
    plot_steps(num_steps, pred_win, window_size=step, title="moving average over " + str(step) + " episodes")
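# `plot_steps` is imported from the project's plotting utilities and is not shown in this
# file. The helper below is a minimal sketch with the same call signature, assuming it
# plots a moving average of the episode lengths and of the capture rate; the name
# `plot_steps_sketch` and its internals are illustrative assumptions, not the real helper.
def plot_steps_sketch(num_steps, pred_win, window_size=50, title=""):
    import numpy as np
    kernel = np.ones(window_size) / float(window_size)
    avg_steps = np.convolve(num_steps, kernel, mode="valid")  # smoothed episode length
    avg_wins = np.convolve(pred_win, kernel, mode="valid")    # smoothed capture rate
    fig, (top, bottom) = plt.subplots(2, 1, figsize=(10, 8))
    top.plot(avg_steps)
    top.set_title("steps per episode (moving average)")
    bottom.plot(avg_wins)
    bottom.set_title("fraction of episodes in which the prey is caught")
    fig.suptitle(title)
    plt.show()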
def run_wolf(n_episodes=1000):
    """
    initial state:
    | | | |
    |X|O|X|
    | | | |
    """
    # initialize the environment
    field = Field(3, 3)
    pred1loc = (0, 1)
    pred2loc = (2, 1)
    preyloc = (1, 1)

    predator1 = Predator(id="Plato", location=pred1loc)
    predator2 = Predator(id="Pythagoras", location=pred2loc)

    # WoLF
    predator1.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator1)
    predator2.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=predator2)
    field.add_player(predator1)
    field.add_player(predator2)

    chip = Prey(id="Kant", location=preyloc)
    chip.plearner = Wolf_phc.create_greedy_plearner(field=field, agent=chip, epsilon=0.01)
    field.add_player(chip)
    field.init_players()

    plot_state = State.state_from_field(field)
    num_steps = []
    pred_win = []
    value_of_pred1 = []
    value_of_pred2 = []
    value_of_prey = []

    for i in range(0, n_episodes):
        predator1.location = pred1loc
        predator2.location = pred2loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0

        # run the simulation
        while not field.is_ended():
            field.run_step()

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())
        value_of_pred1.append(predator1.plearner.policy.get_probability_mapping(plot_state))
        value_of_pred2.append(predator2.plearner.policy.get_probability_mapping(plot_state))
        value_of_prey.append(chip.plearner.policy.get_probability_mapping(plot_state))

        # print progress every 10%
        if n_episodes > 10 and i % (n_episodes / 10) == 0:
            print int(1.0 * i / n_episodes * 100), "%"

    # some list wrangling to get one list of values per action, for each agent
    vp1 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred1)]
    vp2 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred2)]
    vpc = [[val[0] for val in sublist] for sublist in zip(*value_of_prey)]

    # create plots
    colors = ["r", "b", "g", "k", "m"]
    actions = {
        (0, 0): "stay",
        (-1, 0): "left",
        (1, 0): "right",
        (0, -1): "up",
        (0, 1): "down"
    }
    plt.figure(figsize=(15, 15))

    s = plt.subplot(3, 1, 1)
    s.set_yscale("log")
    for index, action in enumerate(predator1.actions):
        plt.plot(vp1[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 1")
    plt.legend(loc="upper right")

    s = plt.subplot(3, 1, 2)
    s.set_yscale("log")
    for index, action in enumerate(predator2.actions):
        plt.plot(vp2[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 2")
    # plt.legend(loc="upper left")

    s = plt.subplot(3, 1, 3)
    s.set_yscale("log")
    for index, action in enumerate(chip.actions):
        plt.plot(vpc[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for prey")

    plt.suptitle(str(n_episodes) + " episodes")
    plt.savefig(get_output_path() + "policychange-wolf-" + str(n_episodes) + ".pdf")
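# The probabilities plotted in run_wolf() come from Wolf_phc, a WoLF policy hill-climbing
# learner. For reference, below is a simplified, standalone sketch of the core WoLF-PHC
# policy step for one state (it is NOT the project's Wolf_phc class, and it omits the
# Q-value and average-policy updates): the policy climbs toward the greedy action with a
# small rate when "winning" and a larger rate when "losing".
def wolf_phc_policy_step_sketch(q, policy, avg_policy, delta_win=0.01, delta_lose=0.04):
    # q, policy and avg_policy are dicts mapping actions to Q-values / probabilities
    actions = list(policy.keys())
    # "winning" means the current policy scores better against Q than the average policy
    winning = sum(policy[a] * q[a] for a in actions) > sum(avg_policy[a] * q[a] for a in actions)
    delta = delta_win if winning else delta_lose
    best = max(actions, key=lambda a: q[a])
    for a in actions:
        if a == best:
            policy[a] = min(1.0, policy[a] + delta)
        else:
            policy[a] = max(0.0, policy[a] - delta / (len(actions) - 1))
    # renormalise so the probabilities still sum to one
    total = sum(policy.values())
    for a in actions:
        policy[a] /= total
    return policy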
def run_minimax(n_episodes=1000):
    """
    initial state:
    | | | |
    |X|O| |
    | | | |
    """
    # initialize the environment
    field = Field(5, 5)
    pred1loc = (0, 0)
    preyloc = (2, 2)

    predator1 = Predator(id="Plato", location=pred1loc)

    # minimax Q
    predator1.plearner = MiniMaxQPlearner(field=field, agent=predator1, end_alpha=0.1, num_episodes=n_episodes, epsilon=0.1)
    field.add_player(predator1)

    chip = Prey(id="Kant", location=preyloc, tripping_prob=0.2)
    chip.plearner = MiniMaxQPlearner(field=field, agent=chip, end_alpha=0.1, num_episodes=n_episodes, epsilon=0.1)
    field.add_player(chip)
    field.init_players()

    plot_state = State([(1, 0)])
    num_steps = []
    pred_win = []
    value_of_pred1 = []
    value_of_prey = []

    for i in range(0, n_episodes):
        predator1.location = pred1loc
        chip.location = preyloc
        field.update_state()
        field.steps = 0

        # run the simulation
        while not field.is_ended():
            field.run_step()
            # print field.state

        num_steps.append(field.steps)
        pred_win.append(field.state.prey_is_caught())
        value_of_pred1.append(predator1.plearner.policy.get_probability_mapping(plot_state))
        # print predator1.plearner.policy.get_probability_mapping(plot_state)
        value_of_prey.append(chip.plearner.policy.get_probability_mapping(plot_state))

        # print progress every 10%
        if n_episodes >= 10 and i % (n_episodes / 10) == 0:
            print int(1.0 * i / n_episodes * 100), "%:", field.steps, "steps"

    # some list wrangling to get one list of values per action, for each agent
    vp1 = [[val[0] for val in sublist] for sublist in zip(*value_of_pred1)]
    vpc = [[val[0] for val in sublist] for sublist in zip(*value_of_prey)]

    # create plots
    colors = ["r", "b", "g", "k", "m"]
    actions = {
        (0, 0): "stay",
        (-1, 0): "left",
        (1, 0): "right",
        (0, -1): "up",
        (0, 1): "down"
    }
    plt.figure(figsize=(15, 15))

    s = plt.subplot(2, 1, 1)
    # s.set_yscale("log")
    plt.ylim([-0.1, 1.1])
    for index, action in enumerate(predator1.actions):
        plt.plot(vp1[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for predator 1")
    plt.legend(loc="upper right")

    s = plt.subplot(2, 1, 2)
    # s.set_yscale("log")
    plt.ylim([-0.1, 1.1])
    for index, action in enumerate(chip.actions):
        plt.plot(vpc[index], c=colors[index], label=actions[action])
    plt.title("action probabilities for prey")

    plt.suptitle(str(n_episodes) + " episodes")
    plt.savefig(get_output_path() + "policychange-minimax-" + str(n_episodes) + ".pdf")
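# Hypothetical entry point showing how these experiments might be launched; the episode
# counts are placeholder assumptions, not the settings used for any reported results.
if __name__ == "__main__":
    run(n_episodes=1000, gui=False)
    run_wolf(n_episodes=1000)
    run_minimax(n_episodes=1000)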