from agents import Model, Agent, run

miner_model = Model("MinerBots", 100, 100)
miner_model.add_agent(Agent())
run(miner_model)
from random import randint

from agents import Model, Agent, AgentShape, run

miner_model = Model("MinerBots", 100, 100)


class Robot(Agent):
    def setup(self, model):
        self.color = (100, 100, 100)
        self.direction = randint(0, 359)
        self.loaded = False
        self.x = model.width / 2
        self.y = model.height / 2

    def step(self, model):
        if self.loaded:
            self.point_towards(model.width / 2, model.height / 2)
        else:
            self.direction += randint(0, 20) - 10
        self.forward()
        self.speed = model.speed_factor
        t = self.current_tile()
        if t.info["has_mineral"] and not self.loaded:
            t.info["has_mineral"] = False
            t.color = (200, 100, 0)
            self.color = (100, 100, 255)
            self.loaded = True


class Homebase(Agent):
    def setup(self, model):
from collections import Counter, defaultdict, namedtuple

import numpy as np

# Policy, Model, and PriorityQueue are provided by the surrounding codebase.


class Astar(Policy):
    """A* search finds the shortest path to a goal."""

    def __init__(self, heuristic):
        assert 0  # this implementation is incorrect
        super().__init__()
        self.heuristic = heuristic
        self.plan = iter(())

    def start_episode(self, state):
        self.history = Counter()
        self.model = Model(self.env)

    def act(self, state):
        self.history[state] += 1
        try:
            return next(self.plan)
        except StopIteration:
            self.plan = iter(self.make_plan(state))
            return next(self.plan)

    def eval_node(self, node):
        if not node.path:
            return np.inf  # the empty plan has infinite cost
        obs = self.env._observe(node.state)
        value = 0 if node.done else self.heuristic(self.env, obs)
        boredom = -0.1 * self.history[obs]
        score = node.reward + value + boredom
        return -score

    def make_plan(self, state, expansions=5000):
        Node = namedtuple('Node', ('state', 'path', 'reward', 'done'))
        eval_node = self.eval_node
        start = Node(self.env._state, [], 0, False)
        frontier = PriorityQueue(key=eval_node)
        frontier.push(start)
        reward_to_state = defaultdict(lambda: -np.inf)
        best_finished = start

        def expand(node):
            nonlocal best_finished
            s0, p0, r0, _ = node
            for a, s1, r, done in self.model.options(s0):
                node1 = Node(s1, p0 + [a], r0 + r, done)
                if node1.reward <= reward_to_state[s1]:
                    continue  # cannot be better than an existing node
                reward_to_state[s1] = node1.reward
                if done:
                    best_finished = min((best_finished, node1), key=eval_node)
                else:
                    frontier.push(node1)

        for i in range(expansions):
            self.save('frontier', [n[1].state for n in frontier])
            if frontier:
                expand(frontier.pop())
            else:
                break

        if frontier:
            plan = min(best_finished, frontier.pop(), key=eval_node)
        else:
            plan = best_finished
        self.save('plan', plan)
        return plan.path
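The search code here (and in the classes below) pushes nodes onto a PriorityQueue built with a key function, an interface the standard library's queue.PriorityQueue does not offer. The following is a minimal heap-backed sketch of that assumed interface, not the codebase's actual implementation; iteration yields (priority, item) pairs so that `n[1].state` in the frontier-saving line above works as written.

import heapq
import itertools


class PriorityQueue:
    """Minimal min-heap keyed by a user-supplied function (a sketch).

    pop() returns the item with the lowest key; iteration yields
    (priority, item) pairs, matching the `n[1].state` usage above.
    """

    def __init__(self, key):
        self.key = key
        self._heap = []
        self._count = itertools.count()  # tie-breaker; avoids comparing items

    def push(self, item):
        heapq.heappush(self._heap, (self.key(item), next(self._count), item))

    def pop(self):
        return heapq.heappop(self._heap)[-1]

    def __bool__(self):
        return bool(self._heap)  # truthy while any node remains

    def __iter__(self):
        return iter((priority, item) for priority, _, item in self._heap)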
class ValSearchPolicy(Policy):
    """Searches for the maximum reward path using a model."""

    def __init__(self, V, replan=False, epsilon=0, noise=1, anneal=1, **kwargs):
        super().__init__(**kwargs)
        self.V = V
        self.replan = replan
        self.epsilon = epsilon
        self.noise = noise
        self.anneal = anneal
        self.history = None
        self.model = None
        self.plan = None

    def start_episode(self, state):
        self.history = Counter()
        self.model = Model(self.env)
        self.plan = iter(())  # start with no plan

    def finish_episode(self, trace):
        self.ep_trace['berries'] = self.env._observe()[-1]

    def act(self, state):
        self.history[state] += 1
        try:
            if self.replan:
                raise StopIteration()
            return next(self.plan)
        except StopIteration:
            self.plan = iter(self.make_plan(state))
            return next(self.plan)

    def make_plan(self, state, expansions=2000):
        Node = namedtuple('Node', ('state', 'path', 'reward', 'done'))
        env = self.env
        V = memoize(self.V.predict)
        self.node_history = []

        def eval_node(node, noisy=False):
            if not node.path:
                return np.inf  # the empty plan has infinite cost
            obs = env._observe(node.state)
            noise = np.random.rand() * (
                self.noise * self.anneal ** self.i_episode) if noisy else 0
            value = 0 if node.done else V(obs)[0]
            boredom = -0.1 * self.history[obs]
            score = node.reward + value + noise + boredom
            return -score

        start = Node(env._state, [], 0, False)
        frontier = PriorityQueue(key=eval_node)
        frontier.push(start)
        reward_to_state = defaultdict(lambda: -np.inf)
        reward_to_state[start.state] = 0
        best_finished = start

        def expand(node):
            nonlocal best_finished
            best_finished = min((best_finished, node), key=eval_node)
            s0, p0, r0, _ = node
            for a, s1, r, done in self.model.options(s0):
                node1 = Node(s1, p0 + [a], r0 + r, done)
                if node1.reward <= reward_to_state[s1]:
                    continue  # cannot be better than an existing node
                self.node_history.append({
                    'path': node1.path,
                    'r': node1.reward,
                    'b': self.env._observe(node1.state)[-1],
                    'v': -eval_node(node1)
                })
                reward_to_state[s1] = node1.reward
                if done:
                    best_finished = min((best_finished, node1), key=eval_node)
                else:
                    frontier.push(node1)

        for i in range(expansions):
            if frontier:
                expand(frontier.pop())
            else:
                break

        if frontier:
            plan = min(best_finished, frontier.pop(), key=eval_node)
        else:
            plan = best_finished

        self.log(
            i,
            len(plan.path),
            -round(eval_node(plan, noisy=False), 2),
            plan.done,
        )
        return plan.path
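make_plan wraps the value function's predict in memoize so repeated evaluations of the same observation hit a cache. That helper is not shown in this section; a minimal dictionary-backed version, assuming observations are hashable (they are used as Counter keys above), might look like this:

import functools


def memoize(f):
    """Cache f's results by its (hashable) arguments -- a minimal sketch."""
    cache = {}

    @functools.wraps(f)
    def wrapper(*args):
        if args not in cache:
            cache[args] = f(*args)  # compute once per distinct argument tuple
        return cache[args]

    return wrapper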
    for tile in model.tiles:
        tile.color = (tile.info["value"], tile.info["value"], 0)
    seekers = set([Seeker() for i in range(200)])
    model.add_agents(seekers)


def step(model):
    for tile in model.tiles:
        if tile.info["value"] > 255:
            tile.info["value"] = 0
        tile.color = (tile.info["value"], tile.info["value"], 0)
    for agent in model.agents:
        agent.step(model)


def invisible(model):
    model.invisible = not model.invisible
    if model.invisible:
        for a in model.agents:
            a.size = 0
    else:
        for a in model.agents:
            a.size = 4


urban_model = Model("urban", 50, 50)
urban_model.add_button("Setup", setup)
urban_model.add_toggle_button("Go", step)
urban_model.add_button("Invisible", invisible)
run(urban_model)
from agents import Model, Agent, run

epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_agent(Agent())
run(epidemic_model)
        model.Infectious -= 1
        model.Recovered += 1
        self.color = (0, 0, 200)
        self.category = 2


def model_setup(model):
    model.reset()
    model.Susceptible = 0
    model.Infectious = 0
    model.Recovered = 0
    for person in range(100):
        model.add_agent(Person())


def model_step(model):
    for person in model.agents:
        person.step(model)
    model.update_plots()


epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_button("Setup", model_setup)
epidemic_model.add_button("Go", model_step, toggle=True)
epidemic_model.line_chart(
    ["Susceptible", "Infectious", "Recovered"],
    [(0, 200, 0), (200, 0, 0), (0, 0, 200)]
)
run(epidemic_model)
        # Jump there
        if new_tile is not None:
            self.jump_to_tile(new_tile)
        # Does nothing if all tiles are occupied

    def step(self, model):
        self.move()


def setup(model):
    model.reset()
    model.initial_bugs = 100
    # Create and add agents
    for i in range(int(model.initial_bugs)):
        model.add_agent(Bug())


def step(model):
    # Move all agents
    for agent in model.agents:
        agent.step(model)


stupid_model = Model("Basic StupidModel (stupid01)", 100, 100, tile_size=5)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
run(stupid_model)
from collections import defaultdict, namedtuple

import gym
import numpy as np

# Agent, Model, FixedPlanPolicy, and PriorityQueue are provided by the
# surrounding codebase.


class MetaBestFirstSearchEnv(gym.Env):
    """A meta-MDP for best first search with a deterministic transition model."""
    Node = namedtuple('Node', ('state', 'path', 'reward', 'done'))
    State = namedtuple('State', ('frontier', 'reward_to_state', 'best_done'))
    TERM = 'TERM'

    def __init__(self, env, eval_node, expansion_cost=0.01):
        super().__init__()
        self.env = env
        self.expansion_cost = -abs(expansion_cost)
        # This guy interacts with the external environment, what a chump!
        self.surface_agent = Agent()
        self.surface_agent.register(self.env)
        self.eval_node = eval_node

    def _reset(self):
        self.env.reset()
        self.model = Model(self.env)  # warning: this breaks if env resets again
        start = self.Node(self.env._state, [], 0, False)
        # The frontier's evaluation function is really part of the meta policy.
        frontier = PriorityQueue(key=self.eval_node(noisy=True))
        frontier.push(start)
        reward_to_state = defaultdict(lambda: -np.inf)
        best_done = None
        # Warning: state is mutable (and we mutate it!)
        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state

    def _step(self, action):
        """Expand a node in the frontier."""
        if action is self.TERM:
            # The return of one episode in the external env is
            # one reward in the MetaSearchEnv.
            trace = self._execute_plan()
            external_reward = trace['return']
            return None, external_reward, True, {'trace': trace}
        else:
            return self._expand_node(action), self.expansion_cost, False, {}

    def _execute_plan(self):
        frontier, reward_to_state, best_done = self._state
        if not best_done:
            raise RuntimeError('Cannot make plan.')
        policy = FixedPlanPolicy(best_done.path)
        self.surface_agent.register(policy)
        trace = self.surface_agent.run_episode(reset=False)
        return trace

    def _expand_node(self, node):
        frontier, reward_to_state, best_done = self._state
        s0, p0, r0, _ = node
        for a, s1, r, done in self.model.options(s0):
            node1 = self.Node(s1, p0 + [a], r0 + r, done)
            if node1.reward <= reward_to_state[s1] - 0.002:
                continue  # cannot be better than an existing node
            reward_to_state[s1] = node1.reward
            if done:
                if best_done is None:  # max() would choke on the initial None
                    best_done = node1
                else:
                    best_done = max((best_done, node1),
                                    key=self.eval_node(noisy=False))
            else:
                frontier.push(node1)
        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state
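All three search classes rely on Model.options(state), which enumerates the one-step consequences (action, next_state, reward, done) of every action available in a state. Its implementation is not shown in this section; the sketch below only illustrates the assumed interface for a deterministic old-style gym environment, and assumes the env exposes a settable _state and a discrete action space. The real Model may well cache transitions instead (per the comment in _reset, it is tied to a single reset of the env).

from copy import deepcopy


class Model:
    """Sketch of the one-step lookahead interface the searches assume."""

    def __init__(self, env):
        self.env = env

    def options(self, state):
        """Yield (action, next_state, reward, done) for each action."""
        for action in range(self.env.action_space.n):
            env = deepcopy(self.env)  # don't disturb the real env
            env._state = state        # assumes state is settable
            _, reward, done, _ = env.step(action)
            yield action, env._state, reward, done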
from agents import Model, Agent, run


def model_setup(model):
    model.reset()
    model.add_agent(Agent())


epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_button("Setup", model_setup)
run(epidemic_model)
    model.remove_destroyed_agents()
    # TODO: Stop after 1000 iterations
    if model.agent_count() == 0:
        model.pause()


def close(model):
    if file_handle:
        file_handle.close()


stupid_model = Model(
    "StupidModel - reading habitat data from file (stupid15)",
    100,
    100,
    tile_size=3,
    cell_data_file="stupid.cell",
)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
stupid_model.add_controller_row()
stupid_model.add_slider("initial_bugs", 100, 10, 300)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_eat", 1.0, 0.1, 1.0)
stupid_model.add_controller_row()
stupid_model.add_slider("initialBugSizeMean", 1, 0, 10)
stupid_model.add_slider("initialBugSizeSD", 5, 0, 10)
stupid_model.histogram("grow_size", 0, 10, 5, (0, 0, 0))
stupid_model.line_chart(["current_bugs"], [(0, 0, 0)])
        other.update_visual()
        self.util = self.utility()


def setup(model):
    model.reset()
    model.clear_plots()
    model.total_util = 0
    model.movespeed = 0.2
    people = set([Person() for i in range(20)])
    model.add_agents(people)


def step(model):
    model.total_util = 0
    for a in model.agents:
        a.step(model)
    model.update_plots()
    model.remove_destroyed_agents()


bnb_model = Model("Bread & butter economy", 50, 50)
bnb_model.add_button("Setup", setup)
bnb_model.add_button("Step", step)
bnb_model.add_button("Go", step, toggle=True)
bnb_model.add_controller_row()
bnb_model.add_slider("movespeed", 0.1, 0.1, 1)
bnb_model.line_chart(["total_util"], [(0, 0, 0)])
bnb_model.agent_line_chart("util")
run(bnb_model)
    def step(self, model):
        self.direction += randint(-10, 10)
        self.forward()
        if self.category == 1:
            for agent in self.agents_nearby(12):
                agent.infect(model)

    def infect(self, model):
        self.color = (200, 0, 0)
        self.category = 1


def model_setup(model):
    model.reset()
    for person in range(100):
        model.add_agent(Person())


def model_step(model):
    for person in model.agents:
        person.step(model)


epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_button("Setup", model_setup)
epidemic_model.add_button("Go", model_step, toggle=True)
run(epidemic_model)
from random import randint

from agents import Model, Agent, run

miner_model = Model("MinerBots", 100, 100)


class Robot(Agent):
    def setup(self, model):
        self.color = (100, 100, 100)
        self.direction = randint(0, 359)

    def step(self, model):
        self.direction += randint(0, 20) - 10
        self.forward()


def setup(model):
    model.reset()
    for x in range(10):
        model.add_agent(Robot())


def step(model):
    for ag in model.agents:
        ag.step(model)


miner_model.add_button("Setup", setup)
miner_model.add_toggle_button("Go", step)
def setup(model):
    model.reset()
    model.clear_plots()
    model.total_util = 0
    model.BNP = 0
    people = set([Person() for i in range(20)])
    model.add_agents(people)


def step(model):
    model.BNP = 0
    for a in model.agents:
        a.step(model)
        model.BNP += a.utility()
    model.update_plots()
    model.remove_destroyed_agents()


bnb_model = Model("Bread & butter economy during pandemic", 50, 50)
bnb_model.add_button("Setup", setup)
bnb_model.add_button("Step", step)
bnb_model.add_button("Go", step, toggle=True)
bnb_model.add_controller_row()
bnb_model.add_slider("movespeed", 0.5, 0.1, 1)
bnb_model.add_checkbox("Decay")
bnb_model.line_chart(["BNP"], [(0, 0, 0)])
bnb_model.show_direction = False
run(bnb_model)
from agents import Model, run

miner_model = Model("MinerBots", 100, 100)
run(miner_model)
from random import randint

from agents import Model, Agent, run

miner_model = Model("MinerBots", 100, 100)


class Robot(Agent):
    def setup(self, model):
        self.color = (100, 100, 100)
        self.direction = randint(0, 359)

    def step(self, model):
        self.direction += randint(0, 20) - 10
        self.forward()
        self.speed = model.speed_factor


def setup(model):
    model.reset()
    for x in range(10):
        model.add_agent(Robot())
    model.speed_factor = 1
    for t in model.tiles:
        if randint(0, 50) == 50:
            t.color = (0, 255, 255)
            t.info["has_mineral"] = True
        else:
            t.color = (200, 100, 0)
            t.info["has_mineral"] = False
from agents import Model, Agent, run

miner_model = Model("MinerBots", 100, 100)


def setup(model):
    model.reset()
    model.add_agent(Agent())


miner_model.add_button("Setup", setup)
run(miner_model)
    model.update_plots()
    model.remove_destroyed_agents()
    # TODO: Stop after 1000 iterations
    if model.agent_count() == 0:
        model.pause()


def close(model):
    if file_handle:
        file_handle.close()


stupid_model = Model(
    "StupidModel w. gauss distribution of sizes (stupid14)",
    100,
    100,
    tile_size=5,
)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
stupid_model.add_controller_row()
stupid_model.add_slider("initial_bugs", 100, 10, 300)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_eat", 1.0, 0.1, 1.0)
stupid_model.add_slider("max_food_prod", 0.01, 0.01, 0.1)
stupid_model.add_controller_row()
stupid_model.add_slider("initialBugSizeMean", 1, 0, 10)
stupid_model.add_slider("initialBugSizeSD", 5, 0, 10)
stupid_model.histogram("grow_size", 0, 10, 5, (0, 0, 0))
stupid_model.line_chart(["current_bugs"], [(0, 0, 0)])
    # Initialize tiles
    for tile in model.tiles:
        tile.info["food"] = 0.0
        tile.color = (0, 0, 0)


def step(model):
    # Food production
    for tile in model.tiles:
        food_prod = random.uniform(0, model.max_food_prod)
        tile.info["food"] += food_prod
        c = min(255, math.floor(tile.info["food"] * 255))
        tile.color = (c, c, c)
    # Move all agents
    for agent in model.agents:
        agent.step(model)


stupid_model = Model(
    "StupidModel w. habitat cells and resources (stupid03)",
    100,
    100,
    tile_size=5,
)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
run(stupid_model)
    # Update plots
    model.update_plots()
    model.remove_destroyed_agents()
    # TODO: Stop after 1000 iterations
    if model.agent_count() == 0:
        model.pause()


def close(model):
    if file_handle:
        file_handle.close()


stupid_model = Model(
    "StupidModel w. population graph (stupid13)", 100, 100, tile_size=5
)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
stupid_model.add_controller_row()
stupid_model.add_slider("initial_bugs", 100, 10, 300)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_eat", 1.0, 0.1, 1.0)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_prod", 0.01, 0.01, 0.1)
stupid_model.histogram("grow_size", 0, 10, 5, (0, 0, 0))
stupid_model.line_chart(["current_bugs"], [(0, 0, 0)])
stupid_model.on_close(close)
run(stupid_model)
            person.vaccinate(model)
            exp_modifier += 0.2
            break
    file_handle = open("vaccine.csv", "a")
    file_handle.write(
        str(counter) + "," + str(model.Susceptible) + "," +
        str(model.Infectious) + "," + str(model.Recovered) + "," +
        str(model.Vaccinated) + "\n")
    file_handle.close()
    counter += 1
    model.update_plots()


epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_button("Setup", model_setup)
epidemic_model.add_button("Go", model_step, toggle=True)
epidemic_model.line_chart(
    ["Susceptible", "Infectious", "Recovered", "Vaccinated"],
    [(0, 200, 0), (200, 0, 0), (0, 0, 200), (100, 100, 200)])
epidemic_model.bar_chart(
    ["Susceptible", "Infectious", "Recovered", "Vaccinated"],
    (200, 200, 200))
epidemic_model.add_checkbox("enable_groups")
epidemic_model.add_checkbox("Vaccine_exponential")
epidemic_model.add_controller_row()
epidemic_model.add_slider("Social_distance", 50, 0, 80)
epidemic_model.add_controller_row()
epidemic_model.add_slider("Infection_distance", 15, 0, 40)
def model_setup(model):
    model.reset()
    model.Susceptible = 0
    model.Infectious = 0
    model.Recovered = 0
    for person in range(100):
        model.add_agent(Person())


def model_step(model):
    for person in model.agents:
        person.step(model)
    model.update_plots()


epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_button("Setup", model_setup)
epidemic_model.add_button("Go", model_step, toggle=True)
epidemic_model.line_chart(["Susceptible", "Infectious", "Recovered"],
                          [(0, 200, 0), (200, 0, 0), (0, 0, 200)])
epidemic_model.add_checkbox("enable_groups")
epidemic_model.add_controller_row()
epidemic_model.add_slider("social_distance", 50, 0, 80)
epidemic_model.add_controller_row()
epidemic_model.add_slider("infection_distance", 15, 0, 40)
run(epidemic_model)
def step(model):
    # Food production
    for tile in model.tiles:
        food_prod = random.uniform(0, model.max_food_prod)
        tile.info["food"] += food_prod
        c = min(255, math.floor(tile.info["food"] * 255))
        tile.color = (c, c, c)
    # Move all agents
    for agent in model.agents:
        agent.step(model)


stupid_model = Model("StupidModel w. parameters and displays (stupid05)",
                     100, 100, tile_size=5)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
stupid_model.add_controller_row()
stupid_model.add_slider("initial_bugs", 100, 10, 300)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_eat", 1.0, 0.1, 1.0)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_prod", 0.01, 0.01, 0.1)
    # Write min, average and max bug size to file
    file_handle.write(
        str(bug_min) + " " + str(bug_mean) + " " + str(bug_max) + "\n")
    # Update plots
    model.update_plots()


def close(model):
    if file_handle:
        file_handle.close()


stupid_model = Model("StupidModel w. file output (stupid08)",
                     100, 100, tile_size=5)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
stupid_model.add_controller_row()
stupid_model.add_slider("initial_bugs", 100, 10, 300)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_eat", 1.0, 0.1, 1.0)
stupid_model.add_controller_row()
stupid_model.add_slider("max_food_prod", 0.01, 0.01, 0.1)
stupid_model.histogram("grow_size", 0, 10, 5, (0, 0, 0))
stupid_model.on_close(close)
run(stupid_model)
        self.size_to_color()

    def step(self, model):
        self.grow()
        self.move()


def setup(model):
    model.reset()
    model.initial_bugs = 100
    # Create and add agents
    for i in range(int(model.initial_bugs)):
        model.add_agent(Bug())


def step(model):
    # Move all agents
    for agent in model.agents:
        agent.step(model)


stupid_model = Model("StupidModel w. Bug Growth (stupid02)",
                     100, 100, tile_size=5)
stupid_model.add_button("setup", setup)
stupid_model.add_button("step", step)
stupid_model.add_toggle_button("go", step)
run(stupid_model)