best_a = np.argmax( self.Q[self.agent.convert_location_to_state(), :]) if random_step: a = np.random.choice(self.nA) else: a = best_a r = self.get_reward(a) best_r = self.get_reward(best_a) new_belief = self.Q[self.agent.convert_location_to_state(),a] + \ alpha * (r + gamma * best_r - self.Q[self.agent.convert_location_to_state(),a]) self.Q[self.agent.convert_location_to_state(), a] = new_belief self.agent.simulate_step(a) def get_reward(self, a): return self.agent.get_reward(a) + self.grid.get_reward()[0] def choose_action(self): return np.argmax(self.Q[self.agent.convert_location_to_state(), :]) if __name__ == '__main__': grid = Grid() grid.add_agent() grid.add_agent() ql = QLearning(grid.agents[0]) print(ql.choose_action()) ql.learn() print(ql.choose_action())
def RECONNECT(self): self.Gr = nx.DiGraph() for i, agent in enumerate(self.grid.agents): self.Gr.add_node(agent, label=i) self.D = self.get_Dist_Mat() self.connect(self.D) def show_Graph(self): nx.draw(Gr.Gr) plt.show() if __name__ == '__main__': G = Grid() G.add_agent([1, 0], viewing=3) G.add_agent([3, 0], viewing=3.3) G.add_agent([3, 3]) Gr = Graph(G) Gr.show_Graph() # from Grid import Grid # import numpy as np # G = Grid() # G.add_agent([1,0],viewing = 2) # G.add_agent([3,0]) # # Gr = nx.Graph() # for i,agent in enumerate(G.agents): # Gr.add_node(agent) # vecs = []
class Model:
    """Model class for the Schelling segregation model.

    Holds the grid, the activation schedule and the happiness counter; runs
    until every agent reports itself happy.
    """

    def __init__(self, width, height, density, similarity):
        """Create a width x height grid populated with two agent types.

        density controls occupancy (the original comment notes roughly 20%
        of cells are left empty); similarity is the happiness threshold.
        """
        self.width = width
        self.height = height
        # Number of agents of EACH type (half the populated cells per type).
        num_agent = int(height * width * density / 2)
        self.similarity = similarity  # required fraction of similar neighbours
        self.schedule = RandomActivation(self)
        self.grid = Grid(height, width)
        self.happy = 0
        self.running = True

        # Set up agents: all of type A first, then all of type B, with
        # consecutive unique ids. (Renamed from `id`, which shadows a builtin.)
        next_id = 0
        next_id = self._create_agent(next_id, num_agent, Agent.typeA)
        next_id = self._create_agent(next_id, num_agent, Agent.typeB)
        self.grid.cal_happiness()  # compute initial happiness

    def _create_agent(self, startid, num, agent_type):
        """Create `num` agents of `agent_type` with ids starting at `startid`.

        Returns the next unused id so calls can be chained.
        """
        new_id = startid
        for _ in range(num):
            agent = Agent(new_id, (0, 0), self, agent_type)
            new_id = new_id + 1
            self.grid.add_agent(agent)
            self.schedule.add(agent)
        return new_id

    def step(self):
        """Run one step of the model; halt when all agents are happy."""
        self.happy = 0  # Reset counter of happy agents
        self.schedule.step()
        if self.happy == self.schedule.get_agent_count():
            self.running = False

    def plot_grid(self, savefile=False, filename=None):
        """Delegate grid plotting (optionally saving to `filename`)."""
        self.grid.plot_grid(self.happy, savefile, filename)

    def plot_happiness(self, filename):
        """Delegate the happiness-over-time plot."""
        self.grid.plot_happiness(self.happy, filename)

    def is_happy(self):
        """True once the model has converged (all agents happy)."""
        return not self.running

    # it's just for test purpose
    def print_grid(self):
        """Debug helper: dump agent types to stdout; -1 marks empty cells."""
        for cell in self.grid.coord_iter():
            agent = cell[0]
            y = cell[2]
            # Fixed: `agent == None` -> identity test with `is None`.
            if agent is None:
                agent_type = -1
            else:
                agent_type = agent.type
            # Fixed from Python 2 `print x,` statement syntax; the trailing
            # space lives in the format string, so end='' keeps the layout.
            print('{} '.format(agent_type), end='')
            if y == self.width - 1:
                print('\n')
# Obstacles: cells carrying the value 100000 are impassable.
# Vertical wall down column 4 (rows 1-7), plus a blocker at (8, 3).
for _row in range(1, 8):
    Map[_row, 4] = 100000
Map[8, 3] = 100000
# Obstacle cluster hugging the meeting point at (18, 18).
for _cell in ((17, 17), (17, 18), (18, 17), (18, 16), (16, 18)):
    Map[_cell] = 100000

grid = Grid(Map=Map, meeting_point=[18, 18])

plt.figure(1)
grid.show_grid()

plt.figure(2)
# Thirty identical MCTS-driven agents, all spawned at [3, 1].
for _ in range(30):
    grid.add_agent(location=[3, 1], learner='DynamicMCTSFinder', ant_mode=False)

print('meeting point is %s' % (str(grid.agents[0].meeting_point)))
# Simulatortext(grid, num_steps=100)
Simulatorgraphical(grid, num_steps=100, just_agents=True)
def MCTS(agent, env, depth, gamma=.8):
    """Depth-limited exhaustive lookahead over the agent's actions.

    Returns an (nS, nA) array whose row for the agent's current state holds,
    per action, the immediate simulated reward plus gamma times the best
    value obtainable one level deeper; all other rows stay zero.

    NOTE(review): the original body was broken — it iterated the integer
    `agent.nA` itself, referenced the undefined name `action`, multiplied by
    the function object `MCTS` instead of calling it, and never returned.
    Reconstructed as a depth-limited recursion; `env` is kept (unused) for
    interface compatibility. Confirm the intended backup semantics against
    `agent.simulate` (the simulated successor state `s_` is currently unused,
    as in the original).
    """
    Q = np.zeros((agent.nS, agent.nA))
    # Base case: no lookahead budget left — value everything at zero.
    if depth <= 0:
        return Q
    s = agent.convert_location_to_state()
    for a in range(agent.nA):
        s_, r = agent.simulate(a)
        # Bellman-style backup: immediate reward plus discounted best value
        # from one more level of lookahead.
        Q[s, a] = r + gamma * np.max(MCTS(agent, env, depth - 1, gamma))
    return Q


if __name__ == '__main__':
    grid = Grid(n=5)
    grid.add_agent(location=(3, 3))
    grid.add_agent(location=(3, 4))
    done = False
    # Interactive loop: show each agent, read actions from stdin, step grid.
    while not done:
        for agent in grid.agents:
            print(agent.location)
            print(agent.last_action)
            print('-----------')
        actions = input('actions:')
        R, done = grid.time_step(actions=actions)
        Reward = R
        grid.show_grid()
        for agent in grid.agents:
            print(agent.convert_location_to_state())
        print(grid.get_reward())