def _move_selected(state, button, move):
    """Interface callback for proceeding through game play with user"""
    # Guard clause for when an already-filled place is attempted
    if button.get_label() in [X, O]:
        return interface.set_invalid_move_state(state)

    # Update board state based on the human's move
    human_id = board.not_id(state.agent_state.identifier)
    state.board_state = board.place(state.board_state, human_id, move)

    # Update UI to reflect model
    interface.update_board(state)

    # Check for game finished
    if board.is_finished(state.board_state):
        # Display game results
        interface.game_finished(state)
    else:
        # Process agent's decision (note: this rebinds `move` to the agent's move)
        (move, agent_state) = agent.move(state.agent_state, state.board_state)

        # Update board and agent state
        state.board_state = board.place(state.board_state, state.agent_state.identifier, move)
        state.agent_state = agent_state

        # Update UI to reflect model
        interface.update_board(state)

        # Another check for game finish to prevent inadvertent user events
        if board.is_finished(state.board_state):
            # Display game results
            interface.game_finished(state)
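# A minimal sketch of the board.not_id helper used above (an assumption, not
# this project's actual code): it flips between the two player identifiers so
# the human is always assigned the mark the agent does not hold.
X, O = "X", "O"  # hypothetical identifier values; the real module defines its own

def not_id(identifier):
    return O if identifier == X else X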
import numpy as np
from scipy.interpolate import interp1d


def update(i):
    # Move the agent & update the whisker deflections
    agent.move(speed)
    if agent.x > texture_length:
        agent.x = 0
    agent.update_whisker_deflections(texture)

    # Update the whisking angle (sinusoidal whisking around the mean angle)
    whisking_angle = mean_whisking_angle + whisking_angle_amplitude * np.sin(
        whisking_speed * i)
    agent.update_whisking_angle(whisking_angle)

    # Update sensory cell weights to learn to match whisker deflections:
    # interpolate the n_whiskers deflections down to n_sensory_cells targets
    f = interp1d(range(n_whiskers), agent.whiskers.deflections)
    target_cell_activity = f(np.linspace(0, n_whiskers - 1, n_sensory_cells))
    agent.sensory_cells.update_weights(target_cell_activity)
    return texture, agent
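# A standalone sketch of the interpolation step above under assumed sizes
# (8 whiskers, 4 sensory cells): interp1d builds a piecewise-linear function
# over the whisker indices, and sampling it at 4 evenly spaced points yields
# one target activity per sensory cell.
import numpy as np
from scipy.interpolate import interp1d

deflections = np.array([0.0, 0.2, 0.5, 0.9, 0.7, 0.4, 0.1, 0.0])  # fake data
f = interp1d(range(len(deflections)), deflections)
print(f(np.linspace(0, len(deflections) - 1, 4)))  # -> [0.  0.6333  0.5  0.]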
def _play_train_move(player, board_state):
    """Helper for handling a single agent's move"""
    # Get move and updated player state
    (move, player) = agent.move(player, board_state)

    # Update board state
    board_state = board.place(board_state, player.identifier, move)
    return (board_state, player)
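# A hedged sketch of how _play_train_move might be driven during self-play:
# the two-player alternation and the board.is_finished check are assumed from
# the surrounding snippets rather than confirmed by this helper itself.
def _play_train_game(player_a, player_b, board_state):
    players = [player_a, player_b]
    current = 0
    while not board.is_finished(board_state):
        (board_state, players[current]) = _play_train_move(players[current], board_state)
        current = 1 - current  # alternate turns
    return (board_state, players[0], players[1])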
import copy

import numpy as np


def run(env, batch_size, agent, memory, discount, steps=300, episode_i=0,
        eps=.9, render=False, normalize=False):
    state = env.reset()
    done = False
    acc_reward = 0.0
    loss = 0.0
    for i in range(steps):
        if done:
            break
        # eps should decay over time; use the caller-supplied value rather
        # than a hard-coded 0.9
        action = agent.move(state, eps=eps)
        if normalize:
            state = featurize_state(state)
        next_state, reward, done, _ = env.step(action)
        acc_reward += reward
        memory.add((state, action, next_state, reward, done))
        if render:
            env.render()
        if len(memory.memory) > batch_size:
            state_m, action_m, next_state_m, reward_m, done_m = zip(
                *memory.sample(batch_size))
            state_m = np.array(state_m)
            action_m = np.array(action_m)
            next_state_m = np.array(next_state_m)
            reward_m = np.array(reward_m)
            done_m = np.array(done_m)
            # One-step TD targets: r + gamma * max_a' Q(s', a') for
            # non-terminal transitions
            q_m = agent.predict(next_state_m)
            actual_target_m = reward_m + (1. - done_m) * discount * np.amax(
                q_m, axis=1)
            targets = agent.predict(state_m)
            # Assign the actual target to the taken action only; use a
            # separate index so the outer step counter `i` is not clobbered
            for j, action_j in enumerate(action_m):
                targets[j, action_j] = actual_target_m[j]
            loss = agent.train(states=state_m, targets=targets)
        state = copy.copy(next_state)
    return acc_reward, i, loss
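# run() only assumes that `memory` exposes .add, .sample, and a sized .memory
# container; a minimal deque-backed buffer satisfying that contract could look
# like this (a sketch, not the project's actual class):
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=10000):
        self.memory = deque(maxlen=capacity)  # oldest transitions evicted first

    def add(self, transition):
        # transition = (state, action, next_state, reward, done)
        self.memory.append(transition)

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)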
import random
import sys

import pygame as pg


def run_game():
    pg.init()
    # Extra space at the bottom to display the win/loss message
    size = (Board.WIDTH, Board.HEIGHT + Board.SQUARESIZE)
    screen = pg.display.set_mode(size=size)
    pg.draw.rect(screen, Board.BLUE, (0, 0, size[0], size[1]))

    board = Board()
    draw_board(screen, board)

    turn = random.randint(Board.PLAYER, Board.AGENT)
    game_over = False
    while not game_over:
        if turn == Board.PLAYER:
            event = pg.event.poll()
            if event.type == pg.QUIT:
                sys.exit()
            if event.type == pg.MOUSEBUTTONDOWN:
                x = event.pos[0]
                col = x // Board.SQUARESIZE
                if col in board.valid_moves():
                    board = board.drop_piece(col, Board.PLAYER)
                    if board.is_win(Board.PLAYER):
                        display_message(screen, "You Won!")
                        game_over = True
                    if board.is_tie():
                        display_message(screen, "Tied Game!")
                        game_over = True
                    draw_board(screen, board)
                    turn = Board.AGENT
        if turn == Board.AGENT and not game_over:
            col = agent.move(board)
            board = board.drop_piece(col, Board.AGENT)
            if board.is_win(Board.AGENT):
                display_message(screen, "You Lost!")
                game_over = True
            if board.is_tie():
                display_message(screen, "Tied Game!")
                game_over = True
            draw_board(screen, board)
            turn = Board.PLAYER
    # Keep the final board and message on screen briefly before exiting
    pg.time.wait(5000)
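# agent.move is external to this snippet; a minimal baseline honoring the same
# interface (take the current Board, return a column index) is a uniform random
# choice over the legal columns -- a sketch, not the project's actual agent:
import random

def move(board):
    return random.choice(list(board.valid_moves()))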
def step_agent(self, agent, action):
    # Supported settings:
    #   - 2D/3D cases;
    #   - UNLIMITED/LIMITED battery;
    #   - constant battery consumption, which includes both services and motions of the UAVs;
    #   - all the users have the same priority (each of them is served) and ask for the same service;
    #   - it is possible to set multi-service UAVs only with the following settings:
    #       * 3D scenario;
    #       * limited UAV bandwidth;
    #       * discrete and discontinuous users service request (i.e. INF_REQUEST=False).

    info = ""
    self.agents_paths[agent._uav_ID].append(self.get_agent_pos(agent))

    if (UAV_STANDARD_BEHAVIOUR == False):
        current_action = self.q_table_action_set[action]
        agent_pos_ = agent.move(current_action, self.cells_matrix)  # --> move_2D_unlimited_battery
    else:
        current_action = ACTION_SPACE_STANDARD_BEHAVIOUR.index(action)
        agent_pos_ = agent.move_standard_behaviour(action)

    if (action in (CHARGE_2D_INDEX, CHARGE, GO_TO_CS_3D_INDEX)):
        agent._users_in_footprint = []
        current_users_in_footprint = []
    else:
        self.current_requested_bandwidth = 0
        current_users_in_footprint, bandwidth_request_in_current_footprint = agent.users_in_uav_footprint(self.users, self.uav_footprint, self.discovered_users)
        if (MULTI_SERVICE == False):
            agent._users_in_footprint = current_users_in_footprint
        else:
            self.current_requested_bandwidth = bandwidth_request_in_current_footprint

    # Compute the number of users served by the current UAV agent (currently unused,
    # since it is not returned by this method):
    n_served_users = agent.n_served_users_in_foot(agent._users_in_footprint)  # --> mainly performs a SIDE-EFFECT on the 'served or not served' info related to the users

    # For the current iteration, add the users inside the footprint of the current UAV agent:
    for user_per_agent_foot in current_users_in_footprint:
        self.all_users_in_all_foots.append(user_per_agent_foot)  # --> SIDE-EFFECT on 'self.all_users_in_all_foots'

    agent._x_coord = agent_pos_[0]
    agent._y_coord = agent_pos_[1]
    if (DIMENSION_2D == False):
        agent._z_coord = agent_pos_[2]

    if (UNLIMITED_BATTERY == True):
        reward = self.reward_function_1(agent._users_in_footprint)
        s_ = agent_pos_
    else:
        if (MULTI_SERVICE == False):
            reward = self.reward_function_2(agent._users_in_footprint, agent._battery_level, agent._required_battery_to_CS)
        elif (INF_REQUEST == False):
            reward = self.reward_function_3(agent._users_in_footprint, agent._battery_level, agent._required_battery_to_CS, self.n_tr_active, self.n_ec_active, self.n_dg_active)
        s_ = (agent_pos_, agent._battery_level)

    done, info = self.is_terminal_state(agent)
    if (done):
        # Terminal states (crashed or otherwise) yield no reward
        reward = 0.0

    # Every time the action is undertaken by the 'first' UAV, increase 'self.last_render':
    if (agent._uav_ID == 0):
        self.last_render += 1

    return s_, reward, done, info
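# step_agent returns the familiar (s_, reward, done, info) tuple, so a per-UAV
# control loop can drive it like a Gym-style step; select_action stands in for
# whatever policy (e.g. epsilon-greedy over the Q-table) picks an index into
# self.q_table_action_set -- a sketch, not this repository's training loop:
def run_episode(env, agents, select_action, max_steps=100):
    for _ in range(max_steps):
        for uav in agents:
            action = select_action(uav)
            s_, reward, done, info = env.step_agent(uav, action)
            if done:
                return info
    return "MAX STEPS REACHED"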
import environment
import agent

e = environment.Environment()
a = agent.Agent()

print('Welcome!')
userInput = input()
while not agent.move(e, a, userInput):
    userInput = input()
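# The loop above treats agent.move as the whole step function: it applies the
# user's command and returns a truthy value once the session should end. A stub
# honoring that contract (an assumption about the real module) might be:
def move(env, ag, user_input):
    # ... apply user_input to env/ag here ...
    return user_input.strip().lower() == 'quit'  # hypothetical stop command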
def move_everybody(self):
    # Advance every agent in the population, passing down the world dimensions
    for a in self.pop:
        a.move(self.w, self.h)
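# A compatible Agent.move sketch (an assumption -- the real class lives
# elsewhere): take one random step and clamp the position to the w x h world
# bounds that move_everybody passes down.
import random

class Agent:
    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def move(self, w, h):
        self.x = min(max(self.x + random.choice((-1, 0, 1)), 0), w - 1)
        self.y = min(max(self.y + random.choice((-1, 0, 1)), 0), h - 1)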