Example #1
def _move_selected(state, button, move):
    """Interface callback for proceeding through game play with user"""

    # Guard clause for when already-filled place is attempted
    if button.get_label() in [X, O]:
        return interface.set_invalid_move_state(state)

    # Update board state based on move
    human_id = board.not_id(state.agent_state.identifier)
    state.board_state = board.place(state.board_state, human_id, move)

    # Update UI to reflect model
    interface.update_board(state)

    # Check for game finished
    if board.is_finished(state.board_state):
        # Display game results
        interface.game_finished(state)
    else:
        # Process agent's decision
        (move, agent_state) = agent.move(state.agent_state, state.board_state)

        # Update board and agent state
        state.board_state = board.place(state.board_state,
                                        state.agent_state.identifier, move)
        state.agent_state = agent_state

        # Update UI to reflect model
        interface.update_board(state)

    # Check game finish again after the agent's move so a finished game stops accepting user events
    if board.is_finished(state.board_state):
        # Display game results
        interface.game_finished(state)
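Since _move_selected receives the board position alongside the clicked button, each button can be bound with its own move index. A minimal wiring sketch, assuming a GTK-style "clicked" signal and a hypothetical buttons list ordered by board position (state is the game state object used above):

for move, button in enumerate(buttons):
    # Bind move as a default argument so each handler keeps its own index
    button.connect("clicked", lambda b, m=move: _move_selected(state, b, m))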
Example #2
import numpy as np
from scipy.interpolate import interp1d


def update(i):
    # Move the agent & update the whisker deflections
    agent.move(speed)
    if agent.x > texture_length:
        agent.x = 0
    agent.update_whisker_deflections(texture)

    # Update the whisking angle
    whisking_angle = mean_whisking_angle + whisking_angle_amplitude * np.sin(
        np.sin(whisking_speed * i))
    agent.update_whisking_angle(whisking_angle)

    # Update sensory cell weights to learn to match whisker deflections
    f = interp1d(range(n_whiskers), agent.whiskers.deflections)
    target_cell_activity = f(np.linspace(0, n_whiskers - 1, n_sensory_cells))
    agent.sensory_cells.update_weights(target_cell_activity)

    return texture, agent
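The update(i) signature and per-frame return value suggest this is an animation callback. A minimal driver sketch, assuming matplotlib's FuncAnimation and module-level setup for the figure, agent, and texture; blitting is left off because the callback does not return matplotlib artists:

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

fig, ax = plt.subplots()
# Call update(i) once per frame, ~30 ms apart
anim = FuncAnimation(fig, update, frames=1000, interval=30)
plt.show()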
Example #3
def _play_train_move(player, board_state):
    """Helper for handling single agent's move"""

    # Get move and updated player state
    (move, player) = agent.move(player, board_state)

    # Update board state
    board_state = board.place(board_state, player.identifier, move)

    return (board_state, player)
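A sketch of how this helper might drive one self-play training game, alternating two agents until the board is finished; board.new() is a hypothetical empty-board constructor alongside the project's board.place and board.is_finished:

def _train_game(player_x, player_o):
    """Play one self-play game, alternating the two agents."""
    board_state = board.new()  # hypothetical empty-board constructor
    players = [player_x, player_o]
    turn = 0
    while not board.is_finished(board_state):
        (board_state, players[turn % 2]) = _play_train_move(
            players[turn % 2], board_state)
        turn += 1
    return board_state, players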
Example #4
File: run.py Project: uusama/RL
import copy

import numpy as np


def run(env,
        batch_size,
        agent,
        memory,
        discount,
        steps=300,
        episode_i=0,
        eps=.9,
        render=False,
        normalize=False):
    state = env.reset()
    done = False
    acc_reward = 0.0
    loss = 0.0
    for i in range(steps):
        if done:
            break
        # eps should decay over time; use the passed-in value rather than a
        # hard-coded 0.9 so callers can anneal it between episodes
        action = agent.move(state, eps=eps)
        # print("state:",state.shape,state)
        if normalize:
            state = featurize_state(state)

        next_state, reward, done, _ = env.step(action)
        acc_reward += reward
        memory.add((state, action, next_state, reward, done))
        if render:
            env.render()

        if len(memory.memory) > batch_size:
            state_m, action_m, next_state_m, reward_m, done_m = zip(
                *memory.sample(batch_size))
            state_m = np.array(state_m)
            action_m = np.array(action_m)
            next_state_m = np.array(next_state_m)
            reward_m = np.array(reward_m)
            done_m = np.array(done_m)

            q_m = agent.predict(next_state_m)

            actual_target_m = reward_m + (1. - done_m) * discount * np.amax(
                q_m, axis=1)

            targets = agent.predict(state_m)

            # assign the actual reward to the taken action
            for j, taken_action in enumerate(action_m):  # j avoids shadowing the step counter i
                targets[j, taken_action] = actual_target_m[j]
            loss = agent.train(states=state_m, targets=targets)

        # advance to the next state every step, not only when a training batch is drawn
        state = copy.copy(next_state)

    # print("acc_reward:", acc_reward)
    return acc_reward, i, loss
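The in-loop comment notes that eps should decay over time, which is naturally handled by the caller annealing it between episodes. A minimal driver sketch with a hypothetical eps_min/eps_decay schedule and n_episodes count:

eps = 0.9
eps_min, eps_decay = 0.05, 0.995  # hypothetical annealing schedule
for episode in range(n_episodes):
    acc_reward, steps_taken, loss = run(
        env, batch_size, agent, memory, discount,
        episode_i=episode, eps=eps)
    eps = max(eps_min, eps * eps_decay)  # decay toward the floor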
Example #5
import random
import sys

import pygame as pg


def run_game():
    pg.init()
    # Extra space at the bottom to display win/loss message
    size = (Board.WIDTH, Board.HEIGHT + Board.SQUARESIZE)
    screen = pg.display.set_mode(size=size)
    pg.draw.rect(screen, Board.BLUE, (0, 0, size[0], size[1]))

    board = Board()
    draw_board(screen, board)

    turn = random.randint(Board.PLAYER, Board.AGENT)
    game_over = False

    while not game_over:
        if turn == Board.PLAYER:
            event = pg.event.poll()
            if event.type == pg.QUIT:
                sys.exit()

            if event.type == pg.MOUSEBUTTONDOWN:
                x = event.pos[0]
                col = x // Board.SQUARESIZE

                if col in board.valid_moves():
                    board = board.drop_piece(col, Board.PLAYER)
                    if board.is_win(Board.PLAYER):
                        display_message(screen, "You Won!")
                        game_over = True
                    if board.is_tie():
                        display_message(screen, "Tied Game!")
                        game_over = True
                    draw_board(screen, board)
                    turn = Board.AGENT

        if turn == Board.AGENT and not game_over:
            col = agent.move(board)
            board = board.drop_piece(col, Board.AGENT)
            if board.is_win(Board.AGENT):
                display_message(screen, "You Lost!")
                game_over = True
            if board.is_tie():
                display_message(screen, "Tied Game!")
                game_over = True
            draw_board(screen, board)
            turn = Board.PLAYER

    pg.time.wait(5000)
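A typical entry point for such a script, assuming pygame is imported at module level as pg:

if __name__ == '__main__':
    run_game()
    pg.quit()  # release pygame resources after the post-game pause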
Example #6
    def step_agent(self, agent, action):
        # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
        # - 2D/3D cases;                                                                                    #
        # - UNLIMITED/LIMITED battery;                                                                      #
        # - Constant battery consumption which includes both services and motions of the UAVs;              #
        # - All the users have the same priority (each of them is served) and ask for the same service;     #
        # - It is possible to set multi-service UAVs only with the following settings:                      #
        #       * 3D scenario;                                                                              #
        #       * limited UAV bandwidth;                                                                    #
        #       * discrete and discontinuous user service requests (i.e. INF_REQUEST=False).                #
        # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

        info = ""


        self.agents_paths[agent._uav_ID].append(self.get_agent_pos(agent))
        #self.agents_paths[agent._uav_ID].append(self.get_agent_pos(agent))

        if (UAV_STANDARD_BEHAVIOUR==False):
            current_action = self.q_table_action_set[action]
            agent_pos_ = agent.move(current_action, self.cells_matrix) # --> move_2D_unlimited_battery
        else:
            current_action = ACTION_SPACE_STANDARD_BEHAVIOUR.index(action)
            agent_pos_ = agent.move_standard_behaviour(action)

        if action in (CHARGE, CHARGE_2D_INDEX, GO_TO_CS_3D_INDEX):
            agent._users_in_footprint = []
            current_users_in_footprint = []
        else:
            self.current_requested_bandwidth = 0
            current_users_in_footprint, bandwidth_request_in_current_footprint = agent.users_in_uav_footprint(self.users, self.uav_footprint, self.discovered_users)
            if not MULTI_SERVICE:
                agent._users_in_footprint = current_users_in_footprint
            else:
                self.current_requested_bandwidth = bandwidth_request_in_current_footprint

        # Compute the number of users served by the current UAV agent (unused at the moment, since it is not returned by this method):
        n_served_users = agent.n_served_users_in_foot(agent._users_in_footprint)  # --> mainly a SIDE-EFFECT on the per-user 'served or not served' info.

        # For the current iteration, record the users inside the footprint of the current UAV agent:
        self.all_users_in_all_foots.extend(current_users_in_footprint)  # --> SIDE-EFFECT on 'self.all_users_in_all_foots'

        agent._x_coord = agent_pos_[0]
        agent._y_coord = agent_pos_[1]
        if not DIMENSION_2D:
            agent._z_coord = agent_pos_[2]

        if UNLIMITED_BATTERY:
            reward = self.reward_function_1(agent._users_in_footprint)
            s_ = agent_pos_
        else:
            if not MULTI_SERVICE:
                reward = self.reward_function_2(agent._users_in_footprint, agent._battery_level, agent._required_battery_to_CS)
            elif not INF_REQUEST:
                # MULTI_SERVICE is True on this branch; reward_function_3 only applies when INF_REQUEST is False (see the settings above)
                reward = self.reward_function_3(agent._users_in_footprint, agent._battery_level, agent._required_battery_to_CS, self.n_tr_active, self.n_ec_active, self.n_dg_active)
            s_ = (agent_pos_, agent._battery_level)

        done, info = self.is_terminal_state(agent)

        if done:
            # Crashed or otherwise terminal: either way the reward is zeroed
            reward = 0.0

        # Every time the action is undertaken by the 'first' UAV, increase 'self.last_render':
        if agent._uav_ID == 0:
            self.last_render += 1

        return s_, reward, done, info
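A sibling-method sketch of how the environment might step every UAV once per training iteration, assuming a hypothetical self.agents list and externally chosen actions; step_agent already returns a gym-style (state, reward, done, info) tuple:

    def step(self, actions):
        """Sketch: apply one action per UAV and collect the transitions."""
        self.all_users_in_all_foots = []  # assumed to be reset once per iteration
        transitions = []
        for uav, action in zip(self.agents, actions):
            s_, reward, done, info = self.step_agent(uav, action)
            transitions.append((s_, reward, done, info))
        return transitions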
Example #7
import environment
import agent

e = environment.Environment()
a = agent.Agent()

print('Welcome!')

# Python 3: input() replaces raw_input(); keep reading commands until agent.move reports the session is over
user_input = input()
while not agent.move(e, a, user_input):
    user_input = input()
Example #8
    def move_everybody(self):
        # Advance every agent in the population within the world bounds
        for a in self.pop:
            a.move(self.w, self.h)
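The original one-space indent suggests this fragment was lifted from inside a class. A minimal surrounding sketch, with a hypothetical Population container whose agents expose move(w, h):

class Population:
    def __init__(self, agents, w, h):
        self.pop = agents       # agents exposing move(w, h)
        self.w, self.h = w, h   # world bounds handed to each agent

    def move_everybody(self):
        for a in self.pop:
            a.move(self.w, self.h)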