Example #1
    def step(self, action_single):
        # Advances the simulator by step_len steps. Returns a list of
        # [observation (object), reward (float), done (bool), info (dict)].
        # Actions must be integers in the range [0, 18).
        self.steps_since_reset += 1

        self.game_sim.giveCommands([playeraction.Action(action_single)])

        for _ in range(self.step_len):
            self.game_sim.step()
            goal = self.goalScored()
            # If a goal is scored, return immediately
            if goal != 0:
                return [
                    self.getState(),
                    self.game_sim.getSingeplayerReward(), True, {}
                ]

        # If no goal was scored by max_steps, end the episode as a tie.
        if self.steps_since_reset >= self.max_steps:
            return [
                self.getState(),
                self.game_sim.getSingeplayerReward(), True, {}
            ]
        else:
            return [self.getState(), 0.0, False, {}]
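
The list returned above follows the familiar Gym-style (observation, reward, done, info) convention, so a standard rollout loop works against it. A minimal sketch, assuming an already-constructed environment instance `env` with a `reset()` method that returns the initial observation (both are assumptions, not shown in this snippet):

    import numpy as np

    obs = env.reset()  # hypothetical: reset() assumed to return an observation
    done = False
    total_reward = 0.0
    while not done:
        action = np.random.randint(0, 18)  # actions are integers in [0, 18)
        obs, reward, done, info = env.step(action)
        total_reward += reward
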
Example #2
    def getAction(self, frame):
        frame_tensor = torch.FloatTensor(frame.posToNp(self.team, 0, self.accepts_normalised))

        #if torch.cuda.is_available():
        #    frame_tensor = frame_tensor.cuda()

        output = self.network(frame_tensor)
        # The last output is the value head (win probability); the rest are
        # per-component action distributions.
        win_prob = output[-1]
        action_pred_data = output[0:-1]

        if not self.value_is_prob:
            # Rescale a value in [-1, 1] to a probability in [0, 1]
            #win_prob = torch.nn.Sigmoid()(win_prob)
            win_prob = (win_prob + 1.0) / 2

        #if torch.cuda.is_available():
        #    movepred = movepred.cpu()

        action_data = []
        if self.method == "random":
            # Sample each action component from its predicted distribution
            for i in range(len(action_pred_data)):
                if len(action_pred_data[i]) == 1:
                    # A single probability is a Bernoulli component (e.g. kick)
                    p = action_pred_data[i].detach().numpy()[0]
                    action_data.append(np.random.choice([False, True], p=[1 - p, p]))
                else:
                    # Categorical component: sample an index by its probability
                    action_data.append(np.random.choice(len(action_pred_data[i]), p=action_pred_data[i].detach().numpy()))
        elif self.method == "max":
            # Greedy: take the most likely value for each component
            for i in range(len(action_pred_data)):
                action_data.append(np.argmax(action_pred_data[i].detach().numpy()))
        else:
            raise ValueError(f"Unknown action-selection method: {self.method}")
        action = playeraction.Action(*action_data)
        if self.team == "red":
            pass
        elif self.team == "blue":
            # Blue plays on the mirrored half, so mirror the action
            action = action.flipped()
        else:
            raise ValueError(f"Unknown team: {self.team}")

        if self.debug_surf:
            move_probs = []
            if len(action_pred_data) == 1:
                # Single combined head: average each adjacent pair of entries
                # into one probability per movement direction
                temp = action_pred_data[0].detach().numpy()
                for i in range(len(temp)):
                    if i % 2 == 0:
                        move_probs.append(temp[i])
                    else:
                        move_probs[i // 2] = (move_probs[i // 2] + temp[i]) / 2
                move_probs = np.array(move_probs)
            elif len(action_pred_data) == 2:
                # Separate heads: the first is already the movement distribution
                move_probs = action_pred_data[0].detach().numpy()
            else:
                raise ValueError("Expected 1 or 2 action output heads")

            if self.team == "blue":
                # Reorder the 9 directions so the debug overlay matches
                # blue's mirrored perspective
                move_probs = move_probs[[0, 5, 6, 7, 8, 1, 2, 3, 4]]
            self.debug_surf.drawMove(move_probs, action.dir_idx, self.team, float(win_prob))
        return action
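
The "random"/"max" branch above is the usual sample-vs-argmax split for stochastic versus greedy action selection. A self-contained toy illustration of the same NumPy calls (the probability vector here is invented, not a network output):

    import numpy as np

    probs = np.array([0.1, 0.6, 0.3])  # toy categorical distribution
    sampled = np.random.choice(len(probs), p=probs)  # "random": stochastic pick
    greedy = int(np.argmax(probs))                   # "max": deterministic pick
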
Example #3
    def getAction(self, frame=None):
        # Ignore frame
        _ = frame
        # Returns raw action of the agent based on the key presses queried from
        # the gui. Returns (dir_idx, kicking_state)
        movements = [self.gui.isKeyPressed(key) for key in self.movement_keys]
        # Swap the first and third flags to match the order binaryToRaw expects
        movements[0], movements[2] = movements[2], movements[0]

        raw_action = playeraction.binaryToRaw(*movements,
                                              self.gui.isKeyPressed(self.kick))
        action = playeraction.Action(*raw_action)

        return action
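
playeraction.binaryToRaw itself is not shown here. As a rough illustration of the idea, four directional key flags can be folded into a single direction index on a 3x3 grid; this sketch is an assumption about the general technique, not the repo's actual encoding:

    def binary_to_dir_idx(up, right, down, left):
        # Hypothetical encoding: collapse opposing keys into axis offsets,
        # then flatten the 3x3 offset grid; the centre cell is "idle"
        dx = int(right) - int(left)
        dy = int(down) - int(up)
        return (dy + 1) * 3 + (dx + 1)
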
Example #4
    def reset(self, reset_type):
        if reset_type == "random":
            # Spawn at a uniformly random position inside the pitch interior
            self.pos = np.array([
                gameparams.pitchcornerx + np.random.random_sample() * 580,
                gameparams.pitchcornery + np.random.random_sample() * 200,
            ]).astype(float)
        elif reset_type == "default":
            self.pos = self.default_position
        else:
            raise ValueError(f"Unknown reset type passed to player: {reset_type}")

        self.vel = np.zeros(2)

        self.kick_count = 0

        # Set the action to default action state
        self.current_action = playeraction.Action()
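
The constants 580 and 200 bound the random spawn rectangle. The same sampling with named parameters, as a standalone sketch (the width and height values are copied from above; in the repo they presumably derive from the gameparams pitch dimensions):

    import numpy as np

    def random_pitch_position(corner_x, corner_y, width=580.0, height=200.0):
        # Uniform sample inside the axis-aligned rectangle at the given corner
        return np.array([
            corner_x + np.random.random_sample() * width,
            corner_y + np.random.random_sample() * height,
        ])
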
Example #5
    def __init__(self, team, initial_position, initial_velocity=np.zeros(2), initial_acceleration=np.zeros(2)):
        # Initialise positional parameters, basic properties of the object
        Entity.__init__(self, initial_position, initial_velocity, initial_acceleration,
                        gameparams.playerradius, gameparams.playerbouncing)

        # Set the not random reset position
        self.default_position = initial_position

        # Initialise current action + can_kick, which prevents kick-spamming
        self.current_action = playeraction.Action()
        self.can_kick = True

        # Records the number of kicks the player has made; used for reward shaping
        self.kick_count = 0

        # player properties
        self.team = team
        self.mass = 1 / gameparams.playerinvmass
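
One caveat with this signature: Python evaluates default arguments once, so every player constructed without explicit values shares the same np.zeros(2) arrays. If any code mutates them in place, state leaks between instances. The usual defensive pattern, as a standalone sketch (the class name is illustrative):

    import numpy as np

    class Example:
        def __init__(self, velocity=None):
            # Build the default per instance so no array is shared
            self.velocity = np.zeros(2) if velocity is None else np.asarray(velocity, dtype=float)
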
Example #6
    def getAction(self, frame=None):
        # Ignore frame
        _ = frame
        # Always returns the idle action (no movement, no kicking),
        # regardless of game state
        return playeraction.Action(0)