Ejemplo n.º 1
0
    def get_reward(self, reset_reason):
        """Return the step reward for robot 0 from ball motion, touch and goals.

        The reward is assembled from these terms, in order:

        * the difference between ``helper.dipole_potential`` at the current
          and at the previous ball position (replaced by 0 whenever
          ``reset_reason`` is not ``NONE``);
        * minus ``helper.distance2`` evaluated on the ball and robot 0
          coordinates;
        * +500 when robot 0's ``TOUCH`` entry is truthy;
        * +1000 / -1000 when ``reset_reason`` equals ``SCORE_MYTEAM`` /
          ``SCORE_OPPONENT``.

        Side effects: ``self.score_sum`` is adjusted on goals, messages are
        sent to ``self.printConsole`` and ``self.pre_ball`` is rebound to
        ``self.cur_ball``.

        Args:
            reset_reason: Reset code compared against ``NONE``,
                ``SCORE_MYTEAM`` and ``SCORE_OPPONENT``.

        Returns:
            The accumulated reward for this step.
        """
        ball_before = self.pre_ball
        ball_now = self.cur_ball

        # NOTE(review): 2.1 and 7 are forwarded unchanged to the helper; their
        # meaning is defined by helper.dipole_potential -- confirm there.
        potential_before = helper.dipole_potential(
            ball_before[X], ball_before[Y], 2.1, 7)
        potential_now = helper.dipole_potential(
            ball_now[X], ball_now[Y], 2.1, 7)
        shaping = potential_now - potential_before

        # The potential difference is discarded on any reset.
        reward = 0 if reset_reason != NONE else shaping

        robot = self.cur_my_posture[0]
        reward -= helper.distance2(ball_now[X], robot[X],
                                   ball_now[Y], robot[Y])
        if robot[TOUCH]:
            reward += 500

        if reset_reason == SCORE_MYTEAM:
            self.score_sum += 1
            reward += 1000
            self.printConsole("my team goal")

        if reset_reason == SCORE_OPPONENT:
            self.score_sum -= 1
            reward -= 1000
            self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        # Remember the ball state for the next call's potential difference.
        self.pre_ball = ball_now
        return reward
Ejemplo n.º 2
0
    def get_reward(self, reset_reason, i):
        """Return the step reward for robot ``i``.

        The reward is assembled from these terms, in order:

        * the difference between ``helper.dipole_potential`` at the current
          and at the previous ball position (replaced by 0 whenever
          ``reset_reason`` is not ``NONE``);
        * minus 0.1 times ``helper.distance`` evaluated on the ball and
          robot ``i`` coordinates;
        * +30 for every robot index below ``self.number_of_robots`` whose
          ``TOUCH`` entry is truthy (any teammate, not only robot ``i``);
        * +200 / -200 when ``reset_reason`` equals ``SCORE_MYTEAM`` /
          ``SCORE_OPPONENT``.

        Side effects: ``self.score_sum`` is adjusted on goals and a message
        is sent to ``self.printConsole``. ``self.pre_ball`` is not modified
        by this method.

        Args:
            reset_reason: Reset code compared against ``NONE``,
                ``SCORE_MYTEAM`` and ``SCORE_OPPONENT``.
            i: Index into ``self.cur_my_posture`` of the robot being rewarded.

        Returns:
            The accumulated reward for this step.
        """
        ball_before = self.pre_ball
        ball_now = self.cur_ball

        # NOTE(review): 2.1 and 7 are forwarded unchanged to the helper; their
        # meaning is defined by helper.dipole_potential -- confirm there.
        potential_before = helper.dipole_potential(
            ball_before[X], ball_before[Y], 2.1, 7)
        potential_now = helper.dipole_potential(
            ball_now[X], ball_now[Y], 2.1, 7)
        shaping = potential_now - potential_before

        # The potential difference is discarded on any reset.
        reward = 0 if reset_reason != NONE else shaping

        me = self.cur_my_posture[i]
        reward -= 0.1 * helper.distance(ball_now[X], me[X],
                                        ball_now[Y], me[Y])

        # Added one touch at a time so the accumulation order stays the same.
        for robot_id in range(self.number_of_robots):
            if self.cur_my_posture[robot_id][TOUCH]:
                reward += 30

        if reset_reason == SCORE_MYTEAM:
            self.score_sum += 1
            reward += 200
            self.printConsole("my team goal")

        if reset_reason == SCORE_OPPONENT:
            self.score_sum -= 1
            reward -= 200
            self.printConsole("op team goal")

        return reward
Ejemplo n.º 3
0
    def get_reward(self, reset_reason, i):
        """Return the step reward for robot ``i`` and log each component.

        Four components are computed, logged through ``self.printConsole``
        and finally summed:

        * potential: difference between ``helper.dipole_potential`` at the
          current and at the previous ball position, or 0 whenever
          ``reset_reason`` is not ``NONE``;
        * distance: -0.1 times ``helper.distance`` evaluated on the ball and
          robot ``i`` coordinates;
        * touch: depends on the ball position and robot ``i``'s ``TOUCH``
          entry (see the zone table in the body);
        * goal: +500 / -500 when ``reset_reason`` equals ``SCORE_MYTEAM`` /
          ``SCORE_OPPONENT``.

        Side effects: ``self.score_sum`` is adjusted on goals and several
        messages are sent to ``self.printConsole``. ``self.pre_ball`` is not
        modified by this method.

        Args:
            reset_reason: Reset code compared against ``NONE``,
                ``SCORE_MYTEAM`` and ``SCORE_OPPONENT``.
            i: Index into ``self.cur_my_posture`` of the robot being rewarded.

        Returns:
            ``potential + distance + touch + goal`` for this step.
        """
        ball_before = self.pre_ball
        ball_now = self.cur_ball
        label = str(i) + ': '

        # --- potential component -------------------------------------------
        # NOTE(review): 2.1 and 30 are forwarded unchanged to the helper;
        # their meaning is defined by helper.dipole_potential -- confirm.
        potential_before = helper.dipole_potential(
            ball_before[X], ball_before[Y], 2.1, 30)
        potential_now = helper.dipole_potential(
            ball_now[X], ball_now[Y], 2.1, 30)
        potential_rew = potential_now - potential_before
        if reset_reason != NONE:
            # The potential difference is discarded on any reset.
            potential_rew = 0
        self.printConsole('     potential reward ' + label +
                          str(potential_rew))

        # --- distance component --------------------------------------------
        me = self.cur_my_posture[i]
        dist_rew = -0.1 * helper.distance(ball_now[X], me[X],
                                          ball_now[Y], me[Y])
        self.printConsole('         distance reward ' + label +
                          str(dist_rew))

        # --- touch component -----------------------------------------------
        # Pick the (reward, case) pair that applies if the robot is touching:
        #   case 1: |x| < 1.85 and |y| < 1.3          -> +20
        #   case 2: otherwise, x > 0 and |y| < 0.3     -> +20
        #   (none): otherwise, x < 0 and |y| < 0.3     -> touch is ignored
        #   case 3: anywhere else                      -> -0.01
        # NOTE(review): the thresholds presumably describe the playing field
        # and the goal mouths -- confirm against the field dimensions.
        ball_x = ball_now[X]
        ball_y = ball_now[Y]
        if (abs(ball_x) < 1.85) and (abs(ball_y) < 1.3):
            touch_outcome = (20, 1)
        elif (ball_x > 0) and (abs(ball_y) < 0.3):
            touch_outcome = (20, 2)
        elif (ball_x < 0) and (abs(ball_y) < 0.3):
            touch_outcome = None
        else:
            touch_outcome = (-0.01, 3)

        touch_rew = 0
        touch_case = 0
        if touch_outcome is not None and me[TOUCH]:
            touch_rew, touch_case = touch_outcome
        self.printConsole('             touch reward ' + label +
                          str(touch_rew) + ' (case: ' + str(touch_case) + ')')

        # --- goal component ------------------------------------------------
        goal_rew = 0
        if reset_reason == SCORE_MYTEAM:
            self.score_sum += 1
            goal_rew += 500
            self.printConsole("my team goal")

        if reset_reason == SCORE_OPPONENT:
            self.score_sum -= 1
            goal_rew -= 500
            self.printConsole("op team goal")

        # Summation order is kept fixed so floating-point results are stable.
        rew = potential_rew + dist_rew + touch_rew + goal_rew

        self.printConsole('                 reward ' + label + str(rew))
        return rew
Ejemplo n.º 4
0
    def get_reward(self, reset_reason):
        """Return the step reward from ball potential change and goals.

        The reward is the difference between ``helper.dipole_potential`` at
        the current and at the previous ball position, plus 10 when
        ``reset_reason`` equals ``SCORE_MYTEAM`` and minus 10 when it equals
        ``SCORE_OPPONENT``.

        Side effects: ``self.score_sum`` is adjusted on goals, messages are
        sent to ``self.printConsole`` and ``self.pre_ball`` is rebound to
        ``self.cur_ball``.

        Args:
            reset_reason: Reset code compared against ``SCORE_MYTEAM`` and
                ``SCORE_OPPONENT``.

        Returns:
            The accumulated reward for this step.
        """
        ball_before = self.pre_ball
        ball_now = self.cur_ball

        # NOTE(review): 2.1 and 3 are forwarded unchanged to the helper; their
        # meaning is defined by helper.dipole_potential -- confirm there.
        potential_before = helper.dipole_potential(
            ball_before[X], ball_before[Y], 2.1, 3)
        potential_now = helper.dipole_potential(
            ball_now[X], ball_now[Y], 2.1, 3)
        reward = potential_now - potential_before

        # Goal bonus / penalty.
        if reset_reason == SCORE_MYTEAM:
            self.score_sum += 1
            reward += 10
            self.printConsole("my team goal")

        if reset_reason == SCORE_OPPONENT:
            self.score_sum -= 1
            reward -= 10
            self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        # Remember the ball state for the next call's potential difference.
        self.pre_ball = ball_now
        return reward
Ejemplo n.º 5
0
    def get_reward(self, reset_reason):
        """Return the step reward from ball potential, deactivations and goals.

        The reward is assembled from these terms, in order:

        * the difference between ``helper.dipole_potential`` at the current
          and at the previous ball position;
        * -1 for every own robot whose ``ACTIVE`` entry is now ``False``
          while its stored flag in ``self.active_flag[MY_TEAM]`` was ``True``;
        * +1 for every opponent robot that made the same transition
          (tracked in ``self.active_flag[OP_TEAM]``);
        * +50 / -50 when ``reset_reason`` equals ``SCORE_MYTEAM`` /
          ``SCORE_OPPONENT``.

        Side effects: ``self.active_flag`` is refreshed with the current
        ``ACTIVE`` entries of both teams, ``self.score_sum`` is adjusted on
        goals, messages are sent to ``self.printConsole`` and
        ``self.pre_ball`` is rebound to ``self.cur_ball``.

        Args:
            reset_reason: Reset code compared against ``SCORE_MYTEAM`` and
                ``SCORE_OPPONENT``.

        Returns:
            The accumulated reward for this step.
        """
        ball_before = self.pre_ball
        ball_now = self.cur_ball

        # NOTE(review): 2.1 and 5 are forwarded unchanged to the helper; their
        # meaning is defined by helper.dipole_potential -- confirm there.
        potential_before = helper.dipole_potential(
            ball_before[X], ball_before[Y], 2.1, 5)
        potential_now = helper.dipole_potential(
            ball_now[X], ball_now[Y], 2.1, 5)
        reward = potential_now - potential_before

        # (team key, current postures, reward change, log message).
        # Own team is processed completely before the opponent team.
        deactivation_rules = (
            (MY_TEAM, self.cur_my_posture, -1, "my team deactive"),
            (OP_TEAM, self.cur_op_posture, 1, "opponent team deactive"),
        )
        for team, postures, delta, message in deactivation_rules:
            for robot_id in range(self.number_of_robots):
                now_active = postures[robot_id][ACTIVE]
                # The explicit `== False` / `is True` comparisons are kept on
                # purpose: plain truthiness would treat values such as None
                # or 1 differently.
                if (now_active == False) and (  # noqa: E712
                        self.active_flag[team][robot_id] is True):
                    reward += delta
                    self.printConsole(message)
                # Store the latest state so only transitions are rewarded.
                self.active_flag[team][robot_id] = now_active

        if reset_reason == SCORE_MYTEAM:
            self.score_sum += 1
            reward += 50
            self.printConsole("my team goal")

        if reset_reason == SCORE_OPPONENT:
            self.score_sum -= 1
            reward -= 50
            self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        # Remember the ball state for the next call's potential difference.
        self.pre_ball = ball_now
        return reward
Ejemplo n.º 6
0
    def get_reward(self, reset_reason):
        """Return ``helper.dipole_potential`` at the position of robot 4.

        The reward depends only on the X/Y entries of
        ``self.cur_my_posture[4]``; ball movement and goal events do not
        contribute in this variant, and ``reset_reason`` is not read.

        Side effects: the reward is sent to ``self.printConsole`` and
        ``self.pre_ball`` is rebound to ``self.cur_ball``.

        Args:
            reset_reason: Accepted for interface compatibility; unused.

        Returns:
            The potential value computed for robot 4's position.
        """
        robot = self.cur_my_posture[4]

        # NOTE(review): 2.1 and 0.3 are forwarded unchanged to the helper;
        # their meaning is defined by helper.dipole_potential -- confirm.
        reward = helper.dipole_potential(robot[X], robot[Y], 2.1, 0.3)

        self.printConsole("reward: " + str(reward))
        # Kept even though this variant does not read pre_ball itself.
        self.pre_ball = self.cur_ball
        return reward