def get_reward(self, reset_reason):
    """Return the step reward computed for robot 0 and roll the ball history.

    Components, applied in this order:

    * the change in ``helper.dipole_potential`` of the ball position between
      ``self.pre_ball`` and ``self.cur_ball``; replaced by 0 whenever
      ``reset_reason`` is not ``NONE``;
    * minus ``helper.distance2`` evaluated on the ball and robot 0
      coordinates;
    * +500 if robot 0's ``TOUCH`` entry is truthy;
    * +1000 on ``SCORE_MYTEAM`` and -1000 on ``SCORE_OPPONENT``.

    Args:
        reset_reason: Value compared against ``NONE``, ``SCORE_MYTEAM`` and
            ``SCORE_OPPONENT``.

    Returns:
        The accumulated reward.

    Side effects:
        Adjusts ``self.score_sum`` on goals, logs through
        ``self.printConsole`` and stores ``self.cur_ball`` in
        ``self.pre_ball``.
    """
    potential_before = helper.dipole_potential(
        self.pre_ball[X], self.pre_ball[Y], 2.1, 7)
    potential_after = helper.dipole_potential(
        self.cur_ball[X], self.cur_ball[Y], 2.1, 7)
    shaping = potential_after - potential_before

    # The potential difference is discarded on any reset.
    total = 0 if reset_reason != NONE else shaping

    robot = self.cur_my_posture[0]
    # Argument order is (x1, x2, y1, y2), exactly as the helper is called.
    total -= helper.distance2(
        self.cur_ball[X], robot[X], self.cur_ball[Y], robot[Y])

    if robot[TOUCH]:
        total += 500

    # NOTE(review): earlier comments here mentioned bounds of +/-25, which
    # does not match the 1000 actually applied -- confirm the intended scale.
    if reset_reason == SCORE_MYTEAM:
        self.score_sum += 1
        total += 1000
        self.printConsole("my team goal")

    if reset_reason == SCORE_OPPONENT:
        self.score_sum -= 1
        total -= 1000
        self.printConsole("op team goal")

    self.printConsole("reward: " + str(total))
    # The current ball becomes the reference for the next call.
    self.pre_ball = self.cur_ball
    return total
def get_reward(self, reset_reason, i):
    """Return the step reward for robot ``i``.

    Components, applied in this order:

    * the change in ``helper.dipole_potential`` of the ball position between
      ``self.pre_ball`` and ``self.cur_ball``; replaced by 0 whenever
      ``reset_reason`` is not ``NONE``;
    * minus 0.1 times ``helper.distance`` evaluated on the ball and robot
      ``i`` coordinates;
    * +30 for every robot index in ``range(self.number_of_robots)`` whose
      ``TOUCH`` entry is truthy (any teammate counts, not only robot ``i``);
    * +200 on ``SCORE_MYTEAM`` and -200 on ``SCORE_OPPONENT``.

    Args:
        reset_reason: Value compared against ``NONE``, ``SCORE_MYTEAM`` and
            ``SCORE_OPPONENT``.
        i: Index into ``self.cur_my_posture`` of the robot being rewarded.

    Returns:
        The accumulated reward.

    Side effects:
        Adjusts ``self.score_sum`` and logs through ``self.printConsole`` on
        goals. ``self.pre_ball`` is not modified by this method.
    """
    potential_before = helper.dipole_potential(
        self.pre_ball[X], self.pre_ball[Y], 2.1, 7)
    potential_after = helper.dipole_potential(
        self.cur_ball[X], self.cur_ball[Y], 2.1, 7)
    shaping = potential_after - potential_before

    # The potential difference is discarded on any reset.
    total = 0 if reset_reason != NONE else shaping

    me = self.cur_my_posture[i]
    # Argument order is (x1, x2, y1, y2), exactly as the helper is called.
    gap = helper.distance(self.cur_ball[X], me[X], self.cur_ball[Y], me[Y])
    total -= 0.1 * gap

    # Added one bonus at a time so the floating-point result is unchanged.
    for teammate in range(self.number_of_robots):
        if self.cur_my_posture[teammate][TOUCH]:
            total += 30

    # NOTE(review): score_sum changes on every call; if this method is
    # invoked once per robot, a single goal is counted several times --
    # confirm against the caller.
    if reset_reason == SCORE_MYTEAM:
        self.score_sum += 1
        total += 200
        self.printConsole("my team goal")

    if reset_reason == SCORE_OPPONENT:
        self.score_sum -= 1
        total -= 200
        self.printConsole("op team goal")

    return total
def get_reward(self, reset_reason, i):
    """Return the step reward for robot ``i``, logging each component.

    The reward is the sum of four parts, each printed through
    ``self.printConsole`` as it is computed:

    * potential: change in ``helper.dipole_potential`` of the ball position
      between ``self.pre_ball`` and ``self.cur_ball``; 0 whenever
      ``reset_reason`` is not ``NONE``;
    * distance: -0.1 times ``helper.distance`` evaluated on the ball and
      robot ``i`` coordinates;
    * touch: depends on where the ball is and on robot ``i``'s ``TOUCH``
      entry (see the zone table in the body);
    * goal: +500 on ``SCORE_MYTEAM`` and -500 on ``SCORE_OPPONENT``.

    Args:
        reset_reason: Value compared against ``NONE``, ``SCORE_MYTEAM`` and
            ``SCORE_OPPONENT``.
        i: Index into ``self.cur_my_posture`` of the robot being rewarded.

    Returns:
        The sum of the four components.

    Side effects:
        Adjusts ``self.score_sum`` on goals and logs through
        ``self.printConsole``. ``self.pre_ball`` is not modified here.
    """
    potential_before = helper.dipole_potential(
        self.pre_ball[X], self.pre_ball[Y], 2.1, 30)
    potential_after = helper.dipole_potential(
        self.cur_ball[X], self.cur_ball[Y], 2.1, 30)
    shaping = potential_after - potential_before
    # The potential difference is discarded on any reset.
    potential_rew = 0 if reset_reason != NONE else shaping
    self.printConsole(' potential reward ' + str(i) + ': ' + str(potential_rew))

    me = self.cur_my_posture[i]
    # Argument order is (x1, x2, y1, y2), exactly as the helper is called.
    dist_rew = -0.1 * helper.distance(
        self.cur_ball[X], me[X], self.cur_ball[Y], me[Y])
    self.printConsole(' distance reward ' + str(i) + ': ' + str(dist_rew))

    # Classify the ball position into a (case id, touch delta) pair.
    # Case 0 means "no touch reward in this zone"; the TOUCH entry is not
    # even read there.
    # NOTE(review): the thresholds look like field / goal-area bounds --
    # confirm against the simulator's field dimensions.
    ball_x = self.cur_ball[X]
    ball_y = self.cur_ball[Y]
    if abs(ball_x) < 1.85 and abs(ball_y) < 1.3:
        zone_case, zone_delta = 1, 20
    elif ball_x > 0 and abs(ball_y) < 0.3:
        zone_case, zone_delta = 2, 20
    elif ball_x < 0 and abs(ball_y) < 0.3:
        zone_case, zone_delta = 0, 0
    else:
        zone_case, zone_delta = 3, -0.01

    touch_rew = 0
    touch_case = 0
    if zone_case and self.cur_my_posture[i][TOUCH]:
        touch_rew += zone_delta
        touch_case = zone_case
    self.printConsole(' touch reward ' + str(i) + ': ' + str(touch_rew)
                      + ' (case: ' + str(touch_case) + ')')

    goal_rew = 0
    if reset_reason == SCORE_MYTEAM:
        self.score_sum += 1
        goal_rew += 500
        self.printConsole("my team goal")

    if reset_reason == SCORE_OPPONENT:
        self.score_sum -= 1
        goal_rew -= 500
        self.printConsole("op team goal")

    # Summation order is kept so the floating-point result is unchanged.
    rew = potential_rew + dist_rew + touch_rew + goal_rew
    self.printConsole(' reward ' + str(i) + ': ' + str(rew))
    return rew
def get_reward(self, reset_reason):
    """Return the step reward and roll the ball history.

    The reward is the change in ``helper.dipole_potential`` of the ball
    position between ``self.pre_ball`` and ``self.cur_ball``, plus 10 on
    ``SCORE_MYTEAM`` and minus 10 on ``SCORE_OPPONENT``.

    Args:
        reset_reason: Value compared against ``SCORE_MYTEAM`` and
            ``SCORE_OPPONENT``.

    Returns:
        The accumulated reward.

    Side effects:
        Adjusts ``self.score_sum`` on goals, logs through
        ``self.printConsole`` and stores ``self.cur_ball`` in
        ``self.pre_ball``.
    """
    potential_before = helper.dipole_potential(
        self.pre_ball[X], self.pre_ball[Y], 2.1, 3)
    potential_after = helper.dipole_potential(
        self.cur_ball[X], self.cur_ball[Y], 2.1, 3)
    total = potential_after - potential_before

    # No deadlock penalty is applied here; only goals adjust the reward.
    if reset_reason == SCORE_MYTEAM:
        self.score_sum += 1
        total += 10
        self.printConsole("my team goal")

    if reset_reason == SCORE_OPPONENT:
        self.score_sum -= 1
        total -= 10
        self.printConsole("op team goal")

    self.printConsole("reward: " + str(total))
    # The current ball becomes the reference for the next call.
    self.pre_ball = self.cur_ball
    return total
def get_reward(self, reset_reason):
    """Return the step reward, including robot-deactivation terms.

    Components, applied in this order:

    * the change in ``helper.dipole_potential`` of the ball position between
      ``self.pre_ball`` and ``self.cur_ball``;
    * -1 for each own robot whose ``ACTIVE`` entry equals ``False`` while
      its stored flag in ``self.active_flag[MY_TEAM]`` is ``True``;
    * +1 for each opponent robot meeting the same condition against
      ``self.active_flag[OP_TEAM]``;
    * +50 on ``SCORE_MYTEAM`` and -50 on ``SCORE_OPPONENT``.

    Args:
        reset_reason: Value compared against ``SCORE_MYTEAM`` and
            ``SCORE_OPPONENT``.

    Returns:
        The accumulated reward.

    Side effects:
        Overwrites every entry of ``self.active_flag`` for both teams with
        the current ``ACTIVE`` values, adjusts ``self.score_sum`` on goals,
        logs through ``self.printConsole`` and stores ``self.cur_ball`` in
        ``self.pre_ball``.
    """
    potential_before = helper.dipole_potential(
        self.pre_ball[X], self.pre_ball[Y], 2.1, 5)
    potential_after = helper.dipole_potential(
        self.cur_ball[X], self.cur_ball[Y], 2.1, 5)
    total = potential_after - potential_before

    # (team key, current postures, reward delta, log message); own team is
    # processed completely before the opponent team.
    deactivation_rules = (
        (MY_TEAM, self.cur_my_posture, -1, "my team deactive"),
        (OP_TEAM, self.cur_op_posture, 1, "opponent team deactive"),
    )
    for team, postures, delta, message in deactivation_rules:
        for idx in range(self.number_of_robots):
            now_active = postures[idx][ACTIVE]
            # The exact comparisons (== False / is True) are kept on purpose:
            # switching to truthiness could change results for non-bool
            # values.
            if (now_active == False) and (self.active_flag[team][idx] is True):  # noqa: E712
                total += delta
                self.printConsole(message)
            # Remember the current state so only transitions are rewarded.
            self.active_flag[team][idx] = now_active

    # NOTE(review): earlier comments here mentioned bounds of +/-25, which
    # does not match the 50 actually applied -- confirm the intended scale.
    if reset_reason == SCORE_MYTEAM:
        self.score_sum += 1
        total += 50
        self.printConsole("my team goal")

    if reset_reason == SCORE_OPPONENT:
        self.score_sum -= 1
        total -= 50
        self.printConsole("op team goal")

    self.printConsole("reward: " + str(total))
    # The current ball becomes the reference for the next call.
    self.pre_ball = self.cur_ball
    return total
def get_reward(self, reset_reason):
    """Return ``helper.dipole_potential`` at robot 4's position as the reward.

    Unlike a ball-based reward, this variant only looks at the X/Y entries
    of ``self.cur_my_posture[4]``; goals do not change the result.

    Args:
        reset_reason: Accepted for interface compatibility; not used.

    Returns:
        The value returned by ``helper.dipole_potential`` for robot 4's
        coordinates with the constants 2.1 and 0.3.

    Side effects:
        Logs the reward through ``self.printConsole`` and stores
        ``self.cur_ball`` in ``self.pre_ball``.
    """
    tracked_robot = self.cur_my_posture[4]
    robot_potential = helper.dipole_potential(
        tracked_robot[X], tracked_robot[Y], 2.1, 0.3)

    self.printConsole("reward: " + str(robot_potential))
    # The ball history is still advanced even though it is not used here.
    self.pre_ball = self.cur_ball
    return robot_potential