Example #1
0
    def global_step(self):
        """Advance the Pong scene by one frame and update both scores.

        In order: bump the frame counter, point the trainer servos at their
        targets when not in multiplayer mode, delegate to
        ``Scene.global_step``, reflect the ball off the y = +/-1 walls,
        credit a small bonus whenever the ball's x velocity changes sign
        relative to ``timeout_dir``, and finally restart from the centre on
        a timeout or once the ball is beyond ``|x| > 1.65``.
        """
        self.frame += 1

        if not self.multiplayer:
            # Trainer: drive the p1x / p1y servos toward the trainer target.
            servo_params = (1.0, 0.02, 0.02, 4)
            self.p1x.set_servo_target(self.trainer_x, *servo_params)
            self.p1y.set_servo_target(self.trainer_y, *servo_params)

        Scene.global_step(self)

        self.ball_x, vel_x = self.ballx.current_position()
        self.ball_y, vel_y = self.bally.current_position()

        # Past a horizontal wall and still moving outward: flip the y velocity.
        past_wall = np.abs(self.ball_y) > 1.0
        if past_wall and self.ball_y * vel_y > 0:
            self.bally.reset_current_position(self.ball_y, -vel_y)

        direction_changed = vel_x * self.timeout_dir < 0
        if not direction_changed:
            self.timeout -= 1
        else:
            # Hint for early learning: reward simply hitting the ball.
            hit_bonus = 0.01 * np.abs(vel_x)
            if self.timeout_dir < 0:
                self.score_left += hit_bonus
            else:
                self.score_right += hit_bonus
            self.timeout_dir *= -1
            self.timeout = 150
            self.bounce_n += 1

        out_of_field = np.abs(self.ball_x) > 1.65
        timed_out = self.timeout == 0
        if not (out_of_field or timed_out):
            return

        single_player = self.players_count == 1

        if timed_out:
            # Send the ball in the same direction on timeout.
            self.restart_from_center(single_player or vel_x < 0)
            return

        # The ball left the field. A point is only granted if the ball was
        # hit at least once this rally; the other side is always penalised.
        rally_had_hit = self.bounce_n > 0
        if vel_x > 0:
            if rally_had_hit:
                self.score_left += 1
            self.score_right -= 1
        else:
            if rally_had_hit:
                self.score_right += 1.0
            self.score_left -= 1
        # Winning streak: let it hit more.
        self.restart_from_center(single_player or vel_x > 0)
    def global_step(self):
        """Advance the scene by one frame and track ball bounces.

        This variant sets no trainer servo targets and awards no score. It
        bumps the frame counter, delegates to ``Scene.global_step``,
        refreshes the cached ball position, reflects the ball off the
        y = +/-1 walls, and records a bounce whenever the x velocity changes
        sign relative to ``timeout_dir``.
        """
        self.frame += 1

        Scene.global_step(self)

        self.ball_x, vel_x = self.ballx.current_position()
        self.ball_y, vel_y = self.bally.current_position()

        # Past a horizontal wall and still moving outward: flip the y velocity.
        past_wall = np.abs(self.ball_y) > 1.0
        if past_wall and self.ball_y * vel_y > 0:
            self.bally.reset_current_position(self.ball_y, -vel_y)

        direction_changed = vel_x * self.timeout_dir < 0
        if not direction_changed:
            return

        # The ball turned around: flip the tracked direction, re-arm the
        # timeout and count the bounce.
        self.timeout_dir *= -1
        self.timeout = self.TIMEOUT
        self.bounce_n += 1
    def global_step(self):
        """Advance the scene by one frame with plain win/lose scoring.

        In order: bump the frame counter, delegate to ``Scene.global_step``,
        refresh the cached ball position, reflect the ball off the y = +/-1
        walls, and count a bounce when the x velocity changes sign relative
        to ``timeout_dir``. The timeout counter is decremented on every
        frame that does not end the rally. When it reaches zero, or the ball
        is beyond ``|x| > 1.65``, the ball is restarted from the centre and
        the timeout is re-armed to ``TIMEOUT``.
        """
        self.frame += 1

        Scene.global_step(self)

        self.ball_x, vel_x = self.ballx.current_position()
        self.ball_y, vel_y = self.bally.current_position()

        # Past a horizontal wall and still moving outward: flip the y velocity.
        past_wall = np.abs(self.ball_y) > 1.0
        if past_wall and self.ball_y * vel_y > 0:
            self.bally.reset_current_position(self.ball_y, -vel_y)

        # A bounce only flips the tracked direction and bumps the counter;
        # it neither scores nor re-arms the timeout in this variant.
        if vel_x * self.timeout_dir < 0:
            self.timeout_dir *= -1
            self.bounce_n += 1

        timed_out = self.timeout == 0
        if not (np.abs(self.ball_x) > 1.65 or timed_out):
            self.timeout -= 1
            return

        if timed_out:
            # Send the ball in the same direction on timeout.
            self.restart_from_center(vel_x < 0)
        else:
            heading_right = vel_x > 0
            if heading_right:
                self.score_left += 1
            else:
                self.score_right += 1.0
            # Winning streak: let it hit more.
            self.restart_from_center(heading_right)
        self.timeout = self.TIMEOUT
Example #4
0
    def global_step(self):
        """Advance the scene by one frame and apply the extended scoring rules.

        In order, this method:

        1. Bumps ``frame`` and, when not in multiplayer mode, sets the
           ``p1x`` / ``p1y`` servo targets to ``trainer_x`` / ``trainer_y``.
        2. Delegates to ``Scene.global_step`` and refreshes the cached ball
           position (``ball_x``, ``ball_y``).
        3. Reflects the ball off the y = +/-1 walls.
        4. On a change of x direction relative to ``timeout_dir``: credits a
           small hit bonus, flips ``timeout_dir``, re-arms ``timeout`` and
           updates the total and per-side consecutive bounce counters.
           Otherwise decrements ``timeout`` and ``episode_time``.
        5. Penalises a side whose consecutive bounce counter exceeds one.
        6. Handles timeout / ball-out-of-field scoring, or, failing that,
           the ``episode_time < 0`` penalty.

        Every scoring event also updates a per-side, per-reason counter in
        ``score_board`` and calls ``restart_from_center``.
        """
        self.frame += 1

        if not self.multiplayer:
            # Trainer
            self.p1x.set_servo_target(self.trainer_x, 0.02, 0.02, 4)
            self.p1y.set_servo_target(self.trainer_y, 0.02, 0.02, 4)

        Scene.global_step(self)

        self.ball_x, ball_vx = self.ballx.current_position()
        self.ball_y, ball_vy = self.bally.current_position()

        # Past a horizontal wall and still moving outward: flip the y velocity.
        if np.abs(self.ball_y) > 1.0 and self.ball_y * ball_vy > 0:
            self.bally.reset_current_position(self.ball_y, -ball_vy)


        # Added by xian: award different scores depending on the state of
        # the game. The if / else blocks below were adapted for that purpose.
        if ball_vx * self.timeout_dir < 0:
            # The x velocity changed sign relative to timeout_dir (a hit).
            if self.timeout_dir < 0:
                self.score_left += 0.01 * np.abs(ball_vx)  # hint for early learning: hit the ball!
            else:
                self.score_right += 0.01 * np.abs(ball_vx)
            self.timeout_dir *= -1
            self.timeout = self.TIMEOUT  # hua-todo
            self.bounce_n += 1
            # Track consecutive bounces per half of the field; a bounce on
            # one half resets the other half's counter.
            if self.ball_x > 0:  # hua-todo
                self.bounce_n_right += 1
                self.bounce_n_left = 0
            else:
                self.bounce_n_left += 1
                self.bounce_n_right = 0
        else:
            self.timeout -= 1
            self.episode_time -= 1

        # Double hit: more than one consecutive bounce on the same half costs
        # that side a point and gives one to the other side.
        # NOTE(review): these counters are not reset here; presumably
        # restart_from_center clears them -- confirm.
        if self.bounce_n_right > 1:  # right half bounced twice in a row
            self.score_left += 1
            self.score_right -= 1
            self.score_board['left']['oppo_double_hit'] += 1
            self.restart_from_center(self.players_count == 1 or ball_vx > 0)
        elif self.bounce_n_left > 1:
            self.score_left -= 1
            self.score_right += 1
            self.score_board['right']['oppo_double_hit'] += 1
            self.restart_from_center(self.players_count == 1 or ball_vx > 0)

        # NOTE(review): this check still runs on the same frame after a
        # double-hit restart above, using the ball_x / ball_vx values read
        # before that restart -- confirm this is intended.
        if np.abs(self.ball_x) > 1.65 or self.timeout == 0:
            if self.timeout == 0:
                if np.abs(self.ball_x) < 1:
                    # Timed out with |ball_x| < 1: recorded as a "slow ball";
                    # the sign of timeout_dir selects which side is credited.
                    if self.timeout_dir < 0:
                        self.score_left += 1
                        self.score_right -= 1
                        self.score_board['left']['oppo_slow_ball'] += 1
                    else:
                        self.score_left -= 1
                        self.score_right += 1
                        self.score_board['right']['oppo_slow_ball'] += 1
                else:
                    # Timed out with |ball_x| >= 1: recorded as a missed
                    # catch, credited the opposite way to the slow-ball case.
                    if self.timeout_dir < 0:
                        self.score_left -= 1
                        self.score_right += 1
                        self.score_board['right']['oppo_miss_catch'] += 1
                    else:
                        self.score_left += 1
                        self.score_right -= 1
                        self.score_board['left']['oppo_miss_catch'] += 1
                self.restart_from_center(self.players_count == 1 or ball_vx < 0)  # send ball in same dir on timeout
            elif ball_vx > 0:
                # Ball beyond |x| > 1.65 with positive x velocity: left only
                # gains a point if the ball was hit at least once
                # (bounce_n > 0); right is penalised either way.
                if self.bounce_n > 0:
                    self.score_left += 1
                    self.score_board['left']['oppo_miss_catch'] += 1
                else:
                    self.score_board['left']['oppo_miss_start'] += 1
                self.score_right -= 1
                self.restart_from_center(self.players_count == 1 or ball_vx > 0)  # winning streak, let it hit more
            else:
                # Mirror case: ball beyond |x| > 1.65 with non-positive x velocity.
                if self.bounce_n > 0:
                    self.score_right += 1.0
                    self.score_board['right']['oppo_miss_catch'] += 1
                else:
                    self.score_board['right']['oppo_miss_start'] += 1
                self.score_left -= 1
                self.restart_from_center(self.players_count == 1 or ball_vx > 0)
        elif self.episode_time < 0:
            # episode_time ran out without a timeout or out-of-field event.
            # NOTE(review): only the left side is penalised, and 'not_finish'
            # is decremented while every other score_board counter is
            # incremented -- confirm both are intended.
            self.score_left -= 1
            self.score_board['left']['not_finish'] -= 1
            self.restart_from_center(self.players_count == 1 or ball_vx < 0)