Example #1
0
    '\U000e0076',
    '\U000e0077',
    '\U000e0078',
    '\U000e0079',
    '\U000e007a',
    '\U000e007b',
    '\U000e007c',
    '\U000e007d',
    '\U000e007e',
    '\U000e007f',
])
# Collection of every character that is treated as invisible.
# For now it only holds the characters of the Cf category.
_invisible_chars = _category_cf

# The pattern is a single character class listing all invisible characters;
# it is handed to LazyRegex as a callable rather than as a finished string.
invisible_regex = LazyRegex(lambda: '[{}]'.format(''.join(_invisible_chars)))


def contains_invisible(text):
    """Return True if the text contains any of the invisible characters."""
    for candidate in text:
        # Stop at the first hit; the rest of the text does not matter.
        if candidate in _invisible_chars:
            return True
    return False


def replace_invisible(text):
    """Replace invisible characters by '<codepoint>'."""
    def replace(match):
        match = match.group()
        if sys.maxunicode < 0x10ffff and len(match) == 2:
            mask = (1 << 10) - 1
            assert (ord(match[0]) & ~mask == 0xd800)
            assert (ord(match[1]) & ~mask == 0xdc00)
Example #2
0
            # This means ipaddress has correctly determined '1111' is invalid
            pass
except ImportError as e:
    warn('Importing ipaddress.ip_address failed: %s' % e, ImportWarning)

    def ip_address(IP):
        """
        Stand-in for ip_address, defined when the import above failed.

        Calls warn() with a DeprecationWarning, then checks IP against
        ip_regexp.  Nothing is returned for a matching value; a
        ValueError is raised when IP does not match.
        """
        warn('ipaddress backport not available.', DeprecationWarning)
        matched = ip_regexp.match(IP)
        if matched is not None:
            return
        raise ValueError('Invalid IP address')

    # The following flag is used by the unit tests
    ip_address.__fake__ = True

# deprecated IP detector
# This pattern is what the fake ip_address() above matches against when
# the ipaddress import failed.
#
# Shape of the pattern: '^(?:A|B)\Z' where
#   A is a dotted quad of four decimal octets, each limited to 0-255;
#   B is built from colon-separated groups of 1-4 hex digits, with
#     lookaheads capturing a '::' and rejecting a second one, and a final
#     part that accepts either two more hex groups or a dotted-decimal
#     tail (NOTE(review): this looks like IPv6 incl. embedded IPv4 -
#     confirm against the unit tests before relying on the details).
ip_regexp = LazyRegex()
# The hex digit classes only list A-F, so lower-case input relies on
# case-insensitive matching.
ip_regexp.flags = re.IGNORECASE
ip_regexp.raw = (r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
                 r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|'
                 r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)'
                 r'([\dA-F]{1,4}(\4|:\b)|\2){5}'
                 r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|'
                 r'(((2[0-4]|1\d|[1-9])?\d|25[0-5])\.?\b){4}))\Z')


def is_IP(IP):
    """
    Verify the IP address provided is valid.

    No logging is performed.  Use ip_address instead to catch errors.
Example #3
0
"""Character based helper functions (not wiki-dependent)."""
#
# (C) Pywikibot team, 2015-2021
#
# Distributed under the terms of the MIT license.
#
import sys

from pywikibot.tools._unidata import _category_cf
from pywikibot.tools import LazyRegex

# Collection of every character that is treated as invisible.
# For now it only holds the characters of the Cf category.
_invisible_chars = _category_cf

# The pattern is a single character class listing all invisible characters;
# it is handed to LazyRegex as a callable rather than as a finished string.
invisible_regex = LazyRegex(lambda: '[' + ''.join(_invisible_chars) + ']')


def contains_invisible(text):
    """Return True if the text contains any of the invisible characters."""
    # Lazily yield True for each invisible character; taking only the
    # first item stops the scan at the first hit, and the default covers
    # text without any invisible character.
    hits = (True for candidate in text if candidate in _invisible_chars)
    return next(hits, False)


def replace_invisible(text):
    """Replace invisible characters by '<codepoint>'."""
    def replace(match):
        match = match.group()
        if sys.maxunicode < 0x10ffff and len(match) == 2:
            mask = (1 << 10) - 1
            assert ord(match[0]) & ~mask == 0xd800
            assert ord(match[1]) & ~mask == 0xdc00
Example #4
0
    '\U000e0053', '\U000e0054', '\U000e0055', '\U000e0056', '\U000e0057',
    '\U000e0058', '\U000e0059', '\U000e005a', '\U000e005b', '\U000e005c',
    '\U000e005d', '\U000e005e', '\U000e005f', '\U000e0060', '\U000e0061',
    '\U000e0062', '\U000e0063', '\U000e0064', '\U000e0065', '\U000e0066',
    '\U000e0067', '\U000e0068', '\U000e0069', '\U000e006a', '\U000e006b',
    '\U000e006c', '\U000e006d', '\U000e006e', '\U000e006f', '\U000e0070',
    '\U000e0071', '\U000e0072', '\U000e0073', '\U000e0074', '\U000e0075',
    '\U000e0076', '\U000e0077', '\U000e0078', '\U000e0079', '\U000e007a',
    '\U000e007b', '\U000e007c', '\U000e007d', '\U000e007e', '\U000e007f',
])
# This is a set of all invisible characters.
# At the moment we've only added the characters from the Cf category.
# It is frozen so that it cannot be modified after import.
_invisible_chars = frozenset(_category_cf)

# TODO: Is the pattern complex enough to justify a lazy regex?
# The pattern is one character class that lists every invisible
# character; no regex flags are set.
invisible_regex = LazyRegex()
invisible_regex.raw = '[' + ''.join(_invisible_chars) + ']'
invisible_regex.flags = 0


def contains_invisible(text):
    """Return True if the text contains any of the invisible characters."""
    # "Some character is invisible" is the negation of "every character
    # is visible"; all() stops at the first invisible character.
    only_visible = all(char not in _invisible_chars for char in text)
    return not only_visible


def replace_invisible(text):
    """Replace invisible characters by '<codepoint>'."""
    def replace(match):
        match = match.group()
        if sys.maxunicode < 0x10ffff and len(match) == 2:
            mask = (1 << 10) - 1
Example #5
0
            pass
except ImportError as e:
    warn('Importing ipaddress.ip_address failed: %s' % e,
         ImportWarning)

    def ip_address(IP):
        """
        Stand-in for ip_address, defined when the import above failed.

        Calls warn() with a DeprecationWarning, then checks IP against
        ip_regexp.  Nothing is returned for a matching value; a
        ValueError is raised when IP does not match.
        """
        warn('ipaddress backport not available.', DeprecationWarning)
        matched = ip_regexp.match(IP)
        if matched is not None:
            return
        raise ValueError('Invalid IP address')

    # The following flag is used by the unit tests
    ip_address.__fake__ = True

# deprecated IP detector
# This pattern is what the fake ip_address() above matches against when
# the ipaddress import failed.  Overall shape: '^(?:A|B)\Z'.
ip_regexp = LazyRegex()
# The hex digit classes only list A-F, so lower-case input relies on
# case-insensitive matching.
ip_regexp.flags = re.IGNORECASE
ip_regexp.raw = (
    # Alternative A: dotted quad of four decimal octets, each 0-255.
    r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|'
    # Alternative B: colon-separated groups of 1-4 hex digits.  The
    # lookaheads capture a '::' (group 4) and reject a second one.
    # NOTE(review): this looks like IPv6 - confirm against the unit tests.
    r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)'
    r'([\dA-F]{1,4}(\4|:\b)|\2){5}'
    # Final part: either two more hex groups or a dotted-decimal tail.
    r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|'
    r'(((2[0-4]|1\d|[1-9])?\d|25[0-5])\.?\b){4}))\Z')


def is_IP(IP):
    """
    Verify the IP address provided is valid.

    No logging is performed.  Use ip_address instead to catch errors.