Python ACRONYM_REGEX Examples

Programming Language: Python

Namespace/Package Name: textacy.constants

Class/Type: ACRONYM_REGEX

Examples at hotexamples.com: 8

Python ACRONYM_REGEX - 8 examples found. These are the top-rated real-world Python examples of textacy.constants.ACRONYM_REGEX, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

search(4)

match(1)

Example #1

Show file

File: text_utils.py Project: nigeljyng/textacy

def is_acronym(token, exclude=None):
    """
    Report whether a single string token qualifies as an acronym.

    The token is run through a series of cheap structural checks and is
    finally tested against ``ACRONYM_REGEX``; failing any check yields False.

    Args:
        token (str): single word to evaluate
        exclude (Set[str]): tokens that should never count as acronyms, even
            if they would otherwise pass every check; a falsy value (the
            default) disables this filter

    Returns:
        bool: True only if the token passes every check
    """
    # caller-supplied exclusions are rejected before anything else
    if exclude and token in exclude:
        return False
    # empty strings and strings containing a space are never acronyms
    if not token or ' ' in token:
        return False
    # a 2-character token must not contain lower-case letters
    if len(token) == 2 and not token.isupper():
        return False
    # purely numeric tokens are rejected
    if token.isdigit():
        return False
    # need an upper-case letter somewhere, or a digit at either end
    if not (any(ch.isupper() for ch in token)
            or token[0].isdigit()
            or token[-1].isdigit()):
        return False
    # the alphanumeric character count must lie in [2, 10]
    alnum_count = sum(ch.isalnum() for ch in token)
    if not 2 <= alnum_count <= 10:
        return False
    # finally, the token must match the pattern (per the original note: only
    # certain combinations of letters, digits, and '&/.-' are allowed)
    return bool(ACRONYM_REGEX.match(token))

Example #2

Show file

File: text_utils.py Project: chartbeat-labs/textacy

def is_acronym(token, exclude=None):
    """
    Decide whether a string token is a valid acronym.

    Args:
        token (str): the single word being tested
        exclude (Set[str]): known tokens to reject outright even though they
            would otherwise be accepted; ignored when falsy

    Returns:
        bool: False as soon as any rule is violated, True otherwise
    """
    # rule 1: not explicitly excluded, not empty, and free of spaces
    if (exclude and token in exclude) or not token or ' ' in token:
        return False
    # rule 2: two-character tokens may not contain lower-case letters
    if len(token) == 2:
        if not token.isupper():
            return False
    # rule 3: tokens made up solely of digits are rejected
    if token.isdigit():
        return False
    # rule 4: without any upper-case letter, the token has to begin or end
    # with a digit
    contains_upper = any(letter.isupper() for letter in token)
    if not contains_upper:
        if not (token[0].isdigit() or token[-1].isdigit()):
            return False
    # rule 5: between 2 and 10 alphanumeric characters, inclusive
    alnum_total = len([letter for letter in token if letter.isalnum()])
    if alnum_total < 2 or alnum_total > 10:
        return False
    # rule 6: ACRONYM_REGEX must match (original note: only certain
    # combinations of letters, digits, and '&/.-' allowed)
    return True if ACRONYM_REGEX.match(token) else False

Example #3

Show file

def test_bad_acronym_regex():
    """Check that ACRONYM_REGEX.search finds nothing in any BAD_ACRONYMS item."""
    for candidate in BAD_ACRONYMS:
        found = ACRONYM_REGEX.search(candidate)
        assert found is None

Example #4

Show file

def test_good_acronym_regex():
    """Check that ACRONYM_REGEX.search matches each GOOD_ACRONYMS item in full."""
    for expected in GOOD_ACRONYMS:
        # the matched text must be the whole item, not just a piece of it
        matched_text = ACRONYM_REGEX.search(expected).group()
        assert expected == matched_text

Example #5

Show file

 def test_bad_acronym_regex(self):
     """Check that ACRONYM_REGEX.search finds nothing in any BAD_ACRONYMS item."""
     for candidate in BAD_ACRONYMS:
         found = ACRONYM_REGEX.search(candidate)
         self.assertIsNone(found)

Example #6

Show file

 def test_good_acronym_regex(self):
     """Check that ACRONYM_REGEX.search matches each GOOD_ACRONYMS item in full."""
     for expected in GOOD_ACRONYMS:
         # the matched text must be the whole item, not just a piece of it
         matched_text = ACRONYM_REGEX.search(expected).group()
         self.assertEqual(expected, matched_text)

Example #7

Show file

File: test_constants.py Project: chartbeat-labs/textacy

 def test_bad_acronym_regex(self):
     """Every entry of BAD_ACRONYMS must yield no ACRONYM_REGEX.search result."""
     for bad_token in BAD_ACRONYMS:
         search_result = ACRONYM_REGEX.search(bad_token)
         self.assertIsNone(search_result)

Example #8

Show file

File: test_constants.py Project: chartbeat-labs/textacy

 def test_good_acronym_regex(self):
     """Every entry of GOOD_ACRONYMS must be returned whole by ACRONYM_REGEX.search."""
     for good_token in GOOD_ACRONYMS:
         search_result = ACRONYM_REGEX.search(good_token)
         # compare against the full matched text so partial matches fail
         self.assertEqual(good_token, search_result.group())