def _input_string_transform_factory(alg):
    """
    Build the function applied to the input string before it is split.

    Parameters
    ----------
    alg : ns enum
        Bit flags selecting the transformations. The flags read here are
        ``ns.LOWERCASEFIRST``, ``ns._DUMB``, ``ns.IGNORECASE``,
        ``ns.LOCALENUM`` and ``ns.FLOAT``.

    Returns
    -------
    func : callable
        The result of ``chain_functions`` on the selected transformations
        (presumably a single callable applying them in the order they were
        added -- confirm against ``chain_functions``).

    """
    # Shortcuts.
    lowfirst = alg & ns.LOWERCASEFIRST
    dumb = alg & ns._DUMB

    # Build the chain of functions to execute in order.
    function_chain = []

    # Swap the case only when exactly one of the two flags is set.
    if (dumb and not lowfirst) or (lowfirst and not dumb):
        function_chain.append(methodcaller('swapcase'))

    if alg & ns.IGNORECASE:
        # NEWPY is defined elsewhere in the file; presumably it signals that
        # str.casefold is available -- confirm against its definition.
        function_chain.append(methodcaller('casefold' if NEWPY else 'lower'))

    if alg & ns.LOCALENUM:
        # Create a regular expression that will remove thousands separators.
        strip_thousands = r'''
            (?<=[0-9]{{1}})  # At least 1 number
            (?<![0-9]{{4}})  # No more than 3 numbers
            {nodecimal}      # Cannot follow decimal
            {thou}           # The thousands separator
            (?=[0-9]{{3}}    # Three numbers must follow
             ([^0-9]|$)      # But a non-number after that
            )
        '''
        nodecimal = r''
        if alg & ns.FLOAT:
            # Make a regular expression component that will ensure no
            # separators are removed after a decimal point. The decimal
            # point is escaped so it is always matched literally.
            d = re.escape(get_decimal_point())
            nodecimal += r'(?<!' + d + r'[0-9])'
            nodecimal += r'(?<!' + d + r'[0-9]{2})'
            nodecimal += r'(?<!' + d + r'[0-9]{3})'
        # Escape the separator: unescaped, a '.' would match any character
        # and a plain space would be discarded by re.VERBOSE.
        strip_thousands = strip_thousands.format(
            thou=re.escape(get_thousands_sep()), nodecimal=nodecimal
        )
        strip_thousands = re.compile(strip_thousands, flags=re.VERBOSE)
        function_chain.append(partial(strip_thousands.sub, ''))

        # Create a regular expression that will change the decimal point to
        # a period if not already a period.
        decimal = get_decimal_point()
        if alg & ns.FLOAT and decimal != '.':
            switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])'
            switch_decimal = switch_decimal.format(decimal=re.escape(decimal))
            switch_decimal = re.compile(switch_decimal)
            function_chain.append(partial(switch_decimal.sub, '.'))

    # Return the chained functions.
    return chain_functions(function_chain)
def input_string_transform_factory(alg):
    """
    Build the callable that pre-processes a string.

    Parameters
    ----------
    alg : ns enum
        Flags selecting which transformations are applied to the *str*.

    Returns
    -------
    func : callable
        A function to be used as the *input_transform* argument to
        *parse_string_factory*.

    See Also
    --------
    parse_string_factory

    """
    wants_lower_first = alg & ns.LOWERCASEFIRST
    is_dumb = alg & NS_DUMB

    # Transformations are collected in the order they must run.
    transforms = []

    # Case is swapped when exactly one of the two flags is set.
    if bool(is_dumb) != bool(wants_lower_first):
        transforms.append(methodcaller("swapcase"))

    if alg & ns.IGNORECASE:
        transforms.append(methodcaller("casefold"))

    if alg & ns.LOCALENUM:
        # Look-behinds that keep a separator from being stripped when it
        # sits one, two or three digits after the decimal point.
        after_decimal_guard = r""
        if alg & ns.FLOAT:
            point = re.escape(get_decimal_point())
            after_decimal_guard = "".join(
                r"(?<!" + point + digits
                for digits in (r"[0-9])", r"[0-9]{2})", r"[0-9]{3})")
            )

        # Pattern matching a thousands separator that should be removed.
        thousands_pattern = r"""
            (?<=[0-9]{{1}})  # At least 1 number
            (?<![0-9]{{4}})  # No more than 3 numbers
            {nodecimal}      # Cannot follow decimal
            {thou}           # The thousands separator
            (?=[0-9]{{3}}    # Three numbers must follow
             ([^0-9]|$)      # But a non-number after that
            )
        """.format(
            thou=re.escape(get_thousands_sep()), nodecimal=after_decimal_guard
        )
        thousands_re = re.compile(thousands_pattern, flags=re.VERBOSE)
        transforms.append(partial(thousands_re.sub, ""))

        # A non-period decimal point next to a digit is rewritten as a
        # period.
        locale_point = get_decimal_point()
        if alg & ns.FLOAT and locale_point != ".":
            point_re = re.compile(
                r"(?<=[0-9]){decimal}|{decimal}(?=[0-9])".format(
                    decimal=re.escape(locale_point)
                )
            )
            transforms.append(partial(point_re.sub, "."))

    return chain_functions(transforms)
def input_string_transform_factory(alg):
    """
    Create a function to transform a string.

    Parameters
    ----------
    alg : ns enum
        Indicate how to format the *str*. The flags read here are
        ``ns.LOWERCASEFIRST``, ``NS_DUMB``, ``ns.IGNORECASE``,
        ``ns.LOCALENUM`` and ``ns.FLOAT``.

    Returns
    -------
    func : callable
        A function to be used as the *input_transform* argument to
        *parse_string_factory*.

    See Also
    --------
    parse_string_factory

    """
    # Shortcuts.
    lowfirst = alg & ns.LOWERCASEFIRST
    dumb = alg & NS_DUMB

    # Build the chain of functions to execute in order.
    function_chain = []

    # Swap the case only when exactly one of the two flags is set.
    if (dumb and not lowfirst) or (lowfirst and not dumb):
        function_chain.append(methodcaller("swapcase"))

    if alg & ns.IGNORECASE:
        # NEWPY is defined elsewhere in the file; presumably it signals that
        # str.casefold is available -- confirm against its definition.
        function_chain.append(methodcaller("casefold" if NEWPY else "lower"))

    if alg & ns.LOCALENUM:
        # Create a regular expression that will remove thousands separators.
        strip_thousands = r"""
            (?<=[0-9]{{1}})  # At least 1 number
            (?<![0-9]{{4}})  # No more than 3 numbers
            {nodecimal}      # Cannot follow decimal
            {thou}           # The thousands separator
            (?=[0-9]{{3}}    # Three numbers must follow
             ([^0-9]|$)      # But a non-number after that
            )
        """
        nodecimal = r""
        if alg & ns.FLOAT:
            # Make a regular expression component that will ensure no
            # separators are removed after a decimal point. The decimal
            # point is escaped so it is always matched literally.
            d = re.escape(get_decimal_point())
            nodecimal += r"(?<!" + d + r"[0-9])"
            nodecimal += r"(?<!" + d + r"[0-9]{2})"
            nodecimal += r"(?<!" + d + r"[0-9]{3})"
        # Escape the separator: unescaped, a "." would match any character
        # and a plain space would be discarded by re.VERBOSE.
        strip_thousands = strip_thousands.format(
            thou=re.escape(get_thousands_sep()), nodecimal=nodecimal
        )
        strip_thousands = re.compile(strip_thousands, flags=re.VERBOSE)
        function_chain.append(partial(strip_thousands.sub, ""))

        # Create a regular expression that will change the decimal point to
        # a period if not already a period.
        decimal = get_decimal_point()
        if alg & ns.FLOAT and decimal != ".":
            switch_decimal = r"(?<=[0-9]){decimal}|{decimal}(?=[0-9])"
            switch_decimal = switch_decimal.format(decimal=re.escape(decimal))
            switch_decimal = re.compile(switch_decimal)
            function_chain.append(partial(switch_decimal.sub, "."))

    # Return the chained functions.
    return chain_functions(function_chain)