def apply_replacements(self, date_string, lang):
    """Apply the language's word replacements to ``date_string``.

    ``lang['word_replacements']``, when present, is iterated as
    ``(replacement, words)`` pairs.  Each entry of ``words`` is used as a
    regex fragment and substituted case-insensitively wherever it is
    delimited by the start/end of the string, a digit, an underscore or a
    non-word character.

    :param date_string: text to rewrite.
    :param lang: mapping that may contain a ``'word_replacements'`` entry.
    :return: the rewritten string; ``date_string`` unchanged when ``lang``
        has no ``'word_replacements'`` key.
    """
    if 'word_replacements' in lang:
        for replacement, words in lang['word_replacements']:
            for word in words:
                # The helper is defined outside this block; presumably it
                # makes the replacement re-emit the two delimiter groups
                # captured by the pattern below -- confirm against its
                # definition.  It receives the *unwrapped* word.
                wrapped_replacement = wrap_replacement_for_regex(replacement, word)
                # Plain raw string: the ``ur''`` prefix is a SyntaxError on
                # Python 3.  The literal is pure ASCII, so on Python 2 the
                # ``%`` formatting still yields unicode for a unicode word.
                bounded_pattern = r'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % word
                date_string = re.sub(
                    bounded_pattern,
                    wrapped_replacement,
                    date_string,
                    flags=re.IGNORECASE | re.UNICODE,
                )
    return date_string
def _simplify(self, date_string): date_string = date_string.lower() for simplification in self.info.get("simplifications", []): pattern, replacement = list(simplification.items())[0] if not self.info.get("no_word_spacing", False): replacement = wrap_replacement_for_regex(replacement, pattern) pattern = r"(\A|\d|_|\W)%s(\d|_|\W|\Z)" % pattern date_string = re.sub(pattern, replacement, date_string, flags=re.IGNORECASE | re.UNICODE).lower() return date_string
def _simplify(self, date_string, settings=None): date_string = date_string.lower() for simplification in self._get_simplifications(settings=settings): pattern, replacement = list(simplification.items())[0] if not self.info.get('no_word_spacing', False): replacement = wrap_replacement_for_regex(replacement, pattern) pattern = r'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % pattern date_string = re.sub(pattern, replacement, date_string, flags=re.IGNORECASE | re.UNICODE).lower() return date_string
def _get_simplification_substitution(self, simplification): pattern, replacement = list(simplification.items())[0] if not self.info.get('no_word_spacing', False): replacement = wrap_replacement_for_regex(replacement, pattern) pattern = r'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % pattern if self._simplification_patterns is None: self._simplification_patterns = {} if pattern not in self._simplification_patterns: self._simplification_patterns[pattern] = re.compile( pattern, flags=re.IGNORECASE | re.UNICODE) pattern = self._simplification_patterns[pattern] return pattern, replacement