def detranslify(in_string):
    """
    Turn a transliterated string back by applying TRANSTABLE in reverse

    @param in_string: input string
    @type in_string: C{basestring}

    @return: detransliterated string
    @rtype: C{unicode}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
        (i.e. converting it to text raises UnicodeDecodeError)
    """
    try:
        result = six.text_type(in_string)
    except UnicodeDecodeError:
        message = (
            "We expects if in_string is 8-bit string,"
            "then it consists only ASCII chars, but now it doesn't. "
            "Use unicode in this case."
        )
        raise ValueError(message)

    # Every TRANSTABLE entry unpacks to (output symbol, input symbol);
    # the substitutions are applied one after another in table order.
    for replacement, pattern in TRANSTABLE:
        result = result.replace(pattern, replacement)

    # TODO: choose the proper case for the soft and hard signs.
    # In detranslify they always come out in upper case,
    # because ` and ' carry no information about case.
    return result
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    The input is lower-cased, ampersands become " and ", runs of
    whitespace/hyphens collapse to one hyphen, symbols outside ALPHABET
    are dropped, and the result is passed through translify.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
        (i.e. converting it to text raises UnicodeDecodeError)
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError(
            "We expects when in_string is str type,"
            "it is an ascii, but now it isn't. Use unicode "
            "in this case."
        )

    # NOTE: the patterns below are raw strings on purpose. "\&", "\;",
    # "\s" and "\w" are not valid escapes in an ordinary string literal
    # and trigger warnings on modern Python versions.

    # convert & to "and"
    u_in_string = re.sub(r"\&\;|\&", " and ", u_in_string)
    # replace every run of whitespace and hyphens by a single hyphen
    u_in_string = re.sub(r"[-\s]+", "-", u_in_string)
    # remove symbols that are not in the alphabet
    u_in_string = u"".join([symb for symb in u_in_string if symb in ALPHABET])
    # translify it
    out_string = translify(u_in_string)
    # strip whatever is left that is not a word character, space or hyphen
    return re.sub(r"[^\w\s-]", "", out_string).strip().lower()
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    Steps, in order: lower-case the text, turn ampersands into " and ",
    collapse whitespace/hyphen runs into one hyphen, keep only symbols
    found in ALPHABET, translify, then remove remaining non-word symbols.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
        (i.e. converting it to text raises UnicodeDecodeError)
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")

    # All regular expressions are written as raw strings: sequences such
    # as "\&" or "\s" are invalid escapes in a plain string literal.

    # convert & to "and"
    u_in_string = re.sub(r'\&\;|\&', ' and ', u_in_string)
    # replace spaces (and existing hyphens) by a single hyphen
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)
    # remove symbols that are not in the alphabet
    u_in_string = u''.join([symb for symb in u_in_string if symb in ALPHABET])
    # translify it
    out_string = translify(u_in_string)
    # remove non-alpha
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
def detranslify(in_string):
    """
    Detranslify: replace transliterated symbols using TRANSTABLE

    @param in_string: input string
    @type in_string: C{basestring}

    @return: detransliterated string
    @rtype: C{unicode}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
        (i.e. converting it to text raises UnicodeDecodeError)
    """
    try:
        text = six.text_type(in_string)
    except UnicodeDecodeError:
        raise ValueError(
            "We expects if in_string is 8-bit string,"
            "then it consists only ASCII chars, but now it doesn't. "
            "Use unicode in this case."
        )

    # TRANSTABLE rows unpack as (symbol to emit, symbol to look for);
    # rows are applied sequentially, so table order matters.
    for symb_to, symb_from in TRANSTABLE:
        text = text.replace(symb_from, symb_to)

    # TODO: pick the right case for the soft and hard signs.
    # detranslify always yields them in upper case,
    # since ` and ' do not carry any case information.
    return text
def ru_strftime(format=u"%d.%m.%Y", date=None, inflected=False,
                inflected_day=False, preposition=False):
    """
    Russian strftime without locale

    Day and month name directives (%a, %A, %b, %B) are substituted from
    DAY_NAMES / MONTH_NAMES before the remaining format is handed to
    the date's own strftime.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()
    # Column 3 of a DAY_NAMES row is the preposition text; it is only
    # looked up when a preposition was requested.
    prefix = (DAY_NAMES[weekday][3] or u"") if preposition else u""
    month_form = 2 if inflected else 1
    day_form = 2 if (inflected_day or preposition) else 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    if u'%b' in format or u'%B' in format:
        format = format.replace(u'%d', six.text_type(date.day))

    day_names = DAY_NAMES[weekday]
    month_names = MONTH_NAMES[date.month - 1]
    substitutions = (
        (u'%a', prefix + day_names[0]),
        (u'%A', prefix + day_names[day_form]),
        (u'%b', month_names[0]),
        (u'%B', month_names[month_form]),
    )
    for directive, text in substitutions:
        format = format.replace(directive, text)

    if not six.PY2:
        # Python 3: strftime takes a text string. The format is reduced
        # to ASCII escape sequences first and the result is unescaped
        # afterwards, so strftime itself only ever sees ASCII.
        escaped_format = format.encode('unicode-escape').decode()
        escaped_result = date.strftime(escaped_format)
        return escaped_result.encode().decode('unicode-escape')

    # Python 2: strftime needs a byte string, so go through utf-8
    # and convert the result back to unicode.
    raw_result = date.strftime(format.encode("utf-8"))
    return raw_result.decode("utf-8")
def ru_strftime(format=u"%d.%m.%Y", date=None, inflected=False,
                inflected_day=False, preposition=False):
    """
    Russian strftime without locale

    The %a, %A, %b and %B directives are filled in from DAY_NAMES and
    MONTH_NAMES; everything else is formatted by the date's strftime.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()

    # The preposition text lives in column 3 of the DAY_NAMES row and is
    # read only when it was asked for.
    if preposition:
        prepos = DAY_NAMES[weekday][3] or u""
    else:
        prepos = u""

    # Column 2 is used for the inflected form, column 1 otherwise.
    if inflected:
        month_idx = 2
    else:
        month_idx = 1
    if inflected_day or preposition:
        day_idx = 2
    else:
        day_idx = 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    has_month_name = u'%b' in format or u'%B' in format
    if has_month_name:
        format = format.replace(u'%d', six.text_type(date.day))

    day_row = DAY_NAMES[weekday]
    month_row = MONTH_NAMES[date.month - 1]
    format = format.replace(u'%a', prepos + day_row[0])
    format = format.replace(u'%A', prepos + day_row[day_idx])
    format = format.replace(u'%b', month_row[0])
    format = format.replace(u'%B', month_row[month_idx])

    if six.PY2:
        # Python 2: strftime wants a byte string, so encode the format
        # to utf-8 and decode the result back to unicode.
        encoded_format = format.encode("utf-8")
        return date.strftime(encoded_format).decode("utf-8")

    # Python 3: strftime takes and returns text directly.
    return date.strftime(format)
def ru_strftime(format=u"%d.%m.%Y", date=None, inflected=False,
                inflected_day=False, preposition=False):
    """
    Russian strftime without locale

    The %a, %A, %b and %B directives are filled in from DAY_NAMES and
    MONTH_NAMES; the rest of the format is passed to the date's
    strftime. If that call raises UnicodeError or returns an empty
    string for a non-empty format, each remaining directive is
    formatted on its own instead.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()

    # Column 3 of a DAY_NAMES row holds the preposition text; it is read
    # only when a preposition was requested.
    prepos = (DAY_NAMES[weekday][3] or u"") if preposition else u""
    # Column 2 is used for the inflected form, column 1 otherwise.
    month_idx = 2 if inflected else 1
    day_idx = 2 if (inflected_day or preposition) else 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    if u'%b' in format or u'%B' in format:
        format = format.replace(u'%d', six.text_type(date.day))

    format = format.replace(u'%a', prepos + DAY_NAMES[weekday][0])
    format = format.replace(u'%A', prepos + DAY_NAMES[weekday][day_idx])
    format = format.replace(u'%b', MONTH_NAMES[date.month - 1][0])
    format = format.replace(u'%B', MONTH_NAMES[date.month - 1][month_idx])

    def run_strftime_on_py_2_or_3(f):
        # Python 2: strftime's argument must be a byte string
        # Python 3: strftime's argument must be text, not a bytestring
        if six.PY2:
            # encode the format to utf-8 for strftime...
            s_format = f.encode("utf-8")
            s_res = date.strftime(s_format)
            # ...and convert the result back to unicode
            return s_res.decode("utf-8")
        return date.strftime(f)

    need_locale_workaround = False
    try:
        u_res = run_strftime_on_py_2_or_3(format)
        # an empty result for a non-empty format is treated as a failure
        if u_res == u'' and format:
            need_locale_workaround = True
    except UnicodeError:
        need_locale_workaround = True

    # workaround for https://github.com/last-partizan/pytils/issues/32
    if need_locale_workaround:
        # Format every remaining directive separately, so the literal
        # text of the format never goes through strftime. The pattern is
        # equivalent to the former u'\%[c-zC-Z]{1}' but does not rely on
        # the invalid string escape "\%".
        u_res = re.sub(
            u'%[c-zC-Z]',
            lambda m: run_strftime_on_py_2_or_3(m.group(0)),
            format,
        )

    return u_res