Example #1
0
def detranslify(in_string):
    """
    Detranslify

    The input is converted to text and then every TRANSTABLE entry is
    applied in table order: each entry is unpacked as
    (output symbol, input symbol) and all occurrences of the input symbol
    are replaced by the output symbol.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: detransliterated string
    @rtype: C{unicode}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        result = six.text_type(in_string)
    except UnicodeDecodeError:
        message = (
            "We expects if in_string is 8-bit string,"
            "then it consists only ASCII chars, but now it doesn't. "
            "Use unicode in this case."
        )
        raise ValueError(message)

    # TODO: pick the proper case for the soft sign and the hard sign.
    # In detranslify they always come out in upper case, because
    # ` and ' carry no information about case.
    for replacement, pattern in TRANSTABLE:
        result = result.replace(pattern, replacement)

    return result
Example #2
0
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    Steps, in order: lower-case the input, turn ampersands into " and ",
    collapse runs of whitespace/hyphens into one hyphen, drop symbols that
    are not in ALPHABET, pass the result through translify, and finally
    remove everything that is not a word character, whitespace or a hyphen.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError(
            "We expects when in_string is str type,"
            + "it is an ascii, but now it isn't. Use unicode "
            + "in this case."
        )
    # NOTE: every pattern below is a raw string. The same text in a plain
    # literal relies on invalid escape sequences (\&, \;, \s, \w), which
    # newer Python versions warn about at compile time.

    # convert "&amp;" and "&" to " and "
    u_in_string = re.sub(r"\&amp\;|\&", " and ", u_in_string)
    # collapse every run of whitespace and/or hyphens into a single hyphen
    u_in_string = re.sub(r"[-\s]+", "-", u_in_string)
    # remove symbols that are not in the alphabet
    u_in_string = u"".join([symb for symb in u_in_string if symb in ALPHABET])
    # translify it
    out_string = translify(u_in_string)
    # drop whatever is still not a word character, whitespace or hyphen
    return re.sub(r"[^\w\s-]", "", out_string).strip().lower()
Example #3
0
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    Steps, in order: lower-case the input, turn ampersands into ' and ',
    collapse runs of whitespace/hyphens into one hyphen, drop symbols that
    are not in ALPHABET, pass the result through translify, and finally
    remove everything that is not a word character, whitespace or a hyphen.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        # Adjacent literals are joined at compile time; the resulting
        # message is the same text as before.
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")
    # NOTE: every pattern below is a raw string. The same text in a plain
    # literal relies on invalid escape sequences (\&, \;, \s, \w), which
    # newer Python versions warn about at compile time.

    # convert '&amp;' and '&' to ' and '
    u_in_string = re.sub(r'\&amp\;|\&', ' and ', u_in_string)
    # collapse every run of whitespace and/or hyphens into a single hyphen
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)
    # remove symbols that are not in the alphabet
    u_in_string = u''.join([symb for symb in u_in_string if symb in ALPHABET])
    # translify it
    out_string = translify(u_in_string)
    # drop whatever is still not a word character, whitespace or hyphen
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
Example #4
0
def detranslify(in_string):
    """
    Detranslify

    Converts the input to text, then walks TRANSTABLE in order; each entry
    is unpacked as (output symbol, input symbol) and every occurrence of
    the input symbol is replaced by the output symbol.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: detransliterated string
    @rtype: C{unicode}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        text = six.text_type(in_string)
    except UnicodeDecodeError:
        error_text = ("We expects if in_string is 8-bit string,"
                      "then it consists only ASCII chars, but now it doesn't. "
                      "Use unicode in this case.")
        raise ValueError(error_text)

    # TODO: pick the proper case for the soft sign and the hard sign.
    # In detranslify they always come out in upper case, because
    # ` and ' carry no information about case.
    for target, source in TRANSTABLE:
        text = text.replace(source, target)

    return text
Example #5
0
File: dt.py Project: PixxxeL/pytils
def ru_strftime(format=u"%d.%m.%Y",
                date=None,
                inflected=False,
                inflected_day=False,
                preposition=False):
    """
    Russian strftime without locale

    %a, %A, %b and %B are substituted from DAY_NAMES / MONTH_NAMES first;
    whatever directives remain are then expanded by C{date.strftime}.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()

    # Index 3 of a DAY_NAMES entry is used as the prefix put before the
    # day name when a preposition is requested.
    prepos = DAY_NAMES[weekday][3] if preposition else u""

    # Index 2 is used for the inflected form, index 1 otherwise.
    month_idx = 2 if inflected else 1
    day_idx = 2 if (inflected_day or preposition) else 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    if u'%b' in format or u'%B' in format:
        format = format.replace(u'%d', six.text_type(date.day))

    format = format.replace(u'%a', prepos + DAY_NAMES[weekday][0])
    format = format.replace(u'%A', prepos + DAY_NAMES[weekday][day_idx])
    format = format.replace(u'%b', MONTH_NAMES[date.month - 1][0])
    format = format.replace(u'%B', MONTH_NAMES[date.month - 1][month_idx])

    # Python 2: strftime's argument must be str
    # Python 3: strftime's argument is str, not a bytestring
    if six.PY2:
        # strftime must be str, so encode it to utf8:
        s_format = format.encode("utf-8")
        s_res = date.strftime(s_format)
        # and back to unicode
        u_res = s_res.decode("utf-8")
    else:
        # Hand strftime an ASCII-only format: every non-ASCII character
        # (and every backslash) is turned into a backslash escape first.
        ascii_format = format.encode('unicode-escape').decode('ascii')
        raw_res = date.strftime(ascii_format)
        # Undo the escaping. The 'unicode-escape' decoder reads non-escape
        # bytes as Latin-1, so the result has to be encoded as Latin-1
        # (with backslash escapes for anything above U+00FF) rather than
        # UTF-8; otherwise a non-ASCII character produced by strftime
        # itself would come back as mojibake.
        u_res = raw_res.encode(
            'latin-1', 'backslashreplace').decode('unicode-escape')
    return u_res
Example #6
0
File: dt.py Project: j2a/pytils
def ru_strftime(format=u"%d.%m.%Y", date=None, inflected=False,
                inflected_day=False, preposition=False):
    """
    Russian strftime without locale

    %a, %A, %b and %B are substituted from DAY_NAMES / MONTH_NAMES first;
    whatever directives remain are then expanded by C{date.strftime}.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()

    # Index 3 of a DAY_NAMES entry is used as the prefix put before the
    # day name when a preposition is requested.
    prepos = DAY_NAMES[weekday][3] if preposition else u""

    # Index 2 is used for the inflected form, index 1 otherwise.
    month_idx = 2 if inflected else 1
    day_idx = 2 if (inflected_day or preposition) else 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    if u'%b' in format or u'%B' in format:
        format = format.replace(u'%d', six.text_type(date.day))

    format = format.replace(u'%a', prepos+DAY_NAMES[weekday][0])
    format = format.replace(u'%A', prepos+DAY_NAMES[weekday][day_idx])
    format = format.replace(u'%b', MONTH_NAMES[date.month-1][0])
    format = format.replace(u'%B', MONTH_NAMES[date.month-1][month_idx])

    # Python 2: strftime's argument must be str
    # Python 3: strftime's argument is str, not a bytestring
    if six.PY2:
        # strftime must be str, so encode it to utf8:
        s_format = format.encode("utf-8")
        s_res = date.strftime(s_format)
        # and back to unicode
        u_res = s_res.decode("utf-8")
    else:
        # How strftime treats non-ASCII characters in the format is
        # platform dependent: it may raise UnicodeError or return an
        # empty string. Try the direct call first so the usual result is
        # untouched, and only fall back when one of those failures shows.
        try:
            u_res = date.strftime(format)
            retry_escaped = bool(format) and u_res == u''
        except UnicodeError:
            retry_escaped = True

        if retry_escaped:
            # Retry with an ASCII-only format: non-ASCII characters and
            # backslashes become backslash escapes, which are undone on
            # the result. Latin-1 + backslashreplace is the exact inverse
            # input for the 'unicode-escape' decoder.
            ascii_format = format.encode('unicode-escape').decode('ascii')
            raw_res = date.strftime(ascii_format)
            u_res = raw_res.encode(
                'latin-1', 'backslashreplace').decode('unicode-escape')
    return u_res
Example #7
0
def ru_strftime(format=u"%d.%m.%Y",
                date=None,
                inflected=False,
                inflected_day=False,
                preposition=False):
    """
    Russian strftime without locale

    %a, %A, %b and %B are substituted from DAY_NAMES / MONTH_NAMES first;
    whatever directives remain are then expanded by C{date.strftime}. If
    that call raises UnicodeError or returns an empty string for a
    non-empty format, each remaining directive is expanded on its own.

    @param format: strftime format, default=u'%d.%m.%Y'
    @type format: C{unicode}

    @param date: date value, default=None translates to today
    @type date: C{datetime.date} or C{datetime.datetime}

    @param inflected: is month inflected, default False
    @type inflected: C{bool}

    @param inflected_day: is day inflected, default False
    @type inflected_day: C{bool}

    @param preposition: is preposition used, default False
        preposition=True automatically implies inflected_day=True
    @type preposition: C{bool}

    @return: strftime string
    @rtype: unicode
    """
    if date is None:
        date = datetime.datetime.today()

    weekday = date.weekday()

    # Index 3 of a DAY_NAMES entry is used as the prefix put before the
    # day name when a preposition is requested.
    prepos = DAY_NAMES[weekday][3] if preposition else u""

    # Index 2 is used for the inflected form, index 1 otherwise.
    month_idx = 2 if inflected else 1
    day_idx = 2 if (inflected_day or preposition) else 1

    # for russian typography standard,
    # 1 April 2007, but 01.04.2007
    if u'%b' in format or u'%B' in format:
        format = format.replace(u'%d', six.text_type(date.day))

    format = format.replace(u'%a', prepos + DAY_NAMES[weekday][0])
    format = format.replace(u'%A', prepos + DAY_NAMES[weekday][day_idx])
    format = format.replace(u'%b', MONTH_NAMES[date.month - 1][0])
    format = format.replace(u'%B', MONTH_NAMES[date.month - 1][month_idx])

    def run_strftime_on_py_2_or_3(f):
        # Python 2: strftime's argument must be str
        # Python 3: strftime's argument is str, not a bytestring
        if six.PY2:
            # strftime must be str, so encode it to utf8:
            s_format = f.encode("utf-8")
            s_res = date.strftime(s_format)
            # and back to unicode
            return s_res.decode("utf-8")
        else:
            return date.strftime(f)

    need_locale_workaround = False
    try:
        u_res = run_strftime_on_py_2_or_3(format)
        # An empty result for a non-empty format is treated as a failure.
        if u_res == u'' and format:
            need_locale_workaround = True
    except UnicodeError:
        need_locale_workaround = True

    # workaround for https://github.com/last-partizan/pytils/issues/32
    if need_locale_workaround:
        # Expand each directive separately so strftime only ever sees
        # ASCII; the surrounding text is left as is.
        #  * '%' needs no escaping in a regex; a backslash before it in a
        #    plain literal is an invalid escape sequence.
        #  * '%%' is matched as a unit, so it collapses to '%' as strftime
        #    would do, and its second '%' cannot start a bogus directive
        #    (e.g. '%%d' stays '%d' instead of turning into '%' + day).
        #  * %a/%A/%b/%B are not in the class: they were substituted above.
        u_res = re.sub(u'%[c-zC-Z%]',
                       lambda m: run_strftime_on_py_2_or_3(m.group(0)), format)

    return u_res