def _ascii_report(text):
    """
prototype::
    arg = str: text ;
          the non-¨ascii characters for the report

    return = str ;
             a text to be sent to the author of ¨mistool


Each character of ``text`` that is not in ``ASCII_CHARS`` is given alone to
``ascii_it``. Every character for which ``ascii_it`` raises a ``ValueError`` is
added to the report, one line per character, followed by what ``ucname`` gives
for it. If no character fails, the text ``"Nothing to report."`` is returned.
    """
    problems = []

    # Sorting makes the report reproducible: iterating directly over a set of
    # strings gives an order that can change from one run to another.
    for onechar in sorted(set(text) - ASCII_CHARS):
        try:
            ascii_it(onechar)

        # Only the failure matters here, not the content of the exception.
        except ValueError:
            problems.append(
                "? >>> {0} : {1}".format(
                    onechar,
                    ucname(onechar)
                )
            )

    if not problems:
        return "Nothing to report."

    return """
==============================================
TO SEND TO THE AUTHOR OF ``misTool``

Subject of your mail : "mistool: ascii report"
==============================================

Just replace each "?" with an appropriate
ASCII character(s).

{0}
    """.format("\n".join(problems))
# Example #2
# 0
def _ascii_report(text):
    """
prototype::
    arg = str: text ;
          the non-¨ascii characters for the report

    return = str ;
             a text to be sent to the author of ¨mistool


Each character of ``text`` that is not in ``ASCII_CHARS`` is given alone to
``asciify``. Every character for which ``asciify`` raises a ``ValueError`` is
added to the report, one line per character, followed by what ``ucname`` gives
for it. If no character fails, the text ``"Nothing to report."`` is returned.
    """
    problems = []

    # Sorting makes the report reproducible: iterating directly over a set of
    # strings gives an order that can change from one run to another.
    for onechar in sorted(set(text) - ASCII_CHARS):
        try:
            asciify(onechar)

        # Only the failure matters here, not the content of the exception.
        except ValueError:
            problems.append("? >>> {0} : {1}".format(onechar, ucname(onechar)))

    if not problems:
        return "Nothing to report."

    return """
==============================================
TO SEND TO THE AUTHOR OF ``misTool``

Subject of your mail : "mistool: ascii report"
==============================================

Just replace each "?" with an appropriate
ASCII character(s).

{0}
    """.format("\n".join(problems))
def ascii_it(
    text,
    oldnew = {},
    strict = True
):
    """
prototype::
    arg = str: text ;
          the text to be translated
    arg = {str: str}: oldnew = {} ;
          this dictionary uses couples ``(key, value)`` that are of the kind
          ``(non-ascii character, ascii version)`` ; this dictionary is never
          modified by the function
    arg = bool: strict = True ;
          ``strict = True`` indicates to raise an error when the translation
          can only be partial, and with ``strict = False`` no error will be
          raised

    return = str ;
             a partial or total ¨ascii version of ``text``


=========
Basic use
=========

The aim of this function is to give an ¨ascii translation of a text. The typical
use is for avoiding strange names of files. Here is a standard use where you can
see that a non-¨ascii punctuation mark is simply removed.

pyterm::
    >>> from mistool.string_use import ascii_it
    >>> print(ascii_it("¡Viva España!"))
    Viva Espana!


=======================
Doing more replacements
=======================

You can use the optional argument ``oldnew`` so as to do more replacements. In
the code below we have chosen to also clean ``!``.

pyterm::
    >>> from mistool.string_use import ascii_it
    >>> oldnew = {'!': ""}
    >>> print(ascii_it(text = "¡Viva España!", oldnew = oldnew))
    Viva Espana


================
Partial cleaning
================

Sometimes, you just want to change as many characters as possible. In the
example below, we use ``strict = False`` so as to obtain ``L'Odyssee de ∏``
instead of an error as the second use of ``ascii_it`` shows.

pyterm::
    >>> from mistool.string_use import ascii_it
    >>> print(ascii_it(text = "L'Odyssée de ∏", strict = False))
    L'Odyssee de ∏
    >>> print(ascii_it("L'Odyssée de ∏"))
    Traceback (most recent call last):
    [...]
    ValueError: ASCII conversion can't be made because of the character << ∏ >>.
    You can use the function ``_ascii_report`` so as to report more precisely
    this fealure with eventually an ascii alternative.


===============================
Improving the replacements made
===============================

It's easy to increase the list of special characters managed by default.

    1) The first method is for very special tunings useful at a moment, in that
    case just use the optional argument ``oldnew``.

    2) If you think that your tuning is general enough, just follow the steps
    below where we will suppose that we want to give ascii representations for
    the greek letters "𝛂", "𝛃" and "𝝲". The upper case variants will be
    automatically managed.

        a) First use the following lines in a terminal.

        pyterm::
            >>> from mistool.string_use import _ascii_report
            >>> print(_ascii_report("𝛂𝛃𝝲"))

        b) When you launch this snippet, you will obtain the output below.

        code::
            ==============================================
            TO SEND TO THE AUTHOR OF ``misTool``

            Subject of your mail : "mistool: ascii report"
            ==============================================

            Just replace each "?" with an appropriate
            ASCII character(s).

            ? >>> 𝝲 : MATHEMATICAL SANS-SERIF BOLD SMALL GAMMA
            ? >>> 𝛃 : MATHEMATICAL BOLD SMALL BETA
            ? >>> 𝛂 : MATHEMATICAL BOLD SMALL ALPHA

        c) The most important lines are the ones with your special letters.
        Just copy all of them and produce the following text that you will send
        to the author of this package. Here we have chosen to associate "a",
        "b" and "g" to "𝛂", "𝛃" and "𝝲" respectively.

        code::
            A sympathic message... ;-)

            g >>> 𝝲 : MATHEMATICAL SANS-SERIF BOLD SMALL GAMMA
            b >>> 𝛃 : MATHEMATICAL BOLD SMALL BETA
            a >>> 𝛂 : MATHEMATICAL BOLD SMALL ALPHA
    """
    # Work on a private copy: writing into ``oldnew`` itself would change the
    # dictionary of the caller and, when the argument is omitted, the default
    # dictionary that is shared by all the calls of the function.
    replacements = dict(oldnew)

    for onechar in set(text) - ASCII_CHARS:
        # The replacements given by the user always win.
        if onechar in replacements:
            continue

        infos = ucname(onechar).split(" ")

        asciichar = None

        if "SMALL" in infos:
            caseformat = LOWER

        else:
            caseformat = UPPER

        # "LETTER" has priority over "LIGATURE": the first one of these words
        # found in the name, if any, is the one that is used.
        keyword = next(
            (kw for kw in ("LETTER", "LIGATURE") if kw in infos),
            None
        )

        if keyword is not None:
            # The ascii version is the word just after the keyword. A list has
            # no method ``find``, so ``index`` must be used. If the keyword is
            # the last word of the name, nothing can be proposed.
            following = infos[infos.index(keyword) + 1:]

            if following:
                asciichar = following[0]

        elif "MARK" in infos:
            # Marks are simply removed.
            asciichar = ""

        if asciichar is not None:
            replacements[onechar] = case(
                text = asciichar,
                kind = caseformat
            )

        elif strict:
            raise ValueError(
                "ASCII conversion can't be made because of the character "
                "<< {0} >>. ".format(onechar) + "\nYou can use the "
                "function ``_ascii_report`` so as to report more precisely "
                "this fealure with eventually an ascii alternative."
            )

    return MultiReplace(replacements)(text)
# Example #4
# 0
def asciify(text, oldnew={}, strict=True):
    """
prototype::
    arg = str: text ;
          the text to be translated
    arg = {str: str}: oldnew = {} ;
          this dictionary uses couples ``(key, value)`` that are of the kind
          ``(non-ascii character, ascii version)`` ; this dictionary is never
          modified by the function
    arg = bool: strict = True ;
          ``strict = True`` indicates to raise an error when the translation
          can only be partial, and with ``strict = False`` no error will be
          raised

    return = str ;
             a partial or total ¨ascii version of ``text``


=========
Basic use
=========

The aim of this function is to give an ¨ascii translation of a text. The typical
use is for avoiding strange names of files. Here is a standard use where you can
see that a non-¨ascii punctuation mark is simply removed.

pyterm::
    >>> from mistool.string_use import asciify
    >>> print(asciify("¡Viva España!"))
    Viva Espana!


=======================
Doing more replacements
=======================

You can use the optional argument ``oldnew`` so as to do more replacements. In
the code below we have chosen to also clean ``!``.

pyterm::
    >>> from mistool.string_use import asciify
    >>> oldnew = {'!': ""}
    >>> print(asciify(text = "¡Viva España!", oldnew = oldnew))
    Viva Espana


================
Partial cleaning
================

Sometimes, you just want to change as many characters as possible. In the
example below, we use ``strict = False`` so as to obtain ``L'Odyssee de ∏``
instead of an error as the second use of ``asciify`` shows.

pyterm::
    >>> from mistool.string_use import asciify
    >>> print(asciify(text = "L'Odyssée de ∏", strict = False))
    L'Odyssee de ∏
    >>> print(asciify("L'Odyssée de ∏"))
    Traceback (most recent call last):
    [...]
    ValueError: ASCII conversion can't be made because of the character << ∏ >>.
    You can use the function ``_ascii_report`` so as to report more precisely
    this fealure with eventually an ascii alternative.


===============================
Improving the replacements made
===============================

It's easy to increase the list of special characters managed by default.

    1) The first method is for very special tunings useful at a moment, in that
    case just use the optional argument ``oldnew``.

    2) If you think that your tuning is general enough, just follow the steps
    below where we will suppose that we want to give ascii representations for
    the greek letters "𝛂", "𝛃" and "𝝲". The upper case variants will be
    automatically managed.

        a) First use the following lines in a terminal.

        pyterm::
            >>> from mistool.string_use import _ascii_report
            >>> print(_ascii_report("𝛂𝛃𝝲"))

        b) When you launch this snippet, you will obtain the output below.

        code::
            ==============================================
            TO SEND TO THE AUTHOR OF ``misTool``

            Subject of your mail : "mistool: ascii report"
            ==============================================

            Just replace each "?" with an appropriate
            ASCII character(s).

            ? >>> 𝝲 : MATHEMATICAL SANS-SERIF BOLD SMALL GAMMA
            ? >>> 𝛃 : MATHEMATICAL BOLD SMALL BETA
            ? >>> 𝛂 : MATHEMATICAL BOLD SMALL ALPHA

        c) The most important lines are the ones with your special letters.
        Just copy all of them and produce the following text that you will send
        to the author of this package. Here we have chosen to associate "a",
        "b" and "g" to "𝛂", "𝛃" and "𝝲" respectively.

        code::
            A sympathic message... ;-)

            g >>> 𝝲 : MATHEMATICAL SANS-SERIF BOLD SMALL GAMMA
            b >>> 𝛃 : MATHEMATICAL BOLD SMALL BETA
            a >>> 𝛂 : MATHEMATICAL BOLD SMALL ALPHA
    """
    # Work on a private copy: writing into ``oldnew`` itself would change the
    # dictionary of the caller and, when the argument is omitted, the default
    # dictionary that is shared by all the calls of the function.
    replacements = dict(oldnew)

    for onechar in set(text) - ASCII_CHARS:
        # The replacements given by the user always win.
        if onechar in replacements:
            continue

        infos = ucname(onechar).split(" ")

        asciichar = None

        if "SMALL" in infos:
            caseformat = LOWER

        else:
            caseformat = UPPER

        # "LETTER" has priority over "LIGATURE": the first one of these words
        # found in the name, if any, is the one that is used.
        keyword = next(
            (kw for kw in ("LETTER", "LIGATURE") if kw in infos),
            None,
        )

        if keyword is not None:
            # The ascii version is the word just after the keyword. A list has
            # no method ``find``, so ``index`` must be used. If the keyword is
            # the last word of the name, nothing can be proposed.
            following = infos[infos.index(keyword) + 1:]

            if following:
                asciichar = following[0]

        elif "MARK" in infos:
            # Marks are simply removed.
            asciichar = ""

        if asciichar is not None:
            replacements[onechar] = case(text=asciichar, kind=caseformat)

        elif strict:
            raise ValueError(
                "ASCII conversion can't be made because of the character "
                "<< {0} >>. ".format(onechar) + "\nYou can use the "
                "function ``_ascii_report`` so as to report more precisely "
                "this fealure with eventually an ascii alternative.")

    return MultiReplace(replacements)(text)