예제 #1
0
    def test_issue_no21(self):
        """Regression test: custom rules that wrap acronyms in braces.

        See https://github.com/phfaist/pylatexenc/issues/21.  The same
        input/expected pair is checked twice: first with a callable rule,
        then with an equivalent pair of regex rules.  In both cases the
        custom rule is placed in front of the built-in 'defaults' rules.
        """
        text = "Title with {Some} ABC acronyms LIKe this."
        expected = "Title with {Some} {ABC} acronyms {LIKe} this."

        # Word starting with at least two capital letters.
        acronym_rx = re.compile(r'\b[A-Z]{2,}\w*\b')

        def capitalize_acronyms(s, pos):
            # Returns (consumed length, replacement) or None for "no match".
            if s[pos] in ('{', '}'):
                # preserve existing braces
                return (1, s[pos])
            found = acronym_rx.match(s, pos)
            if found is not None:
                return (found.end() - found.start(),
                        "{" + found.group() + "}")
            return None

        # Variant 1: callable rule.
        callable_rule = latexencode.UnicodeToLatexConversionRule(
            latexencode.RULE_CALLABLE, capitalize_acronyms)
        encoder = UnicodeToLatexEncoder(
            conversion_rules=(
                [callable_rule]
                + latexencode.get_builtin_conversion_rules('defaults')
            ))
        self.assertEqual(encoder.unicode_to_latex(text), expected)

        # Variant 2: the same logic expressed as regex rules.
        regex_rule = latexencode.UnicodeToLatexConversionRule(
            latexencode.RULE_REGEX,
            [
                (re.compile(r'([{}])'), r'\1'),  # keep existing braces
                (re.compile(r'\b([A-Z]{2,}\w*)\b'), r'{\1}'),
            ])
        encoder = UnicodeToLatexEncoder(
            conversion_rules=(
                [regex_rule]
                + latexencode.get_builtin_conversion_rules('defaults')
            ))
        self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #2
0
    def escape_special_chars(self, suppress_warnings: bool = True) -> None:
        """Escape special characters in the bibliographic data for LaTeX.

        Every string value in `self.data` is passed through a `pylatexenc`
        `UnicodeToLatexEncoder` and stored back under the same key. The `file` and `url`
        fields are exempt and keep their special characters. Non-string values are left
        untouched.

        Args:
            suppress_warnings: if True, the encoder's unknown-character warnings are disabled.
                This setting is overridden (warnings stay enabled) whenever `LOGGER` is
                enabled for `logging.DEBUG`.
        """
        warn_on_unknown = not suppress_warnings or LOGGER.isEnabledFor(logging.DEBUG)
        enc = UnicodeToLatexEncoder(
            non_ascii_only=True,
            replacement_latex_protection="braces-all",
            unknown_char_policy="keep",
            unknown_char_warning=warn_on_unknown,
        )
        for key, value in self.data.items():
            if key in ("file", "url"):
                # Do NOT escape these fields; keep any special characters as they are.
                # NOTE(review): re-assigning the same value looks like a no-op for a plain
                # dict -- confirm before removing.
                self.data[key] = value
            elif isinstance(value, str):
                self.data[key] = enc.unicode_to_latex(value)
예제 #3
0
 def test_rules_03(self):
     """Encode a sample sentence using only the 'unicode-xml' rule set."""
     encoder = UnicodeToLatexEncoder(conversion_rules=['unicode-xml'])
     text = "* \"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama\N{SUPERSCRIPT TWO} le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
     expected = "{\\ast} \"\\`{A} votre sant\\'{e}!\" s{\\textquotesingle}exclama{^2} le ma\\^{\\i}tre de maison \\`{a} 100\\%."
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #4
0
 def test_basic_2d(self):
     """With non_ascii_only=False, ASCII special characters get escaped."""
     encoder = UnicodeToLatexEncoder(non_ascii_only=False)
     ascii_chars_convert = " \" # $ % & \\ _ { } ~ "
     expected = " '' \\# \\$ \\% \\& {\\textbackslash} \\_ \\{ \\} {\\textasciitilde} "
     self.assertEqual(encoder.unicode_to_latex(ascii_chars_convert),
                      expected)
예제 #5
0
 def test_basic_2b(self):
     """Encode a sample sentence with replacement_latex_protection='none'."""
     encoder = UnicodeToLatexEncoder(replacement_latex_protection='none')
     text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
     expected = "''\\`A votre sant\\'e!'' s'exclama le ma\\^\\itre de maison \\`a 100\\%."
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #6
0
    def test_rules_01(self):
        """Combine dict, regex, built-in ('unicode-xml') and callable rules.

        The rules are handed to the encoder in that order; the expected
        string pins down the resulting output for a sample sentence.
        """
        def acallable(s, pos):
            # Returns (consumed length, replacement) or None for "no match".
            if s[pos] == "\N{LATIN SMALL LETTER E WITH ACUTE}":
                return (1, r"{\'{e}}")
            if s.startswith('...', pos):
                return (3, r"\ldots")
            return None

        dict_rule = latexencode.UnicodeToLatexConversionRule(
            latexencode.RULE_DICT,
            {
                ord("\N{LATIN CAPITAL LETTER A WITH GRAVE}"): r"{{\`{A}}}",
                ord("%"): r"\textpercent",
            })
        regex_rule = latexencode.UnicodeToLatexConversionRule(
            latexencode.RULE_REGEX,
            [
                (re.compile('v(otre)'), r'n\1'),
                (re.compile("s'exclama", flags=re.I), r"s'exprima"),
                (re.compile('\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}'),
                 r"{\^i}"),
            ])
        callable_rule = latexencode.UnicodeToLatexConversionRule(
            latexencode.RULE_CALLABLE, acallable)

        encoder = UnicodeToLatexEncoder(conversion_rules=[
            dict_rule,
            regex_rule,
            'unicode-xml',  # expand built-in rule names
            callable_rule,
        ])

        text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison ... \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
        expected = "\"{{\\`{A}}} notre sant\\'{e}!\" s'exprima le ma{\\^i}tre de maison {\\ldots} \\`{a} 100{\\textpercent}."
        self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #7
0
 def test_basic_callable_replacement_latex_protection(self):
     """Pass a callable as replacement_latex_protection.

     The callable wraps its argument in a recognizable marker; the
     expected string shows where the marker ends up in the output.
     """
     def protect(s):
         return '{***{' + s + '}***}'

     encoder = UnicodeToLatexEncoder(replacement_latex_protection=protect)
     text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
     expected = "{***{''}***}{***{\\`A}***} votre sant{***{\\'e}***}!{***{''}***} s'exclama le ma{***{\\^\\i}***}tre de maison {***{\\`a}***} 100{***{\\%}***}."
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #8
0
 def test_basic_1(self):
     """Encode with non_ascii_only=True and 'braces-all' protection.

     Per the expected string, ASCII characters such as the double quote
     and the percent sign are left as they are.
     """
     encoder = UnicodeToLatexEncoder(
         non_ascii_only=True,
         replacement_latex_protection='braces-all',
     )
     text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
     expected = "\"{\\`A} votre sant{\\'e}!\" s'exclama le ma{\\^\\i}tre de maison {\\`a} 100%."
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #9
0
 def test_basic_3b(self):
     """Unknown characters with policy 'replace' must log a warning.

     The encoder is created and used inside the assertLogs() context so
     that the test requires at least one record of level WARNING or
     higher on the 'pylatexenc.latexencode' logger.
     """
     test_unknown_chars = "A unicode character: \N{THAI CHARACTER THO THONG}"
     expected = "A unicode character: {\\bfseries ?}"
     # generates warnings -- that's good
     with self.assertLogs(logger='pylatexenc.latexencode', level='WARNING'):
         encoder = UnicodeToLatexEncoder(unknown_char_policy='replace')
         self.assertEqual(encoder.unicode_to_latex(test_unknown_chars),
                          expected)
예제 #10
0
 def test_basic_2a(self):
     """Encode with 'braces-after-macro' protection (see issue #44)."""
     encoder = UnicodeToLatexEncoder(
         replacement_latex_protection='braces-after-macro',
     )
     text = "Jabłoński, François, ⟨.⟩, ~"
     expected = "Jab\\l{}o\\'nski, Fran\\c{c}ois, \\ensuremath{\\langle}.\\ensuremath{\\rangle}, \\textasciitilde{}"
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #11
0
 def test_rules_02(self):
     """Encode a sample sentence with conversion_rules=['defaults'].

     Based on test_basic_0(), but names the 'defaults' rule set
     explicitly instead of constructing the encoder without arguments.
     """
     encoder = UnicodeToLatexEncoder(conversion_rules=['defaults'])
     text = "* \"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama\N{SUPERSCRIPT TWO} le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
     expected = "* ''\\`A votre sant\\'e!'' s'exclama{\\texttwosuperior} le ma{\\^\\i}tre de maison \\`a 100\\%."
     self.assertEqual(encoder.unicode_to_latex(text), expected)
예제 #12
0
    def test_basic_3c(self):
        """Unknown characters with policy 'unihex', warnings switched off.

        Per the expected string, the unknown character is rendered as its
        code point (U+0E18) in typewriter font between angle brackets.
        """
        test_unknown_chars = "A unicode character: \N{THAI CHARACTER THO THONG}"
        expected = "A unicode character: \\ensuremath{\\langle}\\texttt{U+0E18}\\ensuremath{\\rangle}"

        encoder = UnicodeToLatexEncoder(
            unknown_char_policy='unihex',
            unknown_char_warning=False,
        )
        self.assertEqual(encoder.unicode_to_latex(test_unknown_chars),
                         expected)
예제 #13
0
    def test_latex_string_class(self):
        """Use a custom latex_string_class to collect the output chunks.

        The custom class only implements ``+=``; each operand it receives
        is recorded, so the test can inspect the individual pieces that
        make up the result.
        """
        class LatexChunkList:
            def __init__(self):
                self.chunks = []

            def __iadd__(self, piece):
                # Record the piece instead of concatenating strings.
                self.chunks.append(piece)
                return self

        encoder = UnicodeToLatexEncoder(
            latex_string_class=LatexChunkList,
            replacement_latex_protection='none',
        )
        result = encoder.unicode_to_latex("A é → α")

        # result is an object of custom type LatexChunkList
        expected_chunks = [
            'A', ' ', r'\'e', ' ', r'\textrightarrow', ' ',
            r'\ensuremath{\alpha}'
        ]
        self.assertEqual(result.chunks, expected_chunks)
예제 #14
0
class Latex:
    """Unicode-to-LaTeX encoder with one custom rule on top of the defaults.

    The instance holds a preconfigured ``UnicodeToLatexEncoder`` in
    ``self.u``; ``encode()`` simply delegates to it.
    """

    def __init__(self):
        # Custom rule for U+1EC5 (e with circumflex and tilde).  The
        # backslashes in the replacement are doubled on purpose, see
        # UnicodeToLatexConversionRule.
        custom_rule = UnicodeToLatexConversionRule(
            RULE_REGEX,
            [(re.compile(r'\u1ec5'), r'\\~{\\^{{e}}}')],
        )
        # The custom rule comes first, followed by all the default rules.
        self.u = UnicodeToLatexEncoder(
            conversion_rules=[custom_rule, 'defaults'],
            replacement_latex_protection='braces-almost-all',
        )

    def encode(self, text):
        """Return ``text`` converted to LaTeX by the configured encoder."""
        return self.u.unicode_to_latex(text)
예제 #15
0
    def escape_special_chars(self, suppress_warnings=True):
        """Escape special characters in the entry's data for LaTeX.

        Every string value in `self.data` is passed through a pylatexenc
        `UnicodeToLatexEncoder` and stored back under the same key. The `ID` and `file`
        fields are exempt and keep their special characters. Non-string values are left
        untouched.

        Args:
            suppress_warnings (bool): if True, the encoder's unknown-character warnings are
                disabled, unless `LOGGER` is enabled for level 10 (DEBUG).
        """
        # 10 = DEBUG logging level
        warn_on_unknown = not suppress_warnings or LOGGER.isEnabledFor(10)
        enc = UnicodeToLatexEncoder(
            non_ascii_only=True,
            replacement_latex_protection='braces-all',
            unknown_char_policy='keep',
            unknown_char_warning=warn_on_unknown)
        for key, value in self.data.items():
            if key in ('ID', 'file'):
                # Do NOT escape these fields; keep any special characters as they are.
                # NOTE(review): re-assigning the same value looks like a no-op for a plain
                # dict -- confirm before removing.
                self.data[key] = value
            elif isinstance(value, str):
                self.data[key] = enc.unicode_to_latex(value)
예제 #16
0
    def test_all(self):
        """Compare the encoding of all named Unicode characters to a reference.

        For every code point that has a Unicode name, a line containing the
        code point, its name and the character itself is encoded and written
        to ``_tmp_uni_chars_test.temp.txt`` next to this file.  Lines whose
        encoding raises ``ValueError`` are skipped.  The temporary file is
        then diffed against ``uni_chars_test_previous.txt``; any difference
        is printed and fails the test.

        The root logger is raised to CRITICAL while encoding and reading the
        files.  Its previous level is restored in a ``finally`` clause so
        that a failure in this section does not leave logging silenced for
        the tests that run afterwards.

        Raises:
            self.failureException: if the generated output differs from the
                reference file.
        """
        def fn(x,
               bdir=os.path.realpath(os.path.abspath(
                   os.path.dirname(__file__)))):
            # Resolve a file name relative to the directory of this module.
            return os.path.join(bdir, x)

        root_logger = logging.getLogger()
        loglevel = root_logger.level
        root_logger.setLevel(logging.CRITICAL)
        try:
            u = UnicodeToLatexEncoder(
                unknown_char_policy='fail',
                replacement_latex_protection='braces-almost-all')

            with codecs.open(fn('_tmp_uni_chars_test.temp.txt'),
                             'w',
                             encoding='utf-8') as testf:

                # NOTE: range(0x10FFFF) stops at U+10FFFE; U+10FFFF itself
                # is not visited.
                for i in range(0x10FFFF):
                    # iterate over all valid unicode characters
                    # NOTE(review): `unichr` is not a Python 3 builtin; it
                    # is presumably aliased elsewhere in this module --
                    # confirm.
                    try:
                        # test if valid, i.e., it has a UNICODE NAME
                        chrname = unicodedata.name(unichr(i))
                    except ValueError:
                        continue

                    line = "0x%04X %-50s    |%s|\n" % (i, '[' + chrname + ']',
                                                       unichr(i))

                    # try to encode it using our unicode_to_latex routines
                    try:
                        enc = u.unicode_to_latex(line)
                    except ValueError:
                        continue
                    testf.write(enc)

            with codecs.open(fn('uni_chars_test_previous.txt'), 'r', encoding='utf-8') as reff, \
                 codecs.open(fn('_tmp_uni_chars_test.temp.txt'), 'r', encoding='utf-8') as testf:
                a = reff.readlines()
                b = testf.readlines()
        finally:
            # Always restore the level, even if encoding or file I/O failed.
            root_logger.setLevel(loglevel)

        logger = logging.getLogger(__name__)

        # only check up to the supported unicode range
        if sys.maxunicode < 0x10FFFF:
            logger.warning(
                "Only checking up to unicode U+%X, your python build doesn't support higher",
                sys.maxunicode)
            # Each reference line starts with the code point ("0x....")
            # followed by a space.
            a = [
                aline for aline in a
                if int(aline[:aline.find(' ')], 0) < sys.maxunicode
            ]

        s = difflib.unified_diff(a,
                                 b,
                                 fromfile='uni_chars_test_previous.txt',
                                 tofile='_tmp_uni_chars_test.temp.txt')
        diffmsg = "".join(s).strip()
        if diffmsg:
            print(diffmsg)
            raise self.failureException(
                "Unicode coverage tests failed. See full diff above.")
예제 #17
0
 def test_basic_2c(self):
     """With non_ascii_only=True, ASCII special characters are unchanged."""
     encoder = UnicodeToLatexEncoder(non_ascii_only=True)
     ascii_chars_convert = " \" # $ % & \\ _ { } ~ "
     encoded = encoder.unicode_to_latex(ascii_chars_convert)
     self.assertEqual(encoded, ascii_chars_convert)