Ejemplo n.º 1
0
Archivo: tests.py Proyecto: H0bby/polib
 def test_detect_encoding7(self):
     """
     Check the encoding reported for the iso-8859-15 .mo fixture.
     """
     mo_path = 'tests/test_iso-8859-15.mo'
     # NOTE(review): the second argument presumably selects binary mode;
     # confirm against polib.detect_encoding.
     detected = polib.detect_encoding(mo_path, True)
     self.assertEqual(detected, 'ISO_8859-15')
Ejemplo n.º 2
0
 def test_detect_encoding4(self):
     """
     Detect the encoding from in-memory utf8 text instead of a file path.
     """
     with open("tests/test_utf8.po", "rb") as po_file:
         raw = po_file.read()
         # The fixture is read as bytes and decoded explicitly as utf-8.
         text = str(raw, "utf-8")
         detected = polib.detect_encoding(text)
         self.assertEqual(detected, "UTF-8")
Ejemplo n.º 3
0
 def test_detect_encoding4(self):
     """
     Test with utf8 data (no file).

     The contents of the fixture, not its path, are passed to
     polib.detect_encoding.
     """
     # The context manager closes the handle on exit, replacing the
     # manual try/finally/close sequence.
     with open('tests/test_utf8.po', 'r') as f:
         data = f.read()
     self.assertEqual(polib.detect_encoding(data), 'UTF-8')
Ejemplo n.º 4
0
Archivo: tests.py Proyecto: H0bby/polib
 def test_detect_encoding4(self):
     """
     Test with utf8 data (no file).

     The contents of the fixture, not its path, are passed to
     polib.detect_encoding. When polib.PY3 is true the file is read as
     bytes and decoded as utf-8; otherwise it is read in text mode.
     """
     mode = 'rb' if polib.PY3 else 'r'
     f = open('tests/test_utf8.po', mode)
     try:
         # Read inside the try block so the handle is closed even when
         # reading or decoding fails.
         data = f.read()
         if polib.PY3:
             data = str(data, 'utf-8')
         self.assertEqual(polib.detect_encoding(data), 'UTF-8')
     finally:
         f.close()
Ejemplo n.º 5
0
 def test_detect_encoding4(self):
     """
     Test with utf8 data (no file).

     The contents of the fixture, not its path, are passed to
     polib.detect_encoding. When polib.PY3 is true the file is read as
     bytes and decoded as utf-8; otherwise it is read in text mode.
     """
     mode = 'rb' if polib.PY3 else 'r'
     f = open('tests/test_utf8.po', mode)
     try:
         # Read inside the try block so the handle is closed even when
         # reading or decoding fails.
         data = f.read()
         if polib.PY3:
             data = str(data, 'utf-8')
         self.assertEqual(polib.detect_encoding(data), 'UTF-8')
     finally:
         f.close()
Ejemplo n.º 6
0
def parse_pofile(fn_or_string):
    """Parses a po file and attaches original poentry blocks

    When polib parses a pofile, it captures the line number of the
    start of the block, but doesn't capture the original string for
    the block. When you call str() on the poentry, it "reassembles"
    the block with textwrapped lines, so it returns something
    substantially different than the original block. This is
    problematic if we want to print out the block with the line
    numbers--one for each line.

    So this wrapper captures the line numbers and original text for
    each block and attaches that to the parsed poentries in an
    attribute named "original" thus allowing us to print the original
    text with line numbers.

    :arg fn_or_string: either a path to a po file or the po file
        contents as a string; ``polib._is_file`` decides which

    :returns: the object returned by ``polib.pofile``, with an
        ``original`` attribute set on every entry

    """
    from polib import _is_file, detect_encoding, io, pofile

    # This parses the pofile
    parsed_pofile = pofile(fn_or_string)

    # Collect the raw source lines so we can accurately print out what
    # was in the pofile; polib reassembles what it parsed, which is not
    # the same text.
    if _is_file(fn_or_string):
        # NOTE(review): the second argument is passed exactly as before;
        # confirm its meaning against polib.detect_encoding.
        enc = detect_encoding(fn_or_string, 'pofile')
        # Read every line up front and let the context manager close the
        # handle; previously the file was left open.
        with io.open(fn_or_string, 'rt', encoding=enc) as fp:
            source_lines = list(fp)
    else:
        source_lines = fn_or_string.splitlines(True)

    entries = list(parsed_pofile)
    last_index = len(entries) - 1
    for i, poentry in enumerate(entries):
        # Grab the lines that make up the poentry.
        # Note: linenum is 1-based, so we convert it to 0-based.
        start = poentry.linenum - 1
        if i < last_index:
            # The block ends where the next entry begins.
            lines = source_lines[start:entries[i + 1].linenum - 1]
        else:
            # The last entry runs to the end of the input.
            lines = source_lines[start:]

        # Nix blank lines at the end.
        while lines and not lines[-1].strip():
            lines.pop()

        # Join them and voila!
        poentry.original = ''.join(lines)

    return parsed_pofile
Ejemplo n.º 7
0
def parse_pofile(fn_or_string):
    """Parses a po file and attaches original poentry blocks

    When polib parses a pofile, it captures the line number of the
    start of the block, but doesn't capture the original string for
    the block. When you call str() on the poentry, it "reassembles"
    the block with textwrapped lines, so it returns something
    substantially different than the original block. This is
    problematic if we want to print out the block with the line
    numbers--one for each line.

    So this wrapper captures the line numbers and original text for
    each block and attaches that to the parsed poentries in an
    attribute named "original" thus allowing us to print the original
    text with line numbers.

    :arg fn_or_string: either a path to a po file or the po file
        contents as a string; ``polib._is_file`` decides which

    :returns: the object returned by ``polib.pofile``, with an
        ``original`` attribute set on every entry

    """
    from polib import _is_file, detect_encoding, io, pofile

    # This parses the pofile
    parsed_pofile = pofile(fn_or_string)

    # Collect the raw source lines so we can accurately print out what
    # was in the pofile; polib reassembles what it parsed, which is not
    # the same text.
    if _is_file(fn_or_string):
        # NOTE(review): the second argument is passed exactly as before;
        # confirm its meaning against polib.detect_encoding.
        enc = detect_encoding(fn_or_string, "pofile")
        # Read every line up front and let the context manager close the
        # handle; previously the file was left open.
        with io.open(fn_or_string, "rt", encoding=enc) as fp:
            source_lines = list(fp)
    else:
        source_lines = fn_or_string.splitlines(True)

    entries = list(parsed_pofile)
    last_index = len(entries) - 1
    for i, poentry in enumerate(entries):
        # Grab the lines that make up the poentry.
        # Note: linenum is 1-based, so we convert it to 0-based.
        start = poentry.linenum - 1
        if i < last_index:
            # The block ends where the next entry begins.
            lines = source_lines[start:entries[i + 1].linenum - 1]
        else:
            # The last entry runs to the end of the input.
            lines = source_lines[start:]

        # Nix blank lines at the end.
        while lines and not lines[-1].strip():
            lines.pop()

        # Join them and voila!
        poentry.original = "".join(lines)

    return parsed_pofile
Ejemplo n.º 8
0
 def test_detect_encoding5(self):
     """
     Check the encoding reported for the utf8 .mo fixture.
     """
     mo_path = "tests/test_utf8.mo"
     detected = polib.detect_encoding(mo_path)
     self.assertEqual(detected, "UTF-8")
Ejemplo n.º 9
0
 def test_detect_encoding5(self):
     """
     Check the encoding reported for the utf8 .mo fixture.
     """
     mo_path = 'tests/test_utf8.mo'
     # NOTE(review): the second argument presumably selects binary mode;
     # confirm against polib.detect_encoding.
     detected = polib.detect_encoding(mo_path, True)
     self.assertEqual(detected, 'UTF-8')
Ejemplo n.º 10
0
 def test_detect_encoding2(self):
     """
     Check the encoding reported for a .pot template file.
     """
     pot_path = "tests/test_merge.pot"
     detected = polib.detect_encoding(pot_path)
     self.assertEqual(detected, "utf-8")
Ejemplo n.º 11
0
 def test_detect_encoding1(self):
     """
     Check the value returned for a .po file with no encoding defined.
     """
     po_path = "tests/test_noencoding.po"
     detected = polib.detect_encoding(po_path)
     self.assertEqual(detected, "utf-8")
Ejemplo n.º 12
0
 def test_detect_encoding1(self):
     """
     Check the value returned for a .po file with no encoding defined.
     """
     po_path = 'tests/test_noencoding.po'
     detected = polib.detect_encoding(po_path)
     self.assertEqual(detected, 'utf-8')
Ejemplo n.º 13
0
 def test_detect_encoding7(self):
     """
     Check the encoding reported for the iso-8859-15 .mo fixture.
     """
     mo_path = 'tests/test_iso-8859-15.mo'
     # NOTE(review): the second argument presumably selects binary mode;
     # confirm against polib.detect_encoding.
     detected = polib.detect_encoding(mo_path, True)
     self.assertEqual(detected, 'ISO_8859-15')
Ejemplo n.º 14
0
 def test_detect_encoding7(self):
     """
     Check the encoding reported for the iso-8859-15 .mo fixture.
     """
     mo_path = "tests/test_iso-8859-15.mo"
     detected = polib.detect_encoding(mo_path)
     self.assertEqual(detected, "ISO_8859-15")
Ejemplo n.º 15
0
Archivo: tests.py Proyecto: H0bby/polib
 def test_detect_encoding3(self):
     """
     Check the encoding reported for the utf8 .po fixture.
     """
     po_path = 'tests/test_utf8.po'
     detected = polib.detect_encoding(po_path)
     self.assertEqual(detected, 'UTF-8')
Ejemplo n.º 16
0
def mergepo_minimaldiff(target, source, options):
    """Merge translations into ``target`` while rewriting as little as possible.

    ``target`` is scanned line by line. Each msgid/msgstr pair is handed
    to ``translate`` together with its flags and the catalog parsed from
    ``source``; every other line is copied through unchanged. The file
    is rewritten only when ``translate`` reported at least one change.

    Args:
        target: path of the po file to update in place; it must be
            detected as UTF-8.
        source: passed to ``polib.pofile`` to load the catalog that
            ``translate`` consults.
        options: passed through unchanged to ``translate``.

    Returns:
        The sum of the values returned by ``translate``.

    Raises:
        RuntimeError: if ``target`` is not detected as UTF-8.
        NotImplementedError: if ``target`` contains plural forms
            (a ``msgstr[`` line).
    """
    changed = 0
    po2 = polib.pofile(source)
    target_enc = polib.detect_encoding(target)
    # for utf8 files we can use our self written parser to minimize diffs,
    # otherwise we need to use polib
    if target_enc not in ('UTF-8', 'utf-8', 'utf_8'):
        # This used to be a bare ``raise`` with no active exception, which
        # Python 3 reports as RuntimeError; keep that type, add a message.
        raise RuntimeError(
            'unsupported encoding %r for %s' % (target_enc, target))
    # open file with universal newlines, since it can happen that we are
    # on unix, but the file has been written on windows or vice versa.
    with io.open(target, 'r', encoding='utf_8', newline=None) as po1:
        oldlines = read(po1)
    newlines = []
    in_msgid = False
    in_msgstr = False
    flags = []
    msgstr_lines = []
    msgid_lines = []
    msgid = ''
    # Prefixes of continuation lines, for active and obsolete ("#~") entries.
    continuation = ('"', '#~ "')
    for line in oldlines:
        if in_msgid:
            if line.startswith(continuation):
                msgid_lines.append(line)
            else:
                in_msgid = False
                msgid = parse_msg(msgid_lines)
        elif in_msgstr:
            if line.startswith(continuation):
                msgstr_lines.append(line)
            else:
                # The entry is complete: translate it, then emit it.
                in_msgstr = False
                changed = changed + translate(msgid, flags, msgstr_lines, po2,
                                              options)
                if len(flags) > 0:
                    flagline = u'#, ' + u', '.join(flags)
                    newlines.append(flagline)
                    flags = []
                newlines.extend(msgid_lines)
                newlines.extend(msgstr_lines)
                msgid_lines = []
                msgstr_lines = []
                msgid = ''
        # Deliberately not an ``elif``: the line that ended a msgid or
        # msgstr above still has to be classified here.
        if not in_msgid and not in_msgstr:
            if line.startswith('#,') and len(flags) == 0:
                flags = line[2:].strip().split(u', ')
            elif line.startswith(('msgid', '#~ msgid')):
                msgid_lines.append(line)
                in_msgid = True
            elif line.startswith(('msgstr', '#~ msgstr')):
                if line.startswith(('msgstr[', '#~ msgstr[')):
                    # NotImplementedError subclasses RuntimeError, which is
                    # what the former bare ``raise`` produced on Python 3.
                    raise NotImplementedError(
                        'plural forms are not implemented')
                msgstr_lines.append(line)
                in_msgstr = True
            else:
                newlines.append(line)
    if msgid != '':
        # the file ended with a msgstr
        changed = changed + translate(msgid, flags, msgstr_lines, po2, options)
        if len(flags) > 0:
            flagline = u'#, ' + u', '.join(flags)
            newlines.append(flagline)
        newlines.extend(msgid_lines)
        newlines.extend(msgstr_lines)
    if changed > 0:
        # we store .po files with unix line ends in git,
        # so do always write them even on windows
        # The context manager closes the output file so that everything
        # written is flushed to disk before returning.
        with io.open(target, 'w', encoding='utf_8', newline='\n') as po1:
            for line in newlines:
                po1.write(line + '\n')
    return changed
Ejemplo n.º 17
0
 def test_detect_encoding3(self):
     """
     Check the encoding reported for the utf8 .po fixture.
     """
     po_path = 'tests/test_utf8.po'
     detected = polib.detect_encoding(po_path)
     self.assertEqual(detected, 'UTF-8')
Ejemplo n.º 18
0
 def test_detect_encoding2(self):
     """
     Check the encoding reported for a .pot template file.
     """
     pot_path = 'tests/test_merge.pot'
     detected = polib.detect_encoding(pot_path)
     self.assertEqual(detected, 'utf-8')
Ejemplo n.º 19
0
def mergepo_minimaldiff(target, source, options):
    """Merge translations into ``target`` while rewriting as little as possible.

    ``target`` is scanned line by line. Each msgid/msgstr pair is handed
    to ``translate`` together with its flags and the catalog parsed from
    ``source``; every other line is copied through unchanged. The file
    is rewritten only when ``translate`` reported at least one change.

    Args:
        target: path of the po file to update in place; it must be
            detected as UTF-8.
        source: passed to ``polib.pofile`` to load the catalog that
            ``translate`` consults.
        options: passed through unchanged to ``translate``.

    Returns:
        The sum of the values returned by ``translate``.

    Raises:
        RuntimeError: if ``target`` is not detected as UTF-8.
        NotImplementedError: if ``target`` contains plural forms
            (a ``msgstr[`` line).
    """
    changed = 0
    po2 = polib.pofile(source)
    target_enc = polib.detect_encoding(target)
    # for utf8 files we can use our self written parser to minimize diffs,
    # otherwise we need to use polib
    if target_enc not in ('UTF-8', 'utf-8', 'utf_8'):
        # This used to be a bare ``raise`` with no active exception, which
        # Python 3 reports as RuntimeError; keep that type, add a message.
        raise RuntimeError(
            'unsupported encoding %r for %s' % (target_enc, target))
    # open file with universal newlines, since it can happen that we are
    # on unix, but the file has been written on windows or vice versa.
    with io.open(target, 'r', encoding='utf_8', newline=None) as po1:
        oldlines = read(po1)
    newlines = []
    in_msgid = False
    in_msgstr = False
    flags = []
    msgstr_lines = []
    msgid_lines = []
    msgid = ''
    # Prefixes of continuation lines, for active and obsolete ("#~") entries.
    continuation = ('"', '#~ "')
    for line in oldlines:
        if in_msgid:
            if line.startswith(continuation):
                msgid_lines.append(line)
            else:
                in_msgid = False
                msgid = parse_msg(msgid_lines)
        elif in_msgstr:
            if line.startswith(continuation):
                msgstr_lines.append(line)
            else:
                # The entry is complete: translate it, then emit it.
                in_msgstr = False
                changed = changed + translate(msgid, flags, msgstr_lines, po2, options)
                if len(flags) > 0:
                    flagline = u'#, ' + u', '.join(flags)
                    newlines.append(flagline)
                    flags = []
                newlines.extend(msgid_lines)
                newlines.extend(msgstr_lines)
                msgid_lines = []
                msgstr_lines = []
                msgid = ''
        # Deliberately not an ``elif``: the line that ended a msgid or
        # msgstr above still has to be classified here.
        if not in_msgid and not in_msgstr:
            if line.startswith('#,') and len(flags) == 0:
                flags = line[2:].strip().split(u', ')
            elif line.startswith(('msgid', '#~ msgid')):
                msgid_lines.append(line)
                in_msgid = True
            elif line.startswith(('msgstr', '#~ msgstr')):
                if line.startswith(('msgstr[', '#~ msgstr[')):
                    # NotImplementedError subclasses RuntimeError, which is
                    # what the former bare ``raise`` produced on Python 3.
                    raise NotImplementedError(
                        'plural forms are not implemented')
                msgstr_lines.append(line)
                in_msgstr = True
            else:
                newlines.append(line)
    if msgid != '':
        # the file ended with a msgstr
        changed = changed + translate(msgid, flags, msgstr_lines, po2, options)
        if len(flags) > 0:
            flagline = u'#, ' + u', '.join(flags)
            newlines.append(flagline)
        newlines.extend(msgid_lines)
        newlines.extend(msgstr_lines)
    if changed > 0:
        # we store .po files with unix line ends in git,
        # so do always write them even on windows
        # The context manager closes the output file so that everything
        # written is flushed to disk before returning.
        with io.open(target, 'w', encoding='utf_8', newline='\n') as po1:
            for line in newlines:
                po1.write(line + '\n')
    return changed
Ejemplo n.º 20
0
Archivo: tests.py Proyecto: H0bby/polib
 def test_detect_encoding5(self):
     """
     Check the encoding reported for the utf8 .mo fixture.
     """
     mo_path = 'tests/test_utf8.mo'
     # NOTE(review): the second argument presumably selects binary mode;
     # confirm against polib.detect_encoding.
     detected = polib.detect_encoding(mo_path, True)
     self.assertEqual(detected, 'UTF-8')