# Writes a catalog holding "bar" (with two user-comment lines) and the plural pair ("foo", "foos") using sort_output=True, then asserts that the expected entry text is contained in the output and that the first `msgid ""`, `msgid "bar"` and `msgid "foo"` occur in that order.
# NOTE(review): the line breaks inside the multi-line literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file before relying on the expected text.
def test_sorted_po(self): catalog = Catalog() catalog.add( u('bar'), locations=[('utils.py', 3)], user_comments=['Comment About `bar` with', 'multiple lines.']) catalog.add((u('foo'), u('foos')), (u('Voh'), u('Voeh')), locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, sort_output=True) value = buf.getvalue().strip() assert b('''\ # Comment About `bar` with # multiple lines. #: utils.py:3 msgid "bar" msgstr "" #: main.py:1 msgid "foo" msgid_plural "foos" msgstr[0] "Voh" msgstr[1] "Voeh"''') in value assert value.find(b('msgid ""')) < value.find( b('msgid "bar"')) < value.find(b('msgid "foo"'))
# Adds one long message with 29 locations (fake.py:1 .. fake.py:29) and writes it twice with omit_header=True -- first with width=None, then with width=100 -- comparing the lower-cased output against the expected text each time.
# NOTE(review): the line breaks inside the expected multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_no_wrap_and_width_behaviour_on_comments(self): catalog = Catalog() catalog.add( "Pretty dam long message id, which must really be big " "to test this wrap behaviour, if not it won't work.", locations=[("fake.py", n) for n in range(1, 30)]) buf = BytesIO() pofile.write_po(buf, catalog, width=None, omit_header=True) self.assertEqual( b("""\ #: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 #: fake.py:8 fake.py:9 fake.py:10 fake.py:11 fake.py:12 fake.py:13 fake.py:14 #: fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 fake.py:20 fake.py:21 #: fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 #: fake.py:29 msgid "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't work." msgstr "" """), buf.getvalue().lower()) buf = BytesIO() pofile.write_po(buf, catalog, width=100, omit_header=True) self.assertEqual( b("""\ #: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 fake.py:8 fake.py:9 fake.py:10 #: fake.py:11 fake.py:12 fake.py:13 fake.py:14 fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 #: fake.py:20 fake.py:21 fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 #: fake.py:29 msgid "" "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't" " work." msgstr "" """), buf.getvalue().lower())
# Adds one long message with 29 locations (fake.py:1 .. fake.py:29) and writes it twice with omit_header=True -- first with width=None, then with width=100 -- comparing the lower-cased output against the expected text each time.
# NOTE(review): the line breaks inside the expected multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_no_wrap_and_width_behaviour_on_comments(self): catalog = Catalog() catalog.add("Pretty dam long message id, which must really be big " "to test this wrap behaviour, if not it won't work.", locations=[("fake.py", n) for n in range(1, 30)]) buf = BytesIO() pofile.write_po(buf, catalog, width=None, omit_header=True) self.assertEqual(b("""\ #: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 #: fake.py:8 fake.py:9 fake.py:10 fake.py:11 fake.py:12 fake.py:13 fake.py:14 #: fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 fake.py:20 fake.py:21 #: fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 #: fake.py:29 msgid "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't work." msgstr "" """), buf.getvalue().lower()) buf = BytesIO() pofile.write_po(buf, catalog, width=100, omit_header=True) self.assertEqual(b("""\ #: fake.py:1 fake.py:2 fake.py:3 fake.py:4 fake.py:5 fake.py:6 fake.py:7 fake.py:8 fake.py:9 fake.py:10 #: fake.py:11 fake.py:12 fake.py:13 fake.py:14 fake.py:15 fake.py:16 fake.py:17 fake.py:18 fake.py:19 #: fake.py:20 fake.py:21 fake.py:22 fake.py:23 fake.py:24 fake.py:25 fake.py:26 fake.py:27 fake.py:28 #: fake.py:29 msgid "" "pretty dam long message id, which must really be big to test this wrap behaviour, if not it won't" " work." msgstr "" """), buf.getvalue().lower())
def test_npgettext(self):
    """Check plural lookups via ``ngettext`` and, with context ``foo``,
    via ``npgettext``, for the counts 1 and 2.
    """
    translations = self.translations

    # Plural lookup without a context.
    singular = translations.ngettext('foo1', 'foos1', 1)
    self.assertEqualTypeToo('Voh1', singular)
    plural = translations.ngettext('foo1', 'foos1', 2)
    self.assertEqualTypeToo('Vohs1', plural)

    # Same message ids looked up under the context 'foo'.
    ctx_singular = translations.npgettext('foo', 'foo1', 'foos1', 1)
    self.assertEqualTypeToo(b('VohCTX1'), ctx_singular)
    ctx_plural = translations.npgettext('foo', 'foo1', 'foos1', 2)
    self.assertEqualTypeToo(b('VohsCTX1'), ctx_plural)
def test_ldnpgettext(self):
    """Check ``ldngettext`` and ``ldnpgettext`` in the domain ``messages1``
    for the counts 1 and 2; the expected values are built with ``b()``.
    """
    translations = self.translations
    domain = 'messages1'

    # Plural lookup in the domain, no context.
    singular = translations.ldngettext(domain, 'foo1', 'foos1', 1)
    self.assertEqualTypeToo(b('VohD1'), singular)
    plural = translations.ldngettext(domain, 'foo1', 'foos1', 2)
    self.assertEqualTypeToo(b('VohsD1'), plural)

    # Plural lookup in the domain under the context 'foo'.
    ctx_singular = translations.ldnpgettext(domain, 'foo', 'foo1', 'foos1', 1)
    self.assertEqualTypeToo(b('VohCTXD1'), ctx_singular)
    ctx_plural = translations.ldnpgettext(domain, 'foo', 'foo1', 'foos1', 2)
    self.assertEqualTypeToo(b('VohsCTXD1'), ctx_plural)
def test_lnpgettext(self):
    """Check ``lngettext`` and ``lnpgettext`` for the counts 1 and 2; the
    expected values are built with ``b()``.
    """
    translations = self.translations

    # Plural lookup without a context.
    singular = translations.lngettext('foo1', 'foos1', 1)
    self.assertEqualTypeToo(b('Voh1'), singular)
    plural = translations.lngettext('foo1', 'foos1', 2)
    self.assertEqualTypeToo(b('Vohs1'), plural)

    # Plural lookup under the context 'foo'.
    ctx_singular = translations.lnpgettext('foo', 'foo1', 'foos1', 1)
    self.assertEqualTypeToo(b('VohCTX1'), ctx_singular)
    ctx_plural = translations.lnpgettext('foo', 'foo1', 'foos1', 2)
    self.assertEqualTypeToo(b('VohsCTX1'), ctx_plural)
# Builds a catalog with a long project name and revision_date 2007-04-01, writes it with the header included, and compares the first seven output lines (re-joined with newlines) against the expected header comment.
# NOTE(review): the line breaks inside the expected multi-line literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_long_lines_in_header(self): """ Verify that long lines in the header comment are wrapped correctly. """ catalog = Catalog(project='AReallyReallyLongNameForAProject', revision_date=datetime(2007, 4, 1)) buf = BytesIO() pofile.write_po(buf, catalog) self.assertEqual(b('''\ # Translations template for AReallyReallyLongNameForAProject. # Copyright (C) 2007 ORGANIZATION # This file is distributed under the same license as the # AReallyReallyLongNameForAProject project. # FIRST AUTHOR <EMAIL@ADDRESS>, 2007. # #, fuzzy'''), b('\n').join(buf.getvalue().splitlines()[:7]))
# Reads a PO snippet containing two entries with msgctxt "Menu" (ignore_obsolete=True), checks that the catalog has two messages and that both can be fetched by id plus context, then writes the catalog back with omit_header=True and asserts the stripped output equals the stripped input.
# NOTE(review): the line breaks inside the input literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_with_context(self): buf = StringIO(r'''# Some string in the menu #: main.py:1 msgctxt "Menu" msgid "foo" msgstr "Voh" # Another string in the menu #: main.py:2 msgctxt "Menu" msgid "bar" msgstr "Bahr" ''') catalog = pofile.read_po(buf, ignore_obsolete=True) self.assertEqual(2, len(catalog)) message = catalog.get('foo', context='Menu') self.assertEqual('Menu', message.context) message = catalog.get('bar', context='Menu') self.assertEqual('Menu', message.context) # And verify it pass through write_po out_buf = BytesIO() pofile.write_po(out_buf, catalog, omit_header=True) assert out_buf.getvalue().strip() == b(buf.getvalue().strip()), \ out_buf.getvalue()
# Writes a single message whose text says white space and line breaks matter, using no_location=True, omit_header=True and width=42, and compares the stripped output with the expected wrapped msgid.
# NOTE(review): this test depends on exact white space in both literals, and that white space appears to have been collapsed in this copy -- restore it from the upstream file before trusting the expected value.
def test_wrap_long_lines(self): text = """Here's some text where white space and line breaks matter, and should not be removed """ catalog = Catalog() catalog.add(text, locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, no_location=True, omit_header=True, width=42) self.assertEqual( b(r'''msgid "" "Here's some text where \n" "white space and line breaks matter, and" " should\n" "\n" "not be removed\n" "\n" msgstr ""'''), buf.getvalue().strip())
# Adds a regular "foo" message, stores a second message with a multi-line msgid/msgstr directly in catalog.obsolete, writes with omit_header=True, and checks that the obsolete entry is emitted after "foo" with every line prefixed by `#~`.
# NOTE(review): the line breaks inside the multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_po_with_multiline_obsolete_message(self): catalog = Catalog() catalog.add(u('foo'), u('Voh'), locations=[('main.py', 1)]) msgid = r"""Here's a message that covers multiple lines, and should still be handled correctly. """ msgstr = r"""Here's a message that covers multiple lines, and should still be handled correctly. """ catalog.obsolete[msgid] = Message(msgid, msgstr, locations=[('utils.py', 3)]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual( b(r'''#: main.py:1 msgid "foo" msgstr "Voh" #~ msgid "" #~ "Here's a message that covers\n" #~ "multiple lines, and should still be handled\n" #~ "correctly.\n" #~ msgstr "" #~ "Here's a message that covers\n" #~ "multiple lines, and should still be handled\n" #~ "correctly.\n"'''), buf.getvalue().strip())
# Adds a regular "foo" message, stores a second message with a multi-line msgid/msgstr directly in catalog.obsolete, writes with omit_header=True, and checks that the obsolete entry is emitted after "foo" with every line prefixed by `#~`.
# NOTE(review): the line breaks inside the multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_po_with_multiline_obsolete_message(self): catalog = Catalog() catalog.add(u('foo'), u('Voh'), locations=[('main.py', 1)]) msgid = r"""Here's a message that covers multiple lines, and should still be handled correctly. """ msgstr = r"""Here's a message that covers multiple lines, and should still be handled correctly. """ catalog.obsolete[msgid] = Message(msgid, msgstr, locations=[('utils.py', 3)]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b(r'''#: main.py:1 msgid "foo" msgstr "Voh" #~ msgid "" #~ "Here's a message that covers\n" #~ "multiple lines, and should still be handled\n" #~ "correctly.\n" #~ msgstr "" #~ "Here's a message that covers\n" #~ "multiple lines, and should still be handled\n" #~ "correctly.\n"'''), buf.getvalue().strip())
def test_write_po_file_with_specified_charset(self):
    """Write a catalog created with ``charset='iso-8859-1'`` and check that
    both the Content-Type header line and the translated string appear in
    the output, the latter encoded as iso-8859-1.
    """
    catalog = Catalog(charset='iso-8859-1')
    catalog.add('foo', u('\xe4\xf6\xfc'), locations=[('main.py', 1)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=False)
    written = out.getvalue().strip()

    # The header must announce the catalog's charset ...
    expected_header = b(r'"Content-Type: text/plain; charset=iso-8859-1\n"')
    assert expected_header in written
    # ... and the non-ASCII translation must be encoded with it.
    expected_msgstr = u('msgstr "\xe4\xf6\xfc"').encode('iso-8859-1')
    assert expected_msgstr in written
def test_write_po_file_with_specified_charset(self):
    """Write a catalog created with ``charset='iso-8859-1'`` and check that
    both the Content-Type header line and the translated string appear in
    the output, the latter encoded as iso-8859-1.
    """
    catalog = Catalog(charset='iso-8859-1')
    catalog.add('foo', u('\xe4\xf6\xfc'), locations=[('main.py', 1)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=False)
    written = out.getvalue().strip()

    # The header must announce the catalog's charset ...
    expected_header = b(r'"Content-Type: text/plain; charset=iso-8859-1\n"')
    assert expected_header in written
    # ... and the non-ASCII translation must be encoded with it.
    expected_msgstr = u('msgstr "\xe4\xf6\xfc"').encode('iso-8859-1')
    assert expected_msgstr in written
# Adds the message "foo" twice with different locations and checks that the output (omit_header=True) contains one entry whose `#:` comment lists both main.py:1 and utils.py:3.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_join_locations(self): catalog = Catalog() catalog.add(u('foo'), locations=[('main.py', 1)]) catalog.add(u('foo'), locations=[('utils.py', 3)]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b('''#: main.py:1 utils.py:3 msgid "foo" msgstr ""'''), buf.getvalue().strip())
# Builds a catalog with a long project name and revision_date 2007-04-01, writes it with the header included, and compares the first seven output lines (re-joined with newlines) against the expected header comment.
# NOTE(review): the line breaks inside the expected multi-line literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_long_lines_in_header(self): """ Verify that long lines in the header comment are wrapped correctly. """ catalog = Catalog(project='AReallyReallyLongNameForAProject', revision_date=datetime(2007, 4, 1)) buf = BytesIO() pofile.write_po(buf, catalog) self.assertEqual( b('''\ # Translations template for AReallyReallyLongNameForAProject. # Copyright (C) 2007 ORGANIZATION # This file is distributed under the same license as the # AReallyReallyLongNameForAProject project. # FIRST AUTHOR <EMAIL@ADDRESS>, 2007. # #, fuzzy'''), b('\n').join(buf.getvalue().splitlines()[:7]))
# Adds the message "foo" twice with the same auto comment and checks that the output (omit_header=True) contains the `#. A comment` line only once.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_duplicate_comments(self): catalog = Catalog() catalog.add(u('foo'), auto_comments=['A comment']) catalog.add(u('foo'), auto_comments=['A comment']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b('''#. A comment msgid "foo" msgstr ""'''), buf.getvalue().strip())
# Adds a message with previous_id "fo" and checks that writing with include_previous=True emits a `#| msgid "fo"` line between the location comment and the msgid.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_po_with_previous_msgid(self): catalog = Catalog() catalog.add(u('foo'), u('Voh'), locations=[('main.py', 1)], previous_id=u('fo')) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, include_previous=True) self.assertEqual(b('''#: main.py:1 #| msgid "fo" msgid "foo" msgstr "Voh"'''), buf.getvalue().strip())
# Adds a regular "foo" message, stores "bar" directly in catalog.obsolete, and checks that writing with ignore_obsolete=True produces only the "foo" entry.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_po_with_obsolete_message_ignored(self): catalog = Catalog() catalog.add(u('foo'), u('Voh'), locations=[('main.py', 1)]) catalog.obsolete['bar'] = Message(u('bar'), u('Bahr'), locations=[('utils.py', 3)], user_comments=['User comment']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, ignore_obsolete=True) self.assertEqual(b('''#: main.py:1 msgid "foo" msgstr "Voh"'''), buf.getvalue().strip())
# Adds the message "foo" twice with different locations and checks that the output (omit_header=True) contains one entry whose `#:` comment lists both main.py:1 and utils.py:3.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_join_locations(self): catalog = Catalog() catalog.add(u('foo'), locations=[('main.py', 1)]) catalog.add(u('foo'), locations=[('utils.py', 3)]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual( b('''#: main.py:1 utils.py:3 msgid "foo" msgstr ""'''), buf.getvalue().strip())
# Adds a message with previous_id "fo" and checks that writing with include_previous=True emits a `#| msgid "fo"` line between the location comment and the msgid.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_po_with_previous_msgid(self): catalog = Catalog() catalog.add(u('foo'), u('Voh'), locations=[('main.py', 1)], previous_id=u('fo')) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, include_previous=True) self.assertEqual( b('''#: main.py:1 #| msgid "fo" msgid "foo" msgstr "Voh"'''), buf.getvalue().strip())
# Writes a catalog holding "bar" (with two user-comment lines) and the plural pair ("foo", "foos") using sort_output=True, then asserts that the expected entry text is contained in the output and that the first `msgid ""`, `msgid "bar"` and `msgid "foo"` occur in that order.
# NOTE(review): the line breaks inside the multi-line literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file before relying on the expected text.
def test_sorted_po(self): catalog = Catalog() catalog.add(u('bar'), locations=[('utils.py', 3)], user_comments=['Comment About `bar` with', 'multiple lines.']) catalog.add((u('foo'), u('foos')), (u('Voh'), u('Voeh')), locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, sort_output=True) value = buf.getvalue().strip() assert b('''\ # Comment About `bar` with # multiple lines. #: utils.py:3 msgid "bar" msgstr "" #: main.py:1 msgid "foo" msgid_plural "foos" msgstr[0] "Voh" msgstr[1] "Voeh"''') in value assert value.find(b('msgid ""')) < value.find(b('msgid "bar"')) < value.find(b('msgid "foo"'))
# Adds the message "foo" at two long template paths (the second contains a hyphen) and checks the `#:` location comments in the output written with omit_header=True.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_locations_with_hyphens(self): catalog = Catalog() catalog.add(u('foo'), locations=[ ('doupy/templates/base/navmenu.inc.html.py', 60) ]) catalog.add(u('foo'), locations=[ ('doupy/templates/job-offers/helpers.html', 22) ]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b('''#: doupy/templates/base/navmenu.inc.html.py:60 #: doupy/templates/job-offers/helpers.html:22 msgid "foo" msgstr ""'''), buf.getvalue().strip())
# Writes a message containing one very long unbroken word with no_location=True, omit_header=True and width=32, and compares the stripped output with the expected wrapped msgid. Judging by the message text, this presumably guards against the wrapper looping forever on a word longer than the width.
# NOTE(review): the line breaks inside the multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_long_lines_with_long_word(self): text = """Here's some text that includesareallylongwordthatmightbutshouldnt throw us into an infinite loop """ catalog = Catalog() catalog.add(text, locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, no_location=True, omit_header=True, width=32) self.assertEqual(b(r'''msgid "" "Here's some text that\n" "includesareallylongwordthatmightbutshouldnt" " throw us into an infinite " "loop\n" msgstr ""'''), buf.getvalue().strip())
# Adds the message "foo" at two long template paths (the second contains a hyphen) and checks the `#:` location comments in the output written with omit_header=True.
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_locations_with_hyphens(self): catalog = Catalog() catalog.add(u('foo'), locations=[('doupy/templates/base/navmenu.inc.html.py', 60) ]) catalog.add(u('foo'), locations=[('doupy/templates/job-offers/helpers.html', 22) ]) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual( b('''#: doupy/templates/base/navmenu.inc.html.py:60 #: doupy/templates/job-offers/helpers.html:22 msgid "foo" msgstr ""'''), buf.getvalue().strip())
# Writes one message carrying an auto comment and one carrying user comments, and checks that they are emitted with the `#.` and `#` prefixes respectively (omit_header=True).
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_pot_with_translator_comments(self): catalog = Catalog() catalog.add(u('foo'), locations=[('main.py', 1)], auto_comments=['Comment About `foo`']) catalog.add(u('bar'), locations=[('utils.py', 3)], user_comments=['Comment About `bar` with', 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b('''#. Comment About `foo` #: main.py:1 msgid "foo" msgstr "" # Comment About `bar` with # multiple lines. #: utils.py:3 msgid "bar" msgstr ""'''), buf.getvalue().strip())
# Writes a message containing one very long unbroken word with no_location=True, omit_header=True and width=32, and compares the stripped output with the expected wrapped msgid. Judging by the message text, this presumably guards against the wrapper looping forever on a word longer than the width.
# NOTE(review): the line breaks inside the multi-line literals appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_wrap_long_lines_with_long_word(self): text = """Here's some text that includesareallylongwordthatmightbutshouldnt throw us into an infinite loop """ catalog = Catalog() catalog.add(text, locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, no_location=True, omit_header=True, width=32) self.assertEqual( b(r'''msgid "" "Here's some text that\n" "includesareallylongwordthatmightbutshouldnt" " throw us into an infinite " "loop\n" msgstr ""'''), buf.getvalue().strip())
# Writes a single message whose text says white space and line breaks matter, using no_location=True, omit_header=True and width=42, and compares the stripped output with the expected wrapped msgid.
# NOTE(review): this test depends on exact white space in both literals, and that white space appears to have been collapsed in this copy -- restore it from the upstream file before trusting the expected value.
def test_wrap_long_lines(self): text = """Here's some text where white space and line breaks matter, and should not be removed """ catalog = Catalog() catalog.add(text, locations=[('main.py', 1)]) buf = BytesIO() pofile.write_po(buf, catalog, no_location=True, omit_header=True, width=42) self.assertEqual(b(r'''msgid "" "Here's some text where \n" "white space and line breaks matter, and" " should\n" "\n" "not be removed\n" "\n" msgstr ""'''), buf.getvalue().strip())
# Writes one message carrying an auto comment and one carrying user comments, and checks that they are emitted with the `#.` and `#` prefixes respectively (omit_header=True).
# NOTE(review): the line breaks inside the expected literal appear to have been collapsed to spaces in this copy -- confirm against the upstream file.
def test_pot_with_translator_comments(self): catalog = Catalog() catalog.add(u('foo'), locations=[('main.py', 1)], auto_comments=['Comment About `foo`']) catalog.add( u('bar'), locations=[('utils.py', 3)], user_comments=['Comment About `bar` with', 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual( b('''#. Comment About `foo` #: main.py:1 msgid "foo" msgstr "" # Comment About `bar` with # multiple lines. #: utils.py:3 msgid "bar" msgstr ""'''), buf.getvalue().strip())
def read_mo(fileobj):
    """Read a binary MO file from the given file-like object and return a
    corresponding `Catalog` object.

    :param fileobj: the file-like object to read the MO file from; the whole
                    content is read at once with ``fileobj.read()``
    :return: a catalog object representing the parsed MO file
    :rtype: `Catalog`
    :raise IOError: if the magic number is not recognised, or if an index
                    entry points at or beyond the end of the data
    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}

    # Only used to label the IOError raised for malformed input.
    filename = getattr(fileobj, 'name', '')

    buf = fileobj.read()
    buflen = len(buf)
    unpack = struct.unpack

    # Parse the .mo file header, which consists of 5 little endian 32
    # bit words.
    magic = unpack('<I', buf[:4])[0]  # Are we big endian or little endian?
    if magic == LE_MAGIC:
        version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
        ii = '<II'
    elif magic == BE_MAGIC:
        version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
        ii = '>II'
    else:
        raise IOError(0, 'Bad magic number', filename)

    # Now put all messages from the .mo file buffer into the catalog
    # dictionary
    for i in xrange(0, msgcount):
        # Each index entry is a (length, offset) pair.
        mlen, moff = unpack(ii, buf[origidx:origidx + 8])
        mend = moff + mlen
        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
        tend = toff + tlen
        if mend < buflen and tend < buflen:
            msg = buf[moff:mend]
            tmsg = buf[toff:tend]
        else:
            raise IOError(0, 'File is corrupt', filename)

        # See if we're looking at GNU .mo conventions for metadata
        if mlen == 0:
            # Catalog description
            lastkey = key = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if b(':') in item:
                    key, value = item.split(b(':'), 1)
                    lastkey = key = key.strip().lower()
                    headers[key] = value.strip()
                elif lastkey:
                    # Continuation of the previous header.  Both operands
                    # come from the raw buffer, so the separator has to go
                    # through b() as well; a plain '\n' literal cannot be
                    # concatenated with bytes on Python 3.
                    headers[lastkey] += b('\n') + item

        if b('\x04') in msg:  # context
            # Split on the first EOT only, so that a stray EOT later in the
            # message id does not break the two-value unpacking.
            # NOTE(review): ctxt is left undecoded while msg/tmsg are decoded
            # below -- confirm whether that is intended.
            ctxt, msg = msg.split(b('\x04'), 1)
        else:
            ctxt = None

        if b('\x00') in msg:  # plural forms
            msg = msg.split(b('\x00'))
            tmsg = tmsg.split(b('\x00'))
            if catalog.charset:
                msg = [x.decode(catalog.charset) for x in msg]
                tmsg = [x.decode(catalog.charset) for x in tmsg]
        else:
            if catalog.charset:
                msg = msg.decode(catalog.charset)
                tmsg = tmsg.decode(catalog.charset)
        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = list(headers.items())
    return catalog
def test_ldpgettext(self):
    """Check ``ldgettext`` and, with context ``foo``, ``ldpgettext`` in the
    domain ``messages1``; the expected values are built with ``b()``.
    """
    translations = self.translations

    # Lookup in the domain without a context.
    plain = translations.ldgettext('messages1', 'foo')
    self.assertEqualTypeToo(b('VohD'), plain)

    # Same message id looked up under the context 'foo'.
    with_context = translations.ldpgettext('messages1', 'foo', 'foo')
    self.assertEqualTypeToo(b('VohCTXD'), with_context)
def test_lpgettext(self):
    """Check ``lgettext`` and, with context ``foo``, ``lpgettext``; the
    expected values are built with ``b()``.
    """
    translations = self.translations

    # Lookup without a context.
    plain = translations.lgettext('foo')
    self.assertEqualTypeToo(b('Voh'), plain)

    # Same message id looked up under the context 'foo'.
    with_context = translations.lpgettext('foo', 'foo')
    self.assertEqualTypeToo(b('VohCTX'), with_context)
>>> print(list(distinct('foobar'))) ['f', 'o', 'b', 'a', 'r'] :param iterable: the iterable collection providing the data :return: the distinct items in the collection :rtype: ``iterator`` """ seen = set() for item in iter(iterable): if item not in seen: yield item seen.add(item) # Regexp to match python magic encoding line PYTHON_MAGIC_COMMENT_re = re.compile( b(r'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)'), re.VERBOSE) def parse_encoding(fp): """Deduce the encoding of a source file from magic comment. It does this in the same way as the `Python interpreter`__ .. __: http://docs.python.org/ref/encodings.html The ``fp`` argument should be a seekable file object. (From Jeff Dairiki) """ pos = fp.tell() fp.seek(0) try: line1 = fp.readline()
def write_mo(fileobj, catalog, use_fuzzy=False):
    """Serialize a catalog into the GNU MO binary format and write it to a
    file-like object.

    >>> from babel.messages import Catalog
    >>> from babel.compat import BytesIO, GNUTranslations
    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u('bar'), u('baz')), (u('Bahr'), u('Batz')))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = BytesIO()

    >>> write_mo(buf, catalog)
    >>> _ = buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo') == u('Voh')
    True
    >>> translations.ungettext('bar', 'baz', 1) == u('Bahr')
    True
    >>> translations.ungettext('bar', 'baz', 2) == u('Batz')
    True
    >>> translations.ugettext('fuz') == u('fuz')
    True
    >>> translations.ugettext('Fizz') == u('Fizz')
    True
    >>> translations.ugettext('Fuzz') == u('Fuzz')
    True
    >>> translations.ugettext('Fuzzes') == u('Fuzzes')
    True

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be
                      included in the output
    """
    def _encode(text):
        # All ids, strings and contexts are encoded with the catalog charset.
        return text.encode(catalog.charset)

    entries = list(catalog)
    if not use_fuzzy:
        # The first entry is always kept (presumably the header entry --
        # confirm); fuzzy entries after it are dropped.
        entries[1:] = [entry for entry in entries[1:] if not entry.fuzzy]
    entries.sort()

    nul = b('\x00')
    ids = strs = b('')
    offsets = []

    for message in entries:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = nul.join([_encode(part) for part in message.id])
            # An empty plural translation falls back to the singular id for
            # index 0 and to the plural id for every later index.
            filled = [
                string if string else message.id[min(int(idx), 1)]
                for idx, string in enumerate(message.string)
            ]
            msgstr = nul.join([_encode(part) for part in filled])
        else:
            msgid = _encode(message.id)
            # An empty translation falls back to the message id itself.
            if message.string:
                msgstr = _encode(message.string)
            else:
                msgstr = _encode(message.id)
        if message.context:
            # The context is prepended to the id, separated by an EOT byte.
            msgid = b('\x04').join([_encode(message.context), msgid])
        offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
        ids += msgid + nul
        strs += msgstr + nul

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    count = len(entries)
    keystart = 7 * 4 + 16 * count
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    key_index = []
    value_index = []
    for id_off, id_len, str_off, str_len in offsets:
        key_index.extend((id_len, id_off + keystart))
        value_index.extend((str_len, str_off + valuestart))
    index = key_index + value_index

    header = struct.pack(
        'Iiiiiii',
        LE_MAGIC,           # magic
        0,                  # version
        count,              # number of entries
        7 * 4,              # start of key index
        7 * 4 + count * 8,  # start of value index
        0, 0                # size and offset of hash table
    )
    fileobj.write(header)

    index_array = array.array("i", index)
    if PY3:
        fileobj.write(index_array.tobytes())
    else:
        fileobj.write(index_array.tostring())

    fileobj.write(ids + strs)
>>> print(list(distinct('foobar'))) ['f', 'o', 'b', 'a', 'r'] :param iterable: the iterable collection providing the data :return: the distinct items in the collection :rtype: ``iterator`` """ seen = set() for item in iter(iterable): if item not in seen: yield item seen.add(item) # Regexp to match python magic encoding line PYTHON_MAGIC_COMMENT_re = re.compile(b(r"[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)"), re.VERBOSE) def parse_encoding(fp): """Deduce the encoding of a source file from magic comment. It does this in the same way as the `Python interpreter`__ .. __: http://docs.python.org/ref/encodings.html The ``fp`` argument should be a seekable file object. (From Jeff Dairiki) """ pos = fp.tell() fp.seek(0)
def write_mo(fileobj, catalog, use_fuzzy=False):
    """Serialize a catalog into the GNU MO binary format and write it to a
    file-like object.

    >>> from babel.messages import Catalog
    >>> from babel.compat import BytesIO, GNUTranslations
    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u('bar'), u('baz')), (u('Bahr'), u('Batz')))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = BytesIO()

    >>> write_mo(buf, catalog)
    >>> _ = buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo') == u('Voh')
    True
    >>> translations.ungettext('bar', 'baz', 1) == u('Bahr')
    True
    >>> translations.ungettext('bar', 'baz', 2) == u('Batz')
    True
    >>> translations.ugettext('fuz') == u('fuz')
    True
    >>> translations.ugettext('Fizz') == u('Fizz')
    True
    >>> translations.ugettext('Fuzz') == u('Fuzz')
    True
    >>> translations.ugettext('Fuzzes') == u('Fuzzes')
    True

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be
                      included in the output
    """
    def _encode(text):
        # All ids, strings and contexts are encoded with the catalog charset.
        return text.encode(catalog.charset)

    entries = list(catalog)
    if not use_fuzzy:
        # The first entry is always kept (presumably the header entry --
        # confirm); fuzzy entries after it are dropped.
        entries[1:] = [entry for entry in entries[1:] if not entry.fuzzy]
    entries.sort()

    nul = b('\x00')
    ids = strs = b('')
    offsets = []

    for message in entries:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = nul.join([_encode(part) for part in message.id])
            # An empty plural translation falls back to the singular id for
            # index 0 and to the plural id for every later index.
            filled = [
                string if string else message.id[min(int(idx), 1)]
                for idx, string in enumerate(message.string)
            ]
            msgstr = nul.join([_encode(part) for part in filled])
        else:
            msgid = _encode(message.id)
            # An empty translation falls back to the message id itself.
            if message.string:
                msgstr = _encode(message.string)
            else:
                msgstr = _encode(message.id)
        if message.context:
            # The context is prepended to the id, separated by an EOT byte.
            msgid = b('\x04').join([_encode(message.context), msgid])
        offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
        ids += msgid + nul
        strs += msgstr + nul

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    count = len(entries)
    keystart = 7 * 4 + 16 * count
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    key_index = []
    value_index = []
    for id_off, id_len, str_off, str_len in offsets:
        key_index.extend((id_len, id_off + keystart))
        value_index.extend((str_len, str_off + valuestart))
    index = key_index + value_index

    header = struct.pack(
        'Iiiiiii',
        LE_MAGIC,           # magic
        0,                  # version
        count,              # number of entries
        7 * 4,              # start of key index
        7 * 4 + count * 8,  # start of value index
        0, 0                # size and offset of hash table
    )
    fileobj.write(header)

    index_array = array.array("i", index)
    if PY3:
        fileobj.write(index_array.tobytes())
    else:
        fileobj.write(index_array.tostring())

    fileobj.write(ids + strs)