def testIsIdenticalTranslation(self):
    """Check `is_identical_translation` on msgids and translations."""
    first = TranslationMessageData()
    second = TranslationMessageData()

    def identical():
        # Compare the two messages in whatever state they are in now.
        return is_identical_translation(first, second)

    # Same singular msgid, nothing else set.
    first.msgid_singular = "foo"
    second.msgid_singular = "foo"
    self.assertTrue(
        identical(),
        "Two blank translation messages do not evaluate as identical.")

    # Only one of the two has a plural msgid.
    first.msgid_plural = "foos"
    self.assertFalse(
        identical(),
        "Message with fewer plural forms is accepted as identical.")

    # Both have a plural msgid, but the plurals differ.
    second.msgid_plural = "splat"
    self.assertFalse(
        identical(),
        "Messages with different plurals accepted as identical.")

    # Matching plural msgids.
    second.msgid_plural = "foos"
    self.assertTrue(
        identical(),
        "Messages with identical plural forms not accepted as identical.")

    # One translated, one untranslated.
    first._translations = ["le foo"]
    self.assertFalse(
        identical(),
        "Failed to distinguish translated message from untranslated one.")

    second._translations = ["le foo"]
    self.assertTrue(
        identical(),
        "Identical translations not accepted as identical.")

    # One side gains a plural translation the other lacks.
    first._translations = ["le foo", "les foos"]
    self.assertFalse(
        identical(),
        "Failed to distinguish message with missing plural translation.")

    second._translations = ["le foo", "les foos"]
    self.assertTrue(
        identical(),
        "Identical plural translations not accepted as equal.")

    # One side gains a third form.
    first._translations = ["le foo", "les foos", "beaucoup des foos"]
    self.assertFalse(
        identical(),
        "Failed to distinguish message with extra plural translations.")

    # A trailing None on one side is expected not to break equality.
    second._translations = [
        "le foo", "les foos", "beaucoup des foos", None]
    self.assertTrue(
        identical(),
        "Identical multi-form messages not accepted as identical.")
def test_export_message(self):
    # Exporting one individual message is not supported by the MO
    # exporter; the attempt must raise NotImplementedError.
    mo_exporter = GettextMOExporter()
    export_message = mo_exporter.exportTranslationMessageData
    self.assertRaises(
        NotImplementedError, export_message, TranslationMessageData())
def test_duplicateTranslationError(self):
    # Providing multiple translations for the same form raises a
    # sensible error message.
    data = TranslationMessageData()
    data.addTranslation(0, 'singular')
    try:
        data.addTranslation(0, 'ralugnis')
    except TranslationFormatSyntaxError as error:
        # represent() receives a default text; the assertion checks that
        # the specific error text is returned instead of that default.
        self.assertEqual(
            error.represent("(Default text, should not be returned.)"),
            "Message has more than one translation for plural form 0.")
    else:
        # Without this branch the test passes vacuously whenever the
        # duplicate is silently accepted and no exception is raised.
        self.fail(
            "Adding a second translation for plural form 0 did not "
            "raise TranslationFormatSyntaxError.")
def getTranslationMessageData(self, translationmessage):
    # Build the TranslationMessageData equivalent (the form used during
    # import) of a TranslationMessage: msgids and context come from its
    # POTMsgSet, translations from the message itself.
    data = TranslationMessageData()
    source = translationmessage.potmsgset
    data.context = source.context
    data.msgid_singular = source.singular_text
    data.msgid_plural = source.plural_text
    # The position in the translations sequence is the plural form.
    form = 0
    for text in translationmessage.translations:
        data.addTranslation(form, text)
        form += 1
    return data
def test_comments_text_representation_multiline(self):
    # Comments containing newlines are exported line by line, and a
    # final newline in the input makes no difference to the output.
    with_and_without_final_newline = (
        "Line One\nLine Two",
        "Line One\nLine Two\n",
        )

    data = TranslationMessageData()
    for text in with_and_without_final_newline:
        data.comment = text
        self.assertEqual(
            "#Line One\n#Line Two", comments_text_representation(data))

    # Source comments get the same treatment, with a "#. " prefix.
    data = TranslationMessageData()
    for text in with_and_without_final_newline:
        data.source_comment = text
        self.assertEqual(
            "#. Line One\n#. Line Two",
            comments_text_representation(data))
def _makeExportedHeader(self, translation_file):
    """Export the file's header as if it were a regular message.

    The header's raw content becomes the singular translation of an
    otherwise empty message, and the header comment becomes the
    message comment.  For templates the message is flagged fuzzy.

    :param translation_file: Object providing `header` and
        `is_template`.
    :return: Whatever `exportTranslationMessageData` produces for the
        header message.
    """
    message = TranslationMessageData()
    header = translation_file.header
    message.addTranslation(
        TranslationConstants.SINGULAR_FORM, header.getRawContent())
    message.comment = translation_file.header.comment
    if translation_file.is_template:
        message.flags.update(['fuzzy'])
    return self.exportTranslationMessageData(message)
def test_template_importMessage_updates_file_references(self):
    # When a template message is imported, the filereferences of the
    # already existing POTMsgSet are replaced by the imported ones.
    template = self.factory.makePOTemplate()
    potmsgset = self.factory.makePOTMsgSet(potemplate=template)
    references_before = self.factory.getUniqueString()
    references_imported = self.factory.getUniqueString()
    potmsgset.filereferences = references_before

    incoming = TranslationMessageData()
    incoming.msgid_singular = potmsgset.singular_text
    incoming.file_references = references_imported

    importer = POTFileImporter(
        FakeImportQueueEntry(template), FakeParser(), DevNullLogger())
    importer.importMessage(incoming)

    self.assertEqual(references_imported, potmsgset.filereferences)
def test_translation_importMessage_does_not_update_file_references(self):
    # A translation import must leave the filereferences of an
    # existing POTMsgSet alone.  (It used to overwrite them, which is
    # what caused bug 715854).
    pofile = self.factory.makePOFile()
    potmsgset = self.factory.makePOTMsgSet(potemplate=pofile.potemplate)
    references_before = self.factory.getUniqueString()
    references_imported = self.factory.getUniqueString()
    potmsgset.filereferences = references_before

    incoming = TranslationMessageData()
    incoming.msgid_singular = potmsgset.singular_text
    incoming.file_references = references_imported

    importer = POFileImporter(
        FakeImportQueueEntry(pofile.potemplate, pofile),
        FakeParser(), DevNullLogger())
    importer.importMessage(incoming)

    self.assertEqual(references_before, potmsgset.filereferences)
def new_general_entity(self, name, value):
    """See `xmldtd.WFCDTD`.

    Records the entity as a message in `self.messages`: `name` is the
    msgid, `value` is both the singular translation and the singular
    text.  Nothing is recorded while `self.started` is false.
    """
    if not self.started:
        return

    # CarlosPerelloMarin 20070326: xmldtd parser does an inline
    # parsing which means that the content is all in a single line so we
    # don't have a way to show the line number with the source reference.
    reference = "%s(%s)" % (self.filename, name)

    entity = TranslationMessageData()
    entity.msgid_singular = name
    entity.file_references_list = [reference]
    entity.addTranslation(TranslationConstants.SINGULAR_FORM, value)
    entity.singular_text = value
    entity.context = self.chrome_path
    # The pending comment is attached to this entity and then cleared
    # below, so it is used at most once.
    entity.source_comment = self.last_comment
    self.messages.append(entity)

    self.started += 1
    self.last_comment = None
def _test_storeTranslationsInDatabase_empty(self, by_maintainer=True):
    """Verify that storing an empty message creates no translation.

    :param by_maintainer: Passed on to the POFile importer.  The
        template importer is always created with by_maintainer=True.
    """
    # Set up the importers: template first, then the translation.
    template_importer = self._createPOTFileImporter(
        TEST_TEMPLATE_EXPORTED, by_maintainer=True)
    importer = self._createPOFileImporter(
        template_importer, TEST_TRANSLATION_EXPORTED,
        by_maintainer=by_maintainer, person=self.importer_person)

    # The message to import has an empty singular translation.
    empty_message = TranslationMessageData()
    empty_message.addTranslation(0, u'')

    potmsgset = self.factory.makePOTMsgSet(
        potemplate=importer.potemplate, sequence=50)
    stored = importer.storeTranslationsInDatabase(
        empty_message, potmsgset)

    # Nothing was stored: no TranslationMessage comes back.
    self.assertIs(None, stored)
def test_addTranslation1(self):
    # Translating form 1 while leaving form 0 untranslated is unusual
    # but allowed; form 0 then shows up as None.
    message = TranslationMessageData()
    message.addTranslation(1, 'plural')
    expected = [None, 'plural']
    self.assertEqual(message.translations, expected)
def test_emptyTranslations(self):
    # A freshly created TranslationMessageData has no translations.
    message = TranslationMessageData()
    nothing = []
    self.assertEqual(message.translations, nothing)
def test_addTranslationReversed(self):
    # Forms may be added out of order (plural before singular); the
    # resulting list is still ordered by form number.
    message = TranslationMessageData()
    for form, text in ((1, 'plural'), (0, 'singular')):
        message.addTranslation(form, text)
    expected = ['singular', 'plural']
    self.assertEqual(message.translations, expected)
def test_addTranslationMulti(self):
    # The ordinary multi-form case: singular first, then plural.
    message = TranslationMessageData()
    for form, text in ((0, 'singular'), (1, 'plural')):
        message.addTranslation(form, text)
    expected = ['singular', 'plural']
    self.assertEqual(message.translations, expected)
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    Recognised syntax, as implemented below:

    * `key = value` lines become messages;
    * lines starting with `#` or `//` are whole-line comments;
    * `/* ... */` comments may span lines; one containing
      `self.license_block_text` is discarded;
    * `//` after other content starts an end-of-line comment, unless
      it directly follows a colon inside a value (unescaped URLs).

    A comment is attached as the source comment of a message only if
    it ends on the message's own line or on the line just before it.

    :param content: Encoded file content; it is decoded as UTF-8.
    :raises TranslationFormatInvalidInputError: if the content is not
        valid UTF-8, or a line cannot be unescaped.
    :raises TranslationFormatSyntaxError: if a message key is rejected
        by `valid_property_msgid`.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        # NOTE(review): the text is passed positionally here while every
        # other raise in this function uses message=...; confirm that
        # the first positional parameter really is the message.
        raise TranslationFormatInvalidInputError(
            'Content is not valid unicode-escaped text')

    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    # Lines are numbered from 1 *before* they are processed, so that an
    # unescape failure is reported against the line that caused it.
    for line_num, line in enumerate(content.splitlines(), 1):
        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # Drop the whole marker: one character for '#', two for
                # '//' (otherwise a stray '/' ends up in the comment).
                if line.startswith(u'//'):
                    marker_length = 2
                else:
                    marker_length = 1
                line = line[marker_length:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        # Consume the line from the left, one token or character at a
        # time, until nothing is left or the rest can be skipped.
        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False
                    elif last_comment is not None:
                        # Comments must end always with a new line.
                        # (Nothing to terminate if the comment was
                        # empty; appending to None would fail.)
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        # First character stored from a new line: join
                        # it to the earlier text with a newline.
                        last_comment = u'%s\n%s' % (
                            last_comment, line[0])
                        last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permisive with what you import'
                # policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)
                ]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def _fetchDBRows(self, simulate_timeout=False):
    """Load this POFile's flagged messages into `self.current_messages`.

    One query fetches, for every message of the template in this
    language whose flag column (see `_getFlagName`) is true, the
    msgids, the context and all plural-form translations.  Rows with
    the same (msgid, msgid_plural, context) key are merged into one
    `TranslationMessageData`; a plural form that already has a
    translation keeps it.

    :param simulate_timeout: Used in tests.  Runs a sleeping query
        under a 1ms statement timeout instead of the real query.
    :return: None.  If the query times out, the transaction is aborted
        and nothing is loaded.
    """
    max_forms = TranslationConstants.MAX_PLURAL_FORMS

    # One join and one result column per plural form.
    join_clauses = []
    for form in xrange(max_forms):
        join_clauses.append(
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form))
    translation_columns = []
    for form in xrange(max_forms):
        translation_columns.append(
            "pt%d.translation AS translation%d" % (form, form))

    substitutions = {
        'translation_columns': ', '.join(translation_columns),
        'translation_joins': '\n'.join(join_clauses),
        'language': quote(self.pofile.language),
        'potemplate': quote(self.pofile.potemplate),
        'flag': self._getFlagName(),
    }

    # The query text, assembled from adjacent string literals.
    sql = (
        " SELECT POMsgId.msgid AS msgid,"
        " POMsgID_Plural.msgid AS msgid_plural,"
        " context,"
        " date_reviewed,"
        " %(translation_columns)s"
        " FROM POTMsgSet"
        " JOIN TranslationTemplateItem ON"
        " TranslationTemplateItem.potmsgset = POTMsgSet.id AND"
        " TranslationTemplateItem.potemplate = %(potemplate)s"
        " JOIN TranslationMessage ON"
        " POTMsgSet.id=TranslationMessage.potmsgset AND ("
        " TranslationMessage.potemplate = %(potemplate)s OR"
        " TranslationMessage.potemplate IS NULL) AND"
        " TranslationMessage.language = %(language)s"
        " %(translation_joins)s"
        " JOIN POMsgID ON"
        " POMsgID.id = POTMsgSet.msgid_singular"
        " LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON"
        " POMsgID_Plural.id = POTMsgSet.msgid_plural"
        " WHERE"
        " %(flag)s IS TRUE"
        " ORDER BY"
        " TranslationTemplateItem.sequence,"
        " TranslationMessage.potemplate NULLS LAST"
        " "
        ) % substitutions

    cur = cursor()
    try:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
        # timeout to work around bug 408718, but the query is
        # simpler now.  See if we still need this.

        # We have to commit what we've got so far or we'll lose
        # it when we hit TimeoutError.
        transaction.commit()

        if not simulate_timeout:
            timeout = 1000 * int(config.poimport.statement_timeout)
            query = sql
        else:
            # This is used in tests.
            timeout = '1ms'
            query = "SELECT pg_sleep(2)"
        cur.execute("SET statement_timeout to %s" % quote(timeout))
        cur.execute(query)
    except TimeoutError:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
        # know whether it still happens.
        transaction.abort()
        return

    rows = cur.fetchall()

    assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
        "Change this code to support %d plural forms"
        % TranslationConstants.MAX_PLURAL_FORMS)
    for row in rows:
        msgid, msgid_plural, context, _date_reviewed = row[:4]
        key = (msgid, msgid_plural, context)
        try:
            message = self.current_messages[key]
        except KeyError:
            message = TranslationMessageData()
            self.current_messages[key] = message
            message.context = context
            message.msgid_singular = msgid
            message.msgid_plural = msgid_plural

        # The last part of the row is msgstr0 .. msgstr5; the position
        # within that tail is the plural form number.
        for plural, msgstr in enumerate(row[4:4 + max_forms]):
            if msgstr is None:
                continue
            known = message.translations
            # Only fill forms that have no translation yet.
            if len(known) <= plural or known[plural] is None:
                message.addTranslation(plural, msgstr)
def test_resetAllTranslations(self):
    # After resetAllTranslations the message has no translations left.
    message = TranslationMessageData()
    message.addTranslation(0, 'singular')
    message.resetAllTranslations()
    nothing = []
    self.assertEqual(message.translations, nothing)
def test_duplicateTranslation(self):
    # Adding a second translation for a form that already has one is
    # rejected with TranslationFormatSyntaxError.
    message = TranslationMessageData()
    add_translation = message.addTranslation
    add_translation(0, 'singular')
    self.assertRaises(
        TranslationFormatSyntaxError, add_translation, 0, 'ralugnis')
def _parseFreshLine(self, line, original_line): """Parse a new line (not a continuation after escaped newline). :param line: Remaining part of input line. :param original_line: Line as it originally was on input. :return: If there is one, the first line of a quoted string belonging to the line's section. Otherwise, None. """ is_obsolete = False if line.startswith('#~'): if line.startswith('#~|'): # This is an old msgid for an obsolete message. return None else: is_obsolete = True line = line[2:].lstrip() if len(line) == 0: return None # If we get a comment line after a msgstr or a line starting with # msgid or msgctxt, this is a new entry. if ((line.startswith('#') or line.startswith('msgid') or line.startswith('msgctxt')) and self._section == 'msgstr'): if self._message is None: # first entry - do nothing. pass elif self._message.msgid_singular: self._dumpCurrentSection() self._storeCurrentMessage() elif self._translation_file.header is None: # When there is no msgid in the parsed message, it's the # header for this file. self._dumpCurrentSection() self._parseHeader( self._message.translations[ TranslationConstants.SINGULAR_FORM], self._message.comment) else: self._emitSyntaxWarning("We got a second header.") # Start a new message. self._message = TranslationMessageData() self._message_lineno = self._lineno self._section = None self._plural_case = None self._parsed_content = u'' if self._message is not None: # Record whether the message is obsolete. self._message.is_obsolete = is_obsolete if line[0] == '#': # Record flags if line[:2] == '#,': new_flags = [flag.strip() for flag in line[2:].split(',')] self._message.flags.update(new_flags) return None # Record file references if line[:2] == '#:': if self._message.file_references: # There is already a file reference, let's split it from # the new one with a new line char. 
self._message.file_references += '\n' self._message.file_references += line[2:].strip() return None # Record source comments if line[:2] == '#.': self._message.source_comment += line[2:].strip() + '\n' return None # Record comments self._message.comment += line[1:] + '\n' return None # Now we are in a msgctxt or msgid section, output previous section if line.startswith('msgid_plural'): if self._section != 'msgid': raise TranslationFormatSyntaxError( line_number=self._lineno, message="Unexpected keyword: msgid_plural") self._dumpCurrentSection() self._section = 'msgid_plural' line = line[len('msgid_plural'):] elif line.startswith('msgctxt'): if (self._section is not None and (self._section == 'msgctxt' or self._section.startswith('msgid'))): raise TranslationFormatSyntaxError( line_number=self._lineno, message="Unexpected keyword: msgctxt") self._section = 'msgctxt' line = line[len('msgctxt'):] elif line.startswith('msgid'): if (self._section is not None and self._section.startswith('msgid')): raise TranslationFormatSyntaxError( line_number=self._lineno, message="Unexpected keyword: msgid") if self._section is not None: self._dumpCurrentSection() self._section = 'msgid' line = line[len('msgid'):] self._plural_case = None # Now we are in a msgstr section elif line.startswith('msgstr'): self._dumpCurrentSection() self._section = 'msgstr' line = line[len('msgstr'):] # XXX kiko 2005-08-19: if line is empty, it means we got an msgstr # followed by a newline; that may be critical, but who knows? if line.startswith('['): # Plural case new_plural_case, line = line[1:].split(']', 1) try: new_plural_case = int(new_plural_case) except ValueError: # Trigger "invalid plural case number" error. 
new_plural_case = -1 if new_plural_case < 0: raise TranslationFormatSyntaxError( line_number=self._lineno, message="Invalid plural case number.") elif new_plural_case >= TranslationConstants.MAX_PLURAL_FORMS: raise TranslationFormatSyntaxError( line_number=self._lineno, message="Unsupported plural case number.") if (self._plural_case is not None) and (new_plural_case != self._plural_case + 1): self._emitSyntaxWarning("Bad plural case number.") if new_plural_case != self._plural_case: self._plural_case = new_plural_case else: self._emitSyntaxWarning( "msgstr[] repeats same plural case number.") else: self._plural_case = TranslationConstants.SINGULAR_FORM elif self._section is None: raise TranslationFormatSyntaxError(line_number=self._lineno, message='Invalid content: %r' % original_line) else: # This line could be the continuation of a previous section. pass line = line.strip() if len(line) == 0: self._emitSyntaxWarning( "Line has no content; this is not supported by some " "implementations of msgfmt.") return line
def parse(self, content_text):
    """Parse string as a PO file.

    The header is read line by line first, decoding each line with the
    charset found by `parse_charset`; whatever remains is then parsed
    in one go.

    :param content_text: The PO file content.
    :return: The `TranslationFileData` that was filled in.
    :raises TranslationFormatSyntaxError: if the file has neither
        messages nor a header, if no header is found, or if the last
        message is truncated.
    """
    # Initialize the parser.
    self._translation_file = TranslationFileData()
    self._messageids = set()
    self._pending_chars = content_text
    self._pending_unichars = u''
    self._lineno = 0
    # Message specific variables.
    self._message = TranslationMessageData()
    self._message_lineno = self._lineno
    self._section = None
    self._plural_case = None
    self._parsed_content = u''

    # First thing to do is to get the charset used in the content_text.
    charset = parse_charset(content_text)

    # Now, parse the header, inefficiently.  It ought to be short, so
    # this isn't disastrous.
    header_line = self._getHeaderLine()
    while header_line is not None:
        self._parseLine(header_line.decode(charset))
        have_header = self._translation_file.header is not None
        if have_header or self._message.msgid_singular:
            # Either found the header already or it's a message with a
            # non empty msgid which means is not a header.
            break
        header_line = self._getHeaderLine()

    if header_line is None:
        # The input ran out while we were still reading the header.
        header_missing = self._translation_file.header is None
        if header_missing and not self._message.msgid_singular:
            # This file contains no actual messages.
            self._dumpCurrentSection()

            # It may contain a header though.
            if not self._message.translations:
                raise TranslationFormatSyntaxError(
                    message="File contains no messages.")
            header_text = self._message.translations[
                TranslationConstants.SINGULAR_FORM]
            self._parseHeader(header_text, self._message.comment)

        # There is nothing left to parse.
        return self._translation_file

    # Parse anything left all in one go.
    for remaining_line in re.split(
            r'\n|\r\n|\r', self._pending_unichars):
        self._parseLine(remaining_line)

    if self._translation_file.header is None:
        raise TranslationFormatSyntaxError(
            message='No header found in this pofile')

    # We need to dump latest message.  A message without a section has
    # no content or is just a comment, and is ignored.
    if self._message is not None and self._section is not None:
        if self._section != 'msgstr':
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message='Got a truncated message!')
        self._dumpCurrentSection()
        self._storeCurrentMessage()

    return self._translation_file
def test_addTranslation0(self):
    # The common case: a single translation for form 0.
    message = TranslationMessageData()
    message.addTranslation(0, 'singular')
    expected = ['singular']
    self.assertEqual(message.translations, expected)