def test_duplicateTranslationError(self):
    # Providing multiple translations for the same plural form must
    # raise TranslationFormatSyntaxError, and the error must describe
    # the problem sensibly instead of falling back to the default text
    # passed to represent().
    data = TranslationMessageData()
    data.addTranslation(0, 'singular')
    try:
        data.addTranslation(0, 'ralugnis')
    except TranslationFormatSyntaxError as error:
        self.assertEqual(
            error.represent("(Default text, should not be returned.)"),
            "Message has more than one translation for plural form 0.")
    else:
        # Without this branch the test would pass vacuously if the
        # duplicate translation were silently accepted.
        self.fail(
            "Adding a second translation for plural form 0 did not "
            "raise TranslationFormatSyntaxError.")
def getTranslationMessageData(self, translationmessage):
    # Build the TranslationMessageData object (the representation used
    # during import) that corresponds to the given TranslationMessage.
    # The message identification comes from the message's POTMsgSet;
    # the translations are copied over in order, one per plural form,
    # starting at form 0.
    source_msgset = translationmessage.potmsgset
    data = TranslationMessageData()
    data.context = source_msgset.context
    data.msgid_singular = source_msgset.singular_text
    data.msgid_plural = source_msgset.plural_text

    form = 0
    for text in translationmessage.translations:
        data.addTranslation(form, text)
        form += 1
    return data
def _makeExportedHeader(self, translation_file):
    """Export the file's header as a gettext PO message.

    The header's raw content is stored as the singular-form translation
    of an otherwise empty message, the header comment is carried over,
    and the message is flagged 'fuzzy' when the file is a template.

    :param translation_file: Object providing `header` and
        `is_template`.
    :return: The header message as a unicode string.
    """
    message = TranslationMessageData()
    raw_header = translation_file.header.getRawContent()
    message.addTranslation(TranslationConstants.SINGULAR_FORM, raw_header)
    message.comment = translation_file.header.comment

    if translation_file.is_template:
        message.flags.update(['fuzzy'])

    return self.exportTranslationMessageData(message)
def _makeExportedHeader(self, translation_file):
    """Encode the PO header of `translation_file` as an exported message.

    A message is built whose singular translation is the header's raw
    content and whose comment is the header's comment.  For templates
    the message additionally carries the 'fuzzy' flag.

    :param translation_file: Object providing `header` and
        `is_template`.
    :return: The header message as a unicode string.
    """
    header_message = TranslationMessageData()
    header_message.addTranslation(
        TranslationConstants.SINGULAR_FORM,
        translation_file.header.getRawContent())
    header_message.comment = translation_file.header.comment

    is_template = translation_file.is_template
    if is_template:
        header_message.flags.update(['fuzzy'])

    return self.exportTranslationMessageData(header_message)
def new_general_entity(self, name, value):
    """See `xmldtd.WFCDTD`."""
    if self.started:
        entity = TranslationMessageData()
        entity.msgid_singular = name
        # CarlosPerelloMarin 20070326: xmldtd parser does an inline
        # parsing which means that the content is all in a single line
        # so we don't have a way to show the line number with the source
        # reference.  The reference is therefore "filename(entity name)".
        reference = "%s(%s)" % (self.filename, name)
        entity.file_references_list = [reference]
        entity.addTranslation(TranslationConstants.SINGULAR_FORM, value)
        entity.singular_text = value
        entity.context = self.chrome_path
        # Attach the comment seen just before this entity, then consume
        # it so that it is not reused for the next one.
        entity.source_comment = self.last_comment
        self.messages.append(entity)
        self.started += 1
        self.last_comment = None
def _test_storeTranslationsInDatabase_empty(self, by_maintainer=True):
    """Check that importing an empty translation stores nothing.

    :param by_maintainer: Passed on to the POFile importer; the
        template importer is always created with by_maintainer=True.
    """
    # Set up a template importer and, on top of it, the POFile importer
    # under test.
    template_importer = self._createPOTFileImporter(
        TEST_TEMPLATE_EXPORTED, by_maintainer=True)
    importer = self._createPOFileImporter(
        template_importer, TEST_TRANSLATION_EXPORTED,
        by_maintainer=by_maintainer, person=self.importer_person)

    # The message to import has a single, empty translation.
    empty_message = TranslationMessageData()
    empty_message.addTranslation(0, u'')

    target_potmsgset = self.factory.makePOTMsgSet(
        potemplate=importer.potemplate, sequence=50)
    stored = importer.storeTranslationsInDatabase(
        empty_message, target_potmsgset)

    # No TranslationMessage is created.
    self.assertIs(None, stored)
class POParser(object):
    """Parser class for Gettext files."""

    def __init__(self, plural_formula=None):
        """Set up an idle parser.

        :param plural_formula: Expected plural formula; it is handed to
            `plural_form_mapper` together with the formula found in the
            file's header once that header has been parsed.
        """
        self._translation_file = None
        self._lineno = 0
        # This is a default plural form mapping (i.e. no mapping) when
        # no header is present in the PO file.
        self._plural_form_mapping = make_plurals_identity_map()
        self._expected_plural_formula = plural_formula

        # Marks when we're parsing a continuation of a string after an
        # escaped newline.
        self._escaped_line_break = False

    def _emitSyntaxWarning(self, message):
        """Record a syntax warning for the line currently being parsed.

        The warning is only stored if a translation file is being built.
        """
        warning = POSyntaxWarning(message, line_number=self._lineno)
        if self._translation_file:
            self._translation_file.syntax_warnings.append(unicode(warning))

    def _decode(self):
        """Decode pending raw input using the charset from the header.

        Decoded text is appended to `self._pending_unichars`; whatever
        could not be decoded yet stays in `self._pending_chars`.

        :raises TranslationFormatInvalidInputError: if more than 10
            bytes remain undecodable, which is taken to be a real
            encoding error rather than a partial multibyte sequence.
        """
        # is there anything to convert?
        if not self._pending_chars:
            return

        # if the PO header hasn't been parsed, then we don't know the
        # encoding yet
        if self._translation_file.header is None:
            return

        charset = self._translation_file.header.charset
        decode = codecs.getdecoder(charset)
        # decode as many characters as we can:
        try:
            newchars, length = decode(self._pending_chars, 'strict')
        except UnicodeDecodeError as exc:
            # XXX: James Henstridge 2006-03-16:
            # If the number of unconvertable chars is longer than a
            # multibyte sequence to be, the UnicodeDecodeError indicates
            # a real error, rather than a partial read.
            # I don't know what the longest multibyte sequence in the
            # encodings we need to support, but it shouldn't be more
            # than 10 bytes ...
            if len(self._pending_chars) - exc.start > 10:
                raise TranslationFormatInvalidInputError(
                    line_number=self._lineno,
                    message="Could not decode input from %s" % charset)
            newchars, length = decode(self._pending_chars[:exc.start],
                                      'strict')
        self._pending_unichars += newchars
        self._pending_chars = self._pending_chars[length:]

    def _getHeaderLine(self):
        """Pop the next line off the still-undecoded input.

        :return: The stripped line, or None if the pending input does
            not contain another line break.
        :raises AssertionError: if the header has already been parsed.
        """
        if self._translation_file.header is not None:
            # We know what charset the data is in, as we've already
            # parsed the header.  However, we're going to handle this
            # more efficiently, so we don't want to use _getHeaderLine
            # except for parsing the header.
            raise AssertionError(
                'using _getHeaderLine after header is parsed')

        # We don't know what charset the data is in, so we parse it one
        # line at a time until we have the header, and then we'll know
        # how to treat the rest of the data.
        parts = re.split(r'\n|\r\n|\r', self._pending_chars, 1)
        if len(parts) == 1:
            # only one line
            return None
        line, self._pending_chars = parts
        return line.strip()

    def parse(self, content_text):
        """Parse string as a PO file.

        :param content_text: Raw (undecoded) content of the PO file.
        :return: The `TranslationFileData` built from the content.
        :raises TranslationFormatSyntaxError: if the content has no
            messages, has no header, or ends in a truncated message.
        """
        # Initialize the parser.
        self._translation_file = TranslationFileData()
        self._messageids = set()
        self._pending_chars = content_text
        self._pending_unichars = u''
        self._lineno = 0
        # Message specific variables.
        self._message = TranslationMessageData()
        self._message_lineno = self._lineno
        self._section = None
        self._plural_case = None
        self._parsed_content = u''

        # First thing to do is to get the charset used in the
        # content_text.
        charset = parse_charset(content_text)

        # Now, parse the header, inefficiently. It ought to be short, so
        # this isn't disastrous.
        line = self._getHeaderLine()
        while line is not None:
            self._parseLine(line.decode(charset))
            if (self._translation_file.header is not None or
                self._message.msgid_singular):
                # Either found the header already or it's a message with
                # a non empty msgid which means is not a header.
                break
            line = self._getHeaderLine()

        if line is None:
            if (self._translation_file.header is None and
                not self._message.msgid_singular):
                # This file contains no actual messages.
                self._dumpCurrentSection()

                # It may contain a header though.
                if not self._message.translations:
                    raise TranslationFormatSyntaxError(
                        message="File contains no messages.")
                self._parseHeader(
                    self._message.translations[
                        TranslationConstants.SINGULAR_FORM],
                    self._message.comment)

            # There is nothing left to parse.
            return self._translation_file

        # Parse anything left all in one go.
        lines = re.split(r'\n|\r\n|\r', self._pending_unichars)
        for line in lines:
            self._parseLine(line)

        if self._translation_file.header is None:
            raise TranslationFormatSyntaxError(
                message='No header found in this pofile')

        if self._message is not None:
            # We need to dump latest message.
            if self._section is None:
                # The message has not content or it's just a comment,
                # ignore it.
                return self._translation_file
            elif self._section == 'msgstr':
                self._dumpCurrentSection()
                self._storeCurrentMessage()
            else:
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message='Got a truncated message!')

        return self._translation_file

    def _storeCurrentMessage(self):
        """Append the message being built to the translation file.

        Messages are keyed on msgid, prefixed by the context when there
        is one.  A message with a plural msgid gets its missing plural
        translations filled in with empty strings.

        :raises TranslationFormatInvalidInputError: if a message with
            the same key was already stored.
        """
        if self._message is not None:
            msgkey = self._message.msgid_singular
            if self._message.context is not None:
                msgkey = '%s\2%s' % (self._message.context, msgkey)
            if msgkey in self._messageids:
                # We use '%r' instead of '%d' because there are
                # situations when it returns an "<unprintable instance
                # object>". You can see more details on bug #2896
                raise TranslationFormatInvalidInputError(
                    message='PO file: duplicate msgid ending on line %r' % (
                        self._message_lineno))

            number_plural_forms = (
                self._translation_file.header.number_plural_forms)
            if (self._message.msgid_plural and
                len(self._message.translations) < number_plural_forms):
                # Has plural forms but the number of translations is
                # lower.  Fill the others with an empty string.
                for index in range(
                    len(self._message.translations), number_plural_forms):
                    self._message.addTranslation(index, u'')

            self._translation_file.messages.append(self._message)
            self._messageids.add(msgkey)
            self._message = None

    def _parseHeader(self, header_text, header_comment):
        """Parse the header entry and prepare for the rest of the file.

        Sets the translation file's header, picks up the header's syntax
        warnings, sets up the plural form mapping, and decodes the input
        that was buffered while the charset was still unknown.

        :raises TranslationFormatInvalidInputError: re-raised from
            `POHeader`; if it carries no line number it is given the
            line number where the current message started.
        """
        try:
            header = POHeader(header_text, header_comment)
            self._translation_file.header = header
            self._translation_file.syntax_warnings += header.syntax_warnings
        except TranslationFormatInvalidInputError as error:
            if error.line_number is None:
                error.line_number = self._message_lineno
            raise
        self._translation_file.header.is_fuzzy = (
            'fuzzy' in self._message.flags)

        if self._translation_file.messages:
            self._emitSyntaxWarning("Header entry is not first entry.")

        plural_formula = self._translation_file.header.plural_form_expression
        if plural_formula is None:
            # We default to a simple plural formula which uses
            # a single form for translations.
            plural_formula = '0'
        self._plural_form_mapping = plural_form_mapper(
            plural_formula, self._expected_plural_formula)
        # convert buffered input to the encoding specified in the PO
        # header
        self._decode()

    def _unescapeNumericCharSequence(self, string):
        """Unescape leading sequence of escaped numeric character codes.

        This is for characters given in hexadecimal or octal escape
        notation.

        :return: a tuple: first, any leading part of `string` as an
            unescaped string (empty if `string` did not start with a
            numeric escape sequence), and second, the remainder of
            `string` after the leading numeric escape sequences have
            been parsed.
        :raises TranslationFormatSyntaxError: on an unknown escape
            character or a malformed numeric escape sequence.
        :raises TranslationFormatInvalidInputError: if the unescaped
            bytes cannot be decoded in the file's charset (or as ASCII
            if the charset is not known yet).
        """
        escaped_string = ''
        position = 0
        length = len(string)
        while position + 1 < length and string[position] == '\\':
            # Handle escaped characters given as numeric character
            # codes.  These will still be in the original encoding.  We
            # extract the whole sequence of escaped chars to recode them
            # later into Unicode in a single call.
            lead_char = string[position + 1]
            if lead_char == 'x':
                # Hexadecimal escape.
                position += 4
            elif lead_char.isdigit():
                # Octal escape.
                position += 2
                # Up to two more octal digits.  The bounds check keeps
                # an escape at the very end of the string from raising
                # IndexError.
                for i in xrange(2):
                    if position < length and string[position].isdigit():
                        position += 1
                    else:
                        break
            elif lead_char in ESCAPE_MAP:
                # It's part of our mapping table, we ignore it here.
                break
            else:
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message=("Unknown escape sequence %s" %
                             string[position:position + 2]))

        if position == 0:
            # No escaping to be done.
            return '', string

        # We found some text escaped that should be recoded to Unicode.
        # First, we unescape it.
        escaped_string, string = string[:position], string[position:]
        try:
            unescaped_string = escaped_string.decode('string-escape')
        except ValueError:
            # A truncated or otherwise invalid escape (e.g. "\x4") is a
            # syntax problem in the file, not a programming error.
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message="Invalid escape sequence: (%s)" % escaped_string)

        if (self._translation_file is not None and
            self._translation_file.header is not None):
            # There is a header, so we know the original encoding for
            # the given string.
            charset = self._translation_file.header.charset
            know_charset = True
        else:
            # We don't know the original encoding of the imported file
            # so we cannot get the right values.  We try ASCII.
            # XXX JeroenVermeulen 2008-02-08: might as well try UTF-8
            # here.  It's a superset, and anything that's not UTF-8 is
            # very unlikely to validate as UTF-8.
            charset = 'ascii'
            know_charset = False

        try:
            decoded_text = unescaped_string.decode(charset)
        except UnicodeDecodeError:
            if know_charset:
                message = ("Could not decode escaped string as %s: (%s)"
                           % (charset, escaped_string))
            else:
                message = ("Could not decode escaped string: (%s)" %
                           escaped_string)
            raise TranslationFormatInvalidInputError(
                line_number=self._lineno, message=message)

        return decoded_text, string

    def _parseQuotedString(self, string):
        r"""Parse a quoted string, interpreting escape sequences.

          >>> parser = POParser()
          >>> parser._parseQuotedString(u'\"abc\"')
          u'abc'
          >>> parser._parseQuotedString(u'\"abc\\ndef\"')
          u'abc\ndef'
          >>> parser._parseQuotedString(u'\"ab\x63\"')
          u'abc'
          >>> parser._parseQuotedString(u'\"ab\143\"')
          u'abc'

          After the string has been converted to unicode, the backslash
          escaped sequences are still in the encoding that the charset
          header specifies. Such quoted sequences will be converted to
          unicode by this method.

          We don't know the encoding of the escaped characters and cannot
          be just recoded as Unicode so it's a
          TranslationFormatInvalidInputError
          >>> utf8_string = u'"view \\302\\253${version_title}\\302\\273"'
          >>> parser._parseQuotedString(utf8_string)
          Traceback (most recent call last):
          ...
          TranslationFormatInvalidInputError: Could not decode escaped string: (\302\253)

          Now, we note the original encoding so we get the right Unicode
          string.

          >>> class FakeHeader:
          ...     charset = 'UTF-8'
          >>> parser._translation_file = TranslationFileData()
          >>> parser._translation_file.header = FakeHeader()
          >>> parser._parseQuotedString(utf8_string)
          u'view \xab${version_title}\xbb'

          Let's see that we raise a TranslationFormatInvalidInputError
          exception when we have an escaped char that is not valid in the
          declared encoding of the original string:

          >>> iso8859_1_string = u'"foo \\xf9"'
          >>> parser._parseQuotedString(iso8859_1_string)
          Traceback (most recent call last):
          ...
          TranslationFormatInvalidInputError: Could not decode escaped string as UTF-8: (\xf9)

          An error will be raised if the entire string isn't contained in
          quotes properly:

          >>> parser._parseQuotedString(u'abc')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: String is not quoted
          >>> parser._parseQuotedString(u'\"ab')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: String not terminated
          >>> parser._parseQuotedString(u'\"ab\"x')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: Extra content found after string: (x)
        """
        if self._escaped_line_break:
            # Continuing a line after an escaped newline.  Strip
            # indentation.
            string = string.lstrip()
            self._escaped_line_break = False
        else:
            # Regular string.  Must start with opening quote, which we
            # strip.
            if string[0] != '"':
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="String is not quoted")
            string = string[1:]

        output = ''
        while len(string) > 0:
            if string[0] == '"':
                # Reached the end of the quoted string.  It's rare, but
                # there may be another quoted string on the same line.
                # It should be suffixed to what we already have, with
                # any whitespace between the strings removed.
                string = string[1:].lstrip()
                if len(string) == 0:
                    # End of line, end of string: the normal case
                    break
                if string[0] == '"':
                    # Start of a new string.  We've already swallowed
                    # the closing quote and any intervening whitespace;
                    # now swallow the re-opening quote and go on as if
                    # the string just went on normally
                    string = string[1:]
                    continue

                # if there is any non-string data afterwards, raise an
                # exception
                if len(string) > 0 and not string.isspace():
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message=("Extra content found after string: (%s)" %
                                 string))
                break
            elif string[0] == '\\':
                if len(string) == 1:
                    # A lone trailing backslash escapes the line break;
                    # the string continues on the next line.
                    self._escaped_line_break = True
                    string = ''
                    break
                elif string[1] in ESCAPE_MAP:
                    # We got one of the special escaped chars we know
                    # about.  Unescape it using the mapping table.
                    output += ESCAPE_MAP[string[1]]
                    string = string[2:]
                else:
                    unescaped, string = (
                        self._unescapeNumericCharSequence(string))
                    output += unescaped
            else:
                # Normal text.  Eat up as much as we can in one go.
                text = re.match(STRAIGHT_TEXT_RUN, string)
                output += text.group()
                zero, runlength = text.span()
                string = string[runlength:]
        else:
            # We finished parsing the string without finding the ending
            # quote char.
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message="String not terminated")

        return output

    def _dumpCurrentSection(self):
        """Dump current parsed content inside the translation message."""
        if self._section is None:
            # There is nothing to dump.
            return
        elif self._section == 'msgctxt':
            self._message.context = self._parsed_content
        elif self._section == 'msgid':
            self._message.msgid_singular = self._parsed_content
        elif self._section == 'msgid_plural':
            self._message.msgid_plural = self._parsed_content
            # Note in the header that there are plural forms.
            self._translation_file.header.has_plural_forms = True
        elif self._section == 'msgstr':
            if self._message.msgid_plural is not None:
                # Plural message: the form number found in the file goes
                # through the plural form mapping first.
                self._message.addTranslation(
                    self._plural_form_mapping[self._plural_case],
                    self._parsed_content)
            else:
                self._message.addTranslation(
                    self._plural_case,
                    self._parsed_content)
        else:
            raise AssertionError('Unknown section %s' % self._section)

        self._parsed_content = u''

    def _parseFreshLine(self, line, original_line):
        """Parse a new line (not a continuation after escaped newline).

        :param line: Remaining part of input line.
        :param original_line: Line as it originally was on input.
        :return: If there is one, the first line of a quoted string
            belonging to the line's section.  Otherwise, None.
        :raises TranslationFormatSyntaxError: on an unexpected keyword,
            a bad plural case number, or content outside any section.
        """
        is_obsolete = False
        if line.startswith('#~'):
            if line.startswith('#~|'):
                # This is an old msgid for an obsolete message.
                return None
            else:
                is_obsolete = True
                line = line[2:].lstrip()
                if len(line) == 0:
                    return None

        # If we get a comment line after a msgstr or a line starting
        # with msgid or msgctxt, this is a new entry.
        if ((line.startswith('#') or line.startswith('msgid') or
             line.startswith('msgctxt')) and self._section == 'msgstr'):
            if self._message is None:
                # first entry - do nothing.
                pass
            elif self._message.msgid_singular:
                self._dumpCurrentSection()
                self._storeCurrentMessage()
            elif self._translation_file.header is None:
                # When there is no msgid in the parsed message, it's the
                # header for this file.
                self._dumpCurrentSection()
                self._parseHeader(
                    self._message.translations[
                        TranslationConstants.SINGULAR_FORM],
                    self._message.comment)
            else:
                self._emitSyntaxWarning("We got a second header.")

            # Start a new message.
            self._message = TranslationMessageData()
            self._message_lineno = self._lineno
            self._section = None
            self._plural_case = None
            self._parsed_content = u''

        if self._message is not None:
            # Record whether the message is obsolete.
            self._message.is_obsolete = is_obsolete

        if line[0] == '#':
            # Record flags
            if line[:2] == '#,':
                new_flags = [flag.strip() for flag in line[2:].split(',')]
                self._message.flags.update(new_flags)
                return None
            # Record file references
            if line[:2] == '#:':
                if self._message.file_references:
                    # There is already a file reference, let's split it
                    # from the new one with a new line char.
                    self._message.file_references += '\n'
                self._message.file_references += line[2:].strip()
                return None
            # Record source comments
            if line[:2] == '#.':
                self._message.source_comment += line[2:].strip() + '\n'
                return None
            # Record comments
            self._message.comment += line[1:] + '\n'
            return None

        # Now we are in a msgctxt or msgid section, output previous
        # section
        if line.startswith('msgid_plural'):
            if self._section != 'msgid':
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgid_plural")
            self._dumpCurrentSection()
            self._section = 'msgid_plural'
            line = line[len('msgid_plural'):]
        elif line.startswith('msgctxt'):
            if (self._section is not None and
                (self._section == 'msgctxt' or
                 self._section.startswith('msgid'))):
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgctxt")
            self._section = 'msgctxt'
            line = line[len('msgctxt'):]
        elif line.startswith('msgid'):
            if (self._section is not None and
                self._section.startswith('msgid')):
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgid")
            if self._section is not None:
                self._dumpCurrentSection()
            self._section = 'msgid'
            line = line[len('msgid'):]
            self._plural_case = None
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            self._dumpCurrentSection()
            self._section = 'msgstr'
            line = line[len('msgstr'):]
            # XXX kiko 2005-08-19: if line is empty, it means we got an
            # msgstr followed by a newline; that may be critical, but
            # who knows?
            if line.startswith('['):
                # Plural case
                plural_parts = line[1:].split(']', 1)
                if len(plural_parts) != 2:
                    # "msgstr[" without a closing bracket: report it as
                    # a syntax error rather than letting the tuple
                    # unpacking below raise ValueError.
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message="Invalid plural case number.")
                new_plural_case, line = plural_parts

                try:
                    new_plural_case = int(new_plural_case)
                except ValueError:
                    # Trigger "invalid plural case number" error.
                    new_plural_case = -1

                if new_plural_case < 0:
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message="Invalid plural case number.")
                elif new_plural_case >= TranslationConstants.MAX_PLURAL_FORMS:
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message="Unsupported plural case number.")

                if (self._plural_case is not None) and (
                        new_plural_case != self._plural_case + 1):
                    self._emitSyntaxWarning("Bad plural case number.")
                if new_plural_case != self._plural_case:
                    self._plural_case = new_plural_case
                else:
                    self._emitSyntaxWarning(
                        "msgstr[] repeats same plural case number.")
            else:
                self._plural_case = TranslationConstants.SINGULAR_FORM
        elif self._section is None:
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message='Invalid content: %r' % original_line)
        else:
            # This line could be the continuation of a previous section.
            pass

        line = line.strip()
        if len(line) == 0:
            self._emitSyntaxWarning(
                "Line has no content; this is not supported by some "
                "implementations of msgfmt.")
        return line

    def _parseLine(self, original_line):
        """Parse one line of input and accumulate its quoted content.

        :param original_line: The line, already decoded.
        :raises TranslationFormatSyntaxError: if the line carries quoted
            content outside of a msgctxt, msgid, msgid_plural or msgstr
            section.
        """
        self._lineno += 1
        # Skip empty lines
        line = original_line.strip()
        if len(line) == 0:
            return

        if not self._escaped_line_break:
            line = self._parseFreshLine(line, original_line)
            if line is None or len(line) == 0:
                return

        line = self._parseQuotedString(line)

        text_section_types = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr')
        if self._section not in text_section_types:
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message='Invalid content: %r' % original_line)

        self._parsed_content += line
def _fetchDBRows(self, simulate_timeout=False):
    """Load this POFile's flagged translations into current_messages.

    Runs one query joining the template's POTMsgSets with their
    TranslationMessages for the POFile's language, then stores one
    `TranslationMessageData` per (msgid, msgid_plural, context) key in
    `self.current_messages`.  If the query hits the statement timeout
    the transaction is aborted and nothing is loaded.

    :param simulate_timeout: For tests: run a sleeping query under a
        1ms timeout instead of the real query.
    """
    msgstr_joins = []
    translations = []
    for form in xrange(TranslationConstants.MAX_PLURAL_FORMS):
        msgstr_joins.append(
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form))
    for form in xrange(TranslationConstants.MAX_PLURAL_FORMS):
        translations.append(
            "pt%d.translation AS translation%d" % (form, form))

    substitutions = {
        'translation_columns': ', '.join(translations),
        'translation_joins': '\n'.join(msgstr_joins),
        'language': quote(self.pofile.language),
        'potemplate': quote(self.pofile.potemplate),
        'flag': self._getFlagName(),
    }

    # The query text is assembled from adjacent literals; the pieces
    # concatenate to a single space-separated statement.
    sql = (
        " SELECT "
        "POMsgId.msgid AS msgid, "
        "POMsgID_Plural.msgid AS msgid_plural, "
        "context, "
        "date_reviewed, "
        "%(translation_columns)s "
        "FROM POTMsgSet "
        "JOIN TranslationTemplateItem ON "
        "TranslationTemplateItem.potmsgset = POTMsgSet.id AND "
        "TranslationTemplateItem.potemplate = %(potemplate)s "
        "JOIN TranslationMessage ON "
        "POTMsgSet.id=TranslationMessage.potmsgset AND ( "
        "TranslationMessage.potemplate = %(potemplate)s OR "
        "TranslationMessage.potemplate IS NULL) AND "
        "TranslationMessage.language = %(language)s "
        "%(translation_joins)s "
        "JOIN POMsgID ON "
        "POMsgID.id = POTMsgSet.msgid_singular "
        "LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON "
        "POMsgID_Plural.id = POTMsgSet.msgid_plural "
        "WHERE %(flag)s IS TRUE "
        "ORDER BY "
        "TranslationTemplateItem.sequence, "
        "TranslationMessage.potemplate NULLS LAST "
    ) % substitutions

    cur = cursor()
    try:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
        # timeout to work around bug 408718, but the query is
        # simpler now.  See if we still need this.

        # We have to commit what we've got so far or we'll lose
        # it when we hit TimeoutError.
        transaction.commit()

        if not simulate_timeout:
            timeout = 1000 * int(config.poimport.statement_timeout)
            query = sql
        else:
            # This is used in tests.
            timeout = '1ms'
            query = "SELECT pg_sleep(2)"
        cur.execute("SET statement_timeout to %s" % quote(timeout))
        cur.execute(query)
    except TimeoutError:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
        # know whether it still happens.
        transaction.abort()
        return

    rows = cur.fetchall()

    assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
        "Change this code to support %d plural forms"
        % TranslationConstants.MAX_PLURAL_FORMS)
    for row in rows:
        msgid, msgid_plural, context, date = row[:4]
        # The last part of the row is msgstr0 .. msgstr5.  Store them
        # in a dict indexed by the number of the plural form.
        msgstrs = dict(enumerate(row[4:]))

        key = (msgid, msgid_plural, context)
        if key not in self.current_messages:
            fresh = TranslationMessageData()
            self.current_messages[key] = fresh
            fresh.context = context
            fresh.msgid_singular = msgid
            fresh.msgid_plural = msgid_plural
        message = self.current_messages[key]

        for plural in xrange(TranslationConstants.MAX_PLURAL_FORMS):
            msgstr = msgstrs.get(plural, None)
            if msgstr is None:
                continue
            # Only fill forms that have no translation yet: either the
            # form is beyond what the message holds, or its slot is
            # still None.
            if (len(message.translations) <= plural or
                message.translations[plural] is None):
                message.addTranslation(plural, msgstr)
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    :param content: Raw content of the .properties file; it is decoded
        as UTF-8 before parsing.
    :raises TranslationFormatInvalidInputError: if the content is not
        valid UTF-8, or a line cannot be unicode-unescaped.
    :raises TranslationFormatSyntaxError: if a line's key is not a
        valid property msgid.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        raise TranslationFormatInvalidInputError(
            'Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Count the line before processing it, so that a decoding
        # failure below is reported against this line rather than the
        # previous one.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # Drop the comment marker: two characters for '//', one
                # for '#'.  (Stripping a single character from '//'
                # would leave a stray slash in the comment text.)
                if line.startswith(u'//'):
                    line = line[2:].strip()
                else:
                    line = line[1:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False
                    elif last_comment is not None:
                        # Comments must end always with a new line.
                        # There is nothing to terminate when the comment
                        # was empty ("/**/") or has just been discarded.
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                        last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permisive with what you import'
                # policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def test_addTranslation0(self):
    # Standard use case: a translation added for form 0 becomes the
    # only entry in the translations list.
    message_data = TranslationMessageData()
    message_data.addTranslation(0, 'singular')
    stored_translations = message_data.translations
    self.assertEqual(stored_translations, ['singular'])
def test_addTranslation1(self):
    # Unusual but possible: translate a higher form but not form 0.
    # The untranslated form 0 shows up as None in the translations.
    message_data = TranslationMessageData()
    message_data.addTranslation(1, 'plural')
    stored_translations = message_data.translations
    self.assertEqual(stored_translations, [None, 'plural'])
class POParser(object):
    """Parser class for Gettext files.

    Feed the raw file content to `parse`; it returns the
    `TranslationFileData` that was filled in with the header, the
    messages and any syntax warnings found along the way.
    """

    def __init__(self, plural_formula=None):
        """Set up an idle parser.

        :param plural_formula: Plural formula handed to
            `plural_form_mapper` (as the expected formula) once the
            file's header has been parsed.
        """
        self._translation_file = None
        self._lineno = 0
        # This is a default plural form mapping (i.e. no mapping) when
        # no header is present in the PO file.
        self._plural_form_mapping = make_plurals_identity_map()
        self._expected_plural_formula = plural_formula

        # Marks when we're parsing a continuation of a string after an
        # escaped newline.
        self._escaped_line_break = False

    def _emitSyntaxWarning(self, message):
        """Record a syntax warning for the current line.

        The warning is dropped when there is no translation file to
        attach it to.
        """
        warning = POSyntaxWarning(message, line_number=self._lineno)
        if self._translation_file:
            self._translation_file.syntax_warnings.append(unicode(warning))

    def _decode(self):
        """Decode pending raw input into the pending unicode buffer.

        Does nothing until the header (and so the charset) is known.

        :raises TranslationFormatInvalidInputError: if more than 10
            trailing bytes cannot be decoded with the header's charset.
        """
        # Is there anything to convert?
        if not self._pending_chars:
            return

        # If the PO header hasn't been parsed, then we don't know the
        # encoding yet.
        if self._translation_file.header is None:
            return

        charset = self._translation_file.header.charset
        decode = codecs.getdecoder(charset)
        # Decode as many characters as we can:
        try:
            newchars, length = decode(self._pending_chars, 'strict')
        except UnicodeDecodeError as exc:
            # XXX: James Henstridge 2006-03-16:
            # If the number of unconvertable chars is longer than a
            # multibyte sequence to be, the UnicodeDecodeError indicates
            # a real error, rather than a partial read.
            # I don't know what the longest multibyte sequence in the
            # encodings we need to support, but it shouldn't be more
            # than 10 bytes ...
            if len(self._pending_chars) - exc.start > 10:
                raise TranslationFormatInvalidInputError(
                    line_number=self._lineno,
                    message="Could not decode input from %s" % charset)
            # Decode only the part in front of the offending bytes; the
            # rest stays pending.
            newchars, length = decode(
                self._pending_chars[:exc.start], 'strict')
        self._pending_unichars += newchars
        self._pending_chars = self._pending_chars[length:]

    def _getHeaderLine(self):
        """Pop the next raw line off the pending input.

        Only to be used while the header has not been parsed yet.

        :return: The stripped line, or None when the pending input
            holds no further line terminator.
        :raises AssertionError: if the header was already parsed.
        """
        if self._translation_file.header is not None:
            # We know what charset the data is in, as we've already
            # parsed the header.  However, we're going to handle this
            # more efficiently, so we don't want to use _getHeaderLine
            # except for parsing the header.
            raise AssertionError(
                'using _getHeaderLine after header is parsed')

        # We don't know what charset the data is in, so we parse it one
        # line at a time until we have the header, and then we'll know
        # how to treat the rest of the data.
        parts = re.split(r'\n|\r\n|\r', self._pending_chars, 1)
        if len(parts) == 1:
            # Only one line.
            return None
        line, self._pending_chars = parts
        return line.strip()

    def parse(self, content_text):
        """Parse string as a PO file.

        :param content_text: Raw (undecoded) content of the PO file.
        :return: The `TranslationFileData` built from the content.
        :raises TranslationFormatSyntaxError: if the file has no
            messages, has no header, or ends in a truncated message.
        """
        # Initialize the parser.
        self._translation_file = TranslationFileData()
        self._messageids = set()
        self._pending_chars = content_text
        self._pending_unichars = u''
        self._lineno = 0
        # Message specific variables.
        self._message = TranslationMessageData()
        self._message_lineno = self._lineno
        self._section = None
        self._plural_case = None
        self._parsed_content = u''

        # First thing to do is to get the charset used in the content_text.
        charset = parse_charset(content_text)

        # Now, parse the header, inefficiently.  It ought to be short, so
        # this isn't disastrous.
        line = self._getHeaderLine()
        while line is not None:
            self._parseLine(line.decode(charset))
            if (self._translation_file.header is not None or
                    self._message.msgid_singular):
                # Either found the header already or it's a message with a
                # non empty msgid which means is not a header.
                break
            line = self._getHeaderLine()

        if line is None:
            if (self._translation_file.header is None and
                    not self._message.msgid_singular):
                # This file contains no actual messages.
                self._dumpCurrentSection()

                # It may contain a header though.
                if not self._message.translations:
                    raise TranslationFormatSyntaxError(
                        message="File contains no messages.")
                self._parseHeader(
                    self._message.translations[
                        TranslationConstants.SINGULAR_FORM],
                    self._message.comment)

            # There is nothing left to parse.
            return self._translation_file

        # Parse anything left all in one go.
        lines = re.split(r'\n|\r\n|\r', self._pending_unichars)
        for line in lines:
            self._parseLine(line)

        if self._translation_file.header is None:
            raise TranslationFormatSyntaxError(
                message='No header found in this pofile')

        if self._message is not None:
            # We need to dump latest message.
            if self._section is None:
                # The message has not content or it's just a comment,
                # ignore it.
                return self._translation_file
            elif self._section == 'msgstr':
                self._dumpCurrentSection()
                self._storeCurrentMessage()
            else:
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message='Got a truncated message!')

        return self._translation_file

    def _storeCurrentMessage(self):
        """Append the message being parsed to the translation file.

        Missing plural translations are padded with empty strings.

        :raises TranslationFormatInvalidInputError: if a message with
            the same context and msgid was stored before.
        """
        if self._message is not None:
            msgkey = self._message.msgid_singular
            if self._message.context is not None:
                msgkey = '%s\2%s' % (self._message.context, msgkey)
            if msgkey in self._messageids:
                # We use '%r' instead of '%d' because there are situations
                # when it returns an "<unprintable instance object>". You
                # can see more details on bug #2896
                raise TranslationFormatInvalidInputError(
                    message='PO file: duplicate msgid ending on line %r' % (
                        self._message_lineno))

            number_plural_forms = (
                self._translation_file.header.number_plural_forms)
            if (self._message.msgid_plural and
                    len(self._message.translations) < number_plural_forms):
                # Has plural forms but the number of translations is lower.
                # Fill the others with an empty string.
                for index in range(
                        len(self._message.translations),
                        number_plural_forms):
                    self._message.addTranslation(index, u'')

            self._translation_file.messages.append(self._message)
            self._messageids.add(msgkey)
            self._message = None

    def _parseHeader(self, header_text, header_comment):
        """Build the file header and switch to the charset it declares.

        :param header_text: Text of the header entry's translation.
        :param header_comment: Comment attached to the header entry.
        :raises TranslationFormatInvalidInputError: re-raised from
            `POHeader`, with the line number filled in if it had none.
        """
        try:
            header = POHeader(header_text, header_comment)
            self._translation_file.header = header
            self._translation_file.syntax_warnings += header.syntax_warnings
        except TranslationFormatInvalidInputError as error:
            if error.line_number is None:
                error.line_number = self._message_lineno
            raise
        self._translation_file.header.is_fuzzy = (
            'fuzzy' in self._message.flags)

        if self._translation_file.messages:
            self._emitSyntaxWarning("Header entry is not first entry.")

        plural_formula = self._translation_file.header.plural_form_expression
        if plural_formula is None:
            # We default to a simple plural formula which uses
            # a single form for translations.
            plural_formula = '0'
        self._plural_form_mapping = plural_form_mapper(
            plural_formula, self._expected_plural_formula)
        # Convert buffered input to the encoding specified in the PO
        # header.
        self._decode()

    def _unescapeNumericCharSequence(self, string):
        """Unescape leading sequence of escaped numeric character codes.

        This is for characters given in hexadecimal or octal escape
        notation.

        :return: a tuple: first, any leading part of `string` as an
            unescaped string (empty if `string` did not start with a
            numeric escape sequence), and second, the remainder of
            `string` after the leading numeric escape sequences have been
            parsed.
        :raises TranslationFormatSyntaxError: on an unknown or malformed
            escape sequence.
        :raises TranslationFormatInvalidInputError: if the unescaped
            bytes cannot be decoded.
        """
        position = 0
        length = len(string)
        while position + 1 < length and string[position] == '\\':
            # Handle escaped characters given as numeric character codes.
            # These will still be in the original encoding.  We extract the
            # whole sequence of escaped chars to recode them later into
            # Unicode in a single call.
            lead_char = string[position + 1]
            if lead_char == 'x':
                # Hexadecimal escape.
                position += 4
            elif lead_char.isdigit():
                # Octal escape.
                position += 2
                # Up to two more octal digits.  Stop at the end of the
                # input: an unterminated string may end inside the escape.
                for _ in range(2):
                    if position < length and string[position].isdigit():
                        position += 1
                    else:
                        break
            elif lead_char in ESCAPE_MAP:
                # It's part of our mapping table, we ignore it here.
                break
            else:
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message=("Unknown escape sequence %s" %
                             string[position:position + 2]))

        if position == 0:
            # No escaping to be done.
            return '', string

        # We found some text escaped that should be recoded to Unicode.
        # First, we unescape it.
        escaped_string, string = string[:position], string[position:]
        try:
            unescaped_string = escaped_string.decode('string-escape')
        except ValueError:
            # Malformed numeric escape (for instance a truncated "\x4"):
            # report it as bad input rather than an internal error.
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message="Invalid escape sequence %s" % escaped_string)

        if (self._translation_file is not None and
                self._translation_file.header is not None):
            # There is a header, so we know the original encoding for
            # the given string.
            charset = self._translation_file.header.charset
            know_charset = True
        else:
            # We don't know the original encoding of the imported file so
            # we cannot get the right values.  We try ASCII.
            # XXX JeroenVermeulen 2008-02-08: might as well try UTF-8 here.
            # It's a superset, and anything that's not UTF-8 is very
            # unlikely to validate as UTF-8.
            charset = 'ascii'
            know_charset = False

        try:
            decoded_text = unescaped_string.decode(charset)
        except UnicodeDecodeError:
            if know_charset:
                message = ("Could not decode escaped string as %s: (%s)"
                           % (charset, escaped_string))
            else:
                message = ("Could not decode escaped string: (%s)"
                           % escaped_string)
            raise TranslationFormatInvalidInputError(
                line_number=self._lineno, message=message)

        return decoded_text, string

    def _parseQuotedString(self, string):
        r"""Parse a quoted string, interpreting escape sequences.

          >>> parser = POParser()
          >>> parser._parseQuotedString(u'\"abc\"')
          u'abc'
          >>> parser._parseQuotedString(u'\"abc\\ndef\"')
          u'abc\ndef'
          >>> parser._parseQuotedString(u'\"ab\x63\"')
          u'abc'
          >>> parser._parseQuotedString(u'\"ab\143\"')
          u'abc'

          After the string has been converted to unicode, the backslash
          escaped sequences are still in the encoding that the charset
          header specifies. Such quoted sequences will be converted to
          unicode by this method.

          We don't know the encoding of the escaped characters and cannot
          be just recoded as Unicode so it's a
          TranslationFormatInvalidInputError
          >>> utf8_string = u'"view \\302\\253${version_title}\\302\\273"'
          >>> parser._parseQuotedString(utf8_string)
          Traceback (most recent call last):
          ...
          TranslationFormatInvalidInputError: Could not decode escaped string: (\302\253)

          Now, we note the original encoding so we get the right Unicode
          string.

          >>> class FakeHeader:
          ...     charset = 'UTF-8'
          >>> parser._translation_file = TranslationFileData()
          >>> parser._translation_file.header = FakeHeader()
          >>> parser._parseQuotedString(utf8_string)
          u'view \xab${version_title}\xbb'

          Let's see that we raise a TranslationFormatInvalidInputError
          exception when we have an escaped char that is not valid in the
          declared encoding of the original string:

          >>> iso8859_1_string = u'"foo \\xf9"'
          >>> parser._parseQuotedString(iso8859_1_string)
          Traceback (most recent call last):
          ...
          TranslationFormatInvalidInputError: Could not decode escaped string as UTF-8: (\xf9)

          An error will be raised if the entire string isn't contained in
          quotes properly:

          >>> parser._parseQuotedString(u'abc')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: String is not quoted

          >>> parser._parseQuotedString(u'\"ab')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: String not terminated

          >>> parser._parseQuotedString(u'\"ab\"x')
          Traceback (most recent call last):
            ...
          TranslationFormatSyntaxError: Extra content found after string: (x)
        """
        if self._escaped_line_break:
            # Continuing a line after an escaped newline.  Strip
            # indentation.
            string = string.lstrip()
            self._escaped_line_break = False
        else:
            # Regular string.  Must start with opening quote, which we
            # strip.
            if string[0] != '"':
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="String is not quoted")
            string = string[1:]

        output = ''
        while len(string) > 0:
            if string[0] == '"':
                # Reached the end of the quoted string.  It's rare, but
                # there may be another quoted string on the same line.  It
                # should be suffixed to what we already have, with any
                # whitespace between the strings removed.
                string = string[1:].lstrip()
                if len(string) == 0:
                    # End of line, end of string: the normal case.
                    break
                if string[0] == '"':
                    # Start of a new string.  We've already swallowed the
                    # closing quote and any intervening whitespace; now
                    # swallow the re-opening quote and go on as if the
                    # string just went on normally.
                    string = string[1:]
                    continue

                # If there is any non-string data afterwards, raise an
                # exception.
                if len(string) > 0 and not string.isspace():
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message=("Extra content found after string: (%s)" %
                                 string))
                break
            elif string[0] == '\\':
                if len(string) == 1:
                    # A lone trailing backslash escapes the line break;
                    # the string carries on with the next input line.
                    self._escaped_line_break = True
                    string = ''
                    break
                elif string[1] in ESCAPE_MAP:
                    # We got one of the special escaped chars we know
                    # about.  Unescape it using the mapping table.
                    output += ESCAPE_MAP[string[1]]
                    string = string[2:]
                else:
                    unescaped, string = (
                        self._unescapeNumericCharSequence(string))
                    output += unescaped
            else:
                # Normal text.  Eat up as much as we can in one go.
                text = re.match(STRAIGHT_TEXT_RUN, string)
                output += text.group()
                zero, runlength = text.span()
                string = string[runlength:]
        else:
            # We finished parsing the string without finding the ending
            # quote char.
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message="String not terminated")

        return output

    def _dumpCurrentSection(self):
        """Dump current parsed content inside the translation message.

        :raises AssertionError: if the current section is unknown.
        """
        if self._section is None:
            # There is nothing to dump.
            return
        elif self._section == 'msgctxt':
            self._message.context = self._parsed_content
        elif self._section == 'msgid':
            self._message.msgid_singular = self._parsed_content
        elif self._section == 'msgid_plural':
            self._message.msgid_plural = self._parsed_content
            # Note in the header that there are plural forms.
            self._translation_file.header.has_plural_forms = True
        elif self._section == 'msgstr':
            if self._message.msgid_plural is not None:
                # Plural message: translate the file's plural form number
                # through the plural form mapping.
                self._message.addTranslation(
                    self._plural_form_mapping[self._plural_case],
                    self._parsed_content)
            else:
                self._message.addTranslation(
                    self._plural_case, self._parsed_content)
        else:
            raise AssertionError('Unknown section %s' % self._section)

        self._parsed_content = u''

    def _parseFreshLine(self, line, original_line):
        """Parse a new line (not a continuation after escaped newline).

        :param line: Remaining part of input line.
        :param original_line: Line as it originally was on input.
        :return: If there is one, the first line of a quoted string
            belonging to the line's section.  Otherwise, None.
        :raises TranslationFormatSyntaxError: on an unexpected keyword,
            a bad plural case number, or content outside any section.
        """
        is_obsolete = False
        if line.startswith('#~'):
            if line.startswith('#~|'):
                # This is an old msgid for an obsolete message.
                return None
            else:
                is_obsolete = True
                line = line[2:].lstrip()
                if len(line) == 0:
                    return None

        # If we get a comment line after a msgstr or a line starting with
        # msgid or msgctxt, this is a new entry.
        if ((line.startswith('#') or line.startswith('msgid') or
             line.startswith('msgctxt')) and self._section == 'msgstr'):
            if self._message is None:
                # First entry - do nothing.
                pass
            elif self._message.msgid_singular:
                self._dumpCurrentSection()
                self._storeCurrentMessage()
            elif self._translation_file.header is None:
                # When there is no msgid in the parsed message, it's the
                # header for this file.
                self._dumpCurrentSection()
                self._parseHeader(
                    self._message.translations[
                        TranslationConstants.SINGULAR_FORM],
                    self._message.comment)
            else:
                self._emitSyntaxWarning("We got a second header.")

            # Start a new message.
            self._message = TranslationMessageData()
            self._message_lineno = self._lineno
            self._section = None
            self._plural_case = None
            self._parsed_content = u''

        if self._message is not None:
            # Record whether the message is obsolete.
            self._message.is_obsolete = is_obsolete

        if line[0] == '#':
            # Record flags
            if line[:2] == '#,':
                new_flags = [flag.strip() for flag in line[2:].split(',')]
                self._message.flags.update(new_flags)
                return None
            # Record file references
            if line[:2] == '#:':
                if self._message.file_references:
                    # There is already a file reference, let's split it
                    # from the new one with a new line char.
                    self._message.file_references += '\n'
                self._message.file_references += line[2:].strip()
                return None
            # Record source comments
            if line[:2] == '#.':
                self._message.source_comment += line[2:].strip() + '\n'
                return None
            # Record comments
            self._message.comment += line[1:] + '\n'
            return None

        # Now we are in a msgctxt or msgid section, output previous section
        if line.startswith('msgid_plural'):
            if self._section != 'msgid':
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgid_plural")
            self._dumpCurrentSection()
            self._section = 'msgid_plural'
            line = line[len('msgid_plural'):]
        elif line.startswith('msgctxt'):
            if (self._section is not None and
                    (self._section == 'msgctxt' or
                     self._section.startswith('msgid'))):
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgctxt")
            self._section = 'msgctxt'
            line = line[len('msgctxt'):]
        elif line.startswith('msgid'):
            if (self._section is not None and
                    self._section.startswith('msgid')):
                raise TranslationFormatSyntaxError(
                    line_number=self._lineno,
                    message="Unexpected keyword: msgid")
            if self._section is not None:
                self._dumpCurrentSection()
            self._section = 'msgid'
            line = line[len('msgid'):]
            self._plural_case = None
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            self._dumpCurrentSection()
            self._section = 'msgstr'
            line = line[len('msgstr'):]
            # XXX kiko 2005-08-19: if line is empty, it means we got an
            # msgstr followed by a newline; that may be critical, but who
            # knows?
            if line.startswith('['):
                # Plural case
                plural_case_text, closing_bracket, line = (
                    line[1:].partition(']'))
                if closing_bracket:
                    try:
                        new_plural_case = int(plural_case_text)
                    except ValueError:
                        # Trigger "invalid plural case number" error.
                        new_plural_case = -1
                else:
                    # "msgstr[" without its closing bracket: report it as
                    # an invalid plural case number as well.
                    new_plural_case = -1

                if new_plural_case < 0:
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message="Invalid plural case number.")
                elif (new_plural_case >=
                        TranslationConstants.MAX_PLURAL_FORMS):
                    raise TranslationFormatSyntaxError(
                        line_number=self._lineno,
                        message="Unsupported plural case number.")

                if (self._plural_case is not None) and (
                        new_plural_case != self._plural_case + 1):
                    self._emitSyntaxWarning("Bad plural case number.")
                if new_plural_case != self._plural_case:
                    self._plural_case = new_plural_case
                else:
                    self._emitSyntaxWarning(
                        "msgstr[] repeats same plural case number.")
            else:
                self._plural_case = TranslationConstants.SINGULAR_FORM
        elif self._section is None:
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message='Invalid content: %r' % original_line)
        else:
            # This line could be the continuation of a previous section.
            pass

        line = line.strip()
        if len(line) == 0:
            self._emitSyntaxWarning(
                "Line has no content; this is not supported by some "
                "implementations of msgfmt.")
        return line

    def _parseLine(self, original_line):
        """Parse one input line and add its text to the current section.

        :param original_line: The line as read from the input, already
            decoded.
        :raises TranslationFormatSyntaxError: if quoted text shows up
            outside a msgctxt, msgid, msgid_plural or msgstr section.
        """
        self._lineno += 1
        # Skip empty lines
        line = original_line.strip()
        if len(line) == 0:
            return

        if not self._escaped_line_break:
            line = self._parseFreshLine(line, original_line)
            if line is None or len(line) == 0:
                return

        line = self._parseQuotedString(line)

        text_section_types = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr')
        if self._section not in text_section_types:
            raise TranslationFormatSyntaxError(
                line_number=self._lineno,
                message='Invalid content: %r' % original_line)

        self._parsed_content += line
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    :param content: Raw content of the .properties file.
    :raises TranslationFormatInvalidInputError: if the content is not
        valid UTF-8 or a line holds an invalid unicode escape.
    :raises TranslationFormatSyntaxError: if a message key is not a
        valid property msgid.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        raise TranslationFormatInvalidInputError(
            'Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Count the line first, so that a decoding error below is
        # reported against this line and not the previous one.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.  Drop the complete comment
                # marker: one character for '#', two for '//'.
                ignore_comment = False
                marker_length = 2 if line.startswith(u'//') else 1
                line = line[marker_length:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False
                    elif last_comment is not None:
                        # Comments must end always with a new line.  An
                        # empty comment ("/**/") leaves nothing to
                        # terminate.
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                    # Remember the line of every stored character, so a
                    # comment whose text starts on a later line than its
                    # opening tag is not split after one character.
                    last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permissive with what you
                # import' policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def test_addTranslationMulti(self):
    # Ordinary plural message: forms 0 and 1 are added in order.
    message = TranslationMessageData()
    message.addTranslation(0, 'singular')
    message.addTranslation(1, 'plural')
    expected_translations = ['singular', 'plural']
    self.assertEqual(message.translations, expected_translations)
def _fetchDBRows(self, simulate_timeout=False):
    """Load this POFile's flagged messages into self.current_messages.

    Runs one query that joins every msgstrN column to its POTranslation
    text, then fills a `TranslationMessageData` per
    (msgid, msgid_plural, context) key.  A plural form that already has
    a translation in an existing entry is left alone.

    :param simulate_timeout: If true, run a sleep under a 1ms statement
        timeout instead of the real query (used in tests).
    :return: None.  When the query times out, the transaction is
        aborted and nothing is loaded.
    """
    form_count = TranslationConstants.MAX_PLURAL_FORMS

    join_clauses = []
    column_clauses = []
    for form in xrange(form_count):
        join_clauses.append(
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form))
        column_clauses.append(
            "pt%d.translation AS translation%d" % (form, form))

    substitutions = {
        'translation_columns': ', '.join(column_clauses),
        'translation_joins': '\n'.join(join_clauses),
        'language': quote(self.pofile.language),
        'potemplate': quote(self.pofile.potemplate),
        'flag': self._getFlagName(),
    }

    sql = """
        SELECT
            POMsgId.msgid AS msgid,
            POMsgID_Plural.msgid AS msgid_plural,
            context,
            date_reviewed,
            %(translation_columns)s
        FROM POTMsgSet
        JOIN TranslationTemplateItem ON
            TranslationTemplateItem.potmsgset = POTMsgSet.id AND
            TranslationTemplateItem.potemplate = %(potemplate)s
        JOIN TranslationMessage ON
            POTMsgSet.id=TranslationMessage.potmsgset AND (
                TranslationMessage.potemplate = %(potemplate)s OR
                TranslationMessage.potemplate IS NULL) AND
            TranslationMessage.language = %(language)s
        %(translation_joins)s
        JOIN POMsgID ON
            POMsgID.id = POTMsgSet.msgid_singular
        LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON
            POMsgID_Plural.id = POTMsgSet.msgid_plural
        WHERE
            %(flag)s IS TRUE
        ORDER BY
            TranslationTemplateItem.sequence,
            TranslationMessage.potemplate NULLS LAST
        """ % substitutions

    cur = cursor()
    try:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
        # timeout to work around bug 408718, but the query is
        # simpler now.  See if we still need this.

        # We have to commit what we've got so far or we'll lose
        # it when we hit TimeoutError.
        transaction.commit()

        if simulate_timeout:
            # This is used in tests.
            timeout, query = '1ms', "SELECT pg_sleep(2)"
        else:
            timeout = 1000 * int(config.poimport.statement_timeout)
            query = sql
        cur.execute("SET statement_timeout to %s" % quote(timeout))
        cur.execute(query)
    except TimeoutError:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
        # know whether it still happens.
        transaction.abort()
        return

    rows = cur.fetchall()

    assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
        "Change this code to support %d plural forms"
        % TranslationConstants.MAX_PLURAL_FORMS)
    for row in rows:
        msgid, msgid_plural, context, date = row[:4]
        key = (msgid, msgid_plural, context)
        try:
            message = self.current_messages[key]
        except KeyError:
            message = TranslationMessageData()
            self.current_messages[key] = message
            message.context = context
            message.msgid_singular = msgid
            message.msgid_plural = msgid_plural

        # The last part of the row is msgstr0 .. msgstr5, in plural
        # form order.
        for plural, msgstr in enumerate(row[4:4 + form_count]):
            if msgstr is None:
                continue
            known = message.translations
            # Only fill forms that have no translation yet.
            if len(known) <= plural or known[plural] is None:
                message.addTranslation(plural, msgstr)
def test_addTranslationReversed(self):
    # Forms may arrive out of order; the resulting list is still
    # indexed by plural form.
    message = TranslationMessageData()
    message.addTranslation(1, 'plural')
    message.addTranslation(0, 'singular')
    expected_translations = ['singular', 'plural']
    self.assertEqual(message.translations, expected_translations)
def test_duplicateTranslation(self):
    # A second translation for an already-translated plural form must
    # be rejected.
    message = TranslationMessageData()
    message.addTranslation(0, 'singular')
    self.assertRaises(
        TranslationFormatSyntaxError,
        message.addTranslation, 0, 'ralugnis')
def test_resetAllTranslations(self):
    # After resetAllTranslations the message holds no translations.
    message = TranslationMessageData()
    message.addTranslation(0, 'singular')
    message.resetAllTranslations()
    expected_translations = []
    self.assertEqual(message.translations, expected_translations)