def testBestCliqueSortIsStable(self):
  '''Pins down which clique BestClique() returns after each insertion.

  Five messages with the text 'hello' and different descriptions are added
  one at a time; after every MakeClique() call the clique looked up under
  the ID of the description-less message must be the expected one.
  '''
  factory = clique.UberClique()
  text = 'hello'
  msg_no_description = tclib.Message(text=text)
  msg_id_description_a = tclib.Message(text=text, description='ID: a')
  msg_id_description_b = tclib.Message(text=text, description='ID: b')
  msg_description_x = tclib.Message(text=text, description='x')
  msg_description_y = tclib.Message(text=text, description='y')
  clique_id = msg_no_description.GetId()

  # Insert in an order that tests all outcomes.
  clique_no_description = factory.MakeClique(msg_no_description,
                                             translateable=True)
  self.assertEqual(factory.BestClique(clique_id), clique_no_description)

  clique_id_description_b = factory.MakeClique(msg_id_description_b,
                                               translateable=True)
  self.assertEqual(factory.BestClique(clique_id), clique_id_description_b)

  clique_id_description_a = factory.MakeClique(msg_id_description_a,
                                               translateable=True)
  self.assertEqual(factory.BestClique(clique_id), clique_id_description_a)

  clique_description_y = factory.MakeClique(msg_description_y,
                                            translateable=True)
  self.assertEqual(factory.BestClique(clique_id), clique_description_y)

  clique_description_x = factory.MakeClique(msg_description_x,
                                            translateable=True)
  self.assertEqual(factory.BestClique(clique_id), clique_description_x)
def testSemiIdenticalCliques(self):
  '''Messages differing only in placeholder original share one translation.

  Both messages must get the same ID, and each clique must render the shared
  French translation with its own placeholder original ('$1' vs. '%s').
  '''
  messages = [
      tclib.Message(
          text='Hello USERNAME',
          placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')]),
      tclib.Message(
          text='Hello USERNAME',
          placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')]),
  ]
  self.assertEqual(messages[0].GetId(), messages[1].GetId())

  # Both of the above would share a translation.
  translation = tclib.Translation(
      id=messages[0].GetId(),
      text='Bonjour USERNAME',
      placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')])

  factory = clique.UberClique()
  cliques = [factory.MakeClique(msg) for msg in messages]
  for clq in cliques:
    clq.AddTranslation(translation, 'fr')

  self.assertEqual(cliques[0].MessageForLanguage('fr').GetRealContent(),
                   'Bonjour $1')
  self.assertEqual(cliques[1].MessageForLanguage('fr').GetRealContent(),
                   'Bonjour %s')
def testFunctionality(self):
  '''Two messages using '&t' in one shortcut group must produce warnings.'''
  c = self.uq.MakeClique(tclib.Message(text="Hello &there"))
  c.AddToShortcutGroup('group_name')
  c = self.uq.MakeClique(tclib.Message(text="Howdie &there partner"))
  c.AddToShortcutGroup('group_name')

  warnings = shortcuts.GenerateDuplicateShortcutsWarnings(self.uq, 'PROJECT')
  self.assertTrue(warnings)
def testAmpersandEscaping(self):
  '''A doubled ampersand ('&&') must not be reported as a shortcut clash.'''
  c = self.uq.MakeClique(tclib.Message(text="Hello &there"))
  c.AddToShortcutGroup('group_name')
  c = self.uq.MakeClique(tclib.Message(text="S&&T are the &letters S and T"))
  c.AddToShortcutGroup('group_name')

  warnings = shortcuts.GenerateDuplicateShortcutsWarnings(self.uq, 'PROJECT')
  self.assertEqual(len(warnings), 0)
def testBestClique(self):
  '''Checks the clique chosen per ID when descriptions compete.

  Nine messages over five distinct texts are registered; BestCliquePerId()
  must yield exactly five cliques, and for the texts registered with more
  than one description the expected description must be the one reported.
  '''
  factory = clique.UberClique()
  factory.MakeClique(tclib.Message(text='Alfur', description='alfaholl'))
  factory.MakeClique(tclib.Message(text='Alfur', description=''))
  factory.MakeClique(tclib.Message(text='Vaettur', description=''))
  factory.MakeClique(tclib.Message(text='Vaettur', description=''))
  factory.MakeClique(tclib.Message(text='Troll', description=''))
  factory.MakeClique(
      tclib.Message(text='Gryla', description='ID: IDS_GRYLA'))
  factory.MakeClique(
      tclib.Message(text='Gryla', description='vondakerling'))
  factory.MakeClique(
      tclib.Message(text='Leppaludi', description='ID: IDS_LL'))
  factory.MakeClique(tclib.Message(text='Leppaludi', description=''))

  count_best_cliques = 0
  for c in factory.BestCliquePerId():
    count_best_cliques += 1
    msg = c.GetMessage()
    text = msg.GetRealContent()
    description = msg.GetDescription()
    if text == 'Alfur':
      self.assertEqual(description, 'alfaholl')
    elif text == 'Gryla':
      self.assertEqual(description, 'vondakerling')
    elif text == 'Leppaludi':
      self.assertEqual(description, 'ID: IDS_LL')
  self.assertEqual(count_best_cliques, 5)
def testWhitespaceMessagesAreNontranslateable(self):
  '''Whitespace-only messages lose their translateable flag; others keep it.'''
  factory = clique.UberClique()

  message = tclib.Message(text=' \t')
  c = factory.MakeClique(message, translateable=True)
  self.assertFalse(c.IsTranslateable())

  message = tclib.Message(text='\n \n ')
  c = factory.MakeClique(message, translateable=True)
  self.assertFalse(c.IsTranslateable())

  # Leading whitespace followed by real text stays translateable.
  message = tclib.Message(text='\n hello')
  c = factory.MakeClique(message, translateable=True)
  self.assertTrue(c.IsTranslateable())
def testEachCliqueKeptSorted(self):
  '''Cliques stored under one ID end up in the order a, b, c.

  The three messages share their text and are inserted as b, a, c; the list
  kept in factory.cliques_ for that ID must nevertheless read a, b, c.
  '''
  factory = clique.UberClique()
  msg_a = tclib.Message(text='hello', description='a')
  msg_b = tclib.Message(text='hello', description='b')
  msg_c = tclib.Message(text='hello', description='c')

  # Insert out of order
  clique_b = factory.MakeClique(msg_b, translateable=True)
  clique_a = factory.MakeClique(msg_a, translateable=True)
  clique_c = factory.MakeClique(msg_c, translateable=True)

  clique_list = factory.cliques_[clique_a.GetId()]
  self.assertEqual(len(clique_list), 3)
  self.assertEqual(clique_list[0], clique_a)
  self.assertEqual(clique_list[1], clique_b)
  self.assertEqual(clique_list[2], clique_c)
def _ParseNode(self, node):
  '''Traverses the subtree of a DOM node, and register a tclib message for
  all the <message> nodes.

  The opening tag (with its attributes in sorted order), any text outside
  <message> elements and the closing tag are emitted through
  _AddNontranslateableChunk().  The text and <ph> children of a <message>
  element are collected into one tclib.Message whose clique is appended to
  self.skeleton_.

  Args:
    node: A minidom element node; its tagName, attributes and childNodes
      are read.

  Raises:
    AssertionError: If a <message> element contains a child element other
      than <ph>.
  '''
  att_text = []
  if node.attributes:
    for key, value in sorted(node.attributes.items()):
      att_text.append(' %s="%s"' % (key, value))
  self._AddNontranslateableChunk("<%s%s>" % (node.tagName, ''.join(att_text)))

  if node.tagName == 'message':
    msg = tclib.Message(description=node.attributes['desc'])
    for child in node.childNodes:
      if child.nodeType == minidom.Node.TEXT_NODE:
        msg.AppendText(child.data)
      elif child.nodeType == minidom.Node.ELEMENT_NODE:
        if child.tagName == 'ph':
          self._ParsePlaceholder(child, msg)
        else:
          assert False, 'Unexpected <%s> inside <message>' % child.tagName
    self.skeleton_.append(self.uberclique.MakeClique(msg))
  else:
    for child in node.childNodes:
      if child.nodeType == minidom.Node.TEXT_NODE:
        self._AddNontranslateableChunk(child.data)
      # Test the child, not the parent: only element children have the
      # tagName/attributes that the recursive call reads.
      elif child.nodeType == minidom.Node.ELEMENT_NODE:
        self._ParseNode(child)

  self._AddNontranslateableChunk("</%s>" % node.tagName)
def _ParseMessage(self, string, desc):
  '''Parses a given string and adds it to the output as a translatable chunk
  with a given description.

  The string is wrapped in a <msg> element and parsed as XML; text nodes
  become message text and <ph> elements are handed to _ParsePlaceholder().

  Args:
    string: The message string to parse.
    desc: The description of the message (for the translators).

  Raises:
    Exception: If the string is not valid XML (re-raised with the original
      traceback), or if it contains anything other than text and <ph>
      elements.
  '''
  msg = tclib.Message(description=desc)
  xml = '<msg>' + string + '</msg>'
  try:
    node = minidom.parseString(xml).childNodes[0]
  except ExpatError:
    reason = '''Input isn't valid XML (has < & > been escaped?): ''' + string
    # six.reraise() needs an exception instance as its value on Python 3;
    # a bare string has no __traceback__ attribute.
    six.reraise(Exception, Exception(reason), sys.exc_info()[2])

  for child in node.childNodes:
    if child.nodeType == minidom.Node.TEXT_NODE:
      msg.AppendText(child.data)
    elif child.nodeType == minidom.Node.ELEMENT_NODE:
      if child.tagName == 'ph':
        self._ParsePlaceholder(child, msg)
      else:
        raise Exception("Not implemented.")
    else:
      raise Exception("Not implemented.")
  self.skeleton_.append(self.uberclique.MakeClique(msg))
def AddMessage(self, msgtext, description, meaning, translateable):
  '''Builds a message from 'msgtext' and appends its clique to the skeleton.

  The unescaped text is split with PLACEHOLDER_RE.  Fragments found between
  a '[![' and a ']!]' marker become placeholders; all other non-empty
  fragments become plain text.  Empty input is ignored.

  Args:
    msgtext: The escaped message text.
    description: Description passed on to the tclib.Message.
    meaning: Meaning passed on to the tclib.Message.
    translateable: Whether the resulting clique is translateable.
  '''
  if msgtext == '':
    return

  msg = tclib.Message(description=description, meaning=meaning)
  inside_placeholder = False
  for fragment in PLACEHOLDER_RE.split(self.UnEscape(msgtext)):
    if fragment == '':
      continue
    if fragment == '[![':
      inside_placeholder = True
      continue
    if fragment == ']!]':
      inside_placeholder = False
      continue
    if not inside_placeholder:
      msg.AppendText(fragment)
      continue
    placeholder = tclib.Placeholder(fragment, '[![%s]!]' % fragment,
                                    '(placeholder)')
    msg.AppendPlaceholder(placeholder)

  new_clique = self.uberclique.MakeClique(msg, translateable=translateable)
  self.skeleton_.append(new_clique)

  # The flag is only ever raised, never lowered, so that repeated calls stay
  # idempotent.
  if translateable:
    self.translatable_chunk_ = True
def SubstituteMessage(self, msg):
  '''Apply substitutions to a tclib.Message object.

  Each fragment of the presentable content that _SubFragment() changes is
  replaced by a new placeholder named '<fragment without its first and last
  character>_<usage count>'; the fragment itself is the placeholder's
  original and the substituted value its example.  Existing placeholders
  are not affected.

  Args:
    msg: A tclib.Message object.

  Returns:
    A new tclib.Message with substitutions done, or 'msg' itself when no
    fragment was substituted.
  '''
  from grit import tclib  # avoid circular import

  usage = {}
  added = []
  pieces = []
  for fragment in self.GetExp().split(msg.GetPresentableContent()):
    replacement = self._SubFragment(fragment)
    if fragment == replacement:
      pieces.append(fragment)
      continue
    key = str(fragment)
    usage[key] = usage.get(key, 0) + 1
    name = "%s_%d" % (key[1:-1], usage[key])
    added.append(tclib.Placeholder(name, key, replacement))
    pieces.append(name)

  if not added:
    return msg
  return tclib.Message(''.join(pieces),
                       msg.GetPlaceholders() + added,
                       msg.GetDescription(),
                       msg.GetMeaning())
def CreateTclibMessage(self, android_string):
  """Transforms a <string/> element from strings.xml into a tclib.Message.

  Interprets whitespace, quotes, and escaped characters in the android_string
  according to Android's formatting and styling rules for strings.  Also
  converts <xliff:g> placeholders into <ph> placeholders, e.g.:

    <xliff:g id="website" example="google.com">%s</xliff:g>
      becomes
    <ph name="website"><ex>google.com</ex>%s</ph>

  Args:
    android_string: The DOM element whose child nodes are converted.

  Returns:
    The tclib.Message.

  Raises:
    AssertionError: If an <xliff:g> tag has no 'id' attribute, or a child
      node is not a text, CDATA, element or comment node.
  """
  msg = tclib.Message()
  current_text = ''  # Accumulated text that hasn't yet been added to msg.
  nodes = android_string.childNodes

  for i, node in enumerate(nodes):
    # Handle text nodes.
    if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
      current_text += node.data

    # Handle <xliff:g> and other tags.
    elif node.nodeType == Node.ELEMENT_NODE:
      if node.tagName == 'xliff:g':
        # Element nodes have no 'data' attribute, so report the tag via
        # toxml() when the id is missing.
        assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
        placeholder_id = node.getAttribute('id')
        placeholder_text = self.__FormatPlaceholderText(node)
        placeholder_example = node.getAttribute('example')
        if not placeholder_example:
          print('Info: placeholder does not contain an example: %s' %
                node.toxml())
          # Fall back to the upper-cased id as the example.
          placeholder_example = placeholder_id.upper()
        msg.AppendPlaceholder(
            tclib.Placeholder(placeholder_id, placeholder_text,
                              placeholder_example))
      else:
        print('Warning: removing tag <%s> which must be inside a '
              'placeholder: %s' % (node.tagName, node.toxml()))
        msg.AppendText(self.__FormatPlaceholderText(node))

    # Handle other nodes.
    elif node.nodeType != Node.COMMENT_NODE:
      assert False, 'Unknown node type: %s' % node.nodeType

    # Flush the accumulated text at the end of the string or just before the
    # next element, so text and placeholders keep their relative order.
    is_last_node = (i == len(nodes) - 1)
    if (current_text and
        (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
      # For messages containing just text and comments (no xml tags) Android
      # strips leading and trailing whitespace.  We mimic that behavior.
      if not msg.GetContent() and is_last_node:
        current_text = current_text.strip()
      msg.AppendText(self.__FormatAndroidString(current_text))
      current_text = ''

  return msg
def testInit(self):
  '''A new Message exposes its text and a whitespace-folded description.'''
  msg = tclib.Message(text=u'Hello Earthlings',
                      description='Greetings\n\t message')
  self.assertEqual(msg.GetPresentableContent(), 'Hello Earthlings')
  self.assertIsInstance(msg.GetPresentableContent(), types.StringTypes)
  self.assertEqual(msg.GetDescription(), 'Greetings message')
def AddMessages(self, rctext, node):
  '''Turns every string-table message in 'rctext' into a child of 'node'.

  Each message is run through self.Placeholderize() (documented upstream as
  replacing $1, $2 or %s, %d style format specifiers with placeholders and
  running HTML-formatted messages through the HTML-placeholderizer) and then
  added as a <message> node child of 'node'.

  Args:
    rctext: The text that is scanned for string tables.
    node: The node that receives one <message> child per message found.
  '''
  for table_match in _STRING_TABLE.finditer(rctext):
    for entry in _MESSAGE.finditer(table_match.group('body')):
      # The description is the text of all comments, joined by spaces.
      description = ' '.join(
          cm.group('text')
          for cm in _COMMENT_TEXT.finditer(entry.group('comment')))

      msg_id = entry.group('id')
      text = rc.Section.UnEscape(entry.group('text'))
      self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

      msg_obj = self.Placeholderize(text)

      # Messages that contain only placeholders do not need translation.
      translateable = any(
          isinstance(part, six.string_types)
          and not _WHITESPACE_ONLY.match(part)
          for part in msg_obj.GetContent())
      if self.not_localizable_re.search(description):
        translateable = False

      meaning = ''
      internal_comment = ''

      # If we have a "role model" (existing GRD file) and this node exists
      # in the role model, use the description, meaning and translateable
      # attributes from the role model.
      role_node = None
      if self.role_model:
        role_node = self.role_model.GetNodeById(msg_id)
      if role_node:
        translateable = role_node.IsTranslateable()
        meaning = role_node.attrs['meaning']
        description = role_node.attrs['desc']
        internal_comment = role_node.attrs['internal_comment']

      # For nontranslateable messages, we don't want the complexity of
      # placeholderizing everything.
      if not translateable:
        msg_obj = tclib.Message(text=text)

      msg_node = message.MessageNode.Construct(
          node, msg_obj, msg_id,
          desc=description,
          translateable=translateable,
          meaning=meaning)
      msg_node.attrs['internal_comment'] = internal_comment

      node.AddChild(msg_node)
      self.ExtraVerboseOut('Done processing message %s\n' % msg_id)
def Parse(self):
  '''Parses a menu resource section into one single message.

  Menu shortcuts have to be unique within a menu, so the whole menu is
  gathered as a single message, with placeholders separating the individual
  menu items, instead of one message per menu item.  The message carries an
  automatic description with instructions for the translators.
  '''
  menu_description = self.MENU_MESSAGE_DESCRIPTION
  self.single_message_ = tclib.Message(description=menu_description)
  self._RegExpParse(self.menu_re_, self.text_)
def testValidate(self):
  '''A WindowsFilename custom type rewrites the ':' in a translation.'''
  factory = clique.UberClique()
  msg = tclib.Message(text='Bingo bongo')
  c = factory.MakeClique(msg)
  c.SetCustomType(filename.WindowsFilename())
  translation = tclib.Translation(id=msg.GetId(), text='Bilingo bolongo:')
  c.AddTranslation(translation, 'fr')
  self.assertEqual(c.MessageForLanguage('fr').GetRealContent(),
                   'Bilingo bolongo ')
def testCustomTypes(self):
  '''Custom types reject invalid messages and are applied to translations.'''
  factory = clique.UberClique()
  message = tclib.Message(text='Bingo bongo')
  c = factory.MakeClique(message)
  # 'Bingo bongo' does not start with 'jjj', so setting the type must raise.
  # The failure is reported from the else clause so that it cannot be
  # swallowed by the except clause.
  try:
    c.SetCustomType(DummyCustomType())
  except Exception:
    pass  # expected case
  else:
    self.fail('SetCustomType() should reject a message that does not '
              'start with jjj')

  message = tclib.Message(text='jjjBingo bongo')
  c = factory.MakeClique(message)
  c.SetCustomType(util.NewClassInstance(
      'grit.clique_unittest.DummyCustomType', clique.CustomType))
  translation = tclib.Translation(id=message.GetId(), text='Bilingo bolongo')
  c.AddTranslation(translation, 'fr')
  self.assertTrue(
      c.MessageForLanguage('fr').GetRealContent().startswith('jjj'))
def testPseudoMessage(self):
  '''PseudoMessage() transforms the text but leaves the placeholder alone.'''
  msg = tclib.Message(
      text='Hello USERNAME, how are you?',
      placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
  trans = pseudo.PseudoMessage(msg)
  # TODO(joi) It would be nicer if 'you' -> 'youPou' instead of
  # 'you' -> 'youPyou' and if we handled the silent e in 'are'
  self.assertEqual(
      trans.GetPresentableContent(),
      pseudo.MapVowels(u'HePelloPo USERNAME, hoPow aParePe youPyou?', 1))
def testMissingTranslations(self):
  '''Checks how missing translations are flagged and reported.

  The 'fr' lookup (arguments False, True) must not count as a missing
  translation but appears once as WARNING in the report; the 'de' lookup
  (arguments False, False) counts as missing and appears once as ERROR.
  '''
  messages = [tclib.Message(text='Hello'), tclib.Message(text='Goodbye')]
  factory = clique.UberClique()
  cliques = [factory.MakeClique(msg) for msg in messages]

  cliques[1].MessageForLanguage('fr', False, True)
  self.assertFalse(factory.HasMissingTranslations())

  cliques[0].MessageForLanguage('de', False, False)
  self.assertTrue(factory.HasMissingTranslations())

  report = factory.MissingTranslationsReport()
  self.assertEqual(report.count('WARNING'), 1)
  self.assertEqual(report.count('8053599568341804890 "Goodbye" fr'), 1)
  self.assertEqual(report.count('ERROR'), 1)
  self.assertEqual(report.count('800120468867715734 "Hello" de'), 1)
def EndParsing(self):
  '''Builds the tclib.Message and clique for this node once it is parsed.

  Assembles the text and placeholder list from self.mixed_content, strips
  and stores leading and trailing whitespace, creates the message and its
  clique, registers the shortcut groups and installs a custom type when
  'custom_type' or 'validation_expr' is set.
  '''
  # NOTE(review): super(type(self), self) recurses without end if this class
  # is ever subclassed; naming the defining class explicitly would avoid
  # that - confirm the class name before changing.
  super(type(self), self).EndParsing()

  fragments = []
  placeholders = []
  for item in self.mixed_content:
    if isinstance(item, types.StringTypes):
      fragments.append(item)
      continue
    presentation = item.attrs['name'].upper()
    fragments.append(presentation)
    # The example is taken from the first child when there is one.
    if len(item.children) > 0:
      example = item.children[0].GetCdata()
    else:
      example = ' '
    original = item.GetCdata()
    placeholders.append(tclib.Placeholder(presentation, original, example))
  text = ''.join(fragments)

  ws_match = _WHITESPACE.match(text)
  if ws_match:
    self.ws_at_start = ws_match.group('start')
    self.ws_at_end = ws_match.group('end')
    text = ws_match.group('body')

  self.shortcut_groups_ = [
      group for group in self._SPLIT_RE.split(self.attrs['shortcut_groups'])
      if group != '']

  # Fall back to the node's name when no description is given.
  description_or_id = self.attrs['desc']
  if description_or_id == '' and 'name' in self.attrs:
    description_or_id = 'ID: %s' % self.attrs['name']

  use_name = (self.attrs['use_name_for_id'] == 'true' and
              self.SatisfiesOutputCondition())
  assigned_id = self.attrs['name'] if use_name else None

  message = tclib.Message(text=text,
                          placeholders=placeholders,
                          description=description_or_id,
                          meaning=self.attrs['meaning'],
                          assigned_id=assigned_id)
  self.clique = self.UberClique().MakeClique(message, self.IsTranslateable())
  for group in self.shortcut_groups_:
    self.clique.AddToShortcutGroup(group)

  custom_type = self.attrs['custom_type']
  if custom_type != '':
    self.clique.SetCustomType(
        util.NewClassInstance(custom_type, clique.CustomType))
  elif self.attrs['validation_expr'] != '':
    self.clique.SetCustomType(
        clique.OneOffCustomType(self.attrs['validation_expr']))
def testAll(self):
  '''Message and Translation present the same text and placeholders alike.'''
  text = u'Howdie USERNAME'
  phs = [tclib.Placeholder(u'USERNAME', u'%s', 'Joi')]
  msg = tclib.Message(text=text, placeholders=phs)
  self.assertEqual(msg.GetPresentableContent(), 'Howdie USERNAME')

  trans = tclib.Translation(text=text, placeholders=phs)
  self.assertEqual(trans.GetPresentableContent(), 'Howdie USERNAME')
  self.assertIsInstance(trans.GetPresentableContent(), types.StringTypes)
def testConstruct(self):
  '''MessageNode.Construct() builds <ph>/<ex> children and keeps whitespace.'''
  msg = tclib.Message(text=" Hello USERNAME, how are you? BINGO\t\t",
                      placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi'),
                                    tclib.Placeholder('BINGO', '%d', '11')])
  msg_node = message.MessageNode.Construct(None, msg, 'BINGOBONGO')
  self.assertEqual(msg_node.children[0].name, 'ph')
  self.assertEqual(msg_node.children[0].children[0].name, 'ex')
  self.assertEqual(msg_node.children[0].children[0].GetCdata(), 'Joi')
  self.assertEqual(msg_node.children[1].children[0].GetCdata(), '11')
  self.assertEqual(msg_node.ws_at_start, ' ')
  self.assertEqual(msg_node.ws_at_end, '\t\t')
def testTagsWithCommonSubstring(self):
  '''Placeholder names that are prefixes of each other must all resolve.

  The names are 'A', 'AB', ... 'ABCDEFGHIJ' and their originals '1' to '10'.
  '''
  word = 'ABCDEFGHIJ'
  text = ' '.join([word[:i] for i in range(1, 11)])
  phs = [tclib.Placeholder(word[:i], str(i), str(i)) for i in range(1, 11)]

  # Only errors raised by tclib itself are turned into the explanatory
  # failure; a wrong result is reported by the assertion below with a diff.
  try:
    msg = tclib.Message(text=text, placeholders=phs)
    real_content = msg.GetRealContent()
  except Exception:
    self.fail('tclib.Message() should handle placeholders that are '
              'substrings of each other')
  self.assertEqual(real_content, '1 2 3 4 5 6 7 8 9 10')
def ProcessString(self, is_gather, output, prefix, tree):
  '''Gathers one string as a message, or returns its translated content.

  Args:
    is_gather: If true, a clique for the string is appended to 'output';
      otherwise the best clique for the message is looked up.
    output: Object with an append() method that receives the new clique when
      gathering; unused otherwise.
    prefix: Used as the description of the message.
    tree: The string itself, used as the message text.

  Returns:
    'tree' unchanged when gathering; otherwise the real content of the
    message for self.lang_.
  '''
  message = tclib.Message(text=tree, description=prefix)
  if is_gather:
    output.append(self.uberclique.MakeClique(message))
    return tree

  best_clique = self.uberclique.BestClique(message.GetId())
  translated = best_clique.MessageForLanguage(
      self.lang_,
      self.pseudo_if_not_available_,
      self.fallback_to_english_)
  return translated.GetRealContent()
def testXmlFormatContentWithEntities(self):
  '''Tests a bug where &nbsp; would not be escaped correctly.'''
  from grit import tclib
  msg_node = message.MessageNode.Construct(None, tclib.Message(
      text='BEGIN_BOLDHelloWHITESPACEthere!END_BOLD Bingo!',
      placeholders=[
          tclib.Placeholder('BEGIN_BOLD', '<b>', 'bla'),
          tclib.Placeholder('WHITESPACE', '&nbsp;', 'bla'),
          tclib.Placeholder('END_BOLD', '</b>', 'bla')]),
      'BINGOBONGO')
  xml = msg_node.FormatXml()
  # The entity must not show up verbatim in the formatted XML.
  self.assertEqual(xml.find('&nbsp;'), -1, 'should have no entities')
def Parse(self):
  '''Loads the input file and fills self.skeleton_ with chunks and cliques.

  Does nothing when called again after the first time.
  '''
  if self.have_parsed_:
    return
  self.have_parsed_ = True

  text = self._LoadInputFile()

  # Ignore the BOM character if the document starts with one.
  if text.startswith(u'\ufeff'):
    text = text[1:]

  self.text_ = text

  # Parsing is done in two phases:  First, we break the document into
  # translateable and nontranslateable chunks.  Second, we run through each
  # translateable chunk and insert placeholders for any HTML elements,
  # unescape escaped characters, etc.

  # First handle the silly little [!]-prefixed header because it's not
  # handled by our HTML parsers.
  header = _SILLY_HEADER.match(text)
  if header:
    title_start, title_end = header.span('title')
    header_end = header.end()
    self.skeleton_.append(text[:title_start])
    title_message = tclib.Message(text=text[title_start:title_end])
    self.skeleton_.append(self.uberclique.MakeClique(title_message))
    self.skeleton_.append(text[title_end:header_end])
    text = text[header_end:]

  for chunk in HtmlChunks().Parse(text, self.fold_whitespace_):
    if not chunk[0]:
      # Nontranslateable chunks go into the skeleton verbatim.
      self.skeleton_.append(chunk[1])
      continue
    chunk_message = HtmlToMessage(chunk[1], description=chunk[2])
    self.skeleton_.append(self.uberclique.MakeClique(chunk_message))

  # Go through the skeleton and change any messages that consist solely of
  # placeholders and whitespace into nontranslateable strings.
  for index, entry in enumerate(self.skeleton_):
    if not isinstance(entry, clique.MessageClique):
      continue
    msg = entry.GetMessage()
    # NOTE(review): the comparison with a single space looks redundant next
    # to the _NON_WHITESPACE search; possibly an entity literal was lost
    # here - confirm against the intended source.
    has_real_text = any(
        isinstance(item, types.StringTypes)
        and _NON_WHITESPACE.search(item) and item != ' '
        for item in msg.GetContent())
    if not has_real_text:
      self.skeleton_[index] = msg.GetRealContent()
def ParseFile(text, uberclique):
  '''Creates one clique for every line of 'text' that matches MSG_RE.

  Args:
    text: The file contents; it is split on newlines.
    uberclique: Object whose MakeClique() is called for each message.

  Returns:
    The list of cliques, in the order of the matching lines.
  '''
  result = []
  for line in text.split('\n'):
    found = MSG_RE.match(line)
    if not found:
      continue
    raw = Unescape(found.group('msg'))
    # The placeholders are extracted before the text is placeholderized.
    placeholders = GetPlaceholders(raw)
    message = tclib.Message(text=GetPlaceholderizedText(raw),
                            placeholders=placeholders)
    result.append(uberclique.MakeClique(message))
  return result
def _AddTranslateableChunk(self, chunk):
  '''Adds a translateable chunk.  It will be unescaped before being added.

  When self.single_message_ is set the text is appended to that message;
  otherwise a new message clique is appended to self.skeleton_.

  Args:
    chunk: The escaped text; an empty string is ignored.
  '''
  # We don't want empty messages since they are redundant and the TC
  # doesn't allow them.
  if chunk == '':
    return

  plain_text = self.UnEscape(chunk)
  if not self.single_message_:
    new_message = tclib.Message(text=plain_text)
    self.skeleton_.append(self.uberclique.MakeClique(new_message))
  else:
    self.single_message_.AppendText(plain_text)
  self.translatable_chunk_ = True
def testRegressionTranslationInherited(self):
  '''Regression tests a bug that was caused by grit.tclib.Translation
  inheriting from the translation console's Translation object
  instead of only owning an instance of it.
  '''
  msg = tclib.Message(text=u"BLA1\r\nFrom: BLA2 \u00fe BLA3",
                      placeholders=[
                          tclib.Placeholder('BLA1', '%s', '%s'),
                          tclib.Placeholder('BLA2', '%s', '%s'),
                          tclib.Placeholder('BLA3', '%s', '%s')])
  transl = tclib.Translation(text=msg.GetPresentableContent(),
                             placeholders=msg.GetPlaceholders())
  content = transl.GetContent()
  self.assertIsInstance(content[3], types.UnicodeType)
def EndParsing(self):
  '''Builds the tclib.Message for this node once its content is parsed.

  Assembles the text and placeholder list from self.mixed_content, strips
  and stores leading and trailing whitespace, records the shortcut groups
  and hands the finished message to InstallMessage().
  '''
  super(MessageNode, self).EndParsing()

  fragments = []
  placeholders = []
  for item in self.mixed_content:
    if isinstance(item, types.StringTypes):
      fragments.append(item)
      continue
    presentation = item.attrs['name'].upper()
    fragments.append(presentation)
    # The example is taken from the first child when there is one.
    if len(item.children) > 0:
      example = item.children[0].GetCdata()
    else:
      example = ' '
    original = item.GetCdata()
    placeholders.append(tclib.Placeholder(presentation, original, example))
  text = ''.join(fragments)

  ws_match = _WHITESPACE.match(text)
  if ws_match:
    self.ws_at_start = ws_match.group('start')
    self.ws_at_end = ws_match.group('end')
    text = ws_match.group('body')

  self.shortcut_groups_ = [
      group for group in self._SPLIT_RE.split(self.attrs['shortcut_groups'])
      if group != '']

  # Fall back to the node's name when no description is given.
  description_or_id = self.attrs['desc']
  if description_or_id == '' and 'name' in self.attrs:
    description_or_id = 'ID: %s' % self.attrs['name']

  if self.attrs['use_name_for_id'] == 'true':
    assigned_id = self.attrs['name']
  else:
    assigned_id = None

  self.InstallMessage(tclib.Message(text=text,
                                    placeholders=placeholders,
                                    description=description_or_id,
                                    meaning=self.attrs['meaning'],
                                    assigned_id=assigned_id))