def testSemiIdenticalCliques(self):
    # Two messages that differ only in the original text of their placeholder
    # ('$1' vs '%s') must get the same ID and can share one translation, yet
    # each clique must render the translation with its own placeholder
    # original.
    messages = [
        tclib.Message(
            text='Hello USERNAME',
            placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')]),
        tclib.Message(
            text='Hello USERNAME',
            placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')]),
    ]
    self.assertEqual(messages[0].GetId(), messages[1].GetId())

    # Both of the above would share a translation.
    translation = tclib.Translation(
        id=messages[0].GetId(),
        text='Bonjour USERNAME',
        placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')])

    factory = clique.UberClique()
    cliques = [factory.MakeClique(msg) for msg in messages]

    for clq in cliques:
        clq.AddTranslation(translation, 'fr')

    # assertEqual (rather than the deprecated failUnless(a == b)) reports both
    # values when the comparison fails.
    self.assertEqual(
        cliques[0].MessageForLanguage('fr').GetRealContent(), 'Bonjour $1')
    self.assertEqual(
        cliques[1].MessageForLanguage('fr').GetRealContent(), 'Bonjour %s')
def GetPlaceholders(msg):
    '''Builds the list of tclib.Placeholder objects found in a message string.

    Three kinds of tokens are recognised by the regular expression below:
    gettext-style '%{name}' replaceables, opening/closing HTML tags and
    self-closing HTML tags.

    Args:
      msg: The message text to scan.

    Returns:
      A list of tclib.Placeholder objects, in order of appearance.
    '''
    pattern = (
        '%{([^}]+)}|<(([a-zA-Z]+)[^>]*)(?<!/)>|</(([a-zA-Z]+)[^>]*)>|<(([a-zA-Z]+)[^>]*)/>')

    # Tags seen so far; handed to MakeHtmlPlaceholderName for every HTML
    # placeholder. Closing tags are not recorded here.
    seen_tags = []
    found = []

    for groups in re.findall(pattern, msg):
        (gettext_name, open_contents, open_name, close_contents, close_name,
         unary_contents, unary_name) = groups

        if gettext_name != '':
            found.append(
                tclib.Placeholder(gettext_name.upper(),
                                  '%%{%s}' % gettext_name,
                                  '(replaceable)'))
            continue

        if open_name != '':
            seen_tags.append(open_name)
            ph_name = MakeHtmlPlaceholderName(open_name, 'begin', seen_tags)
            found.append(
                tclib.Placeholder(ph_name, '<%s>' % open_contents,
                                  '(HTML code)'))
            continue

        if close_name != '':
            ph_name = MakeHtmlPlaceholderName(close_name, 'end', seen_tags)
            found.append(
                tclib.Placeholder(ph_name, '</%s>' % close_contents,
                                  '(HTML code)'))
            continue

        if unary_name != '':
            seen_tags.append(unary_name)
            ph_name = MakeHtmlPlaceholderName(unary_name, None, seen_tags)
            found.append(
                tclib.Placeholder(ph_name, '<%s/>' % unary_contents,
                                  '(HTML code)'))

    return found
def testConstruct(self):
    # MessageNode.Construct must create one <ph> child per placeholder (each
    # with an <ex> child holding the example) and must record the leading and
    # trailing whitespace of the message text separately.
    msg = tclib.Message(
        text=" Hello USERNAME, how are you? BINGO\t\t",
        placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi'),
                      tclib.Placeholder('BINGO', '%d', '11')])
    msg_node = message.MessageNode.Construct(None, msg, 'BINGOBONGO')

    # assertEqual replaces the deprecated failUnless(a == b) and shows both
    # operands on failure.
    first_ph = msg_node.children[0]
    self.assertEqual(first_ph.name, 'ph')
    self.assertEqual(first_ph.children[0].name, 'ex')
    self.assertEqual(first_ph.children[0].GetCdata(), 'Joi')
    self.assertEqual(msg_node.children[1].children[0].GetCdata(), '11')
    self.assertEqual(msg_node.ws_at_start, ' ')
    self.assertEqual(msg_node.ws_at_end, '\t\t')
def testXmlFormatContentWithEntities(self):
    '''Tests a bug where &nbsp; would not be escaped correctly.'''
    # NOTE(review): in the text under review the entity name was missing from
    # the docstring above, from the WHITESPACE placeholder's original text and
    # from the string searched for below (all three showed a plain space).
    # Searching the XML for a plain space cannot express "should have no
    # entities", so the entity has been restored as '&nbsp;' -- please confirm
    # against the upstream test.
    from grit import tclib
    msg_node = message.MessageNode.Construct(
        None,
        tclib.Message(
            text='BEGIN_BOLDHelloWHITESPACEthere!END_BOLD Bingo!',
            placeholders=[
                tclib.Placeholder('BEGIN_BOLD', '<b>', 'bla'),
                tclib.Placeholder('WHITESPACE', '&nbsp;', 'bla'),
                tclib.Placeholder('END_BOLD', '</b>', 'bla')]),
        'BINGOBONGO')
    xml = msg_node.FormatXml()
    # The raw entity must not appear verbatim in the formatted XML.
    self.assertEqual(xml.find('&nbsp;'), -1, 'should have no entities')
def testRegressionTranslationInherited(self):
    '''Regression tests a bug that was caused by grit.tclib.Translation
    inheriting from the translation console's Translation object
    instead of only owning an instance of it.
    '''
    msg = tclib.Message(
        text=u"BLA1\r\nFrom: BLA2 \u00fe BLA3",
        placeholders=[
            tclib.Placeholder('BLA1', '%s', '%s'),
            tclib.Placeholder('BLA2', '%s', '%s'),
            tclib.Placeholder('BLA3', '%s', '%s')])
    transl = tclib.Translation(text=msg.GetPresentableContent(),
                               placeholders=msg.GetPlaceholders())
    content = transl.GetContent()
    # The fourth content item is checked to still be a unicode object.
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(isinstance(content[3], types.UnicodeType))
def AddMessage(self, msgtext, description, meaning, translateable):
    '''Turns a chunk of text into a message clique and adds it to the skeleton.

    The text is unescaped and split with PLACEHOLDER_RE; pieces found between
    a '[![' marker and a ']!]' marker become placeholders, everything else
    becomes plain message text. An empty msgtext is ignored.

    Args:
      msgtext: The (escaped) text of the message.
      description: Description passed on to tclib.Message.
      meaning: Meaning passed on to tclib.Message.
      translateable: Passed to MakeClique; when true the gatherer is also
        flagged as containing a translateable chunk.
    '''
    if msgtext == '':
        return

    message = tclib.Message(description=description, meaning=meaning)

    inside_placeholder = False
    for token in PLACEHOLDER_RE.split(self.UnEscape(msgtext)):
        if token == '':
            continue
        if token == '[![':
            inside_placeholder = True
        elif token == ']!]':
            inside_placeholder = False
        elif inside_placeholder:
            message.AppendPlaceholder(
                tclib.Placeholder(token, '[![%s]!]' % token, '(placeholder)'))
        else:
            message.AppendText(token)

    new_clique = self.uberclique.MakeClique(message,
                                            translateable=translateable)
    self.skeleton_.append(new_clique)

    # Only ever switch the flag on: this is supposed to be idempotent, so it
    # must never be set back to false.
    if translateable:
        self.translatable_chunk_ = True
def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    Every fragment of the presentable content that _SubFragment changes is
    replaced by a new placeholder. The placeholder is named after the fragment
    with its first and last character dropped, followed by '_' and a usage
    count for that fragment; its original is the fragment and its example is
    the substituted value. Existing placeholders are not affected.

    Args:
      msg: A tclib.Message object.

    Returns:
      A new tclib.Message with the substitutions done, or msg itself when no
      fragment needed substituting.
    '''
    from grit import tclib  # avoid circular import

    usage_counts = {}
    source_text = msg.GetPresentableContent()
    added_placeholders = []
    rebuilt = ''

    for fragment in self.GetExp().split(source_text):
        replacement = self._SubFragment(fragment)
        if fragment == replacement:
            # Nothing to substitute; keep the text as it is.
            rebuilt += fragment
            continue

        fragment = str(fragment)
        occurrence = usage_counts.get(fragment, 0) + 1
        usage_counts[fragment] = occurrence
        ph_name = "%s_%d" % (fragment[1:-1], occurrence)
        added_placeholders.append(
            tclib.Placeholder(ph_name, fragment, replacement))
        rebuilt += ph_name

    if not added_placeholders:
        return msg

    return tclib.Message(rebuilt,
                         msg.GetPlaceholders() + added_placeholders,
                         msg.GetDescription(),
                         msg.GetMeaning())
def Callback(id, structure):
    '''Builds a tclib.Translation from one parsed translation and registers it.

    NOTE(review): 'self', 'debug' and 'lang' are not defined in this block;
    presumably they are variables of the enclosing scope -- confirm.

    Args:
      id: The message ID the translation belongs to. Translations whose ID is
        not in self.cliques_ are ignored.
      structure: An iterable of (is_ph, text) pairs. When is_ph is false, text
        is literal translation text; otherwise it is a placeholder name.

    Raises:
      exception.MismatchingPlaceholders: a placeholder name in the translation
        matches no placeholder presentation of the original message.
    '''
    if id not in self.cliques_:
        if debug:
            print("Ignoring translation #%s" % id)
        return

    if debug:
        print("Adding translation #%s" % id)

    # We fetch placeholder information from the original message (the XTB file
    # only contains placeholder names).
    original_msg = self.BestClique(id).GetMessage()

    translation = tclib.Translation(id=id)
    for is_ph, text in structure:
        if not is_ph:
            translation.AppendText(text)
            continue

        for ph in original_msg.GetPlaceholders():
            if ph.GetPresentation() == text:
                translation.AppendPlaceholder(
                    tclib.Placeholder(ph.GetPresentation(),
                                      ph.GetOriginal(),
                                      ph.GetExample()))
                break
        else:
            # No placeholder of the original message has this name. The
            # message previously contained '%/', which is not a valid
            # conversion specifier, so formatting it raised ValueError and
            # masked the intended MismatchingPlaceholders error.
            raise exception.MismatchingPlaceholders(
                'Translation for message ID %s had <ph name="%s"/>, no match\n'
                'in original message' % (id, text))

    self.FindCliqueAndAddTranslation(translation, lang)
def CreateTclibMessage(self, android_string):
    """Transforms a <string/> element from strings.xml into a tclib.Message.

    Interprets whitespace, quotes, and escaped characters in the android_string
    according to Android's formatting and styling rules for strings. Also
    converts <xliff:g> placeholders into <ph> placeholders, e.g.:

      <xliff:g id="website" example="google.com">%s</xliff:g>
        becomes
      <ph name="website"><ex>google.com</ex>%s</ph>

    Args:
      android_string: The DOM element whose child nodes make up the string.

    Returns:
      The tclib.Message.

    Raises:
      AssertionError: an <xliff:g> element has no 'id' attribute, or a child
        node is neither text, CDATA, element nor comment.
    """
    msg = tclib.Message()
    current_text = ''  # Accumulated text that hasn't yet been added to msg.
    nodes = android_string.childNodes

    for i, node in enumerate(nodes):
        # Handle text nodes.
        if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            current_text += node.data

        # Handle <xliff:g> and other tags.
        elif node.nodeType == Node.ELEMENT_NODE:
            if node.tagName == 'xliff:g':
                # Element nodes have no 'data' attribute, so the former
                # "node.data()" in this message raised AttributeError instead
                # of reporting the offending element; toxml() works for every
                # node.
                assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
                placeholder_id = node.getAttribute('id')
                placeholder_text = self.__FormatPlaceholderText(node)
                placeholder_example = node.getAttribute('example')
                if not placeholder_example:
                    print('Info: placeholder does not contain an example: %s' %
                          node.toxml())
                    # Fall back to the upper-cased id as the example.
                    placeholder_example = placeholder_id.upper()
                msg.AppendPlaceholder(
                    tclib.Placeholder(placeholder_id, placeholder_text,
                                      placeholder_example))
            else:
                print('Warning: removing tag <%s> which must be inside a '
                      'placeholder: %s' % (node.tagName, node.toxml()))
                msg.AppendText(self.__FormatPlaceholderText(node))

        # Handle other nodes.
        elif node.nodeType != Node.COMMENT_NODE:
            assert False, 'Unknown node type: %s' % node.nodeType

        # Flush the accumulated text at the end of the string or just before
        # the next element node.
        is_last_node = (i == len(nodes) - 1)
        if (current_text and
                (is_last_node or
                 nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
            # For messages containing just text and comments (no xml tags)
            # Android strips leading and trailing whitespace. We mimic that
            # behavior.
            if not msg.GetContent() and is_last_node:
                current_text = current_text.strip()
            msg.AppendText(self.__FormatAndroidString(current_text))
            current_text = ''

    return msg
def _ParsePlaceholder(self, placeholder, msg):
    '''Extracts a placeholder from a DOM node and adds it to a tclib Message.

    Args:
      placeholder: A DOM node of the form:
        <ph name="PLACEHOLDER_NAME">Placeholder text<ex>Example value</ex></ph>
      msg: The placeholder is added to this message.

    Raises:
      Exception: the placeholder contains a child that is neither a text node
        nor an <ex> element.
    '''
    text = []
    example_text = []
    for node1 in placeholder.childNodes:
        if node1.nodeType == minidom.Node.TEXT_NODE:
            text.append(node1.data)
        elif (node1.nodeType == minidom.Node.ELEMENT_NODE and
              node1.tagName == 'ex'):
            for node2 in node1.childNodes:
                example_text.append(node2.toxml())
        else:
            # Report the offending child itself. This used to reference node2,
            # which is unbound when no <ex> element has been seen yet and
            # otherwise points at an unrelated node.
            raise Exception('Unexpected element inside a placeholder: ' +
                            node1.toxml())

    if not example_text:
        # In such cases the original text is okay for an example.
        example_text = text

    # '$1', '$2' and '$3' in the placeholder text are replaced by values from
    # the configuration.
    replaced_text = self.Escape(''.join(text).strip())
    replaced_text = replaced_text.replace('$1', self._config['app_name'])
    replaced_text = replaced_text.replace('$2', self._config['os_name'])
    replaced_text = replaced_text.replace('$3', self._config['frame_name'])

    msg.AppendPlaceholder(
        tclib.Placeholder(placeholder.attributes['name'].value,
                          replaced_text,
                          ''.join(example_text).strip()))
def _AddNontranslateableChunk(self, chunk):
    '''Adds a nontranslateable chunk.

    Without a single message being built, the chunk goes straight into the
    skeleton. Otherwise it is wrapped in a numbered 'XXnnXX' placeholder
    (original and example both set to the chunk) and appended to that message.
    '''
    if not self.single_message_:
        self.skeleton_.append(chunk)
        return

    placeholder = tclib.Placeholder('XX%02dXX' % self.ph_counter_, chunk,
                                    chunk)
    self.ph_counter_ += 1
    self.single_message_.AppendPlaceholder(placeholder)
def testPseudoMessage(self):
    # The pseudo translation must transform the text around the placeholder
    # while leaving the placeholder name USERNAME untouched.
    msg = tclib.Message(
        text='Hello USERNAME, how are you?',
        placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
    trans = pseudo.PseudoMessage(msg)
    # TODO(joi) It would be nicer if 'you' -> 'youPou' instead of
    # 'you' -> 'youPyou' and if we handled the silent e in 'are'
    expected = pseudo.MapVowels(
        u'HePelloPo USERNAME, hoPow aParePe youPyou?', 1)
    # assertEqual replaces the deprecated failUnless(a == b) and prints both
    # strings when they differ.
    self.assertEqual(trans.GetPresentableContent(), expected)
def EndParsing(self):
    '''Builds the tclib.Message and clique for this node once parsing is done.

    Concatenates the mixed content into the message text (placeholder nodes
    contribute their upper-cased name), collects the placeholders, splits off
    leading/trailing whitespace, then creates the message and its clique and
    applies shortcut groups and any custom type.
    '''
    # NOTE(review): super(type(self), self) is resolved against the runtime
    # class, so if a subclass inherits this method the call resolves to this
    # same method again and recurses without end. Naming the defining class
    # explicitly would avoid that; the class name is not visible here.
    super(type(self), self).EndParsing()

    # Make the text (including placeholder references) and list of
    # placeholders.
    text = ''
    placeholders = []
    for node in self.mixed_content:
        if isinstance(node, types.StringTypes):
            text += node
            continue
        presentation = node.attrs['name'].upper()
        text += presentation
        # The example comes from the first child when there is one.
        example = node.children[0].GetCdata() if len(node.children) else ' '
        original = node.GetCdata()
        placeholders.append(tclib.Placeholder(presentation, original, example))

    # Strip and store leading and trailing whitespace.
    ws_match = _WHITESPACE.match(text)
    if ws_match:
        self.ws_at_start = ws_match.group('start')
        self.ws_at_end = ws_match.group('end')
        text = ws_match.group('body')

    raw_groups = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
    self.shortcut_groups_ = [grp for grp in raw_groups if grp != '']

    # Fall back to the node name when no description was given.
    description_or_id = self.attrs['desc']
    if description_or_id == '' and 'name' in self.attrs:
        description_or_id = 'ID: %s' % self.attrs['name']

    use_name = (self.attrs['use_name_for_id'] == 'true' and
                self.SatisfiesOutputCondition())
    assigned_id = self.attrs['name'] if use_name else None

    message = tclib.Message(text=text,
                            placeholders=placeholders,
                            description=description_or_id,
                            meaning=self.attrs['meaning'],
                            assigned_id=assigned_id)
    self.clique = self.UberClique().MakeClique(message, self.IsTranslateable())
    for shortcut_group in self.shortcut_groups_:
        self.clique.AddToShortcutGroup(shortcut_group)

    # An explicit custom type wins over a validation expression.
    custom_type_name = self.attrs['custom_type']
    if custom_type_name != '':
        self.clique.SetCustomType(
            util.NewClassInstance(custom_type_name, clique.CustomType))
    elif self.attrs['validation_expr'] != '':
        self.clique.SetCustomType(
            clique.OneOffCustomType(self.attrs['validation_expr']))
def testAll(self):
    # Message and Translation built from the same text and placeholders must
    # both present the text unchanged, and the translation's presentable
    # content must be a string type.
    text = u'Howdie USERNAME'
    phs = [tclib.Placeholder(u'USERNAME', u'%s', 'Joi')]

    msg = tclib.Message(text=text, placeholders=phs)
    # assertEqual/assertTrue replace the deprecated failUnless alias.
    self.assertEqual(msg.GetPresentableContent(), 'Howdie USERNAME')

    trans = tclib.Translation(text=text, placeholders=phs)
    self.assertEqual(trans.GetPresentableContent(), 'Howdie USERNAME')
    self.assertTrue(
        isinstance(trans.GetPresentableContent(), types.StringTypes))
def testPseudolocales(self):
    # Checks the content produced by the long-string and RTL pseudolocale
    # transforms for a message with two adjacent placeholders.
    user_ph = tclib.Placeholder(u'USERNAME', '%s', 'foo')
    email_ph = tclib.Placeholder(u'EMAIL', '%s', 'bar')

    message = tclib.Message()
    message.AppendText('h_')
    message.AppendPlaceholder(user_ph)
    message.AppendPlaceholder(email_ph)
    message.AppendText('w')

    long_content = pl.PseudoLongStringMessage(message).GetContent()
    expected_long = [
        u'\u0125_', user_ph, email_ph, u'\u0175', ' - one two three four'
    ]
    self.assertEqual(long_content, expected_long)

    # Extend the same message before running the RTL transform.
    message.AppendText('hello world')
    rtl_content = pl.PseudoRTLMessage(message).GetContent()
    expected_rtl = [
        u'\u202eh\u202c_', user_ph, email_ph,
        u'\u202ewhello\u202c \u202eworld\u202c'
    ]
    self.assertEqual(rtl_content, expected_rtl)
def testClique(self):
    # Exercises a clique end to end: creation, adding translations directly
    # and through the factory, language lookup with and without fallback, and
    # regular-expression matching of languages.
    factory = clique.UberClique()
    msg = tclib.Message(
        text='Hello USERNAME, how are you?',
        placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
    c = factory.MakeClique(msg)

    self.assertEqual(c.GetMessage(), msg)
    self.assertEqual(c.GetId(), msg.GetId())

    msg_fr = tclib.Translation(
        text='Bonjour USERNAME, comment ca va?',
        id=msg.GetId(),
        placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
    msg_de = tclib.Translation(
        text='Guten tag USERNAME, wie geht es dir?',
        id=msg.GetId(),
        placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])

    c.AddTranslation(msg_fr, 'fr')
    factory.FindCliqueAndAddTranslation(msg_de, 'de')

    # The source language and both added translations must be present.
    for lang in ('en', 'fr', 'de'):
        self.assertTrue(lang in c.clique)

    self.assertEqual(c.MessageForLanguage('fr').GetRealContent(),
                     msg_fr.GetRealContent())

    # Looking up a missing language with the second argument False must
    # raise. Previously self.fail() sat inside a try block guarded by a bare
    # "except: pass", which swallowed the failure itself, so this check could
    # never fail.
    self.assertRaises(Exception, c.MessageForLanguage, 'zh-CN', False)

    # With the second argument True the same lookup must return a message.
    self.assertTrue(c.MessageForLanguage('zh-CN', True) is not None)

    rex = re.compile('fr|de|bingo')
    self.assertEqual(len(c.AllMessagesThatMatch(rex, False)), 2)
    self.assertTrue(
        c.AllMessagesThatMatch(rex, True)[pseudo.PSEUDO_LANG] is not None)
def testTagsWithCommonSubstring(self):
    # Placeholder names 'A', 'AB', ..., 'ABCDEFGHIJ' are prefixes of each
    # other; the message must still map each name to its own original.
    word = 'ABCDEFGHIJ'
    text = ' '.join([word[:i] for i in range(1, 11)])
    phs = [
        tclib.Placeholder(word[:i], str(i), str(i)) for i in range(1, 11)
    ]

    # Only construction and content retrieval are guarded. The value check
    # happens outside the try block so that a wrong result is reported as
    # such; before, a bare except also caught the assertion failure and
    # replaced it with the message below.
    try:
        msg = tclib.Message(text=text, placeholders=phs)
        real_content = msg.GetRealContent()
    except Exception:
        self.fail('tclib.Message() should handle placeholders that are '
                  'substrings of each other')

    self.assertEqual(real_content, '1 2 3 4 5 6 7 8 9 10')
def testBuildAndUnbuildTree(self):
    # Round trip: message -> tree + placeholder list -> translation.
    user_ph = tclib.Placeholder(u'USERNAME', '%s', 'foo')
    email_ph = tclib.Placeholder(u'EMAIL', '%s', 'bar')

    message = tclib.Message()
    message.AppendText('hello')
    message.AppendPlaceholder(user_ph)
    message.AppendPlaceholder(email_ph)
    message.AppendText('world')

    tree, placeholders = pl.BuildTreeFromMessage(message)
    expected_tree = pl.NodeSequence([
        pl.RawText('hello'),
        PLACEHOLDER_NODE,
        PLACEHOLDER_NODE,
        pl.RawText('world'),
    ])
    self.assertTreesEqual(tree, expected_tree)
    self.assertEqual(placeholders, [user_ph, email_ph])

    # Converting back must yield the original sequence of content items.
    rebuilt = pl.ToTranslation(tree, placeholders)
    self.assertEqual(rebuilt.GetContent(),
                     ['hello', user_ph, email_ph, 'world'])
def EndParsing(self):
    '''Builds the tclib.Message for this node once parsing is done.

    Concatenates the mixed content into the message text (placeholder nodes
    contribute their upper-cased name), collects the placeholders, splits off
    leading/trailing whitespace and hands the resulting message to
    InstallMessage.
    '''
    super(MessageNode, self).EndParsing()

    # Make the text (including placeholder references) and list of
    # placeholders.
    text = ''
    placeholders = []
    for node in self.mixed_content:
        if isinstance(node, types.StringTypes):
            text += node
            continue
        presentation = node.attrs['name'].upper()
        text += presentation
        # The example comes from the first child when there is one.
        example = node.children[0].GetCdata() if len(node.children) else ' '
        original = node.GetCdata()
        placeholders.append(tclib.Placeholder(presentation, original, example))

    # Strip and store leading and trailing whitespace.
    ws_match = _WHITESPACE.match(text)
    if ws_match:
        self.ws_at_start = ws_match.group('start')
        self.ws_at_end = ws_match.group('end')
        text = ws_match.group('body')

    raw_groups = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
    self.shortcut_groups_ = [grp for grp in raw_groups if grp != '']

    # Fall back to the node name when no description was given.
    description_or_id = self.attrs['desc']
    if description_or_id == '' and 'name' in self.attrs:
        description_or_id = 'ID: %s' % self.attrs['name']

    if self.attrs['use_name_for_id'] == 'true':
        assigned_id = self.attrs['name']
    else:
        assigned_id = None

    self.InstallMessage(
        tclib.Message(text=text,
                      placeholders=placeholders,
                      description=description_or_id,
                      meaning=self.attrs['meaning'],
                      assigned_id=assigned_id))
def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize a
    few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Any exception is reported together with the offending text and then
    re-raised.
    '''
    try:
        # First try HTML placeholderizing.
        # TODO(joi) Allow use of non-TotalRecall flavors of HTML
        # placeholderizing
        html_msg = tr_html.HtmlToMessage(text, True)
        if any(not isinstance(chunk, types.StringTypes)
               for chunk in html_msg.GetContent()):
            # Contained at least one placeholder, so we're done
            return html_msg

        # HTML placeholderization didn't do anything, so try to find printf or
        # FormatMessage format specifiers and change them into placeholders.
        result = tclib.Message()
        pieces = _FORMAT_SPECIFIER.split(text)
        next_todo = 1  # We make placeholder IDs 'TODO_0001' etc.
        for piece in pieces:
            if _FORMAT_SPECIFIER.match(piece):
                result.AppendPlaceholder(
                    tclib.Placeholder('TODO_%04d' % next_todo, piece, 'TODO'))
                next_todo += 1
            elif piece != '':
                result.AppendText(piece)

        # More than one piece means there are TODO placeholders.
        if self.role_model and len(pieces) > 1:
            uberclique = self.role_model.UberClique()
            role_model_msg = uberclique.BestCliqueByOriginalText(
                result.GetRealContent(), '')
            if role_model_msg:
                # replace wholesale to get placeholder names and examples
                result = role_model_msg

        return result
    except:
        print('Exception processing message with text "%s"' % text)
        raise
def testPlaceholderNameChecking(self):
    # Expect exception to be thrown because presentation contained space.
    # assertRaises reports a proper test failure when nothing is raised; the
    # earlier hand-written try/raise surfaced as a generic Exception and left
    # an unused local behind.
    self.assertRaises(exception.InvalidPlaceholderName,
                      tclib.Placeholder, 'BINGO BONGO', 'bla', 'bla')
def EndParsing(self):
    '''Validates the node content and builds its tclib.Message.

    Plain text may not contain anything matched by _FORMATTERS. The content of
    each <ph> element may contain such a formatter, but once HTML tokens, HTML
    entities and that formatter are accounted for, no '%' or '$' may remain.
    The validated content is turned into a message that is handed to
    InstallMessage.

    Raises:
      exception.PlaceholderNotInsidePhNode: plain text contains a formatter.
      exception.InvalidCharactersInsidePhNode: a <ph> element contains
        leftover '%' or '$' characters.
    '''
    super(MessageNode, self).EndParsing()

    # Make the text (including placeholder references) and list of
    # placeholders, verifying placeholder formats on the way.
    text = ''
    placeholders = []
    for node in self.mixed_content:
        if isinstance(node, six.string_types):
            # Not a <ph> element: fail if any <ph> formatters are detected.
            if _FORMATTERS.search(node):
                print(_BAD_PLACEHOLDER_MSG % (node, self.source))
                raise exception.PlaceholderNotInsidePhNode
            text += node
            continue

        # Extract the <ph> element components.
        presentation = node.attrs['name'].upper()
        text += presentation
        # <ex> example element cdata if present.
        example = node.children[0].GetCdata() if len(node.children) else ' '
        original = node.GetCdata()

        # Sanity check the <ph> element content on a scratch copy: first all
        # HTML tag tokens, then all HTML entities, are replaced by '_'.
        scrubbed = original
        for token_re in (_HTMLTOKEN, _HTMLENTITY):
            hit = token_re.search(scrubbed)
            while hit:
                scrubbed = scrubbed.replace(hit.group(0), '_')
                hit = token_re.search(scrubbed)

        # Drop the text of the first formatter match (every occurrence of
        # that exact text is removed).
        formatter = _FORMATTERS.search(scrubbed)
        if formatter:
            scrubbed = scrubbed.replace(formatter.group(0), '')

        # Fail if <ph> special chars remain.
        if re.search(r'[%\$]', scrubbed):
            message_id = self.attrs['name'] + ' ' + original
            print(_INVALID_PH_CHAR_MSG % (message_id, self.source))
            raise exception.InvalidCharactersInsidePhNode

        # Otherwise, accept this <ph> placeholder.
        placeholders.append(tclib.Placeholder(presentation, original, example))

    # Strip and store leading and trailing whitespace.
    ws_match = _WHITESPACE.match(text)
    if ws_match:
        self.ws_at_start = ws_match.group('start')
        self.ws_at_end = ws_match.group('end')
        text = ws_match.group('body')

    raw_groups = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
    self.shortcut_groups_ = [grp for grp in raw_groups if grp != '']

    # Fall back to the node name when no description was given.
    description_or_id = self.attrs['desc']
    if description_or_id == '' and 'name' in self.attrs:
        description_or_id = 'ID: %s' % self.attrs['name']

    if self.attrs['use_name_for_id'] == 'true':
        assigned_id = self.attrs['name']
    else:
        assigned_id = None

    self.InstallMessage(
        tclib.Message(text=text,
                      placeholders=placeholders,
                      description=description_or_id,
                      meaning=self.attrs['meaning'],
                      assigned_id=assigned_id))
def HtmlToMessage(html, include_block_tags=False, description=''):
    '''Takes a bit of HTML, which must contain only "inline" HTML elements,
    and changes it into a tclib.Message. HTML elements, [REPLACEABLES] and
    whatever _NBSP matches are replaced by placeholders; the remaining text
    chunks are passed through util.UnescapeHtml.

    If include_block_tags is true, no error will be given if block tags (e.g.
    <p> or <br>) are included in the HTML. A block tag is also tolerated once
    after a match of _MESSAGE_NO_BREAK_COMMENT.

    Placeholder names are built as follows (see MakeNameClosure below):
      - elements: 'BEGIN_<NAME>' / 'END_<NAME>' for opening / closing tags and
        plain '<NAME>' for empty elements, where NAME is the lower-cased
        element name, possibly mapped through _HTML_PLACEHOLDER_NAMES;
      - _SPECIAL_ELEMENT matches: 'BEGIN_BLOCK' / 'END_BLOCK' around the
        matched group, which is kept as text;
      - replaceables: 'X_<name>_X' with '-' replaced by '_';
      - _NBSP matches: 'SPACE'.
    When a name is used more than once, a suffix from _SUFFIXES is appended.

    Illustration (assumes _HTML_PLACEHOLDER_NAMES maps 'b' to 'BOLD' and 'i'
    to 'ITALIC', and that the 'name' group of _REPLACEABLE excludes the
    brackets -- TODO confirm against those definitions):

    Args:
      html: 'Hello <b>[USERNAME]</b>, how&nbsp;<i>are</i> you?'
      include_block_tags: False
      description: passed through to tclib.Message

    Return:
      tclib.Message('Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, '
                    'howSPACEBEGIN_ITALICareEND_ITALIC you?',
                    [ Placeholder('BEGIN_BOLD', '<b>', '(HTML code)'),
                      Placeholder('X_USERNAME_X', '[USERNAME]', '(HTML code)'),
                      Placeholder('END_BOLD', '</b>', '(HTML code)'),
                      Placeholder('SPACE', '&nbsp;', '(HTML code)'),
                      Placeholder('BEGIN_ITALIC', '<i>', '(HTML code)'),
                      Placeholder('END_ITALIC', '</i>', '(HTML code)'), ])

    Raises:
      exception.BlockTagInTranslateableChunk: a block tag was found while
        include_block_tags is false and no no-break comment preceded it.
    '''
    # Approach is:
    # - first placeholderize, finding <elements>, [REPLACEABLES] and
    #   non-breaking spaces
    # - then unescape all character entities in text in-between placeholders

    parts = []  # List of strings (for text chunks) and tuples
                # (name closure, original) for placeholders

    count_names = {}  # Map of base names to number of times used
    end_names = { }  # Map of base names to stack of end tags (for correct nesting)

    def MakeNameClosure(base, type=''):
        '''Returns a closure that can be called once all names have been
        allocated to return the final name of the placeholder. This allows us
        to minimally number placeholders for non-overlap.

        Also ensures that END_XXX_Y placeholders have the same Y as the
        corresponding BEGIN_XXX_Y placeholder when we have nested tags of the
        same type.

        Args:
          base: 'phname'
          type: '' | 'begin' | 'end'

        Return:
          Closure()
        '''
        name = base.upper()
        if type != '':
            name = ('%s_%s' % (type, base)).upper()

        # Count this use of the name right away; the total is only consulted
        # later, when the closure is called.
        if name in count_names.keys():
            count_names[name] += 1
        else:
            count_names[name] = 1

        # The default arguments capture the name and this use's zero-based
        # index at definition time.
        def MakeFinalName(name_=name, index=count_names[name] - 1):
            # An end tag takes the name pushed by the matching begin tag, if
            # there is one pending.
            if (type.lower() == 'end' and
                    base in end_names.keys() and len(end_names[base])):
                return end_names[base].pop(-1)  # For correct nesting
            if count_names[name_] != 1:
                name_ = '%s_%s' % (name_, _SUFFIXES[index])
                # We need to use a stack to ensure that the end-tag suffixes
                # match the begin-tag suffixes. Only needed when more than one
                # tag of the same type.
                if type == 'begin':
                    end_name = ('END_%s_%s' % (base, _SUFFIXES[index])).upper()
                    if base in end_names.keys():
                        end_names[base].append(end_name)
                    else:
                        end_names[base] = [end_name]

            return name_

        return MakeFinalName

    current = 0  # Index into html of the next unprocessed character
    last_nobreak = False  # True after a no-break comment, until consumed

    # Scan html from left to right; the first pattern that matches at the
    # current position wins, otherwise one character is taken as plain text.
    while current < len(html):
        m = _MESSAGE_NO_BREAK_COMMENT.match(html[current:])
        if m:
            last_nobreak = True
            current += m.end()
            continue

        m = _NBSP.match(html[current:])
        if m:
            parts.append((MakeNameClosure('SPACE'), m.group()))
            current += m.end()
            continue

        m = _REPLACEABLE.match(html[current:])
        if m:
            # Replaceables allow - but placeholders don't, so replace - with _
            ph_name = MakeNameClosure('X_%s_X' % m.group('name').replace('-', '_'))
            parts.append((ph_name, m.group()))
            current += m.end()
            continue

        m = _SPECIAL_ELEMENT.match(html[current:])
        if m:
            if not include_block_tags:
                if last_nobreak:
                    last_nobreak = False
                else:
                    raise exception.BlockTagInTranslateableChunk(html)
            element_name = 'block'  # for simplification
            # Get the appropriate group name: the first named group with a
            # non-empty match.
            for group in m.groupdict().keys():
                if m.groupdict()[group]:
                    break
            # Everything before the group becomes the begin placeholder, the
            # group itself stays text, everything after it becomes the end
            # placeholder.
            parts.append((MakeNameClosure(element_name, 'begin'), html[current:current + m.start(group)]))
            parts.append(m.group(group))
            parts.append((MakeNameClosure(element_name, 'end'), html[current + m.end(group):current + m.end()]))
            current += m.end()
            continue

        m = _ELEMENT.match(html[current:])
        if m:
            element_name = m.group('element').lower()
            if not include_block_tags and not element_name in _INLINE_TAGS:
                if last_nobreak:
                    last_nobreak = False
                else:
                    raise exception.BlockTagInTranslateableChunk(
                        html[current:])
            if element_name in _HTML_PLACEHOLDER_NAMES:  # use meaningful names
                element_name = _HTML_PLACEHOLDER_NAMES[element_name]

            # Make a name for the placeholder; empty elements get no
            # begin/end prefix.
            type = ''
            if not m.group('empty'):
                if m.group('closing'):
                    type = 'end'
                else:
                    type = 'begin'
            parts.append((MakeNameClosure(element_name, type), m.group()))
            current += m.end()
            continue

        # Plain character: extend the current text chunk or start a new one.
        if len(parts) and isinstance(parts[-1], types.StringTypes):
            parts[-1] += html[current]
        else:
            parts.append(html[current])
        current += 1

    # All names are allocated now, so the closures can produce final names.
    msg_text = ''
    placeholders = []
    for part in parts:
        if isinstance(part, types.TupleType):
            final_name = part[0]()
            original = part[1]
            msg_text += final_name
            placeholders.append(
                tclib.Placeholder(final_name, original, '(HTML code)'))
        else:
            msg_text += part

    msg = tclib.Message(text=msg_text, placeholders=placeholders, description=description)
    # NOTE(review): the loop below assigns into the list returned by
    # GetContent(); this only affects msg if that list is the message's own
    # content rather than a copy -- confirm.
    content = msg.GetContent()
    for ix in range(len(content)):
        if isinstance(content[ix], types.StringTypes):
            content[ix] = util.UnescapeHtml(content[ix], replace_nbsp=False)

    return msg
def testProdFailures(self):
    # Messages that caused failures in production: plural forms containing
    # angle brackets and placeholders, blank lines inside a plural, an offset
    # clause, options without separating whitespace and a '${url}' variable.
    user_ph = tclib.Placeholder(u'USERNAME', '%s', 'foo')

    # Plural whose options contain '<' ... '>' text around a placeholder.
    lines_msg = tclib.Message()
    lines_msg.AppendText(u'{LINE_COUNT, plural,\n =1 {<1 line not shown>}\n'
                         ' other {<')
    lines_msg.AppendPlaceholder(user_ph)
    lines_msg.AppendText(u' lines not shown>}\n}')
    lines_tree, _ = pl.BuildTreeFromMessage(lines_msg)
    expected_lines = pl.Plural('{LINE_COUNT, plural,\n ', [
        pl.PluralOption('=1 {', [pl.RawText('<1 line not shown>')]),
        pl.PluralOption('other {', [
            pl.RawText('<'),
            PLACEHOLDER_NODE,
            pl.RawText(' lines not shown>'),
        ]),
    ])
    self.assertTreesEqual(lines_tree, expected_lines)

    # Plural with an empty line before the first option and the same
    # placeholder used in both options.
    rated_msg = tclib.Message()
    rated_msg.AppendText(u'{1, plural,\n \n =1 {Rated ')
    rated_msg.AppendPlaceholder(user_ph)
    rated_msg.AppendText(u' by one user.}\n other{Rated ')
    rated_msg.AppendPlaceholder(user_ph)
    rated_msg.AppendText(u' by # users.}}')
    rated_tree, _ = pl.BuildTreeFromMessage(rated_msg)
    expected_rated = pl.Plural('{1, plural,\n \n ', [
        pl.PluralOption('=1 {', [
            pl.RawText('Rated '),
            PLACEHOLDER_NODE,
            pl.RawText(' by one user.'),
        ]),
        pl.PluralOption('other{', [
            pl.RawText('Rated '),
            PLACEHOLDER_NODE,
            pl.RawText(' by # users.'),
        ]),
    ])
    self.assertTreesEqual(rated_tree, expected_rated)

    # Plural with an offset clause and nested {VAR} variables.
    self.assertBuildTree(
        '{count, plural, offset:2\n'
        ' =1 {{VAR}}\n'
        ' =2 {{VAR}, {VAR}}\n'
        ' other {{VAR}, {VAR}, and # more}\n'
        ' }',
        pl.Plural('{count, plural, offset:2\n ', [
            pl.PluralOption('=1 {', [VAR_NODE]),
            pl.PluralOption('=2 {', [VAR_NODE, pl.RawText(', '), VAR_NODE]),
            pl.PluralOption('other {', [
                VAR_NODE,
                pl.RawText(', '),
                VAR_NODE,
                pl.RawText(', and # more'),
            ]),
        ]))

    # Plural written without whitespace between its parts.
    self.assertBuildTree(
        '{NUM_POPUPS,plural,=1{Pop-up blocked} other{# pop-ups blocked}}',
        pl.Plural('{NUM_POPUPS,plural,', [
            pl.PluralOption('=1{', [pl.RawText('Pop-up blocked')]),
            pl.PluralOption('other{', [pl.RawText('# pop-ups blocked')]),
        ]))

    # A '${...}' variable following plain text.
    self.assertBuildTree(
        'Open ${url}',
        pl.NodeSequence([pl.RawText('Open '), pl.BasicVariable('${url}')]))