Exemple #1
0
    def testSemiIdenticalCliques(self):
        '''Checks that messages differing only in placeholder originals share
        an ID and one translation, while each clique is expected to render the
        translation with its own placeholder original.
        '''
        first_msg = tclib.Message(
            text='Hello USERNAME',
            placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')])
        second_msg = tclib.Message(
            text='Hello USERNAME',
            placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
        messages = [first_msg, second_msg]
        self.failUnless(messages[0].GetId() == messages[1].GetId())

        # One translation, keyed by the shared ID, is added to both cliques.
        shared_translation = tclib.Translation(
            id=messages[0].GetId(),
            text='Bonjour USERNAME',
            placeholders=[tclib.Placeholder('USERNAME', '$1', 'Joi')])

        uber = clique.UberClique()
        cliques = []
        for msg in messages:
            cliques.append(uber.MakeClique(msg))

        for made in cliques:
            made.AddTranslation(shared_translation, 'fr')

        # Expected real content, in the same order as `messages`.
        expected_contents = ('Bonjour $1', 'Bonjour %s')
        for made, expected in zip(cliques, expected_contents):
            self.failUnless(
                made.MessageForLanguage('fr').GetRealContent() == expected)
Exemple #2
0
def GetPlaceholders(msg):
    '''Builds tclib.Placeholder objects for the tokens found in |msg|.

    Recognized tokens are %{name} references and HTML-like opening, closing
    and self-closing tags.  Placeholders are returned in the order the tokens
    are matched.  Names for tag placeholders come from
    MakeHtmlPlaceholderName(), which is given the running list of opening and
    self-closing tag names seen so far.

    Args:
      msg: The text to scan.

    Returns:
      A list of tclib.Placeholder objects.
    '''
    seen_tags = []
    found = []
    matches = re.findall(
        '%{([^}]+)}|<(([a-zA-Z]+)[^>]*)(?<!/)>|</(([a-zA-Z]+)[^>]*)>|<(([a-zA-Z]+)[^>]*)/>',
        msg)
    # Each match is a 7-tuple of groups; only the groups belonging to the
    # alternative that matched are non-empty.
    for (ph_name, open_body, open_name, close_body, close_name, unary_body,
         unary_name) in matches:
        if ph_name != '':
            presentation = ph_name.upper()
            original = '%%{%s}' % ph_name
            example = '(replaceable)'
        elif open_name != '':
            seen_tags.append(open_name)
            presentation = MakeHtmlPlaceholderName(open_name, 'begin',
                                                   seen_tags)
            original = '<%s>' % open_body
            example = '(HTML code)'
        elif close_name != '':
            presentation = MakeHtmlPlaceholderName(close_name, 'end',
                                                   seen_tags)
            original = '</%s>' % close_body
            example = '(HTML code)'
        elif unary_name != '':
            seen_tags.append(unary_name)
            presentation = MakeHtmlPlaceholderName(unary_name, None,
                                                   seen_tags)
            original = '<%s/>' % unary_body
            example = '(HTML code)'
        else:
            # No alternative produced a usable name; nothing to add.
            continue
        found.append(tclib.Placeholder(presentation, original, example))
    return found
Exemple #3
0
 def testConstruct(self):
   '''Checks the node tree and stored whitespace produced by
   MessageNode.Construct() for a message with two placeholders.
   '''
   placeholders = [tclib.Placeholder('USERNAME', '%s', 'Joi'),
                   tclib.Placeholder('BINGO', '%d', '11')]
   source_msg = tclib.Message(
       text="   Hello USERNAME, how are you?   BINGO\t\t",
       placeholders=placeholders)
   node = message.MessageNode.Construct(None, source_msg, 'BINGOBONGO')

   # First child: a <ph> node whose own first child is the <ex> example.
   first_ph = node.children[0]
   self.failUnless(first_ph.name == 'ph')
   first_example = first_ph.children[0]
   self.failUnless(first_example.name == 'ex')
   self.failUnless(first_example.GetCdata() == 'Joi')

   # Second child carries the example of the second placeholder.
   self.failUnless(node.children[1].children[0].GetCdata() == '11')

   # Leading and trailing whitespace is kept on the node.
   self.failUnless(node.ws_at_start == '   ')
   self.failUnless(node.ws_at_end == '\t\t')
Exemple #4
0
 def testXmlFormatContentWithEntities(self):
   '''Regression test: FormatXml() output must not contain a raw &nbsp;.'''
   from grit import tclib
   bold_placeholders = [
     tclib.Placeholder('BEGIN_BOLD', '<b>', 'bla'),
     tclib.Placeholder('WHITESPACE', '&nbsp;', 'bla'),
     tclib.Placeholder('END_BOLD', '</b>', 'bla')]
   source_msg = tclib.Message(
     text = 'BEGIN_BOLDHelloWHITESPACEthere!END_BOLD Bingo!',
     placeholders = bold_placeholders)
   node = message.MessageNode.Construct(None, source_msg, 'BINGOBONGO')
   formatted = node.FormatXml()
   entity_position = formatted.find('&nbsp;')
   self.failUnless(entity_position == -1, 'should have no entities')
Exemple #5
0
 def testRegressionTranslationInherited(self):
     '''Regression test for a bug caused by grit.tclib.Translation
     inheriting from the translation console's Translation object
     instead of only owning an instance of it.

     Builds a Translation from a Message's presentable content and
     placeholders, then checks that the content item at index 3 is a
     unicode string.
     '''
     phs = []
     for ph_name in ('BLA1', 'BLA2', 'BLA3'):
         phs.append(tclib.Placeholder(ph_name, '%s', '%s'))
     source_msg = tclib.Message(text=u"BLA1\r\nFrom: BLA2 \u00fe BLA3",
                                placeholders=phs)
     translation = tclib.Translation(
         text=source_msg.GetPresentableContent(),
         placeholders=source_msg.GetPlaceholders())
     fourth_item = translation.GetContent()[3]
     self.failUnless(isinstance(fourth_item, types.UnicodeType))
Exemple #6
0
    def AddMessage(self, msgtext, description, meaning, translateable):
        '''Turns |msgtext| into a tclib.Message and appends its clique to the
        skeleton.

        The unescaped text is split with PLACEHOLDER_RE.  Pieces found between
        a '[![' and a ']!]' marker become placeholders; all other non-empty
        pieces become plain text.  Nothing happens for an empty |msgtext|.

        Args:
          msgtext: Escaped message text.
          description: Description passed to the tclib.Message.
          meaning: Meaning passed to the tclib.Message.
          translateable: Passed to MakeClique(); when true, also marks this
            object as containing a translatable chunk.
        '''
        if msgtext == '':
            return

        built_msg = tclib.Message(description=description, meaning=meaning)

        inside_placeholder = False
        for piece in PLACEHOLDER_RE.split(self.UnEscape(msgtext)):
            if piece == '':
                continue
            if piece == '[![':
                inside_placeholder = True
                continue
            if piece == ']!]':
                inside_placeholder = False
                continue
            if not inside_placeholder:
                built_msg.AppendText(piece)
                continue
            built_msg.AppendPlaceholder(
                tclib.Placeholder(piece, '[![%s]!]' % piece, '(placeholder)'))

        new_clique = self.uberclique.MakeClique(built_msg,
                                                translateable=translateable)
        self.skeleton_.append(new_clique)

        # The flag is only ever raised, never lowered, so repeated calls stay
        # idempotent with respect to it.
        if translateable:
            self.translatable_chunk_ = True
Exemple #7
0
  def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    The presentable content is split with the expression from GetExp().
    Every fragment that _SubFragment() changes is replaced by a new
    placeholder named <fragment without its first and last character>_<N>,
    where N counts the uses of that fragment so far; the fragment becomes
    the placeholder's original and the substituted value its example.
    Existing placeholders are kept.

    Args:
      msg: A tclib.Message object.

    Returns:
      A new tclib.Message with the substitutions done, or |msg| itself when
      no fragment was substituted.
    '''
    from grit import tclib  # avoid circular import
    usage_counts = {}
    text = msg.GetPresentableContent()
    added_placeholders = []
    pieces = []
    for fragment in self.GetExp().split(text):
      replacement = self._SubFragment(fragment)
      if fragment != replacement:
        key = str(fragment)
        usage_counts[key] = usage_counts.get(key, 0) + 1
        ph_name = "%s_%d" % (key[1:-1], usage_counts[key])
        added_placeholders.append(tclib.Placeholder(ph_name, key, replacement))
        pieces.append(ph_name)
      else:
        pieces.append(fragment)
    if not added_placeholders:
      return msg
    return tclib.Message(''.join(pieces),
                         msg.GetPlaceholders() + added_placeholders,
                         msg.GetDescription(), msg.GetMeaning())
Exemple #8
0
    def Callback(id, structure):
      '''Builds a translation for message |id| and adds it to its clique.

      Translations whose ID is not in self.cliques_ are ignored.

      Args:
        id: ID of the message the translation belongs to.
        structure: Iterable of (is_ph, text) pairs.  For placeholder entries
          |text| is the placeholder's presentation name.

      Raises:
        exception.MismatchingPlaceholders: a placeholder name in the
          translation has no counterpart in the original message.
      '''
      if id not in self.cliques_:
        if debug: print("Ignoring translation #%s" % id)
        return

      if debug: print("Adding translation #%s" % id)

      # We fetch placeholder information from the original message (the XTB file
      # only contains placeholder names).
      original_msg = self.BestClique(id).GetMessage()

      translation = tclib.Translation(id=id)
      for is_ph, text in structure:
        if not is_ph:
          translation.AppendText(text)
          continue
        for ph in original_msg.GetPlaceholders():
          if ph.GetPresentation() == text:
            translation.AppendPlaceholder(tclib.Placeholder(
              ph.GetPresentation(), ph.GetOriginal(), ph.GetExample()))
            break
        else:
          # No placeholder in the original message has this name.  The format
          # string previously contained '%s%/>', an invalid conversion that
          # raised ValueError instead of the intended exception.
          raise exception.MismatchingPlaceholders(
            'Translation for message ID %s had <ph name="%s"/>, no match\n'
            'in original message' % (id, text))
      self.FindCliqueAndAddTranslation(translation, lang)
Exemple #9
0
    def CreateTclibMessage(self, android_string):
        """Transforms a <string/> element from strings.xml into a tclib.Message.

        Interprets whitespace, quotes, and escaped characters in the
        android_string according to Android's formatting and styling rules for
        strings.  Also converts <xliff:g> placeholders into <ph> placeholders,
        e.g.:

          <xliff:g id="website" example="google.com">%s</xliff:g>
            becomes
          <ph name="website"><ex>google.com</ex>%s</ph>

        Args:
          android_string: DOM element whose child nodes are converted.

        Returns:
          The tclib.Message.

        Raises:
          AssertionError: an <xliff:g> element has no 'id' attribute, or a
            child node has an unexpected node type.
        """
        msg = tclib.Message()
        current_text = ''  # Accumulated text that hasn't yet been added to msg.
        nodes = android_string.childNodes

        for i, node in enumerate(nodes):
            # Handle text nodes.
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                current_text += node.data

            # Handle <xliff:g> and other tags.
            elif node.nodeType == Node.ELEMENT_NODE:
                if node.tagName == 'xliff:g':
                    # Element nodes have no 'data' attribute, so the message
                    # is built from the element's XML instead; otherwise a
                    # missing id would surface as an AttributeError.
                    assert node.hasAttribute(
                        'id'), 'missing id: ' + node.toxml()
                    placeholder_id = node.getAttribute('id')
                    placeholder_text = self.__FormatPlaceholderText(node)
                    placeholder_example = node.getAttribute('example')
                    if not placeholder_example:
                        print(
                            'Info: placeholder does not contain an example: %s'
                            % node.toxml())
                        # Fall back to the upper-cased id as the example.
                        placeholder_example = placeholder_id.upper()
                    msg.AppendPlaceholder(
                        tclib.Placeholder(placeholder_id, placeholder_text,
                                          placeholder_example))
                else:
                    print(
                        'Warning: removing tag <%s> which must be inside a '
                        'placeholder: %s' % (node.tagName, node.toxml()))
                    msg.AppendText(self.__FormatPlaceholderText(node))

            # Handle other nodes.
            elif node.nodeType != Node.COMMENT_NODE:
                assert False, 'Unknown node type: %s' % node.nodeType

            # Flush accumulated text at the end, or just before an element, so
            # that text and placeholders keep their relative order.
            is_last_node = (i == len(nodes) - 1)
            if (current_text and
                (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
                # For messages containing just text and comments (no xml tags) Android
                # strips leading and trailing whitespace.  We mimic that behavior.
                if not msg.GetContent() and is_last_node:
                    current_text = current_text.strip()
                msg.AppendText(self.__FormatAndroidString(current_text))
                current_text = ''

        return msg
Exemple #10
0
    def _ParsePlaceholder(self, placeholder, msg):
        '''Extracts a placeholder from a DOM node and adds it to a tclib Message.

        Text children form the placeholder's original text; the children of an
        <ex> element form its example.  When there is no example, the original
        text is used as the example.  In the escaped original text, '$1', '$2'
        and '$3' are replaced by the 'app_name', 'os_name' and 'frame_name'
        entries of self._config.

        Args:
          placeholder: A DOM node of the form:
            <ph name="PLACEHOLDER_NAME">Placeholder text<ex>Example value</ex></ph>
          msg: The placeholder is added to this message.

        Raises:
          Exception: a child of |placeholder| is neither a text node nor an
            <ex> element.
        '''
        text = []
        example_text = []
        for node1 in placeholder.childNodes:
            if node1.nodeType == minidom.Node.TEXT_NODE:
                text.append(node1.data)
            elif (node1.nodeType == minidom.Node.ELEMENT_NODE
                  and node1.tagName == 'ex'):
                for node2 in node1.childNodes:
                    example_text.append(node2.toxml())
            else:
                # Report the offending child itself.  'node2' is only bound
                # inside the <ex> loop, so referencing it here was either
                # unbound or pointed at an unrelated earlier node.
                raise Exception('Unexpected element inside a placeholder: ' +
                                node1.toxml())
        if not example_text:
            # In such cases the original text is okay for an example.
            example_text = text

        replaced_text = self.Escape(''.join(text).strip())
        replaced_text = replaced_text.replace('$1', self._config['app_name'])
        replaced_text = replaced_text.replace('$2', self._config['os_name'])
        replaced_text = replaced_text.replace('$3', self._config['frame_name'])

        msg.AppendPlaceholder(
            tclib.Placeholder(placeholder.attributes['name'].value,
                              replaced_text, ''.join(example_text).strip()))
Exemple #11
0
 def _AddNontranslateableChunk(self, chunk):
     '''Records |chunk| as non-translateable content.

     When self.single_message_ is set, the chunk is appended to it as a
     placeholder named XXnnXX (nn being the zero-padded running counter,
     which is then incremented).  Otherwise the chunk is appended to the
     skeleton unchanged.
     '''
     if not self.single_message_:
         self.skeleton_.append(chunk)
         return
     placeholder_name = 'XX%02dXX' % self.ph_counter_
     placeholder = tclib.Placeholder(placeholder_name, chunk, chunk)
     self.ph_counter_ += 1
     self.single_message_.AppendPlaceholder(placeholder)
Exemple #12
0
 def testPseudoMessage(self):
     '''Compares the pseudo translation of a message containing one
     placeholder against the expected pseudo text.
     '''
     username_ph = tclib.Placeholder('USERNAME', '%s', 'Joi')
     source_msg = tclib.Message(text='Hello USERNAME, how are you?',
                                placeholders=[username_ph])
     pseudo_msg = pseudo.PseudoMessage(source_msg)
     actual = pseudo_msg.GetPresentableContent()
     # TODO(joi) It would be nicer if 'you' -> 'youPou' instead of
     # 'you' -> 'youPyou' and if we handled the silent e in 'are'
     expected = pseudo.MapVowels(
         u'HePelloPo USERNAME, hoPow aParePe youPyou?', 1)
     self.failUnless(actual == expected)
Exemple #13
0
    def EndParsing(self):
        '''Builds the tclib.Message for this node and wraps it in a clique.

        Text and <ph> children in self.mixed_content are combined into the
        message text and placeholder list; leading and trailing whitespace
        matched by _WHITESPACE is stored on the node and stripped from the
        text.  The resulting clique is stored in self.clique, added to the
        node's shortcut groups and given a custom type when the
        'custom_type' or 'validation_expr' attribute is set.
        '''
        # NOTE(review): super(type(self), self) resolves relative to the
        # runtime class, so a subclass that inherits this method would call
        # it again recursively - confirm no subclass relies on it.
        super(type(self), self).EndParsing()

        pieces = []
        placeholders = []
        for item in self.mixed_content:
            if isinstance(item, types.StringTypes):
                pieces.append(item)
                continue
            # Anything else is treated as a <ph> child: its upper-cased name
            # stands in for it in the text.
            presentation = item.attrs['name'].upper()
            pieces.append(presentation)
            if len(item.children):
                example = item.children[0].GetCdata()
            else:
                example = ' '
            placeholders.append(
                tclib.Placeholder(presentation, item.GetCdata(), example))
        text = ''.join(pieces)

        ws_match = _WHITESPACE.match(text)
        if ws_match:
            self.ws_at_start = ws_match.group('start')
            self.ws_at_end = ws_match.group('end')
            text = ws_match.group('body')

        # Empty entries produced by the split are dropped.
        raw_groups = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
        self.shortcut_groups_ = [grp for grp in raw_groups if grp != '']

        # Fall back to the message name when no description is given.
        description_or_id = self.attrs['desc']
        if description_or_id == '' and 'name' in self.attrs:
            description_or_id = 'ID: %s' % self.attrs['name']

        use_name = (self.attrs['use_name_for_id'] == 'true'
                    and self.SatisfiesOutputCondition())
        assigned_id = self.attrs['name'] if use_name else None

        built_message = tclib.Message(text=text,
                                      placeholders=placeholders,
                                      description=description_or_id,
                                      meaning=self.attrs['meaning'],
                                      assigned_id=assigned_id)
        self.clique = self.UberClique().MakeClique(built_message,
                                                   self.IsTranslateable())
        for group in self.shortcut_groups_:
            self.clique.AddToShortcutGroup(group)

        # 'custom_type' takes precedence over 'validation_expr'.
        if self.attrs['custom_type'] != '':
            custom = util.NewClassInstance(self.attrs['custom_type'],
                                           clique.CustomType)
            self.clique.SetCustomType(custom)
        elif self.attrs['validation_expr'] != '':
            one_off = clique.OneOffCustomType(self.attrs['validation_expr'])
            self.clique.SetCustomType(one_off)
Exemple #14
0
    def testAll(self):
        '''Checks the presentable content of a Message and a Translation
        built from the same unicode text and placeholder list.
        '''
        source_text = u'Howdie USERNAME'
        placeholders = [tclib.Placeholder(u'USERNAME', u'%s', 'Joi')]

        source_msg = tclib.Message(text=source_text,
                                   placeholders=placeholders)
        self.failUnless(
            source_msg.GetPresentableContent() == 'Howdie USERNAME')

        translation = tclib.Translation(text=source_text,
                                        placeholders=placeholders)
        self.failUnless(
            translation.GetPresentableContent() == 'Howdie USERNAME')
        # The presentable content must be a string type.
        presentable = translation.GetPresentableContent()
        self.failUnless(isinstance(presentable, types.StringTypes))
Exemple #15
0
    def testPseudolocales(self):
        '''Checks the content produced by the long-string and RTL
        pseudolocale transforms for a message with two placeholders.
        '''
        username_ph = tclib.Placeholder(u'USERNAME', '%s', 'foo')
        email_ph = tclib.Placeholder(u'EMAIL', '%s', 'bar')

        msg = tclib.Message()
        msg.AppendText('h_')
        for placeholder in (username_ph, email_ph):
            msg.AppendPlaceholder(placeholder)
        msg.AppendText('w')

        long_content = pl.PseudoLongStringMessage(msg).GetContent()
        expected_long = [
            u'\u0125_', username_ph, email_ph, u'\u0175',
            ' - one two three four'
        ]
        self.assertEqual(long_content, expected_long)

        # Extend the same message before applying the RTL transform.
        msg.AppendText('hello world')
        rtl_content = pl.PseudoRTLMessage(msg).GetContent()
        expected_rtl = [
            u'\u202eh\u202c_', username_ph, email_ph,
            u'\u202ewhello\u202c \u202eworld\u202c'
        ]
        self.assertEqual(rtl_content, expected_rtl)
Exemple #16
0
    def testClique(self):
        '''Exercises clique creation, adding translations and message lookup.'''
        factory = clique.UberClique()
        msg = tclib.Message(
            text='Hello USERNAME, how are you?',
            placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
        c = factory.MakeClique(msg)

        self.failUnless(c.GetMessage() == msg)
        self.failUnless(c.GetId() == msg.GetId())

        msg_fr = tclib.Translation(
            text='Bonjour USERNAME, comment ca va?',
            id=msg.GetId(),
            placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])
        msg_de = tclib.Translation(
            text='Guten tag USERNAME, wie geht es dir?',
            id=msg.GetId(),
            placeholders=[tclib.Placeholder('USERNAME', '%s', 'Joi')])

        # One translation is added on the clique directly, the other through
        # the factory's lookup by ID.
        c.AddTranslation(msg_fr, 'fr')
        factory.FindCliqueAndAddTranslation(msg_de, 'de')

        # The clique should now hold an entry for each of these languages.
        for lang in ('en', 'fr', 'de'):
            self.failUnless(lang in c.clique)

        self.failUnless(
            c.MessageForLanguage('fr').GetRealContent() ==
            msg_fr.GetRealContent())

        # With the second argument False, a missing language must raise.
        # self.fail() lives in the else clause so that its AssertionError is
        # not swallowed by the except clause.
        try:
            c.MessageForLanguage('zh-CN', False)
        except Exception:
            pass
        else:
            self.fail('Should have gotten exception')

        self.failUnless(c.MessageForLanguage('zh-CN', True) is not None)

        rex = re.compile('fr|de|bingo')
        self.failUnless(len(c.AllMessagesThatMatch(rex, False)) == 2)
        self.failUnless(
            c.AllMessagesThatMatch(rex, True)[pseudo.PSEUDO_LANG] is not None)
Exemple #17
0
 def testTagsWithCommonSubstring(self):
     '''Checks that placeholder names which are prefixes of one another
     ('A', 'AB', ... 'ABCDEFGHIJ') are each replaced by their own original.
     '''
     word = 'ABCDEFGHIJ'
     text = ' '.join([word[:i] for i in range(1, 11)])
     phs = [
         tclib.Placeholder(word[:i], str(i), str(i)) for i in range(1, 11)
     ]
     # Only the calls under test sit inside the try block.  The content
     # check stays outside so that a wrong result is reported as such and
     # not as a construction error, and so that KeyboardInterrupt and
     # SystemExit are no longer caught.
     try:
         msg = tclib.Message(text=text, placeholders=phs)
         real_content = msg.GetRealContent()
     except Exception:
         self.fail('tclib.Message() should handle placeholders that are '
                   'substrings of each other')
     self.failUnless(real_content == '1 2 3 4 5 6 7 8 9 10')
Exemple #18
0
    def testBuildAndUnbuildTree(self):
        '''Converts a message to a tree and back, checking both directions.'''
        username_ph = tclib.Placeholder(u'USERNAME', '%s', 'foo')
        email_ph = tclib.Placeholder(u'EMAIL', '%s', 'bar')

        msg = tclib.Message()
        msg.AppendText('hello')
        for placeholder in (username_ph, email_ph):
            msg.AppendPlaceholder(placeholder)
        msg.AppendText('world')

        # Message -> tree plus the placeholders in order.
        built_tree, found_placeholders = pl.BuildTreeFromMessage(msg)
        expected_tree = pl.NodeSequence([
            pl.RawText('hello'), PLACEHOLDER_NODE, PLACEHOLDER_NODE,
            pl.RawText('world')
        ])
        self.assertTreesEqual(built_tree, expected_tree)
        self.assertEqual(found_placeholders, [username_ph, email_ph])

        # Tree -> translation with the original content restored.
        round_tripped = pl.ToTranslation(built_tree, found_placeholders)
        self.assertEqual(round_tripped.GetContent(),
                         ['hello', username_ph, email_ph, 'world'])
Exemple #19
0
    def EndParsing(self):
        '''Builds the tclib.Message for this node and installs it.

        Text and <ph> children in self.mixed_content are combined into the
        message text and placeholder list; leading and trailing whitespace
        matched by _WHITESPACE is stored on the node and stripped from the
        text.  The finished message is passed to self.InstallMessage().
        '''
        super(MessageNode, self).EndParsing()

        text = ''
        placeholders = []
        for piece in self.mixed_content:
            if not isinstance(piece, types.StringTypes):
                # Treated as a <ph> child: its upper-cased name stands in for
                # it in the text.
                ph_name = piece.attrs['name'].upper()
                text += ph_name
                if len(piece.children):
                    example = piece.children[0].GetCdata()
                else:
                    example = ' '
                placeholders.append(
                    tclib.Placeholder(ph_name, piece.GetCdata(), example))
            else:
                text += piece

        ws_match = _WHITESPACE.match(text)
        if ws_match:
            self.ws_at_start = ws_match.group('start')
            self.ws_at_end = ws_match.group('end')
            text = ws_match.group('body')

        # Empty entries produced by the split are dropped.
        split_groups = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
        self.shortcut_groups_ = [grp for grp in split_groups if grp != '']

        # Fall back to the message name when no description is given.
        description_or_id = self.attrs['desc']
        if description_or_id == '' and 'name' in self.attrs:
            description_or_id = 'ID: %s' % self.attrs['name']

        if self.attrs['use_name_for_id'] == 'true':
            assigned_id = self.attrs['name']
        else:
            assigned_id = None

        self.InstallMessage(
            tclib.Message(text=text,
                          placeholders=placeholders,
                          description=description_or_id,
                          meaning=self.attrs['meaning'],
                          assigned_id=assigned_id))
Exemple #20
0
    def Placeholderize(self, text):
        '''Creates a tclib.Message object from 'text', attempting to recognize
        a few different formats of text that can be automatically
        placeholderized (HTML code, printf-style format strings, and
        FormatMessage-style format strings).

        HTML placeholderizing is tried first; its result is returned as soon
        as it contains a non-string content item.  Otherwise the text is split
        on _FORMAT_SPECIFIER and every specifier becomes a placeholder named
        TODO_0001, TODO_0002, ...  Any exception is reported on stdout and
        re-raised.
        '''
        try:
            # First try HTML placeholderizing.
            # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
            html_msg = tr_html.HtmlToMessage(text, True)
            if any(not isinstance(item, types.StringTypes)
                   for item in html_msg.GetContent()):
                return html_msg  # Contained at least one placeholder, so we're done

            # HTML placeholderization didn't do anything, so try to find printf or
            # FormatMessage format specifiers and change them into placeholders.
            msg = tclib.Message()
            parts = _FORMAT_SPECIFIER.split(text)
            next_todo = 1  # We make placeholder IDs 'TODO_0001' etc.
            for part in parts:
                if _FORMAT_SPECIFIER.match(part):
                    todo_name = 'TODO_%04d' % next_todo
                    msg.AppendPlaceholder(
                        tclib.Placeholder(todo_name, part, 'TODO'))
                    next_todo += 1
                    continue
                if part != '':
                    msg.AppendText(part)

            # More than one part means there are TODO placeholders.
            if self.role_model and len(parts) > 1:
                uber_clique = self.role_model.UberClique()
                role_model_msg = uber_clique.BestCliqueByOriginalText(
                    msg.GetRealContent(), '')
                if role_model_msg:
                    # replace wholesale to get placeholder names and examples
                    # NOTE(review): the lookup name suggests a clique is
                    # returned rather than a message - confirm callers expect
                    # this object.
                    msg = role_model_msg

            return msg
        except:
            print('Exception processing message with text "%s"' % text)
            raise
Exemple #21
0
 def testPlaceholderNameChecking(self):
     '''Expects tclib.Placeholder to reject a presentation containing a
     space by raising exception.InvalidPlaceholderName.
     '''
     name_with_space = 'BINGO BONGO'
     try:
         tclib.Placeholder(name_with_space, 'bla', 'bla')
         # Reaching this line means the invalid name was accepted.
         raise Exception("We shouldn't get here")
     except exception.InvalidPlaceholderName:
         # Expected outcome, nothing further to check.
         pass
Exemple #22
0
    def EndParsing(self):
        '''Validates this node's content, builds its tclib.Message and
        installs it.

        Text children must not contain anything matched by _FORMATTERS, and
        the content of each <ph> child must not contain '%' or '$' beyond
        what the sanity check below strips out.  The finished message is
        passed to self.InstallMessage().

        Raises:
          exception.PlaceholderNotInsidePhNode: a text child contains a
            formatter.
          exception.InvalidCharactersInsidePhNode: a <ph> child still
            contains '%' or '$' after the sanity check.
        '''
        super(MessageNode, self).EndParsing()

        # Make the text (including placeholder references) and list of placeholders,
        # verify placeholder formats, then strip and store leading and trailing
        # whitespace and create the tclib.Message() and a clique to contain it.

        text = ''
        placeholders = []

        for item in self.mixed_content:
            if isinstance(item, six.string_types):
                # Not a <ph> element: fail if any <ph> formatters are detected.
                if _FORMATTERS.search(item):
                    print(_BAD_PLACEHOLDER_MSG % (item, self.source))
                    raise exception.PlaceholderNotInsidePhNode
                text += item
            else:
                # Extract the <ph> element components.
                presentation = item.attrs['name'].upper()
                text += presentation
                ex = ' '  # <ex> example element cdata if present.
                if len(item.children):
                    ex = item.children[0].GetCdata()
                original = item.GetCdata()

                # Sanity check the <ph> element content.  The check works on a
                # scratch copy; 'original' itself is stored unmodified.
                cdata = original
                # Replace all HTML tag tokens in cdata.  Each pass replaces
                # every occurrence of the matched text with '_' and then
                # searches again until nothing matches.
                match = _HTMLTOKEN.search(cdata)
                while match:
                    cdata = cdata.replace(match.group(0), '_')
                    match = _HTMLTOKEN.search(cdata)
                # Replace all HTML entities in cdata (same search/replace
                # loop as above).
                match = _HTMLENTITY.search(cdata)
                while match:
                    cdata = cdata.replace(match.group(0), '_')
                    match = _HTMLENTITY.search(cdata)
                # Remove the first formatter found in cdata.  str.replace()
                # drops every occurrence of that exact matched text, but a
                # different formatter elsewhere in cdata is left in place.
                match = _FORMATTERS.search(cdata)
                if match:
                    cdata = cdata.replace(match.group(0), '')
                # Fail if <ph> special chars remain in cdata.
                if re.search(r'[%\$]', cdata):
                    message_id = self.attrs['name'] + ' ' + original
                    print(_INVALID_PH_CHAR_MSG % (message_id, self.source))
                    raise exception.InvalidCharactersInsidePhNode

                # Otherwise, accept this <ph> placeholder.
                placeholders.append(
                    tclib.Placeholder(presentation, original, ex))

        # Store surrounding whitespace on the node and keep only the body.
        m = _WHITESPACE.match(text)
        if m:
            self.ws_at_start = m.group('start')
            self.ws_at_end = m.group('end')
            text = m.group('body')

        # Split the shortcut groups attribute and drop empty entries.
        self.shortcut_groups_ = self._SPLIT_RE.split(
            self.attrs['shortcut_groups'])
        self.shortcut_groups_ = [i for i in self.shortcut_groups_ if i != '']

        # Fall back to the message name when no description is given.
        description_or_id = self.attrs['desc']
        if description_or_id == '' and 'name' in self.attrs:
            description_or_id = 'ID: %s' % self.attrs['name']

        # Only use the name as the message ID when explicitly requested.
        assigned_id = None
        if self.attrs['use_name_for_id'] == 'true':
            assigned_id = self.attrs['name']
        message = tclib.Message(text=text,
                                placeholders=placeholders,
                                description=description_or_id,
                                meaning=self.attrs['meaning'],
                                assigned_id=assigned_id)
        self.InstallMessage(message)
Exemple #23
0
def HtmlToMessage(html, include_block_tags=False, description=''):
    '''Takes a bit of HTML and changes it into a tclib.Message.

    HTML elements, replaceables (whatever _REPLACEABLE matches) and
    non-breaking spaces (whatever _NBSP matches) are replaced with
    placeholders.  The text in between the placeholders is then passed through
    util.UnescapeHtml(..., replace_nbsp=False).

    Placeholder names are built as follows (all upper-cased):
      - elements:     BEGIN_<name> / END_<name>, or just <name> for an element
                      whose 'empty' group matched.  <name> is looked up in
                      _HTML_PLACEHOLDER_NAMES, falling back to the tag name.
      - special elements (matched by _SPECIAL_ELEMENT): BEGIN_BLOCK / END_BLOCK
                      around the matched inner group, which stays as text.
      - replaceables: X_<name>_X, with '-' replaced by '_'.
      - nbsp:         SPACE
    When a name is used more than once, each use gets a '_<suffix>' taken from
    _SUFFIXES, and an END_ placeholder reuses the suffix of the most recent
    unmatched BEGIN_ placeholder of the same base name.

    Args:
      html: The HTML to convert, e.g.
          'Hello <b>[USERNAME]</b>, how&nbsp;<i>are</i> you?'
      include_block_tags: If true, no error is raised for elements that are
          not in _INLINE_TAGS or that match _SPECIAL_ELEMENT.
      description: Passed through as the description of the resulting message.

    Return:
      A tclib.Message.  For the example above, and assuming that
      _HTML_PLACEHOLDER_NAMES maps 'b' to 'bold' and 'i' to 'italic' (TODO
      confirm against the table), the presentable text would be
        'Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, howSPACEBEGIN_ITALICareEND_ITALIC you?'
      with one Placeholder(name, original_html, '(HTML code)') per placeholder.

    Raises:
      exception.BlockTagInTranslateableChunk: a disallowed element was found
          while include_block_tags is false and no not-yet-consumed match of
          _MESSAGE_NO_BREAK_COMMENT preceded it.
    '''
    # Approach is:
    # - first placeholderize, finding <elements>, [REPLACEABLES] and &nbsp;
    # - then escape all character entities in text in-between placeholders

    # types.StringTypes / types.TupleType only exist on Python 2.  Use names
    # that behave identically there and also work on Python 3.
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str  # Python 3

    # List of strings (for text chunks) and (name closure, original) tuples
    # (for placeholders).
    parts = []

    count_names = {}  # Map of full names to number of times used
    end_names = {}  # Map of base names to stack of end-tag names (for nesting)

    def MakeNameClosure(base, kind=''):
        '''Returns a closure that can be called once all names have been
        allocated to return the final name of the placeholder.  This allows us
        to minimally number placeholders for non-overlap.

        Also ensures that END_XXX_Y placeholders have the same Y as the
        corresponding BEGIN_XXX_Y placeholder when we have nested tags of the
        same type.

        Args:
          base: 'phname'
          kind: '' | 'begin' | 'end'

        Return:
          Closure()
        '''
        name = base.upper()
        if kind != '':
            name = ('%s_%s' % (kind, base)).upper()

        count_names[name] = count_names.get(name, 0) + 1
        # Position of this use among all uses of `name`; fixed at allocation
        # time, whereas the total count is only final once parsing is done.
        index = count_names[name] - 1

        def MakeFinalName():
            pending_ends = end_names.get(base)
            if kind.lower() == 'end' and pending_ends:
                return pending_ends.pop()  # For correct nesting

            if count_names[name] == 1:
                return name

            # We need to use a stack to ensure that the end-tag suffixes match
            # the begin-tag suffixes.  Only needed when more than one tag of
            # the same type.
            if kind == 'begin':
                end_name = ('END_%s_%s' % (base, _SUFFIXES[index])).upper()
                end_names.setdefault(base, []).append(end_name)
            return '%s_%s' % (name, _SUFFIXES[index])

        return MakeFinalName

    current = 0
    # True after a no-break comment; lets exactly one following block tag
    # through without raising.
    last_nobreak = False

    while current < len(html):
        rest = html[current:]

        m = _MESSAGE_NO_BREAK_COMMENT.match(rest)
        if m:
            last_nobreak = True
            current += m.end()
            continue

        m = _NBSP.match(rest)
        if m:
            parts.append((MakeNameClosure('SPACE'), m.group()))
            current += m.end()
            continue

        m = _REPLACEABLE.match(rest)
        if m:
            # Replaceables allow - but placeholders don't, so replace - with _
            ph_name = MakeNameClosure('X_%s_X' %
                                      m.group('name').replace('-', '_'))
            parts.append((ph_name, m.group()))
            current += m.end()
            continue

        m = _SPECIAL_ELEMENT.match(rest)
        if m:
            if not include_block_tags:
                if last_nobreak:
                    last_nobreak = False
                else:
                    raise exception.BlockTagInTranslateableChunk(html)
            element_name = 'block'  # for simplification
            # Get the appropriate group name: the first named group with a
            # non-empty match (or the last group if none is non-empty).
            named_groups = m.groupdict()
            for group in named_groups:
                if named_groups[group]:
                    break
            # Everything before and after the inner group becomes a
            # placeholder; the inner group itself stays as translateable text.
            parts.append((MakeNameClosure(element_name, 'begin'),
                          html[current:current + m.start(group)]))
            parts.append(m.group(group))
            parts.append((MakeNameClosure(element_name, 'end'),
                          html[current + m.end(group):current + m.end()]))
            current += m.end()
            continue

        m = _ELEMENT.match(rest)
        if m:
            element_name = m.group('element').lower()
            if not include_block_tags and element_name not in _INLINE_TAGS:
                if last_nobreak:
                    last_nobreak = False
                else:
                    raise exception.BlockTagInTranslateableChunk(rest)
            if element_name in _HTML_PLACEHOLDER_NAMES:  # use meaningful names
                element_name = _HTML_PLACEHOLDER_NAMES[element_name]

            # Make a name for the placeholder
            tag_kind = ''
            if not m.group('empty'):
                if m.group('closing'):
                    tag_kind = 'end'
                else:
                    tag_kind = 'begin'
            parts.append((MakeNameClosure(element_name, tag_kind), m.group()))
            current += m.end()
            continue

        # Plain text: extend the current text chunk or start a new one.
        if parts and isinstance(parts[-1], string_types):
            parts[-1] += html[current]
        else:
            parts.append(html[current])
        current += 1

    # Resolve the final names.  The closures must be called in document order
    # because END_ names are popped off the stacks filled by BEGIN_ names.
    text_chunks = []
    placeholders = []
    for part in parts:
        if isinstance(part, tuple):
            final_name = part[0]()
            original = part[1]
            text_chunks.append(final_name)
            placeholders.append(
                tclib.Placeholder(final_name, original, '(HTML code)'))
        else:
            text_chunks.append(part)
    msg_text = ''.join(text_chunks)

    msg = tclib.Message(text=msg_text,
                        placeholders=placeholders,
                        description=description)
    # Unescape the text chunks in place in the list returned by GetContent().
    content = msg.GetContent()
    for ix, item in enumerate(content):
        if isinstance(item, string_types):
            content[ix] = util.UnescapeHtml(item, replace_nbsp=False)

    return msg
Exemple #24
0
    def testProdFailures(self):
        # Checks the trees built for a handful of specific message shapes.
        # NOTE(review): judging by the name these are presumably regression
        # cases for messages that failed in production -- confirm.
        # PLACEHOLDER_NODE, VAR_NODE, assertTreesEqual and assertBuildTree are
        # defined elsewhere in this file.
        p1 = tclib.Placeholder(u'USERNAME', '%s', 'foo')

        # Case 1: plural whose option text contains literal '<' and '>'
        # characters, with a placeholder inside the 'other' option.
        msg = tclib.Message()
        msg.AppendText(u'{LINE_COUNT, plural,\n      =1 {<1 line not shown>}\n'
                       '      other {<')
        msg.AppendPlaceholder(p1)
        msg.AppendText(u' lines not shown>}\n}')
        tree, _ = pl.BuildTreeFromMessage(msg)
        self.assertTreesEqual(
            tree,
            pl.Plural('{LINE_COUNT, plural,\n      ', [
                pl.PluralOption('=1 {', [pl.RawText('<1 line not shown>')]),
                pl.PluralOption('other {', [
                    pl.RawText('<'), PLACEHOLDER_NODE,
                    pl.RawText(' lines not shown>')
                ])
            ]))

        # Case 2: plural with a whitespace-only line before the first option,
        # the same placeholder used in both options, and an 'other{' option
        # written without a space before the brace.
        msg = tclib.Message()
        msg.AppendText(u'{1, plural,\n   \n             =1 {Rated ')
        msg.AppendPlaceholder(p1)
        msg.AppendText(u' by one user.}\n      other{Rated ')
        msg.AppendPlaceholder(p1)
        msg.AppendText(u' by # users.}}')
        tree, _ = pl.BuildTreeFromMessage(msg)
        self.assertTreesEqual(
            tree,
            pl.Plural('{1, plural,\n   \n             ', [
                pl.PluralOption('=1 {', [
                    pl.RawText('Rated '), PLACEHOLDER_NODE,
                    pl.RawText(' by one user.')
                ]),
                pl.PluralOption('other{', [
                    pl.RawText('Rated '), PLACEHOLDER_NODE,
                    pl.RawText(' by # users.')
                ]),
            ]))

        # Case 3: plural with an 'offset:2' clause whose options contain
        # '{VAR}' variables mixed with raw text; the offset clause is expected
        # to stay part of the plural's leading text.
        self.assertBuildTree(
            '{count, plural, offset:2\n'
            '        =1 {{VAR}}\n'
            '        =2 {{VAR}, {VAR}}\n'
            '        other {{VAR}, {VAR}, and # more}\n'
            '      }',
            pl.Plural('{count, plural, offset:2\n        ', [
                pl.PluralOption('=1 {', [VAR_NODE]),
                pl.PluralOption(
                    '=2 {', [VAR_NODE, pl.RawText(', '), VAR_NODE]),
                pl.PluralOption('other {', [
                    VAR_NODE,
                    pl.RawText(', '), VAR_NODE,
                    pl.RawText(', and # more')
                ]),
            ]))

        # Case 4: compact single-line plural with no spaces after the commas
        # or before the option braces.
        self.assertBuildTree(
            '{NUM_POPUPS,plural,=1{Pop-up blocked} other{# pop-ups blocked}}',
            pl.Plural('{NUM_POPUPS,plural,', [
                pl.PluralOption('=1{', [pl.RawText('Pop-up blocked')]),
                pl.PluralOption('other{', [pl.RawText('# pop-ups blocked')])
            ]))

        # Case 5: non-plural text followed by a '${url}' variable, expected as
        # raw text plus a BasicVariable inside a NodeSequence.
        self.assertBuildTree(
            'Open ${url}',
            pl.NodeSequence([pl.RawText('Open '),
                             pl.BasicVariable('${url}')]))