def testHtmlToMessageWithBlockTags(self):
  # A <p> followed by either an <img alt=...> or an <input value=...> is
  # expected to give the same presentable content: the paragraph placeholder,
  # then the 'bingo' text wrapped in BEGIN_BLOCK/END_BLOCK.
  expected = 'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK'
  html_inputs = (
      'Hello<p>Howdie<img alt="bingo" src="image.gif">',
      'Hello<p>Howdie<input type="button" value="bingo">',
  )
  for html in html_inputs:
    presentable = tr_html.HtmlToMessage(html, True).GetPresentableContent()
    self.failUnless(presentable == expected)
def Placeholderize(self, text):
  '''Builds a tclib.Message from 'text', converting recognizable markup into
  placeholders.

  HTML placeholderizing is attempted first.  If its result contains at least
  one non-string content item (i.e. a placeholder), that message is returned
  as-is.  Otherwise the text is split on _FORMAT_SPECIFIER (printf-style and
  FormatMessage-style format specifiers) and every specifier becomes a
  placeholder named 'TODO_0001', 'TODO_0002', ... with 'TODO' as its example.

  If self.role_model is set and the split produced more than one part, the
  role model is asked for a message with the same original text; when it
  returns one, that message replaces the generated one so that its
  placeholder names and examples are used.

  Args:
    text: The text to turn into a message.

  Returns:
    The resulting message object.

  Raises:
    Whatever the underlying calls raise; the offending text is printed
    before the exception is re-raised.
  '''
  try:
    # First try HTML placeholderizing.
    # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
    html_message = tr_html.HtmlToMessage(text, True)
    for content_item in html_message.GetContent():
      if not isinstance(content_item, types.StringTypes):
        # Contained at least one placeholder, so we're done.
        return html_message

    # HTML placeholderization didn't do anything, so look for printf or
    # FormatMessage format specifiers and change them into placeholders.
    message = tclib.Message()
    fragments = _FORMAT_SPECIFIER.split(text)
    placeholders_added = 0  # Placeholder IDs are 'TODO_0001' etc.
    for fragment in fragments:
      if not _FORMAT_SPECIFIER.match(fragment):
        # Plain text; empty pieces produced by the split are dropped.
        if fragment != '':
          message.AppendText(fragment)
        continue
      placeholders_added += 1
      placeholder_id = 'TODO_%04d' % placeholders_added
      message.AppendPlaceholder(
          tclib.Placeholder(placeholder_id, fragment, 'TODO'))

    # More than one fragment means there are TODO placeholders.
    if self.role_model and len(fragments) > 1:
      uber_clique = self.role_model.UberClique()
      role_model_message = uber_clique.BestCliqueByOriginalText(
          message.GetRealContent(), '')
      if role_model_message:
        # Replace wholesale to get placeholder names and examples.
        message = role_model_message

    return message
  except:
    # Deliberately broad: this handler only adds context and always re-raises.
    print('Exception processing message with text "%s"' % text)
    raise
def testHtmlToMessageRegressions(self):
  # Regression check: the markup-free text ' - ' must come back unchanged as
  # the presentable content.
  presentable = tr_html.HtmlToMessage(' - ', True).GetPresentableContent()
  self.failUnless(presentable == ' - ')
def testTranslateableHtmlToMessage(self):
  # Exercises tr_html.HtmlToMessage on a series of HTML snippets and compares
  # GetPresentableContent() of each resulting message with the expected
  # placeholderized text.
  #
  # NOTE(review): several literals below contain whitespace that may be
  # non-breaking spaces or line breaks in the real file; they cannot be told
  # apart from plain spaces in this view.  Confirm against the original file
  # before relying on this layout.

  # [USERNAME] is presented as X_USERNAME_X and <b>/<i> as BEGIN_/END_
  # placeholders, while the unrecognized <how> is left in the text.
  msg = tr_html.HtmlToMessage(
      'Hello <b>[USERNAME]</b>, <how> <i>are</i> you?')
  pres = msg.GetPresentableContent()
  self.failUnless(pres == 'Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, '
                  '<how> BEGIN_ITALICareEND_ITALIC you?')

  # Repeated tags of the same kind get numbered placeholders (_1, _2); the
  # single <I> stays unnumbered.
  msg = tr_html.HtmlToMessage('<b>Hello</b><I>Hello</I><b>Hello</b>')
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  'BEGIN_BOLD_1HelloEND_BOLD_1BEGIN_ITALICHelloEND_ITALIC'
                  'BEGIN_BOLD_2HelloEND_BOLD_2')

  # Check that nesting (of the <font> tags) is handled correctly - i.e. that
  # the closing placeholder numbers match the opening placeholders.
  msg = tr_html.HtmlToMessage(
      '''<font size=-1><font color=#FF0000>Update!</font> '''
      '''<a href='http://desktop.google.com/whatsnew.html?hl=[$~LANG~$]'>'''
      '''New Features</a>: Now search PDFs, MP3s, Firefox web history, and '''
      '''more</font>''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  'BEGIN_FONT_1BEGIN_FONT_2Update!END_FONT_2 BEGIN_LINK'
                  'New FeaturesEND_LINK: Now search PDFs, MP3s, Firefox '
                  'web history, and moreEND_FONT_1')

  # A link wrapping bold text whose content consists only of [NUM][CAT].
  msg = tr_html.HtmlToMessage('''<a href='[$~URL~$]'><b>[NUM][CAT]</b></a>''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  'BEGIN_LINKBEGIN_BOLDX_NUM_XX_CAT_XEND_BOLDEND_LINK')

  # Trailing whitespace after the closing </font> is expected to show up as a
  # single SPACE placeholder.
  msg = tr_html.HtmlToMessage(
      '''<font size=-1><a class=q onClick='return window.qs?qs(this):1' '''
      '''href='http://[WEBSERVER][SEARCH_URI]'>Desktop</a></font> '''
      ''' ''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  '''BEGIN_FONTBEGIN_LINKDesktopEND_LINKEND_FONTSPACE''')

  # Non-ASCII text: the copyright sign is expected as u'\xa9' in the
  # presentable content.  This call passes 1 as the second argument.
  msg = tr_html.HtmlToMessage(
      '''<br><br><center><font size=-2>©2005 Google </font></center>''', 1)
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  u'BEGIN_BREAK_1BEGIN_BREAK_2BEGIN_CENTERBEGIN_FONT\xa92005'
                  u' Google END_FONTEND_CENTER')

  # Leading ' - ' text before a link is preserved as-is.
  msg = tr_html.HtmlToMessage(
      ''' - <a class=c href=[$~CACHE~$]>Cached</a>''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres == ' - BEGIN_LINKCachedEND_LINK')

  # Check that upper-case tags are handled correctly.
  msg = tr_html.HtmlToMessage(
      '''You can read the <A HREF='http://desktop.google.com/privacypolicy.'''
      '''html?hl=[LANG_CODE]'>Privacy Policy</A> and <A HREF='http://desktop'''
      '''.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy FAQ</A> online.''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres ==
                  'You can read the BEGIN_LINK_1Privacy PolicyEND_LINK_1 and '
                  'BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')

  # Check that tags with linebreaks immediately preceding them are handled
  # correctly.
  # NOTE(review): as visible here the two literals below contain spaces where
  # the comment above implies line breaks - confirm that the file still has
  # the newlines inside these triple-quoted strings.
  msg = tr_html.HtmlToMessage(
      '''You can read the <A HREF='http://desktop.google.com/privacypolicy.html?hl=[LANG_CODE]'>Privacy Policy</A> and <A HREF='http://desktop.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy FAQ</A> online.''')
  pres = msg.GetPresentableContent()
  self.failUnless(pres == '''You can read the BEGIN_LINK_1Privacy PolicyEND_LINK_1 and BEGIN_LINK_2Privacy FAQEND_LINK_2 online.''')