def test_script_style(self):
    """Check how <style> and <script> elements are tokenized.

    The element body is expected as one token of type 'style' or
    'script', sitting between 'html' tokens for the opening and
    closing tags.
    """
    extractor = HTMLExtractorTransform()

    # The same extractor instance handles both inputs, style first.
    style_tokens = extractor.transform(
        self.vartok, [Token('<style>TR {white-space: nowrap;}</style>')])
    expected_style = [
        Token('<style>', 'html', False),
        Token('TR {white-space: nowrap;}', 'style', False),
        Token('</style>', 'html', False),
    ]
    assert style_tokens == expected_style

    script_tokens = extractor.transform(
        self.vartok, [Token('<script>console.log("foo");</script>')])
    expected_script = [
        Token('<script>', 'html', False),
        Token('console.log("foo");', 'script', False),
        Token('</script>', 'html', False),
    ]
    assert script_tokens == expected_script
def test_alt_title_placeholder(self):
    """Check that alt, title and placeholder values become 'text' tokens.

    For each attribute the markup before the value and the closing
    '">' are expected as 'html' tokens around a 'text' token 'foo'.
    """
    extractor = HTMLExtractorTransform()

    def check(markup, opening):
        # Transform first, then compare against the three-token split.
        produced = extractor.transform(self.vartok, [Token(markup)])
        eq_(produced, [
            Token(opening, 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ])

    check('<img alt="foo">', u'<img alt="')
    check('<img title="foo">', u'<img title="')
    check('<input placeholder="foo">', u'<input placeholder="')
def test_basic(self):
    """Check the extractor on an empty string and on a single <b> element."""
    extractor = HTMLExtractorTransform()

    def extract(markup):
        # Wrap the raw string in a single Token, as transform() is given
        # a list of tokens.
        return extractor.transform(self.vartok, [Token(markup)])

    # Empty input is expected back as one empty 'text' token.
    assert extract("") == [Token("", "text", True)]

    # Tags are expected as 'html' tokens, the enclosed text as 'text'.
    assert extract("<b>hi</b>") == [
        Token("<b>", "html", False),
        Token("hi", "text", True),
        Token("</b>", "html", False),
    ]
def test_basic(self):
    """Check the extractor on an empty string and on one bold element."""
    extractor = HTMLExtractorTransform()

    # Case 1: empty input.
    empty_result = extractor.transform(self.vartok, [Token('')])
    expected_empty = [Token(u'', 'text', True)]
    assert empty_result == expected_empty

    # Case 2: text wrapped in a single tag pair.
    bold_result = extractor.transform(self.vartok, [Token('<b>hi</b>')])
    expected_bold = [
        Token(u'<b>', 'html', False),
        Token(u'hi', 'text', True),
        Token(u'</b>', 'html', False),
    ]
    assert bold_result == expected_bold
def test_basic(self):
    """Check the extractor on an empty string and on a single <b> element."""
    extractor = HTMLExtractorTransform()

    # Each case: (input markup, expected tokens as Token argument tuples).
    cases = (
        ('', (
            (u'', 'text', True),
        )),
        ('<b>hi</b>', (
            (u'<b>', 'html', False),
            (u'hi', 'text', True),
            (u'</b>', 'html', False),
        )),
    )

    for markup, token_specs in cases:
        produced = extractor.transform(self.vartok, [Token(markup)])
        eq_(produced, [Token(*spec) for spec in token_specs])
def test_script_style(self):
    """Check how <style> and <script> elements are tokenized.

    The element body is expected as a single token typed 'style' or
    'script'; the surrounding tags are expected as 'html' tokens.
    """
    extractor = HTMLExtractorTransform()

    # (input markup, opening tag, body, body token type, closing tag)
    cases = (
        ('<style>TR {white-space: nowrap;}</style>',
         u'<style>', u'TR {white-space: nowrap;}', 'style', u'</style>'),
        ('<script>console.log("foo");</script>',
         u'<script>', u'console.log("foo");', 'script', u'</script>'),
    )

    for markup, open_tag, body, body_type, close_tag in cases:
        produced = extractor.transform(self.vartok, [Token(markup)])
        expected = [
            Token(open_tag, 'html', False),
            Token(body, body_type, False),
            Token(close_tag, 'html', False),
        ]
        assert produced == expected
def test_script_style(self):
    """Check that <style> and <script> bodies get their own token types."""
    extractor = HTMLExtractorTransform()

    def extract(markup):
        # Both inputs go through the same extractor instance.
        return extractor.transform(self.vartok, [Token(markup)])

    # CSS body is expected as one 'style' token between the tag tokens.
    assert extract("<style>TR {white-space: nowrap;}</style>") == [
        Token("<style>", "html", False),
        Token("TR {white-space: nowrap;}", "style", False),
        Token("</style>", "html", False),
    ]

    # JavaScript body is expected as one 'script' token.
    assert extract('<script>console.log("foo");</script>') == [
        Token("<script>", "html", False),
        Token('console.log("foo");', "script", False),
        Token("</script>", "html", False),
    ]
def test_alt_title(self):
    """Check that alt and title attribute values become 'text' tokens."""
    extractor = HTMLExtractorTransform()

    # (input markup, expected leading 'html' token). The value token and
    # the trailing '">' token are the same for both attributes.
    cases = [
        ('<img alt="foo">', u'<img alt="'),
        ('<img title="foo">', u'<img title="'),
    ]

    for markup, opening in cases:
        produced = extractor.transform(self.vartok, [Token(markup)])
        expected = [
            Token(opening, 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ]
        eq_(produced, expected)
def test_alt_title_placeholder(self):
    """Check that alt, title and placeholder values become 'text' tokens."""
    extractor = HTMLExtractorTransform()

    # alt="..."
    alt_tokens = extractor.transform(self.vartok, [Token('<img alt="foo">')])
    expected_alt = [
        Token('<img alt="', "html", False),
        Token("foo", "text", True),
        Token('">', "html", False),
    ]
    assert alt_tokens == expected_alt

    # title="..."
    title_tokens = extractor.transform(
        self.vartok, [Token('<img title="foo">')]
    )
    expected_title = [
        Token('<img title="', "html", False),
        Token("foo", "text", True),
        Token('">', "html", False),
    ]
    assert title_tokens == expected_title

    # placeholder="..."
    placeholder_tokens = extractor.transform(
        self.vartok, [Token('<input placeholder="foo">')]
    )
    expected_placeholder = [
        Token('<input placeholder="', "html", False),
        Token("foo", "text", True),
        Token('">', "html", False),
    ]
    assert placeholder_tokens == expected_placeholder
def test_alt_title_placeholder(self):
    """Check that alt, title and placeholder values become 'text' tokens."""
    extractor = HTMLExtractorTransform()

    def split_around_foo(opening):
        # Expected shape in every case: markup up to the opening quote,
        # the attribute value, then the closing quote and bracket.
        return [
            Token(opening, 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ]

    produced = extractor.transform(self.vartok, [Token('<img alt="foo">')])
    assert produced == split_around_foo(u'<img alt="')

    produced = extractor.transform(self.vartok, [Token('<img title="foo">')])
    assert produced == split_around_foo(u'<img title="')

    produced = extractor.transform(
        self.vartok, [Token('<input placeholder="foo">')])
    assert produced == split_around_foo(u'<input placeholder="')
def lint(self, vartok, linted_entry):
    """Compare the HTML markup of each translated string with its msgid.

    Every string in ``linted_entry.strs`` that has a non-empty
    ``msgstr_string`` is tokenized with ``HTMLExtractorTransform``. The
    'html' tokens of the msgid and of the msgstr (those whose text does
    not start with '&'), sorted by text, are compared pairwise.

    :arg vartok: passed through unchanged to
        ``HTMLExtractorTransform.transform``
    :arg linted_entry: entry providing ``strs`` and ``poentry``

    :returns: list of ``LintMessage`` warnings:

        * one ``self.num_error`` message, returned immediately, if the
          msgid, msgid_plural or msgstr raises ``HTMLParseError``
        * an empty list if the msgid and msgid_plural markup differ
        * otherwise at most one ``self.num`` message per translated
          string, describing the first differing pair of tokens
    """
    from dennis.translator import HTMLExtractorTransform, Token

    msgs = []
    html = HTMLExtractorTransform()

    def tokenize(text):
        """Return the sorted 'html' tokens of ``text``.

        Tokens whose text starts with '&' are left out.

        :raises HTMLParseError: If it's invalid HTML.
        """
        tokens = [
            token for token in html.transform(vartok, [Token(text)])
            if token.type == 'html' and not token.s.startswith('&')
        ]
        return sorted(tokens, key=lambda token: token.s)

    def first_mismatch(left_parts, right_parts):
        """Return the first differing ``(left, right)`` pair, or None.

        The shorter sequence is padded with None, so a missing token
        counts as a difference.
        """
        pairs = izip_longest(left_parts, right_parts, fillvalue=None)
        for left, right in pairs:
            if not left or not right or not left == right:
                return (left, right)
        return None

    def parse_error(template, exc):
        """Build the 'invalid html' warning for a string that failed to parse."""
        return LintMessage(
            WARNING, linted_entry.poentry.linenum, 0, self.num_error,
            template.format(exc), linted_entry.poentry)

    for trstr in linted_entry.strs:
        if not trstr.msgstr_string:
            # No translation for this string, so nothing to compare.
            continue

        try:
            msgid_parts = tokenize(trstr.msgid_strings[0])
        except HTMLParseError as exc:
            msgs.append(parse_error(
                u'invalid html: msgid has invalid html {0}', exc))
            return msgs

        if len(trstr.msgid_strings) > 1:
            # If this is a plural, then we check to see if the two
            # msgid strings match each other. If they do not, linting
            # stops and an empty list is returned, because it is unclear
            # which of the two the translation should be compared to.
            try:
                msgid_plural_parts = tokenize(trstr.msgid_strings[1])
            except HTMLParseError as exc:
                msgs.append(parse_error(
                    u'invalid html: msgid_plural has invalid html {0}',
                    exc))
                return msgs

            if first_mismatch(msgid_parts, msgid_plural_parts) is not None:
                return []

        try:
            msgstr_parts = tokenize(trstr.msgstr_string)
        except HTMLParseError as exc:
            msgs.append(parse_error(
                u'invalid html: msgstr has invalid html {0}', exc))
            return msgs

        mismatch = first_mismatch(msgid_parts, msgstr_parts)
        if mismatch is not None:
            # Only the first difference is reported for each string.
            left, right = mismatch
            msgs.append(
                LintMessage(
                    WARNING, linted_entry.poentry.linenum, 0, self.num,
                    u'different html: "{0}" vs. "{1}"'.format(
                        left.s if left else u'',
                        right.s if right else u''),
                    linted_entry.poentry))

    return msgs
def _trans(text):
    """Run ``text`` through a fresh HTMLExtractorTransform and return the result.

    NOTE(review): ``self`` is not a parameter of this function, so it
    has to come from an enclosing scope that is not visible here --
    confirm against the caller.
    """
    extractor = HTMLExtractorTransform()
    vartok = self.vartok
    # transform() is given a list of tokens, so wrap the raw text in one.
    return extractor.transform(vartok, [Token(text)])