Beispiel #1
0
    def test_alt_title_placeholder(self):
        """Check the tokens produced for alt, title and placeholder attributes.

        For each tag the transform is expected to return three tokens: the
        markup up to and including the opening quote ('html', False), the
        attribute value 'foo' ('text', True), and the closing '">'
        ('html', False).
        """
        extractor = HTMLExtractorTransform()
        # (input markup, expected leading html token), checked in this order.
        cases = (
            ('<img alt="foo">', u'<img alt="'),
            ('<img title="foo">', u'<img title="'),
            ('<input placeholder="foo">', u'<input placeholder="'),
        )
        for markup, leading in cases:
            tokens = extractor.transform(self.vartok, [Token(markup)])
            expected = [
                Token(leading, 'html', False),
                Token(u'foo', 'text', True),
                Token(u'">', 'html', False),
            ]
            eq_(tokens, expected)
Beispiel #2
0
    def test_script_style(self):
        """Check how <style> and <script> elements are tokenized.

        The element body is expected back as a single token tagged 'style'
        or 'script' (third field False), wrapped by 'html' tokens for the
        opening and closing tags.
        """
        extractor = HTMLExtractorTransform()

        # <style> element
        css_tokens = extractor.transform(
            self.vartok, [Token('<style>TR {white-space: nowrap;}</style>')])
        css_expected = [
            Token('<style>', 'html', False),
            Token('TR {white-space: nowrap;}', 'style', False),
            Token('</style>', 'html', False),
        ]
        assert css_tokens == css_expected

        # <script> element
        js_tokens = extractor.transform(
            self.vartok, [Token('<script>console.log("foo");</script>')])
        js_expected = [
            Token('<script>', 'html', False),
            Token('console.log("foo");', 'script', False),
            Token('</script>', 'html', False),
        ]
        assert js_tokens == js_expected
Beispiel #3
0
    def test_basic(self):
        """Check the transform on an empty string and on a simple <b> element."""
        extractor = HTMLExtractorTransform()

        # An empty input token is expected back as one empty 'text' token.
        empty_tokens = extractor.transform(self.vartok, [Token("")])
        empty_expected = [Token("", "text", True)]
        assert empty_tokens == empty_expected

        # Tags come back as 'html' tokens, the enclosed text as a 'text' token.
        bold_tokens = extractor.transform(self.vartok, [Token("<b>hi</b>")])
        bold_expected = [
            Token("<b>", "html", False),
            Token("hi", "text", True),
            Token("</b>", "html", False),
        ]
        assert bold_tokens == bold_expected
Beispiel #4
0
    def test_basic(self):
        """Check the transform output for empty input and for ``<b>hi</b>``."""
        extractor = HTMLExtractorTransform()
        # Each entry pairs the input markup with the positional arguments of
        # the Token objects expected back, in order.
        scenarios = [
            ('', [(u'', 'text', True)]),
            ('<b>hi</b>', [
                (u'<b>', 'html', False),
                (u'hi', 'text', True),
                (u'</b>', 'html', False),
            ]),
        ]
        for markup, expected_args in scenarios:
            produced = extractor.transform(self.vartok, [Token(markup)])
            assert produced == [Token(*args) for args in expected_args]
Beispiel #5
0
    def test_basic(self):
        """Check the transform on an empty string and on a simple <b> element."""
        extractor = HTMLExtractorTransform()

        def extract(markup):
            # Wrap the markup in a single Token and run it through the transform.
            return extractor.transform(self.vartok, [Token(markup)])

        # Empty input: a single empty 'text' token is expected.
        eq_(extract(''), [Token(u'', 'text', True)])

        # Simple element: opening tag, enclosed text, closing tag.
        eq_(extract('<b>hi</b>'), [
            Token(u'<b>', 'html', False),
            Token(u'hi', 'text', True),
            Token(u'</b>', 'html', False),
        ])
Beispiel #6
0
    def test_script_style(self):
        """Check how <style> and <script> elements are tokenized.

        Each case lists the input markup followed by the expected opening
        tag, body, body tag ('style' or 'script') and closing tag.
        """
        extractor = HTMLExtractorTransform()
        cases = (
            ('<style>TR {white-space: nowrap;}</style>',
             u'<style>', u'TR {white-space: nowrap;}', 'style', u'</style>'),
            ('<script>console.log("foo");</script>',
             u'<script>', u'console.log("foo");', 'script', u'</script>'),
        )
        for markup, opening, body, body_tag, closing in cases:
            tokens = extractor.transform(self.vartok, [Token(markup)])
            expected = [
                Token(opening, 'html', False),
                Token(body, body_tag, False),
                Token(closing, 'html', False),
            ]
            assert tokens == expected
Beispiel #7
0
    def test_script_style(self):
        """Check the tokens produced for <style> and <script> elements."""
        extractor = HTMLExtractorTransform()

        def extract(markup):
            # Feed one Token holding the markup through the transform.
            return extractor.transform(self.vartok, [Token(markup)])

        # The CSS body is expected as one 'style' token between 'html' tags.
        assert extract("<style>TR {white-space: nowrap;}</style>") == [
            Token("<style>", "html", False),
            Token("TR {white-space: nowrap;}", "style", False),
            Token("</style>", "html", False),
        ]

        # The JavaScript body is expected as one 'script' token.
        assert extract('<script>console.log("foo");</script>') == [
            Token("<script>", "html", False),
            Token('console.log("foo");', "script", False),
            Token("</script>", "html", False),
        ]
Beispiel #8
0
    def test_alt_title(self):
        """Check the tokens produced for <img> alt and title attributes.

        The attribute value 'foo' is expected as a 'text' token (third field
        True), surrounded by 'html' tokens for the rest of the tag.
        """
        extractor = HTMLExtractorTransform()
        # (input markup, expected leading html token), checked in this order.
        cases = (
            ('<img alt="foo">', u'<img alt="'),
            ('<img title="foo">', u'<img title="'),
        )
        for markup, leading in cases:
            tokens = extractor.transform(self.vartok, [Token(markup)])
            expected = [
                Token(leading, 'html', False),
                Token(u'foo', 'text', True),
                Token(u'">', 'html', False),
            ]
            eq_(tokens, expected)
Beispiel #9
0
    def test_alt_title_placeholder(self):
        """Check the tokens produced for alt, title and placeholder attributes."""
        extractor = HTMLExtractorTransform()

        def extract(markup):
            # Feed one Token holding the markup through the transform.
            return extractor.transform(self.vartok, [Token(markup)])

        # alt attribute on <img>
        assert extract('<img alt="foo">') == [
            Token('<img alt="', "html", False),
            Token("foo", "text", True),
            Token('">', "html", False),
        ]

        # title attribute on <img>
        assert extract('<img title="foo">') == [
            Token('<img title="', "html", False),
            Token("foo", "text", True),
            Token('">', "html", False),
        ]

        # placeholder attribute on <input>
        assert extract('<input placeholder="foo">') == [
            Token('<input placeholder="', "html", False),
            Token("foo", "text", True),
            Token('">', "html", False),
        ]
Beispiel #10
0
    def test_alt_title_placeholder(self):
        """Check the tokens produced for alt, title and placeholder attributes.

        In every case the attribute value 'foo' is expected as a 'text'
        token (third field True) between two 'html' tokens holding the
        surrounding markup.
        """
        extractor = HTMLExtractorTransform()

        # alt attribute on <img>
        alt_tokens = extractor.transform(
            self.vartok, [Token('<img alt="foo">')])
        alt_expected = [
            Token(u'<img alt="', 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ]
        assert alt_tokens == alt_expected

        # title attribute on <img>
        title_tokens = extractor.transform(
            self.vartok, [Token('<img title="foo">')])
        title_expected = [
            Token(u'<img title="', 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ]
        assert title_tokens == title_expected

        # placeholder attribute on <input>
        placeholder_tokens = extractor.transform(
            self.vartok, [Token('<input placeholder="foo">')])
        placeholder_expected = [
            Token(u'<input placeholder="', 'html', False),
            Token(u'foo', 'text', True),
            Token(u'">', 'html', False),
        ]
        assert placeholder_tokens == placeholder_expected