Beispiel #1
0
    def test_title_attribute(self):
        """Text held in ``title`` attributes is expected in the plaintext.

        Covers one titled paragraph and two consecutive titled paragraphs;
        each result is checked with ``assertContainsSameWords``.
        """
        cases = (
            ('<p title="title">content</p>', 'title content'),
            ('<p title="title">content</p><p title="title2">content2</p>',
             'title content title2 content2'),
        )
        for markup, expected_words in cases:
            self.assertContainsSameWords(
                html_to_plaintext(markup), expected_words)
Beispiel #2
0
    def test_alt_attribute(self):
        """The ``alt`` text of an image is expected in the plaintext.

        Checked for a lone ``<img>`` and for an ``<img>`` that follows a
        paragraph.
        """
        lone_image = html_to_plaintext('<img alt="image description" />')
        self.assertContainsSameWords(lone_image, 'image description')

        paragraph_then_image = html_to_plaintext(
            '<p>content</p><img alt="image description" />')
        self.assertContainsSameWords(
            paragraph_then_image, 'content image description')
Beispiel #3
0
    def test_comments(self):
        """HTML comments are expected to add no words to the plaintext.

        The comment is placed once before the paragraph and once inside it;
        both variants are expected to yield just ``content``.
        """
        variants = (
            "<!-- comment --><p>content</p>",
            "<p>content<!-- comment --></p>",
        )
        for markup in variants:
            self.assertContainsSameWords(html_to_plaintext(markup), 'content')
Beispiel #4
0
    def test_alt_attribute(self):
        """Image ``alt`` text is expected among the words of the plaintext.

        Each pair below is (input markup, expected words).
        """
        expectations = [
            ('<img alt="image description" />', 'image description'),
            ('<p>content</p><img alt="image description" />',
             'content image description'),
        ]
        for markup, wanted in expectations:
            converted = html_to_plaintext(markup)
            self.assertContainsSameWords(converted, wanted)
Beispiel #5
0
    def test_title_attribute(self):
        """``title`` attribute values are expected next to the element text.

        First a single titled paragraph, then two titled paragraphs in a row.
        """
        single = html_to_plaintext('<p title="title">content</p>')
        self.assertContainsSameWords(single, 'title content')

        double = html_to_plaintext(
            '<p title="title">content</p><p title="title2">content2</p>'
        )
        self.assertContainsSameWords(double, 'title content title2 content2')
Beispiel #6
0
    def test_comments(self):
        """Comment nodes are expected to be absent from the plaintext.

        Exercises a comment preceding the paragraph and a comment nested in
        the paragraph.
        """
        leading_comment = html_to_plaintext("<!-- comment --><p>content</p>")
        self.assertContainsSameWords(leading_comment, 'content')

        nested_comment = html_to_plaintext("<p>content<!-- comment --></p>")
        self.assertContainsSameWords(nested_comment, 'content')
Beispiel #7
0
    def test_non_text(self):
        """``<script>`` and ``<style>`` bodies are expected to be left out.

        Every input below is expected to produce only the word ``content``,
        including the script that itself contains a ``<p>`` element.
        """
        expected_words = 'content'
        inputs = (
            "<script>javascript</script><p>content</p>",
            "<style>css</style><p>content<p>",
            "<script>javascript<p>javascript</p></script><p>content</p>",
        )
        for markup in inputs:
            self.assertContainsSameWords(
                html_to_plaintext(markup), expected_words)
Beispiel #8
0
    def test_other_attributes(self):
        """Only element text is expected for ``href``/``on*``/``lang`` cases.

        Each pair is (input markup, expected words); the expected words are
        the text inside the element in every case.
        """
        cases = (
            ('<a href="http://example.com">example.com</a>', 'example.com'),
            ('<body onload="javascript:alert(\'hello\')">content</body>',
             'content'),
            ('<span lang="en" onclick="javascript:void(0)">content</span>',
             'content'),
        )
        for markup, expected_words in cases:
            self.assertContainsSameWords(
                html_to_plaintext(markup), expected_words)
Beispiel #9
0
    def test_non_text(self):
        """Script and style contents are expected to be missing from output.

        Three inputs are converted in turn; each is expected to yield only
        ``content``.
        """
        with_script = html_to_plaintext(
            "<script>javascript</script><p>content</p>"
        )
        self.assertContainsSameWords(with_script, 'content')

        with_style = html_to_plaintext("<style>css</style><p>content<p>")
        self.assertContainsSameWords(with_style, 'content')

        script_holding_markup = html_to_plaintext(
            "<script>javascript<p>javascript</p></script><p>content</p>"
        )
        self.assertContainsSameWords(script_holding_markup, 'content')
Beispiel #10
0
    def test_other_attributes(self):
        """Link targets and event-handler attributes are not expected as text.

        The expected words are the element text only for an ``href``, an
        ``onload`` and a ``lang``/``onclick`` example.
        """
        link = html_to_plaintext('<a href="http://example.com">example.com</a>')
        self.assertContainsSameWords(link, 'example.com')

        body = html_to_plaintext(
            '<body onload="javascript:alert(\'hello\')">content</body>'
        )
        self.assertContainsSameWords(body, 'content')

        span = html_to_plaintext(
            '<span lang="en" onclick="javascript:void(0)">content</span>'
        )
        self.assertContainsSameWords(span, 'content')
    def prepare(self, obj):
        """Return the index data for ``obj``, enriched from its published node.

        Starts from the parent ``prepare()`` result. When ``obj.content`` has
        a published node, ``title``, ``text`` and ``get_absolute_url`` are
        overwritten from that node's layout; ``text`` is the plaintext of the
        rendered ``content`` root joined with the layout title and summary.
        Without a published node the parent data is returned unchanged.
        """
        self.prepared_data = super(BlogIndex, self).prepare(obj)

        lookup_request = fake_request()
        published_node = obj.content.get_published_node(lookup_request)
        if published_node is None:
            # prepare() has to work on unpublished blogs because haystack
            # filters them out at query time, not index time.
            return self.prepared_data

        layout = published_node.content
        render_context = {
            'request': fake_request(),
            'root_node_override': published_node,
        }
        rendered = render_root(render_context, obj, 'content')
        text_parts = [html_to_plaintext(rendered), layout.title, layout.summary]

        self.prepared_data['title'] = layout.title
        self.prepared_data['text'] = ' '.join(text_parts)
        absolute_url = obj.get_absolute_url_with_layout(layout)
        self.prepared_data['get_absolute_url'] = absolute_url

        return self.prepared_data
    def prepare(self, obj):
        """Build the search-index data for a blog object.

        The parent ``prepare()`` result is stored on ``self.prepared_data``.
        If ``obj.content.get_published_node`` yields a node, its layout
        supplies ``title`` and ``get_absolute_url``, and ``text`` becomes the
        plaintext of the rendered ``content`` root plus the layout title and
        summary. If it yields ``None`` the parent data is returned untouched.
        """
        data = self.prepared_data = super(BlogIndex, self).prepare(obj)

        node = obj.content.get_published_node(fake_request())
        if node is None:
            # Unpublished blogs still reach prepare(): haystack filters them
            # out at query time, not index time.
            return data

        layout = node.content
        markup = render_root(
            {'request': fake_request(), 'root_node_override': node},
            obj,
            'content',
        )
        pieces = [html_to_plaintext(markup), layout.title, layout.summary]

        data['title'] = layout.title
        data['text'] = ' '.join(pieces)
        data['get_absolute_url'] = obj.get_absolute_url_with_layout(layout)

        return self.prepared_data
Beispiel #13
0
 def prepare_text(self, obj):
     """Return the space-joined search text for ``obj``.

     The text is made of the title, the keywords from
     ``prepare_keywords``, the description and the plaintext of the
     rendered ``root_node``, in that order.
     """
     render_context = {
         '_current_page': obj.page_ptr,
         'page': obj.page_ptr,
     }
     rendered = render_root(render_context, obj, 'root_node')
     plaintext = html_to_plaintext(rendered)
     keyword_text = ' '.join(self.prepare_keywords(obj))
     pieces = [obj.title, keyword_text, obj.description, plaintext]
     return ' '.join(pieces)
Beispiel #14
0
    def test_basic_conversion(self):
        """Element text is expected to survive conversion to plaintext.

        Pairs of (markup, expected words) go from a single paragraph up to
        nested markup spread over two lines.
        """
        cases = (
            ("<p>content</p>", 'content'),
            ("<p>content with multiple words</p>",
             'content with multiple words'),
            ("<p>multiple</p><p>content</p>", 'multiple content'),
            ('''<div><p>complex</p><p>content</p></div>
                <p>with <a href="#">encapsulation</a></p>''',
             'complex content with encapsulation'),
        )
        for markup, expected_words in cases:
            self.assertContainsSameWords(
                html_to_plaintext(markup), expected_words)
Beispiel #15
0
 def prepare_text(self, obj):
     """Return the space-joined search text for ``obj``.

     Joins, in order: title, plaintext of ``obj.content``, categories,
     authors, tags, slug, image caption and model subtitle.
     """
     plain_body = html_to_plaintext(obj.content)
     category_text = ' '.join(prepare_attribute_list(obj, 'categories'))
     author_text = ' '.join(prepare_attribute_list(obj, 'authors'))
     tag_text = ' '.join(map(force_text, obj.tags_list))
     pieces = (
         obj.title,
         plain_body,
         category_text,
         author_text,
         tag_text,
         obj.slug,
         obj.image_caption,
         obj.model_subtitle,
     )
     return ' '.join(pieces)
Beispiel #16
0
    def test_basic_conversion(self):
        """Plain element text is expected to come through the conversion.

        Walks from a one-word paragraph to nested, multi-line markup and
        checks each result with ``assertContainsSameWords``.
        """
        one_word = html_to_plaintext("<p>content</p>")
        self.assertContainsSameWords(one_word, 'content')

        several_words = html_to_plaintext("<p>content with multiple words</p>")
        self.assertContainsSameWords(
            several_words, 'content with multiple words')

        two_paragraphs = html_to_plaintext("<p>multiple</p><p>content</p>")
        self.assertContainsSameWords(two_paragraphs, 'multiple content')

        nested = html_to_plaintext(
            '''<div><p>complex</p><p>content</p></div>
                <p>with <a href="#">encapsulation</a></p>'''
        )
        self.assertContainsSameWords(
            nested, 'complex content with encapsulation')
    def prepare(self, obj):
        """Return the index data for ``obj``, enriched from its published node.

        Starts from the parent ``prepare()`` result. When ``obj.content`` has
        a published node, ``title``, ``text`` and ``get_absolute_url`` are
        overwritten from that node's layout; ``text`` is the plaintext of the
        rendered ``content`` root joined with the layout title and summary.

        When ``get_published_node`` returns ``None`` the parent data is
        returned unchanged instead of failing with ``AttributeError`` on
        ``node.content``.
        """
        self.prepared_data = super(BlogIndex, self).prepare(obj)
        request = fake_request()

        node = obj.content.get_published_node(request)
        if node is None:
            # Nothing published to render (e.g. an unpublished blog being
            # indexed); keep the inherited data rather than dereferencing
            # None below.
            return self.prepared_data

        blog_layout = node.content
        ctx = {
            'request': fake_request(),
            'root_node_override': node,
        }
        html = render_root(ctx, obj, 'content')
        content = [
            html_to_plaintext(html),
            blog_layout.title,
            blog_layout.summary,
        ]

        self.prepared_data['title'] = blog_layout.title
        self.prepared_data['text'] = ' '.join(content)
        self.prepared_data['get_absolute_url'] = obj.get_absolute_url_with_layout(blog_layout)

        return self.prepared_data
    def prepare(self, obj):
        """Build the search-index data for a blog object.

        The parent ``prepare()`` result is stored on ``self.prepared_data``.
        If ``obj.content.get_published_node`` yields a node, that node's
        layout supplies ``title`` and ``get_absolute_url``, and ``text``
        becomes the plaintext of the rendered ``content`` root joined with
        the layout title and summary.

        If no published node exists the parent data is returned as is; the
        method no longer raises ``AttributeError`` by reading ``.content``
        from ``None``.
        """
        self.prepared_data = super(BlogIndex, self).prepare(obj)
        request = fake_request()

        node = obj.content.get_published_node(request)
        if node is None:
            # No published node to render from: fall back to the data the
            # parent class prepared.
            return self.prepared_data

        blog_layout = node.content
        ctx = {
            'request': fake_request(),
            'root_node_override': node,
        }
        html = render_root(ctx, obj, 'content')
        content = [
            html_to_plaintext(html),
            blog_layout.title,
            blog_layout.summary,
        ]

        self.prepared_data['title'] = blog_layout.title
        self.prepared_data['text'] = ' '.join(content)
        self.prepared_data[
            'get_absolute_url'] = obj.get_absolute_url_with_layout(blog_layout)

        return self.prepared_data
Beispiel #19
0
 def prepare_text(self, obj):
     """Return the space-joined search text for ``obj``.

     ``root_node`` is rendered with an empty context and converted to
     plaintext; the result is appended after the title, the keywords from
     ``prepare_keywords`` and the description.
     """
     rendered = render_root({}, obj, "root_node")
     plaintext = html_to_plaintext(rendered)
     keyword_text = " ".join(self.prepare_keywords(obj))
     pieces = (obj.title, keyword_text, obj.description, plaintext)
     return " ".join(pieces)
Beispiel #20
0
 def prepare_text(self, obj):
     """Return the space-joined search text for ``obj``.

     ``root_node`` is rendered with ``obj.page_ptr`` supplied under both
     ``_current_page`` and ``page``, then converted to plaintext. The
     result follows the title, keywords and description in the joined text.
     """
     render_context = {
         "_current_page": obj.page_ptr,
         "page": obj.page_ptr,
     }
     plaintext = html_to_plaintext(
         render_root(render_context, obj, "root_node")
     )
     keyword_text = " ".join(self.prepare_keywords(obj))
     return " ".join(
         [obj.title, keyword_text, obj.description, plaintext]
     )