Ejemplo n.º 1
0
    def _generate_text_metadata(topic: Topic) -> None:
        """Populate ``topic.content_metadata`` for a text topic.

        The topic's ``rendered_html`` is parsed as an HTML fragment and
        flattened to plain text; that text's word count and a short excerpt of
        it are then stored on the topic. Nothing is returned: the topic is
        updated in place.
        """
        fragment = HTMLParser().parseFragment(topic.rendered_html)

        # concatenate every piece of text found in the parsed fragment, then
        # clean it up (unicode sanitizing, whitespace trimming, etc.)
        plain_text = simplify_string(''.join(fragment.itertext()))

        # build the excerpt first, then the word count, from the same cleaned text
        summary = truncate_string(plain_text, length=200, truncate_at_chars=' ')
        total_words = word_count(plain_text)

        topic.content_metadata = {
            'word_count': total_words,
            'excerpt': summary,
        }