def test_shorten_html_text(self):
    """Input within the word limit must come back from
    ``utils.truncate_html_words`` unchanged."""
    # Two words against a limit of ten.
    two_words = 'plain text'
    self.assertEqual(utils.truncate_html_words(two_words, 10), two_words)

    # Repeating a word with no separator yields one long word.
    one_long_word = 'single' * 10
    self.assertEqual(utils.truncate_html_words(one_long_word, 10),
                     one_long_word)
Example #2
0
 def _get_summary(self):
     """Return the stored ``_summary`` when the instance has one; otherwise
     return the content passed through ``truncate_html_words`` with a limit
     of 50."""
     if not hasattr(self, "_summary"):
         return truncate_html_words(self.content, 50)
     return self._summary
Example #3
0
def insert_read_more_link(instance):
    """
    Insert a rendered "read more" link into the last element of the summary.

    Only objects whose exact type is ``contents.Article`` are handled. The
    summary is the instance's ``_summary`` when truthy, otherwise the content
    truncated with SUMMARY_MAX_LENGTH. The link is added, and the result
    stored in ``instance.metadata['summary']``, only when that summary
    differs from the full content.

    :param instance: Content instance.
    :return: None
    """
    if type(instance) != contents.Article:
        return

    settings = instance.settings
    site_url = settings.get('SITEURL')
    max_length = settings.get('SUMMARY_MAX_LENGTH')
    link_text = settings.get('READ_MORE_LINK_TEXT')
    template_source = settings.get(
        'READ_MORE_LINK_TEMPLATE',
        '<a class="read-more" href="{{ url }}">{{ text }}</a>',
    )

    summary = getattr(instance, '_summary', None)
    if not summary:
        summary = truncate_html_words(instance.content, max_length)

    # Nothing was cut off, so there is nothing more to read.
    if summary == instance.content:
        return

    template = jinja2.Template(template_source)
    link_html = template.render(
        url=f'{site_url}/{instance.url}',
        text=link_text,
    )
    instance.metadata['summary'] = insert_into_last_element(summary, link_html)
def insert_read_more_link(instance):
    """
    Insert an inline "read more" link into the last element of the summary.

    Does nothing unless ``instance`` is exactly a ``contents.Article`` and
    the SUMMARY_MAX_LENGTH, READ_MORE_LINK and READ_MORE_LINK_FORMAT settings
    are all truthy. The updated summary is written to ``instance._summary``.

    :param instance: content object to update
    :return: None
    """
    # only deals with Article type
    if type(instance) != contents.Article:
        return

    max_length = instance.settings.get('SUMMARY_MAX_LENGTH')
    link_text = instance.settings.get('READ_MORE_LINK', None)
    link_format = instance.settings.get(
        'READ_MORE_LINK_FORMAT',
        '<a class="read-more" href="/{url}">{text}</a>',
    )
    if not max_length or not link_text or not link_format:
        return

    # Prefer an existing non-empty summary over a freshly truncated one.
    summary = getattr(instance, '_summary', None)
    if not summary:
        summary = truncate_html_words(instance.content, max_length)

    if summary == instance.content:
        return

    link_html = link_format.format(url=instance.url, text=link_text)
    instance._summary = insert_into_last_element(summary, link_html)
Example #5
0
def insert_read_more_link(instance):
    """
    Insert an inline "read more" link into the last element of the summary.

    Does nothing unless ``instance`` is exactly a ``contents.Article`` and
    the SUMMARY_MAX_LENGTH, READ_MORE_LINK and READ_MORE_LINK_FORMAT settings
    are all truthy. The summary is the instance's ``_summary`` when it is
    non-empty, otherwise the content truncated with SUMMARY_MAX_LENGTH. The
    link is appended, and the result stored in ``instance._summary``, only
    when that summary differs from the full content.

    :param instance: content object to update
    :return: None
    """

    # only deals with Article type
    if type(instance) != contents.Article:
        return

    SUMMARY_MAX_LENGTH = instance.settings.get('SUMMARY_MAX_LENGTH')
    READ_MORE_LINK = instance.settings.get('READ_MORE_LINK', None)
    READ_MORE_LINK_FORMAT = instance.settings.get(
        'READ_MORE_LINK_FORMAT',
        '<a class="read-more" href="/{url}">{text}</a>')

    if not (SUMMARY_MAX_LENGTH and READ_MORE_LINK and READ_MORE_LINK_FORMAT):
        return

    if hasattr(instance, '_summary') and instance._summary:
        summary = instance._summary
    else:
        summary = truncate_html_words(instance.content, SUMMARY_MAX_LENGTH)

    # Compare for inequality: "<" on strings is a lexicographic ordering and
    # says nothing about whether the summary is a shortened form of the
    # content, so it could skip the link depending on the characters
    # involved.
    if summary != instance.content:
        read_more_link = READ_MORE_LINK_FORMAT.format(url=instance.url,
                                                      text=READ_MORE_LINK)
        instance._summary = insert_into_last_element(summary, read_more_link)
Example #6
0
 def _get_summary(self):
     """Return the article summary.

     A ``_summary`` attribute wins when present. Otherwise the content is
     truncated with the SUMMARY_MAX_LENGTH setting, or returned whole when
     that setting is falsy."""
     if hasattr(self, '_summary'):
         return self._summary

     max_length = self.settings['SUMMARY_MAX_LENGTH']
     if not max_length:
         return self.content
     return truncate_html_words(self.content, max_length)
Example #7
0
 def _get_summary(self):
     """Return the article summary.

     A ``_summary`` attribute wins when present. Otherwise the content is
     truncated with the SUMMARY_MAX_LENGTH setting, or returned whole when
     that setting is falsy."""
     if hasattr(self, '_summary'):
         return self._summary

     max_length = self.settings['SUMMARY_MAX_LENGTH']
     if not max_length:
         return self.content
     return truncate_html_words(self.content, max_length)
Example #8
0
    def __init__(self, content, metadatas={}, settings={}, filename=None):
        """Build a page from raw content, its metadata and the settings.

        Each metadata entry becomes a lower-cased attribute. Whatever the
        metadata does not provide (author, lang, slug, save_as, date_format,
        summary) is derived from ``settings`` or from other attributes.

        :param content: raw content, kept as ``self._content``
        :param metadatas: mapping of metadata names to values
        :param settings: settings mapping; DEFAULT_LANG must be a string
        :param filename: source file name, stored only when truthy
        :raises AttributeError: if neither a slug nor a title is available
        """
        self._content = content
        self.translations = []

        self.status = "published"  # default value
        for key, value in metadatas.items():
            setattr(self, key.lower(), value)

        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = settings['AUTHOR']

        default_lang = settings.get('DEFAULT_LANG').lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang

        self.in_default_lang = (self.lang == default_lang)

        if not hasattr(self, 'slug'):
            self.slug = slugify(self.title)

        # Stays None when save_as comes from the metadata: no clean URL is
        # derived in that case and url falls back to save_as below instead
        # of failing on an unbound name.
        clean_url = None
        if not hasattr(self, 'save_as'):
            if self.in_default_lang:
                self.save_as = '%s.html' % self.slug
                clean_url = '%s/' % self.slug
            else:
                # Translations carry the language code in their name.
                self.save_as = '%s-%s.html' % (self.slug, self.lang)
                clean_url = '%s-%s/' % (self.slug, self.lang)

        if settings.get('CLEAN_URLS', False) and clean_url is not None:
            self.url = clean_url
        else:
            self.url = self.save_as

        if filename:
            self.filename = filename

        if not hasattr(self, 'date_format'):
            if self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        if hasattr(self, 'date'):
            self.locale_date = self.date.strftime(
                self.date_format.encode('ascii',
                                        'xmlcharrefreplace')).decode('utf')

        if not hasattr(self, 'summary'):
            # Same value the former property(...).__get__(self, Page) call
            # produced: the getter was invoked immediately on this instance.
            self.summary = truncate_html_words(self.content, 50)

        # store the settings ref.
        self._settings = settings
Example #9
0
def truncate(generator):
    """Set ``_summary`` on every article of ``generator``.

    When the content matches the READ_MORE_RE pattern (default: an HTML
    comment containing "more"), the summary is everything before the first
    match. Otherwise the content is truncated via ``truncate_html_words``
    with the SUMMARY_MAX_LENGTH setting.
    """
    settings = generator.settings
    marker = settings.get('READ_MORE_RE', r'<!--\s*?more\s*?-->')
    # re.S lets the leading group span line breaks.
    before_marker = re.compile(r'^(.*?)' + marker, re.S)
    max_length = settings.get('SUMMARY_MAX_LENGTH')

    for article in tuple(generator.articles):
        content = article.content
        found = before_marker.search(content)
        if found is None:
            article._summary = truncate_html_words(content, max_length)
        else:
            article._summary = found.group(1)
Example #10
0
    def test_truncate_html_words(self):
        # Each pair is (input, expected output for a 20-word limit).
        long_text = 'word ' * 100
        kept = 'word ' * 20 + '...'
        span_open = '<span\nstyle="\n...\n">'
        cases = (
            ('short string', 'short string'),
            (long_text, kept),
            ('<p>' + long_text + '</p>', '<p>' + kept + '</p>'),
            (span_open + long_text + '</span>', span_open + kept + '</span>'),
            ('<br>' + long_text, '<br>' + kept),
            ('<!-- comment -->' + long_text, '<!-- comment -->' + kept),
        )
        for html, expected in cases:
            self.assertEqual(utils.truncate_html_words(html, 20), expected)
Example #11
0
    def test_truncate_html_words(self):
        # Each pair is (input, expected output for a 20-word limit).
        long_text = 'word ' * 100
        kept = 'word ' * 20 + '...'
        span_open = '<span\nstyle="\n...\n">'
        cases = (
            ('short string', 'short string'),
            (long_text, kept),
            ('<p>' + long_text + '</p>', '<p>' + kept + '</p>'),
            (span_open + long_text + '</span>', span_open + kept + '</span>'),
            ('<br>' + long_text, '<br>' + kept),
            ('<!-- comment -->' + long_text, '<!-- comment -->' + kept),
        )
        for html, expected in cases:
            self.assertEqual(utils.truncate_html_words(html, 20), expected)
Example #12
0
 def test_summary_max_length(self):
     # With no explicit summary in the metadata, SUMMARY_MAX_LENGTH drives
     # the generated one: None keeps the whole content, a number truncates.
     page_kwargs = self._copy_page_kwargs()
     settings = _DEFAULT_CONFIG.copy()
     page_kwargs['settings'] = settings
     del page_kwargs['metadata']['summary']

     settings['SUMMARY_MAX_LENGTH'] = None
     untruncated = Page(**page_kwargs)
     self.assertEqual(untruncated.summary, TEST_CONTENT)

     settings['SUMMARY_MAX_LENGTH'] = 10
     truncated = Page(**page_kwargs)
     self.assertEqual(truncated.summary,
                      truncate_html_words(TEST_CONTENT, 10))
Example #13
0
 def test_summary_max_length(self):
     """With no explicit summary, SUMMARY_MAX_LENGTH drives the generated
        one: None keeps the whole content, a number truncates it."""
     page_kwargs = self._copy_page_kwargs()
     settings = _DEFAULT_CONFIG.copy()
     page_kwargs['settings'] = settings
     del page_kwargs['metadata']['summary']

     settings['SUMMARY_MAX_LENGTH'] = None
     untruncated = Page(**page_kwargs)
     self.assertEqual(untruncated.summary, TEST_CONTENT)

     settings['SUMMARY_MAX_LENGTH'] = 10
     truncated = Page(**page_kwargs)
     self.assertEqual(truncated.summary,
                      truncate_html_words(TEST_CONTENT, 10))
def redo_summary(instance):
    """Store a parsed, image-free summary on ``instance.text_summary``.

    The summary source is, in order of preference: the 'summary' metadata
    entry, a non-None ``_summary`` attribute, or a non-None ``_content``
    truncated with SUMMARY_MAX_LENGTH (read from ``instance._context``,
    falling back to the module-level constant). With none of these an empty
    string is parsed. The stored value is the BeautifulSoup object itself.
    """
    if 'summary' in instance.metadata:
        summary = instance.metadata.get('summary')
    elif getattr(instance, "_summary", None) is not None:
        summary = instance._summary
    elif getattr(instance, "_content", None) is not None:
        max_length = instance._context.get("SUMMARY_MAX_LENGTH",
                                           SUMMARY_MAX_LENGTH)
        summary = truncate_html_words(instance._content, max_length)
    else:
        summary = ""

    soup = BeautifulSoup(summary, from_encoding='utf-8')
    # Drop every <img> element from the parsed tree.
    for image in soup('img'):
        image.extract()
    instance.text_summary = soup
Example #15
0
 def test_summary_end_marker(self):
     # With SUMMARY_END_MARKER set and no explicit summary, the generated
     # summary must be built with, and contain, that marker.
     marker = 'test_marker'
     page_kwargs = self._copy_page_kwargs()
     settings = get_settings()
     settings['SUMMARY_END_MARKER'] = marker
     settings['SUMMARY_MAX_LENGTH'] = 10
     page_kwargs['settings'] = settings
     del page_kwargs['metadata']['summary']

     page = Page(**page_kwargs)
     expected = truncate_html_words(TEST_CONTENT, 10, marker)
     self.assertEqual(page.summary, expected)
     self.assertIn(marker, page.summary)
Example #16
0
    def get_summary(self, siteurl):
        """Return the summary of an article.

        A ``_summary`` attribute, when present, is passed through
        ``_update_content`` together with ``siteurl``. Otherwise the content
        is returned whole when SUMMARY_MAX_LENGTH is None, or truncated with
        that setting.
        """
        if hasattr(self, "_summary"):
            summary = self._update_content(self._summary, siteurl)
        else:
            max_length = self.settings["SUMMARY_MAX_LENGTH"]
            if max_length is None:
                summary = self.content
            else:
                summary = truncate_html_words(self.content, max_length)
        return summary
Example #17
0
    def __init__(self, content, metadatas={}, settings={}, filename=None):
        """Build a page from raw content, its metadata and the settings.

        Each metadata entry becomes a lower-cased attribute. Whatever the
        metadata does not provide (author, lang, slug, save_as, date_format,
        summary) is derived from ``settings`` or from other attributes.

        :param content: raw content, kept as ``self._content``
        :param metadatas: mapping of metadata names to values
        :param settings: settings mapping; DEFAULT_LANG must be a string
        :param filename: source file name, stored only when truthy
        :raises AttributeError: if neither a slug nor a title is available
        """
        self._content = content
        self.translations = []

        self.status = "published"  # default value
        for key, value in metadatas.items():
            setattr(self, key.lower(), value)

        if not hasattr(self, "author"):
            if "AUTHOR" in settings:
                self.author = settings["AUTHOR"]

        default_lang = settings.get("DEFAULT_LANG").lower()
        if not hasattr(self, "lang"):
            self.lang = default_lang

        self.in_default_lang = self.lang == default_lang

        if not hasattr(self, "slug"):
            self.slug = slugify(self.title)

        # Stays None when save_as comes from the metadata: no clean URL is
        # derived in that case and url falls back to save_as below instead
        # of failing on an unbound name.
        clean_url = None
        if not hasattr(self, "save_as"):
            if self.in_default_lang:
                self.save_as = "%s.html" % self.slug
                clean_url = "%s/" % self.slug
            else:
                # Translations carry the language code in their name.
                self.save_as = "%s-%s.html" % (self.slug, self.lang)
                clean_url = "%s-%s/" % (self.slug, self.lang)

        if settings.get("CLEAN_URLS", False) and clean_url is not None:
            self.url = clean_url
        else:
            self.url = self.save_as

        if filename:
            self.filename = filename

        if not hasattr(self, "date_format"):
            if self.lang in settings["DATE_FORMATS"]:
                self.date_format = settings["DATE_FORMATS"][self.lang]
            else:
                self.date_format = settings["DEFAULT_DATE_FORMAT"]

        if hasattr(self, "date"):
            self.locale_date = self.date.strftime(self.date_format.encode("ascii", "xmlcharrefreplace")).decode("utf")

        if not hasattr(self, "summary"):
            # Same value the former property(...).__get__(self, Page) call
            # produced: the getter was invoked immediately on this instance.
            self.summary = truncate_html_words(self.content, 50)

        # store the settings ref.
        self._settings = settings
Example #18
0
    def __init__(self, content, metadatas={}, settings={}, filename=None):
        """Build a page from raw content, its metadata and the settings.

        Each metadata entry becomes a lower-cased attribute. Whatever the
        metadata does not provide (author, lang, slug, save_as, date_format,
        summary) is derived from ``settings`` or from other attributes.

        :param content: raw content, kept as ``self._content``
        :param metadatas: mapping of metadata names to values
        :param settings: settings mapping; DEFAULT_LANG must be a string
        :param filename: source file name, stored only when truthy
        :raises AttributeError: if neither a slug nor a title is available
        """
        self._content = content
        self.translations = []

        self.status = "published"  # default value
        for key, value in metadatas.items():
            setattr(self, key.lower(), value)

        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = settings['AUTHOR']

        default_lang = settings.get('DEFAULT_LANG').lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang

        self.in_default_lang = (self.lang == default_lang)

        if not hasattr(self, 'slug'):
            self.slug = slugify(self.title)

        # Stays None when save_as comes from the metadata: no clean URL is
        # derived in that case and url falls back to save_as below instead
        # of failing on an unbound name.
        clean_url = None
        if not hasattr(self, 'save_as'):
            if self.in_default_lang:
                self.save_as = '%s.html' % self.slug
                clean_url = '%s/' % self.slug
            else:
                # Translations carry the language code in their name.
                self.save_as = '%s-%s.html' % (self.slug, self.lang)
                clean_url = '%s-%s/' % (self.slug, self.lang)

        if settings.get('CLEAN_URLS', False) and clean_url is not None:
            self.url = clean_url
        else:
            self.url = self.save_as

        if filename:
            self.filename = filename

        if not hasattr(self, 'date_format'):
            if self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        if hasattr(self, 'date'):
            self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode('utf')

        if not hasattr(self, 'summary'):
            # A property only takes effect as a class attribute; stored on
            # the instance it would be handed back as a bare property object.
            # Compute the summary value here instead.
            self.summary = truncate_html_words(self.content, 50)

        # store the settings ref.
        self._settings = settings
Example #19
0
    def _get_summary(self):
        """Return the summary of an article.

        A ``_summary`` attribute is returned as is when present. Otherwise
        the content is returned whole when SUMMARY_MAX_LENGTH is None, or
        truncated with that setting.
        """
        if hasattr(self, '_summary'):
            summary = self._summary
        else:
            max_length = self.settings['SUMMARY_MAX_LENGTH']
            if max_length is None:
                summary = self.content
            else:
                summary = truncate_html_words(self.content, max_length)
        return summary
Example #20
0
    def get_summary(self, siteurl):
        """Return the summary of an article.

        A ``_summary`` attribute, when present, is passed through
        ``_update_content`` together with ``siteurl``. Otherwise the content
        is returned whole when SUMMARY_MAX_LENGTH is None, or truncated with
        that setting.
        """
        if hasattr(self, '_summary'):
            summary = self._update_content(self._summary, siteurl)
        else:
            max_length = self.settings['SUMMARY_MAX_LENGTH']
            if max_length is None:
                summary = self.content
            else:
                summary = truncate_html_words(self.content, max_length)
        return summary
Example #21
0
    def get_summary(self, siteurl):
        """Return the summary of an article.

        The 'summary' metadata entry wins when present. Otherwise the content
        is returned whole when SUMMARY_MAX_LENGTH is None, or truncated with
        that setting. ``siteurl`` is accepted but not used here.
        """
        if 'summary' in self.metadata:
            summary = self.metadata['summary']
        else:
            max_length = self.settings['SUMMARY_MAX_LENGTH']
            if max_length is None:
                summary = self.content
            else:
                summary = truncate_html_words(self.content, max_length)
        return summary
Example #22
0
    def get_summary(self, siteurl):
        """Return the summary of an article.

        The 'summary' metadata entry wins when present. Otherwise the content
        is returned whole when SUMMARY_MAX_LENGTH is None, or truncated with
        that setting. ``siteurl`` is accepted but not used here.
        """
        if 'summary' in self.metadata:
            summary = self.metadata['summary']
        else:
            max_length = self.settings['SUMMARY_MAX_LENGTH']
            if max_length is None:
                summary = self.content
            else:
                summary = truncate_html_words(self.content, max_length)
        return summary
Example #23
0
 def test_summary_max_length(self):
     # With no explicit summary in the metadata, SUMMARY_MAX_LENGTH drives
     # the generated one: None keeps the whole content, a positive number
     # truncates, and zero gives an empty summary.
     page_kwargs = self._copy_page_kwargs()
     settings = get_settings()
     page_kwargs["settings"] = settings
     del page_kwargs["metadata"]["summary"]

     settings["SUMMARY_MAX_LENGTH"] = None
     untruncated = Page(**page_kwargs)
     self.assertEqual(untruncated.summary, TEST_CONTENT)

     settings["SUMMARY_MAX_LENGTH"] = 10
     truncated = Page(**page_kwargs)
     self.assertEqual(truncated.summary,
                      truncate_html_words(TEST_CONTENT, 10))

     settings["SUMMARY_MAX_LENGTH"] = 0
     emptied = Page(**page_kwargs)
     self.assertEqual(emptied.summary, "")
Example #24
0
def fix_summary_ellipsis(instance):
    """
    Generate the summary with an ``&hellip;`` entity as the end marker,
    joined to the preceding text by a non-breaking space, in place of the
    '...' Pelican uses by default.

    Only exact ``contents.Article`` instances are handled, and only when
    SUMMARY_MAX_LENGTH is truthy and no non-empty ``_summary`` exists yet.

    :param instance: content object to update
    :return: None
    """
    # only deals with Article type
    if type(instance) != contents.Article:
        return

    max_length = instance.settings.get('SUMMARY_MAX_LENGTH')
    if not max_length:
        return

    # An existing, non-empty summary is left untouched.
    if getattr(instance, '_summary', None):
        return

    truncated = truncate_html_words(instance.content, max_length, '&hellip;')
    instance._summary = truncated.replace(' &hellip;', '&nbsp;&hellip;')
def summary_remove_elements(instance):
    """Replace every tag listed in ``invalid_tags`` inside the summary by its
    contents and store the result in ``instance._summary``.

    Only exact ``contents.Article`` instances are handled. The summary is
    the existing non-empty ``_summary``, or else the content truncated with
    SUMMARY_MAX_LENGTH.
    """
    if type(instance) != contents.Article:
        return

    max_length = instance.settings.get('SUMMARY_MAX_LENGTH')
    summary = getattr(instance, '_summary', None)
    if not summary:
        summary = truncate_html_words(instance.content, max_length)

    soup = BeautifulSoup(summary, 'html.parser')
    for tag in soup.findAll(True):
        if tag.name not in invalid_tags:
            continue

        # Keep the tag's children: text nodes as they are, nested markup
        # after running it through strip_tags.
        pieces = []
        for child in tag.contents:
            if not isinstance(child, NavigableString):
                child = strip_tags(unicode(child), invalid_tags)
            pieces.append(unicode(child))
        tag.replaceWith("".join(pieces))

    instance._summary = text_type(soup)
Example #26
0
def process_summary(article):
    """Keep math in the summary from being cut off, and append the mathjax
    script so that it gets rendered.

    ``article._summary`` is only rewritten when the summary contains at
    least one element with class 'math'."""
    if hasattr(article, '_summary'):
        summary = article._update_content(article._summary, article.get_siteurl())
    else:
        max_length = article.settings['SUMMARY_MAX_LENGTH']
        if max_length is None:
            summary = article.content
        else:
            summary = truncate_html_words(article.content, max_length)

    summary_parsed = BeautifulSoup(summary, 'html.parser')
    math = summary_parsed.find_all(class_='math')
    if not math:
        return

    last_math_text = math[-1].get_text()
    if len(last_math_text) > 3 and last_math_text.endswith('...'):
        # The last math element was truncated: take its full text from the
        # element at the same position in the untruncated content.
        content_parsed = BeautifulSoup(article._content, 'html.parser')
        full_math = content_parsed.find_all(class_='math')
        full_text = full_math[len(math)-1].get_text()
        math[-1].string = "%s ..." % full_text
        summary = summary_parsed.decode()

    article._summary = "%s<script type='text/javascript'>%s</script>" % (summary, process_summary.mathjax_script)
Example #27
0
def get_entry_summary(entry):
    """Return the summary text for a feed ``entry``.

    The entry's "description" is used, or an empty string when that key is
    missing. It is truncated with ``utils.truncate_html_words`` when the
    ``WEBRING_SUMMARY_WORDS_STR`` setting is positive, and HTML tags are
    stripped afterwards when the ``WEBRING_CLEAN_SUMMARY_HTML_STR`` setting
    is truthy.
    """
    # https://stackoverflow.com/a/12982689/11441
    def cleanhtml(raw_html):
        cleanr = re.compile("<.*?>")
        cleantext = re.sub(cleanr, "", raw_html)
        return cleantext

    try:
        # this will get the first of 'summary' and 'subtitle'
        summary = entry["description"]
    except KeyError:
        summary = ""

    # Truncation is governed by the word-count setting itself; it used to be
    # gated on the unrelated clean-HTML flag, which skipped truncation
    # whenever cleaning was off and truncated to a non-positive count when
    # cleaning was on.
    max_words = settings[WEBRING_SUMMARY_WORDS_STR]
    if max_words > 0:
        summary = utils.truncate_html_words(summary, max_words)

    # feedparser sanitizes html by default, but it can still contain html tags.
    if settings[WEBRING_CLEAN_SUMMARY_HTML_STR]:
        summary = cleanhtml(summary)

    return summary
Example #28
0
    def __init__(self, content, metadata=None, settings=None, filename=None, source=None):
        """Build a page from raw content, its metadata and the settings.

        DEFAULT_METADATA from the settings, overridden by ``metadata``, is
        copied onto the instance as lower-cased attributes. Attributes not
        provided that way (author, lang, slug, save_as/url/source_url,
        date_format, status, summary) are derived below.

        :param content: raw content, kept as ``self._content``
        :param metadata: mapping of metadata names to values; a falsy value
            means no metadata
        :param settings: settings mapping; a falsy value selects
            ``_DEFAULT_CONFIG``
        :param filename: source file name, stored only when truthy
        :param source: stored as ``self.source``
        """
        # init parameters
        if not metadata:
            metadata = {}
        if not settings:
            settings = _DEFAULT_CONFIG

        self._content = content
        self.translations = []
        self.source = source

        # Explicit metadata takes precedence over DEFAULT_METADATA.
        local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            setattr(self, key.lower(), value)

        # default author to the one in settings if not defined
        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = settings['AUTHOR']
            else:
                # Last resort: the USER environment variable, or 'John Doe'.
                self.author = getenv('USER', 'John Doe')
                warning(u"Author of `{0}' unknow, assuming that his name is `{1}'".format(filename, self.author))

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, from the title
        if not hasattr(self, 'slug') and hasattr(self, 'title'):
            self.slug = slugify(self.title)

        # create save_as from the slug (+lang)
        if not hasattr(self, 'save_as') and hasattr(self, 'slug'):

            # NOTE(review): this language-suffixed url is overwritten by both
            # branches of the CLEAN_URLS test below, so it never takes
            # effect, and save_as carries no language suffix either.
            # Confirm whether translations are meant to get distinct names.
            if not self.in_default_lang:
                self.url = '%s-%s' % (self.slug, self.lang)

            if settings.get('CLEAN_URLS', False):
                self.save_as = '%s/index.html' % self.slug
                self.url = '%s/' % self.slug
                self.source_url = '%s.txt' % self.slug
            else:
                self.save_as = '%s.html' % self.slug
                self.url = self.save_as
                self.source_url = '%s.txt' % self.slug

        elif hasattr(self, 'save_as'):
            # save_as came from the metadata: reuse it for both urls.
            self.url = self.save_as
            self.source_url = self.save_as

        if filename:
            self.filename = filename

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        if hasattr(self, 'date'):
            # The formatted date is decoded with the stdin encoding on
            # Windows and with 'utf' everywhere else.
            if platform == 'win32':
                self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode(stdin.encoding)
            else:
                self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode('utf')

        # manage status
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
            # Unless WITH_FUTURE_DATES is set, content dated in the future
            # is marked as a draft.
            if not settings['WITH_FUTURE_DATES']:
                if hasattr(self, 'date') and self.date > datetime.now():
                    self.status = 'draft'

        # set summary
        if not hasattr(self, 'summary'):
            self.summary = truncate_html_words(self.content, 50)
Example #29
0
def truncate_html_words(html, max_length=15):
    """Forward to ``pelican_utils.truncate_html_words``, with a default
    ``max_length`` of 15."""
    truncated = pelican_utils.truncate_html_words(html, max_length)
    return truncated
Example #30
0
    def __init__(self, content, metadata=None, settings=None,
                 filename=None):
        """Build a page from raw content, its metadata and the settings.

        DEFAULT_METADATA from the settings, overridden by ``metadata``, is
        copied onto the instance as lower-cased attributes. Attributes not
        provided that way (author, lang, slug, date_format, status, summary)
        are derived below.

        :param content: raw content, kept as ``self._content``
        :param metadata: mapping of metadata names to values; a falsy value
            means no metadata
        :param settings: settings mapping; a falsy value selects
            ``_DEFAULT_CONFIG``
        :param filename: source file name, stored only when truthy
        """
        # init parameters
        if not metadata:
            metadata = {}
        if not settings:
            settings = _DEFAULT_CONFIG

        self.settings = settings
        self._content = content
        self.translations = []

        # Explicit metadata takes precedence over DEFAULT_METADATA.
        local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            setattr(self, key.lower(), value)

        # default author to the one in settings if not defined
        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = Author(settings['AUTHOR'], settings)
            else:
                self.author = Author(getenv('USER', 'John Doe'), settings)
                # The title is optional (the slug step below checks for it),
                # so it must not be read unconditionally just to build the
                # message: that raised AttributeError instead of warning.
                origin = filename or getattr(self, 'title', None)
                warning(u"Author of `{0}' unknow, assuming that his name is "
                         "`{1}'".format(origin, self.author))

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, from the title
        if not hasattr(self, 'slug') and hasattr(self, 'title'):
            self.slug = slugify(self.title)

        if filename:
            self.filename = filename

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        # A (locale, format) tuple switches the process locale before the
        # format string is used.
        if isinstance(self.date_format, tuple):
            locale.setlocale(locale.LC_ALL, self.date_format[0])
            self.date_format = self.date_format[1]

        if hasattr(self, 'date'):
            encoded_date = self.date.strftime(
                    self.date_format.encode('ascii', 'xmlcharrefreplace'))

            # The formatted date is decoded with the stdin encoding on
            # Windows and with 'utf' everywhere else.
            if platform == 'win32':
                self.locale_date = encoded_date.decode(stdin.encoding)
            else:
                self.locale_date = encoded_date.decode('utf')

        # manage status
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
            # Unless WITH_FUTURE_DATES is set, content dated in the future
            # is marked as a draft.
            if not settings['WITH_FUTURE_DATES']:
                if hasattr(self, 'date') and self.date > datetime.now():
                    self.status = 'draft'

        # set summary
        if not hasattr(self, 'summary'):
            self.summary = truncate_html_words(self.content, 50)
Example #31
0
 def _get_summary(self):
     """Return the article summary: the content passed through
     ``truncate_html_words`` with a limit of 50."""
     shortened = truncate_html_words(self.content, 50)
     return shortened
Example #32
0
 def _get_summary(self):
     """Return the article summary: the content passed through
     ``truncate_html_words`` with a limit of 50."""
     shortened = truncate_html_words(self.content, 50)
     return shortened
Example #33
0
    def __init__(self, content, metadata=None, settings=None, filename=None):
        """Build a page from raw content, its metadata and the settings.

        DEFAULT_METADATA from the settings, overridden by ``metadata``, is
        copied onto the instance as lower-cased attributes. Attributes not
        provided that way (author, lang, slug, save_as/url, date_format,
        status, summary) are derived below.

        :param content: raw content, kept as ``self._content``
        :param metadata: mapping of metadata names to values; a falsy value
            means no metadata
        :param settings: settings mapping; a falsy value selects
            ``_DEFAULT_CONFIG``
        :param filename: source file name, stored only when truthy
        """
        # init parameters
        if not metadata:
            metadata = {}
        if not settings:
            settings = _DEFAULT_CONFIG

        self._content = content
        self.translations = []

        # Explicit metadata takes precedence over DEFAULT_METADATA.
        local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            setattr(self, key.lower(), value)

        # default author to the one in settings if not defined
        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = settings['AUTHOR']
            else:
                self.author = getenv('USER', 'John Doe')
                # The title is optional (the slug step below checks for it),
                # so it must not be read unconditionally just to build the
                # message: that raised AttributeError instead of warning.
                origin = filename or getattr(self, 'title', None)
                warning(u"Author of `{0}' unknow, assuming that his name is `{1}'".format(origin, self.author))

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, from the title
        if not hasattr(self, 'slug') and hasattr(self, 'title'):
            self.slug = slugify(self.title)

        use_clean_urls = settings.get('CLEAN_URLS', False)

        # Stays None when no name is derived here (save_as given in the
        # metadata, or no slug available); url then falls back to save_as
        # below instead of failing on an unbound name.
        clean_url = None

        # create save_as from the slug (+lang)
        if not hasattr(self, 'save_as') and hasattr(self, 'slug'):
            if self.in_default_lang:
                base_name = self.slug
            else:
                # Translations carry the language code in their name.
                base_name = '%s-%s' % (self.slug, self.lang)

            if use_clean_urls:
                self.save_as = '%s/index.html' % base_name
            else:
                self.save_as = '%s.html' % base_name
            clean_url = '%s/' % base_name

        # change the save_as regarding the settings
        if use_clean_urls and clean_url is not None:
            self.url = clean_url
        elif hasattr(self, 'save_as'):
            self.url = self.save_as

        if filename:
            self.filename = filename

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        if hasattr(self, 'date'):
            # The formatted date is decoded with the stdin encoding on
            # Windows and with 'utf' everywhere else.
            if platform == 'win32':
                self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode(stdin.encoding)
            else:
                self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode('utf')

        # manage status
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
            # Unless WITH_FUTURE_DATES is set, content dated in the future
            # is marked as a draft.
            if not settings['WITH_FUTURE_DATES']:
                if hasattr(self, 'date') and self.date > datetime.now():
                    self.status = 'draft'

        # set summary
        if not hasattr(self, 'summary'):
            self.summary = truncate_html_words(self.content, 50)
Example #34
0
 def summary(self):
     """Return the content passed through ``truncate_html_words`` with a
     limit of 50."""
     shortened = truncate_html_words(self.content, 50)
     return shortened
Example #35
0
def make_summary(text, max_words=None):
    """Return ``text`` truncated to ``max_words`` words.

    When ``max_words`` is ``None`` the text is handed back untouched;
    otherwise it is passed to ``truncate_html_words`` with '…' as the
    end text.
    """
    if max_words is not None:
        return truncate_html_words(text, max_words, end_text='…')

    return text
Example #36
0
    def __init__(self, content, metadata=None, settings=None, filename=None):
        """Populate the instance from raw content, metadata and settings.

        :param content: body, stored on ``self._content``.
        :param metadata: optional mapping; every key is lower-cased and set
            as an attribute, layered over ``settings['DEFAULT_METADATA']``.
        :param settings: optional settings mapping; ``_DEFAULT_CONFIG`` is
            used when it is missing or empty.
        :param filename: optional source file name, stored on
            ``self.filename`` when truthy.
        """
        # init parameters
        if not metadata:
            metadata = {}
        if not settings:
            settings = _DEFAULT_CONFIG

        self._content = content
        self.translations = []

        # default value; a "status" metadata entry overrides it below
        self.status = "published"

        local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            setattr(self, key.lower(), value)

        # default author to the one in settings if not defined
        if not hasattr(self, 'author') and 'AUTHOR' in settings:
            self.author = settings['AUTHOR']

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, from the title
        if not hasattr(self, 'slug') and hasattr(self, 'title'):
            self.slug = slugify(self.title)

        # Derive the clean URL from the slug (+lang) whenever a slug exists,
        # even if save_as came from the metadata. Previously clean_url was
        # only bound when save_as had to be generated, so CLEAN_URLS combined
        # with an explicit save_as (or with no slug at all) raised
        # UnboundLocalError.
        clean_url = None
        if hasattr(self, 'slug'):
            if self.in_default_lang:
                stem = self.slug
            else:
                stem = '%s-%s' % (self.slug, self.lang)
            clean_url = '%s/' % stem

            # create save_as from the slug (+lang) unless already provided
            if not hasattr(self, 'save_as'):
                self.save_as = '%s.html' % stem

        # pick the url according to the settings, falling back to save_as
        # when no clean URL could be derived
        if settings.get('CLEAN_URLS', False) and clean_url is not None:
            self.url = clean_url
        elif hasattr(self, 'save_as'):
            self.url = self.save_as

        if filename:
            self.filename = filename

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        if hasattr(self, 'date'):
            # NOTE(review): encoding the format before strftime and decoding
            # the result looks like Python 2 str handling -- confirm the
            # target interpreter before touching this line.
            self.locale_date = self.date.strftime(self.date_format.encode('ascii','xmlcharrefreplace')).decode('utf')

        # manage summary: computed once, eagerly, from the content.  This is
        # what the former property(...).__get__(self, Page) call evaluated to.
        if not hasattr(self, 'summary'):
            self.summary = truncate_html_words(self.content, 50)

        # manage status; self.status is always assigned above, so this
        # fallback is only a safety net
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
Example #37
0
    def test_truncate_html_words(self):
        """Exercise ``utils.truncate_html_words`` with a 20-word limit.

        Input within the limit must come back unchanged; longer input must
        keep 20 words followed by '…', placed before any closing tag.
        """
        words_in = 'word ' * 100
        words_out = 'word ' * 20 + '…'

        # (markup, expected) pairs: plain text first, then words wrapped in
        # or preceded by HTML tags.
        markup_cases = [
            ('short string', 'short string'),
            (words_in, words_out),
            ('<p>' + words_in + '</p>', '<p>' + words_out + '</p>'),
            ('<span\nstyle="\n…\n">' + words_in + '</span>',
             '<span\nstyle="\n…\n">' + words_out + '</span>'),
            ('<br>' + words_in, '<br>' + words_out),
            ('<!-- comment -->' + words_in, '<!-- comment -->' + words_out),
        ]
        for markup, expected in markup_cases:
            self.assertEqual(utils.truncate_html_words(markup, 20), expected)

        # One token repeated 100 times must be cut to 20 repetitions plus
        # the ellipsis.
        repeated_tokens = (
            # Hyphens and apostrophes.
            "a-b ", "it's ",
            # HTML entity references.
            "&eacute; ", "caf&eacute; ", "&egrave;lite ",
            "cafeti&eacute;re ", "&int;dx ",
            # HTML numeric character references.
            "&#xe9; ", "&#x222b;dx ",
        )
        for token in repeated_tokens:
            self.assertEqual(utils.truncate_html_words(token * 100, 20),
                             token * 20 + '…')

        # Invalid or broken HTML references must be returned untouched.
        broken_references = (
            '&invalid;', '&#9999999999;', '&#xfffffffff;',
            '&mdash text', '&#1234 text', '&#xabc text',
        )
        for broken in broken_references:
            self.assertEqual(utils.truncate_html_words(broken, 20), broken)
Example #38
0
    def test_truncate_html_words(self):
        """Exercise ``utils.truncate_html_words`` with a 20-word limit.

        Input within the limit must come back unchanged; longer input must
        keep 20 words followed by '...', placed before any closing tag.
        """
        words_in = 'word ' * 100
        words_out = 'word ' * 20 + '...'

        # (markup, expected) pairs: plain text first, then words wrapped in
        # or preceded by HTML tags.
        markup_cases = [
            ('short string', 'short string'),
            (words_in, words_out),
            ('<p>' + words_in + '</p>', '<p>' + words_out + '</p>'),
            ('<span\nstyle="\n...\n">' + words_in + '</span>',
             '<span\nstyle="\n...\n">' + words_out + '</span>'),
            ('<br>' + words_in, '<br>' + words_out),
            ('<!-- comment -->' + words_in, '<!-- comment -->' + words_out),
        ]
        for markup, expected in markup_cases:
            self.assertEqual(utils.truncate_html_words(markup, 20), expected)

        # One token repeated 100 times must be cut to 20 repetitions plus
        # the ellipsis.
        repeated_tokens = (
            # Hyphens and apostrophes.
            "a-b ", "it's ",
            # HTML entity references.
            "&eacute; ", "caf&eacute; ", "&egrave;lite ", "cafeti&eacute;re ",
        )
        for token in repeated_tokens:
            self.assertEqual(utils.truncate_html_words(token * 100, 20),
                             token * 20 + '...')
Example #39
0
    def test_truncate_html_words(self):
        """Exercise ``utils.truncate_html_words`` on ASCII, Unicode and HTML input.

        Input within the word limit must come back unchanged; longer input
        must be cut at the limit and end with the ellipsis expected by each
        case, placed before any closing tag.
        """
        words_in = 'word ' * 100
        words_out = 'word ' * 20 + '…'

        # (markup, word limit, expected) triples, checked in order.
        cases = [
            # Plain text.
            ('short string', 20, 'short string'),
            (words_in, 20, words_out),
            # Plain text with Unicode content.  The backslash continuation
            # keeps its leading whitespace inside the literal on purpose.
            ('我愿意这样,朋友——我独自远行,不但没有你,\
                 并且再没有别的影在黑暗里。', 12, '我愿意这样,朋友——我独自远行' + ' …'),
            ('Ты мелькнула, ты предстала, Снова сердце задрожало,', 3,
             'Ты мелькнула, ты' + ' …'),
            ('Trong đầm gì đẹp bằng sen', 4, 'Trong đầm gì đẹp' + ' …'),
            # Words wrapped in or preceded by HTML tags.
            ('<p>' + words_in + '</p>', 20, '<p>' + words_out + '</p>'),
            ('<span\nstyle="\n…\n">' + words_in + '</span>', 20,
             '<span\nstyle="\n…\n">' + words_out + '</span>'),
            ('<br>' + words_in, 20, '<br>' + words_out),
            ('<!-- comment -->' + words_in, 20, '<!-- comment -->' + words_out),
        ]
        for markup, limit, expected in cases:
            self.assertEqual(utils.truncate_html_words(markup, limit), expected)

        # One token repeated 100 times must be cut to 20 repetitions plus
        # the ellipsis.
        repeated_tokens = (
            # Hyphens and apostrophes.
            "a-b ", "it's ",
            # HTML entity references.
            "&eacute; ", "caf&eacute; ", "&egrave;lite ",
            "cafeti&eacute;re ", "&int;dx ",
            # HTML numeric character references.
            "&#xe9; ", "&#x222b;dx ",
        )
        for token in repeated_tokens:
            self.assertEqual(utils.truncate_html_words(token * 100, 20),
                             token * 20 + '…')

        # Invalid or broken HTML references must be returned untouched.
        broken_references = (
            '&invalid;', '&#9999999999;', '&#xfffffffff;',
            '&mdash text', '&#1234 text', '&#xabc text',
        )
        for broken in broken_references:
            self.assertEqual(utils.truncate_html_words(broken, 20), broken)
Example #40
0
    def test_truncate_html_words(self):
        """Exercise ``utils.truncate_html_words`` with a 20-word limit.

        Input within the limit must come back unchanged; longer input must
        keep 20 words followed by '…', placed before any closing tag.
        """
        def check(markup, expected):
            # Every case in this test uses the same 20-word limit.
            self.assertEqual(utils.truncate_html_words(markup, 20), expected)

        words_in = 'word ' * 100
        words_out = 'word ' * 20 + '…'

        # Plain text.
        check('short string', 'short string')
        check(words_in, words_out)

        # Words wrapped in or preceded by HTML tags.
        check('<p>' + words_in + '</p>', '<p>' + words_out + '</p>')
        check('<span\nstyle="\n…\n">' + words_in + '</span>',
              '<span\nstyle="\n…\n">' + words_out + '</span>')
        check('<br>' + words_in, '<br>' + words_out)
        check('<!-- comment -->' + words_in, '<!-- comment -->' + words_out)

        # One token repeated 100 times must be cut to 20 repetitions plus
        # the ellipsis.
        repeated_tokens = (
            # Hyphens and apostrophes.
            "a-b ", "it's ",
            # HTML entity references.
            "&eacute; ", "caf&eacute; ", "&egrave;lite ",
            "cafeti&eacute;re ", "&int;dx ",
            # HTML numeric character references.
            "&#xe9; ", "&#x222b;dx ",
        )
        for token in repeated_tokens:
            check(token * 100, token * 20 + '…')
Example #41
0
    def __init__(self, content, metadata=None, settings=None, filename=None):
        """Populate the instance from raw content, metadata and settings.

        :param content: body, stored on ``self._content``.
        :param metadata: optional mapping; every key is lower-cased and set
            as an attribute, layered over ``settings["DEFAULT_METADATA"]``.
        :param settings: optional settings mapping; ``_DEFAULT_CONFIG`` is
            used when it is missing or empty.
        :param filename: optional source file name, stored on
            ``self.filename`` when truthy and used in the author warning.
        """
        # init parameters
        if not metadata:
            metadata = {}
        if not settings:
            settings = _DEFAULT_CONFIG

        self._content = content
        self.translations = []

        local_metadata = dict(settings.get("DEFAULT_METADATA", ()))
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            setattr(self, key.lower(), value)

        # default author to the one in settings if not defined, else to the
        # USER environment variable
        if not hasattr(self, "author"):
            if "AUTHOR" in settings:
                self.author = settings["AUTHOR"]
            else:
                self.author = getenv("USER", "John Doe")
                # The title may be missing too; reading self.title directly
                # raised AttributeError when no filename was given either.
                warning(
                    u"Author of `{0}' unknow, assuming that his name is `{1}'".format(
                        filename or getattr(self, "title", ""), self.author
                    )
                )

        # manage languages
        self.in_default_lang = True
        if "DEFAULT_LANG" in settings:
            default_lang = settings["DEFAULT_LANG"].lower()
            if not hasattr(self, "lang"):
                self.lang = default_lang

            self.in_default_lang = self.lang == default_lang

        # create the slug if not existing, from the title
        if not hasattr(self, "slug") and hasattr(self, "title"):
            self.slug = slugify(self.title)

        # Derive the clean URL from the slug (+lang) whenever a slug exists,
        # even if save_as came from the metadata. Previously clean_url was
        # only bound when save_as had to be generated, so CLEAN_URLS combined
        # with an explicit save_as (or with no slug at all) raised
        # UnboundLocalError.
        clean_url = None
        if hasattr(self, "slug"):
            if self.in_default_lang:
                stem = self.slug
            else:
                stem = "%s-%s" % (self.slug, self.lang)
            clean_url = "%s/" % stem

            # create save_as from the slug (+lang) unless already provided
            if not hasattr(self, "save_as"):
                self.save_as = "%s.html" % stem

        # pick the url according to the settings, falling back to save_as
        # when no clean URL could be derived
        if settings.get("CLEAN_URLS", False) and clean_url is not None:
            self.url = clean_url
        elif hasattr(self, "save_as"):
            self.url = self.save_as

        if filename:
            self.filename = filename

        # manage the date format
        if not hasattr(self, "date_format"):
            if hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]:
                self.date_format = settings["DATE_FORMATS"][self.lang]
            else:
                self.date_format = settings["DEFAULT_DATE_FORMAT"]

        if hasattr(self, "date"):
            # NOTE(review): encoding the format before strftime and decoding
            # the result looks like Python 2 str handling -- confirm the
            # target interpreter before touching these lines.
            if platform == "win32":
                self.locale_date = self.date.strftime(self.date_format.encode("ascii", "xmlcharrefreplace")).decode(
                    stdin.encoding
                )
            else:
                self.locale_date = self.date.strftime(self.date_format.encode("ascii", "xmlcharrefreplace")).decode(
                    "utf"
                )

        # manage status: fall back to the configured default, and demote
        # future-dated content to a draft unless WITH_FUTURE_DATES is set
        if not hasattr(self, "status"):
            self.status = settings["DEFAULT_STATUS"]
            if not settings["WITH_FUTURE_DATES"]:
                if hasattr(self, "date") and self.date > datetime.now():
                    self.status = "draft"

        # set summary from the content when the metadata did not provide one
        if not hasattr(self, "summary"):
            self.summary = truncate_html_words(self.content, 50)
Example #42
0
 def truncate(self, html: str, max_length: int, without_tag: bool) -> str:
     """Truncate ``html`` to ``max_length`` words, optionally stripping tags.

     The text is first shortened with ``pelican_utils.truncate_html_words``.
     When ``without_tag`` is true, every ``<...>`` sequence is then removed
     from the shortened text.
     """
     shortened = pelican_utils.truncate_html_words(html, max_length)
     if not without_tag:
         return shortened
     return re.sub(r'<[^>]+>', "", shortened)
Example #43
0
    def test_truncate_html_words(self):
        """Check ``utils.truncate_html_words`` with a 20-word limit.

        Input within the limit must be returned unchanged; longer input
        must be cut to 20 words followed by '...', placed before any
        closing tag.
        """
        # Plain text.
        self.assertEqual(
            utils.truncate_html_words('short string', 20),
            'short string')
        self.assertEqual(
            utils.truncate_html_words('word ' * 100, 20),
            'word ' * 20 + '...')

        # Words enclosed in, or preceded by, HTML tags.
        self.assertEqual(
            utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
            '<p>' + 'word ' * 20 + '...</p>')
        self.assertEqual(
            utils.truncate_html_words(
                '<span\nstyle="\n...\n">' + 'word ' * 100 + '</span>', 20),
            '<span\nstyle="\n...\n">' + 'word ' * 20 + '...</span>')
        self.assertEqual(
            utils.truncate_html_words('<br>' + 'word ' * 100, 20),
            '<br>' + 'word ' * 20 + '...')
        self.assertEqual(
            utils.truncate_html_words('<!-- comment -->' + 'word ' * 100, 20),
            '<!-- comment -->' + 'word ' * 20 + '...')

        # Words with hyphens and apostrophes.
        self.assertEqual(
            utils.truncate_html_words("a-b " * 100, 20),
            "a-b " * 20 + '...')
        self.assertEqual(
            utils.truncate_html_words("it's " * 100, 20),
            "it's " * 20 + '...')

        # Words with HTML entity references.
        self.assertEqual(
            utils.truncate_html_words("&eacute; " * 100, 20),
            "&eacute; " * 20 + '...')
        self.assertEqual(
            utils.truncate_html_words("caf&eacute; " * 100, 20),
            "caf&eacute; " * 20 + '...')
        self.assertEqual(
            utils.truncate_html_words("&egrave;lite " * 100, 20),
            "&egrave;lite " * 20 + '...')
        self.assertEqual(
            utils.truncate_html_words("cafeti&eacute;re " * 100, 20),
            "cafeti&eacute;re " * 20 + '...')