def test_shorten_html_text(self):
    """Check that truncate_html_words returns these inputs unchanged."""
    # truncate_html_words in utils.py
    plain = 'plain text'
    self.assertEqual(utils.truncate_html_words(plain, 10), plain)

    # 'single' repeated ten times with no separator in between.
    repeated = 'single' * 10
    self.assertEqual(utils.truncate_html_words(repeated, 10), repeated)
def _get_summary(self):
    """Return the article summary.

    The ``_summary`` attribute is returned when it exists; otherwise the
    content is passed to ``truncate_html_words`` with a limit of 50.
    """
    if not hasattr(self, "_summary"):
        return truncate_html_words(self.content, 50)
    return self._summary
def insert_read_more_link(instance):
    """
    Insert an inline "read more" link into the last element of the summary

    :param instance: Content instance.
    :return: None. When the summary differs from the content, the summary
        with the link inserted is stored in ``instance.metadata['summary']``.
    """
    # Exact type match: anything that is not precisely an Article is skipped.
    if type(instance) != contents.Article:
        return

    settings = instance.settings
    site_url = settings.get('SITEURL')
    summary_max_length = settings.get('SUMMARY_MAX_LENGTH')
    link_text = settings.get('READ_MORE_LINK_TEXT')
    link_html_template = settings.get(
        'READ_MORE_LINK_TEMPLATE',
        '<a class="read-more" href="{{ url }}">{{ text }}</a>',
    )

    # A falsy or missing _summary falls back to truncating the content.
    summary = getattr(instance, '_summary', None)
    if not summary:
        summary = truncate_html_words(instance.content, summary_max_length)

    # Nothing to link to when the summary already is the full content.
    if summary == instance.content:
        return

    template = jinja2.Template(link_html_template)
    link_html = template.render(
        url=f'{site_url}/{instance.url}',
        text=link_text,
    )
    instance.metadata['summary'] = insert_into_last_element(
        summary,
        link_html,
    )
def insert_read_more_link(instance):
    """
    Insert an inline "read more" link into the last element of the summary

    :param instance: content object; only exact ``contents.Article``
        instances are processed.
    :return: None. The linked summary is written to ``instance._summary``.
    """
    # only deals with Article type
    if type(instance) != contents.Article:
        return

    max_length = instance.settings.get('SUMMARY_MAX_LENGTH')
    link_text = instance.settings.get('READ_MORE_LINK', None)
    link_format = instance.settings.get(
        'READ_MORE_LINK_FORMAT',
        '<a class="read-more" href="/{url}">{text}</a>')

    # All three settings must be truthy for the link to be inserted.
    if not max_length or not link_text or not link_format:
        return

    # Use the existing summary when it is present and truthy.
    if not (hasattr(instance, '_summary') and instance._summary):
        summary = truncate_html_words(instance.content, max_length)
    else:
        summary = instance._summary

    if summary == instance.content:
        return

    link_html = link_format.format(url=instance.url, text=link_text)
    instance._summary = insert_into_last_element(summary, link_html)
def insert_read_more_link(instance):
    """
    Insert an inline "read more" link into the last element of the summary

    :param instance: content object; only exact ``contents.Article``
        instances are processed.
    :return: None. The linked summary is written to ``instance._summary``.
    """
    # only deals with Article type
    if type(instance) != contents.Article:
        return

    SUMMARY_MAX_LENGTH = instance.settings.get('SUMMARY_MAX_LENGTH')
    READ_MORE_LINK = instance.settings.get('READ_MORE_LINK', None)
    READ_MORE_LINK_FORMAT = instance.settings.get(
        'READ_MORE_LINK_FORMAT',
        '<a class="read-more" href="/{url}">{text}</a>')

    # All three settings must be truthy for the link to be inserted.
    if not (SUMMARY_MAX_LENGTH and READ_MORE_LINK and READ_MORE_LINK_FORMAT):
        return

    if hasattr(instance, '_summary') and instance._summary:
        summary = instance._summary
    else:
        summary = truncate_html_words(instance.content, SUMMARY_MAX_LENGTH)

    # Compare for inequality: `<` on strings is a lexicographic ordering and
    # says nothing about whether the summary is a shortened form of the
    # content, so it could skip (or wrongly add) the link.
    if summary != instance.content:
        read_more_link = READ_MORE_LINK_FORMAT.format(
            url=instance.url, text=READ_MORE_LINK)
        instance._summary = insert_into_last_element(summary, read_more_link)
def _get_summary(self):
    """Return the article summary.

    An existing ``_summary`` attribute is returned as is. Otherwise the
    content is truncated when the SUMMARY_MAX_LENGTH setting is truthy,
    and returned whole when it is not.
    """
    if hasattr(self, '_summary'):
        return self._summary

    if not self.settings['SUMMARY_MAX_LENGTH']:
        return self.content

    return truncate_html_words(self.content,
                               self.settings['SUMMARY_MAX_LENGTH'])
def __init__(self, content, metadatas={}, settings={}, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadatas: mapping; every entry becomes an attribute named
        after the lower-cased key.
    :param settings: mapping read for AUTHOR, DEFAULT_LANG, CLEAN_URLS,
        DATE_FORMATS and DEFAULT_DATE_FORMAT; kept as ``_settings``.
    :param filename: stored as ``filename`` when truthy.
    """
    self._content = content
    self.translations = []
    self.status = "published"  # default value

    for name, value in metadatas.items():
        setattr(self, name.lower(), value)

    if not hasattr(self, 'author') and 'AUTHOR' in settings:
        self.author = settings['AUTHOR']

    default_lang = settings.get('DEFAULT_LANG').lower()
    if not hasattr(self, 'lang'):
        self.lang = default_lang
    self.in_default_lang = (self.lang == default_lang)

    if not hasattr(self, 'slug'):
        self.slug = slugify(self.title)

    if not hasattr(self, 'save_as'):
        if self.in_default_lang:
            self.save_as = '%s.html' % self.slug
            clean_url = '%s/' % self.slug
        else:
            slug_and_lang = (self.slug, self.lang)
            self.save_as = '%s-%s.html' % slug_and_lang
            clean_url = '%s-%s/' % slug_and_lang

    # NOTE(review): clean_url is only bound when save_as was derived above.
    self.url = clean_url if settings.get('CLEAN_URLS', False) else self.save_as

    if filename:
        self.filename = filename

    if not hasattr(self, 'date_format'):
        formats = settings['DATE_FORMATS']
        if self.lang in formats:
            self.date_format = formats[self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if hasattr(self, 'date'):
        encoded_format = self.date_format.encode('ascii', 'xmlcharrefreplace')
        self.locale_date = self.date.strftime(encoded_format).decode('utf')

    if not hasattr(self, 'summary'):
        # property.__get__ invokes the getter immediately, so the truncated
        # text itself (not a property object) ends up on the instance.
        summary_getter = property(
            lambda self: truncate_html_words(self.content, 50))
        self.summary = summary_getter.__get__(self, Page)

    # store the settings ref.
    self._settings = settings
def truncate(generator):
    """Set ``_summary`` on every article of *generator*.

    The summary is the text preceding the first match of the READ_MORE_RE
    setting (default: an HTML ``more`` comment). Articles without a match
    get their content truncated to SUMMARY_MAX_LENGTH.
    """
    settings = generator.settings
    marker = settings.get('READ_MORE_RE', r'<!--\s*?more\s*?-->')
    # re.S lets the leading group span newlines before the marker.
    marker_re = re.compile(r'^(.*?)' + marker, re.S)
    word_limit = settings.get('SUMMARY_MAX_LENGTH')

    for article in tuple(generator.articles):
        body = article.content
        found = marker_re.search(body)
        if not found:
            article._summary = truncate_html_words(body, word_limit)
        else:
            article._summary = found.group(1)
def test_truncate_html_words(self):
    """Check truncate_html_words on plain text and on tag-wrapped text."""
    truncate = utils.truncate_html_words
    words = 'word ' * 100
    kept = 'word ' * 20

    self.assertEqual(truncate('short string', 20), 'short string')
    self.assertEqual(truncate(words, 20), kept + '...')
    self.assertEqual(
        truncate('<p>' + words + '</p>', 20),
        '<p>' + kept + '...</p>')
    self.assertEqual(
        truncate('<span\nstyle="\n...\n">' + words + '</span>', 20),
        '<span\nstyle="\n...\n">' + kept + '...</span>')
    self.assertEqual(
        truncate('<br>' + words, 20),
        '<br>' + kept + '...')
    self.assertEqual(
        truncate('<!-- comment -->' + words, 20),
        '<!-- comment -->' + kept + '...')
def test_summary_max_length(self):
    # If a :SUMMARY_MAX_LENGTH: is set, and there is no other summary,
    # generated summary should not exceed the given length.
    settings = _DEFAULT_CONFIG.copy()
    page_kwargs = self._copy_page_kwargs()
    page_kwargs['settings'] = settings
    del page_kwargs['metadata']['summary']

    # No limit: the summary is the whole content.
    settings['SUMMARY_MAX_LENGTH'] = None
    unlimited = Page(**page_kwargs)
    self.assertEqual(unlimited.summary, TEST_CONTENT)

    # Limit of 10: the summary matches a direct truncation.
    settings['SUMMARY_MAX_LENGTH'] = 10
    limited = Page(**page_kwargs)
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(limited.summary, expected)
def test_summary_max_length(self):
    """If a :SUMMARY_MAX_LENGTH: is set, and there is no other summary,
    generated summary should not exceed the given length."""
    settings = _DEFAULT_CONFIG.copy()
    page_kwargs = self._copy_page_kwargs()
    page_kwargs['settings'] = settings
    del page_kwargs['metadata']['summary']

    # No limit: the summary is the whole content.
    settings['SUMMARY_MAX_LENGTH'] = None
    unlimited = Page(**page_kwargs)
    self.assertEqual(unlimited.summary, TEST_CONTENT)

    # Limit of 10: the summary matches a direct truncation.
    settings['SUMMARY_MAX_LENGTH'] = 10
    limited = Page(**page_kwargs)
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(limited.summary, expected)
def redo_summary(instance):
    """Store an image-free parsed summary on ``instance.text_summary``.

    The summary source is, in order: the 'summary' metadata entry, a
    non-None ``_summary`` attribute, or a non-None ``_content`` truncated
    to SUMMARY_MAX_LENGTH (taken from ``instance._context``, falling back
    to the module-level SUMMARY_MAX_LENGTH). With none of these, the
    summary is the empty string.
    """
    summary = ""
    if 'summary' in instance.metadata:
        summary = instance.metadata.get('summary')
    elif hasattr(instance, "_summary") and instance._summary is not None:
        summary = instance._summary
    elif hasattr(instance, "_content") and instance._content is not None:
        word_limit = instance._context.get("SUMMARY_MAX_LENGTH",
                                           SUMMARY_MAX_LENGTH)
        summary = truncate_html_words(instance._content, word_limit)

    soup = BeautifulSoup(summary, from_encoding='utf-8')
    # Drop every <img> element from the parsed summary.
    for image in soup('img'):
        image.extract()
    instance.text_summary = soup
def test_summary_end_marker(self):
    # If a :SUMMARY_END_MARKER: is set, and there is no other summary,
    # generated summary should contain the specified marker at the end.
    settings = get_settings()
    page_kwargs = self._copy_page_kwargs()
    page_kwargs['settings'] = settings
    del page_kwargs['metadata']['summary']

    settings['SUMMARY_END_MARKER'] = 'test_marker'
    settings['SUMMARY_MAX_LENGTH'] = 10
    page = Page(**page_kwargs)

    expected = truncate_html_words(TEST_CONTENT, 10, 'test_marker')
    self.assertEqual(page.summary, expected)
    self.assertIn('test_marker', page.summary)
def get_summary(self, siteurl):
    """Returns the summary of an article.

    An existing ``_summary`` is passed through ``_update_content`` with
    *siteurl*. Otherwise the content is returned whole when the
    SUMMARY_MAX_LENGTH setting is None, and truncated to it when not.
    """
    if hasattr(self, "_summary"):
        return self._update_content(self._summary, siteurl)

    word_limit = self.settings["SUMMARY_MAX_LENGTH"]
    if word_limit is not None:
        return truncate_html_words(self.content, self.settings["SUMMARY_MAX_LENGTH"])
    return self.content
def __init__(self, content, metadatas={}, settings={}, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadatas: mapping; every entry becomes an attribute named
        after the lower-cased key.
    :param settings: mapping read for AUTHOR, DEFAULT_LANG, CLEAN_URLS,
        DATE_FORMATS and DEFAULT_DATE_FORMAT; kept as ``_settings``.
    :param filename: stored as ``filename`` when truthy.
    """
    self._content = content
    self.translations = []
    self.status = "published"  # default value

    for name, value in metadatas.items():
        setattr(self, name.lower(), value)

    if not hasattr(self, "author") and "AUTHOR" in settings:
        self.author = settings["AUTHOR"]

    default_lang = settings.get("DEFAULT_LANG").lower()
    if not hasattr(self, "lang"):
        self.lang = default_lang
    self.in_default_lang = self.lang == default_lang

    if not hasattr(self, "slug"):
        self.slug = slugify(self.title)

    if not hasattr(self, "save_as"):
        if self.in_default_lang:
            self.save_as = "%s.html" % self.slug
            clean_url = "%s/" % self.slug
        else:
            slug_and_lang = (self.slug, self.lang)
            self.save_as = "%s-%s.html" % slug_and_lang
            clean_url = "%s-%s/" % slug_and_lang

    # NOTE(review): clean_url is only bound when save_as was derived above.
    self.url = clean_url if settings.get("CLEAN_URLS", False) else self.save_as

    if filename:
        self.filename = filename

    if not hasattr(self, "date_format"):
        formats = settings["DATE_FORMATS"]
        if self.lang in formats:
            self.date_format = formats[self.lang]
        else:
            self.date_format = settings["DEFAULT_DATE_FORMAT"]

    if hasattr(self, "date"):
        encoded_format = self.date_format.encode("ascii", "xmlcharrefreplace")
        self.locale_date = self.date.strftime(encoded_format).decode("utf")

    if not hasattr(self, "summary"):
        # property.__get__ invokes the getter immediately, so the truncated
        # text itself (not a property object) ends up on the instance.
        summary_getter = property(
            lambda self: truncate_html_words(self.content, 50))
        self.summary = summary_getter.__get__(self, Page)

    # store the settings ref.
    self._settings = settings
def __init__(self, content, metadatas={}, settings={}, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadatas: mapping; every entry becomes an attribute named
        after the lower-cased key.
    :param settings: mapping read for AUTHOR, DEFAULT_LANG, CLEAN_URLS,
        DATE_FORMATS and DEFAULT_DATE_FORMAT; kept as ``_settings``.
    :param filename: stored as ``filename`` when truthy.
    """
    self._content = content
    self.translations = []
    self.status = "published"  # default value

    for key, value in metadatas.items():
        setattr(self, key.lower(), value)

    if not hasattr(self, 'author'):
        if 'AUTHOR' in settings:
            self.author = settings['AUTHOR']

    default_lang = settings.get('DEFAULT_LANG').lower()
    if not hasattr(self, 'lang'):
        self.lang = default_lang

    self.in_default_lang = (self.lang == default_lang)

    if not hasattr(self, 'slug'):
        self.slug = slugify(self.title)

    if not hasattr(self, 'save_as'):
        if self.in_default_lang:
            self.save_as = '%s.html' % self.slug
            clean_url = '%s/' % self.slug
        else:
            self.save_as = '%s-%s.html' % (self.slug, self.lang)
            clean_url = '%s-%s/' % (self.slug, self.lang)

    # NOTE(review): clean_url is only bound when save_as was derived above.
    if settings.get('CLEAN_URLS', False):
        self.url = clean_url
    else:
        self.url = self.save_as

    if filename:
        self.filename = filename

    if not hasattr(self, 'date_format'):
        if self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if hasattr(self, 'date'):
        self.locale_date = self.date.strftime(
            self.date_format.encode('ascii', 'xmlcharrefreplace')).decode('utf')

    if not hasattr(self, 'summary'):
        # Store the truncated text itself. A property object assigned to an
        # instance attribute is never invoked by attribute access, so the
        # previous `property(lambda ...)` left `summary` as a property
        # object instead of a string.
        self.summary = truncate_html_words(self.content, 50)

    # store the settings ref.
    self._settings = settings
def _get_summary(self):
    """Returns the summary of an article.

    An existing ``_summary`` attribute is returned as is. Otherwise the
    content is returned whole when the SUMMARY_MAX_LENGTH setting is None,
    and truncated to it when not.
    """
    if hasattr(self, '_summary'):
        return self._summary

    word_limit = self.settings['SUMMARY_MAX_LENGTH']
    if word_limit is not None:
        return truncate_html_words(self.content,
                                   self.settings['SUMMARY_MAX_LENGTH'])
    return self.content
def get_summary(self, siteurl):
    """Returns the summary of an article.

    An existing ``_summary`` is passed through ``_update_content`` with
    *siteurl*. Otherwise the content is returned whole when the
    SUMMARY_MAX_LENGTH setting is None, and truncated to it when not.
    """
    if hasattr(self, '_summary'):
        return self._update_content(self._summary, siteurl)

    word_limit = self.settings['SUMMARY_MAX_LENGTH']
    if word_limit is not None:
        return truncate_html_words(self.content,
                                   self.settings['SUMMARY_MAX_LENGTH'])
    return self.content
def get_summary(self, siteurl):
    """Returns the summary of an article.

    A 'summary' entry in the metadata is returned as is. Otherwise the
    content is returned whole when the SUMMARY_MAX_LENGTH setting is None,
    and truncated to it when not. *siteurl* is not used here.
    """
    if 'summary' in self.metadata:
        return self.metadata['summary']

    word_limit = self.settings['SUMMARY_MAX_LENGTH']
    if word_limit is not None:
        return truncate_html_words(self.content,
                                   self.settings['SUMMARY_MAX_LENGTH'])
    return self.content
def test_summary_max_length(self):
    # If a :SUMMARY_MAX_LENGTH: is set, and there is no other summary,
    # generated summary should not exceed the given length.
    settings = get_settings()
    page_kwargs = self._copy_page_kwargs()
    page_kwargs["settings"] = settings
    del page_kwargs["metadata"]["summary"]

    # No limit: the summary is the whole content.
    settings["SUMMARY_MAX_LENGTH"] = None
    unlimited = Page(**page_kwargs)
    self.assertEqual(unlimited.summary, TEST_CONTENT)

    # Limit of 10: the summary matches a direct truncation.
    settings["SUMMARY_MAX_LENGTH"] = 10
    limited = Page(**page_kwargs)
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(limited.summary, expected)

    # Limit of 0: the summary is empty.
    settings["SUMMARY_MAX_LENGTH"] = 0
    empty = Page(**page_kwargs)
    self.assertEqual(empty.summary, "")
def fix_summary_ellipsis(instance):
    """
    Pelican uses '...' as the ellipsis when creating a summary. We want a
    proper ellipsis though, coupled with a non-breaking space.

    :param instance: content object; only exact ``contents.Article``
        instances are processed.
    :return: None. The generated summary is written to ``instance._summary``
        when the instance has no truthy ``_summary`` yet.
    """
    # only deals with Article type
    if type(instance) != contents.Article:
        return

    SUMMARY_MAX_LENGTH = instance.settings.get('SUMMARY_MAX_LENGTH')
    if not SUMMARY_MAX_LENGTH:
        return

    if not (hasattr(instance, '_summary') and instance._summary):
        summary = truncate_html_words(instance.content, SUMMARY_MAX_LENGTH, '…')
        # The non-breaking space is written as an escape so it cannot be
        # mistaken for (or silently converted to) an ordinary space, which
        # would turn this replace into a no-op.
        instance._summary = summary.replace(' …', '\u00a0…')
def summary_remove_elements(instance):
    """Replace tags listed in ``invalid_tags`` by their contents in the summary.

    Only exact ``contents.Article`` instances are processed. The summary is
    the truthy ``_summary`` attribute when present, otherwise the content
    truncated to SUMMARY_MAX_LENGTH. The result is written back to
    ``instance._summary``.
    """
    if type(instance) != contents.Article:
        return

    word_limit = instance.settings.get('SUMMARY_MAX_LENGTH')
    if hasattr(instance, '_summary') and instance._summary:
        raw_summary = instance._summary
    else:
        raw_summary = truncate_html_words(instance.content, word_limit)

    soup = BeautifulSoup(raw_summary, 'html.parser')
    for tag in soup.findAll(True):
        if tag.name not in invalid_tags:
            continue
        pieces = []
        for child in tag.contents:
            # Nested elements are cleaned via strip_tags; text nodes are
            # kept unchanged.
            if not isinstance(child, NavigableString):
                child = strip_tags(unicode(child), invalid_tags)
            pieces.append(unicode(child))
        tag.replaceWith("".join(pieces))

    instance._summary = text_type(soup)
def process_summary(article):
    """Ensures summaries are not cut off. Also inserts mathjax script so
    that math will be rendered"""
    settings = article.settings
    if hasattr(article, '_summary'):
        summary = article._update_content(article._summary,
                                          article.get_siteurl())
    elif settings['SUMMARY_MAX_LENGTH'] is None:
        summary = article.content
    else:
        summary = truncate_html_words(article.content,
                                      settings['SUMMARY_MAX_LENGTH'])

    summary_parsed = BeautifulSoup(summary, 'html.parser')
    math = summary_parsed.find_all(class_='math')

    # Summaries without any math element are left untouched.
    if not math:
        return

    last_math_text = math[-1].get_text()
    if len(last_math_text) > 3 and last_math_text.endswith('...'):
        # The last math element ends with '...': swap in the full text of
        # the element at the same position in the complete content.
        content_parsed = BeautifulSoup(article._content, 'html.parser')
        full_math = content_parsed.find_all(class_='math')
        full_text = full_math[len(math) - 1].get_text()
        math[-1].string = "%s ..." % full_text
        summary = summary_parsed.decode()

    article._summary = "%s<script type='text/javascript'>%s</script>" % (
        summary, process_summary.mathjax_script)
def get_entry_summary(entry):
    """Return the summary text of a feed entry.

    The entry's "description" is used (empty string when the key is
    missing). It is truncated to the configured word count when that count
    is positive, and stripped of HTML tags when the clean-HTML setting is
    truthy.
    """
    # https://stackoverflow.com/a/12982689/11441
    def cleanhtml(raw_html):
        cleanr = re.compile("<.*?>")
        cleantext = re.sub(cleanr, "", raw_html)
        return cleantext

    try:
        # this will get the first of 'summary' and 'subtitle'
        summary = entry["description"]
    except KeyError:
        summary = ""

    # Guard on the word count itself: the previous check compared the
    # clean-HTML flag with 0, so truncation depended on an unrelated
    # setting and a word count of 0 could wipe the summary.
    if settings[WEBRING_SUMMARY_WORDS_STR] > 0:
        summary = utils.truncate_html_words(
            summary, settings[WEBRING_SUMMARY_WORDS_STR]
        )

    # feedparser sanitizes html by default, but it can still contain html tags.
    if settings[WEBRING_CLEAN_SUMMARY_HTML_STR]:
        summary = cleanhtml(summary)
    return summary
def __init__(self, content, metadata=None, settings=None, filename=None, source=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadata: mapping merged over the DEFAULT_METADATA setting;
        every entry becomes an attribute named after the lower-cased key.
    :param settings: settings mapping; ``_DEFAULT_CONFIG`` when falsy.
    :param filename: stored as ``filename`` when truthy.
    :param source: stored as ``source``.
    """
    # init parameters
    metadata = metadata or {}
    settings = settings or _DEFAULT_CONFIG

    self._content = content
    self.translations = []
    self.source = source

    local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
    local_metadata.update(metadata)

    # set metadata as attributes
    for name, value in local_metadata.items():
        setattr(self, name.lower(), value)

    # default author to the one in settings if not defined
    if not hasattr(self, 'author'):
        if 'AUTHOR' in settings:
            self.author = settings['AUTHOR']
        else:
            self.author = getenv('USER', 'John Doe')
            message = u"Author of `{0}' unknow, assuming that his name is `{1}'"
            warning(message.format(filename, self.author))

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang
        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, from the title
    if not hasattr(self, 'slug') and hasattr(self, 'title'):
        self.slug = slugify(self.title)

    # create save_as from the slug (+lang)
    if not hasattr(self, 'save_as') and hasattr(self, 'slug'):
        if not self.in_default_lang:
            # NOTE(review): this url is overwritten by both branches below.
            self.url = '%s-%s' % (self.slug, self.lang)

        if settings.get('CLEAN_URLS', False):
            self.save_as = '%s/index.html' % self.slug
            self.url = '%s/' % self.slug
        else:
            self.save_as = '%s.html' % self.slug
            self.url = self.save_as
        self.source_url = '%s.txt' % self.slug
    elif hasattr(self, 'save_as'):
        self.url = self.save_as
        self.source_url = self.save_as

    if filename:
        self.filename = filename

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if hasattr(self, 'date'):
        encoded_format = self.date_format.encode('ascii', 'xmlcharrefreplace')
        # The formatted date is decoded with stdin's encoding on win32.
        if platform == 'win32':
            self.locale_date = self.date.strftime(encoded_format).decode(stdin.encoding)
        else:
            self.locale_date = self.date.strftime(encoded_format).decode('utf')

    # manage status
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
        future_dated = hasattr(self, 'date') and self.date > datetime.now()
        if not settings['WITH_FUTURE_DATES'] and future_dated:
            self.status = 'draft'

    # set summary
    if not hasattr(self, 'summary'):
        self.summary = truncate_html_words(self.content, 50)
def truncate_html_words(html, max_length=15):
    """Delegate to ``pelican_utils.truncate_html_words``; *max_length* defaults to 15."""
    truncated = pelican_utils.truncate_html_words(html, max_length)
    return truncated
def __init__(self, content, metadata=None, settings=None, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadata: mapping merged over the DEFAULT_METADATA setting;
        every entry becomes an attribute named after the lower-cased key.
    :param settings: settings mapping; ``_DEFAULT_CONFIG`` when falsy.
        Kept as ``settings``.
    :param filename: stored as ``filename`` when truthy.
    """
    # init parameters
    metadata = metadata or {}
    settings = settings or _DEFAULT_CONFIG

    self.settings = settings
    self._content = content
    self.translations = []

    local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
    local_metadata.update(metadata)

    # set metadata as attributes
    for name, value in local_metadata.items():
        setattr(self, name.lower(), value)

    # default author to the one in settings if not defined
    if not hasattr(self, 'author'):
        if 'AUTHOR' in settings:
            self.author = Author(settings['AUTHOR'], settings)
        else:
            self.author = Author(getenv('USER', 'John Doe'), settings)
            message = (u"Author of `{0}' unknow, assuming that his name is "
                       "`{1}'")
            warning(message.format(filename or self.title, self.author))

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang
        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, from the title
    if not hasattr(self, 'slug') and hasattr(self, 'title'):
        self.slug = slugify(self.title)

    if filename:
        self.filename = filename

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    # A tuple date format is read as (locale name, format string): the
    # locale is applied process-wide and only the format string is kept.
    if isinstance(self.date_format, tuple):
        locale_name, format_string = self.date_format[0], self.date_format[1]
        locale.setlocale(locale.LC_ALL, locale_name)
        self.date_format = format_string

    if hasattr(self, 'date'):
        encoded_date = self.date.strftime(
            self.date_format.encode('ascii', 'xmlcharrefreplace'))
        # The formatted date is decoded with stdin's encoding on win32.
        if platform == 'win32':
            self.locale_date = encoded_date.decode(stdin.encoding)
        else:
            self.locale_date = encoded_date.decode('utf')

    # manage status
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
        future_dated = hasattr(self, 'date') and self.date > datetime.now()
        if not settings['WITH_FUTURE_DATES'] and future_dated:
            self.status = 'draft'

    # set summary
    if not hasattr(self, 'summary'):
        self.summary = truncate_html_words(self.content, 50)
def _get_summary(self):
    """Return the content passed through ``truncate_html_words`` with a limit of 50."""
    summary = truncate_html_words(self.content, 50)
    return summary
def __init__(self, content, metadata=None, settings=None, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadata: mapping merged over the DEFAULT_METADATA setting;
        every entry becomes an attribute named after the lower-cased key.
    :param settings: settings mapping; ``_DEFAULT_CONFIG`` when falsy.
    :param filename: stored as ``filename`` when truthy.
    """
    # init parameters
    metadata = metadata or {}
    settings = settings or _DEFAULT_CONFIG

    self._content = content
    self.translations = []

    local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
    local_metadata.update(metadata)

    # set metadata as attributes
    for name, value in local_metadata.items():
        setattr(self, name.lower(), value)

    # default author to the one in settings if not defined
    if not hasattr(self, 'author'):
        if 'AUTHOR' in settings:
            self.author = settings['AUTHOR']
        else:
            self.author = getenv('USER', 'John Doe')
            message = u"Author of `{0}' unknow, assuming that his name is `{1}'"
            warning(message.format(filename or self.title, self.author))

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang
        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, from the title
    if not hasattr(self, 'slug') and hasattr(self, 'title'):
        self.slug = slugify(self.title)

    # create save_as from the slug (+lang)
    if not hasattr(self, 'save_as') and hasattr(self, 'slug'):
        if self.in_default_lang:
            if settings.get('CLEAN_URLS', False):
                self.save_as = '%s/index.html' % self.slug
            else:
                self.save_as = '%s.html' % self.slug
            clean_url = '%s/' % self.slug
        else:
            slug_and_lang = (self.slug, self.lang)
            if settings.get('CLEAN_URLS', False):
                self.save_as = '%s-%s/index.html' % slug_and_lang
            else:
                self.save_as = '%s-%s.html' % slug_and_lang
            clean_url = '%s-%s/' % slug_and_lang

    # change the save_as regarding the settings
    # NOTE(review): clean_url is only bound when save_as was derived above.
    if settings.get('CLEAN_URLS', False):
        self.url = clean_url
    elif hasattr(self, 'save_as'):
        self.url = self.save_as

    if filename:
        self.filename = filename

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if hasattr(self, 'date'):
        encoded_format = self.date_format.encode('ascii', 'xmlcharrefreplace')
        # The formatted date is decoded with stdin's encoding on win32.
        if platform == 'win32':
            self.locale_date = self.date.strftime(encoded_format).decode(stdin.encoding)
        else:
            self.locale_date = self.date.strftime(encoded_format).decode('utf')

    # manage status
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
        future_dated = hasattr(self, 'date') and self.date > datetime.now()
        if not settings['WITH_FUTURE_DATES'] and future_dated:
            self.status = 'draft'

    # set summary
    if not hasattr(self, 'summary'):
        self.summary = truncate_html_words(self.content, 50)
def summary(self):
    """Return the content passed through ``truncate_html_words`` with a limit of 50."""
    truncated = truncate_html_words(self.content, 50)
    return truncated
def make_summary(text, max_words=None):
    """Return *text* truncated to *max_words*, ending with '…'.

    When *max_words* is None the text is returned unchanged.
    """
    if max_words is not None:
        return truncate_html_words(text, max_words, end_text='…')
    return text
def __init__(self, content, metadata=None, settings=None, filename=None):
    """Populate the page from its content, metadata and settings.

    :param content: stored as ``_content``.
    :param metadata: mapping merged over the DEFAULT_METADATA setting;
        every entry becomes an attribute named after the lower-cased key.
    :param settings: settings mapping; ``_DEFAULT_CONFIG`` when falsy.
    :param filename: stored as ``filename`` when truthy.
    """
    # init parameters
    metadata = metadata or {}
    settings = settings or _DEFAULT_CONFIG

    self._content = content
    self.translations = []
    self.status = "published"  # default value

    local_metadata = dict(settings.get('DEFAULT_METADATA', ()))
    local_metadata.update(metadata)

    # set metadata as attributes
    for name, value in local_metadata.items():
        setattr(self, name.lower(), value)

    # default author to the one in settings if not defined
    if not hasattr(self, 'author') and 'AUTHOR' in settings:
        self.author = settings['AUTHOR']

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang
        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, from the title
    if not hasattr(self, 'slug') and hasattr(self, 'title'):
        self.slug = slugify(self.title)

    # create save_as from the slug (+lang)
    if not hasattr(self, 'save_as') and hasattr(self, 'slug'):
        if self.in_default_lang:
            self.save_as = '%s.html' % self.slug
            clean_url = '%s/' % self.slug
        else:
            slug_and_lang = (self.slug, self.lang)
            self.save_as = '%s-%s.html' % slug_and_lang
            clean_url = '%s-%s/' % slug_and_lang

    # change the save_as regarding the settings
    # NOTE(review): clean_url is only bound when save_as was derived above.
    if settings.get('CLEAN_URLS', False):
        self.url = clean_url
    elif hasattr(self, 'save_as'):
        self.url = self.save_as

    if filename:
        self.filename = filename

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if hasattr(self, 'date'):
        encoded_format = self.date_format.encode('ascii', 'xmlcharrefreplace')
        self.locale_date = self.date.strftime(encoded_format).decode('utf')

    # manage summary
    if not hasattr(self, 'summary'):
        # property.__get__ invokes the getter immediately, so the truncated
        # text itself (not a property object) ends up on the instance.
        summary_getter = property(
            lambda self: truncate_html_words(self.content, 50))
        self.summary = summary_getter.__get__(self, Page)

    # manage status
    # NOTE(review): status is always assigned near the top of this method,
    # so this fallback looks unreachable unless attribute lookup is customized.
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
def test_truncate_html_words(self):
    """Check truncate_html_words on plain text, tags and HTML references.

    The reference cases use real entity and character references (written
    out as ``&name;`` / ``&#...;``), so that reference handling is what
    gets exercised rather than plain Unicode text.
    """
    # Plain text.
    self.assertEqual(utils.truncate_html_words('short string', 20),
                     'short string')
    self.assertEqual(utils.truncate_html_words('word ' * 100, 20),
                     'word ' * 20 + '…')

    # Words enclosed or intervaled by HTML tags.
    self.assertEqual(
        utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
        '<p>' + 'word ' * 20 + '…</p>')
    self.assertEqual(
        utils.truncate_html_words(
            '<span\nstyle="\n…\n">' + 'word ' * 100 + '</span>', 20),
        '<span\nstyle="\n…\n">' + 'word ' * 20 + '…</span>')
    self.assertEqual(utils.truncate_html_words('<br>' + 'word ' * 100, 20),
                     '<br>' + 'word ' * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words('<!-- comment -->' + 'word ' * 100, 20),
        '<!-- comment -->' + 'word ' * 20 + '…')

    # Words with hyphens and apostrophes.
    self.assertEqual(utils.truncate_html_words("a-b " * 100, 20),
                     "a-b " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("it's " * 100, 20),
                     "it's " * 20 + '…')

    # Words with HTML entity references.
    self.assertEqual(utils.truncate_html_words("&eacute; " * 100, 20),
                     "&eacute; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("caf&eacute; " * 100, 20),
                     "caf&eacute; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&egrave;lite " * 100, 20),
                     "&egrave;lite " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("cafeti&eacute;re " * 100, 20),
        "cafeti&eacute;re " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&int;dx " * 100, 20),
                     "&int;dx " * 20 + '…')

    # Words with HTML character references inside and outside
    # the ASCII range.
    self.assertEqual(utils.truncate_html_words("&#xe9; " * 100, 20),
                     "&#xe9; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&#x222b;dx " * 100, 20),
                     "&#x222b;dx " * 20 + '…')

    # Words with invalid or broken HTML references.
    self.assertEqual(utils.truncate_html_words('&invalid;', 20),
                     '&invalid;')
    # Numeric references beyond the valid code point range.
    self.assertEqual(utils.truncate_html_words('&#9999999999;', 20),
                     '&#9999999999;')
    self.assertEqual(utils.truncate_html_words('&#xfffffffff;', 20),
                     '&#xfffffffff;')
    # References missing their terminating semicolon.
    self.assertEqual(utils.truncate_html_words('&mdash text', 20),
                     '&mdash text')
    self.assertEqual(utils.truncate_html_words('&#1234 text', 20),
                     '&#1234 text')
    self.assertEqual(utils.truncate_html_words('&#xabc text', 20),
                     '&#xabc text')
def test_truncate_html_words(self):
    """Check truncate_html_words on plain text, tags and entity references.

    The entity cases use real references (written out as ``&name;``), so
    that reference handling is what gets exercised rather than plain
    accented text.
    """
    # Plain text.
    self.assertEqual(utils.truncate_html_words('short string', 20),
                     'short string')
    self.assertEqual(utils.truncate_html_words('word ' * 100, 20),
                     'word ' * 20 + '...')

    # Words enclosed or intervaled by HTML tags.
    self.assertEqual(
        utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
        '<p>' + 'word ' * 20 + '...</p>')
    self.assertEqual(
        utils.truncate_html_words(
            '<span\nstyle="\n...\n">' + 'word ' * 100 + '</span>', 20),
        '<span\nstyle="\n...\n">' + 'word ' * 20 + '...</span>')
    self.assertEqual(utils.truncate_html_words('<br>' + 'word ' * 100, 20),
                     '<br>' + 'word ' * 20 + '...')
    self.assertEqual(
        utils.truncate_html_words('<!-- comment -->' + 'word ' * 100, 20),
        '<!-- comment -->' + 'word ' * 20 + '...')

    # Words with hyphens and apostrophes.
    self.assertEqual(utils.truncate_html_words("a-b " * 100, 20),
                     "a-b " * 20 + '...')
    self.assertEqual(utils.truncate_html_words("it's " * 100, 20),
                     "it's " * 20 + '...')

    # Words with HTML entity references.
    self.assertEqual(utils.truncate_html_words("&eacute; " * 100, 20),
                     "&eacute; " * 20 + '...')
    self.assertEqual(utils.truncate_html_words("caf&eacute; " * 100, 20),
                     "caf&eacute; " * 20 + '...')
    self.assertEqual(utils.truncate_html_words("&egrave;lite " * 100, 20),
                     "&egrave;lite " * 20 + '...')
    self.assertEqual(
        utils.truncate_html_words("cafeti&eacute;re " * 100, 20),
        "cafeti&eacute;re " * 20 + '...')
def test_truncate_html_words(self):
    """Check truncate_html_words on plain text, Unicode, tags and references.

    The reference cases use real entity and character references (written
    out as ``&name;`` / ``&#...;``), so that reference handling is what
    gets exercised rather than plain Unicode text.
    """
    # Plain text.
    self.assertEqual(utils.truncate_html_words('short string', 20),
                     'short string')
    self.assertEqual(utils.truncate_html_words('word ' * 100, 20),
                     'word ' * 20 + '…')

    # Plain text with Unicode content.
    self.assertEqual(
        utils.truncate_html_words(
            '我愿意这样,朋友——我独自远行,不但没有你,\
             并且再没有别的影在黑暗里。', 12),
        '我愿意这样,朋友——我独自远行' + ' …')
    self.assertEqual(
        utils.truncate_html_words(
            'Ты мелькнула, ты предстала, Снова сердце задрожало,', 3),
        'Ты мелькнула, ты' + ' …')
    self.assertEqual(
        utils.truncate_html_words('Trong đầm gì đẹp bằng sen', 4),
        'Trong đầm gì đẹp' + ' …')

    # Words enclosed or intervaled by HTML tags.
    self.assertEqual(
        utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
        '<p>' + 'word ' * 20 + '…</p>')
    self.assertEqual(
        utils.truncate_html_words(
            '<span\nstyle="\n…\n">' + 'word ' * 100 + '</span>', 20),
        '<span\nstyle="\n…\n">' + 'word ' * 20 + '…</span>')
    self.assertEqual(utils.truncate_html_words('<br>' + 'word ' * 100, 20),
                     '<br>' + 'word ' * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words('<!-- comment -->' + 'word ' * 100, 20),
        '<!-- comment -->' + 'word ' * 20 + '…')

    # Words with hyphens and apostrophes.
    self.assertEqual(utils.truncate_html_words("a-b " * 100, 20),
                     "a-b " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("it's " * 100, 20),
                     "it's " * 20 + '…')

    # Words with HTML entity references.
    self.assertEqual(utils.truncate_html_words("&eacute; " * 100, 20),
                     "&eacute; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("caf&eacute; " * 100, 20),
                     "caf&eacute; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&egrave;lite " * 100, 20),
                     "&egrave;lite " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("cafeti&eacute;re " * 100, 20),
        "cafeti&eacute;re " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&int;dx " * 100, 20),
                     "&int;dx " * 20 + '…')

    # Words with HTML character references inside and outside
    # the ASCII range.
    self.assertEqual(utils.truncate_html_words("&#xe9; " * 100, 20),
                     "&#xe9; " * 20 + '…')
    self.assertEqual(utils.truncate_html_words("&#x222b;dx " * 100, 20),
                     "&#x222b;dx " * 20 + '…')

    # Words with invalid or broken HTML references.
    self.assertEqual(utils.truncate_html_words('&invalid;', 20),
                     '&invalid;')
    # Numeric references beyond the valid code point range.
    self.assertEqual(utils.truncate_html_words('&#9999999999;', 20),
                     '&#9999999999;')
    self.assertEqual(utils.truncate_html_words('&#xfffffffff;', 20),
                     '&#xfffffffff;')
    # References missing their terminating semicolon.
    self.assertEqual(utils.truncate_html_words('&mdash text', 20),
                     '&mdash text')
    self.assertEqual(utils.truncate_html_words('&#1234 text', 20),
                     '&#1234 text')
    self.assertEqual(utils.truncate_html_words('&#xabc text', 20),
                     '&#xabc text')
def test_truncate_html_words(self):
    """Check truncate_html_words on plain text, tags and HTML references.

    The reference cases use real entity and character references (written
    out as ``&name;`` / ``&#...;``), so that reference handling is what
    gets exercised rather than plain Unicode text.
    """
    # Plain text.
    self.assertEqual(
        utils.truncate_html_words('short string', 20),
        'short string')
    self.assertEqual(
        utils.truncate_html_words('word ' * 100, 20),
        'word ' * 20 + '…')

    # Words enclosed or intervaled by HTML tags.
    self.assertEqual(
        utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
        '<p>' + 'word ' * 20 + '…</p>')
    self.assertEqual(
        utils.truncate_html_words(
            '<span\nstyle="\n…\n">' + 'word ' * 100 + '</span>', 20),
        '<span\nstyle="\n…\n">' + 'word ' * 20 + '…</span>')
    self.assertEqual(
        utils.truncate_html_words('<br>' + 'word ' * 100, 20),
        '<br>' + 'word ' * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words('<!-- comment -->' + 'word ' * 100, 20),
        '<!-- comment -->' + 'word ' * 20 + '…')

    # Words with hyphens and apostrophes.
    self.assertEqual(
        utils.truncate_html_words("a-b " * 100, 20),
        "a-b " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("it's " * 100, 20),
        "it's " * 20 + '…')

    # Words with HTML entity references.
    self.assertEqual(
        utils.truncate_html_words("&eacute; " * 100, 20),
        "&eacute; " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("caf&eacute; " * 100, 20),
        "caf&eacute; " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("&egrave;lite " * 100, 20),
        "&egrave;lite " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("cafeti&eacute;re " * 100, 20),
        "cafeti&eacute;re " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("&int;dx " * 100, 20),
        "&int;dx " * 20 + '…')

    # Words with HTML character references inside and outside
    # the ASCII range.
    self.assertEqual(
        utils.truncate_html_words("&#xe9; " * 100, 20),
        "&#xe9; " * 20 + '…')
    self.assertEqual(
        utils.truncate_html_words("&#x222b;dx " * 100, 20),
        "&#x222b;dx " * 20 + '…')
def __init__(self, content, metadata=None, settings=None, filename=None):
    """Initialise the object from raw content, metadata and settings.

    Metadata (layered over ``settings["DEFAULT_METADATA"]``) is copied
    onto the instance as lower-cased attributes. Author, language, slug,
    ``save_as``/``url``, date format, localized date, status and summary
    are then filled in whenever the metadata did not supply them.

    :param content: stored unchanged as ``self._content``.
    :param metadata: mapping of metadata names to values; a falsy value
        is treated as an empty mapping.
    :param settings: settings mapping; a falsy value is replaced by
        ``_DEFAULT_CONFIG``.
    :param filename: optional source file name; when truthy it is stored
        as ``self.filename`` and used in the unknown-author warning.
    """
    # init parameters
    if not metadata:
        metadata = {}
    if not settings:
        settings = _DEFAULT_CONFIG

    self._content = content
    self.translations = []

    # explicit metadata overrides the defaults coming from the settings
    local_metadata = dict(settings.get("DEFAULT_METADATA", ()))
    local_metadata.update(metadata)

    # set metadata as attributes
    for key, value in local_metadata.items():
        setattr(self, key.lower(), value)

    # default author to the one in settings if not defined
    if not hasattr(self, "author"):
        if "AUTHOR" in settings:
            self.author = settings["AUTHOR"]
        else:
            self.author = getenv("USER", "John Doe")
            warning(
                u"Author of `{0}' unknow, assuming that his name is `{1}'".format(
                    filename or self.title, self.author
                )
            )

    # manage languages
    self.in_default_lang = True
    if "DEFAULT_LANG" in settings:
        default_lang = settings["DEFAULT_LANG"].lower()
        if not hasattr(self, "lang"):
            self.lang = default_lang
        self.in_default_lang = self.lang == default_lang

    # create the slug if not existing, from the title
    if not hasattr(self, "slug") and hasattr(self, "title"):
        self.slug = slugify(self.title)

    # create save_as from the slug (+lang)
    # clean_url is only derived together with save_as; pre-bind it so the
    # CLEAN_URLS check below cannot hit an unbound local when save_as came
    # from the metadata or when there is no slug.
    clean_url = None
    if not hasattr(self, "save_as") and hasattr(self, "slug"):
        if self.in_default_lang:
            self.save_as = "%s.html" % self.slug
            clean_url = "%s/" % self.slug
        else:
            self.save_as = "%s-%s.html" % (self.slug, self.lang)
            clean_url = "%s-%s/" % (self.slug, self.lang)

    # pick the url: the clean form when requested and available,
    # otherwise fall back to save_as
    if settings.get("CLEAN_URLS", False) and clean_url is not None:
        self.url = clean_url
    elif hasattr(self, "save_as"):
        self.url = self.save_as

    if filename:
        self.filename = filename

    # manage the date format
    if not hasattr(self, "date_format"):
        if hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]:
            self.date_format = settings["DATE_FORMATS"][self.lang]
        else:
            self.date_format = settings["DEFAULT_DATE_FORMAT"]

    if hasattr(self, "date"):
        # the formatted date is decoded with stdin's encoding on win32
        # and with "utf" everywhere else
        # NOTE(review): encode()/decode() around strftime presumes byte
        # strings as in Python 2 -- confirm the target interpreter.
        date_encoding = stdin.encoding if platform == "win32" else "utf"
        self.locale_date = self.date.strftime(
            self.date_format.encode("ascii", "xmlcharrefreplace")
        ).decode(date_encoding)

    # manage status
    if not hasattr(self, "status"):
        self.status = settings["DEFAULT_STATUS"]
        if not settings["WITH_FUTURE_DATES"]:
            # a defaulted status becomes "draft" for dates in the future
            if hasattr(self, "date") and self.date > datetime.now():
                self.status = "draft"

    # set summary
    if not hasattr(self, "summary"):
        self.summary = truncate_html_words(self.content, 50)
def truncate(self, html: str, max_length: int, without_tag: bool) -> str:
    """Return ``html`` shortened by ``pelican_utils.truncate_html_words``.

    :param html: markup handed to ``truncate_html_words``.
    :param max_length: limit forwarded to ``truncate_html_words``.
    :param without_tag: when true, every ``<...>`` sequence is removed
        from the shortened result before it is returned.
    :return: the shortened text, with or without tags.
    """
    shortened = pelican_utils.truncate_html_words(html, max_length)
    if not without_tag:
        return shortened
    # drop anything that looks like a tag: "<", one or more non-">", ">"
    return re.sub(r'<[^>]+>', "", shortened)
def test_truncate_html_words(self):
    """Check ``utils.truncate_html_words`` against a table of inputs.

    Every case is an ``(html, expected)`` pair. Each input is passed to
    ``utils.truncate_html_words`` with a limit of 20 and the result is
    compared with ``assertEqual``; cases run in order and the first
    mismatch fails the test.
    """
    limit = 20
    cases = [
        # Plain text.
        ('short string', 'short string'),
        ('word ' * 100, 'word ' * 20 + '...'),

        # Words enclosed or separated by HTML tags.
        ('<p>' + 'word ' * 100 + '</p>',
         '<p>' + 'word ' * 20 + '...</p>'),
        ('<span\nstyle="\n...\n">' + 'word ' * 100 + '</span>',
         '<span\nstyle="\n...\n">' + 'word ' * 20 + '...</span>'),
        ('<br>' + 'word ' * 100, '<br>' + 'word ' * 20 + '...'),
        ('<!-- comment -->' + 'word ' * 100,
         '<!-- comment -->' + 'word ' * 20 + '...'),

        # Words with hyphens and apostrophes.
        ("a-b " * 100, "a-b " * 20 + '...'),
        ("it's " * 100, "it's " * 20 + '...'),

        # Words with HTML entity references.
        # NOTE(review): the literals below are plain non-ASCII characters,
        # not ``&name;`` entities -- confirm they were not decoded by
        # accident.
        ("é " * 100, "é " * 20 + '...'),
        ("café " * 100, "café " * 20 + '...'),
        ("èlite " * 100, "èlite " * 20 + '...'),
        ("cafetiére " * 100, "cafetiére " * 20 + '...'),
    ]

    for html, expected in cases:
        self.assertEqual(utils.truncate_html_words(html, limit), expected)