def __set_feed(self, gone=0, feed=None, status=None, version=None,
               bozo_exception=None, encoding=None, etag=None, modified=None,
               entries=None):
    """
    Set feed data after the feed was parsed.

    Each argument is stored on the matching private ``__feed_*`` attribute.
    ``feed`` and ``entries`` are deep-copied, so the stored state does not
    share objects with whatever the caller passed in.

    For every entry that is a ``dict`` with a ``'title'`` key, the stored
    title is passed through ``unescape_html_entities``. The unescaping is
    done on the copy; the caller's ``entries`` are left untouched.

    When ``entries`` is None an empty list is stored.
    """
    self.__feed_gone = gone
    self.__feed_feed = deepcopy(feed) if feed is not None else None
    self.__feed_status = status
    self.__feed_version = version
    self.__feed_bozo_exception = bozo_exception
    self.__feed_encoding = encoding
    self.__feed_etag = etag
    self.__feed_modified = modified

    if entries is not None:
        # Copy first, then clean up the titles on the copy, so the
        # caller's objects are never modified as a side effect.
        stored_entries = deepcopy(entries)
        for entry in stored_entries:
            # ``in`` works on both Python 2 and 3 (``has_key`` is gone in 3).
            if isinstance(entry, dict) and 'title' in entry:
                entry['title'] = unescape_html_entities(entry['title'])
        self.__feed_entries = stored_entries
    else:
        self.__feed_entries = []

    # NOTE(review): presumably the ZODB persistence flag marking this
    # object as modified -- confirm against the enclosing class.
    self._p_changed = 1
def html2text(html, trim_length=512, ellipsis=False):
    """
    Strip all tags from ``html`` and return the remaining text.

    The text nodes found by BeautifulSoup are joined together, HTML entities
    are unescaped and leading/trailing whitespace is stripped.

    If ``trim_length`` is not a false value (e.g. zero, None) and the text is
    longer than ``trim_length``, the output is limited to ``trim_length``
    characters.

    If the ``ellipsis`` flag is set to True and the text was trimmed, the
    partial word at the end is dropped back to the nearest whitespace on the
    left and replaced with an ellipsis (U+2026). When there is no such
    partial word to replace (the cut text ends in whitespace, or contains no
    whitespace at all), the last character is replaced with the ellipsis
    instead, so a trimmed result is always marked and never grows beyond
    ``trim_length``.
    """
    soup = BeautifulSoup(html)
    text = unescape_html_entities(''.join(soup.findAll(text=True))).strip()

    # Nothing to trim: no limit requested, or the text already fits.
    if not trim_length or trim_length >= len(text):
        return text

    text = text[:trim_length]
    if ellipsis:
        ELLIPSIS = u'\u2026'
        # Replace the trailing (possibly cut-off) word that follows whitespace.
        text, replaced = re.subn(r'(?<=\s)\S+$', ELLIPSIS, text)
        if not replaced:
            # No word boundary to fall back to; still signal the truncation.
            text = text[:-1] + ELLIPSIS
    return text
def unescape_html_entities(text):
    """
    Unescape HTML entities in the given text.

    Named and numeric character references (``&amp;``, ``&#8230;``, ...) are
    replaced with the characters they stand for, using the standard library.
    A false value (None or an empty string) is returned unchanged.
    """
    if not text:
        return text
    # Local import keeps the function self-contained and works on both
    # interpreter lines: ``html.unescape`` exists on Python 3.4+, while
    # Python 2 only offers ``HTMLParser().unescape``.
    try:
        from html import unescape
    except ImportError:
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(text)