예제 #1
0
    def __set_feed(self,
                   gone=0,
                   feed=None,
                   status=None,
                   version=None,
                   bozo_exception=None,
                   encoding=None,
                   etag=None,
                   modified=None,
                   entries=None):
        """
        Set feed data after the feed was parsed.

        Every argument is stored on the instance under the matching
        ``__feed_*`` attribute.  ``feed`` and ``entries`` are deep-copied
        before being stored; when ``entries`` is None an empty list is
        stored instead.

        The ``title`` of every dict entry is passed through
        ``unescape_html_entities``.  This happens on the stored copy, so
        the ``entries`` object handed in by the caller is not modified.
        """

        self.__feed_gone = gone
        self.__feed_feed = deepcopy(feed) if feed is not None else None
        self.__feed_status = status
        self.__feed_version = version
        self.__feed_bozo_exception = bozo_exception
        self.__feed_encoding = encoding
        self.__feed_etag = etag
        self.__feed_modified = modified
        if entries is not None:
            # Copy first: unescaping the caller's entries in place would
            # unescape the titles a second time if the same entries were
            # ever passed in again.
            stored_entries = deepcopy(entries)
            for entry in stored_entries:
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if isinstance(entry, dict) and 'title' in entry:
                    entry['title'] = unescape_html_entities(entry['title'])
            self.__feed_entries = stored_entries
        else:
            self.__feed_entries = []
        # NOTE(review): presumably the ZODB persistence "modified" flag --
        # confirm the enclosing class is Persistent.
        self._p_changed = 1
예제 #2
0
def html2text(html, trim_length=512, ellipsis=False):
    """
    Return the text content of ``html`` with all tags removed.

    The text nodes found by BeautifulSoup are concatenated, run through
    ``unescape_html_entities`` and stripped of surrounding whitespace.

    If ``trim_length`` is a true value (not zero, not None) and smaller
    than the length of that text, the text is cut to its first
    ``trim_length`` characters.

    If the text was cut and ``ellipsis`` is true, the trailing run of
    non-whitespace characters is replaced by a single ellipsis character
    (U+2026), provided that run is preceded by whitespace.  The preceding
    whitespace is kept.  A cut text without such a run is returned as is.
    """
    text_nodes = BeautifulSoup(html).findAll(text=True)
    plain = unescape_html_entities(''.join(text_nodes)).strip()

    # No limit requested, or the text already fits: nothing to trim.
    if not (trim_length and trim_length < len(plain)):
        return plain

    clipped = plain[:trim_length]
    if not ellipsis:
        return clipped

    # Swap the last (possibly cut-off) word for the ellipsis character.
    return re.sub(r'(?<=\s)\S+$', u'\u2026', clipped)
예제 #3
0
def html2text(html, trim_length=512, ellipsis=False):
    """
    Return the text content of ``html`` with all tags removed.

    The text nodes found by BeautifulSoup are concatenated, run through
    ``unescape_html_entities`` and stripped of surrounding whitespace.

    If ``trim_length`` is a true value (not zero, not None) and smaller
    than the length of that text, the text is cut to its first
    ``trim_length`` characters.

    If the text was cut and ``ellipsis`` is true, the trailing run of
    non-whitespace characters is replaced by a single ellipsis character
    (U+2026), provided that run is preceded by whitespace.  The preceding
    whitespace is kept.  A cut text without such a run is returned as is.
    """
    text_nodes = BeautifulSoup(html).findAll(text=True)
    plain = unescape_html_entities(''.join(text_nodes)).strip()

    # No limit requested, or the text already fits: nothing to trim.
    if not (trim_length and trim_length < len(plain)):
        return plain

    clipped = plain[:trim_length]
    if not ellipsis:
        return clipped

    # Swap the last (possibly cut-off) word for the ellipsis character.
    return re.sub(r'(?<=\s)\S+$', u'\u2026', clipped)
예제 #4
0
    def __set_feed(self, gone=0, feed=None, status=None, version=None,
                   bozo_exception=None, encoding=None, etag=None,
                   modified=None, entries=None):
        """
        Set feed data after the feed was parsed.

        Every argument is stored on the instance under the matching
        ``__feed_*`` attribute.  ``feed`` and ``entries`` are deep-copied
        before being stored; when ``entries`` is None an empty list is
        stored instead.

        The ``title`` of every dict entry is passed through
        ``unescape_html_entities``.  This happens on the stored copy, so
        the ``entries`` object handed in by the caller is not modified.
        """

        self.__feed_gone = gone
        self.__feed_feed = deepcopy(feed) if feed is not None else None
        self.__feed_status = status
        self.__feed_version = version
        self.__feed_bozo_exception = bozo_exception
        self.__feed_encoding = encoding
        self.__feed_etag = etag
        self.__feed_modified = modified
        if entries is not None:
            # Copy first: unescaping the caller's entries in place would
            # unescape the titles a second time if the same entries were
            # ever passed in again.
            stored_entries = deepcopy(entries)
            for entry in stored_entries:
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if isinstance(entry, dict) and 'title' in entry:
                    entry['title'] = unescape_html_entities(entry['title'])
            self.__feed_entries = stored_entries
        else:
            self.__feed_entries = []
        # NOTE(review): presumably the ZODB persistence "modified" flag --
        # confirm the enclosing class is Persistent.
        self._p_changed = 1
예제 #5
0
def unescape_html_entities(text):
    """
    Unescape HTML entities in the given text.

    Named and numeric character references (``&amp;``, ``&#233;``, ...)
    in ``text`` are replaced by the characters they stand for, and the
    resulting string is returned.

    The work is delegated to the standard library.  The function used to
    call itself, which recursed until the interpreter's recursion limit
    was hit.
    """
    # Imported locally so the function works whichever Python version
    # the module is loaded under.
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        try:
            from html.parser import HTMLParser  # Python 3.0 - 3.3
        except ImportError:
            from HTMLParser import HTMLParser  # Python 2
        unescape = HTMLParser().unescape
    return unescape(text)
예제 #6
0
def unescape_html_entities(text):
    """
    Unescape HTML entities in the given text.

    Named and numeric character references (``&amp;``, ``&#233;``, ...)
    in ``text`` are replaced by the characters they stand for, and the
    resulting string is returned.

    The work is delegated to the standard library.  The function used to
    call itself, which recursed until the interpreter's recursion limit
    was hit.
    """
    # Imported locally so the function works whichever Python version
    # the module is loaded under.
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        try:
            from html.parser import HTMLParser  # Python 3.0 - 3.3
        except ImportError:
            from HTMLParser import HTMLParser  # Python 2
        unescape = HTMLParser().unescape
    return unescape(text)