Example #1
0
def generate_feed_entries(url, **kw):
    """
    generator; parses the feed at `url` and yields one FeedEntry per item

    Each yielded entry is built from the item's title and link, gets the
    feed's etag / modified values when the parsed document exposes them,
    and carries both the entity-unescaped content (raw_content) and a
    unicode, HTML-stripped copy of it (stripped_content).  Items with
    neither content nor a non-empty summary are logged and skipped; they
    do not count towards `max`.

    keywords:
        max - max number of entries to return; a falsy value (None, 0)
              means no limit
    yields:
        each FeedEntry instance
    raises:
        FeedException - built from doc.bozo_exception when feedparser
                        flags the parsed document as bozo
    """
    logging.debug("generate_feed_entries url:%s", url)
    max_entries = kw.get("max", None)

    doc = feedparser.parse(url)
    if doc.bozo:
        raise FeedException(doc.bozo_exception)

    count = 0
    for e in doc.entries:
        title = e.title
        logging.debug("generate_feed_entries processing entry '%s'", title)

        entry = FeedEntry(title=title, link=e.link)

        if hasattr(doc, "etag"):
            entry.set(etag=doc.etag)

        if hasattr(doc, "modified"):
            entry.set(modified=doc.modified)

        # Look both fields up defensively.  Plain attribute access on a
        # field the item does not have raises AttributeError, which would
        # abort the whole generator instead of reaching the "no content
        # found" skip below; an empty content list would likewise fail on
        # the [0] index.
        content = getattr(e, "content", None)
        if content:
            raw_content = content[0]['value']
        else:
            raw_content = getattr(e, "summary", None)
            if not raw_content:
                logging.debug("no content found")
                continue

        # unescapes entities
        raw_content = unescape(raw_content)

        stripped_content = unicode(strip_html(raw_content))
        entry.set(raw_content=raw_content)
        entry.set(stripped_content=stripped_content)

        yield entry

        # only entries that were actually yielded count towards the limit
        count += 1
        if max_entries and count == max_entries:
            break
Example #2
0
def generate_feed_entries(url, **kw):
    """
    generator; parses the feed at `url` and yields one FeedEntry per item

    Each yielded entry is built from the item's title and link, gets the
    feed's etag / modified values when the parsed document exposes them,
    and carries both the entity-unescaped content (raw_content) and a
    unicode, HTML-stripped copy of it (stripped_content).  Items with
    neither content nor a non-empty summary are logged and skipped; they
    do not count towards `max`.

    keywords:
        max - max number of entries to return; a falsy value (None, 0)
              means no limit
    yields:
        each FeedEntry instance
    raises:
        FeedException - built from doc.bozo_exception when feedparser
                        flags the parsed document as bozo
    """
    logging.debug("generate_feed_entries url:%s", url)
    max_entries = kw.get("max", None)

    doc = feedparser.parse(url)
    if doc.bozo:
        raise FeedException(doc.bozo_exception)

    count = 0
    for e in doc.entries:
        title = e.title
        logging.debug("generate_feed_entries processing entry '%s'", title)

        entry = FeedEntry(title=title, link=e.link)

        if hasattr(doc, "etag"):
            entry.set(etag=doc.etag)

        if hasattr(doc, "modified"):
            entry.set(modified=doc.modified)

        # Look both fields up defensively.  Plain attribute access on a
        # field the item does not have raises AttributeError, which would
        # abort the whole generator instead of reaching the "no content
        # found" skip below; an empty content list would likewise fail on
        # the [0] index.
        content = getattr(e, "content", None)
        if content:
            raw_content = content[0]['value']
        else:
            raw_content = getattr(e, "summary", None)
            if not raw_content:
                logging.debug("no content found")
                continue

        # unescapes entities
        raw_content = unescape(raw_content)

        stripped_content = unicode(strip_html(raw_content))
        entry.set(raw_content=raw_content)
        entry.set(stripped_content=stripped_content)

        yield entry

        # only entries that were actually yielded count towards the limit
        count += 1
        if max_entries and count == max_entries:
            break
Example #3
0
 def test_unescape_no_entities(self):
     # A string containing no entities must come back from unescape()
     # unchanged.
     s = "hi"
     # assertEqual rather than the deprecated assertEquals alias, which
     # newer unittest versions no longer provide.
     self.assertEqual(s, unescape(s))
Example #4
0
 def test_unescape_with_entities(self):
     # Named entity: "&lt;" must decode to "<".
     # assertEqual rather than the deprecated assertEquals alias, which
     # newer unittest versions no longer provide.
     self.assertEqual("<", unescape("&lt;"))
     # Numeric character references, mixing hexadecimal (&#x61;, &#x63;)
     # and decimal (&#98;) forms, must decode to "abc".
     self.assertEqual("abc", unescape("&#x61;&#98;&#x63;"))