Beispiel #1
0
def get_feeditem_model(url, timestamp, item):
    """
    Returns a new feeditem model with cleaned attributes.
    Returns None if the item already exists.

    Parameters:

    url - The feed's url
    timestamp - Value stored unchanged in the model's timestamp field
    item - A feedparser rss-entry
    """
    # Build the key once; it is used both for the existence check and
    # for the new model.
    item_key = FeedItemKey(url, item.link)

    existing = FeedItemModel.query(FeedItemModel.key == item_key)
    for _ in existing.iter(keys_only=True):
        # Existing item, so ignore
        return None

    # Some feeds use content, some description.
    # Take the longest.
    # "or" guards against an explicit None value, which would break len().
    description = item.get("description", "") or ""

    contents = item.get("content", None)
    try:
        # Can be many content, with different types; only the first is used.
        content = contents[0].value or ""
    except (TypeError, IndexError, KeyError, AttributeError):
        # No content at all (None), an empty list, or an entry without
        # a value: fall back to the description.
        content = ""

    if len(content) > len(description):
        description = content

    clean_description = strip_bloat(description)

    return FeedItemModel(key=item_key,
                         title=item.title,
                         description=clean_description,
                         link=item.link,
                         title_stripped=get_snippet(item.title),
                         snippet=get_snippet(clean_description),
                         timestamp=timestamp,
                         feed_link=url,
                         images=get_images(clean_description),
                         published=convert_timestamp(item.get("published", None)),
                         author=item.get("author", None),
                         comments=item.get("comments", None),
                         enclosures=[e.href for e in item.get("enclosures", [])],
                         tags=[t.term for t in item.get("tags", [])])
Beispiel #2
0
def get_feeditems(url, min_timestamp=None):
    """
    Returns the stored items of a feed, sorted by their published value.

    Parameters:

    url - The feed's url
    min_timestamp - Optional. When given and parse_timestamp can parse it,
                    only items with a newer timestamp are returned.
    """
    entries = FeedItemModel.query(FeedItemModel.feed_link == url)

    # filter on timestamps (parse only once)
    threshold = None
    if min_timestamp is not None:
        threshold = parse_timestamp(min_timestamp)
    if threshold is not None:
        entries = entries.filter(FeedItemModel.timestamp > threshold)

    # Query.order() returns a new query instead of modifying the existing
    # one, so calling it without using the result never sorted anything.
    # The sort is done in memory because the datastore requires the first
    # sort order to be on the inequality-filtered property (timestamp).
    # Items without a published value come first; the key never compares
    # None against a real value.
    # NOTE(review): assumes published values are mutually comparable - confirm.
    models = sorted(entries,
                    key=lambda m: (m.published is not None, m.published))

    # Get individual items
    items = []
    for item in models:
        print("Iterating an item:", item.title)
        items.append(feeditem_from_model(item))

    return items
Beispiel #3
0
def get_feeditems(url, min_timestamp=None):
    """
    Returns the stored items of a feed, sorted by their published value.

    Parameters:

    url - The feed's url
    min_timestamp - Optional. When given and parse_timestamp can parse it,
                    only items with a newer timestamp are returned.
    """
    entries = FeedItemModel.query(FeedItemModel.feed_link == url)

    # filter on timestamps (parse only once)
    threshold = None
    if min_timestamp is not None:
        threshold = parse_timestamp(min_timestamp)
    if threshold is not None:
        entries = entries.filter(FeedItemModel.timestamp > threshold)

    # Query.order() returns a new query instead of modifying the existing
    # one, so calling it without using the result never sorted anything.
    # The sort is done in memory because the datastore requires the first
    # sort order to be on the inequality-filtered property (timestamp).
    # Items without a published value come first; the key never compares
    # None against a real value.
    # NOTE(review): assumes published values are mutually comparable - confirm.
    models = sorted(entries,
                    key=lambda m: (m.published is not None, m.published))

    # Get individual items
    items = []
    for item in models:
        print("Iterating an item:", item.title)
        items.append(feeditem_from_model(item))

    return items