Example #1
0
def _merge_new_entries(feed_content, stored_flow, last_entry):
    """Prepend the not-yet-stored fetched items to ``stored_flow``.

    Items are taken from the front of ``feed_content`` until one whose
    element 3 equals ``last_entry`` is reached.

    Returns a ``(merged_flow, newest_entry)`` tuple.  When nothing was
    fetched, both inputs are returned unchanged so the stored marker survives
    an empty or failed fetch.
    """
    if not feed_content:
        return stored_flow, last_entry

    fresh = []
    for item in feed_content:
        # Element 3 is the value compared against last_entry (the entry URL,
        # per the original comment -- confirm against feed_query).
        if item[3] == last_entry:
            break
        fresh.append(item)
    return fresh + stored_flow, feed_content[0][3]


def update_feedflow(feed, username):
    """Fetch ``feed`` and store its flow in the datastore and in memcache.

    The complete flow is written to the ``feed_db_info`` entity for the feed
    (created when missing); only the first ``FEED_NUM_IN_CACHE`` items are
    written to the memcache entry keyed by the feed URL.

    Args:
        feed: feed URL; used as the datastore lookup value and memcache key.
        username: recorded as the subscriber when the memcache entry has to
            be created from scratch.

    Returns:
        None.
    """
    feed_content = feed_query(feed)

    # Load what the datastore already holds for this feed.
    record = db.GqlQuery("select * from feed_db_info where feed_url = :1", feed).get()
    if record is not None:
        # feed_flow may be unset on the entity; treat that as an empty flow.
        stored_flow = json.loads(record.feed_flow) if record.feed_flow else []
        last_entry = record.last_entry or ""
    else:
        stored_flow = []
        last_entry = ""

    # Keep the previous last_entry when the fetch returned nothing; resetting
    # it would make the next successful fetch re-add every item as a duplicate.
    feed_flow_db, last_entry = _merge_new_entries(feed_content, stored_flow, last_entry)

    # Datastore first, so the cache never holds items the datastore lacks.
    serialized_flow = json.dumps(feed_flow_db)
    if record is not None:
        record.feed_flow = serialized_flow
        record.last_entry = last_entry
    else:
        record = feed_db_info(feed_url=feed, feed_flow=serialized_flow, last_entry=last_entry)
    record.put()

    # Slicing clamps to the list length, so no explicit min() is needed.
    cached_flow = feed_flow_db[:FEED_NUM_IN_CACHE]

    raw_cache = memcache.get(feed)
    if raw_cache:
        feed_info_cache = json.loads(raw_cache)
        feed_info_cache["cached_flow"] = cached_flow
        feed_info_cache["last_query"] = util.serialize_time(datetime.now())
    else:
        # First time this feed is cached: build the entry from scratch.
        # NOTE(review): last_query starts at TIME_INITIAL rather than "now",
        # as in the original -- confirm this is intentional.
        subscriber = {"username": username, "unread": INITIAL_UNREAD_ITEM_NUM, "last_read": ""}
        feed_info_cache = {"last_query": TIME_INITIAL, "subscriber": subscriber, "cached_flow": cached_flow}

    memcache.set(feed, json.dumps(feed_info_cache))
Example #2
0
import collections
from datetime import datetime

sys.path.append(os.path.abspath("lib"))
import feedparser as fp

# Extend feedparser's HTML sanitizer whitelist with the embedded-object tags
# so that they are accepted in feed content.
fp._HTMLSanitizer.acceptable_elements.update(("object", "embed", "iframe"))

# Module-wide settings
INITIAL_UNREAD_ITEM_NUM = "10"  # kept as a string, exactly as stored in subscriber records
FEED_QUERY_GAP_SECONDS = 1200
FEED_NUM_IN_CACHE = 100  # upper bound on feed items kept per memcache entry
# Serialized placeholder timestamp, used as the initial "last_query" value.
TIME_INITIAL = util.serialize_time(
    datetime.strptime("1900-01-01 00:00:00.1000", "%Y-%m-%d %H:%M:%S.%f")
)

# user section, user info control, user adds or deletes a feed
class rss_user_data(db.Model):
    """Datastore model holding one user's RSS subscription data."""

    # Name identifying the user; mandatory when the entity is created.
    username = db.StringProperty(required=True)
    # The user's feeds, stored as text; optional, so it may be unset for a
    # new user. NOTE(review): the text encoding is not visible in this
    # section -- confirm against add_feed.
    rss_feeds = db.TextProperty()  # can be empty at initial stage


# feed section, store entire feed flow for each feed in the db
class feed_db_info(db.Model):
    """Datastore model storing the entire feed flow of a single feed."""

    # URL of the feed; mandatory when the entity is created.
    feed_url = db.StringProperty(required=True)
    # The feed's flow stored as text; optional, so it may be unset for a new
    # feed. Presumably a JSON-encoded list of feed items -- verify against
    # the code that writes it.
    feed_flow = db.TextProperty()  # can be empty at initial stage
    # Marker for the most recent entry already stored for this feed; optional.
    last_entry = db.StringProperty()


def add_feed(username, feed):