def get_entries():
    """Yield one entry per post, walking pages from URL via ``next_url``.

    Each page is fetched with ``get_page``; every post on it is turned into
    an entry with ``get_entry`` and yielded. Paging stops when a page has a
    falsy ``next_url`` or when ``diff(start, last)`` reaches ``OLDEST_SEC``,
    where ``last`` is the smallest ``entry['updated']`` value seen so far.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened source; confirm the nesting of the trailing ``sleep()`` calls
    against the original file.
    """
    url = URL
    start = now()
    # `last` starts at "now" and only moves to smaller 'updated' values.
    last = now()
    pages = 0
    while url and diff(start, last) < OLDEST_SEC:
        debug(u"diff {diff}", diff=diff(start, last))
        page = get_page(url)
        pages += 1
        # `posts` is referenced only by name inside the message below.
        posts = len(page['posts'])
        # NOTE(review): unlike the debug() call above, no format arguments
        # are passed here; presumably info() resolves {pages}/{url}/{posts}
        # itself -- confirm before renaming or removing these locals.
        info(u"page {pages} {url} {posts} posts")
        for post in page['posts']:
            entry = get_entry(post['link'], post['prefix'], post['title'])
            yield entry
            # Runs only once the consumer asks for the next entry.
            last = min(last, entry['updated'])
            sleep()
        # An empty page never reaches the per-post sleep() above.
        if not page['posts']:
            sleep()
        url = page['next_url']
        # NOTE(review): assumed to sit inside the while loop -- confirm.
        sleep()
def get_title(e):
    """Return the title of entry *e* prefixed with its point count.

    The first ``<digits> points`` occurrence in ``e.summary`` supplies the
    count; 0 is used when the summary contains no such text. The resulting
    ``"<points>p <title>"`` string is passed to ``info`` and then returned.
    """
    # Raw string: in a plain literal '\d' and '\s' are invalid escape
    # sequences, which newer Python versions warn about at compile time.
    match = re.search(r'(\d+)\s*points', e.summary)
    points = int(match.group(1)) if match else 0
    title = "%dp %s" % (points, e.title)
    info(title)
    return title