def _entry_summary(entry):
    """Return the raw (un-stripped) summary text of ``entry``.

    Prefers ``entry['summary']``; otherwise falls back to ``entry['content']``,
    which may be a text string or a sequence of dicts carrying a ``'value'``
    key.  Returns ``None`` when no text can be extracted from either field.
    """
    summary = entry.get('summary', '')
    if summary:
        return summary
    content = entry.get('content', '')
    # ``unicode`` is the Python 2 text type this code is written against.
    if isinstance(content, unicode) and content:
        return content
    try:
        return content[0].get('value', '')
    except (TypeError, IndexError, KeyError, AttributeError):
        # Empty, missing or unexpectedly shaped content: nothing usable.
        return None


def _entry_timestamp(entry):
    """Return the entry's update time as a minute-resolution ``datetime``.

    Uses ``entry['updated_parsed']`` when present.  Otherwise tries
    ``time.localtime(entry.get('updated'))`` and, if that value is not
    something ``localtime`` accepts (e.g. a date string), falls back to the
    current local time instead of raising.
    """
    parsed = entry.get('updated_parsed', '')
    if not parsed:
        try:
            # ``None`` (key absent) yields the current local time.
            parsed = time.localtime(entry.get('updated'))
        except (TypeError, ValueError):
            parsed = time.localtime()
    # Only year, month, day, hour and minute are kept.
    return datetime.datetime(*parsed[:5])


def _entry_tag(entry):
    """Return the category label for ``entry``, or a falsy value if none.

    Looks at the ``'term'`` of the first item in ``entry['tags']``; if that
    yields nothing and ``entry['categories']`` is a mapping, its first value
    is used.  Malformed or empty structures are treated as "no tag".
    """
    tag = None
    if 'tags' in entry:
        try:
            tag = entry.get('tags', '')[0].get('term', '')
        except (TypeError, IndexError, KeyError, AttributeError):
            tag = None
    if not tag and 'categories' in entry:
        categories = entry.get('categories')
        try:
            tag = next(iter(categories.values()), None)
        except AttributeError:
            # Not a mapping (e.g. a list); no label can be taken from it here.
            tag = None
    return tag


def fetch_entries(feed, entries):
    """Persist every new, sufficiently long entry of ``feed``.

    Args:
        feed: the ``Feed`` model the entries belong to; passed to ``Entry``.
        entries: iterable of dict-like parsed feed entries.  Each must have
            ``'link'`` and ``'title'``; ``'author'``, ``'summary'``,
            ``'content'``, ``'updated_parsed'``, ``'updated'``, ``'tags'`` and
            ``'categories'`` are optional.

    An entry is skipped (and the loop moves on to the next one) when:
      * an ``Entry`` with the same link already exists,
      * its title is 200 characters or longer,
      * no summary text can be extracted, or
      * the tag-stripped summary is 100 characters or shorter.

    Entries without a tag are still saved, just without a category.  Save
    failures are printed and do not stop the loop.  Returns ``None``.
    """
    for entry in entries:
        entry_link = entry['link']
        if dao.get_by_link(entry_link, Entry):
            # Already stored: nothing to do for this entry.
            continue

        entry_model = Entry(feed=feed, link=entry_link)
        entry_model.title = entry['title']
        if len(entry_model.title) >= 200:
            continue
        entry_model.author = entry.get('author', 'unknow')

        raw_summary = _entry_summary(entry)
        if raw_summary is None:
            continue
        # Clear HTML tags before measuring/storing the summary.
        entry_model.summary = strip_tags(raw_summary)
        if len(entry_model.summary) <= 100:
            # Skip only this entry; a short summary must not abort the batch.
            continue

        entry_model.when = _entry_timestamp(entry)

        tag = _entry_tag(entry)
        if tag:
            entry_model.category = dao.save_category(tag)
        else:
            print('no tags.ignored...')

        try:
            dao.save_model(entry_model)
        except Exception as e:
            print('save error: %s' % (e,))
try: soup = feedparser.parse(url) except Exception,e: print 'parsing error',e return feed_link = soup.feed.get('link','') feed = dao.get_by_link(feed_link, Feed) if not feed: feed = Feed(link=feed_link) feed.title = soup.feed.get('title','') if Feed.objects.filter(title=feed.title): return 'pass' feed.description = soup.feed.get('description','') feed.rss_link = url dao.save_model(feed) if not soup['entries']: print 'this feed has not entries' return if is_write: fetch_entries(feed,soup['entries']) def fetch_entries(feed,entries): for entry in entries: entry_link = entry['link'] entry_model = dao.get_by_link(entry_link, Entry) if not entry_model: entry_model = Entry(feed=feed, link=entry_link) else: continue