def get(self):
    # wave = self.robot.new_wave(domain="googlewave.com",
    #                            participants=['*****@*****.**',
    #                                          '*****@*****.**',
    #                                          '*****@*****.**'])
    wave = self.robot.new_wave(domain='googlewave.com',
                               participants=['*****@*****.**'])
    d = feedparser.parse(FEED)
    wave.title = d.channel.description
    for entry in d.entries:
        # append_markup currently only supports <p>, <div>, <b>, <strong>,
        # <i>, <em>, <u>.
        wave.root_blip.append("\n")
        wave.root_blip.append_markup("<p><b>%s</b></p><p>%s</p><p> </p>"
                                     % (entry.title, entry.description))
    self.robot.submit(wave)
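# The handler above leans on stock feedparser behavior: parse() accepts a URL
# (or a file path / raw string), feed-level metadata is available as d.feed
# (d.channel is a legacy alias for it), and each entry exposes fields such as
# title and description. A minimal standalone sketch of just the feed-reading
# part, assuming a hypothetical feed URL:
import feedparser

def dump_feed(url='http://example.com/rss.xml'):  # hypothetical URL
    d = feedparser.parse(url)
    # Same value the handler above reads as d.channel.description
    print d.feed.get('description', '')
    for entry in d.entries:
        # .get() avoids KeyErrors on feeds that omit optional fields
        print entry.get('title', '(untitled)')
        print entry.get('description', '')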
import datetime

import dateutil.parser
import feedparser
from django.db.models import Q
from django.utils.http import urlquote
# Feed, Item and Tag are this app's models; stripper provides strip_tags()
# (see the sketch after this function).

def update_defs():
    feeds = Feed.objects.filter(feed_deleted=False)
    for feed in feeds:
        try:
            feed_items = feedparser.parse(feed.feed_url)
            for entry in feed_items['entries']:
                date_published = entry.get('published', entry.get('updated'))
                if not date_published:
                    date_published = str(datetime.datetime.utcnow())
                # Percent-quote everything after the scheme so the permalink
                # is safe to store and compare.
                protocol_index = entry['link'][0:7].find("://")
                if protocol_index != -1:
                    permalink = entry['link'][:protocol_index + 3] + \
                        urlquote(entry['link'][protocol_index + 3:])
                else:
                    permalink = urlquote(entry['link'])
                date_published = dateutil.parser.parse(date_published)
                # Normalize aware datetimes to naive UTC; naive ones have no
                # utcoffset() to subtract.
                if date_published.utcoffset() is not None:
                    date_published = (date_published -
                        date_published.utcoffset()).replace(tzinfo=None)
                # Skip entries we already have (matched by date or permalink).
                items_count = Item.objects.filter(
                    Q(item_date=date_published) |
                    Q(item_permalink=permalink)).filter(
                    item_feed=feed).count()
                if items_count == 0:
                    feed_content = entry.get('content')
                    if feed_content is not None:
                        feed_content = feed_content[0]['value']
                        content = stripper.strip_tags(feed_content)
                        clean_content = stripper.strip_tags(feed_content, ())
                    else:
                        content = None
                        clean_content = None
                    i = Item(item_feed=feed,
                             item_date=date_published,
                             item_title=entry.get('title'),
                             item_content=content,
                             item_clean_content=clean_content,
                             item_author=entry.get('author'),
                             item_permalink=permalink)
                    i.save()
                    if 'tags' in entry:
                        for tag in entry['tags']:
                            slug = urlquote(tag.get('term').lower())
                            try:
                                tagobj = Tag.objects.get(tag_slug=slug)
                            except Tag.DoesNotExist:
                                tagobj = Tag(tag_name=tag['term'],
                                             tag_slug=slug,
                                             tag_count=1)
                                tagobj.save()
                            i.item_tags.add(tagobj)
                        i.save()
        except Exception as e:
            print e
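# update_defs() assumes a stripper.strip_tags(html, allowed_tags) helper that
# is not shown: called with the default allowlist it keeps basic formatting
# tags, and called with an empty tuple it strips all markup. A minimal sketch
# of such a helper built on the standard-library HTMLParser; the default
# allowlist below is an assumption, not the original code's.
from HTMLParser import HTMLParser  # html.parser on Python 3

class TagStripper(HTMLParser):
    def __init__(self, allowed):
        HTMLParser.__init__(self)
        self.allowed = allowed
        self.parts = []

    def handle_starttag(self, tag, attrs):
        # Keep allowlisted tags (attributes dropped for simplicity).
        if tag in self.allowed:
            self.parts.append('<%s>' % tag)

    def handle_endtag(self, tag):
        if tag in self.allowed:
            self.parts.append('</%s>' % tag)

    def handle_data(self, data):
        self.parts.append(data)

    def handle_entityref(self, name):
        # Pass entities like &amp; through unchanged.
        self.parts.append('&%s;' % name)

def strip_tags(html, allowed=('p', 'b', 'strong', 'i', 'em', 'u')):
    s = TagStripper(allowed)
    s.feed(html)
    return ''.join(s.parts)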
def real_update(self, forced=False):
    if not self.xmlUrl:  # Not a real feed; update its children instead
        af = self.allFeeds()
        for f in af:
            f.update()
        return  # Nothing to fetch for a folder
    if self.lastModified:
        mod = self.lastModified
    else:
        mod = datetime.datetime(1970, 1, 1)
    if self.title:
        statusQueue.put(u"Updating: " + self.title)
    d = fp.parse(self.xmlUrl, etag=self.etag, modified=mod.timetuple())
    try:
        self.lastUpdated = datetime.datetime.now()
        elixir.session.commit()
    except:
        elixir.session.rollback()
    if d.status == 304:  # No need to fetch
        return
    if d.status == 301:  # Permanent redirect
        self.xmlUrl = d.href
    if d.status == 410:  # Feed deleted. FIXME: tell the user and stop trying!
        return

    self.updating = True
    # Notify feed is updating
    # feedStatusQueue.put([0, self.id])

    posts = []
    for post in d['entries']:
        try:
            # Date can be one of several fields
            if 'created_parsed' in post:
                dkey = 'created_parsed'
            elif 'published_parsed' in post:
                dkey = 'published_parsed'
            elif 'modified_parsed' in post:
                dkey = 'modified_parsed'
            else:
                dkey = None
            if dkey and post[dkey]:
                date = datetime.datetime.\
                    fromtimestamp(time.mktime(post[dkey]))
            else:
                date = datetime.datetime.now()

            # So can the "unique ID for this entry"
            if 'id' in post:
                idkey = 'id'
            elif 'link' in post:
                idkey = 'link'
            else:
                continue  # No usable unique id for this entry; skip it

            # So can the content
            if 'content' in post:
                content = '<hr>'.join([c.value for c in post['content']])
            elif 'summary' in post:
                content = post['summary']
            elif 'value' in post:
                content = post['value']
            else:
                content = ''

            # Rudimentary NON-html detection
            if '<' not in content:
                content = escape(content).replace('\n\n', '<p>')

            # Author if available, else None
            author = ''
            # First, we may have author_detail, which is the nicer one
            if 'author_detail' in post:
                ad = post['author_detail']
                author = detailToAuthor(ad)
            # Or maybe just an author
            elif 'author' in post:
                author = post['author']
            # But we may have a list of contributors
            if 'contributors' in post:
                # Which may have the same detail as the author's
                author += ' - '.join([detailToAuthor(contrib)
                                      for contrib in post['contributors']])
            if not author:
                # FIXME: how about using the feed's author,
                # or something like that
                author = None

            # The link should be simple ;-)
            if 'link' in post:
                link = post['link']
            else:
                link = None

            # Titles may be in plain title, but a title_detail is preferred
            if 'title_detail' in post:
                title = detailToTitle(post['title_detail'])
            else:
                title = post['title']

            # Search for enclosures
            if 'enclosures' in post:
                enclosures = post['enclosures']
            else:
                enclosures = []

            try:
                # FIXME: if I use date to check here, I get duplicates on
                # posts where I use artificial date because it's not in the
                # feed's entry. If I don't, I don't re-get updated posts.
                p = Post.get_by(feed=self, post_id=post[idkey])
                if p:
                    if p.content != content:
                        p.content = content
                    if p.title != title:
                        p.title = title
                else:
                    # This is because of google news: the same news gets
                    # reposted over and over with different post_id :-(
                    p = Post.get_by(feed=self, title=title)
                    if p:
                        if p.post_id != post[idkey]:
                            p.post_id = post[idkey]
                        if p.content != content:
                            p.content = content
                        if p.title != title:
                            p.title = title
                    else:
                        p = Post(feed=self, date=date, title=title,
                                 post_id=post[idkey], content=content,
                                 author=author, link=link)
                        if self.markRead:
                            p.unread = False
                # Tag support
                if 'tags' in post:
                    p.tags = ','.join([t.term for t in post['tags']])
                posts.append(p)
                # Create enclosures
                for e in enclosures:
                    enc = Enclosure(post=p, href=e.href, filetype=e.type,
                                    length=e.length, filename=None)
                elixir.session.commit()
            except:
                traceback.print_exc(1)
                elixir.session.rollback()
        except KeyError:
            debug(post)

    try:
        self.updateFeedData(d)
        if 'modified' in d:
            self.lastModified = datetime.datetime(*d['modified'][:6])
        if 'etag' in d:
            self.etag = d['etag']
        elixir.session.commit()
    except:
        elixir.session.rollback()

    try:
        # Silly way to release the posts objects we don't need anymore
        post_ids = [post.id for post in posts]
        if len(post_ids):
            # Mark feed UI for updating
            self.curUnread = -1
            # Fix freshness
            Post.table.update().where(
                sql.except_(Post.table.select(Post.feed == self),
                            Post.table.select(Post.id.in_(post_ids)))).\
                values(fresh=False).execute()
        elixir.session.commit()
    except:
        elixir.session.rollback()
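# real_update() above calls two helpers that are not shown: detailToAuthor()
# and detailToTitle(). A minimal sketch of what they might look like,
# assuming feedparser's author_detail dict (name/href/email keys) and
# title_detail dict (type/value keys); the exact formatting is an assumption,
# not the original implementation.
from cgi import escape  # html.escape on Python 3

def detailToAuthor(ad):
    # Prefer a linked name; fall back to the bare name or email address.
    if ad.get('href'):
        return '<a href="%s">%s</a>' % (ad['href'], ad.get('name', ad['href']))
    return ad.get('name', ad.get('email', ''))

def detailToTitle(td):
    # Plain-text titles get escaped so markup in them displays literally;
    # HTML titles are used as-is.
    title = td.get('value', '')
    if td.get('type') == 'text/plain':
        title = escape(title)
    return title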