def get_fulltext(blogitem_id):
    """Fetch the fulltext of a summary-only item.

    Looks up the BlogItem whose primary key is ``blogitem_id``, downloads
    ``item.uri`` with ``get``, passes the response body to ``web2feed`` and
    copies the title, date, contents and author found in the returned
    ``feed`` dictionary onto the item before saving it.

    ``item.tried_fetch_count`` is incremented on every attempt and the item
    is saved on every path once it has been loaded, so failed attempts are
    counted as well.

    Always returns None; a missing item or a failed fetch is reported with
    ``print`` rather than raised.
    """
    try:
        item = BlogItem.objects.get(pk=blogitem_id)
    except BlogItem.DoesNotExist:
        print("blog.task.get_fulltext item doesn't exist")
        return

    item.tried_fetch_count += 1  # XXX: Verify increment works

    feed = None
    try:
        msg = get(item.uri)
        if msg.has_errors():
            print("Just failed to grab web item")
        else:
            # Parse the page we just downloaded for *this* item.
            data = web2feed(item.uri, content=msg.get_body())
            feed = data['feed']
    except Exception:
        # Best effort: any download/parse failure just counts as a failed
        # attempt, which is persisted below.
        feed = None

    # web2feed is expected to hand back a non-empty dictionary; anything
    # else is treated like a failed fetch.
    if not feed or not isinstance(feed, dict):
        item.save()
        return

    if feed.get('uri') != item.uri:
        print("WARNING: BLOGITEM URIS DO NOT MATCH")

    if 'title' in feed:
        item.title = feed['title']

    if 'date' in feed:
        item.datetime_created = feed['date']

    if 'contents' in feed and feed['contents']:
        item.contents = feed['contents']
        item.has_contents = True

    if 'author' in feed:
        item.www_author_name = feed['author']

    item.save()
def get_feed(node_id):
    """Get the feed of 'latest' blogitem posts.

    Loads the Node with primary key ``node_id``, downloads ``node.uri``
    (20 second timeout), parses the body with ``web2feed`` and:

    * copies ``title`` / ``description`` from the returned ``meta`` onto the
      node and saves it;
    * creates one BlogItem per entry in the returned ``feed``;
    * schedules ``get_fulltext`` for every new item saved without contents.

    Returns None.  A missing node or an error response is reported with
    ``print``; an exception raised while downloading or parsing is re-raised
    after ``node.just_failed(save=True)`` has been called.
    """
    print("task: get_feed")
    try:
        node = Node.objects.get(pk=node_id)
    except Node.DoesNotExist:
        print("blog.task.get_feed failure: node %d doesn't exist" % node_id)
        return

    # XXX: This is only for Bootstrapped blog items
    # When blogitems are shared in sylph (very soon), then we'll use
    # the sylph protocol
    try:
        msg = get(node.uri, timeout=20)
        if msg.has_errors():
            node.just_failed(save=True)
            print("Just failed to grab from node")
            return
        data = web2feed(node.uri, content=msg.get_body())
        feed = data['feed']
        meta = data['meta']
    except Exception:
        node.just_failed(save=True)
        raise

    node.just_pulled_from(save=False)

    # Updating the node's name/description is best effort: unusable meta
    # must not prevent the node from being saved.  Only ordinary errors
    # are swallowed, so KeyboardInterrupt/SystemExit still propagate.
    try:
        if 'title' in meta:
            node.name = meta['title']
        if 'description' in meta:
            node.description = meta['description']
    except Exception:
        pass
    node.save()

    # Treat a missing feed as an empty one instead of failing on len().
    feed = feed or []

    print("fetched %d blogitems from %s" % (len(feed), node.uri))

    for entry in feed:
        try:
            blog = BlogItem()
            # uniqueness constraint prevents duplicates
            blog.uri = entry['uri']
            blog.title = entry['title']
            if 'date' in entry:
                blog.datetime_created = entry['date']
            if 'contents' in entry and entry['contents']:
                blog.contents = entry['contents']
                blog.has_contents = True
            if 'summary' in entry and entry['summary']:
                blog.summary = entry['summary']
                blog.has_summary = True
            if 'author' in entry:
                blog.www_author_name = entry['author']
            blog.save()

            # Schedule fetch of contents
            if not blog.contents:
                get_fulltext.delay(blog.pk)
        except Exception as exc:
            # Already-known items are expected to fail the uniqueness
            # constraint and are skipped quietly; anything else is
            # reported so that dropped entries are visible.
            if 'IntegrityError' not in type(exc).__name__:
                print("blog.task.get_feed skipped item: %r" % (exc,))