def obtain(uri):
    """Obtain the resource at the URI."""
    uri = hashless(uri)
    res = None
    try:
        res = Resource.objects.get(uri=uri)
        return res # XXX: Don't modify existing.
    except Resource.DoesNotExist:
        pass

    message = comms.get(uri)

    # XXX TODO: Handle more resource types.
    if message.is_image():
        res = Image.new_from_message(message, save=True)
    elif message.is_html():
        # TODO/XXX: Try web2feed extraction
        web = Web2Feed(uri)
        web.set_contents(message.get_body()) # was message.get_body (never called)
        feed = web.get_feed() # XXX: feed is not yet turned into a Resource

    # XXX: TODO
    #typ = message.get_content_type()
    #if typ in ['image/jpeg', 'image/png', 'image/gif']:
    #    res = Image.new_from_message(message, save=True)
    return res
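# The commented-out block above hints at a content-type dispatch. A minimal
# sketch of that shape, assuming message.get_content_type() returns the MIME
# type of the fetched resource; the helper name is hypothetical, and the HTML
# branch still needs a way to wrap the extracted feed in a Resource.
def _resource_from_message(uri, message):
    typ = message.get_content_type()
    if typ in ['image/jpeg', 'image/png', 'image/gif']:
        return Image.new_from_message(message, save=True)
    if typ in ['text/html', 'application/xhtml+xml']:
        web = Web2Feed(uri)
        web.set_contents(message.get_body())
        return web.get_feed() # TODO: turn the feed into a Resource
    return None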
def test(request):
    #from sylph.core.node.api import ping_response
    #return ping_response(request)
    from django.http import HttpResponse
    from sylph.core.resource.models import Resource
    from sylph.utils.data.RdfParser import RdfParser
    from sylph.utils.data.RdfSerializer import RdfSerializer
    #from sylph.utils.http import Message, Request, Response
    #from sylph.utils.http import get, send
    from sylph.utils.comms import SylphMessage, get, send

    response = get('http://slashdot.org')
    print response
    print '========'
    print response.get_headers()
    return HttpResponse('view terminal output...')
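# A slightly fuller sketch of the same round-trip with error checking, using
# only calls that appear elsewhere in this codebase (get, has_errors,
# get_body, get_headers). The view name is hypothetical.
def test_verbose(request):
    from django.http import HttpResponse
    from sylph.utils.comms import get

    response = get('http://slashdot.org')
    if response.has_errors():
        return HttpResponse('fetch failed; see terminal output')
    print response.get_headers()
    print response.get_body()[:500] # first 500 characters of the body
    return HttpResponse('view terminal output...')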
def get_fulltext(blogitem_id):
    """Fetch the fulltext of a summary-only item."""
    try:
        item = BlogItem.objects.get(pk=blogitem_id)
    except BlogItem.DoesNotExist:
        print "blog.task.get_fulltext: item doesn't exist"
        return

    item.tried_fetch_count += 1 # XXX: Verify increment works

    try:
        msg = get(item.uri)
        if msg.has_errors():
            print "Just failed to grab web item"
            return
        data = web2feed(item.uri, content=msg.get_body()) # was node.uri (undefined here)
        feed = data['feed']
        meta = data['meta']
        if not feed or type(feed) != dict:
            raise Exception("web2feed did not return a dictionary.")
    except Exception:
        item.save() # record the attempted fetch before bailing
        return

    if feed['uri'] != item.uri:
        print "WARNING: BLOGITEM URIS DO NOT MATCH"

    if 'title' in feed:
        item.title = feed['title']
    if 'date' in feed:
        item.datetime_created = feed['date']
    if 'contents' in feed and feed['contents']:
        item.contents = feed['contents']
        item.has_contents = True
    if 'author' in feed:
        item.www_author_name = feed['author']
    item.save()
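# A hedged helper sketch (not in the original source): queue a fulltext fetch
# for every item that only has a summary. The field names match those set in
# get_feed below; adjust the queryset as needed.
def refetch_summary_only_items():
    for item in BlogItem.objects.filter(has_contents=False, has_summary=True):
        get_fulltext.delay(item.pk)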
def get_feed(node_id):
    """Get the feed of 'latest' blogitem posts."""
    print "task: get_feed"
    try:
        node = Node.objects.get(pk=node_id)
    except Node.DoesNotExist:
        print "blog.task.get_feed failure: node %d doesn't exist" % node_id
        return

    # XXX: This is only for bootstrapped blog items.
    # When blogitems are shared in sylph (very soon), then we'll use
    # the sylph protocol.
    try:
        msg = get(node.uri, timeout=20)
        if msg.has_errors():
            node.just_failed(save=True)
            print "Just failed to grab from node"
            return
        data = web2feed(node.uri, content=msg.get_body())
        feed = data['feed']
        meta = data['meta']
    except Exception:
        node.just_failed(save=True)
        raise

    node.just_pulled_from(save=False)

    try:
        if 'title' in meta:
            node.name = meta['title']
        if 'description' in meta:
            node.description = meta['description']
        node.save()
    except Exception:
        node.save()

    print "fetched %d blogitems from %s" % (len(feed), node.uri)

    for item in feed:
        try:
            blog = BlogItem() # uniqueness constraint prevents duplicates
            blog.uri = item['uri']
            blog.title = item['title']
            if 'date' in item:
                blog.datetime_created = item['date']
            if 'contents' in item and item['contents']:
                blog.contents = item['contents']
                blog.has_contents = True
            if 'summary' in item and item['summary']:
                blog.summary = item['summary']
                blog.has_summary = True
            if 'author' in item:
                blog.www_author_name = item['author']
            blog.save()
            # Schedule a fetch of the full contents if only a summary came through.
            if not blog.contents:
                get_fulltext.delay(blog.pk)
        except Exception:
            # Most likely an IntegrityError from the uniqueness constraint
            # on blog.uri (a duplicate item); skip it and continue.
            continue
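# A hedged usage sketch (not in the original source), assuming get_feed is a
# Celery task like get_fulltext (which get_feed itself queues with .delay):
# pull from every known node, e.g. from a periodic scheduler.
def pull_all_feeds():
    for node in Node.objects.all():
        get_feed.delay(node.pk)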