Example #1
0
def obtain(uri):
	"""Obtain the resource at the URI.

	Looks up the Resource for ``uri`` locally; if it does not exist,
	fetches the URI and creates an appropriate Resource subclass
	(currently only images are handled; HTML handling is unfinished).

	Returns the Resource instance, or None if the content type is not
	handled yet.
	"""
	uri = hashless(uri)
	res = None
	try:
		res = Resource.objects.get(uri=uri)
		# XXX: Don't modify existing resources.
		# BUG FIX: was a bare `return`, which discarded the resource
		# just fetched and returned None for every known URI. The
		# `res =` assignment above shows the intent was to return it.
		return res
	except Resource.DoesNotExist:
		pass

	message = comms.get(uri)

	# XXX TODO: Handle more resource types.
	if message.is_image():
		res = Image.new_from_message(message, save=True)

	elif message.is_html():
		# TODO/XXX: web2feed extraction is unfinished; `feed` is not
		# used yet.
		web = Web2Feed(uri)
		# BUG FIX: get_body is a method -- call it instead of passing
		# the bound method object (matches msg.get_body() usage in the
		# other tasks in this file).
		web.set_contents(message.get_body())
		feed = web.get_feed()
		# XXX: TODO

	return res
Example #2
0
def test(request):
	#from sylph.core.node.api import ping_response
	#return ping_response(request)

	from sylph.core.resource.models import Resource
	from sylph.utils.data.RdfParser import RdfParser
	from sylph.utils.data.RdfSerializer import RdfSerializer

	#from sylph.utils.http import Message, Request, Response
	#from sylph.utils.http import get, send

	from sylph.utils.comms import SylphMessage, get, send

	response = get('http://slashdot.org')

	print response
	print '========'
	print response.get_headers()



	return HttpResponse('view terminal output...')
Example #3
0
def get_fulltext(blogitem_id):
	"""Fetch the fulltext of a summary-only item."""
	try:
		item = BlogItem.objects.get(pk=blogitem_id)
	except BlogItem.DoesNotExist:
		print "blog.task.get_fulltext item doesn't exist"
		return

	item.tried_fetch_count += 1 # XXX: Verify increment works

	try:
		msg = get(item.uri)
		if msg.has_errors():
			print "Just failed to grab web item"
			return
		data = web2feed(node.uri, content=msg.get_body())
		feed = data['feed']
		meta = data['meta']
		if not feed or type(feed) != dict:
			raise Exception, "web2feed did not return a dictionary."
	except Exception:
		item.save()
		return

	if feed['uri'] != item.uri:
		print "WARNING: BLOGITEM URIS DO NOT MATCH"

	if 'title' in feed:
		item.title = feed['title']
	if 'date' in feed:
		item.datetime_created = feed['date']
	if 'contents' in feed and feed['contents']:
		item.contents = feed['contents']
		item.has_contents = True
	if 'author' in feed:
		item.www_author_name = feed['author']

	item.save()
Example #4
0
def get_feed(node_id):
	"""Get the feed of 'latest' blogitem posts."""
	print "task: get_feed"
	try:
		node = Node.objects.get(pk=node_id)
	except Node.DoesNotExist:
		print "blog.task.get_feed failure: node %d doesn't exist" % node_id
		return

	# XXX: This is only for Bootstrapped blog items
	# When blogitems are shared in sylph (very soon), then we'll use
	# the sylph protocol
	try:
		msg = get(node.uri, timeout=20)
		if msg.has_errors():
			node.just_failed(save=True)
			print "Just failed to grab from node"
			return
		data = web2feed(node.uri, content=msg.get_body())
		feed = data['feed']
		meta = data['meta']
	except Exception:
		node.just_failed(save=True)
		raise
		#print e
		#raise e

	node.just_pulled_from(save=False)
	try:
		if 'title' in meta:
			node.name = meta['title']
		if 'description' in meta:
			node.description = meta['description']
		node.save()
	except:
		node.save()

	print "fetched %d blogitems from %s" %(len(feed), node.uri)

	for item in feed:
		try:
			blog = BlogItem()

			# uniqueness constraint prevents duplicates
			blog.uri = item['uri']
			blog.title = item['title']

			if 'date' in item:
				blog.datetime_created = item['date']
			if 'contents' in item and item['contents']:
				blog.contents = item['contents']
				blog.has_contents = True
			if 'summary' in item and item['summary']:
				blog.summary = item['summary']
				blog.has_summary = True
			if 'author' in item:
				blog.www_author_name = item['author']

			blog.save()

			# Schedule fetch of contents
			if not blog.contents:
				get_fulltext.delay(blog.pk)

		except Exception:
			#exp = str(type(e))
			#if 'IntegrityError' in exp:
			#	continue
			#print e # DEBUG
			continue