Esempio n. 1
0
def process(url):
    """Fetch ``url`` and hand the result to ``ProcessQueue.push``.

    Nothing is pushed, and ``None`` is returned, when any of these holds:

    * ``urlhelper.can_fetch(url)`` is falsy (an info line is logged),
    * ``crawler.load(url)`` yields ``None`` as the request object,
    * the final response URL ends with ``.pdf``.

    Otherwise ``ProcessQueue.push`` receives, in order: the final response
    URL, the request headers as key-sorted JSON, the response headers plus
    ``status`` and ``reason`` as key-sorted JSON, and the payload returned
    by the crawler.
    """
    try:
        print('DOWNLOADING', url)
    except Exception:
        # The progress line is best-effort: a failure to write it must not
        # stop the crawl.  ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        logger.debug('could not print progress message', exc_info=True)

    if not urlhelper.can_fetch(url):
        logger.info('disallowed %s', url)
        return

    # The fourth element (the parsed soup) is not needed here.
    request, response, data, _soup = crawler.load(url)

    if request is None:
        return

    # NOTE(review): geturl() presumably reports the URL after redirects,
    # so it may differ from ``url`` -- confirm against crawler.load.
    final_url = response.geturl()
    if final_url.endswith('.pdf'):
        return

    # Both calls are expected to yield (name, value) pairs; a repeated
    # header name keeps only its last value, as before.
    drequest = dict(request.header_items())
    dresponse = dict(response.getheaders())
    dresponse.update({
        'status': response.status,
        'reason': response.reason,
    })

    ProcessQueue.push(
        final_url,
        json.dumps(drequest, sort_keys=True),
        json.dumps(dresponse, sort_keys=True),
        data)

	"""
Esempio n. 2
0
def start():
    """Create a ``ProcessQueue``, bind ``process`` to it and run it."""
    work_queue = ProcessQueue()
    # The handler is bound before the queue is run.
    work_queue.bind(process)
    work_queue.run()