Exemple #1
0
def crawls(qry, to_grab, to_handle):
	'''Fetch a first page, then keep following its 'next' link, collecting results.

	Crawl settings are popped from a copy of ``qry['crawl']``:
		- 'next' (required): handed to ``handles`` to extract the next link
		  from each raw response
		- 'rule' (default '`true`'): handed to ``search`` together with the
		  next link; crawling continues only while that result is truthy
		- 'max' (default 10): upper bound on the number of requests,
		  counting the initial one (one request is always made)
		- every remaining key is merged into each follow-up link (via
		  ``link.update``) before it is requested

	Args:
		qry: query mapping that contains a 'crawl' entry.
		to_grab: request description used for the first request.
		to_handle: handler description applied to every raw response.

	Returns:
		A flat list of handled results: a handler result that is a list is
		spliced in, anything else is appended as a single item.

	Raises:
		Exception: if 'next' is missing, or 'max' cannot be converted to int.
	'''
	to_crawl = copy(qry['crawl'])
	try:
		nextlink = to_crawl.pop('next')
	except KeyError:
		raise Exception('Crawler needs a \'next\' parameter', [])
	rule = to_crawl.pop('rule', '`true`')
	# NOTE(review): an earlier TODO asked for an error when neither 'rule'
	# nor 'max' is supplied; with the defaults above the crawl is still
	# bounded by 10 requests, so no error is raised here.
	try:
		maxiter = int(to_crawl.pop('max', 10))
	except (TypeError, ValueError, OverflowError):
		raise Exception('max crawl val must be an integer!')
	# Whatever is left after popping the control keys is sent along with
	# every follow-up request.
	crawl_kwargs = to_crawl

	response = []
	link = to_grab
	iters = 0
	while True:
		raw_response = request(link)
		res = handles(raw_response, to_handle)
		if isinstance(res, list):
			response.extend(res)
		else:
			response.append(res)
		iters += 1
		link = handles(raw_response, nextlink)
		# The rule is evaluated after every request, including the last one.
		if not search(rule, link) or iters >= maxiter:
			break
		# presumably link is a dict-like request description - it must
		# support update(); verify against handles()
		link.update(crawl_kwargs)
		pprint(link)
	return response
Exemple #2
0
def query(qry):
	'''Run a query object and return its handled result.

	The query is a mapping with:
		- 'request' (required): how to grab the data
		- 'handle' (optional): how to handle the raw response
		- 'crawl' (optional): crawl instructions

	Without 'crawl', the simplest flow applies: one request is made and its
	handled response is returned. With 'crawl', the work is delegated to
	``crawls``. The function works on a copy of ``qry``.

	Raises:
		Exception: if the query has no 'request' entry.
	'''
	qry = copy(qry)
	to_handle = qry.get('handle')
	try:
		to_grab = qry['request']
	except KeyError:
		raise Exception('Query has no request parameters!')
	if 'crawl' in qry:
		return crawls(qry, to_grab, to_handle)
	return handles(request(to_grab), to_handle)