Beispiel #1
0
def _find_image_src(selectors, fallback):
	"""Return the ``src`` attribute of the first CSS selector that resolves.

	Each selector is tried in order against the module-level ``driver``.
	A selector whose lookup (or attribute read) raises is skipped; when
	every selector fails, ``fallback`` is returned instead.
	"""
	for selector in selectors:
		try:
			return driver.find_element_by_css_selector(selector).get_attribute('src')
		except Exception:
			# Only ordinary errors are treated as "not found"; unlike a bare
			# ``except:``, KeyboardInterrupt / SystemExit still propagate.
			continue
	return fallback


def start():
	"""Crawl every review page from ``start_url`` and save the result as JSON.

	Calls ``init()``, loads ``start_url`` in ``driver``, reads the text of
	``#HEADING`` as the name and picks an image URL (``#HERO_PHOTO``, then
	``#BIG_PHOTO_CAROUSEL img``, then a fixed fallback URL). Reviews are
	collected with ``parse_reviews_list()`` for the current page and for each
	further page while ``if_next_page_exists_go()`` is truthy, incrementing
	the global ``current_page`` each time. The assembled dictionary is
	serialised with ``json.dumps`` and handed to ``saveText`` together with
	``resultPath``.

	Any ``Exception`` raised along the way is printed and reported through
	``returnError``; nothing is re-raised and nothing is returned.
	"""
	# Only ``current_page`` is rebound here; the other module-level names
	# are merely read and need no ``global`` declaration.
	global current_page
	try:
		init()
		printDH('Iniciando la app...')
		driver.get(start_url)
		name = driver.find_element_by_css_selector('#HEADING').text.encode('UTF-8')
		image = _find_image_src(
			('#HERO_PHOTO', '#BIG_PHOTO_CAROUSEL img'),
			"https://watermarkherveybay.files.wordpress.com/2015/08/tripadvisor-logo-nw1.jpg?w=620&h=350",
		)
		printDH('Extrayendo las opiniones de %s' % name)
		printDH('Leyendo la página %d de %d' % (current_page, max_page))
		reviews = parse_reviews_list()
		while if_next_page_exists_go():
			current_page += 1
			printDH('Leyendo la página %d de %d' % (current_page, max_page))
			reviews = reviews + parse_reviews_list()
		result = {'@context':'http://schema.org', 'name':name, 'image':image, 'reviews':reviews}
		saveText(json.dumps(result), resultPath)
	except Exception as e:
		# Parenthesised form prints the same text on Python 2 and Python 3.
		print(str(e))
		# Forward the exception; returnError accepts it as an optional argument.
		returnError(e)
Beispiel #2
0
def returnError(error=None):
	"""Write a fixed crawler-failure payload to ``resultPath``.

	The payload is a JSON object with an ``error`` message and
	``loading`` set to ``False``, passed to ``saveText``. The ``error``
	argument is accepted but not used: the same message is written
	whatever the caller supplies.
	"""
	failure = {}
	failure['error'] = 'Crawler has failed to fetch the comments'
	failure['loading'] = False
	payload = json.dumps(failure)
	saveText(payload, resultPath)