# Placeholder image used when neither known photo element yields a URL.
_FALLBACK_IMAGE_URL = "https://watermarkherveybay.files.wordpress.com/2015/08/tripadvisor-logo-nw1.jpg?w=620&h=350"


def _find_main_image_url():
    """Return the ``src`` of the page's main photo, or the fallback URL.

    The selectors are tried in order; the first one whose element lookup and
    attribute read both succeed wins.
    """
    for selector in ('#HERO_PHOTO', '#BIG_PHOTO_CAROUSEL img'):
        try:
            return driver.find_element_by_css_selector(selector).get_attribute('src')
        except Exception:
            # Only ordinary errors (e.g. element not found) fall through to
            # the next selector; KeyboardInterrupt/SystemExit still propagate.
            continue
    return _FALLBACK_IMAGE_URL


def start():
    """Crawl every review page from ``start_url`` and save the result as JSON.

    Loads the start page, reads the heading and main image, then collects
    reviews page by page while ``if_next_page_exists_go()`` keeps returning a
    truthy value. The collected data is serialized with ``json.dumps`` and
    handed to ``saveText`` together with ``resultPath``.

    Any ordinary exception is printed and reported through ``returnError()``;
    nothing is re-raised to the caller.
    """
    # current_page is rebound below; the other two names are only read.
    global start_url, current_page, resultPath
    try:
        init()
        printDH('Iniciando la app...')
        driver.get(start_url)

        # Encoded to a UTF-8 byte string before being formatted and serialized.
        name = driver.find_element_by_css_selector('#HEADING').text.encode('UTF-8')
        image = _find_main_image_url()

        printDH('Extrayendo las opiniones de %s' % name)
        printDH('Leyendo la página %d de %d' % (current_page, max_page))
        reviews = parse_reviews_list()
        while if_next_page_exists_go():
            current_page += 1
            printDH('Leyendo la página %d de %d' % (current_page, max_page))
            reviews = reviews + parse_reviews_list()

        result = {
            '@context': 'http://schema.org',
            'name': name,
            'image': image,
            'reviews': reviews,
        }
        saveText(json.dumps(result), resultPath)
    except Exception as e:
        # Parenthesized form prints the same text under Python 2's print
        # statement and also parses as a call under Python 3.
        print(str(e))
        returnError()
def returnError(error=None):
    """Save a fixed failure payload to ``resultPath``.

    The payload is a JSON object with an ``error`` message and ``loading``
    set to ``False``, passed to ``saveText`` together with ``resultPath``.

    Args:
        error: Accepted for callers that pass the triggering error; it is
            currently not used in the saved payload.
    """
    failure_payload = json.dumps({
        'error': 'Crawler has failed to fetch the comments',
        'loading': False,
    })
    # resultPath is only read here, so no ``global`` declaration is needed.
    saveText(failure_payload, resultPath)