コード例 #1
0
 def process_request(self, request, spider):
     """Load ``request.url`` in the shared browser and return the rendered page.

     The module-level ``driver`` navigates to the URL, and the resulting
     page source is wrapped in an ``HtmlResponse``. The ``href`` of every
     ``<a>`` element found on the page is attached to the response as the
     extra attribute ``url_list``.

     The module-level ``drive_count`` counts calls; once it reaches 100 the
     current driver is quit, a new Chrome driver is created, and the
     counter is reset.

     NOTE(review): presumably this is a Scrapy downloader-middleware hook,
     where returning a response short-circuits the normal download --
     confirm against the enclosing class.

     Args:
         request: Object exposing the ``url`` to load; also passed through
             to the response as ``request``.
         spider: Not used by this method.

     Returns:
         An ``HtmlResponse`` built from ``driver.current_url`` and
         ``driver.page_source`` (encoding ``utf-8``), carrying ``url_list``.
     """
     global driver
     global drive_count

     print(request.url)
     try:
         driver.get(request.url)
     except Exception:
         # Best effort: if navigation fails (NOTE(review): presumably a
         # page-load timeout), stop loading and carry on with whatever the
         # browser currently holds. ``Exception`` rather than a bare
         # ``except`` so KeyboardInterrupt/SystemExit still propagate.
         driver.execute_script('window.stop()')

     # Entries may be None for anchors without an href attribute.
     url_list = [
         anchor.get_attribute("href")
         for anchor in driver.find_elements_by_tag_name("a")
     ]

     resp = HtmlResponse(driver.current_url,
                         body=driver.page_source,
                         encoding='utf-8',
                         request=request)
     resp.url_list = url_list

     drive_count += 1
     # ``>=`` instead of ``==`` so the browser is still recycled if the
     # counter ever ends up past the threshold.
     if drive_count >= 100:
         driver.quit()
         # NOTE(review): the service argument below (with spaces around
         # '=') does not look like a chromedriver flag -- verify it has the
         # intended effect.
         driver = webdriver.Chrome(
             options=opt, service_args=['--ignore-ssl-errors = true'])
         drive_count = 0
     return resp