def process_request(self, request, spider):
    """Render ``request.url`` in headless Chrome for the ``music163`` spider.

    For any other spider this returns ``None`` immediately and no browser
    is started.

    For ``music163`` the page is loaded in Chrome, the ``g_iframe`` frame is
    selected, and that frame's page source is wrapped in an ``HtmlResponse``.

    Args:
        request: The request being processed; only ``request.url`` is read,
            and the request object is attached to the returned response.
        spider: The running spider; only ``spider.name`` is read.

    Returns:
        An ``HtmlResponse`` holding the rendered frame source, or ``None``
        when the spider is not ``music163`` or when rendering fails.
    """
    if spider.name != "music163":
        return None

    # Function-scope import: the file's import block is not visible here.
    import logging

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    # The attribute was previously misspelled ``binary_locaion``, which only
    # created an unused attribute and left the browser path unset.
    options.binary_location = '/usr/bin/google-chrome-stable'

    capabilities = {'platform': 'Linux', 'version': '16.04'}

    print('=======start parse {0}========='.format(request.url))
    driver = webdriver.Chrome(
        executable_path='/usr/local/bin/chromedriver',
        chrome_options=options,
        desired_capabilities=capabilities)
    try:
        driver.get(request.url)
        driver.switch_to.frame('g_iframe')
        body = driver.page_source
        print('=======finished parse {0}======'.format(request.url))
        # The response (including current_url) is built before ``finally``
        # shuts the browser down.
        return HtmlResponse(driver.current_url, body=body,
                            encoding='utf-8', request=request)
    except Exception:
        # Keep the original best-effort behaviour (return None on failure)
        # but record the error instead of swallowing it silently.
        logging.getLogger(__name__).exception(
            'failed to render %s with Chrome', request.url)
        return None
    finally:
        # Always release the browser; previously it was only quit on error,
        # leaking one Chrome process per successful request.
        driver.quit()
def __init__(self, user_agent=None, proxy=None, binary_locaion=None):
    """Start a headless Chrome driver and an explicit-wait helper.

    Args:
        user_agent: Optional user-agent string passed to Chrome. Omitted
            from the command line when ``None``.
        proxy: Optional proxy address passed to Chrome via
            ``--proxy-server``. Omitted when ``None``.
        binary_locaion: Optional path to the Chrome binary. The misspelled
            parameter name is kept so existing keyword callers keep working;
            the value is applied to ``options.binary_location``.

    Sets:
        self.driver: The ``webdriver.Chrome`` instance.
        self.wait: A ``WebDriverWait`` bound to ``self.driver`` with a
            timeout argument of 15.
    """
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    if user_agent is not None:
        options.add_argument("user-agent=%s" % user_agent)
    # Previously this guard tested ``user_agent``, so a proxy was ignored
    # without a user agent and ``--proxy=None`` was sent with one.
    if proxy is not None:
        # Chrome's switch is ``--proxy-server``; ``--proxy`` is the
        # PhantomJS spelling and is not recognised by Chrome.
        options.add_argument('--proxy-server=%s' % proxy)
    if binary_locaion is not None:
        # Was ``options.binary_locaion``, which never reached Selenium.
        options.binary_location = binary_locaion
    self.driver = webdriver.Chrome(chrome_options=options)
    self.wait = WebDriverWait(self.driver, 15)