コード例 #1
0
    def set_cookies(self):
        """Load the main URL in a headless browser and store its cookies.

        Starts a PhantomJS session through Selenium, opens ``self.url``,
        waits up to 30 seconds for the element named ``dtEndDate`` to be
        present, and saves the normalized browser cookies on
        ``self.cookies`` so we can use them elsewhere.

        This gets around some of their throttling mechanisms.

        The browser is shut down even when loading the page or waiting for
        the element fails; the error itself still propagates to the caller.
        """
        logger.info("Running Selenium browser PhantomJS to get the cookies...")
        add_delay(20, 5)
        driver = webdriver.PhantomJS(
            executable_path=phantomjs_executable_path,
            service_log_path=os.path.devnull,  # Disable ghostdriver.log
        )
        try:
            driver.set_window_size(1920, 1080)
            driver.get(self.url)
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.NAME, "dtEndDate"))
            )
            self.cookies = normalize_cookies(driver.get_cookies())
        finally:
            # quit() ends the whole session and stops the PhantomJS process;
            # close() would only close the current window and could leave
            # the browser process running.
            driver.quit()
コード例 #2
0
ファイル: nyappterm_1st.py プロジェクト: janderse/juriscraper
    def set_cookies(self):
        """Load the main URL in a headless browser and store its cookies.

        Starts a PhantomJS session through Selenium, opens ``self.url``,
        waits up to 30 seconds for the element named ``dtEndDate`` to be
        present, and saves the normalized browser cookies on
        ``self.cookies`` so we can use them elsewhere.

        This gets around some of their throttling mechanisms.

        The browser is shut down even when loading the page or waiting for
        the element fails; the error itself still propagates to the caller.
        """
        logger.info("Running Selenium browser PhantomJS to get the cookies...")
        add_delay(20, 5)
        driver = webdriver.PhantomJS(
            executable_path='/usr/local/phantomjs/phantomjs',
            service_log_path=os.path.devnull,  # Disable ghostdriver.log
        )
        try:
            driver.set_window_size(1920, 1080)
            driver.get(self.url)
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.NAME, "dtEndDate"))
            )
            self.cookies = normalize_cookies(driver.get_cookies())
        finally:
            # quit() ends the whole session and stops the PhantomJS process;
            # close() would only close the current window and could leave
            # the browser process running.
            driver.quit()
コード例 #3
0
    def _download(self, request_dict=None):
        """Download the page using cookies obtained through Selenium.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, i.e. at most 11 requests in total.

        :param request_dict: optional extra request options passed on to the
            parent ``_download``. It is copied, never modified in place.
        :return: whatever the parent ``_download`` returned for the last
            request made, even if it still contains no table.
        """
        # Work on a private copy so that neither a shared default nor the
        # caller's dict ends up carrying this session's cookies.
        request_dict = dict(request_dict or {})

        if self.method == 'LOCAL':
            return super(Site, self)._download(request_dict)

        self.set_cookies()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict['cookies'] = self.cookies

        html__ = super(Site, self)._download(request_dict)
        for attempt in range(1, 11):
            if html__.xpath('//table'):
                break
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            logger.info("Got a bad response {} time(s)".format(attempt))
        return html__
コード例 #4
0
ファイル: nyappterm_1st.py プロジェクト: janderse/juriscraper
    def _download(self, request_dict=None):
        """Download the page using cookies obtained through Selenium.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, i.e. at most 11 requests in total.

        :param request_dict: optional extra request options passed on to the
            parent ``_download``. It is copied, never modified in place.
        :return: whatever the parent ``_download`` returned for the last
            request made, even if it still contains no table.
        """
        # Work on a private copy so that neither a shared default nor the
        # caller's dict ends up carrying this session's cookies.
        request_dict = dict(request_dict or {})

        if self.method == 'LOCAL':
            return super(Site, self)._download(request_dict)

        self.set_cookies()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict['cookies'] = self.cookies

        html__ = super(Site, self)._download(request_dict)
        for attempt in range(1, 11):
            if html__.xpath('//table'):
                break
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            logger.info("Got a bad response {} time(s)".format(attempt))
        return html__
コード例 #5
0
    def _download(self, request_dict=None):
        """Download the page using cookies obtained through Selenium.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, i.e. at most 11 requests in total.

        :param request_dict: optional extra request options passed on to the
            parent ``_download``. It is copied, never modified in place.
        :return: whatever the parent ``_download`` returned for the last
            request made, even if it still contains no table.
        """
        # Work on a private copy so that neither a shared default nor the
        # caller's dict ends up carrying this session's cookies.
        request_dict = dict(request_dict or {})

        if self.test_mode_enabled():
            return super(Site, self)._download(request_dict)

        # use selenium to establish required cookies
        logger.info("Running Selenium browser to get the cookies...")
        add_delay(20, 5)
        self.initiate_webdriven_session()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict["cookies"] = self.cookies

        html__ = super(Site, self)._download(request_dict)
        for attempt in range(1, 11):
            if html__.xpath("//table"):
                break
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            logger.info("Got a bad response {} time(s)".format(attempt))
        return html__