예제 #1
0
    def set_cookies(self):
        """Hit the main URL, and get the cookies so we can use them elsewhere.

        This gets around some of their throttling mechanisms.

        A PhantomJS browser is started, ``self.url`` is loaded, and once an
        element named ``dtEndDate`` is present the browser's cookies are
        passed through ``normalize_cookies`` and stored on ``self.cookies``.

        Raises:
            Any exception raised by Selenium while loading the page or
            waiting for the element (for example a timeout after the
            30-second wait). The browser is shut down before the exception
            propagates.
        """
        logger.info("Running Selenium browser PhantomJS to get the cookies...")
        add_delay(20, 5)
        driver = webdriver.PhantomJS(
            executable_path=phantomjs_executable_path,
            service_log_path=os.path.devnull,  # Disable ghostdriver.log
        )
        try:
            driver.set_window_size(1920, 1080)
            driver.get(self.url)
            # Wait (up to 30 seconds) for the form field to be present before
            # reading the cookies.
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.NAME, "dtEndDate")))
            self.cookies = normalize_cookies(driver.get_cookies())
        finally:
            # quit() ends the whole browser session, not just the current
            # window, so no PhantomJS process is left behind -- even when the
            # page load or the wait above fails.
            driver.quit()
예제 #2
0
    def set_cookies(self):
        """Hit the main URL, and get the cookies so we can use them elsewhere.

        This gets around some of their throttling mechanisms.

        A PhantomJS browser is started, ``self.url`` is loaded, and once an
        element named ``dtEndDate`` is present the browser's cookies are
        passed through ``normalize_cookies`` and stored on ``self.cookies``.

        Raises:
            Any exception raised by Selenium while loading the page or
            waiting for the element (for example a timeout after the
            30-second wait). The browser is shut down before the exception
            propagates.
        """
        logger.info("Running Selenium browser PhantomJS to get the cookies...")
        add_delay(20, 5)
        driver = webdriver.PhantomJS(
            executable_path='/usr/local/phantomjs/phantomjs',
            service_log_path=os.path.devnull,  # Disable ghostdriver.log
        )
        try:
            driver.set_window_size(1920, 1080)
            driver.get(self.url)
            # Wait (up to 30 seconds) for the form field to be present before
            # reading the cookies.
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.NAME, "dtEndDate"))
            )
            self.cookies = normalize_cookies(driver.get_cookies())
        finally:
            # quit() ends the whole browser session, not just the current
            # window, so no PhantomJS process is left behind -- even when the
            # page load or the wait above fails.
            driver.quit()
예제 #3
0
    def _download(self, request_dict=None):
        """Download the page, using Selenium-obtained cookies when live.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, for a total of at most 11 requests.

        Args:
            request_dict: Optional dict of request options forwarded to the
                parent ``_download``. A ``'cookies'`` entry is added to it in
                place. When omitted, a fresh dict is created for this call.

        Returns:
            Whatever the parent ``_download`` returns for the last attempt;
            this may still lack a table if every retry failed.
        """
        # A fresh dict per call: a shared ``{}`` default would carry the
        # cookies written below over into later calls.
        if request_dict is None:
            request_dict = {}

        if self.method == 'LOCAL':
            return super(Site, self)._download(request_dict)

        self.set_cookies()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict.update({'cookies': self.cookies})

        html__ = super(Site, self)._download(request_dict)
        i = 0
        # A response without any table is treated as a bad page; pause and
        # try again until a table shows up or the retries are exhausted.
        while not html__.xpath('//table') and i < 10:
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            i += 1
            logger.info("Got a bad response {} time(s)".format(i))
        return html__
예제 #4
0
    def _download(self, request_dict=None):
        """Download the page, using Selenium-obtained cookies when live.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, for a total of at most 11 requests.

        Args:
            request_dict: Optional dict of request options forwarded to the
                parent ``_download``. A ``'cookies'`` entry is added to it in
                place. When omitted, a fresh dict is created for this call.

        Returns:
            Whatever the parent ``_download`` returns for the last attempt;
            this may still lack a table if every retry failed.
        """
        # A fresh dict per call: a shared ``{}`` default would carry the
        # cookies written below over into later calls.
        if request_dict is None:
            request_dict = {}

        if self.method == 'LOCAL':
            return super(Site, self)._download(request_dict)

        self.set_cookies()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict.update({'cookies': self.cookies})

        html__ = super(Site, self)._download(request_dict)
        i = 0
        # A response without any table is treated as a bad page; pause and
        # try again until a table shows up or the retries are exhausted.
        while not html__.xpath('//table') and i < 10:
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            i += 1
            logger.info("Got a bad response {} time(s)".format(i))
        return html__
예제 #5
0
    def _download(self, request_dict=None):
        """Download the page, using Selenium-obtained cookies when live.

        We use selenium to get the cookies, and then we check if we got the
        correct page (one containing a ``<table>``). If not, we retry up to
        10 more times, for a total of at most 11 requests.

        Args:
            request_dict: Optional dict of request options forwarded to the
                parent ``_download``. A ``"cookies"`` entry is added to it in
                place. When omitted, a fresh dict is created for this call.

        Returns:
            Whatever the parent ``_download`` returns for the last attempt;
            this may still lack a table if every retry failed.
        """
        # A fresh dict per call: a shared ``{}`` default would carry the
        # cookies written below over into later calls.
        if request_dict is None:
            request_dict = {}

        if self.test_mode_enabled():
            return super(Site, self)._download(request_dict)

        # use selenium to establish required cookies
        logger.info("Running Selenium browser to get the cookies...")
        add_delay(20, 5)
        # NOTE(review): presumably this populates self.cookies, which is read
        # on the next lines -- confirm against initiate_webdriven_session.
        self.initiate_webdriven_session()
        logger.info("Using cookies: %s" % self.cookies)
        request_dict.update({"cookies": self.cookies})

        html__ = super(Site, self)._download(request_dict)
        i = 0
        # A response without any table is treated as a bad page; pause and
        # try again until a table shows up or the retries are exhausted.
        while not html__.xpath("//table") and i < 10:
            add_delay(20, 5)
            html__ = super(Site, self)._download(request_dict)
            i += 1
            logger.info("Got a bad response {} time(s)".format(i))
        return html__