Esempio n. 1
0
        def get_post_content():
            """Scrape the current post and record it in ``self.posts_content``.

            Relies on ``url``, ``text_xpath``, ``figure_xpath`` and the helpers
            ``check_limited_access``, ``get_text`` and ``get_figure`` from the
            enclosing scope.

            When ``check_limited_access()`` returns ``True`` a warning is
            logged. Unless ``self.ignore_limited_access`` is set, the user is
            then asked whether to continue; any answer other than ``y`` makes
            the function return without recording anything.

            Returns:
                None. The url, text, image source and caption are appended to
                the matching lists of ``self.posts_content``.
            """
            limited = check_limited_access()

            if limited is True:
                if self.ignore_limited_access is False:
                    Logger.warning('You have a limited access :' + url)

                    answer = input('\tDo you want to continue - [y/n]: ').lower()

                    if answer == 'n':
                        # Explicit refusal: stop quietly.
                        return

                    if answer != 'y':
                        # Anything that is neither 'y' nor 'n' aborts loudly.
                        Logger.fail('Abort')
                        return

                elif self.ignore_limited_access:
                    # Limited access is tolerated: warn and carry on.
                    Logger.warning('You have a limited access :' + url)

            text_nodes = self.find_elements_by_xpath(
                xpath=text_xpath, raise_error=False)
            figure_nodes = self.find_elements_by_xpath(
                xpath=figure_xpath, raise_error=False)

            text = get_text(text_nodes)
            img_src, img_caption = get_figure(figure_nodes)

            if not self.posts_content:
                # First post: create the column lists before appending.
                self.posts_content = {
                    'url': [],
                    'text': [],
                    'img_src': [],
                    'caption': [],
                }

            row = (
                ('url', url),
                ('text', text),
                ('img_src', img_src),
                ('caption', img_caption),
            )

            for column, value in row:
                self.posts_content[column].append(value)
Esempio n. 2
0
    def get(self, url):
        """Load ``url`` in the driver, retrying when the page load times out.

        The page-load timeout is set to ``self.time_to_wait``. The script
        timeout is taken from ``self.kwargs['script_timeout']`` when that key
        exists and is ``0.001`` otherwise.

        Up to ``self.reload_page_count`` attempts are made. Each timed-out
        attempt before the last one is logged and retried. When the last
        attempt times out the user is prompted:

        * ``y``: new values for ``self.time_to_wait`` and
          ``self.reload_page_count`` are read from stdin and ``get`` is
          called again for the same url.
        * ``n``: ``self.___timeout_export__()`` is called and the error is
          passed to ``Logger.error``.
        * anything else: same as ``n``, with an extra ``'Abort'`` message.

        Finally the document body's scroll height is stored in
        ``self.scroll_height``.

        Args:
            url: Address handed to ``self.driver.get``.
        """
        self.driver.set_page_load_timeout(time_to_wait=self.time_to_wait)

        if 'script_timeout' not in self.kwargs:
            self.driver.set_script_timeout(0.001)
        else:
            self.driver.set_script_timeout(
                time_to_wait=self.kwargs['script_timeout'])

        for attempt in range(1, self.reload_page_count + 1):
            try:
                self.driver.get(url=url)
            except TimeoutException as error:
                prefix = str(attempt)

                if attempt < self.reload_page_count:
                    # Attempts remain: report the timeout and try again.
                    Logger.fail(prefix + ': timeout::page has been reloaded')
                    Logger.set_line(length=60)
                    continue

                # Last attempt failed: let the user decide what happens next.
                Logger.fail(
                    prefix +
                    ': timeout::page reload Limit has been exceed\n'
                    '\tdo you want to try again - [y/n]: ',
                    end='')
                reply = input()

                Logger.set_line(length=60)

                choice = reply.lower()

                if choice == 'y':
                    self.time_to_wait = float(input('time to wait :'))
                    self.reload_page_count = int(input('reload count :'))

                    # Start over with the limits the user just entered.
                    self.get(url)
                else:
                    self.___timeout_export__()

                    if choice != 'n':
                        # Unrecognised answer: treated as a refusal.
                        Logger.fail('Abort')

                    Logger.error(error)
            else:
                # Page loaded within the timeout: stop retrying.
                break

        self.scroll_height = self.driver.execute_script(
            "return document.body.scrollHeight")