Python debug Examples, histmag_to_kindle.logger.debug Python Examples

Example #1

0

Show file

File: generator.py Project: krzysztofzuraw/histmag_to_kindle

def _generate_html(pages):
    """Render the given pages into one html file inside a new temp directory.

    The document is written as ``histmag.html`` (UTF-8 encoded) into a
    directory created with ``tempfile.mkdtemp()``; the same directory is
    handed to ``_generate_body``.

    :param pages: list of `Page.class`
    :return: path to tempdir.
    :rtype: string.
    """
    tempdir = tempfile.mkdtemp()
    html_path = os.path.join(tempdir, 'histmag.html')

    # Python does not allow keyword arguments containing hyphens, so the
    # ``http-equiv`` attribute has to be passed through dict unpacking.
    content_type = html.meta(
        content="text/html; charset=utf-8", **{"http-equiv": "Content-Type"}
    )
    head = html.head(content_type, _get_tile(html, pages))
    article = html.div(_generate_body(html, pages, tempdir), id='article')
    doc = html.html(head, html.body(article))

    with open(html_path, 'wb') as out_file:
        logger.debug('Saving generated html to {file}'.format(file=html_path))
        rendered = doc.unicode(indent=2)
        out_file.write(rendered.encode('utf8'))

    return tempdir

Example #2

0

Show file

File: sender.py Project: krzysztofzuraw/histmag_to_kindle

def send_email_to_kindle(kindle_email, name='histmag.mobi'):
    """Send the file at ``name`` to ``kindle_email`` using the mailgun api.

    The mailgun credentials are read from the ``MAILGUN_API_KEY`` and
    ``EMAIL_SERVER`` environment variables.

    Basic Usage::

    >>> from histmag_to_kindle import send_email_to_kindle
    >>> send_email_to_kindle(kindle_email='your_kindle_email', name='histmag.mobi')
    <Response [200]>

    :param kindle_email: your kindle email
    :param name: path to file to send
    :return: response
    :raises ImproperlyConfigured: when either environment variable is
        missing or empty.
    """
    api_key = os.environ.get('MAILGUN_API_KEY')
    server = os.environ.get('EMAIL_SERVER')
    if not api_key or not server:
        raise ImproperlyConfigured('Either MAILGUN_API_KEY or EMAIL_SERVER variable not found in enviroment variables.')

    logger.debug('Sending html_article to {email}'.format(email=kindle_email))
    # Open the attachment in a context manager so the file handle is closed
    # once the request has completed (or failed) instead of being leaked.
    with open(name, 'rb') as attachment:
        return requests.post("https://api.mailgun.net/v3/{server}/messages".format(server=server),
                             auth=("api", api_key),
                             files=[("attachment", attachment)],
                             data={"from": "Excited User <mailgun@{server}>".format(server=server),
                                   "to": kindle_email,
                                   "subject": "Upload",
                                   "text": "send to kidle"})

Example #3

0

Show file

File: parser.py Project: krzysztofzuraw/histmag_to_kindle

    def parse_page(self, url):
        """Download ``url`` and collect the interesting elements of the page.

        Paragraphs (except those whose class contains ``article-tags`` or
        ``article-info``), author links, ``em``, ``img`` and ``span``
        elements found under ``self.xpath_root`` are gathered as
        ``(tag, value)`` records: the ``src`` attribute for images, the full
        text content for spans, and the element's own text otherwise
        (empty string when there is none).

        :param url: webpage address with 'http://'.
        :type url: string.
        :return: Page object with addr and contents
        :rtype: :class:`Page`
        """
        logger.debug('Started parsing page with url: {url}'.format(url=url))
        response = requests.get(url)
        tree = html.fromstring(response.content)
        Element = namedtuple('Elements', ['tag', 'value'])

        query = (
            '{root}//child::p[not(contains(@class, "article-tags")) '
            'and not(contains(@class, "article-info"))] '
            '| {root}//a[contains(@href, "author")]'
            '| {root}//em '
            '| {root}//img '
            '| {root}//span'
        ).format(root=self.xpath_root)

        page_contents = []
        for node in tree.xpath(query):
            tag = node.tag
            if tag == 'img':
                value = node.attrib['src']
            elif tag == 'span':
                value = node.text_content() or ''
            else:
                value = node.text or ''
            page_contents.append(Element(tag, value))

        return Page(url, contents=page_contents)

Example #4

0

Show file

File: parser.py Project: krzysztofzuraw/histmag_to_kindle

    def get_first_link(self, url, word='następna'):
        """Return the first link under ``self.xpath_root`` containing ``word``.

        :param url: webpage address with 'http://'.
        :type url: string.
        :param word: word that is in <a> tag.
        :type word: string.
        :return: ``href`` value of the first matching link, or ``None`` when
            no link matches.
        :rtype: string.
        """
        response = requests.get(url)
        tree = html.fromstring(response.content)
        # Same expression as "(<root>//a[contains(.,"<word>")]/@href)[1]".
        query = ''.join(['(', self.xpath_root, '//a[contains(.,"', word, '")]/@href)[1]'])
        matches = tree.xpath(query)
        try:
            first_link = matches[0]
        except IndexError:
            logger.debug('No link found for word: {word}'.format(word=word))
            return None
        else:
            return first_link

Example #5

0

Show file

File: generator.py Project: krzysztofzuraw/histmag_to_kindle

def _generate_body(html, pages, tempdir):
    """Generate html body.

    Walks every content item of every page in order and turns it into an
    element built from ``html``. An item is skipped when its value equals
    the text of a non-image element that was already emitted; images are
    passed through ``_download_images`` and emitted with a ``src`` attribute.

    :param html: `Py.xml` html object.
    :param pages: list of `Page.class`.
    :param tempdir: `tempfile` tempdir object.
    :return: `Py.xml` html objects.
    :rtype: list.
    """
    list_of_bodies = []
    # Text values of the non-image elements emitted so far. Tracking them in
    # a set replaces rebuilding a list of all previous bodies for every item.
    # NOTE(review): assumes content values are hashable (strings) -- confirm
    # against callers other than the parser.
    seen_values = set()
    for page in pages:
        for content in page.contents:
            if content.value in seen_values:
                continue
            if content.tag == 'img':
                content = _download_images(content, tempdir)
                list_of_bodies.append(getattr(html, content.tag)(src=content.value))
            else:
                seen_values.add(content.value)
                list_of_bodies.append(getattr(html, content.tag)(content.value))
    logger.debug('Html body generated')
    return list_of_bodies

Example #6

0

Show file

File: parser.py Project: krzysztofzuraw/histmag_to_kindle

    def get_articles(self):
        """Get full article with subpages for `self.addr`.

        Starting from ``self.addr``, each page is parsed with
        ``parse_page`` and the link returned by ``get_first_link`` is
        queued as the next subpage. A link that was already seen is not
        queued again, so pages linking back to each other cannot cause an
        endless loop.

        :return: all subpages in form of :class:`Page`
        :rtype: list
        """
        urls_queue = deque([self.addr])
        found_urls = {self.addr}
        articles = []

        while urls_queue:
            url = urls_queue.popleft()
            logger.debug('Found url: {url}'.format(url=url))
            current_page = self.parse_page(url)
            link = self.get_first_link(url)
            # Only follow links that have not been visited or queued yet.
            if link and link not in found_urls:
                found_urls.add(link)
                urls_queue.append(link)
            articles.append(current_page)

        logger.info('Articles has been extracted')
        return articles