Ejemplo n.º 1
0
def generate_mobi(pages, output='histmag.mobi'):  # pragma: no cover
    """Generate a mobi file by running the binary named in ``KINDLEGEN``.

    The pages are first rendered to HTML with ``_generate_html``. The
    executable taken from the ``KINDLEGEN`` environment variable is then run
    on ``histmag.html`` inside the returned directory, with ``-o output``.
    Its combined stdout/stderr is scanned for lines starting with ``Error``.

    Basic Usage::

    >>> from histmag_to_kindle import generate_mobi
    >>> generate_mobi(articles)
    '/tmp/path_to_directory_with_mobi'

    :param pages: list with `Page.class`.
    :type pages: list.
    :param output: name of file, default histmag.mobi.
    :type output: string.
    :return: path to the directory returned by ``_generate_html``.
    :rtype: string.
    :raises ImproperlyConfigured: if ``KINDLEGEN`` is unset or empty.
    :raises GenerateMobiError: with the first output line that starts with
        ``Error``.
    """
    # Read the variable once so the value checked is the value executed.
    kindlegen = os.environ.get('KINDLEGEN')
    if not kindlegen:
        raise ImproperlyConfigured('No kindlegen library in env variables.')

    html_dir_path = _generate_html(pages)
    proc = subprocess.Popen(
        [
            kindlegen,
            os.path.join(html_dir_path, 'histmag.html'),
            '-o',
            output,
        ],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )
    raw_output = proc.communicate()[0]

    # The output is only inspected for diagnostics, so undecodable bytes are
    # replaced instead of aborting with UnicodeDecodeError. splitlines() also
    # copes with CRLF line endings, keeping a stray '\r' out of the message.
    report = raw_output.decode('utf-8', errors='replace')
    for line in report.splitlines():
        if line.startswith('Error'):
            raise GenerateMobiError(line)

    logger.info('Mobi generated')
    return html_dir_path
Ejemplo n.º 2
0
    def get_articles(self):
        """Get full article with subpages for `self.addr`.

        Starting from `self.addr`, every queued url is parsed with
        `self.parse_page`, and the link returned by `self.get_first_link`
        for that url is queued next. A link that was already seen is not
        queued again, so pages that link back to an earlier page cannot
        cause an endless loop.

        :return: results of `self.parse_page` for every visited url, in
            visiting order (presumably :class:`Page` objects -- confirm
            against `parse_page`)
        :rtype: list
        """
        urls_queue = deque([self.addr])
        found_urls = {self.addr}
        articles = []

        while urls_queue:
            url = urls_queue.popleft()
            logger.debug('Found url: {url}'.format(url=url))
            current_page = self.parse_page(url)
            link = self.get_first_link(url)
            # Only follow links not seen before; this is what `found_urls`
            # exists for and what guarantees termination on cyclic links.
            if link and link not in found_urls:
                found_urls.add(link)
                urls_queue.append(link)
            articles.append(current_page)

        logger.info('Articles has been extracted')
        return articles