예제 #1
0
파일: leech.py 프로젝트: imclab/leech
def leech(url, filename=None):
    """Extract the story reachable from ``url`` and hand it to the EPUB builder.

    ``url`` may be any page; a site handler is looked up with ``_get_site``
    and asked to extract the story using the module-level ``fetch`` callable.
    Each chapter is rendered through ``html_template`` and the result is
    passed to ``epub.make_epub`` together with title/author metadata.

    Args:
        url: Page address; also used as the book's ``unique_id``.
        filename: Output name. When falsy, the story title plus ``.epub``
            is used instead.

    Returns:
        The filename that was passed to ``epub.make_epub``.

    Raises:
        Exception: If no site handler matches ``url`` or the handler
            returns nothing for it.
    """
    handler = _get_site(url)
    if not handler:
        raise Exception("No site handler found")

    story = handler.extract(url, fetch)
    if not story:
        raise Exception("Couldn't extract story")

    title = story['title']
    metadata = {'title': title, 'author': story['author'], 'unique_id': url}

    # One (title, file name, rendered HTML) triple per chapter; files are
    # numbered from 1.
    pages = [
        (
            chapter[0],
            'chapter%d.html' % number,
            html_template.format(title=chapter[0], text=chapter[1]),
        )
        for number, chapter in enumerate(story['chapters'], start=1)
    ]

    if not filename:
        filename = title + '.epub'

    epub.make_epub(filename, pages, metadata)
    return filename
예제 #2
0
def leech(url, session, filename=None, args=None):
    """Extract the story at ``url`` and package it as an EPUB.

    Args:
        url: Any page of the story. ``sites.get`` maps it to a handler class
            and the URL that is actually used from then on.
        session: HTTP session handed to the site handler; also used here,
            via ``session.get``, to download the base stylesheet.
        filename: Output name for ``epub.make_epub``. When falsy, the story
            title plus ``.epub`` is used.
        args: Extra options forwarded unchanged to the site handler.

    Returns:
        The value returned by ``epub.make_epub``.

    Raises:
        Exception: If no site handler matches ``url`` or the handler
            extracts nothing.
    """
    site, url = sites.get(url)
    if not site:
        raise Exception("No site handler found")

    print("Handler", site, url)

    handler = site(session, args=args)

    # Stored credentials are optional: a missing leech.json just means there
    # is nothing to log in with, so it must not abort the whole download.
    login = False
    try:
        with open('leech.json') as store_file:
            store = json.load(store_file)
    except FileNotFoundError:
        pass
    else:
        login = store.get('logins', {}).get(site.__name__, False)
    if login:
        handler.login(login)

    story = handler.extract(url)
    if not story:
        raise Exception("Couldn't extract story")

    # NOTE(review): min()/max() raise ValueError if the story yields no dates.
    dates = list(story.dates())
    metadata = {
        'title': story.title,
        'author': story.author,
        'unique_id': url,
        'started': min(dates),
        'updated': max(dates),
    }

    # The cover page itself is static; only the generated image varies.
    html = [('Cover', 'cover.html', cover_template)]
    cover_image = (
        'images/cover.png',
        cover.make_cover(story.title, story.author).read(),
        'image/png',
    )

    html.append((
        'Front Matter',
        'frontmatter.html',
        frontmatter_template.format(now=datetime.datetime.now(), **metadata),
    ))

    html.extend(chapter_html(story))

    css = (
        'Styles/base.css',
        session.get(
            'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css'
        ).text,
        'text/css',
    )

    filename = filename or story.title + '.epub'

    filename = epub.make_epub(filename,
                              html,
                              metadata,
                              extra_files=(css, cover_image))

    return filename
예제 #3
0
def leech(url, session, filename=None, args=None):
    """Extract the story at ``url`` and package it as an EPUB.

    Args:
        url: Any page of the story. ``sites.get`` maps it to a handler
            class; the URL is also used as the book's ``unique_id``.
        session: HTTP session handed to the site handler; also used here,
            via ``session.get``, to download the base stylesheet.
        filename: Output name for ``epub.make_epub``. When falsy, the story
            title plus ``.epub`` is used.
        args: Extra options forwarded unchanged to the site handler.

    Returns:
        The value returned by ``epub.make_epub``.

    Raises:
        Exception: If no site handler matches ``url`` or the handler
            extracts nothing.
    """
    site = sites.get(url)
    if not site:
        raise Exception("No site handler found")

    handler = site(session, args=args)

    # Stored credentials are optional: a missing leech.json just means there
    # is nothing to log in with, so it must not abort the whole download.
    login = False
    try:
        with open('leech.json') as store_file:
            store = json.load(store_file)
    except FileNotFoundError:
        pass
    else:
        login = store.get('logins', {}).get(site.__name__, False)
    if login:
        handler.login(login)

    story = handler.extract(url)
    if not story:
        raise Exception("Couldn't extract story")

    # NOTE(review): min()/max() raise ValueError if the story yields no dates.
    dates = list(story.dates())
    metadata = {
        'title': story.title,
        'author': story.author,
        'unique_id': url,
        'started': min(dates),
        'updated': max(dates),
    }

    # The cover page itself is static; only the generated image varies.
    html = [('Cover', 'cover.html', cover_template)]
    cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')

    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))

    html.extend(chapter_html(story))

    css = ('Styles/base.css', session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')

    filename = filename or story.title + '.epub'

    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))

    return filename