def leech(url, filename=None):
    """Download the story found at *url* and write it out as an EPUB.

    The URL may point at any page of a story, or at something that is not a
    story at all; the site handler is responsible for working that out.

    Args:
        url: Address of a page belonging to the story.
        filename: Output path. Defaults to the story title plus ``.epub``.

    Returns:
        The filename that was passed to ``epub.make_epub``.

    Raises:
        Exception: If no site handler is found for *url*, or the handler
            returns nothing for it.
    """
    site = _get_site(url)
    if not site:
        raise Exception("No site handler found")

    story = site.extract(url, fetch)
    if not story:
        raise Exception("Couldn't extract story")

    metadata = {
        'title': story['title'],
        'author': story['author'],
        'unique_id': url,
    }

    # One (title, file name, rendered markup) entry per chapter; the file
    # names are numbered from 1.
    html = [
        (
            chapter[0],
            'chapter%d.html' % number,
            html_template.format(title=chapter[0], text=chapter[1]),
        )
        for number, chapter in enumerate(story['chapters'], start=1)
    ]

    if not filename:
        filename = story['title'] + '.epub'

    epub.make_epub(filename, html, metadata)

    return filename
def leech(url, session, filename=None, args=None):
    """Download the story found at *url* and write it out as an EPUB.

    The URL may point at any page of a story, or at something that is not a
    story at all; the site handler is responsible for working that out.

    Args:
        url: Address of a page belonging to the story. It is replaced by the
            second value returned from ``sites.get`` before being used.
        session: Object passed to the site handler and also used (via its
            ``get`` method) to download the base stylesheet.
        filename: Output path. Defaults to the story title plus ``.epub``.
        args: Extra options forwarded unchanged to the site handler.

    Returns:
        Whatever ``epub.make_epub`` returns for the written book.

    Raises:
        Exception: If no site handler is found for *url*, or the handler
            returns nothing for it.
    """
    site, url = sites.get(url)
    if not site:
        raise Exception("No site handler found")

    print("Handler", site, url)

    handler = site(session, args=args)

    # Stored logins are optional (note the defaults on the lookup below), so
    # a missing leech.json must not abort the download. A file that exists
    # but is malformed still raises, so a broken config is not hidden.
    try:
        with open('leech.json') as store_file:
            store = json.load(store_file)
    except FileNotFoundError:
        store = {}
    login = store.get('logins', {}).get(site.__name__, False)
    if login:
        handler.login(login)

    story = handler.extract(url)
    if not story:
        raise Exception("Couldn't extract story")

    # Materialised once because both the earliest and latest date are needed.
    dates = list(story.dates())
    metadata = {
        'title': story.title,
        'author': story.author,
        'unique_id': url,
        'started': min(dates),
        'updated': max(dates),
    }

    # The cover is static, and the only change comes from the image which we generate
    html = [('Cover', 'cover.html', cover_template)]
    cover_image = (
        'images/cover.png',
        cover.make_cover(story.title, story.author).read(),
        'image/png',
    )

    html.append((
        'Front Matter',
        'frontmatter.html',
        frontmatter_template.format(now=datetime.datetime.now(), **metadata),
    ))

    html.extend(chapter_html(story))

    css = (
        'Styles/base.css',
        session.get(
            'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css'
        ).text,
        'text/css',
    )

    filename = filename or story.title + '.epub'

    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))

    return filename
def leech(url, session, filename=None, args=None):
    """Download the story found at *url* and write it out as an EPUB.

    The URL may point at any page of a story, or at something that is not a
    story at all; the site handler is responsible for working that out.

    Args:
        url: Address of a page belonging to the story.
        session: Object passed to the site handler and also used (via its
            ``get`` method) to download the base stylesheet.
        filename: Output path. Defaults to the story title plus ``.epub``.
        args: Extra options forwarded unchanged to the site handler.

    Returns:
        Whatever ``epub.make_epub`` returns for the written book.

    Raises:
        Exception: If no site handler is found for *url*, or the handler
            returns nothing for it.
    """
    site = sites.get(url)
    if not site:
        raise Exception("No site handler found")

    handler = site(session, args=args)

    # Stored logins are optional (note the defaults on the lookup below), so
    # a missing leech.json must not abort the download. A file that exists
    # but is malformed still raises, so a broken config is not hidden.
    try:
        with open('leech.json') as store_file:
            store = json.load(store_file)
    except FileNotFoundError:
        store = {}
    login = store.get('logins', {}).get(site.__name__, False)
    if login:
        handler.login(login)

    story = handler.extract(url)
    if not story:
        raise Exception("Couldn't extract story")

    # Materialised once because both the earliest and latest date are needed.
    dates = list(story.dates())
    metadata = {
        'title': story.title,
        'author': story.author,
        'unique_id': url,
        'started': min(dates),
        'updated': max(dates),
    }

    # The cover is static, and the only change comes from the image which we generate
    html = [('Cover', 'cover.html', cover_template)]
    cover_image = (
        'images/cover.png',
        cover.make_cover(story.title, story.author).read(),
        'image/png',
    )

    html.append((
        'Front Matter',
        'frontmatter.html',
        frontmatter_template.format(now=datetime.datetime.now(), **metadata),
    ))

    html.extend(chapter_html(story))

    css = (
        'Styles/base.css',
        session.get(
            'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css'
        ).text,
        'text/css',
    )

    filename = filename or story.title + '.epub'

    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))

    return filename