class Book(object):
    """Represents an ebook in html format.

    Attributes:
        meta: the ``Metadata`` object built from the title; it is indexed
            like a mapping and also provides ``filename(ext=...)``.
        url: the value of ``meta['url']``.
        content: the assembled html document; empty until ``make`` runs.
    """

    def __init__(self, title):
        """Build the metadata for *title* and print the title and url."""
        self.meta = Metadata(title)
        self.url = self.meta['url']
        print('%(title)s from %(url)s' % self.meta)
        self.content = ''

    def make(self):
        """Retrieve the book from its url and write a local html copy.

        Every retrieved page has its header and footer removed and its
        relative links fixed; the images referenced by the pages are then
        retrieved, and the combined document is stored in ``self.content``
        and written to ``self.meta.filename(ext='.html')``.
        """
        # Get all the html content of the book.
        request = Request(self.url)
        pages = request.retrieve(self.meta)

        # Remove headers, footers, fix relative links.
        cleaned_pages = []
        images = []
        progress = ProgressBar(len(pages), message='Removing non content:\t')
        for i, page in enumerate(pages):
            progress.update(i + 1)
            section = Section(page)
            section.removeHeader(**self.meta['header-attrs'])
            section.removeFooter(self.meta['footer-tag'],
                                 **self.meta['footer-attrs'])
            section.fixRelativeLinks()
            images += section.getImages(self.meta)
            cleaned_pages.append(section.soup.prettify())

        # Get all the images in the book (each distinct image only once).
        request.retrieveImages(self.meta, removeDup(images))

        # Make a local copy of the html book. The pieces are joined once
        # instead of growing a string page by page.
        header = ('<html><head><title>%s</title></head><body>'
                  % self.meta['title'])
        self.content = ''.join([header] + cleaned_pages + ['</body></html>'])

        filename = self.meta.filename(ext='.html')
        # The context manager closes the file even if the write fails.
        with open(filename, 'w') as html_file:
            html_file.write(self.content)

    def _convert_command(self, format_):
        """Return the ``ebook-convert`` argument list for *format_*."""
        # Each option and its value must be separate list items: the list
        # is handed to the program without a shell, so an item such as
        # ' --authors "X"' would arrive as one literal argument (leading
        # space and quotes included) and never be parsed as an option.
        return [
            'ebook-convert',
            self.meta.filename(ext='.html'),
            self.meta.filename(ext=format_),
            '--authors', '%(author)s' % self.meta,
            '--level1-toc', '//h:h1',
            '--level2-toc', '//h:h2',
        ]

    def convert(self, format_):
        """Convert the book from html to another format.

        Runs ``ebook-convert`` on the html file written by ``make``. The
        command's standard output is written to ``ebook-convert.log``
        inside the directory ``self.meta.filename(ext='')``, which is
        created if it does not exist.

        Args:
            format_: passed as ``ext`` to ``self.meta.filename`` to name
                the output file.
        """
        print('Converting from html to %s' % format_)
        command = self._convert_command(format_)

        output_dir = self.meta.filename(ext='')
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        log_path = os.path.join(output_dir, 'ebook-convert.log')
        # The context manager closes the log even if the call raises
        # (for example when ebook-convert is not installed).
        with open(log_path, 'w') as log:
            subprocess.call(command, stdout=log)