def main():
    """ Main program. """

    try:
        config()
    except configparser.Error as what:
        error("Error in configuration file: %s", str(what))
        return 1

    Logger.set_log_level(options.verbose)

    options.types = options.types or ['all']
    options.types = CommonCode.add_dependencies(options.types, DEPENDENCIES, BUILD_ORDER)
    debug("Building types: %s" % ' '.join(options.types))

    ParserFactory.load_parsers()
    WriterFactory.load_writers()
    PackagerFactory.load_packagers()

    if options.is_job_queue:
        job_queue = cPickle.load(sys.stdin.buffer)  # read bytes
    else:
        options.dc = get_dc(options.url)
        job_queue = []
        output_files = dict()
        for type_ in options.types:
            job = CommonCode.Job(type_)
            job.url = options.url
            job.ebook = options.ebook
            job.dc = options.dc
            job.outputdir = options.outputdir
            job.outputfile = options.outputfile or make_output_filename(type_, options.dc)
            output_files[type_] = job.outputfile

            # kindle formats are built from the corresponding epub output
            if job.type == 'kindle.images':
                job.url = os.path.join(job.outputdir, output_files['epub.images'])
            elif job.type == 'kindle.noimages':
                job.url = os.path.join(job.outputdir, output_files['epub.noimages'])

            job_queue.append(job)

    for j in job_queue:
        do_job(j)

    packager = PackagerFactory.create(options.packager, 'push')
    if packager:
        # HACK: the WWers only ever convert one ebook at a time
        job = job_queue[0]
        job.outputfile = '%d-final.zip' % (options.dc.project_gutenberg_id)
        packager.package(job)

    return 0
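# The `is_job_queue` branch above expects a pickled list of Job objects on
# stdin. A minimal sketch of the producer side, assuming a hypothetical
# `build_jobs()` helper and an `--is-job-queue` flag spelling (both are
# illustrative, not defined in this module); Job must be importable on both
# ends for unpickling to work:
#
#     import pickle
#     import subprocess
#     import sys
#
#     proc = subprocess.Popen(
#         [sys.executable, 'ebookmaker', '--is-job-queue'],
#         stdin=subprocess.PIPE)              # binary pipe feeds sys.stdin.buffer
#     pickle.dump(build_jobs(), proc.stdin)   # read back by cPickle.load() above
#     proc.stdin.close()
#     proc.wait()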
def do_job(job):
    """ Do one job. """

    log_handler = None
    Logger.ebook = job.ebook
    if job.logfile:
        log_handler = open_log(os.path.join(os.path.abspath(job.outputdir), job.logfile))

    debug('=== Building %s ===' % job.type)
    start_time = datetime.datetime.now()

    try:
        if job.url:
            spider = Spider.Spider()
            dirpath = os.path.dirname(job.url)  # platform native path
            spider.include_urls += (options.include_urls
                                    or [parsers.webify_url(dirpath) + '/*'])  # use for parser only

            spider.include_mediatypes += options.include_mediatypes
            if job.subtype == '.images' or job.type == 'rst.gen':
                spider.include_mediatypes.append('image/*')

            spider.exclude_urls += options.exclude_urls
            spider.exclude_mediatypes += options.exclude_mediatypes

            spider.max_depth = options.max_depth or six.MAXSIZE

            # rewrites use a 'from>to' syntax
            for rewrite in options.rewrite:
                from_url, to_url = rewrite.split('>')
                spider.add_redirection(from_url, to_url)

            attribs = parsers.ParserAttributes()
            attribs.url = parsers.webify_url(job.url)
            attribs.id = 'start'

            if options.input_mediatype:
                attribs.orig_mediatype = attribs.HeaderElement.from_str(options.input_mediatype)

            spider.recursive_parse(attribs)
            elect_coverpage(spider, job.url)
            job.url = spider.redirect(job.url)
            job.base_url = job.url
            job.spider = spider

        writer = WriterFactory.create(job.maintype)
        writer.build(job)

        if options.validate:
            writer.validate(job)

        packager = PackagerFactory.create(options.packager, job.type)
        if packager:
            packager.package(job)

        if job.type == 'html.images':
            # FIXME: hack for push packager
            options.html_images_list = list(job.spider.aux_file_iter())

    except SkipOutputFormat as what:
        warning("%s" % what)

    except Exception as what:
        exception("%s" % what)

    end_time = datetime.datetime.now()
    info(' %s made in %s' % (job.type, end_time - start_time))

    if log_handler:
        close_log(log_handler)
        log_handler = None
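# A conventional entry point; a sketch assuming this module is run directly
# as a script (the real wiring may live in a console_script instead).
# main() already returns 0 on success and 1 on a configuration error,
# so its return value maps straight onto the process exit status:
if __name__ == '__main__':
    sys.exit(main())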