def single_input(args, config=None):
    """
    Convert one article, named by ``args.input``, into an EPUB.

    The input may be given in three forms, checked in this order:
    a string containing ``http:`` (handed to ``u_input.url_input``), a
    string beginning with ``doi:`` (handed to ``u_input.doi_input``), or
    anything else, which is treated as a path to a local XML file.

    args -- parsed command-line namespace; this function reads ``input``,
            ``no_dtd_validation``, ``images``, ``clean`` and
            ``no_epubcheck`` from it, and passes the whole namespace to
            ``utils.get_output_directory``.
    config -- configuration object; when None it is obtained from
              ``get_config_module()``.
    """
    if config is None:
        config = get_config_module()

    source = args.input

    #Work out the article's base name and where its XML file lives
    if 'http:' in source:
        #Fetch by URL
        base_name = u_input.url_input(source)
        xml_path = os.path.join(LOCAL_DIR, base_name+'.xml')
    elif source.startswith('doi:'):
        #Fetch by DOI
        base_name = u_input.doi_input(source)
        xml_path = os.path.join(LOCAL_DIR, base_name+'.xml')
    else:
        #Local XML input
        xml_path = utils.get_absolute_path(source)
        base_name = u_input.local_input(xml_path)

    #Parsing is the same for every input type once the XML path is known
    article = Article(xml_path, validation=args.no_dtd_validation)

    #The output directory is named after the article; the EPUB file is
    #expected next to it as '<directory>.epub'
    epub_dir = os.path.join(utils.get_output_directory(args), base_name)

    #Make the EPUB
    make_epub(article,
              outdirect=epub_dir,
              explicit_images=args.images,  # Explicit image path
              batch=False,
              config=config)

    #Optionally remove the working directory, leaving only the .epub file
    if args.clean:
        shutil.rmtree(epub_dir)

    #NOTE: despite the attribute's name, a truthy no_epubcheck is what
    #triggers the check here (presumably a store_false flag -- confirm in
    #the argument parser)
    if args.no_epubcheck:
        epubcheck('{0}.epub'.format(epub_dir), config)
def batch_input(args, config=None):
    """
    Batch Input Mode works to convert all of the article XML files in a
    specified directory into individual article EPUB files.

    Batch Input Mode is employed under a few simplifying assumptions: any
    pre-existing folder for article EPUB conversion will be eliminated
    without asking user permission, all output except the .epub and .log
    files will be removed, and image files in a custom directory are not
    being used.

    Unlike the other input modes, Batch Input Mode output is always relative
    to the batch directory rather than the working directory of oaepub
    execution.

    Batch Input Mode has default epubcheck behavior, it will place a system
    call to epubcheck unless specified otherwise (--no-epubcheck or -N
    flags).

    args -- parsed command-line namespace; this function reads ``batch``
            (the directory to scan), ``no_dtd_validation`` and
            ``no_epubcheck`` from it, and passes the whole namespace to
            ``utils.get_output_directory``.
    config -- configuration object; when None it is obtained from
              ``get_config_module()``.

    Failures in reading an input file or in building its EPUB are recorded
    (file path followed by the traceback) in ``batch_tracebacks.txt`` in the
    current working directory, and the batch moves on to the next file.
    """
    if config is None:
        config = get_config_module()

    #The context manager closes the traceback log even if an unexpected
    #error aborts the batch part-way through
    with open('batch_tracebacks.txt', 'w') as error_file:
        #Iterate over all listed files in the batch directory
        for item in os.listdir(args.batch):
            item_path = os.path.join(args.batch, item)
            #Skip directories and files without .xml extension
            if not os.path.isfile(item_path):
                continue
            if os.path.splitext(item)[1] != '.xml':
                continue
            print(item_path)

            #Resolve the article's base name. KeyboardInterrupt is allowed
            #to propagate so Ctrl-C stops the batch; SystemExit is still
            #caught, as the original bare except did, in case a helper
            #bails out through sys.exit().
            try:
                raw_name = u_input.local_input(item_path)
            except (Exception, SystemExit):
                error_file.write(item_path + '\n')
                traceback.print_exc(file=error_file)
                #Without a name there is nothing to convert; continuing
                #would reuse the previous file's article or hit a NameError
                continue

            #Parse the article
            parsed_article = Article(os.path.join(args.batch, raw_name+'.xml'),
                                     validation=args.no_dtd_validation)

            #Create the output name
            output_name = os.path.join(utils.get_output_directory(args),
                                       raw_name)

            #Make the EPUB
            try:
                make_epub(parsed_article,
                          outdirect=output_name,
                          explicit_images=None,  # No explicit image path
                          batch=True,
                          config=config)
            except (Exception, SystemExit):
                error_file.write(item_path + '\n')
                traceback.print_exc(file=error_file)
                #The output directory may or may not exist after a failed
                #build, so clean up on a best-effort basis and skip
                #epubcheck for this file
                shutil.rmtree(output_name, ignore_errors=True)
                continue

            #Cleanup output directory, keeps EPUB and log
            shutil.rmtree(output_name)

            #Running epubcheck on the output verifies the validity of the
            #ePub, requires a local installation of java and epubcheck.
            #NOTE: a truthy no_epubcheck is what triggers the check here.
            if args.no_epubcheck:
                epubcheck('{0}.epub'.format(output_name), config)