def main(argv=None):
    """
    Entry point of the batch command.

    Parses the command line with docopt, configures logging, and then
    converts every '.xml' file found beneath each DIR argument into an EPUB.
    The per-article output directory is always removed afterwards; epubcheck
    is run on the produced EPUB unless it was disabled or make_EPUB did not
    report success.

    argv -- argument list forwarded unchanged to docopt
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #The image option is only accepted when it carries a wildcard
    image_pattern = args['--images']
    if image_pattern is not None and '*' not in image_pattern:
        sys.exit('Argument for --images option must contain "*"')

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #The 'openaccess_epub' parent logger was configured just above
    command_log = logging.getLogger('openaccess_epub.commands.batch')

    #The config module is loaded only once logging is available
    config = openaccess_epub.utils.load_config_module()

    log_to_file = not args['--no-log-file']

    def rebase_log(target):
        #Swap the file handler of the 'openaccess_epub' logger to target
        oae_logging.replace_filehandler(logname='openaccess_epub',
                                        new_file=target,
                                        level=args['--log-level'],
                                        frmt=oae_logging.STANDARD_FORMAT)

    for directory in args['DIR']:
        for xml_file in files_with_ext('.xml', directory,
                                       recursive=args['--recursive']):
            #Log to a temporary file until the final log location is known
            if log_to_file:
                rebase_log('temp.log')

            command_log.info('Processing input: {0}'.format(xml_file))

            root_name = openaccess_epub.utils.file_root_name(xml_file)
            abs_input_path = openaccess_epub.utils.get_absolute_path(xml_file)
            input_dirname = os.path.dirname(abs_input_path)

            if log_to_file:
                #The real log sits next to the input, named after it
                log_path = os.path.join(input_dirname, root_name + '.log')
                rebase_log(log_path)
                #Carry what was logged so far over to the new location
                shutil.move('temp.log', log_path)

            #Logging is settled, so the article may be parsed
            parsed_article = Article(abs_input_path,
                                     validation=not args['--no-validate'])

            #Work out the base of the output directory
            if args['--output'] is not None:
                output_base = openaccess_epub.utils.get_absolute_path(
                    args['--output'])
            elif os.path.isabs(config.default_output):
                #An absolute default is used untouched
                output_base = config.default_output
            else:
                #A relative default is resolved against the input location
                output_base = os.path.normpath(
                    os.path.join(input_dirname, config.default_output))

            #Every article gets its own directory named by its root name
            output_directory = os.path.join(output_base, root_name)

            success = make_EPUB(parsed_article,
                                output_directory,
                                abs_input_path,
                                args['--images'],
                                config_module=config,
                                batch=True)

            #Cleanup is not optional in batch mode
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

            if success and not args['--no-epubcheck']:
                epub_name = '{0}.epub'.format(output_directory)
                openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Run the batch conversion command.

    The command line is parsed by docopt, logging is configured, and each
    '.xml' file located under the DIR arguments is parsed and passed to
    make_EPUB in batch mode. The intermediate output directory is removed
    for every article, and epubcheck is applied to successful results unless
    '--no-epubcheck' was given.

    argv -- argument list handed to docopt as-is
    """
    args = docopt(__doc__, argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    images = args['--images']
    if images is not None and '*' not in images:
        sys.exit('Argument for --images option must contain "*"')

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #Fetch the command logger; its parent logger was set up above
    command_log = logging.getLogger('openaccess_epub.commands.batch')

    #Configuration is loaded after logging has been configured
    config = openaccess_epub.utils.load_config_module()

    file_logging_disabled = args['--no-log-file']
    log_level = args['--log-level']
    explicit_output = args['--output']

    for directory in args['DIR']:
        xml_files = files_with_ext('.xml', directory,
                                   recursive=args['--recursive'])
        for xml_file in xml_files:
            #While the utilities work, the log lives in a temporary file
            if not file_logging_disabled:
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file='temp.log',
                    level=log_level,
                    frmt=oae_logging.STANDARD_FORMAT)

            command_log.info('Processing input: {0}'.format(xml_file))
            root_name = openaccess_epub.utils.file_root_name(xml_file)
            abs_input_path = openaccess_epub.utils.get_absolute_path(xml_file)

            if not file_logging_disabled:
                log_name = root_name + '.log'
                log_path = os.path.join(os.path.dirname(abs_input_path),
                                        log_name)
                #Point the handler at the log beside the input file
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file=log_path,
                    level=log_level,
                    frmt=oae_logging.STANDARD_FORMAT)
                #Then bring the temporary log contents along
                shutil.move('temp.log', log_path)

            #Parse the article now that logging is ready
            validate = not args['--no-validate']
            parsed_article = Article(abs_input_path, validation=validate)

            #Decide where the output should go
            if explicit_output is None:
                default_output = config.default_output
                if os.path.isabs(default_output):
                    #Absolute defaults stay as they are
                    output_root = default_output
                else:
                    #Relative defaults are taken relative to the input
                    abs_dirname = os.path.dirname(abs_input_path)
                    output_root = os.path.normpath(
                        os.path.join(abs_dirname, default_output))
            else:
                output_root = openaccess_epub.utils.get_absolute_path(
                    explicit_output)

            #The root name is appended to give each article its own folder
            output_directory = os.path.join(output_root, root_name)

            success = make_EPUB(parsed_article,
                                output_directory,
                                abs_input_path,
                                images,
                                config_module=config,
                                batch=True)

            #Batch mode always cleans up the output directory
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

            if args['--no-epubcheck'] or not success:
                continue
            epub_name = '{0}.epub'.format(output_directory)
            openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Entry point of the convert command.

    Parses the command line with docopt, configures logging, and converts
    each INPUT argument to an EPUB. An input may be a path ending in '.xml',
    a 'doi:' identifier, or an 'http:'/'https:' URL; anything else ends the
    program. The program also exits with status 1 when the parsed article
    has no publisher.

    argv -- argument list forwarded unchanged to docopt
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #No explicit version request leaves the choice to make_EPUB
    epub_version = None
    if args['--epub3']:
        epub_version = 3
    elif args['--epub2']:
        epub_version = 2

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #The 'openaccess_epub' parent logger was configured just above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #The config module is loaded only once logging is available
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()

    #The log is only re-based per input when file logging is on and the
    #user did not choose a log file
    rebase_logs = not args['--no-log-file'] and not args['--log-to']

    for inpt in args['INPUT']:
        #Log to a temporary file while the input utilities do their work
        if rebase_logs:
            oae_logging.replace_filehandler(
                logname='openaccess_epub',
                new_file='temp.log',
                level=args['--log-level'],
                frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #Determine the root name of the input and where its XML lives
        lowered = inpt.lower()
        if lowered.endswith('.xml'):
            #A direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        elif lowered.startswith('doi:'):
            #A DOI
            root_name = input_utils.doi_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        elif lowered.startswith(('http:', 'https:')):
            #A URL
            root_name = input_utils.url_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        else:
            sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))

        input_dirname = os.path.dirname(abs_input_path)

        if rebase_logs:
            #The real log sits next to the input, named after it
            log_path = os.path.join(input_dirname, root_name + '.log')
            oae_logging.replace_filehandler(
                logname='openaccess_epub',
                new_file=log_path,
                level=args['--log-level'],
                frmt=oae_logging.STANDARD_FORMAT)
            #Carry what was logged so far over, then drop the temporary file
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Logging should be settled by now, so the article may be parsed
        parsed_article = Article(abs_input_path,
                                 validation=not args['--no-validate'])
        if parsed_article.publisher is None:
            command_log.critical(
                'Publisher support was not established, aborting')
            sys.exit(1)

        #Work out the base of the output directory
        if args['--output'] is not None:
            output_base = openaccess_epub.utils.get_absolute_path(
                args['--output'])
        elif os.path.isabs(config.default_output):
            #An absolute default is used untouched
            output_base = config.default_output
        else:
            #A relative default is resolved against the input location
            output_base = os.path.normpath(
                os.path.join(input_dirname, config.default_output))

        #Every article gets its own directory named by its root name
        output_directory = os.path.join(output_base, root_name)

        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #epubcheck verifies the validity of the EPUB; it requires a local
        #installation of java and epubcheck
        if success and not args['--no-epubcheck']:
            epub_name = '{0}.epub'.format(output_directory)
            openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Run the convert command.

    After docopt parsing and logging configuration, each INPUT argument is
    resolved to a local XML file (given directly, or obtained through the
    DOI/URL input utilities), parsed into an Article, and passed to
    make_EPUB. The output directory is removed afterwards unless
    '--no-cleanup' was given, and epubcheck runs on successful results
    unless '--no-epubcheck' was given.

    argv -- argument list handed to docopt as-is
    """
    args = docopt(__doc__, argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    if args['--epub3']:
        epub_version = 3
    else:
        epub_version = 2 if args['--epub2'] else None

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #Fetch the command logger; its parent logger was set up above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #Configuration is loaded after logging has been configured
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()

    def point_log_at(target):
        #Swap the file handler of the 'openaccess_epub' logger to target
        oae_logging.replace_filehandler(logname='openaccess_epub',
                                        new_file=target,
                                        level=args['--log-level'],
                                        frmt=oae_logging.STANDARD_FORMAT)

    #Iterate over everything the user listed as INPUT
    for inpt in args['INPUT']:
        per_input_log = not (args['--no-log-file'] or args['--log-to'])

        #The log is parked in a temporary file while the input is resolved
        if per_input_log:
            point_log_at('temp.log')

        command_log.info('Processing input: {0}'.format(inpt))

        #Find out the name of the file and where it is
        lower_input = inpt.lower()
        if lower_input.endswith('.xml'):
            #This is a direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        else:
            if lower_input.startswith('doi:'):
                #This is a DOI
                root_name = input_utils.doi_input(inpt)
            elif any(lower_input.startswith(scheme)
                     for scheme in ['http:', 'https:']):
                #This is a URL
                root_name = input_utils.url_input(inpt)
            else:
                sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))
            #DOI and URL inputs are located in the current directory
            abs_input_path = os.path.join(current_dir, root_name + '.xml')

        if per_input_log:
            log_name = root_name + '.log'
            log_path = os.path.join(os.path.dirname(abs_input_path), log_name)
            #Re-base the log to its final place beside the input
            point_log_at(log_path)
            #Copy over the temporary log, then remove it
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #With logging configured, parse the article
        validate = not args['--no-validate']
        parsed_article = Article(abs_input_path, validation=validate)
        if parsed_article.publisher is None:
            command_log.critical('Publisher support was not established, aborting')
            sys.exit(1)

        #Decide where the output should go
        explicit_output = args['--output']
        if explicit_output is None:
            default_output = config.default_output
            if os.path.isabs(default_output):
                #Absolute defaults stay as they are
                output_root = default_output
            else:
                #Relative defaults are taken relative to the input
                abs_dirname = os.path.dirname(abs_input_path)
                output_root = os.path.normpath(
                    os.path.join(abs_dirname, default_output))
        else:
            output_root = openaccess_epub.utils.get_absolute_path(
                explicit_output)

        #The root name is appended to give each article its own folder
        output_directory = os.path.join(output_root, root_name)

        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #Running epubcheck on the output verifies the validity of the EPUB,
        #requires a local installation of java and epubcheck.
        if args['--no-epubcheck'] or not success:
            continue
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Entry point of the validate command.

    Parses the command line with docopt and validates every '.xml' file
    found beneath each DIR argument against the DTD registered in DTDS for
    the document's public id. Results are reported through the
    'openaccess_epub.commands.validate' logger: echoed to stdout unless
    '--silent' is given, and written to a log file unless '--print-only' is
    given.

    A file is reported as FAILED and skipped when it cannot be parsed or
    when its public id has no entry in DTDS. Files that validate are only
    reported when '--record-pass' is given.

    argv -- argument list forwarded unchanged to docopt
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    formatter = logging.Formatter('%(message)s')

    log = logging.getLogger('openaccess_epub.commands.validate')
    log.setLevel(logging.DEBUG)
    if not args['--silent']:
        #Echo the results to stdout
        sh_echo = logging.StreamHandler(sys.stdout)
        sh_echo.setLevel(logging.INFO)
        sh_echo.setFormatter(formatter)
        log.addHandler(sh_echo)

    for directory in args['DIR']:
        #Render the path to the directory
        if os.path.isabs(directory):
            dir_path = directory
        else:
            dir_path = os.path.normpath(os.path.join(os.getcwd(), directory))

        #Create the filename for the log
        if args['--log-to']:
            log_path = args['--log-to']
        else:
            logname = os.path.basename(dir_path) + '_validation.log'
            log_path = os.path.join(dir_path, logname)

        #Add the filehandler for the log if logging is enabled
        if not args['--print-only']:
            logs.replace_filehandler('openaccess_epub.commands.validate',
                                     new_file=log_path,
                                     level='INFO',
                                     frmt='%(message)s')

        #Iteration over the XML files
        for xml_file in files_with_ext('.xml', directory,
                                       recursive=args['--recursive']):
            try:
                document = lxml.etree.parse(xml_file)
            except lxml.etree.XMLSyntaxError as err:
                log.info('FAILED: Parse Error; {0} '.format(xml_file))
                log.info(str(err))
                continue

            #Find its public id so we can identify the appropriate DTD
            public_id = document.docinfo.public_id

            #Get the dtd by the public id
            try:
                dtd = DTDS[public_id]
            except KeyError as err:
                log.info('FAILED: Unknown DTD Error; {0}'.format(xml_file))
                log.info(str(err))
                #Without a DTD there is nothing to validate against; going
                #on would either hit an unbound 'dtd' or silently reuse the
                #DTD of the previous file
                continue

            #Actual DTD validation
            if not dtd.validate(document):
                log.info('FAILED: DTD Validation Error; {0}'.format(xml_file))
                log.info(str(dtd.error_log.filter_from_errors()))
                #Clear the error_log so the errors are not reported again
                #for later files checked with the same DTD object
                dtd._clear_error_log()
            elif args['--record-pass']:
                log.info('PASSED: Validated by DTD; {0}'.format(xml_file))