def get_images(output_directory, explicit, input_path, config, parsed_article):
    """
    Main logic controller for the placement of images into the output directory

    Controlling logic for placement of the appropriate image files into the
    EPUB directory. This function interacts with interface arguments as well as
    the local installation config.py file. These may change behavior of this
    function in terms of how it looks for images relative to the input,
    where it finds explicit images, whether it will attempt to download images,
    and whether successfully downloaded images will be stored in the cache.

    The image methods are tried in a fixed order and the first one that
    succeeds ends the search: explicit image directory, input-relative
    images, the article image cache, and finally fetching from the publisher.
    When an explicit directory is given, no other method is tried even if
    the explicit method fails.

    Parameters
    ----------
    output_directory : str
        The directory path where the EPUB is being constructed/output
    explicit : str
        A directory path to a user specified directory of images. Allows *
        wildcard expansion.
    input_path : str
        The absolute path to the input XML file.
    config : config module
        The imported configuration module
    parsed_article : openaccess_epub.article.Article object
        The Article instance for the article being converted to EPUB

    Returns
    -------
    bool
        The success value reported by the image method that ended the search;
        False when no enabled method succeeded or when fetching is not
        supported for the article's publisher. Fetching for the '10.3389'
        prefix always reports True once the fetch call returns.

    Raises
    ------
    ValueError
        If the article DOI does not contain a '/' separator.
    """
    #Split the DOI on the first slash only: the suffix of a DOI is allowed to
    #contain further slashes, which must stay part of the article DOI
    journal_doi, article_doi = parsed_article.doi.split('/', 1)
    log.debug('journal-doi : {0}'.format(journal_doi))
    log.debug('article-doi : {0}'.format(article_doi))

    #Get the rootname for wildcard expansion
    rootname = utils.file_root_name(input_path)

    #Specify where to place the images in the output
    img_dir = os.path.join(output_directory,
                           'EPUB',
                           'images-{0}'.format(article_doi))
    log.info('Using {0} as image directory target'.format(img_dir))

    #Construct path to cache for article
    article_cache = os.path.join(config.image_cache, journal_doi, article_doi)

    #Use manual image directory, explicit images
    if explicit:
        success = explicit_images(explicit, img_dir, rootname, config)
        if success and config.use_image_cache:
            move_images_to_cache(img_dir, article_cache)
        #Explicit images prevents all other image methods
        return success

    #Input-Relative import, looks for any one of the listed options
    if config.use_input_relative_images:
        #Prevents other image methods only if successful
        if input_relative_images(input_path, img_dir, rootname, config):
            if config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return True

    #Use cache for article if it exists
    if config.use_image_cache:
        #Prevents other image methods only if successful
        if image_cache(article_cache, img_dir):
            return True

    #Download images from Internet
    if config.use_image_fetching:
        #Tolerate a target directory that already exists and create any
        #missing parents; a bare os.mkdir raises in both situations
        if not os.path.isdir(img_dir):
            os.makedirs(img_dir)
        if journal_doi == '10.3389':
            fetch_frontiers_images(article_doi, img_dir)
            if config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return True
        elif journal_doi == '10.1371':
            success = fetch_plos_images(article_doi, img_dir, parsed_article)
            if success and config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return success
        else:
            log.error('Fetching images for this publisher is not supported!')
            return False
    return False
def main(argv=None):
    """
    Command entry point: convert a collection of articles into a single EPUB

    Parses the command line with docopt, configures logging, reads the
    collection file (one input XML path per line), and processes every listed
    article into one shared output directory. After all articles have been
    processed the collection navigation and package documents are rendered
    for the chosen EPUB version and the directory is handed to epub_zip.
    Depending on the command line flags the output directory is then removed
    and epubcheck is run on the '<output_directory>.epub' file.

    The EPUB version is decided once, by the command line flags or else by
    the first article's publisher default, and is then used for every article.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments, forwarded unchanged to docopt.

    Raises
    ------
    SystemExit
        If the collection file does not exist, or if the output directory
        could not be created.
    """
    #Local import: only needed to name the "already exists" error code
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)
    abs_dirname = os.path.dirname(abs_input_path)

    #The log file defaults to sitting next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(abs_dirname, c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Skip blank lines, they would otherwise be treated as article paths
        inputs = [line.strip() for line in f if line.strip()]

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    elif os.path.isabs(config.default_output):  # Absolute remains so
        output_directory = config.default_output
    else:  # Else rendered relative to input
        output_directory = os.path.normpath(
            os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    #NOTE(review): utils.dir_exists presumably resolves the conflict with an
    #existing output directory - confirm against its definition
    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #An already existing directory is fine, any other failure leaves
        #nowhere to build the EPUB so there is no point in continuing
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')
            sys.exit('Unable to continue')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(abs_dirname, xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the Digital Object Identifier; get_images does its own splitting
        #of the DOI, so it is only reported here
        doi = parsed_article.get_DOI()
        command_log.debug('Processing article with DOI {0}'.format(doi))

        #Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory,
                                                epub_version)

    #Navigation and package are rendered once, after every article was added
    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
def get_images(output_directory, explicit, input_path, config, parsed_article):
    """
    Main logic controller for the placement of images into the output directory

    Controlling logic for placement of the appropriate image files into the
    EPUB directory. This function interacts with interface arguments as well as
    the local installation config.py file. These may change behavior of this
    function in terms of how it looks for images relative to the input,
    where it finds explicit images, whether it will attempt to download images,
    and whether successfully downloaded images will be stored in the cache.

    The image methods are tried in a fixed order and the first one that
    succeeds ends the search: explicit image directory, input-relative
    images, the article image cache, and finally fetching from the publisher.
    When an explicit directory is given, no other method is tried even if
    the explicit method fails.

    Parameters
    ----------
    output_directory : str
        The directory path where the EPUB is being constructed/output
    explicit : str
        A directory path to a user specified directory of images. Allows *
        wildcard expansion.
    input_path : str
        The absolute path to the input XML file.
    config : config module
        The imported configuration module
    parsed_article : openaccess_epub.article.Article object
        The Article instance for the article being converted to EPUB

    Returns
    -------
    bool
        The success value reported by the image method that ended the search;
        False when no enabled method succeeded or when fetching is not
        supported for the article's publisher. Fetching for the '10.3389'
        prefix always reports True once the fetch call returns.

    Raises
    ------
    ValueError
        If the article DOI does not contain a '/' separator.
    """
    #Split the DOI on the first slash only: the suffix of a DOI is allowed to
    #contain further slashes, which must stay part of the article DOI
    journal_doi, article_doi = parsed_article.doi.split('/', 1)
    log.debug('journal-doi : {0}'.format(journal_doi))
    log.debug('article-doi : {0}'.format(article_doi))

    #Get the rootname for wildcard expansion
    rootname = utils.file_root_name(input_path)

    #Specify where to place the images in the output
    img_dir = os.path.join(output_directory,
                           'EPUB',
                           'images-{0}'.format(article_doi))
    log.info('Using {0} as image directory target'.format(img_dir))

    #Construct path to cache for article
    article_cache = os.path.join(config.image_cache, journal_doi, article_doi)

    #Use manual image directory, explicit images
    if explicit:
        success = explicit_images(explicit, img_dir, rootname, config)
        if success and config.use_image_cache:
            move_images_to_cache(img_dir, article_cache)
        #Explicit images prevents all other image methods
        return success

    #Input-Relative import, looks for any one of the listed options
    if config.use_input_relative_images:
        #Prevents other image methods only if successful
        if input_relative_images(input_path, img_dir, rootname, config):
            if config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return True

    #Use cache for article if it exists
    if config.use_image_cache:
        #Prevents other image methods only if successful
        if image_cache(article_cache, img_dir):
            return True

    #Download images from Internet
    if config.use_image_fetching:
        #Tolerate a target directory that already exists and create any
        #missing parents; a bare os.mkdir raises in both situations
        if not os.path.isdir(img_dir):
            os.makedirs(img_dir)
        if journal_doi == '10.3389':
            fetch_frontiers_images(article_doi, img_dir)
            if config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return True
        elif journal_doi == '10.1371':
            success = fetch_plos_images(article_doi, img_dir, parsed_article)
            if success and config.use_image_cache:
                move_images_to_cache(img_dir, article_cache)
            return success
        else:
            log.error('Fetching images for this publisher is not supported!')
            return False
    return False
def main(argv=None):
    """
    Command entry point: convert a collection of articles into a single EPUB

    Parses the command line with docopt, configures logging, reads the
    collection file (one input XML path per line), and processes every listed
    article into one shared output directory. After all articles have been
    processed the collection navigation and package documents are rendered
    for the chosen EPUB version and the directory is handed to epub_zip.
    Depending on the command line flags the output directory is then removed
    and epubcheck is run on the '<output_directory>.epub' file.

    The EPUB version is decided once, by the command line flags or else by
    the first article's publisher default, and is then used for every article.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments, forwarded unchanged to docopt.

    Raises
    ------
    SystemExit
        If the collection file does not exist, or if the output directory
        could not be created.
    """
    #Local import: only needed to name the "already exists" error code
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)
    abs_dirname = os.path.dirname(abs_input_path)

    #The log file defaults to sitting next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(abs_dirname, c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Skip blank lines, they would otherwise be treated as article paths
        inputs = [line.strip() for line in f if line.strip()]

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    elif os.path.isabs(config.default_output):  # Absolute remains so
        output_directory = config.default_output
    else:  # Else rendered relative to input
        output_directory = os.path.normpath(
            os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    #NOTE(review): utils.dir_exists presumably resolves the conflict with an
    #existing output directory - confirm against its definition
    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #An already existing directory is fine, any other failure leaves
        #nowhere to build the EPUB so there is no point in continuing
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')
            sys.exit('Unable to continue')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(abs_dirname, xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the Digital Object Identifier; get_images does its own splitting
        #of the DOI, so it is only reported here
        doi = parsed_article.get_DOI()
        command_log.debug('Processing article with DOI {0}'.format(doi))

        #Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory,
                                                epub_version)

    #Navigation and package are rendered once, after every article was added
    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)