Beispiel #1
0
def get_images(output_directory, explicit, input_path, config, parsed_article):
    """
    Place the images for one article into the EPUB output directory

    Image sources are tried in a fixed order and the first applicable one
    wins: an explicitly supplied image directory, images found relative to
    the input file, the local image cache, and finally fetching from the
    publisher. The last three are switched on or off by the
    use_input_relative_images, use_image_cache and use_image_fetching
    settings of the config module. When use_image_cache is set, images that
    were obtained from a source other than the cache itself are passed on to
    move_images_to_cache.

    Parameters
    ----------
    output_directory : str
        The directory path where the EPUB is being constructed/output
    explicit : str
        A directory path to a user specified directory of images. Allows *
        wildcard expansion. A false value disables this source.
    input_path : str
        The absolute path to the input XML file.
    config : config module
        The imported configuration module
    parsed_article : openaccess_epub.article.Article object
        The Article instance for the article being converted to EPUB

    Returns
    -------
    The result of explicit_images when an explicit directory was given (no
    other source is tried in that case); True when the input-relative or
    cache lookup succeeded, or after the Frontiers fetch; the result of
    fetch_plos_images for PLoS articles; False in every other case.

    Raises
    ------
    ValueError
        If the article DOI does not consist of exactly two '/'-separated
        parts.
    """
    #The DOI is "<journal part>/<article part>"
    doi_prefix, doi_suffix = parsed_article.doi.split('/')
    log.debug('journal-doi : {0}'.format(doi_prefix))
    log.debug('article-doi : {0}'.format(doi_suffix))

    #Root name of the input file, used for wildcard expansion
    rootname = utils.file_root_name(input_path)

    #Target directory for the images inside the output
    image_dirname = 'images-{0}'.format(doi_suffix)
    img_dir = os.path.join(output_directory, 'EPUB', image_dirname)
    log.info('Using {0} as image directory target'.format(img_dir))

    #Per-article location inside the image cache
    article_cache = os.path.join(config.image_cache, doi_prefix, doi_suffix)

    def store_in_cache():
        #Hand the collected images to the cache, if caching is enabled
        if config.use_image_cache:
            move_images_to_cache(img_dir, article_cache)

    #Explicit images: whatever the outcome, no other method is attempted
    if explicit:
        outcome = explicit_images(explicit, img_dir, rootname, config)
        if outcome:
            store_in_cache()
        return outcome

    #Input-relative images: only a success stops the search
    if config.use_input_relative_images and input_relative_images(
            input_path, img_dir, rootname, config):
        store_in_cache()
        return True

    #Cached images: only a success stops the search
    if config.use_image_cache and image_cache(article_cache, img_dir):
        return True

    #Last resort is downloading; give up if that is disabled
    if not config.use_image_fetching:
        return False

    #The directory is created before the publisher is checked
    os.mkdir(img_dir)

    if doi_prefix == '10.3389':
        #Frontiers: the fetch result is not inspected
        fetch_frontiers_images(doi_suffix, img_dir)
        store_in_cache()
        return True

    if doi_prefix == '10.1371':
        #PLoS: cache only when the fetch reports success
        outcome = fetch_plos_images(doi_suffix, img_dir, parsed_article)
        if outcome:
            store_in_cache()
        return outcome

    log.error('Fetching images for this publisher is not supported!')
    return False
Beispiel #2
0
def main(argv=None):
    """
    Entry point of the collection command: merge several articles into one EPUB

    The COLLECTION_FILE argument names a text file that lists one article XML
    path per line (blank lines are ignored). Each entry is resolved through
    utils.evaluate_relative_path against the directory of the collection
    file, parsed into an Article, added to a shared Navigation and Package,
    has its images collected, and is rendered into a common output directory.
    That directory is finally passed to epub_zip.

    The output directory is --output when given, otherwise
    config.default_output (taken relative to the collection file's directory
    unless it is absolute), extended by a subdirectory named after the
    collection file's root name.

    The EPUB version is fixed once, while handling the first article:
    --epub2 or --epub3 when given, otherwise that article's publisher
    default.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments, passed through to docopt unchanged.

    Raises
    ------
    SystemExit
        If the collection file does not exist or lists no input files.
    """
    #Local import; only needed to name the "already exists" error code below
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)

    #Default log file sits next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(os.path.dirname(abs_input_path),
                              c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'], log_to,
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Blank lines are not article paths; keeping them would make the
        #loop below try to parse an empty path
        stripped_lines = (line.strip() for line in f)
        inputs = [entry for entry in stripped_lines if entry]

    #Without any article there is nothing to render or package
    if not inputs:
        command_log.critical(
            'No input files listed in {0}'.format(c_file))
        sys.exit('Unable to continue')

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    else:
        if os.path.isabs(config.default_output):  # Absolute remains so
            output_directory = config.default_output
        else:  # Else rendered relative to input
            abs_dirname = os.path.dirname(abs_input_path)
            output_directory = os.path.normpath(
                os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #An already existing directory is fine; anything else is logged
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(
            os.path.dirname(abs_input_path), xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the images; get_images reads the DOI from the article itself
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'], xml_path,
                                                config, parsed_article)

        parsed_article.publisher.render_content(output_directory, epub_version)

    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
Beispiel #3
0
def get_images(output_directory, explicit, input_path, config, parsed_article):
    """
    Place the images for one article into the EPUB output directory

    Image sources are tried in a fixed order and the first applicable one
    wins: an explicitly supplied image directory, images found relative to
    the input file, the local image cache, and finally fetching from the
    publisher. The last three are switched on or off by the
    use_input_relative_images, use_image_cache and use_image_fetching
    settings of the config module. When use_image_cache is set, images that
    were obtained from a source other than the cache itself are passed on to
    move_images_to_cache.

    Parameters
    ----------
    output_directory : str
        The directory path where the EPUB is being constructed/output
    explicit : str
        A directory path to a user specified directory of images. Allows *
        wildcard expansion. A false value disables this source.
    input_path : str
        The absolute path to the input XML file.
    config : config module
        The imported configuration module
    parsed_article : openaccess_epub.article.Article object
        The Article instance for the article being converted to EPUB

    Returns
    -------
    The result of explicit_images when an explicit directory was given (no
    other source is tried in that case); True when the input-relative or
    cache lookup succeeded, or after the Frontiers fetch; the result of
    fetch_plos_images for PLoS articles; False in every other case.

    Raises
    ------
    ValueError
        If the article DOI does not consist of exactly two '/'-separated
        parts.
    """
    #The DOI is "<journal part>/<article part>"
    doi_prefix, doi_suffix = parsed_article.doi.split('/')
    log.debug('journal-doi : {0}'.format(doi_prefix))
    log.debug('article-doi : {0}'.format(doi_suffix))

    #Root name of the input file, used for wildcard expansion
    rootname = utils.file_root_name(input_path)

    #Target directory for the images inside the output
    image_dirname = 'images-{0}'.format(doi_suffix)
    img_dir = os.path.join(output_directory, 'EPUB', image_dirname)
    log.info('Using {0} as image directory target'.format(img_dir))

    #Per-article location inside the image cache
    article_cache = os.path.join(config.image_cache, doi_prefix, doi_suffix)

    def store_in_cache():
        #Hand the collected images to the cache, if caching is enabled
        if config.use_image_cache:
            move_images_to_cache(img_dir, article_cache)

    #Explicit images: whatever the outcome, no other method is attempted
    if explicit:
        outcome = explicit_images(explicit, img_dir, rootname, config)
        if outcome:
            store_in_cache()
        return outcome

    #Input-relative images: only a success stops the search
    if config.use_input_relative_images and input_relative_images(
            input_path, img_dir, rootname, config):
        store_in_cache()
        return True

    #Cached images: only a success stops the search
    if config.use_image_cache and image_cache(article_cache, img_dir):
        return True

    #Last resort is downloading; give up if that is disabled
    if not config.use_image_fetching:
        return False

    #The directory is created before the publisher is checked
    os.mkdir(img_dir)

    if doi_prefix == '10.3389':
        #Frontiers: the fetch result is not inspected
        fetch_frontiers_images(doi_suffix, img_dir)
        store_in_cache()
        return True

    if doi_prefix == '10.1371':
        #PLoS: cache only when the fetch reports success
        outcome = fetch_plos_images(doi_suffix, img_dir, parsed_article)
        if outcome:
            store_in_cache()
        return outcome

    log.error('Fetching images for this publisher is not supported!')
    return False
def main(argv=None):
    """
    Entry point of the collection command: merge several articles into one EPUB

    The COLLECTION_FILE argument names a text file that lists one article XML
    path per line (blank lines are ignored). Each entry is resolved through
    utils.evaluate_relative_path against the directory of the collection
    file, parsed into an Article, added to a shared Navigation and Package,
    has its images collected, and is rendered into a common output directory.
    That directory is finally passed to epub_zip.

    The output directory is --output when given, otherwise
    config.default_output (taken relative to the collection file's directory
    unless it is absolute), extended by a subdirectory named after the
    collection file's root name.

    The EPUB version is fixed once, while handling the first article:
    --epub2 or --epub3 when given, otherwise that article's publisher
    default.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments, passed through to docopt unchanged.

    Raises
    ------
    SystemExit
        If the collection file does not exist or lists no input files.
    """
    #Local import; only needed to name the "already exists" error code below
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)

    #Default log file sits next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(os.path.dirname(abs_input_path),
                              c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Blank lines are not article paths; keeping them would make the
        #loop below try to parse an empty path
        stripped_lines = (line.strip() for line in f)
        inputs = [entry for entry in stripped_lines if entry]

    #Without any article there is nothing to render or package
    if not inputs:
        command_log.critical('No input files listed in {0}'.format(c_file))
        sys.exit('Unable to continue')

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    else:
        if os.path.isabs(config.default_output):  # Absolute remains so
            output_directory = config.default_output
        else:  # Else rendered relative to input
            abs_dirname = os.path.dirname(abs_input_path)
            output_directory = os.path.normpath(os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info('Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #An already existing directory is fine; anything else is logged
        if err.errno != errno.EEXIST:
            command_log.exception('Unable to recursively create output directories')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(os.path.dirname(abs_input_path),
                                                xml_file)
        parsed_article = Article(xml_path, validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the images; get_images reads the DOI from the article itself
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory, epub_version)

    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)