Beispiel #1
0
def main(argv=None):
    """
    Run the batch command: convert every XML file found in the given
    directories.

    The command line is parsed by docopt against the module docstring (the
    __doc__ name used below resolves to the module, not to this function).
    For each directory in DIR, each '.xml' file reported by files_with_ext
    is parsed into an Article and handed to make_EPUB with batch=True. The
    output directory is always removed afterwards, and epubcheck is run on
    '<output directory>.epub' unless --no-epubcheck was given or make_EPUB
    returned a false value.

    Unless --no-log-file is set, the file handler of the 'openaccess_epub'
    logger is re-based for every input so that the log ends up in
    '<root name>.log' next to the input XML file.

    argv -- argument list forwarded unchanged to docopt.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #A custom image pattern is only accepted if it carries a wildcard
    image_pattern = args['--images']
    if not (image_pattern is None or '*' in image_pattern):
        sys.exit('Argument for --images option must contain "*"')

    #Logging is configured first so that everything after it can report
    oae_logging.config_logging(args['--no-log-file'], args['--log-to'],
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])

    #The 'openaccess_epub' parent logger was set up by the call above
    command_log = logging.getLogger('openaccess_epub.commands.batch')

    #The config module is loaded only once logging is configured
    config = openaccess_epub.utils.load_config_module()

    log_to_file = not args['--no-log-file']

    for directory in args['DIR']:
        xml_files = files_with_ext('.xml',
                                   directory,
                                   recursive=args['--recursive'])
        for xml_file in xml_files:
            #Park the log in a temporary file until the final log location
            #for this input is known
            if log_to_file:
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file='temp.log',
                    level=args['--log-level'],
                    frmt=oae_logging.STANDARD_FORMAT)

            command_log.info('Processing input: {0}'.format(xml_file))

            root_name = openaccess_epub.utils.file_root_name(xml_file)
            abs_input_path = openaccess_epub.utils.get_absolute_path(xml_file)

            if log_to_file:
                #The per-article log lives beside the input file
                log_path = os.path.join(os.path.dirname(abs_input_path),
                                        root_name + '.log')
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file=log_path,
                    level=args['--log-level'],
                    frmt=oae_logging.STANDARD_FORMAT)
                #Carry over what was written to the temporary log
                shutil.move('temp.log', log_path)

            #Logging is in place, so the article can be parsed
            parsed_article = Article(abs_input_path,
                                     validation=not args['--no-validate'])

            #Work out the base output location: an explicit --output wins,
            #an absolute configured default is used as-is, and a relative
            #default is resolved against the directory of the input file
            if args['--output'] is not None:
                output_base = openaccess_epub.utils.get_absolute_path(
                    args['--output'])
            elif os.path.isabs(config.default_output):
                output_base = config.default_output
            else:
                output_base = os.path.normpath(
                    os.path.join(os.path.dirname(abs_input_path),
                                 config.default_output))

            #Each article gets its own sub-directory named after its root
            output_directory = os.path.join(output_base, root_name)

            success = make_EPUB(parsed_article,
                                output_directory,
                                abs_input_path,
                                args['--images'],
                                config_module=config,
                                batch=True)

            #Batch mode never keeps the intermediate output directory
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

            #Only check EPUBs that were requested to be checked and built
            if args['--no-epubcheck'] or not success:
                continue
            epub_name = '{0}.epub'.format(output_directory)
            openaccess_epub.utils.epubcheck(epub_name, config)
Beispiel #2
0
def main(argv=None):
    """
    Run the collection command: compile several articles into one EPUB.

    The command line is parsed by docopt against the module docstring (the
    __doc__ name used below resolves to the module, not to this function).
    COLLECTION_FILE is read as a list of input XML paths, one per line;
    blank lines are ignored and each path is evaluated relative to the
    directory of the collection file. Every article is parsed, fed to the
    collection-wide Navigation and Package objects, has its images fetched
    and its content rendered into a shared output directory, which is then
    zipped into an EPUB.

    The EPUB version is fixed once for the whole collection: --epub2 or
    --epub3 if given, otherwise the epub_default of the first article's
    publisher.

    Unless --log-to is given, the log is written to '<collection root>.log'
    next to the collection file.

    argv -- argument list forwarded unchanged to docopt.

    Exits through sys.exit when the collection file does not exist or lists
    no input files.
    """
    #Local import keeps this block self-contained; it is only needed to name
    #the "already exists" error code below
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)

    if not args['--log-to']:
        log_to = os.path.join(os.path.dirname(abs_input_path),
                              c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'], log_to,
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Blank lines (such as a trailing newline) are not inputs; keeping
        #them would hand an empty path to Article further down
        inputs = [line.strip() for line in f if line.strip()]

    #Quit before creating anything if there is nothing to compile; with no
    #article the EPUB version stays undetermined and nothing gets rendered
    if not inputs:
        command_log.critical(
            'No input files listed in {0}'.format(c_file))
        sys.exit('Unable to continue')

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    else:
        if os.path.isabs(config.default_output):  # Absolute remains so
            output_directory = config.default_output
        else:  # Else rendered relative to input
            abs_dirname = os.path.dirname(abs_input_path)
            output_directory = os.path.normpath(
                os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #An already existing directory is fine, anything else is reported
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(
            os.path.dirname(abs_input_path), xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'], xml_path,
                                                config, parsed_article)

        parsed_article.publisher.render_content(output_directory, epub_version)

    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Run the collection command: compile several articles into one EPUB.

    docopt parses the command line against the module docstring (__doc__
    below is the module-level name, not this function's docstring).
    COLLECTION_FILE lists one input XML path per line; blank lines are
    skipped and every path is evaluated relative to the directory holding
    the collection file. Each article is parsed, processed by the shared
    Navigation and Package objects, gets its images collected and its
    content rendered into one output directory that is finally zipped.

    The EPUB version is chosen once and applied to all articles: --epub2 or
    --epub3 when given, otherwise the epub_default of the publisher of the
    first article.

    When --log-to is not given the log goes to '<collection root>.log' in
    the directory of the collection file.

    argv -- argument list forwarded unchanged to docopt.

    Exits through sys.exit when the collection file is missing or does not
    list any input file.
    """
    #Imported here so the block does not depend on the file's import header;
    #used only to name the "already exists" error code
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)

    if not args['--log-to']:
        log_to = os.path.join(os.path.dirname(abs_input_path),
                              c_file_root + '.log')
    else:
        log_to = args['--log-to']

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    #Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        #Only nonempty lines name an input; an empty entry would otherwise
        #be passed on to Article as a path
        inputs = [line.strip() for line in f if line.strip()]

    #Without any input no EPUB version is ever determined and nothing would
    #be rendered, so stop before touching the output location
    if not inputs:
        command_log.critical('No input files listed in {0}'.format(c_file))
        sys.exit('Unable to continue')

    #Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    else:
        if os.path.isabs(config.default_output):  # Absolute remains so
            output_directory = config.default_output
        else:  # Else rendered relative to input
            abs_dirname = os.path.dirname(abs_input_path)
            output_directory = os.path.normpath(os.path.join(abs_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info('Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        #A directory that is already there is acceptable; report the rest
        if err.errno != errno.EEXIST:
            command_log.exception('Unable to recursively create output directories')

    #Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    #Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    #Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(os.path.dirname(abs_input_path),
                                                xml_file)
        parsed_article = Article(xml_path, validation=not args['--no-validate'])
        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        #Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory, epub_version)

    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    #Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
Beispiel #4
0
def collection_input(args, config=None):
    """
    Collection Input Mode works to compile multiple articles into a single
    composite ePub. This is akin to such formats as Collections, Issues, and
    Omnibus; it may also be useful for those interested in the simple
    distribution of a reading list, personal publications, or topic reference.

    Collection Input Mode produces output that is necessarily unlike the output
    generated by Single or Batch (which is just sequential Single) input modes.
    The primary difference in output lies with the ePub metadata; as applying
    metadata from any single article to the whole would be inappropriate.

    Unlike other input modes, Collection Mode is strictly dependent on the
    local directory of execution. If there is a file named "order.txt" in the
    local directory, this file should contain the name of one input XML file
    on each line; the files will be added to the ePub output by line-order.
    If there is no "order.txt" file, Collection Mode will assume that all XML
    files in the local directory are input, and the order of the articles in
    the collection is not defined.

    The title of the collection is args.collection when that holds a value
    other than True; otherwise it is the first line of "title.txt" in the
    local directory, falling back to the name of the local directory when
    that file is missing or its first line is empty.

    Collection Input Mode has default epubcheck behavior, it will place a system
    call to epubcheck unless specified otherwise (--no-epubcheck or -N flags).

    args -- parsed command line namespace; the attributes collection,
            no_dtd_validation and no_epubcheck are read.
    config -- configuration module; loaded with get_config_module() when None.
    """
    if config is None:
        config = get_config_module()
    try:
        order = open('order.txt', 'r')
    except IOError:  # No order.txt
        xml_files = list_xml_files(dir=os.getcwd())
    else:
        #Add all nonempty lines, in order, to the xml_files list; the with
        #statement closes the file even if reading fails
        with order:
            xml_files = [line.strip() for line in order if line.strip()]

    #The output name will be the same as the parent directory name
    #This will also serve as the dc:title
    output_name = os.path.split(os.getcwd())[1]

    #The standard make_epub() method will not work for Collection Mode
    #So the work done here is an adaptation of it
    print('Processing output to {0}.epub'.format(output_name))
    #Copy files from base_epub to the new output
    if os.path.isdir(output_name):
        dir_exists(output_name)
    epub_base = os.path.join(CACHE_LOCATION, 'base_epub')
    shutil.copytree(epub_base, output_name)

    if args.collection is True:
        try:
            title_txt = open('title.txt', 'r')
        except IOError:  # No title.txt
            title = output_name
        else:
            with title_txt:
                title = title_txt.readline().strip()
            if not title:
                title = output_name
                print('title.txt was empty or title was not on first line!')
                print('Defaulting to name of parent directory. {0}'.format(title))
    else:
        title = args.collection

    toc = ncx.NCX(oae_version=__version__, location=output_name, collection_mode=True)
    myopf = opf.OPF(location=output_name, collection_mode=True, title=title)

    #Now it is time to operate on each of the xml files
    for xml_file in xml_files:
        #The raw name is used below to look for a local images directory
        raw_name = u_input.local_input(xml_file)
        #NOTE(review): no_dtd_validation is passed straight through as the
        #validation switch; presumably the option is stored inverted
        #(store_false) - confirm against the argument parser
        parsed_article = Article(xml_file, validation=args.no_dtd_validation)
        toc.take_article(parsed_article)
        myopf.take_article(parsed_article)

        #Get the Digital Object Identifier; split on the first slash only,
        #everything after it belongs to the article part even if it
        #contains further slashes
        doi = parsed_article.get_DOI()
        journal_doi, article_doi = doi.split('/', 1)

        #Check for images: a local images directory is preferred, then the
        #image cache, and finally the user is asked about downloading
        img_dir = os.path.join(output_name, 'OPS', 'images-{0}'.format(article_doi))
        expected_local = 'images-{0}'.format(raw_name)
        if os.path.isdir(expected_local):
            utils.images.local_images(expected_local, img_dir)
        else:
            article_cache = os.path.join(config.image_cache, journal_doi, article_doi)
            if os.path.isdir(article_cache):
                utils.images.image_cache(article_cache, img_dir)
            else:
                print('Images for {0} (DOI: {1}) could not be found!'.format(xml_file, doi))
                r = input('Try to download them? [Y/n]')
                if r in ['y', 'Y', '']:
                    os.mkdir(img_dir)
                    utils.images.fetch_plos_images(article_doi, img_dir, parsed_article)
                    if config.use_image_cache:
                        utils.images.move_images_to_cache(img_dir, article_cache)
                else:
                    sys.exit(1)

        #TODO: Content stuff
        if journal_doi == '10.1371':  # PLoS's publisher DOI
            #NOTE(review): the instance is not used afterwards; presumably
            #constructing it produces the content document - confirm
            ops.OPSPLoS(parsed_article, output_name)
            #TODO: Workflow change, parse table of contents from OPS processed document

    toc.write()
    myopf.write()
    utils.epub_zip(output_name)

    #Running epubcheck on the output verifies the validity of the ePub,
    #requires a local installation of java and epubcheck.
    #NOTE(review): epubcheck runs when no_epubcheck is true; presumably the
    #--no-epubcheck option is stored inverted (store_false) - confirm
    #against the argument parser
    if args.no_epubcheck:
        epubcheck('{0}.epub'.format(output_name), config)
Beispiel #5
0
def main(argv=None):
    """
    Run the convert command: turn each INPUT into an EPUB.

    docopt parses the command line against the module docstring (__doc__
    below is the module-level name, not this function's docstring). Every
    INPUT may be a path ending in '.xml', a string starting with 'doi:' or a
    string starting with 'http:' / 'https:'; anything else ends the program.
    The resulting XML file is parsed into an Article and passed to make_EPUB.
    Afterwards the output directory is removed unless --no-cleanup is set,
    and epubcheck is run on '<output directory>.epub' unless --no-epubcheck
    is set or make_EPUB returned a false value.

    When neither --no-log-file nor --log-to is given, the file handler of
    the 'openaccess_epub' logger is re-based per input so that the log ends
    up in '<root name>.log' next to the input XML file.

    argv -- argument list forwarded unchanged to docopt.

    Exits through sys.exit for an unrecognized input or when no publisher
    could be established for an article.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #--epub3 takes precedence over --epub2; None is handed to make_EPUB
    #when neither flag was given
    epub_version = None
    if args['--epub3']:
        epub_version = 3
    elif args['--epub2']:
        epub_version = 2

    #Logging is configured first so that everything after it can report
    oae_logging.config_logging(args['--no-log-file'], args['--log-to'],
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])

    #The 'openaccess_epub' parent logger was set up by the call above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #The config module is loaded only once logging is configured
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()

    #Per-input log files are only used when file logging is on and the user
    #did not pick a log destination
    rebase_log = not args['--no-log-file'] and not args['--log-to']

    for inpt in args['INPUT']:
        #Park the log in a temporary file until the final log location for
        #this input is known
        if rebase_log:
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file='temp.log',
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #Classify the input to learn its root name and the XML location
        lowered = inpt.lower()
        if lowered.endswith('.xml'):  # Direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        elif lowered.startswith('doi:'):  # DOI
            root_name = input_utils.doi_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        elif lowered.startswith(('http:', 'https:')):  # URL
            root_name = input_utils.url_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        else:
            sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))

        if rebase_log:
            #The per-article log lives beside the input file
            log_path = os.path.join(os.path.dirname(abs_input_path),
                                    root_name + '.log')
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file=log_path,
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)
            #Carry over what was written to the temporary log, then drop it
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Logging is in place, so the article can be parsed
        parsed_article = Article(abs_input_path,
                                 validation=not args['--no-validate'])

        if parsed_article.publisher is None:
            command_log.critical(
                'Publisher support was not established, aborting')
            sys.exit(1)

        #Work out the base output location: an explicit --output wins, an
        #absolute configured default is used as-is, and a relative default
        #is resolved against the directory of the input file
        if args['--output'] is not None:
            output_base = openaccess_epub.utils.get_absolute_path(
                args['--output'])
        elif os.path.isabs(config.default_output):
            output_base = config.default_output
        else:
            output_base = os.path.normpath(
                os.path.join(os.path.dirname(abs_input_path),
                             config.default_output))

        #Each article gets its own sub-directory named after its root
        output_directory = os.path.join(output_base, root_name)

        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #Running epubcheck on the output verifies the validity of the EPUB,
        #requires a local installation of java and epubcheck.
        if args['--no-epubcheck'] or not success:
            continue
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)