Esempio n. 1
0
def main(argv=None):
    """Batch-convert the XML files found in the given directories to EPUB.

    The command line is parsed with docopt and logging is configured. Then,
    for every ``.xml`` file yielded by ``files_with_ext`` for each ``DIR``
    argument, the file is parsed into an ``Article``, ``make_EPUB`` is called
    with ``batch=True``, the output directory is removed, and - unless
    ``--no-epubcheck`` was given - epubcheck is run on
    ``<output_directory>.epub`` when ``make_EPUB`` returned a true value.

    Unless ``--no-log-file`` is set, a log named after the input file is
    written next to each input file.

    Args:
        argv: Argument list handed to docopt unchanged.

    Raises:
        SystemExit: If ``--images`` is given without a ``*`` in its value.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    if args['--images'] is not None and '*' not in args['--images']:
        sys.exit('Argument for --images option must contain "*"')

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'], args['--log-to'],
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])

    #Get a logger, the 'openaccess_epub' logger was set up above
    command_log = logging.getLogger('openaccess_epub.commands.batch')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    for directory in args['DIR']:
        for xml_file in files_with_ext('.xml',
                                       directory,
                                       recursive=args['--recursive']):

            #We have to temporarily re-base our log while utils work
            if not args['--no-log-file']:
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file='temp.log',
                    level=args['--log-level'],
                    frmt=oae_logging.STANDARD_FORMAT)

            command_log.info('Processing input: {0}'.format(xml_file))

            root_name = openaccess_epub.utils.file_root_name(xml_file)
            abs_input_path = openaccess_epub.utils.get_absolute_path(xml_file)

            if not args['--no-log-file']:
                log_name = root_name + '.log'
                log_path = os.path.join(os.path.dirname(abs_input_path),
                                        log_name)

                #Re-base the log file to the new file location
                oae_logging.replace_filehandler(
                    logname='openaccess_epub',
                    new_file=log_path,
                    level=args['--log-level'],
                    frmt=oae_logging.STANDARD_FORMAT)

                #Carry the records gathered in temp.log over to the new log.
                #The content is copied into the file at log_path instead of
                #moving temp.log over it: a move may rename temp.log onto
                #log_path and thereby replace the file the handler was just
                #pointed at. This is the same copy-then-remove sequence the
                #convert command uses.
                #NOTE(review): presumes replace_filehandler opens log_path
                #right away - confirm against oae_logging.
                temp_log = os.path.abspath('temp.log')
                same_file = (os.path.normcase(temp_log) ==
                             os.path.normcase(os.path.abspath(log_path)))
                #When the input itself is ./temp.xml both names denote one
                #file; there is nothing to carry over and removing it would
                #delete the live log.
                if not same_file:
                    shutil.copy2(temp_log, log_path)
                    os.remove(temp_log)

            #Parse the article now that logging is ready
            parsed_article = Article(abs_input_path,
                                     validation=not args['--no-validate'])
            #Get the output directory
            if args['--output'] is not None:
                output_directory = openaccess_epub.utils.get_absolute_path(
                    args['--output'])
            else:
                if os.path.isabs(config.default_output):  # Absolute remains so
                    output_directory = config.default_output
                else:  # Else rendered relative to input
                    abs_dirname = os.path.dirname(abs_input_path)
                    output_directory = os.path.normpath(
                        os.path.join(abs_dirname, config.default_output))

            #The root name must be added on for output
            output_directory = os.path.join(output_directory, root_name)

            #Make the call to make_EPUB
            success = make_EPUB(parsed_article,
                                output_directory,
                                abs_input_path,
                                args['--images'],
                                config_module=config,
                                batch=True)

            #Cleanup is mandatory
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

            #epubcheck is only worth running on an EPUB that was produced
            if not args['--no-epubcheck'] and success:
                epub_name = '{0}.epub'.format(output_directory)
                openaccess_epub.utils.epubcheck(epub_name, config)
Esempio n. 2
0
def main(argv=None):
    """Batch-convert the XML files found in the given directories to EPUB.

    After docopt parsing and logging configuration, every ``.xml`` file
    yielded by ``files_with_ext`` for each ``DIR`` argument is parsed into an
    ``Article`` and passed to ``make_EPUB`` with ``batch=True``. The output
    directory is always removed afterwards. Unless ``--no-epubcheck`` was
    given, epubcheck is run on ``<output_directory>.epub`` when ``make_EPUB``
    returned a true value.

    Unless ``--no-log-file`` is set, a log named after the input file is
    written next to each input file.

    Args:
        argv: Argument list handed to docopt unchanged.

    Raises:
        SystemExit: If ``--images`` is given without a ``*`` in its value.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    if args['--images'] is not None and '*' not in args['--images']:
        sys.exit('Argument for --images option must contain "*"')

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #Get a logger, the 'openaccess_epub' logger was set up above
    command_log = logging.getLogger('openaccess_epub.commands.batch')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    for directory in args['DIR']:
        for xml_file in files_with_ext('.xml', directory,
                                       recursive=args['--recursive']):

            #We have to temporarily re-base our log while utils work
            if not args['--no-log-file']:
                oae_logging.replace_filehandler(logname='openaccess_epub',
                                                new_file='temp.log',
                                                level=args['--log-level'],
                                                frmt=oae_logging.STANDARD_FORMAT)

            command_log.info('Processing input: {0}'.format(xml_file))

            root_name = openaccess_epub.utils.file_root_name(xml_file)
            abs_input_path = openaccess_epub.utils.get_absolute_path(xml_file)

            if not args['--no-log-file']:
                log_name = root_name + '.log'
                log_path = os.path.join(os.path.dirname(abs_input_path),
                                        log_name)

                #Re-base the log file to the new file location
                oae_logging.replace_filehandler(logname='openaccess_epub',
                                                new_file=log_path,
                                                level=args['--log-level'],
                                                frmt=oae_logging.STANDARD_FORMAT)

                #Bring the early records from temp.log into the new log by
                #copying them into the file at log_path and then deleting
                #temp.log. A plain move may rename temp.log over log_path,
                #replacing the very file the handler was just attached to.
                #NOTE(review): presumes replace_filehandler opens log_path
                #right away - confirm against oae_logging.
                temp_log = os.path.abspath('temp.log')
                target_log = os.path.abspath(log_path)
                #If the input is ./temp.xml the two names are one file: skip,
                #since there is nothing to copy and removal would delete the
                #live log.
                if os.path.normcase(temp_log) != os.path.normcase(target_log):
                    shutil.copy2(temp_log, log_path)
                    os.remove(temp_log)

            #Parse the article now that logging is ready
            parsed_article = Article(abs_input_path,
                                     validation=not args['--no-validate'])
            #Get the output directory
            if args['--output'] is not None:
                output_directory = openaccess_epub.utils.get_absolute_path(args['--output'])
            else:
                if os.path.isabs(config.default_output):  # Absolute remains so
                    output_directory = config.default_output
                else:  # Else rendered relative to input
                    abs_dirname = os.path.dirname(abs_input_path)
                    output_directory = os.path.normpath(os.path.join(abs_dirname, config.default_output))

            #The root name must be added on for output
            output_directory = os.path.join(output_directory, root_name)

            #Make the call to make_EPUB
            success = make_EPUB(parsed_article,
                                output_directory,
                                abs_input_path,
                                args['--images'],
                                config_module=config,
                                batch=True)

            #Cleanup is mandatory
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

            #epubcheck is only worth running on an EPUB that was produced
            if not args['--no-epubcheck'] and success:
                epub_name = '{0}.epub'.format(output_directory)
                openaccess_epub.utils.epubcheck(epub_name, config)
Esempio n. 3
0
def main(argv=None):
    """Convert each INPUT argument (XML file, DOI, or URL) into an EPUB.

    For every input the article is located (``doi:`` and ``http(s):`` inputs
    go through ``input_utils`` and are expected as ``<root_name>.xml`` in the
    current directory), parsed into an ``Article`` and handed to
    ``make_EPUB``. The output directory is removed unless ``--no-cleanup``
    is set, and epubcheck is run on ``<output_directory>.epub`` unless
    ``--no-epubcheck`` is set or ``make_EPUB`` returned a false value.

    Args:
        argv: Argument list handed to docopt unchanged.

    Raises:
        SystemExit: If an input is not recognized as XML, DOI, or URL, or if
            the parsed article has no publisher.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #--epub3 wins over --epub2; None is passed on when neither is set
    epub_version = 3 if args['--epub3'] else (2 if args['--epub2'] else None)

    #Logging first, then the command logger, then the config module
    oae_logging.config_logging(args['--no-log-file'], args['--log-to'],
                               args['--log-level'], args['--silent'],
                               args['--verbosity'])
    command_log = logging.getLogger('openaccess_epub.commands.convert')
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()

    #A log file per input is only kept when file logging is enabled and no
    #explicit --log-to destination was requested
    per_input_log = not (args['--no-log-file'] or args['--log-to'])

    for inpt in args['INPUT']:
        #Log to a scratch file until the input's name and location are known
        if per_input_log:
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file='temp.log',
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #Work out the root name and the XML path from the kind of input
        lowered = inpt.lower()
        if lowered.endswith('.xml'):  # Direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        elif lowered.startswith('doi:'):  # DOI
            root_name = input_utils.doi_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        elif lowered.startswith(('http:', 'https:')):  # URL
            root_name = input_utils.url_input(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')
        else:
            sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))

        if per_input_log:
            log_path = os.path.join(os.path.dirname(abs_input_path),
                                    root_name + '.log')

            #Point the handler at the final log, then carry over what was
            #written to the scratch file
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file=log_path,
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Logging is settled, the article can be parsed
        parsed_article = Article(abs_input_path,
                                 validation=not args['--no-validate'])

        if parsed_article.publisher is None:
            command_log.critical(
                'Publisher support was not established, aborting')
            sys.exit(1)

        #Pick the base output location: explicit option, absolute default,
        #or default resolved relative to the input file
        requested_output = args['--output']
        if requested_output is not None:
            base_output = openaccess_epub.utils.get_absolute_path(
                requested_output)
        elif os.path.isabs(config.default_output):
            base_output = config.default_output
        else:
            base_output = os.path.normpath(
                os.path.join(os.path.dirname(abs_input_path),
                             config.default_output))

        #Each input gets its own sub-directory named after its root name
        output_directory = os.path.join(base_output, root_name)

        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #epubcheck needs a local installation of java and epubcheck
        if success and not args['--no-epubcheck']:
            openaccess_epub.utils.epubcheck(
                '{0}.epub'.format(output_directory), config)
Esempio n. 4
0
def main(argv=None):
    """Produce an EPUB for every INPUT given on the command line.

    An input may be a path ending in ``.xml``, a ``doi:`` identifier, or an
    ``http:``/``https:`` URL; the latter two are resolved through
    ``input_utils`` and then read from ``<root_name>.xml`` in the current
    directory. Each article is parsed and passed to ``make_EPUB``; the output
    directory is deleted unless ``--no-cleanup`` is given, and epubcheck runs
    on ``<output_directory>.epub`` unless ``--no-epubcheck`` is given or
    ``make_EPUB`` returned a false value.

    Args:
        argv: Argument list handed to docopt unchanged.

    Raises:
        SystemExit: If an input is not recognized as XML, DOI, or URL, or if
            the parsed article has no publisher.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    #Start from "unspecified" and override; --epub3 is checked first
    epub_version = None
    if args['--epub3']:
        epub_version = 3
    elif args['--epub2']:
        epub_version = 2

    #Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               args['--log-to'],
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    #The 'openaccess_epub' logger was set up by the call above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #The config module is loaded only after logging is configured
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()
    for inpt in args['INPUT']:
        #Per-input logs apply only with file logging on and no --log-to
        rebase_log = not (args['--no-log-file'] or args['--log-to'])

        #Temporarily log to temp.log while the input utils do their work
        if rebase_log:
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file='temp.log',
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #Find out the name of the file and where it is
        kind = inpt.lower()
        if kind.endswith('.xml'):  # This is direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        else:
            if kind.startswith('doi:'):  # This is a DOI
                fetch = input_utils.doi_input
            elif kind.startswith(('http:', 'https:')):  # This is a URL
                fetch = input_utils.url_input
            else:
                sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))
            #Both DOI and URL inputs end up as <root_name>.xml in the
            #directory the command was started from
            root_name = fetch(inpt)
            abs_input_path = os.path.join(current_dir, root_name + '.xml')

        if rebase_log:
            log_path = os.path.join(os.path.dirname(abs_input_path),
                                    root_name + '.log')

            #Re-base the log file to its final location
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file=log_path,
                                            level=args['--log-level'],
                                            frmt=oae_logging.STANDARD_FORMAT)
            #Carry the temporary log's content over, then drop it
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Logging is configured by now, so parse the article
        validate = not args['--no-validate']
        parsed_article = Article(abs_input_path, validation=validate)

        if parsed_article.publisher is None:
            command_log.critical('Publisher support was not established, aborting')
            sys.exit(1)

        #Get the output directory
        if args['--output'] is not None:
            output_root = openaccess_epub.utils.get_absolute_path(args['--output'])
        elif os.path.isabs(config.default_output):  # Absolute remains so
            output_root = config.default_output
        else:  # Else rendered relative to input
            input_dir = os.path.dirname(abs_input_path)
            output_root = os.path.normpath(os.path.join(input_dir, config.default_output))

        #The root name must be added on for output
        output_directory = os.path.join(output_root, root_name)

        #Make the call to make_EPUB
        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #Running epubcheck on the output verifies the validity of the EPUB,
        #requires a local installation of java and epubcheck.
        if success and not args['--no-epubcheck']:
            epub_name = '{0}.epub'.format(output_directory)
            openaccess_epub.utils.epubcheck(epub_name, config)
Esempio n. 5
0
def main(argv=None):
    """Validate the XML files in each DIR against their declared DTD.

    For every directory argument, each ``.xml`` file yielded by
    ``files_with_ext`` is parsed with lxml, the public identifier of its
    document type is looked up in ``DTDS``, and the document is validated
    with the DTD found there. Results are reported through the
    ``openaccess_epub.commands.validate`` logger: echoed to stdout unless
    ``--silent`` is set, and written to a log file unless ``--print-only``
    is set. The log file is ``--log-to`` when given, otherwise
    ``<dirname>_validation.log`` inside the directory being validated.

    A file that cannot be parsed, or whose public identifier has no entry in
    ``DTDS``, is reported as FAILED and skipped. Passing files are only
    reported when ``--record-pass`` is set.

    Args:
        argv: Argument list handed to docopt unchanged.
    """
    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    formatter = logging.Formatter('%(message)s')

    log = logging.getLogger('openaccess_epub.commands.validate')
    log.setLevel(logging.DEBUG)
    if not args['--silent']:
        sh_echo = logging.StreamHandler(sys.stdout)
        sh_echo.setLevel(logging.INFO)
        sh_echo.setFormatter(formatter)
        log.addHandler(sh_echo)

    for directory in args['DIR']:
        #Render the path to the directory
        if os.path.isabs(directory):
            dir_path = directory
        else:
            dir_path = os.path.normpath(os.path.join(os.getcwd(), directory))

        #Create the filename for the log
        if args['--log-to']:
            log_path = args['--log-to']
        else:
            logname = os.path.basename(dir_path) + '_validation.log'
            log_path = os.path.join(dir_path, logname)

        #Add the filehandler for the log if logging is enabled
        if not args['--print-only']:
            logs.replace_filehandler('openaccess_epub.commands.validate',
                                     new_file=log_path,
                                     level='INFO',
                                     frmt='%(message)s')

        #Iteration over the XML files
        for xml_file in files_with_ext('.xml', directory,
                                       recursive=args['--recursive']):
            try:
                document = lxml.etree.parse(xml_file)
            except lxml.etree.XMLSyntaxError as err:
                log.info('FAILED: Parse Error; {0} '.format(xml_file))
                log.info(str(err))
                continue

            #Find its public id so we can identify the appropriate DTD
            public_id = document.docinfo.public_id
            #Get the dtd by the public id
            try:
                dtd = DTDS[public_id]
            except KeyError as err:
                log.info('FAILED: Unknown DTD Error; {0}'.format(xml_file))
                log.info(str(err))
                #Without a DTD there is nothing to validate against. Falling
                #through would either hit an unbound ``dtd`` or silently
                #reuse the DTD of the previously processed file.
                continue

            #Actual DTD validation
            if not dtd.validate(document):
                log.info('FAILED: DTD Validation Error; {0}'.format(xml_file))
                log.info(str(dtd.error_log.filter_from_errors()))
                #Clear the error_log so the next report starts empty
                #NOTE(review): _clear_error_log is a private lxml method -
                #confirm it is still available in the lxml version in use.
                dtd._clear_error_log()
            else:
                if args['--record-pass']:
                    log.info('PASSED: Validated by DTD; {0}'.format(xml_file))