Esempio n. 1
0
 def parse(self, path):
     """Parse the document at ``path`` and prepare a paginated text writer.

     Stores three attributes on the instance: ``parser`` (a quiet
     ``XmlRfcParser`` for ``path``), ``xmlrfc`` (the result of parsing) and
     ``writer`` (a quiet ``PaginatedTextRfcWriter`` over that result).  The
     writer then has its date formatted and its pre-rendering step run, in
     that order.
     """
     rfc_parser = xml2rfc.XmlRfcParser(path, quiet=True)
     self.parser = rfc_parser

     rfc_tree = rfc_parser.parse()
     self.xmlrfc = rfc_tree

     text_writer = xml2rfc.PaginatedTextRfcWriter(rfc_tree, quiet=True)
     self.writer = text_writer

     # Order matters here: format the date before running pre-rendering.
     text_writer._format_date()
     text_writer.pre_rendering()
Esempio n. 2
0
def main():
    """Command-line entry point: convert an xml2rfc source document.

    Reads the source path and switches from the command line (via
    ``optparse``), parses the XML into an xmlrfc tree, validates it against
    the DTD unless ``--no-dtd`` is given, and then runs one writer per
    requested output format (expanded XML, HTML, raw text, paginated text,
    nroff).  Paginated text is the default when no format switch is given.

    Exit behaviour (all via ``sys.exit``):

    * status 2, after printing help, when no source argument is given;
    * an error message when the source file does not exist, when an
      explicit output filename is combined with several formats, when the
      ``--date`` value is not ``yyyy-mm-dd``, or when ``--no-headers`` is
      used without paginated text output;
    * status 1 when the cache directory cannot be created or written to,
      or when parsing, validation or writing fails.
    """
    # Populate options
    formatter = optparse.IndentedHelpFormatter(max_help_position=40)
    optionparser = optparse.OptionParser(
        usage='xml2rfc SOURCE [OPTIONS] '
        '...\nExample: xml2rfc '
        'draft.xml -o Draft-1.0 --text --html',
        formatter=formatter)

    formatgroup = optparse.OptionGroup(
        optionparser, 'Formats', 'Any or all of the following '
        'output formats may be specified. '
        'The default is --text. '
        'The destination filename will be based '
        'on the input filename, unless an '
        'argument is given to --basename.')
    formatgroup.add_option('',
                           '--text',
                           dest='text',
                           action='store_true',
                           help='outputs to a text file with proper page '
                           'breaks')
    formatgroup.add_option('',
                           '--html',
                           dest='html',
                           action='store_true',
                           help='outputs to an html file')
    formatgroup.add_option('',
                           '--nroff',
                           dest='nroff',
                           action='store_true',
                           help='outputs to an nroff file')
    formatgroup.add_option('',
                           '--raw',
                           dest='raw',
                           action='store_true',
                           help='outputs to a text file, unpaginated')
    formatgroup.add_option('',
                           '--exp',
                           dest='exp',
                           action='store_true',
                           help='outputs to an XML file with all references'
                           ' expanded')
    optionparser.add_option_group(formatgroup)

    plain_options = optparse.OptionGroup(optionparser, 'Plain Options')
    plain_options.add_option('-C',
                             '--clear-cache',
                             action='callback',
                             help='purge the cache and exit',
                             callback=clear_cache)
    plain_options.add_option('-n',
                             '--no-dtd',
                             dest='no_dtd',
                             action='store_true',
                             help='disable DTD validation step')
    plain_options.add_option(
        '-N',
        '--no-network',
        dest='no_network',
        action='store_true',
        help='don\'t use the network to resolve references',
        default=False)
    plain_options.add_option('-q',
                             '--quiet',
                             action='store_true',
                             dest='quiet',
                             help='dont print anything')
    plain_options.add_option('-v',
                             '--verbose',
                             action='store_true',
                             dest='verbose',
                             help='print extra information')
    plain_options.add_option('-V',
                             '--version',
                             action='callback',
                             help='display the version number and exit',
                             callback=display_version)
    optionparser.add_option_group(plain_options)

    value_options = optparse.OptionGroup(optionparser, 'Other Options')
    value_options.add_option('-b',
                             '--basename',
                             dest='basename',
                             metavar='NAME',
                             help='specify the base name for output files')
    value_options.add_option(
        '-c',
        '--cache',
        dest='cache',
        help='specify an alternate cache directory to write to')
    value_options.add_option('-d',
                             '--dtd',
                             dest='dtd',
                             help='specify an alternate dtd file')
    value_options.add_option(
        '-D',
        '--date',
        dest='datestring',
        metavar='DATE',
        default=datetime.datetime.today().strftime("%Y-%m-%d"),
        help='run as if the date is DATE (format: yyyy-mm-dd)')
    value_options.add_option('-f',
                             '--filename',
                             dest='filename',
                             metavar='FILE',
                             help='Deprecated.  The same as -o.')
    value_options.add_option('-o',
                             '--out',
                             dest='output_filename',
                             metavar='FILE',
                             help='specify an explicit output filename')
    optionparser.add_option_group(value_options)

    formatoptions = optparse.OptionGroup(
        optionparser, 'Format Options',
        ' Some formats accept additional format-specific options')
    formatoptions.add_option(
        '',
        '--no-headers',
        dest='omit_headers',
        action='store_true',
        help=
        'with --text: calculate page breaks, and emit form feeds and page top'
        ' spacing, but omit headers and footers from the paginated format')
    optionparser.add_option_group(formatoptions)

    # Parse and validate arguments
    (options, args) = optionparser.parse_args()
    if not args:
        optionparser.print_help()
        sys.exit(2)
    source = args[0]
    if not os.path.exists(source):
        sys.exit('No such file: ' + source)

    # Count how many output formats were explicitly requested.
    num_formats = sum(
        1 for requested in (options.raw, options.text, options.nroff,
                            options.html, options.exp) if requested)
    if num_formats > 1 and (options.filename or options.output_filename):
        # A single explicit filename cannot name several output files.
        sys.exit('Cannot give an explicit filename with more than one format, '
                 'use --basename instead.')
    if num_formats < 1:
        # Default to paginated text output
        options.text = True

    if options.cache:
        if not os.path.exists(options.cache):
            try:
                os.makedirs(options.cache)
                if options.verbose:
                    xml2rfc.log.write('Created cache directory at',
                                      options.cache)
            except OSError as e:
                print('Unable to make cache directory: %s ' % options.cache)
                print(e)
                sys.exit(1)
        else:
            if not os.access(options.cache, os.W_OK):
                print('Cache directory is not writable: %s' % options.cache)
                sys.exit(1)

    # A malformed --date value is a usage error, not a crash: report it
    # instead of letting strptime's ValueError escape as a traceback.
    try:
        options.date = datetime.datetime.strptime(options.datestring,
                                                  "%Y-%m-%d").date()
    except ValueError:
        sys.exit('Invalid date: %s (expected format: yyyy-mm-dd)'
                 % options.datestring)

    if options.omit_headers and not options.text:
        sys.exit("You can only use --no-headers with paginated text output.")

    # Setup warnings module
    xml2rfc.log.quiet = bool(options.quiet)
    xml2rfc.log.verbose = options.verbose

    # Parse the document into an xmlrfc tree instance
    parser = xml2rfc.XmlRfcParser(source,
                                  verbose=options.verbose,
                                  quiet=options.quiet,
                                  cache_path=options.cache,
                                  no_network=options.no_network,
                                  templates_path=globals().get(
                                      '_TEMPLATESPATH', None))
    try:
        xmlrfc = parser.parse()
    except xml2rfc.parser.XmlRfcError as e:
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0],
                              e)
        sys.exit(1)
    except lxml.etree.XMLSyntaxError as e:
        # For lxml syntax errors, hand the exception's error_log (rather
        # than the exception itself) to the same logging function.
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0],
                              e.error_log)
        sys.exit(1)

    # Validate the document unless disabled
    if not options.no_dtd:
        ok, errors = xmlrfc.validate(dtd_path=options.dtd)
        if not ok:
            xml2rfc.log.exception(
                'Unable to validate the XML document: ' + args[0], errors)
            sys.exit(1)

    # -f is a deprecated alias of -o; -o wins when both are given.
    if options.filename:
        xml2rfc.log.warn(
            "The -f and --filename options are deprecated and will"
            " go away in version 3.0 of xml2rfc.  Use -o instead")
        if options.output_filename and options.filename != options.output_filename:
            xml2rfc.log.warn(
                "You should not specify conflicting -f and -o options.  Using -o %s"
                % options.output_filename)
        if not options.output_filename:
            options.output_filename = options.filename

    # Work out the extension-less base path for default output filenames.
    source_path, source_base = os.path.split(source)
    source_name = os.path.splitext(source_base)[0]
    if options.basename:
        if os.path.isdir(options.basename):
            # --basename names a directory: keep the source's file name.
            basename = os.path.join(options.basename, source_name)
        else:
            basename = options.basename
    else:
        # Create basename based on input
        basename = os.path.join(source_path, source_name)

    # Execute any writers specified
    try:
        if options.exp:
            # Expanded XML writer needs a separate tree instance with
            # all comments and PI's preserved.  We can assume there are no
            # parse errors at this point since we didnt call sys.exit() during
            # parsing.
            new_xmlrfc = parser.parse(remove_comments=False, quiet=True)
            expwriter = xml2rfc.ExpandedXmlWriter(new_xmlrfc,
                                                  quiet=options.quiet,
                                                  verbose=options.verbose,
                                                  date=options.date)
            expwriter.write(options.output_filename or basename + '.exp.xml')
        if options.html:
            htmlwriter = xml2rfc.HtmlRfcWriter(xmlrfc,
                                               quiet=options.quiet,
                                               verbose=options.verbose,
                                               date=options.date,
                                               templates_dir=globals().get(
                                                   '_TEMPLATESPATH', None))
            htmlwriter.write(options.output_filename or basename + '.html')
        if options.raw:
            rawwriter = xml2rfc.RawTextRfcWriter(xmlrfc,
                                                 quiet=options.quiet,
                                                 verbose=options.verbose,
                                                 date=options.date)
            rawwriter.write(options.output_filename or basename + '.raw.txt')
        if options.text:
            pagedwriter = xml2rfc.PaginatedTextRfcWriter(
                xmlrfc,
                quiet=options.quiet,
                verbose=options.verbose,
                date=options.date,
                omit_headers=options.omit_headers,
            )
            pagedwriter.write(options.output_filename or basename + '.txt')
        if options.nroff:
            nroffwriter = xml2rfc.NroffRfcWriter(xmlrfc,
                                                 quiet=options.quiet,
                                                 verbose=options.verbose,
                                                 date=options.date)
            nroffwriter.write(options.output_filename or basename + '.nroff')
    except xml2rfc.RfcWriterError as e:
        xml2rfc.log.error('Unable to convert the document: ' + args[0],
                          '\n  ' + e.msg)
        # Report the failure to the caller's shell as well, matching the
        # non-zero exit used for parse and validation failures above.
        sys.exit(1)
Esempio n. 3
0
def main():
    # Populate options
    formatter = optparse.IndentedHelpFormatter(max_help_position=40)
    optionparser = optparse.OptionParser(usage='xml2rfc SOURCE [OPTIONS] '
                                        '...\nExample: xml2rfc '
                                        'draft.xml -o Draft-1.0 --text --html',
                                        formatter=formatter)

    formatgroup = optparse.OptionGroup(optionparser, 'Formats',
                                       'Any or all of the following '
                                       'output formats may be specified. '
                                       'The default is --text. '
                                       'The destination filename will be based '
                                       'on the input filename, unless an '
                                       'argument is given to --basename.')
    formatgroup.add_option('', '--text', action='store_true',
                           help='outputs to a text file with proper page breaks')
    formatgroup.add_option('', '--html', action='store_true',
                           help='outputs to an html file')
    formatgroup.add_option('', '--nroff', action='store_true',
                           help='outputs to an nroff file')
    if xml2rfc.HAVE_CAIRO and xml2rfc.HAVE_PANGO:
        formatgroup.add_option('', '--pdf', action='store_true',
                               help='outputs to a pdf file')
    else:
        formatgroup.add_option('', '--pdf', action='store_true',
                               help='(unavailable due to missing external library)')
    formatgroup.add_option('', '--raw', action='store_true',
                           help='outputs to a text file, unpaginated')
    formatgroup.add_option('', '--expand', action='store_true',
                           help='outputs to an XML file with all references expanded')
    formatgroup.add_option('', '--v2v3', action='store_true',
                           help='convert vocabulary version 2 XML to version 3')
    formatgroup.add_option('', '--preptool', action='store_true',
                           help='run preptool on the input')
    formatgroup.add_option('', '--info', action='store_true',
                           help='generate a JSON file with anchor to section lookup information')
    optionparser.add_option_group(formatgroup)


    plain_options = optparse.OptionGroup(optionparser, 'Plain Options')
    plain_options.add_option('-C', '--clear-cache', action='callback', callback=clear_cache,
                            help='purge the cache and exit')
    plain_options.add_option(      '--debug', action='store_true',
                            help='Show debugging output')
    plain_options.add_option('-H', '--pi-help', action='callback', callback=print_pi_help,
                            help='show the names and default values of PIs')
    plain_options.add_option('-n', '--no-dtd', action='store_true',
                            help='disable DTD validation step')
    plain_options.add_option('-N', '--no-network', action='store_true', default=False,
                            help='don\'t use the network to resolve references')
    plain_options.add_option('-O', '--no-org-info', dest='first_page_author_org', action='store_false', default=True,
                            help='don\'t show author orgainzation info on page one (legacy only)')
    plain_options.add_option('-r', '--remove-pis', action='store_true', default=False,
                            help='Remove XML processing instructions')
    plain_options.add_option('-q', '--quiet', action='store_true',
                            help='dont print anything')
    plain_options.add_option('-u', '--utf8', action='store_true',
                            help='generate utf8 output')
    plain_options.add_option('-v', '--verbose', action='store_true',
                            help='print extra information')
    plain_options.add_option('-V', '--version', action='store_true', 
                            help='display the version number and exit')
    optionparser.add_option_group(plain_options)


    value_options = optparse.OptionGroup(optionparser, 'Other Options')
    value_options.add_option('-b', '--basename', dest='basename', metavar='NAME',
                            help='specify the base name for output files')
    value_options.add_option('-c', '--cache', dest='cache',
                            help='specify a primary cache directory to write to; default: try [ %s ]'%', '.join(xml2rfc.CACHES) )
    value_options.add_option('-d', '--dtd', dest='dtd', help='specify an alternate dtd file')
    value_options.add_option('-D', '--date', dest='datestring', metavar='DATE',
                            default=datetime.datetime.today().strftime("%Y-%m-%d"),
                            help='run as if the date is DATE (format: yyyy-mm-dd)')
    value_options.add_option('-f', '--filename', dest='filename', metavar='FILE',
                            help='Deprecated.  The same as -o.')
    value_options.add_option('-i', '--indent', type=int, default=2, 
                            help='With some v3 formatters: Indentation to use when pretty-printing XML')
    value_options.add_option('-o', '--out', dest='output_filename', metavar='FILE',
                            help='specify an explicit output filename')
    value_options.add_option('-p', '--path', dest='output_path', metavar='PATH',
                            help='specify the directory path for output files')
    optionparser.add_option_group(value_options)


    formatoptions = optparse.OptionGroup(optionparser, 'Format Options')
    formatoptions.add_option('--v3', dest='legacy', action='store_false',
                           help='with --text and --html: use the v3 formatter, rather than the legacy one.')
    formatoptions.add_option('--legacy', default=True, action='store_true',
                           help='with --text and --html: use the legacy text formatter, rather than the v3 one.')
    optionparser.add_option_group(formatoptions)

    textoptions = optparse.OptionGroup(optionparser, 'Text Format Options')
    textoptions.add_option('--no-headers', dest='omit_headers', action='store_true',
                           help='calculate page breaks, and emit form feeds and page top'
                           ' spacing, but omit headers and footers from the paginated format')
    textoptions.add_option('--legacy-list-symbols', default=False, action='store_true',
                           help='use the legacy list bullet symbols, rather than the new ones.')
    textoptions.add_option('--legacy-date-format', default=False, action='store_true',
                           help='use the legacy date format, rather than the new one.')
    textoptions.add_option('--list-symbols', metavar='4*CHAR',
                           help='use the characters given as list bullet symbols.')
    textoptions.add_option('-P', '--no-pagination', dest='pagination', action='store_false', default=True,
                            help='don\'t do pagination of v3 draft text format.  V3 RFC text output is never paginated.')
    optionparser.add_option_group(textoptions)

    htmloptions = optparse.OptionGroup(optionparser, 'Html Format Options')
    htmloptions.add_option('--css', default=None,
                           help='Use the given CSS file instead of the builtin')
    htmloptions.add_option('--external-css', action='store_true', default=False,
                           help='place css in an external file')
    htmloptions.add_option('--rfc-base-url', default="https://www.rfc-editor.org/info/",
                           help='Base URL for RFC links')
    htmloptions.add_option('--id-base-url', default="https://www.ietf.org/archive/id/",
                           help='Base URL for Internet-Draft links')
    optionparser.add_option_group(htmloptions)

    v2v3options = optparse.OptionGroup(optionparser, 'V2-V3 Converter Options')
    v2v3options.add_option('--add-xinclude', action='store_true',
                           help='replace reference elements with RFC and Internet-Draft'
                           ' seriesInfo with the appropriate XInclude element')
    v2v3options.add_option('--strict', action='store_true',
                           help='be strict about stripping some deprecated attributes')
    optionparser.add_option_group(v2v3options)

    preptooloptions = optparse.OptionGroup(optionparser, 'Preptool Options')
    preptooloptions.add_option('--accept-prepped', action='store_true',
                           help='accept already prepped input')
    optionparser.add_option_group(preptooloptions)


    # --- Parse and validate arguments ---------------------------------

    (options, args) = optionparser.parse_args()
    # Some additional values not exposed as options
    options.doi_base_url = "https://doi.org/"
    options.no_css = False
    options.image_svg = False

    # Check that the default_options have values for all options, for people
    # calling xml2rfc library functions, rather than the command-line
    from xml2rfc.writers.base import default_options
    for key in options.__dict__:
        if not key in default_options.__dict__:
            sys.stderr.write("  Option missing from base.default_options: %s\n" % key)
            sys.exit(2)

    # Show version information, then exit
    if options.version:
        print('%s %s' % (xml2rfc.NAME, xml2rfc.__version__))
        if options.verbose:
            print('  Python %s' % sys.version.split()[0])
            extras = set(['pycairo', 'weasyprint'])
            try:
                import pkg_resources
                this = pkg_resources.working_set.by_key[xml2rfc.NAME]
                for p in this.requires():
                    if p.key in extras:
                        extras -= p.key
                    try:
                        dist = pkg_resources.get_distribution(p.key)
                        print('  %s'%dist)
                    except:
                        pass
                for key in extras:
                    try:
                        dist = pkg_resources.get_distribution(key)
                        print('  %s'%dist)
                    except:
                        pass
            except:
                pass
        sys.exit(0)

    if len(args) < 1:
        optionparser.print_help()
        sys.exit(2)

    install_info = """
    Cannot generate PDF due to missing external libraries.
    ------------------------------------------------------
    
    In order to generate PDFs, xml2rfc uses the WeasyPrint library, which
    depends on external libaries that must be installed as native packages.

    First, install the Cairo, Pango, and GDK-PixBuf library files on your
    system.  See installation instructions on the WeasyPrint Docs:
    
        https://weasyprint.readthedocs.io/en/stable/install.html

    (Python 3 is not needed if your system Python is 2.7, though).

    Next, install the pycairo and weasyprint python modules using pip.
    Depending on your system, you may need to use 'sudo' or install in
    user-specific directories, using the --user switch.  On OS X in
    particular, you may also need to install a newer version of setuptools
    using --user before weasyprint can be installed.  If you install with 
    the --user switch, you may need to also set PYTHONPATH, e.g.,
    
        PYTHONPATH=/Users/username/Library/Python/2.7/lib/python/site-packages

    for Python 2.7.

    The basic pip commands (modify as needed according to the text above)
    are:

        pip install 'pycairo>=1.18' 'weasyprint<=0.42.3'

    With these installed and available to xml2rfc, the --pdf switch will be
    enabled.
    """

    missing = ""
    if options.pdf and not xml2rfc.HAVE_WEASYPRINT:
        missing += "\nCould not import weasyprint"
    if options.pdf and not xml2rfc.HAVE_PYCAIRO:
        missing += "\nCould not import pycairo"
    if options.pdf and not xml2rfc.HAVE_CAIRO:
        missing += "\nCould not find the cairo lib"
    if options.pdf and not xml2rfc.HAVE_PANGO:
        missing += "\nCould not find the pango lib"

    if missing:
        install_info += missing + '\n'
        sys.exit(install_info)

    source = args[0]
    if not os.path.exists(source):
        sys.exit('No such file: ' + source)
    # Default (this may change over time):
    options.vocabulary = 'v2' if options.legacy else 'v3'
    # Option constraints
    if sys.argv[0].endswith('v2v3'):
        options.v2v3 = True
        options.utf8 = True
    #
    if options.preptool:
        options.vocabulary = 'v3'
        options.no_dtd = True
    else:
        if options.accept_prepped:
            sys.exit("You can only use --accept-prepped together with --preptool.")            
    if options.v2v3:
        options.vocabulary = 'v2'
        options.no_dtd = True
    #
    if options.basename:
        if options.output_path:
            sys.exit('--path and --basename has the same functionality, please use only --path')
        else:
            options.output_path = options.basename
            options.basename = None
    #
    num_formats = len([ o for o in [options.raw, options.text, options.nroff, options.html, options.expand, options.v2v3, options.preptool, options.info, options.pdf ] if o])
    if num_formats > 1 and (options.filename or options.output_filename):
        sys.exit('Cannot give an explicit filename with more than one format, '
                 'use --path instead.')
    if num_formats < 1:
        # Default to paginated text output
        options.text = True
    if options.debug:
        options.verbose = True
    #
    if options.cache:
        if not os.path.exists(options.cache):
            try:
                os.makedirs(options.cache)
                if options.verbose:
                    xml2rfc.log.write('Created cache directory at', 
                                      options.cache)
            except OSError as e:
                print('Unable to make cache directory: %s ' % options.cache)
                print(e)
                sys.exit(1)
        else:
            if not os.access(options.cache, os.W_OK):
                print('Cache directory is not writable: %s' % options.cache)
                sys.exit(1)
    options.date = datetime.datetime.strptime(options.datestring, "%Y-%m-%d").date()
    if options.omit_headers and not options.text:
        sys.exit("You can only use --no-headers with paginated text output.")
    #
    if options.text and not options.legacy:
        if options.legacy_list_symbols and options.list_symbols:
            sys.exit("You cannot specify both --list-symbols and --legacy_list_symbols.")
        if options.list_symbols:
            options.list_symbols = tuple(list(options.list_symbols))
        elif options.legacy_list_symbols:
            options.list_symbols = ('o', '*', '+', '-')
        else:
            options.list_symbols = ('*', '-', 'o', '+')
    else:
        if options.legacy_list_symbols:
            sys.exit("You can only use --legacy_list_symbols with v3 text output.")
        if options.list_symbols:
            sys.exit("You can only use --list_symbols with v3 text output.")

    if not options.legacy:
        # I.e., V3 formatter
        options.no_dtd = True        
        if options.nroff:
            sys.exit("You can only use --nroff in legacy mode")
        if options.raw:
            sys.exit("You can only use --raw in legacy mode")

    if options.utf8:
        xml2rfc.log.warn("The --utf8 switch is deprecated.  Use the new unicode insertion element <u>\nto refer to unicode values in a protocol specification.")

    # ------------------------------------------------------------------

    # Setup warnings module
    # xml2rfc.log.warn_error = options.warn_error and True or False
    xml2rfc.log.quiet = options.quiet and True or False
    xml2rfc.log.verbose = options.verbose

    # Parse the document into an xmlrfc tree instance
    parser = xml2rfc.XmlRfcParser(source,
                                  options=options,
                                  templates_path=globals().get('_TEMPLATESPATH', None),
                              )
    try:
        xmlrfc = parser.parse(remove_pis=options.remove_pis, normalize=True)
    except xml2rfc.parser.XmlRfcError as e:
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0], e)
        sys.exit(1)
    except lxml.etree.XMLSyntaxError as e:
        # Give the lxml.etree.XmlSyntaxError exception a line attribute which
        # matches lxml.etree._LogEntry, so we can use the same logging function
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0], e.error_log)
        sys.exit(1)
        

    # Remember if we're building an RFC
    options.rfc = xmlrfc.tree.getroot().get('number')
    if options.rfc:
        options.pagination = False

    # Check if we've received a version="3" document, and adjust accordingly
    if xmlrfc.tree.getroot().get('version') == '3':
        options.legacy = False
        options.no_dtd = True
        options.vocabulary = 'v3'
        if options.list_symbols is None:
            options.list_symbols = ('*', '-', 'o', '+')

    # Validate the document unless disabled
    if not options.no_dtd:
        ok, errors = xmlrfc.validate(dtd_path=options.dtd)
        if not ok:
            xml2rfc.log.exception('Unable to validate the XML document: ' + args[0], errors)
            sys.exit(1)

    if options.filename:
        xml2rfc.log.warn("The -f and --filename options are deprecated and will"
                        " go away in version 3.0 of xml2rfc.  Use -o instead")
        if options.output_filename and options.filename != options.output_filename:
            xml2rfc.log.warn("You should not specify conflicting -f and -o options.  Using -o %s"
                        % options.output_filename)
        if not options.output_filename:
            options.output_filename = options.filename

    # Execute any writers specified
    try:
        source_path, source_base = os.path.split(source)
        source_name, source_ext  = os.path.splitext(source_base)
        if options.output_path:
            if os.path.isdir(options.output_path):
                basename = os.path.join(options.output_path, source_name)
            else:
                sys.exit("The given output path '%s' is not a directory, cannot place output files there" % (options.output_path, ))
        else:
            # Create basename based on input
            basename = os.path.join(source_path, source_name)

        if options.expand and options.legacy:
            # Expanded XML writer needs a separate tree instance with
            # all comments and PI's preserved.  We can assume there are no
            # parse errors at this point since we didnt call sys.exit() during
            # parsing.
            filename = options.output_filename
            if not filename:
                filename = basename + '.exp.xml'
                options.output_filename = filename
            new_xmlrfc = parser.parse(remove_comments=False, quiet=True, normalize=False)
            expwriter = xml2rfc.ExpandedXmlWriter(new_xmlrfc,
                                                  options=options,
                                                  date=options.date)
            expwriter.write(filename)
            options.output_filename = None

        if options.html and options.legacy:
            filename = options.output_filename
            if not filename:
                filename = basename + '.html'
                options.output_filename = filename
            htmlwriter = xml2rfc.HtmlRfcWriter(xmlrfc,
                                               options=options,
                                               date=options.date,
                                               templates_dir=globals().get('_TEMPLATESPATH', None))
            htmlwriter.write(filename)
            options.output_filename = None

        if options.raw:
            filename = options.output_filename
            if not filename:
                filename = basename + '.raw.txt'
                options.output_filename = filename
            rawwriter = xml2rfc.RawTextRfcWriter(xmlrfc,
                                                 options=options,
                                                 date=options.date)
            rawwriter.write(filename)
            options.output_filename = None

        if options.text and options.legacy:
            filename = options.output_filename
            if not filename:
                filename = basename + '.txt'
                options.output_filename = filename
            pagedwriter = xml2rfc.PaginatedTextRfcWriter(xmlrfc,
                                                         options=options,
                                                         date=options.date,
                                                         omit_headers=options.omit_headers,
                                                     )
            pagedwriter.write(filename)
            options.output_filename = None

        if options.nroff:
            filename = options.output_filename
            if not filename:
                filename = basename + '.nroff'
                options.output_filename = filename
            nroffwriter = xml2rfc.NroffRfcWriter(xmlrfc,
                                                 options=options,
                                                 date=options.date)
            nroffwriter.write(filename)
            options.output_filename = None

        # --- End of legacy formatter invocations ---

        if options.expand and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, normalize=False, strip_cdata=False, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.exp.xml'
                options.output_filename = filename
            #v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            #xmlrfc.tree = v2v3.convert2to3()
            expander = xml2rfc.ExpandV3XmlWriter(xmlrfc, options=options, date=options.date)
            expander.write(filename)
            options.output_filename = None

        if options.v2v3:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, normalize=False, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.v2v3.xml'
                options.output_filename = filename
            v2v3writer = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            v2v3writer.write(filename)
            options.output_filename = None

        if options.preptool:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.prepped.xml'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            preptool = xml2rfc.PrepToolWriter(xmlrfc, options=options, date=options.date)
            preptool.write(filename)
            options.output_filename = None

        if options.text and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.txt'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc, options=options, date=options.date, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            writer = xml2rfc.TextWriter(xmlrfc, options=options, date=options.date)
            writer.write(filename)
            options.output_filename = None

        if options.html and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.html'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc, options=options, date=options.date, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            writer = xml2rfc.HtmlWriter(xmlrfc, options=options, date=options.date)
            writer.write(filename)
            options.output_filename = None

        if options.pdf:
            xmlrfc = parser.parse(remove_comments=False, quiet=True, add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.pdf'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc, options=options, date=options.date, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            writer = xml2rfc.PdfWriter(xmlrfc, options=options, date=options.date)
            writer.write(filename)
            options.output_filename = None

        if options.info:
            xmlrfc = parser.parse(remove_comments=False, quiet=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.json'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc, options=options, date=options.date, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            info = extract_anchor_info(xmlrfc.tree)
            with open(filename, 'w') as fp:
                json.dump(info, fp, indent=2, ensure_ascii=False, encoding='utf-8')
            if not options.quiet:
                xml2rfc.log.write('Created file', filename)

    except xml2rfc.RfcWriterError as e:
        xml2rfc.log.error('Unable to convert the document: ' + args[0],  
                          '\n  ' + e.msg)
Esempio n. 4
0
def main():
    global optionparser
    # Populate options
    config_paths = ['/etc/xml2rfc.conf', '~/.xml2rfc.conf']
    user_conf = os.path.join(appdirs.user_config_dir(), 'xml2rfc.conf')
    if not user_conf in config_paths:
        config_paths.append(user_conf)

    optionparser = configargparse.ArgumentParser(
        usage='xml2rfc [OPTIONS] SOURCE [OPTIONS]'
        '...\nExample: xml2rfc '
        'draft.xml -b draft-foo-19 --text --html',
        add_help=False,
        add_config_file_help=False,
        default_config_files=config_paths,
    )
    input_options = optionparser.add_argument_group('Positional arguments')
    input_options.add_argument(
        'source',
        nargs='?',
        help="Input XML file to render to one or more of the available formats."
    )

    help_options = optionparser.add_argument_group(
        'Documentation options',
        'Some options to generate built-in documentation.')
    help_options.add_argument('-h',
                              '--help',
                              action='help',
                              help='show a help message and exit')
    help_options.add_argument(
        '--docfile',
        action='store_true',
        help='generate a documentation XML file ready for formatting')
    help_options.add_argument('--manpage',
                              action='store_true',
                              help='show paged text documentation')
    help_options.add_argument('--country-help',
                              action="store_true",
                              help='show the recognized <country> strings')
    help_options.add_argument('--pdf-help',
                              action="store_true",
                              help='show pdf generation requirements')
    #     help_options.add_argument('--pi-help', action="store_true",
    #                             help='show the names and default values of PIs (for v2)')
    help_options.add_argument(
        '--template-dir',
        help='directory to pull the doc.xml and doc.yaml templates from.  '
        'The default is the "templates" directory of the xml2rfc package')
    help_options.add_argument(
        '--values',
        action='store_true',
        help='show option values and from where they come')
    help_options.add_argument('-V',
                              '--version',
                              action='store_true',
                              help='display the version number and exit')

    formatgroup = optionparser.add_argument_group(
        'Format selection',
        'One or more of the following output formats may be specified. '
        'The default is --text. The destination filename will be based '
        'on the input filename, unless --out=FILE or --basename=BASE '
        'is used.')
    formatgroup.add_argument(
        '--text',
        action='store_true',
        help='outputs formatted text to file, with proper page breaks')
    formatgroup.add_argument('--html',
                             action='store_true',
                             help='outputs formatted HTML to file')
    formatgroup.add_argument(
        '--nroff',
        action='store_true',
        help='outputs formatted nroff to file (only v2 input)')
    if xml2rfc.HAVE_CAIRO and xml2rfc.HAVE_PANGO:
        formatgroup.add_argument('--pdf',
                                 action='store_true',
                                 help='outputs formatted PDF to file')
    else:
        formatgroup.add_argument(
            '--pdf',
            action='store_true',
            help='(unavailable due to missing external library)')
    formatgroup.add_argument(
        '--raw',
        action='store_true',
        help='outputs formatted text to file, unpaginated (only v2 input)')
    formatgroup.add_argument(
        '--expand',
        action='store_true',
        help='outputs XML to file with all references expanded')
    formatgroup.add_argument(
        '--v2v3',
        action='store_true',
        help='convert vocabulary version 2 XML to version 3')
    formatgroup.add_argument('--preptool',
                             action='store_true',
                             help='run preptool on the input')
    formatgroup.add_argument('--unprep',
                             action='store_true',
                             help='reduce prepped xml to unprepped')
    formatgroup.add_argument(
        '--info',
        action='store_true',
        help='generate a JSON file with anchor to section lookup information')

    plain_options = optionparser.add_argument_group('Generic Switch Options')
    plain_options.add_argument('-C',
                               '--clear-cache',
                               action='store_true',
                               default=False,
                               help='purge the cache and exit')
    plain_options.add_argument('--debug',
                               action='store_true',
                               help='Show debugging output')
    plain_options.add_argument('-n',
                               '--no-dtd',
                               action='store_true',
                               help='disable DTD validation step')
    plain_options.add_argument(
        '-N',
        '--no-network',
        action='store_true',
        default=False,
        help='don\'t use the network to resolve references')
    plain_options.add_argument(
        '-O',
        '--no-org-info',
        dest='first_page_author_org',
        action='store_false',
        default=True,
        help='don\'t show author orgainzation info on page one (legacy only)')
    plain_options.add_argument('-q',
                               '--quiet',
                               action='store_true',
                               help="don't print anything while working")
    plain_options.add_argument('--skip-config-files',
                               action="store_true",
                               default=False,
                               help='ignore config file settings')
    plain_options.add_argument('-r',
                               '--remove-pis',
                               action='store_true',
                               default=False,
                               help='Remove XML processing instructions')
    plain_options.add_argument('-u',
                               '--utf8',
                               action='store_true',
                               help='generate utf8 output')
    plain_options.add_argument('-v',
                               '--verbose',
                               action='store_true',
                               help='print extra information')

    value_options = optionparser.add_argument_group(
        'Generic Options with Values')
    value_options.add_argument('-b',
                               '--basename',
                               dest='basename',
                               metavar='NAME',
                               help='specify the base name for output files')
    value_options.add_argument(
        '-c',
        '--cache',
        dest='cache',
        metavar='PATH',
        help=
        'specify a primary cache directory to write to; default: try [ %s ]' %
        ', '.join(xml2rfc.CACHES))
    value_options.add_argument('--config-file',
                               dest="config_file",
                               metavar='FILE',
                               is_config_file_arg=True,
                               help='specify a configuration file')
    value_options.add_argument('-d',
                               '--dtd',
                               dest='dtd',
                               metavar='DTDFILE',
                               help='specify an alternate dtd file')
    value_options.add_argument(
        '-D',
        '--date',
        dest='datestring',
        metavar='DATE',
        default=datetime.date.today(),
        help=
        "run as if the date is DATE (format: yyyy-mm-dd).  Default: Today's date"
    )
    value_options.add_argument('-f',
                               '--filename',
                               dest='filename',
                               metavar='FILE',
                               help='Deprecated.  The same as -o')
    value_options.add_argument(
        '-i',
        '--indent',
        type=int,
        default=2,
        metavar='INDENT',
        help=
        'With some v3 formatters: Indentation to use when pretty-printing XML')
    value_options.add_argument('-o',
                               '--out',
                               dest='output_filename',
                               metavar='FILE',
                               help='specify an explicit output filename')
    value_options.add_argument(
        '-p',
        '--path',
        dest='output_path',
        metavar='PATH',
        help='specify the directory path for output files')
    value_options.add_argument(
        '-s',
        '--silence',
        action='append',
        type=str,
        metavar='STRING',
        help="Silence any warning beginning with the given string")

    formatoptions = optionparser.add_argument_group('Generic Format Options')
    formatoptions.add_argument(
        '--v3',
        action='store_true',
        default=True,
        help=
        'with --text and --html: use the v3 formatter, rather than the legacy one'
    )
    formatoptions.add_argument(
        '--legacy',
        '--v2',
        dest='v3',
        action='store_false',
        help=
        'with --text and --html: use the legacy output formatters, rather than the v3 ones'
    )
    formatoptions.add_argument(
        '--id-is-work-in-progress',
        default=True,
        action='store_true',
        help='in references, refer to Internet-Drafts as "Work in Progress"')

    textoptions = optionparser.add_argument_group('Text Format Options')
    textoptions.add_argument(
        '--no-headers',
        dest='omit_headers',
        action='store_true',
        help='calculate page breaks, and emit form feeds and page top'
        ' spacing, but omit headers and footers from the paginated format')
    textoptions.add_argument(
        '--legacy-list-symbols',
        default=False,
        action='store_true',
        help='use the legacy list bullet symbols, rather than the new ones')
    textoptions.add_argument(
        '--legacy-date-format',
        default=False,
        action='store_true',
        help='use the legacy date format, rather than the new one')
    textoptions.add_argument('--no-legacy-date-format',
                             dest='legacy_date_format',
                             action='store_false',
                             help="don't use the legacy date format")
    textoptions.add_argument(
        '--list-symbols',
        metavar='4*CHAR',
        help='use the characters given as list bullet symbols')
    textoptions.add_argument(
        '--bom',
        '--BOM',
        action='store_true',
        default=False,
        help='Add a BOM (unicode byte order mark) to the start of text files')
    textoptions.add_argument(
        '-P',
        '--no-pagination',
        dest='pagination',
        action='store_false',
        default=True,
        help=
        'don\'t do pagination of v3 draft text format.  V3 RFC text output is never paginated'
    )
    textoptions.add_argument(
        '--table-hyphen-breaks',
        action='store_true',
        default=False,
        help=
        'More easily do line breaks after hyphens in table cells to give a more compact table'
    )
    textoptions.add_argument(
        '--table-borders',
        default='full',
        choices=[
            'full',
            'light',
            'minimal',
            'min',
        ],
        help=
        'The style of table borders to use for text output; one of full/light/minimal'
    )

    htmloptions = optionparser.add_argument_group('Html Format Options')
    htmloptions.add_argument(
        '--css',
        default=None,
        metavar="FILE",
        help='Use the given CSS file instead of the builtin')
    htmloptions.add_argument('--external-css',
                             action='store_true',
                             default=False,
                             help='place css in external files')
    htmloptions.add_argument('--no-external-css',
                             dest='external_css',
                             action='store_false',
                             help='place css in external files')
    htmloptions.add_argument('--external-js',
                             action='store_true',
                             default=False,
                             help='place js in external files')
    htmloptions.add_argument('--no-external-js',
                             dest='external_js',
                             action='store_false',
                             help='place js in external files')
    htmloptions.add_argument('--rfc-base-url',
                             default="https://www.rfc-editor.org/rfc/",
                             help='Base URL for RFC links')
    htmloptions.add_argument('--id-base-url',
                             default="https://tools.ietf.org/html/",
                             help='Base URL for Internet-Draft links')
    htmloptions.add_argument(
        '--rfc-reference-base-url',
        default="https://rfc-editor.org/rfc/",
        help=
        'Base URL for RFC reference targets, replacing the target="..." value given in the reference entry'
    )
    htmloptions.add_argument('--id-reference-base-url',
                             default="https://tools.ietf.org/html/",
                             help='Base URL for I-D reference targets')
    htmloptions.add_argument('--metadata-js-url',
                             default="metadata.min.js",
                             help='URL for the metadata script')

    v2v3options = optionparser.add_argument_group('V2-V3 Converter Options')
    v2v3options.add_argument(
        '--add-xinclude',
        action='store_true',
        help='replace reference elements with RFC and Internet-Draft'
        ' seriesInfo with the appropriate XInclude element')
    v2v3options.add_argument(
        '--strict',
        action='store_true',
        help='be strict about stripping some deprecated attributes')

    preptooloptions = optionparser.add_argument_group('Preptool Options')
    preptooloptions.add_argument('--accept-prepped',
                                 action='store_true',
                                 help='accept already prepped input')

    # --- Parse arguments ---------------------------------

    from xml2rfc.writers.base import default_options

    options = optionparser.parse_args()
    # This is a bit wasteful, but we need to parse options first,
    # in order to know if we should ignore config files
    if options.skip_config_files:
        options = optionparser.parse_args(config_file_contents='')
    args = [options.source]
    # Some additional values not exposed as options
    options.doi_base_url = "https://doi.org/"
    options.no_css = False
    options.image_svg = False

    # --- Set default values ---------------------------------

    # Check that the default_options have values for all options, for people
    # calling xml2rfc library functions, rather than the command-line
    for key in options.__dict__:
        if not key in default_options.__dict__:
            sys.stderr.write(
                "  Option missing from base.default_options: %s\n" % key)
            sys.exit(2)
    for key in default_options.__dict__:
        if not key in options.__dict__:
            setattr(options, key, getattr(default_options, key))

    # --- Help options ---------------------------------

    if options.country_help:
        print_country_help(options, optionparser)
        sys.exit()

    if options.pdf_help:
        print_pdf_help(options, optionparser)
        sys.exit()

    if options.pi_help:
        print_pi_help(options, optionparser)
        sys.exit()

    # Show option values
    if options.values:
        print_values(options, optionparser, config_paths)
        sys.exit()

    # Show version information, then exit
    if options.version:
        print_version(options, optionparser)
        sys.exit()

    # --- Parse and validate arguments ---------------------------------

    if (options.docfile or options.manpage) and not options.list_symbols:
        options.list_symbols = default_options.list_symbols

    if not options.silence:
        options.silence = default_options.silence

    if options.docfile:
        filename = options.output_filename
        if not filename:
            filename = 'xml2rfc-doc-%s.xml' % xml2rfc.__version__
            options.output_filename = filename
        writer = xml2rfc.DocWriter(None, options=options, date=options.date)
        writer.write(filename)
        sys.exit()

    if options.manpage:
        writer = xml2rfc.DocWriter(None, options=options, date=options.date)
        writer.manpage()
        sys.exit()

    # Clear cache and exit if requested
    if options.clear_cache:
        xml2rfc.parser.XmlRfcParser('').delete_cache(path=options.cache)
        sys.exit(0)

    if len(args) < 1:
        optionparser.print_help()
        sys.exit(2)

    if options.pdf:
        header = """    Cannot generate PDF due to missing external libraries.
    ------------------------------------------------------
    """
        missing_libs = get_missing_pdf_libs()
        if missing_libs:
            pdf_requirements_info = get_pdf_help(missing_libs)
            sys.exit(header + pdf_requirements_info)

    source = args[0]
    if not source:
        sys.exit('No source file given')
    if not os.path.exists(source):
        sys.exit('No such file: ' + source)

    options.legacy = not options.v3
    # Default (this may change over time):
    options.vocabulary = 'v2' if options.legacy else 'v3'
    # Option constraints
    if sys.argv[0].endswith('v2v3'):
        options.v2v3 = True
        options.utf8 = True
    #
    if options.preptool:
        options.vocabulary = 'v3'
        options.no_dtd = True
    else:
        if options.accept_prepped:
            sys.exit(
                "You can only use --accept-prepped together with --preptool.")
    if options.v2v3:
        options.vocabulary = 'v2'
        options.no_dtd = True
    #
    if options.basename:
        if options.output_path:
            sys.exit(
                '--path and --basename has the same functionality, please use only --path'
            )
        else:
            options.output_path = options.basename
            options.basename = None
    #
    num_formats = len([
        o for o in [
            options.raw, options.text, options.nroff, options.html,
            options.expand, options.v2v3, options.preptool, options.info,
            options.pdf, options.unprep
        ] if o
    ])
    if num_formats > 1 and (options.filename or options.output_filename):
        sys.exit(
            'Cannot use an explicit output filename when generating more than one format, '
            'use --path instead.')
    if num_formats < 1:
        # Default to paginated text output
        options.text = True
    if options.debug:
        options.verbose = True
    #
    if options.cache:
        if not os.path.exists(options.cache):
            try:
                os.makedirs(options.cache)
                xml2rfc.log.note('Created cache directory at', options.cache)
            except OSError as e:
                print('Unable to make cache directory: %s ' % options.cache)
                print(e)
                sys.exit(1)
        else:
            if not os.access(options.cache, os.W_OK):
                print('Cache directory is not writable: %s' % options.cache)
                sys.exit(1)
    #
    if options.datestring:
        if isinstance(options.datestring, str):
            options.date = datetime.datetime.strptime(options.datestring,
                                                      "%Y-%m-%d").date()
        elif isinstance(options.datestring, datetime.date):
            options.date = options.datestring
        else:
            xml2rfc.log.warn("Unexpected type for options.datestring: %s" %
                             type(options.datestring))
    else:
        options.date = datetime.date.today()

    if options.omit_headers and not options.text:
        sys.exit("You can only use --no-headers with paginated text output.")
    #
    if options.utf8:
        xml2rfc.log.warn(
            "The --utf8 switch is deprecated.  Use the new unicode insertion element <u> to refer to unicode values in a protocol specification."
        )

    if options.rfc_reference_base_url:
        if not options.rfc_reference_base_url.endswith('/'):
            options.rfc_reference_base_url += '/'
    if options.id_reference_base_url:
        if not options.id_reference_base_url.endswith('/'):
            options.id_reference_base_url += '/'

    # ------------------------------------------------------------------

    # Setup warnings module
    # xml2rfc.log.warn_error = options.warn_error and True or False
    xml2rfc.log.quiet = options.quiet and True or False
    xml2rfc.log.verbose = options.verbose

    # Parse the document into an xmlrfc tree instance
    options.template_dir = options.template_dir or default_options.template_dir
    parser = xml2rfc.XmlRfcParser(
        source,
        options=options,
        templates_path=options.template_dir,
    )
    try:
        xmlrfc = parser.parse(remove_pis=options.remove_pis, normalize=True)
    except xml2rfc.parser.XmlRfcError as e:
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0],
                              e)
        sys.exit(1)
    except lxml.etree.XMLSyntaxError as e:
        # Hand the exception's error_log to the logger instead of the
        # exception itself; its entries are presumably lxml.etree._LogEntry
        # objects, so we can use the same logging function
        xml2rfc.log.exception('Unable to parse the XML document: ' + args[0],
                              e.error_log)
        sys.exit(1)
    # check doctype
    if xmlrfc.tree.docinfo and xmlrfc.tree.docinfo.system_url:
        version = xmlrfc.tree.getroot().get('version', '2')
        if version == '3' and xmlrfc.tree.docinfo.system_url.lower(
        ) == 'rfc2629.dtd':
            sys.exit(
                'Incompatible schema information: found "rfc2629.dtd" in <DOCTYPE> of a version 3 file'
            )

    # Remember if we're building an RFC
    options.rfc = xmlrfc.tree.getroot().get('number')
    if options.rfc:
        options.pagination = False

    # Check if we've received a version="3" document, and adjust accordingly
    if xmlrfc.tree.getroot().get('version') == '3':
        options.legacy = False
        options.no_dtd = True
        options.vocabulary = 'v3'

    # ------------------------------------------------------------------
    # Additional option checks that depend on the options.legacy setting,
    # which we may have adjusted as a result of the <rfc version="..."> setting:
    if options.text and not options.legacy:
        if options.legacy_list_symbols and options.list_symbols:
            sys.exit(
                "You cannot specify both --list-symbols and --legacy_list_symbols."
            )
        if options.list_symbols:
            options.list_symbols = tuple(list(options.list_symbols))
        elif options.legacy_list_symbols:
            options.list_symbols = ('o', '*', '+', '-')
        else:
            options.list_symbols = ('*', '-', 'o', '+')
    else:
        if options.legacy_list_symbols:
            sys.exit(
                "You can only use --legacy-list-symbols with v3 text output.")
        if options.list_symbols:
            sys.exit("You can only use --list-symbols with v3 text output.")

    if not options.legacy:
        # I.e., V3 formatter
        options.no_dtd = True
        if options.nroff:
            sys.exit("You can only use --nroff in legacy mode")
        if options.raw:
            sys.exit("You can only use --raw in legacy mode")

    # ------------------------------------------------------------------
    # Validate the document unless disabled
    if not options.no_dtd:
        ok, errors = xmlrfc.validate(dtd_path=options.dtd)
        if not ok:
            xml2rfc.log.exception(
                'Unable to validate the XML document: ' + args[0], errors)
            sys.exit(1)

    if options.filename:
        xml2rfc.log.warn(
            "The -f and --filename options are deprecated and will"
            " go away in version 3.0 of xml2rfc.  Use -o instead")
        if options.output_filename and options.filename != options.output_filename:
            xml2rfc.log.warn(
                "You should not specify conflicting -f and -o options.  Using -o %s"
                % options.output_filename)
        if not options.output_filename:
            options.output_filename = options.filename

    # Execute any writers specified
    try:
        source_path, source_base = os.path.split(source)
        source_name, source_ext = os.path.splitext(source_base)
        if options.output_path:
            if os.path.isdir(options.output_path):
                basename = os.path.join(options.output_path, source_name)
            else:
                sys.exit(
                    "The given output path '%s' is not a directory, cannot place output files there"
                    % (options.output_path, ))
        else:
            # Create basename based on input
            basename = os.path.join(source_path, source_name)

        if options.expand and options.legacy:
            # The expanded XML writer needs a separate tree instance with
            # all comments and PIs preserved.  We can assume there are no
            # parse errors at this point, since we didn't call sys.exit()
            # during parsing.
            filename = options.output_filename
            if not filename:
                filename = basename + '.exp.xml'
                options.output_filename = filename
            new_xmlrfc = parser.parse(remove_comments=False,
                                      quiet=True,
                                      normalize=False)
            expwriter = xml2rfc.ExpandedXmlWriter(new_xmlrfc,
                                                  options=options,
                                                  date=options.date)
            expwriter.write(filename)
            options.output_filename = None

        if options.html and options.legacy:
            filename = options.output_filename
            if not filename:
                filename = basename + '.html'
                options.output_filename = filename
            htmlwriter = xml2rfc.HtmlRfcWriter(
                xmlrfc,
                options=options,
                date=options.date,
                templates_dir=options.template_dir or None)
            htmlwriter.write(filename)
            options.output_filename = None

        if options.raw:
            filename = options.output_filename
            if not filename:
                filename = basename + '.raw.txt'
                options.output_filename = filename
            rawwriter = xml2rfc.RawTextRfcWriter(xmlrfc,
                                                 options=options,
                                                 date=options.date)
            rawwriter.write(filename)
            options.output_filename = None

        if options.text and options.legacy:
            filename = options.output_filename
            if not filename:
                filename = basename + '.txt'
                options.output_filename = filename
            pagedwriter = xml2rfc.PaginatedTextRfcWriter(
                xmlrfc,
                options=options,
                date=options.date,
                omit_headers=options.omit_headers,
            )
            pagedwriter.write(filename)
            options.output_filename = None

        if options.nroff:
            filename = options.output_filename
            if not filename:
                filename = basename + '.nroff'
                options.output_filename = filename
            nroffwriter = xml2rfc.NroffRfcWriter(xmlrfc,
                                                 options=options,
                                                 date=options.date)
            nroffwriter.write(filename)
            options.output_filename = None

        # --- End of legacy formatter invocations ---

        if options.expand and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  normalize=False,
                                  strip_cdata=False,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.exp.xml'
                options.output_filename = filename
            #v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc, options=options, date=options.date)
            #xmlrfc.tree = v2v3.convert2to3()
            expander = xml2rfc.ExpandV3XmlWriter(xmlrfc,
                                                 options=options,
                                                 date=options.date)
            expander.write(filename)
            options.output_filename = None

        if options.v2v3:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  normalize=False,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.v2v3.xml'
                options.output_filename = filename
            v2v3writer = xml2rfc.V2v3XmlWriter(xmlrfc,
                                               options=options,
                                               date=options.date)
            v2v3writer.write(filename)
            options.output_filename = None

        if options.preptool:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.prepped.xml'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc,
                                         options=options,
                                         date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            preptool = xml2rfc.PrepToolWriter(xmlrfc,
                                              options=options,
                                              date=options.date)
            preptool.write(filename)
            options.output_filename = None

        if options.unprep:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename.replace('.prepped', '') + '.plain.xml'
                options.output_filename = filename
            unprep = xml2rfc.UnPrepWriter(xmlrfc,
                                          options=options,
                                          date=options.date)
            unprep.write(filename)
            options.output_filename = None

        if options.text and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.txt'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc,
                                         options=options,
                                         date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc,
                                          options=options,
                                          date=options.date,
                                          liberal=True,
                                          keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            if xmlrfc.tree:
                writer = xml2rfc.TextWriter(xmlrfc,
                                            options=options,
                                            date=options.date)
                writer.write(filename)
                options.output_filename = None

        if options.html and not options.legacy:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.html'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc,
                                         options=options,
                                         date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc,
                                          options=options,
                                          date=options.date,
                                          liberal=True,
                                          keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            if xmlrfc.tree:
                writer = xml2rfc.HtmlWriter(xmlrfc,
                                            options=options,
                                            date=options.date)
                writer.write(filename)
                options.output_filename = None

        if options.pdf:
            xmlrfc = parser.parse(remove_comments=False,
                                  quiet=True,
                                  add_xmlns=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.pdf'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc,
                                         options=options,
                                         date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc,
                                          options=options,
                                          date=options.date,
                                          liberal=True,
                                          keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            if xmlrfc.tree:
                writer = xml2rfc.PdfWriter(xmlrfc,
                                           options=options,
                                           date=options.date)
                writer.write(filename)
                options.output_filename = None

        if options.info:
            xmlrfc = parser.parse(remove_comments=False, quiet=True)
            filename = options.output_filename
            if not filename:
                filename = basename + '.json'
                options.output_filename = filename
            v2v3 = xml2rfc.V2v3XmlWriter(xmlrfc,
                                         options=options,
                                         date=options.date)
            xmlrfc.tree = v2v3.convert2to3()
            prep = xml2rfc.PrepToolWriter(xmlrfc,
                                          options=options,
                                          date=options.date,
                                          liberal=True,
                                          keep_pis=[xml2rfc.V3_PI_TARGET])
            xmlrfc.tree = prep.prep()
            if xmlrfc.tree:
                info = extract_anchor_info(xmlrfc.tree)
                if six.PY2:
                    with open(filename, 'w') as fp:
                        json.dump(info,
                                  fp,
                                  indent=2,
                                  ensure_ascii=False,
                                  encoding='utf-8')
                else:
                    with io.open(filename, 'w', encoding='utf-8') as fp:
                        json.dump(info, fp, indent=2, ensure_ascii=False)
                if not options.quiet:
                    xml2rfc.log.write('Created file', filename)

    except xml2rfc.RfcWriterError as e:
        xml2rfc.log.write(e.msg)
        xml2rfc.log.write('Unable to complete processing %s' % args[0])
        sys.exit(1)
Esempio n. 5
0
def get_draft_meta(form):
    """Stage the uploaded draft files and collect the draft's meta-data.

    Every uploaded file whose extension is listed in ``form.formats`` is
    written to ``settings.IDSUBMIT_STAGING_PATH`` as
    ``<filename>-<revision>.<ext>``.  When an XML source was uploaded
    without a text file, the text rendering is generated with xml2rfc.
    Whenever XML was uploaded, the staged text file is parsed and stored
    on ``form.parsed_draft``.

    Args:
        form: The submission form.  This function reads ``fields``,
            ``formats``, ``cleaned_data``, ``filename``, ``revision``,
            ``authors`` and ``abstract`` and, on the XML path, ``xmltree``.
            When no XML was uploaded and ``form.authors`` or
            ``form.abstract`` is empty, ``form.parsed_draft`` must already
            have been set by the caller.

    Returns:
        A tuple ``(authors, abstract, file_name, file_size)``:
        ``authors`` is ``form.authors`` or a list of dicts with the keys
        ``name``, ``email``, ``affiliation`` and ``country``; ``abstract``
        is ``form.abstract`` or the abstract of the parsed draft;
        ``file_name`` maps each staged extension to its path; and
        ``file_size`` is the size of the text file (measured on disk when
        XML was uploaded, otherwise the size reported by the upload).

    Raises:
        ValidationError: If xml2rfc fails to render the text file.
    """

    def turn_into_unicode(s):
        """Return *s* as text; ``None`` becomes the empty string."""
        if s is None:
            return u""
        if not isinstance(s, bytes):
            # Already text (``unicode`` on Python 2, ``str`` on Python 3).
            return s
        try:
            return s.decode("utf-8")
        except UnicodeDecodeError:
            # latin-1 maps every byte value, so this fallback cannot fail.
            return s.decode("latin-1")

    authors = []
    file_name = {}
    abstract = None
    file_size = None

    # Write each uploaded file of a supported format to the staging area.
    for ext in form.fields.keys():
        if ext not in form.formats:
            continue
        upload = form.cleaned_data[ext]
        if not upload:
            continue

        staged_path = os.path.join(
            settings.IDSUBMIT_STAGING_PATH,
            '%s-%s.%s' % (form.filename, form.revision, ext))
        file_name[ext] = staged_path
        with open(staged_path, 'wb+') as destination:
            for chunk in upload.chunks():
                destination.write(chunk)

    if form.cleaned_data['xml']:
        if not form.cleaned_data.get('txt'):
            # No text file was uploaded: render one from the XML source.
            file_name['txt'] = os.path.join(
                settings.IDSUBMIT_STAGING_PATH,
                '%s-%s.txt' % (form.filename, form.revision))
            try:
                pagedwriter = xml2rfc.PaginatedTextRfcWriter(form.xmltree,
                                                             quiet=True)
                pagedwriter.write(file_name['txt'])
            except Exception as e:
                raise ValidationError("Error from xml2rfc: %s" % e)
        # Measure the staged text file whether it was uploaded or generated,
        # so the size is never left as None when both formats were uploaded.
        file_size = os.stat(file_name['txt']).st_size
        # Some meta-information, such as the page-count, can only
        # be retrieved from the generated text file.  Provide a
        # parsed draft object to get at that kind of information.
        # Read bytes and decode explicitly so this works on Python 2 and 3.
        with open(file_name['txt'], 'rb') as txt_file:
            form.parsed_draft = Draft(txt_file.read().decode('utf8'),
                                      txt_file.name)
    else:
        file_size = form.cleaned_data['txt'].size

    if form.authors:
        authors = form.authors
    else:
        # If we don't have an xml file, try to extract the
        # relevant information from the text file
        for author in form.parsed_draft.get_author_list():
            (full_name, _first_name, _middle_initial, _last_name,
             _name_suffix, email, country, company) = author

            # Strip line breaks and angle brackets from the display name.
            author_name = full_name.replace("\n", "").replace(
                "\r", "").replace("<", "").replace(">", "").strip()

            if email:
                try:
                    validate_email(email)
                except ValidationError:
                    # Drop addresses that fail validation rather than
                    # rejecting the whole submission.
                    email = ""

            authors.append({
                "name": turn_into_unicode(author_name),
                "email": turn_into_unicode(email),
                "affiliation": turn_into_unicode(company),
                "country": country
            })

    if form.abstract:
        abstract = form.abstract
    else:
        abstract = form.parsed_draft.get_abstract()

    return authors, abstract, file_name, file_size