Exemple #1
0
def modify_style(node):
    """
    Split the 'style' attribute of node into individual attributes.

    The style text is treated as a ';' separated list of 'name: value'
    declarations.  A declaration whose name is listed in wp.style_properties
    is copied onto node as an attribute of that name; any other declaration
    is dropped.  Every promotion, removal and malformed declaration is
    reported through log.error, and the 'style' attribute itself is always
    deleted before returning.

    node -> element whose attrib mapping contains a 'style' entry
    """
    style_text = node.attrib['style']
    log.note("modify_style check '{0}' in '{1}'".format(
        style_text, node.tag))

    props_to_check = wp.style_properties

    for prop in style_text.split(';'):
        if not prop.strip():
            # Empty declarations (trailing ';', ';;', '; ') carry no
            # information, so skip them instead of reporting an error.
            continue

        # Split on the first ':' only - a value may itself contain colons,
        # for example url(http://example.com/x).
        pieces = prop.split(':', 1)
        if len(pieces) != 2:
            log.error(
                "Malformed field '{0}' in style attribute found. Field removed."
                .format(prop.strip()),
                where=node)
            continue
        p = pieces[0].strip()
        v = pieces[1].strip()  # May have leading blank
        log.note("   modify_style - p={0}  v={1}".format(p, v))
        # we will deal with the change of values later when the attribute list is processed.
        if p in props_to_check:
            log.error("Style property '{0}' promoted to attribute".format(p),
                      where=node)
            node.attrib[p] = v
        else:
            log.error("Style property '{0}' removed".format(p), where=node)
    del node.attrib['style']
Exemple #2
0
def checkTree(tree):
    """
    Check every svg element reachable from the root of an XML tree.

    Two document shapes are handled:
    1. The root itself is an svg element (with or without a namespace): the
       root is checked directly.
    2. Anything else (for instance an rfc document): every descendant svg
       element in the http://www.w3.org/2000/svg namespace is checked.

    The module level errorCount is reset to zero first.  Returns True when
    errorCount is still zero once all checks have run.
    """
    global errorCount

    errorCount = 0
    root = tree.getroot()

    # Reduce a '{namespace}name' tag to its local name
    local_name = root.tag
    if local_name[0] == '{':
        local_name = local_name[local_name.rfind('}') + 1:]

    if local_name == 'svg':
        check(root, 0)
        return errorCount == 0

    # Not an svg document - locate all of the embedded svg elements
    svg_elements = root.xpath(
        "//x:svg", namespaces={'x': 'http://www.w3.org/2000/svg'})

    for svg in svg_elements:
        if len(svg_elements) > 1:
            log.note("Checking svg element at line {0} in file {1}".format(
                1, "file"))
        check(svg, 0)

    return errorCount == 0
Exemple #3
0
 def sendCommand(self, cmd):
     """
     Write a single command line to the speller's input stream.

     A newline is appended to cmd.  When self.iso8859 is set the text is
     round-tripped through iso-8859-1 using the 'replaceWithSpace' error
     handler before it is written; otherwise it is written unchanged.
     The text is logged through log.note in both cases.
     """
     text = cmd + u'\n'
     if not self.iso8859:
         log.note(text)
     else:
         log.note(u"Pre Encode = " + text)
         encoded = text.encode('iso-8859-1', 'replaceWithSpace')
         text = encoded.decode('iso-8859-1')
     self.stdin.write(text)
Exemple #4
0
    def checkWord(self, wordToCheck):
        """
        Send one piece of text to the speller and collect what it reports.

        The text is written as a '^ ' prefixed line and replies are read
        until an empty line arrives.  Replies starting with '*' are skipped;
        replies starting with '#' or '&' are turned into result entries and
        anything else is reported through log.error.

        Returns a list of tuples of the form
        ( first character of the reply ('#' or '&'),
          offset parsed from the reply,
          None,
          first captured field of the reply,
          trailing text of an '&' reply (None for '#'),
          0
        )
        """
        request = u'^ ' + wordToCheck + u'\n'
        if self.iso8859:
            log.note(u"Pre Encode = " + request)
            encoded = request.encode('iso-8859-1', 'replaceWithSpace')
            request = encoded.decode('iso-8859-1')
        else:
            log.note(request)
        self.stdin.write(request)

        findings = []

        #  Keep reading until the speller sends an empty line
        while True:
            reply = self.stdout.readline()
            if six.PY2:
                #  On Python 2 the reply is decoded here
                codec = 'iso-8859-1' if self.iso8859 else 'utf-8'
                reply = reply.decode(codec)
            reply = reply.strip()
            log.note('spell out line = ' + reply)

            if not reply:
                break

            marker = reply[0]
            if marker == '*':
                continue

            parsed = self.aspell_re.match(reply)
            if not parsed:
                log.error(
                    "Internal error trying to match the line '{0}'".format(
                        reply))
                continue

            if marker == '#':
                offset = int(parsed.group(2))
                options = None
            elif marker == '&':
                offset = int(parsed.group(4))
                options = parsed.group(5)
            else:
                log.error(
                    "internal error - aspell says line is '{0}'".format(reply))
                continue

            findings.append(
                (marker, offset, None, parsed.group(1), options, 0))

        return findings
Exemple #5
0
    def processLine(self, allWords):
        """
        Currently always returns an empty list.

        NOTE(review): the unconditional 'return []' at the top of the body
        makes everything after it unreachable.  Whether that is a deliberate
        way of disabling this method cannot be told from this code - confirm
        before removing either the return or the code below it.

        The unreachable code would treat allWords as a sequence of
        (text string, tree element) pairs, send each text to the speller and
        return a list of tuples of the form
        ( first character of the speller reply ('&' or '#'),
          offset parsed from the reply,
          the tree element of the pair,
          first captured field of the reply,
          trailing text of an '&' reply (None for '#'),
          index of the pair within allWords
        )
        """
        return []
        # ---- everything below is unreachable because of the return above ----
        result = []
        setNo = 0
        for wordSet in allWords:
            # Send the text of this pair as a '^ ' prefixed line
            newLine = u'^ ' + wordSet[0] + u'\n'
            if self.iso8859:
                log.note(u"Pre Encode = " + newLine)
                newLine = newLine.encode('iso-8859-1', 'replaceWithSpace')
                newLine = newLine.decode('iso-8859-1')
            else:
                newLine = newLine  # .encode('utf-8')
            log.note(newLine)
            self.stdin.write(newLine)

            # Read replies until an empty line is seen
            while True:
                line = self.stdout.readline()
                if six.PY2:
                    if self.iso8859:
                        #  log.note(" ".join("{:02x}".format(c) for c in line))
                        line = line.decode('iso-8859-1')
                    else:
                        line = line.decode('utf-8')
                line = line.strip()
                log.note('spell out line = ' + line)

                if len(line) == 0:
                    break

                # Replies starting with '*' produce no result entry
                if line[0] == '*':
                    continue

                m = self.aspell_re.match(line)
                if not m:
                    log.error(
                        "Internal error trying to match the line '{0}'".format(
                            line))
                    continue

                # '#' and '&' replies keep the offset in different groups
                if line[0] == '#':
                    offset = int(m.group(2))
                    options = None
                elif line[0] == '&':
                    offset = int(m.group(4))
                    options = m.group(5)
                else:
                    log.error(
                        "internal error - aspell says line is '{0}'".format(
                            line))
                    continue

                tuple = (line[0], offset, wordSet[1], m.group(1), options,
                         setNo)
                result.append(tuple)
            setNo += 1

        return result
Exemple #6
0
    def __init__(self, config):
        """
        Locate the spelling program, verify what it is and start it in pipe
        mode.

        config -> configuration object.  The 'spell' section is read for
                  'program', 'suggest', 'window', 'color', 'dictionaries'
                  and 'personal'; config.options.output_filename is looked
                  at once the program is running.

        Raises RfcLintError when the program cannot be found, when the
        output of "program -v" is not the expected Ispell style version
        line naming the program, or when the started program does not print
        a banner containing its own name.
        """
        CursesCommon.__init__(self, config)
        program = config.get('spell', 'program')
        self.suggest = config.getBoolean('spell', 'suggest', True)
        self.window = config.getInt('spell', 'window', 15)
        coloring = config.get('spell', 'color')
        if coloring and coloring in SpellerColors:
            self.color_start = SpellerColors[coloring]
            self.color_end = colorama.Style.RESET_ALL
            if self.color_start == '':
                self.color_end = self.color_start
        elif os.name == 'nt':
            self.color_start = ''
            self.color_end = ''
        else:
            self.color_start = colorama.Style.BRIGHT
            self.color_end = colorama.Style.RESET_ALL

        if program:
            look_for = which(program)
            if not look_for and os.name == 'nt':
                look_for = which(program + '.exe')
            if not look_for:
                raise RfcLintError(
                    "The program '{0}' does not exist or is not executable".
                    format(program))
            program = look_for
        else:
            # Nothing configured - fall back to aspell
            if os.name == "nt":
                look_for = "aspell.exe"
                program = which(look_for)
                if not program:
                    program = which(
                        "c:\\Program Files (x86)\\Aspell\\bin\\aspell.exe")
            else:
                look_for = "aspell"
                program = which(look_for)
            if not program:
                raise RfcLintError(
                    "The program '{0}' does not exist or is not executable".
                    format(look_for))

        spellBaseName = os.path.basename(program)
        spellBaseName = spellBaseName.replace('.exe', '')

        # I want to know what the program and version really are.
        # The exit status of the version query is deliberately not checked.

        p = subprocess.Popen([program, "-v"], stdout=subprocess.PIPE)
        versionOut, _ = p.communicate()

        m = re.match(
            r".*International Ispell Version [\d.]+ \(but really (\w+) ([\d.]+).*",
            versionOut.decode('utf-8'))
        if m is None:
            raise RfcLintError(
                "Error starting the spelling program\n{0}".format(program))

        if m.group(1).lower() != spellBaseName:
            raise RfcLintError(
                "Error: The wrong spelling program was started.  Expected "
                "{0} and got {1}".format(spellBaseName, m.group(1)))

        codecs.register_error('replaceWithSpace', ReplaceWithSpace)

        self.iso8859 = False
        if spellBaseName == 'aspell':
            log.note("xx - " + m.group(2))
            if m.group(2)[:3] == '0.5':
                # This version does not support utf-8
                self.iso8859 = True
                log.note("Use iso8859")
        elif spellBaseName == 'hunspell':
            # minumum version of hunspell is 1.1.6, but that is probably not something
            # we would find in the wild anymore.  We are therefore not going to check it.
            # However, right now the only version I have for Windows does not support utf-8
            # so until I get a better version, force the use of iso8859 there.
            if os.name == 'nt':
                self.iso8859 = True
                log.note("Use iso8859")

        # now let's build the full command

        cmdLine = [program, '-a']  # always use pipe mode
        dicts = config.getList('spell', 'dictionaries')
        if dicts:
            dictList = ''
            for dictName in dicts:
                if spellBaseName == 'hunspell':
                    dictName = dictName + '.dic'
                if os.path.isabs(dictName):
                    dictPath = dictName
                else:
                    dictPath = os.path.join(os.getcwd(), dictName)
                dictPath = os.path.normpath(dictPath)
                if not os.path.exists(dictPath):
                    log.error(
                        "Additional Dictionary '{0}' ignored because it was not found"
                        .format(dictName.replace('.dic', '')))
                    continue
                if spellBaseName == 'aspell':
                    cmdLine.append("--add-extra-dicts")
                    cmdLine.append(dictPath)
                else:
                    dictList = dictList + "," + dictPath.replace('.dic', '')
            if spellBaseName == 'hunspell':
                cmdLine.append('-d')
                cmdLine.append("en_US" + dictList)

        personal = config.get('spell', 'personal')
        if personal:
            if os.path.isabs(personal):
                personalPath = personal
            else:
                personalPath = os.path.join(os.getcwd(), personal)
            personalPath = os.path.normpath(personalPath)
            if not os.path.exists(personalPath):
                log.error(
                    "Personal Dictionary '{0}' ignored because it was not found"
                    .format(personal))
            else:
                cmdLine.append('-p')
                cmdLine.append(personalPath)

        # Every entry of cmdLine is handed to the program as one argument, so
        # an option and its value have to be two separate entries.
        if self.iso8859:
            if spellBaseName == 'aspell':
                cmdLine.append('--encoding=iso8859-1')
            else:
                # Make sure if we have a better version of hunspell that it will do the right thing
                cmdLine.append('-i')
                cmdLine.append('iso-8859-1')
        elif spellBaseName == 'hunspell':
            cmdLine.append('-i')
            cmdLine.append('utf-8')

        log.note("spell command = '{0}'".format(" ".join(cmdLine)))
        self.p = subprocess.Popen(cmdLine,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)
        if six.PY2:
            # Python 2: only stdin is wrapped, stdout is left as the raw pipe
            if os.name == 'nt':
                self.stdin = codecs.getwriter('iso-8859-1')(self.p.stdin)
                self.stdout = self.p.stdout
            else:
                self.stdin = codecs.getwriter('utf8')(self.p.stdin)
                self.stdout = self.p.stdout
                # self.stdout = codecs.getreader('utf8')(self.p.stdout)
        else:
            if self.iso8859:
                self.stdin = io.TextIOWrapper(self.p.stdin,
                                              encoding='iso-8859-1',
                                              errors='replaceWithSpace',
                                              line_buffering=True)
                self.stdout = io.TextIOWrapper(self.p.stdout,
                                               encoding='iso-8859-1',
                                               errors='replaceWithSpace')
            else:
                self.stdin = io.TextIOWrapper(self.p.stdin,
                                              encoding='utf-8',
                                              line_buffering=True)
                self.stdout = io.TextIOWrapper(self.p.stdout, encoding='utf-8')

        #  Check that we got a good return: the first line printed has to
        #  name the program that was verified above.
        line = self.stdout.readline()
        log.note(line)
        if line.lower().find(spellBaseName) < 0:
            raise RfcLintError(
                "The program '{0}' did not start successfully".format(program))

        # A run of letters together with any surrounding non-letters
        self.word_re = re.compile(r'([\W\d_]*[^\W\d_]+[\W\d_]*)',
                                  re.UNICODE | re.MULTILINE)

        # pattern to match output of aspell
        self.aspell_re = re.compile(r".\s(\S+)\s(\d+)\s*((\d+): (.+))?",
                                    re.UNICODE)

        # Starts and ends with a letter; letters and apostrophe-like
        # characters are allowed in between
        self.spell_re = re.compile(
            r'[^\W\d_]([^\W\d_]|[\'\u00B4\u2019])*[^\W\d_]', re.UNICODE)

        if config.options.output_filename is not None:
            self.ignoreWords = []
            self.lastElement = None
            self.textLocation = True
Exemple #7
0
def value_ok(obj, v):
    """
    Check that the value v is legal for obj.

    obj -> an attribute name (key of wp.properties), an internal type name
           such as '<color>' (key of wp.basic_types), or a literal string
           the value is compared against
    v   -> the value to check

    Returns a tuple (value ok?, replacement value):
    - obj is a string starting with '+': always ok; the second item is v
      when v looks like a decimal number with an optional trailing '%',
      otherwise None
    - obj is any other plain string: ok only when v equals obj
    - obj names a property or basic type: ok as soon as one entry of its
      value list accepts v.  If none does, a replacement is suggested - a
      font list reduced to the generic families for 'font-family', a mapped
      or heuristically chosen colour for '<color>', otherwise the last
      replacement suggested by any entry (possibly None).
    - anything else: (False, None)
    """

    log.note("value_ok look for %s in %s" % (v, obj))
    # Look if the object is a real attribute, or we recursed w/ an
    # internal type name such as '<color>' (i.e. a basic_type)
    if obj in wp.properties:
        values = wp.properties[obj]
    elif obj in wp.basic_types:
        values = wp.basic_types[obj]
    elif isinstance(obj, str):
        # values to check is a string
        if obj[:1] == '+':  # slice, so that an empty obj cannot raise
            n = re.match(r'\d+\.\d+%?$', v)
            rv = None
            if n:
                rv = n.group()
            return (True, rv)
        # if obj[0] == '[':
        #     return check_some_props(obj, v)
        if v == obj:
            return (True, v)
        return (False, None)
    else:  # Unknown attribute
        return (False, None)

    log.note("  legal value list {0}".format(values))
    if len(values) == 0:
        # Empty tuples have nothing to check, assume it is correct
        return (True, None)

    replaceWith = None
    for val in values:
        ok_v, matched_v = value_ok(val, v)
        if ok_v:
            return (True, matched_v)
        if matched_v:
            replaceWith = matched_v

    log.note(" --- skip to end -- {0}".format(obj))
    v = v.lower()
    if obj == 'font-family':
        # Strip the blanks around each entry so that "Times, serif" still
        # yields 'serif'
        requested = [name.strip() for name in v.split(',')]
        newFonts = []
        for font in ["sans-serif", "serif", "monospace"]:
            if font in requested:
                newFonts.append(font)
        if len(newFonts) == 0:
            newFonts.append("sans-serif")
        return (False, ",".join(newFonts))
    if obj == '<color>':
        if v in wp.color_map:
            return (False, wp.color_map[v])

        # Heuristic conversion of color or grayscale
        # when we get here, v is a non-conforming color element
        is_hex = v[:1] == '#'  # slice, so that an empty value cannot raise
        if ('rgb(' not in v) and not is_hex:
            return (False, wp.color_default)
        try:
            if is_hex and len(v) == 7:
                # hexadecimal color code
                shade = int(v[1:3], 16) + int(v[3:5], 16) + int(v[5:7], 16)
            elif is_hex and len(v) == 4:
                # short form: each digit stands for a doubled digit
                shade = int(v[1], 16)*16 + int(v[1], 16) + int(v[2], 16)*16 + int(v[2], 16) + \
                        int(v[3], 16)*16 + int(v[3], 16)
            elif 'rgb(' in v:
                triple = v.split('(')[1].split(')')[0].split(',')
                if '%' in triple[0]:
                    shade = sum(
                        [int(t.replace('%', '')) * 255 / 100 for t in triple])
                else:
                    shade = sum([int(t) for t in triple])
            else:
                shade = 0
        except ValueError:
            # Components that are not numbers: treat like any other
            # unrecognised form
            shade = 0

        log.note(
            u"Color or grayscale heuristic applied to: '{0}' yields shade: '{1}'"
            .format(v, shade))
        if shade > wp.color_threshold:
            return (False, 'white')
        return (False, wp.color_default)

    return (False, replaceWith)
Exemple #8
0
def main():
    """
    Main function for xmldiff

    Parses the command line, loads the LEFT and RIGHT XML files, computes
    the tree edit distance between them, applies the edits to the left tree
    and renders the result through an HTML template.  The HTML goes to the
    file named with -o/--out, or to standard output when none is given.

    Exits with status 2 after printing the help text when fewer than two
    file arguments are supplied, with status 1 when a file cannot be
    parsed, and with an error message when an input or template file does
    not exist.
    """

    formatter = optparse.IndentedHelpFormatter(max_help_position=40)
    optionparser = optparse.OptionParser(usage='xmldiff LEFT RIGHT [OPTIONS] '
                                         '...\nExample: rfc-xmldiff '
                                         'draft1.xml draft2.xml',
                                         formatter=formatter)

    value_options = optparse.OptionGroup(optionparser, 'Other Options')
    value_options.add_option('-o',
                             '--out',
                             dest='output_filename',
                             metavar='FILE',
                             help='specify an explicit output filename',
                             default=None)
    value_options.add_option('--debug',
                             action="store_true",
                             help='Show debugging output')
    value_options.add_option('--raw',
                             action="store_true",
                             help='Diff using the raw tree')
    value_options.add_option('-t',
                             '--template',
                             dest='template',
                             metavar='FILE',
                             help='specify the HTML template filename',
                             default='base.html')
    value_options.add_option('-V',
                             '--version',
                             action='callback',
                             callback=display_version,
                             help='display the version number and exit')
    value_options.add_option('-C',
                             '--clear-cache',
                             action='store_true',
                             dest='clear_cache',
                             default=False,
                             help='purge the cache and exit')
    value_options.add_option(
        '-c',
        '--cache',
        dest='cache',
        help='specify a primary cache directory to write to; '
        'default: try [ %s ]' % ', '.join(CACHES))
    value_options.add_option('-q',
                             '--quiet',
                             action='store_true',
                             help='dont print anything')
    value_options.add_option('-v',
                             '--verbose',
                             action='store_true',
                             help='print extra information')
    value_options.add_option('--no-resolve-entities',
                             dest='noEntity',
                             action="store_true",
                             help="Don't resolve entities in the XML")
    value_options.add_option(
        '-N',
        '--no-network',
        action='store_true',
        default=False,
        help='don\'t use the network to resolve references')
    value_options.add_option('-X',
                             '--no-xinclude',
                             action='store_true',
                             dest='no_xinclude',
                             help='don\'t resolve any xi:include elements')
    # Stored as options.no_defaults: True unless -D is given, and passed
    # on below as attribute_defaults.
    value_options.add_option('-D',
                             '--no-defaults',
                             action='store_false',
                             default=True,
                             help="don't add default attributes")

    optionparser.add_option_group(value_options)

    # --- Parse and validate arguments ----------------------------

    (options, args) = optionparser.parse_args()

    if options.clear_cache:
        # NOTE(review): the help text says "and exit" but nothing here
        # exits - presumably clear_cache does; verify.
        clear_cache(options.cache)

    # Both LEFT and RIGHT are required
    if len(args) < 2:
        optionparser.print_help()
        sys.exit(2)

    # Setup warnings module
    # rfclint.log.warn_error = options.warn_error and True or False
    log.quiet = options.quiet and True or False
    log.verbose = options.verbose

    # Load the left file
    leftSource = args[0]
    if not os.path.exists(leftSource):
        sys.exit('No such file: ' + leftSource)

    log.note("Parse input files")
    parser = XmlRfcParser(leftSource,
                          verbose=log.verbose,
                          quiet=log.quiet,
                          no_network=options.no_network,
                          no_xinclude=options.no_xinclude,
                          resolve_entities=not options.noEntity,
                          attribute_defaults=options.no_defaults)
    try:
        ll = parser.parse(remove_pis=False,
                          strip_cdata=False,
                          remove_comments=False).tree
        leftXml = BuildDiffTree(ll, options)
        if not options.raw:
            leftXml = AddParagraphs(leftXml)
        leftFile_base = os.path.basename(leftSource)
        SourceFiles.LeftDone()
    except XmlRfcError as e:
        log.exception('Unable to parse the XML document: ' + leftSource, e)
        sys.exit(1)

    # Load the right file
    rightSource = args[1]
    if not os.path.exists(rightSource):
        sys.exit('No such file: ' + rightSource)

    parser = XmlRfcParser(rightSource,
                          verbose=log.verbose,
                          quiet=log.quiet,
                          no_network=options.no_network,
                          no_xinclude=options.no_xinclude,
                          resolve_entities=not options.noEntity,
                          attribute_defaults=options.no_defaults)
    try:
        rightXml = parser.parse(remove_pis=False,
                                strip_cdata=False,
                                remove_comments=False)
        rightXml = BuildDiffTree(rightXml.tree, options)
        if not options.raw:
            rightXml = AddParagraphs(rightXml)
        rightFile_base = os.path.basename(rightSource)
    except XmlRfcError as e:
        log.exception('Unable to parse the XML document: ' + rightSource, e)
        sys.exit(1)

    log.note("Read files for source display")
    cache = CachingResolver(library_dirs=[])

    leftSources = ""
    leftSourceNames = ""
    for i, name in enumerate(SourceFiles.leftFiles):
        if name[:5] == 'file:':
            path = urlparse(name)[2]
            # '/C:/...' style path: drop the leading slash
            if path[2] == ':':
                path = path[1:]
        else:
            path = cache.getReferenceRequest(name)[0]

        if six.PY2:
            with open(path, "rb") as f:
                leftLines = f.read()
                leftLines = leftLines.decode('utf8').splitlines(1)
        else:
            # Plain "r": the "U" flag is rejected by Python 3.11 and later
            with open(path, "r", encoding="utf8") as f:
                leftLines = f.readlines()

        leftSources += u'<div id="L_File{0}" class="tabcontent">\n'.format(i)
        leftLines = [
            escape(x).replace(' ', '&nbsp;').replace('"', '&quot;')
            for x in leftLines
        ]
        leftSources += formatLines(leftLines, 'L', i)
        leftSources += u'</div>\n'

        leftSourceNames += u'<option label="{0}" value="L_File{1}">{2}</option>\n'. \
                           format(path, i, path)

    rightSources = ""
    rightSourceNames = ""

    for i, name in enumerate(SourceFiles.rightFiles):
        if name[:5] == 'file:':
            path = urlparse(name)[2]
            # '/C:/...' style path: drop the leading slash
            if path[2] == ':':
                path = path[1:]
        else:
            path = cache.getReferenceRequest(name)[0]

        if six.PY2:
            with open(path, "rb") as f:
                rightLines = f.read().decode('utf8').splitlines(1)
        else:
            # Plain "r": the "U" flag is rejected by Python 3.11 and later
            with open(path, "r", encoding="utf8") as f:
                rightLines = f.readlines()

        rightSources += u'<div id="R_File{0}" class="tabcontent">\n'.format(i)
        rightLines = [
            escape(x).replace(' ', '&nbsp;').replace('"', '&quot;')
            for x in rightLines
        ]
        rightSources += formatLines(rightLines, 'R', i)
        rightSources += u'</div>\n'

        rightSourceNames += '<option label="{0}" value="R_File{1}">{2}</option>\n'. \
                            format(path, i, path)

    log.note("Start computing tree edit distance")
    editSet = distance(leftXml, rightXml, DiffRoot.get_children,
                       DiffRoot.InsertCost, DiffRoot.DeleteCost,
                       DiffRoot.UpdateCost)

    if options.debug:
        print("edit count = " + str(len(editSet)))
        for edit in editSet:
            print(edit.toString())

    log.note("Apply copmuted tree edits")
    if len(editSet) == 0:
        log.info("Files are identical")

    leftXml.applyEdits(editSet)

    log.note("Setup to write html file")
    templates_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'Templates')
    log.note("   template directory = " + templates_dir)

    if six.PY2:
        with open(os.path.join(templates_dir, "resize.js"), "rb") as f:
            allScript = f.read()
    else:
        with open(os.path.join(templates_dir, "resize.js"),
                  "r",
                  encoding="utf8") as f:
            allScript = f.read()

    # A template that is not found as given is looked up in templates_dir
    template_file = options.template
    if not os.path.exists(options.template):
        template_file = os.path.join(templates_dir, options.template)
        if not os.path.exists(template_file):
            sys.exit('No template file: ' + template_file)

    log.note('   template source file: ' + template_file)
    with open(template_file, 'rb') as f:
        html_template = string.Template(f.read().decode('utf8'))

    buffers = {}
    buffers['leftFile'] = leftSources
    buffers['rightFile'] = rightSources
    buffers['body'] = leftXml.ToString()

    subs = {
        'background': '',
        # HTML-escaped values
        'title': 'rfc-xmldiff {0} {1}'.format(leftFile_base, rightFile_base),
        'body': ''.join(buffers['body']),
        'leftFile': buffers['leftFile'],
        'leftSourceNames': leftSourceNames,
        'rightFile': buffers['rightFile'],
        'rightSourceNames': rightSourceNames,
        'allScript': allScript
    }
    output = html_template.substitute(subs)

    if options.output_filename is None:
        write_to(sys.stdout, output)
    else:
        log.note('Write out html file: ' + options.output_filename)
        # The with block closes the file even if the write fails
        with open(options.output_filename, "wb") as out_file:
            out_file.write(output.encode('utf8'))
Exemple #9
0
def check(el, depth=0):
    """
    Validate the element *el*, and recursively its children, against the
    RFC 7996 (RFC Tiny SVG) tables held in ``wp``, repairing the tree in
    place as it goes.

    Attributes that are not permitted are deleted, property values that are
    not permitted are replaced or deleted, and children that are not
    permitted are detached.  A root element (depth 0) without a viewBox
    gets one built from its width and height when both are usable.

    Return False if the caller should remove *el* from the tree when
    writing it back out, True otherwise.
    """
    global errorCount

    # Leading padding shared by every trace line emitted for this element.
    pad = ' ' * (depth * indent)
    log.note("%s tag = %s" % (pad, el.tag))

    # ElementTree-style tags carry the namespace in braces; split it off.
    element, ns = strip_prefix(el.tag, el)

    # The element must live in no namespace or in one of the SVG namespaces.
    if not (ns is None or ns in wp.svg_urls):
        log.warn("Element '{0}' in namespace '{1}' is not allowed".format(
            element, ns),
                 where=el)
        return False  # caller removes el

    log.note("%s element % s: %s" % (pad, element, el.attrib))
    if element not in wp.elements:
        errorCount += 1
        log.warn("Element '{0}' not allowed".format(element), where=el)
        return False  # caller removes el

    permitted_attrs = wp.elements[element]

    # Explode a style="..." attribute into individual attributes first so
    # that the loop below validates each of them.
    if 'style' in el.attrib:
        modify_style(el)

    # Deletions are collected and applied after the walk over the attributes.
    doomed_attrs = []
    for qname, val in el.attrib.items():
        attr, ns = strip_prefix(qname, el)
        log.note("%s attr %s = %s (ns = %s)" % (pad, attr, val, ns))

        # Attributes from a foreign namespace: xmlns-style ones are left
        # alone, anything else is dropped.  Either way no further checks.
        if ns is not None and ns not in wp.svg_urls:
            if ns not in wp.xmlns_urls:
                log.warn(
                    "Element '{0}' does not allow attributes with namespace '{1}'"
                    .format(element, ns),
                    where=el)
                doomed_attrs.append(qname)
            continue

        allowed_here = attr in permitted_attrs
        is_property = attr in wp.properties

        # Neither an attribute of this element nor a generic property.
        if not (allowed_here or is_property):
            errorCount += 1
            log.warn("The element '{0}' does not allow the attribute '{1}',"
                     " attribute to be removed.".format(element, attr),
                     where=el)
            doomed_attrs.append(qname)
            continue

        # Element-specific attributes that are not properties need no
        # value check.
        if not is_property:
            continue

        allowed_values = wp.properties[attr]

        # Method #1 of value checking is not currently used: the trailing
        # `False` keeps this test from ever succeeding.
        if allowed_values and allowed_values[0] == '[' and False:
            continue

        fits, replacement = value_ok(attr, val)
        if not allowed_values or fits:
            continue

        errorCount += 1
        if replacement is None:
            doomed_attrs.append(qname)
            log.warn(
                u"The attribute '{1}' does not allow the value '{0}',"
                u" attribute to be removed".format(val, attr),
                where=el)
        else:
            el.attrib[attr] = replacement
            log.warn(
                u"The attribute '{1}' does not allow the value '{0}',"
                u" replaced with '{2}'".format(val, attr, replacement),
                where=el)

    for qname in doomed_attrs:
        del el.attrib[qname]

    # The root svg element needs a viewBox; try to derive a missing one.
    if depth == 0 and not el.get("viewBox"):
        log.warn(
            "The attribute viewBox is required on the root svg element",
            where=el)
        svgw = maybefloat(el.get('width'))
        svgh = maybefloat(el.get('height'))
        try:
            if svgw and svgh:
                newValue = '0 0 %s %s' % (svgw, svgh)
                log.warn("Trying to put in the attribute with value '{0}'".
                         format(newValue),
                         where=el)
                el.set('viewBox', newValue)
        except ValueError as e:
            log.error("Error when calculating SVG size: %s" % e, where=el)

    allowed_children = (wp.element_children[element]
                        if element in wp.element_children else [])

    # Children are detached only after the iteration over el has finished.
    rejected = []
    for child in el:
        log.note("%schild, tag = %s" % (pad, child.tag))
        # Nodes whose tag is not a string are skipped untouched.
        if not isinstance(child.tag, str):
            continue

        ch_tag, ns = strip_prefix(child.tag, el)
        if ns not in wp.svg_urls:
            log.warn(
                "The namespace {0} is not permitted for svg elements.".format(
                    ns),
                where=child)
            rejected.append(child)
            continue

        if ch_tag in allowed_children:
            # Recurse; a False verdict means the child must go.
            if not check(child, depth + 1):
                rejected.append(child)
        else:
            log.warn(
                "The element '{0}' is not allowed as a child of '{1}'".format(
                    ch_tag, element),
                where=child)
            rejected.append(child)

    for child in rejected:
        el.remove(child)
    return True  # OK
Exemple #10
0
    def applyEdits(self, editList):
        """
        Fold the edits in editList into the tree rooted at this node.

        Delete/match/rename edits are applied immediately by flagging or
        cross-linking the nodes they name.  Every other edit is treated as
        a right-hand node that still has to be placed in this tree; those
        are retried in repeated passes until a pass places nothing new.

        editList -- iterable of edit items exposing `operation`, `left`
                    and `right`.

        Returns the number of edits that could not be placed.
        """
        newEdits = []
        # Pass 1: record deletions and pair up matched/renamed nodes;
        # anything else is queued for the placement loop below.
        for edit in editList:
            if edit.operation == EditItem.OP_DELETE:
                edit.left.deleted = True
            elif edit.operation == EditItem.OP_MATCH:
                edit.left.matchNode = edit.right
                edit.right.matchNode = edit.left
            elif edit.operation == EditItem.OP_RENAME:
                edit.left.matchNode = edit.right
                edit.right.matchNode = edit.left
            else:
                newEdits.append(edit)

        # presumably these set the insertTree / deleteTree flags consulted
        # below -- both helpers are defined elsewhere, confirm there.
        self.matchNode.markInsertTrees()
        self.markDeleteTrees()

        # Placement loop: each pass walks the still-pending edits and
        # re-queues (in newEdits) the ones it cannot place yet.  The loop
        # ends when a pass leaves the pending list the same length.
        while True:
            editList = newEdits
            newEdits = []

            for edit in editList:
                # Already processed
                if edit.right.matchNode is not None:
                    continue

                # if isinstance(edit.right, DiffElement) and \
                #    (edit.right.xml.tag == 'back' or edit.right.xml.tag == 'middle'):
                #     thisIsABreak = 9

                # Case A: the node is flagged insertTree -- it is cloned as
                # a whole subtree under the match of its right-hand parent.
                if edit.right.insertTree:
                    matchingParent = edit.right.parent.matchNode
                    if matchingParent is None:
                        # we don't know where it goes yet
                        newEdits.append(edit)
                        continue

                    # If a node has no children, then we can add it as a child
                    if edit.right.parent.matchNode.children is None or \
                       len(edit.right.parent.matchNode.children) == 0:
                        if edit.right.parent.matchNode.children is None:
                            edit.right.parent.matchNode.children = []
                        edit.right.parent.matchNode.children.append(
                            edit.right.cloneTree(edit.right.parent.matchNode))
                        continue

                    # If we have a matched preceding node, put it after that one
                    sibling = edit.right.parent.getPredecessor(edit.right)
                    if sibling is not None:
                        if sibling.matchNode is not None:
                            newNode2 = edit.right.cloneTree(matchingParent)
                            matchingParent.insertAfter(sibling.matchNode,
                                                       newNode2)
                            continue

                    # If we have a matching successor node, put it before that one
                    sibling = edit.right.parent.getSuccessor(edit.right)
                    if sibling is not None:
                        if sibling.matchNode is not None:
                            newNode2 = edit.right.cloneTree(matchingParent)
                            matchingParent.insertBefore(
                                sibling.matchNode, newNode2)
                            continue

                    # If all of the left children are deleted and a new right is added.
                    allDeleted = True
                    for child in matchingParent.children:
                        if not child.deleteTree:
                            allDeleted = False
                            break

                    if allDeleted:
                        newNode2 = edit.right.cloneTree(matchingParent)
                        matchingParent.children.append(newNode2)
                        continue

                    # No anchor found on this pass; retry on the next one.
                    newEdits.append(edit)
                    continue

                # Case B: a single inserted node (not a whole insert tree)
                # that will wrap children already present in this tree.

                # Nodes which have undealt with children are deferred
                f = True
                for child in edit.right.children:
                    if child.matchNode is None and not child.insertTree:
                        f = False
                        break
                if not f:
                    newEdits.append(edit)
                    continue

                # Get the list of children that we need to match
                matchList = []
                for child in edit.right.children:
                    if not child.inserted and not child.insertTree:
                        matchList.append(child.matchNode)
                if len(matchList) == 0:
                    newEdits.append(edit)
                    continue

                # Build the list of all common ancestors of these nodes
                commonParents = None

                for child in matchList:
                    # The reversed list is compared index by index below,
                    # so it is presumably ordered root-first -- confirm
                    # against getParents().
                    ancestorList = child.getParents()[::-1]
                    if commonParents is None:
                        commonParents = ancestorList
                    else:
                        # Keep only the prefix shared with this child.
                        if len(ancestorList) < len(commonParents):
                            commonParents = commonParents[:len(ancestorList)]
                        for i in range(
                                0, min(len(ancestorList), len(commonParents))):
                            if ancestorList[i] != commonParents[i]:
                                # NOTE(review): a mismatch at index 0 leaves
                                # commonParents untrimmed -- confirm that is
                                # intended.
                                if i == 0:
                                    pass
                                else:
                                    commonParents = commonParents[:i]
                                break
                if len(commonParents) == 0:
                    # we have no common parents so this is a mess.
                    newEdits.append(edit)
                    continue

                # Last entry of the shared prefix: the node under which the
                # new wrapper is created.
                matchParent = commonParents[-1]

                # If we have a preceding sibling, we need to make sure that we are
                # going to try and insert this after that node.

                sibling = edit.right.parent.getPredecessor(edit.right)

                if sibling and sibling.matchNode:
                    ancestorList = sibling.matchNode.parent.getParents()
                    if matchParent in ancestorList:
                        # Split the matched children by whether they sit
                        # below the sibling's parent (aList) or not (bList),
                        # untangle the smaller group and retry this edit.
                        aList = []
                        bList = []
                        for child in matchList:
                            ancestorList = child.getParents()
                            if sibling.matchNode.parent in ancestorList:
                                aList.append(child)
                            else:
                                bList.append(child)

                        if len(aList) >= len(bList):
                            for child in bList:
                                child.untangle()
                        else:
                            for child in aList:
                                child.untangle()
                        newEdits.append(edit)
                        continue

                    # The sibling's match is among matchParent's ancestors
                    # (getParents(True) -- the meaning of the flag is
                    # defined elsewhere): untangle every matched child and
                    # retry this edit as an insert tree instead.
                    ancestorList = matchParent.getParents(True)
                    if sibling.matchNode in ancestorList:
                        for child in edit.right.children:
                            if child.matchNode:
                                child.matchNode.untangle()
                        edit.right.insertTree = True
                        newEdits.append(edit)
                        continue

                # create the new node
                newNode = edit.right.clone()
                newNode.parent = matchParent
                newNode.inserted = True
                newNode.matchNode = edit.right
                edit.right.matchNode = newNode

                # Move the matched children from matchParent into newNode.
                #   i        -- scan position in matchParent.children
                #   iX       -- index the first matched child was taken
                #               from; newNode is inserted there afterwards
                #   interums -- indices passed over since the last match

                i = 0
                iX = -1
                interums = []
                for child in edit.right.children:
                    if child.insertTree:
                        newNode2 = child.cloneTree(None)
                        newNode.children.append(newNode2)
                        continue
                    while i != len(matchParent.children):
                        if matchParent.children[i].isMatchNode(
                                child.matchNode):
                            if len(interums) != 0:
                                # Passed-over nodes are pulled in as well,
                                # but only once a first match has been taken.
                                if iX != -1:
                                    # NOTE(review): assuming children is a
                                    # plain list, the assignment below is
                                    # undone by the `del` that follows it;
                                    # the matched-child branch sets
                                    # `.parent = newNode` instead, which may
                                    # be what was meant.  Also, interums
                                    # holds indices recorded before any
                                    # `del`, so later entries may be off
                                    # after the first removal -- verify.
                                    for ii in interums:
                                        newNode.children.append(
                                            matchParent.children[ii])
                                        matchParent.children[ii] = newNode
                                        del matchParent.children[ii]
                                        i -= 1
                                interums = []
                            newNode.children.append(matchParent.children[i])
                            matchParent.children[i].parent = newNode
                            del matchParent.children[i]
                            if iX == -1:
                                iX = i
                            break
                        else:
                            interums.append(i)
                        i += 1

                # No matched child was moved: put the new node first.
                if iX == -1:
                    iX = 0
                matchParent.children.insert(iX, newNode)

            # A pass that re-queued every edit made no progress; stop.
            if len(editList) == len(newEdits):
                break

        log.note("Number of edits left = " + str(len(newEdits)))
        for edit in newEdits:
            log.note(edit.toString())
        return len(newEdits)
Exemple #11
0
    def __init__(self, cache_path=None, library_dirs=None, source=None,
                 templates_path='templates', verbose=None, quiet=None,
                 no_network=None, network_locs=None,
                 rfc_number=None, options=Default_options):
        """ Set up resolver state and choose the cache directories

            Explicit `quiet`, `verbose`, `no_network` and `cache_path`
            arguments take precedence; when one is None the matching
            attribute of `options` is used (`options.cache` for
            `cache_path`).

            `source` is either a path string or an object with a `.name`
            attribute; its directory is remembered as `source_dir`.

            `network_locs` lists the base URLs of the online citation
            library.  When None, the two xml2rfc.tools.ietf.org locations
            (https first, then http) are used.

            The read caches are CACHES (with `~` expanded), preceded by the
            explicit cache path when one is given.  The write cache is the
            first of those that is writable or can be created.
        """
        if network_locs is None:
            # Built per instance: a list literal in the signature would be
            # a single object shared (and mutable) across every instance.
            network_locs = [
                'https://xml2rfc.tools.ietf.org/public/rfc/',
                'http://xml2rfc.tools.ietf.org/public/rfc/',
            ]

        self.quiet = quiet if quiet is not None else options.quiet
        self.verbose = verbose if verbose is not None else options.verbose
        self.no_network = no_network if no_network is not None else options.no_network
        self.cache_path = cache_path if cache_path is not None else options.cache
        self.source = source
        self.library_dirs = library_dirs
        self.templates_path = templates_path
        self.network_locs = network_locs
        self.include = False
        self.rfc_number = rfc_number
        self.cache_refresh_secs = (60*60*24*14) # 14 days
        self.options = options

        self.file_handles = []

        # Get directory of source
        if self.source:
            if isinstance(self.source, six.string_types):
                source_name = self.source
            else:
                source_name = self.source.name
            self.source_dir = os.path.abspath(os.path.dirname(source_name))
        else:
            self.source_dir = None

        # Determine cache directories to read/write to
        self.read_caches = [os.path.expanduser(path) for path in CACHES]
        self.write_cache = None
        if self.cache_path:
            # Explicit directory given, set as first directory in read_caches
            self.read_caches.insert(0, self.cache_path)

        # Try to find a valid directory to write to by stepping through
        # the read caches one by one
        for cache_dir in self.read_caches:
            if os.path.exists(cache_dir) and os.access(cache_dir, os.W_OK):
                self.write_cache = cache_dir
                break
            try:
                os.makedirs(cache_dir)
            except OSError:
                # Can't write to this directory, try the next one
                continue
            log.note('Created cache directory at', cache_dir)
            self.write_cache = cache_dir
            # Stop at the first directory we managed to create; carrying on
            # would create (and possibly select) lower-priority directories.
            break

        if not self.write_cache:
            log.warn('Unable to find a suitible cache directory to '
                            'write to, trying the following directories:\n ',
                            '\n  '.join(self.read_caches),
                            '\nTry giving a specific directory with --cache.')
        elif CACHE_PREFIX:
            # Create the prefix directory if it doesnt exist
            pdir = os.path.join(self.write_cache, CACHE_PREFIX)
            if not os.path.exists(pdir):
                os.makedirs(pdir)

        # One HTTP session per network location, created on demand
        self.sessions = {}
Exemple #12
0
    def cache(self, url):
        """ Return the path to a cached URL

            Looks for a file named after the last path component of `url`
            under CACHE_PREFIX in each read cache.  A copy younger than
            `cache_refresh_secs` is returned directly, and so is an older
            one when networking is disabled.  Otherwise the URL is fetched
            and stored in the write cache.

            Returns the path of the cached file; `url` itself when it was
            fetched but there is no write cache; or '' when networking is
            disabled and nothing is cached, or when the server did not
            answer with status 200.
        """
        parsed = urlparse(url)
        netloc = parsed.netloc
        basename = os.path.basename(parsed.path)
        typename = 'include' if self.include else 'entity'

        # Try to load the URL from each cache in `read_caches`
        for cache_dir in self.read_caches:
            cached_path = os.path.join(cache_dir, CACHE_PREFIX, basename)
            if not os.path.exists(cached_path):
                continue
            expired = os.path.getmtime(cached_path) < (time.time() - self.cache_refresh_secs)
            if expired and not self.no_network:
                log.note('Cached version at %s too old; will refresh cache for %s %s' % (cached_path, typename, url))
                break
            log.note('Resolving ' + typename + '...', url)
            log.note('Loaded from cache', cached_path)
            return cached_path

        log.note('Resolving ' + typename + '...', url)
        if self.no_network:
            # No network activity
            log.note("URL not retrieved because no-network option set")
            return ''

        # Reuse one session per host
        if netloc not in self.sessions:
            self.sessions[netloc] = requests.Session()
        session = self.sessions[netloc]
        r = session.get(url)
        try:
            for rr in r.history + [r, ]:
                log.note(' ... %s %s' % (rr.status_code, rr.url))

            if r.status_code != 200:
                # Invalid URL -- Error will be displayed in getReferenceRequest
                log.note("URL retrieval failed with status code %s for '%s'" % (r.status_code, r.url))
                return ''

            if not self.write_cache:
                # Nowhere to store it; hand the URL back to the caller
                return url

            text = r.text.encode('utf8')
            try:
                xml = lxml.etree.fromstring(text)
                if self.validate_ref(xml):
                    # Record where the reference really came from
                    xml.set('{%s}base'%utils.namespaces['xml'], r.url)
                    text = lxml.etree.tostring(xml, encoding='utf8')
            except Exception:
                # Deliberately best effort: content that cannot be parsed
                # or validated as a reference is cached exactly as received.
                pass
            write_path = os.path.normpath(os.path.join(self.write_cache,
                                                       CACHE_PREFIX, basename))
            with codecs.open(write_path, 'w', encoding='utf-8') as cache_file:
                cache_file.write(text.decode('utf8'))
            log.note('Added file to cache: ', write_path)
            return write_path
        finally:
            # Close the response on every path, including the failure
            # return and any error raised while writing the cache file.
            r.close()
Exemple #13
0
    def getReferenceRequest(self, request, include=False, line_no=0):
        """ Returns the correct and most efficient path for an external request

            To determine the path, the following algorithm is consulted:

            If REQUEST ends with '.dtd' or '.ent' then
              If REQUEST is an absolute local path then
                Use REQUEST as-is
              Else if REQUEST is a URL then
                Use the cached copy of REQUEST
              Else
                Try TEMPLATE_DIR + basename(REQUEST), otherwise
                use SOURCE_DIR + basename(REQUEST)
            Else
              The candidates are REQUEST and, unless REQUEST already ends
              with '.xml' or the v3 vocabulary is selected, REQUEST + '.xml'
              If REQUEST is an absolute local path then
                Use the first candidate that exists
              Else if REQUEST is a URL then
                Use the cached copy of the first candidate that resolves
              Else if REQUEST contains intermediate directories then
                Try each directory in LOCAL_LIB_DIRS + candidate, otherwise
                try each NETWORK + candidate
              Else (REQUEST is simply a filename)
                Try each directory in LOCAL_LIB_DIRS + candidate, otherwise
                try each NET_SUBDIRS subdirectory of each NETWORK + candidate

            A URL whose host is one of the NETWORK hosts is tried against
            every NETWORK host in turn, in the order they are listed.

            - REQUEST refers to the full string of the file asked for,
            - TEMPLATE_DIR refers to `self.templates_path`,
            - SOURCE_DIR refers to the directory of the XML file being parsed
            - LOCAL_LIB_DIRS refers to `self.library_dirs`; the first
              directory holding a candidate wins,
            - NETWORK refers to `self.network_locs`, the base URLs of the
              online citation library.

            Network lookups go through `self.cache()`.

            Returns `[path, REQUEST]` when the cache was consulted and
            `[path, None]` otherwise.

            Raises XmlRfcError if the request cannot be resolved.
        """
        self.include = include  # include state, read by cache() for logging
        tried_cache = False
        attempts = []  # Every location tried, for the error message
        original = request  # Used for the error message only
        result = None  # Our proper path
        no_network_msg = ("Document not found in cache, and --no-network specified"
                          " -- couldn't resolve %s" % request)

        def first_cached(urls):
            # Return the cache path of the first URL that resolves, else None.
            for candidate in urls:
                attempts.append(candidate)
                cached = self.cache(candidate)
                if cached:
                    return cached
            return None

        def first_local(names):
            # Return the first existing library path for any name, else None.
            # Stops at the first hit so earlier library directories win.
            for lib_dir in self.library_dirs or ():
                for name in names:
                    candidate = os.path.join(lib_dir, name)
                    attempts.append(candidate)
                    if os.path.exists(candidate):
                        return candidate
            return None

        def mirrored(urls):
            # Expand URLs hosted on a known network location so that every
            # configured location is tried; other URLs are used unchanged.
            origloc = urlparse(urls[0]).netloc
            netlocs = [urlparse(loc).netloc for loc in self.network_locs]
            if origloc not in netlocs:
                return list(urls)
            return [url.replace(origloc, newloc)
                    for newloc in netlocs for url in urls]

        if request.endswith(('.dtd', '.ent')):
            if os.path.isabs(request):
                # Absolute request, return as-is
                attempts.append(request)
                result = request
            elif urlparse(request).netloc:
                # URL requested, cache it
                result = first_cached(mirrored([request]))
                if not result and self.no_network:
                    log.warn(no_network_msg)
                tried_cache = True
            else:
                basename = os.path.basename(request)
                # Look for dtd in templates directory
                attempt = os.path.join(self.templates_path, basename)
                attempts.append(attempt)
                if os.path.exists(attempt):
                    result = attempt
                else:
                    # Default to source directory
                    result = os.path.join(self.source_dir, basename)
                    attempts.append(result)
        else:
            v3 = bool(self.options) and self.options.vocabulary == 'v3'
            if v3 or request.endswith('.xml'):
                paths = [request]
            else:
                paths = [request, request + '.xml']

            if os.path.isabs(paths[0]):
                # Absolute path: first candidate that exists; when none
                # does, the last one is kept and rejected further down.
                for path in paths:
                    attempts.append(path)
                    result = path
                    if os.path.exists(path):
                        break
            elif urlparse(paths[0]).netloc:
                # URL requested, cache it
                result = first_cached(mirrored(paths))
                if not result:
                    if v3 and not request.endswith('.xml'):
                        # result stays unset, so the request is reported as
                        # unresolvable below once the hint has been given.
                        log.warn("The v3 formatters require full explicit URLs of external "
                                 "resources.  Did you forget to add '.xml' (or some other extension)?")
                    elif self.no_network:
                        log.warn(no_network_msg)
                tried_cache = True
            else:
                # Relative request: local library directories first
                result = first_local(paths)
                if not result:
                    if os.path.dirname(paths[0]):
                        # Intermediate directories, only do flat searches
                        urls = [urljoin(loc, path)
                                for loc in self.network_locs
                                for path in paths]
                    else:
                        # Hanging filename, try the network subdirs
                        urls = [urljoin(loc, subdir + '/' + path)
                                for subdir in NET_SUBDIRS
                                for loc in self.network_locs
                                for path in paths]
                    result = first_cached(urls)
                    tried_cache = True
                    if not result and self.no_network:
                        log.warn(no_network_msg)

        # Verify the result -- either raise exception or return it
        if not result or (not os.path.exists(result) and not urlparse(original).netloc):
            if os.path.isabs(original):
                log.warn('The reference "' + original + '" was requested with an absolute path, but not found '
                    'in that location.  Removing the path component will cause xml2rfc to look for '
                    'the file automatically in standard locations.')
            # Couldn't resolve.  Throw an exception
            error = XmlRfcError('Unable to resolve external request: '
                                + '"' + original + '"', line_no=line_no, filename=self.source)
            if self.verbose and len(attempts) > 1:
                # Reveal attempts
                error.msg += ', trying the following location(s):\n    ' + \
                             '\n    '.join(attempts)
            raise error

        if tried_cache:
            # cache() has already logged the resolution
            return [result, original]

        # Haven't printed a verbose message yet
        typename = 'include' if self.include else 'entity'
        log.note('Resolving ' + typename + '...', result)
        return [result, None]
Exemple #14
0
def _build_option_parser():
    """Create the optparse parser that defines every rfclint option.

    Options are organised into help groups (parser, general, plain, spell,
    ABNF and configuration).  Every group is registered with the parser so
    that all options are listed by ``--help``.

    Returns:
        The fully populated ``optparse.OptionParser``.
    """
    formatter = optparse.IndentedHelpFormatter(max_help_position=40)
    optionparser = optparse.OptionParser(usage='rfclint SOURCE [OPTIONS] '
                                         '...\nExample: rfclint '
                                         'draft.xml',
                                         formatter=formatter)

    parser_options = optparse.OptionGroup(optionparser, "Parser Options")
    parser_options.add_option('-C',
                              '--clear-cache',
                              action='store_true',
                              dest='clear_cache',
                              default=False,
                              help='purge the cache and exit')
    parser_options.add_option('-c',
                              '--cache',
                              dest='cache',
                              help='specify a primary cache directory to'
                              ' write to; default: try [ %s ]' %
                              ', '.join(CACHES))
    parser_options.add_option(
        '-N',
        '--no-network',
        action='store_true',
        default=False,
        help='don\'t use the network to resolve references')
    parser_options.add_option('-n',
                              '--no-rng',
                              action='store_true',
                              help='disable RNG validation step')
    # The value is used as a file path for validation, so the option has to
    # consume an argument; a boolean flag would hand ``True`` to validate().
    parser_options.add_option('-r',
                              '--rng',
                              dest='rng',
                              metavar='FILE',
                              help='Specify an alternate RNG file')
    parser_options.add_option('-X',
                              '--no-xinclude',
                              action='store_true',
                              dest='no_xinclude',
                              help='don\'t resolve any xi:include elements')
    parser_options.add_option('--no-xml',
                              dest='no_xml',
                              action='store_true',
                              help='Don\'t perform XML well-formness checking')
    parser_options.add_option('--bcp14',
                              dest='bcp14',
                              action='store_true',
                              help='Perform bcp14 checking')
    optionparser.add_option_group(parser_options)

    general_options = optparse.OptionGroup(optionparser, "General Options")
    general_options.add_option('-o',
                               '--out',
                               dest='output_filename',
                               metavar='FILE',
                               help='specify an explicit output filename')
    optionparser.add_option_group(general_options)

    plain_options = optparse.OptionGroup(optionparser, 'Plain Options')
    plain_options.add_option('-q',
                             '--quiet',
                             action='store_true',
                             help='dont print anything')
    plain_options.add_option('-v',
                             '--verbose',
                             action='store_true',
                             help='print extra information')
    plain_options.add_option('-V',
                             '--version',
                             action='callback',
                             callback=display_version,
                             help='display the version number and exit')
    plain_options.add_option('--debug',
                             action='store_true',
                             help='Show debugging output')
    plain_options.add_option('--extract',
                             dest='extract',
                             help='Extract all items of the given type')
    plain_options.add_option('--no-svgcheck',
                             action='store_true',
                             dest='no_svgcheck',
                             help='Don\'t run svgcheck')
    optionparser.add_option_group(plain_options)

    spell_options = optparse.OptionGroup(optionparser, 'Spell Options')
    spell_options.add_option('--no-spell',
                             dest='no_spell',
                             default=False,
                             action='store_true',
                             help='Don\'t run the spell checking')
    spell_options.add_option(
        '--dictionary',
        dest='dict_list',
        action='append',
        help='Use this addition dictionary when spell checking')
    spell_options.add_option(
        '--personal',
        dest='dict_personal',
        help='use this dictionary as the personal dictionary')
    spell_options.add_option(
        '--spell-window',
        dest='spell_window',
        action='store',
        type='int',
        help='Set the number of words to appear around spelling errors')
    spell_options.add_option('--no-dup-detection',
                             dest='no_dups',
                             action='store_true',
                             help='Don\'t do duplication detection.')
    spell_options.add_option('--spell-program',
                             dest='spell_program',
                             metavar='NAME',
                             help='Name of spelling program to use')
    spell_options.add_option('--no-suggest',
                             dest='spell_suggest',
                             action='store_false',
                             help='Do not provide suggestions')
    spell_options.add_option('--suggest',
                             dest='spell_suggest',
                             action='store_true',
                             help='provide suggestions (default)')
    spell_options.add_option('--color',
                             dest='spell_color',
                             action='callback',
                             callback=check_color,
                             type='string',
                             help='color incorrect words in supplied context')
    spell_options.add_option(
        '--no-curses',
        dest='no_curses',
        action='store_true',
        help='disable curses when doing spell checking and dup detection')
    spell_options.add_option(
        '--skip-code',
        dest='skip_code',
        action='store_true',
        help='skip all code elements when doing spell and duplicate checking')
    spell_options.add_option(
        '--skip-artwork',
        dest='skip_artwork',
        action='store_true',
        help='skip all artwork elements when doing spell and '
        'duplicate checking')
    optionparser.add_option_group(spell_options)

    abnf_options = optparse.OptionGroup(optionparser, 'ABNF Options')
    abnf_options.add_option('--abnf-program',
                            dest='abnf_program',
                            metavar='NAME',
                            help='Name of ABNF checker program to use')
    abnf_options.add_option('--no-abnf',
                            dest='no_abnf',
                            action='store_true',
                            help='Don\'t perform ABNF checking')
    abnf_options.add_option('--abnf-add-rules',
                            dest='abnf_add',
                            help='ABNF file to append during evaluation.')
    # Without registering the group its options never show up in --help.
    optionparser.add_option_group(abnf_options)

    config_options = optparse.OptionGroup(optionparser,
                                          'Configuration Options')
    config_options.add_option(
        '--configfile',
        dest='config_file',
        metavar='NAME',
        help="Specify the name of the configuration file.")
    config_options.add_option('--save-config',
                              dest='save_config',
                              default=False,
                              action='store_true',
                              help='Save configuration back to file')
    optionparser.add_option_group(config_options)

    return optionparser


def _extract_sourcecode(xmlrfc, options):
    """Write out every <sourcecode> element of the requested type and exit.

    Elements carrying a ``name`` attribute are written to a file of that
    name; all others are written to ``options.output_filename`` when it is
    set, otherwise to stdout.  The process always terminates here: exit
    status 1 when no matching element exists, 0 otherwise.
    """
    # Pass the type as an XPath variable so that quotes in the user-supplied
    # value cannot break the expression.
    code_items = xmlrfc.tree.getroot().xpath(
        "//sourcecode[@type=$code_type]", code_type=options.extract)

    if not code_items:
        log.error("No sourcecode elements with type = '{0}' found.".format(
            options.extract))
        sys.exit(1)

    if options.output_filename:
        out = open(options.output_filename, 'w')
    else:
        out = sys.stdout

    try:
        need_eol = True
        for item in code_items:
            # An empty element has text == None; treat it as empty content.
            text = item.text or ''
            if "name" in item.attrib:
                # NOTE(review): the target path comes straight from the
                # document being processed -- confirm that is acceptable for
                # untrusted input.
                with open(item.attrib["name"], 'w') as f:
                    f.write(text)
                    if text and text[-1] != '\n':
                        f.write('\n')
            else:
                out.write(text)
                if text:
                    need_eol = text[-1] != '\n'

        if need_eol:
            out.write('\n')
    finally:
        # Only close what was opened here; stdout must stay open.
        if options.output_filename:
            out.close()
    sys.exit(0)


def _check_embedded_xml(xmlrfc, options):
    """Check that each <sourcecode type='xml'> fragment is well-formed XML.

    Problems are reported through ``log``; nothing is raised to the caller.
    """
    code_items = xmlrfc.tree.getroot().xpath("//sourcecode[@type='xml']")
    if not code_items:
        log.info("No XML fragments in sourcecode elements found.")
        return

    log.note("Validating XML fragments in sourcecode elements")
    # resolver without knowledge of rfc_number:
    caching_resolver = CachingResolver(no_network=True,
                                       verbose=options.verbose,
                                       quiet=options.quiet)

    for item in code_items:
        fragment_parser = lxml.etree.XMLParser(dtd_validation=False,
                                               load_dtd=False,
                                               no_network=True,
                                               resolve_entities=False,
                                               recover=False)
        fragment_parser.resolvers.add(caching_resolver)
        try:
            # Drop whitespace in front of an XML declaration, which would
            # otherwise make the fragment ill-formed.
            text = re.sub(r'^\s+<\?xml ', '<?xml ', item.text)
            fragment = six.BytesIO(text.encode('utf-8'))

            lxml.etree.parse(fragment, fragment_parser)
            log.info(
                "XML fragment in source code found and is well defined.",
                where=item)
        except lxml.etree.XMLSyntaxError as e:
            log.warn(u'XML in sourcecode not well formed: ',
                     e.msg,
                     where=item)
        except Exception as e:
            log.exception(u'Error occured processing XML: ', e)


def _write_output(xmlrfc, output_filename):
    """Serialize the document tree as UTF-8 XML into *output_filename*.

    A trailing newline is appended when the serialized text lacks one.
    """
    text = lxml.etree.tostring(xmlrfc.tree.getroot(),
                               xml_declaration=True,
                               encoding='utf-8',
                               doctype=xmlrfc.tree.docinfo.doctype)
    if six.PY3:
        text = text.decode('utf8')

    if six.PY2:
        out = open(output_filename, 'w')
    else:
        out = open(output_filename, 'w', encoding='utf8')
    # The context manager guarantees the file is flushed and closed.
    with out:
        out.write(text)
        if len(text) > 0 and text[-1] != '\n':
            out.write('\n')


def main():
    """Command-line entry point of rfclint.

    Parses the command line, loads the configuration, parses the source
    document and validates it against the RNG schema (unless disabled), then
    runs the enabled checks in order: embedded XML, ABNF, SVG, spelling,
    duplicate detection and BCP 14 language.  With ``--extract`` the matching
    sourcecode is written out and the process exits instead of checking.
    With ``--out`` the document tree is finally written to the given file.

    The function terminates the process through ``sys.exit`` with status
    1 when the cache directory is unusable or the document fails to parse or
    validate, status 2 when no source file is given, an error message when
    the source file does not exist, and status 0 after ``--save-config`` or
    a successful ``--extract``.
    """
    optionparser = _build_option_parser()

    # --- Parse and validate arguments ---------------------------------

    (options, args) = optionparser.parse_args()

    # --- Setup and parse the input file

    if options.cache:
        if not os.path.exists(options.cache):
            try:
                os.makedirs(options.cache)
                if options.verbose:
                    log.write('Created cache directory at', options.cache)
            except OSError as e:
                print('Unable to make cache directory: %s ' % options.cache)
                print(e)
                sys.exit(1)
        else:
            if not os.access(options.cache, os.W_OK):
                print('Cache directory is not writable: %s' % options.cache)
                sys.exit(1)

    if options.clear_cache:
        # NOTE(review): the option help promises "purge the cache and exit";
        # presumably clear_cache() terminates the process -- confirm.
        clear_cache(options.cache)

    # --- Locate the configuration file if it exists and import it ----

    config = ConfigFile(options)

    if options.save_config:
        config.save()
        sys.exit(0)

    # make things quiet if output goes to stdout
    if options.output_filename is None and not options.quiet and (
            options.extract):
        options.quiet = True

    # --- Get the file to be processed --------------------------------

    if len(args) < 1:
        optionparser.print_help()
        sys.exit(2)
    source = args[0]
    if not os.path.exists(source):
        sys.exit('No such file: ' + source)

    # Setup warnings module
    # rfclint.log.warn_error = options.warn_error and True or False
    log.quiet = bool(options.quiet)
    log.verbose = options.verbose

    # Parse the document into an xmlrfc tree instance
    log.note("Checking for well-formness of '{0}'".format(source))
    parser = XmlRfcParser(source,
                          verbose=options.verbose,
                          preserve_all_white=True,
                          quiet=True,
                          cache_path=options.cache,
                          no_network=options.no_network,
                          no_xinclude=options.no_xinclude,
                          templates_path=globals().get('_TEMPLATESPATH', None))
    try:
        xmlrfc = parser.parse(remove_comments=False, strip_cdata=False)
    except XmlRfcError as e:
        log.exception('Unable to parse the XML document: ' + source, e)
        sys.exit(1)
    except lxml.etree.XMLSyntaxError as e:
        # Give the lxml.etree.XmlSyntaxError exception a line attribute which
        # matches lxml.etree._LogEntry, so we can use the same logging function
        log.error("Unable to parse the XML document: " +
                  os.path.normpath(source))
        log.exception_lines("dummy", e.error_log)
        sys.exit(1)
    log.note("Well-formness passes")

    # Validate the document unless disabled
    if not options.no_rng:
        log.note("Checking for schema validation...")
        if not options.rng:
            options.rng = parser.default_rng_path
        ok, errors = xmlrfc.validate(rng_path=options.rng)
        if not ok:
            log.error('Unable to validate the XML document: ' +
                      os.path.normpath(source))
            log.exception_lines("dummy", errors)
            sys.exit(1)
        log.info("Schema validation passes")
    else:
        log.note("Skipping schema validation")

    # Do Extracts (never returns: the helper exits the process)
    if options.extract:
        _extract_sourcecode(xmlrfc, options)

    #  Validate any embedded XML
    if not options.no_xml:
        _check_embedded_xml(xmlrfc, options)

    #  Validate any embedded ABNF
    if not options.no_abnf:
        try:
            checker = AbnfChecker(config)

            checker.validate(xmlrfc.tree)
        except RfcLintError as e:
            log.error("Skipping ABNF checking because")
            log.error(e.message, additional=2)

    # Validate any SVG items
    if not options.no_svgcheck:
        checkTree(xmlrfc.tree)

    # In the three checks below the checker starts out as None so that the
    # handlers can tell whether construction succeeded before calling
    # endwin() on it.

    # do the Spelling checking
    if not options.no_spell:
        speller = None
        try:
            speller = Speller(config)
            if options.no_curses:
                speller.no_curses = True
            speller.initscr()
            speller.processTree(xmlrfc.tree.getroot())
            speller.sendCommand("#")  # save personal dictionary
            speller.endwin()
        except RfcLintError as e:
            log.error("Skipping spell checking because")
            log.error(e.message, additional=2)
            if speller is not None:
                speller.endwin()
        except Exception:
            if speller is not None:
                speller.endwin()
            raise

    # do the Duplicate checking
    if not options.no_dups:
        dups = None
        try:
            dups = Dups(config)
            if options.no_curses:
                dups.no_curses = True
            dups.initscr()
            dups.processTree(xmlrfc.tree.getroot())
            dups.endwin()
        except RfcLintError as e:
            if dups is not None:
                dups.endwin()
            log.error("Skipping duplicate checking because")
            log.error(e.message, additional=2)
        except Exception:
            if dups is not None:
                dups.endwin()
            raise

    # do the 2119 Language tag checking
    if options.bcp14:
        lang2119 = None
        try:
            lang2119 = Lang2119(config)
            if options.no_curses:
                lang2119.no_curses = True
            lang2119.initscr()
            lang2119.processTree(xmlrfc.tree.getroot())
            lang2119.endwin()
        except RfcLintError as e:
            if lang2119 is not None:
                lang2119.endwin()
            log.error("Skipping RFC 2119 language tag checking because")
            log.error(e.message, additional=2)
        except Exception:
            if lang2119 is not None:
                lang2119.endwin()
            raise

    if options.output_filename is not None:
        _write_output(xmlrfc, options.output_filename)