def justify_inline(left_str, center_str, right_str, width=72):
    """
    Combine three strings into a single line of ``width`` characters, with
    the first left-justified, the second centered and the third
    right-justified.

    If the pieces together exceed ``width``, a warning is logged and the
    longest piece is trimmed by the excess so the line fits.
    """
    parts = [left_str.rstrip(), center_str.strip(), right_str.strip()]
    overflow = sum(len(p) for p in parts) - width
    if overflow > 0:
        # Trim the longest of the three pieces by the excess amount
        victim = parts.index(max(parts, key=len))
        log.warn('The inline string was truncated because it was '
                 'too long:\n ' + parts[victim])
        parts[victim] = parts[victim][:-overflow]
    # An odd-length string centers cleanly only in an odd-width field;
    # widen the field by one in that case (the extra column is never read).
    middle = parts[1]
    centered = middle.center(width if len(middle) % 2 == 0 else width + 1)
    righted = parts[2].rjust(width)
    # Overlay: the centered text wins over the left text, the right text
    # fills any remaining blank columns.
    line = []
    for pos, ch in enumerate(parts[0].ljust(width)):
        if centered[pos] != ' ':
            line.append(centered[pos])
        elif righted[pos] != ' ':
            line.append(righted[pos])
        else:
            line.append(ch)
    return ''.join(line)
def _replace_unicode_characters(str): """ replace those Unicode characters that we do not use internally &wj; &zwsp; &nbhy; """ while True: match = re.search(u'([^ -\x7e\u2060\u200B\u00A0\u2011\r\n])', str) if not match: return str if match.group(1) in _unicode_replacements: str = re.sub(match.group(1), _unicode_replacements[match.group(1)], str) else: entity = match.group(1).encode('ascii', 'xmlcharrefreplace').decode('ascii') str = re.sub(match.group(1), entity, str) log.warn('Illegal character replaced in string: ' + entity)
def strip_prefix(element, el):
    """
    Split a Clark-notation tag ('{namespace}tag') into its parts.

    Returns a tuple (tag, namespace); namespace is None when the tag
    carries no '{...}' prefix.  It is up to the caller to decide whether
    the namespace is acceptable.
    """
    namespace = None
    if element[0] == '{':
        closing = element.rfind('}')  # index of the rightmost '}'
        if closing < 0:
            # An opening brace without a closing one should not survive
            # XML parsing; log it rather than crash.
            log.warn(
                "Malformed namespace. Should have errored during parsing")
        else:
            namespace = element[1:closing]
            element = element[closing + 1:]
    return element, namespace
def initscr(self):
    """
    Initialize the curses screen when running interactively.

    Always resets self.A_REVERSE / self.A_NORMAL to portable fallback
    values; when curses is importable and permitted, switches them to the
    real curses attributes, stores the screen object in self.curses and
    builds self.spaceline.  Any curses failure tears the screen down and
    logs an error instead of raising.
    """
    self.A_REVERSE = 1
    self.A_NORMAL = 0
    if self.interactive and not self.no_curses:
        if haveCurses:
            try:
                self.curses = curses.initscr()
                curses.start_color()
                curses.noecho()
                curses.cbreak()
                self.spaceline = " " * curses.COLS
                self.A_REVERSE = curses.A_REVERSE
                self.A_NORMAL = curses.A_NORMAL
            except curses.error as e:
                # assumes self.curses was pre-set (e.g. to None) before this
                # call when initscr() itself fails -- TODO confirm in __init__
                if self.curses:
                    self.endwin()
                self.curses = None
                # str(e): concatenating the exception object itself raises
                # TypeError on Python 3
                log.error("Problem loading curses - " + str(e))
        else:
            log.warn("Unable to load CURSES for python")
def check(el, depth=0):
    """
    Walk the current tree checking to see if all elements pass muster
    relative to RFC 7996 the RFC Tiny SVG document

    Return False if the element is to be removed from tree when
    writing it back out
    """
    global errorCount
    log.note("%s tag = %s" % (' ' * (depth * indent), el.tag))
    # Check that the namespace is one of the pre-approved ones
    # ElementTree prefixes elements with default namespace in braces
    element, ns = strip_prefix(el.tag, el)  # name of element
    # namespace for elements must be either empty or svg
    if ns is not None and ns not in wp.svg_urls:
        log.warn("Element '{0}' in namespace '{1}' is not allowed".format(
            element, ns), where=el)
        return False  # Remove this el
    # Is the element in the list of legal elements?
    log.note("%s element % s: %s" % (' ' * (depth * indent), element,
                                     el.attrib))
    if element not in wp.elements:
        errorCount += 1
        log.warn("Element '{0}' not allowed".format(element), where=el)
        return False  # Remove this el
    elementAttributes = wp.elements[element]  # Allowed attributes for element
    # do a re-write of style into individual elements
    if 'style' in el.attrib:
        modify_style(el)
    attribs_to_remove = []  # Can't remove them inside the iteration!
    for nsAttrib, val in el.attrib.items():
        # validate that the namespace of the element is known and ok
        attr, ns = strip_prefix(nsAttrib, el)
        log.note("%s attr %s = %s (ns = %s)" %
                 (' ' * (depth * indent), attr, val, ns))
        if ns is not None and ns not in wp.svg_urls:
            if ns not in wp.xmlns_urls:
                log.warn("Element '{0}' does not allow attributes with"
                         " namespace '{1}'".format(element, ns), where=el)
                attribs_to_remove.append(nsAttrib)
            # xmlns-namespaced attributes are silently tolerated
            continue
        # look to see if the attribute is either an attribute for a specific
        # element or is an attribute generically for all properties
        if (attr not in elementAttributes) and (attr not in wp.properties):
            errorCount += 1
            log.warn("The element '{0}' does not allow the attribute '{1}',"
                     " attribute to be removed.".format(element, attr),
                     where=el)
            attribs_to_remove.append(nsAttrib)
        # Now check if the attribute is a generic property
        elif (attr in wp.properties):
            vals = wp.properties[attr]
            # log.note("vals = " + vals + "<<<<<")
            # Do method #1 of checking if the value is legal - not currently used.
            # (The 'and False' below deliberately disables this branch.)
            if vals and vals[0] == '[' and False:
                # ok, new_val = check_some_props(attr, val, depth)
                # if not ok:
                #     el.attrib[attr] = new_val[1:]
                pass
            else:
                # Method #2: look the value up directly
                ok, new_val = value_ok(attr, val)
                if vals and not ok:
                    errorCount += 1
                    if new_val is not None:
                        # A usable substitute value exists: rewrite in place
                        el.attrib[attr] = new_val
                        log.warn(u"The attribute '{1}' does not allow the"
                                 u" value '{0}', replaced with '{2}'"
                                 .format(val, attr, new_val), where=el)
                    else:
                        attribs_to_remove.append(nsAttrib)
                        log.warn(u"The attribute '{1}' does not allow the"
                                 u" value '{0}', attribute to be removed"
                                 .format(val, attr), where=el)
    for attrib in attribs_to_remove:
        del el.attrib[attrib]

    # Need to have a viewBox on the root
    if (depth == 0):
        if el.get("viewBox"):
            pass
        else:
            log.warn("The attribute viewBox is required on the root svg"
                     " element", where=el)
            # Try to synthesize a viewBox from the width/height attributes
            svgw = maybefloat(el.get('width'))
            svgh = maybefloat(el.get('height'))
            try:
                if svgw and svgh:
                    newValue = '0 0 %s %s' % (svgw, svgh)
                    log.warn("Trying to put in the attribute with value"
                             " '{0}'".format(newValue), where=el)
                    el.set('viewBox', newValue)
            except ValueError as e:
                log.error("Error when calculating SVG size: %s" % e,
                          where=el)

    els_to_rm = []  # Can't remove them inside the iteration!
    if element in wp.element_children:
        allowed_children = wp.element_children[element]
    else:
        allowed_children = []
    for child in el:
        log.note("%schild, tag = %s" % (' ' * (depth * indent), child.tag))
        # Skip comments / processing instructions whose .tag is not a string
        if not isinstance(child.tag, str):
            continue
        ch_tag, ns = strip_prefix(child.tag, el)
        if ns not in wp.svg_urls:
            log.warn("The namespace {0} is not permitted for svg elements."
                     .format(ns), where=child)
            els_to_rm.append(child)
            continue
        if ch_tag not in allowed_children:
            log.warn("The element '{0}' is not allowed as a child of '{1}'"
                     .format(ch_tag, element), where=child)
            els_to_rm.append(child)
        elif not check(child, depth + 1):
            # Recurse: the child itself failed validation
            els_to_rm.append(child)
    if len(els_to_rm) != 0:
        for child in els_to_rm:
            el.remove(child)
    return True  # OK
def validate(self, dtd_path=None, rng_path=None):
    """
    Validate the document against an RNG schema or a DTD.

    If rng_path is given, only RelaxNG validation is performed.
    Otherwise a DTD is used: the explicit dtd_path, the document's own
    external DTD declaration, or self.default_dtd_path (via recursion).

    Returns (success, errors) where errors is a (possibly empty) lxml
    error log.
    """
    if rng_path:
        if os.path.exists(rng_path):
            try:
                rng = lxml.etree.parse(rng_path)
                rng = lxml.etree.RelaxNG(rng)
            except lxml.etree.XMLSyntaxError as e:
                # str(e): Python 3 exceptions have no .message attribute
                log.error('Could not parse the rng file: ',
                          rng_path + '\n ', str(e))
                return False, []
            except lxml.etree.RelaxNGParseError as e:
                log.error('Could not parse the rng file: ',
                          rng_path + '\n ',
                          e.error_log.last_error.message)
                return False, []
        else:
            # Invalid path given
            log.error("RNG file does not exist ", rng_path)
            return False, []
        if rng.validate(self.tree):
            # The document was valid
            return True, []
        else:
            if len(rng.error_log) == 0:
                return True, []
            else:
                # The document was not valid
                return False, rng.error_log

    # Load dtd from alternate path, if it was specified
    if dtd_path:
        if os.path.exists(dtd_path):
            try:
                dtd = lxml.etree.DTD(dtd_path)
            except lxml.etree.DTDParseError as e:
                # The DTD itself has errors
                # str(e): Python 3 exceptions have no .message attribute
                log.error('Could not parse the dtd file:',
                          dtd_path + '\n ', str(e))
                return False, []
        else:
            # Invalid path given
            log.error('DTD file does not exist:', dtd_path)
            return False, []
    # Otherwise, use document's DTD declaration
    else:
        dtd = self.tree.docinfo.externalDTD
        if not dtd and self.default_dtd_path:
            # No explicit DTD filename OR declaration in document!
            log.warn('No DTD given, defaulting to', self.default_dtd_path)
            return self.validate(dtd_path=self.default_dtd_path)

    if not dtd or dtd.validate(self.getroot()):
        # The document was valid
        return True, []
    else:
        if len(dtd.error_log) == 0:
            return True, []
        else:
            # The document was not valid
            return False, dtd.error_log
def __init__(self, cache_path=None, library_dirs=None, source=None,
             templates_path='templates', verbose=None, quiet=None,
             no_network=None,
             network_locs=None,
             rfc_number=None, options=Default_options):
    """
    Set up the resolver's configuration and locate a writable cache
    directory.

    Unset keyword arguments (quiet, verbose, no_network, cache_path)
    fall back to the corresponding attributes of ``options``.
    """
    # Default supplied inside the body to avoid the shared-mutable-default
    # pitfall; passing None now selects the standard locations.
    if network_locs is None:
        network_locs = [
            'https://xml2rfc.tools.ietf.org/public/rfc/',
            'http://xml2rfc.tools.ietf.org/public/rfc/',
        ]
    self.quiet = quiet if quiet is not None else options.quiet
    self.verbose = verbose if verbose is not None else options.verbose
    self.no_network = no_network if no_network is not None else options.no_network
    self.cache_path = cache_path if cache_path is not None else options.cache
    self.source = source
    self.library_dirs = library_dirs
    self.templates_path = templates_path
    self.network_locs = network_locs
    self.include = False
    self.rfc_number = rfc_number
    self.cache_refresh_secs = (60 * 60 * 24 * 14)  # 14 days
    self.options = options
    self.file_handles = []

    # Get directory of source
    if self.source:
        if isinstance(self.source, six.string_types):
            self.source_dir = os.path.abspath(os.path.dirname(self.source))
        else:
            self.source_dir = os.path.abspath(os.path.dirname(self.source.name))
    else:
        self.source_dir = None

    # Determine cache directories to read/write to
    self.read_caches = [os.path.expanduser(path) for path in CACHES]
    self.write_cache = None
    if self.cache_path:
        # Explicit directory given, set as first directory in read_caches
        self.read_caches.insert(0, self.cache_path)

    # Try to find a valid directory to write to by stepping through
    # Read caches one by one
    for cache_dir in self.read_caches:
        if os.path.exists(cache_dir) and os.access(cache_dir, os.W_OK):
            self.write_cache = cache_dir
            break
        else:
            try:
                os.makedirs(cache_dir)
                log.note('Created cache directory at', cache_dir)
                self.write_cache = cache_dir
                # NOTE(review): no break here, matching the original code --
                # a later pre-existing writable directory still wins over a
                # freshly created one.  Confirm this precedence is intended.
            except OSError:
                # Can't write to this directory, try the next one
                pass
    if not self.write_cache:
        log.warn('Unable to find a suitible cache directory to '
                 'write to, trying the following directories:\n ',
                 '\n '.join(self.read_caches),
                 '\nTry giving a specific directory with --cache.')
    else:
        # Create the prefix directory if it doesnt exist
        if CACHE_PREFIX is not None and len(CACHE_PREFIX) > 0:
            pdir = os.path.join(self.write_cache, CACHE_PREFIX)
            if not os.path.exists(pdir):
                os.makedirs(pdir)

    self.sessions = {}
def parse(self, remove_comments=True, remove_pis=False, quiet=False,
          strip_cdata=True, textIn=None):
    """
    Parse the source XML file (or the string ``textIn``) and return an
    XmlRfc instance.

    A first, entity-free iterparse pass extracts the rfc number and
    format version; a second full parse resolves entities through the
    caching resolver and expands <?rfc include?> processing instructions
    and xi:include statements.
    """
    if textIn is None:
        if not (self.quiet or quiet):
            log.write('Parsing file', os.path.normpath(self.source))
        if six.PY2:
            with open(self.source, "rU") as f:
                self.text = f.read()
        else:
            with open(self.source, "rb", newline=None) as f:
                self.text = f.read()
    else:
        self.text = textIn.encode('utf-8')

    # Get an iterating parser object
    file = six.BytesIO(self.text)
    if textIn is None:
        file.name = os.path.join(os.path.abspath(
            os.path.dirname(self.source)), os.path.basename(self.source))
    else:
        file.name = "stdin"
    context = lxml.etree.iterparse(file,
                                   dtd_validation=False,
                                   load_dtd=True,
                                   attribute_defaults=self.attribute_defaults,
                                   no_network=self.no_network,
                                   remove_comments=remove_comments,
                                   remove_pis=remove_pis,
                                   remove_blank_text=True,
                                   resolve_entities=False,
                                   strip_cdata=strip_cdata,
                                   events=("start",),
                                   tag="rfc",
                                   )
    # resolver without knowledge of rfc_number:
    caching_resolver = CachingResolver(cache_path=self.cache_path,
                                       library_dirs=self.library_dirs,
                                       templates_path=self.templates_path,
                                       source=self.source,
                                       no_network=self.no_network,
                                       network_locs=self.network_locs,
                                       verbose=self.verbose,
                                       quiet=self.quiet,
                                       options=self.options,
                                       )
    context.resolvers.add(caching_resolver)

    # Get hold of the rfc number (if any) in the rfc element, so we can
    # later resolve the "&rfc.number;" entity.
    self.rfc_number = None
    self.format_version = None
    try:
        for action, element in context:
            if element.tag == "rfc":
                self.rfc_number = element.attrib.get("number", None)
                self.format_version = element.attrib.get("version", None)
                break
    except lxml.etree.XMLSyntaxError:
        pass
        # log.warn("Parsing Error: %s" % e)
    except ValueError as e:
        # str(e): Python 3 exceptions have no .message attribute; the old
        # code raised AttributeError here instead of swallowing the error
        if str(e) == "I/O operation on closed file":
            pass

    if self.format_version == "3":
        self.default_dtd_path = None
        self.default_rng_path = os.path.join(self.templates_path, 'v3.rng')

    # now get a regular parser, and parse again, this time resolving entities
    parser = lxml.etree.XMLParser(dtd_validation=False,
                                  load_dtd=True,
                                  attribute_defaults=self.attribute_defaults,
                                  no_network=self.no_network,
                                  remove_comments=remove_comments,
                                  remove_pis=remove_pis,
                                  remove_blank_text=not self.preserve_all_white,
                                  # remove_blank_text=True,
                                  resolve_entities=self.resolve_entities,
                                  strip_cdata=strip_cdata)

    # Initialize the caching system
    self.cachingResolver = CachingResolver(cache_path=self.cache_path,
                                           library_dirs=self.library_dirs,
                                           templates_path=self.templates_path,
                                           source=self.source,
                                           no_network=self.no_network,
                                           network_locs=self.network_locs,
                                           verbose=self.verbose,
                                           quiet=self.quiet,
                                           rfc_number=self.rfc_number,
                                           options=self.options
                                           )

    # Add our custom resolver
    parser.resolvers.add(self.cachingResolver)

    # Use our custom element class, which holds the state of PI settings
    # at this point in the xml tree
    element_lookup = lxml.etree.ElementDefaultClassLookup(element=AnnotatedElement)
    parser.set_element_class_lookup(element_lookup)

    # Parse the XML file into a tree and create an rfc instance
    file = six.BytesIO(self.text)
    file.name = os.path.join(os.path.abspath(
        os.path.dirname(self.source)), os.path.basename(self.source))
    tree = lxml.etree.parse(file, parser)
    xmlrfc = XmlRfc(tree, self.default_dtd_path, nsmap=self.nsmap)

    # Evaluate processing instructions before root element
    xmlrfc._eval_pre_pi()

    # Keep seen elements in a list, to force lxml to not discard (and
    # recreate) the elements, as this would cause loss of our custom
    # state, the PI settings at the time the element was parsed
    # (in element.pis)
    xmlrfc._elements_cache = []
    # Process PIs and expand 'include' instructions
    pis = xmlrfc.pis.copy()
    for element in xmlrfc.getroot().iterdescendants():
        if element.tag is lxml.etree.PI:
            pidict = xmlrfc.parse_pi(element)
            pis = xmlrfc.pis.copy()
            if 'include' in pidict and pidict['include'] and not self.no_xinclude:
                request = pidict['include']
                path, originalPath = self.cachingResolver.getReferenceRequest(
                    request,
                    # Pass the line number in XML for error bubbling
                    include=True, line_no=getattr(element, 'sourceline', 0))
                try:
                    # Parse the xml and attach it to the tree here
                    parser = lxml.etree.XMLParser(load_dtd=False,
                                                  no_network=False,
                                                  remove_comments=remove_comments,
                                                  remove_pis=remove_pis,
                                                  remove_blank_text=True,
                                                  resolve_entities=True,
                                                  strip_cdata=strip_cdata)
                    parser.set_element_class_lookup(element_lookup)
                    # parser.resolvers.add(self.cachingResolver) --- should this be done?
                    ref_root = lxml.etree.parse(path, parser).getroot()
                    ref_root.pis = pis
                    ref_root.base = path
                    xmlrfc._elements_cache.append(ref_root)
                    for e in ref_root.iterdescendants():
                        e.pis = pis
                        e.base = path
                        xmlrfc._elements_cache.append(e)
                    parent = element.getparent()
                    parent.replace(element, ref_root)
                except (lxml.etree.XMLSyntaxError, IOError) as e:
                    # isinstance: the old 'e is lxml.etree.XMLSyntaxError'
                    # compared an instance to the class and was always False,
                    # so syntax errors logged the generic message
                    if isinstance(e, lxml.etree.XMLSyntaxError):
                        log.warn('The include file at', path,
                                 'contained an XML error and was '
                                 'not expanded:', e.msg)
                    else:
                        log.warn('Unable to load the include file at', path)
        else:
            if isinstance(element, AnnotatedElement):
                element.pis = pis
                xmlrfc._elements_cache.append(element)

    # Process xi:include statements
    if not self.no_xinclude:
        xmlrfc.tree.xinclude()

    # Finally, do any extra formatting on the RFC before returning
    if not self.preserve_all_white:
        xmlrfc._format_whitespace()

    return xmlrfc
def getReferenceRequest(self, request, include=False, line_no=0):
    """ Returns the correct and most efficient path for an external request

        To determine the path, the following algorithm is consulted:

        If REQUEST ends with '.dtd' or '.ent' then
            If REQUEST is an absolute path (local or network) then
                Return REQUEST
            Else
                Try TEMPLATE_DIR + REQUEST, otherwise
                Return SOURCE_DIR + REQUEST
        Else
            If REQUEST doesn't end with '.xml' then append '.xml'
            If REQUEST is an absolute path (local or network) then
                Return REQUEST
            Else
                If REQUEST contains intermediate directories then
                    Try each directory in LOCAL_LIB_DIRS + REQUEST, otherwise
                    Try NETWORK + REQUEST
                Else (REQUEST is simply a filename)
                    [Recursively] Try each directory in
                    LOCAL_LIB_DIRS + REQUEST, otherise
                    Try each explicit (bibxml, bibxml2...) subdirectory in
                    NETWORK + REQUEST

        Finally if the path returned is a network URL, use the cached
        version or create a new cache.

        - REQUEST refers to the full string of the file asked for,
        - TEMPLATE_DIR refers to the applications 'templates' directory,
        - SOURCE_DIR refers to the directory of the XML file being parsed
        - LOCAL_LIB_DIRS refers to a list of local directories to consult,
          on the CLI this is set by $XML_LIBRARY, defaulting to
          ['/usr/share/xml2rfc'].  On the GUI this can be configured
          manually but has the same initial defaults.
        - NETWORK refers to the online citation library.  On the CLI this
          is http://xml2rfc.ietf.org/public/rfc/.  On the GUI this can be
          configured manually but has the same initial default.

        The caches in read_dirs are consulted in sequence order to find
        the request.  If not found, the request will be cached at
        write_dir.

        This method will throw an lxml.etree.XMLSyntaxError to be handled
        by the application if the reference cannot be properly resolved
    """
    self.include = include  # include state
    tried_cache = False
    attempts = []  # Store the attempts
    original = request  # Used for the error message only
    result = None  # Our proper path
    if request.endswith('.dtd') or request.endswith('.ent'):
        if os.path.isabs(request):
            # Absolute request, return as-is
            attempts.append(request)
            result = request
        elif urlparse(request).netloc:
            paths = [request]
            # URL requested, cache it
            origloc = urlparse(paths[0]).netloc
            # Is the request hosted on one of our known network locations?
            if True in [urlparse(loc).netloc == urlparse(paths[0]).netloc
                        for loc in self.network_locs]:
                # Try the same path under each configured mirror in turn
                for loc in self.network_locs:
                    newloc = urlparse(loc).netloc
                    for path in paths:
                        path = path.replace(origloc, newloc)
                        attempts.append(path)
                        result = self.cache(path)
                        if result:
                            break
                    if result:
                        break
            else:
                for path in paths:
                    attempts.append(request)
                    result = self.cache(request)
                    if result:
                        break
            if not result and self.no_network:
                log.warn("Document not found in cache, and --no-network specified"
                         " -- couldn't resolve %s" % request)
            tried_cache = True
        else:
            basename = os.path.basename(request)
            # Look for dtd in templates directory
            attempt = os.path.join(self.templates_path, basename)
            attempts.append(attempt)
            if os.path.exists(attempt):
                result = attempt
            else:
                # Default to source directory
                result = os.path.join(self.source_dir, basename)
                attempts.append(result)
    else:
        if self.options and self.options.vocabulary == 'v3':
            paths = [request]
        elif not request.endswith('.xml'):
            # Try both with and without the .xml extension
            paths = [request, request + '.xml']
        else:
            paths = [request]
        if os.path.isabs(paths[0]):
            # Absolute path, return as-is
            for path in paths:
                attempts.append(path)
                result = path
                if os.path.exists(path):
                    break
        elif urlparse(paths[0]).netloc:
            # URL requested, cache it
            origloc = urlparse(paths[0]).netloc
            if True in [urlparse(loc).netloc == urlparse(paths[0]).netloc
                        for loc in self.network_locs]:
                for loc in self.network_locs:
                    newloc = urlparse(loc).netloc
                    for path in paths:
                        path = path.replace(origloc, newloc)
                        attempts.append(path)
                        result = self.cache(path)
                        if result:
                            break
                    if result:
                        break
            else:
                for path in paths:
                    attempts.append(path)
                    result = self.cache(path)
                    if result:
                        break
            if not result:
                if self.options and self.options.vocabulary == 'v3' \
                        and not request.endswith('.xml'):
                    log.warn("The v3 formatters require full explicit URLs of external "
                             "resources. Did you forget to add '.xml' (or some other extension)?")
                    # NOTE(review): 'attempt' looks unbound on this path --
                    # it is only assigned in other branches; verify this
                    # branch is reachable without a NameError
                    result = attempt
                elif self.no_network:
                    log.warn("Document not found in cache, and --no-network specified -- couldn't resolve %s" % request)
            tried_cache = True
        else:
            if os.path.dirname(paths[0]):
                # Intermediate directories, only do flat searches
                for dir in self.library_dirs:
                    # Try local library directories
                    for path in paths:
                        attempt = os.path.join(dir, path)
                        attempts.append(attempt)
                        if os.path.exists(attempt):
                            result = attempt
                            break
                if not result:
                    # Try network location
                    for loc in self.network_locs:
                        for path in paths:
                            url = urljoin(loc, path)
                            attempts.append(url)
                            result = self.cache(url)
                            if result:
                                break
                        if result:
                            break
                    tried_cache = True
                    if not result and self.no_network:
                        log.warn("Document not found in cache, and --no-network specified -- couldn't resolve %s" % request)
                # if not result:
                #     # Document didn't exist, default to source dir
                #     result = os.path.join(self.source_dir, request)
                #     attempts.append(result)
            else:
                # Hanging filename
                for dir in self.library_dirs:
                    # NOTE: Recursion can be implemented here
                    # Try local library directories
                    for path in paths:
                        attempt = os.path.join(dir, path)
                        attempts.append(attempt)
                        if os.path.exists(attempt):
                            result = attempt
                            break
                if not result:
                    # Try network subdirs
                    for subdir in NET_SUBDIRS:
                        for loc in self.network_locs:
                            for path in paths:
                                url = urljoin(loc, subdir + '/' + path)
                                attempts.append(url)
                                result = self.cache(url)
                                if result:
                                    break
                            if result:
                                break
                        tried_cache = True
                        if result:
                            break
                if not result and self.no_network:
                    log.warn("Document not found in cache, and --no-network specified -- couldn't resolve %s" % request)
                # if not result:
                #     # Default to source dir
                #     result = os.path.join(self.source_dir, request)
                #     attempts.append(result)

    # Verify the result -- either raise exception or return it
    if not result or (not os.path.exists(result) and
                      not urlparse(original).netloc):
        if os.path.isabs(original):
            log.warn('The reference "' + original +
                     '" was requested with an absolute path, but not found '
                     'in that location. Removing the path component will cause xml2rfc to look for '
                     'the file automatically in standard locations.')
        # Couldn't resolve.  Throw an exception
        error = XmlRfcError('Unable to resolve external request: '
                            + '"' + original + '"',
                            line_no=line_no, filename=self.source)
        if self.verbose and len(attempts) > 1:
            # Reveal attemps
            error.msg += ', trying the following location(s):\n ' + \
                         '\n '.join(attempts)
        raise error
    else:
        if not tried_cache:
            # Haven't printed a verbose messsage yet
            typename = self.include and 'include' or 'entity'
            log.note('Resolving ' + typename + '...', result)
    # tried_cache implies the original (possibly remote) name is worth
    # reporting back to the caller alongside the resolved path
    if tried_cache:
        return [result, original]
    return [result, None]
def main():
    """
    Command-line entry point: parse options, load and validate the XML
    source, then run the requested lint passes (extract, embedded XML,
    ABNF, SVG, spelling, duplicate detection, BCP 14) and optionally
    write the document back out.
    """
    # Populate options
    formatter = optparse.IndentedHelpFormatter(max_help_position=40)
    optionparser = optparse.OptionParser(usage='rfclint SOURCE [OPTIONS] '
                                         '...\nExample: rfclint '
                                         'draft.xml',
                                         formatter=formatter)

    parser_options = optparse.OptionGroup(optionparser, "Parser Options")
    parser_options.add_option('-C', '--clear-cache', action='store_true',
                              dest='clear_cache', default=False,
                              help='purge the cache and exit')
    parser_options.add_option('-c', '--cache', dest='cache',
                              help='specify a primary cache directory to'
                              ' write to; default: try [ %s ]' % ', '.join(CACHES))
    parser_options.add_option('-N', '--no-network', action='store_true',
                              default=False,
                              help='don\'t use the network to resolve references')
    parser_options.add_option('-n', '--no-rng', action='store_true',
                              help='disable RNG validation step')
    parser_options.add_option('-r', '--rng', action='store_true',
                              help='Specify an alternate RNG file')
    parser_options.add_option('-X', '--no-xinclude', action='store_true',
                              dest='no_xinclude',
                              help='don\'t resolve any xi:include elements')
    optionparser.add_option_group(parser_options)

    general_options = optparse.OptionGroup(optionparser, "General Options")
    general_options.add_option('-o', '--out', dest='output_filename',
                               metavar='FILE',
                               help='specify an explicit output filename')
    # NOTE(review): the next two options are added to parser_options, not
    # general_options -- presumably intentional grouping; confirm
    parser_options.add_option('--no-xml', dest='no_xml', action='store_true',
                              help='Don\'t perform XML well-formness checking')
    parser_options.add_option('--bcp14', dest='bcp14', action='store_true',
                              help='Perform bcp14 checking')
    optionparser.add_option_group(general_options)

    plain_options = optparse.OptionGroup(optionparser, 'Plain Options')
    plain_options.add_option('-q', '--quiet', action='store_true',
                             help='dont print anything')
    plain_options.add_option('-v', '--verbose', action='store_true',
                             help='print extra information')
    plain_options.add_option('-V', '--version', action='callback',
                             callback=display_version,
                             help='display the version number and exit')
    plain_options.add_option('--debug', action='store_true',
                             help='Show debugging output')
    plain_options.add_option('--extract', dest='extract',
                             help='Extract all items of the given type')
    plain_options.add_option('--no-svgcheck', action='store_true',
                             dest='no_svgcheck',
                             help='Don\'t run svgcheck')
    optionparser.add_option_group(plain_options)

    spell_options = optparse.OptionGroup(optionparser, 'Spell Options')
    spell_options.add_option('--no-spell', dest='no_spell', default=False,
                             action='store_true',
                             help='Don\'t run the spell checking')
    spell_options.add_option('--dictionary', dest='dict_list', action='append',
                             help='Use this addition dictionary when spell checking')
    spell_options.add_option('--personal', dest='dict_personal',
                             help='use this dictionary as the personal dictionary')
    spell_options.add_option('--spell-window', dest='spell_window',
                             action='store', type='int',
                             help='Set the number of words to appear around spelling errors')
    spell_options.add_option('--no-dup-detection', dest='no_dups',
                             action='store_true',
                             help='Don\'t do duplication detection.')
    spell_options.add_option('--spell-program', dest='spell_program',
                             metavar='NAME',
                             help='Name of spelling program to use')
    spell_options.add_option('--no-suggest', dest='spell_suggest',
                             action='store_false',
                             help='Do not provide suggestions')
    spell_options.add_option('--suggest', dest='spell_suggest',
                             action='store_true',
                             help='provide suggestions (default)')
    spell_options.add_option('--color', dest='spell_color', action='callback',
                             callback=check_color, type='string',
                             help='color incorrect words in supplied context')
    spell_options.add_option('--no-curses', dest='no_curses',
                             action='store_true',
                             help='disable curses when doing spell checking and dup detection')
    spell_options.add_option('--skip-code', dest='skip_code',
                             action='store_true',
                             help='skip all code elements when doing spell and duplicate checking')
    spell_options.add_option('--skip-artwork', dest='skip_artwork',
                             action='store_true',
                             help='skip all artwork elements when doing spell and '
                             'duplicate checking')
    optionparser.add_option_group(spell_options)

    abnf_options = optparse.OptionGroup(optionparser, 'ABNF Options')
    abnf_options.add_option('--abnf-program', dest='abnf_program',
                            metavar='NAME',
                            help='Name of ABNF checker program to use')
    abnf_options.add_option('--no-abnf', dest='no_abnf', action='store_true',
                            help='Don\'t perform ABNF checking')
    abnf_options.add_option('--abnf-add-rules', dest='abnf_add',
                            help='ABNF file to append during evaluation.')

    config_options = optparse.OptionGroup(optionparser,
                                          'Configuration Options')
    config_options.add_option('--configfile', dest='config_file',
                              metavar='NAME',
                              help="Specify the name of the configuration file.")
    config_options.add_option('--save-config', dest='save_config',
                              default=False, action='store_true',
                              help='Save configuration back to file')
    # NOTE(review): abnf_options and config_options are never passed to
    # optionparser.add_option_group(), so they are omitted from --help
    # output (the options themselves still parse) -- confirm if intended

    # --- Parse and validate arguments ---------------------------------

    (options, args) = optionparser.parse_args()

    # --- Setup and parse the input file

    if options.cache:
        if not os.path.exists(options.cache):
            try:
                os.makedirs(options.cache)
                if options.verbose:
                    log.write('Created cache directory at', options.cache)
            except OSError as e:
                print('Unable to make cache directory: %s ' % options.cache)
                print(e)
                sys.exit(1)
        else:
            if not os.access(options.cache, os.W_OK):
                print('Cache directory is not writable: %s' % options.cache)
                sys.exit(1)

    if options.clear_cache:
        clear_cache(options.cache)

    # --- Locate the configuration file if it exists and import it ----

    config = ConfigFile(options)

    if options.save_config:
        config.save()
        sys.exit(0)

    # make things quiet if output goes to stdout
    if options.output_filename is None and not options.quiet and (
            options.extract):
        options.quiet = True

    # --- Get the file to be processed --------------------------------

    if len(args) < 1:
        optionparser.print_help()
        sys.exit(2)
    source = args[0]
    if not os.path.exists(source):
        sys.exit('No such file: ' + source)

    # Setup warnings module
    # rfclint.log.warn_error = options.warn_error and True or False
    log.quiet = options.quiet and True or False
    log.verbose = options.verbose

    # Parse the document into an xmlrfc tree instance
    log.note("Checking for well-formness of '{0}'".format(source))
    parser = XmlRfcParser(source, verbose=options.verbose,
                          preserve_all_white=True,
                          quiet=True,
                          cache_path=options.cache,
                          no_network=options.no_network,
                          no_xinclude=options.no_xinclude,
                          templates_path=globals().get('_TEMPLATESPATH', None))
    try:
        xmlrfc = parser.parse(remove_comments=False, strip_cdata=False)
    except XmlRfcError as e:
        log.exception('Unable to parse the XML document: ' + source, e)
        sys.exit(1)
    except lxml.etree.XMLSyntaxError as e:
        # Give the lxml.etree.XmlSyntaxError exception a line attribute which
        # matches lxml.etree._LogEntry, so we can use the same logging function
        log.error("Unable to parse the XML document: " +
                  os.path.normpath(source))
        log.exception_lines("dummy", e.error_log)
        sys.exit(1)
    log.note("Well-formness passes")

    # Validate the document unless disabled
    if not options.no_rng:
        log.note("Checking for schema validation...")
        if not options.rng:
            options.rng = parser.default_rng_path
        ok, errors = xmlrfc.validate(rng_path=options.rng)
        if not ok:
            log.error('Unable to validate the XML document: ' +
                      os.path.normpath(source))
            log.exception_lines("dummy", errors)
            sys.exit(1)
        log.info("Schema validation passes")
    else:
        log.note("Skipping schema validation")

    # Do Extracts
    if options.extract:
        codeItems = xmlrfc.tree.getroot().xpath(
            "//sourcecode[@type='{0}']".format(options.extract))
        if len(codeItems) == 0:
            log.error("No sourcecode elements with type = '{0}' found.".format(
                options.extract))
            exit(1)
        if options.output_filename:
            file = open(options.output_filename, 'w')
        else:
            file = sys.stdout
        needEOL = True
        for item in codeItems:
            if "name" in item.attrib:
                # Named fragments go to their own file
                with open(item.attrib["name"], 'w') as f:
                    f.write(item.text)
                    if len(item.text) > 0 and item.text[-1] != '\n':
                        f.write('\n')
            else:
                file.write(item.text)
                if len(item.text) > 0:
                    needEOL = item.text[-1] != '\n'
        if needEOL:
            file.write('\n')
        if options.output_filename:
            file.close()
        exit(0)

    # Validate any embedded XML
    if not options.no_xml:
        codeItems = xmlrfc.tree.getroot().xpath("//sourcecode[@type='xml']")
        if len(codeItems) > 0:
            log.note("Validating XML fragments in sourcecode elements")
            # resolver without knowledge of rfc_number:
            caching_resolver = CachingResolver(no_network=True,
                                               verbose=options.verbose,
                                               quiet=options.quiet)
            for item in codeItems:
                parser = lxml.etree.XMLParser(dtd_validation=False,
                                              load_dtd=False,
                                              no_network=True,
                                              resolve_entities=False,
                                              recover=False)
                parser.resolvers.add(caching_resolver)
                try:
                    # Strip leading whitespace before the XML declaration
                    text = re.sub(u'^\s+<\?xml ', '<?xml ', item.text)
                    file = six.BytesIO(text.encode('utf-8'))
                    lxml.etree.parse(file, parser)
                    log.info(
                        "XML fragment in source code found and is well defined.",
                        where=item)
                except (lxml.etree.XMLSyntaxError) as e:
                    log.warn(u'XML in sourcecode not well formed: ', e.msg,
                             where=item)
                except Exception as e:
                    log.exception(u'Error occured processing XML: ', e)
        else:
            log.info("No XML fragments in sourcecode elements found.")

    # Validate any embedded ABNF
    if not options.no_abnf:
        try:
            checker = AbnfChecker(config)
            checker.validate(xmlrfc.tree)
        except RfcLintError as e:
            log.error("Skipping ABNF checking because")
            # assumes RfcLintError defines a .message attribute -- TODO confirm
            log.error(e.message, additional=2)

    # Validate any SVG items
    if not options.no_svgcheck:
        checkTree(xmlrfc.tree)

    # do the Spelling checking
    if not options.no_spell:
        speller = None
        try:
            speller = Speller(config)
            if options.no_curses:
                speller.no_curses = True
            speller.initscr()
            speller.processTree(xmlrfc.tree.getroot())
            speller.sendCommand("#")  # save personal dictionary
            speller.endwin()
        except RfcLintError as e:
            log.error("Skipping spell checking because")
            log.error(e.message, additional=2)
            if speller:
                speller.endwin()
        except Exception:
            if speller:
                speller.endwin()
            raise

    # do the Duplicate checking
    if not options.no_dups:
        try:
            dups = Dups(config)
            if options.no_curses:
                dups.no_curses = True
            dups.initscr()
            dups.processTree(xmlrfc.tree.getroot())
            dups.endwin()
        except RfcLintError as e:
            dups.endwin()
            log.error("Skipping duplicate checking because")
            log.error(e.message, additional=2)
        except Exception:
            dups.endwin()
            raise

    # do the 2119 Language tag checking
    if options.bcp14:
        try:
            lang2119 = Lang2119(config)
            if options.no_curses:
                lang2119.no_curses = True
            lang2119.initscr()
            lang2119.processTree(xmlrfc.tree.getroot())
            lang2119.endwin()
        except RfcLintError as e:
            log.error("Skipping RFC 2119 language tag checking because")
            # NOTE(review): 'additoin' looks like a typo for 'additional';
            # left as-is since it is a keyword argument to log.error
            log.error(e.message, additoin=2)
        except Exception:
            lang2119.endwin()
            raise

    if options.output_filename is not None:
        if six.PY2:
            file = open(options.output_filename, 'w')
        else:
            file = open(options.output_filename, 'w', encoding='utf8')
        text = lxml.etree.tostring(xmlrfc.tree.getroot(),
                                   xml_declaration=True,
                                   encoding='utf-8',
                                   doctype=xmlrfc.tree.docinfo.doctype)
        if six.PY3:
            text = text.decode('utf8')
        file.write(text)
        if len(text) > 0 and text[-1] != '\n':
            file.write('\n')