def validate_schema(tdl_file):
    """Validate ``tdl_file`` against the ``tdl.rng`` RelaxNG schema.

    The schema is searched for in a fixed list of locations relative to the
    current working directory; the first existing file is used.  Validation
    messages reported by libxml2 are printed, prefixed with ``tdl_file``.

    Raises:
        Exception: if no schema file is found, if the document cannot be
            parsed, or if it does not validate against the schema.
    """
    # Locate relaxng schema
    rng_file = None
    for candidate in ["../../docs/tdl.rng",
                      "../docs/tdl.rng",
                      "docs/tdl.rng",
                      "tdl.rng"]:
        if os.path.isfile(candidate):
            rng_file = candidate
            break
    if rng_file is None:
        raise Exception("RelaxNG schema file not found: tdl.rng")

    # Load relaxng schema; the context manager closes the handle promptly
    # instead of leaving it to the garbage collector.
    with open(rng_file, "r") as schema_fp:
        schema = schema_fp.read()
    rngp = libxml2.relaxNGNewMemParserCtxt(schema, len(schema))
    rngs = rngp.relaxNGParse()

    # Define callback for error handling.  libxml2 invokes it positionally
    # with the registered context (here: the file name) and the message.
    def error_cb(ctx, msg):
        print("%s: %s" % (ctx, msg.strip()))
    libxml2.registerErrorHandler(error_cb, tdl_file)

    # Attempt to validate: walk the whole document with the schema attached.
    reader = libxml2.newTextReaderFilename(tdl_file)
    reader.RelaxNGSetSchema(rngs)
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()

    # Read() ends with 0 at a clean end of document; anything else is an error.
    if ret != 0:
        raise Exception("Error parsing the document: %s" % tdl_file)
    if reader.IsValid() != 1:
        raise Exception("Document failed to validate: %s" % tdl_file)
def streamFile(filename):
    """Walk ``filename`` with a libxml2 text reader and collect rule data.

    Nodes are handed to ``processNode1``.  After the loops finish, one more
    ``processNode1(reader)`` result is appended to ``dict_rules`` (a name
    defined outside this function), and ``dict_rules`` is returned.

    Returns None, after printing a message, when the reader cannot be created.

    NOTE(review): the nesting of the loops below was reconstructed from a
    flattened copy of this function -- confirm it against the original layout.
    """
    try:
        reader = libxml2.newTextReaderFilename(filename)
    except:
        print "unable to open %s" % (filename)
        return
    #num=reader.Depth()
    ret = reader.Read()
    while ret == 1:
        # NOTE: ``list`` shadows the builtin of the same name in this function.
        list=processNode1(reader)
        ret = reader.Read()
        # NOTE(review): as laid out here, ``list`` is only reassigned inside
        # the innermost loop, so a non-empty ``list`` whose element 1 differs
        # from the current depth would keep this loop spinning -- confirm.
        while len(list) != 0:
            # Element 1 of the processNode1 result is compared with the
            # reader depth; presumably it is the depth of the processed node
            # -- verify against processNode1.
            while reader.Depth() == list[1]:
                list=processNode1(reader)
                ret = reader.Read()
                print 'ciao'
    # A final status other than 0 is reported as a parse failure.
    if ret != 0:
        print "%s : failed to parse" % (filename)
    #print processNode1(reader)
    dict_rules.append(processNode1(reader))
    # print dict_rules
    return dict_rules
def streamFile(filename):
    """Stream ``filename`` through a libxml2 text reader.

    Each node the reader stops on is passed to ``processNode``.  A message is
    printed, and the function returns None, when the reader cannot be created
    or when reading ends with a non-zero status.
    """
    try:
        reader = libxml2.newTextReaderFilename(filename)
    except libxml2.treeError:
        # Only the failure raised by the reader constructor is handled here;
        # a bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        print("Unable to open %s" % (filename))
        return

    ret = reader.Read()
    while ret == 1:
        processNode(reader)
        ret = reader.Read()

    # Read() ends with 0 at a clean end of document; anything else is an error.
    if ret:
        print("Failed to parse %s" % (filename))
def streamFile(filename):
    """Stream ``filename`` through a libxml2 text reader, exiting on failure.

    Each node the reader stops on is passed to ``processNode``.  If the reader
    cannot be created, or reading ends with a non-zero status, a message is
    printed and the process exits with status -1.
    """
    # ``exit`` is a helper injected by the ``site`` module for interactive
    # use; ``sys.exit`` raises the same SystemExit and is always available.
    import sys

    try:
        reader = libxml2.newTextReaderFilename(filename)
    except libxml2.treeError:
        # Only the failure raised by the reader constructor is handled here;
        # a bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        print("unable to open: %s" % (filename))
        sys.exit(-1)

    ret = reader.Read()
    while ret == 1:
        processNode(reader)
        ret = reader.Read()

    # Read() ends with 0 at a clean end of document; anything else is an error.
    if ret != 0:
        print("%s: failed to parse" % (filename))
        sys.exit(-1)
def __init__(self, source, data_source_id, environment): #{{{2
    """Initialise importer state for one XML source.

    Arguments:
    :source: location passed to ``libxml2.newTextReaderFilename``.
    :data_source_id: stored unchanged on the instance.
    :environment: passed to ``DbImporter``.
    """
    # Database access comes first, ahead of the helper calls further down.
    self.db = DbImporter(environment)

    # Bookkeeping containers, each starting out empty.
    self.deleted, self.inserted, self.changed = [], [], []
    self.imported_data = dict()

    # Counter and the timestamp taken at construction.
    self.counter = 0
    self.time = time.time()

    self.kingdoms = self._prepare_kingdoms()
    self.reader = libxml2.newTextReaderFilename(source)
    self.data_source_id = data_source_id

    # Parser position state.
    self._current_tag = None
    self._record = self._reset_record()
def streamFile(filename):
    """Stream ``filename`` through a libxml2 text reader using class handlers.

    Processing starts with a ``rootClass()`` instance.  For every node, the
    current handler's ``processNode(reader, handler)`` is called and its
    return value becomes the handler for the next node.  A message is printed,
    and the function returns None, when the reader cannot be created or when
    reading ends with a non-zero status.
    """
    try:
        reader = libxml2.newTextReaderFilename(filename)
    except libxml2.treeError:
        # Only the failure raised by the reader constructor is handled here;
        # a bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        print("unable to open %s" % (filename))
        return

    curClass = rootClass()
    ret = reader.Read()
    while ret == 1:
        curClass = curClass.processNode(reader, curClass)
        ret = reader.Read()

    # Read() ends with 0 at a clean end of document; anything else is an error.
    if ret != 0:
        print("%s : failed to parse" % (filename))
def validate_schema(tdl_file):
    """Validate ``tdl_file`` against the ``tdl.rng`` RelaxNG schema.

    The schema is searched for in a fixed list of locations relative to the
    current working directory; the first existing file is used.  Validation
    messages reported by libxml2 are printed, prefixed with ``tdl_file``.

    Raises:
        Exception: if no schema file is found, if the document cannot be
            parsed, or if it does not validate against the schema.
    """
    # Locate relaxng schema
    rng_file = None
    for candidate in [
        '../../docs/tdl.rng',
        '../docs/tdl.rng',
        'docs/tdl.rng',
        'tdl.rng',
    ]:
        if os.path.isfile(candidate):
            rng_file = candidate
            break
    if rng_file is None:
        raise Exception('RelaxNG schema file not found: tdl.rng')

    # Load relaxng schema; the context manager closes the handle promptly
    # instead of leaving it to the garbage collector.
    with open(rng_file, 'r') as schema_fp:
        schema = schema_fp.read()
    rngp = libxml2.relaxNGNewMemParserCtxt(schema, len(schema))
    rngs = rngp.relaxNGParse()

    # Define callback for error handling.  libxml2 invokes it positionally
    # with the registered context (here: the file name) and the message.
    def error_cb(ctx, msg):
        print("%s: %s" % (ctx, msg.strip()))
    libxml2.registerErrorHandler(error_cb, tdl_file)

    # Attempt to validate: walk the whole document with the schema attached.
    reader = libxml2.newTextReaderFilename(tdl_file)
    reader.RelaxNGSetSchema(rngs)
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()

    # Read() ends with 0 at a clean end of document; anything else is an error.
    if ret != 0:
        raise Exception('Error parsing the document: %s' % tdl_file)
    if reader.IsValid() != 1:
        raise Exception('Document failed to validate: %s' % tdl_file)
else: return hexNum XMLREADER_START_ELEMENT_NODE_TYPE = 1 # reader = libxml2.newTextReaderFilename("../capture.xml") # reader = libxml2.newTextReaderFilename("../newCap.xml") # reader = libxml2.newTextReaderFilename("../mouse.xml") # reader = libxml2.newTextReaderFilename("../nj.xml") if len(sys.argv) != 2: print "Synopsis: " + sys.argv[0] + " <USBTraceLog.xml>" exit(1) reader = libxml2.newTextReaderFilename(sys.argv[1]) # Old data for endpoints 0, 1 and 2 oldData00 = "" oldData01 = "" oldData02 = "" # Parse through the XML... while reader.Read(): if reader.NodeType() == XMLREADER_START_ELEMENT_NODE_TYPE and reader.Depth() == 1 and reader.Name() == "Request": # This is a Request open tag...parse until the end tag node = reader.Expand() row = node.children # Initialise to empty the fields for this request rowMap = {
def parse(self, source):
    """Parse ``source`` with a libxml2 text reader and emit SAX events.

    ``source`` is opened directly by name when its type is one of
    ``StringTypes``; otherwise it is normalised with
    ``saxutils.prepare_input_source`` and read through its byte stream.

    Events are delivered to ``self._cont_handler`` and, when one is set,
    to ``self.__lex_handler``.  Collected reader errors are forwarded via
    ``self._reportErrors``.  ``self.__parsing`` is 1 for the duration of the
    call and is reset to 0 on exit, even when an exception propagates.

    Raises:
        SAXException: if the reader yields a node type this loop does not
            handle.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        if type(source) in StringTypes:
            reader = libxml2.newTextReaderFilename(source)
        else:
            source = saxutils.prepare_input_source(source)
            input = libxml2.inputBuffer(source.getByteStream())
            reader = input.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler, None)
        # configure reader
        reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
        reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
        reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
        # we reuse attribute maps (for a slight performance gain)
        if self.__ns:
            attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
        else:
            attributesImpl = xmlreader.AttributesImpl({})
        # prefixes to pop (for endPrefixMapping)
        prefixes = []
        # start loop
        self._cont_handler.startDocument()
        while 1:
            r = reader.Read()
            # check for errors
            # r == 1: a node is available; r == 0: end of input;
            # any other value is treated as a fatal read failure.
            if r == 1:
                if not self.__errors is None:
                    self._reportErrors(0)
            elif r == 0:
                if not self.__errors is None:
                    self._reportErrors(0)
                break # end of parse
            else:
                if not self.__errors is None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(\
                        SAXException("Read failed (no details available)"))
                break # fatal parse error
            # get node type
            nodeType = reader.NodeType()
            # Element
            if nodeType == 1:
                if self.__ns:
                    eltName = (_d(reader.NamespaceUri()),\
                               _d(reader.LocalName()))
                    eltQName = _d(reader.Name())
                    # fresh dicts are installed on the reused attribute object
                    attributesNSImpl._attrs = attrs = {}
                    attributesNSImpl._qnames = qnames = {}
                    newPrefixes = []
                    while reader.MoveToNextAttribute():
                        qname = _d(reader.Name())
                        value = _d(reader.Value())
                        if qname.startswith("xmlns"):
                            # "xmlns:foo" -> prefix "foo" (text after the
                            # 6-character "xmlns:"); bare "xmlns" -> None
                            if len(qname) > 5:
                                newPrefix = qname[6:]
                            else:
                                newPrefix = None
                            newPrefixes.append(newPrefix)
                            self._cont_handler.startPrefixMapping(\
                                newPrefix,value)
                            if not self.__nspfx:
                                continue # don't report xmlns attribute
                        attName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        qnames[attName] = qname
                        attrs[attName] = value
                    reader.MoveToElement()
                    self._cont_handler.startElementNS( \
                        eltName,eltQName,attributesNSImpl)
                    if reader.IsEmptyElement():
                        # an empty element is ended right here, so its
                        # prefix mappings are ended immediately as well
                        self._cont_handler.endElementNS(eltName, eltQName)
                        for newPrefix in newPrefixes:
                            self._cont_handler.endPrefixMapping(newPrefix)
                    else:
                        # kept until the matching EndElement pops them
                        prefixes.append(newPrefixes)
                else:
                    eltName = _d(reader.Name())
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = _d(reader.Name())
                        attrs[attName] = _d(reader.Value())
                    reader.MoveToElement()
                    self._cont_handler.startElement( \
                        eltName,attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
            # EndElement
            elif nodeType == 15:
                if self.__ns:
                    self._cont_handler.endElementNS( \
                         (_d(reader.NamespaceUri()),_d(reader.LocalName())),
                         _d(reader.Name()))
                    for prefix in prefixes.pop():
                        self._cont_handler.endPrefixMapping(prefix)
                else:
                    self._cont_handler.endElement(_d(reader.Name()))
            # Text
            elif nodeType == 3:
                self._cont_handler.characters(_d(reader.Value()))
            # Whitespace
            elif nodeType == 13:
                self._cont_handler.ignorableWhitespace(_d(reader.Value()))
            # SignificantWhitespace
            elif nodeType == 14:
                self._cont_handler.characters(_d(reader.Value()))
            # CDATA
            elif nodeType == 4:
                if not self.__lex_handler is None:
                    self.__lex_handler.startCDATA()
                self._cont_handler.characters(_d(reader.Value()))
                if not self.__lex_handler is None:
                    self.__lex_handler.endCDATA()
            # EntityReference
            elif nodeType == 5:
                if not self.__lex_handler is None:
                    self.startEntity(_d(reader.Name()))
                reader.ResolveEntity()
            # EndEntity
            elif nodeType == 16:
                if not self.__lex_handler is None:
                    self.endEntity(_d(reader.Name()))
            # ProcessingInstruction
            elif nodeType == 7:
                self._cont_handler.processingInstruction( \
                    _d(reader.Name()),_d(reader.Value()))
            # Comment
            elif nodeType == 8:
                if not self.__lex_handler is None:
                    self.__lex_handler.comment(_d(reader.Value()))
            # DocumentType
            elif nodeType == 10:
                #if not self.__lex_handler is None:
                #    self.__lex_handler.startDTD()
                pass # TODO (how to detect endDTD? on first non-dtd event?)
            # XmlDeclaration
            elif nodeType == 17:
                pass # TODO
            # Entity
            elif nodeType == 6:
                pass # TODO (entity decl)
            # Notation (decl)
            elif nodeType == 12:
                pass # TODO
            # Attribute (never in this loop)
            #elif nodeType == 2:
            #    pass
            # Document (not exposed)
            #elif nodeType == 9:
            #    pass
            # DocumentFragment (never returned by XmlReader)
            #elif nodeType == 11:
            #    pass
            # None
            #elif nodeType == 0:
            #    pass
            # -
            else:
                raise SAXException("Unexpected node type %d" % nodeType)
        # endDocument is only reported after a clean end of input
        if r == 0:
            self._cont_handler.endDocument()
        # NOTE(review): Close() is skipped when an exception propagates out
        # of the loop above; only the __parsing flag is reset in that case.
        reader.Close()
    finally:
        self.__parsing = 0
""" def callback(ctx, str): global err err = err + "%s" % (str) libxml2.registerErrorHandler(callback, "") valid_files = glob.glob("../../test/valid/*.x*") valid_files.sort() for file in valid_files: if file.find("t8") != -1: continue if file == "../../test/valid/rss.xml": continue if file == "../../test/valid/xlink.xml": continue reader = libxml2.newTextReaderFilename(file) #print "%s:" % (file) reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) ret = reader.Read() while ret == 1: ret = reader.Read() if ret != 0: print("Error parsing and validating %s" % (file)) #sys.exit(1) if err != expect: print(err) # # another separate test based on Stephane Bidoul one #
# Re-key the expected-error table by full file path: each key becomes
# dir_prefix + key + ".xml".
expect = {"{}{}.xml".format(dir_prefix, key): val for key, val in expect.items()}

def callback(ctx, str):
    # Error handler registered with libxml2 below: appends every reported
    # message to the module-level ``err`` string.
    # NOTE: the ``str`` parameter shadows the builtin inside this function.
    global err
    err = err + "%s" % (str)
libxml2.registerErrorHandler(callback, "")

# Files for which a non-zero final Read() status is not reported below.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = ["{}{}.xml".format(dir_prefix, f) for f in parsing_error_files]

valid_files = glob.glob(dir_prefix + "*.x*")
assert valid_files, "found no valid files in '{}'".format(dir_prefix)
valid_files.sort()
for file in valid_files:
    # Reset the collected messages so each file is checked on its own.
    err = ""
    reader = libxml2.newTextReaderFilename(file)
    #print "%s:" % (file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    # Read the whole document with validation switched on.
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()
    if ret != 0 and file not in expect_parsing_error:
        print("Error parsing and validating %s" % (file))
        #sys.exit(1)
    # Any collected message must match the expected text for this file.
    # NOTE(review): the nesting of the two inner ``if`` statements was
    # reconstructed from a flattened copy -- confirm against the original.
    if (err):
        if not(file in expect and err == expect[file]):
            print("Error: ", err)
            if file in expect:
                print("Expected: ", expect[file])

#
# another separate test based on Stephane Bidoul one
def parse(self, source):
    """Parse ``source`` with a libxml2 text reader and emit SAX events.

    ``source`` is opened directly by name when its type is one of
    ``StringTypes``; otherwise it is normalised with
    ``saxutils.prepare_input_source`` and read through its byte stream.

    Events are delivered to ``self._cont_handler`` and, when one is set,
    to ``self.__lex_handler``.  Collected reader errors are forwarded via
    ``self._reportErrors``.  ``self.__parsing`` is 1 for the duration of the
    call and is reset to 0 on exit, even when an exception propagates.

    Raises:
        SAXException: if the reader yields a node type this loop does not
            handle.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        if type(source) in StringTypes:
            reader = libxml2.newTextReaderFilename(source)
        else:
            source = saxutils.prepare_input_source(source)
            input = libxml2.inputBuffer(source.getByteStream())
            reader = input.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler,None)
        # configure reader
        reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
        reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
        reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
        reader.SetParserProp(libxml2.PARSER_VALIDATE,self.__validate)
        # we reuse attribute maps (for a slight performance gain)
        if self.__ns:
            attributesNSImpl = xmlreader.AttributesNSImpl({},{})
        else:
            attributesImpl = xmlreader.AttributesImpl({})
        # prefixes to pop (for endPrefixMapping)
        prefixes = []
        # start loop
        self._cont_handler.startDocument()
        while 1:
            r = reader.Read()
            # check for errors
            # r == 1: a node is available; r == 0: end of input;
            # any other value is treated as a fatal read failure.
            if r == 1:
                if not self.__errors is None:
                    self._reportErrors(0)
            elif r == 0:
                if not self.__errors is None:
                    self._reportErrors(0)
                break # end of parse
            else:
                if not self.__errors is None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(\
                        SAXException("Read failed (no details available)"))
                break # fatal parse error
            # get node type
            nodeType = reader.NodeType()
            # Element
            if nodeType == 1:
                if self.__ns:
                    eltName = (_d(reader.NamespaceUri()),\
                               _d(reader.LocalName()))
                    eltQName = _d(reader.Name())
                    # fresh dicts are installed on the reused attribute object
                    attributesNSImpl._attrs = attrs = {}
                    attributesNSImpl._qnames = qnames = {}
                    newPrefixes = []
                    while reader.MoveToNextAttribute():
                        qname = _d(reader.Name())
                        value = _d(reader.Value())
                        if qname.startswith("xmlns"):
                            # "xmlns:foo" -> prefix "foo" (text after the
                            # 6-character "xmlns:"); bare "xmlns" -> None
                            if len(qname) > 5:
                                newPrefix = qname[6:]
                            else:
                                newPrefix = None
                            newPrefixes.append(newPrefix)
                            self._cont_handler.startPrefixMapping(\
                                newPrefix,value)
                            if not self.__nspfx:
                                continue # don't report xmlns attribute
                        attName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        qnames[attName] = qname
                        attrs[attName] = value
                    reader.MoveToElement()
                    self._cont_handler.startElementNS( \
                        eltName,eltQName,attributesNSImpl)
                    if reader.IsEmptyElement():
                        # an empty element is ended right here, so its
                        # prefix mappings are ended immediately as well
                        self._cont_handler.endElementNS(eltName,eltQName)
                        for newPrefix in newPrefixes:
                            self._cont_handler.endPrefixMapping(newPrefix)
                    else:
                        # kept until the matching EndElement pops them
                        prefixes.append(newPrefixes)
                else:
                    eltName = _d(reader.Name())
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = _d(reader.Name())
                        attrs[attName] = _d(reader.Value())
                    reader.MoveToElement()
                    self._cont_handler.startElement( \
                        eltName,attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
            # EndElement
            elif nodeType == 15:
                if self.__ns:
                    self._cont_handler.endElementNS( \
                         (_d(reader.NamespaceUri()),_d(reader.LocalName())),
                         _d(reader.Name()))
                    for prefix in prefixes.pop():
                        self._cont_handler.endPrefixMapping(prefix)
                else:
                    self._cont_handler.endElement(_d(reader.Name()))
            # Text
            elif nodeType == 3:
                self._cont_handler.characters(_d(reader.Value()))
            # Whitespace
            elif nodeType == 13:
                self._cont_handler.ignorableWhitespace(_d(reader.Value()))
            # SignificantWhitespace
            elif nodeType == 14:
                self._cont_handler.characters(_d(reader.Value()))
            # CDATA
            elif nodeType == 4:
                if not self.__lex_handler is None:
                    self.__lex_handler.startCDATA()
                self._cont_handler.characters(_d(reader.Value()))
                if not self.__lex_handler is None:
                    self.__lex_handler.endCDATA()
            # EntityReference
            elif nodeType == 5:
                if not self.__lex_handler is None:
                    self.startEntity(_d(reader.Name()))
                reader.ResolveEntity()
            # EndEntity
            elif nodeType == 16:
                if not self.__lex_handler is None:
                    self.endEntity(_d(reader.Name()))
            # ProcessingInstruction
            elif nodeType == 7:
                self._cont_handler.processingInstruction( \
                    _d(reader.Name()),_d(reader.Value()))
            # Comment
            elif nodeType == 8:
                if not self.__lex_handler is None:
                    self.__lex_handler.comment(_d(reader.Value()))
            # DocumentType
            elif nodeType == 10:
                #if not self.__lex_handler is None:
                #    self.__lex_handler.startDTD()
                pass # TODO (how to detect endDTD? on first non-dtd event?)
            # XmlDeclaration
            elif nodeType == 17:
                pass # TODO
            # Entity
            elif nodeType == 6:
                pass # TODO (entity decl)
            # Notation (decl)
            elif nodeType == 12:
                pass # TODO
            # Attribute (never in this loop)
            #elif nodeType == 2:
            #    pass
            # Document (not exposed)
            #elif nodeType == 9:
            #    pass
            # DocumentFragment (never returned by XmlReader)
            #elif nodeType == 11:
            #    pass
            # None
            #elif nodeType == 0:
            #    pass
            # -
            else:
                raise SAXException("Unexpected node type %d" % nodeType)
        # endDocument is only reported after a clean end of input
        if r == 0:
            self._cont_handler.endDocument()
        # NOTE(review): Close() is skipped when an exception propagates out
        # of the loop above; only the __parsing flag is reset in that case.
        reader.Close()
    finally:
        self.__parsing = 0
def build_selector(self, checklists):
    '''Creates pages for selecting a new checklist.

    Builds a druid page holding an explanatory label and a two-column
    (Name, Summary) list, filled from the checklist files that could be
    read, and adds the page to ``self.druidWidget``.

    Arguments:
    :checklists: list of checklists we can instantiate.  Each entry is
                 used as a filename to open with libxml2.
    '''
    selectorPage = gnome.ui.DruidPageStandard()
    self.selectorPage = selectorPage
    selectorPage.set_title('Select the Checklist to start')
    selectorPage.set_logo(self.logo)
    selectorGroup = gtk.VBox()
    selectorLabel = gtk.Label("Please select the type of review you wish"
            " to start from the following list.  If you don't see the"
            " type of review you want to create it means no one has"
            " taken the time to write a checklist definition for it yet."
            "  Please consider contributing one if that's the case.")
    selectorLabel.set_line_wrap(True)
    selectorGroup.add(selectorLabel)

    # Create a selection menu to choose from the available checklists
    checkStore = gtk.ListStore(gobject.TYPE_STRING,
            gobject.TYPE_STRING,
            gobject.TYPE_STRING)
    for filename in checklists:
        summary = None
        name = None
        # Set up a stream reader for the checklist file.
        try:
            checkReader = libxml2.newTextReaderFilename(filename)
        except libxml2.treeError:
            print '%s was not a CheckList file' % (filename)
            continue

        # Read in the summary and name for the checklist
        status = checkReader.Read()
        while status == 1:
            # NodeType 1 restricts the match to the opening 'checklist' tag.
            if (checkReader.LocalName() == 'checklist' and
                    checkReader.NodeType() == 1):
                name = checkReader.GetAttribute('name')
            elif checkReader.LocalName() == 'summary':
                status = checkReader.Read() # Get the text element
                if status == 1:
                    summary = checkReader.Value()
                # NOTE(review): placement of this ``break`` (after the
                # ``if`` rather than inside it) was reconstructed from a
                # flattened copy -- confirm against the original layout.
                break
            status = checkReader.Read()

        # Files missing either value are reported and left out of the list.
        if not (summary and name):
            print 'Unable to get a summary and name from %s' % (filename)
            continue

        # Enter the information into the checkStore
        checkIter = checkStore.append(None)
        checkStore.set(checkIter,
                self.__FILENAME, filename,
                self.__CHECKNAME, name,
                self.__CHECKSUMMARY, summary)

    checkList = gtk.TreeView(checkStore)
    self.selectorSelection = checkList.get_selection()
    self.selectorSelection.set_mode(gtk.SELECTION_SINGLE)
    # presumably store columns 1 and 2 correspond to __CHECKNAME and
    # __CHECKSUMMARY -- verify against the class constants
    renderer = gtk.CellRendererText()
    column = gtk.TreeViewColumn('Name', renderer, text=1)
    checkList.append_column(column)
    renderer = gtk.CellRendererText()
    column = gtk.TreeViewColumn('Summary', renderer, text=2)
    checkList.append_column(column)
    selectorGroup.add(checkList)
    selectorPage.append_item('', selectorGroup, '')
    selectorPage.connect('next', self.selector_next)
    self.druidWidget.add(selectorPage)

    # Double-clicking the row is the same as selecting next.
    # The druid widget is passed as the extra connect() argument and arrives
    # as the lambda's last parameter.
    checkList.connect('row-activated',
            lambda self, selector, column, druid: druid.next.clicked(),
            self.druidWidget)
def importfile(dbh, filename, batch=False):
    """Import one XML file and record its name in the database.

    Arguments:
        dbh: database handle; ``cursor()``, ``commit()`` and ``rollback()``
            are called on it.
        filename: path of the XML file to import.
        batch: when false, ``_schdupdate(dbh)`` is run after a successful
            import; when true that step is skipped here.

    Returns:
        A ``(status, message)`` tuple.  ``status`` is 1 on success, -2 when
        the file is missing, empty or cannot be parsed, -1 on a database
        error; otherwise it is whatever ``_process_file`` or ``_schdupdate``
        returned.
    """
    module_logger.info("Importing %s Batch %s", filename, batch)
    status = 1
    message = 'OK'

    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        try:
            module_logger.debug("Parsing")
            xml = libxml2.newTextReaderFilename(filename)
            libxml2.registerErrorHandler(_xmlerror, "")
            # Read() yields 1 while nodes are available, 0 at end of input
            # and a negative value on error.  Testing only for truthiness
            # would treat the error value as "keep going".
            ret = xml.Read()
            while ret == 1:
                module_logger.debug('X %s %s', xml.NodeType(), xml.Name())
                if (xml.NodeType() == libxml2.XML_READER_TYPE_ELEMENT
                        and xml.Name() == 'FIMSSR'):
                    module_logger.debug("Found FIMSSR %s %s",
                                        xml.NodeType(), xml.Name())
                    status, message = _process_file(dbh, xml)
                    if status != 1:
                        module_logger.debug("Status %s bailing", status)
                        break
                ret = xml.Read()
            if ret < 0:
                module_logger.error("Parse error %s", filename)
                status = -2
                message = "XML Parser Error on file " + filename
        except Exception:
            module_logger.exception("Parse error %s", filename)
            status = -2
            message = "XML Parser Error on file " + filename
        finally:
            module_logger.debug("Cleanup")
            libxml2.cleanupParser()
    else:
        module_logger.error("Zero size file %s", filename)
        status = -2
        message = "Zero size or truncated file " + filename

    if status == 1:
        # Tracked so the handler below only closes a cursor that exists and
        # is still open.
        csr = None
        try:
            csr = dbh.cursor()
            module_logger.debug("Store filename %s",
                                os.path.basename(filename))
            csr.execute(_sql['file_update'],
                        {'filename': os.path.basename(filename)})
            csr.close()
            csr = None
            module_logger.debug("Commit")
            dbh.commit()
        except Exception:
            module_logger.warning("Rollback", exc_info=True)
            if csr is not None:
                csr.close()
            dbh.rollback()
            status = -1
            message = "DB error"

    # if not in batch the SCHD update after file
    if status == 1 and not batch:
        module_logger.debug("Not Batch mode, updating schd after file %s",
                            filename)
        status, message = _schdupdate(dbh)

    return status, message
return "0" + actualNum else : return hexNum XMLREADER_START_ELEMENT_NODE_TYPE = 1 #reader = libxml2.newTextReaderFilename("../capture.xml") #reader = libxml2.newTextReaderFilename("../newCap.xml") #reader = libxml2.newTextReaderFilename("../mouse.xml") #reader = libxml2.newTextReaderFilename("../nj.xml") if len(sys.argv) != 2: print "Synopsis: " + sys.argv[0] + " <USBTraceLog.xml>" exit(1) reader = libxml2.newTextReaderFilename(sys.argv[1]) # Old data for endpoints 0, 1 and 2 oldData00 = "" oldData01 = "" oldData02 = "" # Parse through the XML... while reader.Read(): if reader.NodeType() == XMLREADER_START_ELEMENT_NODE_TYPE and \ reader.Depth() == 1 and \ reader.Name() == "Request": # This is a Request open tag...parse until the end tag node = reader.Expand() row = node.children