def validate_xml_string(xml_string):
    """Check that *xml_string* is well-formed XML.

    The string is first encoded as UTF-8 and parsed; if that attempt fails
    for any reason (the value cannot be encoded, or the encoded form does
    not parse), the original value is parsed unchanged.

    :param xml_string: the XML document to check.
    :raises Exception: with the message ``invalid XML format: <reason>``
        when the second attempt fails as well.
    """
    from xml.sax import parseString
    from xml.sax.handler import ContentHandler

    # A unicode string is not well-formed XML by default, according to the
    # XML 1.0 specification, so the encoded form is tried first.
    try:
        utf8_xml = xml_string.encode('utf-8')
        parseString(utf8_xml, ContentHandler())
    except Exception:
        # Retry with the data exactly as given, for input that needed no
        # encoding.
        try:
            parseString(xml_string, ContentHandler())
        except Exception as e2:
            raise Exception('invalid XML format: %s' % e2)
def test_get_history_check_content_xml_is_well_formed():
    """Fail if the document served by the eGauge endpoint does not parse.

    The URL is handed directly to the SAX reader; any exception raised
    while parsing is turned into an assertion failure.
    """
    endpoint = 'http://egauge45930.d.egauge.net/cgi-bin/egauge-show'
    reader = make_parser()
    reader.setContentHandler(ContentHandler())
    try:
        reader.parse(endpoint)
    except Exception:
        assert False
def parsefile(file):
    """Parse the XML file at path *file*, raising if it is not well-formed.

    The file is read as text and any ``encoding="gbk"`` declaration is
    rewritten to ``encoding="utf-8"`` before the text is passed to the SAX
    parser.
    NOTE(review): presumably the rewrite stops the parser from treating
    already-decoded text as GBK -- confirm against the input files.

    :param file: path of the XML file to parse.
    :raises xml.sax.SAXParseException: if the document is not well-formed.
    """
    # Use a context manager so the handle is closed even when reading fails
    # (the previous code never closed it).
    with open(file) as source:
        xml_str = source.read()
    xml_str = xml_str.replace('encoding="gbk"', 'encoding="utf-8"')

    parser = make_parser()
    parser.setContentHandler(ContentHandler())
    parser.parse(StringIO(xml_str))
def is_well_formed_xml(metadata_ref: generic.Reference):
    """
    Verify that the referenced stream holds syntactically valid XML.

    Only well-formedness is checked; no further XML or XMP validation is
    carried out.

    :param metadata_ref:
        A reference to a (purported) metadata stream.
    :raises SuspiciousModification:
        if the reference does not resolve to a stream object, or if the
        stream's data cannot be parsed as XML.
    """
    from xml.sax.handler import ContentHandler
    from xml.sax import make_parser

    stream_obj = metadata_ref.get_object()
    if not isinstance(stream_obj, generic.StreamObject):
        raise SuspiciousModification(
            "/Metadata should be a reference to a stream object")

    reader = make_parser()
    reader.setContentHandler(ContentHandler())
    try:
        payload = BytesIO(stream_obj.data)
        reader.parse(payload)
    except Exception as e:
        # The underlying error is passed along as a second argument.
        raise SuspiciousModification(
            "/Metadata XML syntax could not be validated", e)
def check_well_formedness(self, file):
    """Report whether *file* is well-formed XML.

    :param file: anything ``XMLReader.parse`` accepts (the value is passed
        through unchanged).
    :return: ``"ok"`` when parsing succeeds, otherwise a message of the
        form ``"<file> is NOT well-formed! <error>"``.
    """
    try:
        saxparser = make_parser()
        saxparser.setContentHandler(ContentHandler())
        saxparser.parse(file)
        return "ok"
    except Exception as exc:
        # The exception must be converted explicitly: concatenating a str
        # with the exception object itself raises TypeError.
        return str(file) + " is NOT well-formed! " + str(exc)
def __init__(self, parent=None, handlers=None, groups_of=1):
    """Initialise the filter.

    :param parent: passed on to ``XMLFilterBase.__init__``.
    :param handlers: iterable of handlers; when ``None``, an endless
        supply of fresh ``ContentHandler`` instances is used.
    :param groups_of: stored as ``self.groups_of``.
        NOTE(review): presumably the number of items routed to each
        handler -- confirm against ``new_handler`` and its callers.
    """
    XMLFilterBase.__init__(self, parent)
    if handlers is None:
        # Previously this default was overwritten right away by
        # ``iter(handlers)``, which raised TypeError for ``None``.
        handlers = (ContentHandler() for _ in count())
    self.handlers = iter(handlers)
    self.processed = 0
    self.groups_of = groups_of
    self.new_handler()
def test_xml_correctness(filename): try: parser = make_parser() parser.setContentHandler(ContentHandler()) parser.parse(filename) # print "%s is well-formed" % filename return True except Exception, e: print "%s is NOT well-formed! %s" % (filename, e) return False
def haveError(self, file_path):
    """Parse *file_path* and report the outcome.

    A message is printed either way.

    :param file_path: passed unchanged to ``XMLReader.parse``.
    :return: ``1`` when the document parsed without error, ``0`` otherwise.
    """
    reader = make_parser()
    try:
        reader.setContentHandler(ContentHandler())
        reader.parse(file_path)
        print('\n\t:), %s is OK!\n' % file_path)
    except Exception as err:
        print('\n\t:(, Error found in file:%s\n' % err)
        return 0
    return 1
def ParsearAparcamientos():
    """Parse the Madrid open-data parking XML feed and return the result.

    The feed URL is handed straight to the SAX reader and the handler's
    ``Parking`` attribute is returned afterwards.
    NOTE(review): the stock ``xml.sax.handler.ContentHandler`` has no
    ``Parking`` attribute; presumably ``ContentHandler`` refers to a
    project-specific handler here -- confirm.
    """
    feed_url = "http://datos.munimadrid.es/portal/site/egob/menuitem.ac61933d6ee3c31cae77ae7784f1a5a0/?vgnextoid=00149033f2201410VgnVCM100000171f5a0aRCRD&format=xml&file=0&filename=202584-0-aparcamientos-residentes&mgmtid=e84276ac109d3410VgnVCM2000000c205a0aRCRD&preview=full"
    reader = make_parser()
    handler = ContentHandler()
    reader.setContentHandler(handler)
    reader.parse(feed_url)
    return handler.Parking
def check_wellformed(filename):
    """Check that *filename* is well-formed XML and log the outcome.

    :param filename: passed unchanged to ``XMLReader.parse``.
    :return: ``None`` on success; on any failure, whatever
        ``CommonUtil.Exception_Handler`` returns.
    """
    sModuleInfo = inspect.currentframe().f_code.co_name + " : " + MODULE_NAME
    try:
        parser = make_parser()
        parser.setContentHandler(ContentHandler())
        parser.parse(filename)
        # The message takes exactly one value. The previous format string
        # had a second, unfilled "%s", which raised TypeError here and made
        # every well-formed file fall into the failure branch.
        CommonUtil.ExecLog(sModuleInfo, "%s is well-formed." % filename, 1)
    except Exception:
        errMsg = "%s is NOT well-formed! " % filename
        return CommonUtil.Exception_Handler(sys.exc_info(), None, errMsg)
def is_valid(xml_string):
    """Tell whether a unicode string containing XML parses cleanly.

    The string is encoded as UTF-8 and fed to an ``XmlScanner``; a
    ``SAXParseException`` means the document is rejected. Any other
    exception propagates.

    :return: ``True`` if parsing succeeds, ``False`` on a parse error.
    """
    encoded = xml_string.encode('utf-8')
    stream = StringIO.StringIO(encoded)
    scanner = XmlScanner()
    scanner.setContentHandler(ContentHandler())
    try:
        scanner.parse(stream)
    except SAXParseException:
        return False
    else:
        return True
def _embedparser(self, tree, XMLparser='xml.sax'): """ Returns a correctly parsed chunk of valid xml according to the parsing libraries it relies on (currently it relies on xml.sax only) """ from xml.sax.handler import ContentHandler if XMLparser == 'xml.sax': import xml.sax pstring = tree TempHandler = ContentHandler() try: do_parse = xml.sax.parseString(pstring, TempHandler) except: return False return True
def inject_data(input=None, placeholder=False, data_file='data.xml'):
    """Replace every ``{% insert data %}`` tag in a template.

    :param input: template text; when empty or ``None`` the template is
        read from ``THEME_DIR + "/template.html"``.
    :param placeholder: when true, a placeholder ``<script>`` element is
        written instead of the data. Also forced to true when the data
        file cannot be opened.
    :param data_file: name of the data file, relative to ``SCRIPT_DIR``.
    :return: the template text with every tag replaced.
    :raises TemplateError: if the template file cannot be opened.
    """
    logger.info("> Injecting data")
    # Group 1 captures the indentation before the tag so the injected
    # markup can be indented the same way.
    data_tag = r'([^\S\n]*){%\s?insert\s?data\s?%}'

    if not placeholder:
        data_filename = SCRIPT_DIR + "/%s" % (data_file)
        try:
            # Context manager: the handle used to be left open.
            with codecs.open(data_filename, encoding="utf-8",
                             mode="r") as data_handle:
                data = data_handle.read()
        except IOError:
            logger.info("- No data file found (%s). Using placeholder." % (data_file))
            placeholder = True
        else:
            # Best-effort well-formedness check: a failure is reported but
            # never aborts the injection.
            try:
                parser = make_parser()
                parser.setContentHandler(ContentHandler())
                parser.parse(data_filename)
            except Exception:
                logger.info("- Data file is not proper XML (%s)." % (data_file))

    if not input:
        logger.debug("- Fetching the template.")
        try:
            with open(globals()['THEME_DIR'] + "/template.html", "r") as template_file:
                input = template_file.read()
        except IOError:
            raise TemplateError("Template file could not be opened")

    # Search again after each replacement because offsets shift once text
    # has been spliced in.
    tag = re.search(data_tag, input, flags=re.IGNORECASE)
    while tag:
        begin = tag.start()
        end = tag.end()
        whitespace = tag.group(1)
        if not placeholder:
            # Build the snippet in its own variable: reassigning ``data``
            # wrapped the already-wrapped markup again on every later tag.
            injected = '%s<!-- Injected patient data -->\n%s<script style="display: none;" id="xmlBBData" type="text/plain">%s</script>' % (
                whitespace, whitespace, data)
        else:
            logger.debug("- Writing placeholder.")
            injected = '%s<script style="display: none;" id="xmlBBData" type="text/plain">\n%s\t<!-- Inject XML Data Here -->\n%s</script>' % (
                whitespace, whitespace, whitespace)
        input = input[:begin] + injected + input[end:]
        tag = re.search(data_tag, input, flags=re.IGNORECASE)
    return input
def check(l):
    """Drop every entry of *l* that is not well-formed XML.

    Works on the module-level counters ``start`` (index of the next entry
    to check) and ``filesum`` (number of entries considered): ``start`` is
    advanced past each entry that parses, and each entry that fails is
    deleted from *l* with ``filesum`` decremented. A message is printed for
    every entry.

    :param l: list of items accepted by ``XMLReader.parse``; modified in
        place.
    :return: ``None``.
    """
    global filesum
    global start
    # Iterative on purpose: recursing once per malformed entry could
    # exceed the recursion limit on lists with many bad files.
    while start < filesum:
        try:
            parser = make_parser()
            parser.setContentHandler(ContentHandler())
            parser.parse(l[start])
            # Message text: "<file> is fine".
            print('\n\t %s 是没问题的\n' % l[start])
            start += 1
        except Exception as e:
            # Message text: "error found: <error>".
            print('\n\t 发现错误:%s\n' % e)
            # After deletion the next entry slides into the same index, so
            # ``start`` stays where it is.
            del l[start]
            filesum -= 1
def wellform_xml(teipath):
    """Print the names of all files matching *teipath* that fail to parse.

    :param teipath: glob pattern selecting the files to check, for example
        ``"/path/to/corpus/*.xml"``.
    :return: ``None``; the base name of each problematic file and the
        final count are printed.
    """
    mistakes = 0
    for teifile in glob.glob(teipath):
        idno = os.path.basename(teifile)
        try:
            parser = make_parser()
            parser.setContentHandler(ContentHandler())
            parser.parse(teifile)
        except Exception:
            # Not a bare ``except``: Ctrl-C must still abort the run rather
            # than be counted as a malformed file.
            print(idno)
            mistakes += 1
    print("number of problematics file: ", mistakes)
def main(argv=None):
    """Check that the file named on the command line is parseable XML.

    The file's bytes are decoded with the encoding ``chardet`` detects,
    wrapped in a ``<root>`` element and parsed. The result, or any error,
    is printed.

    :param argv: full argument vector including the program name at index
        0; defaults to ``sys.argv`` when ``None``.
    """
    if argv is None:
        argv = sys.argv
    try:
        if len(argv) < 2:
            print('Error: miss required parameter')
            return

        xml_file = argv[1]
        if not os.path.exists(xml_file):
            print('Error: xml file is not found')
            return

        with open(xml_file, 'rb') as f:
            bi_data = f.read()

        info = chardet.detect(bi_data)
        # chardet can report None as the encoding (e.g. for empty input);
        # fall back to UTF-8 instead of failing inside decode().
        encoding = info['encoding'] or 'utf-8'
        data = bi_data.decode(encoding=encoding)
        # Wrapped in a synthetic root element before parsing.
        xml_string = '<root>' + data + '</root>'
        parseString(xml_string, ContentHandler())
        print('Success: no error founds in the xml file')
    except Exception as ex:
        print('Error: {}'.format(ex))
def xmlChecker(filename):
    """Parse the XML file and describe the failure, if any.

    @param filename name of the parameters file, with its absolute path
    @return an empty string when the file parses, otherwise an error
            message naming the file and the parsing error
    """
    try:
        reader = make_parser()
        reader.setContentHandler(ContentHandler())
        reader.parse(filename)
    except Exception as err:
        # Only the base name is shown in the message.
        short_name = os.path.basename(filename)
        return ("%s file reading error. \n\n"
                "This file is not in accordance with XML specifications.\n\n"
                "The parsing syntax error is:\n\n%s" % (short_name, err))
    return ""
def xxe_sax():
    """Handle the SAX XXE demonstration form.

    When the ``attack`` form field equals ``true`` (case-insensitive), a
    document declaring two external entities is parsed with ``sax`` and
    the result text is set whether parsing succeeds or raises
    ``SAXParseException``. Otherwise the result text is empty. The
    ``xxe_sax.html`` template is rendered with that text.
    """
    attack = request.form['attack']
    # Declares one file-based and one HTTP-based external entity and
    # references both from the body.
    test_string = (
        "<!DOCTYPE doc [ "
        "<!ENTITY sax SYSTEM \"file:///etc/passwd\"> "
        "<!ENTITY sax2 SYSTEM \"http://www.google.com/marker\"> "
        "]>\n"
        "<root>\n"
        "<element>&sax;</element>\n"
        "<element>&sax2;</element>\n"
        "</root>\n"
    )

    if str(attack).lower() != 'true':
        return render_template('xxe_sax.html', result='')

    try:
        sax.parseString(test_string, ContentHandler())
    except SAXParseException:
        # A parse error still counts as an attempted attack.
        pass
    result = 'SAX XXE Attack Attempted'
    return render_template('xxe_sax.html', result=result)
def parsefile(file):
    """Run *file* through a SAX reader with a no-op content handler.

    Nothing is returned; an exception from the reader signals that the
    input could not be parsed.

    :param file: passed unchanged to ``XMLReader.parse``.
    """
    noop_handler = ContentHandler()
    reader = make_parser()
    reader.setContentHandler(noop_handler)
    reader.parse(file)
self.title = "" self.theContent = "" def characters(self, chars): if self.inContent: self.theContent = self.theContent + chars # -----main----- if len(sys.argv) < 2: print "Usage: python " + sys.argv[0] + " <document>" print sys.exit(1) # Load parser and driver theParser = make_parser() theHandler = ContentHandler() theParser.setContentHandler(theHandler) # Ready, set, go! try: xmlFile = open(sys.argv[1], "r") except IOError: print "File " + sys.argv[1] + " does not exists" sys.exit(1) print "<html><body>" theParser.parse(xmlFile) print "</body></html>" print
import rdflib
from rdflib.plugins.sparql import prepareQuery

# Checks every SPARQL query (*.rq) and every XML answer file (*.srx) under
# src/main/resources for well-formedness. Each failure is printed and also
# appended to validationresult.txt.
path = os.getcwd()

# The report is opened in a ``with`` block so it is flushed and closed even
# if the run is interrupted.
with open("validationresult.txt", "w") as f:
    for filenames in glob.glob(
            os.path.join("src/main/resources/gsb_queries/", '*.rq')):
        try:
            with open(filenames, 'r') as file:
                # Keep the newlines: removing them glues tokens from
                # adjacent lines together and lets a "#" comment swallow
                # the rest of the query, so valid multi-line queries were
                # reported as malformed.
                query = file.read()
            prepareQuery(query)
        except Exception as e:
            print("%s is NOT well-formed!\nError: %s" % (filenames, e))
            f.write(
                str(filenames) + " is not well-formed:\nError: " + str(e) +
                "\n")

    for filenames in glob.glob(
            os.path.join("src/main/resources/gsb_answers/", '*.srx')):
        try:
            parser = make_parser()
            parser.setContentHandler(ContentHandler())
            parser.parse(filenames)
        except Exception as e:
            print("%s is NOT well-formed!\nError: %s" % (filenames, e))
            f.write(
                str(filenames) + " is not well-formed:\nError: " + str(e) +
                "\n")
def validateXML(self, data):
    """Parse *data* with a no-op handler.

    Returns nothing; an exception from ``parseString`` signals that the
    data could not be parsed.

    :param data: passed unchanged to ``parseString``.
    """
    discard_events = ContentHandler()
    parseString(data, discard_events)
def do_xml_sax_parsestring(user_input):
    """Parse *user_input* with ``sax.parseString`` and a no-op handler.

    :param user_input: passed unchanged to ``sax.parseString``.
    :return: whatever ``sax.parseString`` returns.
    """
    handler = ContentHandler()
    outcome = sax.parseString(user_input, handler)
    return outcome
def check_if_xml_is_wellformed(file):
    """Parse *file* with a SAX reader, discarding all content events.

    Returns nothing; an exception from the reader signals that the input
    could not be parsed.

    :param file: passed unchanged to ``XMLReader.parse``.
    """
    sax_reader = make_parser()
    # The base ContentHandler ignores every event.
    sax_reader.setContentHandler(ContentHandler())
    sax_reader.parse(file)
def _check_xml_validity(my_file) : """ check if config XML file is well formed """ parser = make_parser() parser.setContentHandler(ContentHandler()) parser.parse(my_file)