def test():
    """Check that three swissprot38 parser factories emit identical XML.

    The ``text`` defined outside this function is parsed with the parsers
    built by ``swissprot38.format_expression``, ``swissprot38.format`` and
    ``swissprot38.format.expression``; all three XML outputs must be equal.
    """
    def render(make_parser):
        # Parse ``text`` with a freshly built parser and return the XML
        # written by the XMLGenerator content handler.
        sink = StringIO()
        sax_parser = make_parser()
        sax_parser.setErrorHandler(handler.ErrorHandler())
        sax_parser.setContentHandler(saxutils.XMLGenerator(sink))
        sax_parser.parseString(text)
        return sink.getvalue()

    from_format_expression = render(swissprot38.format_expression.make_parser)
    from_format = render(swissprot38.format.make_parser)
    from_expression = render(swissprot38.format.expression.make_parser)
    assert from_format_expression == from_format == from_expression
def rss(self):
    """Serialize this object as an ``<rss>`` document and return the text.

    The root element carries the attributes returned by
    ``self._get_attributes()``; ``self.publish`` is handed the XML
    generator to write the element's content.
    """
    buf = StringIO()
    gen = saxutils.XMLGenerator(buf, 'UTF-8')

    gen.startDocument()
    root_attrs = self._get_attributes()
    gen.startElement("rss", root_attrs)
    self.publish(gen)
    gen.endElement("rss")
    gen.endDocument()

    return buf.getvalue()
def fix_sidx_ranges(self, input_file, output, sidx_for_representations):
    """
    fix_sidx_ranges(input_file=some_input_filename, output=file_handler)

    Parses the MPD in ``input_file`` through an ``MPDSidxFilter`` built
    from ``sidx_for_representations`` and writes the resulting XML to
    ``output`` as UTF-8.  Per the original description, the filter
    replaces the ranges for sidx boxes (the filter itself is defined
    elsewhere).
    """
    xml_writer = saxutils.XMLGenerator(output, encoding='utf-8')
    mpd_filter = MPDSidxFilter(sax.make_parser(), sidx_for_representations)

    # Namespace processing is switched on before the handlers are attached.
    mpd_filter.setFeature(handler.feature_namespaces, True)
    mpd_filter.setContentHandler(xml_writer)
    mpd_filter.setErrorHandler(handler.ErrorHandler())

    mpd_filter.parse(input_file)
def startElement(self, name, attrs):
    """Handle an element start while unpacking a directory/file tree.

    * ``directory``: push the directory's path (relative to the current
      top of ``self.dirStack``) onto ``self.dirStack``.
    * ``file``: open the named file for writing, emit an XML declaration
      and attach a fresh ``XMLGenerator`` to it as ``self.xmlgen``.
    * anything else: forward the event to ``self.xmlgen``.
    """
    if name not in ('directory', 'file'):
        self.xmlgen.startElement(name, attrs)
        return

    target = os.path.join(self.dirStack[-1], attrs['name'])
    if name == 'directory':
        self.dirStack.append(target)
        return

    # name == 'file': start a new output document.
    prepareForOutputFile(target)
    self.outf = open(target, 'w')
    self.outf.write('<?xml version="1.0" encoding="utf-8"?>')
    self.xmlgen = saxutils.XMLGenerator(self.outf, 'utf-8')
def filter_rdf(input, output):
    """
    filter_rdf(input=some_input_filename, output=some_output_stream)

    Parses the XML in ``input`` with namespace processing enabled, passes
    the events through an ``RDFFilter`` and writes the result to
    ``output`` via an ``XMLGenerator``.  Per the original description,
    the filter drops all elements and attributes that are in the RDF
    namespace (the filter itself is defined elsewhere).
    """
    xml_writer = saxutils.XMLGenerator(output)
    rdf_filter = RDFFilter(sax.make_parser())

    rdf_filter.setFeature(handler.feature_namespaces, True)
    rdf_filter.setContentHandler(xml_writer)
    rdf_filter.setErrorHandler(handler.ErrorHandler())

    rdf_filter.parse(input)
def test_ToSep(): exp = Martel.Group("test", Martel.ToSep("colon", ":") + \ Martel.ToSep("space", " ") + \ Martel.ToSep("empty", "!")) parser = exp.make_parser() file = StringIO.StringIO() parser.setContentHandler(saxutils.XMLGenerator(file)) parser.parseString("q:wxy !") s = file.getvalue() expect = "<test><colon>q</colon>:<space>wxy</space> <empty></empty>!</test>" assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
def test_header_footer1():
    """HeaderFooterParser with a header, three records and a footer."""
    text = """\
header
XX
record 1
//
record 2
//
record 3
//
footer
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>header
XX
</header><record>record 1
//
</record><record>record 2
//
</record><record>record 3
//
</record><footer>footer
</footer></hf>"""
    debug_level = 1

    # Don't use regexps like these in your code - for testing only!
    header_parser = Martel.Group(
        "header", Martel.Re(r"header(.|\n)*")
    ).make_parser(debug_level=debug_level)
    record_parser = Martel.Group(
        "record", Martel.Re(r"rec(.|\n)*")
    ).make_parser(debug_level=debug_level)
    footer_parser = Martel.Group(
        "footer", Martel.Re(r"footer(.|\n)*")
    ).make_parser(debug_level=debug_level)

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.EndsWith, ("XX\n", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.StartsWith, ("f", ), footer_parser.tagtable,
        (0, debug_level, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    actual = sink.getvalue()
    assert actual == expected, (actual, expected)
def test_ToEol():
    """ToEol matches exactly one line and tags it without the newline."""
    parser = Martel.ToEol("SantaFe").make_parser()
    parse = parser.parseString

    # A single newline-terminated line is accepted ...
    for good in ("Testing, 1, 2, 3!\n", "Andrew\n"):
        must_parse("ToEol", parse, good)
    # ... a missing newline or text after the first newline is rejected.
    for bad in ("Dalke", "This\nis", "This\nis a test\n"):
        must_not_parse("ToEol", parse, bad)

    sink = StringIO.StringIO()
    parser.setContentHandler(saxutils.XMLGenerator(sink))
    parser.parseString("This is a test.\n")

    xml = sink.getvalue()
    expect = "<SantaFe>This is a test.</SantaFe>\n"
    assert expect in xml, ("Got: %s" % (repr(xml), ))
def _filter_svg(self, input):
    """Run ``input`` through an ``SVGFilter`` into ``self.document``.

    The filter mode is ``"shadows,"`` when ``options.shadow`` is set and
    the empty string otherwise.  When ``options.test`` is set, the
    contents of ``self.document`` are also dumped to ``output.svg`` in
    ``pngs_directory``.
    """
    output_gen = saxutils.XMLGenerator(self.document, encoding='utf8')
    parser = make_parser()
    mode = "shadows," if options.shadow else ""

    svg_filter = SVGFilter(parser, output_gen, mode)
    svg_filter.setFeature(handler.feature_namespaces, False)
    svg_filter.setErrorHandler(handler.ErrorHandler())
    svg_filter.parse(input)

    # Drop the local references to the SAX objects once parsing is done.
    del svg_filter, parser, output_gen

    if not options.test:
        return
    with open('{}/output.svg'.format(pngs_directory), 'wb') as dump:
        dump.write(self.document.getvalue())
def filter_properties(stream):
    # type: (BinaryIO) -> bytes
    """
    Filter out the '<properties>' section from the .ows xml stream.

    Everything inside a '<properties>' element is dropped: the element
    itself, its text, any nested elements and processing instructions.
    All other content is re-serialized unchanged as UTF-8.

    Parameters
    ----------
    stream : io.BinaryIO

    Returns
    -------
    xml : bytes
        ows xml without the '<properties>' nodes.
    """
    class PropertiesFilter(saxutils.XMLFilterBase):
        # Element nesting depth measured from the outermost open
        # '<properties>' element; 0 means "not inside properties".
        # A counter (rather than a flag) keeps nested elements, including
        # a nested '<properties>', suppressed until the outermost closes.
        _depth = 0

        def startElement(self, tag, attrs):
            if self._depth or tag == "properties":
                self._depth += 1
            else:
                super().startElement(tag, attrs)

        def endElement(self, name):
            if self._depth:
                self._depth -= 1
            else:
                super().endElement(name)

        def characters(self, content):
            if not self._depth:
                super().characters(content)

        def ignorableWhitespace(self, chars):
            if not self._depth:
                super().ignorableWhitespace(chars)

        def processingInstruction(self, target, data):
            if not self._depth:
                super().processingInstruction(target, data)

    buffer = io.BytesIO()
    writer = saxutils.XMLGenerator(out=buffer, encoding="utf-8")
    filter = PropertiesFilter(parent=make_parser())
    filter.setContentHandler(writer)
    filter.parse(stream)
    return buffer.getvalue()
def test_header_footer3():
    """HeaderFooterParser with records and a footer but no header."""
    # Have a footer but no header
    text = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_parser = Martel.Group(
        "record", Martel.Re(r"ID \d+(.|\n)*")
    ).make_parser()
    # Require at least 5 characters (just to be safe)
    footer_parser = Martel.Group(
        "footer", Martel.Re(r".....(.|\n)*")
    ).make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    actual = sink.getvalue()
    assert actual == expected, (actual, expected)
def test_header_footer2():
    """HeaderFooterParser with a header and records but no footer."""
    # Have a header but no footer
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_parser = Martel.Group(
        "header", Martel.Re(r"(.|\n)*")
    ).make_parser()
    record_parser = Martel.Group(
        "record", Martel.Re(r"ID \d+(.|\n)*")
    ).make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    actual = sink.getvalue()
    assert actual == expected, (actual, expected)
def filter_svg(input, output, mode):
    """filter_svg(input:path, output:file, mode)

    Reads the SVG file at ``input`` and streams it through an
    ``SVGFilter`` into ``output``.

    ``mode`` selects the filter behaviours: each of the keywords
    'hotspots', 'shadows', 'slices' and 'invert' found in it adds the
    corresponding mode object.  ``ValueError`` is raised when none of the
    keywords is present.
    """
    keyword_to_mode = (
        ('hotspots', mode_hotspots),
        ('shadows', mode_shadows),
        ('slices', mode_slices),
        ('invert', mode_invert),
    )
    mode_objs = [obj for keyword, obj in keyword_to_mode if keyword in mode]
    if not mode_objs:
        raise ValueError()

    output_gen = saxutils.XMLGenerator(output)
    parser = make_parser()
    svg_filter = SVGFilter(parser, output_gen, mode_objs)
    svg_filter.setFeature(handler.feature_namespaces, False)
    svg_filter.setErrorHandler(handler.ErrorHandler())

    # This little I/O dance is here to ensure that SAX parser does not stash
    # away an open file descriptor for the input file, which would prevent
    # us from unlinking it later
    with open(input, 'rb') as svg_file:
        raw_svg = svg_file.read()
    in_memory_source = saxutils.prepare_input_source(io.BytesIO(raw_svg))
    svg_filter.parse(in_memory_source)

    del svg_filter, parser, output_gen
def parse(self, filename):
    """Parse ``filename`` with expat and write ``parts/_datasets.xml``.

    The two secrets files under ``parts/`` are opened for the duration of
    the parse and exposed as ``self.secrets_file`` and
    ``self.mock_secrets_file`` so the handler methods can use them.
    Parser events are routed to this object's handler methods.

    ``self.master_header`` receives, in order: an XML declaration, a
    DOCTYPE with an external ``secrets`` parameter entity, and the
    contents of ``self.out_file.master_file``.  Its final contents are
    written to ``parts/_datasets.xml``.

    All files opened here are closed on return, including when parsing
    raises.
    """
    with open('parts/_actual_secrets.txt', 'wb') as secrets, \
            open('parts/_example_secrets.txt', 'wb') as mock_secrets:
        self.secrets_file = secrets
        self.mock_secrets_file = mock_secrets

        parser = ParserCreate()
        parser.CommentHandler = self.commentHandler
        parser.StartElementHandler = self.startElement
        parser.EndElementHandler = self.endElement
        parser.CharacterDataHandler = self.characters
        parser.StartCdataSectionHandler = self.startCDATA
        parser.EndCdataSectionHandler = self.endCDATA

        # The generator is used only to emit the XML declaration.
        saxutils.XMLGenerator(out=self.master_header).startDocument()
        self.master_header.write(b'<!DOCTYPE erddapDatasets [\n')
        self.master_header.write(
            b'<!ENTITY % secrets SYSTEM "_secrets.txt">\n')
        self.master_header.write(b'%secrets;\n')

        # Close the input file deterministically instead of leaking it.
        with open(filename, "rb") as source:
            parser.ParseFile(source)

        self.master_header.write(b']>\n')
        self.master_header.write(self.out_file.master_file.getvalue())
        with open("parts/_datasets.xml", "wb") as f:
            f.write(self.master_header.getvalue())
def setUp(self):
    """Store a fresh default-constructed XMLGenerator as ``self.handle``."""
    generator = saxutils.XMLGenerator()
    self.handle = generator
def write_xml(self, outfile, encoding="iso-8859-1"):
    """Write this object to ``outfile`` as an XML document.

    ``encoding`` is passed to the XML generator.  The document content
    is produced by ``self.publish``, which receives the generator.
    """
    from xml.sax.saxutils import XMLGenerator

    generator = XMLGenerator(outfile, encoding)
    generator.startDocument()
    self.publish(generator)
    generator.endDocument()
        rows.append(row)
    return rows


def process_type2(sheet):
    """Placeholder: does nothing with ``sheet``."""
    pass


def visit(dirpath, dirnames, filenames):
    """Return the rows of every '.xlsx' file named in ``filenames``.

    Each matching file is located under ``dirpath`` and handed to
    ``process_xslx``; the returned rows are concatenated.  ``dirnames``
    is accepted but not used.
    """
    rows = []
    for file in filenames:
        if os.path.splitext(file)[1] == '.xlsx':
            rows.extend(process_xslx(os.path.join(dirpath, file)))
    return rows


# Replace stdout with a writer that encodes everything written as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
# NOTE(review): `gen` is not used by any statement visible here.
gen = SU.XMLGenerator(sys.stdout, 'utf-8')
# The directory tree to scan is the first command-line argument.
dir = sys.argv[1]
rows = []
for (dirpath, dirnames, filenames) in os.walk(dir):
    rows.extend(visit(dirpath, dirnames, filenames))
# Order rows by their first field (cmp-style comparator: Python 2 only).
rows.sort(lambda x, y: cmp(x[0], y[0]))
sys.stdout.write('<strings>\n')
for row in rows:
    # row[0] is written as the numeric id, row[4] as the CDATA payload.
    sys.stdout.write('\t<string id="%d">' % row[0])
    sys.stdout.write('<![CDATA[' + row[4] + ']]>')
    sys.stdout.write('</string>\n')
def write_xml(self, outfile, encoding="iso-8859-1"):
    """Write this object to ``outfile`` as an XML document.

    Parameters:
        outfile: writable file-like object that receives the XML.
        encoding: encoding passed to the XML generator and named in the
            XML declaration.  The default, "iso-8859-1", is the
            generator's own default, so existing callers that pass only
            ``outfile`` get the same output as before.

    The document content is produced by ``self.publish``, which receives
    the generator between ``startDocument`` and ``endDocument``.
    """
    from xml.sax import saxutils

    handler = saxutils.XMLGenerator(outfile, encoding)
    handler.startDocument()
    self.publish(handler)
    handler.endDocument()
from xml.sax import saxutils


class Name(object):
    """A person's name, stored as separate forename and surname."""

    def __init__(self, forename, surname):
        self.forename = forename
        self.surname = surname


names = [
    Name("Andrew", "Dalke"),
    Name("John", "Smith"),
    Name("Asa", "Svensson"),
]
# BUG:
#  "Asa" is supposed to be: "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}sa"

# Write the names to stdout as a <NameList> of attribute-only <Name>
# elements, one per line.
gen = saxutils.XMLGenerator(sys.stdout, "utf-8")
gen.startDocument()
gen.startElement("NameList", {})
gen.characters("\n")

for name in names:
    name_attrs = {"forename": name.forename, "surname": name.surname}
    gen.characters(" ")
    gen.startElement("Name", name_attrs)
    gen.endElement("Name")
    gen.characters("\n")

gen.endElement("NameList")
gen.characters("\n")
gen.endDocument()
""" import Martel from Martel import RecordReader def delimiter(delim): assert len(delim) == 1, \ "delimiter can only be a single character long, not %s" % repr(delim) assert delim not in "\n\r", "Cannot use %s as a delimiter" % repr(delim) field = Martel.Group("field", Martel.Rep(Martel.AnyBut(delim + "\r\n"))) line = field + Martel.Rep(Martel.Str(delim) + field) + Martel.AnyEol() record = Martel.Group("record", line) format = Martel.ParseRecords("delimited", {}, record, RecordReader.CountLines, (1, )) return format tabformat = delimiter("\t") spaceformat = delimiter(" ") colonformat = delimiter(":") commaformat = delimiter(",") if __name__ == "__main__": from xml.sax import saxutils parser = colonformat.make_parser() parser.setContentHandler(saxutils.XMLGenerator()) parser.parseFile(open("/etc/passwd"))