def main():
    """Extract power-generator POIs from a local OSM XML file into ``temp.csv``.

    Parses ``generators.xml`` and the stylesheet ``trans_csv_generators.xsl``,
    applies the stylesheet with the parameters ``key='power'`` and
    ``value='generator'``, and writes the transformation result to
    ``temp.csv`` in the current working directory.

    The download of the OSM data is disabled (see the commented-out step
    below), so ``generators.xml`` must already exist.
    """
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # URL to fetch the OSM data from (only used by the disabled download step)
    map_source_data_url = "http://xapi.openstreetmap.org/api/0.6/*[power=generator][bbox=-0.51,51.20,0.35,51.80]"
    # Filename for OSM map data
    xml_filename = "generators.xml"
    # Filename for XSLT to extract POIs
    xsl_filename = "trans_csv_generators.xsl"

    # Download the map.osm file from the net, if we don't already have one.
    #if os.path.isfile(xml_filename):
    #    print "Not downloading map data.  '%s' already exists."%xml_filename
    #else:
    #    print "Downloading OSM data."
    #    print "'%s' -> '%s'"%(map_source_data_url,xml_filename)
    #    urllib.urlretrieve(map_source_data_url,xml_filename)

    # Read the XML into memory.
    osmdoc = libxml2.parseFile(xml_filename)
    try:
        # Read the XSLT
        styledoc = libxml2.parseFile(xsl_filename)
        style = libxslt.parseStylesheetDoc(styledoc)
        try:
            # Stylesheet parameter values are evaluated as XPath expressions,
            # so string values must carry their own quotes; an unquoted
            # "power" would be read as a node test, not as the text 'power'.
            result = style.applyStylesheet(
                osmdoc, {"key": "'power'", "value": "'generator'"})
            try:
                # Extract POIs to the CSV text file
                style.saveResultToFilename("temp.csv", result, 0)
            finally:
                result.freeDoc()
        finally:
            # Frees the stylesheet together with the document it was built from.
            style.freeStylesheet()
    finally:
        osmdoc.freeDoc()
def xslt_convert( xml, xsl ):
    """Apply the XSLT stylesheet file *xsl* to the XML file *xml*.

    Parameters:
        xml -- path of the XML document to transform.
        xsl -- path of the XSLT stylesheet to apply.

    Returns the transformation result serialized to a string by
    ``saveResultToString``.

    Problems (XSLT support missing, unparsable stylesheet or document) are
    reported through ``Error``, which is defined elsewhere in this file.
    NOTE(review): this function relies on ``Error`` not returning normally
    (raising or exiting) -- confirm against its definition.

    The stylesheet, the source document and the result document are released
    on every exit path, including when one of the steps raises.
    """
    if not _have_xslt:
        Error( _err_msg )

    libxml2.lineNumbersDefault( 1 )
    libxml2.substituteEntitiesDefault( 1 )

    try:
        styledoc = libxml2.parseFile( xsl )
    except libxml2.parserError:
        Error("Cannot parse XSL stylesheet: '%s'" % xsl )
    style = libxslt.parseStylesheetDoc( styledoc )

    try:
        try:
            doc = libxml2.parseFile( xml )
        except libxml2.parserError:
            Error("Unable to parse XML document: '%s'" % xml )
        try:
            result = style.applyStylesheet( doc, None )
            try:
                return style.saveResultToString( result )
            finally:
                result.freeDoc()
        finally:
            doc.freeDoc()
    finally:
        # Also releases the stylesheet document it was built from.
        style.freeStylesheet()
def setUp(self):
    """Prepare libxml2 before a test runs.

    Stores the value returned by ``libxml2.debugMemory(1)`` in
    ``self.initialMemUsed``, initialises the parser, turns on the
    line-number default and registers ``handleError`` as the libxml2 error
    handler with this test instance as its context argument.
    """
    # The memory-debugging call comes first, before the parser is
    # initialised, and its return value is kept on the instance.
    # NOTE(review): presumably compared against a later reading to detect
    # leaks -- confirm in the matching tearDown.
    self.initialMemUsed = libxml2.debugMemory(1)
    libxml2.initParser()
    libxml2.lineNumbersDefault(1)
    libxml2.registerErrorHandler(handleError, self)
def _apply_stylesheet(xsl_path, xml_text):
    """Run the stylesheet file *xsl_path* over the XML string *xml_text*.

    Returns the serialized result string. The stylesheet, the parsed input
    document and the result document are freed on every exit path.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    try:
        doc = libxml2.parseDoc(xml_text)
        try:
            result = style.applyStylesheet(doc, None)
            try:
                return style.saveResultToString(result)
            finally:
                result.freeDoc()
        finally:
            doc.freeDoc()
    finally:
        # Also releases the stylesheet document it was built from.
        style.freeStylesheet()


def xsl_transform(content, bDownloadImages):
    """Transform Google Docs HTML *content* to CNXML in two XSLT passes.

    Parameters:
        content         -- the HTML input, handed to ``tidy_and_premail``.
        bDownloadImages -- when true, ``downloadImages`` is run on the
                           intermediate tree and its image objects returned.

    Returns a tuple ``(strResult2, imageObjects)``: the output of the second
    XSLT pass and the image objects (an empty dict when images were not
    downloaded).
    """
    # 1 tidy and premail
    strTidiedHtml = tidy_and_premail(content)

    # 2 Settings for libxml2 for transforming XHTML entities to valid XML
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 3 First XSLT transformation
    strResult1 = _apply_stylesheet(GDOCS2CNXML_XSL1, strTidiedHtml)

    # Parse XML with etree from lxml for TeX2MathML and image download
    etreeXml = etree.fromstring(strResult1)

    # 4 Convert TeX to MathML with Blahtex
    etreeXml = tex2mathml(etreeXml)

    # 5 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        etreeXml, imageObjects = downloadImages(etreeXml)

    # Convert etree back to string
    strXml = etree.tostring(etreeXml)

    # 6 Second transformation
    strResult2 = _apply_stylesheet(GDOCS2CNXML_XSL2, strXml)

    return strResult2, imageObjects
def _apply_stylesheet(xsl_path, xml_text):
    """Run the stylesheet file *xsl_path* over the XML string *xml_text*.

    Returns the serialized result string. The stylesheet, the parsed input
    document and the result document are freed on every exit path.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    try:
        doc = libxml2.parseDoc(xml_text)
        try:
            result = style.applyStylesheet(doc, None)
            try:
                return style.saveResultToString(result)
            finally:
                result.freeDoc()
        finally:
            doc.freeDoc()
    finally:
        # Also releases the stylesheet document it was built from.
        style.freeStylesheet()


def xsl_transform(content, bDownloadImages, base_or_source_url='.',
                  use_readability=True):
    """Transform arbitrary HTML *content* to CNXML in two XSLT passes.

    Parameters:
        content            -- the HTML input.
        bDownloadImages    -- when true, ``downloadImages`` is run on the
                              intermediate tree.
        base_or_source_url -- passed through to ``downloadImages``.
        use_readability    -- when true (the default, matching the previous
                              hard-coded behaviour) the readability summary
                              of *content* is transformed; otherwise
                              *content* is used as is.

    Returns a tuple ``(strResult2, imageObjects, html_title)``: the output
    of the second XSLT pass, the image objects (an empty dict when images
    were not downloaded) and the title, which is "Untitled" when it could
    not be determined.
    """
    # 1 get title with readability
    html_title = "Untitled"
    try:
        html_title = Document(content).title()
    except Exception:
        # Best effort: keep the "Untitled" fallback when no title can be
        # extracted, without swallowing KeyboardInterrupt/SystemExit.
        pass

    # 2 use readabilty to get content
    if use_readability:
        readable_article = Document(content).summary()
    else:
        readable_article = content

    # 3 tidy and premail
    strTidiedHtml = tidy_and_premail(readable_article)

    # 4 Load XHTML catalog files: Makes XHTML entities readable.
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 5 XSLT transformation
    strResult1 = _apply_stylesheet(XHTML2CNXML_XSL1, strTidiedHtml)

    # Parse XML with etree from lxml for TeX2MathML and image download
    etreeXml = etree.fromstring(strResult1)

    # 6 Convert TeX to MathML with Blahtex (not in XHTML)
    # etreeXml = tex2mathml(etreeXml)

    # 7 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        etreeXml, imageObjects = downloadImages(etreeXml, base_or_source_url)

    # 8 add title from html
    etreeXml = add_cnxml_title(etreeXml, html_title)

    # Convert etree back to string
    strXml = etree.tostring(etreeXml)

    # 9 Second transformation
    strResult2 = _apply_stylesheet(XHTML2CNXML_XSL2, strXml)

    return strResult2, imageObjects, html_title
#
# Test suite description file and the log this run writes to
#
LOG = "check-xsddata-test-suite.log"
CONF = os.path.join(os.path.dirname(__file__), "test/xsdtest/xsdtestsuite.xml")

log = open(LOG, "w")

# Counters for schema tests and instance tests, all starting at zero.
nb_schemas_tests = nb_schemas_success = nb_schemas_failed = 0
nb_instances_tests = nb_instances_success = nb_instances_failed = 0

libxml2.lineNumbersDefault(1)

#
# Error and warning callbacks
#
def callback(ctx, str):
    """Write the handler context followed by the message to the log file."""
    # ``log`` is only read here, so the module-level name resolves without
    # a ``global`` declaration.
    log.write("%s%s" % (ctx, str))

# Registered with an empty string as the context passed back to callback().
libxml2.registerErrorHandler(callback, "")

#
# Resolver callback
#
def main(args = None):
    """Command-line driver: apply an XSLT stylesheet to one or more documents.

    Parameters:
        args -- list of command-line arguments; when empty or None,
                ``sys.argv[1:]`` is used.

    The arguments are scanned twice. The first pass interprets the options
    and records them in module-level globals (``timing``, ``debug``,
    ``repeat``, ``novalid``, ``noout``, ``html``, ``xinclude``, ``params``,
    ``output``). The second pass skips the options, parses the first
    remaining argument as the stylesheet, and every following argument is
    parsed as a document and handed to ``xsltProcess``.

    Returns 3 when an unknown or incomplete option is met; otherwise falls
    off the end (returns None). Failures are recorded in the global
    ``errorno``: 4 (stylesheet file unparsable), 5 (stylesheet invalid),
    6 (a document unparsable).
    """
    global debug
    global repeat
    global timing
    global novalid
    global noout
    global docbook
    global html
    global xinclude
    global profile
    global params
    global output
    global errorno

    done = 0
    cur = None

    if not args:
        args = sys.argv[1:]
        if len(args) <= 0:
            usage(sys.argv[0])

    # ---- First pass: interpret the options -------------------------------
    i = 0
    while i < len(args):
        opt = args[i]
        if opt == "-":
            break
        # startswith() instead of opt[0] so an empty argument is treated as
        # a positional argument instead of raising IndexError.
        if not opt.startswith('-'):
            i = i + 1
            continue
        if opt in ("-timing", "--timing"):
            timing = 1
        elif opt in ("-debug", "--debug"):
            debug = 1
        elif opt in ("-verbose", "--verbose", "-v"):
            print("TODO: xsltSetGenericDebugFunc() mapping missing")
        elif opt in ("-version", "--version", "-V"):
            print("TODO: version information mapping missing")
        elif opt in ("-repeat", "--repeat"):
            # This branch used to test for -verbose a second time and was
            # therefore unreachable.
            if repeat == 0:
                repeat = 20
            else:
                repeat = 100
        elif opt in ("-novalid", "--novalid"):
            print("TODO: xmlLoadExtDtdDefaultValue mapping missing")
            novalid = 1
        elif opt in ("-noout", "--noout"):
            noout = 1
        elif opt in ("-html", "--html"):
            html = 1
        elif opt in ("-nonet", "--nonet"):
            # Accepted but without effect until the mapping exists.
            print("TODO: xmlSetExternalEntityLoader mapping missing")
        elif opt in ("-catalogs", "--catalogs"):
            catalogs = os.environ.get('SGML_CATALOG_FILES')
            if catalogs is not None:
                libxml2.loadCatalogs(catalogs)
            else:
                print("Variable $SGML_CATALOG_FILES not set")
        elif opt in ("-xinclude", "--xinclude"):
            xinclude = 1
            libxslt.setXIncludeDefault(1)
        elif opt in ("-o", "-output", "--output"):
            # The second pass skips this option together with its value, so
            # the first pass has to accept it instead of reporting it as
            # unknown.
            if i + 1 >= len(args):
                print("Option %s requires a file name" % (opt))
                usage(sys.argv[0])
                return(3)
            i = i + 1
            output = args[i]
        elif opt in ("-param", "--param", "-stringparam", "--stringparam"):
            if i + 2 >= len(args):
                print("Option %s requires a name and a value" % (opt))
                usage(sys.argv[0])
                return(3)
            i = i + 1
            if opt in ("-param", "--param"):
                # Value is an XPath expression, passed through untouched.
                params[args[i]] = args[i + 1]
            else:
                # Value is a plain string, so it is quoted for XPath.
                params[args[i]] = "'%s'" % (args[i + 1])
            i = i + 1
        elif opt in ("-maxdepth", "--maxdepth"):
            # The value that follows does not start with '-' and is skipped
            # as a positional argument on the next iteration.
            print("TODO: xsltMaxDepth mapping missing")
        else:
            print("Unknown option %s" % (opt))
            usage(sys.argv[0])
            return(3)
        i = i + 1

    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)
    # TODO: xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS
    # if novalid:
    # TODO: xmlLoadExtDtdDefaultValue = 0

    # TODO libxslt.exsltRegisterAll();
    libxslt.registerTestModule()

    # ---- Second pass: locate and load the stylesheet ---------------------
    i = 0
    while i < len(args) and done == 0:
        opt = args[i]
        if opt in ("-maxdepth", "--maxdepth"):
            i = i + 2
            continue
        if opt in ("-o", "-output", "--output"):
            i = i + 2
            continue
        if opt in ("-param", "--param"):
            i = i + 3
            continue
        if opt in ("-stringparam", "--stringparam"):
            i = i + 3
            continue
        if opt != "-" and opt.startswith('-'):
            i = i + 1
            continue
        if timing:
            startTimer()
        # parseFile() reports failure by raising parserError; map that onto
        # the None-based error handling below so the diagnostics are
        # actually reached.
        try:
            style = libxml2.parseFile(args[i])
        except libxml2.parserError:
            style = None
        if timing:
            endTimer("Parsing stylesheet %s" % (args[i]))
        if style is None:
            print("cannot parse %s" % (args[i]))
            cur = None
            errorno = 4
            done = 1
        else:
            cur = libxslt.loadStylesheetPI(style)
            if cur is not None:
                # A stylesheet was loaded via loadStylesheetPI, so the file
                # is processed as its own input document.
                xsltProcess(style, cur, args[i])
                cur = None
            else:
                cur = libxslt.parseStylesheetDoc(style)
                if cur is None:
                    style.freeDoc()
                    errorno = 5
                    done = 1
        i = i + 1
        # Only the first non-option argument is the stylesheet.
        break

    # ---- Remaining arguments are the documents to transform --------------
    while i < len(args) and done == 0 and cur is not None:
        if timing:
            startTimer()
        try:
            if html:
                doc = libxml2.htmlParseFile(args[i], None)
            else:
                doc = libxml2.parseFile(args[i])
        except libxml2.parserError:
            doc = None
        if doc is None:
            print("unable to parse %s" % (args[i]))
            errorno = 6
            i = i + 1
            continue
        if timing:
            endTimer("Parsing document %s" % (args[i]))
        xsltProcess(doc, cur, args[i])
        i = i + 1

    if cur is not None:
        cur.freeStylesheet()
    params = None
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. http://www.gnu.org/licenses/gpl-3.0.html """ import libxslt import libxml2 import re import urllib import os import csv import sys import flickr libxml2.lineNumbersDefault(1) libxml2.substituteEntitiesDefault(1) def escape(html): """ Returns the given HTML with ampersands, quotes and carets encoded, and encoded into unicode for weird characters. """ html_enc = html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') return unicode(html_enc, 'utf-8') def processRawData(xapi_uri, xsl_uri, features): """ Downloads the data from XAPI and turns it into a Python object. """ # Download data to temporary file and read the XML/XSL into memory
def getLineNumbersDefault():
    """Return the value ``libxml2.lineNumbersDefault`` reports as previous.

    The setter is the only accessor used here: it is called with 0 to
    obtain the value it returns, then called again with that value so the
    setting is put back before the value is returned.
    """
    previous = libxml2.lineNumbersDefault(0)
    # Put the setting back to what it was before the probe above.
    libxml2.lineNumbersDefault(previous)
    return previous
def init_libxml2(xml):
    """Configure libxml2 and pass *xml* through unchanged.

    Loads the ``XHTML_ENTITIES`` catalog and switches on the line-number
    and entity-substitution defaults.

    Returns a tuple of *xml* (untouched) and a new empty dict.
    """
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    extras = {}
    return (xml, extras)
def _apply_stylesheet(xsl_path, xml_text):
    """Run the stylesheet file *xsl_path* over the XML string *xml_text*.

    Returns the serialized result string. The stylesheet, the parsed input
    document and the result document are freed on every exit path.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    try:
        doc = libxml2.parseDoc(xml_text)
        try:
            result = style.applyStylesheet(doc, None)
            try:
                return style.saveResultToString(result)
            finally:
                result.freeDoc()
        finally:
            doc.freeDoc()
    finally:
        # Also releases the stylesheet document it was built from.
        style.freeStylesheet()


def xsl_transform(content, bDownloadImages, base_or_source_url='.'):
    """Transform HTML *content* to CNXML in two XSLT passes.

    Parameters:
        content            -- the HTML input, handed to ``tidy_and_premail``.
        bDownloadImages    -- when true, ``downloadImages`` is run on the
                              intermediate tree.
        base_or_source_url -- passed through to ``downloadImages``.

    Returns a tuple ``(strResult2, imageObjects, html_title)``: the output
    of the second XSLT pass, the image objects (an empty dict when images
    were not downloaded) and the title, which is always "Untitled" here
    because title extraction is disabled.
    """
    html_title = "Untitled"

    # 1 get title with readability
    # 2 use readabilty to get content
    # Both steps ONLY MAKE SENSE FOR AN UNKNOWN HTML, so they are disabled
    # for https://github.com/Connexions/rhaptos.html2cnxml and the content
    # is used as is.
    readable_article = content

    # 3 tidy and premail
    strTidiedHtml = tidy_and_premail(readable_article)

    # 4 Load XHTML catalog files: Makes XHTML entities readable.
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 5 XSLT transformation
    strResult1 = _apply_stylesheet(XHTML2CNXML_XSL1, strTidiedHtml)

    # Parse XML with etree from lxml for TeX2MathML and image download
    etreeXml = etree.fromstring(strResult1)

    # 6 Convert TeX to MathML with Blahtex (not in XHTML)
    # etreeXml = tex2mathml(etreeXml)

    # 7 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        etreeXml, imageObjects = downloadImages(etreeXml, base_or_source_url)

    # 8 add title from html
    etreeXml = add_cnxml_title(etreeXml, html_title)

    # Convert etree back to string
    strXml = etree.tostring(etreeXml)

    # 9 Second transformation
    strResult2 = _apply_stylesheet(XHTML2CNXML_XSL2, strXml)

    return strResult2, imageObjects, html_title
def main():
    """Convert power-generator POIs from a local OSM XML file to KML.

    For every entry of ``marker_layers`` the stylesheet ``trans_pois.xsl``
    is applied to ``generators.xml`` with the layer's OSM key/value as
    string parameters, the result is saved to the layer's CSV file, and the
    CSV rows are turned into KML placemarks. Rows whose ``source``/``type``
    is not solar heat, solar electricity or wind are skipped. The finished
    KML document is printed to standard output.
    """
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # Filename for OSM map data
    xml_filename = "generators.xml"
    # Filename for XSLT to extract POIs
    xsl_filename = "trans_pois.xsl"

    # Layers we are going to extract (in a dict)
    # The key is the layer name, the value is a list of parameters:
    # 0,1: OSM key, value
    # 2: POI text output file name
    marker_layers = {
        "Generators": ["power", "generator", "data_generators.csv"],
    }

    placemark_template = """<Placemark>\n\t<name>%s</name>\n\t<description><![CDATA[<strong>Description:</strong> %s<br><br><strong>Output capacity rating:</strong> %s]]></description>\n\t<styleUrl>#%s</styleUrl>\n\t<Point>\n\t\t<coordinates>%s,%s</coordinates>\n\t</Point>\n</Placemark>\n"""

    # The pieces are collected in a list and joined once at the end instead
    # of rebuilding the whole string for every row.
    kml_parts = ["""<?xml version="1.0" encoding="UTF-8"?><kml xmlns="http://www.opengis.net/kml/2.2">\n<Document>\n<Style id="solar">\n\t<IconStyle>\n\t\t<Icon>\n\t\t\t<href>http://tomchance.dev.openstreetmap.org/pois/power_solar.png</href>\n\t\t</Icon>\n\t</IconStyle>\n</Style>\n<Folder><name>Power generators in London</name>\n\n\n"""]

    # Read the XML into memory.  We will use it many times.
    osmdoc = libxml2.parseFile(xml_filename)
    try:
        # Read the XSLT
        styledoc = libxml2.parseFile(xsl_filename)
        style = libxslt.parseStylesheetDoc(styledoc)
        try:
            # Extract POIs to layer text files
            for layer, tags in marker_layers.items():
                layer_filename = tags[2]
                # Parameter values are quoted so the stylesheet receives
                # them as strings.
                result = style.applyStylesheet(
                    osmdoc,
                    {"key": "'%s'" % tags[0], "value": "'%s'" % tags[1]})
                try:
                    style.saveResultToFilename(layer_filename, result, 0)
                finally:
                    result.freeDoc()

                # Read CSV file into dicts, one per row.
                # NOTE(review): the delimiter is a single space as shown in
                # the source; if the stylesheet emits tab-separated output
                # this should be '\t' -- confirm against trans_pois.xsl.
                with open(layer_filename, 'rb') as csv_file:
                    for row in csv.DictReader(csv_file, delimiter=' '):
                        source = row['source']
                        if source == 'solar' and row['type'] == 'heat':
                            gen_type = "Solar thermal panel(s)"
                            gen_style = "solar"
                        elif source == 'solar' and row['type'] == 'electricity':
                            gen_type = "Solar photovoltaic panel(s)"
                            gen_style = "solar"
                        elif source == 'wind':
                            gen_type = "Wind turbine(s)"
                            gen_style = "default"
                        else:
                            # Other generators are left out of the KML.
                            continue

                        if row['rating'] == '':
                            row['rating'] = "Unknown"
                        if row['description'] == '':
                            row['description'] = "No information available"

                        kml_parts.append(placemark_template % (
                            gen_type, row['description'], row['rating'],
                            gen_style, row['lon'], row['lat']))
        finally:
            # Also releases the stylesheet document it was built from.
            style.freeStylesheet()
    finally:
        osmdoc.freeDoc()

    kml_parts.append("</Folder></Document></kml>")
    print(''.join(kml_parts))