def main():
  """Extract power-generator POIs from a local OSM file into ``temp.csv``.

  Parses ``generators.xml`` and the stylesheet ``trans_csv_generators.xsl``
  from the current directory, applies the stylesheet with the parameters
  ``key='power'`` and ``value='generator'`` and writes the result to
  ``temp.csv``.  Downloading the OSM data is currently disabled (see the
  commented-out block below), so ``generators.xml`` must already exist.
  """

  libxml2.lineNumbersDefault(1)
  libxml2.substituteEntitiesDefault(1)

  # URL to fetch the OSM data from (only used by the disabled download below)
  map_source_data_url="http://xapi.openstreetmap.org/api/0.6/*[power=generator][bbox=-0.51,51.20,0.35,51.80]"

  # Filename for OSM map data
  xml_filename = "generators.xml"

  # Filename for XSLT to extract POIs
  xsl_filename = "trans_csv_generators.xsl"

  # Download the map.osm file from the net, if we don't already have one.
  #if os.path.isfile(xml_filename):
    #print "Not downloading map data.  '%s' already exists."%xml_filename
  #else:
  #print "Downloading OSM data."
  #print "'%s' -> '%s'"%(map_source_data_url,xml_filename)
  #urllib.urlretrieve(map_source_data_url,xml_filename)

  # Read the XML into memory.
  osmdoc = libxml2.parseFile(xml_filename)

  # Read the XSLT
  styledoc = libxml2.parseFile(xsl_filename)
  style = libxslt.parseStylesheetDoc(styledoc)

  # Extract POIs to the CSV file.  Stylesheet parameters are evaluated as
  # XPath expressions, so string values need their own quotes; a bare
  # "power" would be read as a node test instead of the string 'power'.
  result = style.applyStylesheet(osmdoc, {"key":"'power'", "value":"'generator'"})
  style.saveResultToFilename("temp.csv", result, 0)

  # The bindings do not release the underlying C structures automatically.
  style.freeStylesheet()
  osmdoc.freeDoc()
  result.freeDoc()
Example #2
0
def xslt_convert( xml, xsl ):
	"""Apply the stylesheet file *xsl* to the document file *xml*.

	Returns the serialized transformation result as produced by
	``saveResultToString``.  Problems (XSLT support missing, stylesheet or
	document not parseable) are reported through ``Error``.

	NOTE(review): ``Error`` is assumed to abort (raise or exit) -- confirm
	against its definition.
	"""
	if not _have_xslt:
		Error( _err_msg )

	libxml2.lineNumbersDefault( 1 )
	libxml2.substituteEntitiesDefault( 1 )

	try:
		styledoc = libxml2.parseFile( xsl )
	except libxml2.parserError:
		Error("Cannot parse XSL stylesheet: '%s'" % xsl )

	style = libxslt.parseStylesheetDoc( styledoc )
	if style is None:
		# The stylesheet did not compile, so nothing took over the parsed
		# document; release it before reporting the failure.
		styledoc.freeDoc()
		Error("Cannot parse XSL stylesheet: '%s'" % xsl )

	doc = None
	result = None
	try:
		try:
			doc = libxml2.parseFile( xml )
		except libxml2.parserError:
			Error("Unable to parse XML document: '%s'" % xml )

		result = style.applyStylesheet( doc, None )
		return style.saveResultToString( result )
	finally:
		# Release the native objects on every exit path, including errors.
		style.freeStylesheet()
		if doc is not None:
			doc.freeDoc()
		if result is not None:
			result.freeDoc()
Example #3
0
	def setUp(self):
		"""Initialise libxml2 for this fixture.

		Stores the value returned by ``libxml2.debugMemory(1)`` in
		``self.initialMemUsed`` (presumably a memory baseline for a later
		leak check -- confirm against the matching tear-down), initialises
		the parser, enables line numbers and registers ``handleError`` as
		the libxml2 error handler with this object as its context argument.
		"""
		#
		# Set up Libxml2.
		#
		self.initialMemUsed = libxml2.debugMemory(1)
		libxml2.initParser()
		libxml2.lineNumbersDefault(1)
		# ``self`` is handed back to handleError as its first argument.
		libxml2.registerErrorHandler(handleError, self)
Example #4
0
 def setUp(self):
     """Initialise libxml2 for this fixture.

     Stores the value returned by ``libxml2.debugMemory(1)`` in
     ``self.initialMemUsed`` (presumably a memory baseline for a later
     leak check -- confirm against the matching tear-down), initialises
     the parser, enables line numbers and registers ``handleError`` as
     the libxml2 error handler with this object as its context argument.
     """
     #
     # Set up Libxml2.
     #
     self.initialMemUsed = libxml2.debugMemory(1)
     libxml2.initParser()
     libxml2.lineNumbersDefault(1)
     # ``self`` is handed back to handleError as its first argument.
     libxml2.registerErrorHandler(handleError, self)
Example #5
0
def _apply_stylesheet(xsl_path, xml_string):
    """Run the stylesheet file *xsl_path* over *xml_string*; return the result string.

    All libxml2/libxslt objects created here are released before returning,
    also when parsing, transforming or serializing raises.

    Raises ValueError when the stylesheet parses as XML but cannot be
    compiled into an XSLT stylesheet.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    if style is None:
        # No stylesheet object exists to own the document; free it here.
        style_doc.freeDoc()
        raise ValueError("Cannot compile XSL stylesheet: '%s'" % xsl_path)

    doc = None
    result = None
    try:
        doc = libxml2.parseDoc(xml_string)
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()


def xsl_transform(content, bDownloadImages):
    """Convert *content* through the two GDOCS2CNXML stylesheets.

    Steps: tidy/premail the input, run the first stylesheet, convert TeX to
    MathML, optionally download images, then run the second stylesheet.

    Parameters:
        content: markup handed to ``tidy_and_premail``.
        bDownloadImages: when true, ``downloadImages`` is run on the
            intermediate tree and its image objects are returned.

    Returns:
        ``(result_string, imageObjects)``; ``imageObjects`` is an empty dict
        when no download was requested.
    """
    # 1 Clean up the incoming markup
    tidied_html = tidy_and_premail(content)

    # 2 Settings for libxml2 for transforming XHTML entities to valid XML
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 3 First XSLT transformation
    first_pass = _apply_stylesheet(GDOCS2CNXML_XSL1, tidied_html)

    # Parse XML with etree from lxml for TeX2MathML and image download
    tree = etree.fromstring(first_pass)

    # 4 Convert TeX to MathML with Blahtex
    tree = tex2mathml(tree)

    # 5 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        tree, imageObjects = downloadImages(tree)

    # 6 Second transformation, fed with the re-serialized tree
    second_pass = _apply_stylesheet(GDOCS2CNXML_XSL2, etree.tostring(tree))

    return second_pass, imageObjects
def _apply_stylesheet(xsl_path, xml_string):
    """Run the stylesheet file *xsl_path* over *xml_string*; return the result string.

    All libxml2/libxslt objects created here are released before returning,
    also when parsing, transforming or serializing raises.

    Raises ValueError when the stylesheet parses as XML but cannot be
    compiled into an XSLT stylesheet.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    if style is None:
        # No stylesheet object exists to own the document; free it here.
        style_doc.freeDoc()
        raise ValueError("Cannot compile XSL stylesheet: '%s'" % xsl_path)

    doc = None
    result = None
    try:
        doc = libxml2.parseDoc(xml_string)
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()


def xsl_transform(content, bDownloadImages):
    """Convert *content* through the two GDOCS2CNXML stylesheets.

    Steps: tidy/premail the input, run the first stylesheet, convert TeX to
    MathML, optionally download images, then run the second stylesheet.

    Parameters:
        content: markup handed to ``tidy_and_premail``.
        bDownloadImages: when true, ``downloadImages`` is run on the
            intermediate tree and its image objects are returned.

    Returns:
        ``(result_string, imageObjects)``; ``imageObjects`` is an empty dict
        when no download was requested.
    """
    # 1 Clean up the incoming markup
    tidied_html = tidy_and_premail(content)

    # 2 Settings for libxml2 for transforming XHTML entities to valid XML
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 3 First XSLT transformation
    first_pass = _apply_stylesheet(GDOCS2CNXML_XSL1, tidied_html)

    # Parse XML with etree from lxml for TeX2MathML and image download
    tree = etree.fromstring(first_pass)

    # 4 Convert TeX to MathML with Blahtex
    tree = tex2mathml(tree)

    # 5 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        tree, imageObjects = downloadImages(tree)

    # 6 Second transformation, fed with the re-serialized tree
    second_pass = _apply_stylesheet(GDOCS2CNXML_XSL2, etree.tostring(tree))

    return second_pass, imageObjects
def _apply_stylesheet(xsl_path, xml_string):
    """Run the stylesheet file *xsl_path* over *xml_string*; return the result string.

    All libxml2/libxslt objects created here are released before returning,
    also when parsing, transforming or serializing raises.

    Raises ValueError when the stylesheet parses as XML but cannot be
    compiled into an XSLT stylesheet.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    if style is None:
        # No stylesheet object exists to own the document; free it here.
        style_doc.freeDoc()
        raise ValueError("Cannot compile XSL stylesheet: '%s'" % xsl_path)

    doc = None
    result = None
    try:
        doc = libxml2.parseDoc(xml_string)
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()


def xsl_transform(content, bDownloadImages, base_or_source_url='.'):
    """Convert an HTML page to CNXML via the two XHTML2CNXML stylesheets.

    Parameters:
        content: the HTML to convert.
        bDownloadImages: when true, ``downloadImages`` is run on the
            intermediate tree and its image objects are returned.
        base_or_source_url: passed through to ``downloadImages``.

    Returns:
        ``(result_string, imageObjects, html_title)``.  ``html_title`` stays
        ``"Untitled"`` when no title could be extracted; ``imageObjects`` is
        an empty dict when no download was requested.
    """
    use_readability = True

    # 1 get title with readability (best effort: keep the default on failure,
    #   but let KeyboardInterrupt/SystemExit propagate)
    html_title = "Untitled"
    try:
        html_title = Document(content).title()
    except Exception:
        pass

    # 2 use readability to get content
    if use_readability:
        readable_article = Document(content).summary()
    else:
        readable_article = content

    # 3 tidy and premail
    tidied_html = tidy_and_premail(readable_article)

    # 4 Load XHTML catalog files: Makes XHTML entities readable.
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 5 XSLT transformation
    first_pass = _apply_stylesheet(XHTML2CNXML_XSL1, tidied_html)

    # Parse XML with etree from lxml for TeX2MathML and image download
    tree = etree.fromstring(first_pass)

    # 6 Convert TeX to MathML with Blahtex (not in XHTML)
    # tree = tex2mathml(tree)

    # 7 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        tree, imageObjects = downloadImages(tree, base_or_source_url)

    # 8 add title from html
    tree = add_cnxml_title(tree, html_title)

    # 9 Second transformation, fed with the re-serialized tree
    second_pass = _apply_stylesheet(XHTML2CNXML_XSL2, etree.tostring(tree))

    return second_pass, imageObjects, html_title
#
# the testsuite description
#
# CONF is resolved relative to the directory containing this script.
CONF = os.path.join(os.path.dirname(__file__), "test/xsdtest/xsdtestsuite.xml")
# LOG is a relative name, so the log is created in the current working directory.
LOG = "check-xsddata-test-suite.log"

# Opened (and truncated) at import time; written to by the error callback.
log = open(LOG, "w")
# Module-level counters for schema and instance tests, all starting at zero.
nb_schemas_tests = 0
nb_schemas_success = 0
nb_schemas_failed = 0
nb_instances_tests = 0
nb_instances_success = 0
nb_instances_failed = 0

# Ask libxml2 to keep line numbers on parsed nodes.
libxml2.lineNumbersDefault(1)


#
# Error and warning callbacks
#
def callback(ctx, str):
    """Append a libxml2 message to the log file.

    ``ctx`` is the context value given at registration time and ``str`` is
    the message text; both are written back to back with no separator.
    """
    global log
    log.write("%s%s" % (ctx, str))


# Register with an empty string as context, so only the message text is logged.
libxml2.registerErrorHandler(callback, "")

#
# Resolver callback
#
def main(args = None):
    """Command-line driver: parse options, load a stylesheet, transform documents.

    Parameters:
        args: list of command-line arguments; when empty or None,
            ``sys.argv[1:]`` is used (and ``usage`` is called if that is
            empty too).

    The arguments are scanned twice.  The first pass handles the options and
    stores their values in module-level globals.  The second pass skips the
    options, treats the first remaining argument as the stylesheet and every
    following argument as a document to transform with ``xsltProcess``.

    Returns 3 when an unknown option is met, otherwise None.  Other failures
    are recorded in the global ``errorno`` (4: stylesheet file not parsed,
    5: stylesheet not compiled, 6: a document not parsed).  The global
    ``params`` is reset to None on the way out.
    """
    global debug
    global repeat
    global timing
    global novalid
    global noout
    global docbook
    global html
    global xinclude
    global profile
    global params
    global output
    global errorno

    done = 0
    cur = None

    if not args:
        args = sys.argv[1:]
        if len(args) <= 0:
            usage(sys.argv[0])

    # First pass: options only; file names are skipped.
    i = 0
    while i < len(args):
        opt = args[i]
        if opt == "-":
            break
        # startswith() also copes with an empty-string argument.
        if not opt.startswith("-"):
            i = i + 1
            continue
        if opt in ("-timing", "--timing"):
            timing = 1
        elif opt in ("-debug", "--debug"):
            debug = 1
        elif opt in ("-verbose", "--verbose", "-v"):
            print("TODO: xsltSetGenericDebugFunc() mapping missing")
        elif opt in ("-version", "--version", "-V"):
            print("TODO: version information mapping missing")
        elif opt in ("-repeat", "--repeat"):
            # This branch used to repeat the "-verbose" test and could never
            # run; its body shows it is the repeat option.
            if repeat == 0:
                repeat = 20
            else:
                repeat = 100
        elif opt in ("-novalid", "--novalid"):
            print("TODO: xmlLoadExtDtdDefaultValue mapping missing")
            novalid = 1
        elif opt in ("-noout", "--noout"):
            noout = 1
        elif opt in ("-html", "--html"):
            html = 1
        elif opt in ("-nonet", "--nonet"):
            # Recognised, but has no effect until the loader mapping exists.
            print("TODO: xmlSetExternalEntityLoader mapping missing")
        elif opt in ("-catalogs", "--catalogs"):
            catalogs = os.environ.get('SGML_CATALOG_FILES')
            if catalogs is not None:
                libxml2.loadCatalogs(catalogs)
            else:
                print("Variable $SGML_CATALOG_FILES not set")
        elif opt in ("-xinclude", "--xinclude"):
            xinclude = 1
            libxslt.setXIncludeDefault(1)
        elif opt in ("-o", "-output", "--output"):
            # The second pass already skips this option and its value, so it
            # must be accepted here rather than rejected as unknown.
            i = i + 1
            output = args[i]
        elif opt in ("-param", "--param"):
            params[args[i + 1]] = args[i + 2]
            i = i + 2
        elif opt in ("-stringparam", "--stringparam"):
            # String values need their own quotes to be valid XPath.
            params[args[i + 1]] = "'%s'" % (args[i + 2])
            i = i + 2
        elif opt in ("-maxdepth", "--maxdepth"):
            # The value that follows is skipped above as a non-option.
            print("TODO: xsltMaxDepth mapping missing")
        else:
            print("Unknown option %s" % (opt))
            usage()
            return(3)

        i = i + 1

    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)
    # TODO: xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS
    # if novalid:
    # TODO: xmlLoadExtDtdDefaultValue = 0

    # TODO libxslt.exsltRegisterAll();
    libxslt.registerTestModule()

    # Second pass, part one: find and load the stylesheet.
    i = 0
    while i < len(args) and done == 0:
        opt = args[i]
        if opt in ("-maxdepth", "--maxdepth", "-o", "-output", "--output"):
            i = i + 2
            continue
        if opt in ("-param", "--param", "-stringparam", "--stringparam"):
            i = i + 3
            continue
        if opt != "-" and opt.startswith("-"):
            i = i + 1
            continue
        if timing:
            startTimer()
        style = libxml2.parseFile(opt)
        if timing:
            endTimer("Parsing stylesheet %s" % (opt))
        if style is None:
            print("cannot parse %s" % (opt))
            cur = None
            errorno = 4
            done = 1
        else:
            cur = libxslt.loadStylesheetPI(style)
            if cur is not None:
                # A stylesheet was loaded through the document's own
                # processing instruction; process this file with it.
                xsltProcess(style, cur, opt)
                cur = None
            else:
                cur = libxslt.parseStylesheetDoc(style)
                if cur is None:
                    style.freeDoc()
                    errorno = 5
                    done = 1
        i = i + 1
        # Only the first non-option argument is the stylesheet.
        break

    # Second pass, part two: every remaining argument is a document.
    while i < len(args) and done == 0 and cur is not None:
        if timing:
            startTimer()
        if html:
            doc = libxml2.htmlParseFile(args[i], None)
        else:
            doc = libxml2.parseFile(args[i])
        if doc is None:
            print("unable to parse %s" % (args[i]))
            errorno = 6
            i = i + 1
            continue
        if timing:
            endTimer("Parsing document %s" % (args[i]))
        xsltProcess(doc, cur, args[i])
        i = i + 1

    if cur is not None:
        cur.freeStylesheet()
    params = None
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
  GNU General Public License for more details.

  http://www.gnu.org/licenses/gpl-3.0.html
"""
 
import libxslt
import libxml2
import re
import urllib
import os
import csv
import sys
import flickr

# Process-wide libxml2 defaults, set once at import time:
# enable line numbers and entity substitution.
libxml2.lineNumbersDefault(1)
libxml2.substituteEntitiesDefault(1)

def escape(html):
    """
      Returns the given text as unicode with ampersands, angle brackets and
      both kinds of quotes replaced by HTML entities.

      Byte strings are decoded as UTF-8 (UnicodeDecodeError if they are not
      valid UTF-8); text that is already unicode is accepted as is instead
      of failing with a TypeError.
    """
    # Decode first: the replaced characters are all ASCII, so the result is
    # the same as escaping the bytes and decoding afterwards.
    if isinstance(html, bytes):
        html = html.decode('utf-8')
    # '&' must be handled first so the entities added below are not re-escaped.
    return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')

def processRawData(xapi_uri, xsl_uri, features):
  """
    Downloads the data from XAPI and turns it into a Python object.
  """
  # Download data to temporary file and read the XML/XSL into memory
def getLineNumbersDefault():
    """Return libxml2's current line-numbers default, leaving it unchanged.

    The value is obtained from the setter's return value: the default is
    set to 0 and immediately restored to whatever that call returned.
    """
    previous = libxml2.lineNumbersDefault(0)
    # Put the original setting back before reporting it.
    libxml2.lineNumbersDefault(previous)
    return previous
Example #12
0
def init_libxml2(xml):
    """Configure libxml2 and pass *xml* through untouched.

    Loads the XHTML entity catalog and enables line numbers and entity
    substitution.  Returns ``(xml, {})``: the unchanged input plus a new
    empty dict.
    """
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    nothing_produced = {}
    return xml, nothing_produced
def _apply_stylesheet(xsl_path, xml_string):
    """Run the stylesheet file *xsl_path* over *xml_string*; return the result string.

    All libxml2/libxslt objects created here are released before returning,
    also when parsing, transforming or serializing raises.

    Raises ValueError when the stylesheet parses as XML but cannot be
    compiled into an XSLT stylesheet.
    """
    style_doc = libxml2.parseFile(xsl_path)
    style = libxslt.parseStylesheetDoc(style_doc)
    if style is None:
        # No stylesheet object exists to own the document; free it here.
        style_doc.freeDoc()
        raise ValueError("Cannot compile XSL stylesheet: '%s'" % xsl_path)

    doc = None
    result = None
    try:
        doc = libxml2.parseDoc(xml_string)
        result = style.applyStylesheet(doc, None)
        return style.saveResultToString(result)
    finally:
        style.freeStylesheet()
        if doc is not None:
            doc.freeDoc()
        if result is not None:
            result.freeDoc()


def xsl_transform(content, bDownloadImages, base_or_source_url='.'):
    """Convert HTML to CNXML via the two XHTML2CNXML stylesheets.

    Parameters:
        content: the HTML to convert; it is used as is, without readability
            extraction.
        bDownloadImages: when true, ``downloadImages`` is run on the
            intermediate tree and its image objects are returned.
        base_or_source_url: passed through to ``downloadImages``.

    Returns:
        ``(result_string, imageObjects, html_title)``.  ``html_title`` is
        always ``"Untitled"`` here; ``imageObjects`` is an empty dict when
        no download was requested.
    """
    html_title = "Untitled"

    # 1 get title with readability
    # ONLY MAKES SENSE FOR AN UNKNOWN HTML, SO I COMMENTED IT OUT FOR https://github.com/Connexions/rhaptos.html2cnxml
    #try:
    #    html_title = Document(content).title()
    #except:
    #    pass

    # 2 use readability to get content
    # ONLY MAKES SENSE FOR AN UNKNOWN HTML, SO I COMMENTED IT OUT FOR https://github.com/Connexions/rhaptos.html2cnxml
    #readable_article = Document(content).summary()
    readable_article = content

    # 3 tidy and premail
    tidied_html = tidy_and_premail(readable_article)

    # 4 Load XHTML catalog files: Makes XHTML entities readable.
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    # 5 XSLT transformation
    first_pass = _apply_stylesheet(XHTML2CNXML_XSL1, tidied_html)

    # Parse XML with etree from lxml for TeX2MathML and image download
    tree = etree.fromstring(first_pass)

    # 6 Convert TeX to MathML with Blahtex (not in XHTML)
    # tree = tex2mathml(tree)

    # 7 Optional: Download Google Docs Images
    imageObjects = {}
    if bDownloadImages:
        tree, imageObjects = downloadImages(tree, base_or_source_url)

    # 8 add title from html
    tree = add_cnxml_title(tree, html_title)

    # 9 Second transformation, fed with the re-serialized tree
    second_pass = _apply_stylesheet(XHTML2CNXML_XSL2, etree.tostring(tree))

    return second_pass, imageObjects, html_title
def main():
  """Print a KML document of the power generators found in ``generators.xml``.

  For every entry of ``marker_layers`` the stylesheet ``trans_pois.xsl`` is
  applied to the OSM data with that layer's key/value pair, the result is
  saved as a tab-separated file and read back, and one KML placemark is
  emitted per solar-thermal, solar-photovoltaic or wind generator.  Rows of
  any other kind are skipped.  The finished KML goes to standard output.
  """

  libxml2.lineNumbersDefault(1)
  libxml2.substituteEntitiesDefault(1)

  # Filename for OSM map data
  xml_filename = "generators.xml"

  # Filename for XSLT to extract POIs
  xsl_filename = "trans_pois.xsl"

  # Layers we are going to extract (in a dict)
  # The key is the layer name, the value is a list of parameters:
  # 0,1: OSM key, value
  # 2: POI text output file name
  marker_layers = {
    "Generators": ["power", "generator", "data_generators.csv"],
  }

  # Read the XML into memory.  It is reused for every layer.
  osmdoc = libxml2.parseFile(xml_filename)

  # Read the XSLT
  styledoc = libxml2.parseFile(xsl_filename)
  style = libxslt.parseStylesheetDoc(styledoc)

  # KML fragments, joined once at the end instead of rebuilding the whole
  # string for every placemark.
  kml_parts = ["""<?xml version="1.0" encoding="UTF-8"?><kml xmlns="http://www.opengis.net/kml/2.2">\n<Document>\n<Style id="solar">\n\t<IconStyle>\n\t\t<Icon>\n\t\t\t<href>http://tomchance.dev.openstreetmap.org/pois/power_solar.png</href>\n\t\t</Icon>\n\t</IconStyle>\n</Style>\n<Folder><name>Power generators in London</name>\n\n\n"""]

  # KML element names are case-sensitive: the style reference is <styleUrl>.
  placemark_template = """<Placemark>\n\t<name>%s</name>\n\t<description><![CDATA[<strong>Description:</strong> %s<br><br><strong>Output capacity rating:</strong> %s]]></description>\n\t<styleUrl>#%s</styleUrl>\n\t<Point>\n\t\t<coordinates>%s,%s</coordinates>\n\t</Point>\n</Placemark>\n"""

  # Extract POIs to layer text files
  for tags in marker_layers.values():
    layer_filename = tags[2]
    # Parameter values are XPath expressions, hence the inner quotes.
    result = style.applyStylesheet(
      osmdoc, {"key": "'%s'" % tags[0], "value": "'%s'" % tags[1]})
    style.saveResultToFilename(layer_filename, result, 0)
    result.freeDoc()

    # Read the tab-separated file back, one dict per row
    with open(layer_filename, 'rb') as layer_file:
      for row in csv.DictReader(layer_file, delimiter='\t'):
        source = row['source']
        if source == 'solar' and row['type'] == 'heat':
          gen_type = "Solar thermal panel(s)"
          gen_style = "solar"
        elif source == 'solar' and row['type'] == 'electricity':
          gen_type = "Solar photovoltaic panel(s)"
          gen_style = "solar"
        elif source == 'wind':
          gen_type = "Wind turbine(s)"
          gen_style = "default"
        else:
          # Generators of any other kind are left out of the KML.
          continue

        rating = row['rating']
        if rating == '':
          rating = "Unknown"
        description = row['description']
        if description == '':
          description = "No information available"

        kml_parts.append(placemark_template % (
          gen_type, description, rating, gen_style, row['lon'], row['lat']))

  kml_parts.append("</Folder></Document></kml>")

  # The bindings do not release the underlying C structures automatically.
  style.freeStylesheet()
  osmdoc.freeDoc()

  print(''.join(kml_parts))
def init_libxml2(xml):
    """Configure libxml2 and pass *xml* through untouched.

    Loads the XHTML entity catalog and enables line numbers and entity
    substitution.  Returns ``(xml, {})``: the unchanged input plus a new
    empty dict.
    """
    libxml2.loadCatalog(XHTML_ENTITIES)
    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)

    nothing_produced = {}
    return xml, nothing_produced