Beispiel #1
0
    def searchDocuments(self,
                        user,
                        url="",
                        namespaces={},
                        xpath_="",
                        range_=slice(0, -1),
                        depth=-1,
                        version="",
                        restore=False):
        """Return the selected documents of ``user`` as a tuple of elements.

        The DOM stored under ``self.data[user]`` is the starting point.  When
        ``url`` is non-empty, the search root becomes the first node matched
        by ``//*[xmlu:src='<url>']``.  When ``xpath_`` is non-empty it is
        evaluated against that root and the match list is sliced with
        ``range_``; note that the default ``slice(0, -1)`` leaves out the
        last match.  With an empty ``xpath_`` the root itself is the only
        result.

        ``namespaces`` is expanded into keyword arguments of the XPath
        ``find`` call.  ``depth``, ``version`` and ``restore`` are accepted
        but not used by this method.

        Every result node is serialized with ``toxml()`` and re-parsed with
        ``et.XML`` before being returned.

        >>> store = XMLStorage(spock=minidom.parseString("<restaurant><meal><eggs /><spam /></meal><meal><eggs /><spam /><spam /></meal><foo bar='42'>6*7</foo></restaurant>"))
        >>> et.tostring(store.searchDocuments("spock")[0])
        '<restaurant><meal><eggs /><spam /></meal><meal><eggs /><spam /><spam /></meal><foo bar="42">6*7</foo></restaurant>'
        >>> list(et.tostring(el) for el in store.searchDocuments("spock", xpath_="//spam"))
        ['<spam />', '<spam />']
        """
        root = self.data[user]
        context = xpath.XPathContext()

        if url:
            # Narrow the search root to the element carrying this source URL.
            candidates = context.find("//*[xmlu:src='{}']".format(url), root)
            root = candidates[0]

        if xpath_:
            matches = context.find(xpath_, root, **namespaces)[range_]
        else:
            matches = (root, )

        # Build the result eagerly; the original author noted that returning
        # a lazy generator here caused problems.
        converted = []
        for node in matches:
            converted.append(et.XML(node.toxml()))
        return tuple(converted)
Beispiel #2
0
    def multitest(self, expr, **kwargs):
        """Evaluate ``expr`` through three entry points and check they agree.

        For each of ``find``, ``findnode``, ``findvalue`` and ``findvalues``
        the expression is run against ``self.doc`` via the module-level
        function, via a compiled ``xpath.XPath`` object, and via an
        ``xpath.XPathContext`` built from ``kwargs``.  A call that raises
        ``xpath.XPathError`` contributes the exception's class instead of a
        value, so failing expressions are compared as well.

        Returns a dict mapping each function name to the module-level result.
        """
        context = xpath.XPathContext(**kwargs)
        compiled = xpath.XPath(expr)

        def outcome(target, name, *call_args, **call_kwargs):
            # Result of target.<name>(...), or the XPathError subclass raised.
            try:
                return getattr(target, name)(*call_args, **call_kwargs)
            except xpath.XPathError as err:
                return err.__class__

        results = {}
        for name in ('find', 'findnode', 'findvalue', 'findvalues'):
            expected = outcome(xpath, name, expr, self.doc, **kwargs)
            results[name] = expected

            via_compiled = outcome(compiled, name, self.doc, **kwargs)
            self.assertEqual(expected, via_compiled)

            via_context = outcome(context, name, expr, self.doc, **kwargs)
            self.assertEqual(expected, via_context)

        return results
Beispiel #3
0
    def setUp(self):
        """Parse both fixture strings and prepare a namespace-aware context.

        ``self.xml`` and ``self.xmlns`` are parsed with minidom into
        ``self.doc`` and ``self.docns``.  ``self.context`` is created with a
        default namespace plus a binding for the ``b`` prefix.
        """
        minidom = xml.dom.minidom
        self.doc = minidom.parseString(self.xml)
        self.docns = minidom.parseString(self.xmlns)

        prefix_bindings = {'b': 'http://b.example.com'}
        self.context = xpath.XPathContext(
            default_namespace='http://a.example.com',
            namespaces=prefix_bindings)
Beispiel #4
0
def run_test():
    """Print the file identifier of a fixed metadata file, looked up twice.

    The document element of a hard-coded XML file is queried for
    ``/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString``, once with
    the module-level ``xpath.findvalues`` and once with the ``findvalues``
    method of an ``XPathContext`` that has the same prefixes registered.
    Both calls also receive the prefix mapping via ``namespaces=``.
    """
    source_path = '/homespace/gaubert/RODD/src-data/130810-vprodnav/3.xml'
    doc = xml.dom.minidom.parse(source_path).documentElement

    context = xpath.XPathContext()

    # Prefix -> namespace URI pairs, kept in the order they are registered.
    bindings = (
        ('gmi', "http://www.isotc211.org/2005/gmi"),
        ('eum', "http://www.eumetsat.int/2008/gmi"),
        ('gco', "http://www.isotc211.org/2005/gco"),
        ('gmd', "http://www.isotc211.org/2005/gmd"),
        ('xsi', "http://www.w3.org/2001/XMLSchema-instance"),
    )
    c = dict(bindings)
    for prefix, uri in bindings:
        context.namespaces[prefix] = uri

    query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'

    result = xpath.findvalues(query, doc, namespaces=c)
    print("Result = %s\n" % (result))

    result = context.findvalues(query, doc, namespaces=c)
    print("Result = %s\n" % (result))
Beispiel #5
0
 def setUp(self):
     """Parse ``self.xml`` and build a context with variables and a prefix.

     The context gets two plain variables (``start`` and ``end``), one
     variable keyed by a ``(namespace URI, name)`` tuple, and the ``ana``
     prefix bound to that same namespace URI.
     """
     self.doc = xml.dom.minidom.parseString(self.xml)

     ctx = self.context = xpath.XPathContext()

     xpath_vars = ctx.variables
     xpath_vars['start'] = 2
     xpath_vars['end'] = '4'
     xpath_vars[('http://anaconda.python.org', 'start')] = 3

     ctx.namespaces['ana'] = 'http://anaconda.python.org'
Beispiel #6
0
 def get(self, *args):
     """Look up information about the character(s) passed as ``args[1]``.

     The value is URL-unquoted and decoded as UTF-8.  If the decoded text is
     longer than one character, a web search restricted to fileformat.info
     is performed and the first hit's title and URL are sent back through
     ``self.ok``.  Otherwise the Unicode name of the single character is
     determined, first via ``unicodedata`` and, failing that, from the
     ``<title>`` of the page at ``URI % ord(char)``.

     NOTE(review): the visible part of this method ends right after ``name``
     has been computed; whatever uses ``name`` is not shown here.
     """
     character = args[1] or ""
     if not character:
         return self.ok("Please provide a UTF-8 character.")
     character = urllib.unquote(character)
     try:
         unic = character.decode("utf8")
     except:
         # Any failure while decoding is reported to the caller as text.
         return self.ok("Error decoding UTF-8 character.")
     if len(unic) > 1:
         # More than one character: treat the input as a search term and
         # search for the matching fileformat.info page.
         rv = api.urlfetch.fetch(
             "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&safe=off&q=site:fileformat.info%%20%s"
             % urllib.quote(unic)).content
         js = simplejson.loads(rv)
         # Only the first search result is used.
         rv = str(js['responseData']['results'][0]['titleNoFormatting'])
         # Replace decimal character references (&#NNN;) with the character.
         rv = re.sub(r'&#(\d+);', (lambda m: unichr(int(m.group(1)))), rv)
         # Remember a "U+XXXX" code point found in the title, if any.
         m = re.search(r'U\+([0-9a-fA-F]+)', rv)
         rv += " - " + js['responseData']['results'][0]['url']
         if m:
             # Append the character that the hexadecimal code point denotes.
             rv += ' (%s)' % (unichr(int(m.group(1), 16)))
         return self.ok(rv)
     try:
         name = unicodedata.name(unic[0])
     except:
         # Our unicodedata was lacking; let's ask fileformat.info
         import html5lib, xpath, StringIO
         # URI is defined outside this method; it is used as a %-format
         # template that receives the character's code point.
         url = URI % ord(unic[0])
         bytes = api.urlfetch.fetch(url).content
         fp = StringIO.StringIO(bytes)
         pars = None
         try:
             pars = html5lib.parse(fp, treebuilder="dom")
         except Exception, e:
             name = "(No name found)"
         if not pars:
             # Parsing raised or produced nothing usable.
             name = "(No name found)"
         else:
             con = xpath.XPathContext()
             # The query below addresses elements in the XHTML namespace
             # through the "x" prefix.
             con.namespaces["x"] = "http://www.w3.org/1999/xhtml"
             it = con.find("//x:title//text()", pars.documentElement)
             if not it:
                 name = "(No name found)"
             else:
                 # Concatenate all text nodes of the title ...
                 name = reduce((lambda x, y: x + y),
                               map((lambda x: x.data), it))
                 # ... and collapse runs of whitespace into single spaces.
                 name = " ".join(name.split())
                 # Prefer the part of the title enclosed in single quotes.
                 m = re.search(r"'(.*?)'", name)
                 if m:
                     name = m.group(1)
Beispiel #7
0
def getXpathList(node, queryList):
    """Evaluate groups of XPath queries on ``node`` and tabulate the results.

    ``queryList`` is an iterable of query groups.  Each query in a group
    yields one column: a single-item column when the query evaluates to a
    unicode string, otherwise the ``nodeValue`` of every returned node.  The
    columns of a group are zipped into rows, with shorter columns padded
    with ``u''``.

    Returns a list with one entry per group; each entry is a list of rows
    and each row is a list.
    """
    context = xpath.XPathContext(node)

    def column_for(query):
        # One column of values for a single query.
        found = context.find(query, node)
        if isinstance(found, unicode):
            return [found]
        return [item.nodeValue for item in found]

    label = []
    for queryLine in queryList:
        columns = [column_for(query) for query in queryLine]
        rows = izip_longest(*columns, fillvalue=u'')
        label.append([list(row) for row in rows])
    return label
Beispiel #8
0
 def __init__(self, filename, base_xpath="//entries"):
     """Parse ``filename`` into a DOM tree and select its entry nodes.

     The parsed tree is stored in ``self.tree``, an ``XPathContext`` built
     from it in ``self.context``, and the result of evaluating
     ``base_xpath`` against the tree in ``self.entries``.

     Raises ``XmlError`` when the file is not well-formed XML.  An
     ``IOError`` while reading the file is logged and re-raised unchanged.
     """
     # Error handling follows
     # http://stackoverflow.com/questions/192907/xml-parsing-elementtree-vs-sax-and-dom
     try:
         rdf_tree = MD.parse(filename)
     except xml.parsers.expat.ExpatError as e:
         msg = "".join([
             "Error: The data file is not proper XML! Cannot continue.",
             "[XML] Error (line {}): {}\n".format(e.lineno, e.code),
             "[XML] Offset: {}".format(e.offset),
         ])
         L.error(msg)
         raise XmlError(msg)
     except IOError as e:
         L.error("[IO] I/O Error {}: {}".format(e.errno, e.strerror))
         raise
     else:
         self.tree = rdf_tree

     # The context is constructed from the parsed document itself.
     self.context = xpath.XPathContext(rdf_tree)
     self.entries = self.context.find(base_xpath, rdf_tree)
Beispiel #9
0
 def setUp(self):
     """Parse ``self.xml`` and create a context that knows the ``a`` prefix."""
     prefix_bindings = {'a': 'http://www.example.com/a'}
     self.doc = xml.dom.minidom.parseString(self.xml)
     self.context = xpath.XPathContext(namespaces=prefix_bindings)
Beispiel #10
0
from google.appengine import api

import base

def do_generic_parse(url, xpth):
    """Fetch ``url``, parse it as HTML and return text selected by ``xpth``.

    Spaces in ``url`` are replaced with ``%20`` before fetching.  The page is
    parsed with html5lib into a DOM, and ``xpth + "//text()"`` is evaluated
    with the ``x`` prefix bound to the XHTML namespace.

    Returns the concatenated text of all matches, with newlines turned into
    spaces, stripped, and cut to at most 200 characters.  Instead of raising,
    a short diagnostic string is returned when parsing fails or nothing
    matches (``"ENOTFOUND"``).
    """
    url = url.replace(' ', '%20')
    thepage = api.urlfetch.fetch(url).content
    fp = StringIO.StringIO(thepage)
    try:
        pars = html5lib.parse(fp, treebuilder="dom")
    except Exception as e:
        # Deliberately broad: any parser failure becomes the reply text.
        return "something failed: %s (%s)." % (str(e), str(type(e)))
    if not pars:
        return "Parsing failed for some reason"

    con = xpath.XPathContext()
    con.namespaces["x"] = "http://www.w3.org/1999/xhtml"
    it = con.find(xpth + "//text()", pars.documentElement)
    if not it:
        return "ENOTFOUND"

    # Join the text nodes in one pass instead of pairwise concatenation.
    stuff = "".join(node.data for node in it)
    stuff = stuff.replace("\n", " ").strip()
    return stuff[:200]

class Main(base.RequestHandler):

    def get(self,*args):
        stuff=os.environ['PATH_INFO']
        a=stuff.split('/',2)
        stuff=a[2]
        try:
Beispiel #11
0
 def setUpClass(cls):
     """Create a shared XPath context with the ``gml`` and ``gsf`` prefixes."""
     ctx = cls.xpathctx = xpath.XPathContext()
     prefixes = ctx.namespaces
     prefixes['gml'] = 'http://www.opengis.net/gml'
     prefixes['gsf'] = 'http://geoscript.org/feature'
Beispiel #12
0
 def __init__(self, test, node):
     """Store ``test`` and ``node`` and build an XPath context from ``node``."""
     self.test, self.node = test, node
     self.ctx = xpath.XPathContext(node)
 def gettranscode(self):
     """Return the value of the first child of the first ``trancodexpath`` match.

     The XPath expression is read from ``self._config.trancodexpath`` and
     evaluated against ``self.dom`` with a fresh, empty context.
     """
     matches = xpath.XPathContext().find(self._config.trancodexpath, self.dom)
     first_match = matches[0]
     return first_match.childNodes[0].nodeValue
Beispiel #14
0
 def setUp(self):
     """Parse ``self.xml`` into ``self.doc`` and create an empty context."""
     parsed = xml.dom.minidom.parseString(self.xml)
     self.doc = parsed
     self.context = xpath.XPathContext()
Beispiel #15
0
 def test_explicit_document_context_prefix(self):
     # The context is built from a separate document whose root declares
     # the "pork" prefix; that context is then used to query self.doc.
     declaring_xml = """<doc xmlns:pork="http://porcupine.example.org/" />"""
     nsdoc = xml.dom.minidom.parseString(declaring_xml)
     context = xpath.XPathContext(nsdoc)

     values = context.findvalues('//pork:item', self.doc)
     self.assertEqual(values, ['porcupine'])
Beispiel #16
0
 def test_empty_context(self):
     # With a context created without arguments, the unprefixed query is
     # expected to yield no values for self.doc.
     empty_context = xpath.XPathContext()
     values = empty_context.findvalues('//item', self.doc)
     self.assertEqual(values, [])
 def getserialno(self):
     """Return the value of the first child of the first ``serialnoxpath`` match.

     The XPath expression is read from ``self._config.serialnoxpath`` and
     evaluated against ``self.dom`` with a fresh, empty context.
     """
     matches = xpath.XPathContext().find(self._config.serialnoxpath, self.dom)
     first_match = matches[0]
     return first_match.childNodes[0].nodeValue