def ETIteratorFromObj(obj, events=None, parser=None):
    """Iterate over the XML elements reachable from obj.

    obj can be
    1) a string that ends with .gz -> the file is unzipped on the fly and
       parsed incrementally with iterparse2
    2) any other string -> it is taken as the name of an XML file, which is
       parsed incrementally with iterparse2
    3) an ElementTree or an Element -> the in-memory tree is walked starting
       at its root
    4) anything else -> assumed to be an open input stream, which is parsed
       incrementally with ElementTree.iterparse

    Yields 2-tuples. For files and streams these are the tuples produced by
    the underlying iterparse call; for in-memory trees each tuple is
    ("memory", element).

    events is forwarded to the iterparse call and is ignored for in-memory
    trees. parser is accepted for interface compatibility but is not used.
    """
    # "unicode" only exists on Python 2; fall back to plain str elsewhere.
    try:
        stringTypes = (str, unicode)
    except NameError:
        stringTypes = (str,)

    if isinstance(obj, stringTypes):
        if obj.endswith(".gz"):
            # GzipFile is always binary; a "t" flag is meaningless on
            # Python 2 and rejected outright by Python 3.
            fStream = GzipFile(obj, "rb")
        else:
            fStream = open(obj, "rt")
        try:
            for rv in iterparse2(fStream, events):
                yield rv
        finally:
            # Runs when the generator is exhausted, closed or collected, so
            # the file handle we opened ourselves never leaks.
            fStream.close()
    elif isinstance(obj, ElementTree.ElementTree) or ElementTree.iselement(obj):
        if ElementTree.iselement(obj):
            root = obj
        else:
            root = obj.getroot()
        # getiterator() is deprecated (and gone in Python 3.9); prefer iter()
        # when the ElementTree implementation offers it.
        walk = getattr(root, "iter", None)
        if walk is None:
            walk = root.getiterator
        for element in walk():
            yield ("memory", element)
    else:
        # not a string, not a tree, not an element, should be a stream
        # the caller owns the stream, so it is not closed here
        for rv in ElementTree.iterparse(obj, events):
            yield rv
def ETIteratorFromObj(obj, events=None, parser=None):
    """Iterate over the XML elements reachable from obj.

    obj can be
    1) a string that ends with .gz -> the file is unzipped on the fly and
       parsed incrementally with iterparse2
    2) any other string -> it is taken as the name of an XML file, which is
       parsed incrementally with iterparse2
    3) an ElementTree or an Element -> the in-memory tree is walked starting
       at its root
    4) anything else -> assumed to be an open input stream, which is parsed
       incrementally with ElementTree.iterparse

    Yields 2-tuples. For files and streams these are the tuples produced by
    the underlying iterparse call; for in-memory trees each tuple is
    ("memory", element).

    events is forwarded to the iterparse call and is ignored for in-memory
    trees. parser is accepted for interface compatibility but is not used.
    """
    # "unicode" only exists on Python 2; fall back to plain str elsewhere.
    try:
        stringTypes = (str, unicode)
    except NameError:
        stringTypes = (str,)

    if isinstance(obj, stringTypes):
        if obj.endswith(".gz"):
            # GzipFile is always binary; a "t" flag is meaningless on
            # Python 2 and rejected outright by Python 3.
            fStream = GzipFile(obj, "rb")
        else:
            fStream = open(obj, "rt")
        try:
            for rv in iterparse2(fStream, events):
                yield rv
        finally:
            # Runs when the generator is exhausted, closed or collected, so
            # the file handle we opened ourselves never leaks.
            fStream.close()
    elif isinstance(obj, ElementTree.ElementTree) or ElementTree.iselement(obj):
        if ElementTree.iselement(obj):
            root = obj
        else:
            root = obj.getroot()
        # getiterator() is deprecated (and gone in Python 3.9); prefer iter()
        # when the ElementTree implementation offers it.
        walk = getattr(root, "iter", None)
        if walk is None:
            walk = root.getiterator
        for element in walk():
            yield ("memory", element)
    else:
        # not a string, not a tree, not an element, should be a stream
        # the caller owns the stream, so it is not closed here
        for rv in ElementTree.iterparse(obj, events):
            yield rv
def __init__(self, key, name, force_=False):
    """Look up the usage statistics of an isbndb.com access key.

    A keystats request is sent for key and the reply is echoed to stdout.

    key    -- access key substituted into the request URL
    name   -- stored as self.name
    force_ -- when true, an error reply does not raise; the instance is
              marked inactive with granted and limit set to 0

    Sets self.name, self.active, self.granted and self.limit.

    Raises ValueError when the reply contains an ErrorMessage element and
    force_ is false.
    """
    # Assigned up front: the forced-error branch returns early and would
    # otherwise leave the instance without a name attribute.
    self.name = name

    url = 'http://isbndb.com/api/books.xml?access_key=%s&index1=isbn'
    url += '&value1=foo&results=keystats'
    url %= key

    response = urllib.urlopen(url)
    try:
        body = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    sys.stdout.write('Got from isbndb:\n %s\n' % (body,))

    reply = ElementTree.fromstring(body)
    if ElementTree.iselement(reply.find('ErrorMessage')):
        sys.stdout.write('Found error...')
        if not force_:
            sys.stdout.write('erroring\n')
            raise ValueError('either bad key or something')
        sys.stdout.write('forcing\n')
        self.active = False
        self.granted = 0
        self.limit = 0
        return

    self.active = True
    stats = reply.find('KeyStats')
    self.granted = int(stats.get('granted'))
    self.limit = int(stats.get('limit'))
def __init__(self, key, name, force_=False):
    """Look up the usage statistics of an isbndb.com access key.

    A keystats request is sent for key and the reply is echoed to stdout.

    key    -- access key substituted into the request URL
    name   -- stored as self.name
    force_ -- when true, an error reply does not raise; the instance is
              marked inactive with granted and limit set to 0

    Sets self.name, self.active, self.granted and self.limit.

    Raises ValueError when the reply contains an ErrorMessage element and
    force_ is false.
    """
    # Assigned up front: the forced-error branch returns early and would
    # otherwise leave the instance without a name attribute.
    self.name = name

    url = 'http://isbndb.com/api/books.xml?access_key=%s&index1=isbn'
    url += '&value1=foo&results=keystats'
    url %= key

    response = urllib.urlopen(url)
    try:
        body = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    sys.stdout.write('Got from isbndb:\n %s\n' % (body,))

    reply = ElementTree.fromstring(body)
    if ElementTree.iselement(reply.find('ErrorMessage')):
        sys.stdout.write('Found error...')
        if not force_:
            sys.stdout.write('erroring\n')
            raise ValueError('either bad key or something')
        sys.stdout.write('forcing\n')
        self.active = False
        self.granted = 0
        self.limit = 0
        return

    self.active = True
    stats = reply.find('KeyStats')
    self.granted = int(stats.get('granted'))
    self.limit = int(stats.get('limit'))
def ETFromObj(obj):
    """Return an ElementTree (or Element) for obj.

    obj can be
    1) a string that ends with .xml, .svg, .nxml or .csml -> the file is
       parsed and the resulting ElementTree returned
    2) a string that ends with .xml.gz -> the file is unzipped, parsed, and
       the resulting ElementTree is returned
    3) an ElementTree or an Element -> obj is returned as-is, nothing is done
    4) anything else -> assumed to be an open input stream; the input is
       parsed and the resulting ElementTree is returned

    Raises ValueError for a string with any other ending.
    """
    # "unicode" only exists on Python 2; fall back to plain str elsewhere.
    try:
        stringTypes = (str, unicode)
    except NameError:
        stringTypes = (str,)

    if isinstance(obj, stringTypes):
        # Binary mode in both cases: the XML parser handles the encoding
        # declared in the document itself, and GzipFile has no text mode.
        if obj.endswith(".xml.gz"):
            fStream = GzipFile(obj, "rb")
        elif obj.endswith((".xml", ".svg", ".nxml", ".csml")):
            fStream = open(obj, "rb")
        else:
            raise ValueError(
                "%s: File format not recognized (expected .xml or .xml.gz)" % obj)
        try:
            return ElementTree.parse(fStream)
        finally:
            # parse() has consumed the whole stream by now; close the handle
            # we opened so it does not leak.
            fStream.close()
    elif isinstance(obj, ElementTree.ElementTree) or ElementTree.iselement(obj):
        return obj
    else:
        # not a string, not a tree, not an element, should be a stream
        # let's parse it (the caller owns the stream, so it is not closed)
        return ElementTree.parse(obj)
def fetch(param):
    """Download param.url and return the BookList element of the reply.

    The reply is decoded as ASCII with undecodable bytes dropped, parsed as
    XML, and its KeyStats element is passed to param.key.update().

    Returns whatever tree.find('BookList') yields for the parsed reply.

    Raises Exception when the reply contains an ErrorMessage element.
    """
    response = urllib.urlopen(param.url)
    try:
        raw = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    text = unicode(raw, 'ascii', errors='ignore')
    tree = ElementTree.fromstring(text)
    if ElementTree.iselement(tree.find('ErrorMessage')):
        raise Exception('an error occurred')

    stats = tree.find('KeyStats')
    # Named book_list rather than "list" so the builtin is not shadowed.
    book_list = tree.find('BookList')
    param.key.update(stats)
    return book_list
def fetch(param):
    """Download param.url and return the BookList element of the reply.

    The reply is decoded as ASCII with undecodable bytes dropped, parsed as
    XML, and its KeyStats element is passed to param.key.update().

    Returns whatever tree.find('BookList') yields for the parsed reply.

    Raises Exception when the reply contains an ErrorMessage element.
    """
    response = urllib.urlopen(param.url)
    try:
        raw = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    text = unicode(raw, 'ascii', errors='ignore')
    tree = ElementTree.fromstring(text)
    if ElementTree.iselement(tree.find('ErrorMessage')):
        raise Exception('an error occurred')

    stats = tree.find('KeyStats')
    # Named book_list rather than "list" so the builtin is not shadowed.
    book_list = tree.find('BookList')
    param.key.update(stats)
    return book_list
def extractMarks(srcGpx, dstGpx=None):
    """Build a new <gpx> element holding only the marked sections of a track.

    srcGpx -- an Element used directly as the source root, or anything
              ET.parse() accepts, in which case the parsed root is used
    dstGpx -- accepted but not used by this function

    Every trackpoint found under trk/trkseg/trkpt is examined for a
    gpslog:mark extension. A mark with in="true" opens a new <trk> (named
    after the mark text); points are copied into it, via killNs(), until a
    mark whose "in" attribute is not "true" closes it. For each opened
    section center(opener, closer) is collected and the collection is handed
    to addCenterpoints(). A section still open at the end of the input is
    closed with the last trackpoint.

    Returns the new root element.

    Raises AssertionError when a closing mark is found with no open section.
    """
    if ET.iselement(srcGpx):
        sroot = srcGpx
    else:
        sroot = ET.parse(srcGpx).getroot()

    root = ET.Element("gpx")
    root.attrib = sroot.attrib.copy()
    root.set("xmlns", GPX_NS)
    root.set("{%s}dummy" % GPX_NS, "true")  # dummy to force ns insertion
    root.set("{%s}dummy" % MY_NS, "true")  # dummy to force ns insertion
    root.text = "\n "

    trackpoints = sroot.findall(mkpath("trk/trkseg/trkpt"))
    trk = seg = None
    centers = []
    opener = None  # trackpoint that opened the section currently being copied
    for tpt in trackpoints:
        mark = tpt.find(mkpath("extensions/gpslog:mark"))
        if mark is not None and mark.get("in") == "true":
            trk = ET.SubElement(root, "trk")
            tnm = ET.SubElement(trk, "name")
            tdsc = ET.SubElement(trk, "desc")
            seg = ET.SubElement(trk, "trkseg")
            tnm.text = mark.text
            tdsc.text = mark.text + " - " + tpt.findtext(mkpath("time"))
            # Formatting in DOM :-(
            tnm.tail = "\n "
            seg.text = "\n "
            seg.tail = "\n "
            trk.text = "\n "
            trk.tail = "\n "
            opener = tpt
        if seg is not None:
            seg.append(killNs(tpt))
        if mark is not None and mark.get("in") != "true":
            # Raised explicitly (same exception type as the former assert)
            # so the check survives running under "python -O".
            if opener is None:
                raise AssertionError("Found closing trackpoint without opener")
            centers.append(center(opener, tpt))
            trk = seg = opener = None

    # A section left open is closed with the last trackpoint seen.
    if opener is not None:
        centers.append(center(opener, tpt))
    addCenterpoints(root, centers)
    return root
def fauxFetch(params):
    """This is a simple fetch for testing cursor functionality.

    Instead of going to the network, the page is read from the file
    <prependdir>/xml/<params.pageNum>.xml, whose path is echoed to stdout.

    Returns the BookList element of the parsed page.

    Raises SyntaxError when the page cannot be read or parsed, and Exception
    when the parsed page has no BookList element.
    """
    page = os.path.join(prependdir, 'xml', '%s.xml' % (params.pageNum,))
    sys.stdout.write('OPENING PAGE: %s\n' % (page,))
    try:
        tree = ElementTree.parse(page)
    except Exception:
        # Deliberately not a bare "except:", so KeyboardInterrupt and
        # SystemExit are no longer disguised as a SyntaxError.
        raise SyntaxError('page was: %s' % (page,))

    book_list = tree.find('BookList')
    if not ElementTree.iselement(book_list):
        raise Exception('not getting a page...')
    return book_list
def fauxFetch(params):
    """This is a simple fetch for testing cursor functionality.

    Instead of going to the network, the page is read from the file
    <prependdir>/xml/<params.pageNum>.xml, whose path is echoed to stdout.

    Returns the BookList element of the parsed page.

    Raises SyntaxError when the page cannot be read or parsed, and Exception
    when the parsed page has no BookList element.
    """
    page = os.path.join(prependdir, 'xml', '%s.xml' % (params.pageNum,))
    sys.stdout.write('OPENING PAGE: %s\n' % (page,))
    try:
        tree = ElementTree.parse(page)
    except Exception:
        # Deliberately not a bare "except:", so KeyboardInterrupt and
        # SystemExit are no longer disguised as a SyntaxError.
        raise SyntaxError('page was: %s' % (page,))

    book_list = tree.find('BookList')
    if not ElementTree.iselement(book_list):
        raise Exception('not getting a page...')
    return book_list
def check_element(element):
    """Sanity-check that element looks and behaves like an Element.

    A line is printed for each structural problem found: the object failing
    ElementTree.iselement(), or lacking one of the tag, attrib, text and tail
    members. Afterwards the tag is passed to check_string(), the attrib to
    check_mapping(), and text and tail to check_string() when they are set.
    """
    if not ElementTree.iselement(element):
        print("not an element")

    required_members = (
        ("tag", "no tag member"),
        ("attrib", "no attrib member"),
        ("text", "no text member"),
        ("tail", "no tail member"),
    )
    for member, complaint in required_members:
        if not hasattr(element, member):
            print(complaint)

    check_string(element.tag)
    check_mapping(element.attrib)

    # text and tail are optional, so only validate them when present
    text = element.text
    if text is not None:
        check_string(text)
    tail = element.tail
    if tail is not None:
        check_string(tail)
def ETFromObj(obj):
    """Return an ElementTree (or Element) for obj.

    obj can be
    1) a string that ends with .xml -> the file is parsed and the resulting
       ElementTree returned
    2) a string that ends with .xml.gz -> the file is unzipped, parsed, and
       the resulting ElementTree is returned
    3) an ElementTree or an Element -> obj is returned as-is, nothing is done
    4) anything else -> assumed to be an open input stream; the input is
       parsed and the resulting ElementTree is returned

    Raises ValueError for a string with any other ending.
    """
    # "unicode" only exists on Python 2; fall back to plain str elsewhere.
    try:
        stringTypes = (str, unicode)
    except NameError:
        stringTypes = (str,)

    if isinstance(obj, stringTypes):
        # Binary mode in both cases: the XML parser handles the encoding
        # declared in the document itself, and GzipFile has no text mode.
        if obj.endswith(".xml.gz"):
            fStream = GzipFile(obj, "rb")
        elif obj.endswith(".xml"):
            fStream = open(obj, "rb")
        else:
            raise ValueError("%s: File format not recognized (expected .xml or .xml.gz)" % obj)
        try:
            return ElementTree.parse(fStream)
        finally:
            # parse() has consumed the whole stream by now; close the handle
            # we opened so it does not leak.
            fStream.close()
    elif isinstance(obj, ElementTree.ElementTree) or ElementTree.iselement(obj):
        return obj
    else:
        # not a string, not a tree, not an element, should be a stream
        # let's parse it (the caller owns the stream, so it is not closed)
        return ElementTree.parse(obj)
def test_fetchReturnType(self): print "Testing return type is an Element and BookList...", x = fetch(self.p) self.assert_(ElementTree.iselement(x),"FAILED (fetch didnt get elem)") self.assert_(x.tag == 'BookList', "FAILED (fetched elem not BookList)") print "OK"
def check_type(self, c):
    """Report whether c is accepted by ElementTree.iselement()."""
    looks_like_element = ElementTree.iselement(c)
    return looks_like_element