def download(self, url):
    """
    Fetch the XML document at `url` and return the movies found in it.

    Every element handed to the SAX callback that has no text content of
    its own is flattened into a dict and wrapped in a `Movie`:

    * ``categories`` -- list with the ``name`` of each ``categories``
      child whose ``type`` is ``'genre'``
    * ``backdrop`` / ``poster`` -- ``{image.id: {image.size: image.url}}``
      built from the children of ``images`` elements
    * any other child with text content is stored under its tag name

    Returns the list of `Movie` objects in callback order. If opening the
    URL or parsing fails, the error is logged and an empty list is
    returned.
    """
    results = []

    def handle(element):
        # Elements that carry text content of their own are not movies
        # records; only container elements are converted.
        if element.content:
            return
        data = dict(categories=[], backdrop={}, poster={})
        for child in element:
            if child.tagname == 'categories' and child.type == 'genre':
                data['categories'].append(child.name)
            elif child.tagname == 'images':
                for image in child:
                    if image.type not in ('backdrop', 'poster'):
                        continue
                    # Group all sizes of the same image under its id.
                    sizes = data[image.type].setdefault(image.id, {})
                    sizes[image.size] = image.url
            elif child.content:
                data[child.tagname] = child.content
        results.append(Movie(data))

    # NOTE(review): the 'movie' argument presumably selects which elements
    # trigger the callback -- confirm against ElementParser.
    handler = ElementParser('movie')
    handler.handle = handle
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        stream = urllib2.urlopen(url, timeout=10)
        try:
            parser.parse(stream)
        finally:
            # Always release the HTTP connection, even on a parse error.
            stream.close()
    except Exception:
        # Best effort: any download or parse problem yields no results.
        log.exception('download/parse error')
        return []
    # The original fell off the end here and returned None, discarding
    # everything the callback had collected.
    return results
def parse_xml(what, nest=()):
    """
    Parse XML from `what` and yield the list of collected elements.

    This is a generator. If `what` is a string starting with ``'http'`` it
    first yields ``download(what, retry=4)`` and expects a
    ``(status, data)`` pair to be sent back in; `data` is then fed to the
    parser. Any other `what` is passed straight to ``parser.parse()``.
    The last value yielded is the result list.

    NOTE(review): presumably driven by a coroutine decorator that is not
    visible in this block -- confirm at the definition site.

    :param what: URL string starting with ``'http'``, or any source
        accepted by the SAX parser's ``parse()`` method
    :param nest: tag names of children that are processed recursively as
        elements of their own instead of being folded into the parent's
        info dict; only used for membership tests
    :returns: list of ``(tagname, attrs, content)`` tuples for elements
        with text content and ``(tagname, attrs, info)`` tuples otherwise,
        where `attrs` maps each name in ``element.attributes`` to its
        value and `info` maps child tag names to their text content.
        Entries for nested children precede the entry of their parent.
    :raises ValueError: if the download status is not 200
    """
    results = []

    def handle(element):
        attrs = dict((a, getattr(element, a)) for a in element.attributes)
        if element.content:
            results.append((element.tagname, attrs, element.content))
            return
        info = {}
        for child in element:
            if child.tagname in nest:
                # Nested containers get their own result entry.
                handle(child)
            elif child.content:
                info[child.tagname] = child.content
        results.append((element.tagname, attrs, info))

    handler = ElementParser()
    handler.handle = handle
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    if isinstance(what, basestring) and what.startswith('http'):
        status, data = yield download(what, retry=4)
        if status != 200:
            raise ValueError('download failed with http status %d' % status)
        # The document is already in memory; use the incremental interface.
        parser.feed(data)
        parser.close()
    else:
        parser.parse(what)
    yield results
def parse(url):
    """
    Parse the XML document at `url` with a SAX parser.

    Returns a tuple ``(attr, results)``: `attr` is the ``attr`` attribute
    of the ElementParser after parsing, and `results` is a list with one
    entry per element passed to the callback -- ``(tagname, content)`` if
    the element has text content, otherwise ``(tagname, info)`` where
    `info` maps the tag name of each child with text content to that
    content.

    NOTE(review): the original docstring calls this a threaded parser;
    nothing in this block starts a thread, so any threading presumably
    comes from a decorator or the caller -- confirm.
    """
    collected = []

    def handle(element):
        if element.content:
            collected.append((element.tagname, element.content))
            return
        fields = dict(
            (child.tagname, child.content)
            for child in element
            if child.content
        )
        collected.append((element.tagname, fields))

    handler = ElementParser()
    handler.handle = handle
    reader = xml.sax.make_parser()
    reader.setContentHandler(handler)
    reader.parse(url)
    return handler.attr, collected