def startPrefixMapping(self, prefix, uri):
    """Record a namespace declaration and log prefix/URI irregularities.

    The ``prefix -> uri`` binding is stored in the ``namespace`` mapping of
    every handler on top of ``self.handler_stack``.  The binding is then
    compared with the module-level ``namespaces`` table, which maps a
    namespace URI to its preferred prefix:

    * a URI containing whitespace is reported through ``self.error``;
    * a preferred prefix bound to a URI it is not registered for is logged
      as ``ReservedPrefix`` together with the URI it is registered for;
    * the ``wiki`` prefix bound to a URI containing ``usemod`` is logged as
      ``ObsoleteWikiNamespace``;
    * a known URI bound to a non-empty prefix other than its preferred one
      is logged as ``NonstdPrefix``.
    """
    for handler in iter(self.handler_stack[-1]):
        handler.namespace[prefix] = uri

    if uri and len(uri.split()) > 1:
        from xml.sax import SAXException
        self.error(SAXException('Invalid Namespace: %s' % uri))

    # NOTE(review): the names imported from ``logging`` below are not part of
    # the standard-library module; presumably a project-local ``logging``
    # module is resolved here -- confirm.
    if prefix in namespaces.values():
        if not namespaces.get(uri, '') == prefix and prefix:
            from logging import ReservedPrefix
            preferredURI = [
                key for key, value in namespaces.items() if value == prefix
            ][0]
            self.log(ReservedPrefix({'prefix': prefix, 'ns': preferredURI}))
        elif prefix == 'wiki' and uri.find('usemod') >= 0:
            from logging import ObsoleteWikiNamespace
            self.log(
                ObsoleteWikiNamespace({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
    # ``in`` replaces the deprecated dict.has_key(), which no longer exists
    # on Python 3; the membership test is identical.
    elif uri in namespaces:
        if not namespaces[uri] == prefix and prefix:
            from logging import NonstdPrefix
            self.log(
                NonstdPrefix({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
def startElement(self, name, attrs):
    """Collect database connection details from consecutive elements.

    ``self.state`` drives a small state machine:

    * any state -- a ``connection`` element whose ``name`` attribute equals
      ``self.searchTerm`` (or any ``connection`` element when
      ``self.useConList == 1``) is split into database name and schema and
      moves the state to 1;
    * state 1 -- the ``value`` attribute of the current element is stored as
      the user and the state moves to 2;
    * state 2 -- the ``value`` attribute of the current element is stored as
      the password.  In list mode a copy of the record is appended to
      ``self.conList`` and the state returns to 0.

    Raises:
        SAXException: once the password has been read and list mode is off,
            to stop the parser early.
    """
    # getting password
    if self.state == 2:
        self.conDict['password'] = attrs.getValue("value")
        if self.useConList == 1:
            self.conList.append(copy.copy(self.conDict))
            self.state = 0
        else:
            # SAXException requires a message; calling it without one raised
            # TypeError instead of the intended SAXException.
            raise SAXException('Connection found')  # stop parsing

    # getting username
    if self.state == 1:
        self.conDict['user'] = attrs.getValue("value")
        self.state = 2

    # getting DB name & schema
    if name == "connection" and (attrs.getValue("name") == self.searchTerm
                                 or self.useConList == 1):
        connection_string = attrs.getValue("name")
        # Everything after the '//' is '<dbName>[/<schema>]'.
        location = re.split(r'//', connection_string)[1]
        parts = re.split(r'/', location)
        self.conDict['dbName'] = parts[0]
        if len(parts) == 1 or parts[1] == '':  # Checking for schema presence
            self.conDict['schema'] = ''
        else:
            self.conDict['schema'] = parts[1]
        self.conDict['connStr'] = connection_string
        self.state = 1
def startPrefixMapping(self, prefix, uri):
    """Record a namespace declaration and log prefix/URI irregularities.

    The ``prefix -> uri`` binding is stored in the ``namespace`` mapping of
    every handler on top of ``self.handler_stack``.  The binding is then
    compared with the module-level ``namespaces`` table, which maps a
    namespace URI to its preferred prefix:

    * a URI containing whitespace is reported through ``self.error``;
    * a preferred prefix bound to a URI it is not registered for is logged
      as ``ReservedPrefix`` (``MediaRssNamespace`` for the slash-less Media
      RSS URI);
    * the ``wiki`` prefix bound to a URI containing ``usemod`` is logged as
      ``ObsoleteWikiNamespace``;
    * the ``atom`` / ``xhtml`` prefixes are logged as
      ``AvoidNamespacePrefix`` in Atom feeds;
    * a known URI bound to a non-empty, non-preferred prefix is logged as
      ``NonstdPrefix``;
    * the slash-less Media RSS URI is logged as ``MediaRssNamespace`` with
      the corrected URI;
    * any other URI that is empty or for which the ``rfc3987`` rule's
      ``validate()`` returns a true value is logged as ``UnknownNamespace``.
    """
    for handler in iter(self.handler_stack[-1]):
        handler.namespace[prefix] = uri

    if uri and len(uri.split()) > 1:
        from xml.sax import SAXException
        self.error(SAXException('Invalid Namespace: %s' % uri))

    # NOTE(review): the names imported from ``logging`` below are not part of
    # the standard-library module; presumably a project-local ``logging``
    # module is resolved here -- confirm.
    if prefix in namespaces.values():
        if not namespaces.get(uri, '') == prefix and prefix:
            from logging import ReservedPrefix, MediaRssNamespace
            preferredURI = [
                key for key, value in namespaces.items() if value == prefix
            ][0]
            if uri == 'http://search.yahoo.com/mrss':
                self.log(
                    MediaRssNamespace({
                        'prefix': prefix,
                        'ns': preferredURI
                    }))
            else:
                self.log(
                    ReservedPrefix({
                        'prefix': prefix,
                        'ns': preferredURI
                    }))
        elif prefix == 'wiki' and uri.find('usemod') >= 0:
            from logging import ObsoleteWikiNamespace
            self.log(
                ObsoleteWikiNamespace({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
        elif prefix in ['atom', 'xhtml']:
            from logging import TYPE_ATOM, AvoidNamespacePrefix
            if self.getFeedType() == TYPE_ATOM:
                self.log(AvoidNamespacePrefix({'prefix': prefix}))
    # ``in`` replaces the deprecated dict.has_key(), which no longer exists
    # on Python 3; the membership test is identical.
    elif uri in namespaces:
        if not namespaces[uri] == prefix and prefix:
            from logging import NonstdPrefix
            self.log(
                NonstdPrefix({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; this check is assumed to apply only to non-empty,
            # non-preferred prefixes -- confirm.
            if namespaces[uri] in ['atom', 'xhtml']:
                from logging import TYPE_UNKNOWN, TYPE_ATOM, AvoidNamespacePrefix
                if self.getFeedType() in [TYPE_ATOM, TYPE_UNKNOWN]:
                    self.log(AvoidNamespacePrefix({'prefix': prefix}))
    elif uri == 'http://search.yahoo.com/mrss':
        from logging import MediaRssNamespace
        # Report the URI with its trailing slash.
        uri = 'http://search.yahoo.com/mrss/'
        self.log(MediaRssNamespace({'prefix': prefix, 'ns': uri}))
    else:
        from validators import rfc3987
        rule = rfc3987()
        rule.setElement('xmlns:' + str(prefix), {}, self.handler_stack[-1][0])
        rule.value = uri
        if not uri or rule.validate():
            from logging import UnknownNamespace
            self.log(UnknownNamespace({'namespace': uri}))
def startElement(self, name, attrs):
    """Register a ``MediaType`` element found after ``MediaTypes`` opened.

    A ``MediaTypes`` element switches ``self.found`` on.  While it is on,
    every ``MediaType`` element carrying both ``mnemonic`` and ``uuid`` is
    stored in ``self.dict_media_type`` under its mnemonic, together with the
    set of sub-types whose marker attributes are present on the element.

    Raises:
        SAXException: if the mnemonic has already been registered.
    """
    is_media_type = (self.found and name == "MediaType"
                     and "mnemonic" in attrs and "uuid" in attrs)
    if not is_media_type:
        if name == "MediaTypes":
            self.found = True
        return

    key = attrs["mnemonic"]
    if key in self.dict_media_type:
        raise SAXException("Duplicate media type mnemonic '{}'".format(key))
    self.dict_media_type[key] = TupleMediaType(attrs["uuid"], set())

    # Marker attribute -> MediaSubtype member name; members are resolved
    # only when the attribute is actually present.
    markers = (
        ("encrypted", "ENCRYPTED"),
        ("signed", "SIGNED"),
        ("signedAndEncrypted", "SIGNED_ENCRYPTED"),
        ("drm", "DRM"),
        ("notProtected", "NOT_PROTECTED"),
    )
    for marker, member in markers:
        if marker in attrs:
            self.dict_media_type[key].sub_types.add(
                getattr(MediaSubtype, member))
def endElement(self, name):
    """Update parser state when the element ``name`` closes.

    Most tags simply clear the matching ``in_*`` flag.  Special cases:

    * ``patent-assignment`` counts a finished record and stops the parse
      once the module-level ``max_recs`` limit is reached;
    * ``date`` clears whichever of the nested date flags is set, recorded
      date first;
    * ``patent-property`` flushes the collected patent via ``addPatent``;
    * ``document-id`` adopts the candidate number when no number has been
      chosen yet or the current kind code starts with ``B``.

    Raises:
        SAXException: when the record limit has been reached.
    """
    if name == 'patent-assignment':
        self.recs += 1
        if self.recs >= max_recs:
            raise SAXException('reached record limit')
    elif name == 'recorded-date':
        self.in_recd_date = False
    elif name == 'conveyance-text':
        self.in_conveyance = False
    elif name == 'patent-assignor':
        self.in_assignor = False
    elif name == 'patent-assignee':
        self.in_assignee = False
    elif name == 'name':
        # The same tag closes both assignor and assignee names.
        self.in_assignor_name = False
        self.in_assignee_name = False
    elif name == 'execution-date':
        self.in_exec_date = False
    elif name == 'date':
        if self.in_recd_date2:
            self.in_recd_date2 = False
        elif self.in_exec_date2:
            self.in_exec_date2 = False
    elif name == 'patent-property':
        self.addPatent()
    elif name == 'document-id':
        # Guard against an empty kind code: indexing it directly raised
        # IndexError for a document-id without a kind once patnum was set.
        if self.patnum == '' or (self.patkind and self.patkind[0] == 'B'):
            self.patnum = self.patcand
    elif name == 'doc-number':
        self.in_docnumber = False
    elif name == 'kind':
        self.in_kind = False
def dispatch_element(self, silent, prev, *a, **k):
    """Call the handler named ``prev`` + current element name, if any.

    The current element name is the top of ``self._name_stack``.  Extra
    positional and keyword arguments are forwarded to the handler.

    Raises:
        SAXException: if no such attribute exists and ``silent`` is false.
    """
    node = self._name_stack[-1]
    missing = object()
    handler = getattr(self, prev + node, missing)
    if handler is not missing:
        handler(*a, **k)
        return
    if silent:
        return
    raise SAXException('Unknown node: ' + self._name_stack[-1])
def endElement(self, name):
    """Finish a name element, or capture the mobile number and stop.

    When a name element is being closed, the stripped buffer becomes
    ``self.current_name`` and the buffer and flag are reset.  Otherwise, if
    a mobile element is being closed for the contact ``self.look_for``, the
    buffer is stored in ``self.mobile``.

    Raises:
        SAXException: once the wanted mobile number is found, to stop
            parsing.
    """
    if self.is_name:  # this is the end of the name tag
        # The buffered tag content is a unicode string; callers can encode()
        # it to whatever character set they need.
        self.current_name, self.buffer, self.is_name = (
            self.buffer.strip(), '', None)
        return

    wanted = self.is_mobile and self.current_name == self.look_for
    if not wanted:
        return
    self.mobile = self.buffer
    raise SAXException('Found mobile phone')  # stop parsing
def handle_data(self, data):
    """Forward character data to the document handler.

    Data seen while ``self._nesting`` is 0 (outside the top-level element)
    is never forwarded: whitespace is ignored and anything else is reported
    to the error handler as a ``SAXException``.
    """
    # ignore white space outside the toplevel element
    if self._nesting == 0:
        # str.strip() replaces string.strip(), which was removed from the
        # ``string`` module in Python 3; the result is the same.
        if data.strip() != "":  # It's not whitespace?
            self.err_handler.error(
                SAXException("characters '%s' outside root element" % data))
        return
    self.doc_handler.characters(data, 0, len(data))
def startElement(self, name, attrs):
    """Push ``name`` on the tag stack and run its start handler.

    The handler is looked up in ``self._start_handlers``.  Unknown tags are
    routed to ``self._ignore`` when the error mode is ``ERR_MODE_IGNORE``.

    Raises:
        SAXException: for an unknown tag in any other error mode; the tag
            stack is left untouched in that case.
    """
    try:
        handler = self._start_handlers[name]
    except KeyError:
        if self._error_mode != self.ERR_MODE_IGNORE:
            # FIXME: better exception
            raise SAXException('No start handler for tag {0!r}'.format(name))
        handler = self._ignore
    self._tag_stack.append(name)
    handler(attrs)
def endElement(self, name):
    """Run the end handler for ``name`` and pop it from the tag stack.

    The handler is looked up in ``self._end_handlers``.  Unknown tags are
    routed to ``self._ignore`` when the error mode is ``ERR_MODE_IGNORE``.

    Raises:
        SAXException: for an unknown tag in any other error mode.
        AssertionError: if the popped tag is not ``name`` (only when
            assertions are enabled).
    """
    try:
        fn = self._end_handlers[name]
    except KeyError:
        if self._error_mode == self.ERR_MODE_IGNORE:
            fn = self._ignore
        else:
            # FIXME: better exception
            raise SAXException('No end handler for tag {0!r}'.format(name))
    fn()
    # Pop outside the assert: with ``python -O`` assert statements are
    # stripped, and the stack would otherwise never shrink.
    popped = self._tag_stack.pop()
    assert popped == name, (
        'closing tag {0!r} does not match open tag {1!r}'.format(name, popped))
def startElement(self, name, attrs):
    """Register disposal actions found after ``DisposalCollection`` opened.

    A ``DisposalCollection`` element switches ``self.found`` on.  While it
    is on, two kinds of element are stored in ``self.dict_disposal_action``
    with their ``uuid``:

    * ``MessageArea`` elements with a ``name``, under ``hold:<name>``;
    * elements listed in ``DICT_DISPOSAL``, under the mapped action name.

    Raises:
        SAXException: if the action key has already been registered.
    """
    if not self.found:
        if name == "DisposalCollection":
            self.found = True
        return

    if name == "MessageArea" and "name" in attrs and "uuid" in attrs:
        area = attrs["name"]
        key = "hold:{}".format(area)
        duplicate_message = "Duplicate message area '{}'".format(area)
    elif name in DICT_DISPOSAL and "uuid" in attrs:
        key = DICT_DISPOSAL[name]
        duplicate_message = "Duplicate disposal action '{}'".format(key)
    else:
        return

    if key in self.dict_disposal_action:
        raise SAXException(duplicate_message)
    self.dict_disposal_action[key] = attrs["uuid"]
def characters(self, ch):
    """Pass character data to the handler of the innermost open tag.

    The tag is the top of ``self._tag_stack`` and its handler is looked up
    in ``self._char_handlers``.  Tags without a handler are routed to
    ``self._ignore`` when the error mode is ``ERR_MODE_IGNORE``.

    Raises:
        SAXException: for a tag without a handler in any other error mode.
    """
    current_tag = self._tag_stack[-1]
    try:
        handler = self._char_handlers[current_tag]
    except KeyError:
        if self._error_mode != self.ERR_MODE_IGNORE:
            # FIXME: better exception
            raise SAXException(
                'No characters handler for tag {0!r}'.format(current_tag))
        handler = self._ignore
    handler(ch)
def aspace_handle(self, attribs):
    """Create the annotation space described by ``attribs`` if it is new.

    The space name is read from the ``AS_ID`` attribute, falling back to
    ``NAME`` when ``AS_ID`` is absent; the type comes from ``TYPE``.  An
    already known space is left alone as long as its type matches.

    Raises:
        SAXException: if the space exists with a different type.
    """
    keys = self._g
    try:
        space_name = attribs[keys.AS_ID]
    except KeyError:
        space_name = attribs[keys.NAME]
    space_type = attribs[keys.TYPE]

    if name_is_new(self, space_name):
        self.graph.annotation_spaces.create(space_name, space_type)
        return
    if space_type != self.graph.annotation_spaces[space_name].type:
        raise SAXException(
            'Type mismatch for annotation space {0!r}'.format(space_name))


def name_is_new(self, space_name):
    """Return True when ``space_name`` is not yet a known annotation space."""
    return space_name not in self.graph.annotation_spaces
def detectXMLType(self, name, attrs):
    """Detect whether the file being parsed is a card-list or a deck.

    Depending on the top-level element name, the matching handler set
    (``self.cardlistHandlers`` or ``self.deckHandlers``) is copied into the
    instance ``__dict__``, and the element is then passed on to
    ``self.startElement``.

    Raises:
        SAXException: if the top-level element is neither ``card-list`` nor
            ``deck``.
    """
    if name not in ("card-list", "deck"):
        # Parenthesized single-argument print behaves the same on Python 2
        # and Python 3; the bare print statement is a syntax error on 3.
        print("unknown XML container '" + name + "', cannot continue")
        raise SAXException("toplevel element unrecognized")

    if name == "card-list":
        self.__dict__.update(self.cardlistHandlers)
    else:
        self.__dict__.update(self.deckHandlers)
    # relay this SAX event to the new helper set in case they need it
    self.startElement(name, attrs)
def index(request):
    """Render ``reader.html``, summarising the feed named by ``?url=``.

    When a ``url`` query parameter is given and the URL answers with status
    200, the feed is parsed and the template receives the item count, the
    in-stock item count and the custom-label data.  Known failures are
    turned into an ``error_msg`` for the template:

    * ``requests`` errors (invalid or unreachable URL, timeout);
    * a feed without entries (``ValueError``);
    * a feed flagged as malformed by feedparser (``SAXException``).

    Without a ``url`` parameter, or for a non-200 response, the template is
    rendered with ``data`` set to ``None``.  Any other exception propagates
    to the caller.
    """
    data = None
    try:
        url = request.GET.get("url")
        if url is not None:
            # requests.get checks if url is valid and available; the timeout
            # keeps an unresponsive host from blocking the request forever.
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                feed = feedparser.parse(url)
                if feed.bozo == 1:
                    raise SAXException(msg="")
                if not feed.entries:
                    raise ValueError
                product_feed = ProductFeed(feed)
                custom_labels_data = product_feed.get_custom_labels_data()
                if not custom_labels_data:
                    custom_labels_data = ""
                data = {
                    'url': url,
                    'nr_of_items': product_feed.nr_of_items,
                    'nr_of_in_stock_items': product_feed.nr_of_in_stock_items,
                    'custom_labels_data': custom_labels_data
                }
    except exceptions.RequestException as ex:
        data = {
            'error_msg': 'Please enter a valid URL.',
            'sub_error_msg': f'Exception occured: {ex}.'
        }
    except ValueError:
        data = {'error_msg': 'File formatting error (missing "item" tag).'}
    except SAXException:
        data = {'error_msg': 'The feed data is not well-formed XML.'}
    # Returning from a ``finally`` clause silently discarded every exception
    # not listed above; returning here lets unexpected errors surface.
    return render(request, 'reader.html', data)
def parse(self, source):
    """Parse ``source`` with a libxml2 text reader and emit SAX events.

    The reader is driven node by node; each node type is translated into
    the matching call on ``self._cont_handler``:

    * 1 (element): ``startElement`` with its attributes, followed at once
      by ``endElement`` for an empty element;
    * 15 (end element): ``endElement``;
    * 3 (text) and 14 (significant whitespace): ``characters``;
    * 5 (entity reference): ``entity`` with the entity name;
    * 10 (doctype): the doctype text is fed to a libxml2 push parser that
      uses the content handler.

    All other node types are ignored.  Errors collected in
    ``self.__errors`` are reported through ``self._reportErrors``; a failed
    read without collected errors is reported as a fatal ``SAXException``.
    ``endDocument`` is only emitted when the reader reached the end of the
    input.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        source = saxutils.prepare_input_source(source)
        input_buffer = libxml2.inputBuffer(source.getByteStream())
        reader = input_buffer.newTextReader(source.getSystemId())
        # Close the reader even when a handler callback raises (e.g. a
        # SAXException used to stop parsing early).
        try:
            reader.SetErrorHandler(self._errorHandler, None)
            # configure reader
            reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
            reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
            reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 0)
            reader.SetParserProp(libxml2.PARSER_VALIDATE, 0)
            # we reuse attribute maps (for a slight performance gain)
            attributesImpl = xmlreader.AttributesImpl({})
            # start loop
            self._cont_handler.startDocument()
            while True:
                r = reader.Read()
                # check for errors
                if r == 1:
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break  # end of parse
                else:
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        self._err_handler.fatalError(
                            SAXException("Read failed (no details available)"))
                    break  # fatal parse error
                # get node type
                nodeType = reader.NodeType()
                # Element
                if nodeType == 1:
                    eltName = reader.Name()
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = reader.Name()
                        attrs[attName] = reader.Value()
                    reader.MoveToElement()
                    self._cont_handler.startElement(eltName, attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
                # EndElement
                elif nodeType == 15:
                    self._cont_handler.endElement(reader.Name())
                # Text
                elif nodeType == 3:
                    self._cont_handler.characters(reader.Value())
                # SignificantWhitespace
                elif nodeType == 14:
                    self._cont_handler.characters(reader.Value())
                # EntityReference
                elif nodeType == 5:
                    # Treating entity as such
                    self._cont_handler.entity(reader.Name())
                elif nodeType == 10:
                    # We parse the doctype with a SAX parser
                    nodeText = str(reader.CurrentNode())
                    entityDeclParser = libxml2.createPushParser(
                        self._cont_handler, nodeText, len(nodeText), "doctype")
                    entityDeclParser.parseChunk("", 0, 1)
                # Ignore all other node types
            if r == 0:
                self._cont_handler.endDocument()
        finally:
            reader.Close()
    finally:
        self.__parsing = 0
def endElement(self, name):
    """Process the text collected for the element ``name`` that just closed.

    Two mutually exclusive modes are handled, selected by the parser flags:

    * local-tag mode (``self.found_lt``): the partition, tag name and data
      type are remembered when they match the requested values, and a
      matching tag name is appended to ``self.local_tags`` when its
      ``lhcb:partition`` element closes;
    * global-tag mode (``self.found_gt`` and not ``self.search_lts``): the
      tag name, data type and partition are remembered the same way.  Once
      all three are known, the tag is either appended to
      ``self.global_tags`` (``self.search_gts`` set) or stored with the
      local tags in ``self.globaltag_localtags`` before parsing is stopped.

    ``self.text`` is cleared at the end of every call.

    Raises:
        SAXException: in global-tag mode with ``self.search_gts`` unset,
            once a matching global tag has been found, to stop parsing.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.
    """
    # Local tag entry treating.
    if self.found_lt:
        if name == 'lhcb:name':
            if self.text == self.requested_partition:
                self.lt_Partition = self.text
            # self.found_lt_Partition = False
        elif name == 'lhcb:tag':
            self.lt_Name = self.text
            # self.found_lt_Name = False
        elif name == 'lhcb:type':
            # Choosing among all mentioned data types the one requested
            if self.text == self.requested_datatype:
                self.lt_DataType = self.text
            # self.found_lt_DataType = False
        elif name == 'lhcb:note':
            # Leaving the note element ends local-tag mode.
            self.found_lt = False
        elif name == 'lhcb:partition':
            # Record the tag only if both partition and data type matched
            # and the tag has not been recorded before.
            if len(self.lt_Partition) and len(
                    self.lt_DataType
            ) and self.lt_Name not in self.local_tags:
                self.local_tags.append(str(self.lt_Name))
            # Prepare for searching in next LT partition element
            self.lt_Name = self.lt_Partition = self.lt_DataType = ''
    # Global tag entry treating.
    elif self.found_gt and not self.search_lts:
        if name == 'lhcb:tag':
            self.gt_Name = self.text
            # self.found_gt_Name = False
        elif name == 'lhcb:type':
            if self.text == self.requested_datatype:
                self.gt_DataType = self.text
            # self.found_gt_DataType = False
        elif name == 'lhcb:name':
            if self.text == self.requested_partition:
                self.gt_Partition = self.text
                if len(self.gt_DataType) and len(self.gt_Partition) and len(
                        self.gt_Name):
                    if not self.search_gts:
                        # Tuple of (global tag name, local tags seen so far).
                        self.globaltag_localtags = str(
                            self.gt_Name), self.local_tags
                        raise SAXException(
                            'Found most recent global tag and all subsequent local tags for it.'
                        )
                    else:
                        self.global_tags.append(str(self.gt_Name))
            self.gt_Partition = ''
            # elif name == 'lhcb:partition':
            # self.gt_Name = self.gt_Partition = ''
            # self.found_gt_Partition = False
            # The GT entry structure allows resetting the partition here
            # instead of when leaving the partition element.  This prepares
            # the search in the next partition element of the GT.
            # self.gt_Partition = None
        elif name == 'lhcb:global_tag':
            self.found_gt = False
            # Prepare for searching in next GT
            self.gt_Name = self.gt_DataType = ''
    self.text = ''
def __init__(self, msg):
    """Create the exception, appending the parser position when known.

    When ``self.locator`` is set, the line and column it reports are
    appended to ``msg`` before the ``SAXException`` base is initialised.
    """
    locator = self.locator
    if locator:
        # The gettext marker ``_(`` had ended up inside the string literal,
        # so messages literally read " _((line: 1, column: 2))".
        msg += ' (line: %s, column: %s)' % (locator.getLineNumber(),
                                            locator.getColumnNumber())
    SAXException.__init__(self, msg)
def startElement(self, name, attrs):
    """Capture one attribute of the wanted element and stop parsing.

    For every element other than ``self.section`` nothing happens.  For the
    wanted element, the stripped value of the attribute ``self.attr`` is
    stored in ``self.attr_value``.

    Raises:
        SAXException: after the value has been stored, to stop the parser.
    """
    if name != self.section:
        return
    raw_value = attrs.get(self.attr)
    self.attr_value = raw_value.strip()
    raise SAXException("Found")
def __init__(self, msg):
    """Create the exception, adding the parser position when known.

    When ``self.locator`` is set, ``msg`` is wrapped in a translated
    template that also carries the line and column reported by the locator.
    """
    position = self.locator
    if position:
        template = _('%s (line: %s, column: %s)')
        msg = template % (msg, position.getLineNumber(),
                          position.getColumnNumber())
    SAXException.__init__(self, msg)
def endElement(self, Element):
    """Track closing DECISION elements and stop at the closing CHOICE.

    Nothing happens until ``locator.found`` is set.  From then on every
    closing ``DECISION`` decrements ``locator.decisioncounter``.

    Raises:
        SAXException: when a ``CHOICE`` closes while the decision counter is
            zero, to stop parsing.
    """
    if not locator.found:
        return
    if Element == 'DECISION':
        locator.decisioncounter -= 1
    choice_closed = Element == 'CHOICE' and locator.decisioncounter == 0
    if choice_closed:
        raise SAXException('')  # stop parsing
def startElement(self, Element, Attribute):
    # Handle an opening element while searching for / replaying from the
    # wanted tag.
    #
    # Every element's integer ``tag`` attribute is appended to
    # locator.tagnumberlist and stored (with the marker 'opening') as
    # locator.currenttag.  Once the current tag equals locator.wantedtag,
    # locator.found is set and the elements that follow are interpreted:
    # SECTION and DIRECTENTRY append text to output.container, while CHOICE,
    # MOUSEENTRY and KEYBOARDENTRY record what is expected from the user on
    # ``locator`` and stop the parse by raising SAXException('').
    #
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original layout.
    locator.tagnumberlist.append(int(Attribute.get('tag')))
    locator.currenttag = [int(Attribute.get('tag')), 'opening']
    if debug.prtcurrenttag:
        print 'locator.currenttag = ', locator.currenttag  # ---------- DEBUG
    if locator.currenttag == locator.wantedtag:
        locator.found = 1
        if debug.prtlocatorfound:
            print 'locator found for currenttag', locator.currenttag  # ---------- DEBUG
        locator.element = ''
        locator.description = ''
        locator.readdecision = 1
    # While a DECISION is being skipped (readdecision == 0), count the
    # DECISION elements that open.
    if (locator.found == 1) & (Element == 'DECISION') & (locator.readdecision == 0):
        locator.decisionindex += 1
        if debug.decisionindex:
            print 'locator.decisionindex (begin) ', locator.decisionindex  # ---------- DEBUG
    if (locator.found == 1) & (locator.readdecision == 1):
        if Element == 'SECTION':
            # Emit a section banner into the output.
            locator.currentsection = str(Attribute.get('sectiontitle'))
            output.container.append(
                '#=================================================\n# %s\n' %
                Attribute.get('sectiontitle'))
            output.pieceindex += 1
            UpdateOutput()
        if Element == 'CHOICE':
            locator.element = 'CHOICE'
            locator.instruction = Attribute.get('instruction')
            # Remember this tag as the wanted tag for the next parse.
            locator.wantedtag = locator.currenttag
            raise SAXException('')  # stop parsing
        if Element == 'DECISION':
            # A decision whose tag is not in readdecisionlist is skipped.
            if not int(Attribute.get('tag')) in locator.readdecisionlist:
                locator.readdecision = 0
                locator.decisionindex = 1
        if Element == 'MOUSEENTRY':
            locator.element = 'MOUSEENTRY'
            locator.instruction = Attribute.get('instruction')
            locator.wantedtag = locator.currenttag
            raise SAXException('')  # stop parsing
        if Element == 'KEYBOARDENTRY':
            locator.element = 'KEYBOARDENTRY'
            locator.instruction = Attribute.get('instruction')
            locator.variablename = Attribute.get('variablename')
            locator.textlen = Attribute.get('textlen')
            # A textlen of the form 'getvalue <arg>' is resolved through
            # getvalue(<arg>).
            if locator.textlen.split()[0] == 'getvalue':
                locator.textlen = getvalue(locator.textlen.split()[1])
                if debug.getvalue:
                    print 'returned value', locator.textlen
            raise SAXException('')  # stop parsing
        if Element == 'DIRECTENTRY':
            # Written straight to the output as '<variablename> <variablevalue>'.
            output.container.append('%s %s\n' %
                                    (Attribute.get('variablename'), Attribute.get('variablevalue')))
            output.pieceindex += 1
            UpdateOutput()
def endElement(self, Element):
    """Stop parsing when a MOUSEENTRY closes after the locator was found.

    Raises:
        SAXException: when ``locator.found`` is set and the closing element
            is ``MOUSEENTRY``.
    """
    if not locator.found:
        return
    if Element != 'MOUSEENTRY':
        return
    raise SAXException('')  # stop parsing