def startPrefixMapping(self, prefix, uri):
    """Record a namespace declaration and log prefix/URI irregularities.

    The mapping is stored on every handler in the top entry of
    ``self.handler_stack``.  A URI that contains whitespace is reported via
    ``self.error``.  The pair is then compared against the module-level
    ``namespaces`` table (URI -> conventional prefix) and any mismatch is
    reported via ``self.log``.

    Args:
        prefix: Prefix being declared; falsy for the default namespace.
        uri: Namespace URI bound to ``prefix``.
    """
    for handler in self.handler_stack[-1]:
        handler.namespace[prefix] = uri

    if uri and len(uri.split()) > 1:
        from xml.sax import SAXException
        self.error(SAXException('Invalid Namespace: %s' % uri))

    # NOTE(review): the ``logging`` imports below name classes the standard
    # library module does not provide, so this is presumably a project-local
    # ``logging`` module -- confirm.
    if prefix in namespaces.values():
        if not namespaces.get(uri, '') == prefix and prefix:
            # A well-known prefix is bound to some other URI.
            from logging import ReservedPrefix
            preferredURI = [
                key for key, value in namespaces.items() if value == prefix
            ][0]
            self.log(ReservedPrefix({
                'prefix': prefix,
                'ns': preferredURI
            }))
        elif prefix == 'wiki' and uri.find('usemod') >= 0:
            from logging import ObsoleteWikiNamespace
            self.log(
                ObsoleteWikiNamespace({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
    elif uri in namespaces:  # dict.has_key() no longer exists on Python 3
        if not namespaces[uri] == prefix and prefix:
            # A known URI is bound to a prefix other than the usual one.
            from logging import NonstdPrefix
            self.log(
                NonstdPrefix({
                    'preferred': namespaces[uri],
                    'ns': uri
                }))
Example #2
0
    def startElement(self, name, attrs):
        """Collect connection details from a run of consecutive elements.

        ``self.state`` sequences the work: a matching ``connection`` element
        supplies the database name and schema and moves to state 1, the next
        element supplies the user (state 2) and the one after that the
        password.

        Args:
            name: Element name.
            attrs: Attribute collection exposing ``getValue``.

        Raises:
            SAXException: after the password is read while
                ``self.useConList`` is not 1, to abort the parse.
        """
        # getting password
        if self.state == 2:
            self.conDict['password'] = attrs.getValue("value")

            if self.useConList == 1:
                self.conList.append(copy.copy(self.conDict))
                self.state = 0
            else:
                # SAXException requires a message; calling it with no
                # arguments raises TypeError instead of SAXException.
                raise SAXException('Connection found')  # stop parsing
        # getting username
        if self.state == 1:
            self.conDict['user'] = attrs.getValue("value")
            self.state = 2
        # getting DB name & schema
        if name == "connection" and (attrs.getValue("name") == self.searchTerm
                                     or self.useConList == 1):
            # The text after '//' is split on '/' into "<db>/<schema>".
            after_scheme = re.split(r'//', attrs.getValue("name"))
            path_parts = re.split(r'/', after_scheme[1])
            self.conDict['dbName'] = path_parts[0]
            if len(path_parts) == 1 or path_parts[1] == '':
                # No schema component present.
                self.conDict['schema'] = ''
            else:
                self.conDict['schema'] = path_parts[1]

            self.conDict['connStr'] = attrs.getValue("name")
            self.state = 1
Example #3
0
 def startPrefixMapping(self, prefix, uri):
     """Record a namespace declaration and log prefix/URI irregularities.

     The mapping is stored on every handler in the top entry of
     ``self.handler_stack``.  A URI that contains whitespace is reported via
     ``self.error``.  The pair is then compared against the module-level
     ``namespaces`` table (URI -> conventional prefix); reserved,
     non-standard, discouraged and unknown declarations are reported via
     ``self.log``.

     Args:
         prefix: Prefix being declared; falsy for the default namespace.
         uri: Namespace URI bound to ``prefix``.
     """
     for handler in self.handler_stack[-1]:
         handler.namespace[prefix] = uri

     if uri and len(uri.split()) > 1:
         from xml.sax import SAXException
         self.error(SAXException('Invalid Namespace: %s' % uri))

     # NOTE(review): the ``logging`` imports below name classes the standard
     # library module does not provide, so this is presumably a project-local
     # ``logging`` module -- confirm.
     if prefix in namespaces.values():
         if not namespaces.get(uri, '') == prefix and prefix:
             # A well-known prefix is bound to some other URI.
             from logging import ReservedPrefix, MediaRssNamespace
             preferredURI = [
                 key for key, value in namespaces.items() if value == prefix
             ][0]
             if uri == 'http://search.yahoo.com/mrss':
                 self.log(
                     MediaRssNamespace({
                         'prefix': prefix,
                         'ns': preferredURI
                     }))
             else:
                 self.log(
                     ReservedPrefix({
                         'prefix': prefix,
                         'ns': preferredURI
                     }))
         elif prefix == 'wiki' and uri.find('usemod') >= 0:
             from logging import ObsoleteWikiNamespace
             self.log(
                 ObsoleteWikiNamespace({
                     'preferred': namespaces[uri],
                     'ns': uri
                 }))
         elif prefix in ['atom', 'xhtml']:
             from logging import TYPE_ATOM, AvoidNamespacePrefix
             if self.getFeedType() == TYPE_ATOM:
                 self.log(AvoidNamespacePrefix({'prefix': prefix}))
     elif uri in namespaces:  # dict.has_key() no longer exists on Python 3
         if not namespaces[uri] == prefix and prefix:
             # A known URI is bound to a prefix other than the usual one.
             from logging import NonstdPrefix
             self.log(
                 NonstdPrefix({
                     'preferred': namespaces[uri],
                     'ns': uri
                 }))
             if namespaces[uri] in ['atom', 'xhtml']:
                 from logging import TYPE_UNKNOWN, TYPE_ATOM, AvoidNamespacePrefix
                 if self.getFeedType() in [TYPE_ATOM, TYPE_UNKNOWN]:
                     self.log(AvoidNamespacePrefix({'prefix': prefix}))
     elif uri == 'http://search.yahoo.com/mrss':
         # Report the URI with a trailing slash as the namespace to use.
         from logging import MediaRssNamespace
         uri = 'http://search.yahoo.com/mrss/'
         self.log(MediaRssNamespace({'prefix': prefix, 'ns': uri}))
     else:
         # Unrecognised URI: run it through the rfc3987 rule; it is logged as
         # unknown when it is empty or when validate() returns a true value.
         from validators import rfc3987
         rule = rfc3987()
         rule.setElement('xmlns:' + str(prefix), {},
                         self.handler_stack[-1][0])
         rule.value = uri
         if not uri or rule.validate():
             from logging import UnknownNamespace
             self.log(UnknownNamespace({'namespace': uri}))
Example #4
0
    def startElement(self, name, attrs):
        """Register a ``MediaType`` element once ``MediaTypes`` has been seen.

        A ``MediaType`` carrying both ``mnemonic`` and ``uuid`` is stored in
        ``self.dict_media_type`` under its mnemonic, together with a set of
        subtypes derived from the optional attributes present on the element.

        Raises:
            SAXException: if the mnemonic has already been registered.
        """
        is_media_type = (self.found and name == "MediaType"
                         and "mnemonic" in attrs and "uuid" in attrs)
        if not is_media_type:
            # Anything else only matters if it opens the enclosing section.
            if name == "MediaTypes":
                self.found = True
            return

        mnemonic = attrs["mnemonic"]
        if mnemonic in self.dict_media_type:
            raise SAXException(
                "Duplicate media type mnemonic '{}'".format(mnemonic))

        entry = TupleMediaType(attrs["uuid"], set())
        self.dict_media_type[mnemonic] = entry

        # Each optional attribute contributes one subtype when present.
        if "encrypted" in attrs:
            entry.sub_types.add(MediaSubtype.ENCRYPTED)
        if "signed" in attrs:
            entry.sub_types.add(MediaSubtype.SIGNED)
        if "signedAndEncrypted" in attrs:
            entry.sub_types.add(MediaSubtype.SIGNED_ENCRYPTED)
        if "drm" in attrs:
            entry.sub_types.add(MediaSubtype.DRM)
        if "notProtected" in attrs:
            entry.sub_types.add(MediaSubtype.NOT_PROTECTED)
Example #5
0
 def endElement(self, name):
     """Update parser state when the element ``name`` closes.

     Most elements simply clear the "inside this element" flag(s) that were
     set while the element was open.  The remaining ones count records,
     commit a patent, or pick the patent number candidate.

     Raises:
         SAXException: when the number of ``patent-assignment`` records
             reaches the module-level ``max_recs``.
     """
     # Closing element -> flags cleared unconditionally.
     cleared_on_close = {
         'recorded-date': ('in_recd_date',),
         'conveyance-text': ('in_conveyance',),
         'patent-assignor': ('in_assignor',),
         'patent-assignee': ('in_assignee',),
         'name': ('in_assignor_name', 'in_assignee_name'),
         'execution-date': ('in_exec_date',),
         'doc-number': ('in_docnumber',),
         'kind': ('in_kind',),
     }

     if name in cleared_on_close:
         for flag in cleared_on_close[name]:
             setattr(self, flag, False)
     elif name == 'patent-assignment':
         self.recs += 1
         if self.recs >= max_recs:
             raise SAXException('reached record limit')
     elif name == 'date':
         # A <date> closes at most one of the two date contexts.
         if self.in_recd_date2:
             self.in_recd_date2 = False
         elif self.in_exec_date2:
             self.in_exec_date2 = False
     elif name == 'patent-property':
         self.addPatent()
     elif name == 'document-id':
         # Take the candidate when no number is set yet or the kind code
         # starts with 'B'.
         if self.patnum == '' or self.patkind[0] == 'B':
             self.patnum = self.patcand
Example #6
0
 def dispatch_element (self, silent, prev, *a, **k):
     """Call the method named ``prev`` + current node name, if there is one.

     The current node name is the last entry of ``self._name_stack``.

     Args:
         silent: When true, a missing handler is ignored instead of raising.
         prev: Prefix prepended to the node name to form the method name.
         *a, **k: Forwarded unchanged to the handler.

     Raises:
         SAXException: if no such attribute exists and ``silent`` is false.
     """
     missing = object()
     handler = getattr(self, prev + self._name_stack[-1], missing)

     if handler is not missing:
         handler(*a, **k)
         return

     if not silent:
         raise SAXException('Unknown node: ' + self._name_stack[-1])
Example #7
0
 def endElement(self, name):
     """Finish the current tag: capture a name, or stop once the mobile is found.

     Raises:
         SAXException: when the mobile entry belonging to ``self.look_for``
             has been read, to abort the parse.
     """
     if self.is_name:
         # End of the name tag: keep its text.  The buffer is a unicode
         # string; encode() it if a specific character set is needed.
         self.current_name = self.buffer.strip()
         self.buffer = ''
         self.is_name = None
         return

     if self.is_mobile and self.current_name == self.look_for:
         self.mobile = self.buffer
         raise SAXException('Found mobile phone')  # stop parsing
Example #8
0
 def handle_data(self, data):
     """Forward character data to the document handler.

     Data seen while ``self._nesting`` is 0 (outside the top-level element)
     is never forwarded: whitespace is ignored and anything else is
     reported through ``self.err_handler.error``.

     Args:
         data: The character data to process.
     """
     if self._nesting == 0:
         # str.strip() replaces string.strip(), which was removed from the
         # ``string`` module in Python 3.
         if data.strip():
             self.err_handler.error(
                 SAXException("characters '%s' outside root element" %
                              data))
         return
     self.doc_handler.characters(data, 0, len(data))
Example #9
0
 def startElement(self, name, attrs):
     """Push ``name`` on the tag stack and run its start handler with ``attrs``.

     Raises:
         SAXException: if no start handler is registered for ``name`` and
             the error mode is not ``ERR_MODE_IGNORE``.
     """
     try:
         handler = self._start_handlers[name]
     except KeyError:
         if self._error_mode != self.ERR_MODE_IGNORE:
             raise SAXException('No start handler for tag {0!r}'.format(
                 name))  # FIXME: better exception
         # Unknown tags are tolerated in ignore mode.
         handler = self._ignore
     self._tag_stack.append(name)
     handler(attrs)
Example #10
0
 def endElement(self, name):
     """Run the end handler for ``name`` and pop it from the tag stack.

     Raises:
         SAXException: if no end handler is registered for ``name`` and the
             error mode is not ``ERR_MODE_IGNORE``.
         AssertionError: if the popped tag is not ``name`` (checked only
             when assertions are enabled).
     """
     try:
         fn = self._end_handlers[name]
     except KeyError:
         if self._error_mode == self.ERR_MODE_IGNORE:
             fn = self._ignore
         else:
             raise SAXException('No end handler for tag {0!r}'.format(
                 name))  # FIXME: better exception
     fn()
     # Pop outside the assert: "assert stack.pop() == name" is removed
     # entirely under "python -O", which would leave the stack unpopped.
     closed = self._tag_stack.pop()
     assert closed == name, (
         'Closing tag {0!r} does not match open tag {1!r}'.format(
             name, closed))
Example #11
0
    def startElement(self, name, attrs):
        """Register disposal actions found after ``DisposalCollection`` opens.

        A ``MessageArea`` with ``name`` and ``uuid`` is stored under
        ``"hold:<name>"``.  Any element listed in ``DICT_DISPOSAL`` that has
        a ``uuid`` is stored under its mapped disposal name.

        Raises:
            SAXException: if the disposal key is already registered.
        """
        if not self.found:
            # Nothing is collected until the enclosing section opens.
            if name == "DisposalCollection":
                self.found = True
            return

        if name == "MessageArea" and "name" in attrs and "uuid" in attrs:
            area = attrs["name"]
            disposal = "hold:{}".format(area)
            duplicate_msg = "Duplicate message area '{}'".format(area)
        elif name in DICT_DISPOSAL and "uuid" in attrs:
            disposal = DICT_DISPOSAL[name]
            duplicate_msg = "Duplicate disposal action '{}'".format(disposal)
        else:
            return

        if disposal in self.dict_disposal_action:
            raise SAXException(duplicate_msg)
        self.dict_disposal_action[disposal] = attrs["uuid"]
Example #12
0
 def characters(self, ch):
     """Pass character data ``ch`` to the handler of the innermost open tag.

     The innermost tag is the last entry of ``self._tag_stack``.

     Raises:
         SAXException: if that tag has no characters handler and the error
             mode is not ``ERR_MODE_IGNORE``.
     """
     current_tag = self._tag_stack[-1]
     try:
         handler = self._char_handlers[current_tag]
     except KeyError:
         if self._error_mode != self.ERR_MODE_IGNORE:
             raise SAXException(
                 'No characters handler for tag {0!r}'.format(
                     current_tag))  # FIXME: better exception
         # Unknown tags are tolerated in ignore mode.
         handler = self._ignore
     handler(ch)
Example #13
0
    def aspace_handle(self, attribs):
        """Create an annotation space, or verify an existing one's type.

        The space name is read from the ``AS_ID`` attribute, falling back to
        ``NAME`` when ``AS_ID`` is absent; the type comes from ``TYPE``.

        Raises:
            SAXException: if the space already exists with a different type.
        """
        keys = self._g
        try:
            space_name = attribs[keys.AS_ID]
        except KeyError:
            space_name = attribs[keys.NAME]
        space_type = attribs[keys.TYPE]

        spaces = self.graph.annotation_spaces
        if space_name not in spaces:
            spaces.create(space_name, space_type)
            return

        if space_type != spaces[space_name].type:
            raise SAXException(
                'Type mismatch for annotation space {0!r}'.format(space_name))
Example #14
0
    def detectXMLType(self, name, attrs):
        """Detect whether the file being parsed is a card-list or a deck.

        The matching handler set (``self.cardlistHandlers`` or
        ``self.deckHandlers``) is copied into the instance ``__dict__`` and
        the event is then replayed through ``self.startElement``.

        Args:
            name: Name of the top-level element.
            attrs: Attributes of that element.

        Raises:
            SAXException: if ``name`` is neither ``card-list`` nor ``deck``.
        """
        if name not in ("card-list", "deck"):
            # Single-argument call form prints the same on Python 2 and 3;
            # the old print statement is a SyntaxError on Python 3.
            print("unknown XML container '" + name + "', cannot continue")
            raise SAXException("toplevel element unrecognized")

        if name == "card-list":
            self.__dict__.update(self.cardlistHandlers)
        else:
            self.__dict__.update(self.deckHandlers)

        # relay this SAX event to the new helper set in case they need it
        self.startElement(name, attrs)
Example #15
0
def index(request):
    """Render ``reader.html`` with summary data for the feed given in ``?url=``.

    When a ``url`` query parameter is present and fetching it returns
    HTTP 200, the feed is parsed and summarised through ``ProductFeed``.
    Request failures, feeds flagged ``bozo`` by the parser and feeds without
    entries each produce an ``error_msg`` context instead.  In every other
    case (no ``url``, or a non-200 response) the template is rendered with
    ``None`` as its context.

    Args:
        request: The incoming request; only ``request.GET`` is read.

    Returns:
        Whatever ``render`` returns for ``reader.html`` and the context.
    """
    data = None
    try:
        url = request.GET.get("url")
        if url is not None:

            # requests.get checks if url is valid and available
            request_url = requests.get(url)
            if request_url.status_code == 200:
                feed = feedparser.parse(url)

                if feed.bozo == 1:
                    raise SAXException(msg="")

                if len(feed.entries) == 0:
                    raise ValueError

                product_feed = ProductFeed(feed)
                nr_of_items = product_feed.nr_of_items
                nr_of_in_stock_items = product_feed.nr_of_in_stock_items
                custom_labels_data = product_feed.get_custom_labels_data()

                if not custom_labels_data:
                    custom_labels_data = ""

                data = {
                    'url': url,
                    'nr_of_items': nr_of_items,
                    'nr_of_in_stock_items': nr_of_in_stock_items,
                    'custom_labels_data': custom_labels_data
                }

    except exceptions.RequestException as ex:
        data = {
            'error_msg': 'Please enter a valid URL.',
            'sub_error_msg': f'Exception occured: {ex}.'
        }

    except ValueError:
        data = {'error_msg': 'File formatting error (missing "item" tag).'}

    except SAXException:
        data = {'error_msg': 'The feed data is not well-formed XML.'}

    # Return after the try statement rather than from a ``finally`` clause:
    # returning inside ``finally`` discards any in-flight exception, which
    # hid every error not handled above (programming errors included).
    return render(request, 'reader.html', data)
    def parse(self, source):
        """Parse ``source`` with a libxml2 text reader and emit SAX-style events.

        The reader is advanced one node at a time; each node is translated
        into a call on ``self._cont_handler`` (``startElement``,
        ``endElement``, ``characters`` or ``entity``).  ``startDocument`` is
        sent before the loop and ``endDocument`` only when the reader reached
        the end of input normally.

        ``self.__parsing`` is set to 1 for the duration of the call and reset
        to 0 on the way out, whether or not an exception was raised.

        Args:
            source: Anything accepted by ``saxutils.prepare_input_source``.
        """
        self.__parsing = 1
        try:
            # prepare source and create reader
            source = saxutils.prepare_input_source(source)
            input = libxml2.inputBuffer(source.getByteStream())
            reader = input.newTextReader(source.getSystemId())

            # presumably self._errorHandler is what fills self.__errors,
            # which is checked after every Read() below -- verify.
            reader.SetErrorHandler(self._errorHandler, None)
            # configure reader
            reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
            reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
            reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 0)
            reader.SetParserProp(libxml2.PARSER_VALIDATE, 0)
            # we reuse attribute maps (for a slight performance gain)
            attributesImpl = xmlreader.AttributesImpl({})
            # start loop
            self._cont_handler.startDocument()

            while 1:
                r = reader.Read()
                # check for errors
                # r == 1: a node was read; r == 0: end of input;
                # any other value is treated as a fatal read failure.
                if r == 1:
                    pass
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break  # end of parse
                else:
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        # The read failed but no error was collected, so
                        # report a generic fatal error.
                        self._err_handler.fatalError(
                            SAXException("Read failed (no details available)"))
                    break  # fatal parse error
                # get node type
                nodeType = reader.NodeType()
                # Element
                if nodeType == 1:
                    eltName = reader.Name()
                    # Swap a fresh dict into the shared AttributesImpl
                    # instead of allocating a new one per element.
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = reader.Name()
                        attrs[attName] = reader.Value()
                    reader.MoveToElement()
                    self._cont_handler.startElement(eltName, attributesImpl)
                    # An empty element gets its endElement sent right away.
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
                # EndElement
                elif nodeType == 15:
                    self._cont_handler.endElement(reader.Name())
                # Text
                elif nodeType == 3:
                    self._cont_handler.characters(reader.Value())
                # SignificantWhitespace
                elif nodeType == 14:
                    self._cont_handler.characters(reader.Value())
                # EntityReference
                elif nodeType == 5:
                    # Treating entity as such
                    self._cont_handler.entity(reader.Name())
                elif nodeType == 10:
                    # We parse the doctype with a SAX parser
                    nodeText = str(reader.CurrentNode())
                    entityDeclParser = libxml2.createPushParser(
                        self._cont_handler, nodeText, len(nodeText), "doctype")
                    entityDeclParser.parseChunk("", 0, 1)
                    pass
                # Ignore all other node types
            # endDocument is only sent when the input ended normally.
            if r == 0:
                self._cont_handler.endDocument()
            # NOTE(review): Close() is skipped when a handler or the reader
            # raises above -- confirm whether it belongs in the finally block.
            reader.Close()
        finally:
            self.__parsing = 0
Example #17
0
    def endElement(self, name):
        """Store the text of the element being closed and reset entry state.

        While inside a local tag entry (``self.found_lt``) the partition, tag
        name and data type are collected and, when the partition closes, a
        matching tag name is appended to ``self.local_tags``.  Otherwise,
        inside a global tag entry (and when not searching local tags), the
        corresponding global tag fields are collected.  ``self.text`` is
        cleared at the end of every call that returns normally.

        Raises:
            SAXException: when a matching global tag is found and
                ``self.search_gts`` is false, to abort the parse;
                ``self.globaltag_localtags`` then holds the result.
        """
        text = self.text

        if self.found_lt:
            # Local tag entry.
            if name == 'lhcb:name':
                if text == self.requested_partition:
                    self.lt_Partition = text
            elif name == 'lhcb:tag':
                self.lt_Name = text
            elif name == 'lhcb:type':
                # Of all data types mentioned, keep only the requested one.
                if text == self.requested_datatype:
                    self.lt_DataType = text
            elif name == 'lhcb:note':
                self.found_lt = False
            elif name == 'lhcb:partition':
                matched = len(self.lt_Partition) and len(self.lt_DataType)
                if matched and self.lt_Name not in self.local_tags:
                    self.local_tags.append(str(self.lt_Name))
                # Prepare for the next partition element of this local tag.
                self.lt_Name = ''
                self.lt_Partition = ''
                self.lt_DataType = ''
        elif self.found_gt and not self.search_lts:
            # Global tag entry.
            if name == 'lhcb:tag':
                self.gt_Name = text
            elif name == 'lhcb:type':
                if text == self.requested_datatype:
                    self.gt_DataType = text
            elif name == 'lhcb:name':
                if text == self.requested_partition:
                    self.gt_Partition = text
                if len(self.gt_DataType) and len(self.gt_Partition) and len(
                        self.gt_Name):
                    if self.search_gts:
                        self.global_tags.append(str(self.gt_Name))
                    else:
                        self.globaltag_localtags = (str(self.gt_Name),
                                                    self.local_tags)
                        raise SAXException(
                            'Found most recent global tag and all subsequent local tags for it.'
                        )
                # The partition is reset here rather than when the partition
                # element closes, ready for the next partition of this tag.
                self.gt_Partition = ''
            elif name == 'lhcb:global_tag':
                self.found_gt = False
                # Prepare for the next global tag.
                self.gt_Name = ''
                self.gt_DataType = ''
        self.text = ''
Example #18
0
 def __init__(self, msg):
     """Initialise the exception, adding the locator position when available.

     Args:
         msg: Base error message.  When ``self.locator`` is truthy, the
             locator's line and column numbers are appended to it.
     """
     locator = self.locator
     if locator:
         # The previous suffix contained a literal "_((...))", the remains
         # of a mangled translation call, which leaked into every message.
         msg = '%s (line: %s, column: %s)' % (msg, locator.getLineNumber(),
                                              locator.getColumnNumber())
     SAXException.__init__(self, msg)
Example #19
0
 def startElement(self, name, attrs):
     """Capture one attribute of the wanted section, then stop the parse.

     Elements other than ``self.section`` are ignored.

     Raises:
         SAXException: once the attribute ``self.attr`` of the wanted
             section has been stored in ``self.attr_value``.
     """
     if name != self.section:
         return
     raw_value = attrs.get(self.attr)
     self.attr_value = raw_value.strip()
     raise SAXException("Found")
Example #20
0
 def __init__(self, msg):
     """Initialise the exception, adding the locator position when available.

     Args:
         msg: Base error message.  When ``self.locator`` is truthy it is
             wrapped in a translated template that also carries the
             locator's line and column numbers.
     """
     where = self.locator
     if where:
         template = _('%s (line: %s, column: %s)')
         msg = template % (msg, where.getLineNumber(),
                           where.getColumnNumber())
     SAXException.__init__(self, msg)
Example #21
0
 def endElement(self, Element):
     """Track closing DECISION elements and stop at the matching CHOICE.

     Nothing happens until the module-level ``locator`` is marked found.

     Raises:
         SAXException: when a ``CHOICE`` closes while
             ``locator.decisioncounter`` is 0, to abort the parse.
     """
     if not locator.found:
         return
     if Element == 'DECISION':
         locator.decisioncounter -= 1
     elif Element == 'CHOICE' and locator.decisioncounter == 0:
         raise SAXException('')  # stop parsing
Example #22
0
 def __init__(self, msg):
     """Initialise the exception, adding the locator position when available.

     Args:
         msg: Base error message.  When ``self.locator`` is truthy, the
             locator's line and column numbers are appended to it.
     """
     locator = self.locator
     if locator:
         # The previous suffix contained a literal "_((...))", the remains
         # of a mangled translation call, which leaked into every message.
         msg = '%s (line: %s, column: %s)' % (msg, locator.getLineNumber(),
                                              locator.getColumnNumber())
     SAXException.__init__(self, msg)
Example #23
0
    def startElement(self, Element, Attribute):
        """Advance the module-level ``locator`` state for an opening element.

        Every element's ``tag`` attribute is recorded.  Once the element
        whose tag equals ``locator.wantedtag`` has been seen, subsequent
        elements are acted upon: sections and direct entries are appended to
        ``output.container``, while elements that need user interaction
        store their details on ``locator`` and abort the parse.

        Args:
            Element: Name of the element being opened.
            Attribute: Attribute mapping exposing ``get``.

        Raises:
            SAXException: (with an empty message) on ``CHOICE``,
                ``MOUSEENTRY`` and ``KEYBOARDENTRY`` elements reached while
                reading, to stop parsing.
        """
        # Parse the tag once; it is needed in up to three places below.
        tag = int(Attribute.get('tag'))
        locator.tagnumberlist.append(tag)
        locator.currenttag = [tag, 'opening']
        # Debug output uses single-argument print() calls so the text is
        # identical on Python 2 and 3.
        if debug.prtcurrenttag:
            print('locator.currenttag =  %s' % (locator.currenttag,))  # ---------- DEBUG

        if locator.currenttag == locator.wantedtag:
            locator.found = 1
            if debug.prtlocatorfound:
                print('locator found for currenttag %s' % (locator.currenttag,))  # ---------- DEBUG
            locator.element = ''
            locator.description = ''
            locator.readdecision = 1

        # Logical ``and`` replaces the bitwise ``&`` previously applied to
        # these comparisons.
        if (locator.found == 1 and Element == 'DECISION'
                and locator.readdecision == 0):
            locator.decisionindex += 1
            if debug.decisionindex:
                print('locator.decisionindex (begin)  %s' % (locator.decisionindex,))  # ---------- DEBUG

        if locator.found == 1 and locator.readdecision == 1:

            if Element == 'SECTION':
                locator.currentsection = str(Attribute.get('sectiontitle'))
                output.container.append(
                    '#=================================================\n# %s\n'
                    % Attribute.get('sectiontitle'))
                output.pieceindex += 1
                UpdateOutput()

            if Element == 'CHOICE':
                locator.element = 'CHOICE'
                locator.instruction = Attribute.get('instruction')
                locator.wantedtag = locator.currenttag
                raise SAXException('')  # stop parsing

            if Element == 'DECISION':
                # A decision not listed in readdecisionlist switches reading
                # off and restarts the decision index.
                if tag not in locator.readdecisionlist:
                    locator.readdecision = 0
                    locator.decisionindex = 1

            if Element == 'MOUSEENTRY':
                locator.element = 'MOUSEENTRY'
                locator.instruction = Attribute.get('instruction')
                locator.wantedtag = locator.currenttag
                raise SAXException('')  # stop parsing

            if Element == 'KEYBOARDENTRY':
                locator.element = 'KEYBOARDENTRY'
                locator.instruction = Attribute.get('instruction')
                locator.variablename = Attribute.get('variablename')
                locator.textlen = Attribute.get('textlen')
                # "getvalue <name>" means the length comes from getvalue().
                if locator.textlen.split()[0] == 'getvalue':
                    locator.textlen = getvalue(locator.textlen.split()[1])
                    if debug.getvalue:
                        print('returned value %s' % (locator.textlen,))
                raise SAXException('')  # stop parsing

            if Element == 'DIRECTENTRY':
                output.container.append('%s %s\n' %
                                        (Attribute.get('variablename'),
                                         Attribute.get('variablevalue')))
                output.pieceindex += 1
                UpdateOutput()
Example #24
0
 def endElement(self, Element):
     """Stop the parse when a MOUSEENTRY closes after the locator was found.

     Raises:
         SAXException: (with an empty message) when ``Element`` is
             ``MOUSEENTRY`` and the module-level ``locator.found`` is truthy.
     """
     if locator.found and Element == 'MOUSEENTRY':
         raise SAXException('')  # stop parsing