コード例 #1
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
    def process_document(self, session, doc):
        """Run the SAX parser over the raw data of ``doc`` and return a SaxRecord.

        The raw data is fed to ``self.parser`` through ``self.inputSource``
        while ``self.contentHandler`` collects the events.  If parsing
        raises, the exception is always re-raised: when ``self.keepError``
        is true the handler state is left untouched and ``self.errorPath``
        is set to a '/'-joined path built from the handler's ``pathLines``;
        otherwise the handler is reinitialised first.

        :param session: passed through to ``doc.get_raw``
        :param doc: document object providing ``get_raw(session)``
        :returns: a SaxRecord built from the handler's collected events
        """
        xml = doc.get_raw(session)
        self.inputSource.setByteStream(cStringIO.StringIO(xml))
        handler = self.contentHandler
        handler.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Deliberately catches everything: state is tidied up (or the
            # error location recorded) and the exception is re-raised.
            if not self.keepError:
                handler.reinit()
                raise
            steps = []
            for lineNo in handler.pathLines:
                event = handler.currentText[lineNo]
                # Element name: from offset 2 up to one character before
                # the first '{' in the stored event line.
                name = event[2:event.index('{') - 1]
                steps.append("%s[@SAXID='%s']" % (name, lineNo))
            self.errorPath = '/'.join(steps)
            raise

        record = SaxRecord(handler.currentText, xml,
                           wordCount=handler.recordWordCount)
        record.elementHash = handler.elementHash
        record.byteCount = len(xml)
        self._copyData(doc, record)
        # Clear the handler only after the record holds its data.
        handler.reinit()
        return record
コード例 #2
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
    def process_document(self, session, doc):
        """Parse the raw data of ``doc`` with SAX and wrap the result in a SaxRecord.

        On a parse failure the exception propagates to the caller.  Before
        it does, either ``self.errorPath`` is filled in from the content
        handler's ``pathLines`` (when ``self.keepError`` is set) or the
        content handler is reinitialised (when it is not).

        :param session: passed through to ``doc.get_raw``
        :param doc: document object providing ``get_raw(session)``
        :returns: SaxRecord with ``elementHash`` and ``byteCount`` set
        """
        rawXml = doc.get_raw(session)
        stream = cStringIO.StringIO(rawXml)
        self.inputSource.setByteStream(stream)
        saxHandler = self.contentHandler
        saxHandler.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Catch-all is intentional; the exception is re-raised below.
            if self.keepError:
                events = saxHandler.currentText
                # One path step per line number in pathLines; the name is
                # sliced from offset 2 to one character before the first '{'.
                self.errorPath = '/'.join([
                    "%s[@SAXID='%s']" % (
                        events[idx][2:events[idx].index('{') - 1], idx)
                    for idx in saxHandler.pathLines
                ])
            else:
                saxHandler.reinit()
            raise

        result = SaxRecord(saxHandler.currentText, rawXml,
                           wordCount=saxHandler.recordWordCount)
        result.elementHash = saxHandler.elementHash
        result.byteCount = len(rawXml)
        self._copyData(doc, result)
        saxHandler.reinit()
        return result
コード例 #3
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Rebuild a SaxRecord from the serialized SAX event text of ``doc``.

     The raw data is decoded as UTF-8 and split on ``nonTextToken``.  If
     the final segment starts with "9" it is removed from the event list
     and everything after its first two characters is unpickled to give
     the record's element hash; otherwise the element hash is an empty
     dict.

     :param session: passed through to ``doc.get_raw``
     :param doc: document object providing ``get_raw(session)``
     :returns: SaxRecord built from the event list, with ``elementHash`` set
     """
     data = unicode(doc.get_raw(session), 'utf-8')
     sax = data.split(nonTextToken)
     # startswith() instead of sax[-1][0]: the last segment is an empty
     # string when the data is empty or ends with the token, and indexing
     # it would raise IndexError.
     if sax[-1].startswith("9"):
         line = sax.pop()
         # NOTE(review): pickle.loads executes arbitrary code on hostile
         # input; presumably this data only comes from a trusted store --
         # confirm against callers.
         elemHash = pickle.loads(str(line[2:]))
     else:
         elemHash = {}
     rec = SaxRecord(sax)
     rec.elementHash = elemHash
     return rec
コード例 #4
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Turn the stored SAX event text of ``doc`` back into a SaxRecord.

     The raw data is decoded as UTF-8 and split on ``nonTextToken`` into a
     list of event strings.  A final segment starting with "9" is popped
     off and the text after its first two characters is unpickled into
     the element hash; without such a segment the element hash is ``{}``.

     :param session: passed through to ``doc.get_raw``
     :param doc: document object providing ``get_raw(session)``
     :returns: SaxRecord built from the event list, with ``elementHash`` set
     """
     data = unicode(doc.get_raw(session), 'utf-8')
     sax = data.split(nonTextToken)
     # split() always yields at least one segment, but that segment may be
     # the empty string (empty data, or data ending with the token), so
     # test the prefix with startswith() rather than indexing [0], which
     # would raise IndexError.
     if sax[-1].startswith("9"):
         line = sax.pop()
         # NOTE(review): unpickling is only safe for trusted data; verify
         # that the source of these documents is controlled.
         elemHash = pickle.loads(str(line[2:]))
     else:
         elemHash = {}
     rec = SaxRecord(sax)
     rec.elementHash = elemHash
     return rec