Ejemplo n.º 1
0
    def process_document(self, session, doc):
        """Parse the raw data of ``doc`` with the SAX parser and return a record.

        The raw data is fed to ``self.parser`` through ``self.inputSource``;
        the content handler's collected state is then wrapped in a
        ``SaxRecord``.

        If parsing raises, the exception is always re-raised.  Before that:
        when ``self.keepError`` is true, a '/'-joined path of
        ``name[@SAXID='n']`` steps is built from the handler's ``pathLines``
        and stored in ``self.errorPath`` (handler state is left untouched);
        otherwise the handler is reinitialised.
        """
        raw = doc.get_raw(session)
        self.inputSource.setByteStream(cStringIO.StringIO(raw))
        handler = self.contentHandler
        handler.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Deliberately catches everything: state is tidied up (or the
            # error location recorded) and the exception is re-raised below.
            if not self.keepError:
                handler.reinit()
            else:
                steps = []
                for lineNo in handler.pathLines:
                    event = handler.currentText[lineNo]
                    # Text between the 2-character prefix and the character
                    # before the first '{' -- presumably the element name of
                    # a serialised start-element event; TODO confirm.
                    name = event[2:event.index('{') - 1]
                    steps.append("%s[@SAXID='%s']" % (name, lineNo))
                self.errorPath = '/'.join(steps)
            raise
        record = SaxRecord(handler.currentText, raw,
                           wordCount=handler.recordWordCount)
        record.elementHash = handler.elementHash
        record.byteCount = len(raw)
        self._copyData(doc, record)
        # Clear the handler once the record holds its state.
        handler.reinit()
        return record
Ejemplo n.º 2
0
    def create_record(self, session, rec=None):
        """Assign a newly generated id to a record, store it and return it.

        If a permission handler is registered under
        'info:srw/operation/1/create', the session must carry a user and the
        handler must grant that user permission; otherwise a
        ``PermissionException`` is raised.

        When ``rec`` is None an empty placeholder ``SaxRecord`` is created
        with the new id.  If storing raises ``ObjectAlreadyExistsException``
        and the generated id is a ``long``, ``self.currentId`` is decremented
        before the exception propagates.
        """
        handler = self.permissionHandlers.get('info:srw/operation/1/create',
                                              None)
        if handler:
            if not session.user:
                msg = "Authenticated user required to create an object in %s"
                raise PermissionException(msg % self.id)
            if not handler.hasPermission(session, session.user):
                msg = "Permission required to create an object in %s"
                raise PermissionException(msg % self.id)

        newId = self.generate_id(session)
        if rec is not None:
            rec.id = newId
        else:
            # No record supplied: create a placeholder
            rec = SaxRecord([], "", newId)
        rec.recordStore = self.id

        try:
            self.store_record(session, rec)
        except ObjectAlreadyExistsException:
            # Back out id change
            if type(newId) == long:
                self.currentId -= 1
            raise
        return rec
Ejemplo n.º 3
0
    def process_document(self, session, doc):
        """Run the SAX parser over ``doc`` and wrap the result in a SaxRecord.

        The document's raw data is handed to ``self.parser`` via
        ``self.inputSource``.  On success the content handler's text, word
        count and element hash are copied into a new ``SaxRecord``, the
        handler is reset, and the record is returned.

        On any parse exception the error is re-raised after either recording
        the failure location in ``self.errorPath`` (when ``self.keepError``
        is set) or resetting the content handler (when it is not).
        """
        source = doc.get_raw(session)
        stream = cStringIO.StringIO(source)
        self.inputSource.setByteStream(stream)
        saxHandler = self.contentHandler
        saxHandler.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Catch-all on purpose; the exception is re-raised at the end.
            if self.keepError:
                def saxStep(idx):
                    # One path step per open element.  The slice takes the
                    # text between the 2-character prefix and the character
                    # before the first '{' -- presumably the element name;
                    # TODO confirm against the content handler.
                    text = saxHandler.currentText[idx]
                    return "%s[@SAXID='%s']" % (
                        text[2:text.index('{') - 1], idx)

                self.errorPath = '/'.join(
                    [saxStep(idx) for idx in saxHandler.pathLines])
            else:
                saxHandler.reinit()
            raise
        result = SaxRecord(saxHandler.currentText, source,
                           wordCount=saxHandler.recordWordCount)
        result.elementHash = saxHandler.elementHash
        result.byteCount = len(source)
        self._copyData(doc, result)
        saxHandler.reinit()
        return result
Ejemplo n.º 4
0
 def create_record(self, session, rec=None):
     """Store a record with its id cleared and return it.

     When ``rec`` is None an empty placeholder ``SaxRecord`` with id None is
     created; otherwise the supplied record's ``id`` is reset to None.  The
     record is then passed to ``self.store_record``.
     """
     if rec is not None:
         rec.id = None
     else:
         rec = SaxRecord([], "", None)
     self.store_record(session, rec)
     return rec
Ejemplo n.º 5
0
    def create_record(self, session, rec=None):
        """Give a record a newly generated id, store it and return it.

        If a permission handler is registered under
        'info:srw/operation/1/create', the session must have a user and the
        handler must grant that user permission.

        Args:
            session: current session; ``session.user`` is consulted when a
                permission handler is configured.
            rec: record to store.  When None, an empty placeholder
                ``SaxRecord`` carrying the new id is created instead.

        Returns:
            The stored record, with ``id`` and ``recordStore`` set.

        Raises:
            PermissionException: no authenticated user, or permission denied.
            ObjectAlreadyExistsException: propagated from ``store_record``;
                ``self.currentId`` is decremented first when the generated
                id is a ``long``.
        """
        p = self.permissionHandlers.get('info:srw/operation/1/create', None)
        if p:
            if not session.user:
                raise PermissionException("Authenticated user required to create an object in %s" % self.id)
            okay = p.hasPermission(session, session.user)
            if not okay:
                raise PermissionException("Permission required to create an object in %s" % self.id)

        newId = self.generate_id(session)
        # Identity test: '== None' would dispatch to a user-defined __eq__
        # and could mistake a real record for a missing one.
        if rec is None:
            # Create a placeholder
            rec = SaxRecord([], "", newId)
        else:
            rec.id = newId
        rec.recordStore = self.id

        try:
            self.store_record(session, rec)
        except ObjectAlreadyExistsException:
            # Back out id change
            if isinstance(newId, long):
                self.currentId -= 1
            raise
        return rec
Ejemplo n.º 6
0
 def create_record(self, session, rec=None):
     """Store a record with its id cleared and return it.

     Args:
         session: session passed through to ``self.store_record``.
         rec: record to store.  When None, an empty placeholder
             ``SaxRecord`` with id None is created.

     Returns:
         The record that was handed to ``self.store_record``.
     """
     # Identity test: '== None' would dispatch to a user-defined __eq__ and
     # could mistake a real record for a missing one.
     if rec is None:
         rec = SaxRecord([], "", None)
     else:
         rec.id = None
     self.store_record(session, rec)
     return rec
Ejemplo n.º 7
0
 def process_document(self, session, doc):
     """Rebuild a SaxRecord from a document holding serialised SAX events.

     The raw data is decoded as UTF-8 and split on ``nonTextToken``.  If the
     last segment starts with "9", it is removed from the event list and the
     text after its first two characters is unpickled into the record's
     ``elementHash``; otherwise ``elementHash`` is an empty dict.

     Returns:
         A ``SaxRecord`` built from the remaining segments.
     """
     data = doc.get_raw(session)
     data = unicode(data, 'utf-8')
     sax = data.split(nonTextToken)
     # startswith() rather than sax[-1][0]: the last segment is an empty
     # string when the data is empty or ends with the separator, and
     # indexing it would raise IndexError.
     if sax[-1].startswith("9"):
         line = sax.pop()
         # NOTE(security): pickle.loads can execute arbitrary code; this is
         # only safe if the document data comes from a trusted store.
         elemHash = pickle.loads(str(line[2:]))
     else:
         elemHash = {}
     rec = SaxRecord(sax)
     rec.elementHash = elemHash
     return rec
Ejemplo n.º 8
0
 def process_document(self, session, doc):
     """Turn a document of serialised SAX events back into a SaxRecord.

     The document's raw data is decoded as UTF-8 and split on
     ``nonTextToken`` into event lines.  A final line starting with "9" is
     treated as a pickled element hash: it is popped off the event list and
     everything after its first two characters is unpickled.  Without such
     a line the element hash defaults to an empty dict.

     Returns:
         A ``SaxRecord`` over the event lines, with ``elementHash`` set.
     """
     data = doc.get_raw(session)
     data = unicode(data, 'utf-8')
     sax = data.split(nonTextToken)
     # Use startswith() so an empty final segment (empty data, or data that
     # ends with the separator) is handled instead of raising IndexError.
     if sax[-1].startswith("9"):
         line = sax.pop()
         # NOTE(security): unpickling executes arbitrary code if the data is
         # attacker-controlled; only use with trusted document sources.
         elemHash = pickle.loads(str(line[2:]))
     else:
         elemHash = {}
     rec = SaxRecord(sax)
     rec.elementHash = elemHash
     return rec
Ejemplo n.º 9
0
 def find_documents(self, session, cache=0):
     """Generate component documents from ``self.stream``.

     Every source in ``self.sources`` processes the stream record; each item
     it returns is wrapped in a ``c3component`` / ``c3:component`` element
     and turned into a ``StringDocument``.  Items may be:

     * a list, which is wrapped in a ``SaxRecord`` and serialised with
       ``get_xml``; the text after the last space of the final entry is
       used as the ``event`` attribute;
     * a ``str``, which is escaped and placed in a ``<data>`` element;
     * an lxml ``_Element``, which is serialised with ``etree.tostring`` and
       annotated with its XPath within its tree.

     Args:
         session: session passed to the sources and to ``get_xml``.
         cache: 0 yields each document; 1 is not supported; any other value
             appends the documents to ``self.documents`` instead.

     Raises:
         NotImplementedError: when ``cache`` is 1.  Because this is a
             generator, the error surfaces on first iteration.
         ValueError: for an item of any other type.
     """
     # Should extract records by xpath or span and store as X/SGML
     if cache == 1:
         # nothing to offset into
         raise NotImplementedError
     rec = self.stream
     # Start tag with a prefixed name, e.g. "<dc:title>".  The local part
     # needs '+' (it used to match a single character only) and both parts
     # must allow the digit 0.
     hasNsRe = re.compile('<([a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+)[ >]')
     for src in self.sources:
         raw = src.process_record(session, rec)
         for xp in raw:
             for r in xp:
                 if isinstance(r, list):
                     tempRec = SaxRecord(r)
                     docstr = tempRec.get_xml(session)
                     hasNs = hasNsRe.search(docstr)
                     saxid = r[-1][r[-1].rfind(' ') + 1:]
                     if hasNs:
                         docstr = "<c3:component xmlns:c3=\"http://www.cheshire3.org/schemas/component/\" parent=\"%r\" event=\"%s\">%s</c3:component>" % (
                             rec, saxid, docstr)
                     else:
                         docstr = "<c3component parent=\"%r\" event=\"%s\">%s</c3component>" % (
                             rec, saxid, docstr)
                 elif isinstance(r, str):
                     docstr = "<c3component parent=\"%r\"><data>%s</data></c3component>" % (
                         rec, escape(r))
                 else:
                     if r.__class__ == etree._Element:
                         # Lxml Record
                         docstr = etree.tostring(r)
                         tree = r.getroottree()
                         path = tree.getpath(r)
                         if (r.nsmap):
                             namespaceList = []
                             for (pref, ns) in r.nsmap.items():
                                 if pref is None:
                                     # lxml keys the default namespace by
                                     # None; declare it as plain xmlns
                                     # rather than 'xmlns:None'.
                                     namespaceList.append("xmlns=\"%s\"" % ns)
                                 else:
                                     namespaceList.append("xmlns:%s=\"%s\"" %
                                                          (pref, ns))
                             namespaces = " ".join(namespaceList)
                             # Closing tag must carry the same c3: prefix as
                             # the opening tag, or the wrapper is malformed.
                             docstr = """<c3:component xmlns:c3="http://www.cheshire3.org/schemas/component/" %s parent="%r" xpath="%s">%s</c3:component>""" % (
                                 namespaces, rec, path, docstr)
                         else:
                             docstr = """<c3component parent="%r" xpath="%s">%s</c3component>""" % (
                                 rec, path, docstr)
                     else:
                         raise ValueError("Unknown Record Type")
                 doc = StringDocument(docstr)
                 if cache == 0:
                     yield doc
                 else:
                     self.documents.append(doc)
Ejemplo n.º 10
0
 def process_document(self, session, doc):
     """Copy a document's raw data into a record of the appropriate type.

     Returns a ``SaxRecord`` when the raw data is a list and a ``DomRecord``
     for anything else, after ``self._copyData(doc, rec)`` has been applied.
     """
     # Simply copy data into a record of appropriate type
     data = doc.get_raw(session)
     # Python has no 'typeof' builtin (calling it raised NameError on every
     # call); isinstance() is the intended type test.
     if isinstance(data, list):
         rec = SaxRecord(data)
     else:
         rec = DomRecord(data)
     self._copyData(doc, rec)
     return rec
Ejemplo n.º 11
0
 def process_document(self, session, doc):
     """Wrap a document's raw data in a record without parsing it.

     List data becomes a ``SaxRecord``; any other data becomes a
     ``DomRecord``.  ``self._copyData(doc, record)`` is applied before the
     record is returned.
     """
     payload = doc.get_raw(session)
     # Pick the record type from the shape of the data
     recordClass = SaxRecord if isinstance(payload, list) else DomRecord
     record = recordClass(payload)
     self._copyData(doc, record)
     return record
Ejemplo n.º 12
0
 def find_documents(self, session, cache=0):
     """Yield one wrapped component document per item extracted from the stream.

     ``self.stream`` is passed to every source in ``self.sources``.  Each
     extracted item is serialised and wrapped in a ``c3component`` (or
     namespaced ``c3:component``) element whose ``parent`` attribute is the
     repr of the stream record, then turned into a ``StringDocument``:

     * list items go through ``SaxRecord(...).get_xml`` and get an ``event``
       attribute taken from the text after the last space of the final
       entry;
     * ``str`` items are escaped and placed inside ``<data>``;
     * lxml ``_Element`` items are serialised with ``etree.tostring`` and
       get an ``xpath`` attribute from ``getpath``.

     Args:
         session: session passed to the sources and to ``get_xml``.
         cache: 0 yields the documents; 1 raises NotImplementedError (on
             first iteration, since this is a generator); any other value
             appends the documents to ``self.documents``.

     Raises:
         NotImplementedError: ``cache`` is 1.
         ValueError: an extracted item is of an unsupported type.
     """
     # Should extract records by xpath or span and store as X/SGML
     if cache == 1:
         # nothing to offset into
         raise NotImplementedError
     rec = self.stream
     # Detects a prefixed start tag such as "<dc:title ".  The pattern used
     # to allow only ONE character after the colon and excluded the digit 0,
     # so it almost never matched real prefixed names.
     hasNsRe = re.compile('<([a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+)[ >]')
     for src in self.sources:
         raw = src.process_record(session, rec)
         for xp in raw:
             for r in xp:
                 if isinstance(r, list):
                     tempRec = SaxRecord(r)
                     docstr = tempRec.get_xml(session)
                     hasNs = hasNsRe.search(docstr)
                     saxid = r[-1][r[-1].rfind(' ')+1:]
                     if hasNs:
                         docstr = "<c3:component xmlns:c3=\"http://www.cheshire3.org/schemas/component/\" parent=\"%r\" event=\"%s\">%s</c3:component>" % (rec, saxid, docstr)
                     else:
                         docstr = "<c3component parent=\"%r\" event=\"%s\">%s</c3component>" % (rec, saxid, docstr)
                 elif isinstance(r, str):
                     docstr = "<c3component parent=\"%r\"><data>%s</data></c3component>" % (rec, escape(r))
                 else:
                     if r.__class__ == etree._Element:
                         # Lxml Record
                         docstr = etree.tostring(r)
                         tree = r.getroottree()
                         path = tree.getpath(r)
                         if (r.nsmap):
                             namespaceList = []
                             for (pref, ns) in r.nsmap.items():
                                 if pref is None:
                                     # The default namespace has prefix None
                                     # in lxml's nsmap; emit xmlns="..."
                                     # instead of the bogus xmlns:None.
                                     namespaceList.append("xmlns=\"%s\"" % ns)
                                 else:
                                     namespaceList.append("xmlns:%s=\"%s\"" % (pref, ns))
                             namespaces = " ".join(namespaceList)
                             # The end tag previously read </c3component>,
                             # which does not match the c3:component start
                             # tag and made the wrapper malformed.
                             docstr = """<c3:component xmlns:c3="http://www.cheshire3.org/schemas/component/" %s parent="%r" xpath="%s">%s</c3:component>""" % (namespaces, rec, path, docstr)
                         else:
                             docstr = """<c3component parent="%r" xpath="%s">%s</c3component>""" % (rec, path, docstr)
                     else:
                         raise ValueError("Unknown Record Type")
                 doc = StringDocument(docstr)
                 if cache == 0:
                     yield doc
                 else:
                     self.documents.append(doc)