Exemple #1
0
 def process_document(self, session, doc):
     """Parse the raw data of ``doc`` and return it as an LxmlRecord.

     The raw data is wrapped in a StringIO stream and parsed with
     ``self.parser``. The resulting record gets ``byteCount`` set to the
     length of the raw data and is passed, with ``doc``, to
     ``self._copyData`` before being returned.
     """
     raw = doc.get_raw(session)
     stream = StringIO.StringIO(raw)
     tree = etree.parse(stream, self.parser)
     record = LxmlRecord(tree)
     record.byteCount = len(raw)
     self._copyData(doc, record)
     return record
Exemple #2
0
 def process_document(self, session, doc):
     """Build an LxmlRecord from the raw data held by ``doc``.

     Fetches the raw data for ``session``, parses it from an in-memory
     stream using ``self.parser``, stores the raw length on the record as
     ``byteCount`` and calls ``self._copyData(doc, record)`` before
     returning the record.
     """
     source = doc.get_raw(session)
     parsed = LxmlRecord(
         etree.parse(StringIO.StringIO(source), self.parser)
     )
     parsed.byteCount = len(source)
     self._copyData(doc, parsed)
     return parsed
Exemple #3
0
 def process_document(self, session, doc):
     """Parse the raw data of ``doc`` and return it as an LxmlRecord.

     The raw data is parsed from an in-memory stream with ``self.parser``.
     If the parse raises AssertionError, the data is decoded as UTF-8 and
     parsed a second time. The record's ``byteCount`` is the length of
     the raw data as returned by ``doc.get_raw``, regardless of which
     parse attempt succeeded.
     """
     # input must be string or stream
     data = doc.get_raw(session)
     # Measure before any decoding: after the fallback below the text is
     # decoded, and its length would count characters instead of bytes.
     byteCount = len(data)
     try:
         et = etree.parse(StringIO.StringIO(data), self.parser)
     except AssertionError:
         # Retry with the data decoded from UTF-8.
         decoded = data.decode('utf8')
         et = etree.parse(StringIO.StringIO(decoded), self.parser)
     rec = LxmlRecord(et)
     rec.byteCount = byteCount
     self._copyData(doc, rec)
     return rec
Exemple #4
0
 def process_document(self, session, doc):
     """Build an LxmlRecord from the raw data held by ``doc``.

     Parsing is attempted on the raw data first; when that raises
     AssertionError the data is decoded as UTF-8 and parsed again.
     ``byteCount`` on the returned record is always the length of the
     raw (undecoded) data, and ``self._copyData(doc, rec)`` is called
     before the record is returned.
     """
     # input must be string or stream
     data = doc.get_raw(session)
     # Capture the raw size up front so the UTF-8 fallback, which turns
     # the data into decoded text, cannot change the reported byte count.
     rawLength = len(data)
     try:
         et = etree.parse(StringIO.StringIO(data), self.parser)
     except AssertionError:
         # Second attempt on the UTF-8 decoded form of the same data.
         et = etree.parse(StringIO.StringIO(data.decode('utf8')),
                          self.parser)
     rec = LxmlRecord(et)
     rec.byteCount = rawLength
     self._copyData(doc, rec)
     return rec
Exemple #5
0
    def _processResult(self, session, data):
        """Parse XML to create and return dict of metadata items.

        Parse XML output from external program.
        Process parsed XML using self.sources.
        Populate and return a dictionary of metadata items.

        Each value in self.sources supplies a 'source' triple of
        (xpath, process, preprocess) and a 'default'. For every key the
        processed result decides what is stored:

        * more than one entry: a list holding each entry's key once per
          element of its 'proxLoc' value, in sorted (proxLoc, key) order;
        * exactly one entry: that entry's key;
        * no entries: the 'default', when it is not None (otherwise the
          key is omitted from the returned dict).

        If parsing raises etree.XMLSyntaxError, the data is logged at
        level 10 (when the session has a logger) and the error re-raised.

        """
        # Measure before any decoding so byteCount reflects the raw input
        # rather than the character count of the decoded text.
        byteCount = len(data)
        try:
            try:
                et = etree.fromstring(data)
            except AssertionError:
                # Retry on the UTF-8 decoded data. Nested inside the outer
                # try so a syntax error from the retry is logged as well.
                data = data.decode('utf8')
                et = etree.fromstring(data)
        except etree.XMLSyntaxError:
            if session.logger is not None:
                # log debug level
                session.logger.log_lvl(session, 10, data)
            raise
        record = LxmlRecord(et)
        record.byteCount = byteCount
        mddict = {}
        for key, src in self.sources.iteritems():
            (xpath, process, preprocess) = src['source']
            if preprocess is not None:
                # NOTE(review): record is rebound here, so sources handled
                # later in the loop see the preprocessed record - confirm
                # this carry-over is intended.
                record = preprocess.process(session, record)
            if xpath is not None:
                rawlist = xpath.process_record(session, record)
                processed = process.process(session, rawlist)
            else:
                processed = process.process(session, record)

            if len(processed) > 1:
                mddict[key] = []
                ordered = sorted((val['proxLoc'], k)
                                 for k, val in processed.iteritems())
                for pl, k in ordered:
                    # One copy of the key per element of its proxLoc.
                    for _ in pl:
                        mddict[key].append(k)

            elif len(processed) == 1:
                mddict[key] = processed.keys()[0]
            elif src['default'] is not None:
                mddict[key] = src['default']

        return mddict
Exemple #6
0
 def _processResult(self, session, data):
     """Parse XML to create and return dict of metadata items.

     Parse XML output from external program.
     Process parsed XML using self.sources.
     Populate and return a dictionary of metadata items.

     Each value in self.sources supplies a 'source' triple of
     (xpath, process, preprocess) and a 'default'. For every key the
     processed result decides what is stored:

     * more than one entry: a list holding each entry's key once per
       element of its 'proxLoc' value, in sorted (proxLoc, key) order;
     * exactly one entry: that entry's key;
     * no entries: the 'default', when it is not None (otherwise the
       key is omitted from the returned dict).

     If parsing raises etree.XMLSyntaxError, the data is logged at
     level 10 (when the session has a logger) and the error re-raised.

     """
     # Measure before any decoding so byteCount reflects the raw input
     # rather than the character count of the decoded text.
     byteCount = len(data)
     try:
         try:
             et = etree.fromstring(data)
         except AssertionError:
             # Retry on the UTF-8 decoded data. Nested inside the outer
             # try so a syntax error from the retry is logged as well.
             data = data.decode('utf8')
             et = etree.fromstring(data)
     except etree.XMLSyntaxError:
         if session.logger is not None:
             # log debug level
             session.logger.log_lvl(session, 10, data)
         raise
     record = LxmlRecord(et)
     record.byteCount = byteCount
     mddict = {}
     for key, src in self.sources.iteritems():
         (xpath, process, preprocess) = src['source']
         if preprocess is not None:
             # NOTE(review): record is rebound here, so sources handled
             # later in the loop see the preprocessed record - confirm
             # this carry-over is intended.
             record = preprocess.process(session, record)
         if xpath is not None:
             rawlist = xpath.process_record(session, record)
             processed = process.process(session, rawlist)
         else:
             processed = process.process(session, record)

         if len(processed) > 1:
             mddict[key] = []
             ordered = sorted((val['proxLoc'], k)
                              for k, val in processed.iteritems())
             for pl, k in ordered:
                 # One copy of the key per element of its proxLoc.
                 for _ in pl:
                     mddict[key].append(k)

         elif len(processed) == 1:
             mddict[key] = processed.keys()[0]
         elif src['default'] is not None:
             mddict[key] = src['default']

     return mddict