Exemple #1
0
 def __init__(self, pipelinename, pipelineconfigdir, debugDocumentSet = False, idfield = "url", defaultparams = None, maxdocs = 0):
     """
     Set up the processor state and its underlying DocumentPipeline.

     A DocumentPipeline is created from ``pipelinename`` and its ``init()``
     is called with ``pipelineconfigdir``. The remaining arguments are
     stored on the instance unchanged, except that a falsy
     ``defaultparams`` (None, empty mapping) is replaced by a new empty
     dict. The ``processed`` counter starts at zero.
     """
     # The pipeline is created and initialised before anything else is stored.
     self.pipeline = DocumentPipeline(pipelinename)
     self.pipeline.init(pipelineconfigdir)

     # Fall back to a fresh dict so the attribute is always a mapping.
     if not defaultparams:
         defaultparams = {}
     self.defaultparams = defaultparams

     # Plain configuration values, kept as given.
     self.maxdocs = maxdocs
     self.debugDocumentSet = debugDocumentSet
     self.idfield = idfield

     # Nothing has been processed yet.
     self.processed = 0
Exemple #2
0
class PipelineProcessor(object):
    """
    A PipelineProcessor forwards incoming messages to a pipeline of stages.

    Each (header, message) pair is wrapped in a Document, handed to the
    configured DocumentPipeline and counted. When ``maxdocs`` is positive,
    StopConsumerError is raised once that many messages have been handled.
    """

    def __init__(self, pipelinename, pipelineconfigdir, debugDocumentSet=False,
                 idfield="url", defaultparams=None, maxdocs=0):
        """
        Create and initialise the underlying pipeline and store settings.

        pipelinename      -- passed to the DocumentPipeline constructor
        pipelineconfigdir -- passed to the pipeline's init()
        debugDocumentSet  -- forwarded as second argument to every Document
        idfield           -- forwarded to pipeline.process() as ``idfield``
        defaultparams     -- key/value pairs Set() on every document;
                             a falsy value means "none"
        maxdocs           -- when > 0, process() raises StopConsumerError
                             after this many messages
        """
        self.pipeline = DocumentPipeline(pipelinename)
        self.pipeline.init(pipelineconfigdir)
        self.idfield = idfield
        self.debugDocumentSet = debugDocumentSet
        self.maxdocs = maxdocs
        self.processed = 0
        # A fresh dict per instance avoids sharing a mutable default.
        self.defaultparams = defaultparams or {}

    def makeDocument(self, header, message):
        """
        Wrap an incoming message in a Document.

        A dict message becomes the document content and the header is
        stored under 'amqheaders'. A str message is stored under 'body'
        of a document built from the header. Every entry of
        ``defaultparams`` is then Set() on the document.

        Raises Exception("Not implemented") for any other message type.
        """
        if isinstance(message, dict):
            doc = Document(message, self.debugDocumentSet)
            doc.Set('amqheaders', header)
        elif isinstance(message, str):
            doc = Document(header, self.debugDocumentSet)
            doc.Set('body', message)
        else:
            raise Exception("Not implemented")

        # Defaults are applied last, so they are set after the message data.
        for key, val in self.defaultparams.items():
            doc.Set(key, val)
        return doc

    def process(self, header, message):
        """
        Run one message through the pipeline and update the counter.

        A status other than ProcessorStatus.OK / OK_NoChange is logged as
        an error. A StageError raised by the pipeline is logged and
        swallowed. The message counts towards ``maxdocs`` in both cases.

        Raises StopConsumerError when ``maxdocs`` is positive and the
        number of processed messages has reached it.
        """
        doc = self.makeDocument(header, message)
        try:
            status = self.pipeline.process(doc, idfield=self.idfield)
            if status not in (ProcessorStatus.OK, ProcessorStatus.OK_NoChange):
                logger.error("Error processing %s in %s" % (header['message-id'], self.pipeline.getLastProcessed()))
        except StageError as ex:
            # "except X as name" is valid on Python 2.6+ and Python 3;
            # the older "except X, name" form is a SyntaxError on Python 3.
            logger.warning("Unhandled stage error: %s" % str(ex))
            logger.exception(ex)

        # Counted after the try block, so failed messages count as well.
        self.processed += 1
        if self.maxdocs > 0 and self.processed >= self.maxdocs:
            raise StopConsumerError()