Exemple #1
0
class MessageQueuePipeline(object):
    """Emit processed items to a RabbitMQ exchange/queue.

    One broker connection is created per pipeline instance. When a spider
    opens, a simple queue named after the spider is created on that
    connection; every processed item is converted to a ``dict``, encoded
    with the configured encoder and put on that queue. When the spider
    closes, the queue and then the connection are closed.
    """

    def __init__(self, host_name, port, userid, password, virtual_host,
                 encoder_class):
        """Create the broker connection and register the spider signal hooks.

        :param host_name: broker host, passed to ``Connection`` as ``hostname``.
        :param port: broker port.
        :param userid: broker user id.
        :param password: broker password.
        :param virtual_host: broker virtual host.
        :param encoder_class: zero-argument callable returning an object
            with an ``encode(obj)`` method; it is instantiated once here.
        """
        self.connection = Connection(hostname=host_name,
                                     port=port,
                                     userid=userid,
                                     password=password,
                                     virtual_host=virtual_host)
        self.encoder = encoder_class()
        # The queue only exists between spider_opened and spider_closed.
        # Keeping an explicit None lets spider_closed run safely even when
        # the spider never finished opening.
        self.queue = None
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``BROKER_*`` settings.

        Reads ``BROKER_HOST``, ``BROKER_PORT``, ``BROKER_USERID``,
        ``BROKER_PASSWORD`` and ``BROKER_VIRTUAL_HOST``. The encoder comes
        from ``MESSAGE_Q_SERIALIZER`` and falls back to
        ``ScrapyJSONEncoder`` when that setting is absent.

        NOTE(review): the setting value is called directly in ``__init__``,
        so it must be a class/callable; a dotted-path string would not be
        resolved here -- confirm how projects are expected to configure it.
        """
        host_name = settings.get('BROKER_HOST')
        port = settings.get('BROKER_PORT')
        userid = settings.get('BROKER_USERID')
        password = settings.get('BROKER_PASSWORD')
        virtual_host = settings.get('BROKER_VIRTUAL_HOST')
        encoder_class = settings.get('MESSAGE_Q_SERIALIZER', ScrapyJSONEncoder)
        return cls(host_name, port, userid, password, virtual_host,
                   encoder_class)

    def spider_opened(self, spider):
        """Open the simple queue named after ``spider.name``."""
        self.queue = self.connection.SimpleQueue(spider.name)

    def spider_closed(self, spider):
        """Close the spider's queue (if one was opened) and the connection.

        The connection is closed even when closing the queue raises, so a
        failing queue shutdown cannot leak the broker connection. Any
        exception from closing the queue still propagates afterwards.
        """
        # Detach the queue first so a repeated call cannot close it twice.
        queue, self.queue = self.queue, None
        try:
            if queue is not None:
                queue.close()
        finally:
            self.connection.close()

    def process_item(self, item, spider):
        """Publish ``item`` via ``deferToThread`` and return its result.

        The actual work happens in ``_process_item``; presumably this keeps
        the blocking broker call off the calling thread (assuming Twisted's
        ``deferToThread`` -- confirm against the file's imports).
        """
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        """Encode ``dict(item)`` and put it on the queue; return ``item``."""
        self.queue.put(self.encoder.encode(dict(item)))
        return item