class MessageQueuePipeline(object):
    """Emit processed items to a RabbitMQ exchange/queue.

    A broker connection object is created when the pipeline is constructed.
    When a spider opens, a simple queue named after the spider is obtained
    from that connection; every processed item is converted to a ``dict``,
    serialized with the configured encoder and put on that queue from a
    worker thread. When the spider closes, the queue and the connection are
    closed.
    """

    def __init__(self, host_name, port, userid, password, virtual_host,
                 encoder_class):
        """Create the broker connection and hook the spider signals.

        :param host_name: broker host, passed to ``Connection`` as ``hostname``.
        :param port: broker port.
        :param userid: broker user id.
        :param password: broker password.
        :param virtual_host: broker virtual host.
        :param encoder_class: zero-argument callable returning an object with
            an ``encode(obj)`` method; it is instantiated once here.
        """
        self.connection = Connection(hostname=host_name, port=port,
                                     userid=userid, password=password,
                                     virtual_host=virtual_host)
        self.encoder = encoder_class()
        # No queue exists until a spider opens; keeping an explicit None lets
        # spider_closed() run safely even if spider_opened() never succeeded.
        self.queue = None
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``BROKER_*`` settings.

        Reads ``BROKER_HOST``, ``BROKER_PORT``, ``BROKER_USERID``,
        ``BROKER_PASSWORD`` and ``BROKER_VIRTUAL_HOST``. The encoder class is
        taken from ``MESSAGE_Q_SERIALIZER`` and falls back to
        ``ScrapyJSONEncoder`` when that setting is absent.

        NOTE(review): the ``MESSAGE_Q_SERIALIZER`` value is called directly,
        so it must be a class/callable rather than a dotted-path string.
        """
        host_name = settings.get('BROKER_HOST')
        port = settings.get('BROKER_PORT')
        userid = settings.get('BROKER_USERID')
        password = settings.get('BROKER_PASSWORD')
        virtual_host = settings.get('BROKER_VIRTUAL_HOST')
        encoder_class = settings.get('MESSAGE_Q_SERIALIZER', ScrapyJSONEncoder)
        return cls(host_name, port, userid, password, virtual_host,
                   encoder_class)

    def spider_opened(self, spider):
        """Open the queue named after *spider* on the broker connection."""
        self.queue = self.connection.SimpleQueue(spider.name)

    def spider_closed(self, spider):
        """Close the spider's queue (if any) and then the broker connection.

        The connection is closed even when no queue was opened or when closing
        the queue raises; any such exception still propagates to the caller.
        """
        # Detach the queue first so a failed close cannot leave a stale
        # reference behind.
        queue, self.queue = self.queue, None
        try:
            if queue is not None:
                queue.close()
        finally:
            self.connection.close()

    def process_item(self, item, spider):
        """Publish *item* from a worker thread.

        Returns whatever ``deferToThread`` returns for the call to
        ``_process_item``, so the blocking queue write runs off the calling
        thread.
        """
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        """Serialize ``dict(item)`` with the encoder, enqueue it, return *item*."""
        self.queue.put(self.encoder.encode(dict(item)))
        return item