Example no. 1
0
 def __init__(self, *args, **kwargs):
     """Bind the mixin to its crawler and open the RabbitMQ channel.

     Expects the crawler as the first positional argument; reads the
     queue name from the RABBITMQ_QUEUE_NAME setting and declares the
     queue on a fresh channel.
     """
     print('>>>>>>>>>>>>>RabbitMQMixin#__init__')
     crawler = args[0]
     self.crawler = crawler
     self.queue_name = crawler.settings.get('RABBITMQ_QUEUE_NAME')
     # con.from_settings builds the broker connection from project settings.
     self.connection = con.from_settings(crawler.settings)
     self.channel = self.connection.channel()
     self.channel.queue_declare(self.queue_name)
Example no. 2
0
 def from_settings(cls, settings):
     """Build a dupefilter from project settings.

     A one-time, timestamp-based key is generated so this class can be
     used as a standalone dupefilter with scrapy's default scheduler;
     if scrapy passed the spider into open(), the key wouldn't be needed.
     """
     server = from_settings(settings)
     return cls(server, "dupefilter:%s" % int(time.time()))
Example no. 3
0
 def test_get_conn_localhost(self):
     """A connection can be created from a minimal host/port mapping."""
     conn = connection.from_settings({'host': 'localhost', 'port': 5672})
     debug(conn)
Example no. 4
0
 def from_settings(cls, settings):
     """Build a scheduler from project settings.

     Each option falls back to its module-level default; the queue class
     is resolved from its dotted path, and the server handed to the
     scheduler is a channel of the broker connection.
     """
     get = settings.get
     server = con.from_settings(settings).channel()
     return cls(
         server,
         get('SCHEDULER_PERSIST', SCHEDULER_PERSIST),
         get('SCHEDULER_QUEUE_KEY', QUEUE_KEY),
         load_object(get('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS)),
         get('DUPEFILTER_KEY', DUPEFILTER_KEY),
         get('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE),
     )
Example no. 5
0
    def test_from_setings_auth(self):
        """A connection can be created from explicit credential parameters."""
        # NOTE(review): method name has a typo ("setings"); kept so the
        # test runner still discovers it under the same name.
        creds = connection.credentials(self.user, self.password)
        params = {'host': 'localhost', 'credentials': creds}
        settings = {'RABBITMQ_CONNECTION_PARAMETERS': params}

        ch = connection.from_settings(settings)
        ch.close()
Example no. 6
0
    def next_request(self):
        """Pop the next request from the RabbitMQ-backed queue.

        If the broker connection has been closed and settings are
        available, the connection is re-established and the queue is
        rebuilt via ``open()`` before popping.

        :return: the popped request, or None when the queue is empty.
        """
        # NOTE(review): the original assigned self.idle_before_close to an
        # unused local block_pop_timeout; possibly it was meant to be passed
        # to self.queue.pop() — confirm against the queue implementation.

        # Reconnect and rebuild the queue if the connection was dropped.
        # (Was `not self.settings is None`; `is not None` is the idiomatic,
        # behavior-identical form.)
        if not self.server.is_open and self.settings is not None:
            self.server = from_settings(self.settings)
            self.open(self.spider)

        request = self.queue.pop()
        if request and self.stats:
            self.stats.inc_value('scheduler/dequeued/rabbitmq',
                                 spider=self.spider)
        return request
Example no. 7
0
    def setup_rabbitmq(self):
        """Set up the RabbitMQ connection and wire crawler signals.

            Call this method after spider has set its crawler object.
        :return: None
        """
        # Default to the conventional "<spidername>:start_urls" key.
        if not self.rabbitmq_key:
            self.rabbitmq_key = '{}:start_urls'.format(self.name)

        self.server = connection.from_settings(self.crawler.settings)
        for receiver, signal in ((self.spider_idle, signals.spider_idle),
                                 (self.item_scraped, signals.item_scraped)):
            self.crawler.signals.connect(receiver, signal=signal)
Example no. 8
0
    def next_request(self):
        """ Provides a request to be scheduled.

        Note: this is a generator — it yields at most one request per call
        and yields nothing when the queue is empty.
        :return: Request object or None
        """
        # Re-open the broker connection if it was dropped.
        if not self.server.is_open:
            self.server = connection.from_settings(self.crawler.settings)

        _method_frame, _header_frame, url = self.server.basic_get(
            queue=self.rabbitmq_key)

        if url:
            yield self.make_requests_from_url(bytes_to_str(url))
Example no. 9
0
    def setup_rabbitmq(self):
        """ Setup RabbitMQ connection.

            Call this method after spider has set its crawler object.
        :return: None
        """
        queue_name = self.crawler.settings.get('RABBITMQ_QUEUE_NAME', None)
        if queue_name:
            # Interpolate the spider name into the configured template.
            self.rabbitmq_key = queue_name % {'name': self.name}
            # HACK: settings are frozen once the crawler starts; briefly
            # unfreeze to write the resolved queue name back.
            self.crawler.settings.frozen = False
            self.crawler.settings.set('RABBITMQ_QUEUE_NAME', self.rabbitmq_key)
            self.crawler.settings.frozen = True

        # Default to the conventional "<spidername>:start_urls" key.
        if not self.rabbitmq_key:
            self.rabbitmq_key = '{}:start_urls'.format(self.name)

        self.server = connection.from_settings(self.crawler.settings)
        for receiver, signal in ((self.spider_idle, signals.spider_idle),
                                 (self.item_scraped, signals.item_scraped)):
            self.crawler.signals.connect(receiver, signal=signal)
Example no. 10
0
 def from_settings(cls, settings):
     """Build an instance from project settings.

     Uses RABBITMQ_EXCHANGE_NAME when configured, otherwise the
     module-level EXCHANGE_NAME default.
     """
     exchange = settings.get('RABBITMQ_EXCHANGE_NAME', EXCHANGE_NAME)
     server = from_settings(settings)
     return cls(server, exchange)