Exemple #1
0
 def test_shutdown(self):
     """Check that ``shutdown()`` switches the worker's ``exit`` flag on."""
     config = {'bulk_save_limit': 1, 'bulk_save_interval': 1}
     worker = ResourceItemWorker('api_clients_queue',
                                 'resource_items_queue',
                                 'db',
                                 config,
                                 'retry_resource_items_queue')

     # The flag is False on a fresh worker and True after shutdown().
     self.assertEqual(worker.exit, False)
     worker.shutdown()
     self.assertEqual(worker.exit, True)
Exemple #2
0
    def test_add_to_retry_queue(self):
        """Exercise ``add_to_retry_queue``: plain retry, 429 retry and drop.

        The first two calls must each put the item on the retry queue with
        ``retries_count == 1`` and a timeout of twice the configured default;
        an item that already carries ``retries_count == 3`` must not be
        queued at all.
        """
        queue = Queue()
        worker = ResourceItemWorker(retry_resource_items_queue=queue,
                                    config_dict=self.worker_config)
        retry_item = dict(
            id=uuid.uuid4().hex,
            dateModified=datetime.datetime.utcnow().isoformat())
        self.assertEqual(queue.qsize(), 0)

        # Plain retry: wait out the delay before inspecting the queue.
        worker.add_to_retry_queue(retry_item)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(queue.qsize(), 1)
        queued = queue.get()
        self.assertEqual(queued['retries_count'], 1)
        self.assertEqual(queued['timeout'],
                         worker.config['retry_default_timeout'] * 2)

        # Retry triggered by status code 429; get() blocks until it arrives.
        worker.add_to_retry_queue(retry_item, status_code=429)
        queued = queue.get()
        self.assertEqual(queued['retries_count'], 1)
        self.assertEqual(queued['timeout'],
                         worker.config['retry_default_timeout'] * 2)

        # An item with retries_count 3 is dropped instead of queued.
        retry_item['retries_count'] = 3
        worker.add_to_retry_queue(retry_item)
        self.assertEqual(queue.qsize(), 0)

        del worker
Exemple #3
0
    def test__add_to_bulk(self):
        """Check how ``_add_to_bulk`` changes the size of ``worker.bulk``.

        A first document must grow the bulk; a newer revision of the same
        document and an older duplicate must both leave its size unchanged.
        """
        retry_queue = Queue()
        stale_stamp = datetime.datetime.utcnow().isoformat()
        item_id = uuid.uuid4().hex
        fresh_stamp = datetime.datetime.utcnow().isoformat()

        # Three separate dict objects sharing the same id / dateModified.
        queued_item = {
            'doc_type': 'Tender',
            'id': item_id,
            'dateModified': fresh_stamp
        }
        stored_doc = dict(queued_item, _rev='1-' + uuid.uuid4().hex)
        incoming_item = dict(queued_item)

        worker = ResourceItemWorker(retry_resource_items_queue=retry_queue,
                                    log_dict=self.log_dict,
                                    config_dict=self.worker_config)
        worker.db = MagicMock()

        # First insert: the bulk must grow.
        size_before = len(worker.bulk)
        worker._add_to_bulk(incoming_item, queued_item, stored_doc)
        size_after = len(worker.bulk)
        self.assertGreater(size_after, size_before)

        # Same document with a later dateModified: size stays the same.
        size_before = len(worker.bulk)
        newer_item = deepcopy(incoming_item)
        newer_item['dateModified'] = datetime.datetime.utcnow().isoformat()
        worker._add_to_bulk(newer_item, queued_item, stored_doc)
        size_after = len(worker.bulk)
        self.assertEqual(size_before, size_after)

        # Duplicate carrying the oldest dateModified: size stays the same.
        size_before = size_after
        older_duplicate = {
            'doc_type': 'Tender',
            'id': queued_item['id'],
            '_id': queued_item['id'],
            'dateModified': stale_stamp
        }
        worker._add_to_bulk(older_duplicate, queued_item, incoming_item)
        size_after = len(worker.bulk)
        self.assertEqual(size_before, size_after)
        del worker
    def test__add_to_bulk(self):
        """Check how ``_add_to_bulk`` changes the size of ``worker.bulk``.

        Called with (local item, public item, priority): the first public
        item must grow the bulk, while a newer revision and an older
        duplicate of the same id must both leave its size unchanged.
        """
        retry_queue = PriorityQueue()
        stale_stamp = datetime.datetime.utcnow().isoformat()
        item_id = uuid.uuid4().hex
        priority = 1

        local_item = dict(doc_type='Tender',
                          _rev='1-' + uuid.uuid4().hex,
                          id=item_id,
                          dateModified=stale_stamp)
        fresh_stamp = datetime.datetime.utcnow().isoformat()
        public_item = dict(id=item_id, dateModified=fresh_stamp)

        worker = ResourceItemWorker(retry_resource_items_queue=retry_queue,
                                    config_dict=self.worker_config)
        worker.db = MagicMock()

        # First insert: the bulk must grow.
        size_before = len(worker.bulk)
        worker._add_to_bulk(local_item, public_item, priority)
        size_after = len(worker.bulk)
        self.assertGreater(size_after, size_before)

        # Same id with a later dateModified: size stays the same.
        size_before = len(worker.bulk)
        newer_public_item = deepcopy(public_item)
        newer_public_item['dateModified'] = (
            datetime.datetime.utcnow().isoformat())
        worker._add_to_bulk(local_item, newer_public_item, priority)
        size_after = len(worker.bulk)
        self.assertEqual(size_before, size_after)

        # Duplicate carrying the oldest dateModified: size stays the same.
        size_before = size_after
        older_duplicate = {
            'doc_type': 'Tender',
            'id': local_item['id'],
            '_id': local_item['id'],
            'dateModified': stale_stamp
        }
        worker._add_to_bulk(local_item, older_duplicate, priority)
        size_after = len(worker.bulk)
        self.assertEqual(size_before, size_after)
        del worker
    def test__get_api_client_dict(self):
        """Check ``_get_api_client_dict`` against several queue states.

        Covered in order: a plain client, a client flagged ``drop_cookies``,
        an empty queue, a client whose ``renew_cookies`` raises, and a queue
        whose ``get`` raises ``Empty``.  Every "no client" outcome is checked
        with ``assertIsNone`` so that only the ``None`` singleton passes.
        """
        api_clients_queue = Queue()
        client = MagicMock()
        client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client_dict2 = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        api_clients_queue.put(client_dict)
        api_clients_queue.put(client_dict2)
        api_clients_info = {
            client_dict['id']: {
                'drop_cookies': False,
                'not_actual_count': 5,
                'request_interval': 3
            },
            client_dict2['id']: {
                'drop_cookies': True,
                'not_actual_count': 3,
                'request_interval': 2
            }
        }

        # Success test: the first queued client comes back as-is
        worker = ResourceItemWorker(api_clients_queue=api_clients_queue,
                                    config_dict=self.worker_config,
                                    api_clients_info=api_clients_info)
        self.assertEqual(worker.api_clients_queue.qsize(), 2)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, client_dict)

        # Client flagged drop_cookies: returned with both counters at zero
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['not_actual_count'], 0)
        self.assertEqual(api_client['request_interval'], 0)

        # Empty queue test
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)

        # Exception while renewing cookies: no client is returned and the
        # client dict is back on the queue afterwards
        client.renew_cookies.side_effect = Exception('Can\'t renew cookies')
        worker.api_clients_queue.put(client_dict2)
        # Set the flag again explicitly before the call
        # NOTE(review): presumably the worker cleared it above - confirm
        api_clients_info[client_dict2['id']]['drop_cookies'] = True
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.get(), client_dict2)

        # Queue.get raising Empty even though an item was put
        api_clients_queue.put(client_dict2)
        api_clients_queue.get = MagicMock(side_effect=Empty)
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)
        del worker
    def test__get_resource_item_from_queue(self):
        """Fetch one item from the queue, then hit the empty-queue path.

        A queued ``(priority, id)`` tuple must come back unchanged and leave
        the queue empty; a further call on the drained queue must return
        ``None`` for both the priority and the item.
        """
        items_queue = PriorityQueue()
        item = (1, uuid.uuid4().hex)
        items_queue.put(item)

        # Success test
        worker = ResourceItemWorker(resource_items_queue=items_queue,
                                    config_dict=self.worker_config)
        self.assertEqual(worker.resource_items_queue.qsize(), 1)
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual((priority, resource_item), item)
        self.assertEqual(worker.resource_items_queue.qsize(), 0)

        # Empty queue test: identity check, stricter than "== None"
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertIsNone(resource_item)
        self.assertIsNone(priority)
        del worker
Exemple #7
0
    def test__get_resource_item_from_queue(self):
        """Fetch one item from the queue, then hit the empty-queue path.

        A queued item dict must come back unchanged and leave the queue
        empty; a further call on the drained queue must return ``None``.
        """
        items_queue = Queue()
        item = {'id': uuid.uuid4().hex,
                'dateModified': datetime.datetime.utcnow().isoformat()}
        items_queue.put(item)

        # Success test
        worker = ResourceItemWorker(resource_items_queue=items_queue,
                                    config_dict=self.worker_config)
        self.assertEqual(worker.resource_items_queue.qsize(), 1)
        resource_item = worker._get_resource_item_from_queue()
        self.assertEqual(resource_item, item)
        self.assertEqual(worker.resource_items_queue.qsize(), 0)

        # Empty queue test: identity check, stricter than "== None"
        resource_item = worker._get_resource_item_from_queue()
        self.assertIsNone(resource_item)
        del worker
Exemple #8
0
 def up_worker(self):
     """Spawn a worker from this test case's fixtures, then shut it down.

     The worker is spawned with the queues, client info, config and db
     stored on ``self``; ``idle()`` is called before ``shutdown()`` and the
     method then sleeps for three seconds.
     """
     spawn_kwargs = {
         'resource_items_queue': self.queue,
         'retry_resource_items_queue': self.retry_queue,
         'api_clients_info': self.api_clients_info,
         'api_clients_queue': self.api_clients_queue,
         'config_dict': self.worker_config,
         'db': self.db,
     }
     spawned = ResourceItemWorker.spawn(**spawn_kwargs)
     idle()
     spawned.shutdown()
     sleep(3)
    def test_add_to_retry_queue(self, mocked_logger):
        """Exercise ``add_to_retry_queue`` with (priority, id) queue entries.

        A plain retry must come back with the priority raised by one, a 429
        retry with the priority unchanged, and an item re-added at priority
        1003 must be dropped with a single critical log message.

        ``mocked_logger`` is supplied from outside this method
        (presumably by a ``patch`` decorator - confirm against the class).
        """
        queue = PriorityQueue()
        worker = ResourceItemWorker(retry_resource_items_queue=queue,
                                    config_dict=self.worker_config)
        item_id = uuid.uuid4().hex
        self.assertEqual(queue.qsize(), 0)

        # Plain retry at priority 1000 is queued as 1001.
        worker.add_to_retry_queue(item_id, priority=1000)
        self.assertEqual(queue.qsize(), 1)
        got_priority, got_id = queue.get()
        self.assertEqual(got_priority, 1001)
        self.assertEqual(got_id, item_id)

        # Retry with status code 429 keeps the priority it was given.
        worker.add_to_retry_queue(item_id, got_priority, status_code=429)
        self.assertEqual(queue.qsize(), 1)
        got_priority, got_id = queue.get()
        self.assertEqual(got_priority, 1001)
        self.assertEqual(got_id, item_id)

        # Priority 1002 is queued as 1003 once the retry delay has passed.
        worker.add_to_retry_queue(item_id, priority=1002)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(queue.qsize(), 1)
        got_priority, got_id = queue.get()
        self.assertEqual(got_priority, 1003)
        self.assertEqual(got_id, item_id)

        # Re-adding at the returned priority drops the item and logs it.
        worker.add_to_retry_queue(item_id, priority=got_priority)
        self.assertEqual(queue.qsize(), 0)
        expected_message = (
            'Tender {} reached limit retries count {} and droped from '
            'retry_queue.'.format(item_id, worker.config['retries_count']))
        mocked_logger.critical.assert_called_once_with(
            expected_message, extra={'MESSAGE_ID': 'dropped_documents'})
        del worker
Exemple #10
0
 def test_init(self):
     """Check that constructor arguments end up on the matching attributes.

     Also checks the initial values of ``exit`` and ``update_doc``.
     """
     worker = ResourceItemWorker(
         'api_clients_queue',
         'resource_items_queue',
         'db',
         {'bulk_save_limit': 1, 'bulk_save_interval': 1},
         'retry_resource_items_queue')

     # (attribute name, expected value), checked in this order.
     expectations = (
         ('api_clients_queue', 'api_clients_queue'),
         ('resource_items_queue', 'resource_items_queue'),
         ('db', 'db'),
         ('config', {'bulk_save_limit': 1, 'bulk_save_interval': 1}),
         ('retry_resource_items_queue', 'retry_resource_items_queue'),
         ('exit', False),
         ('update_doc', False),
     )
     for attribute, expected in expectations:
         self.assertEqual(getattr(worker, attribute), expected)
Exemple #11
0
    def test__get_api_client_dict(self):
        """Check ``_get_api_client_dict`` against several queue states.

        Covered in order: a client with ``destroy`` False, a client with
        ``destroy`` True, an empty queue, and a queue whose ``get`` raises
        ``Empty``.  Every "no client" outcome is checked with
        ``assertIsNone`` so that only the ``None`` singleton passes.
        """
        api_clients_queue = Queue()
        client = MagicMock()
        client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client_dict2 = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        api_clients_queue.put(client_dict)
        api_clients_queue.put(client_dict2)
        api_clients_info = {
            client_dict['id']: {
                'destroy': False
            },
            client_dict2['id']: {
                'destroy': True
            }
        }

        # Success test: the first queued client comes back as-is
        worker = ResourceItemWorker(api_clients_queue=api_clients_queue,
                                    config_dict=self.worker_config,
                                    log_dict=self.log_dict,
                                    api_clients_info=api_clients_info)
        self.assertEqual(worker.api_clients_queue.qsize(), 2)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, client_dict)

        # Client flagged destroy: nothing is returned for it
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)

        # Empty queue test
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)

        # Queue.get raising Empty even though an item was put
        api_clients_queue.put(client_dict2)
        api_clients_queue.get = MagicMock(side_effect=Empty)
        api_client = worker._get_api_client_dict()
        self.assertIsNone(api_client)
        del worker
Exemple #12
0
    def test__get_resource_item_from_public(self, mock_api_client):
        """Walk ``_get_resource_item_from_public`` through each outcome.

        The scenarios run in a fixed order because every one of them asserts
        cumulative state: the ``log_dict`` counters, the retry queue size and
        the api clients queue size all carry over from the previous scenario.

        ``mock_api_client`` is supplied from outside this method
        (presumably by a ``patch`` decorator - confirm against the class).
        """
        item = {
            'id': uuid.uuid4().hex,
            'dateModified': datetime.datetime.utcnow().isoformat()
        }
        api_clients_queue = Queue()
        client_dict = {
            'id': uuid.uuid4().hex,
            'request_interval': 0.02,
            'client': mock_api_client
        }
        api_clients_queue.put(client_dict)
        api_clients_info = {
            client_dict['id']: {
                'destroy': False,
                'request_durations': {}
            }
        }
        retry_queue = Queue()
        # Built after ``item``, so this dateModified is not older than item's.
        return_dict = {
            'data': {
                'id': item['id'],
                'dateModified': datetime.datetime.utcnow().isoformat()
            }
        }
        mock_api_client.get_resource_item.return_value = return_dict
        worker = ResourceItemWorker(api_clients_queue=api_clients_queue,
                                    config_dict=self.worker_config,
                                    log_dict=self.log_dict,
                                    retry_resource_items_queue=retry_queue,
                                    api_clients_info=api_clients_info)

        # Success test: the 'data' payload is returned, nothing is retried
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['request_interval'], 0.02)
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 0)
        self.assertEqual(public_item, return_dict['data'])

        # Not actual document from public: the queued item now carries a
        # dateModified later than the one the mocked client returns
        item['dateModified'] = datetime.datetime.utcnow().isoformat()
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'], 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.log_dict['not_actual_docs_count'], 1)
        self.assertEqual(worker.log_dict['add_to_retry'], 1)
        # Wait before checking that the item reached the retry queue
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # InvalidResponse: counted as an exception and sent to retry
        mock_api_client.get_resource_item.side_effect = InvalidResponse(
            'invalid response')
        self.assertEqual(self.log_dict['exceptions_count'], 0)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.log_dict['exceptions_count'], 1)
        self.assertEqual(worker.log_dict['add_to_retry'], 2)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 2)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # RequestFailed status_code=429: the client fetched afterwards has
        # request_interval equal to config['client_inc_step_timeout']
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 429}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'], 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.log_dict['exceptions_count'], 2)
        self.assertEqual(worker.log_dict['add_to_retry'], 3)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 3)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'],
                         worker.config['client_inc_step_timeout'])

        # RequestFailed status_code=429 with drop cookies: starting from an
        # interval of 2, the interval is back at 0 after the call
        api_client['request_interval'] = 2
        public_item = worker._get_resource_item_from_public(api_client, item)
        # request_interval is read after the call, when it is asserted to be 0
        sleep(api_client['request_interval'])
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        self.assertEqual(worker.log_dict['exceptions_count'], 3)
        self.assertEqual(worker.log_dict['add_to_retry'], 4)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 4)

        # RequestFailed with status_code not equal 429
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        self.assertEqual(worker.log_dict['exceptions_count'], 4)
        self.assertEqual(worker.log_dict['add_to_retry'], 5)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # ResourceNotFound: bumps not_found_count, while exceptions_count
        # stays at 4; the item is still retried
        mock_api_client.get_resource_item.side_effect = RNF(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        self.assertEqual(worker.log_dict['exceptions_count'], 4)
        self.assertEqual(worker.log_dict['add_to_retry'], 6)
        self.assertEqual(worker.log_dict['not_found_count'], 1)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 6)

        # Generic Exception: counted and retried like the other failures
        api_client = worker._get_api_client_dict()
        mock_api_client.get_resource_item.side_effect = Exception(
            'text except')
        public_item = worker._get_resource_item_from_public(api_client, item)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        self.assertEqual(worker.log_dict['exceptions_count'], 5)
        self.assertEqual(worker.log_dict['add_to_retry'], 7)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 7)

        del worker
    def test__run(self, mocked_logger, mock_get_from_public, mocked_save_bulk):
        """Drive ``worker._run()`` once per scenario and check its debug log.

        Each scenario asserts on a slice of ``mocked_logger.debug``'s call
        list; the slice offsets (1, 4, 7, 10) skip the calls recorded by the
        earlier scenarios, so the scenarios must stay in this order.

        The three mock parameters are supplied from outside this method
        (presumably by ``patch`` decorators - confirm against the class).
        """
        self.queue = Queue()
        self.retry_queue = Queue()
        self.api_clients_queue = Queue()
        # Queue entries here are (priority, resource item id) tuples.
        queue_item = (1, uuid.uuid4().hex)
        doc = {
            'id': queue_item[1],
            '_rev': '1-{}'.format(uuid.uuid4().hex),
            'dateModified': datetime.datetime.utcnow().isoformat(),
            'doc_type': 'Tender'
        }
        client = MagicMock()
        api_client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client.session.headers = {'User-Agent': 'Test-Agent'}
        self.api_clients_info = {
            api_client_dict['id']: {
                'drop_cookies': False,
                'request_durations': []
            }
        }
        self.db = MagicMock()
        worker = ResourceItemWorker(
            api_clients_queue=self.api_clients_queue,
            resource_items_queue=self.queue,
            retry_resource_items_queue=self.retry_queue,
            db=self.db,
            api_clients_info=self.api_clients_info,
            config_dict=self.worker_config)
        # Replace the exit flag with a mock whose truth value (Python 2
        # ``__nonzero__``) is False on the first check and True on the second.
        # NOTE(review): presumably this limits _run() to one loop pass.
        worker.exit = MagicMock()
        worker.exit.__nonzero__.side_effect = [False, True]

        # Try get api client from clients queue (the queue is empty here)
        self.assertEqual(self.queue.qsize(), 0)
        worker._run()
        self.assertEqual(self.queue.qsize(), 0)
        mocked_logger.debug.assert_called_once_with(
            'API clients queue is empty.')

        # Try get item from resource items queue (a client is available,
        # but there are no resource items)
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[1:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                 extra={'MESSAGE_ID': 'put_client'}),
            call('Resource items queue is empty.')
        ])

        # Try get resource item from local storage (the public mock returns
        # ``doc``; the log shows it being put into the save bulk)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[4:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id'])),
            call('Put in bulk tender {} {}'.format(doc['id'],
                                                   doc['dateModified']),
                 extra={'MESSAGE_ID': 'add_to_save_bulk'})
        ])

        # Try get local_resource_item with Exception (db.get raises; the
        # client is put back and one error is logged)
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        self.db.get.side_effect = [Exception('Database Error')]
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[7:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id'])),
            call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                 extra={'MESSAGE_ID': 'put_client'})
        ])
        # The trailing comma in the expected repr is the pre-3.7 exception
        # repr format.
        mocked_logger.error.assert_called_once_with(
            "Error while getting resource item from couchdb:"
            " Exception('Database Error',)",
            extra={'MESSAGE_ID': 'exceptions'})

        # db.get returns the doc but the public mock returns None: only the
        # two debug calls below are logged for this pass
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = None
        self.db.get.side_effect = [doc]
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[10:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id'])),
        ])
        # Across all scenarios above the patched bulk save ran exactly once.
        self.assertEqual(mocked_save_bulk.call_count, 1)
    def test__save_bulk_docs(self):
        """Check ``_save_bulk_docs`` for a mixed result and a failing update.

        With a mocked ``db.update`` returning two successes and two failures,
        the bulk must be emptied and one item placed on the retry queue.
        When ``db.update`` raises, the retry queue must reach five items and
        the bulk must again be empty.
        """
        self.worker_config['bulk_save_limit'] = 3
        retry_queue = PriorityQueue()
        worker = ResourceItemWorker(retry_resource_items_queue=retry_queue,
                                    config_dict=self.worker_config)
        doc_ids = [uuid.uuid4().hex for _ in range(4)]

        def prime_priorities():
            # Every document gets priority 1 in the worker's cache.
            for doc_id in doc_ids:
                worker.priority_cache[doc_id] = 1

        def build_bulk(stamp):
            # Fresh bulk mapping: one new dict per document id.
            return dict(
                (doc_id, {'id': doc_id, 'dateModified': stamp})
                for doc_id in doc_ids)

        prime_priorities()
        date_modified = datetime.datetime.utcnow().isoformat()
        worker.bulk = build_bulk(date_modified)

        # Mocked db.update result: two saved, two rejected.
        worker.db = MagicMock()
        worker.db.update.return_value = [
            (True, doc_ids[0], '1-' + uuid.uuid4().hex),
            (True, doc_ids[1], '2-' + uuid.uuid4().hex),
            (False, doc_ids[2],
             Exception(u'New doc with oldest dateModified.')),
            (False, doc_ids[3], Exception(u'Document update conflict.'))
        ]

        # Mixed response: bulk emptied, one item on the retry queue.
        worker._save_bulk_docs()
        sleep(0.1)
        self.assertEqual(len(worker.bulk), 0)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)

        # db.update raising: retry queue grows to five, bulk emptied again.
        worker.db.update.side_effect = Exception('Some exceptions')
        worker.bulk = build_bulk(date_modified)
        prime_priorities()
        worker._save_bulk_docs()
        sleep(0.2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)
        self.assertEqual(len(worker.bulk), 0)
Exemple #15
0
    def test__run(self, mocked_logger, mock_get_from_public, mocked_save_bulk):
        self.queue = Queue()
        self.retry_queue = Queue()
        self.api_clients_queue = Queue()
        queue_item = {
            'id': uuid.uuid4().hex,
            'dateModified': datetime.datetime.utcnow().isoformat()
        }
        doc = {
            'id': queue_item['id'],
            '_rev': '1-{}'.format(uuid.uuid4().hex),
            'dateModified': datetime.datetime.utcnow().isoformat(),
            'doc_type': 'Tender'
        }
        client = MagicMock()
        api_client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client.session.headers = {'User-Agent': 'Test-Agent'}
        self.api_clients_info = {
            api_client_dict['id']: {
                'drop_cookies': False, 'request_durations': []
            }
        }
        self.db = MagicMock()
        worker = ResourceItemWorker(
            api_clients_queue=self.api_clients_queue,
            resource_items_queue=self.queue,
            retry_resource_items_queue=self.retry_queue,
            db=self.db, api_clients_info=self.api_clients_info,
            config_dict=self.worker_config
        )
        worker.exit = MagicMock()
        worker.exit.__nonzero__.side_effect = [False, True]

        # Try get api client from clients queue
        self.assertEqual(self.queue.qsize(), 0)
        worker._run()
        self.assertEqual(self.queue.qsize(), 0)
        mocked_logger.debug.assert_called_once_with(
            'API clients queue is empty.')

        # Try get item from resource items queue
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[1:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'], client.session.headers['User-Agent']
                )),
                call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'put_client'}),
                call('Resource items queue is empty.')
            ]
        )

        # Try get resource item from local storage
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[6:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id'])),
                call('Put in bulk tender {} {}'.format(doc['id'],
                                                       doc['dateModified']),
                     extra={'MESSAGE_ID': 'add_to_save_bulk'})
            ]
        )

        # queue_resource_item dateModified is None and None public doc
        self.api_clients_queue.put(api_client_dict)
        self.queue.put({'id': doc['id'], 'dateModified': None})
        mock_get_from_public.return_value = None
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[11:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id']))
            ]
        )

        # queue_resource_item dateModified is None and not None public doc
        self.api_clients_queue.put(api_client_dict)
        self.api_clients_queue.put(api_client_dict)
        self.queue.put({'id': doc['id'], 'dateModified': None})
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[15:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id'])),
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Ignored duplicate tender {} in bulk: previous {}, '
                     'current {}'.format(
                    doc['id'], doc['dateModified'], doc['dateModified']),
                    extra={'MESSAGE_ID': 'skipped'})
            ]
        )

        # Add to retry queue
        self.api_clients_queue.put(api_client_dict)
        self.queue.put({'id': doc['id'], 'dateModified': None})
        mock_get_from_public.return_value = doc
        self.db.get_doc.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[23:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id']))
            ]
        )
        mocked_logger.info.assert_called_once_with(
            'Put tender {} to \'retries_queue\''.format(doc['id']),
            extra={'MESSAGE_ID': 'add_to_retry'}
        )

        # Skip doc
        self.api_clients_queue.put(api_client_dict)
        self.api_clients_queue.put(api_client_dict)
        self.queue.put({'id': doc['id'], 'dateModified': None})
        mock_get_from_public.return_value = doc
        self.db.get_doc.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[27:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id'])),
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Ignored duplicate tender {} in bulk: previous {}, '
                     'current {}'.format(
                    doc['id'], doc['dateModified'], doc['dateModified']),
                    extra={'MESSAGE_ID': 'skipped'})
            ]
        )
        self.assertEqual(mocked_logger.info.call_count, 1)

        # Skip doc with raise exception
        self.api_clients_queue.put(api_client_dict)
        self.api_clients_queue.put(api_client_dict)
        self.queue.put({'id': doc['id'], 'dateModified': None})
        mock_get_from_public.side_effect = Exception('test')
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[35:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id'])),
                call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'put_client'})
            ]
        )
        mocked_logger.error.assert_called_once_with(
            'Error while getting resource item from couchdb: {}'.format(repr(
                Exception('test'))),
            extra={'MESSAGE_ID': 'exceptions'}
        )
        self.assertEqual(
            mocked_logger.info.call_args_list[1],
            call('Put tender {} to \'retries_queue\''.format(doc['id']),
                 extra={'MESSAGE_ID': 'add_to_retry'})
        )

        # Try get resource item from public server with None public doc
        new_date_modified = datetime.datetime.utcnow().isoformat()
        self.queue.put({'id': doc['id'], 'dateModified': new_date_modified})
        mock_get_from_public.return_value = None
        mock_get_from_public.side_effect = None
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[40:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id']))
            ]
        )
        self.assertEqual(mocked_logger.info.call_count, 2)
        self.assertEqual(mocked_logger.error.call_count, 1)

        # Try get resource item from public server
        new_date_modified = datetime.datetime.utcnow().isoformat()
        self.queue.put({'id': doc['id'], 'dateModified': new_date_modified})
        mock_get_from_public.return_value = doc
        mock_get_from_public.side_effect = None
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(
            mocked_logger.debug.call_args_list[44:],
            [
                call('GET API CLIENT: {}'.format(api_client_dict['id']),
                     extra={'MESSAGE_ID': 'get_client'}),
                call('SLEEP before return client: 0'),
                call('Got api_client ID: {} {}'.format(
                    api_client_dict['id'],
                    client.session.headers['User-Agent'])),
                call('Get tender {} from main queue.'.format(
                    doc['id'])),
                call('Ignored duplicate tender {} in bulk: previous {}, '
                     'current {}'.format(
                    doc['id'], doc['dateModified'], doc['dateModified']),
                    extra={'MESSAGE_ID': 'skipped'})
            ]
        )
        self.assertEqual(mocked_logger.info.call_count, 2)
        self.assertEqual(mocked_logger.error.call_count, 1)
Exemple #16
0
    def test__save_bulk_docs(self):
        """Verify the ``log_dict`` counters updated by ``_save_bulk_docs``.

        ``worker.bulk`` is filled with four documents and ``worker.db`` is
        replaced by a ``MagicMock``.  In the first run ``db.update`` returns
        two successful and two failed per-document results and every counter
        is expected to become 1.  In the second run ``db.update`` raises and
        only ``add_to_retry`` is expected to grow (from 1 to 5).
        """
        self.worker_config['bulk_save_limit'] = 3
        worker = ResourceItemWorker(
            config_dict=self.worker_config,
            log_dict=self.log_dict,
            retry_resource_items_queue=Queue())
        doc_ids = [uuid.uuid4().hex for _ in range(4)]
        stamp = datetime.datetime.utcnow().isoformat()
        counter_names = (
            'update_documents', 'save_documents', 'skiped', 'add_to_retry')

        def fresh_bulk():
            # Build a new mapping on every call so that each run of
            # _save_bulk_docs() starts from its own set of four documents.
            return {
                doc_id: {'id': doc_id, 'dateModified': stamp}
                for doc_id in doc_ids
            }

        def check_counters(*expected):
            # Compare the counters in the fixed order of ``counter_names``;
            # the first mismatch fails the test.
            for name, value in zip(counter_names, expected):
                self.assertEqual(worker.log_dict[name], value)

        worker.bulk = fresh_bulk()
        worker.db = MagicMock()
        worker.db.update.return_value = [
            (True, doc_ids[0], '1-' + uuid.uuid4().hex),
            (True, doc_ids[1], '2-' + uuid.uuid4().hex),
            (False, doc_ids[2],
             Exception(u'New doc with oldest dateModified.')),
            (False, doc_ids[3], Exception(u'Document update conflict.')),
        ]

        # Nothing has been processed yet
        check_counters(0, 0, 0, 0)

        # Test per-document response from couchdb (two saved, two rejected)
        worker._save_bulk_docs()
        check_counters(1, 1, 1, 1)

        # Test failed response from couchdb: update() itself raises
        worker.db.update.side_effect = Exception('Some exceptions')
        worker.bulk = fresh_bulk()
        worker._save_bulk_docs()
        check_counters(1, 1, 1, 5)
    def test__get_resource_item_from_public(self, mock_api_client):
        """Exercise ``_get_resource_item_from_public`` on success and errors.

        A single API client dict (wrapping ``mock_api_client``) is put into
        the clients queue.  For each scenario the client is taken with
        ``_get_api_client_dict()``, ``mock_api_client.get_resource_item`` is
        configured to return data or raise, and the test then checks:

        * the value returned by ``_get_resource_item_from_public``
          (``return_dict['data']`` on success, ``None`` for every error);
        * that the client is back in ``api_clients_queue`` afterwards;
        * the client's ``request_interval``;
        * the size of ``retry_resource_items_queue``, which is expected to
          grow by one for every error except ``ResourceGone``.

        The scenarios share one worker and one retry queue, so the expected
        queue sizes accumulate and the order of the sections matters.

        NOTE(review): ``mock_api_client`` is presumably injected by a
        ``patch`` decorator that is not visible in this chunk -- confirm.
        """
        resource_item_id = uuid.uuid4().hex
        priority = 1

        api_clients_queue = Queue()
        client_dict = {
            'id': uuid.uuid4().hex,
            'request_interval': 0.02,
            'client': mock_api_client
        }
        api_clients_queue.put(client_dict)
        api_clients_info =\
            {client_dict['id']: {'drop_cookies': False, 'request_durations': {}}}
        retry_queue = PriorityQueue()
        # Payload the mocked client returns for the success scenario
        return_dict = {
            'data': {
                'id': resource_item_id,
                'dateModified': datetime.datetime.utcnow().isoformat()
            }
        }
        mock_api_client.get_resource_item.return_value = return_dict
        worker = ResourceItemWorker(api_clients_queue=api_clients_queue,
                                    config_dict=self.worker_config,
                                    retry_resource_items_queue=retry_queue,
                                    api_clients_info=api_clients_info)

        # Success: the 'data' part of the response is returned and nothing
        # is added to the retry queue
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['request_interval'], 0.02)
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 0)
        self.assertEqual(public_item, return_dict['data'])

        # Disabled scenario, kept for reference.
        # NOTE(review): it refers to ``item``, which is not defined in this
        # test, so it cannot be re-enabled as written.
        # # Not actual document from public
        # item['dateModified'] = datetime.datetime.utcnow().isoformat()
        # api_client = worker._get_api_client_dict()
        # self.assertEqual(worker.api_clients_queue.qsize(), 0)
        # self.assertEqual(api_client['request_interval'], 0)
        # public_item = worker._get_resource_item_from_public(
        #     api_client, priority, item['id']
        # )
        # self.assertEqual(public_item, None)
        # sleep(worker.config['retry_default_timeout'] * 2)
        # self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        # self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # InvalidResponse: None is returned, the item shows up in the retry
        # queue (1 item) and the client is back in the clients queue
        mock_api_client.get_resource_item.side_effect =\
            InvalidResponse('invalid response')
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        # Wait before checking the retry queue size
        sleep(worker.config['retry_default_timeout'] * 1)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # RequestFailed status_code=429: retry queue grows to 2 and the
        # client taken afterwards is expected to carry a request_interval
        # equal to config['client_inc_step_timeout']
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 429}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'], 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 2)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'],
                         worker.config['client_inc_step_timeout'])

        # RequestFailed status_code=429 with drop cookies: the side effect
        # is still the 429 error from above; request_interval is raised to 2
        # by hand and is expected to be 0 again after the call
        api_client['request_interval'] = 2
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        sleep(api_client['request_interval'])
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 3)

        # RequestFailed with status_code not equal 429: request_interval
        # stays 0 and the retry queue grows to 4
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 4)

        # ResourceNotFound (RNF): retry queue grows to 5
        mock_api_client.get_resource_item.side_effect = RNF(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # ResourceGone: the only error after which the retry queue size is
        # expected to stay unchanged (still 5)
        mock_api_client.get_resource_item.side_effect = ResourceGone(
            munchify({'status_code': 410}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # Generic Exception: None is returned and the retry queue grows to 6
        api_client = worker._get_api_client_dict()
        mock_api_client.get_resource_item.side_effect =\
            Exception('text except')
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item_id)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 6)

        del worker