コード例 #1
0
def last_request_status(request):
    """Return the most recent requests, each annotated with its status.

    The request accepts an optional parameter ``size``, which is the maximum
    number of items returned.  When it is absent, 10 is used.

    Every returned item gets a ``'status'`` entry computed by
    ``get_request_status`` from the job statuses reported by Scrapyd.

    Raises ``exc.HTTPBadGateway`` when ``size`` cannot be parsed as an
    integer.
    """
    settings = request.registry.settings

    default_size = 10
    size_str = request.params.get('size', default_size)
    try:
        size = int(size_str)
    except ValueError:
        # NOTE(review): a malformed client parameter would normally map to a
        # 4xx response; the exception type is kept for backward compatibility.
        raise exc.HTTPBadGateway(detail="Size parameter has incorrect value")

    # Get last requests.  The connection is closed in a ``finally`` clause so
    # it is released even when the query raises.
    dbinterf = web_runner.db.DbInterface(settings['db_filename'],
                                         recreate=False)
    try:
        reqs = dbinterf.get_last_requests(size)
    finally:
        dbinterf.close()

    # Get the jobid status dictionary.
    scrapyd_baseurl = settings[SCRAPYD_BASE_URL_KEY]
    scrapyd_interf = Scrapyd(scrapyd_baseurl)
    jobids_status = scrapyd_interf.get_jobs()

    # For each request, determine the request status gathering
    # the information from all jobids related to it
    for req in reqs:
        req['status'] = get_request_status(req, jobids_status)

    return reqs
コード例 #2
0
class ScrapydTest(unittest.TestCase):
    """Tests for ``Scrapyd`` with the ``requests`` module patched out.

    Each test installs canned responses on the patched ``requests`` object
    (or on ``requests.get`` / ``requests.post`` directly), calls one method
    of the subject and checks both the returned value and the URL that was
    requested.
    """

    maxDiff = None

    URL = 'http://example.com'

    EXPECTED_LIST_JOBS_URL = URL + '/listjobs.json?project=test'
    EXPECTED_LIST_PROJECTS_URL = URL + '/listprojects.json'
    EXPECTED_LIST_SPIDERS_URL = URL + '/listspiders.json?project=test'

    EMPTY_QUEUE = {'running': 0, 'finished': 0, 'pending': 0}

    # Patch targets used by the tests below.
    _REQUESTS = 'web_runner.scrapyd.requests'
    _REQUESTS_GET = 'web_runner.scrapyd.requests.get'
    _REQUESTS_POST = 'web_runner.scrapyd.requests.post'

    # Job ids shared by the canned "one pending, one finished" payload.
    _PENDING_ID = "78391cc0fcaf11e1b0090800272a6d06"
    _FINISHED_ID = "2f16646cfcaf11e1b0090800272a6d06"

    # ------------------------------------------------------------------
    # Fixture factories.  Each call builds fresh objects so that no mutable
    # state is shared between tests.
    # ------------------------------------------------------------------

    @staticmethod
    def _no_jobs_payload():
        # A successful job listing in which every queue is empty.
        return {
            "status": "ok",
            "pending": [],
            "running": [],
            "finished": [],
        }

    @classmethod
    def _two_jobs_payload(cls, **finished_extras):
        # A successful job listing with one pending and one finished job.
        # Keyword arguments are added as extra fields of the finished job.
        finished_job = {"id": cls._FINISHED_ID, "spider": "spider3"}
        finished_job.update(finished_extras)
        return {
            "status": "ok",
            "pending": [{
                "id": cls._PENDING_ID,
                "project_name": "spider1",
            }],
            "running": [],
            "finished": [finished_job],
        }

    @staticmethod
    def _ok_response(payload=None):
        # A mocked HTTP 200 response; ``payload`` (if given) is what its
        # ``json()`` call returns.
        reply = mock.MagicMock()
        reply.status_code = 200
        if payload is not None:
            reply.json.return_value = payload
        return reply

    @staticmethod
    def _non_operational_status(alive):
        # Status report expected whenever Scrapyd is not operational.
        return {
            'scrapyd_alive': alive,
            'scrapyd_operational': False,
            'scrapyd_projects': None,
            'spiders': None,
            'queues': None,
            'summarized_queue': None,
        }

    def setUp(self):
        # Always clear the cache so that tests are independent.
        Scrapyd._CACHE.clear()

        self.subject = Scrapyd(ScrapydTest.URL)

    # ------------------------------------------------------------------
    # get_queues
    # ------------------------------------------------------------------

    def test_when_status_is_not_ok_then_it_should_report_an_error(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            fake_requests.get.return_value.json.return_value = {
                "status": "ERROR",
                "message": "Test",
            }

            with self.assertRaises(exc.HTTPBadGateway):
                self.subject.get_queues(['test'])

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    def test_when_queues_are_empty_then_it_should_return_empty_queues(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = self._no_jobs_payload()

            per_project, total = self.subject.get_queues(['test'])

            self.assertEqual({'test': self.EMPTY_QUEUE}, per_project)
            self.assertEqual(self.EMPTY_QUEUE, total)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    def test_when_queues_have_jobs_then_it_should_return_their_state(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = self._two_jobs_payload(
                start_time="2012-09-12 10:14:03.594664",
                end_time="2012-09-12 10:24:03.594664",
            )

            per_project, total = self.subject.get_queues(['test'])

            one_pending_one_finished = {
                'running': 0,
                'finished': 1,
                'pending': 1,
            }
            self.assertEqual({'test': one_pending_one_finished}, per_project)
            self.assertEqual(one_pending_one_finished, total)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    def test_when_a_request_is_repeated_then_it_should_query_just_once(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = self._no_jobs_payload()

            # Ask twice; both answers must match, yet only one HTTP request
            # may have been issued.
            for _ in range(2):
                per_project, total = self.subject.get_queues(['test'])
                self.assertEqual({'test': self.EMPTY_QUEUE}, per_project)
                self.assertEqual(self.EMPTY_QUEUE, total)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    # ------------------------------------------------------------------
    # get_projects
    # ------------------------------------------------------------------

    def test_when_there_are_no_project_then_it_should_get_an_empty_list(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = {"status": "ok", "projects": []}

            found = self.subject.get_projects()
            self.assertEqual([], found)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_PROJECTS_URL)

    def test_when_there_are_projects_then_it_should_get_a_list(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = {
                "status": "ok",
                "projects": ["proj1", "proj2"],
            }

            found = self.subject.get_projects()
            self.assertEqual(["proj1", "proj2"], found)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_PROJECTS_URL)

    # ------------------------------------------------------------------
    # get_jobs
    # ------------------------------------------------------------------

    def test_when_there_are_no_jobs_then_it_should_get_an_empty_dict(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = self._no_jobs_payload()

            found = self.subject.get_jobs(['test'])

            self.assertEqual({}, found)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    def test_when_there_are_jobs_then_it_should_return_them(self):
        # The jobs carry no dates on purpose: the time conversion that's
        # performed adds a configuration dependent offset and a small,
        # millisecond, error, which would make this test unreliable.
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = self._two_jobs_payload()

            found = self.subject.get_jobs(['test'])

            self.assertEqual(
                {
                    '2f16646cfcaf11e1b0090800272a6d06': {
                        'id': '2f16646cfcaf11e1b0090800272a6d06',
                        'spider': 'spider3',
                        'status': 'finished',
                    },
                    '78391cc0fcaf11e1b0090800272a6d06': {
                        'id': '78391cc0fcaf11e1b0090800272a6d06',
                        'project_name': 'spider1',
                        'status': 'pending',
                    },
                },
                found,
            )

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_JOBS_URL)

    # ------------------------------------------------------------------
    # get_spiders
    # ------------------------------------------------------------------

    def test_when_there_are_no_spiders_then_it_should_get_an_empty_list(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = {"status": "ok", "spiders": []}

            spiders = self.subject.get_spiders('test')

            self.assertEqual([], spiders)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_SPIDERS_URL)

    def test_when_there_are_spiders_then_it_should_return_them(self):
        with mock.patch(self._REQUESTS) as fake_requests:
            reply = fake_requests.get.return_value
            reply.json.return_value = {
                "status": "ok",
                "spiders": ["spider1", "spider2", "spider3"],
            }

            spiders = self.subject.get_spiders('test')

            self.assertEqual(["spider1", "spider2", "spider3"], spiders)

            fake_requests.get.assert_called_once_with(
                self.EXPECTED_LIST_SPIDERS_URL)

    # ------------------------------------------------------------------
    # get_operational_status
    # ------------------------------------------------------------------

    def test_when_scrapyd_is_down_then_it_should_make_no_further_reqs(self):
        with mock.patch(self._REQUESTS_GET) as fake_get:
            fake_get.return_value.status_code = 500

            report = self.subject.get_operational_status()

            self.assertEqual(self._non_operational_status(alive=False),
                             report)

            fake_get.assert_called_once_with(self.URL)

    def test_when_scrapyd_fails_then_it_should_not_be_operational(self):
        with mock.patch(self._REQUESTS_GET) as fake_get:
            # First call answers with a 200, the second one raises.
            fake_get.side_effect = [
                self._ok_response(),
                exc.HTTPBadGateway(detail="Test"),
            ]

            report = self.subject.get_operational_status()

            self.assertEqual(self._non_operational_status(alive=True),
                             report)

            fake_get.assert_any_call(self.URL)
            fake_get.assert_called_with(self.EXPECTED_LIST_PROJECTS_URL)

    def test_when_scrapyd_responds_then_it_should_provide_an_ok_status(self):
        with mock.patch(self._REQUESTS_GET) as fake_get:
            alive_reply = self._ok_response()
            projects_reply = self._ok_response({
                'status': 'ok',
                'projects': ['test', 'p2'],
            })
            first_spiders_reply = self._ok_response({
                'status': 'ok',
                'spiders': ['p1_sp1', 'p1_sp2'],
            })
            second_spiders_reply = self._ok_response({
                'status': 'ok',
                'spiders': ['p2_sp1', 'p2_sp2'],
            })
            first_jobs_reply = self._ok_response(self._two_jobs_payload())
            second_jobs_reply = self._ok_response({
                'status': 'ok',
                "pending": [{
                    "id": "XXXX1cc0fcaf11e1b0090800272a6d06",
                    "project_name": "spider10",
                }],
                "finished": [],
                "running": [{
                    "id": "XXXX646cfcaf11e1b0090800272a6d06",
                    "spider": "spider30",
                }],
            })

            # Replies are handed out in exactly this order, one per call.
            fake_get.side_effect = [
                alive_reply,
                projects_reply,
                first_spiders_reply,
                second_spiders_reply,
                first_jobs_reply,
                second_jobs_reply,
            ]

            report = self.subject.get_operational_status()

            expected_report = {
                'scrapyd_alive': True,
                'scrapyd_operational': True,
                'scrapyd_projects': ['test', 'p2'],
                'spiders': {
                    'test': ['p1_sp1', 'p1_sp2'],
                    'p2': ['p2_sp1', 'p2_sp2'],
                },
                'queues': {
                    'test': {'finished': 1, 'pending': 1, 'running': 0},
                    'p2': {'finished': 0, 'pending': 1, 'running': 1},
                },
                'summarized_queue': {
                    'finished': 1,
                    'pending': 2,
                    'running': 1,
                },
            }
            self.assertEqual(expected_report, report)

            # More requests than these are actually performed.
            for requested_url in (self.URL,
                                  self.EXPECTED_LIST_SPIDERS_URL,
                                  self.EXPECTED_LIST_JOBS_URL,
                                  self.EXPECTED_LIST_PROJECTS_URL):
                fake_get.assert_any_call(requested_url)

    # ------------------------------------------------------------------
    # schedule_job
    # ------------------------------------------------------------------

    def test_when_a_job_is_started_ok_then_we_return_its_id(self):
        with mock.patch(self._REQUESTS_POST) as fake_post:
            fake_post.return_value.json.return_value = {
                "status": "ok",
                "jobid": "XXX",
            }

            started_id = self.subject.schedule_job('project', 'spider', {})

            self.assertEqual('XXX', started_id)
コード例 #3
0
def request_history(request):
    """Return the history of a request.

    The view expects to receive a requestid (read from
    ``request.matchdict['requestid']``).
    The view returns a dictionary with the following keys:
     * request: dictionary with main request information stored in the DB
     * jobids_info: dictionary whose keys are all jobids related to
        requestid. Each value is a dictionary with jobid information.
     * history: List with history content.
     * status: String with the requestid status

    When the request has no jobids, or one of its jobids is missing from the
    statuses reported by Scrapyd, ``history`` is None and ``status`` is
    ``UNAVAILABLE``.

    Raises ``exc.HTTPBadGateway`` when the request id is not an integer or
    when the DB returns no information for it.

    Example of request:
        {'creation': u'2014-07-30 19:38:53.659982',
         'params': u'{"searchterms_str": "laundry detergent", "group_name": "Gabo test1", "site": "walmart", "quantity": "100"}',
         'requestid': 252,
         'jobids': (u'236c257c182111e4906150465d4bc079',),
         'remote_ip': u'127.0.0.1',
         'group_name': u'Gabo test1',
         'type': u'command',
         'site': u'walmart',
         'name': u'cat1'}

    Example of jobids_info:
        {u'17ae4f1c182111e4906150465d4bc079': {
            'spider': u'walmart_products',
            'status': 'finished',
            'start_time': u'2014-07-30 16:38:34.218200',
            'end_time': u'2014-07-30 16:40:50.766396',
            'id': u'17ae4f1c182111e4906150465d4bc079'},
         u'236c257c182111e4906150465d4bc079': {
            'spider': u'walmart_products',
            'status': 'finished',
            'start_time': '2014-07-30 16:38:54.116999',
            'end_time': u'2014-07-30 16:41:06.851201',
            'id': u'236c257c182111e4906150465d4bc079'}}

    Example of history:
        [["2014-07-30 21:13:02.829964", "1 hour", "Request arrived from 127.0.0.1."],
        ["2014-07-30 21:16:02.829964", "1 hour", "Request Finished"]]
    """
    settings = request.registry.settings

    try:
        requestid = int(request.matchdict['requestid'])
    except ValueError:
        # NOTE(review): a malformed id would normally map to a 4xx response;
        # the exception type is kept for backward compatibility.
        raise exc.HTTPBadGateway(detail="Request id is not valid")

    # Get request info.  The connection is closed in a ``finally`` clause so
    # it is released even when one of the queries raises.
    dbinterf = web_runner.db.DbInterface(settings['db_filename'],
                                         recreate=False)
    try:
        request_info = dbinterf.get_request(requestid)
        operations_info = dbinterf.get_req_operations(requestid)
    finally:
        dbinterf.close()

    if not request_info:
        # The requestid is not recognized
        raise exc.HTTPBadGateway(detail="No info from Request id")

    # Get the jobid status dictionary.
    scrapyd_baseurl = settings[SCRAPYD_BASE_URL_KEY]
    scrapyd_interf = Scrapyd(scrapyd_baseurl)
    jobids_status = scrapyd_interf.get_jobs()

    try:
        # Get only the jobids of the current request.
        jobids_info = {
            jobid: jobids_status[jobid]
            for jobid in request_info['jobids']
        }
    except KeyError:
        # At least one jobid has no reported status; treat the whole
        # request as unavailable.
        jobids_info = None

    if jobids_info:
        history = _get_history(requestid, request_info, jobids_info,
                               operations_info)
        status = get_request_status(request_info, jobids_status)
    else:
        history = None
        status = UNAVAILABLE

    info = {
        'request': request_info,
        'jobids_info': jobids_info,
        'history': history,
        'status': status,
    }
    return info