コード例 #1
0
 def build_file_error_from_response(self, response, **kwargs):
     """
     Build a FileError item from a response.

     The item holds the URL of the response's request and the response status
     (as ``errors['http_code']``). If the request's meta contains ``file_name``,
     it is copied to the item. Keyword arguments are merged in last.
     """
     request = response.request
     error = FileError({
         'url': request.url,
         'errors': {'http_code': response.status},
     })
     meta = request.meta
     if 'file_name' in meta:
         error['file_name'] = meta['file_name']
     error.update(kwargs)
     return error
コード例 #2
0
 def build_file_error_from_response(self, response, **kwargs):
     """
     Create the FileError item to yield for the response to a request.

     The request URL and the response status are always recorded; the request's
     ``file_name`` meta value is added when present, and ``kwargs`` are applied
     on top of the item before it is returned.
     """
     source = response.request
     url = source.url
     details = {'http_code': response.status}
     file_error = FileError({'url': url, 'errors': details})
     if 'file_name' in source.meta:
         file_error['file_name'] = source.meta['file_name']
     file_error.update(kwargs)
     return file_error
コード例 #3
0
def test_process_file_error():
    """Validate.process_item returns an item equal to a FileError that has all three fields set."""
    validator = Validate()
    file_error = FileError({'file_name': 'test', 'url': 'http://test.com', 'errors': 'Error'})
    processed = validator.process_item(file_error, None)
    assert processed == file_error
コード例 #4
0
def test_process_file_item_error_error():
    """
    Validate.process_item raises ValidationError for a FileError without 'errors',
    and again after 'errors' is set but the URL is replaced by 'not an url'.
    """
    validator = Validate()
    file_error = FileError({'file_name': 'test', 'url': 'http://test.com'})

    # First attempt: the 'errors' key has not been set.
    with pytest.raises(ValidationError):
        validator.process_item(file_error, None)

    # Second attempt: 'errors' is set, but the URL value is changed.
    file_error.update({'errors': 'Error', 'url': 'not an url'})
    with pytest.raises(ValidationError):
        validator.process_item(file_error, None)
コード例 #5
0
def test_process_item_file_error():
    """Sample.process_item raises DropItem for a FileError when the spider is created with sample=1."""
    sampler = Sample()
    sample_spider = spider_with_crawler(sample=1)
    file_error = FileError({'file_name': 'test', 'url': 'http://test.com', 'errors': 'error'})

    with pytest.raises(DropItem):
        sampler.process_item(file_error, sample_spider)
コード例 #6
0
 def build_file_error_from_response(self, response, **kwargs):
     """
     Build a FileError from a response.

     The fields are the request URL, the response status (as
     ``errors['http_code']``) and, when the request's meta contains
     ``kf_filename``, that value as ``file_name``. Keyword arguments are merged
     over these fields before the FileError is constructed.
     """
     request = response.request
     fields = {'url': request.url, 'errors': {'http_code': response.status}}
     meta = request.meta
     if 'kf_filename' in meta:
         fields['file_name'] = meta['kf_filename']
     return FileError({**fields, **kwargs})
コード例 #7
0
def test_file_error():
    """LatestReleaseDate.process_item raises DropItem for a FileError when spider.latest is True."""
    latest_spider = spider_with_crawler()
    latest_spider.latest = True
    latest_pipeline = LatestReleaseDate()
    file_error = FileError({'file_name': 'test', 'url': 'http://test.com', 'errors': 'error'})

    with pytest.raises(DropItem):
        latest_pipeline.process_item(file_error, latest_spider)
コード例 #8
0
def test_item_scraped_file_error(sample, is_sample, ok, tmpdir, caplog):
    """
    Check what KingfisherProcessAPI.item_scraped posts for a FileError item.

    ``requests.post`` is patched to return a mock response whose ``ok`` attribute
    is the ``ok`` argument and whose status code is 400. When ``ok`` is falsy, a
    single WARNING record is expected. In every case, exactly one POST to the
    file_errors endpoint with the expected form data is expected.
    """
    spider = spider_with_files_store(tmpdir, sample=sample)
    extension = KingfisherProcessAPI.from_crawler(spider.crawler)

    with patch('requests.post') as post:
        post.return_value = Mock(ok=ok, status_code=400)

        file_error = FileError({
            'file_name': 'file.json',
            'url': 'https://example.com/remote.json',
            'errors': {'http_code': 500},
        })
        extension.item_scraped(file_error, spider)

        if not ok:
            assert len(caplog.records) == 1
            record = caplog.records[0]
            assert record.name == 'test'
            assert record.levelname == 'WARNING'
            assert record.message == (
                'Failed to post [https://example.com/remote.json]. File Errors API status code: 400'
            )

        post.assert_called_once_with(
            'http://httpbin.org/anything/api/v1/submit/file_errors/',
            headers={'Authorization': 'ApiKey xxx'},
            proxies={'http': None, 'https': None},
            data={
                'collection_source': 'test',
                'collection_data_version': '2001-02-03 04:05:06',
                'collection_sample': is_sample,
                'file_name': 'file.json',
                'url': 'https://example.com/remote.json',
                # Specific to this test case.
                'errors': '{"http_code": 500}',
            },
        )
コード例 #9
0
def test_item_scraped_file_error(caplog):
    """
    After KingfisherItemCount.item_scraped handles a FileError, the
    'fileerror_count' stat is 1 and the file and file item counts are 0
    (using 0 as the default).
    """
    spider = spider_with_crawler()
    extension = KingfisherItemCount.from_crawler(spider.crawler)
    file_error = FileError({
        'url': 'https://example.com/remote.json',
        'errors': {'http_code': 404},
    })

    extension.item_scraped(file_error, spider)

    stats = extension.stats
    assert stats.get_value('fileerror_count') == 1
    for other_count in ('file_count', 'fileitem_count'):
        assert stats.get_value(other_count, 0) == 0
コード例 #10
0
 def build_file(self, file_name=None, url=None, data=None, **kwargs):
     """
     Return a FileError if the JSON data has a "status" other than 200; otherwise,
     defer to the parent class's ``build_file``.

     For a FileError, the parsed data is used as the item's ``errors``, with the
     "status" value copied to an "http_code" key.
     """
     payload = json.loads(data)
     # Some files contain invalid record packages, e.g.:
     # {
     #   "status": 500,
     #   "detail": "error"
     # }
     if 'status' not in payload or payload['status'] == 200:
         return super().build_file(file_name=file_name, url=url, data=data, **kwargs)

     payload['http_code'] = payload['status']
     return FileError({'file_name': file_name, 'url': url, 'errors': payload})
コード例 #11
0
def test_item_scraped_file_error(sample, is_sample, ok, tmpdir, caplog):
    """
    Check the request made by KingfisherProcessAPI.item_scraped for a FileError item.

    The response code is patched to 200 when ``ok`` is truthy and to 400 otherwise.
    The decoded response body is expected to describe a POST to the file_errors
    endpoint with the collection fields and the serialized errors as form data.
    When ``ok`` is falsy, a single WARNING record is expected.

    NOTE(review): this is a generator function; presumably a decorator that is not
    visible here drives the ``yield`` expressions - confirm.
    """
    # Force the status code seen on treq responses, regardless of what the server returns.
    with patch('treq.response._Response.code',
               new_callable=PropertyMock) as mocked:
        mocked.return_value = 200 if ok else 400

        spider = spider_with_files_store(tmpdir, sample=sample)
        extension = KingfisherProcessAPI.from_crawler(spider.crawler)

        item = FileError({
            'file_name': 'file.json',
            'url': 'https://example.com/remote.json',
            'errors': {
                'http_code': 500
            },
        })

        # NOTE(review): item_scraped presumably returns a deferred that resolves to
        # the HTTP response - verify against the extension.
        response = yield extension.item_scraped(item, spider)
        data = yield response.json()

        # The form values that the extension is expected to submit.
        form = {
            'collection_source': 'test',
            'collection_data_version': '2001-02-03 04:05:06',
            'collection_sample': str(is_sample),
            'file_name': 'file.json',
            'url': 'https://example.com/remote.json',
            # Specific to FileError.
            'errors': '{"http_code": 500}',
        }

        # NOTE(review): the body looks like an echo of the request (httpbin's
        # "anything" endpoint) - confirm.
        assert data['method'] == 'POST'
        assert data[
            'url'] == 'http://httpbin.org/anything/api/v1/submit/file_errors/'
        assert data['headers']['Authorization'] == 'ApiKey xxx'
        assert data['form'] == form
        assert data['args'] == {}
        assert data['data'] == ''
        assert data['files'] == {}

        # A warning is only expected when the patched status code is 400.
        if not ok:
            message = 'create_file_error failed (https://example.com/remote.json) with status code: 400'

            assert len(caplog.records) == 1
            assert caplog.records[0].name == 'test'
            assert caplog.records[0].levelname == 'WARNING'
            assert caplog.records[0].message == message
    AddPackageMiddleware,
    LineDelimitedMiddleware,
    ResizePackageMiddleware,
    RootPathMiddleware,
    ReadDataMiddleware,
])
@pytest.mark.parametrize('item', [
    File({
        'file_name': 'test',
        'data': 'data',
        'data_type': 'release_package',
        'url': 'http://test.com',
    }),
    FileError({
        'file_name': 'test',
        'url': 'http://test.com',
        'errors': ''
    }),
])
def test_yield_items(middleware_class, item):
    """The first item from the middleware's process_spider_output equals the input item."""
    spider = spider_with_crawler()

    output = middleware_class().process_spider_output(None, [item], spider)
    first = next(output)

    assert item == first


@pytest.mark.parametrize('data_type,data,root_path', [