async def test_should_write_bytes_when_giving_content_in_write_mode_without_custom_encoder(self, tmp_path):
    """Write one dict with ``mode='w'`` and check it is the only item read back."""
    target = tmp_path / 'data.mp'
    payload = {'name': 'Kevin', 'fruit': 'water melon'}

    written = await write_mp(target, payload, mode='w')
    # The writer must return a strictly positive value.
    assert written > 0

    stored = [entry async for entry in read_mp(target)]
    assert [payload] == stored
async def test_should_write_bytes_when_giving_content_in_append_mode_without_custom_encoder(self, tmp_path):
    """Append several values one at a time and check they are read back in order."""
    target = tmp_path / 'data.mp'
    payloads = ['foo', 4, {'fruit': 'water melon'}, [1, 4]]

    for payload in payloads:
        written = await write_mp(target, payload, mode='a')
        # Every individual append must return a strictly positive value.
        assert written > 0

    stored = [entry async for entry in read_mp(target)]
    assert payloads == stored
async def test_should_write_bytes_when_giving_content_in_write_mode_with_custom_encoder(
    self, tmp_path, encode_datetime, decode_datetime
):
    """Write a dict holding a ``datetime`` with a custom encoder, then decode it back."""
    target = tmp_path / 'data.mp'
    payload = {'name': 'Kevin', 'date': datetime.now()}

    # The destination is passed as a string here, while the read below uses the path object.
    written = await write_mp(f'{target}', payload, mode='w', encoder=encode_datetime)
    assert written > 0

    stored = [entry async for entry in read_mp(target, decoder=decode_datetime)]
    assert [payload] == stored
async def test_should_return_python_objects_when_reading_file_with_custom_decoder(
    self, tmp_path, decode_datetime, create_msgpack_file
):
    """Read a file prepared by a fixture through a custom decoder, from a str and a path object."""
    expected = ['hello', datetime.now()]
    target = tmp_path / 'data.mp'
    create_msgpack_file(target, expected)

    # The reader is exercised with the location given both as a string and as a path object.
    for location in (str(target), target):
        decoded = [entry async for entry in read_mp(location, decoder=decode_datetime)]
        assert decoded == expected
async def test_should_return_python_objects_when_reading_file_without_custom_decoder(
    self, tmp_path, create_msgpack_file
):
    """Read a file prepared by a fixture without any decoder, from a str and a path object."""
    expected = [[1, 2], 'hello', {'fruit': 'apple'}]
    target = tmp_path / 'data.mp'
    create_msgpack_file(target, expected)

    # The reader is exercised with the location given both as a string and as a path object.
    for location in (f'{target}', target):
        decoded = [entry async for entry in read_mp(location)]
        assert decoded == expected
async def test_should_write_bytes_when_giving_content_in_append_mode_with_custom_encoder(
    self, tmp_path, encode_datetime, decode_datetime
):
    """Append values (including a ``datetime``) with a custom encoder, then decode them back."""
    target = tmp_path / 'data.mp'
    payloads = ['foo', datetime.now()]

    for payload in payloads:
        # The destination is passed as a string here, while the read below uses the path object.
        written = await write_mp(f'{target}', payload, mode='a', encoder=encode_datetime)
        assert written > 0

    stored = [entry async for entry in read_mp(target, decoder=decode_datetime)]
    assert payloads == stored
async def common_assert(stats: SpiderStatistics, backup_path: Path):
    """Run the shared checks on spider statistics and on the items stored in the backup file.

    The statistics must report no unreachable or robot-excluded URLs and a
    positive total time. Every item read from ``backup_path`` must carry a
    ``datetime`` under ``'date'``, and exactly three items must have
    ``'Albert Einstein'`` as their ``'author'``.
    """
    assert stats.unreachable_urls == set()
    assert stats.robot_excluded_urls == set()
    assert stats.total_time > 0

    einstein_items = 0
    async for record in read_mp(backup_path, decoder=datetime_decoder):
        assert isinstance(record['date'], datetime)
        if record['author'] != 'Albert Einstein':
            continue
        einstein_items += 1

    assert einstein_items == 3
async def test_should_save_content_to_backup_file(self, tmp_path, capsys):
    """Save two items through a spider and check the backup file content and processor output."""

    def processor(item):
        # Leaves the item untouched; the printed marker proves the processor ran.
        print("I'm a processor")
        return item

    backup = tmp_path / 'backup.mp'
    first_fruit = {'fruit': 'pineapple'}
    second_fruit = {'fruit': 'orange'}
    config = Configuration(backup_filename=f'{backup.resolve()}', item_processors=[processor])
    static_spider = StaticSpider(urls=['https://foo.com'], parse=lambda x, y: None, config=config)

    for fruit in (first_fruit, second_fruit):
        await static_spider.save_item(fruit)

    # Capture stdout before reading the backup, in the same order as the checks below.
    captured = capsys.readouterr()
    saved = [entry async for entry in read_mp(f'{backup.resolve()}')]

    assert [first_fruit, second_fruit] == saved
    assert "I'm a processor" in captured.out
async def test_should_raise_error_when_decoder_is_not_callable(self, decoder):
    """Check that iterating ``read_mp`` with the given ``decoder`` raises ``TypeError``."""
    expected_message = f'{decoder} is not callable'

    with pytest.raises(TypeError) as exc_info:
        async for entry in read_mp('foo', decoder=decoder):
            print(entry)

    # The message is checked after the context manager, once the exception has been captured.
    assert expected_message == str(exc_info.value)