def test_choice_lookup_queries_choice_bool_parser_source(
    pq_query,
    bool_query,
    result,
):
    """Check Choice lookups given as a bare query and as a Text parser.

    The "phone" choice is matched by a Bool parser configured with
    ``source="json_data"`` and a query built from ``bool_query``; the lookup
    itself is built from ``pq_query``.  Both lookup flavours must parse the
    same data bag to ``result``.
    """

    def build_choice(lookup):
        # A fresh Choice (and nested Bool) is created for each lookup flavour.
        phone_matcher = parsers.Bool(
            query=jp(bool_query),
            ccontains=["phone", "CELL"],
            source="json_data",
        )
        return parsers.Choice(
            choices=[
                ("phone", phone_matcher),
                ("accessory", ["phone"]),
            ],
            lookups=[lookup],
        )

    bag = DataBag(main=data_html.categories, json_data=data_dict.name)

    # Lookup supplied as a bare query object.
    from_query = build_choice(pq(pq_query))
    assert from_query.parse(bag) == result

    # Lookup supplied as a parser wrapping the same query.
    from_parser = build_choice(parsers.Text(pq(pq_query)))
    assert from_parser.parse(bag) == result
def test_bool_dict():
    """BoolDict over the size list yields a size -> bool mapping."""
    parser = parsers.BoolDict(
        pq("#size-variants li::items"),
        key_query=pq("::text"),
        val_query=pq("::attr(size-stock)"),
    )

    parsed = parser.parse(data_html.sizes)

    assert parsed == {"l": True, "xl": False, "xxl": True}
class ProductModel(models.ItemModel):
    """Item model whose ``item_type`` is a Choice over two lookups."""

    # Text pulled from the "#accessory .name" element.
    _item_category = parsers.Text(pq("#accessory .name::text"))
    # Text pulled from the "#accessory .type" element.
    _item_name = parsers.Text(pq("#accessory .type::text"))

    # NOTE(review): the lookups "name" and "category" presumably refer to the
    # two ``_item_*`` parsers above -- confirm against models.ItemModel.
    item_type = parsers.Choice(
        lookups=["name", "category"],
        choices=[
            ("phone", ["mobile"]),
            ("accessory", ["phone case"]),
        ],
    )
def test_data_variants_processor_html():
    """DataVariantsProcessor fed HTML data through a pq selector.

    Checks the number of produced data bags and the three ``color_data*``
    entries of the first one.
    """
    processor = processors.DataVariantsProcessor(
        query=pq("#color-variants .color::items"),
        key_parser=parsers.Text(pq("::text"), uppercase=True),
        new_source="color_data",
    )

    # parse_data's result is materialised so it can be counted and indexed.
    variants = list(processor.parse_data(data_html.prices_and_variants))
    assert len(variants) == 2

    first = variants[0]
    assert first["color_data"].text() == "Black"
    assert first["color_data_key"] == "BLACK"
    assert first["color_data_variants_len"] == 2
def test_dict():
    """Exercise parsers.Dict with key/value parsers, queries and defaults.

    The first three cases parse HTML (``data_html.sizes``); the remaining
    cases parse the dictionary fixture (``data_dict.sizes``) via ``jp``.
    """

    def size_items():
        # Fresh query per parser so no instance is shared between cases.
        return pq("#size-variants li::items")

    def stock_flag():
        return parsers.Bool(pq("::attr(size-stock)"), contains=["true"])

    as_bools = {"l": True, "xl": False, "xxl": True}

    # HTML: explicit key parser + value parser.
    html_parser = parsers.Dict(
        size_items(),
        key_parser=parsers.Text(pq("::text")),
        val_parser=stock_flag(),
    )
    assert html_parser.parse(data_html.sizes) == as_bools

    # HTML: key given as a query, value still a parser.
    html_parser = parsers.Dict(
        size_items(),
        key_query=pq("::text"),
        val_parser=stock_flag(),
    )
    assert html_parser.parse(data_html.sizes) == as_bools

    # HTML: key and value both given as queries -> raw attribute text.
    html_parser = parsers.Dict(
        size_items(),
        key_query=pq("::text"),
        val_query=pq("::attr(size-stock)"),
    )
    as_raw_text = {"l": "true", "xl": "false", "xxl": "true"}
    assert html_parser.parse(data_html.sizes) == as_raw_text

    # Dictionary data: every combination below must give the bool mapping.
    json_parser = parsers.Dict(
        jp("sizes"),
        key_parser=parsers.Text(),
        val_parser=parsers.Bool(),
    )
    assert json_parser.parse(data_dict.sizes) == as_bools

    json_parser = parsers.Dict(jp("sizes"))
    assert json_parser.parse(data_dict.sizes) == as_bools

    json_parser = parsers.Dict(jp("sizes"), key_parser=parsers.Text())
    assert json_parser.parse(data_dict.sizes) == as_bools

    json_parser = parsers.Dict(jp("sizes"), val_parser=parsers.Bool())
    assert json_parser.parse(data_dict.sizes) == as_bools

    # Dictionary data: Text value parser turns the values into strings.
    json_parser = parsers.Dict(
        jp("sizes"),
        key_parser=parsers.Text(),
        val_parser=parsers.Text(),
    )
    as_strings = {"l": "True", "xl": "False", "xxl": "True"}
    assert json_parser.parse(data_dict.sizes) == as_strings
def test_pq_query_html():
    """The ``::html`` pseudo selector returns the inner HTML of ``.name``."""
    inner_html = pq(".name::html").get(data_html.item_with_breadcrumbs)

    # NOTE(review): replace(" ", "") removes every single space, yet the
    # expected literal below contains spaces.  Presumably runs of whitespace
    # in one or both literals were collapsed somewhere -- confirm against the
    # upstream test before relying on this assertion.
    normalized = inner_html.strip().replace(" ", "")

    assert normalized == '<div class="brand">EasyData</div>\nTest Product Item'
def test_pq_query_remove_query():
    """``remove_query`` drops the ``.brand`` node before text extraction."""
    name_without_brand = pq(".name::text", remove_query=".brand")

    extracted = name_without_brand.get(data_html.item_with_breadcrumbs)

    assert extracted == "Test Product Item"
def test_pq_query_attr_href():
    """The ``::href`` pseudo selector extracts the link of ``#url``."""
    href_query = pq("#url::href")
    assert (
        href_query.get(data_html.item_with_breadcrumbs)
        == "https://demo.com/product/123"
    )
def test_pq_query_attr_src():
    """The ``::src`` pseudo selector extracts an image source URL."""
    src_query = pq(".images img::src")
    assert (
        src_query.get(data_html.item_with_breadcrumbs)
        == "https://demo.com/img1.jpg"
    )
def test_pq_query_attr_content():
    """The ``::content`` pseudo selector reads the ``content`` attribute."""
    content_query = pq('[name="category"]::content')
    extracted = content_query.get(data_html.item_with_breadcrumbs)
    assert extracted == "phone"
def test_pq_query_attr_name():
    """The ``::name`` pseudo selector reads the ``name`` attribute."""
    name_query = pq('[name="category"]::name')
    extracted = name_query.get(data_html.item_with_breadcrumbs)
    assert extracted == "category"
def test_pq_query_attr_all():
    """``::attr(...)-all`` reads a custom attribute from an HTML fragment."""
    fragment = '<input some-strange-value="EasyData">'
    attr_query = pq("input::attr(some-strange-value)-all")

    extracted = attr_query.get(fragment)

    assert extracted == "EasyData"
def test_pq_query_attr():
    """``::attr(value)`` reads the ``value`` attribute of the category node."""
    value_query = pq('[name="category"]::attr(value)')
    extracted = value_query.get(data_html.item_with_breadcrumbs)
    assert extracted == "smartphone"
def test_pq_query_items(query):
    """Each supplied ``query`` must yield the module-level expected images."""
    extracted = pq(query).get(data_html.item_with_breadcrumbs)
    assert extracted == exp_result_images
def test_pq_query_all(query, result):
    """``pq`` gives ``result`` whether the query is positional or a keyword."""
    html = data_html.item_with_breadcrumbs

    positional = pq(query)
    assert positional.get(html) == result

    by_keyword = pq(query=query)
    assert by_keyword.get(html) == result
def test_pq_query_text(query, test_html, result):
    """Running ``query`` against ``test_html`` must produce ``result``."""
    extracted = pq(query).get(test_html)
    assert extracted == result
def test_pq_query_attr_val():
    """The ``::val`` pseudo selector yields "smartphone" for the category node."""
    val_query = pq('[name="category"]::val')
    extracted = val_query.get(data_html.item_with_breadcrumbs)
    assert extracted == "smartphone"
)  # NOTE(review): presumably closes a decorator call that starts above the visible source
def test_bool_contains(contains_keys, test_data, result):
    """Bool built with ``contains`` keys must parse ``test_data`` to ``result``."""
    bool_parser = parsers.Bool(contains=contains_keys)
    # Identity check: the parser must return the bool singleton itself.
    assert bool_parser.parse(test_data) is result


@pytest.mark.parametrize(
    "ccontains_keys, test_data, result",
    [
        (["Pro 13"], data_text.title, True),
        (["something", "Pro 13"], data_text.title, True),
        # The same key in lower case is expected NOT to match.
        (["pro 13"], data_text.title, False),
    ],
)
def test_bool_contains_case(ccontains_keys, test_data, result):
    """Bool built with ``ccontains`` keys must parse ``test_data`` to ``result``."""
    bool_parser = parsers.Bool(ccontains=ccontains_keys)
    assert bool_parser.parse(test_data) is result


@pytest.mark.parametrize(
    "query, contains_query, test_data, result",
    [
        (pq("#full-name::text"), pq(".brand::text"), "Easybook Pro 13", False),
        (pq("#full-name::text"), pq(".brand::text-items"), "Easybook Pro 13",
         False),
    ],
)
def test_bool_contains_query(query, contains_query, test_data, result):
    """Bool with ``contains_query`` must parse ``test_data`` to ``result``.

    Both parametrized cases pass a plain string as ``test_data`` and expect
    ``False``.
    """
    bool_parser = parsers.Bool(query, contains_query=contains_query)
    assert bool_parser.parse(test_data) is result