def test_set_attr_value_from_path_should_set_attribute_for_selected_elements(
        shining_content):
    """Check ``set_attr`` with a path-derived value on the genre list items.

    The pre-processing step targets the ``li`` children of the ``genres``
    list and asks for attribute ``foo`` to be set from ``./text()``. The
    extraction rule then reads ``@foo`` from every ``li`` carrying that
    attribute, and the test expects the two genre names back.
    """
    set_attr_step = {
        "op": "set_attr",
        "path": '//ul[@class="genres"]/li',
        "name": "foo",
        "value": {"path": "./text()"},
    }
    genres_rule = {
        "key": "genres",
        "value": {"foreach": "//li[@foo]", "path": "./@foo"},
    }
    spec = {"pre": [set_attr_step], "items": [genres_rule]}

    result = scrape(shining_content, spec)

    assert result == {"genres": ["Horror", "Drama"]}
def test_set_attr_value_from_path_empty_value_should_be_ignored(
        shining_content):
    """Check that ``set_attr`` sets nothing when its value path yields nothing.

    The value is taken from ``./@bar``; the test expects that no ``li`` ends
    up with a ``foo`` attribute, so the ``genres`` rule produces no entry and
    the overall result is an empty dict.
    """
    set_attr_step = {
        "op": "set_attr",
        "path": '//ul[@class="genres"]/li',
        "name": "foo",
        "value": {"path": "./@bar"},
    }
    genres_rule = {
        "key": "genres",
        "value": {"foreach": "//li[@foo]", "path": "./@foo"},
    }
    spec = {"pre": [set_attr_step], "items": [genres_rule]}

    result = scrape(shining_content, spec)

    assert result == {}
def test_remove_selected_none_should_not_cause_error(shining_content):
    """Check that a ``remove`` step whose path matches nothing is harmless.

    The step targets ``//tr[50]`` (presumably absent from the fixture
    document, as the test name suggests); the cast table is then expected to
    be extracted in full.
    """
    remove_step = {"op": "remove", "path": "//tr[50]"}
    name_rule = {"key": "name", "value": {"path": "./td[1]/a/text()"}}
    cast_rule = {
        "key": "cast",
        "value": {
            "foreach": '//table[@class="cast"]/tr',
            "items": [name_rule],
        },
    }
    spec = {"pre": [remove_step], "items": [cast_rule]}
    expected = {
        "cast": [
            {"name": "Jack Nicholson"},
            {"name": "Shelley Duvall"},
        ]
    }

    result = scrape(shining_content, spec)

    assert result == expected
def test_multivalued_subrules_should_generate_list_of_subitems(
        shining_content):
    """Check that a ``foreach`` rule with sub-rules yields a list of dicts.

    Each row of the cast table is expected to become one sub-item holding
    both the actor ``name`` and the ``character`` played.
    """
    name_rule = {"key": "name", "value": {"path": "./td[1]/a/text()"}}
    character_rule = {"key": "character", "value": {"path": "./td[2]/text()"}}
    cast_rule = {
        "key": "cast",
        "value": {
            "foreach": '//table[@class="cast"]/tr',
            "items": [name_rule, character_rule],
        },
    }
    expected = {
        "cast": [
            {"character": "Jack Torrance", "name": "Jack Nicholson"},
            {"character": "Wendy Torrance", "name": "Shelley Duvall"},
        ]
    }

    result = scrape(shining_content, {"items": [cast_rule]})

    assert result == expected
def test_shorthand_notation_should_be_path_and_transform(shining_content):
    """Check the ``"<path> | <transform>"`` string shorthand for a rule value.

    The year is given as a single string combining an XPath and ``int``; the
    test expects the extracted value to be the integer ``1980``.
    """
    year_rule = {"key": "year", "value": '//span[@class="year"]/text() | int'}

    result = scrape(shining_content, {"items": [year_rule]})

    assert result == {"year": 1980}
def test_empty_rules_should_return_empty_result(shining_content):
    """Check that a spec with an empty ``items`` list produces an empty dict."""
    spec = {"items": []}

    result = scrape(shining_content, spec)

    assert result == {}
def test_unknown_preprocessor_should_raise_error(shining_content):
    """Check that an unrecognised pre-processing ``op`` raises ``ValueError``."""
    unknown_step = {"op": "foo", "path": "//tr[1]"}
    spec = {"pre": [unknown_step]}

    # Only the scrape call sits under the context manager, so the expected
    # exception can only come from it.
    with pytest.raises(ValueError):
        scrape(shining_content, spec)
def test_extracted_texts_should_be_concatenated(shining_content):
    """Check that text matched by ``//h1//text()`` comes back as one string.

    The path selects all descendant text of the heading; the test expects the
    single combined value ``"The Shining (1980)"``.
    """
    full_title_rule = {"key": "full_title", "value": {"path": "//h1//text()"}}

    result = scrape(shining_content, {"items": [full_title_rule]})

    assert result == {"full_title": "The Shining (1980)"}
def test_extracted_text_should_be_scalar(shining_content):
    """Check that a plain ``path`` rule yields a scalar string, not a list."""
    title_rule = {"key": "title", "value": {"path": "//title/text()"}}

    result = scrape(shining_content, {"items": [title_rule]})

    assert result == {"title": "The Shining"}