Ejemplo n.º 1
0
def test_multivalued_subrules_should_generate_list_of_subitems(shining):
    """A nested ``Items`` with ``foreach`` should produce a list of dicts.

    The expected ``cast`` value is a list holding one ``name``/``character``
    dict per cast entry.
    """
    row_rules = [
        Rule(key="name", value=Path("./td[1]/a/text()")),
        Rule(key="character", value=Path("./td[2]/text()")),
    ]
    cast_items = Items(foreach='//table[@class="cast"]/tr', rules=row_rules)
    extractor = Items([Rule(key="cast", value=cast_items)])

    expected = {
        "cast": [
            {"name": "Jack Nicholson", "character": "Jack Torrance"},
            {"name": "Shelley Duvall", "character": "Wendy Torrance"},
        ]
    }
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 2
0
def test_section_multiple_roots_should_raise_error(shining):
    """Extracting with the ``//div`` section path should raise ``ValueError``."""
    rules = [
        Rule(
            key="director",
            value=Items(section="//div",
                        rules=[Rule(key="name", value=Path("./text()"))]),
        )
    ]
    # Keep only the extraction call inside ``pytest.raises``: a ValueError
    # raised while merely building the rules must not let the test pass for
    # the wrong reason.
    with pytest.raises(ValueError):
        Items(rules)(shining)
Ejemplo n.º 3
0
def test_section_no_roots_should_return_empty_result(shining):
    """With the ``//foo`` section path the overall result should be empty."""
    name_rule = Rule(key="name", value=Path("./text()"))
    director_items = Items(section="//foo", rules=[name_rule])
    extractor = Items([Rule(key="director", value=director_items)])

    result = extractor(shining)
    assert result == {}
Ejemplo n.º 4
0
def test_extracted_text_should_be_transformable(shining):
    """A ``transform=int`` on the path should give an integer ``year``."""
    year_path = Path('//span[@class="year"]/text()', transform=int)
    extractor = Items([Rule(key="year", value=year_path)])

    result = extractor(shining)
    assert result == {"year": 1980}
Ejemplo n.º 5
0
def test_extracted_texts_should_be_concatenated_using_given_separator(shining):
    """With ``sep=", "`` the cast names should come back comma-joined."""
    names_path = Path('//table[@class="cast"]/tr/td[1]/a/text()', sep=", ")
    extractor = Items([Rule(key="cast_names", value=names_path)])

    expected = {"cast_names": "Jack Nicholson, Shelley Duvall"}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 6
0
def test_item_with_no_data_should_be_excluded(shining):
    """Only ``title`` is expected in the result; ``foo`` should be absent."""
    title_rule = Rule(key="title", value=Path("//title/text()"))
    foo_rule = Rule(key="foo", value=Path("//foo/text()"))
    extractor = Items([title_rule, foo_rule])

    result = extractor(shining)
    assert result == {"title": "The Shining"}
Ejemplo n.º 7
0
def test_empty_values_should_be_excluded_from_multivalued_item_list(shining):
    """The ``foos`` multi-valued rule should leave no key in the result."""
    foos_path = Path(foreach='//ul[@class="foos"]/li', path="./text()")
    extractor = Items([Rule(key="foos", value=foos_path)])

    result = extractor(shining)
    assert result == {}
Ejemplo n.º 8
0
def test_multivalued_item_should_be_list(shining):
    """A ``Path`` with ``foreach`` should produce a list value for ``genres``."""
    genres_path = Path(foreach='//ul[@class="genres"]/li', path="./text()")
    extractor = Items([Rule(key="genres", value=genres_path)])

    result = extractor(shining)
    assert result == {"genres": ["Horror", "Drama"]}
Ejemplo n.º 9
0
def test_generated_key_none_should_be_excluded(shining):
    """With ``./foo/text()`` as the key path, the result should stay empty."""
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=Path("./foo/text()"),
        value=Path("./p/text()"),
    )
    extractor = Items([info_rule])

    result = extractor(shining)
    assert result == {}
Ejemplo n.º 10
0
def test_key_should_be_generatable_using_path(shining):
    """Keys given as a ``Path`` should be taken from the document itself."""
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=Path("./h3/text()"),
        value=Path("./p/text()"),
    )
    extractor = Items([info_rule])

    expected = {"Runtime:": "144 minutes", "Language:": "English"}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 11
0
def test_multiple_rules_should_generate_multiple_items(shining):
    """Two rules should yield two entries, ``title`` and ``year``."""
    title_rule = Rule(key="title", value=Path("//title/text()"))
    # The key is deliberately passed positionally here, as in the original call.
    year_rule = Rule(
        "year",
        value=Path('//span[@class="year"]/text()', transform=int),
    )
    extractor = Items([title_rule, year_rule])

    expected = {"year": 1980, "title": "The Shining"}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 12
0
def test_generated_key_should_be_transformable(shining):
    """A ``transform`` on the key path should be applied to generated keys."""

    def lower_without_last_char(s):
        # Lower-case the text and drop its final character.
        return s.lower()[:-1]

    key_path = Path("./h3/text()", transform=lower_without_last_char)
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=key_path,
        value=Path("./p/text()"),
    )
    extractor = Items([info_rule])

    expected = {"runtime": "144 minutes", "language": "English"}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 13
0
def test_subrules_should_generate_subitems(shining):
    """An ``Items`` used as a rule value should produce a nested dict."""
    name_rule = Rule(
        key="name",
        value=Path('//div[@class="director"]//a/text()'),
    )
    link_rule = Rule(
        key="link",
        value=Path('//div[@class="director"]//a/@href'),
    )
    director_items = Items(rules=[name_rule, link_rule])
    extractor = Items([Rule(key="director", value=director_items)])

    expected = {"director": {"name": "Stanley Kubrick", "link": "/people/1"}}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 14
0
def test_transformers_should_be_chainable(shining):
    """A ``chain`` of ``int`` and a century formula should give ``century`` 20."""

    def century_of(x):
        # Same arithmetic as the original inline lambda.
        return x // 100 + 1

    century_path = Path(
        '//span[@class="year"]/text()',
        transform=chain(int, century_of),
    )
    extractor = Items([Rule(key="century", value=century_path)])

    result = extractor(shining)
    assert result == {"century": 20}
Ejemplo n.º 15
0
def test_multivalued_items_should_be_transformable(shining):
    """``transform=str.lower`` should give lower-cased entries in ``genres``."""
    genres_path = Path(
        foreach='//ul[@class="genres"]/li',
        path="./text()",
        transform=str.lower,
    )
    extractor = Items([Rule(key="genres", value=genres_path)])

    result = extractor(shining)
    assert result == {"genres": ["horror", "drama"]}
Ejemplo n.º 16
0
def test_section_should_set_root_for_queries(shining):
    """Sub-rule paths (``./text()``, ``./@href``) should resolve under ``section``."""
    link_fields = [
        Rule(key="name", value=Path("./text()")),
        Rule(key="link", value=Path("./@href")),
    ]
    director_items = Items(
        section='//div[@class="director"]//a',
        rules=link_fields,
    )
    extractor = Items([Rule(key="director", value=director_items)])

    expected = {"director": {"name": "Stanley Kubrick", "link": "/people/1"}}
    result = extractor(shining)
    assert result == expected
Ejemplo n.º 17
0
def test_subitems_should_be_transformable(shining):
    """A ``transform`` on a nested ``Items`` should turn each sub-item into a string."""

    def describe(x):
        # Format a sub-item mapping as "<name> as <character>".
        return "%(name)s as %(character)s" % x

    row_rules = [
        Rule(key="name", value=Path("./td[1]/a/text()")),
        Rule(key="character", value=Path("./td[2]/text()")),
    ]
    cast_items = Items(
        foreach='//table[@class="cast"]/tr',
        rules=row_rules,
        transform=describe,
    )
    extractor = Items([Rule(key="cast", value=cast_items)])

    expected_cast = [
        "Jack Nicholson as Jack Torrance",
        "Shelley Duvall as Wendy Torrance",
    ]
    result = extractor(shining)
    assert result == {"cast": expected_cast}
Ejemplo n.º 18
0
def test_extracted_text_should_be_scalar(shining):
    """A plain ``Path`` rule should give a single string, not a list."""
    title_rule = Rule(key="title", value=Path("//title/text()"))
    extractor = Items([title_rule])

    result = extractor(shining)
    assert result == {"title": "The Shining"}
Ejemplo n.º 19
0
def test_item_with_false_value_should_be_included():
    """A value transformed to ``False`` should still appear in the result."""
    markup = '<root><foo val=""/></root>'
    foo_rule = Rule(key="foo", value=Path("//foo/@val", transform=bool))
    extractor = Items([foo_rule])

    result = extractor(build_tree(markup))
    assert result == {"foo": False}
Ejemplo n.º 20
0
def test_item_with_empty_str_value_should_be_included():
    """An empty-string attribute value should still appear in the result."""
    markup = '<root><foo val=""/></root>'
    foo_rule = Rule(key="foo", value=Path("//foo/@val"))
    extractor = Items([foo_rule])

    result = extractor(build_tree(markup))
    assert result == {"foo": ""}
Ejemplo n.º 21
0
def test_extracted_texts_should_be_concatenated(shining):
    """The ``//h1//text()`` query should come back as one joined string."""
    heading_rule = Rule(key="full_title", value=Path("//h1//text()"))
    extractor = Items([heading_rule])

    result = extractor(shining)
    assert result == {"full_title": "The Shining (1980)"}
Ejemplo n.º 22
0
def test_empty_rules_should_return_empty_result(shining):
    """An ``Items`` built from no rules should extract an empty dict."""
    extractor = Items([])
    result = extractor(shining)
    assert result == {}