def test_relational_comparison_between_a_node_set_and_a_string():
    """Compare the <div> node set against string literals using >, < and <=."""
    markup = """ <div>9.9</div> <div>10.1</div>"""
    cases = (
        ('//div > "10"', 'true'),
        ('"10.1" < //div', 'false'),
        ('//div <= "9.9"', 'true'),
    )
    for query, outcome in cases:
        assert query_html_doc(markup, query) == outcome
def test_string_value_of_a_sequence_is_concatenation_of_all_items_unlike_node_set():
    """Check string() over a bound node set ('one') versus a bound literal sequence ('onetwo')."""
    markup = """ <p>one</p> <p>two</p>"""

    from_node_set = query_html_doc(markup, 'let $_ := //p/text() return string($_)')
    assert from_node_set == 'one'

    from_sequence = query_html_doc(markup, 'let $_ := ("one", "two") return string($_)')
    assert from_sequence == 'onetwo'
def test_relational_comparison_between_a_node_set_and_a_number():
    """Compare the <div> node set against numeric literals using >, < and <=."""
    markup = """ <div>9.9</div> <div>10.1</div>"""
    cases = (
        ('//div > 10', 'true'),
        ('10.1 < //div', 'false'),
        ('//div <= 9.9', 'true'),
    )
    for query, outcome in cases:
        assert query_html_doc(markup, query) == outcome
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """Check the output when a trailing ``, "..."`` follows a ``for ... return`` or ``->`` expression."""
    looped = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert looped == expected_result(""" 1 2 !""")

    summed_for = query_html_doc('', 'sum(for $x in //span return $x, "zero")')
    assert summed_for == 'zero'

    summed_arrow = query_html_doc('', 'sum(//span -> $_, "zero")')
    assert summed_arrow == 'zero'
def test_use_of_escapes_for_forbidden_characters_in_regex_replace_patterns():
    """Run two ``rr`` filter templates against an empty document and check the produced strings."""
    apostrophe_template = r"""`it's ${rr:\w{3}:dog::"a cat's"} life`"""
    assert query_html_doc('', apostrophe_template) == "it's a dog's life"

    colon_template = r'`${rr:: ::: let $x := "re: " return concat($x, "search")}`'
    assert query_html_doc('', colon_template) == 'research'
def test_ancestor_axis_selects_all_matching_ancestors():
    """Both ``ancestor::div`` and its ``^::div`` spelling must yield the same two <div> ancestors."""
    markup = """ <div> <section> <div> <p>text</p> </div> </section> </div>"""
    wanted = expected_result(""" <div> <section> <div> <p> text </p> </div> </section> </div> <div> <p> text </p> </div>""")

    for query in ('//p/ancestor::div', '//p/^::div'):
        assert query_html_doc(markup, query) == wanted
def test_sum_function_sums_number_interpretation_of_items_in_sequence():
    """sum() over the <span> elements gives 32; adding the div's value attribute gives 42.42."""
    markup = """ <span>30</span> <div value="10.42"></div> <span>2</span>"""

    spans_only = query_html_doc(markup, 'sum(//span)')
    assert spans_only == '32'

    spans_and_attribute = query_html_doc(markup, 'sum((//span, //div/@value))')
    assert spans_and_attribute == '42.42'
def test_filters_are_applied_to_all_items_in_sequence_when_input_is_not_atomic():
    """Apply the ``tru`` and ``rr`` template filters to //p and check the concatenated output."""
    markup = """ <p>Hello, world!</p> <p>Goodbye, world!</p>"""
    cases = (
        ('`${tru:8:://p}`', 'Hello,Goodbye,'),
        ('`${rr:world:test:://p}`', 'Hello, test!Goodbye, test!'),
    )
    for template, outcome in cases:
        assert query_html_doc(markup, template) == outcome
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """Filter <p> elements with matches(), once with a plain pattern and once with the "i" flag."""
    markup = """ <p>moe</p> <p>larry</p> <p>curly</p>"""

    anchored = query_html_doc(markup, '//p[matches(text(), "^l[ary]+")]/text()')
    assert anchored == expected_result('larry')

    case_insensitive = query_html_doc(markup, '//p[matches(text(), ".URL.", "i")]/text()')
    assert case_insensitive == expected_result('curly')
def test_class_function_returns_true_when_element_has_name_in_class_attribute():
    """Exercise class() with an explicit node argument and inside a predicate."""
    markup = """ <p class="not selected">not selected</p> <p class="foo bar">expected</p>"""
    cases = (
        ('class(//p[1], "foo")', 'false'),
        ('class(//p[2], "foo")', 'true'),
        ('//p[class("bar")]/text()', 'expected'),
    )
    for query, outcome in cases:
        assert query_html_doc(markup, query) == outcome
def test_class_function_returns_true_when_element_has_name_in_class_attribute():
    """class() is false for the first <p>, true for the second, and usable as a predicate."""
    markup = """ <p class="not selected">not selected</p> <p class="foo bar">expected</p>"""

    first_has_foo = query_html_doc(markup, 'class(//p[1], "foo")')
    assert first_has_foo == 'false'

    second_has_foo = query_html_doc(markup, 'class(//p[2], "foo")')
    assert second_has_foo == 'true'

    text_of_bar = query_html_doc(markup, '//p[class("bar")]/text()')
    assert text_of_bar == 'expected'
def test_text_content_normalization_is_applied_to_attribute_values_in_hash_constructor():
    """Build a hash from //p/text() and check its value by default and with preserve_space=True."""
    preserved = u'\u00a0non\u00a0breaking\u00a0spaces '
    markup = u'<p>{0}</p>'.format(preserved)
    query = 'hash {para: //p/text()}'

    normalized = json.loads(query_html_doc(markup, query))
    assert normalized['para'] == 'non breaking spaces'

    untouched = json.loads(query_html_doc(markup, query, preserve_space=True))
    assert untouched['para'] == preserved
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """A comma after a return/value clause: the for-loop output gains "!", both sums give 'zero'."""
    empty_doc = ''

    sequence_output = query_html_doc(empty_doc, 'for $x in (1 to 2) return $x, "!"')
    assert sequence_output == expected_result(""" 1 2 !""")

    sum_queries = (
        'sum(for $x in //span return $x, "zero")',
        'sum(//span -> $_, "zero")',
    )
    for sum_query in sum_queries:
        assert query_html_doc(empty_doc, sum_query) == 'zero'
def test_escapes_work_in_string_literals():
    """Evaluate the same literal in double-quoted, single-quoted and backtick form."""
    double_quoted = query_html_doc('', '"foo bar"')
    assert double_quoted == expected_result(""" foo bar""")

    single_quoted = query_html_doc('', "'foo bar'")
    assert single_quoted == expected_result(""" foo bar""")

    backticked = query_html_doc('', '`foo bar`')
    assert backticked == expected_result(""" foo bar""")
def test_relational_comparison_involving_two_node_sets():
    """Compare the <p> node set with the <div> node set using >, >= and <=."""
    markup = """ <p>9</p> <p>10</p> <div>10</div> <div>11</div>"""
    cases = (
        ('//p > //div', 'false'),
        ('//p >= //div', 'true'),
        ('//div[position()=1] <= //p', 'true'),
    )
    for query, outcome in cases:
        assert query_html_doc(markup, query) == outcome
def test_element_constructor_accepts_numbers_and_booleans():
    """Construct an element from a number and from false() and check the rendered markup."""
    # The expected markup is formatted with the element name; the resulting
    # strings are identical to writing the tag out literally.
    with_number = query_html_doc('', 'element test { 98.6 }')
    assert with_number == expected_result(""" <{tag}> 98.6 </{tag}>""".format(tag='test'))

    with_boolean = query_html_doc('', 'element test { false() }')
    assert with_boolean == expected_result(""" <{tag}> false </{tag}>""".format(tag='test'))
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """Check tokenize() with a regex separator, with the "i" flag, and under count()."""
    # Raw string: the separator pattern contains a backslash-dot. In a normal
    # literal "\." is an invalid escape sequence, which Python warns about;
    # the raw form passes exactly the same characters to the query.
    by_regex = query_html_doc('', r'tokenize("Moe:Larry:..Curly", ":\.*")')
    assert by_regex == expected_result(""" Moe Larry Curly""")

    ignoring_case = query_html_doc('', 'tokenize("HaxtaXpatience", "x", "i")')
    assert ignoring_case == expected_result(""" Ha ta patience""")

    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
def test_string_value_of_a_sequence_is_concatenation_of_all_items_unlike_node_set():
    """string() of the //p/text() binding is 'one'; of the ("one", "two") binding it is 'onetwo'."""
    markup = """ <p>one</p> <p>two</p>"""
    cases = (
        ('let $_ := //p/text() return string($_)', 'one'),
        ('let $_ := ("one", "two") return string($_)', 'onetwo'),
    )
    for query, outcome in cases:
        assert query_html_doc(markup, query) == outcome
def test_interpretation_of_div_and_mod_and_other_arithmetic_operators_as_operators_vs_node_tests():
    """Use ``div``, ``mod`` and ``*`` both as element names and as arithmetic operators."""
    div_markup = """ <div> </div>"""
    mod_markup = """ <mod> </mod>"""

    # "div" and "mod" appearing where a node test is expected.
    found_div = query_html_doc(div_markup, 'div', wrap_body=False)
    assert found_div == expected_result(div_markup)
    found_mod = query_html_doc(mod_markup, '/ mod', wrap_body=False)
    assert found_mod == expected_result(mod_markup)
    for markup, outcome in ((div_markup, 'true'), (mod_markup, 'false')):
        assert query_html_doc(markup, 'boolean(div)', wrap_body=False) == outcome

    # "div" as a function argument following a comma.
    after_comma = query_html_doc(
        '<div>bar</div>',
        'starts-with(concat("foo ", div), "foo ba")',
        wrap_body=False,
    )
    assert after_comma == 'true'

    # "div" as the division operator.
    division_cases = (
        ('number("84")div2', '42'),
        ('let $x := 4 return $x div 2', '2'),
    )
    for query, outcome in division_cases:
        assert query_html_doc(div_markup, query) == outcome

    # "*" as multiplication between two attribute values.
    rect_markup = '<rect id="foo" height="2" width="10"/>'
    area = query_html_doc(rect_markup, 'let $r := //rect return $r/@height * $r/@width')
    assert area == '20'

    # "mod" as the modulo operator applied to element text.
    spans_markup = """ <span>not selected</span> <span id="foo">42</span>"""
    remainder = query_html_doc(spans_markup, '//span[@id="foo"] mod 10')
    assert remainder == '2'
def test_if_then_else_works_with_node_sets():
    """An if/then/else on a node-set condition returns the <p> when //p matches, else 1 to 3."""
    markup = """ <p>eekaboo</p>"""

    then_branch = query_html_doc(markup, 'if (//p) then //p else 1 to 3')
    assert then_branch == expected_result(""" <p> eekaboo </p>""")

    else_branch = query_html_doc(markup, 'if (//div) then //p else 1 to 3')
    assert else_branch == expected_result(""" 1 2 3""")
def test_preceding_sibling_axis_works_with_name_test():
    """``preceding-sibling::p`` and its ``<::p`` spelling both select the <p> before the <div>."""
    markup = """ <p>foo</p> <div></div> <p>bar</p>"""
    wanted = expected_result(""" <p> foo </p>""")

    for query in ('//div/preceding-sibling::p', '//div/<::p'):
        assert query_html_doc(markup, query) == wanted
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """Check tokenize() output for a regex separator, the "i" flag, and a token count."""
    # The first query is a raw string because its pattern contains "\.", which
    # is an invalid escape sequence in a normal Python literal and triggers a
    # warning. The characters passed to the query are unchanged.
    cases = (
        (r'tokenize("Moe:Larry:..Curly", ":\.*")', """ Moe Larry Curly"""),
        ('tokenize("HaxtaXpatience", "x", "i")', """ Ha ta patience"""),
    )
    for query, expected_lines in cases:
        actual = query_html_doc('', query)
        assert actual == expected_result(expected_lines)

    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
def test_following_sibling_axis_works_with_node_test():
    """Select following siblings with a text() node test and with a ``*`` wildcard."""
    markup = """ <div> foo <p></p> bar </div>"""
    trailing_text = query_html_doc(markup, '//p/following-sibling::text()')
    assert trailing_text == expected_result('bar')

    sibling_elements = query_html_doc('<h1></h1><div></div><p>foo</p>', '//div/following-sibling::*')
    assert sibling_elements == expected_result(""" <p> foo </p>""")
def test_if_then_else_works_with_node_sets():
    """Run if/then/else with a matching (//p) and a non-matching (//div) node-set condition."""
    markup = """ <p>eekaboo</p>"""
    cases = (
        ('if (//p) then //p else 1 to 3', """ <p> eekaboo </p>"""),
        ('if (//div) then //p else 1 to 3', """ 1 2 3"""),
    )
    for query, expected_lines in cases:
        actual = query_html_doc(markup, query)
        assert actual == expected_result(expected_lines)
def test_round_function_follows_xpath_1_rules_for_positive_numbers_but_python_rules_for_negative_ones():
    """
    Pin the results of round() for a handful of inputs.

    Deliberately out of scope: positive/negative infinity, zero, and the
    numeric-type rules of the XPath 3.0 functions spec. As the test name says,
    negative numbers are not held to XPath 1 rules; the expectations below
    round -5.5 away from zero, and matching XPath 1 there was judged not worth
    the effort (HQuery is not intended as an execution target for existing
    XPath code).
    """
    cases = (
        ('round(5.49)', '5'),
        ('round(5.5)', '6'),
        ('round(1 div 0)', 'NaN'),
        ('round(-5.5)', '-6'),
        ('round(-5.49)', '-5'),
    )
    for query, outcome in cases:
        assert query_html_doc('', query) == outcome
def test_various_functions_use_context_node_when_no_argument_passed():
    """Call string(), normalize-space() and string-length() without arguments inside predicates."""
    markup = """ <p>first</p> <p>foo bar</p> <p>last</p>"""

    by_string = query_html_doc(markup, '//p[string() = "first"]/text()')
    assert by_string == expected_result('first')

    by_normalized = query_html_doc(
        markup,
        '//p[normalize-space() = "foo bar"]/text()',
        preserve_space=True,
    )
    assert by_normalized == expected_result('foo bar')

    by_length = query_html_doc(markup, '//p[string-length() = 4]/text()')
    assert by_length == expected_result('last')
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() in a predicate selects 'larry' by pattern and 'curly' with the "i" flag."""
    markup = """ <p>moe</p> <p>larry</p> <p>curly</p>"""
    cases = (
        ('//p[matches(text(), "^l[ary]+")]/text()', 'larry'),
        ('//p[matches(text(), ".URL.", "i")]/text()', 'curly'),
    )
    for query, stooge in cases:
        actual = query_html_doc(markup, query)
        assert actual == expected_result(stooge)
def test_css_class_axis_finds_elements_based_on_their_css_classes():
    """``class::bar`` and its ``.::bar`` spelling both select the two <p> elements with class bar."""
    markup = """ <p class="foo">foo</p> <p class="foo bar">foo bar</p> <p class="bar">bar</p>"""
    wanted = expected_result(""" <p class="foo bar"> foo bar </p> <p class="bar"> bar </p>""")

    for query in ('//class::bar', '//.::bar'):
        assert query_html_doc(markup, query) == wanted
def test_attribute_axis_in_full_and_abbreviated_form_selects_named_attributes_or_all_attributes():
    """Select id attributes and all attributes via ``attribute::`` and via ``@``."""
    markup = """ <div id="one"></div> <div id="two" class="three"></div>"""
    ids_only = expected_result(''' id="one" id="two"''')
    everything = expected_result(''' id="one" class="three" id="two"''')

    cases = (
        ('//div/attribute::id', ids_only),
        ('//div/@id', ids_only),
        ('//attribute::*', everything),
        ('//@*', everything),
    )
    for query, wanted in cases:
        assert query_html_doc(markup, query) == wanted
def test_ancestor_axis_produces_all_ancestors_and_only_ancestors():
    """``//div/ancestor::*`` on an unwrapped document yields the <html> and <body> elements."""
    markup = """ <html> <body> <!-- comment --> <h1></h1> <div></div> </body> </html>"""

    actual = query_html_doc(markup, '//div/ancestor::*', wrap_body=False)
    wanted = expected_result(""" <html> <body> <!-- comment --> <h1> </h1> <div> </div> </body> </html> <body> <!-- comment --> <h1> </h1> <div> </div> </body>""")
    assert actual == wanted
def test_hash_keys_can_be_used_to_define_attributes_in_a_constructed_hash():
    """A hash built from explicit key/value pairs decodes to JSON with those keys and values."""
    decoded = json.loads(query_html_doc('', 'hash {foo: "bar", moe: "larry"}'))

    for key, value in (('foo', 'bar'), ('moe', 'larry')):
        assert key in decoded
        assert decoded[key] == value
def test_element_constructor_accepts_attributes_from_original_document_including_multi_values_like_classes():
    """Copy attributes (and an element) from the document into a constructed element."""
    markup = """ <p class="one two" three="four"> contents </p>"""

    # The expected markup is formatted with the element name; the resulting
    # strings are identical to writing the tag out literally.
    all_attributes = query_html_doc(markup, 'element test { //p/@* }')
    assert all_attributes == expected_result(
        """ <{tag} class="one two" three="four"> </{tag}>""".format(tag='test'))

    attribute_and_child = query_html_doc(markup, 'element test { //p/@three, //p }')
    assert attribute_and_child == expected_result(
        """ <{tag} three="four"> <p class="one two" three="four"> contents </p> </{tag}>""".format(tag='test'))
def test_non_string_types_survive_conversion_to_json():
    """Integer, float and boolean hash values decode from JSON as int, float and bool."""
    decoded = json.loads(query_html_doc('', 'hash { integer: 1, float: 1.1, boolean: true() }'))
    expected_types = (('integer', int), ('float', float), ('boolean', bool))

    # Presence of every key is checked before any type is inspected.
    missing = [key for key, _ in expected_types if key not in decoded]
    assert not missing

    for key, kind in expected_types:
        assert isinstance(decoded[key], kind)
def test_selects_the_forty_second_figure_element_in_the_document():
    """Generate 42 numbered <figure> elements and select the one at position 42."""
    figures = ['<figure>{0}</figure>'.format(number) for number in range(1, 43)]
    markup = ''.join(figures)

    actual = query_html_doc(markup, '/descendant::figure[position()=42]')
    assert actual == expected_result(""" <figure> 42 </figure>""")
def test_hash_constructor_turns_tags_into_tag_name_keys_with_tag_content_values():
    """A hash built from body children maps each tag name to that element's text."""
    markup = """ <p>foo</p> <div>bar</div>"""
    decoded = json.loads(query_html_doc(markup, 'hash { /html/body/* }'))

    for tag_name, content in (('p', 'foo'), ('div', 'bar')):
        assert decoded[tag_name] == content
def test_abbreviated_context_node_works_in_predicate():
    """The ``[./p]`` predicate keeps only the body children that have a <p> child."""
    markup = """ <div> <p>one</p> </div> <p>two</p> <div> three </div> <div> <p>four</p> </div> """
    wanted_lines = """ <div> <p> one </p> </div> <div> <p> four </p> </div>"""

    selected = query_html_doc(markup, '/html/body/node()[./p]')
    assert selected == expected_result(wanted_lines)
def test_position_function_in_second_predicate_applies_to_results_from_first_predicate():
    """Chain a class predicate with ``[position()=1]`` on <td> and check the selected cells."""
    markup = """ <table> <tr class="select-me"> <td>one</td> <td>two</td> </tr> <tr class="forget-me"> <td>uno</td> <td>dos</td> </tr> <tr class="select-me"> <td>ichi</td> <td>ni</td> </tr> </table>"""
    query = '//td[../@class="select-me"][position()=1]'

    first_cells = query_html_doc(markup, query)
    assert first_cells == expected_result(""" <td> one </td> <td> ichi </td>""")
def test_double_slash_works_within_path():
    """``//section//p`` selects every <p> below a <section> and skips the <p> outside one."""
    markup = """ <section> <p>moe</p> <div> <div> <p>larry</p> </div> <p>curly</p> </div> </section> <p>joe besser</p> <section> <p>shemp</p> </section>"""

    paragraphs = query_html_doc(markup, '//section//p')
    wanted = expected_result(""" <p> moe </p> <p> larry </p> <p> curly </p> <p> shemp </p>""")
    assert paragraphs == wanted
def test_position_function_in_predicate_applies_to_current_step_only():
    """``td[position()=2]`` under each selected <tr> yields the second cell of each such row."""
    markup = """ <table> <tr class="select-me"> <td>one</td> <td>two</td> </tr> <tr class="forget-me"> <td>uno</td> <td>dos</td> </tr> <tr class="select-me"> <td>ichi</td> <td>ni</td> </tr> </table>"""
    query = '//tr[@class="select-me"]/td[position()=2]'

    second_cells = query_html_doc(markup, query)
    assert second_cells == expected_result(""" <td> two </td> <td> ni </td>""")
def test_join_filter_joins_string_values_from_node_set():
    """The ``j`` template filter with a comma joins the <p> values as 'one,two,three'."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""

    joined = query_html_doc(markup, '`${j:,://p}`')
    assert joined == expected_result('one,two,three')
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """Filter six <p> elements with even() and odd() and check which texts come back."""
    markup = """ <p>You</p> <p>I</p> <p>are</p> <p>am</p> <p>odd.</p> <p>even.</p>"""
    cases = (
        ('//p[even()]/text()', """ I am even."""),
        ('//p[odd()]/text()', """ You are odd."""),
    )
    for query, expected_lines in cases:
        actual = query_html_doc(markup, query)
        assert actual == expected_result(expected_lines)
def test_predicate_can_be_applied_to_variable_containing_node_set():
    """Bind //p to a variable, then filter the variable with an attribute predicate."""
    markup = """ <p>not selected</p> <p id="foo">selected</p>"""

    filtered = query_html_doc(markup, 'let $x := //p return $x[@id="foo"]')
    assert filtered == expected_result(""" <p id="foo"> selected </p>""")
def test_matches_function_supports_a_subset_of_xpath_30_flag_values():
    """Exercise matches() with the "i", "s", "m" and "x" flags, and without them for contrast."""
    markup = """ <p>first</p> <p>second one</p> <p> multiple lines of text </p>"""
    # Output of the multi-line paragraph is checked with a DOTALL regex.
    multiline_output = re.compile(r'.+multiple.+text.+', re.S)

    # "i": case-insensitive match.
    insensitive = query_html_doc(markup, r'//p[matches(text(), "\w+RST", "i")]/text()')
    assert insensitive == expected_result('first')

    # "s": nothing selected without the flag, the paragraph with it.
    without_s = query_html_doc(markup, r'//p[matches(text(), ".+lines.+text")]', preserve_space=True)
    assert without_s == ''
    with_s = query_html_doc(markup, r'//p[matches(text(), ".+lines.+text", "s")]', preserve_space=True)
    assert multiline_output.match(with_s)

    # "m": nothing selected without the flag, the paragraph with it.
    without_m = query_html_doc(markup, r'//p[matches(text(), "^ *lines$")]', preserve_space=True)
    assert without_m == ''
    with_m = query_html_doc(markup, r'//p[matches(text(), "^\s*lines$", "m")]', preserve_space=True)
    assert multiline_output.match(with_m)

    # "x": nothing selected without the flag, 'second one' with it.
    without_x = query_html_doc(markup, r'//p[matches(text(), "sec ond\sone")]/text()')
    assert without_x == ''
    with_x = query_html_doc(markup, r'//p[matches(text(), "sec ond\sone", "x")]/text()')
    assert with_x == 'second one'
def test_union_decomposition_with_parentheses():
    """Map ``(//h1 | //p)`` through ``=> ("fizz" | "buzz")`` and check the per-node output."""
    markup = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""

    decomposed = query_html_doc(markup, '(//h1 | //p) => ("fizz" | "buzz")')
    assert decomposed == expected_result(""" fizz buzz fizz""")
def test_element_constructor_can_be_nested():
    """An element constructor containing two further element constructors renders nested markup."""
    query = 'element moe {element larry {}, element curly {"Hey, Moe!"}}'

    rendered = query_html_doc('', query)
    assert rendered == expected_result(""" <moe> <larry> </larry> <curly> Hey, Moe! </curly> </moe>""")
def test_absolute_location_path_should_find_multiple_grandchildren():
    """``/html/body/div`` returns both <div> elements and omits the <p> between them."""
    markup = '<div>one</div><p>not a div</p><div>two</div>'
    wanted_lines = """ <div> one </div> <div> two </div>"""

    selected = query_html_doc(markup, '/html/body/div')
    assert selected == expected_result(wanted_lines)
def test_union_decomposition_naked():
    """Map ``(//h1 | //p)`` through an unparenthesized pair of templates and check the output."""
    markup = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""

    decomposed = query_html_doc(markup, '(//h1 | //p) => `h1 $_` | `p $_`')
    assert decomposed == expected_result(""" h1 heading p content h1 another heading""")
def test_iteration_using_for():
    """A ``for $x in //p return $x/text()`` loop yields the text of each <p> in order."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""

    texts = query_html_doc(markup, 'for $x in //p return $x/text()')
    assert texts == expected_result(""" one two three""")
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """``//p -> $_/text()`` yields the text of each <p> in order."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""

    texts = query_html_doc(markup, '//p -> $_/text()')
    assert texts == expected_result(""" one two three""")
def test_hash_constructor_filters_can_be_combined():
    """Combine the ``a`` and ``n`` hash filters in either order; both must decode the same way."""
    markup = """ <p>20</p> <div>20</div> <h1>20.20</h1>"""
    filter_orderings = (
        'hash {a:p,h1:n:div,h1:} { /html/body/* }',
        'hash {n:div,h1:a:p,h1:} { /html/body/* }',
    )

    for query in filter_orderings:
        decoded = json.loads(query_html_doc(markup, query))
        assert isinstance(decoded['p'], list)
        assert isinstance(decoded['h1'], list)
        assert decoded['p'][0] == '20'
        assert decoded['div'] == 20
        assert decoded['h1'][0] == 20.2
def test_hash_constructor_array_filter_causes_matching_elements_to_be_put_in_an_array():
    """With ``a:h1:`` the h1 value decodes as a one-item list while p stays a plain string."""
    markup = """ <h1>zero</h1> <p>one</p>"""
    decoded = json.loads(query_html_doc(markup, 'hash {a:h1:} { /html/body/* }'))

    assert decoded['p'] == 'one'

    headings = decoded['h1']
    assert isinstance(headings, list)
    assert len(headings) == 1
    assert headings[0] == 'zero'
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """From a loop variable, ``$x/div`` yields child divs and ``$x//div`` yields all nested divs."""
    markup = """ <section> <div> <div>foo</div> </div> </section> <section> <div> <div>bar</div> </div> </section>"""

    children = query_html_doc(markup, 'for $x in //section return $x/div')
    assert children == expected_result(""" <div> <div> foo </div> </div> <div> <div> bar </div> </div>""")

    descendants = query_html_doc(markup, 'for $x in //section return $x//div')
    assert descendants == expected_result(""" <div> <div> foo </div> </div> <div> foo </div> <div> <div> bar </div> </div> <div> bar </div>""")
def test_hash_constructor_number_filter_causes_contents_of_matching_elements_to_be_interpreted_as_numbers():
    """With ``n:div,h1:`` the div and h1 values decode as numbers while p stays a string."""
    markup = """ <p>20</p> <div>20</div> <h1>20.20</h1>"""
    decoded = json.loads(query_html_doc(markup, 'hash {n:div,h1:} { /html/body/* }'))

    for tag_name, value in (('p', '20'), ('div', 20), ('h1', 20.2)):
        assert decoded[tag_name] == value
def test_hash_constructor_coalesces_like_elements_into_an_array_by_default():
    """Two <p> siblings decode as a two-item list under 'p'; the single <div> stays a string."""
    markup = """ <p>one</p> <div>two</div> <p>three</p>"""
    decoded = json.loads(query_html_doc(markup, 'hash { /html/body/* }'))

    paragraphs = decoded['p']
    assert isinstance(paragraphs, list)
    assert len(paragraphs) == 2
    assert paragraphs[1] == 'three'
    assert decoded['div'] == 'two'