def query_html_doc(html_body, hquery, preserve_space=False, wrap_body=True):
    """Run ``hquery`` against ``html_body`` and return the result as cleaned text.

    When ``wrap_body`` is true the markup is parsed through ``soup_with_body``;
    otherwise it goes straight to ``make_soup``. ``preserve_space`` is forwarded
    both to the query processor and to the text conversion. The converted text
    is stripped and then passed through ``eliminate_blank_lines``.
    """
    if wrap_body:
        document = soup_with_body(html_body)
    else:
        document = make_soup(html_body)

    processor = HqueryProcessor(hquery, preserve_space=preserve_space)
    results = processor.query(document)
    rendered = convert_results_to_output_text(results, preserve_space=preserve_space)
    return eliminate_blank_lines(rendered.strip())
def test_selects_the_para_element_descendants_of_the_context_node():
    """``.//para`` returns every ``para`` descendant of the context node."""
    markup = """ <para> <context> <para>selected</para> <not-para>not selected</not-para> <para> <para>also selected</para> </para> </context> </para>"""
    document = make_soup(markup)
    context = document.para.context

    actual = query_context_node(context, './/para')
    expected = expected_result(""" <para> selected </para> <para> <para> also selected </para> </para> <para> also selected </para>""")

    assert actual == expected
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """``../@lang`` returns the ``lang`` attribute of the context node's parent."""
    markup = """ <root lang="English"> <context></context> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, '../@lang')
    expected = expected_result('lang="English"')

    assert actual == expected
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent():
    """``//olist/item`` returns only the ``item`` elements whose parent is an ``olist``."""
    # NOTE(review): the last <olist> in this fixture looks like it was meant to
    # be a closing </olist> -- confirm before changing; kept exactly as written.
    markup = """ <root> <olist>no items</olist> <item>not selected</item> <context></context> <olist> <item>first</item> </olist> <item> <olist> <item>second</item> <olist> </item> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, '//olist/item')
    expected = expected_result(""" <item> first </item> <item> second </item>""")

    assert actual == expected
def test_selects_the_div_ancestors_of_the_context_node_and_if_the_context_node_is_a_div_element_the_context_node_as_well():
    """``ancestor-or-self::div`` includes the context node only when it is a ``div``."""
    markup = """ <div> <div/> <notdiv/> </div>"""
    document = make_soup(markup)
    axis_query = 'ancestor-or-self::div'

    # Context node is itself a div: both the outer div and the inner div are selected.
    from_div = query_context_node(document.div.div, axis_query)
    expected_from_div = expected_result(""" <div> <div> </div> <notdiv> </notdiv> </div> <div> </div>""")
    assert from_div == expected_from_div

    # Context node is not a div: only the enclosing div is selected.
    from_notdiv = query_context_node(document.div.notdiv, axis_query)
    expected_from_notdiv = expected_result(""" <div> <div> </div> <notdiv> </notdiv> </div>""")
    assert from_notdiv == expected_from_notdiv
def test_selects_all_div_ancestors_of_the_context_node():
    """``ancestor::div`` returns the ``div`` that encloses the context node."""
    markup = """ <div> <notdiv/> </div>"""
    context = make_soup(markup).div.notdiv

    actual = query_context_node(context, 'ancestor::div')
    expected = expected_result(""" <div> <notdiv> </notdiv> </div>""")

    assert actual == expected
def test_name_test_at_root_ignores_all_but_root_element():
    """``/html`` yields just the ``html`` element, not the doctype or leading comment."""
    markup = """ <!DOCTYPE html> <!-- html --> <html id="root"> </html>"""

    processor = HqueryProcessor('/html')
    results = processor.query(make_soup(markup))
    rendered = convert_results_to_output_text(results)

    expected = expected_result(""" <html id="root"> </html>""")
    assert rendered == expected
def test_selects_all_the_para_elements_in_the_same_document_as_the_context_node():
    """``/descendant::para`` finds ``para`` elements regardless of the context node."""
    markup = """ <root> <notpara/> <para>selected</para> </root>"""
    document = make_soup(markup)
    context = document.root.notpara

    actual = query_context_node(context, '/descendant::para')
    expected = expected_result(""" <para> selected </para>""")

    assert actual == expected
def test_selects_the_parent_of_the_context_node():
    """``..`` evaluated at ``<context>`` returns its parent ``<root>`` element."""
    html = """ <root> <context></context> </root>"""
    soup = make_soup(html)

    # Query from the <context> element itself so that '..' resolves to its
    # <root> parent; passing the raw markup would make the root tag the
    # context node instead.
    actual = query_context_node(soup.root.context, '..')
    expected = expected_result(""" <root> <context> </context> </root>""")

    assert actual == expected
def test_selects_the_previous_chapter_sibling_of_the_context_node():
    """``preceding-sibling::chapter[position()=1]`` picks the nearest earlier ``chapter``."""
    markup = """ <root> <chapter>not selected</chapter> <chapter>selected</chapter> <context/> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, 'preceding-sibling::chapter[position()=1]')
    expected = expected_result(""" <chapter> selected </chapter>""")

    assert actual == expected
def test_path_to_root_tag_succeeds_despite_other_root_level_objects():
    """``/*`` yields the ``html`` element even with a doctype and comment beside it."""
    markup = """ <!DOCTYPE html> <!-- outside --> <html> <!-- inside --> </html>"""

    processor = HqueryProcessor('/*')
    results = processor.query(make_soup(markup))
    rendered = convert_results_to_output_text(results)

    expected = expected_result(""" <html> <!-- inside --> </html>""")
    assert rendered == expected
def test_selects_all_the_para_elements_in_the_same_document_as_the_context_node():
    """``/descendant::para`` finds ``para`` elements regardless of the context node."""
    markup = """ <root> <notpara/> <para>selected</para> </root>"""
    document = make_soup(markup)
    context = document.root.notpara

    actual = query_context_node(context, '/descendant::para')
    expected = expected_result(""" <para> selected </para>""")

    assert actual == expected
def test_selects_the_previous_chapter_sibling_of_the_context_node():
    """``preceding-sibling::chapter[position()=1]`` picks the nearest earlier ``chapter``."""
    markup = """ <root> <chapter>not selected</chapter> <chapter>selected</chapter> <context/> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, 'preceding-sibling::chapter[position()=1]')
    expected = expected_result(""" <chapter> selected </chapter>""")

    assert actual == expected
def test_selects_all_the_item_elements_that_have_an_olist_parent_and_that_are_in_the_same_document_as_the_context_node():
    """``/descendant::olist/child::item`` returns only ``item`` children of an ``olist``."""
    # NOTE(review): the second <olist> in this fixture looks like it was meant
    # to be a closing </olist> -- confirm before changing; kept exactly as written.
    markup = """ <root> <notolist/> <olist> <notitem>not selected</notitem> <item>selected</item> <olist> </root>"""
    document = make_soup(markup)
    context = document.root.notolist

    actual = query_context_node(context, '/descendant::olist/child::item')
    expected = expected_result(""" <item> selected </item>""")

    assert actual == expected
def test_selects_all_the_item_elements_that_have_an_olist_parent_and_that_are_in_the_same_document_as_the_context_node():
    """``/descendant::olist/child::item`` returns only ``item`` children of an ``olist``."""
    # NOTE(review): the second <olist> in this fixture looks like it was meant
    # to be a closing </olist> -- confirm before changing; kept exactly as written.
    markup = """ <root> <notolist/> <olist> <notitem>not selected</notitem> <item>selected</item> <olist> </root>"""
    document = make_soup(markup)
    context = document.root.notolist

    actual = query_context_node(context, '/descendant::olist/child::item')
    expected = expected_result(""" <item> selected </item>""")

    assert actual == expected
def test_selects_all_the_para_descendants_of_the_document_root_and_thus_selects_all_para_elements_in_the_same_document_as_the_context_node():
    """``//para`` returns every ``para`` in the document, nested ones included."""
    markup = """ <root> <para> <para>selected</para> </para> <context></context> <para>also selected</para> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, '//para')
    expected = expected_result(""" <para> <para> selected </para> </para> <para> selected </para> <para> also selected </para>""")

    assert actual == expected
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent():
    """``//olist/item`` returns only the ``item`` elements whose parent is an ``olist``."""
    # NOTE(review): the last <olist> in this fixture looks like it was meant to
    # be a closing </olist> -- confirm before changing; kept exactly as written.
    markup = """ <root> <olist>no items</olist> <item>not selected</item> <context></context> <olist> <item>first</item> </olist> <item> <olist> <item>second</item> <olist> </item> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, '//olist/item')
    expected = expected_result(""" <item> first </item> <item> second </item>""")

    assert actual == expected
def test_selects_the_div_ancestors_of_the_context_node_and_if_the_context_node_is_a_div_element_the_context_node_as_well():
    """``ancestor-or-self::div`` includes the context node only when it is a ``div``."""
    markup = """ <div> <div/> <notdiv/> </div>"""
    document = make_soup(markup)
    axis_query = 'ancestor-or-self::div'

    # Context node is itself a div: both the outer div and the inner div are selected.
    from_div = query_context_node(document.div.div, axis_query)
    expected_from_div = expected_result(""" <div> <div> </div> <notdiv> </notdiv> </div> <div> </div>""")
    assert from_div == expected_from_div

    # Context node is not a div: only the enclosing div is selected.
    from_notdiv = query_context_node(document.div.notdiv, axis_query)
    expected_from_notdiv = expected_result(""" <div> <div> </div> <notdiv> </notdiv> </div>""")
    assert from_notdiv == expected_from_notdiv
def test_selects_all_the_para_descendants_of_the_document_root_and_thus_selects_all_para_elements_in_the_same_document_as_the_context_node():
    """``//para`` returns every ``para`` in the document, nested ones included."""
    markup = """ <root> <para> <para>selected</para> </para> <context></context> <para>also selected</para> </root>"""
    document = make_soup(markup)
    context = document.root.context

    actual = query_context_node(context, '//para')
    expected = expected_result(""" <para> <para> selected </para> </para> <para> selected </para> <para> also selected </para>""")

    assert actual == expected
def soup_with_body(contents):
    """Pass ``contents`` through ``wrap_html_body`` and parse the result with ``make_soup``."""
    wrapped_markup = wrap_html_body(contents)
    return make_soup(wrapped_markup)
def query_html_doc(html_body, hquery, preserve_space=False, wrap_body=True):
    """Run ``hquery`` against ``html_body`` and return the result as cleaned text.

    When ``wrap_body`` is true the markup is parsed through ``soup_with_body``;
    otherwise it goes straight to ``make_soup``. ``preserve_space`` is forwarded
    both to the query processor and to the text conversion. The converted text
    is stripped and then passed through ``eliminate_blank_lines``.
    """
    if wrap_body:
        document = soup_with_body(html_body)
    else:
        document = make_soup(html_body)

    processor = HqueryProcessor(hquery, preserve_space=preserve_space)
    results = processor.query(document)
    rendered = convert_results_to_output_text(results, preserve_space=preserve_space)
    return eliminate_blank_lines(rendered.strip())
def query_context_node(node_or_source, hquery):
    """Evaluate ``hquery`` with ``node_or_source`` as context and return cleaned text.

    If ``node_or_source`` is not already a node (per ``is_any_node``), it is
    parsed with ``make_soup`` and the result of ``root_tag_from_soup`` is used
    as the context. The converted output is stripped and then passed through
    ``eliminate_blank_lines``.
    """
    context = node_or_source
    if not is_any_node(context):
        context = root_tag_from_soup(make_soup(context))

    results = HqueryProcessor(hquery).query(context)
    rendered = convert_results_to_output_text(results)
    return eliminate_blank_lines(rendered.strip())
def query_context_node(node_or_source, hquery):
    """Evaluate ``hquery`` with ``node_or_source`` as context and return cleaned text.

    If ``node_or_source`` is not already a node (per ``is_any_node``), it is
    parsed with ``make_soup`` and the result of ``root_tag_from_soup`` is used
    as the context. The converted output is stripped and then passed through
    ``eliminate_blank_lines``.
    """
    context = node_or_source
    if not is_any_node(context):
        context = root_tag_from_soup(make_soup(context))

    results = HqueryProcessor(hquery).query(context)
    rendered = convert_results_to_output_text(results)
    return eliminate_blank_lines(rendered.strip())