Esempio n. 1
0
def query_html_doc(html_body, hquery, preserve_space=False, wrap_body=True):
    """Run ``hquery`` against ``html_body`` and return the result as text.

    With ``wrap_body`` true the markup is parsed through ``soup_with_body``;
    otherwise it goes straight to ``make_soup``. ``preserve_space`` is
    forwarded to both ``HqueryProcessor`` and
    ``convert_results_to_output_text``. The converted text is stripped and
    then passed through ``eliminate_blank_lines``.
    """
    if wrap_body:
        document = soup_with_body(html_body)
    else:
        document = make_soup(html_body)
    processor = HqueryProcessor(hquery, preserve_space=preserve_space)
    results = processor.query(document)
    text = convert_results_to_output_text(results,
                                          preserve_space=preserve_space)
    return eliminate_blank_lines(text.strip())
Esempio n. 2
0
def test_selects_the_para_element_descendants_of_the_context_node():
    """Query ``.//para`` with the ``<context>`` element as the context node."""
    markup = """
    <para>
        <context>
            <para>selected</para>
            <not-para>not selected</not-para>
            <para>
                <para>also selected</para>
            </para>
        </context>
    </para>"""
    document = make_soup(markup)
    actual = query_context_node(document.para.context, './/para')
    expected = expected_result("""
    <para>
     selected
    </para>
    <para>
     <para>
      also selected
     </para>
    </para>
    <para>
     also selected
    </para>""")
    assert actual == expected
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """Query ``../@lang`` with the ``<context>`` element as the context node."""
    markup = """
    <root lang="English">
        <context></context>
    </root>"""
    context_node = make_soup(markup).root.context
    actual = query_context_node(context_node, '../@lang')
    expected = expected_result('lang="English"')
    assert actual == expected
Esempio n. 4
0
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent(
):
    """Query ``//olist/item`` with the ``<context>`` element as the context node.

    The fixture is well-formed (the nested ``<olist>`` is explicitly closed),
    so the expectation does not depend on how the parser recovers from an
    unclosed tag.
    """
    html = """
    <root>
        <olist>no items</olist>
        <item>not selected</item>
        <context></context>
        <olist>
            <item>first</item>
        </olist>
        <item>
            <olist>
                <item>second</item>
            </olist>
        </item>
    </root>"""
    soup = make_soup(html)
    assert query_context_node(soup.root.context,
                              '//olist/item') == expected_result("""
    <item>
     first
    </item>
    <item>
     second
    </item>""")
Esempio n. 5
0
def test_selects_the_div_ancestors_of_the_context_node_and_if_the_context_node_is_a_div_element_the_context_node_as_well():
    """Query ``ancestor-or-self::div`` from a ``<div>`` and from a ``<notdiv>``.

    The first assertion starts at the inner ``<div>``; the second starts at
    the sibling ``<notdiv>``.
    """
    markup = """
    <div>
        <div/>
        <notdiv/>
    </div>"""
    document = make_soup(markup)
    query = 'ancestor-or-self::div'

    from_div = query_context_node(document.div.div, query)
    expected_from_div = expected_result("""
    <div>
     <div>
     </div>
     <notdiv>
     </notdiv>
    </div>
    <div>
    </div>""")
    assert from_div == expected_from_div

    from_notdiv = query_context_node(document.div.notdiv, query)
    expected_from_notdiv = expected_result("""
    <div>
     <div>
     </div>
     <notdiv>
     </notdiv>
    </div>""")
    assert from_notdiv == expected_from_notdiv
Esempio n. 6
0
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """Evaluate ``../@lang`` starting from the ``<context>`` element."""
    source = """
    <root lang="English">
        <context></context>
    </root>"""
    tree = make_soup(source)
    rendered = query_context_node(tree.root.context, '../@lang')
    wanted = expected_result('lang="English"')
    assert rendered == wanted
def test_selects_all_div_ancestors_of_the_context_node():
    """Query ``ancestor::div`` with the ``<notdiv>`` element as the context node."""
    markup = """
    <div>
        <notdiv/>
    </div>"""
    context_node = make_soup(markup).div.notdiv
    actual = query_context_node(context_node, 'ancestor::div')
    expected = expected_result("""
    <div>
     <notdiv>
     </notdiv>
    </div>""")
    assert actual == expected
Esempio n. 8
0
def test_name_test_at_root_ignores_all_but_root_element():
    """Query ``/html`` on a document that also has a doctype and a comment at the top level."""
    markup = """
    <!DOCTYPE html>
    <!-- html -->
    <html id="root">
    </html>"""
    processor = HqueryProcessor('/html')
    results = processor.query(make_soup(markup))
    rendered = convert_results_to_output_text(results)
    expected = expected_result("""
    <html id="root">
    </html>""")
    assert rendered == expected
def test_selects_all_the_para_elements_in_the_same_document_as_the_context_node():
    """Query ``/descendant::para`` with the ``<notpara>`` element as the context node."""
    markup = """
    <root>
        <notpara/>
        <para>selected</para>
    </root>"""
    document = make_soup(markup)
    actual = query_context_node(document.root.notpara, '/descendant::para')
    expected = expected_result("""
    <para>
     selected
    </para>""")
    assert actual == expected
Esempio n. 10
0
def test_selects_the_parent_of_the_context_node():
    """Query ``..`` with the ``<context>`` element as the context node.

    The parsed ``<context>`` tag is passed as the context node, so the
    expected ``<root>`` output is the parent of the node the test name
    refers to.
    """
    html = """
    <root>
        <context></context>
    </root>"""
    soup = make_soup(html)
    # Pass the <context> element itself; handing over the raw markup string
    # would make query_context_node start from the document's root tag.
    assert query_context_node(soup.root.context, '..') == expected_result("""
    <root>
     <context>
     </context>
    </root>""")
Esempio n. 11
0
def test_name_test_at_root_ignores_all_but_root_element():
    """Evaluate ``/html`` against markup with a doctype and comment before ``<html>``."""
    source = """
    <!DOCTYPE html>
    <!-- html -->
    <html id="root">
    </html>"""
    query = HqueryProcessor('/html')
    tree = make_soup(source)
    output = convert_results_to_output_text(query.query(tree))
    wanted = expected_result("""
    <html id="root">
    </html>""")
    assert output == wanted
def test_selects_the_parent_of_the_context_node():
    """Query ``..`` with the ``<context>`` element as the context node.

    The parsed ``<context>`` tag is passed as the context node, so the
    expected ``<root>`` output is the parent of the node the test name
    refers to.
    """
    html = """
    <root>
        <context></context>
    </root>"""
    soup = make_soup(html)
    # Pass the <context> element itself; handing over the raw markup string
    # would make query_context_node start from the document's root tag.
    assert query_context_node(soup.root.context, '..') == expected_result("""
    <root>
     <context>
     </context>
    </root>""")
Esempio n. 13
0
def test_selects_all_div_ancestors_of_the_context_node():
    """Evaluate ``ancestor::div`` starting from the ``<notdiv>`` element."""
    source = """
    <div>
        <notdiv/>
    </div>"""
    tree = make_soup(source)
    rendered = query_context_node(tree.div.notdiv, 'ancestor::div')
    wanted = expected_result("""
    <div>
     <notdiv>
     </notdiv>
    </div>""")
    assert rendered == wanted
def test_selects_the_previous_chapter_sibling_of_the_context_node():
    """Query ``preceding-sibling::chapter[position()=1]`` from the ``<context>`` element."""
    markup = """
    <root>
        <chapter>not selected</chapter>
        <chapter>selected</chapter>
        <context/>
    </root>"""
    document = make_soup(markup)
    query = 'preceding-sibling::chapter[position()=1]'
    actual = query_context_node(document.root.context, query)
    expected = expected_result("""
    <chapter>
     selected
    </chapter>""")
    assert actual == expected
Esempio n. 15
0
def test_path_to_root_tag_succeeds_despite_other_root_level_objects():
    """Query ``/*`` on markup that has a doctype and a comment beside ``<html>``."""
    markup = """
    <!DOCTYPE html>
    <!-- outside -->
    <html>
        <!-- inside -->
    </html>"""
    processor = HqueryProcessor('/*')
    results = processor.query(make_soup(markup))
    rendered = convert_results_to_output_text(results)
    expected = expected_result("""
    <html>
     <!-- inside -->
    </html>""")
    assert rendered == expected
Esempio n. 16
0
def test_selects_all_the_para_elements_in_the_same_document_as_the_context_node():
    """Evaluate ``/descendant::para`` starting from the ``<notpara>`` element."""
    source = """
    <root>
        <notpara/>
        <para>selected</para>
    </root>"""
    context_node = make_soup(source).root.notpara
    rendered = query_context_node(context_node, '/descendant::para')
    wanted = expected_result("""
    <para>
     selected
    </para>""")
    assert rendered == wanted
Esempio n. 17
0
def test_selects_the_previous_chapter_sibling_of_the_context_node():
    """Evaluate ``preceding-sibling::chapter[position()=1]`` starting from ``<context>``."""
    source = """
    <root>
        <chapter>not selected</chapter>
        <chapter>selected</chapter>
        <context/>
    </root>"""
    context_node = make_soup(source).root.context
    rendered = query_context_node(
        context_node, 'preceding-sibling::chapter[position()=1]')
    wanted = expected_result("""
    <chapter>
     selected
    </chapter>""")
    assert rendered == wanted
def test_selects_all_the_item_elements_that_have_an_olist_parent_and_that_are_in_the_same_document_as_the_context_node():
    """Query ``/descendant::olist/child::item`` from the ``<notolist>`` element.

    The fixture is well-formed (``<olist>`` is explicitly closed), so the
    expectation does not depend on how the parser recovers from an unclosed
    tag.
    """
    html = """
    <root>
        <notolist/>
        <olist>
            <notitem>not selected</notitem>
            <item>selected</item>
        </olist>
    </root>"""
    soup = make_soup(html)
    assert query_context_node(soup.root.notolist, '/descendant::olist/child::item') == expected_result("""
    <item>
     selected
    </item>""")
Esempio n. 19
0
def test_selects_all_the_item_elements_that_have_an_olist_parent_and_that_are_in_the_same_document_as_the_context_node(
):
    """Query ``/descendant::olist/child::item`` from the ``<notolist>`` element.

    The fixture is well-formed (``<olist>`` is explicitly closed), so the
    expectation does not depend on how the parser recovers from an unclosed
    tag.
    """
    html = """
    <root>
        <notolist/>
        <olist>
            <notitem>not selected</notitem>
            <item>selected</item>
        </olist>
    </root>"""
    soup = make_soup(html)
    assert query_context_node(
        soup.root.notolist,
        '/descendant::olist/child::item') == expected_result("""
    <item>
     selected
    </item>""")
def test_selects_all_the_para_descendants_of_the_document_root_and_thus_selects_all_para_elements_in_the_same_document_as_the_context_node():
    """Query ``//para`` with the ``<context>`` element as the context node."""
    markup = """
    <root>
        <para>
            <para>selected</para>
        </para>
        <context></context>
        <para>also selected</para>
    </root>"""
    document = make_soup(markup)
    actual = query_context_node(document.root.context, '//para')
    expected = expected_result("""
    <para>
     <para>
      selected
     </para>
    </para>
    <para>
     selected
    </para>
    <para>
     also selected
    </para>""")
    assert actual == expected
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent():
    """Query ``//olist/item`` with the ``<context>`` element as the context node.

    The fixture is well-formed (the nested ``<olist>`` is explicitly closed),
    so the expectation does not depend on how the parser recovers from an
    unclosed tag.
    """
    html = """
    <root>
        <olist>no items</olist>
        <item>not selected</item>
        <context></context>
        <olist>
            <item>first</item>
        </olist>
        <item>
            <olist>
                <item>second</item>
            </olist>
        </item>
    </root>"""
    soup = make_soup(html)
    assert query_context_node(soup.root.context, '//olist/item') == expected_result("""
    <item>
     first
    </item>
    <item>
     second
    </item>""")
def test_selects_the_div_ancestors_of_the_context_node_and_if_the_context_node_is_a_div_element_the_context_node_as_well():
    """Evaluate ``ancestor-or-self::div`` from the inner ``<div>``, then from ``<notdiv>``."""
    source = """
    <div>
        <div/>
        <notdiv/>
    </div>"""
    tree = make_soup(source)

    rendered = query_context_node(tree.div.div, 'ancestor-or-self::div')
    wanted = expected_result("""
    <div>
     <div>
     </div>
     <notdiv>
     </notdiv>
    </div>
    <div>
    </div>""")
    assert rendered == wanted

    rendered = query_context_node(tree.div.notdiv, 'ancestor-or-self::div')
    wanted = expected_result("""
    <div>
     <div>
     </div>
     <notdiv>
     </notdiv>
    </div>""")
    assert rendered == wanted
Esempio n. 23
0
def test_selects_all_the_para_descendants_of_the_document_root_and_thus_selects_all_para_elements_in_the_same_document_as_the_context_node():
    """Evaluate ``//para`` starting from the ``<context>`` element."""
    source = """
    <root>
        <para>
            <para>selected</para>
        </para>
        <context></context>
        <para>also selected</para>
    </root>"""
    context_node = make_soup(source).root.context
    rendered = query_context_node(context_node, '//para')
    wanted = expected_result("""
    <para>
     <para>
      selected
     </para>
    </para>
    <para>
     selected
    </para>
    <para>
     also selected
    </para>""")
    assert rendered == wanted
def test_selects_the_para_element_descendants_of_the_context_node():
    """Evaluate ``.//para`` starting from the ``<context>`` element."""
    source = """
    <para>
        <context>
            <para>selected</para>
            <not-para>not selected</not-para>
            <para>
                <para>also selected</para>
            </para>
        </context>
    </para>"""
    context_node = make_soup(source).para.context
    rendered = query_context_node(context_node, './/para')
    wanted = expected_result("""
    <para>
     selected
    </para>
    <para>
     <para>
      also selected
     </para>
    </para>
    <para>
     also selected
    </para>""")
    assert rendered == wanted
Esempio n. 25
0
def soup_with_body(contents):
    """Pass ``contents`` through ``wrap_html_body`` and parse the result with ``make_soup``."""
    wrapped = wrap_html_body(contents)
    return make_soup(wrapped)
Esempio n. 26
0
def query_html_doc(html_body, hquery, preserve_space=False, wrap_body=True):
    """Evaluate ``hquery`` over ``html_body`` and return the textual output.

    ``wrap_body`` selects the parser entry point: ``soup_with_body`` when
    true, plain ``make_soup`` otherwise. ``preserve_space`` is handed to the
    ``HqueryProcessor`` and to ``convert_results_to_output_text``. The
    converted text is stripped before ``eliminate_blank_lines`` is applied.
    """
    parse = soup_with_body if wrap_body else make_soup
    tree = parse(html_body)
    query = HqueryProcessor(hquery, preserve_space=preserve_space)
    output = convert_results_to_output_text(
        query.query(tree), preserve_space=preserve_space)
    return eliminate_blank_lines(output.strip())
Esempio n. 27
0
def soup_with_body(contents):
    """Return ``make_soup`` applied to the output of ``wrap_html_body(contents)``."""
    body_markup = wrap_html_body(contents)
    soup = make_soup(body_markup)
    return soup
Esempio n. 28
0
def query_context_node(node_or_source, hquery):
    """Run ``hquery`` from a context node and return the result as text.

    If ``is_any_node`` rejects ``node_or_source``, it is parsed with
    ``make_soup`` and the value returned by ``root_tag_from_soup`` becomes
    the context node. The converted output is stripped and passed through
    ``eliminate_blank_lines``.
    """
    context = node_or_source
    if not is_any_node(context):
        context = root_tag_from_soup(make_soup(context))
    results = HqueryProcessor(hquery).query(context)
    text = convert_results_to_output_text(results)
    return eliminate_blank_lines(text.strip())
Esempio n. 29
0
def query_context_node(node_or_source, hquery):
    """Evaluate ``hquery`` starting at a context node and return text output.

    ``node_or_source`` is used directly when ``is_any_node`` accepts it.
    Otherwise it is parsed with ``make_soup`` and the query starts at
    whatever ``root_tag_from_soup`` returns for the parsed document.
    """
    if is_any_node(node_or_source):
        start = node_or_source
    else:
        start = root_tag_from_soup(make_soup(node_or_source))
    processor = HqueryProcessor(hquery)
    output = convert_results_to_output_text(processor.query(start))
    stripped = output.strip()
    return eliminate_blank_lines(stripped)