Beispiel #1
0
def test_docx_writing():
    """Round-trip check: writing a document back with its own XML keeps it.

    Reads the XML of ``test_data/small_table.docx`` with ``get_docx_xml``,
    writes it to a randomly named ``.docx`` path with ``write_new_document``
    and asserts that re-reading the written file yields equal XML.

    The output file is removed in a ``finally`` clause so that a failing
    write, re-read or assertion does not leave stray files behind.
    """
    filename = 'test_data/small_table.docx'
    xml = get_docx_xml(filename)
    # Random name only to avoid clashing with an existing file; it is not
    # security sensitive, hence the S311 suppression.
    stem = ''.join(random.choice('abcdef') for _ in range(10))  # noqa: S311
    output = stem + '.docx'
    try:
        write_new_document(filename, str(xml), output)
        xml_2 = get_docx_xml(output)
        assert xml == xml_2
    finally:
        # Guard the removal: if the write itself failed there may be no file,
        # and a FileNotFoundError here would mask the original error.
        if os.path.exists(output):
            os.remove(output)
Beispiel #2
0
def test_remove_tables_and_bodies():
    """Check ``_remove_tables_and_bodies`` on the small text fixture.

    Expects no table references, four body references, and every body
    reference to occur in the string form of the returned soup.
    """
    raw_xml = get_docx_xml('test_data/small_text.docx')
    document = BeautifulSoup(raw_xml, 'lxml-xml')
    stripped, table_refs, body_refs = _remove_tables_and_bodies(document)
    assert len(table_refs) == 0
    assert len(body_refs) == 4
    # Render once; each reference must be findable in the serialized result.
    rendered = str(stripped)
    assert all(reference in rendered for reference in body_refs)
Beispiel #3
0
def test_extract_tags():
    """Check ``_extract_tags`` with ``_find_first_r_tag`` on the text fixture.

    Expects exactly one reference whose key occurs in the string form of the
    returned soup, and a different count of stripped strings between the
    returned soup and the soup that was passed in.
    """
    raw_xml = get_docx_xml('test_data/small_text.docx')
    original = BeautifulSoup(raw_xml, 'lxml-xml')
    extracted, references = _extract_tags(original, _find_first_r_tag)
    assert len(references) == 1

    first_key = list(references.keys())[0]
    assert first_key in str(extracted)

    strings_after = list(extracted.stripped_strings)
    strings_before = list(original.stripped_strings)
    assert len(strings_after) != len(strings_before)
Beispiel #4
0
def test_extract_headers():
    """Check ``extract_headers`` on an empty soup and on the text fixture.

    An empty ``html.parser`` soup must give an empty list; the small text
    fixture must give its two headers in order.
    """
    empty_document = BeautifulSoup('', 'html.parser')
    assert extract_headers(empty_document) == []

    raw_xml = get_docx_xml('test_data/small_text.docx')
    headers = extract_headers(BeautifulSoup(raw_xml, 'lxml-xml'))
    expected_headers = [
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    ]
    assert headers == expected_headers
Beispiel #5
0
def test_guess_body_font_size():
    """Check ``_guess_body_font_size`` on the fixture and on a bare element.

    The small text fixture must yield 24; a soup built from ``<w></w>`` must
    raise ``DocxNoTextError``.
    """
    fixture_xml = get_docx_xml('test_data/small_text.docx')
    fixture_soup = BeautifulSoup(fixture_xml, 'lxml-xml')
    assert _guess_body_font_size(fixture_soup) == 24

    # Second case: a document consisting of a single empty element.
    textless_soup = BeautifulSoup('<w></w>', 'lxml-xml')
    with pytest.raises(DocxNoTextError):
        _guess_body_font_size(textless_soup)
Beispiel #6
0
def test_build_structured_text_from_docx_xml():
    """Check the structure built from the small text fixture.

    Expects an empty top-level title and two sections, each without
    subsections and each carrying the expected title text.
    """
    raw_xml = get_docx_xml('test_data/small_text.docx')
    structured = build_structured_text_from_docx_xml(raw_xml)
    assert structured.title.text == ''
    assert len(structured.sections) == 2

    expected_titles = (
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    )
    # Same per-section checks as before, in the same order, one pass each.
    for position, expected_title in enumerate(expected_titles):
        section = structured.sections[position]
        assert len(section.sections) == 0
        assert section.title.text == expected_title
Beispiel #7
0
def test_replace_small_tables():
    """Check that ``_replace_small_tables`` removes the fixture's table.

    Before the call the fixture has one ``w:tbl``, five ``w:p`` and three
    ``w:tc`` tags; afterwards the result has no ``w:tbl`` or ``w:tc`` tags
    and still five ``w:p`` tags.
    """

    def count_tags(tree, tag_name):
        # Number of matches returned by find_all for the given tag name.
        return sum(1 for _ in tree.find_all(tag_name))

    xml_str = get_docx_xml('test_data/small_table.docx')
    before = BeautifulSoup(xml_str, 'lxml-xml')
    assert count_tags(before, 'w:tbl') == 1
    assert count_tags(before, 'w:p') == 5
    assert count_tags(before, 'w:tc') == 3

    after = _replace_small_tables(before)
    assert count_tags(after, 'w:tbl') == 0
    assert count_tags(after, 'w:p') == 5
    assert count_tags(after, 'w:tc') == 0
Beispiel #8
0
def test_replace_tables_and_body_text_with_empty_p():
    """Check ``_replace_tables_and_body_text_with_empty_p`` on two inputs.

    For the small text fixture only the two header strings must remain among
    the stripped strings; for an empty document (called with a second
    argument of 10) no stripped strings must remain.
    """
    fixture_xml = get_docx_xml('test_data/small_text.docx')
    fixture_soup = BeautifulSoup(fixture_xml, 'lxml-xml')
    headers_only = _replace_tables_and_body_text_with_empty_p(fixture_soup)
    remaining = list(headers_only.stripped_strings)
    assert remaining == [
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    ]

    empty_soup = BeautifulSoup('', 'lxml-xml')
    emptied = _replace_tables_and_body_text_with_empty_p(empty_soup, 10)
    assert list(emptied.stripped_strings) == []
Beispiel #9
0
def test_extract_elements():
    """Check the elements extracted from the small text fixture.

    Expects six elements, none of them a ``Table``: strings at positions
    0, 1, 3 and 5, a ``Title`` of level 3 at position 2 and a ``Title`` of
    level 2 at position 4.
    """
    raw_xml = get_docx_xml('test_data/small_text.docx')
    elements = _extract_elements(BeautifulSoup(raw_xml, 'lxml-xml'))
    assert len(elements) == 6
    assert not any(isinstance(element, Table) for element in elements)

    # (expected type, expected title level or None) for each position.
    expectations = [
        (str, None),
        (str, None),
        (Title, 3),
        (str, None),
        (Title, 2),
        (str, None),
    ]
    for position, (expected_type, expected_level) in enumerate(expectations):
        element = elements[position]
        assert isinstance(element, expected_type)
        if expected_level is not None:
            assert check_is_title(element).level == expected_level
Beispiel #10
0
def test_copy_soup():
    """Check that ``_copy_soup`` returns a different object than its input."""
    xml_str = get_docx_xml('test_data/small_table.docx')
    source_soup = BeautifulSoup(xml_str, 'lxml-xml')
    duplicate = _copy_soup(source_soup)
    # Identity check only: the copy must not be the very same object.
    assert duplicate is not source_soup
Beispiel #11
0
def test_get_docx_xml():
    """Check the length of the XML read from the simple table fixture."""
    expected_length = 6580
    xml = get_docx_xml('test_data/simple_table.docx')
    assert len(xml) == expected_length