Python get_docx_xml 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: envinorma.io.docx

메소드/함수: get_docx_xml

hotexamples.com에서의 예제들: 11

Python get_docx_xml - 11개의 예제를 찾았습니다. 다음은 오픈소스 프로젝트에서 추출한 Python envinorma.io.docx.get_docx_xml의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_docx_writing():
    """Check that XML written by ``write_new_document`` reads back unchanged.

    Reads the XML of a fixture document, writes it into a randomly named
    ``.docx`` file in the current working directory, and asserts that
    ``get_docx_xml`` returns an equal value for the newly written file.
    """
    filename = 'test_data/small_table.docx'
    xml = get_docx_xml(filename)
    # The random name only serves to avoid clashes; it is not security-sensitive.
    output = ''.join(random.choice('abcdef') for _ in range(10)) + '.docx'  # noqa: S311
    try:
        write_new_document(filename, str(xml), output)
        xml_2 = get_docx_xml(output)
        assert xml == xml_2
    finally:
        # Clean up even when writing or the assertion fails, so a failing run
        # does not leave stray .docx files behind. The existence check keeps a
        # failed write from being masked by a FileNotFoundError here.
        if os.path.exists(output):
            os.remove(output)

예제 #2

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_remove_tables_and_bodies():
    """Check the references returned by ``_remove_tables_and_bodies``.

    For the ``small_text.docx`` fixture, no table references and four body
    references are expected, and every body reference must appear in the
    string form of the returned soup.
    """
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    stripped, table_refs, body_refs = _remove_tables_and_bodies(document)
    assert len(table_refs) == 0
    assert len(body_refs) == 4
    # Serialize once; each reference is looked up in the same text.
    serialized = str(stripped)
    for reference in body_refs:
        assert reference in serialized

예제 #3

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_extract_tags():
    """Check ``_extract_tags`` when used with ``_find_first_r_tag``.

    Exactly one reference is expected; its key must appear in the string
    form of the returned soup, and the returned soup must not have the same
    number of stripped strings as the input soup.
    """
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    extracted, refs = _extract_tags(document, _find_first_r_tag)
    assert len(refs) == 1
    first_key = list(refs.keys())[0]
    assert first_key in str(extracted)
    strings_after = list(extracted.stripped_strings)
    strings_before = list(document.stripped_strings)
    assert len(strings_after) != len(strings_before)

예제 #4

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_extract_headers():
    """Check ``extract_headers`` on an empty soup and on ``small_text.docx``."""
    # An empty HTML document yields no headers.
    empty_document = BeautifulSoup('', 'html.parser')
    assert extract_headers(empty_document) == []

    expected_headers = [
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    ]
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    assert extract_headers(document) == expected_headers

예제 #5

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_guess_body_font_size():
    """Check ``_guess_body_font_size`` on a fixture and on an empty element.

    The ``small_text.docx`` fixture is expected to give 24, while the bare
    ``<w></w>`` document is expected to raise ``DocxNoTextError``.
    """
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    assert _guess_body_font_size(document) == 24

    empty_document = BeautifulSoup('<w></w>', 'lxml-xml')
    with pytest.raises(DocxNoTextError):
        _guess_body_font_size(empty_document)

예제 #6

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_build_structured_text_from_docx_xml():
    """Check the structure built from the ``small_text.docx`` fixture.

    The result must have an empty title and two top-level sections, each
    without subsections and carrying the expected title text.
    """
    expected_titles = (
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    )
    structured = build_structured_text_from_docx_xml(
        get_docx_xml('test_data/small_text.docx'))
    assert structured.title.text == ''
    assert len(structured.sections) == 2
    for position, expected_title in enumerate(expected_titles):
        assert len(structured.sections[position].sections) == 0
        assert structured.sections[position].title.text == expected_title

예제 #7

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_replace_small_tables():
    """Check tag counts before and after ``_replace_small_tables``.

    Before the call the fixture has one ``w:tbl``, five ``w:p`` and three
    ``w:tc`` tags; afterwards no ``w:tbl`` or ``w:tc`` tags remain and the
    ``w:p`` count is still five.
    """

    def count_tags(tree, tag_name):
        # Number of tags with the given name found in the tree.
        return len(list(tree.find_all(tag_name)))

    document = BeautifulSoup(get_docx_xml('test_data/small_table.docx'), 'lxml-xml')
    for tag_name, expected in (('w:tbl', 1), ('w:p', 5), ('w:tc', 3)):
        assert count_tags(document, tag_name) == expected

    replaced = _replace_small_tables(document)
    for tag_name, expected in (('w:tbl', 0), ('w:p', 5), ('w:tc', 0)):
        assert count_tags(replaced, tag_name) == expected

예제 #8

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_replace_tables_and_body_text_with_empty_p():
    """Check the strings left by ``_replace_tables_and_body_text_with_empty_p``.

    For ``small_text.docx`` only the two header strings should remain. For
    an empty document, called with a second argument of 10, no strings
    should remain.
    """
    expected_strings = [
        'Article 6.2.3. Auto surveillance des niveaux sonores',
        'Chapitre 6.3 – Vibrations',
    ]
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    cleaned = _replace_tables_and_body_text_with_empty_p(document)
    assert list(cleaned.stripped_strings) == expected_strings

    empty_document = BeautifulSoup('', 'lxml-xml')
    cleaned_empty = _replace_tables_and_body_text_with_empty_p(empty_document, 10)
    assert list(cleaned_empty.stripped_strings) == []

예제 #9

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_extract_elements():
    """Check the elements extracted from the ``small_text.docx`` fixture.

    Six elements are expected, none of them a ``Table``. Positions 0, 1, 3
    and 5 must be strings; positions 2 and 4 must be ``Title`` instances
    with levels 3 and 2 respectively.
    """
    document = BeautifulSoup(get_docx_xml('test_data/small_text.docx'), 'lxml-xml')
    extracted = _extract_elements(document)
    assert len(extracted) == 6
    assert not any(isinstance(item, Table) for item in extracted)

    for position in (0, 1, 3, 5):
        assert isinstance(extracted[position], str)

    for position, expected_level in ((2, 3), (4, 2)):
        assert isinstance(extracted[position], Title)
        assert check_is_title(extracted[position]).level == expected_level

예제 #10

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_copy_soup():
    """Check that ``_copy_soup`` returns a different object than its input."""
    original = BeautifulSoup(get_docx_xml('test_data/small_table.docx'), 'lxml-xml')
    duplicate = _copy_soup(original)
    # Identity, not equality: the copy must be a distinct object.
    assert duplicate is not original

예제 #11

0

파일 보기

파일: test_docx.py 프로젝트: Envinorma/envinorma-data

def test_get_docx_xml():
    """Check the length of the XML read from ``simple_table.docx``."""
    expected_length = 6580
    document_xml = get_docx_xml('test_data/simple_table.docx')
    assert len(document_xml) == expected_length