Beispiel #1
0
def test_extract_paragraphs(WMLParser):
    """Check that extract_paragraphs returns the parser's paragraphs with volume 99.

    ``WMLParser`` is presumably a mock injected by a patch decorator that is
    not visible here; its return value is set to an iterator of two
    paragraphs with ``volume=1``. The expected result carries ``volume=99``
    instead (presumably taken from the '99' in the file name -- confirm
    against ``extract_paragraphs``).
    """
    parser_output = [
        Paragraph(text='some text', volume=1),
        Paragraph(text='some more', volume=1),
    ]
    WMLParser.return_value = iter(parser_output)

    extracted = list(extract_paragraphs('TA99_01.xml'))

    expected = [
        Paragraph(text='some text', volume=99),
        Paragraph(text='some more', volume=99),
    ]
    assert extracted == expected
Beispiel #2
0
def test_assembles_one_citation_with_keyword():
    """A KEYWORD paragraph followed by a CITATION yields one citation with that keyword."""
    keyword_paragraph = Paragraph(
        type=ParagraphType.KEYWORD, volume=5, text='A')
    citation_paragraph = Paragraph(
        type=ParagraphType.CITATION, volume=5, text='1. Some citation')

    assembled = list(assemble_citations([keyword_paragraph, citation_paragraph]))

    expected_citation = IntermediateCitation(
        volume=5,
        raw_text='1. Some citation',
        keywords=['A'],
    )
    assert assembled == [expected_citation]
Beispiel #3
0
def test_assembles_one_citation_with_amendment():
    """A CITATION followed by an AMENDMENT yields one citation carrying that amendment."""
    citation_paragraph = Paragraph(
        type=ParagraphType.CITATION, volume=5, text='1. Some citation')
    amendment_paragraph = Paragraph(
        type=ParagraphType.AMENDMENT, volume=5, text='Some amendment')

    assembled = list(
        assemble_citations([citation_paragraph, amendment_paragraph]))

    expected_citation = IntermediateCitation(
        volume=5,
        keywords=[],
        raw_text='1. Some citation',
        amendments=['Some amendment'],
    )
    assert assembled == [expected_citation]
Beispiel #4
0
def test_detect_paragraph_types():
    """Check the type assigned to each paragraph of a small sample document.

    Each case pairs a paragraph text with the type expected on the
    corresponding output paragraph; the plain 'Something' paragraphs are
    expected to stay untyped (``None``).
    """
    cases = [
        ('Something', None),
        ('ZEITSCHRIFTEN  UND', ParagraphType.JOURNAL_SECTION_BEGIN),
        ('Something', None),
        ('A. Allgemeines', ParagraphType.KEYWORD),
        ('1. First citation', ParagraphType.CITATION),
        ('• Some bullet point', ParagraphType.AMENDMENT),
        ('3. Second citation', ParagraphType.CITATION),
        ('Ac. bibliotheken', ParagraphType.KEYWORD),
        ('4. Third citation', ParagraphType.CITATION),
        ('Autoren, Herausgeber, Übersetzer, Rezensenten',
         ParagraphType.AUTHOR_INDEX_BEGIN),
        ('Something', None),
    ]
    # Split the cases into the inputs and the per-paragraph expectations.
    texts = [text for text, _ in cases]
    expected_types = [expected for _, expected in cases]

    untyped = [Paragraph(text=text, volume='130') for text in texts]
    typed = list(detect_paragraph_types(untyped, KEYWORD_MAPPING))

    assert [paragraph.type for paragraph in typed] == expected_types
 def __iter__(self):
     """Yield one ``Paragraph`` for every node matched by ``'//w:p'``.

     Being a generator, nothing runs until iteration starts; then
     ``self._parse_xml()`` is called once before the nodes are queried
     through ``self._xpath``. Each yielded paragraph records its position
     in the match list as ``originalIndex`` and the text obtained from
     ``self._get_paragraph_text``.
     """
     self._parse_xml()
     # NOTE(review): 'w:p' is presumably the WordprocessingML paragraph
     # element; the namespace binding lives in _xpath -- confirm there.
     for position, node in enumerate(self._xpath('//w:p')):
         paragraph = Paragraph(
             originalIndex=position,
             text=self._get_paragraph_text(node),
         )
         yield paragraph
Beispiel #6
0
def test_assembles_three_citations():
    """Three consecutive CITATION paragraphs yield three citations in input order."""
    citation_texts = ['1. Citation 1', '2. Citation 2', '3. Citation 3']
    source_paragraphs = [
        Paragraph(type=ParagraphType.CITATION, volume=5, text=text)
        for text in citation_texts
    ]

    assembled = list(assemble_citations(source_paragraphs))

    # Each expected citation only pins volume and raw_text; every other
    # field is left to IntermediateCitation's defaults.
    expected = [
        IntermediateCitation(volume=5, raw_text=text)
        for text in citation_texts
    ]
    assert assembled == expected
Beispiel #7
0
def test_assembles_one_citation():
    """A lone CITATION paragraph yields a single citation with no keywords."""
    only_paragraph = Paragraph(
        type=ParagraphType.CITATION, volume=5, text='1. Some citation')

    assembled = list(assemble_citations([only_paragraph]))

    expected_citation = IntermediateCitation(
        volume=5,
        keywords=[],
        raw_text='1. Some citation',
    )
    assert assembled == [expected_citation]
Beispiel #8
0
def test_assembles_two_citations_with_keywords():
    """Two citations each get their own keyword; the amendment goes to the first.

    The input order is keyword 'A', citation 1, keyword 'B', an amendment,
    then citation 2. The expectation pins that the amendment is attached to
    citation 1 even though keyword 'B' precedes it, and that keyword 'B' is
    attached to citation 2.
    """
    source_paragraphs = [
        Paragraph(type=ParagraphType.KEYWORD, volume=5, text='A'),
        Paragraph(type=ParagraphType.CITATION, volume=5, text='1. Citation 1'),
        Paragraph(type=ParagraphType.KEYWORD, volume=5, text='B'),
        Paragraph(type=ParagraphType.AMENDMENT, volume=5, text='Some amendment'),
        Paragraph(type=ParagraphType.CITATION, volume=5, text='2. Citation 2'),
    ]

    assembled = list(assemble_citations(source_paragraphs))

    first_expected = IntermediateCitation(
        volume=5,
        raw_text='1. Citation 1',
        keywords=['A'],
        amendments=['Some amendment'],
    )
    second_expected = IntermediateCitation(
        volume=5,
        raw_text='2. Citation 2',
        keywords=['B'],
        amendments=[],
    )
    assert assembled == [first_expected, second_expected]