Esempio n. 1
0
def compute_detection_performance(dataset: List[_LabelizedText], ontology: TopicOntology):
    """Print topic-detection diagnostics for a labelized dataset.

    Each dataset entry is unpacked as ``((titles, outer_alineas), label)``.
    The alineas are wrapped in a ``StructuredText`` with an empty title so
    they can be handed to the topic helpers.

    Two things are printed:
      * for every topic returned by ``_missing_topics``, the associated texts
        (through ``_pretty_print``);
      * a ``Counter`` of whether the extracted topics equal the expected
        label, skipping entries whose expected label is falsy.
    """
    all_labels = [expected for _, expected in dataset]

    texts = []
    for (titles, outer_alineas), _ in dataset:
        wrapped = StructuredText(EnrichedString(''), outer_alineas, [], None)
        texts.append((titles, wrapped))

    missing = _missing_topics(all_labels, texts, ontology)
    for topic, texts_ in missing.items():
        _pretty_print(topic, texts_)

    predicted_labels = [_extract_topics(text, titles, ontology) for titles, text in texts]
    matches = [exp == pred for exp, pred in zip(all_labels, predicted_labels) if exp]
    print(Counter(matches))
def test_add_topics():
    """Check that ``add_topics`` annotates the sections addressed by each path.

    The input maps index paths (tuples) to topics; the returned text is
    expected to expose the matching topic on ``annotations.topic`` of the
    section found by following that path.
    """
    nested = StructuredText(EnrichedString('Section 1.1'), [], [], None)
    first = StructuredText(EnrichedString('Section 1'), [], [nested], None)
    second = StructuredText(EnrichedString('Section 2'), [], [], None)
    am = ArreteMinisteriel(EnrichedString('arrete du 10/10/10'), [first, second], [], None, id='FAKE_ID')

    topics_by_path = {
        (0,): TopicName.INCENDIE,
        (0, 0): TopicName.INCENDIE,
        (1,): TopicName.BRUIT_VIBRATIONS,
    }
    annotated = add_topics(am, topics_by_path).sections

    assert annotated[0].annotations.topic == TopicName.INCENDIE  # type: ignore
    assert annotated[0].sections[0].annotations.topic == TopicName.INCENDIE  # type: ignore
    assert annotated[1].annotations.topic == TopicName.BRUIT_VIBRATIONS  # type: ignore
def _get_simple_text() -> StructuredText:
    """Build a small fixture: a titled text with two plain alineas, one table
    alinea (three rows, some cells built with ``rs``/``cs`` arguments) and a
    single empty child section.
    """
    # Only the first row is built with True as its second argument.
    first_row = Row([_cell('AA'), _cell('BB'), _cell('CC')], True)
    second_row = Row([_cell('DD', rs=2), _cell('EE', cs=2)], False)
    third_row = Row([_cell('FF'), _cell('GG')], False)
    table = Table([first_row, second_row, third_row])

    alineas = [_text('Alinea 1'), _text('Alinea 2'), EnrichedString('', table=table)]
    child = StructuredText(_text('Title 2'), [], [], None, None, None)
    return StructuredText(_text('Title'), alineas, [child], None, None, None)
Esempio n. 4
0
def test_extract_text_lines():
    """Check the flat line rendering produced by ``StructuredText.text_lines``.

    Covered expectations: titles are stripped, section titles are prefixed
    with ``#`` repeated per nesting level, an empty alinea yields an empty
    line, and an alinea containing a newline yields one line per part.
    """
    simple_lines = _get_simple_text().text_lines()
    assert simple_lines == ['AM', 'alinea', 'foo', '# Section 1', '## Section 1.1', '# Section 2', 'bar']

    title_only = StructuredText(EnrichedString(' A'), [], [], None)
    assert title_only.text_lines() == ['A']
    with_empty_alinea = StructuredText(EnrichedString(' A'), [EnrichedString('')], [], None)
    assert with_empty_alinea.text_lines() == ['A', '']

    # _TEXT_A and _TEXT_B share their title and first alinea.
    title = '6. Schématisation des différents types de joints mentionnés :'
    intro = 'Vous pouvez consulter les schémas dans le'
    assert _TEXT_A.text_lines() == [title, intro, 'JO', 'n° 265 du 16/11/2010 texte numéro 21']

    assert _TEXT_B.text_lines() == [title, intro, 'JO n° 265 du 16/11/2010 texte numéro 21']
Esempio n. 5
0
def _build_am() -> ArreteMinisteriel:
    """Build a fake ``ArreteMinisteriel`` with four top-level sections.

    The first two articles each hold one child section titled 'A'; the
    remaining two sections have no children.
    """
    first_child = StructuredText(EnrichedString('A'), [], [], None)
    second_child = StructuredText(EnrichedString('A'), [], [], None)

    article_1 = StructuredText(EnrichedString('Art. 1'), [], [first_child], None)
    article_2 = StructuredText(EnrichedString('Art. 2'), [], [second_child], None)
    conditions = StructuredText(EnrichedString('Conditions d\'application'), [], [], None)
    article_3 = StructuredText(EnrichedString('Art. 3'), [], [], None)

    title = EnrichedString('arrete du 10/10/10')
    top_level = [article_1, article_2, conditions, article_3]
    return ArreteMinisteriel(title, top_level, [], None, id='FAKE_ID')
from envinorma.models.condition import AndCondition, Littler, Range, extract_leaf_conditions
from envinorma.models.parameter import Parameter, ParameterType
from envinorma.models.structured_text import StructuredText
from envinorma.models.text_elements import EnrichedString
from envinorma.parametrization.combinations import _generate_options_dicts, generate_exhaustive_combinations
from envinorma.parametrization.models.parametrization import (
    AlternativeSection,
    AMWarning,
    InapplicableSection,
    Parametrization,
    extract_conditions_from_parametrization,
)

# Parameter fixture of DATE type; referenced by the conditions built below.
_DATE = Parameter(id='date-d-installation', type=ParameterType.DATE)
# Section fixture: a title, a single alinea, no subsections and a fixed id.
_NEW_TEXT = StructuredText(
    title=EnrichedString(text='Article 2.1'),
    outer_alineas=[EnrichedString(text='Contenu nouveau')],
    sections=[],
    applicability=None,
    reference=None,
    annotations=None,
    id='d16d0fE7C7fc',
)
_PARAMETRIZATION = Parametrization(
    inapplicable_sections=[
        InapplicableSection(
            section_id='abcdef',
            alineas=None,
            condition=AndCondition(
                conditions=frozenset([Littler(parameter=_DATE, target=date(2021, 1, 1), strict=True)])
            ),
Esempio n. 7
0
def _extract_cell_data(cell: Tag) -> EnrichedString:
    """Turn a table cell tag into an ``EnrichedString``.

    The cell goes through three helpers in sequence: text elements are
    extracted together with line breaks, merged between those line breaks,
    then joined into the final string.
    """
    elements = _extract_text_elements_with_linebreaks(cell)
    merged = merge_between_linebreaks(elements)
    joined = _ensure_strs_and_join(merged)
    return EnrichedString(joined)
Esempio n. 8
0
def _enriched_string_links() -> EnrichedString:
    """Return an ``EnrichedString`` fixture holding the text 'abc', one link and no table."""
    links = [Link('abc', 0, 4)]
    return EnrichedString('abc', links, None)
Esempio n. 9
0
def _enriched_string_table() -> EnrichedString:
    """Return an ``EnrichedString`` fixture with empty text, no links and the table from ``_table()``."""
    no_links = []
    return EnrichedString('', no_links, _table())
Esempio n. 10
0
def _table() -> Table:
    """Return a one-row, one-cell ``Table`` fixture whose cell reads 'bonjour'."""
    cell = Cell(EnrichedString('bonjour'), 1, 1)
    row = Row([cell], True)
    return Table([row])
Esempio n. 11
0
def _str(text: Optional[str] = None) -> EnrichedString:
    """Wrap *text* in an ``EnrichedString``.

    When *text* is falsy (``None`` or empty), the content comes from
    ``_random_string()`` instead.
    """
    content = text if text else _random_string()
    return EnrichedString(content)
Esempio n. 12
0
    assert _is_probably_cid('LEGITEXT34234')
    assert _is_probably_cid('FAKE_CID')
    assert _is_probably_cid('FAKETEXT0000324')
    assert not _is_probably_cid('')
    assert not _is_probably_cid('JORFTEX')


def _get_simple_text() -> StructuredText:
    """Build a small fixture text: two alineas at the top level, a first
    section with one nested sub-section, and a second section holding a
    single alinea.
    """
    nested = StructuredText(_str('Section 1.1'), [], [], None)
    first = StructuredText(_str('Section 1'), [], [nested], None)
    second = StructuredText(_str('Section 2'), [_str('bar')], [], None)
    # The title deliberately keeps its trailing space.
    return StructuredText(
        _str('AM '),
        [_str('alinea'), _str('foo')],
        [first, second],
        None,
    )


# Fixture text without subsections; its second alinea embeds a newline
# ('JO\nn° ...') inside a single EnrichedString.
_TEXT_A = StructuredText(
    title=EnrichedString(text='6. Schématisation des différents types de joints mentionnés :'),
    outer_alineas=[
        EnrichedString(text='Vous pouvez consulter les schémas dans le'),
        EnrichedString(text='JO\nn° 265 du 16/11/2010 texte numéro 21'),
    ],
    sections=[],
    applicability=None,
    reference=Reference('ref', 'name'),
    annotations=None,
    id='0bEB0b14A96f',
)
_TEXT_B = StructuredText(
    title=EnrichedString(text='6. Schématisation des différents types de joints mentionnés :'),
    outer_alineas=[
        EnrichedString(text='Vous pouvez consulter les schémas dans le'),
        EnrichedString(text='JO n° 265 du 16/11/2010 texte numéro 21'),
Esempio n. 13
0
def _build_labelized_text(raw_text: Tuple[int, List[str], List[Dict]],
                          labels: Set[TopicName]) -> _LabelizedText:
    text = raw_text[1], [
        EnrichedString.from_dict(dict_) for dict_ in raw_text[2]
    ]
    return text, labels
def _str(text: Optional[str] = None) -> EnrichedString:
    """Wrap *text* in an ``EnrichedString``, or return a random one when
    *text* is falsy (``None`` or empty).
    """
    if text:
        return EnrichedString(text)
    return _random_enriched_string()
def _random_enriched_string() -> EnrichedString:
    """Return an ``EnrichedString`` whose text comes from ``_random_string()``, with no links and no table."""
    content = _random_string()
    return EnrichedString(content, [], None)
Esempio n. 16
0
def test_count_cells():
    """Check ``_count_cells`` on an empty table, an empty row, a single
    one-cell row, and the same one-cell row repeated three times.
    """
    empty_table = Table([])
    assert _count_cells(empty_table) == 0

    table_with_empty_row = Table([Row([], True)])
    assert _count_cells(table_with_empty_row) == 0

    cells = [Cell(EnrichedString(''), 1, 1)]
    one_row = [Row(cells, True)]
    assert _count_cells(Table(one_row)) == 1

    # List repetition: the three entries are the very same Row object.
    three_rows = [Row(cells, True)] * 3
    assert _count_cells(Table(three_rows)) == 3
Esempio n. 17
0
def _random_cell() -> Cell:
    """Return a ``Cell`` wrapping an empty ``EnrichedString``, built with 1 and 1
    as its remaining arguments.

    Despite the name, nothing here is randomized: every call builds the
    same content.
    """
    empty_content = EnrichedString('')
    return Cell(empty_content, 1, 1)
def _text(txt: str) -> EnrichedString:
    """Shorthand that wraps *txt* in an ``EnrichedString``."""
    wrapped = EnrichedString(txt)
    return wrapped