コード例 #1
0
    def test_merge_colliding_spans_close_epsilon(self):
        """Check that three nearby spans collapse into one span from 0 to 40.

        ``merge_colliding_spans`` is called with 20 as its second positional
        argument (presumably the ``eps`` tolerance used by keyword elsewhere;
        confirm against its signature). The gap between ``[1, 2]`` and
        ``[22, 40]`` is exactly 20.
        """
        source_spans = [[0, 1], [1, 2], [22, 40]]

        merged = merge_colliding_spans(source_spans, 20)

        # Everything is expected to be fused into a single span.
        self.assertEqual(1, len(merged))

        only_span = merged[0]
        self.assertEqual(0, only_span[0])
        self.assertEqual(40, only_span[1])
コード例 #2
0
    def test_merge_colliding_spans_close(self):
        """Check that two touching spans merge into one span from 0 to 2.

        ``merge_colliding_spans`` is called with its default arguments; the
        spans ``[0, 1]`` and ``[1, 2]`` share the boundary position 1.
        """
        source_spans = [[0, 1], [1, 2]]

        merged = merge_colliding_spans(source_spans)

        # The two adjacent spans are expected to be fused into one.
        self.assertEqual(1, len(merged))

        only_span = merged[0]
        self.assertEqual(0, only_span[0])
        self.assertEqual(2, only_span[1])
コード例 #3
0
    def find_attributes_in_sections(self, subdoc: LegalDocumentExt,
                                    structural_level: CharterStructuralLevel):
        """Find subjects and values in ``subdoc`` and attach them to ``structural_level``.

        The method works in five steps:

        1. Collect subject spans from the subject attention map of ``subdoc``.
        2. Find values (amounts) and the spans of the sentences holding them.
        3. Merge both span lists and pass the result to
           ``attribute_spans_to_subjects``.
        4. Offset the values and the competences of ``structural_level`` by
           ``subdoc.start`` (to absolute positions).
        5. Append every value whose parent span lies inside a competence to
           that competence's ``constraints``.

        Nothing is returned; results are stored on ``structural_level``.
        """
        # Step 1: subjects. The attention map is a dictionary keyed by subject.
        attentions_by_subject = get_charter_subj_attentions(
            subdoc, self.get_subj_patterns_embeddings())
        subject_spans: Spans = collect_subjects_spans2(subdoc,
                                                       attentions_by_subject)

        # Step 2: values (amounts), requested with absolute_spans=False.
        values: [ContractValue] = find_value_sign_currency_attention(
            subdoc, None, absolute_spans=False)
        self._rename_margin_values_tags(values)
        valued_sentence_spans: Spans = collect_sentences_having_constraint_values(
            subdoc, values, merge_spans=True)

        # Step 3: value sentences first, then subject spans, merged together.
        candidate_spans: Spans = [*valued_sentence_spans, *subject_spans]
        candidate_spans = merge_colliding_spans(candidate_spans,
                                                eps=-1)  # TODO: check this

        self.attribute_spans_to_subjects(
            candidate_spans,
            subdoc,
            structural_level  # OrgStructuralLevel.BoardOfDirectors
        )

        # Step 4: offset tags to absolute values.
        # NOTE(review): the result of ``+=`` is only rebound to the loop
        # variable, so this relies on the tag types applying the addition
        # in place -- confirm against their implementation.
        for shifted_value in values:
            shifted_value += subdoc.start
        for shifted_competence in structural_level.competences:
            shifted_competence += subdoc.start

        # Step 5: nest values (assign parents) under the competence
        # (contract subject) that contains the value's parent span.
        for competence in structural_level.competences:
            for value in values:
                if not competence.contains(value.parent.span):
                    continue
                competence.constraints.append(value.as_ContractPrice())
コード例 #4
0
def collect_subjects_spans2(subdoc,
                            subject_attentions_map,
                            min_len=20) -> Spans:
    """Collect the most relevant paragraph span for each subject attention.

    For every attention in ``subject_attentions_map`` the most relevant
    paragraph of ``subdoc`` is looked up via ``_find_most_relevant_paragraph``
    (with ``min_len`` forwarded and ``return_delimiters=False``). A span is
    kept only if its confidence exceeds
    ``HyperParameters.charter_subject_attention_confidence`` and it has not
    been collected already.

    Returns the kept spans after passing them through
    ``merge_colliding_spans`` with ``eps=-1``.
    """
    candidate_spans = []
    # Only the attention values are needed; the subject keys are not used.
    for attention in subject_attentions_map.values():
        paragraph_span, confidence, _ = _find_most_relevant_paragraph(
            subdoc, attention, min_len=min_len, return_delimiters=False)
        if (confidence > HyperParameters.charter_subject_attention_confidence
                and paragraph_span not in candidate_spans):
            candidate_spans.append(paragraph_span)

    return merge_colliding_spans(candidate_spans, eps=-1)
コード例 #5
0
def collect_sentences_having_constraint_values(subdoc: LegalDocumentExt,
                                               contract_values: [
                                                   ContractValue
                                               ],
                                               merge_spans=True) -> Spans:
    """Collect the spans of the sentences that contain constraint values.

    For each contract value, the sentences at both ends of its parent span
    (``parent.span[0]`` and ``parent.span[1]``) are looked up with
    ``subdoc.sentence_at_index(..., return_delimiters=False)``. Each distinct
    sentence span is recorded once, in order of first appearance.

    When ``merge_spans`` is true, the collected spans are additionally passed
    through ``merge_colliding_spans`` with ``eps=1``; otherwise they are
    returned as collected.
    """
    sentence_spans: Spans = []
    for contract_value in contract_values:
        # A value may straddle a sentence boundary, so look at the sentence
        # under the start of its parent span and then the one under its end.
        for boundary in (0, 1):
            sentence_span = subdoc.sentence_at_index(
                contract_value.parent.span[boundary], return_delimiters=False)
            if sentence_span not in sentence_spans:
                sentence_spans.append(sentence_span)

    # TODO: do not join here, join by subject
    if not merge_spans:
        return sentence_spans
    return merge_colliding_spans(sentence_spans, eps=1)