def test_merge_colliding_spans_close_epsilon(self):
    """Spans separated by a gap of 20 collapse into one when 20 is passed as the second argument.

    [0, 1] and [1, 2] share an endpoint, while [22, 40] begins 20 positions
    after 2. With the second positional argument set to 20 (presumably the
    merge tolerance -- confirm against `merge_colliding_spans`), a single
    span [0, 40] is expected.
    """
    source_spans = [[0, 1], [1, 2], [22, 40]]

    merged = merge_colliding_spans(source_spans, 20)

    self.assertEqual(1, len(merged))
    only_span = merged[0]
    self.assertEqual(0, only_span[0])
    self.assertEqual(40, only_span[1])
def test_merge_colliding_spans_close(self):
    """Two spans that share an endpoint are merged into one with default arguments.

    [0, 1] and [1, 2] touch at 1; the expected result is the single
    span [0, 2].
    """
    source_spans = [[0, 1], [1, 2]]

    merged = merge_colliding_spans(source_spans)

    self.assertEqual(1, len(merged))
    only_span = merged[0]
    self.assertEqual(0, only_span[0])
    self.assertEqual(2, only_span[1])
def find_attributes_in_sections(self, subdoc: LegalDocumentExt, structural_level: CharterStructuralLevel):
    """Find subject and value attributes inside `subdoc` and attach them to `structural_level`.

    Steps, in order:
      1. collect subject spans from the subject attention map;
      2. find values (requested with ``absolute_spans=False``) and the
         sentences that contain them;
      3. merge both span sets and attribute them to subjects;
      4. shift values and competence tags by ``subdoc.start``;
      5. append each value, as a contract price, to the constraints of every
         competence whose span contains the value's parent span.

    :param subdoc: the document section being analysed
    :param structural_level: receives the attributed competences; its
        ``competences`` are modified
    :return: None
    """
    # --- subjects
    subject_attentions = get_charter_subj_attentions(
        subdoc, self.get_subj_patterns_embeddings())  # dictionary
    subject_spans: Spans = collect_subjects_spans2(subdoc, subject_attentions)

    # --- values (amounts)
    values = find_value_sign_currency_attention(subdoc, None, absolute_spans=False)
    self._rename_margin_values_tags(values)
    valued_sentence_spans: Spans = collect_sentences_having_constraint_values(
        subdoc, values, merge_spans=True)

    # Value sentences first, then subject spans, merged into one span list.
    candidate_spans: Spans = merge_colliding_spans(
        [*valued_sentence_spans, *subject_spans], eps=-1)  # TODO: check this

    self.attribute_spans_to_subjects(candidate_spans, subdoc, structural_level)

    # --- offset tags to absolute values
    # NOTE(review): this relies on `+=` shifting each object in place (an
    # `__iadd__` on the tag/value types); rebinding the loop variable alone
    # would have no effect -- confirm.
    for found_value in values:
        found_value += subdoc.start
    for competence_tag in structural_level.competences:
        competence_tag += subdoc.start

    # --- nest values under the competences (contract subjects) that contain them
    for competence in structural_level.competences:
        for found_value in values:
            if competence.contains(found_value.parent.span):
                competence.constraints.append(found_value.as_ContractPrice())
def collect_subjects_spans2(subdoc, subject_attentions_map, min_len=20) -> Spans:
    """Collect the most relevant paragraph span for every sufficiently confident subject.

    For each entry of `subject_attentions_map` the most relevant paragraph is
    looked up; its span is kept only when the confidence is strictly above
    ``HyperParameters.charter_subject_attention_confidence`` and the span has
    not been collected already.

    :param subdoc: document passed through to `_find_most_relevant_paragraph`
    :param subject_attentions_map: mapping of subject -> attention
    :param min_len: forwarded to `_find_most_relevant_paragraph`
    :return: the collected spans after `merge_colliding_spans(..., eps=-1)`
    """
    paragraph_spans = []

    for subject_key in subject_attentions_map.keys():
        paragraph_span, confidence, _ = _find_most_relevant_paragraph(
            subdoc,
            subject_attentions_map[subject_key],
            min_len=min_len,
            return_delimiters=False)

        is_confident = confidence > HyperParameters.charter_subject_attention_confidence
        if is_confident and paragraph_span not in paragraph_spans:
            paragraph_spans.append(paragraph_span)

    return merge_colliding_spans(paragraph_spans, eps=-1)
def collect_sentences_having_constraint_values(subdoc: LegalDocumentExt, contract_values: [ContractValue],
                                               merge_spans=True) -> Spans:
    """Collect the spans of the sentences that contain the given contract values.

    For every value, the sentence at ``parent.span[0]`` and the sentence at
    ``parent.span[1]`` are looked up (in that order); each distinct sentence
    span is recorded once, in order of first appearance.

    :param subdoc: document providing `sentence_at_index`
    :param contract_values: values whose ``parent.span`` locates them in `subdoc`
    :param merge_spans: when true, the result is passed through
        `merge_colliding_spans(..., eps=1)` before being returned
    :return: list of sentence spans
    """
    sentence_spans: Spans = []

    for contract_value in contract_values:
        # Index 0 first, then index 1 of the parent span.
        for edge in (0, 1):
            sentence_span = subdoc.sentence_at_index(
                contract_value.parent.span[edge], return_delimiters=False)
            if sentence_span not in sentence_spans:
                sentence_spans.append(sentence_span)

    # TODO: do not join here, join by subject
    if not merge_spans:
        return sentence_spans
    return merge_colliding_spans(sentence_spans, eps=1)