Ejemplo n.º 1
0
def find_most_relevant_paragraphs(section: TextMap,
                                  attention_vector: FixedVector,
                                  min_len: int = 20,
                                  return_delimiters=True,
                                  threshold=0.45):
    """Collect the sentence spans of ``section`` that carry attention.

    The attention vector is zero-padded on both sides, smoothed with
    ``smooth_safe`` (window taken from
    ``HyperParameters.subject_paragraph_attention_blur``), cropped back to
    its original length and passed through ``relu`` together with
    ``threshold``. Every index whose resulting value exceeds ``1e-5`` is
    mapped to a span via ``section.sentence_at_index``.

    Args:
        section: Text map queried through ``sentence_at_index``.
        attention_vector: Attention weights; presumably one value per
            index of ``section`` -- TODO confirm against callers.
        min_len: Minimum span length (``span[1] - span[0]``) required for
            a span to be returned. ``None`` disables the length filter.
        return_delimiters: Forwarded unchanged to
            ``section.sentence_at_index``.
        threshold: Forwarded to ``relu`` as its second argument.

    Returns:
        A ``(spans, paragraph_attention_vector)`` tuple: the unique spans
        in order of first appearance, and the smoothed, thresholded
        attention vector they were selected from.
    """
    _blur = int(HyperParameters.subject_paragraph_attention_blur)
    _padding = int(_blur * 2 + 1)

    # Pad before smoothing and crop afterwards, so the smoothing window
    # sees zeros beyond both ends of the vector.
    paragraph_attention_vector = smooth_safe(
        np.pad(attention_vector, _padding, mode='constant'),
        _blur)[_padding:-_padding]

    paragraph_attention_vector = relu(paragraph_attention_vector, threshold)

    top_indices = [
        i for i, v in enumerate(paragraph_attention_vector) if v > 0.00001
    ]

    spans = []
    for i in top_indices:
        span = section.sentence_at_index(i, return_delimiters)

        # Drop spans that are too short. The previous condition was
        # inverted: it kept only spans *shorter* than ``min_len`` and
        # returned nothing at all when ``min_len`` was None.
        if min_len is not None and span[1] - span[0] < min_len:
            continue

        # Several attended indices may map to the same span; report each
        # span once.
        if span not in spans:
            spans.append(span)

    return spans, paragraph_attention_vector
Ejemplo n.º 2
0
  def test_sentence_at_index_return_delimiters(self):
    """Check that ``sentence_at_index`` yields the bounds of the enclosing sentence.

    The sample text holds three newline-separated sentences. For each
    probed index, the bounds returned by ``sentence_at_index`` are turned
    back into text with ``text_range`` and compared with the expected
    sentence; the expected text keeps its trailing newline.
    """
    text_map = TextMap('стороны Заключили\n  договор  ПРЕДМЕТ \nДОГОВОРА')

    # Debug aid: dump every element of the map together with its index.
    for position in range(len(text_map)):
      print(position, text_map[position])

    # Debug aid: show the raw bounds and the text of the first sentence.
    first_bounds = text_map.sentence_at_index(0)
    print(first_bounds)
    print(text_map.text_range(first_bounds))

    # Indices 0..2 are expected to fall into the first sentence.
    for position in (0, 1, 2):
      sentence_bounds = text_map.sentence_at_index(position)
      self.assertEqual('стороны Заключили\n',
                       text_map.text_range(sentence_bounds),
                       str(position))

    # Indices 3..4 are expected to fall into the second sentence
    # (index 5 is not checked by this test).
    for position in (3, 4):
      sentence_bounds = text_map.sentence_at_index(position)
      self.assertEqual('договор  ПРЕДМЕТ \n',
                       text_map.text_range(sentence_bounds))

    # Index 6 is expected to fall into the last sentence.
    for position in (6,):
      sentence_bounds = text_map.sentence_at_index(position)
      self.assertEqual('ДОГОВОРА', text_map.text_range(sentence_bounds))