Ejemplo n.º 1
0
    def make_contract_value_attention_vectors(self, subdoc):
        """Build and combine attention vectors for locating contract values.

        Derives several named attention vectors from
        ``subdoc.distances_per_pattern_dict`` and returns them in a dict.
        """
        distances = subdoc.distances_per_pattern_dict

        # Phrase-level evidence: max over '_phrase' patterns, heavily smoothed.
        sumphrase_av = momentum(
            max_exclusive_pattern_by_prefix(distances, '_phrase'), 0.99)

        # Positive evidence for a value: rectified sum over '_sum.work'
        # patterns, capped at 1, thresholded, then smoothed.
        value_av, _ = rectifyed_sum_by_pattern_prefix(
            distances, '_sum.work', relu_th=0.4)
        value_av = momentum(relu(cut_above(value_av, 1), 0.6), 0.8)

        # Negative evidence: 'sum_neg' patterns vote against a value here.
        novalue_av = max_exclusive_pattern_by_prefix(distances, 'sum_neg')
        novalue_av_contrast = momentum(relu(novalue_av, 0.6), 0.9)

        # Subtract scaled negative evidence, average with phrase evidence,
        # then apply a final threshold.
        tuned = value_av - novalue_av * 0.7
        tuned = relu((tuned + sumphrase_av) / 2, 0.2)

        return {
            'sumphrase_attention_vector': sumphrase_av,
            'value_attention_vector': value_av,
            'novalue_attention_vector': novalue_av,
            'novalue_attention_vector_local_contrast': novalue_av_contrast,
            'value_attention_vector_tuned': tuned,
        }
Ejemplo n.º 2
0
def make_constraints_attention_vectors(subdoc):
    """Derive value/deal/margin attention vectors and their combinations.

    Returns a dict of five named attention vectors computed from
    ``subdoc.distances_per_pattern_dict``.
    """
    # TODO: move to notebook, too much tuning
    distances = subdoc.distances_per_pattern_dict

    # Value evidence: rectified sum over 'sum_max', capped, thresholded, smoothed.
    value_av, _ = rectifyed_sum_by_pattern_prefix(
        distances, 'sum_max', relu_th=0.4)
    value_av = momentum(relu(cut_above(value_av, 1), 0.6), 0.7)

    # Deal evidence: 'd_order' patterns, capped and very heavily smoothed.
    deal_av, _ = rectifyed_sum_by_pattern_prefix(
        distances, 'd_order', relu_th=0.5)
    deal_av = momentum(cut_above(deal_av, 1), 0.993)

    # Margin evidence: 'sum__' patterns, capped, smoothed, then thresholded.
    margin_av, _ = rectifyed_sum_by_pattern_prefix(
        distances, 'sum__', relu_th=0.5)
    margin_av = relu(momentum(cut_above(margin_av, 1), 0.95), 0.65)

    # Combine margin & value evidence, then fold in deal evidence.
    margin_value_av = relu((margin_av + value_av) / 2, 0.6)
    deal_value_av = relu((deal_av + margin_value_av) / 2, 0.75)

    return {
        'value_attention_vector': value_av,
        'deal_attention_vector': deal_av,
        'margin_attention_vector': margin_av,
        'margin_value_attention_vector': margin_value_av,
        'deal_value_attention_vector': deal_value_av
    }
Ejemplo n.º 3
0
  def _find_charter_section_start(self, doc, headline_pattern_prefix, headlines_attention_vector, additional_attention):
    """Locate the most likely start of a charter section.

    Builds an attention vector from patterns matching
    *headline_pattern_prefix*, optionally mixes in *additional_attention*,
    gates it by *headlines_attention_vector*, and picks the best token.
    Also caches the final vector in ``doc.distances_per_pattern_dict``.

    Returns ``(bounds, confidence, v)`` where *bounds* are the sentence
    bounds around the best-scoring token index.
    """
    assert headlines_attention_vector is not None

    # Strongest match among all patterns sharing the given prefix.
    pattern_vectors = filter_values_by_key_prefix(
        doc.distances_per_pattern_dict, headline_pattern_prefix)
    v = relu(max_exclusive_pattern(pattern_vectors), 0.6)

    if additional_attention is not None:
      v = v + smooth_safe(additional_attention, 6)

    # Gate by headline attention; the small offset keeps non-headline
    # regions from being zeroed out entirely.
    v = v * (headlines_attention_vector + 0.1)
    if max(v) > 0.75:
      # Confident enough: refine via the embedding-based meta pattern.
      v, _ = improve_attention_vector(doc.embeddings, v, relu_th=0.0)

    doc.distances_per_pattern_dict["ha$." + headline_pattern_prefix] = v

    best_id = np.argmax(v)
    bounds = get_sentence_bounds_at_index(best_id, doc.tokens)
    return bounds, v[best_id], v
Ejemplo n.º 4
0
def improve_attention_vector(embeddings, vv, relu_th=0.5, mix=1):
  """Refine attention vector *vv* via a meta-pattern built from *embeddings*.

  A "smart click" meta pattern is derived from *vv*, turned into a new
  attention vector, rectified at *relu_th*, and blended with the original:
  ``mix`` of the new vector plus ``(1.0 - mix)`` of *vv* (the default
  ``mix=1`` discards the original entirely).

  Returns the blended vector and the index of the best click position.
  """
  assert vv is not None
  meta_pattern, _confidence, best_id = make_smart_meta_click_pattern(vv, embeddings)
  refined = relu(make_pattern_attention_vector(meta_pattern, embeddings), relu_th)
  blended = refined * mix + vv * (1.0 - mix)
  return blended, best_id
Ejemplo n.º 5
0
 def normalize_headline_attention_vector(self, headline_attention_vector_pure):
   """Cap the raw headline attention vector at 1 and rectify it.

   XXX: test it — the cap was once ``max(vector) * 0.75``; it is now a
   fixed threshold of 1.
   """
   _cap = 1  # max(headline_attention_vector_pure) * 0.75
   clipped = cut_above(headline_attention_vector_pure, _cap)
   return relu(clipped)
Ejemplo n.º 6
0
    def make_subj_attention_vectors(self, subdoc, subj_types_prefixes):
        """Compute raw and rectified attention vectors per subject-type prefix.

        For each prefix in *subj_types_prefixes* the returned dict gets two
        entries: ``<prefix>attention_vector`` (max over matching patterns)
        and ``<prefix>attention_vector_l`` (the same vector after relu
        at 0.6).
        """
        result = {}
        for prefix in subj_types_prefixes:
            raw = max_exclusive_pattern_by_prefix(
                subdoc.distances_per_pattern_dict, prefix)
            result[prefix + 'attention_vector'] = raw
            result[prefix + 'attention_vector_l'] = relu(raw, 0.6)

        return result
Ejemplo n.º 7
0
    def make_subject_attention_vector_3(self,
                                        section,
                                        subject_kind: ContractSubject,
                                        addon=None) -> List[float]:
        """Compute the attention vector for a given contract-subject kind.

        Stores the un-thresholded vector under both the "soft" and the main
        attention-vector names in ``section.distances_per_pattern_dict``,
        and returns a relu-thresholded (0.6) copy. ``addon`` is currently
        unused.
        """
        from ml_tools import max_exclusive_pattern

        pattern_prefix, av_name, av_name_soft = self.__sub_attention_names(
            subject_kind)

        matching = filter_values_by_key_prefix(
            section.distances_per_pattern_dict, pattern_prefix)
        combined = max_exclusive_pattern(matching)

        # Both slots receive the same un-thresholded vector.
        section.distances_per_pattern_dict[av_name_soft] = combined
        section.distances_per_pattern_dict[av_name] = combined

        return relu(combined, 0.6)