from typing import List

import numpy as np

# NOTE: relu, momentum, cut_above, max_exclusive_pattern_by_prefix,
# rectifyed_sum_by_pattern_prefix, filter_values_by_key_prefix, smooth_safe,
# get_sentence_bounds_at_index and ContractSubject are project-level helpers
# (ml_tools is one confirmed source, see the import below); they are used
# here without being redefined.


def make_contract_value_attention_vectors(self, subdoc):
  # phrase-level signal: max over all '_phrase' patterns, then smeared
  # forward so it covers the tokens that follow a match
  sumphrase_attention_vector = max_exclusive_pattern_by_prefix(
    subdoc.distances_per_pattern_dict, '_phrase')
  sumphrase_attention_vector = momentum(sumphrase_attention_vector, 0.99)

  # positive signal: rectified sum over '_sum.work' patterns,
  # clipped to 1, thresholded and smeared
  value_attention_vector, _c1 = rectifyed_sum_by_pattern_prefix(
    subdoc.distances_per_pattern_dict, '_sum.work', relu_th=0.4)
  value_attention_vector = cut_above(value_attention_vector, 1)
  value_attention_vector = relu(value_attention_vector, 0.6)
  value_attention_vector = momentum(value_attention_vector, 0.8)

  # negative signal: places that look like a sum but are known non-values
  novalue_attention_vector = max_exclusive_pattern_by_prefix(
    subdoc.distances_per_pattern_dict, 'sum_neg')
  novalue_attention_vector_local_contrast = relu(novalue_attention_vector, 0.6)
  novalue_attention_vector_local_contrast = momentum(
    novalue_attention_vector_local_contrast, 0.9)

  # combine: subtract the damped negative signal, average with the
  # phrase-level signal, rectify the result
  value_attention_vector_tuned = (
      value_attention_vector - novalue_attention_vector * 0.7)
  value_attention_vector_tuned = (
      value_attention_vector_tuned + sumphrase_attention_vector) / 2
  value_attention_vector_tuned = relu(value_attention_vector_tuned, 0.2)

  return {
    'sumphrase_attention_vector': sumphrase_attention_vector,
    'value_attention_vector': value_attention_vector,
    'novalue_attention_vector': novalue_attention_vector,
    'novalue_attention_vector_local_contrast': novalue_attention_vector_local_contrast,
    'value_attention_vector_tuned': value_attention_vector_tuned,
  }
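# ---------------------------------------------------------------------------
# The low-level helpers used above are defined elsewhere in the project.
# The sketches below are ASSUMED reference implementations, inferred only
# from how the helpers are called in this file; the real definitions may
# differ. They are included to make the pipeline above readable on its own.

def _relu_sketch(x, relu_th=0.0):
  # assumption: zero out activations at or below the threshold,
  # keep the rest unchanged
  x = np.asarray(x)
  return x * (x > relu_th)


def _cut_above_sketch(x, threshold):
  # assumption: clip peaks so that no activation exceeds `threshold`
  return np.minimum(np.asarray(x), threshold)


def _momentum_sketch(x, decay=0.99):
  # assumption: a forward peak-hold with exponential decay, so a strong
  # match keeps "echoing" over the tokens that follow it
  m = np.zeros(len(x))
  last = 0.0
  for i, xi in enumerate(x):
    last = max(xi, last * decay)
    m[i] = last
  return m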
def make_constraints_attention_vectors(subdoc):
  # TODO: move to notebook, too much tuning
  value_attention_vector, _c1 = rectifyed_sum_by_pattern_prefix(
    subdoc.distances_per_pattern_dict, 'sum_max', relu_th=0.4)
  value_attention_vector = cut_above(value_attention_vector, 1)
  value_attention_vector = relu(value_attention_vector, 0.6)
  value_attention_vector = momentum(value_attention_vector, 0.7)

  deal_attention_vector, _c2 = rectifyed_sum_by_pattern_prefix(
    subdoc.distances_per_pattern_dict, 'd_order', relu_th=0.5)
  deal_attention_vector = cut_above(deal_attention_vector, 1)
  # very slow decay: the deal signal should persist far down the document
  deal_attention_vector = momentum(deal_attention_vector, 0.993)

  margin_attention_vector, _c3 = rectifyed_sum_by_pattern_prefix(
    subdoc.distances_per_pattern_dict, 'sum__', relu_th=0.5)
  margin_attention_vector = cut_above(margin_attention_vector, 1)
  margin_attention_vector = momentum(margin_attention_vector, 0.95)
  margin_attention_vector = relu(margin_attention_vector, 0.65)

  # intersections of the individual signals: average, then rectify so only
  # tokens supported by both signals survive
  margin_value_attention_vector = relu(
    (margin_attention_vector + value_attention_vector) / 2, 0.6)
  deal_value_attention_vector = (
      deal_attention_vector + margin_value_attention_vector) / 2
  deal_value_attention_vector = relu(deal_value_attention_vector, 0.75)

  return {
    'value_attention_vector': value_attention_vector,
    'deal_attention_vector': deal_attention_vector,
    'margin_attention_vector': margin_attention_vector,
    'margin_value_attention_vector': margin_value_attention_vector,
    'deal_value_attention_vector': deal_value_attention_vector
  }
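# Hypothetical consumer of the dict above (the function name and its return
# convention below are illustrative, not part of the original code): pick the
# strongest token of the combined deal+value signal and use its height as a
# rough confidence score.

def _example_pick_deal_value(subdoc):
  vectors = make_constraints_attention_vectors(subdoc)
  dv = vectors['deal_value_attention_vector']
  best_token_id = int(np.argmax(dv))
  return best_token_id, float(dv[best_token_id])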
def _find_charter_section_start(self, doc, headline_pattern_prefix,
                                headlines_attention_vector,
                                additional_attention):
  assert headlines_attention_vector is not None

  vectors = filter_values_by_key_prefix(doc.distances_per_pattern_dict,
                                        headline_pattern_prefix)
  v = max_exclusive_pattern(vectors)
  v = relu(v, 0.6)

  if additional_attention is not None:
    additional_attention_s = smooth_safe(additional_attention, 6)
    v += additional_attention_s

  # gate by the headline detector; +0.1 keeps a small floor everywhere
  v *= (headlines_attention_vector + 0.1)

  # if there is a confident hit, refine the vector around it
  if max(v) > 0.75:
    v, _ = improve_attention_vector(doc.embeddings, v, relu_th=0.0)

  doc.distances_per_pattern_dict["ha$." + headline_pattern_prefix] = v

  best_id = np.argmax(v)
  bounds = get_sentence_bounds_at_index(best_id, doc.tokens)
  confidence = v[best_id]
  return bounds, confidence, v
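# `get_sentence_bounds_at_index` is another external helper. Below is an
# ASSUMED sketch of its contract, inferred purely from usage: given a token
# index, return the (start, end) token span of the sentence containing that
# token. The delimiter choice is an assumption.

def _get_sentence_bounds_at_index_sketch(index, tokens, sentence_end='.'):
  # assumption: sentences are delimited by a '.' token
  start = 0
  for i in range(index - 1, -1, -1):
    if tokens[i] == sentence_end:
      start = i + 1
      break
  end = len(tokens)
  for i in range(index, len(tokens)):
    if tokens[i] == sentence_end:
      end = i + 1
      break
  return start, end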
def improve_attention_vector(embeddings, vv, relu_th=0.5, mix=1):
  """Sharpen an attention vector by re-querying the embeddings.

  Builds a 'meta' pattern around the strongest point of `vv`, matches it
  against the embeddings, rectifies the result and blends it back in.
  With the default mix=1 the original vector is fully replaced.
  """
  assert vv is not None
  meta_pattern, meta_pattern_confidence, best_id = (
    make_smart_meta_click_pattern(vv, embeddings))
  meta_pattern_attention_v = make_pattern_attention_vector(meta_pattern,
                                                           embeddings)
  meta_pattern_attention_v = relu(meta_pattern_attention_v, relu_th)
  meta_pattern_attention_v = meta_pattern_attention_v * mix + vv * (1.0 - mix)
  return meta_pattern_attention_v, best_id
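# Hypothetical refinement loop (illustrative, not from the original code):
# with mix < 1 the improved vector is blended with the previous one instead
# of replacing it outright, which damps jumps between iterations.

def _refine_attention(embeddings, v, steps=2):
  best_id = None
  for _ in range(steps):
    v, best_id = improve_attention_vector(embeddings, v, relu_th=0.1, mix=0.5)
  return v, best_id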
def normalize_headline_attention_vector(self, headline_attention_vector_pure):
  # XXX: test it
  # 5 is the maximum number of points a headline may gain during headline
  # detection; for now the vector is simply clipped at 1 rather than scaled
  # by its own maximum (e.g. max(headline_attention_vector_pure) * 0.75).
  # TODO: revisit this threshold.
  _max_head_threshold = 1
  headline_attention_vector = cut_above(headline_attention_vector_pure,
                                        _max_head_threshold)
  return relu(headline_attention_vector)
def make_subj_attention_vectors(self, subdoc, subj_types_prefixes):
  r = {}
  for subj_types_prefix in subj_types_prefixes:
    attention_vector = max_exclusive_pattern_by_prefix(
      subdoc.distances_per_pattern_dict, subj_types_prefix)
    attention_vector_l = relu(attention_vector, 0.6)

    # keys are plain concatenations: '<prefix>attention_vector' holds the raw
    # signal, '<prefix>attention_vector_l' the relu-thresholded variant
    r[subj_types_prefix + 'attention_vector'] = attention_vector
    r[subj_types_prefix + 'attention_vector_l'] = attention_vector_l

  return r
def make_subject_attention_vector_3(self, section,
                                    subject_kind: ContractSubject,
                                    addon=None) -> List[float]:
  # NOTE: `addon` is currently unused
  from ml_tools import max_exclusive_pattern

  pattern_prefix, attention_vector_name, attention_vector_name_soft = (
    self.__sub_attention_names(subject_kind))

  vectors = filter_values_by_key_prefix(section.distances_per_pattern_dict,
                                        pattern_prefix)
  x = max_exclusive_pattern(vectors)

  # NOTE: both the 'soft' and the plain attention-vector names currently
  # store the same un-thresholded signal; only the returned copy is rectified
  section.distances_per_pattern_dict[attention_vector_name_soft] = x
  section.distances_per_pattern_dict[attention_vector_name] = x

  x = relu(x, 0.6)
  return x
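# Hypothetical scoring sketch: assuming ContractSubject is an enum of subject
# kinds, each per-kind attention vector can be reduced to a scalar score and
# the best-scoring kind picked as the document's subject. `_guess_subject`
# and the max-based score are illustrative, not part of the original code.

def _guess_subject(self, section):
  scores = {}
  for kind in ContractSubject:
    x = self.make_subject_attention_vector_3(section, kind)
    scores[kind] = float(np.max(x)) if len(x) else 0.0
  return max(scores, key=scores.get)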