def get_two_sentences(sentence1, sentence2, calculator):
    """Build the tree string, DTK tensor and BERT tokens for a sentence pair.

    Args:
        sentence1: First sentence (raw text passed to ``parse_tree``).
        sentence2: Second sentence (raw text passed to ``parse_tree``).
        calculator: Object whose ``dt(tree)`` method returns a NumPy array
            that can be reshaped to ``(1, 4000)``.

    Returns:
        A tuple ``(tree_sentence, dtk_sentence, bert_sentence)`` where
        ``tree_sentence`` is both bracketed parses wrapped under a single
        ``S`` node, ``dtk_sentence`` is a float CUDA tensor of shape
        ``(1, 8000)`` (the two ``(1, 4000)`` vectors concatenated along
        dim 1), and ``bert_sentence`` is whatever ``get_token_BERT`` returns
        for ``"<sentence1>[SEP]<sentence2>"``.

    Note:
        The tensors are moved with ``.cuda()``, so a CUDA device is required.
    """
    # Generate the bracketed form of both sentences: put a space before
    # every "(" and then drop the first character of the result (the space
    # inserted before the opening parenthesis, assuming the parse starts
    # with "(" -- TODO confirm against parse_tree).
    # A plain str.replace is used instead of re.sub("\(", ...), whose
    # non-raw pattern is an invalid escape sequence on modern Python.
    tree_sentence1, tree_sentence2 = [
        parse_tree(sentence).replace("(", " (")[1:]
        for sentence in (sentence1, sentence2)
    ]

    # Get the BERT tokens for the pair, joined by the [SEP] marker.
    bert_sentence = get_token_BERT(f'{sentence1}[SEP]{sentence2}')

    # Compute the DTK vector of each sentence, in order (s1 then s2).
    dtk_parts = []
    for bracketed in (tree_sentence1, tree_sentence2):
        full_tree = tree.Tree(string=bracketed)
        # NOTE: 4000 is hard-coded and must match the calculator's output size.
        dtk_vector = calculator.dt(full_tree).reshape(1, 4000)
        dtk_parts.append(torch.from_numpy(dtk_vector).float().cuda())

    dtk_sentence = torch.cat(dtk_parts, 1)
    tree_sentence = f'(S {tree_sentence1} {tree_sentence2} )'
    return tree_sentence, dtk_sentence, bert_sentence
# Single-pass character rewrite applied to every activation string before it
# is parsed: square brackets become parentheses, single quotes are removed,
# and every ":" is doubled.
_ACTIVATION_STRING_FIXES = str.maketrans(
    {"[": "(", "]": ")", "'": "", ":": "::"}
)


def _activation_string_to_tree(activation_string):
    """Convert one activation string into its final tree string.

    The string is normalised with ``_ACTIVATION_STRING_FIXES``, parsed with
    ``tree.Tree`` and every leaf label is rewritten before the tree is
    rendered back with ``str``.
    """
    parsed = tree.Tree(string=activation_string.translate(_ACTIVATION_STRING_FIXES))
    for leaf in parsed.leaves():
        # Because ":" was doubled above, splitting on ":" leaves an empty
        # middle field; fields 0 and 2 are the text before and after the
        # original colon (presumably "label" and its activation value --
        # TODO confirm). Only leaves are collapsed back to a single ":".
        fields = leaf.root.split(":")
        leaf.root = fields[0] + ":" + fields[2]
    return str(parsed)


def assign_contribution_nodes(activation_SubTree):
    """Build the ``ACT_<n>`` tree strings for one example.

    Args:
        activation_SubTree: Mapping with the keys ``"tree"`` and
            ``"act_sub_trees"``, both forwarded to
            ``v.activationsCalculator``.

    Returns:
        A one-element list holding a dict. When the computed object has more
        than one entry in ``activation_level`` the dict maps ``"ACT_0"``,
        ``"ACT_1"``, ... to the converted tree string of every item returned
        by ``v.treesWithActivationsToString``; otherwise it only contains
        ``"ACT_0"``, built from the first returned item.

    Raises:
        IndexError: If no activation string is returned in the single-level
            case, or if a leaf label contains no ``:`` separator.
    """
    to_write = []
    # The single example is wrapped in a list so tqdm still reports progress.
    for example in tqdm([activation_SubTree]):
        active_trees = v.activationsCalculator(
            example["tree"], example["act_sub_trees"]
        )
        has_many_levels = len(active_trees.activation_level) > 1
        activation_strings = v.treesWithActivationsToString(active_trees)
        if has_many_levels:
            selected = activation_strings
        else:
            # Only the first string is used when there is a single level.
            selected = [activation_strings[0]]

        elem_dict = {
            'ACT_' + str(index): _activation_string_to_tree(activation_string)
            for index, activation_string in enumerate(selected)
        }
        to_write.append(elem_dict)
    return to_write
def get_sentence(sentence, calculator):
    """Build the tree string, DTK tensor and BERT tokens for one sentence.

    Args:
        sentence: Raw sentence text, passed to ``parse_tree`` and
            ``get_token_BERT``.
        calculator: Object whose ``dt(tree)`` method returns a NumPy array
            that can be reshaped to ``(1, 4000)``.

    Returns:
        A tuple ``(tree_sentence, dtk_sentence, bert_sentence)`` where
        ``tree_sentence`` is the bracketed parse string, ``dtk_sentence`` is
        a float CUDA tensor of shape ``(1, 4000)`` and ``bert_sentence`` is
        whatever ``get_token_BERT`` returns for ``sentence``.

    Note:
        The tensor is moved with ``.cuda()``, so a CUDA device is required.
    """
    # Generate the bracketed form: put a space before every "(" and then
    # drop the first character of the result (the space inserted before the
    # opening parenthesis, assuming the parse starts with "(" -- TODO
    # confirm against parse_tree).
    # A plain str.replace is used instead of re.sub("\(", ...), whose
    # non-raw pattern is an invalid escape sequence on modern Python.
    tree_sentence = parse_tree(sentence).replace("(", " (")[1:]

    # Get the BERT tokens.
    bert_sentence = get_token_BERT(sentence)

    # Compute the DTK vector.
    full_tree = tree.Tree(string=tree_sentence)
    # NOTE: 4000 is hard-coded and must match the calculator's output size.
    dtk_vector = calculator.dt(full_tree).reshape(1, 4000)
    dtk_sentence = torch.from_numpy(dtk_vector).float().cuda()

    return tree_sentence, dtk_sentence, bert_sentence