def get_embeddings(mdl, fm, ofn, embedding_layer=2, fn=None, vob=None):
    """
    Extract embeddings for (word, role) pairs and write them to a text file.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param fm: feature manager; ``fm.input_key_map`` is looked up with
        ``"word_role"`` keys and with ``"EOS"``
    :param ofn: path of the output file. One line is written per kept pair:
        the ``word_role`` key, a space, then every value followed by a space
    :param embedding_layer: layer index passed to ``mdl.get_output_layer``
    :param fn: optional file of ``Word,Role`` items separated by spaces; when
        given it takes precedence over ``vob``
    :param vob: optional sequence of (word, role) pairs, used when ``fn`` is
        None
    :raises ValueError: if neither ``fn`` nor ``vob`` is given
    :return: None; the embeddings are written to ``ofn``
    """
    # NOTE(review): this file contains another ``get_embeddings`` with a
    # different signature; if both live in one module the later one wins.
    if fn is not None:
        # A dict drops duplicates while keeping first-seen order, so the
        # output order is reproducible from run to run.
        pairs = {}
        with open(fn, "r") as src:
            for line in src:
                # Drop the line terminator first: otherwise the last item of
                # every line carries a trailing "\n" and never matches a key.
                for item in line.rstrip("\r\n").split(" "):
                    fields = item.split(",")
                    if len(fields) == 2:
                        pairs[(fields[0], fields[1])] = None
        vobs = list(pairs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only the pairs known to the feature manager, as "word_role" keys.
    candidates = [v[0] + "_" + v[1] for v in vobs]
    vobs = [key for key in candidates if key in fm.input_key_map]
    print(vobs)

    # One single-token input sequence per key, each paired with an "EOS"
    # target; ids are shifted by one exactly as in the original code.
    X = [[fm.input_key_map[key] + 1] for key in vobs]
    Y = [[fm.input_key_map["EOS"] + 1] for _ in vobs]

    x, x_mask, y, y_mask = preprare_seq_seq_data(X, Y)
    x, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x, y, x_mask, y_mask, None, None, None, None)
    rs = mdl.get_output_layer(embedding_layer, x, mask_x)

    with open(ofn, "w") as out:
        for i, key in enumerate(vobs):
            values = "".join(str(e) + " " for e in rs[0][i])
            out.write(key + " " + values + "\n")
def get_verb_embeddings(mdl, fm, ofn, embedding_layer=3, fn=None, vob=None):
    """
    Extract the embeddings of verbs and write them to a text file.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param fm: feature manager; ``fm.input_key_map`` is looked up with
        ``"<verb>_PRED"`` keys and with ``"EOS"``
    :param ofn: path of the output file. One line is written per kept verb:
        the verb, a space, then every value followed by a space
    :param embedding_layer: layer index passed to ``mdl.get_output_layer``
    :param fn: optional file containing the verbs, separated by spaces; when
        given it takes precedence over ``vob``
    :param vob: optional sequence of verbs, used when ``fn`` is None
    :raises ValueError: if neither ``fn`` nor ``vob`` is given
    :return: None; the embeddings are written to ``ofn``
    """
    # NOTE(review): this file contains another ``get_verb_embeddings`` with a
    # different signature; if both live in one module the later one wins.
    if fn is not None:
        # A dict drops duplicates while keeping first-seen order, so the
        # output order is reproducible from run to run.
        seen = {}
        with open(fn, "r") as src:
            for line in src:
                # Drop the line terminator first: otherwise the last verb of
                # every line carries a trailing "\n" and never matches a key.
                for token in line.rstrip("\r\n").split(" "):
                    if token != "":
                        seen[token] = None
        vobs = list(seen)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only the verbs whose "<verb>_PRED" key is known.
    vobs = [v for v in vobs if v + "_" + "PRED" in fm.input_key_map]

    # One single-token input sequence per verb, each paired with an "EOS"
    # target; ids are shifted by one exactly as in the original code.
    X = [[fm.input_key_map[v + "_" + "PRED"] + 1] for v in vobs]
    Y = [[fm.input_key_map["EOS"] + 1] for _ in vobs]

    x, x_mask, y, y_mask = preprare_seq_seq_data(X, Y)
    x, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x, y, x_mask, y_mask, None, None, None, None)
    rs = mdl.get_output_layer(embedding_layer, x, mask_x)

    with open(ofn, "w") as out:
        for i, verb in enumerate(vobs):
            values = "".join(str(e) + " " for e in rs[0][i])
            out.write(verb + " " + values + "\n")
def get_scores_all(mdl, fm, X, X_new, num_select=10):
    """
    Score every candidate continuation and return the best next keys.

    For each context ``X[j]`` and each (word, role) candidate in
    ``X_new[j]`` an extended sequence ``X[j] + ["word_role"]`` is built and
    run through the model. The last position of the final layer's output is
    then ranked per sequence.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param fm: feature manager; provides ``input_key_map`` and ``get_key``
    :param X: list of context sequences (lists of keys)
    :param X_new: for each context, a list of (word, role) candidates
    :param num_select: maximum number of non-"EOS" keys kept per sequence
    :return: ``(rs, rs_scores, X1, my_scores)`` where ``rs`` holds the
        selected keys split on "_", ``rs_scores`` their scores, ``X1`` the
        extended sequences and ``my_scores`` one
        ``(to_string(sequence), [(index, score), ...])`` entry per sequence
    """
    # NOTE(review): this file contains another ``get_scores_all`` with a
    # different signature; if both live in one module the later one wins.
    print(X_new)

    # Append each candidate, joined as "word_role", to a copy of its context.
    X1 = []
    for j, candidates in enumerate(X_new):
        for cand in candidates:
            X1.append(list(X[j]) + [cand[0] + "_" + cand[1]])
    print(X1)

    ids = [[fm.input_key_map[key] for key in seq] for seq in X1]
    x, x_mask = preprare_seq_seq_data(ids)
    x, _, mask_x, _, _, _, _, _ = mdl.standardize_data(
        x, None, x_mask, None, None, None, None, None)

    # Final layer output, with the first two axes swapped, last position only.
    score_pos = mdl.get_output_layer(-1, x, mask_x).swapaxes(0, 1)[:, -1]

    score_var = T.matrix("score")
    sort_f = th.function([score_var], T.argsort(score_var))
    sorted_values = sort_f(score_pos)

    n_rows = sorted_values.shape[0]
    rs = []
    rs_scores = []
    my_scores = []
    for i in range(n_rows):
        n_cols = sorted_values.shape[1]
        order = sorted_values[i]
        row_scores = score_pos[i]

        # Full ranking, walked from the end of the argsort result
        # (presumably highest score first, if T.argsort sorts ascending).
        ranking = [(order[t], row_scores[order[t]])
                   for t in range(n_cols - 1, -1, -1)]
        my_scores.append((to_string(X1[i]), ranking))

        # Top ``num_select`` indices whose key is not "EOS".
        chosen = []
        for t in range(n_cols - 1, -1, -1):
            if len(chosen) == num_select:
                break
            idx = order[t]
            if fm.get_key(idx) != "EOS":
                chosen.append(idx)

        rs.append([fm.get_key(idx).split("_") for idx in chosen])
        rs_scores.append([row_scores[idx] for idx in chosen])

    print(rs)
    print(rs_scores)
    print(X1)
    print(my_scores)
    return rs, rs_scores, X1, my_scores
def get_scores_all(mdl, fm1, fm2, fm3, X1, X2, X_new1, X_new2, num_select=10):
    """
    Score candidate continuations for a two-input model and return the best
    next (first, second) key pairs.

    Each candidate in ``X_new1[j]`` / ``X_new2[j]`` is appended to a copy of
    ``X1[j]`` / ``X2[j]``. The extended sequences are run through the model
    and the last position of the final layer's output is ranked per sequence.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param fm1: feature manager whose ``input_key_map`` encodes the first
        input and validates the first half of an output key
    :param fm2: same, for the second input / second half of an output key
    :param fm3: feature manager whose ``get_key`` turns an output index into
        a ``"first_second"`` key
    :param X1: list of context sequences for the first input
    :param X2: list of context sequences for the second input
    :param X_new1: for each context in ``X1``, the candidate keys to append
    :param X_new2: for each context in ``X2``, the candidate keys to append
    :param num_select: maximum number of output keys kept per sequence
    :return: ``((rs1, rs2), rs_scores, X11, X21, my_scores)`` where ``rs1`` /
        ``rs2`` hold the two halves of the selected keys, ``rs_scores`` their
        scores, ``X11`` / ``X21`` the extended sequences and ``my_scores``
        one ``("_", [(index, score), ...])`` entry per sequence
    """
    # Append each candidate to a copy of its context.
    X11 = []
    for j, candidates in enumerate(X_new1):
        for cand in candidates:
            X11.append(list(X1[j]) + [cand])
    X21 = []
    for j, candidates in enumerate(X_new2):
        for cand in candidates:
            X21.append(list(X2[j]) + [cand])

    ids1 = [[fm1.input_key_map[key] for key in seq] for seq in X11]
    ids2 = [[fm2.input_key_map[key] for key in seq] for seq in X21]

    x1, x_mask1 = preprare_seq_seq_data(ids1)
    x1, _, mask_x1, _, _, _, _, _ = mdl.standardize_data(
        x1, None, x_mask1, None, None, None, None, None)
    x2, x_mask2 = preprare_seq_seq_data(ids2)
    # Only the first input's mask is handed to the model below.
    x2, _, _, _, _, _, _, _ = mdl.standardize_data(
        x2, None, x_mask2, None, None, None, None, None)

    # Final layer output, with the first two axes swapped, last position only.
    score_pos = mdl.get_output_layer(-1, x1, x2, mask_x1)
    score_pos = score_pos.swapaxes(0, 1)
    score_pos = score_pos[:, -1]

    score_var = T.matrix("score")
    sort_f = th.function([score_var], T.argsort(score_var))
    sorted_values = sort_f(score_pos)

    rs1 = []
    rs2 = []
    rs_scores = []
    my_scores = []
    for i in range(sorted_values.shape[0]):
        n_cols = sorted_values.shape[1]
        order = sorted_values[i]
        row_scores = score_pos[i]

        # Full ranking, walked from the end of the argsort result. The
        # original loop used range(1, n_cols) and therefore silently dropped
        # the entry at sorted position 0; every position is included now.
        ranking = [(order[t], row_scores[order[t]])
                   for t in range(n_cols - 1, -1, -1)]
        my_scores.append(("_", ranking))

        # Keep up to ``num_select`` keys that are not "EOS_EOS" and whose two
        # halves are both known to the input feature managers.
        vals1 = []
        vals2 = []
        scores = []
        for t in range(n_cols - 1, -1, -1):
            if len(scores) == num_select:
                break
            idx = order[t]
            key = fm3.get_key(idx)
            if key == "EOS_EOS":
                continue
            parts = key.split("_")
            if parts[0] in fm1.input_key_map and parts[1] in fm2.input_key_map:
                vals1.append(parts[0])
                vals2.append(parts[1])
                scores.append(row_scores[idx])

        rs1.append(vals1)
        rs2.append(vals2)
        rs_scores.append(scores)

    return (rs1, rs2), rs_scores, X11, X21, my_scores
def get_embeddings(mdl, fm1, fm2, ofn, embedding_layer=2, fn=None, vob=None):
    """
    Extract embeddings for (first, second) item pairs with a two-input model
    and write them to a text file.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param fm1: feature manager for the first item of each pair
        (``fm1.f`` encodes inputs, ``fm1.fY`` encodes targets)
    :param fm2: feature manager for the second item of each pair
    :param ofn: path of the output file. One line is written per kept pair:
        ``first_second``, a space, then every value followed by a space
    :param embedding_layer: layer index passed to ``mdl.get_output_layer``
    :param fn: optional file of ``first,second`` items separated by spaces;
        when given it takes precedence over ``vob``
    :param vob: optional sequence of (first, second) pairs, used when ``fn``
        is None
    :raises ValueError: if neither ``fn`` nor ``vob`` is given
    :return: None; the embeddings are written to ``ofn``
    """
    if fn is not None:
        # A dict drops duplicates while keeping first-seen order, so the
        # output order is reproducible from run to run.
        pairs = {}
        with open(fn, "r") as src:
            for line in src:
                # Drop the line terminator first: otherwise the last item of
                # every line carries a trailing "\n" and never matches a key.
                for item in line.rstrip("\r\n").split(" "):
                    fields = item.split(",")
                    if len(fields) == 2:
                        pairs[(fields[0], fields[1])] = None
        vobs = list(pairs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only the pairs whose two items are both known.
    vobs = [v for v in vobs
            if v[0] in fm1.f.map.keys() and v[1] in fm2.f.map.keys()]

    # Every input is "EOS" followed by the item; every target is two "EOS".
    X1 = [["EOS", v[0]] for v in vobs]
    X2 = [["EOS", v[1]] for v in vobs]
    Y1 = [["EOS", "EOS"] for _ in vobs]
    Y2 = [["EOS", "EOS"] for _ in vobs]

    X1 = [[fm1.f.map[fm1.f.getFeatureValue(t)] + 1 for t in seq] for seq in X1]
    Y1 = [[fm1.fY.map[fm1.fY.getFeatureValue(t)] + 1 for t in seq] for seq in Y1]
    X2 = [[fm2.f.map[fm2.f.getFeatureValue(t)] + 1 for t in seq] for seq in X2]
    Y2 = [[fm2.fY.map[fm2.fY.getFeatureValue(t)] + 1 for t in seq] for seq in Y2]

    # As in the original code, the masks that are kept come from the second
    # call (all sequences have length two in both inputs).
    x1, _, y1, _ = preprare_seq_seq_data(X1, Y1)
    x2, mask_x, y2, mask_y = preprare_seq_seq_data(X2, Y2)
    x1, y1, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x1, y1, mask_x, mask_y, None, None, None, None)
    x2, y2, _, _, _, _, _, _ = mdl.standardize_data(
        x2, y2, None, None, None, None, None, None)
    print(x1)
    print(x2)
    print(mask_x)

    rs = mdl.get_output_layer(embedding_layer, x1, x2, mask_x)
    print(rs.shape)

    # NOTE(review): the inputs are two tokens long ("EOS", item); confirm
    # that index 0 of the layer output is the intended position.
    with open(ofn, "w") as out:
        for i, pair in enumerate(vobs):
            # The original wrote ``pair + " "``, which raises TypeError for a
            # tuple; the pair is now written as "first_second".
            label = pair[0] + "_" + pair[1]
            values = "".join(str(e) + " " for e in rs[0][i])
            out.write(label + " " + values + "\n")
def get_verb_embeddings(mdl, map_x1, map_x2, map_y1, ofn, embedding_layer=2, fn=None, vob=None):
    """
    Extract the embeddings of verbs with a two-input model and write them to
    a text file.

    :param mdl: the model; ``standardize_data`` and ``get_output_layer`` are
        called on it
    :param map_x1: object whose ``input_key_map`` encodes the verbs
        (first input)
    :param map_x2: object whose ``input_key_map`` encodes ``"PRED"``
        (second input)
    :param map_y1: object whose ``input_key_map`` encodes the ``"EOS_EOS"``
        target
    :param ofn: path of the output file. One line is written per kept verb:
        the verb, a space, then every value followed by a space
    :param embedding_layer: layer index passed to ``mdl.get_output_layer``
    :param fn: optional file containing the verbs, separated by spaces; when
        given it takes precedence over ``vob``
    :param vob: optional sequence of verbs, used when ``fn`` is None
    :raises ValueError: if neither ``fn`` nor ``vob`` is given
    :return: None; the embeddings are written to ``ofn``
    """
    print(map_x1.input_key_map)
    if fn is not None:
        # A dict drops duplicates while keeping first-seen order, so the
        # output order is reproducible from run to run.
        seen = {}
        with open(fn, "r") as src:
            for line in src:
                # Drop the line terminator first: otherwise the last verb of
                # every line carries a trailing "\n" and never matches a key.
                for token in line.rstrip("\r\n").split(" "):
                    if token != "":
                        seen[token] = None
        vobs = list(seen)
    elif vob is not None:
        vobs = vob
        print(vobs)
    else:
        raise ValueError("either fn or vob must be provided")

    print(vobs)
    # Keep only the verbs known to the first input map.
    vobs = [v for v in vobs if v in map_x1.input_key_map]
    print(vobs)

    # One single-token sequence per verb on the first input, a constant
    # "PRED" on the second input and an "EOS_EOS" target.
    X1 = [[map_x1.input_key_map[v]] for v in vobs]
    X2 = [[map_x2.input_key_map["PRED"]] for _ in vobs]
    Y1 = [[map_y1.input_key_map["EOS_EOS"]] for _ in vobs]

    # As in the original code, the input mask that is kept comes from the
    # second call (every sequence has length one in both inputs).
    x1, _, y, mask_y = preprare_seq_seq_data(X1, Y1)
    x2, mask_x = preprare_seq_seq_data(X2, None)
    x1, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x1, y, mask_x, mask_y, None, None, None, None)
    x2, _, _, _, _, _, _, _ = mdl.standardize_data(
        x2, None, None, None, None, None, None, None)

    rs = mdl.get_output_layer(embedding_layer, x1, x2, mask_x)
    print(rs.shape)

    with open(ofn, "w") as out:
        for i, verb in enumerate(vobs):
            values = "".join(str(e) + " " for e in rs[0][i])
            out.write(verb + " " + values + "\n")