コード例 #1
0
def get_embeddings(mdl, fm, ofn, embedding_layer=2, fn=None, vob=None):
    '''
    Write the embeddings of (word, role) pairs to a text file.

    The pairs come from the file ``fn`` or, when ``fn`` is None, from
    ``vob``.  Each pair is turned into the key ``word_role``; keys missing
    from ``fm.input_key_map`` are dropped.  Every remaining key is fed to
    the model as a one-item input sequence with a one-item ``"EOS"`` target
    (both encoded as ``fm.input_key_map[...] + 1``), and the output of layer
    ``embedding_layer`` is written to ``ofn``: one line per key, holding the
    key followed by the space-separated vector components.

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param fm: feature manager exposing ``input_key_map``
    :param ofn: path of the output file (opened in "w" mode)
    :param embedding_layer: layer index handed to ``mdl.get_output_layer``
    :param fn: optional path of a text file with whitespace-separated
        ``Word,Role`` tokens
    :param vob: optional iterable of (word, role) pairs, used when ``fn`` is
        None
    :raises ValueError: if both ``fn`` and ``vob`` are None
    :return: None; the embeddings are written to ``ofn``
    '''
    if fn is not None:
        pairs = set()
        # The context manager closes the input file even if parsing fails.
        with open(fn, "r") as src:
            for line in src:
                # split() with no argument also discards the trailing
                # newline, which split(" ") left glued to the last token of
                # every line (so its role could never match a known key).
                for token in line.split():
                    parts = token.split(",")
                    if len(parts) == 2:
                        pairs.add((parts[0], parts[1]))
        vobs = list(pairs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only the "word_role" keys known to the feature manager.
    candidate_keys = [v[0] + "_" + v[1] for v in vobs]
    vobs = [key for key in candidate_keys if key in fm.input_key_map]
    print(vobs)

    # One single-item input sequence per key, each paired with an EOS target.
    X = [[fm.input_key_map[key] + 1] for key in vobs]
    Y = [[fm.input_key_map["EOS"] + 1] for _ in vobs]

    x, x_mask, y, y_mask = preprare_seq_seq_data(X, Y)
    x, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x, y, x_mask, y_mask, None, None, None, None)
    rs = mdl.get_output_layer(embedding_layer, x, mask_x)

    # NOTE(review): assumes rs[0][i] is the vector of the i-th key -- confirm
    # against the layout returned by mdl.get_output_layer.
    with open(ofn, "w") as out:
        for i, key in enumerate(vobs):
            components = "".join(str(e) + " " for e in rs[0][i])
            out.write(key + " " + components + "\n")
コード例 #2
0
def get_verb_embeddings(mdl, fm, ofn, embedding_layer=3, fn=None, vob=None):
    '''
    Write the embeddings of verbs to a text file.

    The verbs come from the file ``fn`` or, when ``fn`` is None, from
    ``vob``.  A verb is kept only if the key ``<verb>_PRED`` exists in
    ``fm.input_key_map``.  Every kept verb is fed to the model as a one-item
    input sequence (its ``_PRED`` key) with a one-item ``"EOS"`` target (both
    encoded as ``fm.input_key_map[...] + 1``), and the output of layer
    ``embedding_layer`` is written to ``ofn``: one line per verb, holding the
    verb followed by the space-separated vector components.

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param fm: feature manager exposing ``input_key_map``
    :param ofn: path of the output file (opened in "w" mode)
    :param embedding_layer: layer index handed to ``mdl.get_output_layer``
    :param fn: optional path of a text file with whitespace-separated verbs
    :param vob: optional iterable of verbs, used when ``fn`` is None
    :raises ValueError: if both ``fn`` and ``vob`` are None
    :return: None; the embeddings are written to ``ofn``
    '''
    if fn is not None:
        verbs = set()
        # The context manager closes the input file even if reading fails.
        with open(fn, "r") as src:
            for line in src:
                # split() with no argument drops the trailing newline and
                # empty tokens; split(" ") kept "verb\n" (and a bare "\n"
                # for blank lines) as vocabulary entries.
                verbs.update(line.split())
        vobs = list(verbs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only verbs whose predicate key is known to the feature manager.
    vobs = [v for v in vobs if v + "_PRED" in fm.input_key_map]

    # One single-item input sequence per verb, each paired with an EOS target.
    X = [[fm.input_key_map[v + "_PRED"] + 1] for v in vobs]
    Y = [[fm.input_key_map["EOS"] + 1] for _ in vobs]

    x, x_mask, y, y_mask = preprare_seq_seq_data(X, Y)
    x, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x, y, x_mask, y_mask, None, None, None, None)
    rs = mdl.get_output_layer(embedding_layer, x, mask_x)

    # NOTE(review): assumes rs[0][i] is the vector of the i-th verb -- confirm
    # against the layout returned by mdl.get_output_layer.
    with open(ofn, "w") as out:
        for i, verb in enumerate(vobs):
            components = "".join(str(e) + " " for e in rs[0][i])
            out.write(verb + " " + components + "\n")
コード例 #3
0
def get_scores_all(mdl, fm, X, X_new, num_select=10):
    '''
    Score candidate continuations and return the best-ranked keys per sequence.

    For every ``j`` and every candidate pair in ``X_new[j]``, a copy of
    ``X[j]`` is extended with ``pair[0] + "_" + pair[1]``.  The extended
    sequences are encoded through ``fm.input_key_map``, run through the
    model, and the output of layer ``-1`` is reduced to one score row per
    sequence (axes 0 and 1 are swapped, then the last entry along axis 1 is
    taken -- presumably the final time step, TODO confirm).

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param fm: feature manager exposing ``input_key_map`` and ``get_key``
    :param X: context sequences of keys, indexed like ``X_new``
    :param X_new: for each context, the candidate pairs to append
    :param num_select: maximum number of non-"EOS" indices kept per sequence
    :return: tuple ``(rs, rs_scores, X1, my_scores)`` where ``rs`` holds, per
        sequence, the selected keys split on "_"; ``rs_scores`` their
        scores; ``X1`` the extended sequences; and ``my_scores`` pairs of
        ``(to_string(sequence), [(index, score), ...])`` ordered from the
        highest-ranked index to the lowest
    '''
    print(X_new)

    # Append each candidate, joined with "_", to a copy of its context.
    extended = []
    for j, candidates in enumerate(X_new):
        for cand in candidates:
            seq = list(X[j])
            seq.append(cand[0] + "_" + cand[1])
            extended.append(seq)

    print(extended)

    encoded = [[fm.input_key_map[tok] for tok in seq] for seq in extended]

    x, x_mask = preprare_seq_seq_data(encoded)
    x, _, mask_x, _, _, _, _, _ = mdl.standardize_data(
        x, None, x_mask, None, None, None, None, None)

    layer_out = mdl.get_output_layer(-1, x, mask_x)
    score_pos = layer_out.swapaxes(0, 1)[:, -1]

    # Compile an argsort over a matrix and apply it to the score rows.
    score_var = T.matrix("score")
    sort_f = th.function([score_var], T.argsort(score_var))
    sorted_values = sort_f(score_pos)

    n_rows = sorted_values.shape[0]
    n_cols = sorted_values.shape[1]

    rs = []
    rs_scores = []
    my_scores = []
    for i in range(n_rows):
        # Full ranking: walk the argsort result from its last column to its
        # first, pairing every index with its score.
        ranked = []
        for col in range(n_cols - 1, -1, -1):
            idx = sorted_values[i][col]
            ranked.append((idx, score_pos[i][idx]))
        my_scores.append((to_string(extended[i]), ranked))

        # Selection: same walk, skipping "EOS", stopping at num_select hits.
        picked = []
        for col in range(n_cols - 1, -1, -1):
            if len(picked) == num_select:
                break
            idx = sorted_values[i][col]
            if fm.get_key(idx) != "EOS":
                picked.append(idx)

        rs.append([fm.get_key(idx).split("_") for idx in picked])
        rs_scores.append([score_pos[i][idx] for idx in picked])

        print(rs)
        print(rs_scores)
        print(extended)
        print(my_scores)
    return rs, rs_scores, extended, my_scores
コード例 #4
0
def get_scores_all(mdl, fm1, fm2, fm3, X1, X2, X_new1, X_new2, num_select=10):
    '''
    Score candidate continuations for a two-input model.

    For every ``j`` and every candidate in ``X_new1[j]`` (resp.
    ``X_new2[j]``), a copy of ``X1[j]`` (resp. ``X2[j]``) is extended with
    that candidate.  Both sets of extended sequences are encoded through
    ``fm1.input_key_map`` / ``fm2.input_key_map``, run through the model, and
    the output of layer ``-1`` is reduced to one score row per sequence (axes
    0 and 1 are swapped, then the last entry along axis 1 is taken --
    presumably the final time step, TODO confirm).

    Output indices are decoded with ``fm3.get_key`` into ``"a_b"`` keys.  An
    index is selected only if its key is not ``"EOS_EOS"``, ``a`` is in
    ``fm1.input_key_map`` and ``b`` is in ``fm2.input_key_map``.

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param fm1: feature manager of the first input (``input_key_map``)
    :param fm2: feature manager of the second input (``input_key_map``)
    :param fm3: feature manager of the output (``get_key``)
    :param X1: context sequences of the first input
    :param X2: context sequences of the second input
    :param X_new1: for each first-input context, the candidate keys to append
    :param X_new2: for each second-input context, the candidate keys to append
    :param num_select: maximum number of indices selected per sequence
    :return: tuple ``((rs1, rs2), rs_scores, X11, X21, my_scores)`` where
        ``rs1``/``rs2`` hold the first/second key parts of the selected
        indices, ``rs_scores`` their scores, ``X11``/``X21`` the extended
        sequences, and ``my_scores`` pairs ``("_", [(index, score), ...])``
        ordered from the highest-ranked index to the lowest
    '''
    # Append every candidate to a copy of its context sequence.
    X11 = []
    for j, candidates in enumerate(X_new1):
        for cand in candidates:
            X11.append(list(X1[j]) + [cand])

    X21 = []
    for j, candidates in enumerate(X_new2):
        for cand in candidates:
            X21.append(list(X2[j]) + [cand])

    enc1 = [[fm1.input_key_map[tok] for tok in seq] for seq in X11]
    enc2 = [[fm2.input_key_map[tok] for tok in seq] for seq in X21]

    x1, x_mask1 = preprare_seq_seq_data(enc1)
    x1, _, mask_x1, _, _, _, _, _ = mdl.standardize_data(
        x1, None, x_mask1, None, None, None, None, None)

    x2, x_mask2 = preprare_seq_seq_data(enc2)
    # Only the first input's mask is handed to the model below.
    x2, _, _, _, _, _, _, _ = mdl.standardize_data(
        x2, None, x_mask2, None, None, None, None, None)

    score_pos = mdl.get_output_layer(-1, x1, x2, mask_x1)
    score_pos = score_pos.swapaxes(0, 1)[:, -1]

    # Compile an argsort over a matrix and apply it to the score rows.
    score_var = T.matrix("score")
    sort_f = th.function([score_var], T.argsort(score_var))
    sorted_values = sort_f(score_pos)

    n_rows = sorted_values.shape[0]
    n_cols = sorted_values.shape[1]

    rs1 = []
    rs2 = []
    rs_scores = []
    my_scores = []
    for i in range(n_rows):
        # Full ranking over every column, including column 0 (the lowest
        # score); the previous range(1, n_cols) silently dropped it.
        ranked = []
        for col in range(n_cols - 1, -1, -1):
            idx = sorted_values[i][col]
            ranked.append((idx, score_pos[i][idx]))
        my_scores.append(("_", ranked))

        # Selection: best first, keeping at most num_select indices whose
        # decoded halves are both known to the input feature managers.
        vals1 = []
        vals2 = []
        scores = []
        for col in range(n_cols - 1, -1, -1):
            if len(scores) == num_select:
                break
            idx = sorted_values[i][col]
            key = fm3.get_key(idx)
            if key == "EOS_EOS":
                continue
            parts = key.split("_")
            if parts[0] in fm1.input_key_map and parts[1] in fm2.input_key_map:
                vals1.append(parts[0])
                vals2.append(parts[1])
                scores.append(score_pos[i][idx])

        rs1.append(vals1)
        rs2.append(vals2)
        rs_scores.append(scores)

    return (rs1, rs2), rs_scores, X11, X21, my_scores
コード例 #5
0
def get_embeddings(mdl, fm1, fm2, ofn, embedding_layer=2, fn=None, vob=None):
    '''
    Write the embeddings of (first, second) key pairs for a two-input model.

    The pairs come from the file ``fn`` or, when ``fn`` is None, from
    ``vob``.  A pair is kept only if its first item is in ``fm1.f.map`` and
    its second item is in ``fm2.f.map``.  Each kept pair becomes the input
    sequences ``["EOS", first]`` and ``["EOS", second]`` with
    ``["EOS", "EOS"]`` targets, encoded through the ``f`` / ``fY`` features
    of the matching feature manager (mapped index + 1).  The output of layer
    ``embedding_layer`` is written to ``ofn``: one line per pair, holding
    ``first_second`` followed by the space-separated vector components.

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param fm1: feature manager of the first input (``f`` and ``fY``, each
        with ``map`` and ``getFeatureValue``)
    :param fm2: feature manager of the second input (same interface)
    :param ofn: path of the output file (opened in "w" mode)
    :param embedding_layer: layer index handed to ``mdl.get_output_layer``
    :param fn: optional path of a text file with whitespace-separated
        ``first,second`` tokens
    :param vob: optional iterable of (first, second) pairs, used when ``fn``
        is None
    :raises ValueError: if both ``fn`` and ``vob`` are None
    :return: None; the embeddings are written to ``ofn``
    '''
    if fn is not None:
        pairs = set()
        # The context manager closes the input file even if parsing fails.
        with open(fn, "r") as src:
            for line in src:
                # split() with no argument also discards the trailing
                # newline, which split(" ") left glued to the last token of
                # every line.
                for token in line.split():
                    parts = token.split(",")
                    if len(parts) == 2:
                        pairs.add((parts[0], parts[1]))
        vobs = list(pairs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")

    # Keep only pairs whose two halves are known to their feature managers.
    vobs = [v for v in vobs if v[0] in fm1.f.map and v[1] in fm2.f.map]

    X1 = [["EOS", v[0]] for v in vobs]
    X2 = [["EOS", v[1]] for v in vobs]
    Y1 = [["EOS", "EOS"] for _ in vobs]
    Y2 = [["EOS", "EOS"] for _ in vobs]

    X1 = [[fm1.f.map[fm1.f.getFeatureValue(tok)] + 1 for tok in seq]
          for seq in X1]
    Y1 = [[fm1.fY.map[fm1.fY.getFeatureValue(tok)] + 1 for tok in seq]
          for seq in Y1]
    X2 = [[fm2.f.map[fm2.f.getFeatureValue(tok)] + 1 for tok in seq]
          for seq in X2]
    Y2 = [[fm2.fY.map[fm2.fY.getFeatureValue(tok)] + 1 for tok in seq]
          for seq in Y2]

    x1, mask_x, y1, mask_y = preprare_seq_seq_data(X1, Y1)
    # The masks of the second call replace those of the first.  Both inputs
    # hold sequences of identical length here, so the masks presumably
    # match -- TODO confirm.
    x2, mask_x, y2, mask_y = preprare_seq_seq_data(X2, Y2)

    x1, y1, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x1, y1, mask_x, mask_y, None, None, None, None)
    x2, y2, _, _, _, _, _, _ = mdl.standardize_data(
        x2, y2, None, None, None, None, None, None)
    print(x1)
    print(x2)
    print(mask_x)
    rs = mdl.get_output_layer(embedding_layer, x1, x2, mask_x)
    print(rs.shape)

    # NOTE(review): assumes rs[0][i] is the vector of the i-th pair -- confirm
    # against the layout returned by mdl.get_output_layer.
    with open(ofn, "w") as out:
        for i, pair in enumerate(vobs):
            # Each entry is a pair, so build the label explicitly;
            # concatenating the pair itself with " " raised TypeError.
            label = pair[0] + "_" + pair[1]
            components = "".join(str(e) + " " for e in rs[0][i])
            out.write(label + " " + components + "\n")
コード例 #6
0
def get_verb_embeddings(mdl, map_x1, map_x2, map_y1, ofn, embedding_layer=2, fn=None, vob=None):
    '''
    Write the embeddings of verbs for a two-input model.

    The verbs come from the file ``fn`` or, when ``fn`` is None, from
    ``vob``.  A verb is kept only if it is a key of
    ``map_x1.input_key_map``.  Each kept verb is fed to the model as a
    one-item first-input sequence, paired with the one-item second-input
    sequence ``["PRED"]`` and the one-item target ``["EOS_EOS"]``, each
    encoded through the matching ``input_key_map``.  The output of layer
    ``embedding_layer`` is written to ``ofn``: one line per verb, holding the
    verb followed by the space-separated vector components.

    :param mdl: the model; must provide ``standardize_data`` and
        ``get_output_layer``
    :param map_x1: key map of the first input (``input_key_map``)
    :param map_x2: key map of the second input (``input_key_map``)
    :param map_y1: key map of the output (``input_key_map``)
    :param ofn: path of the output file (opened in "w" mode)
    :param embedding_layer: layer index handed to ``mdl.get_output_layer``
    :param fn: optional path of a text file with whitespace-separated verbs
    :param vob: optional iterable of verbs, used when ``fn`` is None
    :raises ValueError: if both ``fn`` and ``vob`` are None
    :return: None; the embeddings are written to ``ofn``
    '''
    print(map_x1.input_key_map)
    if fn is not None:
        verbs = set()
        # The context manager closes the input file even if reading fails.
        with open(fn, "r") as src:
            for line in src:
                # split() with no argument drops the trailing newline and
                # empty tokens; split(" ") kept "verb\n" (and a bare "\n"
                # for blank lines) as vocabulary entries.
                verbs.update(line.split())
        vobs = list(verbs)
    elif vob is not None:
        vobs = vob
    else:
        raise ValueError("either fn or vob must be provided")
    print(vobs)

    # Keep only verbs known to the first input's key map.
    vobs = [v for v in vobs if v in map_x1.input_key_map]
    print(vobs)

    X1 = [[map_x1.input_key_map[v]] for v in vobs]
    X2 = [[map_x2.input_key_map["PRED"]] for _ in vobs]
    Y1 = [[map_y1.input_key_map["EOS_EOS"]] for _ in vobs]

    x1, mask_x, y, mask_y = preprare_seq_seq_data(X1, Y1)
    # The mask of the second call replaces that of the first.  Both inputs
    # hold one-item sequences here, so the masks presumably match -- TODO
    # confirm.
    x2, mask_x = preprare_seq_seq_data(X2, None)

    x1, y, mask_x, mask_y, _, _, _, _ = mdl.standardize_data(
        x1, y, mask_x, mask_y, None, None, None, None)
    x2, _, _, _, _, _, _, _ = mdl.standardize_data(
        x2, None, None, None, None, None, None, None)

    rs = mdl.get_output_layer(embedding_layer, x1, x2, mask_x)
    print(rs.shape)

    # NOTE(review): assumes rs[0][i] is the vector of the i-th verb -- confirm
    # against the layout returned by mdl.get_output_layer.
    with open(ofn, "w") as out:
        for i, verb in enumerate(vobs):
            components = "".join(str(e) + " " for e in rs[0][i])
            out.write(verb + " " + components + "\n")