コード例 #1
0
def generate_batch_data(inputfile, batch_size, args):
    """Endlessly yield ``(inputs, labels)`` batches built from ``inputfile``.

    Every pass re-reads ``inputfile`` with ``load_data``, walks the examples
    in order in windows of at most ``batch_size`` items, embeds each window
    with ``embed_elmogan`` and pads its labels with ``pad_labels``.  When the
    data is exhausted the next pass starts from the beginning, so the
    generator never finishes on its own; one pass yields exactly
    ``ceil(len(data) / batch_size)`` batches.

    Args:
        inputfile: Path handed unchanged to ``load_data``.
        batch_size: Maximum number of examples per batch; must be >= 1.
        args: Parsed command-line namespace.  ``options`` and ``weights`` are
            passed to ``ElmoEmbedder``; ``mat0``/``mat1``/``mat2`` are the
            optional mapping models loaded with ``load_model``; the whole
            namespace is also forwarded to ``embed_elmogan``.

    Yields:
        ``([layer0, layer1, layer2], labels)`` where every element has been
        converted with ``np.array``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``load_data``
            returns no examples (the loop could otherwise never yield).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # NOTE(review): the third argument is presumably the CUDA device index
    # (-1 = CPU) -- confirm against the ElmoEmbedder in use.
    elmo = ElmoEmbedder(args.options, args.weights, -1)
    if args.mat0:
        sys.stderr.write("loading mapping models\n")
        # Passed as custom_objects so load_model can resolve these names.
        my_funcs = {'htanh': htanh, 'csd2': csd2, 'cosine_proximity': losses.cosine_proximity}
        with tf.device("cpu:0"):
            xlingual = [
                load_model(model_path, custom_objects=my_funcs)
                for model_path in (args.mat0, args.mat1, args.mat2)
            ]
    else:
        # No mapping requested: one False placeholder per layer.
        xlingual = [False] * 3

    def build_batch(sentences, labels):
        """Embed ``sentences`` and pad ``labels`` into one batch tuple."""
        xval0, xval1, xval2 = embed_elmogan(sentences, elmo, xlingual, args)
        ypadded = pad_labels(labels)
        return ([np.array(xval0), np.array(xval1), np.array(xval2)], np.array(ypadded))

    while True:  # it needs to be infinitely iterable
        x, y = load_data(inputfile)
        sys.stderr.write("loading eval data\n")
        print("INPUT SIZES X AND Y", len(x), len(y))
        assert len(x) == len(y)
        total = len(y)
        if total == 0:
            raise ValueError("no examples loaded from %r" % (inputfile,))
        # Fixed-size windows: the previous `i % batch_size == 0` test flushed
        # only after item `batch_size` had been appended, which made the first
        # batch one element too large.
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            yield build_batch([x[i] for i in range(start, stop)],
                              [y[i] for i in range(start, stop)])
コード例 #2
0
def main():
    """Evaluate a saved ELMo NER model on a test file and print metrics.

    Parses the command line, loads the model stored at ``--save``, runs
    ``predict_generator`` over the batches produced by
    ``generate_batch_data`` for ``ceil(test_len / bs)`` steps, and compares
    the per-token argmax of the predictions with the argmax of the gold
    labels returned by ``load_data``.  Prints a separator, the confusion
    matrix, and the micro, macro and per-class F1 scores to stdout.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--test_file", default=None, type=str, required=True)
    parser.add_argument("--options",
                        default=None,
                        type=str,
                        required=True,
                        help="elmo options file")
    parser.add_argument("--weights",
                        default=None,
                        type=str,
                        required=True,
                        help="elmo weights file")
    parser.add_argument("--test_len", default=0, type=int, required=True)
    parser.add_argument('--mat0',
                        help='mapping matrices for layer0 (.npz), optional')
    parser.add_argument('--mat1',
                        help='mapping matrices for layer1 (.npz), optional')
    parser.add_argument('--mat2',
                        help='mapping matrices for layer2 (.npz), optional')
    parser.add_argument('--evlang',
                        default='src',
                        type=str,
                        help='src or trg when mapping test file language')
    parser.add_argument("--bs", default=64, type=int, help="batch size")
    parser.add_argument("--save",
                        default="elmo_new_ner_model",
                        type=str,
                        help="path to trained elmo NER model")
    args = parser.parse_args()

    # The step count divides by the batch size, so reject values that would
    # raise ZeroDivisionError or produce a negative number of steps.
    if args.bs < 1:
        parser.error("--bs must be a positive integer")

    model = load_model(args.save)
    y_predict = model.predict_generator(
        generate_batch_data(args.test_file, args.bs, args),
        steps=ceil(args.test_len / args.bs))

    # Gold labels are read again here; only the labels are needed.
    _, y_ev = load_data(args.test_file)

    # Flatten gold and predicted class indices token by token.  Predictions
    # are indexed by the gold positions, so any extra rows or columns in
    # y_predict are ignored.
    y_ev_i = []
    y_pr_i = []
    for s, gold_sentence in enumerate(y_ev):
        for w, gold_token in enumerate(gold_sentence):
            y_ev_i.append(np.argmax(gold_token))
            y_pr_i.append(np.argmax(y_predict[s][w]))

    print('---***---')
    print(confusion_matrix(y_ev_i, y_pr_i))
    print(f1_score(y_ev_i, y_pr_i, average='micro'))
    print(f1_score(y_ev_i, y_pr_i, average='macro'))
    print(f1_score(y_ev_i, y_pr_i, average=None))
コード例 #3
0
def _load_mapping_matrices(path):
    """Read the ``wx2``, ``wz2`` and ``s`` arrays from the archive at ``path``.

    The archive is opened in a ``with`` block so its file handle is closed as
    soon as the three arrays have been read.
    """
    with np.load(path) as mapmat:
        return {
            'src': mapmat['wx2'],
            'trg': mapmat['wz2'],
            's': mapmat['s'],
        }


def generate_batch_data(inputfile, batch_size, args):
    """Endlessly yield ``(inputs, labels)`` batches built from ``inputfile``.

    Every pass re-reads ``inputfile`` with ``load_data``, walks the examples
    in order in windows of at most ``batch_size`` items, embeds each window
    with ``embed_elmo`` (``lang=args.evlang``, ``method='muse'``) and pads
    its labels with ``pad_labels``.  When the data is exhausted the next pass
    starts from the beginning, so the generator never finishes on its own;
    one pass yields exactly ``ceil(len(data) / batch_size)`` batches.

    Args:
        inputfile: Path handed unchanged to ``load_data``.
        batch_size: Maximum number of examples per batch; must be >= 1.
        args: Parsed command-line namespace.  ``options`` and ``weights`` are
            passed to ``ElmoEmbedder``; ``mat0``/``mat1``/``mat2`` are the
            optional mapping archives read with ``np.load``; ``evlang`` is
            forwarded to ``embed_elmo``.

    Yields:
        ``([layer0, layer1, layer2], labels)`` where every element has been
        converted with ``np.array``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``load_data``
            returns no examples (the loop could otherwise never yield).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # NOTE(review): the third argument is presumably the CUDA device index
    # (-1 = CPU) -- confirm against the ElmoEmbedder in use.
    elmo = ElmoEmbedder(args.options, args.weights, -1)
    if args.mat0:
        xlingual = [
            _load_mapping_matrices(archive_path)
            for archive_path in (args.mat0, args.mat1, args.mat2)
        ]
    else:
        # No mapping requested: one False placeholder per layer.
        xlingual = [False] * 3

    def build_batch(sentences, labels):
        """Embed ``sentences`` and pad ``labels`` into one batch tuple."""
        xval0, xval1, xval2 = embed_elmo(sentences,
                                         elmo,
                                         xlingual,
                                         lang=args.evlang,
                                         method='muse')
        ypadded = pad_labels(labels)
        return ([np.array(xval0),
                 np.array(xval1),
                 np.array(xval2)], np.array(ypadded))

    while True:  # it needs to be infinitely iterable
        x, y = load_data(inputfile)
        print("INPUT SIZES X AND Y", len(x), len(y))
        assert len(x) == len(y)
        total = len(y)
        if total == 0:
            raise ValueError("no examples loaded from %r" % (inputfile,))
        # Fixed-size windows: the previous `i % batch_size == 0` test flushed
        # only after item `batch_size` had been appended, which made the first
        # batch one element too large.
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            yield build_batch([x[i] for i in range(start, stop)],
                              [y[i] for i in range(start, stop)])
コード例 #4
0
def main():
    """Evaluate a saved NER model on an evaluation file and print metrics.

    Parses the command line, loads the model stored at ``--save`` inside a
    ``tf.device("gpu:0")`` scope, runs ``predict_generator`` over the batches
    produced by ``generate_batch_data`` for ``ceil(eval_len / bs)`` steps,
    and compares the per-token argmax of the predictions with the argmax of
    the gold labels returned by ``load_data``.  Progress messages go to
    stderr; a separator, the confusion matrix, and the micro, macro and
    per-class F1 scores are printed to stdout.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--eval_file", default=None, type=str, required=True)
    parser.add_argument("--options", default=None, type=str, required=True)
    parser.add_argument("--weights", default=None, type=str, required=True)
    parser.add_argument("--eval_len", default=0, type=int, required=True)
    parser.add_argument('--mat0', help='maping NN model for layer0 (.h5)')
    parser.add_argument('--mat1', help='mapping NN model for layer1 (.h5)')
    parser.add_argument('--mat2', help='mapping NN model for layer2 (.h5)')
    parser.add_argument("--bs", default=64, type=int, help="batch size")
    parser.add_argument("--save", default="elmo_new_ner_model", type=str)
    parser.add_argument("--direction", type=int, required=True, choices=[0,1], help='Given a model xx-yy to yy-xx choose 0 for xx->yy or 1 for yy->xx')
    parser.add_argument("--normalize", action="store_true")
    args = parser.parse_args()

    # The step count divides by the batch size, so reject values that would
    # raise ZeroDivisionError or produce a negative number of steps.
    if args.bs < 1:
        parser.error("--bs must be a positive integer")

    # NN
    sys.stderr.write("Loading NER model\n")
    with tf.device("gpu:0"):
        model = load_model(args.save)
    sys.stderr.write("beginning predict process\n")
    y_predict = model.predict_generator(
        generate_batch_data(args.eval_file, args.bs, args),
        steps=ceil(args.eval_len / args.bs))

    # Gold labels are read again here; only the labels are needed.
    _, y_ev = load_data(args.eval_file)

    # Flatten gold and predicted class indices token by token.  Predictions
    # are indexed by the gold positions, so any extra rows or columns in
    # y_predict are ignored.
    y_ev_i = []
    y_pr_i = []
    for s, gold_sentence in enumerate(y_ev):
        for w, gold_token in enumerate(gold_sentence):
            y_ev_i.append(np.argmax(gold_token))
            y_pr_i.append(np.argmax(y_predict[s][w]))

    print('---***---')
    print(confusion_matrix(y_ev_i, y_pr_i))
    print(f1_score(y_ev_i, y_pr_i, average='micro'))
    print(f1_score(y_ev_i, y_pr_i, average='macro'))
    print(f1_score(y_ev_i, y_pr_i, average=None))