def generate_batch_data(inputfile, batch_size, args):
    """Infinitely yield batches of ELMo-embedded sentences and padded labels.

    Parameters
    ----------
    inputfile : str
        Path to a data file readable by ``load_data`` (sentences + labels).
    batch_size : int
        Number of sentences per yielded batch (the last batch of each pass
        over the file may be smaller).
    args : argparse.Namespace
        Must provide ``options``/``weights`` (ELMo model files) and may
        provide ``mat0``/``mat1``/``mat2`` (Keras cross-lingual mapping
        models, one per ELMo layer).

    Yields
    ------
    tuple
        ``([layer0, layer1, layer2], labels)`` — three numpy arrays of
        per-layer embeddings and one array of padded label vectors, the
        shape expected by ``model.predict_generator``.
    """
    elmo = ElmoEmbedder(args.options, args.weights, -1)
    if args.mat0:
        sys.stderr.write("loading mapping models\n")
        my_funcs = {
            'htanh': htanh,
            'csd2': csd2,
            'cosine_proximity': losses.cosine_proximity,
        }
        # Mapping networks are small; pin them to the CPU so the GPU stays
        # free for the NER model itself.
        with tf.device("cpu:0"):
            W0 = load_model(args.mat0, custom_objects=my_funcs)
            W1 = load_model(args.mat1, custom_objects=my_funcs)
            W2 = load_model(args.mat2, custom_objects=my_funcs)
        xlingual = [W0, W1, W2]
    else:
        xlingual = [False] * 3
    while True:  # it needs to be infinitely iterable for predict_generator
        x, y = load_data(inputfile)
        sys.stderr.write("loading eval data\n")
        print("INPUT SIZES X AND Y", len(x), len(y))
        assert len(x) == len(y)
        newxval = []
        yval = []
        for i in range(len(y)):
            newxval.append(x[i])
            yval.append(y[i])
            # BUGFIX: the original condition ``i > 0 and i % batch_size == 0``
            # fired only after batch_size + 1 items had been appended, so the
            # first batch was one sentence too large and every prediction
            # drifted out of step with ``steps=ceil(test_len / bs)``.
            if len(newxval) == batch_size:
                xval0, xval1, xval2 = embed_elmogan(newxval, elmo, xlingual, args)
                ypadded = pad_labels(yval)
                yield ([np.array(xval0), np.array(xval1), np.array(xval2)],
                       np.array(ypadded))
                newxval = []
                yval = []
        if newxval:  # trailing partial batch
            xval0, xval1, xval2 = embed_elmogan(newxval, elmo, xlingual, args)
            ypadded = pad_labels(yval)
            yield ([np.array(xval0), np.array(xval1), np.array(xval2)],
                   np.array(ypadded))
def main():
    """Evaluate a trained ELMo NER model on a test file and print metrics.

    Loads the Keras NER model given by ``--save``, predicts over the test
    file with a batching generator, then prints a confusion matrix and
    micro/macro/per-class F1 scores to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_file", default=None, type=str, required=True)
    parser.add_argument("--options", default=None, type=str, required=True,
                        help="elmo options file")
    parser.add_argument("--weights", default=None, type=str, required=True,
                        help="elmo weights file")
    parser.add_argument("--test_len", default=0, type=int, required=True)
    parser.add_argument('--mat0',
                        help='mapping matrices for layer0 (.npz), optional')
    parser.add_argument('--mat1',
                        help='mapping matrices for layer1 (.npz), optional')
    parser.add_argument('--mat2',
                        help='mapping matrices for layer2 (.npz), optional')
    parser.add_argument('--evlang', default='src', type=str,
                        help='src or trg when mapping test file language')
    parser.add_argument("--bs", default=64, type=int, help="batch size")
    parser.add_argument("--save", default="elmo_new_ner_model", type=str,
                        help="path to trained elmo NER model")
    args = parser.parse_args()

    # NN: load the trained model and predict over the whole test set.
    # steps must match the generator's batching: ceil(test_len / bs).
    model = load_model(args.save)
    y_predict = model.predict_generator(
        generate_batch_data(args.test_file, args.bs, args),
        steps=ceil(args.test_len / args.bs))

    # Re-read the gold labels and flatten both gold and predicted label
    # sequences to per-token class indices for sklearn's metrics.
    _, y_ev = load_data(args.test_file)
    y_ev_i = []
    y_pr_i = []
    for s in range(len(y_ev)):
        for w in range(len(y_ev[s])):
            y_ev_i.append(np.argmax(y_ev[s][w]))
            y_pr_i.append(np.argmax(y_predict[s][w]))
    print('---***---')
    print(confusion_matrix(y_ev_i, y_pr_i))
    print(f1_score(y_ev_i, y_pr_i, average='micro'))
    print(f1_score(y_ev_i, y_pr_i, average='macro'))
    print(f1_score(y_ev_i, y_pr_i, average=None))
def generate_batch_data(inputfile, batch_size, args):
    """Infinitely yield batches of MUSE-mapped ELMo embeddings and labels.

    Parameters
    ----------
    inputfile : str
        Path to a data file readable by ``load_data`` (sentences + labels).
    batch_size : int
        Number of sentences per yielded batch (the last batch of each pass
        over the file may be smaller).
    args : argparse.Namespace
        Must provide ``options``/``weights`` (ELMo model files) and
        ``evlang`` ('src' or 'trg'); may provide ``mat0``/``mat1``/``mat2``
        (.npz archives with 'wx2', 'wz2', 's' mapping matrices per layer).

    Yields
    ------
    tuple
        ``([layer0, layer1, layer2], labels)`` — three numpy arrays of
        per-layer embeddings and one array of padded label vectors, the
        shape expected by ``model.predict_generator``.
    """
    elmo = ElmoEmbedder(args.options, args.weights, -1)
    if args.mat0:
        # One mapping dict per ELMo layer; each .npz holds the source and
        # target projection matrices plus the scaling vector 's'.
        xlingual = []
        for matfile in (args.mat0, args.mat1, args.mat2):
            mapmat = np.load(matfile)
            xlingual.append({
                'src': mapmat['wx2'],
                'trg': mapmat['wz2'],
                's': mapmat['s'],
            })
        mapmat = None  # drop the reference to the last open archive
    else:
        xlingual = [False] * 3
    while True:  # it needs to be infinitely iterable for predict_generator
        x, y = load_data(inputfile)
        print("INPUT SIZES X AND Y", len(x), len(y))
        assert len(x) == len(y)
        newxval = []
        yval = []
        for i in range(len(y)):
            newxval.append(x[i])
            yval.append(y[i])
            # BUGFIX: the original condition ``i > 0 and i % batch_size == 0``
            # fired only after batch_size + 1 items had been appended, so the
            # first batch was one sentence too large and every prediction
            # drifted out of step with ``steps=ceil(eval_len / bs)``.
            if len(newxval) == batch_size:
                xval0, xval1, xval2 = embed_elmo(newxval, elmo, xlingual,
                                                 lang=args.evlang,
                                                 method='muse')
                ypadded = pad_labels(yval)
                yield ([np.array(xval0), np.array(xval1), np.array(xval2)],
                       np.array(ypadded))
                newxval = []
                yval = []
        if newxval:  # trailing partial batch
            xval0, xval1, xval2 = embed_elmo(newxval, elmo, xlingual,
                                             lang=args.evlang,
                                             method='muse')
            ypadded = pad_labels(yval)
            yield ([np.array(xval0), np.array(xval1), np.array(xval2)],
                   np.array(ypadded))
def main():
    """Evaluate a trained ELMo NER model on an eval file and print metrics.

    Loads the Keras NER model given by ``--save``, predicts over the eval
    file with a batching generator, then prints a confusion matrix and
    micro/macro/per-class F1 scores to stdout.
    """
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--eval_file", default=None, type=str, required=True)
    parser.add_argument("--options", default=None, type=str, required=True)
    parser.add_argument("--weights", default=None, type=str, required=True)
    parser.add_argument("--eval_len", default=0, type=int, required=True)
    parser.add_argument('--mat0', help='maping NN model for layer0 (.h5)')
    parser.add_argument('--mat1', help='mapping NN model for layer1 (.h5)')
    parser.add_argument('--mat2', help='mapping NN model for layer2 (.h5)')
    parser.add_argument("--bs", default=64, type=int, help="batch size")
    parser.add_argument("--save", default="elmo_new_ner_model", type=str)
    parser.add_argument("--direction", type=int, required=True, choices=[0, 1],
                        help='Given a model xx-yy to yy-xx choose 0 for '
                             'xx->yy or 1 for yy->xx')
    parser.add_argument("--normalize", action="store_true")
    args = parser.parse_args()

    # NN: load the trained model on the GPU and predict over the eval set.
    # steps must match the generator's batching: ceil(eval_len / bs).
    sys.stderr.write("Loading NER model\n")
    with tf.device("gpu:0"):
        model = load_model(args.save)
    sys.stderr.write("beginning predict process\n")
    y_predict = model.predict_generator(
        generate_batch_data(args.eval_file, args.bs, args),
        steps=ceil(args.eval_len / args.bs))

    # Re-read the gold labels and flatten both gold and predicted label
    # sequences to per-token class indices for sklearn's metrics.
    _, y_ev = load_data(args.eval_file)
    y_ev_i = []
    y_pr_i = []
    for s in range(len(y_ev)):
        for w in range(len(y_ev[s])):
            y_ev_i.append(np.argmax(y_ev[s][w]))
            y_pr_i.append(np.argmax(y_predict[s][w]))
    print('---***---')
    print(confusion_matrix(y_ev_i, y_pr_i))
    print(f1_score(y_ev_i, y_pr_i, average='micro'))
    print(f1_score(y_ev_i, y_pr_i, average='macro'))
    print(f1_score(y_ev_i, y_pr_i, average=None))