def iter_predict(Cws, Qws, Cchs, Qchs):
    """Predict answer-span start/end positions for every example.

    Iterates the dataset in training-sized batches; full batches go through
    the multi-GPU eval graph, the (truncated) remainder would use the
    single-device graph. Returns (start_preds, end_preds) as two arrays
    concatenated over all batches.
    """
    start_out = []
    end_out = []
    batches = iter_data(Cws, Qws, Cchs, Qchs,
                        n_batch=n_batch_train, truncate=True, verbose=True)
    for cwmb, qwmb, cchmb, qchmb in batches:
        if len(cwmb) == n_batch_train:
            # Full batch: multi-GPU eval ops fed through the *_train placeholders.
            feed = {Cw_train: cwmb, Qw_train: qwmb,
                    Cch_train: cchmb, Qch_train: qchmb}
            s_p, e_p = sess.run([eval_mgpu_s_preds, eval_mgpu_e_preds], feed)
        else:
            # Ragged batch: single-device eval ops and placeholders.
            feed = {Cw: cwmb, Qw: qwmb, Cch: cchmb, Qch: qchmb}
            s_p, e_p = sess.run([eval_s_preds, eval_e_preds], feed)
        start_out.append(s_p)
        end_out.append(e_p)
    return np.concatenate(start_out, 0), np.concatenate(end_out, 0)
def iter_apply(Xs, Ms, Ys):
    """Run classification eval over the whole dataset.

    Each batch yields (logits, clf_loss); both are scaled by the batch size
    so the loss reduces to a per-example-weighted sum. Returns
    [concatenated scaled logits, float(total loss)].
    """
    reducers = [lambda col: np.concatenate(col, 0),
                lambda col: float(np.sum(col))]
    per_batch = []
    for xmb, mmb, ymb in iter_data(Xs, Ms, Ys, n_batch=n_batch_train,
                                   truncate=False, verbose=True):
        n = len(xmb)
        if n == n_batch_train:
            # Full batch: multi-GPU eval graph.
            feed = {X_train: xmb, M_train: mmb, Y_train: ymb}
            out = sess.run([eval_mgpu_logits, eval_mgpu_clf_loss], feed)
        else:
            # Final ragged batch: single-device eval graph.
            feed = {X: xmb, M: mmb, Y: ymb}
            out = sess.run([eval_logits, eval_clf_loss], feed)
        # Weight each fetched value by the batch size (argmax of logits is
        # unaffected by the positive scale; the loss becomes a weighted sum).
        per_batch.append([r * n for r in out])
    grouped = zip(*per_batch)
    return [reduce_fn(col) for col, reduce_fn in zip(grouped, reducers)]
def iter_predict(Xs, Ms):
    """Greedy autoregressive decoding over batches.

    For each batch, repeatedly runs the model, takes the argmax token, and
    feeds it back in via next_xmb/roll_mask until every sequence has emitted
    the `end` token (or the maximum delimiter-to-end distance is exhausted).
    Returns the final-step predictions of all batches concatenated.
    """
    preds = []
    for xmb, mmb in iter_data(Xs, Ms, n_batch=n_batch_train, truncate=False, verbose=True):
        n = len(xmb)
        # Mask marking delimiter positions in the token-id channel (index 0).
        idx_mask = np.equal(xmb[:, :, 0], delimiter).astype(int)
        # First occurrence of the end token / the delimiter, per sequence.
        end_idxs = np.argmax(np.equal(xmb[:, :, 0], end).astype(int), 1)
        delim_idxs = np.argmax(idx_mask, 1)
        if n == n_batch_train:
            # Full batch: multi-GPU eval graph. Decode at most
            # max(end - delimiter) steps.
            for i in range(np.max(end_idxs - delim_idxs)):
                pred = np.argmax(
                    sess.run(eval_mgpu_logits, {
                        X_train: xmb,
                        M_train: mmb
                    }), 1)
                # Advance the write position and splice predictions into xmb.
                idx_mask = roll_mask(idx_mask)
                xmb = next_xmb(xmb, pred, idx_mask)
                if all_finished(np.reshape(pred, xmb[:, 1:, 0].shape), end):
                    break
        else:
            # Ragged batch: same decoding loop on the single-device graph.
            for i in range(np.max(end_idxs - delim_idxs)):
                pred = np.argmax(sess.run(eval_logits, {X: xmb, M: mmb}), 1)
                idx_mask = roll_mask(idx_mask)
                xmb = next_xmb(xmb, pred, idx_mask)
                if all_finished(np.reshape(pred, xmb[:, 1:, 0].shape), end):
                    break
        # NOTE(review): only the LAST step's `pred` is kept per batch; if the
        # decode range is empty, `pred` is unbound here — assumed every batch
        # has end after delimiter. TODO confirm.
        preds.append(pred)
    preds = np.concatenate(preds, 0)
    return preds
def iter_predict(Xs, Ms):
    """Collect eval logits for every batch and return them concatenated.

    Full batches use the multi-GPU eval graph; the final ragged batch uses
    the single-device graph.
    """
    collected = []
    for xmb, mmb in iter_data(Xs, Ms, n_batch=n_batch_train,
                              truncate=False, verbose=True):
        if len(xmb) == n_batch_train:
            batch_logits = sess.run(eval_mgpu_logits,
                                    {X_train: xmb, M_train: mmb})
        else:
            batch_logits = sess.run(eval_logits, {X: xmb, M: mmb})
        collected.append(batch_logits)
    return np.concatenate(collected, 0)
def iter_apply(Cws, Qws, Cchs, Qchs, Yss, Yes):
    """Run QA span eval over the whole dataset.

    Each batch yields (start_preds, end_preds, qa_loss), all scaled by the
    batch size so the loss reduces to a per-example-weighted sum. Returns
    [concatenated start preds, concatenated end preds, float(total loss)].
    """
    reducers = [
        lambda col: np.concatenate(col, 0),
        lambda col: np.concatenate(col, 0),
        lambda col: float(np.sum(col)),
    ]
    per_batch = []
    batches = iter_data(Cws, Qws, Cchs, Qchs, Yss, Yes,
                        n_batch=n_batch_train, truncate=True, verbose=True)
    for cwmb, qwmb, cchmb, qchmb, ysmb, yemb in batches:
        n = len(cwmb)
        if n == n_batch_train:
            # Full batch: multi-GPU eval graph via the *_train placeholders.
            feed = {Cw_train: cwmb, Qw_train: qwmb, Cch_train: cchmb,
                    Qch_train: qchmb, Ys_train: ysmb, Ye_train: yemb}
            out = sess.run(
                [eval_mgpu_s_preds, eval_mgpu_e_preds, eval_mgpu_qa_loss],
                feed)
        else:
            # Ragged batch: single-device eval graph.
            feed = {Cw: cwmb, Qw: qwmb, Cch: cchmb,
                    Qch: qchmb, Ys: ysmb, Ye: yemb}
            out = sess.run([eval_s_preds, eval_e_preds, eval_qa_loss], feed)
        per_batch.append([r * n for r in out])
    grouped = zip(*per_batch)
    return [reduce_fn(col) for col, reduce_fn in zip(grouped, reducers)]
def iter_apply(Xs, Ms):
    """Greedy autoregressive decode over batches, accumulating LM loss.

    For each batch, repeatedly runs the model, argmaxes a prediction, and
    splices it back into the input via next_xmb/roll_mask until every
    sequence has produced `end` (or the max delimiter-to-end distance is
    exhausted). Returns [concatenated scaled preds, float(total LM loss)].
    """
    fns = [lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))]
    results = []
    for xmb, mmb in iter_data(Xs, Ms, n_batch=n_batch_train, truncate=False,
                              verbose=True):
        n = len(xmb)
        # Delimiter positions in the token-id channel (index 0).
        idx_mask = np.equal(xmb[:, :, 0], delimiter).astype(int)
        end_idxs = np.argmax(np.equal(xmb[:, :, 0], end).astype(int), 1)
        delim_idxs = np.argmax(idx_mask, 1)
        if n == n_batch_train:
            # Full batch: multi-GPU eval graph.
            for i in range(np.max(end_idxs - delim_idxs)):
                ev_logits, ev_lm_loss = sess.run(
                    [eval_mgpu_logits, eval_mgpu_lm_loss], {
                        X_train: xmb,
                        M_train: mmb
                    })
                pred = np.argmax(ev_logits, 1)
                idx_mask = roll_mask(idx_mask)
                xmb = next_xmb(xmb, pred, idx_mask)
                if all_finished(np.reshape(pred, xmb[:, 1:, 0].shape), end):
                    break
        else:
            # Ragged batch: single-device eval graph.
            for i in range(np.max(end_idxs - delim_idxs)):
                # BUG FIX: this branch previously bound `ev_lm_losses`, so
                # the `ev_lm_loss * n` below raised NameError whenever only
                # ragged batches were processed.
                ev_logits, ev_lm_loss = sess.run(
                    [eval_logits, eval_lm_loss], {
                        X: xmb,
                        M: mmb
                    })
                pred = np.argmax(ev_logits, 1)
                idx_mask = roll_mask(idx_mask)
                xmb = next_xmb(xmb, pred, idx_mask)
                if all_finished(np.reshape(pred, xmb[:, 1:, 0].shape), end):
                    break
        # NOTE(review): `pred * n` scales argmax *indices*, not logits, which
        # changes the values downstream — verify consumers expect this.
        # Also, if the decode range is empty, pred/ev_lm_loss are unbound —
        # assumed every batch has end after delimiter. TODO confirm.
        res = [pred * n, ev_lm_loss * n]
        results.append(res)
    results = zip(*results)
    return [fn(res) for res, fn in zip(results, fns)]
# params params = find_trainable_variables('model') sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) sess.run(tf.global_variables_initializer()) # get saved params if use_prev_best and os.path.isfile(os.path.join(save_dir, desc, 'best_params.jl')): sess.run([p.assign(ip) for p, ip in zip(params, joblib.load(os.path.join(save_dir, desc, 'best_params.jl')))]) else: # get the embedding matrix of the pretrained model #emb = np.concatenate([np.load('{}params_{}.npy'.format(pretrained_lm_dir, n)) for n in range(3)], 0)[393216:31480320].reshape((40478,768)) emb = np.load('{}elmo_768_40478_matrix.npy'.format(elmo_dir)) emb = np.concatenate([emb, (np.random.randn(n_special, n_embd)*0.02).astype(np.float32)], 0) sess.run(params[0].assign(emb)) del emb # train, eval, test n_updates = 0 n_epochs = 0 if submit: save(os.path.join(save_dir, desc, 'best_params.jl')) best_score = 0 for i in range(n_iter): for xmb, mmb in iter_data(*shuffle(trX, trM, random_state=np.random), n_batch=n_batch_train, truncate=True, verbose=True): cost, _ = sess.run([lm_loss, train], {X_train:xmb, M_train:mmb}) n_updates += 1 if n_updates in [1000, 2000, 4000, 8000, 16000, 32000] and n_epochs == 0: log() n_epochs += 1 log() if submit: sess.run([p.assign(ip) for p, ip in zip(params, joblib.load(os.path.join(save_dir, desc, 'best_params.jl')))]) predict()
params, joblib.load(os.path.join(save_dir, desc, 'best_params.jl'))) ]) # train, eval, test n_updates = 0 n_epochs = 0 if submit: save(os.path.join(save_dir, desc, 'best_params.jl')) best_score = 0 for i in range(n_iter): for cwmb, qwmb, cchmb, qchmb, ysmb, yemb in iter_data( *shuffle(trCtxW, trQW, trCtxCh, trQCh, trYs, trYe, random_state=np.random), n_batch=n_batch_train, truncate=True, verbose=True): cost, _ = sess.run( [qa_loss, train], { Cw_train: cwmb, Qw_train: qwmb, Cch_train: cchmb, Qch_train: qchmb, Ys_train: ysmb, Ye_train: yemb }) n_updates += 1