def train_text_cnn(data, num_attr):
  lr = FLAGS.lr
  batch_size = FLAGS.batch_size
  train_sents, train_y, test_sents, test_y = data
  train_x, train_m, test_x, test_m = preprocess_raw_data(
      train_sents, test_sents)

  inputs = tf.placeholder(tf.int64, (None, None), name="inputs")
  masks = tf.placeholder(tf.int32, (None, None), name="masks")
  labels = tf.placeholder(tf.int64, (None,), name="labels")
  training = tf.placeholder(tf.bool, name='training')

  text_cnn = TextCNN(vocab_size=50001, emb_dim=100, num_filter=128,
                     init_word_emb=None)
  classifier = build_model(num_attr, FLAGS.hidden_size)
  model_fn = lambda x, m, t: classifier(text_cnn.forward(x, m, t), t)

  logits = model_fn(inputs, masks, training)
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                        logits=logits)
  loss = tf.reduce_mean(loss)
  opt_loss = loss

  accuracies, top5_accuracies, predictions = acc_metrics(logits, labels,
                                                         num_attr)
  eval_fetches = [loss, accuracies, top5_accuracies]

  t_vars = tf.trainable_variables()
  post_ops = [tf.assign(v, v * (1 - FLAGS.wd)) for v in t_vars
              if 'kernel' in v.name]

  optimizer = tf.train.AdamOptimizer(lr)
  grads_and_vars = optimizer.compute_gradients(opt_loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  log('Train attack model...')
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())

    def train_fn(batch_idx):
      feed = {inputs: train_x[batch_idx], masks: train_m[batch_idx],
              labels: train_y[batch_idx], training: True}
      err, _ = sess.run([loss, train_ops], feed_dict=feed)
      return err

    def eval_fn(batch_idx):
      feed = {inputs: test_x[batch_idx], masks: test_m[batch_idx],
              labels: test_y[batch_idx], training: False}
      return sess.run(eval_fetches, feed_dict=feed)

    n_train, n_test = len(train_y), len(test_y)
    train_loops(FLAGS.epochs, n_train, n_test, train_fn, eval_fn, batch_size)
def train_loops(epochs, n_train, n_test, train_fn, eval_fn, batch_size,
                n_unlabeled=0, interleave_batch=False):
  if n_unlabeled:
    include_last = not interleave_batch
    u_batch_size = FLAGS.u_batch_size if include_last else batch_size
    unlabeled_data_sampler = inf_batch_iterator(n_unlabeled, u_batch_size)
  else:
    include_last = True
    unlabeled_data_sampler = None

  for epoch in range(epochs):
    train_iterations = 0
    train_loss = 0
    train_u_loss = 0
    for batch_idx in iterate_minibatches_indices(n_train, batch_size, True,
                                                 include_last=include_last):
      if unlabeled_data_sampler is None:
        err = train_fn(batch_idx)
      else:
        batch_u_idx = next(unlabeled_data_sampler)
        err, err_u = train_fn(batch_idx, batch_u_idx)
        train_u_loss += err_u
      train_loss += err
      train_iterations += 1

    test_loss = 0
    test_acc = 0
    test_top5_acc = 0
    test_iterations = 0
    for batch_idx in iterate_minibatches_indices(n_test, 512, False):
      err, acc, top5_acc = eval_fn(batch_idx)
      test_acc += acc
      test_top5_acc += top5_acc
      test_loss += err
      test_iterations += 1

    if (epoch + 1) % 10 == 0:
      log("Epoch: {}, train loss: {:.4f}, train l2u loss {:.4f}, "
          "test loss={:.4f}, test acc={:.2f}%, test top5 acc={:.2f}%".format(
              epoch + 1, train_loss / train_iterations,
              train_u_loss / train_iterations,
              test_loss / test_iterations,
              test_acc / n_test * 100, test_top5_acc / n_test * 100))
def main(_):
  split_word = FLAGS.model_name in {'textcnn', 'quickthought', 'transformer'}
  if FLAGS.data_name == 'bookcorpus':
    train_sents, train_authors, test_sents, test_authors, \
        unlabeled_sents, unlabeled_authors = bookcorpus_author_data(
            train_size=FLAGS.train_size, test_size=FLAGS.test_size,
            unlabeled_size=FLAGS.unlabeled_size, split_by_book=True,
            split_word=split_word, top_attr=FLAGS.top_attr, min_len=10)
  elif FLAGS.data_name == 'reddit':
    train_sents, train_authors, test_sents, test_authors, \
        unlabeled_sents, unlabeled_authors = reddit_author_data(
            train_size=FLAGS.train_size, test_size=FLAGS.test_size,
            unlabeled_size=FLAGS.unlabeled_size, split_word=split_word,
            top_attr=FLAGS.top_attr)
  else:
    raise ValueError(FLAGS.data_name)

  author_to_ids = get_attrs_to_ids(train_authors)
  train_y = np.asarray([author_to_ids[author] for author in train_authors],
                       dtype=np.int64)
  test_y = np.asarray([author_to_ids[author] for author in test_authors],
                      dtype=np.int64)
  num_attr = len(author_to_ids)

  log('{} training, {} testing'.format(len(train_y), len(test_y)))
  test_label_count = Counter(test_y)
  log('Majority baseline: {:.4f}% out of {} authors'.format(
      test_label_count.most_common(1)[0][1] / len(test_y) * 100,
      len(test_label_count)))

  data = train_sents, train_y, test_sents, test_y
  if FLAGS.model_name == 'textcnn':
    train_text_cnn(data, num_attr)
  elif FLAGS.model_name == 'charcnn':
    train_text_char_cnn(data, num_attr)
  else:
    train_embedding_classifier(data, unlabeled_sents, num_attr)
def optimization_inversion():
  _, _, x, y = load_inversion_data()
  y = sents_to_labels(y)

  max_iters = FLAGS.max_iters
  batch_size = FLAGS.batch_size
  seq_len = FLAGS.seq_len

  embed_module = "https://tfhub.dev/google/universal-sentence-encoder-lite/2"
  embed = hub.Module(embed_module)
  sp = spm.SentencePieceProcessor()
  sp.Load(SPM_MODEL_PATH)

  input_placeholder = tf.sparse_placeholder(tf.int64,
                                            shape=[batch_size, None],
                                            name='sparse_placeholder')
  # dummy call to set up the graph
  embed(inputs=dict(values=input_placeholder.values,
                    indices=input_placeholder.indices,
                    dense_shape=input_placeholder.dense_shape))
  emb_lookup = LAYER_NAMES[0]

  start_vars = set(v.name for v in tf.global_variables())
  word_emb = tf.global_variables()[0]

  logit_inputs = tf.get_variable(name='logit_inputs',
                                 shape=(batch_size, seq_len, 8002),
                                 initializer=tf.random_normal_initializer(
                                     -0.1, 0.1))
  permute_inputs = tf.get_variable(name='permute_inputs',
                                   shape=(batch_size, seq_len, seq_len),
                                   initializer=tf.random_normal_initializer(
                                       -0.1, 0.1))

  permute_matrix = sinkhorn(permute_inputs / FLAGS.temp, 10)
  prob_inputs = tf.nn.softmax(logit_inputs / FLAGS.temp, axis=-1)
  preds = tf.argmax(prob_inputs, axis=-1)

  emb_inputs = tf.matmul(prob_inputs, word_emb, name='new_embedding_lookup')
  emb_inputs = tf.matmul(permute_matrix, emb_inputs)

  if FLAGS.low_layer_idx == 0:
    encoded = mean_pool(emb_inputs)
  else:
    replace_graph(emb_lookup, emb_inputs)
    encoded = get_fetch_by_layer(FLAGS.low_layer_idx)

  targets = tf.placeholder(tf.float32, name='target',
                           shape=(batch_size, encoded.shape.as_list()[-1]))
  loss = get_similarity_metric(encoded, targets, FLAGS.metric, rtn_loss=True)
  loss = tf.reduce_sum(loss)

  optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  grads_and_vars = optimizer.compute_gradients(
      loss, [logit_inputs, permute_inputs])
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())

  end_vars = tf.global_variables()
  new_vars = [v for v in end_vars if v.name not in start_vars]
  batch_init_ops = tf.variables_initializer(new_vars)

  total_it = len(x) // batch_size
  dummy_inputs = prepare_dummpy_sparse(batch_size, seq_len)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    def invert_one_batch(batch_targets):
      sess.run(batch_init_ops)
      feed_dict = {
          targets: batch_targets,
          'sparse_placeholder/values:0': dummy_inputs[0],
          'sparse_placeholder/indices:0': dummy_inputs[1],
          'sparse_placeholder/shape:0': dummy_inputs[2]
      }
      prev = 1e6
      for i in range(max_iters):
        curr, _ = sess.run([loss, train_ops], feed_dict)
        # stop if no progress
        if (i + 1) % (max_iters // 10) == 0 and curr > prev:
          break
        prev = curr
      return sess.run([preds, loss], feed_dict)

    start_time = time.time()
    it = 0.0
    all_tp, all_fp, all_fn, all_err = 0.0, 0.0, 0.0, 0.0
    for batch_idx in iterate_minibatches_indices(len(x), batch_size,
                                                 False, False):
      y_pred, err = invert_one_batch(x[batch_idx])
      tp, fp, fn = tp_fp_fn_metrics_np(y_pred, y[batch_idx])
      it += 1.0
      all_err += err
      all_tp += tp
      all_fp += fp
      all_fn += fn
      all_pre = all_tp / (all_tp + all_fp + 1e-7)
      all_rec = all_tp / (all_tp + all_fn + 1e-7)
      all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
      if it % FLAGS.print_every == 0:
        it_time = (time.time() - start_time) / it
        log("Iter {:.2f}%, err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%,"
            " {:.2f} sec/it".format(it / total_it * 100, all_err / it,
                                    all_pre * 100, all_rec * 100,
                                    all_f1 * 100, it_time))

    all_pre = all_tp / (all_tp + all_fp + 1e-7)
    all_rec = all_tp / (all_tp + all_fn + 1e-7)
    all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
    log("Final err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%".format(
        all_err / it, all_pre * 100, all_rec * 100, all_f1 * 100))
def load_inversion_data():
  module = hub.Module(
      "https://tfhub.dev/google/universal-sentence-encoder-lite/2")
  sp = spm.SentencePieceProcessor()
  sp.Load(SPM_MODEL_PATH)

  input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None],
                                            name='sparse_placeholder')
  module(inputs=dict(values=input_placeholder.values,
                     indices=input_placeholder.indices,
                     dense_shape=input_placeholder.dense_shape))

  learn_mapping = FLAGS.high_layer_idx != FLAGS.low_layer_idx
  if learn_mapping:
    outputs = [
        get_fetch_by_layer(FLAGS.low_layer_idx),
        get_fetch_by_layer(FLAGS.high_layer_idx)
    ]
  else:
    outputs = get_fetch_by_layer(FLAGS.low_layer_idx)

  train_sents, _, test_sents, _, _, _ = load_bookcorpus_author(
      train_size=FLAGS.train_size, test_size=FLAGS.test_size,
      unlabeled_size=0, split_by_book=True, split_word=False,
      top_attr=800, remove_punct=False)

  sess = tf.Session()
  sess.run(tf.global_variables_initializer())

  def encode_sents(sents):
    # tokenize the given sentences with the SentencePiece model
    y = [
        np.asarray(sp.EncodeAsIds(x)[:FLAGS.max_seq_length])
        for x in sents
    ]
    y = np.asarray(y)
    n_data = len(sents)
    pbar = tqdm.tqdm(total=n_data)
    embs_low, embs_high = [], []
    for b_idx in iterate_minibatches_indices(n_data, 512):
      values, indices, dense_shape = sents_to_sparse(y[b_idx])
      emb = sess.run(outputs,
                     feed_dict={
                         input_placeholder.values: values,
                         input_placeholder.indices: indices,
                         input_placeholder.dense_shape: dense_shape
                     })
      if learn_mapping:
        embs_low.append(emb[0])
        embs_high.append(emb[1])
      else:
        embs_low.append(emb)
      pbar.update(len(b_idx))
    pbar.close()
    if learn_mapping:
      return [np.vstack(embs_low), np.vstack(embs_high)], y
    else:
      return np.vstack(embs_low), y

  train_x, train_y = encode_sents(train_sents)
  test_x, test_y = encode_sents(test_sents)
  tf.keras.backend.clear_session()

  if learn_mapping:
    log('Training high to low mapping...')
    if FLAGS.mapper == 'linear':
      mapping = linear_mapping(train_x[1], train_x[0])
    elif FLAGS.mapper == 'mlp':
      mapping = mlp_mapping(train_x[1], train_x[0], epochs=10,
                            activation=tf.tanh)
    elif FLAGS.mapper == 'gan':
      mapping = gan_mapping(train_x[1], train_x[0], disc_iters=5,
                            batch_size=64, gamma=1.0, epoch=100,
                            activation=tf.tanh)
    else:
      raise ValueError(FLAGS.mapper)
    test_x = mapping(test_x[1])

  return train_x, train_y, test_x, test_y
def train_embedding_classifier(data, unlabeled_data, num_attr):
  batch_size = FLAGS.batch_size
  interleave_batch = FLAGS.interleave

  train_sents, train_y, test_sents, test_y = data
  train_embs, test_embs, unlabeled_embs = encode_sentences(
      train_sents, test_sents, unlabeled_data)
  semi_supervised = len(unlabeled_embs) > 0
  n_train, n_test = len(train_y), len(test_y)

  encoder_dim = train_embs.shape[1]
  inputs = tf.placeholder(tf.float32, (None, encoder_dim), name='inputs')
  unlabeled_inputs = tf.placeholder(tf.float32, (None, encoder_dim),
                                    name="u_inputs")
  labels = tf.placeholder(tf.int64, (None,), name='labels')
  training = tf.placeholder(tf.bool, name='training')

  model_fn = build_model(num_attr, FLAGS.hidden_size)

  def augment_unlabeled(u):
    u = tf.nn.dropout(u, rate=0.25)
    u = add_gaussian_noise(u, gamma=0.1)
    u = batch_interpolation(u, alpha=0.9, random=True)
    return u

  if not semi_supervised:
    logits = model_fn(inputs, training)
    accuracies, top5_accuracies, _ = acc_metrics(logits, labels, num_attr)
    loss_xe = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                             logits=logits)
    loss_xe = tf.reduce_mean(loss_xe)
    loss_l2u = tf.constant(0.)
    loss = loss_xe
    eval_loss = loss_xe
  elif FLAGS.algo == 'mixmatch':
    augment = MixMode(FLAGS.mixmode)
    us = []
    logits_us = []
    for _ in range(FLAGS.k):
      u = augment_unlabeled(unlabeled_inputs)
      logits_u = model_fn(u, training)
      logits_us.append(logits_u)
      us.append(u)

    guess = guess_label(logits_us, temp=FLAGS.temp)
    lu = tf.stop_gradient(guess)
    lx = tf.one_hot(labels, num_attr)

    xu, labels_xu = augment([inputs] + us, [lx] + [lu] * FLAGS.k,
                            [FLAGS.beta, FLAGS.beta])
    labels_x, labels_us = labels_xu[0], tf.concat(labels_xu[1:], 0)

    if interleave_batch:
      xu = interleave(xu, batch_size)

    logits_x = model_fn(xu[0], training)
    logits_us = []
    for u in xu[1:]:
      logits_u = model_fn(u, training)
      logits_us.append(logits_u)

    logits_xu = [logits_x] + logits_us
    if interleave_batch:
      logits_xu = interleave(logits_xu, batch_size)
    logits_x = logits_xu[0]

    loss_xe = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_x,
                                                         logits=logits_x)
    loss_xe = tf.reduce_mean(loss_xe)

    logits_us = tf.concat(logits_xu[1:], 0)
    loss_l2u = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_us,
                                                          logits=logits_us)
    # loss_l2u = tf.square(labels_us - tf.nn.softmax(logits_us))
    loss_l2u = tf.reduce_mean(loss_l2u)

    global_step = tf.train.get_or_create_global_step()
    w_match = tf.clip_by_value(
        tf.cast(global_step, tf.float32) /
        (FLAGS.epochs * (int(n_train // batch_size) + 1)), 0, 1)
    loss = FLAGS.lambda_u * w_match * loss_l2u + loss_xe

    test_logits = model_fn(inputs, training)
    test_loss_xe = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=test_logits)
    accuracies, top5_accuracies, _ = acc_metrics(test_logits, labels,
                                                 num_attr)
    eval_loss = tf.reduce_mean(test_loss_xe)
  elif FLAGS.algo == 'uda':
    model_fn = build_ae_model(num_attr, 256, encoder_dim)
    us = []
    logits_us = []
    for _ in range(FLAGS.k):
      u = augment_unlabeled(unlabeled_inputs)
      logits_u = model_fn(u, training)[0]
      logits_us.append(logits_u)
      us.append(u)

    labels_u = guess_label(logits_us, temp=FLAGS.temp)
    labels_us = tf.concat([labels_u] * FLAGS.k, 0)

    logits_x, recon_x = model_fn(inputs, training)
    logits_us = []
    recon_us = []
    for u in us:
      logits_u, recon_u = model_fn(u, training)
      logits_us.append(logits_u)
      recon_us.append(recon_u)

    recon_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(inputs - recon_x), axis=-1))
    recon_us = tf.concat(recon_us, 0)
    us = tf.concat(us, 0)
    recon_loss += tf.reduce_mean(
        tf.reduce_sum(tf.square(us - recon_us), axis=-1))

    loss_xe = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                             logits=logits_x)
    loss_xe = tf.reduce_mean(loss_xe)

    logits_us = tf.concat(logits_us, 0)
    loss_l2u = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_us,
                                                          logits=logits_us)
    # loss_l2u = tf.square(labels_us - tf.nn.softmax(logits_us))
    loss_l2u = tf.reduce_mean(loss_l2u)

    global_step = tf.train.get_or_create_global_step()
    w_match = tf.clip_by_value(
        tf.cast(global_step, tf.float32) /
        (FLAGS.epochs * (int(n_train // batch_size) + 1)), 0, 1)
    loss = FLAGS.lambda_u * w_match * loss_l2u + loss_xe + recon_loss

    test_logits = model_fn(inputs, training)[0]
    test_loss_xe = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=test_logits)
    accuracies, top5_accuracies, _ = acc_metrics(test_logits, labels,
                                                 num_attr)
    eval_loss = tf.reduce_mean(test_loss_xe)
  else:
    raise ValueError(FLAGS.algo)

  eval_fetches = [eval_loss, accuracies, top5_accuracies]

  t_vars = tf.trainable_variables()
  post_ops = [tf.assign(v, v * (1 - FLAGS.wd)) for v in t_vars
              if 'kernel' in v.name]

  optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  grads_and_vars = optimizer.compute_gradients(loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  log('Train attack model...')
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())

    def train_fn(*batch_idx):
      if len(batch_idx) == 1:
        batch_idx = batch_idx[0]
        feed = {inputs: train_embs[batch_idx],
                labels: train_y[batch_idx],
                training: True}
      else:
        feed = {inputs: train_embs[batch_idx[0]],
                labels: train_y[batch_idx[0]],
                unlabeled_inputs: unlabeled_embs[batch_idx[1]],
                training: True}
      err_xe, err_l2u, _ = sess.run([loss_xe, loss_l2u, train_ops],
                                    feed_dict=feed)
      if semi_supervised:
        return err_xe, err_l2u
      return err_xe

    def eval_fn(batch_idx):
      feed = {inputs: test_embs[batch_idx],
              labels: test_y[batch_idx],
              training: False}
      return sess.run(eval_fetches, feed_dict=feed)

    train_loops(FLAGS.epochs, n_train, n_test, train_fn, eval_fn,
                batch_size, len(unlabeled_embs), interleave_batch)
def encode_sentences(train_sents, test_sents, unlabeled_sents):
  query_size = 2048
  vocab = bookcorpus_vocab(0, rebuild=False)
  local_models = {'quickthought', 'transformer'}

  log('Encoding sentences...')
  if FLAGS.model_name in local_models:
    ckpt_name = get_model_ckpt_name(FLAGS.model_name,
                                    epoch=FLAGS.encoder_epoch,
                                    batch_size=800, gamma=FLAGS.gamma,
                                    attr='author')
    model_path = os.path.join(FLAGS.model_dir, ckpt_name)
    config = get_model_config(FLAGS.model_name)
    vocab, init_word_emb = expand_vocabulary(model_path, vocab)
    vocab_size = len(vocab) + 1

    model = QuickThoughtModel(vocab_size, config['emb_dim'],
                              config['encoder_dim'], 1, init_word_emb=None,
                              cell_type=config['cell_type'], train=False)
    inputs = tf.placeholder(tf.int64, (None, None), name='inputs')
    masks = tf.placeholder(tf.int32, (None, None), name='masks')
    encode_emb = tf.nn.embedding_lookup(model.word_in_emb, inputs)
    encoded = model.encode(encode_emb, masks, model.in_cells, model.proj_in)
    if FLAGS.norm:
      encoded = tf.nn.l2_normalize(encoded, axis=-1)

    # model_vars = tf.trainable_variables()
    model_vars = {v.name[:-2]: v for v in tf.trainable_variables()
                  if not v.name.startswith('emb')}
    saver = tf.train.Saver(model_vars)
    sess = tf.Session()
    emb_plhdr = tf.placeholder(tf.float32,
                               shape=(vocab_size, config['emb_dim']))
    sess.run(model.word_in_emb.assign(emb_plhdr), {emb_plhdr: init_word_emb})

    print('Loading weight from {}'.format(model_path))
    saver.restore(sess, os.path.join(model_path, 'model.ckpt'))
    encoder_fn = lambda s: sess.run(encoded,
                                    feed_dict={inputs: s[0], masks: s[1]})
  elif FLAGS.model_name == 'skipthought':
    from models.skip_thoughts import encoder_manager
    from models.skip_thoughts import configuration
    model_dir = os.path.join(NFS_DIR, 'models/skip/')
    vocab_file = os.path.join(model_dir, 'vocab.txt')
    embedding_file = os.path.join('./skip_thoughts/', 'embeddings.npy')
    ckpt_path = os.path.join(model_dir, 'model.ckpt-500008')
    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(bidirectional_encoder=True,
                                                  shuffle_input_data=False),
                       vocabulary_file=vocab_file,
                       embedding_matrix_file=embedding_file,
                       checkpoint_path=ckpt_path)
    encoder_fn = lambda s: encoder.encode(s, batch_size=query_size,
                                          use_norm=False)
    sess = encoder.sessions[0]
  elif FLAGS.model_name == 'use':
    embed_module = 'https://tfhub.dev/google/' \
                   'universal-sentence-encoder-large/3'
    embed = hub.Module(embed_module, trainable=False)
    inputs = tf.placeholder(tf.string, shape=(None,))
    encoded = embed(inputs)
    sess = tf.Session()
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    encoder_fn = lambda s: sess.run(encoded, feed_dict={inputs: s})
  elif FLAGS.model_name == 'elmo':
    query_size = 512
    embed_module = 'https://tfhub.dev/google/elmo/2'
    embed = hub.Module(embed_module, trainable=False)
    inputs = tf.placeholder(tf.string, shape=(None,))
    encoded = embed(inputs, signature='default', as_dict=True)['default']
    sess = tf.Session()
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    encoder_fn = lambda s: sess.run(encoded, feed_dict={inputs: s})
  elif FLAGS.model_name == 'infersent':
    from models.infersent.models import InferSent
    import torch
    model_version = 2
    params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
                    'pool_type': 'max', 'dpout_model': 0.0,
                    'version': model_version}
    encoder = InferSent(params_model)
    encoder.load_state_dict(torch.load("./infersent/infersent%s.pkl"
                                       % model_version))
    encoder.cuda()
    encoder.set_w2v_path('./infersent/crawl-300d-2M.vec')
    encoder.build_vocab_k_words(K=100000)
    encoder_fn = lambda s: encoder.encode(s, tokenize=False)
  else:
    raise ValueError(FLAGS.model_name)

  def encode_sents(s, n):
    embs = []
    pbar = tqdm.tqdm(total=n)
    tuple_inputs = isinstance(s, tuple)
    for batch_idx in iterate_minibatches_indices(n, query_size, False):
      if tuple_inputs:
        batch_embs = encoder_fn((s[0][batch_idx], s[1][batch_idx]))
      else:
        batch_embs = encoder_fn(s[batch_idx])
      embs.append(batch_embs)
      pbar.update(len(batch_embs))
    pbar.close()
    return np.vstack(embs)

  n_train, n_test, n_unlabeled = len(train_sents), len(test_sents), \
      len(unlabeled_sents)
  unlabeled_embs = []

  if FLAGS.model_name in local_models:
    rtn = preprocess_raw_data(train_sents, test_sents, unlabeled_sents,
                              vocab=vocab)
    train_embs = encode_sents((rtn[0], rtn[1]), n_train)
    test_embs = encode_sents((rtn[2], rtn[3]), n_test)
    if n_unlabeled:
      unlabeled_embs = encode_sents((rtn[4], rtn[5]), n_unlabeled)
  else:
    train_embs = encode_sents(train_sents, n_train)
    test_embs = encode_sents(test_sents, n_test)
    if n_unlabeled:
      unlabeled_embs = encode_sents(unlabeled_sents, n_unlabeled)

  tf.keras.backend.clear_session()
  log('Encoded train {}, test {}'.format(train_embs.shape, test_embs.shape))
  return train_embs, test_embs, unlabeled_embs
def trained_metric_attack():
  freq_min = FLAGS.freq_min

  # load data part
  train_filenames, test_filenames = split_bookcorpus(0)
  member_embeds, nonmember_embeds = load_book_embedding(
      train_filenames, test_filenames, freq_min)
  membership_labels = np.concatenate(
      [np.ones(len(member_embeds)), np.zeros(len(nonmember_embeds))])
  all_embeds = member_embeds + nonmember_embeds

  train_indices, test_indices, _ = membership_split(
      (all_embeds, membership_labels))

  def indices_to_data(indices):
    embeds, labels = [], []
    for idx in indices:
      embeds.append(all_embeds[idx])
      labels.append(membership_labels[idx])
    return embeds, labels

  train_embeds, train_labels = indices_to_data(train_indices)
  test_embeds, test_labels = indices_to_data(test_indices)

  train_y = []
  for emb, label in zip(train_embeds, train_labels):
    train_y.append(np.ones(len(emb)) * label)
  train_y = np.concatenate(train_y).astype(np.float32)
  train_x = np.vstack(train_embeds)

  # define attack model
  config = get_model_config(FLAGS.model_name)
  encoder_dim = config["encoder_dim"]

  optimizer = tf.train.AdamOptimizer(1e-4)
  inputs_a = tf.placeholder(tf.float32, (None, encoder_dim), name="inputs_a")
  inputs_b = tf.placeholder(tf.float32, (None, encoder_dim), name="inputs_b")
  labels = tf.placeholder(tf.float32, (None,), name="labels")
  training = tf.placeholder(tf.bool, name="training")

  if FLAGS.model == 'deepset':
    model = DeepSetModel(encoder_dim // 2)
  elif FLAGS.model == 'bilinear':
    model = BilinearMetricModel(encoder_dim)
  elif FLAGS.model == 'linear':
    model = LinearMetricModel(encoder_dim // 2)
  else:
    raise ValueError(FLAGS.model)

  logits = model.forward(inputs_a, inputs_b, training=training)
  learned_sim = logits
  loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                 logits=logits)
  loss = tf.reduce_mean(loss)

  if FLAGS.metric == 'dot':
    sim = tf.reduce_sum(tf.multiply(inputs_a, inputs_b), axis=1)
  elif FLAGS.metric == 'cosine':
    sim = tf.reduce_sum(tf.multiply(tf.nn.l2_normalize(inputs_a, axis=-1),
                                    tf.nn.l2_normalize(inputs_b, axis=-1)),
                        axis=1)
  elif FLAGS.metric == 'l2':
    sim = -tf.reduce_sum(tf.square(inputs_a - inputs_b), axis=1)
  else:
    raise ValueError(FLAGS.metric)

  t_vars = tf.trainable_variables()
  post_ops = [
      tf.assign(v, v * (1 - FLAGS.wd)) for v in t_vars if 'kernel' in v.name
  ]
  grads_and_vars = optimizer.compute_gradients(loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  inputs = [inputs_a, inputs_b]
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())

    def split_metrics(ms, ls):
      member_ms, nonmember_ms = [], []
      for m, l in zip(ms, ls):
        if l == 1:
          member_ms.append(m)
        else:
          nonmember_ms.append(m)
      return member_ms, nonmember_ms

    def weighted_average(x):
      return np.mean(x)

    def calculate_adversarial_advantage(fetch):
      test_metrics = collect_scores(inputs, test_embeds, sess, fetch,
                                    training)
      test_member_ms, test_nonmember_ms = split_metrics(test_metrics,
                                                        test_labels)
      compute_adversarial_advantage(np.concatenate(test_member_ms),
                                    np.concatenate(test_nonmember_ms))
      if FLAGS.book_level:
        compute_adversarial_advantage(
            [weighted_average(m) for m in test_member_ms],
            [weighted_average(m) for m in test_nonmember_ms])

    calculate_adversarial_advantage(sim)

    print('Training attack model with {} embs...'.format(len(train_y)))
    for epoch in range(10):
      iterations = 0
      train_loss = 0
      for batch_idx in iterate_minibatches_indices(
          len(train_y), batch_size=FLAGS.batch_size, shuffle=True):
        feed = {
            inputs_a: train_x[batch_idx][:, :encoder_dim],
            inputs_b: train_x[batch_idx][:, encoder_dim:],
            labels: train_y[batch_idx],
            training: True
        }
        err, _ = sess.run([loss, train_ops], feed_dict=feed)
        train_loss += err
        iterations += 1

      log("\nEpoch: {}, Loss: {:.4f}".format(epoch, train_loss / iterations))
      calculate_adversarial_advantage(learned_sim)
def optimization_inversion():
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case,
      spm_model_file=FLAGS.spm_model_file)
  cls_id = tokenizer.vocab['[CLS]']
  sep_id = tokenizer.vocab['[SEP]']
  mask_id = tokenizer.vocab['[MASK]']

  _, _, x, y = load_inversion_data()
  y = y[0]
  filters = [cls_id, sep_id, mask_id]
  y = filter_labels(y, filters)

  batch_size = FLAGS.batch_size
  seq_len = FLAGS.seq_len
  max_iters = FLAGS.max_iters

  albert_config = modeling.AlbertConfig.from_json_file(
      FLAGS.albert_config_file)
  input_ids = tf.ones((batch_size, seq_len + 2), tf.int32)
  input_mask = tf.ones_like(input_ids, tf.int32)
  input_type_ids = tf.zeros_like(input_ids, tf.int32)
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=False,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=input_type_ids,
      use_one_hot_embeddings=False)
  word_emb = model.output_embedding_table

  albert_vars = tf.trainable_variables()
  (assignment_map, _) = modeling.get_assignment_map_from_checkpoint(
      albert_vars, FLAGS.init_checkpoint)
  tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

  batch_cls_ids = tf.ones((batch_size, 1), tf.int32) * cls_id
  batch_sep_ids = tf.ones((batch_size, 1), tf.int32) * sep_id
  cls_emb = tf.nn.embedding_lookup(word_emb, batch_cls_ids)
  sep_emb = tf.nn.embedding_lookup(word_emb, batch_sep_ids)

  prob_mask = np.zeros((albert_config.vocab_size,), np.float32)
  prob_mask[filters] = -1e9
  prob_mask = tf.constant(prob_mask, dtype=np.float32)

  logit_inputs = tf.get_variable(
      name='inputs',
      shape=(batch_size, seq_len, albert_config.vocab_size),
      initializer=tf.random_uniform_initializer(-0.1, 0.1))
  t_vars = [logit_inputs]
  t_var_names = {logit_inputs.name}

  logit_inputs += prob_mask
  prob_inputs = tf.nn.softmax(logit_inputs / FLAGS.temp, axis=-1)
  emb_inputs = tf.matmul(prob_inputs, word_emb)
  emb_inputs = tf.concat([cls_emb, emb_inputs, sep_emb], axis=1)

  if FLAGS.low_layer_idx == 0:
    encoded = mean_pool(emb_inputs, input_mask)
  else:
    encoded = encode(emb_inputs, input_mask, input_type_ids, albert_config)

  targets = tf.placeholder(
      tf.float32, shape=(batch_size, encoded.shape.as_list()[-1]))
  loss = get_similarity_metric(encoded, targets, FLAGS.metric, rtn_loss=True)
  loss = tf.reduce_sum(loss)

  optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  start_vars = set(v.name for v in tf.global_variables()
                   if v.name not in t_var_names)
  grads_and_vars = optimizer.compute_gradients(loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())

  end_vars = tf.global_variables()
  new_vars = [v for v in end_vars if v.name not in start_vars]
  preds = tf.argmax(prob_inputs, axis=-1)
  batch_init_ops = tf.variables_initializer(new_vars)

  total_it = len(x) // batch_size
  with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])

    def invert_one_batch(batch_targets):
      sess.run(batch_init_ops)
      feed_dict = {targets: batch_targets}
      prev = 1e6
      for i in range(max_iters):
        curr, _ = sess.run([loss, train_ops], feed_dict)
        # stop if no progress
        if (i + 1) % (max_iters // 10) == 0 and curr > prev:
          break
        prev = curr
      return sess.run([preds, loss], feed_dict)

    start_time = time.time()
    it = 0.0
    all_tp, all_fp, all_fn, all_err = 0.0, 0.0, 0.0, 0.0
    for batch_idx in iterate_minibatches_indices(len(x), batch_size,
                                                 False, False):
      y_pred, err = invert_one_batch(x[batch_idx])
      tp, fp, fn = tp_fp_fn_metrics_np(y_pred, y[batch_idx])
      # for yp, yt in zip(y_pred, y[batch_idx]):
      #   print(' '.join(set(tokenizer.convert_ids_to_tokens(yp))))
      #   print(' '.join(set(tokenizer.convert_ids_to_tokens(yt))))
      it += 1.0
      all_err += err
      all_tp += tp
      all_fp += fp
      all_fn += fn
      all_pre = all_tp / (all_tp + all_fp + 1e-7)
      all_rec = all_tp / (all_tp + all_fn + 1e-7)
      all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
      if it % FLAGS.print_every == 0:
        it_time = (time.time() - start_time) / it
        log("Iter {:.2f}%, err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%,"
            " {:.2f} sec/it".format(it / total_it * 100, all_err / it,
                                    all_pre * 100, all_rec * 100,
                                    all_f1 * 100, it_time))

    all_pre = all_tp / (all_tp + all_fp + 1e-7)
    all_rec = all_tp / (all_tp + all_fn + 1e-7)
    all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
    log("Final err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%".format(
        all_err / it, all_pre * 100, all_rec * 100, all_f1 * 100))
def load_inversion_data():
  vocab = build_vocabulary(rebuild=False)
  if FLAGS.data_name == 'bookcorpus':
    train_sents, _, test_sents, _, _, _ = load_bookcorpus_author(
        train_size=FLAGS.train_size, test_size=FLAGS.test_size,
        unlabeled_size=0, split_by_book=True, split_word=True, top_attr=800)
  elif FLAGS.data_name == 'reddit':
    train_sents, _, test_sents, _, _, _ = reddit_author_data(
        train_size=FLAGS.train_size, test_size=1, unlabeled_size=0,
        split_word=True, top_attr=0)
  else:
    raise ValueError(FLAGS.data_name)

  if FLAGS.cross_domain:
    train_sents = load_cross_domain_data(800000, split_word=True)
    log('Loaded {} cross domain sentences'.format(len(train_sents)))

  ckpt_name = get_model_ckpt_name(FLAGS.model_name, epoch=FLAGS.epoch,
                                  batch_size=FLAGS.batch_size,
                                  gamma=FLAGS.gamma, num_layer=3,
                                  attr=FLAGS.attr)
  model_path = os.path.join(FLAGS.model_dir, ckpt_name, 'model.ckpt')
  config = get_model_config(FLAGS.model_name)

  train_data, test_data = encode_sentences(
      vocab, model_path, config, train_sents, test_sents,
      low_layer_idx=FLAGS.low_layer_idx, high_layer_idx=FLAGS.high_layer_idx)
  # clear session data for later optimization or learning
  tf.keras.backend.clear_session()

  train_x, train_y, train_m = train_data
  test_x, test_y, test_m = test_data

  if FLAGS.low_layer_idx != FLAGS.high_layer_idx:
    log('Training high to low mapping...')
    if FLAGS.mapper == 'linear':
      mapping = linear_mapping(train_x[1], train_x[0])
    elif FLAGS.mapper == 'mlp':
      mapping = mlp_mapping(train_x[1], train_x[0], epochs=50,
                            activation=tf.nn.relu)
    elif FLAGS.mapper == 'gan':
      mapping = gan_mapping(train_x[1], train_x[0], disc_iters=5,
                            batch_size=64, gamma=1.0, epoch=100,
                            activation=tf.tanh)
    else:
      raise ValueError(FLAGS.mapper)
    test_x = mapping(test_x[1])
    train_x = train_x[0]

  log('Loaded {} embeddings for inversion with shape {}'.format(
      test_x.shape[0], test_x.shape[1]))
  data = (train_x, test_x, train_y, test_y, train_m, test_m)
  return data
def optimization_invert(data, lr=1e-3, attack_batch_size=8, seq_len=5,
                        max_iters=1000):
  # use softmax to select words
  _, x, _, y = data[:4]
  y = sents_to_labels(y)

  config = get_model_config(FLAGS.model_name)
  num_words = config['vocab_size']
  model = QuickThoughtModel(num_words, config['emb_dim'],
                            config['encoder_dim'], 1, init_word_emb=None,
                            cell_type=config['cell_type'],
                            num_layer=config['num_layer'], train=False)
  word_emb = model.word_in_emb

  targets = tf.placeholder(tf.float32, shape=(attack_batch_size, x.shape[1]))
  log('Inverting {} words from {} embeddings'.format(num_words, len(x)))

  if FLAGS.permute:
    # modeling the top k words then permute the order
    logit_inputs = tf.get_variable(
        name='inputs',
        shape=(attack_batch_size, seq_len, num_words - 1),
        initializer=tf.random_uniform_initializer(-0.1, 0.1))
    t_vars = [logit_inputs]

    prob_inputs = continuous_topk_v2(logit_inputs, seq_len, FLAGS.temp)
    pad_inputs = tf.zeros((attack_batch_size, seq_len, 1))
    prob_inputs = tf.concat([pad_inputs, prob_inputs], axis=2)
    emb_inputs = tf.matmul(prob_inputs, word_emb)

    permute_inputs = tf.get_variable(
        name='permute_inputs',
        shape=(attack_batch_size, seq_len, seq_len),
        initializer=tf.truncated_normal_initializer(0, 0.1))
    t_vars.append(permute_inputs)
    permute_matrix = sinkhorn(permute_inputs / FLAGS.temp, 20)
    emb_inputs = tf.matmul(permute_matrix, emb_inputs)
  else:
    logit_inputs = tf.get_variable(
        name='inputs',
        shape=(attack_batch_size, seq_len, num_words - 1),
        initializer=tf.random_uniform_initializer(-0.1, 0.1))
    t_vars = [logit_inputs]
    pad_inputs = tf.ones((attack_batch_size, seq_len, 1),
                         tf.float32) * (-1e9)
    logit_inputs = tf.concat([pad_inputs, logit_inputs], axis=2)
    prob_inputs = tf.nn.softmax(logit_inputs / FLAGS.temp, axis=-1)
    emb_inputs = tf.matmul(prob_inputs, word_emb)

  preds = tf.argmax(prob_inputs, axis=-1)
  t_var_names = set([v.name for v in t_vars])

  masks = tf.ones(shape=(attack_batch_size, seq_len), dtype=tf.int32)
  all_layers = model.encode(emb_inputs, masks, model.in_cells,
                            model.proj_in, return_all_layers=True)
  encoded = all_layers[FLAGS.low_layer_idx]

  loss = get_similarity_metric(encoded, targets, FLAGS.metric, rtn_loss=True)
  loss = tf.reduce_sum(loss)
  if FLAGS.alpha > 0.:
    # encourage the words to be different
    diff = tf.expand_dims(prob_inputs, 2) - tf.expand_dims(prob_inputs, 1)
    reg = tf.reduce_mean(-tf.exp(tf.reduce_sum(diff ** 2, axis=-1)), [1, 2])
    loss += FLAGS.alpha * tf.reduce_sum(reg)

  optimizer = tf.train.AdamOptimizer(lr)
  model_vars = [
      v for v in tf.global_variables() if v.name not in t_var_names
  ]
  saver = tf.train.Saver(model_vars)
  start_vars = set(v.name for v in model_vars)

  grads_and_vars = optimizer.compute_gradients(loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())

  end_vars = tf.global_variables()
  new_vars = [v for v in end_vars if v.name not in start_vars]
  batch_init_ops = tf.variables_initializer(new_vars)

  total_it = len(x) // attack_batch_size
  with tf.Session() as sess:
    ckpt_name = get_model_ckpt_name(FLAGS.model_name, epoch=FLAGS.epoch,
                                    batch_size=FLAGS.batch_size,
                                    num_layer=3, gamma=FLAGS.gamma,
                                    attr=FLAGS.attr)
    ckpt_path = os.path.join(FLAGS.model_dir, ckpt_name, 'model.ckpt')
    log('Restoring model from {}'.format(ckpt_path))
    saver.restore(sess, ckpt_path)

    def invert_one_batch(batch_targets):
      sess.run(batch_init_ops)
      feed_dict = {targets: batch_targets}
      prev = 1e6
      for i in range(max_iters):
        curr, _ = sess.run([loss, train_ops], feed_dict)
        # stop if no progress
        if (i + 1) % (max_iters // 10) == 0 and curr > prev:
          break
        prev = curr
      return sess.run([preds, loss], feed_dict)

    it = 0.0
    all_tp, all_fp, all_fn, all_err = 0.0, 0.0, 0.0, 0.0
    start_time = time.time()
    # vocab = build_vocabulary(exp_id=0, rebuild=False)
    # inv_vocab = dict((v, k) for k, v in vocab.items())
    for batch_idx in iterate_minibatches_indices(len(x), attack_batch_size,
                                                 False, False):
      y_pred, err = invert_one_batch(x[batch_idx])
      tp, fp, fn = tp_fp_fn_metrics_np(y_pred, y[batch_idx])
      # for yy, pp in zip(y[batch_idx], y_pred):
      #   matched = np.intersect1d(np.unique(yy), np.unique(pp))
      #   if len(matched) >= 0.75 * len(yy):
      #     print(' '.join([inv_vocab[w] for w in yy]))
      #     print(' '.join([inv_vocab[w] for w in np.unique(pp)]))
      it += 1.0
      all_err += err
      all_tp += tp
      all_fp += fp
      all_fn += fn
      all_pre = all_tp / (all_tp + all_fp + 1e-7)
      all_rec = all_tp / (all_tp + all_fn + 1e-7)
      all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
      if it % FLAGS.print_every == 0:
        it_time = (time.time() - start_time) / it
        log('Iter {:.2f}%, err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%,'
            ' {:.2f} sec/it'.format(it / total_it * 100, all_err / it,
                                    all_pre * 100, all_rec * 100,
                                    all_f1 * 100, it_time))

    all_pre = all_tp / (all_tp + all_fp + 1e-7)
    all_rec = all_tp / (all_tp + all_fn + 1e-7)
    all_f1 = 2 * all_pre * all_rec / (all_pre + all_rec + 1e-7)
    log('Final err={}, pre={:.2f}%, rec={:.2f}%, f1={:.2f}%'.format(
        all_err / it, all_pre * 100, all_rec * 100, all_f1 * 100))
def learning_invert(data, batch_size):
  train_x, test_x, train_y, test_y, train_m, test_m = data
  config = get_model_config(FLAGS.model_name)
  num_words = config['vocab_size']

  if FLAGS.model != 'rnn':
    train_y, test_y = sents_to_labels(train_y), sents_to_labels(test_y)

  label_freq = count_label_freq(train_y, num_words)
  log('Imbalance ratio: {}'.format(np.max(label_freq) / np.min(label_freq)))
  label_margin = tf.constant(np.reciprocal(label_freq ** 0.25),
                             dtype=tf.float32)
  C = FLAGS.C

  log('Build attack model for {} words...'.format(num_words))
  encoder_dim = train_x.shape[1]
  inputs = tf.placeholder(tf.float32, (None, encoder_dim), name="inputs")
  labels = tf.placeholder(tf.float32, (None, num_words), name="labels")
  masks = None
  training = tf.placeholder(tf.bool, name='training')

  if FLAGS.model == 'multiset':
    if num_words == 50001:
      init_word_emb = load_initialized_word_emb()
      emb_dim = init_word_emb.shape[1]
    else:
      init_word_emb = None
      emb_dim = 512
    model = MultiSetInversionModel(emb_dim, num_words, FLAGS.seq_len,
                                   init_word_emb, C=C,
                                   label_margin=label_margin)
    preds, loss = model.forward(inputs, labels, training)
    true_pos, false_pos, false_neg = tp_fp_fn_metrics(labels, preds)
    eval_fetch = [loss, true_pos, false_pos, false_neg]
  elif FLAGS.model == 'rnn':
    labels = tf.placeholder(tf.int64, (None, None), name="labels")
    masks = tf.placeholder(tf.int32, (None, None), name="masks")
    init_word_emb = load_initialized_word_emb(glove_only=True)
    model = RecurrentInversionModel(init_word_emb.shape[1], num_words,
                                    FLAGS.seq_len, init_word_emb,
                                    beam_size=5, C=C,
                                    label_margin=label_margin)
    preds, loss = model.forward(inputs, labels, masks, training)
    eval_fetch = [loss, preds]
  elif FLAGS.model == 'multilabel':
    model = MultiLabelInversionModel(num_words, C=C,
                                     label_margin=label_margin)
    preds, loss = model.forward(inputs, labels, training)
    true_pos, false_pos, false_neg = tp_fp_fn_metrics(labels, preds)
    eval_fetch = [loss, true_pos, false_pos, false_neg]
  else:
    raise ValueError(FLAGS.model)

  t_vars = tf.trainable_variables()
  wd = FLAGS.wd
  post_ops = [
      tf.assign(v, v * (1 - wd)) for v in t_vars if 'kernel' in v.name
  ]

  optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  grads_and_vars = optimizer.compute_gradients(
      loss + tf.losses.get_regularization_loss(), t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  log('Train attack model with {} data...'.format(len(train_x)))
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(30):
      train_iterations = 0
      train_loss = 0
      for batch_idx in iterate_minibatches_indices(len(train_y),
                                                   batch_size, True):
        if masks is None:
          one_hot_labels = np.zeros((len(batch_idx), num_words),
                                    dtype=np.float32)
          for i, idx in enumerate(batch_idx):
            one_hot_labels[i][train_y[idx]] = 1
          feed = {inputs: train_x[batch_idx], labels: one_hot_labels,
                  training: True}
        else:
          feed = {inputs: train_x[batch_idx], labels: train_y[batch_idx],
                  masks: train_m[batch_idx], training: True}
        err, _ = sess.run([loss, train_ops], feed_dict=feed)
        train_loss += err
        train_iterations += 1

      test_iterations = 0
      test_loss = 0
      test_tp, test_fp, test_fn = 0, 0, 0
      for batch_idx in iterate_minibatches_indices(len(test_y),
                                                   batch_size=512,
                                                   shuffle=False):
        if masks is None:
          one_hot_labels = np.zeros((len(batch_idx), num_words),
                                    dtype=np.float32)
          for i, idx in enumerate(batch_idx):
            one_hot_labels[i][test_y[idx]] = 1
          feed = {inputs: test_x[batch_idx], labels: one_hot_labels,
                  training: False}
        else:
          feed = {inputs: test_x[batch_idx], labels: test_y[batch_idx],
                  masks: test_m[batch_idx], training: False}
        fetch = sess.run(eval_fetch, feed_dict=feed)
        if len(fetch) == 2:
          err, pred = fetch
          tp, fp, fn = tp_fp_fn_metrics_np(pred, test_y[batch_idx])
        else:
          err, tp, fp, fn = fetch
        # for yy, pp in zip(test_y[batch_idx], pred):
        #   matched = np.intersect1d(np.unique(yy), np.unique(pp))
        #   if len(matched) >= 0.8 * len(yy):
        #     print(' '.join([inv_vocab[w] for w in yy]))
        #     print(' '.join([inv_vocab[w] for w in np.unique(pp)]))
        test_iterations += 1
        test_loss += err
        test_tp += tp
        test_fp += fp
        test_fn += fn

      precision = test_tp / (test_tp + test_fp) * 100
      recall = test_tp / (test_tp + test_fn) * 100
      f1 = 2 * precision * recall / (precision + recall)
      log("Epoch: {}, train loss: {:.4f}, test loss: {:.4f}, "
          "pre: {:.2f}%, rec: {:.2f}%, f1: {:.2f}%".format(
              epoch, train_loss / train_iterations,
              test_loss / test_iterations, precision, recall, f1))
def main(_):
  epochs = FLAGS.epochs
  gamma = FLAGS.gamma
  batch_size = FLAGS.batch_size

  sents, sent_masks, authors, vocab = \
      load_bookcorpus_sentences(load_author=True)
  num_author = len(np.unique(authors))
  init_word_emb = load_initialized_word_emb()
  vocab_size = len(vocab) + 1
  log("training with {} sents and {} vocabs and {} authors".format(
      sents.shape, vocab_size, num_author))

  if init_word_emb is not None and init_word_emb.shape[1] != FLAGS.emb_dim:
    offset = FLAGS.emb_dim - init_word_emb.shape[1]
    if offset > 0:
      random_emb = np.random.uniform(
          -0.1, 0.1, (vocab_size, offset)).astype(np.float32)
      init_word_emb = np.hstack([init_word_emb, random_emb])
    else:
      init_word_emb = init_word_emb[:, :FLAGS.emb_dim]

  model = QuickThoughtModel(vocab_size, FLAGS.emb_dim, FLAGS.encoder_dim,
                            FLAGS.context_size, cell_type=FLAGS.cell_type,
                            num_layer=FLAGS.num_layer,
                            init_word_emb=init_word_emb,
                            train=True, drop_p=0.15)

  global_step = tf.train.get_or_create_global_step()
  lr = get_lr(global_step) if FLAGS.cell_type == 'TRANS' else FLAGS.lr
  optimizer = tf.train.AdamOptimizer(lr)

  i_inputs = tf.placeholder(tf.int64, (None, None), name="i_inputs")
  i_masks = tf.placeholder(tf.int32, (None, None), name="i_masks")
  p_inputs = tf.placeholder(tf.int64, (None, None), name="p_inputs")
  p_masks = tf.placeholder(tf.int32, (None, None), name="p_masks")
  r_inputs = tf.placeholder(tf.int64, (None, None), name="r_inputs")
  r_masks = tf.placeholder(tf.int32, (None, None), name="r_masks")

  output_tensors = [(p_inputs, p_masks), (r_inputs, r_masks)]
  accs, loss = model.forward_triplet((i_inputs, i_masks), output_tensors,
                                     batch_size)

  thought_vector = model.thought_vector
  thought_vector = flip_gradient(thought_vector, gamma)

  if FLAGS.attr == 'author':
    labels = tf.placeholder(tf.int64, (None,), name="labels")
    adv_model = build_model(num_author, FLAGS.encoder_dim // 2)
    adv_logits = adv_model(thought_vector, tf.constant(True, dtype=tf.bool))
    adv_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=adv_logits)
    adv_loss = tf.reduce_mean(adv_loss)
    adv_acc = tf.reduce_mean(
        tf.cast(tf.equal(labels, tf.argmax(adv_logits, axis=-1)),
                tf.float32))
  elif FLAGS.attr == 'word':
    labels = tf.placeholder(tf.float32, (None, None), name='labels')
    adv_model = build_model(vocab_size, FLAGS.encoder_dim // 2)
    adv_logits = adv_model(thought_vector, tf.constant(True, dtype=tf.bool))
    adv_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels[:, 1:], logits=adv_logits[:, 1:])
    adv_loss = tf.reduce_mean(tf.reduce_sum(adv_loss, axis=-1))
    adv_predictions = tf.round(tf.nn.sigmoid(adv_logits))
    tp, fp, fn = tp_fp_fn_metrics(labels[:, 1:], adv_predictions[:, 1:])
    pre = tp / (tp + fp)
    rec = tp / (tp + fn)
    adv_acc = 2 * (pre * rec) / (pre + rec)
  else:
    raise ValueError(FLAGS.attr)

  accs.append(adv_acc)
  opt_loss = loss + gamma * adv_loss

  t_vars = tf.trainable_variables()
  grads_and_vars = optimizer.compute_gradients(opt_loss, t_vars)
  grads, variables = zip(*grads_and_vars)
  grads, _ = tf.clip_by_global_norm(grads, 10.0)
  grads_and_vars = zip(grads, variables)
  train_ops = optimizer.apply_gradients(grads_and_vars,
                                        global_step=global_step)

  iterations = epochs * len(sents) // batch_size
  print_every = FLAGS.print_every
  saver = tf.train.Saver(max_to_keep=epochs)

  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    iteration = 0
    train_loss = 0
    train_adv_loss = 0
    fw_accs = 0
    bw_accs = 0
    adv_accs = 0
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
      start = time.time()
      for batch in iterate_triplet_minibatches(sents, sent_masks, authors,
                                               batch_size):
        xx, xp, xr, y = batch
        if FLAGS.attr == 'word':
          b = len(y)
          y = np.zeros((b, vocab_size), dtype=np.float32)
          for i in range(b):
            y[i][xx[0][i]] = 1.0

        feed = {i_inputs: xx[0], i_masks: xx[1],
                p_inputs: xp[0], p_masks: xp[1],
                r_inputs: xr[0], r_masks: xr[1],
                labels: y}
        fetch = sess.run([train_ops, loss, adv_loss] + accs, feed_dict=feed)
        train_loss += fetch[1]
        train_adv_loss += fetch[2]
        fw_accs += fetch[3]
        bw_accs += fetch[4]
        adv_accs += fetch[5]
        iteration += 1

        if iteration % print_every == 0:
          end = time.time()
          log("Iteration: {:.4f}%, Loss: {:.4f}, Adv Loss:{:.4f},"
              " FW Acc:{:.2f}%, BW Acc:{:.2f}%, Adv Perf: {:.2f}%,"
              " {:.4f} sec/batch".format(
                  iteration / iterations * 100,
                  train_loss / print_every,
                  train_adv_loss / print_every,
                  fw_accs / print_every * 100,
                  bw_accs / print_every * 100,
                  adv_accs / print_every * 100,
                  (end - start) / print_every))
          train_loss = 0
          train_adv_loss = 0
          fw_accs = 0
          bw_accs = 0
          adv_accs = 0
          start = time.time()

      model_type = FLAGS.cell_type
      if model_type == 'TRANS':
        model_type += 'l{}'.format(FLAGS.num_layer)

      model_name = 'bookcorpus_e{}_{}_b{}_{}_adv{}'.format(
          e, model_type, batch_size, FLAGS.attr, gamma)
      save_path = os.path.join(FLAGS.save_dir, model_name)
      if not os.path.exists(save_path):
        os.makedirs(save_path)
      saver.save(sess, os.path.join(save_path, "model.ckpt"))
def encode_parsed_sentences(config, model_path, *data, **kwargs):
  high_layer_idx = kwargs.get('high_layer_idx', -1)
  low_layer_idx = kwargs.get('low_layer_idx', -1)
  query_size = kwargs.get('query_size', 2048)

  log('Encoding sentences on the fly...')
  model = QuickThoughtModel(config['vocab_size'], config['emb_dim'],
                            config['encoder_dim'], 1, init_word_emb=None,
                            cell_type=config['cell_type'],
                            num_layer=config['num_layer'], train=False)
  inputs = tf.placeholder(tf.int64, (None, None), name='inputs')
  masks = tf.placeholder(tf.int32, (None, None), name='masks')
  encode_emb = tf.nn.embedding_lookup(model.word_in_emb, inputs)
  all_layers = model.encode(encode_emb, masks, model.in_cells,
                            model.proj_in, return_all_layers=True)

  learn_mapping = high_layer_idx != low_layer_idx
  if high_layer_idx == low_layer_idx:
    encoded = all_layers[high_layer_idx]
  else:
    encoded = (all_layers[low_layer_idx], all_layers[high_layer_idx])

  model_vars = tf.trainable_variables()
  saver = tf.train.Saver(model_vars)
  sess = tf.Session()
  saver.restore(sess, model_path)
  encoder_fn = lambda s: sess.run(encoded, {inputs: s[0], masks: s[1]})

  def encode_sents(s, n):
    embs_low, embs_high = [], []
    pbar = tqdm.tqdm(total=n)
    for batch_idx in iterate_minibatches_indices(n, query_size, False):
      emb = encoder_fn((s[0][batch_idx], s[1][batch_idx]))
      if learn_mapping:
        embs_low.append(emb[0])
        embs_high.append(emb[1])
        n_batch = len(emb[0])
      else:
        embs_low.append(emb)
        n_batch = len(emb)
      pbar.update(n_batch)
    pbar.close()
    if learn_mapping:
      return np.vstack(embs_low), np.vstack(embs_high)
    else:
      return np.vstack(embs_low)

  rtn_data = []
  for y, m in data:
    n_sent = len(y)
    x = encode_sents((y, m.astype(np.int32)), n_sent)
    rtn_data.append(x)
  return rtn_data
def gan_mapping(x, y, lr=1e-4, lmbda=10., gamma=0., beta1=0.5,
                activation=tf.nn.relu, epoch=30, disc_iters=10,
                batch_size=128):
  n_data, x_dim = x.shape
  y_dim = y.shape[1]
  model = WGANGP(x_dim, y_dim, lr=lr, lmbda=lmbda, gamma=gamma, beta1=beta1,
                 activation=activation)

  gen_sampler = inf_batch_iterator(n_data, batch_size)
  disc_sampler = inf_batch_iterator(n_data, batch_size)
  # round up so the last partial batch still counts as one generator update
  num_batch_per_epoch = n_data // batch_size + int(n_data % batch_size != 0)

  sess = tf.Session()
  sess.run(tf.global_variables_initializer())
  for e in range(epoch):
    train_d_loss = []
    train_g_loss = []
    train_l2_loss = []
    for _ in range(num_batch_per_epoch):
      # train the discriminator first
      for _ in range(disc_iters):
        disc_idx = next(disc_sampler)
        disc_x, disc_y = x[disc_idx], y[disc_idx]
        d_err = model.train_disc_one_batch(sess, disc_x, disc_y)
        train_d_loss.append(d_err)
      gen_idx = next(gen_sampler)
      gen_x, gen_y = x[gen_idx], y[gen_idx]
      g_err, l2_err = model.train_gen_one_batch(sess, gen_x, gen_y)
      train_g_loss.append(g_err)
      train_l2_loss.append(l2_err)

    train_d_loss = np.mean(train_d_loss)
    train_g_loss = np.mean(train_g_loss)
    train_l2_loss = np.mean(train_l2_loss)
    log('Epoch: {}, disc loss: {:.4f}, gen loss: {:.4f},'
        ' l2 loss: {:.4f}'.format(e + 1, train_d_loss, train_g_loss,
                                  train_l2_loss))

  def mapping(z):
    mapped = []
    for idx in iterate_minibatches_indices(len(z), batch_size=2048):
      batch_mapped = model.generate(sess, z[idx])
      mapped.append(batch_mapped)
    tf.keras.backend.clear_session()
    return np.vstack(mapped)

  return mapping
def main(_):
  assert FLAGS.dpsgd
  exp_id = FLAGS.exp_id
  num_gpu = FLAGS.num_gpu

  train_words, unigrams, word_sample_int = preprocess_texts(exp_id)
  n_vocab = len(unigrams)
  n_sampled = FLAGS.n_sampled
  n_embedding = FLAGS.hidden_size
  init_width = 0.5 / n_embedding
  epochs = FLAGS.epochs
  window_size = 5
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  delta = 1 / len(train_words)

  cumtable = tf.constant(np.cumsum(unigrams))
  inputs = tf.placeholder(tf.int64, [None], name='inputs')
  labels = tf.placeholder(tf.int64, [None, 1], name='labels')

  embedding = tf.Variable(tf.random_uniform((n_vocab, n_embedding),
                                            -init_width, init_width),
                          name="emb")
  sm_w_t = embedding
  sm_b = tf.Variable(tf.zeros(n_vocab), name="sm_b")

  curr_words = tf.Variable(0, trainable=False)
  update_curr_words = curr_words.assign_add(batch_size)
  lr = learning_rate * tf.maximum(
      0.0001,
      1.0 - tf.cast(curr_words, tf.float32) / len(train_words) / epochs)

  num_microbatches = FLAGS.microbatches
  if FLAGS.dpsgd:
    optimizer = SparseDPAdamGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=num_microbatches if num_microbatches > 0 else None,
        learning_rate=lr)
  else:
    optimizer = tf.train.AdamOptimizer(lr)

  t_vars = tf.trainable_variables()

  def model(x, y):
    nb = tf.shape(x)[0]
    example_emb = tf.nn.embedding_lookup(embedding, x)

    # Negative sampling.
    random_ints = tf.random.uniform((n_sampled * nb,),
                                    maxval=cumtable[-1], dtype=tf.int64)
    sampled_ids = tf.searchsorted(cumtable, random_ints, out_type=tf.int64)

    y_vec = tf.squeeze(y)
    true_w = tf.nn.embedding_lookup(sm_w_t, y_vec)
    true_b = tf.nn.embedding_lookup(sm_b, y_vec)
    true_logits = tf.reduce_sum(tf.multiply(example_emb, true_w), 1) + true_b

    sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
    sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)
    sampled_w_mat = tf.reshape(sampled_w, [nb, n_sampled, n_embedding])
    sampled_b_vec = tf.reshape(sampled_b, [nb, n_sampled])
    example_emb_mat = tf.reshape(example_emb, [nb, n_embedding, 1])
    sampled_logits = tf.squeeze(
        tf.matmul(sampled_w_mat, example_emb_mat)) + sampled_b_vec

    # Calculate the loss using negative sampling
    true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(true_logits), logits=true_logits)
    sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(sampled_logits), logits=sampled_logits)
    sampled_mask = 1 - tf.cast(
        tf.equal(y, tf.reshape(sampled_ids, [nb, n_sampled])), tf.float32)
    vector_loss = true_xent + tf.reduce_sum(sampled_xent * sampled_mask,
                                            axis=1)
    scalar_loss = tf.reduce_mean(vector_loss)

    if FLAGS.dpsgd:
      grads = optimizer.compute_gradients(
          vector_loss, t_vars, colocate_gradients_with_ops=num_gpu > 1)
    else:
      grads = optimizer.compute_gradients(
          scalar_loss, t_vars, colocate_gradients_with_ops=num_gpu > 1)
    return grads, scalar_loss

  if num_gpu > 1:
    tower_grads, scalar_loss = make_parallel(model, optimizer, num_gpu,
                                             x=inputs, y=labels)
    train_ops = rigid_op_sequence(tower_grads)
  else:
    grads_and_vars, scalar_loss = model(inputs, labels)
    train_ops = optimizer.apply_gradients(grads_and_vars)

  saver = tf.train.Saver()
  iterations = epochs * len(train_words) // batch_size
  print_every = FLAGS.print_every

  with tf.Session() as sess:
    iteration = 0
    train_loss = 0
    sess.run(tf.global_variables_initializer())
    for e in range(1, epochs + 1):
      start = time.time()
      for x, y in get_batches(train_words, batch_size, word_sample_int,
                              window_size):
        b = len(x)
        if num_microbatches > 0:
          offset = b - b % num_microbatches
          x, y = x[:offset], y[:offset]
        feed = {inputs: x, labels: np.array(y)[:, None]}
        err, _, _ = sess.run(
            [scalar_loss, train_ops, update_curr_words], feed_dict=feed)
        train_loss += err
        iteration += 1

        if iteration % print_every == 0:
          end = time.time()
          log("Iteration: {:.4f}%, Loss: {:.4f}, {:.4f} sec/batch".format(
              iteration / iterations * 100, train_loss / print_every,
              (end - start) / print_every))
          train_loss = 0
          start = time.time()
          if FLAGS.dpsgd:
            eps = compute_epsilon(iteration, len(train_words))
            log('The current epsilon is: {:.2f} for delta={}'.format(
                eps, delta))

      model_name = 'tfw2v_{}'.format(exp_id)
      if FLAGS.dpsgd:
        model_name += 'e{}_n{}_l{}_mb{}'.format(
            e, FLAGS.noise_multiplier, FLAGS.l2_norm_clip, num_microbatches)
        eps = compute_epsilon(iteration, len(train_words))

      save_path = os.path.join(FLAGS.save_dir, model_name)
      if not os.path.exists(save_path):
        os.makedirs(save_path)
      saver.save(sess, os.path.join(save_path, "model.ckpt"))
      if FLAGS.dpsgd:
        with open(os.path.join(save_path, 'eps{:.2f}'.format(eps)), 'w'):
          pass
def load_inversion_data():
  albert_config = modeling.AlbertConfig.from_json_file(
      FLAGS.albert_config_file)
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case,
      spm_model_file=FLAGS.spm_model_file)

  train_sents, _, test_sents, _, _, _ = load_bookcorpus_author(
      train_size=FLAGS.train_size, test_size=FLAGS.test_size,
      unlabeled_size=0, split_by_book=True, split_word=False, top_attr=800)
  if FLAGS.cross_domain:
    train_sents = load_cross_domain_data(800000, split_word=False)

  def sents_to_examples(sents):
    examples = read_examples(sents, tokenization.convert_to_unicode)
    return convert_examples_to_features(examples=examples,
                                        seq_length=FLAGS.max_seq_length,
                                        tokenizer=tokenizer)

  input_ids, input_mask, input_type_ids, outputs = model_fn_builder(
      albert_config=albert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      use_one_hot_embeddings=False)

  sess = tf.Session()
  sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
  learn_mapping = FLAGS.high_layer_idx != FLAGS.low_layer_idx

  def encode_example(features):
    n_data = len(features[0])
    embs_low, embs_high = [], []
    pbar = tqdm.tqdm(total=n_data)
    for b_idx in iterate_minibatches_indices(n_data, 128):
      emb = sess.run(outputs,
                     feed_dict={input_ids: features[0][b_idx],
                                input_mask: features[1][b_idx],
                                input_type_ids: features[2][b_idx]})
      if learn_mapping:
        embs_low.append(emb[0])
        embs_high.append(emb[1])
        n_batch = len(emb[0])
      else:
        embs_low.append(emb)
        n_batch = len(emb)
      pbar.update(n_batch)
    pbar.close()
    if learn_mapping:
      return np.vstack(embs_low), np.vstack(embs_high)
    else:
      return np.vstack(embs_low)

  train_features = sents_to_examples(train_sents)
  train_x = encode_example(train_features)
  test_features = sents_to_examples(test_sents)
  test_x = encode_example(test_features)
  tf.keras.backend.clear_session()

  if learn_mapping:
    log('Training high to low mapping...')
    if FLAGS.mapper == 'linear':
      mapping = linear_mapping(train_x[1], train_x[0])
    elif FLAGS.mapper == 'mlp':
      mapping = mlp_mapping(train_x[1], train_x[0], epochs=50,
                            activation=modeling.gelu)
    elif FLAGS.mapper == 'gan':
      mapping = gan_mapping(train_x[1], train_x[0], disc_iters=5,
                            batch_size=64, gamma=1.0, epoch=100,
                            activation=tf.tanh)
    else:
      raise ValueError(FLAGS.mapper)
    test_x = mapping(test_x[1])

  return train_x, train_features, test_x, test_features
def train_text_char_cnn(data, num_attr):
  lr = FLAGS.lr
  batch_size = FLAGS.batch_size
  train_sents, train_y, test_sents, test_y = data

  num_chars = 129
  max_char_len = 400

  def sents_to_chars(sents):
    max_len = 0
    chars = np.ones((len(sents), max_char_len), dtype=np.int64) * num_chars
    for i, sent in enumerate(sents):
      sent_chars = [ord(c) for c in str(sent)]
      max_len = max(len(sent_chars), max_len)
      if len(sent_chars) > max_char_len:
        sent_chars = sent_chars[:max_char_len]
      chars[i, :len(sent_chars)] = sent_chars
    return chars

  train_x = sents_to_chars(train_sents)
  test_x = sents_to_chars(test_sents)

  inputs = tf.placeholder(tf.int64, (None, max_char_len), name="inputs")
  labels = tf.placeholder(tf.int64, (None,), name="labels")
  training = tf.placeholder(tf.bool, name='training')

  text_cnn = TextCharCNN(num_chars, hidden_size=512, num_filter=128)
  classifier = build_model(num_attr, FLAGS.hidden_size)
  model_fn = lambda x, t: classifier(text_cnn.forward(x, t), t)

  logits = model_fn(inputs, training)
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                        logits=logits)
  loss = tf.reduce_mean(loss)
  opt_loss = loss

  accuracies, top5_accuracies, predictions = acc_metrics(logits, labels,
                                                         num_attr)
  eval_fetches = [loss, accuracies, top5_accuracies]

  t_vars = tf.trainable_variables()
  post_ops = [tf.assign(v, v * (1 - FLAGS.wd)) for v in t_vars
              if 'kernel' in v.name]

  optimizer = tf.train.AdamOptimizer(lr)
  grads_and_vars = optimizer.compute_gradients(opt_loss, t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  log('Train attack model...')
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())

    def train_fn(batch_idx):
      feed = {inputs: train_x[batch_idx], labels: train_y[batch_idx],
              training: True}
      err, _ = sess.run([loss, train_ops], feed_dict=feed)
      return err

    def eval_fn(batch_idx):
      feed = {inputs: test_x[batch_idx], labels: test_y[batch_idx],
              training: False}
      return sess.run(eval_fetches, feed_dict=feed)

    n_train, n_test = len(train_y), len(test_y)
    train_loops(FLAGS.epochs, n_train, n_test, train_fn, eval_fn, batch_size)
def learning_inversion():
  assert FLAGS.low_layer_idx == FLAGS.high_layer_idx == -1

  albert_config = modeling.AlbertConfig.from_json_file(FLAGS.albert_config_file)
  num_words = albert_config.vocab_size
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case,
      spm_model_file=FLAGS.spm_model_file)
  cls_id = tokenizer.vocab['[CLS]']
  sep_id = tokenizer.vocab['[SEP]']
  mask_id = tokenizer.vocab['[MASK]']

  # load_inversion_data returns (embeddings, features); the first feature array
  # holds the token ids, which serve as the inversion targets.
  train_x, train_y, test_x, test_y = load_inversion_data()
  filters = [cls_id, sep_id, mask_id, 0]
  train_y = filter_labels(train_y[0], filters)
  test_y = filter_labels(test_y[0], filters)

  label_freq = count_label_freq(train_y, num_words)
  log('Imbalance ratio: {}'.format(np.max(label_freq) / np.min(label_freq)))
  label_margin = tf.constant(np.reciprocal(label_freq ** 0.25),
                             dtype=tf.float32)
  C = FLAGS.C

  log('Build attack model for {} words...'.format(num_words))
  encoder_dim = train_x.shape[1]
  inputs = tf.placeholder(tf.float32, (None, encoder_dim), name="inputs")
  labels = tf.placeholder(tf.float32, (None, num_words), name="labels")
  training = tf.placeholder(tf.bool, name='training')

  if FLAGS.model == 'multiset':
    emb_dim = 512
    model = MultiSetInversionModel(emb_dim, num_words, FLAGS.seq_len, None,
                                   C=C, label_margin=label_margin)
  elif FLAGS.model == 'multilabel':
    model = MultiLabelInversionModel(num_words, C=C,
                                     label_margin=label_margin)
  else:
    raise ValueError(FLAGS.model)

  preds, loss = model.forward(inputs, labels, training)
  true_pos, false_pos, false_neg = tp_fp_fn_metrics(labels, preds)
  eval_fetch = [loss, true_pos, false_pos, false_neg]

  t_vars = tf.trainable_variables()
  wd = FLAGS.wd
  post_ops = [tf.assign(v, v * (1 - wd)) for v in t_vars
              if 'kernel' in v.name]

  optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  grads_and_vars = optimizer.compute_gradients(
      loss + tf.losses.get_regularization_loss(), t_vars)
  train_ops = optimizer.apply_gradients(
      grads_and_vars, global_step=tf.train.get_or_create_global_step())
  with tf.control_dependencies([train_ops]):
    train_ops = tf.group(*post_ops)

  log('Train attack model with {} data...'.format(len(train_x)))
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(30):
      train_iterations = 0
      train_loss = 0

      for batch_idx in iterate_minibatches_indices(len(train_y),
                                                   FLAGS.batch_size, True):
        one_hot_labels = np.zeros((len(batch_idx), num_words),
                                  dtype=np.float32)
        for i, idx in enumerate(batch_idx):
          one_hot_labels[i][train_y[idx]] = 1

        feed = {inputs: train_x[batch_idx], labels: one_hot_labels,
                training: True}
        err, _ = sess.run([loss, train_ops], feed_dict=feed)
        train_loss += err
        train_iterations += 1

      test_iterations = 0
      test_loss = 0
      test_tp, test_fp, test_fn = 0, 0, 0

      for batch_idx in iterate_minibatches_indices(len(test_y),
                                                   batch_size=512,
                                                   shuffle=False):
        one_hot_labels = np.zeros((len(batch_idx), num_words),
                                  dtype=np.float32)
        for i, idx in enumerate(batch_idx):
          one_hot_labels[i][test_y[idx]] = 1

        feed = {inputs: test_x[batch_idx], labels: one_hot_labels,
                training: False}
        fetch = sess.run(eval_fetch, feed_dict=feed)
        err, tp, fp, fn = fetch
        test_iterations += 1
        test_loss += err
        test_tp += tp
        test_fp += fp
        test_fn += fn

      precision = test_tp / (test_tp + test_fp) * 100
      recall = test_tp / (test_tp + test_fn) * 100
      f1 = 2 * precision * recall / (precision + recall)
      log("Epoch: {}, train loss: {:.4f}, test loss: {:.4f}, "
          "pre: {:.2f}%, rec: {:.2f}%, f1: {:.2f}%".format(
              epoch, train_loss / train_iterations,
              test_loss / test_iterations, precision, recall, f1))
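# NOTE: `tp_fp_fn_metrics` is a shared metric helper defined elsewhere in the
# repository. The sketch below is an assumption about its behavior: count the
# true-positive, false-positive and false-negative word predictions for a batch,
# given {0, 1} indicator matrices of labels and predictions. It is not the
# repository's actual implementation.
def tp_fp_fn_metrics_sketch(labels, preds):
  labels = tf.cast(labels, tf.bool)
  preds = tf.cast(preds, tf.bool)
  tp = tf.reduce_sum(tf.cast(tf.logical_and(labels, preds), tf.float32))
  fp = tf.reduce_sum(tf.cast(
      tf.logical_and(tf.logical_not(labels), preds), tf.float32))
  fn = tf.reduce_sum(tf.cast(
      tf.logical_and(labels, tf.logical_not(preds)), tf.float32))
  return tp, fp, fn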
def main(_):
  epochs = FLAGS.epochs
  batch_size = FLAGS.batch_size

  sents, sent_masks, vocab = load_bookcorpus_sentences()
  vocab_size = len(vocab) + 1
  log('training with {} sents and {} vocabs'.format(sents.shape, vocab_size))

  init_word_emb = load_initialized_word_emb()
  if init_word_emb.shape[1] < FLAGS.emb_dim:
    offset = FLAGS.emb_dim - init_word_emb.shape[1]
    random_emb = np.random.uniform(-0.1, 0.1, (vocab_size, offset))
    init_word_emb = np.hstack([init_word_emb, random_emb.astype(np.float32)])
  init_word_emb = init_word_emb[:, :FLAGS.emb_dim]

  model = QuickThoughtModel(vocab_size, FLAGS.emb_dim, FLAGS.encoder_dim,
                            FLAGS.context_size, cell_type=FLAGS.cell_type,
                            num_layer=FLAGS.num_layer,
                            init_word_emb=init_word_emb,
                            drop_p=FLAGS.drop_p, train=True)

  global_step = tf.train.get_or_create_global_step()
  lr = get_lr(global_step) if FLAGS.cell_type == 'TRANS' else FLAGS.lr
  optimizer = tf.train.AdamOptimizer(lr)

  # use negative examples from shuffled sentences
  i_inputs = tf.placeholder(tf.int64, (None, None), name='i_inputs')
  i_masks = tf.placeholder(tf.int32, (None, None), name='i_masks')
  p_inputs = tf.placeholder(tf.int64, (None, None), name='p_inputs')
  p_masks = tf.placeholder(tf.int32, (None, None), name='p_masks')
  r_inputs = tf.placeholder(tf.int64, (None, None), name='r_inputs')
  r_masks = tf.placeholder(tf.int32, (None, None), name='r_masks')

  output_tensors = [(p_inputs, p_masks), (r_inputs, r_masks)]
  accs, loss = model.forward_triplet((i_inputs, i_masks), output_tensors,
                                     batch_size)

  t_vars = tf.trainable_variables()
  grads_and_vars = optimizer.compute_gradients(loss, t_vars)
  grads, variables = zip(*grads_and_vars)
  grads, _ = tf.clip_by_global_norm(grads, 10.0)
  grads_and_vars = zip(grads, variables)
  train_ops = optimizer.apply_gradients(grads_and_vars,
                                        global_step=global_step)

  iterations = epochs * len(sents) // batch_size
  print_every = FLAGS.print_every
  saver = tf.train.Saver(max_to_keep=epochs)

  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    iteration = 0
    train_loss = 0
    fw_accs = 0
    bw_accs = 0
    sess.run(tf.global_variables_initializer())

    for e in range(epochs):
      start = time.time()
      for batch in iterate_triplet_minibatches(sents, sent_masks, batch_size):
        xx, xp, xr = batch
        feed = {i_inputs: xx[0], i_masks: xx[1],
                p_inputs: xp[0], p_masks: xp[1],
                r_inputs: xr[0], r_masks: xr[1]}
        fetch = sess.run([loss, train_ops] + list(accs), feed_dict=feed)
        train_loss += fetch[0]
        fw_accs += fetch[-2] if len(fetch) == 4 else fetch[-1]
        bw_accs += fetch[-1]
        iteration += 1

        if iteration % print_every == 0:
          end = time.time()
          log('Iteration: {:.4f}%, Loss: {:.4f}, FW Acc:{:.2f}%, '
              'BW Acc:{:.2f}%, {:.4f} sec/batch'.format(
                  iteration / iterations * 100,
                  train_loss / print_every,
                  fw_accs / print_every * 100,
                  bw_accs / print_every * 100,
                  (end - start) / print_every))
          train_loss = 0
          fw_accs = 0
          bw_accs = 0
          start = time.time()

      model_type = FLAGS.cell_type
      if model_type == 'TRANS':
        model_type += 'l{}'.format(FLAGS.num_layer)

      model_name = 'bookcorpus_e{}_{}_b{}'.format(e, model_type, batch_size)
      save_path = os.path.join(FLAGS.save_dir, model_name)
      if not os.path.exists(save_path):
        os.makedirs(save_path)
      saver.save(sess, os.path.join(save_path, 'model.ckpt'))
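# NOTE: `iterate_triplet_minibatches` is defined elsewhere in the repository. The
# sketch below is an assumption about its behavior (yield (input, positive,
# random-negative) batches, with the following sentence as the positive and a
# randomly drawn sentence as the negative); it is not the actual implementation.
def iterate_triplet_minibatches_sketch(sents, masks, batch_size):
  n = len(sents) - 1
  for start in range(0, n - batch_size + 1, batch_size):
    idx = np.arange(start, start + batch_size)
    rand_idx = np.random.permutation(len(sents))[:batch_size]
    xx = (sents[idx], masks[idx])            # current sentences
    xp = (sents[idx + 1], masks[idx + 1])    # next sentences as positives
    xr = (sents[rand_idx], masks[rand_idx])  # random sentences as negatives
    yield xx, xp, xr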