def main(): random.seed(SEED) np.random.seed(SEED) stringGenerator = TextGenerator('../corpus/index2word.pickle', '../corpus/word2index.pickle', '../corpus/all.code') assert START_TOKEN == 0 gen_data_loader = Gen_Data_loader(BATCH_SIZE) likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE) #vocab_size = 5000 vocab_size = len(stringGenerator.index2Word) generator = get_trainable_model(vocab_size) target_params = cPickle.load(open('save/target_params.pkl')) target_params[00] = np.random.rand(vocab_size, 32).astype(np.float32) target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32) target_params[-1] = np.random.rand(vocab_size).astype(np.float32) target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params) config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True sess = tf.Session(config=config) sess.run(tf.initialize_all_variables()) #generate_samples(sess, target_lstm, 64, 10000, positive_file) stringGenerator.saveSamplesToFile(20, 10000, positive_file) gen_data_loader.create_batches(positive_file) log = open('log/experiment-log.txt', 'w') # pre-train generator print 'Start pre-training...' 
log.write('pre-training...\n') for epoch in xrange(PRE_EPOCH_NUM): print 'pre-train epoch:', epoch loss = pre_train_epoch(sess, generator, gen_data_loader) if epoch % 5 == 0: #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) stringGenerator.saveSamplesToFile(20, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) print 'pre-train epoch ', epoch, 'test_loss ', test_loss buffer = str(epoch) + ' ' + str(test_loss) + '\n' log.write(buffer) #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) stringGenerator.saveSamplesToFile(20, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n' log.write(buffer) log.close()
def main(): random.seed(SEED) np.random.seed(SEED) assert START_TOKEN == 0 # load data (likelihood?) gen_data_loader = Gen_Data_loader(BATCH_SIZE) likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE) vocab_size = 68 # load generator with parameters generator = get_trainable_model(vocab_size) # target_params = cPickle.load(open('save/target_params.pkl')) target_params = initialize_parameters(68) target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True sess = tf.Session(config=config) sess.run(tf.global_variables_initializer()) # generating synthetic data which constitute to original data # generate_samples(sess, target_lstm, 64, 100, positive_file) gen_data_loader.create_batches(positive_file) log = open('log/experiment-log.txt', 'w') # pre-train generator print 'Start pre-training...' log.write('pre-training...\n') for epoch in xrange(PRE_EPOCH_NUM): print 'pre-train epoch:', epoch loss = pre_train_epoch(sess, generator, gen_data_loader) if epoch % 5 == 0: generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) print 'pre-train epoch ', epoch, 'test_loss ', test_loss buffer = str(epoch) + ' ' + str(test_loss) + '\n' log.write(buffer) generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) print 'final pre-train epoch ', 'test_loss ', test_loss buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n' log.write(buffer) log.close()
def main():
    """Set up data loaders and the oracle LSTM; appears to be a stub/debug entry.

    Depends on module-level constants (SEED, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
    SEQ_LENGTH, START_TOKEN).
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # FIXME(review): `target_params` is never defined in this function and does
    # not appear to be a module-level name here — this raises NameError at
    # runtime.  Sibling mains load it via cPickle/pickle from
    # 'save/target_params.pkl'; confirm intent before fixing.
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)
    print(gen_data_loader)
def main(): random.seed(SEED) np.random.seed(SEED) assert START_TOKEN == 0 gen_data_loader = Gen_Data_loader(BATCH_SIZE) likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE) vocab_size = 5000 best_score = 9.5 generator = get_trainable_model(vocab_size) target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0) config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) sess.run(tf.global_variables_initializer) generate_samples(sess, target_lstm, 64, 10000, positive_file) gen_data_loader.create_batches(positive_file) log = open('log/experiment-log.txt', 'w') # pre-train generator print 'Start scheduled sampling training...' log.write('scheduled sampling training...\n') curriculum_rate = 1.0 for epoch in xrange(EPOCH_NUM): curriculum_rate = max(0.0, curriculum_rate - 0.002) loss = pre_train_epoch(sess, generator, gen_data_loader, curriculum_rate) generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) print 'pre-train epoch ', epoch, 'curriculum rate:', curriculum_rate, 'test_loss ', test_loss buffer = str(epoch) + ' ' + str(curriculum_rate) + ' ' + str(test_loss) + '\n' log.write(buffer) if test_loss < best_score: best_score = test_loss print 'best score: ', test_loss generate_samples(sess, generator, BATCH_SIZE, 100000, eval_file) likelihood_data_loader.create_batches(eval_file) significance_test(sess, target_lstm, likelihood_data_loader, 'significance/schedule_sampling.txt') log.close()
def main():
    """ORGAN-style adversarial training: RL generator vs. TextCNN discriminator.

    Pretraining is checkpointed under 'checkpoints/<PREFIX>_pretrain' and only
    re-run when no checkpoint exists (or LOAD_PRETRAIN is false).  Each
    adversarial batch: sample, score under the oracle, take TRAIN_ITER policy
    gradient steps, then D discriminator epochs.  Depends on many module-level
    names (SEED, NUM_EMB, BATCH_SIZE, D_WEIGHT, params, mm, ord_dict, ...).

    NOTE(review): `likelihood_data_loader`, `positive_samples` and
    `train_samples` are used but never defined in this function — presumably
    module-level globals; verify before running.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    # assert START_TOKEN == 0
    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()
    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              MAX_LENGTH, 0)

    # Discriminator lives in its own variable scope so its parameters can be
    # selected by name below.
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(
            sequence_length=MAX_LENGTH,
            num_classes=2,
            vocab_size=vocab_size,
            embedding_size=dis_embedding_dim,
            filter_sizes=dis_filter_sizes,
            num_filters=dis_num_filters,
            l2_reg_lambda=dis_l2_reg_lambda)
    cnn_params = [param for param in tf.trainable_variables()
                  if 'discriminator' in param.name]

    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(
        cnn.loss, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(
        dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        """One discriminator pass over real + generated samples.

        Returns (loss, accuracy) of the *last* batch; returns (0, 0) without
        training when the discriminator weight D_WEIGHT is zero.
        """
        if D_WEIGHT == 0:
            return 0, 0
        negative_samples = generate_samples(
            sess, generator, BATCH_SIZE, POSITIVE_NUM)
        # train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_samples, negative_samples)
        dis_batches = dis_data_loader.batch_iter(
            zip(dis_x_train, dis_y_train), dis_batch_size, dis_num_epochs)
        for batch in dis_batches:
            x_batch, y_batch = zip(*batch)
            feed = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dis_dropout_keep_prob
            }
            _, step, loss, accuracy = sess.run(
                [dis_train_op, dis_global_step, cnn.loss, cnn.accuracy], feed)
        print('\tD loss : {}'.format(loss))
        print('\tAccuracy: {}'.format(accuracy))
        return loss, accuracy

    # Pretrain is checkpointed and only executes if we don't find a checkpoint
    saver = tf.train.Saver()
    ckpt_dir = 'checkpoints/{}_pretrain'.format(PREFIX)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
    if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
        saver.restore(sess, ckpt_file)
        print('Pretrain loaded from previous checkpoint {}'.format(ckpt_file))
    else:
        sess.run(tf.global_variables_initializer())
        pretrain(sess, generator, target_lstm, train_discriminator)
        path = saver.save(sess, ckpt_file)
        print('Pretrain finished and saved at {}'.format(path))

    # create reward function
    batch_reward = make_reward(train_samples)
    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Reinforcement Training Generator...')
    results_rows = []
    for nbatch in range(TOTAL_BATCH):
        results = OrderedDict({'exp_name': PREFIX})
        # `nbatch % 1 == 0` is always true — sampling happens every batch,
        # with a larger sample every 10th batch.
        if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
            print('* Making samples')
            if nbatch % 10 == 0:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, BIG_SAMPLE_NUM)
            else:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(gen_samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('batch_num: {}'.format(nbatch))
            print('test_loss: {}'.format(test_loss))
            results['Batch'] = nbatch
            results['test_loss'] = test_loss
            if test_loss < best_score:
                best_score = test_loss
                print('best score: %f' % test_loss)
            # results
            mm.compute_results(gen_samples, train_samples, ord_dict, results)

        print('#########################################################################')
        print('-> Training generator with RL.')
        print('G Epoch {}'.format(nbatch))
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(
                sess, samples, 16, cnn, batch_reward, D_WEIGHT)
            print('Rewards be like...')
            print(rewards)
            nll = generator.generator_step(sess, samples, rewards)
            print('neg-loglike: {}'.format(nll))
            results['neg-loglike'] = nll
        rollout.update_params()

        # generate for discriminator
        print('-> Training Discriminator')
        for i in range(D):
            print('D_Epoch {}'.format(i))
            d_loss, accuracy = train_discriminator()
            results['D_loss_{}'.format(i)] = d_loss
            results['Accuracy_{}'.format(i)] = accuracy
        print('results')
        results_rows.append(results)
        if nbatch % params["EPOCH_SAVES"] == 0:
            save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    # write results
    save_results(sess, PREFIX, PREFIX + '_model', results_rows)
    print('\n:*** FINISHED ***')
    return
def main(FLAGS):
    """Full SeqGAN pipeline driven by command-line FLAGS on real-world text.

    Phases: (1) MLE pretraining of the generator with learning-rate decay and
    periodic checkpoints, (2) discriminator pretraining on real vs. generated
    samples, (3) adversarial training with rollout rewards.  When
    `use_real_world_data` is True (hard-coded), data comes from
    FLAGS.dataset_path (text8/ptb/toy/wt2); the synthetic-oracle branch is
    kept but dead.
    """
    #########################################################################
    # Generator Hyper-parameters
    #########################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32 # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32 # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20 # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.gen_pretrain_epoch_num  # 120 # MLE epochs for generator
    DISC_PRE_EPOCH_NUM = FLAGS.dis_pretrain_epoch_num  # 50 # pretrain epochs for discriminator
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  # 64
    gen_dropout_keep_prob = FLAGS.gen_dropout_keep_prob  # 0.75
    gen_num_recurrent_layers = FLAGS.gen_num_recurrent_layers  # 1
    gen_learning_rate = FLAGS.gen_learning_rate
    #########################################################################
    # Discriminator Hyper-parameters
    #########################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    dis_dropout_keep_prob = 0.75
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  # 64
    #########################################################################
    # Basic Training Parameters
    #########################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 200 # num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000
    #########################################################################
    # Data configurations
    #########################################################################
    use_real_world_data = True
    real_data_file_path = FLAGS.dataset_path  # './data/text8/text8'
    dataset_name = os.path.basename(real_data_file_path)
    base_token = FLAGS.base_token  # 'char'

    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:
        real_data_train_file = real_data_file_path + '-train'
        # NOTE(review): real_data_valid_file is assigned but never used below.
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-{}-dict.json'.format(
            base_token)
        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)
        # NOTE(review): `map` shadows the Python builtin for the rest of this
        # function.
        map, inv_map = create_real_data_dict(real_data_train_file,
                                             real_data_dict_file, base_token)
        vocab_size = len(map)
        # Sanity-check the token dictionary against known dataset vocab sizes.
        if dataset_name == 'text8' and base_token == 'char':
            assert vocab_size == 27  # SORRY FOR THE HARD CODING
        elif dataset_name == 'ptb' and base_token == 'word':
            assert vocab_size == 10001  # SORRY FOR THE HARD CODING
        elif dataset_name == 'toy' and base_token == 'word':
            assert vocab_size == 8  # SORRY FOR THE HARD CODING
        elif dataset_name == 'wt2' and base_token == 'word':
            assert vocab_size == 33279  # SORRY FOR THE HARD CODING
        else:
            # NOTE(review): bare TypeError with no message; unsupported
            # dataset/token combination.
            raise TypeError
        gen_data_loader = Gen_Data_loader_text(BATCH_SIZE, map, inv_map,
                                               seq_len=SEQ_LENGTH,
                                               token_type=base_token)
        dis_data_loader = Dis_dataloader_text(BATCH_SIZE, map, inv_map,
                                              seq_len=SEQ_LENGTH,
                                              token_type=base_token)
    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
        vocab_size = 5000
        dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN,
                          dropout_keep_prob=gen_dropout_keep_prob,
                          num_recurrent_layers=gen_num_recurrent_layers)
    if not use_real_world_data:
        target_params = pickle.load(open('save/target_params.pkl'))
        target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                  SEQ_LENGTH, START_TOKEN,
                                  target_params)  # The oracle model
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        # Batches are (re)created each epoch inside the training loop instead.
        pass
    else:
        # First, use the oracle model to provide the positive examples, which
        # are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file)
        gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)
        # update learning rate: decay after the first 5 epochs.
        if epoch > 5:
            gen_learning_rate /= FLAGS.gen_learning_decay * 1.
        if epoch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp',
                EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch,
                EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch)
            saver.save(sess, save_file)
        if use_real_world_data:
            # Re-sample a fresh subset of the training corpus every epoch.
            gen_data_loader.create_batches(real_data_train_file,
                                           limit_num_samples=generated_num)
        loss = pre_train_epoch(sess, generator, gen_data_loader,
                               gen_learning_rate)
        if epoch % 1 == 0:  # every epoch
            if use_real_world_data:
                generate_real_data_samples(
                    sess, generator, BATCH_SIZE, generated_num,
                    eval_file + "_epoch_%0d.txt" % epoch, inv_map, base_token)
                test_loss = 0  # FIXME - TEMP: no oracle NLL for real data
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for epoch in range(DISC_PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)
        if use_real_world_data:
            generate_real_data_samples(sess, generator, BATCH_SIZE,
                                       generated_num, negative_file, inv_map,
                                       base_token)
            dis_data_loader.load_train_data(real_data_train_file,
                                            negative_file)
        else:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)
        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {
                generator.x: samples,
                generator.rewards: rewards,
                generator.learning_rate: 0.01
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test — oracle NLL only exists in the synthetic-data branch.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            if not use_real_world_data:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(5):
            if use_real_world_data:
                generate_real_data_samples(sess, generator, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_map, base_token)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 negative_file)
                dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    # # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text(BATCH_SIZE,map,inv_map)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)
    log.close()
def main():
    """Maximum-entropy policy-gradient training with a learned Rewarder.

    Pretrains (or restores) the generator and rewarder, then alternates
    off-policy generator updates with rewarder updates.  The oracle LSTM
    scores generated samples (NLL) every 5 adversarial rounds.  Depends on
    module-level constants (SEED, BATCH_SIZE, re_batch_size, MID_LAYER_G/R,
    entropy_w, restore, off_num, G_rate, R_rate, R_decay, ...).
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(re_batch_size)
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN, MID_LAYER_G)
    # Rewarder is a wider network (4x embedding/hidden size) than the generator.
    rewarder = Rewarder(vocab_size, BATCH_SIZE, EMB_DIM * 4, HIDDEN_DIM * 4,
                        SEQ_LENGTH, START_TOKEN, MID_LAYER_R,
                        l2_reg_lambda=re_l2_reg_lambda)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    # ground_loss = target_loss(sess, target_lstm, gen_data_loader)
    # print 'Ground-Truth:', ground_loss

    log = open('save/experiment-ent' + str(entropy_w), 'w')
    # pre-train generator (skipped entirely when restoring from checkpoint)
    if restore is False:
        print 'Start pre-training...'
        log.write('pre-training...\n')
        for epoch in xrange(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print 'pre-train epoch ', epoch, 'test_loss ', test_loss
                buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                log.write(buffer)

        print 'Start pre-training rewarder...'
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    # Uniform weights (ones) during rewarder pretraining.
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, np.ones(BATCH_SIZE), 1.0,
                        re_dropout_keep_prob, 0.01)
                    r_losses.append(r_loss)
                print 'reward_loss', np.mean(r_losses)
        speed = time.time() - start
        print 'Reward pre_training Speed:{:.3f}'.format(speed)
        checkpoint_path = os.path.join('save', 'exper_40.ckpt')
        saver.save(sess, checkpoint_path)
    else:
        print 'Restore pretrained model ...'
        log.write('Restore pre-trained model...\n')
        ckpt = tf.train.get_checkpoint_state('save')
        saver.restore(sess, ckpt.model_checkpoint_path)

    # by setting the parameters to 0.0 and 1.0, we didn't use the mixed
    # policy RL training in SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Evaluate oracle NLL every 5 rounds and on the final round.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        # Train the generator for one step (off-policy samples + rewards).
        start = time.time()
        g_losses = []
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE,
                                                    off_num)
        avg_reward = []
        for g_it in range(1):
            for it in range(off_num // BATCH_SIZE):
                rewards = rollout.get_reward(sess, off_samples[it], 8,
                                             rewarder)
                avg_reward.append(rewards)
        baseline = np.zeros(SEQ_LENGTH)
        for it in range(1):
            for it2 in range(off_num // BATCH_SIZE):
                _, g_loss = generator.rl_train_step(
                    sess, off_samples[it2], avg_reward[it2], baseline,
                    off_probs[it2], entropy_w, G_rate)
                g_losses.append(g_loss)
        speed = time.time() - start
        print 'MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, np.mean(g_losses))

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder; its learning rate decays exponentially with the
        # round number (step decay every R_decay rounds).
        start = time.time()
        r_loss_list = []
        for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, weights, 1, re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)))
                    r_loss_list.append(r_loss)
        speed = time.time() - start
        print 'Reward training {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, np.mean(r_loss_list))
    log.close()
def main():
    """ORGAN variant whose discriminator is a deepchem GraphConvTensorGraph.

    Generated token sequences are decoded to SMILES, featurized with
    ConvMolFeaturizer, and classified real/fake by the graph-conv network.
    Pretraining is checkpointed under 'checkpoints_new/<PREFIX>_pretrain'.
    Depends on many module-level names (SEED, NUM_EMB, dis_batch_size,
    D_WEIGHT, params, mm, ord_dict, df_pos, train_samples, ...).

    NOTE(review): `df_pos`, `likelihood_data_loader` and `train_samples` are
    used but never defined here — the inline comments say they are "defined
    in intro", i.e. presumably module-level; verify before running.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    # assert START_TOKEN == 0
    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()
    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              MAX_LENGTH, 0)

    with tf.variable_scope('discriminator'):
        # batch_size=dis_batch_size
        cnn = GraphConvTensorGraph(n_tasks=1, batch_size=dis_batch_size,
                                   mode='classification')
        # Build eagerly so its variables land inside this scope.
        # (original note: "indentation different 2 spaces" — the author was
        # unsure whether this belongs inside the with-block; kept inside.)
        if not cnn.built:
            cnn.build()
    # with cnn._get_tf("Graph").as_default():
    # manager = cnn._get_tf("Graph").as_default()
    # manager.__enter__()
    # train_op = cnn._get_tf('train_op')

    # Define Discriminator Training procedure
    cnn_params = [param for param in tf.trainable_variables()
                  if 'discriminator' in param.name]
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(
        cnn.loss.out_tensor, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(
        dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        """One discriminator epoch over real + generated SMILES.

        Returns running-average (loss, accuracy) over the batches; returns
        (0, 0) without training when D_WEIGHT is zero.
        """
        output_tensors = [cnn.outputs[0].out_tensor]  # added from deepchem
        fetches = output_tensors + [dis_train_op, cnn.loss.out_tensor]
        if D_WEIGHT == 0:
            return 0, 0
        negative_samples = generate_samples(
            sess, generator, BATCH_SIZE, POSITIVE_NUM)
        # Decode generated token sequences to SMILES strings; the positive
        # dataframe df_pos is built elsewhere ("already defined in intro").
        neg_smiles = [mm.decode(sample, ord_dict)
                      for sample in negative_samples]
        df_neg = pd.DataFrame({'real/fake': [0] * len(neg_smiles),
                               'smiles': neg_smiles})
        # df_total=df_pos.append(df_neg)
        df_total = pd.concat([df_pos, df_neg], ignore_index=True)

        def shuffle_by_batches(df, batch_size=dis_batch_size):
            # Shuffle at batch granularity (keeps each batch homogeneous),
            # see Tip 4: https://github.com/soumith/ganhacks
            # NOTE(review): range(np.ceil(...)) passes a float to range —
            # likely a latent TypeError; confirm on the target Python/numpy.
            l = len(df)
            l_batches = range(l)
            l_batches = [l_batches[x * batch_size:(x + 1) * batch_size]
                         for x in range(np.ceil(l / batch_size))]
            permut = np.random.permutation(len(l_batches))
            l_new = [l_batches[n] for n in permut]
            l_new = [item for sublist in l_new for item in sublist]  # flatten
            df = df.reindex(np.array(l_new))
            df = df.reset_index(drop=True)
            return df

        df_total = shuffle_by_batches(df_total, batch_size=dis_batch_size)
        loader = deepchem.data.PandasLoader(
            tasks=['real/fake'], smiles_field="smiles",
            featurizer=deepchem.feat.ConvMolFeaturizer())
        train_dataset = loader.featurize(df_total, shard_size=8192)
        feed_dict_generator = cnn.default_generator(train_dataset, epochs=1)

        def create_feed_dict():
            # Map deepchem layer objects to their underlying TF tensors and
            # force training mode.
            for d in feed_dict_generator:
                feed_dict = {k.out_tensor: v for k, v in six.iteritems(d)}
                feed_dict[cnn._training_placeholder] = 1.0
                yield feed_dict

        def select_label(feed_dict):
            # Select the output label layer in the feed dictionary.
            newkeys = []
            for k in feed_dict.keys():
                if 'Label' in k.name:
                    newkeys.append(k)
            return newkeys[0]

        avg_loss, avg_acc, n_batches = 0.0, 0.0, 0
        for feed_dict in create_feed_dict():
            fetched_values = sess.run(fetches, feed_dict=feed_dict)
            loss = fetched_values[-1]
            predicted_results = [np.argmax(x) for x in fetched_values[0]]
            ground_truth = [np.argmax(x)
                            for x in feed_dict[select_label(feed_dict)]]
            results = [predicted_results[n] == ground_truth[n]
                       for n in range(len(predicted_results))]
            accuracy = float(sum(results)) / len(results)
            # Incremental (running) mean of loss and accuracy.
            n_batches += 1
            ratio = (1 / float(n_batches))
            avg_loss = (1 - ratio) * avg_loss + ratio * loss
            avg_acc = (1 - ratio) * avg_acc + ratio * accuracy
        print('\tD loss : {}'.format(avg_loss))
        print('\tAccuracy: {}'.format(avg_acc))
        return avg_loss, avg_acc

    # Pretrain is checkpointed and only executes if we don't find a checkpoint
    saver = tf.train.Saver()
    ckpt_dir = 'checkpoints_new/{}_pretrain'.format(PREFIX)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
    if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
        saver.restore(sess, ckpt_file)
        print('Pretrain loaded from previous checkpoint {}'.format(ckpt_file))
    else:
        if params["LOAD_PRETRAIN"]:
            print('\t* No pre-training data found as {:s}.'.format(ckpt_file))
        else:
            print('\t* LOAD_PRETRAIN was set to false.')
        cnn._initialize_weights(sess, saver)  # added from deepchem
        sess.run(tf.global_variables_initializer())
        pretrain(sess, generator, target_lstm, train_discriminator)
        path = saver.save(sess, ckpt_file)
        print('Pretrain finished and saved at {}'.format(path))

    # create reward function
    batch_reward = make_reward(train_samples)
    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Reinforcement Training Generator...')
    results_rows = []
    for nbatch in tqdm(range(TOTAL_BATCH)):
        results = OrderedDict({'exp_name': PREFIX})
        # `nbatch % 1 == 0` is always true — sampling happens every batch,
        # with a larger sample every 10th batch.
        if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
            print('* Making samples')
            if nbatch % 10 == 0:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, BIG_SAMPLE_NUM)
            else:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(gen_samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('batch_num: {}'.format(nbatch))
            print('test_loss: {}'.format(test_loss))
            results['Batch'] = nbatch
            results['test_loss'] = test_loss
            if test_loss < best_score:
                best_score = test_loss
                print('best score: %f' % test_loss)
            # results
            mm.compute_results(gen_samples, train_samples, ord_dict, results)

        print('#########################################################################')
        print('-> Training generator with RL.')
        print('G Epoch {}'.format(nbatch))
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(
                sess, samples, 16, cnn, ord_dict, batch_reward, D_WEIGHT)
            nll = generator.generator_step(sess, samples, rewards)
            # results
            print_rewards(rewards)
            print('neg-loglike: {}'.format(nll))
            results['neg-loglike'] = nll
        rollout.update_params()

        # generate for discriminator
        print('-> Training Discriminator')
        for i in range(D):
            print('D_Epoch {}'.format(i))
            d_loss, accuracy = train_discriminator()
            results['D_loss_{}'.format(i)] = d_loss
            results['Accuracy_{}'.format(i)] = accuracy
        print('results')
        results_rows.append(results)
        if nbatch % params["EPOCH_SAVES"] == 0:
            save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    # write results
    save_results(sess, PREFIX, PREFIX + '_model', results_rows)
    print('\n:*** FINISHED ***')
    return
def main():
    """SeqGAN training driver (oracle-evaluation setup).

    Pipeline: sample a synthetic "real" dataset from the oracle TARGET_LSTM,
    pre-train the generator by MLE, pre-train the CNN discriminator, then run
    adversarial (policy-gradient) training.  Oracle NLL on generated samples
    is logged to 'save/experiment-log.txt' and 'learning_cure.txt'.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    # prepare data
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_Data_loader(BATCH_SIZE)
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    # target_params's size: [15 * 5000 * 32]
    target_params = pickle.load(open('./save/target_params_py3.pkl', 'rb'))
    # The oracle model (note the hard-coded vocab 5000 and seq length 20 here,
    # independent of the generator's vocab_size/SEQ_LENGTH globals).
    target_lstm = TARGET_LSTM(5000, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, 20, 0, target_params)
    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # grow GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Use the oracle model to provide the positive examples.
    generate_samples_from_target(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    # print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    #
    # likelihood_data_loader.create_batches(positive_file)
    # for i in range(100):
    #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    #     print('my step ', i, 'test_loss ', test_loss)
    #     input("next:")
    # input("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator (MLE), evaluating oracle NLL every epoch
    print('Start pre-training...')
    log.write('pre-training...\n')
    ans_file = open("learning_cure.txt", 'w')
    for epoch in range(120):  # 120
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)
            ans_file.write("%s\n" % str(test_loss))

    # pre-train discriminator: 10 rounds of (fresh negatives, 3 epochs of training)
    buffer = 'Start pre-training discriminator...'
    print(buffer)
    log.write(buffer)
    for _ in range(10):  # 10
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob,
                }
                d_loss, d_acc, _ = sess.run([discriminator.loss, discriminator.accuracy, discriminator.train_op], feed)
        # reports loss/accuracy of the *last* minibatch of this round
        buffer = "discriminator loss %f acc %f\n" % (d_loss, d_acc)
        print(buffer)
        log.write(buffer)

    ans_file.write("==========\n")  # separator between pretrain and adversarial curves
    print("Start Adversarial Training...")
    log.write('adversarial training...')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator: rewards come from the generator's own
        # rollout/reward helper against the discriminator.
        for it in range(1):
            samples = generator.generate(sess)
            rewards = generator.get_reward(sess, samples, 16, discriminator, START_TOKEN)
            a = str(samples[0])
            b = str(rewards[0])
            buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            log.write(buffer)
            rewards_loss = generator.update_with_rewards(sess, samples, rewards, START_TOKEN)

            # good rewards
            # good_samples = gen_data_loader.next_batch()
            # rewards = np.array([[1.0] * SEQ_LENGTH] * BATCH_SIZE)
            # a = str(good_samples[0])
            # b = str(rewards[0])
            # buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            # log.write(buffer)
            # rewards_loss = generator.update_with_rewards(sess, good_samples, rewards, START_TOKEN)

            # little1 good reward
            # litter1_samples = gen_data_loader.next_batch()
            # rewards = generator.get_reward(sess, litter1_samples, 16, discriminator, START_TOKEN)
            # a = str(little1 good reward[0])
            # b = str(rewards[0])
            # buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            # log.write(buffer)
            # rewards_loss = generator.update_with_rewards(sess, litter1_samples, rewards, START_TOKEN)

        # Test: oracle NLL of fresh generator samples
        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'reward-train epoch %s train loss %s test_loss %s\n' % (str(total_batch), str(rewards_loss), str(test_loss))
            print(buffer)
            log.write(buffer)
            ans_file.write("%s\n" % str(test_loss))

        # Train the discriminator on fresh negatives
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob,
                    }
                    d_loss, d_acc, _ = sess.run([discriminator.loss, discriminator.accuracy, discriminator.train_op], feed)
            if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
                buffer = "discriminator loss %f acc %f\n" % (d_loss, d_acc)
                print(buffer)
                log.write(buffer)
def construct_gold_generator(vocab_size): file_obj = open('save/target_params_py3.pkl', 'rb') target_params = pickle.load(file_obj, encoding='utf8') #target_params = pickle.load(open('save/target_params_py3.pkl')) return TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model
def main():
    """SeqGAN training driver: MLE pre-training, discriminator pre-training,
    then adversarial training with a rollout policy.

    NOTE(review): this block mixes `cPickle` (Python 2) with `print(...)`
    calls and `range` — confirm which interpreter it targets; under Python 3
    the `cPickle.load(open(...))` call would need `import pickle` and 'rb'.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    #
    # Declare data loader
    # ----------------------------------------------------------------------------
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # ----------------------------------------------------------------------------

    #
    # Declare Generator & Discriminator
    # ----------------------------------------------------------------------------
    # declare: generator
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)  # The oracle model
    # declare: discriminator
    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)
    # ----------------------------------------------------------------------------

    #
    # Set the session <sess>
    # ----------------------------------------------------------------------------
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    # ----------------------------------------------------------------------------

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate samples by using <target_lstm> and write the samples to file <positive_file>
    # NOTE(review): generation of <positive_file> is commented out — the file
    # must already exist on disk or create_batches below will fail.
    #generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')

    #
    # Pre-train <generator> by using <gen_data_loader>,
    # and then compute the <test_loss> of <target_lstm> and <likelihood_data_loader>
    # ----------------------------------------------------------------------------
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # generate samples by using <generator> and write the samples to file <eval_file>
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            # load samples from file <eval_file>
            likelihood_data_loader.create_batches(eval_file)
            # compute <test_loss> of <target_lstm>, with input <likelihood_data_loader>
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)
    # ----------------------------------------------------------------------------

    #
    # Pre-train <discriminator> by using <generator>
    # ----------------------------------------------------------------------------
    print('Start pre-training discriminator...')
    # Generate data and train 3 epoch on the generated data, which will be done for 50 times
    for _ in range(50):
        # generate samples by using <generator> and write the samples to file <negative_file>
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        # load samples from file <negative_file>
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                _ = sess.run(discriminator.train_op, feed_dict=feed)
    # ----------------------------------------------------------------------------

    rollout = ROLLOUT(generator, 0.8)

    #
    # Start seqGAN, train <discriminator> and <generator>
    # ----------------------------------------------------------------------------
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # ----- Train the generator for one step -----------------
        for it in range(G_STEPS):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, ROLLOUT_NUM, discriminator, SEQ_LENGTH)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # --------------------------------------------------------

        # Update roll-out parameters
        rollout.update_params()

        # ----- Train the discriminator -------------------------
        for _ in range(D_STEPS):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                    _ = sess.run(discriminator.train_op, feed_dict=feed)
        # --------------------------------------------------------
    # ----------------------------------------------------------------------------

    log.close()
def main():
    """Python 2 SeqGAN driver over a text corpus (code corpus variant).

    Real data comes from a TextGenerator over '../corpus/all.code' rather
    than from the oracle; the oracle TARGET_LSTM is kept only as an NLL
    evaluator, with its embedding/output layers re-randomized to match the
    corpus vocabulary size.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    stringGenerator = TextGenerator('../corpus/index2word.pickle', '../corpus/word2index.pickle', '../corpus/all.code')

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = len(stringGenerator.index2Word)
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl'))
    # Re-randomize the layers whose shapes depend on vocab_size:
    # input embedding, output projection weights, and output bias.
    target_params[00] = np.random.rand(vocab_size, 32).astype(np.float32)
    target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32)
    target_params[-1] = np.random.rand(vocab_size).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    # Discriminator (TextCNN) under its own variable scope so its variables
    # can be selected by name for the optimizer below.
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20, num_classes=2, vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [param for param in tf.trainable_variables() if 'discriminator' in param.name]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.initialize_all_variables())

    # Positive examples come from the real corpus, not the oracle.
    #generate_samples(sess, target_lstm, 64, 10000, positive_file)
    stringGenerator.saveSamplesToFile(20, 10000, positive_file)

    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator (MLE)
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt')

    # Pre-train the discriminator on fresh negatives each round.
    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train), dis_batch_size, dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                # NOTE(review): silently skips malformed batches — confirm this
                # is intentional (e.g. ragged final batch from batch_iter).
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss], feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader, 'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
def main(): random.seed(SEED) np.random.seed(SEED) assert START_TOKEN == 0 gen_data_loader = Gen_Data_loader(BATCH_SIZE) likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE) vocab_size = 5000 best_score = 9.1 generator = get_trainable_model(vocab_size) target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN) config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) sess.run(tf.global_variables_initializer) generate_samples(sess, target_lstm, 64, 10000, positive_file) ################################################################ gen_data_loader.create_batches(positive_file) references = load_references(positive_file) log = open('log/experiment-log.txt', 'w') # pre-train generator print 'Start pre-training...' log.write('pre-training...\n') for epoch in xrange(PRE_EPOCH_NUM): print 'pre-train epoch:', epoch loss = pre_train_epoch(sess, generator, gen_data_loader) if epoch % 5 == 0: generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) print 'pre-train epoch ', epoch, 'test_loss ', test_loss buffer = str(epoch) + ' ' + str(test_loss) + '\n' log.write(buffer) generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n' log.write(buffer) generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt') rollout = ROLLOUT(generator, references) print '#########################################################################' print 'Start Reinforcement Training Generator...' 
log.write('Reinforcement Training...\n') for total_batch in range(TOTAL_BATCH): for it in range(TRAIN_ITER): samples = generator.generate(sess) print 'start calculating BLEU...' rewards = rollout.get_reward(sess, samples, 1, (1.0 / 3, 1.0 / 3, 1.0 / 3)) feed = {generator.x: samples, generator.rewards: rewards} _, g_loss = sess.run([generator.g_updates, generator.g_loss], feed_dict=feed) if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1: generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file) likelihood_data_loader.create_batches(eval_file) test_loss = target_loss(sess, target_lstm, likelihood_data_loader) buffer = str(total_batch) + ' ' + str(test_loss) + '\n' print 'total_batch: ', total_batch, 'test_loss: ', test_loss log.write(buffer) if test_loss < best_score: best_score = test_loss print 'best score: ', test_loss significance_test(sess, target_lstm, likelihood_data_loader, 'significance/pg_bleu.txt') rollout.update_params() log.close()
def main():
    """FLAGS-driven SeqGAN driver supporting oracle (synthetic) or natural
    text data, word- or character-level models, and optional one-hot
    embeddings.

    Phases: generator MLE pre-training, discriminator pre-training, then
    adversarial training with rollout rewards.  Oracle NLL (synthetic mode)
    or discriminator-based diagnostics (natural mode) are logged to
    'save/experiment-log.txt'.
    """
    assert START_TOKEN == 0
    # Exactly one data source must be active, and one-hot embeddings are
    # only supported together with the character-level model.
    assert ((FLAGS.use_oracle_data or FLAGS.use_natural_data) == True)
    assert ((FLAGS.use_character_level_model == True) if (FLAGS.use_onehot_embeddings == True)
            else (FLAGS.use_character_level_model == False))
    if FLAGS.use_natural_data:
        print("WARNING: " + \
              "since FLAGS.use_natural_data is set to True, " + \
              "we must are setting FLAGS.use_oracle_data to False.")
        FLAGS.use_oracle_data = False
    if FLAGS.use_oracle_data:
        print("WARNING: " + \
              "since FLAGS.use_oracle_data is set to True, " + \
              "we must are setting FLAGS.use_character_level_model to False.")
        FLAGS.use_character_level_model = False

    # Resolve vocabulary and embedding sizes for the chosen data source.
    vocab_dict = None
    vocab_size = oracle_vocab_size
    EMB_DIM = WORD_EMB_DIM
    dis_embedding_dim = dis_word_embedding_dim
    if FLAGS.use_natural_data:
        vocab_dict = VocabDictionary(
            data_fp=positive_file,
            max_seq_length=SEQ_LENGTH,
            character_level_model_bool=FLAGS.use_character_level_model,
            drop_freq_thresh=10)
        print(vocab_dict.vocab_dict)
        print(vocab_dict.int_to_token_dict)
        vocab_size = vocab_dict.get_length()
    if FLAGS.use_onehot_embeddings:
        # if we're using one-hot encodings,
        # the embedding dim must be the same as the number of possible tokens:
        EMB_DIM = vocab_size

    # Data loaders
    gen_data_loader = Gen_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)
    likelihood_data_loader = Gen_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)
    dis_data_loader = Dis_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)

    # Gen, Dis, and Oracle Models
    generator = Generator(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        go_token=START_TOKEN,
        eos_token=EOS_TOKEN,
        pad_token=(PAD_TOKEN if vocab_dict is not None else None),
        use_onehot_embeddings=FLAGS.use_onehot_embeddings)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  dropout_keep_prob=dis_dropout_keep_prob)
    # NOTE(review): target_params is empty here — confirm TARGET_LSTM
    # randomly initializes when given no parameters.
    target_params = []
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    if FLAGS.use_oracle_data:
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file, vocab_dict=vocab_dict,
                         char_level_bool=FLAGS.use_character_level_model)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w+')

    print('Starting pre-training for the generator')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        pretrain_cross_entropy_loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0 or FLAGS.show_every_epoch:
            if (FLAGS.use_natural_data == False):
                # Synthetic mode: evaluate NLL of generated data under the oracle.
                generate_samples(
                    sess, generator, BATCH_SIZE, generated_num, eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)
                likelihood_data_loader.create_batches(eval_file)
                oracle_nll_loss = compute_oracle_loss(sess, target_lstm, likelihood_data_loader)
                print('generator pre-train epoch {}... oracle_nll {}... training set cross entropy loss {}... datetime {}'
                      .format(epoch, oracle_nll_loss, pretrain_cross_entropy_loss,
                              datetime.datetime.now()))
                buffer = 'epoch:\t' + str(epoch) + '\t' + \
                         'oracle_nll:\t' + str(oracle_nll_loss) + '\n'
                log.write(buffer)
            else:
                # Natural-data mode: no oracle, so log discriminator-based stats.
                generate_samples(
                    sess, generator, BATCH_SIZE, generated_num, eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)
                dis_data_loader.load_train_data(positive_file, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                gen_data_loader.create_batches(positive_file)
                logging_prefix_string = 'generator pre-train epoch {}\n\t token_cross_entropy_loss: {}'.format(
                    epoch, pretrain_cross_entropy_loss)
                log_all_the_things(sess=sess,
                                   discriminator=discriminator,
                                   mixed_data_loader=dis_data_loader,
                                   fake_data_loader=likelihood_data_loader,
                                   real_data_loader=gen_data_loader,
                                   logging_prefix_string=logging_prefix_string)
                buffer = 'epoch:\t' + str(epoch) + '\t' + \
                         'pretrain_cross_entropy_loss:\t' + str(pretrain_cross_entropy_loss) + '\n'
                log.write(buffer)

    print('Starting pre-training for the discriminator...')
    for epoch in range(dis_pre_epoch_num):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file, vocab_dict=vocab_dict,
                         char_level_bool=FLAGS.use_character_level_model)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(FLAGS.k_steps):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.training_mode: True
                }
                _ = sess.run(discriminator.train_op, feed)
        if epoch % 5 == 0 or FLAGS.show_every_epoch:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file, vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            gen_data_loader.create_batches(positive_file)
            logging_prefix_string = 'discriminator pre-train epoch {}... '.format(
                epoch)
            log_all_the_things(sess=sess,
                               discriminator=discriminator,
                               mixed_data_loader=dis_data_loader,
                               fake_data_loader=likelihood_data_loader,
                               real_data_loader=gen_data_loader,
                               logging_prefix_string=logging_prefix_string)

    rollout = ROLLOUT(generator, 0.0)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(FLAGS.g_steps):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, rollout_branch_factor,
                                         discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
            # Update roll-out parameters
            if FLAGS.update_rollout_every_gstep:
                rollout.update_params()

        # Evaluate the generator
        if (total_batch % 5 == 0) or (total_batch == TOTAL_BATCH - 1) or FLAGS.show_every_epoch:
            if (FLAGS.use_natural_data == False):
                generate_samples(
                    sess, generator, BATCH_SIZE, generated_num, eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)
                # BUG FIX: the freshly generated eval_file was never loaded
                # here (unlike the pre-training branch above), so the oracle
                # NLL was computed on stale batches from an earlier phase.
                likelihood_data_loader.create_batches(eval_file)
                oracle_nll_loss = compute_oracle_loss(sess, target_lstm, likelihood_data_loader)
                print('epoch: {}\t generator training... oracle_nll: {}\t datetime: {}'
                      .format(total_batch, oracle_nll_loss, datetime.datetime.now()))
                buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                         'oracle_nll:\t' + str(oracle_nll_loss) + '\n'
                log.write(buffer)
            else:
                generate_samples(
                    sess, generator, BATCH_SIZE, generated_num, eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)
                dis_data_loader.load_train_data(positive_file, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                gen_data_loader.create_batches(positive_file)
                logging_prefix_string = 'adversarial epoch: {}\n\t generator training... '.format(
                    total_batch)
                kv = log_all_the_things(
                    sess=sess,
                    discriminator=discriminator,
                    mixed_data_loader=dis_data_loader,
                    fake_data_loader=likelihood_data_loader,
                    real_data_loader=gen_data_loader,
                    logging_prefix_string=logging_prefix_string)
                g_loss = kv['g_loss']
                buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                         'g_loss:\t' + str(g_loss) + '\n'
                log.write(buffer)

        # Update roll-out parameters, if we didn't already do so
        if not FLAGS.update_rollout_every_gstep:
            rollout.update_params()

        # Train the discriminator
        for _ in range(FLAGS.d_steps):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file, vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(FLAGS.k_steps):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.training_mode: True
                    }
                    _ = sess.run(discriminator.train_op, feed)

        # Test
        if (total_batch % 5 == 0) or (total_batch == TOTAL_BATCH - 1) or FLAGS.show_every_epoch:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file, vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            gen_data_loader.create_batches(positive_file)
            logging_prefix_string = 'adversarial epoch: {}\n\t discriminator training... '.format(
                total_batch)
            kv = log_all_the_things(
                sess=sess,
                discriminator=discriminator,
                mixed_data_loader=dis_data_loader,
                fake_data_loader=likelihood_data_loader,
                real_data_loader=gen_data_loader,
                logging_prefix_string=logging_prefix_string)
            d_loss = kv['d_loss']
            buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                     'd_loss:\t' + str(d_loss) + '\n'
            log.write(buffer)

    log.close()
def main():
    """SeqGAN driver (segment-reward variant, Python 3 / f-strings).

    Differs from vanilla SeqGAN: real and generated sentences are split into
    segments (split_sentence_file) before discriminator training, and
    generator rewards are taken directly from the discriminator per segment
    (get_rewords_from_discriminator) instead of via Monte-Carlo rollout.
    Checkpoints are saved every adversarial batch.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = pickle.load(open(TARGET_PARAMS, 'rb'))
    target_lstm = TARGET_LSTM(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=VOCAB_SIZE,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    saver = tf.train.Saver()  # saver

    # Start the session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open(LOG_FILE, 'w')
    #  pre-train generator
    print ('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_GEN_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print ('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)

    print ('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    # Split both the real and the generated data into segments, and train
    # the discriminator on the segment files.
    split_sentence_file(positive_file, positive_file_split)
    for epoch in range(PRE_DIS_EPOCH_NUM):
        print("EPOCH : %d $$$$$$$$$$$" % epoch)
        print("Generating and Spliting Negative file.......")
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        split_sentence_file(negative_file, negative_file_split)
        print("Load file to loader.....")
        dis_data_loader.load_train_data(positive_file_split, negative_file_split)
        print("Start training ...... ")
        for ep in range(IN_DIS_EPOCH):  # 3 --> 1
            print("inner epoch: %d :" % ep)
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                # fetch the discriminator loss as well, to monitor progress
                loss, _ = sess.run([discriminator.loss, discriminator.train_op], feed)
                if it % 1000 == 0:
                    print (f'Total Epoch {epoch}, Gen Epoch {ep}, steps {it}, loss {loss}')

    print ('#########################################################################')
    print ('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print(f"Total batch {total_batch} ------------------------------------------")
        # Train the generator for one step
        for it in range(ADV_GEN_EPOCH_NUM):
            samples = generator.generate(sess)
            # Changed reward scheme: take per-segment rewards directly from
            # the discriminator instead of Monte-Carlo rollout.
            rewards = get_rewords_from_discriminator(sess, samples, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print ('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Train the discriminator
        for epoch in range(ADV_DIS_EPOCH_NUM):  # 5 --> 1
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            split_sentence_file(negative_file, negative_file_split)
            dis_data_loader.load_train_data(positive_file_split, negative_file_split)
            for ep in range(IN_DIS_EPOCH):  # 3 --> 1
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    # fetch the discriminator loss as well, to monitor progress
                    loss, _ = sess.run([discriminator.loss, discriminator.train_op], feed)
                    if it % 1000 == 0:
                        print (f'Total Epoch {epoch}, Gen Epoch {ep}, steps {it}, loss {loss}')

        # Save model
        path = os.path.join(save_path, 'after-epoch')
        saver.save(sess, path, global_step=total_batch+1)

    log.close()
def main():
    """SeqGAN adversarial training driver (oracle-based, pre-training disabled).

    Seeds the RNGs, builds the generator, the oracle (TARGET_LSTM) and the
    CNN discriminator, samples a "real" corpus from the oracle, then runs
    TOTAL_BATCH rounds of policy-gradient generator updates (rewards come
    from a ROLLOUT of the discriminator) alternated with discriminator
    training.  Oracle NLL progress is appended to save/experiment-log.txt.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    # Loader dedicated to the oracle-generated "real" sample set.
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    # Loader dedicated to the evaluation sample set.
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    # Loader that mixes positive and negative data for the discriminator.
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    # Generative model under training.
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)

    # BUGFIX: the pickle file was opened without ever being closed; a context
    # manager releases the handle deterministically.
    with open('save/target_params_py3.pkl', 'rb') as pickle_pack:
        target_params = pickle.load(pickle_pack)

    # Oracle model defining the "true" data distribution.
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    # CNN discriminator.
    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap at 50%
    config.gpu_options.allow_growth = True  # grow GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution
    # (generated_num samples == generated_num / BATCH_SIZE batches).
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)

    print('begin to record save/experiment-log.txt')
    log = open('save/experiment-log.txt', 'w')

    # NOTE(review): the MLE pre-training of the generator and the 50x3
    # pre-training of the discriminator were commented out in this variant;
    # training starts directly with the adversarial phase.

    print('define a rollout object!')
    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print('begin to train generator with rollout policy')
        for it in range(1):
            samples = generator.generate(sess)
            print(
                'start a rollout and get reward from discriminator(rollout number is 16)...'
            )
            print('rollout samples shape is', samples.shape)
            print(samples[:5])
            # Per-timestep rewards obtained directly from the discriminator
            # via 16 Monte-Carlo rollouts.
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test: report oracle NLL every 5 rounds and on the final round.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Update roll-out parameters using exponentially weighted averages beta=0.8
        rollout.update_params()

        # Train the discriminator
        print(
            'begin to train discriminator with positive and negative samples')
        for _ in range(5):
            print(
                'generate %d negative samples from generator and write in %s'
                % (int(generated_num / BATCH_SIZE) * BATCH_SIZE,
                   negative_file))
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            print('load pos and neg samples and shuffle and bootstrap')
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    """Pre-train and adversarially train a SeqGAN on a real token corpus.

    Builds the vocabulary from ``true_file``, converts the corpus to index
    sequences (``positive_file``), pre-trains the generator (plotting its
    losses) and the discriminator, then runs TOTAL_BATCH adversarial rounds.
    Generated samples are periodically decoded to text files under save/ and
    syntax-checked via ``checksyntax.check_code``.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    # Build the vocabulary from the whitespace-tokenised real corpus.
    with open(true_file, 'r') as f_pos:
        file_contents = f_pos.read().splitlines()
    file_contents = [content.split() for content in file_contents]
    tokens = set([item for sublist in file_contents for item in sublist])
    # tokens = set(file_contents)
    pad_idx = len(tokens)  # padding token gets the last index
    vocab_size = pad_idx + 1
    token2idx = dict((token, i) for i, token in enumerate(tokens))
    idx2token = dict((i, token) for i, token in enumerate(tokens))
    idx2token[pad_idx] = " "  # padding decodes to a blank

    # Convert the real corpus to index sequences in positive_file.
    load_positive(true_file, positive_file, token2idx, pad_idx)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)

    # BUGFIX: close the pickle handle instead of leaking it.
    with open('save/target_params.pkl', 'rb') as params_file:
        target_params = cPickle.load(params_file, encoding='latin1')
    # Re-randomise the embedding / output layers so they match vocab_size.
    target_params[0] = np.random.random([vocab_size, 32]).astype(np.float32)
    target_params[13] = np.random.random([32, vocab_size]).astype(np.float32)
    target_params[14] = np.random.random([
        vocab_size,
    ]).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    gen_data_loader.create_batches(positive_file, SEQ_LENGTH)

    # log file that stores progress
    log = open('save/experiment-log.txt', 'w')

    # pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')
    all_pre_train_losses = []
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        all_pre_train_losses.append(loss)
    plt.plot(all_pre_train_losses)
    plt.savefig('pre_train_losses_plot.png')
    gen_outfile = 'save/generated_by_generator_after_' + str(
        PRE_EPOCH_NUM) + '_' + str(datetime.datetime.now()) + '_epochs.txt'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)
    checksyntax.check_code(log, gen_outfile)

    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for i in range(50):
        print("discriminator pre train epoch : ", i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file,
                                        SEQ_LENGTH)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)
    # BUGFIX: datetime.datetime.now was missing its call parentheses, so the
    # file name contained the repr of the bound method instead of a timestamp.
    gen_outfile = 'save/generated_by_generator_after_discriminator_training_' + str(
        datetime.datetime.now()) + '.txt'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)
    checksyntax.check_code(log, gen_outfile)

    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("total_batch : ", total_batch)
        # Snapshot decoded samples every 20 rounds for syntax checking.
        if total_batch % 20 == 0:
            file_name = 'save/output_batch_' + str(total_batch) + '.txt'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name, idx2token)
            checksyntax.check_code(log, file_name)

        # Train the generator for one step
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file,
                                            SEQ_LENGTH)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    final_gen_file = 'save/final_output.txt'
    generate_samples(sess, generator, BATCH_SIZE, generated_num,
                     final_gen_file, idx2token)
    checksyntax.check_code(log, final_gen_file)
    # BUGFIX: log.close() was commented out, so buffered syntax-check output
    # could be lost; close the log explicitly.
    log.close()
def main():
    """TF2 SeqGAN driver with checkpointing of pre-trained models.

    Seeds all RNGs, enables GPU memory growth, builds the generator, the
    oracle (TARGET_LSTM) and the discriminator, pre-trains (or reloads from
    .h5 checkpoints) the generator and discriminator, then alternates
    policy-gradient generator steps with discriminator training for
    TOTAL_BATCH rounds.  Oracle NLL goes to save/experiment-log.txt and the
    final weights are saved to generator.h5 / discriminator.h5.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    assert START_TOKEN == 0
    vocab_size = 5000

    # Let TF grab GPU memory on demand instead of reserving it all upfront.
    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if physical_devices:  # idiomatic truthiness check instead of len(...) > 0
        for dev in physical_devices:
            tf.config.experimental.set_memory_growth(dev, True)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # BUGFIX: the pickle file handle was never closed; use a context manager.
    with open('save/target_params_py3.pkl', 'rb') as params_file:
        target_params = pickle.load(params_file)
    target_lstm = TARGET_LSTM(BATCH_SIZE, SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  dropout_keep_prob=dis_dropout_keep_prob,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution (skipped if already present).
    if not os.path.exists(positive_file):
        target_lstm.generate_samples(generated_num // BATCH_SIZE,
                                     positive_file)
    gen_dataset = dataset_for_generator(positive_file, BATCH_SIZE)
    log = open('save/experiment-log.txt', 'w')

    # pre-train generator (reused from checkpoint when available)
    if not os.path.exists("generator_pretrained.h5"):
        print('Start pre-training...')
        log.write('pre-training...\n')
        generator.pretrain(gen_dataset, target_lstm, PRE_EPOCH_NUM,
                           generated_num // BATCH_SIZE, eval_file)
        generator.save("generator_pretrained.h5")
    else:
        generator.load("generator_pretrained.h5")

    if not os.path.exists("discriminator_pretrained.h5"):
        print('Start pre-training discriminator...')
        # Train 3 epoch on the generated data and do this for 50 times
        for dataset_idx in range(50):  # named loop var instead of live `_`
            print("Dataset", dataset_idx)
            generator.generate_samples(generated_num // BATCH_SIZE,
                                       negative_file)
            dis_dataset = dataset_for_discriminator(positive_file,
                                                    negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3,
                                (generated_num // BATCH_SIZE) * 2)
        discriminator.save("discriminator_pretrained.h5")
    else:
        discriminator.load("discriminator_pretrained.h5")

    rollout = ROLLOUT(generator, 0.8)
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("Generator", total_batch)
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate_one_batch()
            rewards = rollout.get_reward(samples, 16, discriminator)
            generator.train_step(samples, rewards)

        # Test: report oracle NLL every 5 rounds and on the final round.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generator.generate_samples(generated_num // BATCH_SIZE, eval_file)
            likelihood_dataset = dataset_for_generator(eval_file, BATCH_SIZE)
            test_loss = target_lstm.target_loss(likelihood_dataset)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        print("Discriminator", total_batch)
        for _ in range(5):
            generator.generate_samples(generated_num // BATCH_SIZE,
                                       negative_file)
            dis_dataset = dataset_for_discriminator(positive_file,
                                                    negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3,
                                (generated_num // BATCH_SIZE) * 2)

    generator.save("generator.h5")
    discriminator.save("discriminator.h5")
    log.close()
def main():
    """LeakGAN training driver: restore / pre-train, then adversarial phase.

    Builds the hierarchical LeakGAN generator (manager + worker), its CNN
    discriminator and an oracle, optionally restores checkpoints via FLAGS,
    pre-trains discriminator and generator when no checkpoint is restored,
    and finally runs TOTAL_BATCH adversarial rounds of manager/worker
    updates alternated with discriminator training.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                             FLAGS.length)  # For testing
    vocab_size = 5000
    # BUGFIX: the handle was bound to the name `file` (shadowing the builtin)
    # and never closed; use a context manager with a non-shadowing name.
    with open('save/target_params.pkl', 'rb') as params_file:
        target_params = cPickle.load(params_file)
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)
    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)
    # Oracle selection: the 40-token oracle has its own fixed parameters.
    if SEQ_LENGTH == 40:
        target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                  SEQ_LENGTH, START_TOKEN)  # The oracle model
    else:
        target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM,
                                    HIDDEN_DIM, SEQ_LENGTH, START_TOKEN,
                                    target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Positive examples sampled from the oracle distribution.
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file, 0)
    # Smoke-test one generation pass through the untrained LeakGAN.
    for a in range(1):
        g = sess.run(leakgan.gen_x,
                     feed_dict={leakgan.drop_out: 0.8, leakgan.train: 1})
        print(g)
        print("epoch:", a, "  ")
    log = open('save/experiment-log.txt', 'w')
    gen_data_loader.create_batches(positive_file)
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # NOTE(review): `model_path + '/' + FLAGS.model` is a non-empty
        # string, so this condition is always true and the else-branch is
        # dead; it probably meant os.path.exists(...) — confirm intent before
        # changing behavior.
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        if FLAGS.resD and model_path + '/' + FLAGS.model:
            # Discriminator checkpoint available: restore it, then pre-train
            # the generator with MLE.
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                     eval_file, 0)
                    likelihood_data_loader.create_batches(eval_file)
                    test_loss = target_loss(sess, target_lstm,
                                            likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
            generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                             eval_file, 0)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            # No checkpoints: pre-train discriminator and generator jointly.
            print('Start pre-training discriminator...')
            for i in range(10):
                for _ in range(5):
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                     negative_file, 0)
                    generate_samples(sess, target_lstm, BATCH_SIZE,
                                     generated_num, positive_file, 0)
                    # gen_data_loader.create_batches(positive_file)
                    dis_data_loader.load_train_data(positive_file,
                                                    negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob:
                                dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([
                                discriminator.D_loss, discriminator.D_train_op
                            ], feed)
                            print("D_loss: ", D_loss)
                    # Refresh the leaked-feature extractor after each
                    # discriminator update round.
                    leakgan.update_feature_function(discriminator)  ## todo: is important
                saver.save(sess, model_path + '/leakgan_preD')

                print('Start pre-training generator...')
                log.write('pre-training...\n')
                # BUGFIX: range() requires an int; PRE_EPOCH_NUM / 10 is a
                # float under Python 3.  Floor division is behavior-identical
                # for the original (Python 2) integer semantics.
                for epoch in range(PRE_EPOCH_NUM // 10):
                    loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                    if epoch % 5 == 0:
                        print("MLE Generator Loss: ", loss)
                saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    # print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4,
                                     dis_dropout_keep_prob)
                feed = {
                    leakgan.x: samples,
                    leakgan.reward: rewards,
                    leakgan.drop_out: 0.5
                }
                # Manager and worker are updated jointly from the same feed.
                _, _, g_loss, w_loss = sess.run([
                    leakgan.manager_updates, leakgan.worker_updates,
                    leakgan.goal_loss, leakgan.worker_loss
                ], feed_dict=feed)
                print('total_batch: ', total_batch, " ", g_loss, " ", w_loss)

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                             negative_file, 0)
            generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                             positive_file, 0)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op],
                        feed)
            leakgan.update_feature_function(discriminator)
    log.close()
def main():
    """Maximum-entropy policy-gradient training with a learned Rewarder.

    Builds a generator and a (larger) Rewarder network, pre-trains the
    generator with MLE and the rewarder on oracle/generated data (or
    restores a checkpoint), then runs TOTAL_BATCH rounds of off-policy
    generator updates with rollout rewards followed by rewarder training.
    Oracle NLL is logged to save/experiment-ent<entropy_w>.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(re_batch_size)

    # TODO: Reimpliment this class with same interface.
    # generator = GeneratorTransformer(
    #     vocab_size,
    #     BATCH_SIZE,
    #     SEQ_LENGTH,
    #     START_TOKEN
    # )
    generator = Generator(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_G,
    )
    # Rewarder uses 4x wider embedding/hidden layers than the generator.
    # TODO: Reimpliment this class with same interface.
    rewarder = Rewarder(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM * 4,
        HIDDEN_DIM * 4,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_R,
        l2_reg_lambda=re_l2_reg_lambda,
    )
    # BUGFIX: close the pickle handle instead of leaking it.
    with open("save/target_params.pkl", "rb") as params_file:
        target_params = pickle.load(params_file, encoding="latin1")
    # TODO: Reimpliment this class with same interface. (target_transformer)
    # I think we leave this as is, since it's the distribution we're trying to match? (Cailin)
    target_lstm = TARGET_LSTM(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        target_params,
    )  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # First, use the oracle model to provide the positive examples,
    # which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)
    # ground_loss = target_loss(sess, target_lstm, gen_data_loader)
    # print('Ground-Truth:', ground_loss)

    log = open("save/experiment-ent" + str(entropy_w), "w")
    # pre-train generator
    if restore is False:
        print("Start pre-training...")
        log.write("pre-training...\n")
        for epoch in range(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                print("pre-train epoch ", epoch, "test_loss ", test_loss)
                buffer = "epoch:\t" + str(epoch) + "\tnll:\t" + str(test_loss) + "\n"
                log.write(buffer)

        print("Start pre-training rewarder...")
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    # Uniform unit weights during pre-training.
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        np.ones(BATCH_SIZE),
                        1.0,
                        re_dropout_keep_prob,
                        0.01,
                    )
                    r_losses.append(r_loss)
                print("reward_loss", np.mean(r_losses))
        speed = time.time() - start
        print("Reward pre_training Speed:{:.3f}".format(speed))
        checkpoint_path = os.path.join("save", "exper_40.ckpt")
        saver.save(sess, checkpoint_path)
    else:
        print("Restore pretrained model ...")
        log.write("Restore pre-trained model...\n")
        ckpt = tf.train.get_checkpoint_state("save")
        saver.restore(sess, ckpt.model_checkpoint_path)

    # by setting the parameters to 0.0 and 1.0, we didn't use the mixed policy RL training in SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print("#########################################################################")
    print("Start Adversarial Training...")
    log.write("adversarial training...\n")
    for total_batch in range(TOTAL_BATCH):
        # Report oracle NLL every 5 rounds and on the final round.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = "epoch:\t" + str(total_batch) + "\tnll:\t" + str(test_loss) + "\n"
            print("total_batch: ", total_batch, "test_loss: ", test_loss)
            log.write(buffer)

        # Train the generator for one step
        start = time.time()
        g_losses = []
        # Draw trajectories (sequences) from generator
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE, off_num)
        avg_reward = []
        for g_it in range(1):
            # Compute MCMC reward for each trajectory
            for it in range(off_num // BATCH_SIZE):
                rewards = rollout.get_reward(sess, off_samples[it], 8, rewarder)
                avg_reward.append(rewards)
            # Perform gradient update for generator
            baseline = np.zeros(SEQ_LENGTH)
            for it in range(1):
                for it2 in range(off_num // BATCH_SIZE):
                    _, g_loss = generator.rl_train_step(
                        sess,
                        off_samples[it2],
                        avg_reward[it2],
                        baseline,
                        off_probs[it2],
                        entropy_w,
                        G_rate,
                    )
                    g_losses.append(g_loss)
        speed = time.time() - start
        print(
            "MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(g_losses)
            )
        )

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder
        start = time.time()
        r_loss_list = []
        for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    # Learning rate decays exponentially every R_decay rounds.
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        weights,
                        1,
                        re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)),
                    )
                    r_loss_list.append(r_loss)
        speed = time.time() - start
        print(
            "Reward training {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(r_loss_list)
            )
        )
    log.close()
def main(FLAGS):
    """Train LeakGAN on either the text8 character corpus or a synthetic oracle.

    Pipeline: hyper-parameter setup -> data loaders -> discriminator/LeakGAN
    construction -> (optional checkpoint restore) -> discriminator + generator
    pre-training -> adversarial training -> final checkpoint save.

    Args:
        FLAGS: parsed command-line flags (gen_emb_dim, gen_hidden_dim, seq_len,
            pretrain_epoch_num, batch_size, dis_emb_dim, experiment_name,
            num_epochs, save_each_epochs, restore, model, length, resD).
    """
    #########################################################################################
    #  Generator  Hyper-parameters
    ######################################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32 # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32 # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20 # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.pretrain_epoch_num  # 80 # supervise (maximum likelihood estimation) epochs for generator(X1) & descriminator(X5)
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  #64
    LEARNING_RATE = 0.01
    GOAL_SIZE = 16
    # NOTE(review): STEP_SIZE is defined but the constructors below pass the
    # literal step_size=4 instead of this constant — confirm they should agree.
    STEP_SIZE = 4
    #########################################################################################
    #  Discriminator  Hyper-parameters
    #########################################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    # Per-sequence-length CNN configurations (filter widths must not exceed
    # the sequence length).
    if FLAGS.seq_len == 20:
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
        ]
        LEARNING_RATE = 0.0015
        # EMB_DIM = 32 # embedding dimension
        # HIDDEN_DIM = 32 # hidden state dimension of lstm cell
    elif FLAGS.seq_len == 40:
        # NOTE(review): 14 filter sizes but only 13 filter counts here — the
        # lists are zipped inside the CNN, so either 30 or 40 silently loses a
        # filter bank (or construction fails). Confirm intended lengths.
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40]
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160, 160
        ]
        LEARNING_RATE = 0.0005
        # EMB_DIM = 64
        # HIDDEN_DIM = 64
    else:
        # Unsupported sequence length: abort.
        exit(0)
    print(SEQ_LENGTH)

    # Total feature width produced by the discriminator's CNN; LeakGAN's
    # manager consumes this leaked feature vector.
    GOAL_OUT_SIZE = sum(dis_num_filters)

    # dis_dropout_keep_prob = 0.75
    dis_dropout_keep_prob = 1.0
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  #64
    #########################################################################################
    #  Basic Training Parameters
    #########################################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 800 #num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000
    model_path = './ckpts'
    #########################################################################################
    #  Data configurations
    #########################################################################################
    # Hard-coded to the real (text8) path; the synthetic-oracle branch below
    # is effectively dead unless this is flipped manually.
    use_real_world_data = True
    real_data_file_path = './data/text8'

    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:
        # text8 is a character-level corpus: 26 letters + space.
        vocab_size = 27
        # split to train-valid-test
        real_data_train_file = real_data_file_path + '-train'
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-dict.json'
        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)
        charmap, inv_charmap = create_real_data_dict(real_data_train_file,
                                                     real_data_dict_file)
        gen_data_loader = Gen_Data_loader_text8(BATCH_SIZE, charmap,
                                                inv_charmap,
                                                seq_len=SEQ_LENGTH)
        dis_data_loader = Dis_dataloader_text8(BATCH_SIZE, charmap,
                                               inv_charmap,
                                               seq_len=SEQ_LENGTH)
        #TODO
    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                                 FLAGS.length)  # For testing
        vocab_size = 5000
        # NOTE(review): `file` shadows the builtin and the handle is never
        # closed — consider a `with` block.
        file = open('save/target_params.pkl', 'rb')
        target_params = pickle.load(file)
        dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    # Discriminator is built first: LeakGAN needs it as D_model so the
    # manager can read its leaked features.
    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)

    if not use_real_world_data:
        if SEQ_LENGTH == 40:
            target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM,
                                      HIDDEN_DIM, SEQ_LENGTH,
                                      START_TOKEN)  # The oracle model
        else:
            target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM,
                                        HIDDEN_DIM, SEQ_LENGTH, START_TOKEN,
                                        target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    # NOTE(review): this Saver (trainable vars only) is shadowed below by a
    # second Saver over tf.global_variables(); only the second is ever used.
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        gen_data_loader.create_batches(real_data_train_file,
                                       limit_num_samples=generated_num)
        pass
    else:
        # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file, 0)
        gen_data_loader.create_batches(positive_file)

    # Smoke-test sampling from the untrained generator.
    for a in range(1):
        g = sess.run(leakgan.gen_x,
                     feed_dict={
                         leakgan.drop_out: 0.8,
                         leakgan.train: 1
                     })
        print(g)
        print("epoch:", a, "  ")

    log = open('save/experiment-log.txt', 'w')

    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        # NOTE(review): this condition is a non-empty string, hence always
        # truthy — the `else` restore-from-latest branch is unreachable.
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        # if FLAGS.resD and model_path + '/' + FLAGS.model:
        # NOTE(review): deliberately disabled branch (resD default off); kept
        # for reference, the code inside never runs.
        if False:  #default of resD
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    if use_real_world_data:
                        generate_real_data_samples(
                            sess, leakgan, BATCH_SIZE, generated_num,
                            eval_file + "_epoch_%0d.txt" % epoch, inv_charmap)
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
            if use_real_world_data:
                test_loss = 0  # FIXME - TEMP
            else:
                # Oracle NLL of oracle samples = the best achievable score.
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            # NOTE(review): "Groud-Truth" typo ("Ground-Truth") — kept as-is
            # since it is a runtime string.
            print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            print('Start pre-training discriminator...')
            # Train 3 epoch on the generated data and do this for 50 times
            for i in range(10):
                for _ in range(5):
                    if use_real_world_data:
                        generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                                   generated_num,
                                                   negative_file, inv_charmap)
                        dis_data_loader.load_train_data(
                            real_data_train_file, negative_file)
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, negative_file, 0)
                        generate_samples(sess, target_lstm, BATCH_SIZE,
                                         generated_num, positive_file, 0)
                        # gen_data_loader.create_batches(positive_file)
                        dis_data_loader.load_train_data(
                            positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob:
                                dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([
                                discriminator.D_loss, discriminator.D_train_op
                            ], feed)
                            # # print 'D_loss ', D_loss
                            # buffer = str(D_loss) + '\n'
                            # log.write(buffer)
                    # Refresh the feature extractor LeakGAN's manager reads
                    # from, so the leaked signal tracks the updated D.
                    leakgan.update_feature_function(discriminator)
            saver.save(sess, model_path + '/leakgan_preD')

            # saver.save(sess, model_path + '/leakgan')
            # pre-train generator
            print('Start pre-training...')
            log.write('pre-training...\n')
            # Generator MLE pre-training; only a tenth of PRE_EPOCH_NUM here.
            for epoch in range(PRE_EPOCH_NUM // 10):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    if use_real_world_data:
                        generate_real_data_samples(
                            sess, leakgan, BATCH_SIZE, generated_num,
                            eval_file + "_epoch_%0d.txt" % epoch, inv_charmap)
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
            if use_real_world_data:
                test_loss = 0  # FIXME - TEMP
            else:
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    #
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)
        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)
        for it in range(1):
            for gi in range(gencircle):
                # Sample with temperature 1.0, then score each prefix via
                # 4 Monte-Carlo rollouts against the discriminator.
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4,
                                     dis_dropout_keep_prob)
                feed = {
                    leakgan.x: samples,
                    leakgan.reward: rewards,
                    leakgan.drop_out: 1.0
                }
                # Joint manager/worker update (hierarchical policy gradient).
                _, _, g_loss, w_loss = sess.run([
                    leakgan.manager_updates, leakgan.worker_updates,
                    leakgan.goal_loss, leakgan.worker_loss
                ],
                                                feed_dict=feed)
                print('total_batch: ', total_batch, " ", g_loss, " ", w_loss)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            # Oracle NLL evaluation is only meaningful on synthetic data.
            if not use_real_world_data:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)

                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print("Groud-Truth:", test_loss)

        # Train the discriminator
        for _ in range(5):
            if use_real_world_data:
                generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_charmap)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 negative_file, 0)
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 positive_file, 0)
                dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op],
                        feed)
                    # print 'D_loss ', D_loss
            leakgan.update_feature_function(discriminator)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    # # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text8(BATCH_SIZE,charmap,inv_charmap)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)

    log.close()
def main():
    """Train a RankGAN-style generator against a ranker.

    Phases:
      1. MLE pre-training of the generator on oracle (TARGET_LSTM) samples,
         plus supervised pre-training of the ranker; checkpointed.
      2. Graph rebuild with adversarial-phase batch size, checkpoint restore,
         then alternating policy-gradient generator updates and ranker
         updates, with periodic oracle-NLL evaluation.

    Relies on module-level FLAGS/SEED and helpers (Options, Generator,
    Ranker, TARGET_LSTM, ROLLOUT, generate_samples, pre_train_epoch,
    target_loss, data loaders).
    """
    opt = Options()
    create_logging(FLAGS)
    random.seed(SEED)
    np.random.seed(SEED)

    # data loaders
    gen_data_loader = Gen_Data_loader(FLAGS.gen_pre_batch_size)
    likelihood_data_loader = Gen_Data_loader(
        FLAGS.gen_pre_batch_size)  # For testing
    rank_data_loader = Rank_Data_loader(FLAGS.rank_batch_size, FLAGS.ref_size)

    # network initialization
    generator = Generator(opt, FLAGS, pretrain=True)
    # Use a context manager so the params file handle is closed promptly
    # (the original `cPickle.load(open(...))` leaked it).
    with open(opt.target_path) as f:
        target_params = cPickle.load(f)
    target_lstm = TARGET_LSTM(opt, FLAGS, target_params,
                              pretrain=True)  # The oracle model
    ranker = Ranker(opt, FLAGS)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # create positive files for MLE training
    generate_samples(sess, target_lstm, FLAGS.gen_pre_batch_size,
                     opt.generated_num, opt.positive_file)
    gen_data_loader.create_batches(opt.positive_file)

    ################################################################# pretraining with MLE
    # pre-train generator
    logging.info('Start pre-training generator')
    for epoch in xrange(FLAGS.pre_g_epoch):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # Oracle NLL of generator samples: lower is closer to the oracle.
            generate_samples(sess, generator, FLAGS.gen_pre_batch_size,
                             opt.generated_num, opt.eval_file)
            likelihood_data_loader.create_batches(opt.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            logging.info("Pretrain generator epoch: %d, test_loss: %0.4f" %
                         (epoch, test_loss))

    logging.info('Start pre-training rankder')
    # Train 3 epoch on the generated data and do this for 50 times
    for epoch in range(FLAGS.pre_r_epoch):
        generate_samples(sess, generator, FLAGS.gen_pre_batch_size,
                         opt.generated_num, opt.negative_file)
        rank_data_loader.load_train_data(opt.positive_file, opt.negative_file)
        for _ in range(3):
            rank_data_loader.reset_pointer()
            for it in xrange(rank_data_loader.num_batch):
                x_batch, y_batch, ref = rank_data_loader.next_batch()
                feed = {
                    ranker.input_x: x_batch,
                    ranker.input_y: y_batch,
                    ranker.input_ref: ref,
                    ranker.dropout_keep_prob: opt.dropout_ratio
                }
                _, loss = sess.run([ranker.train_op, ranker.loss], feed)
        if epoch % 5 == 0:
            logging.info("Pretrain ranker epoch: %d, training loss: %0.4f" %
                         (epoch, loss))

    # Save all params to disk.
    save_path = saver.save(sess, "./save/pre_model.ckpt")
    print("pretrain Model saved in file: %s" % save_path)

    # modify generator batch size for adversarial training: rebuild the whole
    # graph with pretrain=False, then restore the pre-trained weights.
    tf.reset_default_graph()
    generator = Generator(opt, FLAGS, pretrain=False)
    ranker = Ranker(opt, FLAGS)
    with open('save/target_params.pkl') as f:
        target_params = cPickle.load(f)
    target_lstm = TARGET_LSTM(opt, FLAGS, target_params,
                              pretrain=False)  # The oracle model
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # load parameters
    saver.restore(sess, "./save/pre_model.ckpt")
    likelihood_data_loader = Gen_Data_loader(
        FLAGS.gen_batch_size)  # For testing
    print("Model restored.")

    rollout = ROLLOUT(generator, FLAGS.rollout_ratio, FLAGS.rollout_num)

    logging.info(
        '#########################################################################'
    )
    logging.info('Start adversarial training.')
    for epoch in range(FLAGS.epoch):
        # Train the generator for one step
        for it in range(FLAGS.g_step):
            samples = generator.generate(sess)
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.negative_file)
            rank_data_loader.load_train_data(opt.positive_file,
                                             opt.negative_file)
            rewards = rollout.get_reward(sess, samples, FLAGS.rollout_num,
                                         ranker, rank_data_loader)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Testing
        # BUGFIX: the original condition was `epoch == epoch - 1`, which is
        # always False — the final epoch was never guaranteed an evaluation.
        if epoch % 5 == 0 or epoch == FLAGS.epoch - 1:
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.eval_file)
            likelihood_data_loader.create_batches(opt.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            logging.info("Epoch: %d, test_loss: %0.4f" % (epoch, test_loss))

        # Update roll-out parameters
        rollout.update_params()

        # Train the ranker
        # NOTE(review): unlike pre-training, no reset_pointer() before this
        # pass — confirm load_train_data resets the pointer internally.
        for idx in range(FLAGS.r_step):
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.negative_file)
            rank_data_loader.load_train_data(opt.positive_file,
                                             opt.negative_file)
            for it in xrange(rank_data_loader.num_batch):
                x_batch, y_batch, ref = rank_data_loader.next_batch()
                feed = {
                    ranker.input_x: x_batch,
                    ranker.input_y: y_batch,
                    ranker.input_ref: ref,
                    ranker.dropout_keep_prob: opt.dropout_ratio
                }
                _ = sess.run(ranker.train_op, feed)
def main():
    """Run cooperative training (CoT): generator vs. mediator on oracle data.

    The oracle (TARGET_LSTM) provides the real distribution; the generator is
    trained with rewards computed by a mediator density model, which itself is
    trained by MLE on a shuffled mix of real and generated batches. Progress
    is tracked via oracle NLL, test NLL, and JSD logs under save/.

    Relies on module-level constants (SEED, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
    SEQ_LENGTH, START_TOKEN, PRE_EPOCH_NUM, TOTAL_BATCH, M_DROPOUT_RATE,
    generated_num, positive_file, negative_file, eval_file) and helpers
    (Generator, TARGET_LSTM, generate_samples, mle_epoch, target_loss,
    jsd_calculate).
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    val_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                             SEQ_LENGTH)  # For testing
    vocab_size = 5000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # BUGFIX: load params via a context manager — `pickle.load(open(...))`
    # leaked the file handle.
    with open('save/target_params_py3.pkl', 'rb') as f:
        target_params = pickle.load(f)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model
    # Mediator is a double-capacity, double-batch density model: each of its
    # batches holds one real half and one generated half.
    mediator = Generator(vocab_size,
                         BATCH_SIZE * 2,
                         EMB_DIM * 2,
                         HIDDEN_DIM * 2,
                         SEQ_LENGTH,
                         START_TOKEN,
                         name="mediator",
                         dropout_rate=M_DROPOUT_RATE)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    # Held-out oracle samples for the nll_test metric.
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file)
    val_data_loader.create_batches(eval_file)

    log = open('save/experiment-log.txt', 'w')
    log_nll = open('save/experiment-log-nll.txt', 'w')
    log_jsd = open('save/experiment-log-jsd.txt', 'w')

    #  pre-train generator (default 0 epochs)(not recommended)
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = mle_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            # nll_oracle: oracle's NLL of generator samples (quality).
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'nll_oracle ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        if epoch % 1 == 0:
            # nll_test: generator's NLL of held-out oracle data (coverage).
            test_loss = target_loss(sess, generator, val_data_loader)
            print('pre-train epoch ', epoch, 'nll_test ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

    print(
        '#########################################################################'
    )
    print('Start Cooperative Training...')
    for iter_idx in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(2):
            samples = generator.generate(sess)
            # Mediator expects 2*BATCH_SIZE rows, so the batch is duplicated;
            # only the first half of the rewards is used.
            rewards = mediator.get_reward(
                sess, np.concatenate([samples, samples], axis=0))
            feed = {
                generator.x: samples,
                generator.rewards: rewards[0:BATCH_SIZE]
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if iter_idx % 100 == 0 or iter_idx == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            print('batch: ', iter_idx, 'nll_oracle: ', test_loss)
            log_nll.write(buffer)
        if iter_idx % 100 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('batch:\t', iter_idx, 'nll_test ', test_loss)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

        # Train the mediator
        for _ in range(1):
            bnll_ = []
            collected_x = []
            ratio = 2
            # Collect alternating real / generated batches, then shuffle them
            # together so each mediator batch mixes both sources.
            for it in range(ratio):
                if it % 2 == 0:
                    x_batch = gen_data_loader.next_batch()
                else:
                    x_batch = generator.generate(sess)
                collected_x.append(x_batch)
            collected_x = np.reshape(collected_x, [-1, SEQ_LENGTH])
            np.random.shuffle(collected_x)
            collected_x = np.reshape(collected_x,
                                     [-1, BATCH_SIZE * 2, SEQ_LENGTH])
            for it in range(1):
                feed = {
                    mediator.x: collected_x[it],
                }
                bnll = sess.run(mediator.likelihood_loss, feed)
                bnll_.append(bnll)
                # sess.run(mediator.dropout_on)
                _ = sess.run(mediator.likelihood_updates, feed)
                # sess.run(mediator.dropout_off)
            if (iter_idx * 4) % gen_data_loader.num_batch == 0:
                bnll = np.mean(bnll_)
                gnll = sess.run(
                    mediator.likelihood_loss,
                    feed_dict={
                        mediator.x:
                        np.reshape([
                            generator.generate(sess),
                            generator.generate(sess)
                        ], [BATCH_SIZE * 2, SEQ_LENGTH])
                    })
                print("mediator cooptrain iter#%d, balanced_nll %f, g_nll %f"
                      % (iter_idx, bnll, gnll))
                log.write("%d\t%f\n" % (iter_idx, bnll))
        if iter_idx % gen_data_loader.num_batch == 0:
            # JSD between generator and oracle distributions, once per sweep.
            jsd = jsd_calculate(sess, generator, target_lstm)
            print('cooptrain epoch#', iter_idx // gen_data_loader.num_batch,
                  'jsd ', jsd)
            log_jsd.write("%d\t%f\n" %
                          (iter_idx // gen_data_loader.num_batch, jsd))

    log.close()
    log_nll.close()
    log_jsd.close()
def main():
    """Run ORGAN-style reinforcement training of a molecule generator.

    Builds generator + oracle + CNN discriminator, resumes from the newest
    session/pretrain checkpoint when available, then alternates
    reward-weighted generator updates (rollout + external metric) with
    discriminator training, saving periodic results.

    Relies on module-level globals (SEED, NUM_EMB, BATCH_SIZE, EMB_DIM,
    HIDDEN_DIM, MAX_LENGTH, START_TOKEN, D_WEIGHT, PREFIX, TOTAL_BATCH,
    TRAIN_ITER, D, params, positive_samples, train_samples, ord_dict,
    likelihood_data_loader, mm, dis_* hyper-parameters and helper functions).
    """
    random.seed(SEED)
    np.random.seed(SEED)

    # assert START_TOKEN == 0
    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()
    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              MAX_LENGTH, 0)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=MAX_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)
    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(
        dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        """Train the CNN discriminator for one pass; return (loss, accuracy, mean ypred)."""
        if D_WEIGHT == 0:
            # BUGFIX: was `return 0, 0`, but every caller unpacks three
            # values (d_loss, accuracy, ypred) — that raised ValueError
            # whenever D_WEIGHT == 0.
            return 0, 0, 0

        negative_samples = generate_samples(sess, generator, BATCH_SIZE,
                                            POSITIVE_NUM)

        # global positive_samples
        # pos_new=positive_samples
        # random 10% of positive samples are labeled negatively to weaken generator and avoid collapsing training
        # random.shuffle(pos_new)
        # length=len(pos_new)
        # fake_neg_number= int(0.05*length)
        # fake_neg= pos_new[:fake_neg_number]
        # pos_new=pos_new[fake_neg_number:]
        # negative_samples+=fake_neg
        # random.shuffle(negative_samples)

        # train discriminator
        # NOTE(review): `positive_samples` is not defined in this function —
        # presumably a module-level global; confirm.
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_samples, negative_samples)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train,
                                                     dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)
        ypred = 0
        counter = 0
        for batch in dis_batches:
            x_batch, y_batch = zip(*batch)
            feed = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dis_dropout_keep_prob
            }
            _, step, loss, accuracy, ypred_for_auc = sess.run([
                dis_train_op, dis_global_step, cnn.loss, cnn.accuracy,
                cnn.ypred_for_auc
            ], feed)
            # Track the mean "real" probability over the epoch.
            ypred_vect = np.array([item[1] for item in ypred_for_auc])
            ypred += np.mean(ypred_vect)
            counter += 1
        ypred = float(ypred) / counter
        print('\tD loss : {}'.format(loss))
        print('\tAccuracy: {}'.format(accuracy))
        print('\tMean ypred: {}'.format(ypred))
        return loss, accuracy, ypred

    # We check previous session and pretrain is checkpointed and only
    # executes if we don't find a checkpoint.
    # (The original created two identical Savers back-to-back; one suffices.)
    saver = tf.train.Saver()

    #check previous session
    prev_sess = False
    ckpt_dir = 'checkpoints/mingan'
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    # ckpt_file = os.path.join(ckpt_dir, ckpt_dir + '_model') #old checkpoint
    ckpt_file = os.path.join(
        ckpt_dir, 'drd2_new' + '_model_'
    )  #new checkpoint

    # iterate over checkpoints to find the largest saved batch index
    nbatches_max = 0
    for i in range(500):  #maximal number of batches iterations is 500
        if os.path.isfile(ckpt_file + str(i) +
                          '.meta'):  #and params["LOAD_PREV_SESS"]
            nbatches_max = i
    #end try find max checkpoint
    ckpt_file = ckpt_file + str(nbatches_max) + '.meta'

    if params["LOAD_PREV_SESS"]:  # and os.path.isfile(ckpt_file):
        # saver_test = tf.train.import_meta_graph(ckpt_file)
        # sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        # saver.restore(sess, ckpt_file)
        print('Previous session loaded from previous checkpoint {}'.format(
            ckpt_file))
        prev_sess = True
    else:
        if params["LOAD_PREV_SESS"]:
            print('\t* No previous session data found as {:s}.'.format(
                ckpt_file))
        else:
            print('\t* LOAD_PREV_SESS was set to false.')
        # sess.run(tf.global_variables_initializer())
        # pretrain(sess, generator, target_lstm, train_discriminator)
        # path = saver.save(sess, ckpt_file)
        # print('Pretrain finished and saved at {}'.format(path))

    if prev_sess == False:
        #check pretraining
        ckpt_dir = 'checkpoints/{}_pretrain'.format(PREFIX)
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
        if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
            saver.restore(sess, ckpt_file)
            print('Pretrain loaded from previous checkpoint {}'.format(
                ckpt_file))
        else:
            if params["LOAD_PRETRAIN"]:
                print('\t* No pre-training data found as {:s}.'.format(
                    ckpt_file))
            else:
                print('\t* LOAD_PRETRAIN was set to false.')
            sess.run(tf.global_variables_initializer())
            pretrain(sess, generator, target_lstm, train_discriminator)
            path = saver.save(sess, ckpt_file)
            print('Pretrain finished and saved at {}'.format(path))
    #end loading previous session or pre-training

    # create reward function
    batch_reward = make_reward(train_samples)
    rollout = ROLLOUT(generator, 0.8)
    # nbatches_max= 30

    print(
        '#########################################################################'
    )
    print('Start Reinforcement Training Generator...')
    results_rows = []
    if nbatches_max + 1 > TOTAL_BATCH:
        print(
            ' We already trained that many batches: Check the Checkpoints folder or take a larger TOTAL_BATCH'
        )
    else:
        # Resume from the batch after the newest checkpoint.
        for nbatch in tqdm(range(nbatches_max + 1, TOTAL_BATCH)):
            #for nbatch in tqdm(range(TOTAL_BATCH)):
            results = OrderedDict({'exp_name': PREFIX})

            if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
                print('* Making samples')
                # Every 10th batch draws a larger evaluation sample.
                if nbatch % 10 == 0:
                    gen_samples = generate_samples(sess, generator,
                                                   BATCH_SIZE, BIG_SAMPLE_NUM)
                else:
                    gen_samples = generate_samples(sess, generator,
                                                   BATCH_SIZE, SAMPLE_NUM)
                # NOTE(review): `likelihood_data_loader` is not defined in
                # this function — it must be a module-level global or this
                # line raises NameError; confirm.
                likelihood_data_loader.create_batches(gen_samples)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print('batch_num: {}'.format(nbatch))
                print('test_loss: {}'.format(test_loss))
                results['Batch'] = nbatch
                results['test_loss'] = test_loss

                if test_loss < best_score:
                    best_score = test_loss
                    print('best score: %f' % test_loss)

                # results
                mm.compute_results(gen_samples, train_samples, ord_dict,
                                   results)

            print(
                '#########################################################################'
            )
            print('-> Training generator with RL.')
            print('G Epoch {}'.format(nbatch))

            for it in range(TRAIN_ITER):
                samples = generator.generate(sess)
                # Blend discriminator signal with the external objective
                # (batch_reward) via D_WEIGHT, using 16 rollouts.
                rewards = rollout.get_reward(sess, samples, 16, cnn,
                                             batch_reward, D_WEIGHT)
                nll = generator.generator_step(sess, samples, rewards)
                # results
                print_rewards(rewards)
                print('neg-loglike: {}'.format(nll))
                results['neg-loglike'] = nll
            rollout.update_params()

            # generate for discriminator
            print('-> Training Discriminator')
            for i in range(D):
                print('D_Epoch {}'.format(i))
                d_loss, accuracy, ypred = train_discriminator()
                results['D_loss_{}'.format(i)] = d_loss
                results['Accuracy_{}'.format(i)] = accuracy
                results['Mean_ypred_{}'.format(i)] = ypred
            print('results')
            results_rows.append(results)
            if nbatch % params["EPOCH_SAVES"] == 0:
                save_results(sess, PREFIX, PREFIX + '_model_' + str(nbatch),
                             results_rows)

        # write results
        save_results(sess, PREFIX, PREFIX + '_model_' + str(nbatch),
                     results_rows)

    print('\n:*** FINISHED ***')
    return
def main(unused_argv):
    """Train a SeqGAN-style text generator against an oracle (TF1, Python 2).

    Pipeline:
      1. Build generator + rollout, oracle (target LSTM) and discriminator graphs.
      2. MLE-pretrain the generator on sequences sampled from the oracle.
      3. Pretrain the discriminator on real (oracle) vs. generated data.
      4. Alternate policy-gradient generator updates (rewards from Monte Carlo
         rollouts scored by the discriminator) with discriminator updates.

    Side effects: writes sample files (positive/negative/eval) and appends
    NLL-vs-oracle progress to save/experiment-log.txt.
    """
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()
    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    # Build data loaders for the generator, for evaluation, and for the discriminator.
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    # Build the generator and its rollout network.
    generator = Generator(config=config_gen)
    generator.build()
    # Rollout network: completes partially generated sequences so a per-step
    # reward can be estimated before the sequence is finished.
    rollout_gen = rollout(config=config_gen)

    # Build the target LSTM: the "oracle" that defines the real data
    # distribution. NOTE(review): the pickle file handle is never closed.
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(config=config_gen, params=target_params)  # The oracle model

    # Build the discriminator.
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    # Build the optimizer op for generator pretraining.
    pretrained_optimizer = tf.train.AdamOptimizer(
        config_train.gen_learning_rate)
    # Collect every trainable variable named 'teller' (shared by the generator
    # and rollout graphs); the name prevents collision with the target LSTM.
    var_pretrained = [
        v for v in tf.trainable_variables() if 'teller' in v.name
    ]
    # zip(*...) splits the (gradient, variable) pairs into two parallel tuples.
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(
        generator.pretrained_loss, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(
        zip(gradients, variables))  # sic: "upate" — name kept for callers/readers of this file

    # Initialize all variables.
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    # Initialize the generator's data loader (helpers live in utils.py):
    # the oracle writes "real" samples into config_train.positive_file.
    generate_samples(sess, target_lstm, config_train.batch_size,
                     config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    # ---- Generator pretraining (supervised MLE on oracle data) ----
    log = open('save/experiment-log.txt', 'w')
    print 'Start pre-training generator...'
    log.write('pre-training...\n')
    for epoch in xrange(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in xrange(gen_data_loader.num_batch):
            # Oracle ("real") samples, used to pretrain the generator.
            batch = gen_data_loader.next_batch()
            # Maximum-likelihood step; the mask is all ones (no padding).
            _, g_loss = sess.run([gen_pre_upate, generator.pretrained_loss],
                                 feed_dict={generator.input_seqs_pre: batch,
                                            generator.input_seqs_mask: np.ones_like(batch)})
        if epoch % config_train.test_per_epoch == 0:
            # Evaluate generation quality: NLL of generator samples under the oracle.
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    # ---- Discriminator pretraining on real + generated data ----
    print 'Start pre-training discriminator...'
    for t in range(config_train.dis_update_time_pre):
        print "Times: " + str(t)
        # Generator writes fake data; it is mixed with the oracle's real data.
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num, config_train.negative_file)
        # Mix real (positive) and fake (negative) data.
        dis_data_loader.load_train_data(config_train.positive_file,
                                        config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                }
                # Minimize cross-entropy: trains the scoring network that
                # later provides rewards to the generator.
                _ = sess.run(discriminator.train_op, feed)

    # ---- Build the optimizer op for adversarial training ----
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(
        generator.gen_loss_adv, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    # Initialize only the variables created since the global init above
    # (i.e. the new optimizer's slot variables), so trained weights survive.
    uninitialized_var = [
        e for e in tf.global_variables() if e not in tf.trainable_variables()
    ]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    # ---- Adversarial training ----
    for total_batch in xrange(config_train.total_batch):
        for iter_gen in xrange(config_train.gen_update_time):
            # Sample sequences from the generator (LSTM).
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            # Calculate the reward at each specific step t by rollout.
            # NOTE(review, from original author): `samples` already look like
            # complete sequences here (which differs from the paper), so it is
            # unclear why a rollout is still needed — verify against rollout_gen.
            for iter_roll in xrange(config_train.rollout_num):
                # Feed the generator's sampled words to the rollout network.
                rollout_list = sess.run(rollout_gen.sample_rollout_step,
                                        feed_dict=feed)
                rollout_list_stack = np.vstack(
                    rollout_list
                )  #shape: #batch_size * #rollout_step, #sequence length
                # Monte Carlo: score every rolled-out sequence with the
                # discriminator to obtain per-step rewards.
                reward_rollout_seq = sess.run(
                    discriminator.ypred_for_auc,
                    feed_dict={
                        discriminator.input_x: rollout_list_stack,
                        discriminator.dropout_keep_prob: 1.0
                    })
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={
                                               discriminator.input_x: samples,
                                               discriminator.dropout_keep_prob: 1.0
                                           })
                # Column 1 is taken as the reward; the full-sequence scores are
                # appended after the per-step rollout scores.
                reward_allseq = np.concatenate(
                    (reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                # De-interleave: rows for batch element r are strided by the
                # batch size across the sequence steps.
                for r in xrange(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(
                        r,
                        config_gen.gen_batch_size * config_gen.sequence_length,
                        config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            # Average the rewards over all rollouts.
            rewards = np.sum(reward_rollout, axis=0) / config_train.rollout_num
            # Policy-gradient update of the generator, guided by the rewards.
            _, gen_loss = sess.run([train_adv_update, generator.gen_loss_adv],
                                   feed_dict={generator.input_seqs_adv: samples,
                                              generator.rewards: rewards})

        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            # After the adversarial step, sample from the generator again and
            # compare against the oracle (target_lstm / real data).
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            # target_loss is defined in util.py.
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file,
                                            config_train.negative_file)
            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                    }
                    # Keep training the scoring network.
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    """SeqGAN training entry point (TF1, Python 2): oracle LSTM + TextCNN discriminator.

    Flow: MLE-pretrain the generator on oracle samples, pretrain the CNN
    discriminator on real-vs-generated data, then reinforcement-train the
    generator with ROLLOUT-estimated rewards, retraining the discriminator
    between generator updates. Logs NLL to log/experiment-log.txt and runs
    significance tests into significance/*.txt.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000
    dis_data_loader = Dis_dataloader()

    # Best (lowest) oracle NLL seen so far; used to gate significance tests.
    best_score = 1000
    # initialize a LSTM object and use the LSTM object to initialize PoemGen object
    generator = get_trainable_model(vocab_size)

    # cPickle is an object serialization library; the loaded pickle object is
    # an array of numbers used to initialize the target (oracle) LSTM.
    # NOTE(review): the file handle from open() is never closed.
    target_params = cPickle.load(open('save/target_params.pkl'))
    # print target_params
    # NOTE(review): this 1000-second sleep looks like leftover debugging
    # (it blocks startup for ~17 minutes) — confirm and remove if so.
    time.sleep(1000)

    # The oracle model (RNN) that provides the "real" data distribution.
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    # This is the discriminator, which uses a CNN over token sequences.
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)
    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]

    # Define the discriminator training procedure.
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # NOTE(review): tf.initialize_all_variables() is the deprecated TF<1.0
    # spelling of tf.global_variables_initializer().
    sess.run(tf.initialize_all_variables())

    # Oracle writes the "real" training corpus into positive_file.
    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')

    # ---- Pre-train generator: initialize it with MLE estimators ----
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # Periodically measure NLL of generator samples under the oracle.
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    # Final pretraining evaluation.
    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    # ---- Pre-train the discriminator on real vs. generated batches ----
    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        # train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                # Ragged final batch cannot be unpacked; deliberately skipped.
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    # ---- Adversarial (policy-gradient) training ----
    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            # Rewards from 16 Monte Carlo rollouts scored by the CNN.
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # The trainable model 'generator' is a RNN model from PoemGen.
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        # Update the rollout policy toward the current generator weights.
        rollout.update_params()

        # ---- Generate new negatives and retrain the discriminator ----
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    # Ragged final batch; deliberately skipped.
                    pass
    log.close()
def main(unused_argv):
    """Python 3 port of the SeqGAN training pipeline (TF1).

    Same structure as the Python 2 variant: build generator/rollout/oracle/
    discriminator, MLE-pretrain the generator, pretrain the discriminator,
    then alternate policy-gradient generator updates (rollout-based rewards)
    with discriminator updates. Writes progress to save/experiment-log.txt.

    NOTE(review): both generate_samples() calls that would (re)write the
    positive and eval files are commented out below, so
    config_train.positive_file and config_train.eval_file must already exist
    on disk — confirm this is intentional.
    """
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()
    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    # Build data loaders for the generator, for evaluation, and for the discriminator.
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    # Build the generator and its rollout network.
    generator = Generator(config=config_gen)
    generator.build()
    rollout_gen = rollout(config=config_gen)

    # Build the target LSTM (oracle). StrToBytes presumably adapts the
    # Python 2-era text pickle for Python 3's bytes-based loader — verify.
    # NOTE(review): the file handle from open() is never closed.
    target_params = cPickle.load(StrToBytes(open('save/target_params.pkl')),
                                 encoding='bytes')
    target_lstm = TARGET_LSTM(config=config_gen, params=target_params)  # The oracle model

    # Build the discriminator.
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    # Build the optimizer op for generator pretraining.
    pretrained_optimizer = tf.train.AdamOptimizer(
        config_train.gen_learning_rate)
    var_pretrained = [
        v for v in tf.trainable_variables() if 'teller' in v.name
    ]  #Using name 'teller' here to prevent name collision of target LSTM
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(
        generator.pretrained_loss, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(
        zip(gradients, variables))  # sic: "upate"

    # Initialize all variables.
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    # Initialize the generator's data loader from the existing positive file.
    # generate_samples(sess, target_lstm, config_train.batch_size, config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    # ---- Generator pretraining (supervised MLE) ----
    log = open('save/experiment-log.txt', 'w')
    print('Start pre-training generator...')
    log.write('pre-training...\n')
    for epoch in range(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in range(gen_data_loader.num_batch):
            batch = gen_data_loader.next_batch()
            # Maximum-likelihood step; the mask is all ones (no padding).
            _, g_loss = sess.run([gen_pre_upate, generator.pretrained_loss],
                                 feed_dict={generator.input_seqs_pre: batch,
                                            generator.input_seqs_mask: np.ones_like(batch)})
        if epoch % config_train.test_per_epoch == 0:
            # Evaluate NLL of the (pre-existing) eval file under the oracle.
            # generate_samples(sess, generator, config_train.batch_size, config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    # ---- Discriminator pretraining on real + generated data ----
    print('Start pre-training discriminator...')
    for t in range(config_train.dis_update_time_pre):
        print("Times: " + str(t))
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num, config_train.negative_file)
        dis_data_loader.load_train_data(config_train.positive_file,
                                        config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    # ---- Build the optimizer op for adversarial training ----
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(
        generator.gen_loss_adv, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    # Initialize only the variables created since the global init above
    # (i.e. the new optimizer's slot variables), so trained weights survive.
    uninitialized_var = [
        e for e in tf.global_variables() if e not in tf.trainable_variables()
    ]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    # ---- Adversarial training ----
    for total_batch in range(config_train.total_batch):
        for iter_gen in range(config_train.gen_update_time):
            # Sample sequences from the generator.
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            # Calculate the reward at each specific step t by rollout.
            for iter_roll in range(config_train.rollout_num):
                rollout_list = sess.run(rollout_gen.sample_rollout_step,
                                        feed_dict=feed)
                rollout_list_stack = np.vstack(
                    rollout_list
                )  #shape: #batch_size * #rollout_step, #sequence length
                # Score every rolled-out sequence with the discriminator.
                reward_rollout_seq = sess.run(
                    discriminator.ypred_for_auc,
                    feed_dict={
                        discriminator.input_x: rollout_list_stack,
                        discriminator.dropout_keep_prob: 1.0
                    })
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={
                                               discriminator.input_x: samples,
                                               discriminator.dropout_keep_prob: 1.0
                                           })
                # Column 1 is taken as the reward; full-sequence scores are
                # appended after the per-step rollout scores.
                reward_allseq = np.concatenate(
                    (reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                # De-interleave: rows for batch element r are strided by the
                # batch size across the sequence steps.
                for r in range(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(
                        r,
                        config_gen.gen_batch_size * config_gen.sequence_length,
                        config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            # Average the rewards over all rollouts.
            rewards = np.sum(reward_rollout, axis=0) / config_train.rollout_num
            # Policy-gradient update of the generator, guided by the rewards.
            _, gen_loss = sess.run([train_adv_update, generator.gen_loss_adv],
                                   feed_dict={generator.input_seqs_adv: samples,
                                              generator.rewards: rewards})

        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            # Compare fresh generator samples against the oracle.
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file,
                                            config_train.negative_file)
            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    """SeqGAN training variant for a small vocabulary (68 tokens; TF1, Python 2).

    Differences from the generic variant: oracle parameters come from
    initialize_parameters() instead of a pickle; generated-sample files are
    written under target_generate/ and deleted again unless they fall on a
    keep interval (every 100th pretrain epoch / every 50th RL batch); the log
    path comes from the module-level `logpath`.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 68
    dis_data_loader = Dis_dataloader()

    # Best (lowest) oracle NLL seen so far; gates significance tests.
    best_score = 1000
    # load generator with parameters
    generator = get_trainable_model(vocab_size)
    target_params = initialize_parameters(vocab_size)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    # CNN discriminator.
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)
    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]

    # Define the discriminator training procedure.
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # NOTE(review): oracle sampling is disabled, so positive_file must
    # already exist on disk — confirm this is intentional.
    # generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open(logpath, 'w')

    # ---- Pre-train generator (MLE) ----
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # Sample into a per-epoch file and measure NLL under the oracle.
            file_name = 'target_generate/pretrain_epoch' + str(epoch) + '.pkl'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)
            # Keep only every 100th epoch's samples; delete the rest.
            if epoch % 100 != 0:
                os.remove(file_name)

    # Final pretraining evaluation.
    file_name = 'target_generate/pretrain_finished.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    file_name = 'target_generate/supervise.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')
    os.remove(file_name)

    # ---- Pre-train the discriminator on real vs. generated batches ----
    print 'Start training discriminator...'
    for i in range(dis_alter_epoch):
        print 'dis_alter_epoch : ' + str(i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        # train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                # Ragged final batch cannot be unpacked; deliberately skipped.
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    # ---- Adversarial (policy-gradient) training ----
    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            # Rewards from 16 Monte Carlo rollouts scored by the CNN.
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # Sample into a per-batch file and measure NLL under the oracle.
            file_name = 'target_generate/reinforce_batch' + str(
                total_batch) + '.pkl'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)
            # Keep only every 50th batch's samples; delete the rest.
            if total_batch % 50 != 0:
                os.remove(file_name)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        # Update the rollout policy toward the current generator weights.
        rollout.update_params()

        # ---- Generate new negatives and retrain the discriminator ----
        print 'Start training discriminator'
        for _ in range(5):
            # for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    # Ragged final batch; deliberately skipped.
                    pass
    log.close()
def main():
    """Canonical SeqGAN training loop (TF1, Python 2).

    Pre-trains the generator by MLE on oracle data, pre-trains the
    discriminator for 50 rounds of 3 epochs, then alternates one
    policy-gradient generator step with 5 rounds of discriminator training,
    logging oracle NLL to save/experiment-log.txt.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # NOTE(review): the pickle file handle is never closed.
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution.
    # NOTE(review): the sampling call is commented out, so positive_file must
    # already exist on disk — confirm this is intentional.
    # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')

    # ---- Pre-train generator (MLE) ----
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # Periodically measure NLL of generator samples under the oracle.
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print 'Start pre-training discriminator...'
    # Train 3 epochs on the generated data and do this for 50 times.
    for epoch in range(50):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        if epoch % 5 == 0:
            print 'pre-train discriminator epoch ', epoch
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step.
        for it in range(1):
            samples = generator.generate(sess)
            # Rewards from 16 Monte Carlo rollouts scored by the discriminator.
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test: oracle NLL of fresh generator samples.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        # Update roll-out parameters toward the current generator weights.
        rollout.update_params()

        # Train the discriminator on fresh negatives.
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    """Cooperative Training (CoT) entry point (TF1, Python 3).

    Trains a generator against a *mediator* density model instead of a GAN
    discriminator: each iteration takes one policy-gradient generator step
    (rewards from the mediator) and one likelihood step on the mediator fed
    with half real, half generated batches. Oracle NLL, test NLL, balanced
    NLL and JSD are logged to separate files under save/, and the model is
    checkpointed to saved_model/CoT once per data epoch.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    gan_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    val_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)  # For testing
    vocab_size = 5000

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # NOTE(review): the pickle file handle is never closed.
    target_params = pickle.load(open('save/target_params_py3.pkl', 'rb'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model
    # Mediator: a (larger, dropout-regularized) density model over sequences.
    mediator = Mediator(vocab_size,
                        BATCH_SIZE,
                        EMB_DIM * 2,
                        HIDDEN_DIM * 2,
                        SEQ_LENGTH,
                        START_TOKEN,
                        name="mediator",
                        dropout_rate=M_DROPOUT_RATE,
                        learning_rate=3e-3,
                        with_professor_forcing=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution.
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    gan_data_loader.create_batches(positive_file)
    # Held-out oracle samples for the nll_test metric.
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file)
    val_data_loader.create_batches(eval_file)

    log = open('save/experiment-log.txt', 'w')
    log_nll = open('save/experiment-log-nll.txt', 'w')
    log_jsd = open('save/experiment-log-jsd.txt', 'w')

    # ---- Pre-train generator (default 0 epochs)(not recommended) ----
    print('Start pre-training...')
    log.write('pre-training...\n')
    saver = tf.train.Saver(tf.global_variables())
    if RESTORE:
        saver.restore(sess, "saved_model/CoT")
    for epoch in range(PRE_EPOCH_NUM):
        loss = mle_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            # nll_oracle: NLL of generator samples under the oracle.
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'nll_oracle ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        if epoch % 1 == 0:
            # nll_test: NLL of held-out oracle data under the generator.
            test_loss = target_loss(sess, generator, val_data_loader)
            print('pre-train epoch ', epoch, 'nll_test ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

    print(
        '#########################################################################'
    )
    print('Start Cooperative Training...')
    for iter_idx in range(TOTAL_BATCH):
        # Train the generator for one step.
        for it in range(1):
            samples = generator.generate(sess)
            # Rewards come from the mediator, not a discriminator.
            rewards = mediator.get_reward(sess, samples)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if iter_idx % 100 == 0 or iter_idx == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            print('batch: ', iter_idx, 'nll_oracle: ', test_loss)
            log_nll.write(buffer)
        if iter_idx % 100 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('batch:\t', iter_idx, 'nll_test ', test_loss)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

        # Train the mediator on a real batch and a generated batch.
        for _ in range(1):
            bnll_ = []
            """
            d_loss_ = []
            for it in range(3):
                feed = {
                    mediator.x0: gan_data_loader.next_batch(),
                    mediator.x1: generator.generate(sess)
                }
                d_loss, _ = sess.run([mediator.d_loss, mediator.d_update], feed)
                d_loss_.append(d_loss)
            """
            for it in range(1):
                feed = {
                    mediator.x0: gen_data_loader.next_batch(),
                    mediator.x1: generator.generate(sess)
                }
                # Balanced NLL is measured with dropout off, then one update
                # step is taken with dropout on.
                bnll = sess.run(mediator.likelihood_loss, feed)
                bnll_.append(bnll)
                sess.run(mediator.dropout_on)
                _ = sess.run(mediator.likelihood_updates, feed)
                sess.run(mediator.dropout_off)
            if iter_idx % 10 == 0:
                bnll = np.mean(bnll_)
                print("mediator cooptrain iter#%d, balanced_nll %f" %
                      (iter_idx, bnll))
                log.write("%d\t%f\n" % (iter_idx, bnll))

        # Once per pass over the real data: measure generator-vs-oracle JSD
        # and checkpoint the whole model.
        if iter_idx % gen_data_loader.num_batch == 0:
            jsd = jsd_calculate(sess, generator, target_lstm)
            print('cooptrain epoch#', iter_idx // gen_data_loader.num_batch,
                  'jsd ', jsd)
            log_jsd.write("%d\t%f\n" %
                          (iter_idx // gen_data_loader.num_batch, jsd))
            saver.save(sess, "saved_model/CoT")
    log.close()
    log_nll.close()
    log_jsd.close()