def main():
    """Seed the RNGs and construct the data loaders plus the oracle LSTM.

    Relies on module-level constants (SEED, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
    SEQ_LENGTH, START_TOKEN, target_params) and classes (Gen_Data_loader,
    Dis_dataloader, TARGET_LSTM) defined elsewhere in this file.
    """
    # Deterministic runs: seed both stdlib and numpy RNGs.
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # held-out evaluation loader
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # Oracle model whose samples serve as the "real" data distribution.
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)
    print(gen_data_loader)
def main():
    """Run the full SeqGAN pipeline: build models, pretrain G and D on
    oracle-generated data, then alternate adversarial updates.

    All model construction and training steps are delegated to module-level
    helpers (construct_*, pretrain_*, train_*); this function only sequences
    them. NOTE: construction order matters for the TF graph, so it is kept
    exactly as-is.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    vocab_size = 5000  # TODO(review): promote to a module-level constant
    log = open('save/experiment-log.txt', 'w')
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    # Build the three networks before creating the session / initializing vars.
    generator = construct_generator(vocab_size)
    target_lstm = construct_gold_generator(vocab_size)
    discriminator = construct_discriminator(vocab_size)
    sess = initialize_session()

    # First, use the oracle model to provide the positive examples,
    # which are sampled from the oracle data distribution.
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    # MLE pretraining of the generator, then supervised pretraining of D.
    print('Start pre-training...')
    pretrain_generator(sess, generator, gen_data_loader, target_lstm, log)
    print('Start pre-training discriminator...')
    pretrain_discriminator(sess, discriminator, dis_data_loader, generator)

    rollout_policy = ROLLOUT(generator, 0.8)
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for batch_idx in range(TOTAL_BATCH):
        # One policy-gradient step for G, sync the rollout copy, then update D.
        train_generator(sess, generator, target_lstm, rollout_policy, discriminator, batch_idx, log)
        rollout_policy.update_params()
        train_discriminator(sess, discriminator, dis_data_loader, generator)
    log.close()
def main():
    """Train a generator against a learned Rewarder (maximum-entropy policy
    gradient variant of SeqGAN) on oracle-LSTM data.

    Pipeline: build G / Rewarder / oracle, sample positive data from the
    oracle, MLE-pretrain G and the rewarder (or restore a checkpoint), then
    alternate policy-gradient generator updates with rewarder updates.
    Depends on many module-level constants and helpers not visible here.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(re_batch_size)
    # TODO: Reimpliment this class with same interface.
    # generator = GeneratorTransformer(
    #     vocab_size,
    #     BATCH_SIZE,
    #     SEQ_LENGTH,
    #     START_TOKEN
    # )
    generator = Generator(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_G,
    )
    # TODO: Reimpliment this class with same interface.
    # Rewarder is sized 4x larger than the generator (embedding and hidden).
    rewarder = Rewarder(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM * 4,
        HIDDEN_DIM * 4,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_R,
        l2_reg_lambda=re_l2_reg_lambda,
    )
    # latin1 encoding lets a Python-2 pickle load under Python 3.
    target_params = pickle.load(open("save/target_params.pkl", "rb"), encoding="latin1")
    # TODO: Reimpliment this class with same interface. (target_transformer)
    # I think we leave this as is, since it's the distribution we're trying to match? (Cailin)
    target_lstm = TARGET_LSTM(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        target_params,
    )  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # First, use the oracle model to provide the positive examples,
    # which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)
    # ground_loss = target_loss(sess, target_lstm, gen_data_loader)
    # print('Ground-Truth:', ground_loss)

    # Log file name encodes the entropy weight of this experiment.
    log = open("save/experiment-ent" + str(entropy_w), "w")

    # pre-train generator (skipped entirely when restoring a checkpoint)
    if restore is False:
        print("Start pre-training...")
        log.write("pre-training...\n")
        for epoch in range(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                # Every 5 epochs: sample from G and score NLL under the oracle.
                generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                print("pre-train epoch ", epoch, "test_loss ", test_loss)
                buffer = "epoch:\t" + str(epoch) + "\tnll:\t" + str(test_loss) + "\n"
                log.write(buffer)

        print("Start pre-training rewarder...")
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    # Pretraining uses uniform weights (np.ones) and a fixed
                    # 0.01 learning rate for the rewarder.
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        np.ones(BATCH_SIZE),
                        1.0,
                        re_dropout_keep_prob,
                        0.01,
                    )
                    r_losses.append(r_loss)
                print("reward_loss", np.mean(r_losses))
        speed = time.time() - start
        print("Reward pre_training Speed:{:.3f}".format(speed))
        checkpoint_path = os.path.join("save", "exper_40.ckpt")
        saver.save(sess, checkpoint_path)
    else:
        print("Restore pretrained model ...")
        log.write("Restore pre-trained model...\n")
        ckpt = tf.train.get_checkpoint_state("save")
        saver.restore(sess, ckpt.model_checkpoint_path)

    # by setting the parameters to 0.0 and 1.0, we didn't use the mixed policy RL training in SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print("#########################################################################")
    print("Start Adversarial Training...")
    log.write("adversarial training...\n")
    for total_batch in range(TOTAL_BATCH):
        # Periodic evaluation: NLL of generator samples under the oracle.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = "epoch:\t" + str(total_batch) + "\tnll:\t" + str(test_loss) + "\n"
            print("total_batch: ", total_batch, "test_loss: ", test_loss)
            log.write(buffer)

        # Train the generator for one step
        start = time.time()
        g_losses = []
        # Draw trajectories (sequences) from generator
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE, off_num)
        avg_reward = []
        for g_it in range(1):
            # Compute MCMC reward for each trajectory (8 rollouts per batch)
            for it in range(off_num // BATCH_SIZE):
                rewards = rollout.get_reward(sess, off_samples[it], 8, rewarder)
                avg_reward.append(rewards)
            # Perform gradient update for generator (zero baseline)
            baseline = np.zeros(SEQ_LENGTH)
            for it in range(1):
                for it2 in range(off_num // BATCH_SIZE):
                    _, g_loss = generator.rl_train_step(
                        sess,
                        off_samples[it2],
                        avg_reward[it2],
                        baseline,
                        off_probs[it2],
                        entropy_w,
                        G_rate,
                    )
                    g_losses.append(g_loss)
        speed = time.time() - start
        print(
            "MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(g_losses)
            )
        )

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder: 2 rounds of fresh negatives, 3 epochs each,
        # with a learning rate that decays every R_decay adversarial batches.
        start = time.time()
        r_loss_list = []
        for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        weights,
                        1,
                        re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)),
                    )
                    r_loss_list.append(r_loss)
        speed = time.time() - start
        print(
            "Reward training {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(r_loss_list)
            )
        )
    log.close()
def main():
    """CLI entry point: parse arguments, train SeqGAN on the chosen task
    (obama / haiku / synth), then report BLEU (and, for synth, validity)
    to ./results.csv.
    """
    # Create a parser to parse user input
    def parse_arguments():
        # Returns (gen_n, disc_n, adv_n, model_string, task, synth_gen_params)
        # and creates the checkpoint directory as a side effect.
        parser = argparse.ArgumentParser(description='Program for running several SeqGan applications.')
        parser.add_argument('app', metavar='application', type=str, choices=['obama', 'haiku', 'synth'],
                            help='Enter either \'obama\' or \'haiku\'')
        parser.add_argument('gen_n', type=int, help='Number of generator pre-training steps')
        parser.add_argument('disc_n', type=int, help='Number of discriminator pre-training steps')
        parser.add_argument('adv_n', type=int, help='Number of adversarial pre-training steps')
        parser.add_argument('-mn', metavar="model_name", type=str, default="",
                            help="Name for the checkpoint files. Will be stored at ./<app>/models/<model_name>")
        parser.add_argument('-numeat', metavar="num_eat", type=int, default=500,
                            help="For synthetic data generation. Determines number of eaters in vocab.")
        parser.add_argument('-numfeed', metavar="num_feed", type=int, default=500,
                            help="For synthetic data generation. Determines number of feeders in vocab.")
        parser.add_argument('-numsent', metavar="num_sent", type=int, default=10000,
                            help="For synthetic data generation. Determines number of sentences generated.")
        args = parser.parse_args()

        synth_gen_params = ("NA", "NA", "NA")
        if args.app == "synth":
            # Synthetic task: generate the input corpus on the fly.
            synth_gen_params = (args.numsent, args.numfeed, args.numeat)
            generate_random_sents("../data/synth/input.txt", args.numsent, args.numfeed, args.numeat)
        task = load_task(args.app)

        # Make the /models directory if its not there.
        model_string = task.path + "/models/"
        if not os.path.exists("./" + model_string):
            os.mkdir("./" + model_string)
        # make the checkpoint directory if its not there.
        if args.mn == "":
            # Default checkpoint name: "<gen>_<disc>_<adv>_on_<mm_dd_yy>".
            model_string += str(args.gen_n) + "_" + str(args.disc_n) + "_" + str(args.adv_n)
            model_string += time.strftime("_on_%m_%d_%y", time.gmtime())
        else:
            model_string += args.mn
        if not os.path.exists("./" + model_string):
            os.mkdir("./" + model_string)
        return args.gen_n, args.disc_n, args.adv_n, model_string, task, synth_gen_params

    gen_n, disc_n, adv_n, MODEL_STRING, task, SYNTH_GEN_PARAMS = parse_arguments()
    assert START_TOKEN == 0
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # Initialize the data loaders
    gen_data_loader = Gen_Data_loader(BATCH_SIZE, task.max_seq_length)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, task.max_seq_length)  # For validation
    dis_data_loader = Dis_dataloader(BATCH_SIZE, task.max_seq_length)

    # Initialize the Generator
    generator = Generator(len(task.vocab), BATCH_SIZE, EMB_DIM, HIDDEN_DIM, task.max_seq_length, START_TOKEN)

    # Initialize the Discriminator
    discriminator = Discriminator(sequence_length=task.max_seq_length, num_classes=2, vocab_size=len(task.vocab),
                                  embedding_size=dis_embedding_dim, filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)

    # Set session configurations.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # If restoring from a previous run ....
    if len(os.listdir("./" + MODEL_STRING)) > 0:
        saver.restore(sess, tf.train.latest_checkpoint(MODEL_STRING))

    # Create batches from the positive file.
    gen_data_loader.create_batches(task.train_file)

    # Open log file for writing
    log = open(task.log_file, 'w')

    # Pre_train the generator with MLE.
    pre_train_generator(sess, saver, MODEL_STRING, generator, gen_data_loader,
                        likelihood_data_loader, task, log, gen_n, BATCH_SIZE, task.generated_num)

    print('Start pre-training discriminator...')
    # Do the discriminator pre-training steps
    saver.restore(sess, tf.train.latest_checkpoint(MODEL_STRING))
    train_discriminator(sess, generator, discriminator, dis_data_loader, task, log,
                        disc_n, BATCH_SIZE, task.generated_num, dis_dropout_keep_prob)
    print("Saving checkpoint ...")
    saver.save(sess, MODEL_STRING + "/model")

    # Do the adversarial training steps
    rollout = ROLLOUT(generator, 0.8)
    train_adversarial(sess, saver, MODEL_STRING, generator, discriminator, rollout,
                      dis_data_loader, likelihood_data_loader, task, log, adv_n)

    # Use the best model to generate final sample
    saver.restore(sess, tf.train.latest_checkpoint(MODEL_STRING))
    generate_samples(sess, generator, BATCH_SIZE, task.generated_num, task.eval_file)

    # Writing results to CSV
    with open(task.eval_file) as f:
        generated = []
        for line in f:
            line = line.strip().split()
            generated.append(line)
        generated = task.vocab.decode(generated)
        f.close()  # NOTE(review): redundant inside `with` — the context manager closes f

    with open(task.test_file) as f:
        references = []
        for line in f:
            line = line.strip().split()
            references.append(line)
        references = task.vocab.decode(references)
        f.close()  # NOTE(review): redundant inside `with`

    blue = corpus_bleu([references] * len(generated), generated)
    print("Run with args {} {} {}: BLEUscore = {}\n".format(gen_n, disc_n, adv_n, blue))

    prop = "NA"
    if task.name == "synth":
        # Fraction of generated sentences that parse as valid synthetic phrases.
        total_correct = 0
        for sentence in generated:
            if is_valid_phrase(sentence):
                total_correct += 1
        prop = total_correct / len(generated)

    if not os.path.exists("./results.csv"):
        os.mknod("./results.csv")
    with open("./results.csv", 'a') as csvfile:
        fieldnames = ["name", "task_name", "num_gen", "num_disc", "num_adv",
                      "num_sents", "num_feeders", "num_eaters", "BLEU", "prop_valid"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # NOTE(review): writeheader() runs on every append, so repeated runs
        # interleave header rows into results.csv — probably unintended.
        writer.writeheader()
        writer.writerow({"name": MODEL_STRING, "task_name": task.name, "num_gen": gen_n, "num_disc": disc_n,
                         "num_adv": adv_n, "num_sents": SYNTH_GEN_PARAMS[0], "num_feeders": SYNTH_GEN_PARAMS[1],
                         "num_eaters": SYNTH_GEN_PARAMS[2], "BLEU": blue, "prop_valid": prop})
        # NOTE(review): `f` here is the (already-closed) handle from the earlier
        # `with` block, not csvfile — this call is a leftover and does nothing useful.
        f.close()
    log.close()
def main():
    # Classic SeqGAN training on oracle-LSTM data (Python 2 syntax:
    # print statements, xrange, cPickle).
    # Pipeline: MLE-pretrain G, pretrain D on real-vs-generated data,
    # then alternate policy-gradient G updates with D retraining.
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)  # The oracle model

    # NOTE(review): sequence_length is hard-coded to 20 here rather than
    # using SEQ_LENGTH — confirm they agree.
    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim, filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator with MLE; every 5 epochs score G's samples under the oracle
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)

    print 'Start pre-training discriminator...'
    # Train 3 epoch on the generated data and do this for 50 times
    for epoch in range(50):
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        if epoch % 5 == 0:
            print 'pre-train discriminator epoch ', epoch
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step (16 Monte Carlo rollouts per reward)
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test: NLL of generator samples under the oracle every 5 batches
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator: 5 rounds of fresh negatives, 3 epochs each
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    log.close()
def main(unused_argv):
    """SeqGAN training driven by config objects (training/generator/
    discriminator configs). Builds the graph incrementally: pretraining
    optimizer first, adversarial optimizer after pretraining, initializing
    only the newly created optimizer variables at that point.
    """
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()

    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    # Build dataloader for generaotr, testing and discriminator
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    # Build generator and its rollout
    generator = Generator(config=config_gen)
    generator.build()
    rollout_gen = rollout(config=config_gen)

    # Build target LSTM (StrToBytes + encoding='bytes' loads a Python-2 pickle)
    target_params = cPickle.load(StrToBytes(open('save/target_params.pkl')), encoding='bytes')
    target_lstm = TARGET_LSTM(config=config_gen, params=target_params)  # The oracle model

    # Build discriminator
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    # Build optimizer op for pretraining
    pretrained_optimizer = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    var_pretrained = [v for v in tf.trainable_variables() if 'teller' in v.name]  # Using name 'teller' here to prevent name collision of target LSTM
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(generator.pretrained_loss,
                                                                       var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(zip(gradients, variables))

    # Initialize all variables
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    # Initalize data loader of generator
    # generate_samples(sess, target_lstm, config_train.batch_size, config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    # Start pretraining
    log = open('save/experiment-log.txt', 'w')
    print('Start pre-training generator...')
    log.write('pre-training...\n')
    for epoch in range(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in range(gen_data_loader.num_batch):
            batch = gen_data_loader.next_batch()
            # MLE step: full (all-ones) mask over the sequence.
            _, g_loss = sess.run([gen_pre_upate, generator.pretrained_loss],
                                 feed_dict={generator.input_seqs_pre: batch,
                                            generator.input_seqs_mask: np.ones_like(batch)})
        if epoch % config_train.test_per_epoch == 0:
            # generate_samples(sess, generator, config_train.batch_size, config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)

    print('Start pre-training discriminator...')
    for t in range(config_train.dis_update_time_pre):
        print("Times: " + str(t))
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num, config_train.negative_file)
        dis_data_loader.load_train_data(config_train.positive_file, config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    # Build optimizer op for adversarial training
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(generator.gen_loss_adv,
                                                                var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    # Initialize global variables of optimizer for adversarial training
    # (only the new Adam slots — trainable weights keep their pretrained values)
    uninitialized_var = [e for e in tf.global_variables() if e not in tf.trainable_variables()]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    # Start adversarial training
    for total_batch in range(config_train.total_batch):
        for iter_gen in range(config_train.gen_update_time):
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            # calcuate the reward given in the specific stpe t by roll out
            for iter_roll in range(config_train.rollout_num):
                rollout_list = sess.run(rollout_gen.sample_rollout_step, feed_dict=feed)
                rollout_list_stack = np.vstack(rollout_list)  # shape: #batch_size * #rollout_step, #sequence length
                reward_rollout_seq = sess.run(discriminator.ypred_for_auc,
                                              feed_dict={discriminator.input_x: rollout_list_stack,
                                                         discriminator.dropout_keep_prob: 1.0})
                # Reward for the complete sequence (last timestep has no rollout).
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={discriminator.input_x: samples,
                                                      discriminator.dropout_keep_prob: 1.0})
                # Column 1 = probability of "real"; rows are grouped per rollout
                # step, so stride by batch size to regroup rewards per example.
                reward_allseq = np.concatenate((reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                for r in range(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(r,
                                                          config_gen.gen_batch_size * config_gen.sequence_length,
                                                          config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            # Average rewards over all rollouts.
            rewards = np.sum(reward_rollout, axis=0) / config_train.rollout_num
            _, gen_loss = sess.run([train_adv_update, generator.gen_loss_adv],
                                   feed_dict={generator.input_seqs_adv: samples,
                                              generator.rewards: rewards})

        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num, config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file, config_train.negative_file)
            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: config_dis.dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    # LeakGAN training driver (Python 2 syntax: print statements, xrange).
    # Flow: build D and the hierarchical LeakGAN generator, optionally restore
    # checkpoints (full model, or discriminator-only via FLAGS.resD), otherwise
    # pretrain D then G, and finally run adversarial manager/worker training.
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    vocab_size = 4839
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)
    discriminator = Discriminator(SEQ_LENGTH, num_classes=2, vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim, filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters, batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM, start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE, step_size=4)
    # Generator receives the discriminator so it can read leaked features.
    leakgan = LeakGAN(SEQ_LENGTH, num_classes=2, vocab_size=vocab_size, emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim, filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters, batch_size=BATCH_SIZE, hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN, goal_out_size=GOAL_OUT_SIZE, goal_size=GOAL_SIZE,
                      step_size=4, D_model=discriminator)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Smoke-test one sampling pass through the generator graph.
    for a in range(1):
        g = sess.run(leakgan.gen_x, feed_dict={leakgan.drop_out: 0.8, leakgan.train: 1})
        print g
        print "epoch:", a, "  "

    log = open('save/experiment-log.txt', 'w')
    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
    gen_data_loader.create_batches(positive_file)
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print model
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        # NOTE(review): this condition is a non-empty string, hence always
        # truthy — the else branch below is unreachable.
        if model_path + '/' + FLAGS.model:
            print model_path + '/' + FLAGS.model
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        if FLAGS.resD and model_path + '/' + FLAGS.model:
            # Restore only the pre-trained discriminator, then pretrain G.
            print model_path + '/' + FLAGS.model
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print 'Start pre-training...'
            log.write('pre-training...\n')
            for epoch in xrange(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file)
                buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(loss) + '\n'
                log.write(buffer)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            print 'Start pre-training discriminator...'
            # Train 3 epoch on the generated data and do this for 50 times
            for i in range(16):
                for _ in range(5):
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
                    # gen_data_loader.create_batches(positive_file)
                    dis_data_loader.load_train_data(positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in xrange(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob: dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
                            # print 'D_loss ', D_loss
                            buffer = str(D_loss) + '\n'
                            log.write(buffer)
                # Refresh the leaked-feature extractor after each D round.
                leakgan.update_feature_function(discriminator)
            saver.save(sess, model_path + '/leakgan_preD')

            # saver.save(sess, model_path + '/leakgan')
            # pre-train generator (shorter schedule since D is already trained)
            print 'Start pre-training...'
            log.write('pre-training...\n')
            for epoch in xrange(PRE_EPOCH_NUM / 16):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
                    print 'pre-train epoch ', epoch, 'test_loss ', loss
                buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(loss) + '\n'
                log.write(buffer)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    # print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step (manager + worker updates)
        for it in range(1):
            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4,
                                     dis_dropout_keep_prob, total_batch, gen_data_loader)
                feed = {leakgan.x: samples, leakgan.reward: rewards, leakgan.drop_out: 1.0}
                _, _, g_loss, w_loss = sess.run([leakgan.manager_updates, leakgan.worker_updates,
                                                 leakgan.goal_loss, leakgan.worker_loss],
                                                feed_dict=feed)
                print 'total_batch: ', total_batch, "  ", g_loss, "  ", w_loss

        # Test: periodically dump samples and checkpoint the model
        if total_batch % 10 == 1 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                             "./save/coco_" + str(total_batch) + ".txt", 0)
            saver.save(sess, model_path + '/leakgan', global_step=total_batch)
        # Interleave an occasional MLE epoch to stabilize training.
        if total_batch % 15 == 0:
            for epoch in xrange(1):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)

        # Train the discriminator: 5 rounds of fresh negatives, 3 epochs each
        for _ in range(5):
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
                    # print 'D_loss ', D_loss
            leakgan.update_feature_function(discriminator)
    log.close()
def main():
    """SeqGAN variant that conditions generation on sampled start tokens.

    Start-token candidates and their probabilities are read from a CSV file
    (one "token,prob" pair per line); every sampling call draws fresh start
    tokens via get_start_token. Oracle evaluation is disabled (commented out)
    since this trains on real data rather than an oracle LSTM.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    # Load the start-token distribution: "token,probability" per line.
    start_candidates = []
    p_start_candidates = []
    with open(START_TOKEN_CANDIDATES_PATH) as fin:
        for l in fin:
            token = l.strip().split(",")
            start_candidates.append(token[0])
            p_start_candidates.append(float(token[1]))
    start_candidates = np.array(start_candidates, dtype=np.int32)
    p_start_candidates = np.array(p_start_candidates, dtype=np.float32)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    # likelihood_data_loader = Gen_Data_loader(BATCH_SIZE) # For testing
    vocab_size = VOCAB_SIZE
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    #target_params = pickle.load(open('save/target_params_py3.pkl', "rb"))
    #target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH, num_classes=2, vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim, filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    #generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator with MLE, logging the training NLL per epoch
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        #if epoch % 5 == 0:
        #    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
        #    likelihood_data_loader.create_batches(eval_file)
        #    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
        #    buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
        #    log.write(buffer)
        buffer = 'epoch:%i\tnll:%f' % (epoch, loss)
        print(buffer)
        log.write(buffer + "\n")

    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    # (each round writes its negatives to a distinct ".pre<idx>" file)
    for idx in range(50):
        print(idx)
        start_tokens = get_start_token(start_candidates, generated_num, p=p_start_candidates)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file + ".pre%i" % idx, start_tokens)
        dis_data_loader.load_train_data(positive_file, negative_file + ".pre%i" % idx)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    saver = tf.train.Saver()
    for total_batch in range(TOTAL_BATCH):
        print(total_batch)
        # Train the generator for one step, conditioning on sampled start tokens
        for it in range(1):
            start_token = get_start_token(start_candidates, BATCH_SIZE, p=p_start_candidates)
            samples = generator.generate(sess, start_token)
            rewards = rollout.get_reward(sess, samples, 16, discriminator, start_token)
            feed = {generator.x: samples, generator.rewards: rewards,
                    generator.start_token: start_token}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test (oracle NLL evaluation disabled — no oracle in this setup)
        #if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
        #    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
        #    likelihood_data_loader.create_batches(eval_file)
        #    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #    buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
        #    print('total_batch: ', total_batch, 'test_loss: ', test_loss)
        #    log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator: 5 rounds of fresh ".gan<batch>" negatives
        for _ in range(5):
            start_tokens = get_start_token(start_candidates, generated_num, p=p_start_candidates)
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file + ".gan%i" % total_batch, start_tokens)
            dis_data_loader.load_train_data(positive_file, negative_file + ".gan%i" % total_batch)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

        # Periodic checkpointing.
        # NOTE(review): the literal '*' in "save/*model_%i.ckpt" looks like a
        # leftover glob/typo — confirm the intended checkpoint path.
        if total_batch % 20 == 0 or total_batch == TOTAL_BATCH - 1:
            saver.save(sess, "save/*model_%i.ckpt" % total_batch, global_step=total_batch)
    log.close()
def main(source_file, wordVocab, vocab_size):
    """Train a SeqGAN on real sentences read from `source_file`.

    Pipeline: build positive samples from the source corpus, MLE-pretrain the
    generator, pretrain the discriminator on real-vs-generated data, then run
    adversarial (policy-gradient) training, dumping generated text periodically.

    Args:
        source_file: name of the raw input corpus (relative to `train_dir`).
        wordVocab: vocabulary object shared by generator and discriminator.
        vocab_size: number of tokens in `wordVocab`.

    NOTE(review): relies on module-level globals (`log`, `train_dir`,
    `positive_file`, `negative_file`, `g_lrn`, `d_lrn`, …) — assumed to be
    defined at file scope; confirm before reuse.
    """
    # Fresh graph so repeated calls (e.g. per source file) don't collide.
    tf.reset_default_graph()
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing

    # todo:
    print ("starting generating positive samples...")
    # Convert the raw corpus into the id-encoded positive-example file;
    # returns how many samples were produced.
    generated_num = gen_data_loader.transform_positive_file_2(
        train_dir + source_file, train_dir + positive_file, wordVocab,
        SEQ_LENGTH)
    print("generated_num: ", generated_num)
    # Too little data to train on — bail out early.
    if generated_num < 100:
        return
    gen_data_loader.create_batches(train_dir + positive_file)

    with tf.variable_scope("Train", reuse=None):
        generator = Generator(wordVocab,
                              vocab_size,
                              BATCH_SIZE,
                              EMB_DIM,
                              HIDDEN_DIM,
                              SEQ_LENGTH,
                              START_TOKEN,
                              learning_rate=g_lrn)
        discriminator = Discriminator(word_vocab=wordVocab,
                                      sequence_length=SEQ_LENGTH,
                                      num_classes=2,
                                      embedding_size=dis_embedding_dim,
                                      filter_sizes=dis_filter_sizes,
                                      num_filters=dis_num_filters,
                                      l2_reg_lambda=dis_l2_reg_lambda,
                                      learning_rate=d_lrn)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # todo: 1.##############pre-train generator##############
    print 'Start pre-training generator with MLE...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM_generator):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            buffer = 'epoch:\t' + str(epoch) + '\tloss:\t' + str(loss)
            print(buffer)
            sys.stdout.flush()
            log.write(buffer)
            # generate_samples(sess,
            #                  generator,
            #                  BATCH_SIZE,
            #                  generated_num,
            #                  eval_file)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            # buffer = 'epoch:\t' + str(epoch) + '\tnllscore:\t' + str(test_loss) + '\n'
            # log.write(buffer)

    # todo: 2.##############pre-train discriminator##############
    print 'Start pre-training discriminator...'
    for _ in range(PRE_EPOCH_NUM_discriminator):
        ## Because samples are drawn from a probability distribution,
        ## the generated fake data differs on every pass.
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        # NOTE(review): positive/negative files used here without the
        # `train_dir` prefix, unlike create_batches above — confirm paths.
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            ## Train the discriminator on each batch of fake data.
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    with tf.variable_scope("Train", reuse=None):
        g_beta = ROLLOUT(generator, 0.8)  ## this is g_beta (the rollout policy)

    # todo: 3.############## Adversarial Training ##############
    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # todo: Train the generator for one batch samples
        for it in range(1):
            samples = generator.generate(sess)
            # Monte-Carlo rollout reward from the discriminator (16 rollouts).
            rewards = g_beta.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)
        # Test
        if total_batch % 10 == 0 or total_batch == TOTAL_BATCH - 1:
            buffer = 'epoch:\t' + str(total_batch) + '\tg_loss:\t' + str(
                g_loss)
            print(buffer)
            sys.stdout.flush()
            log.write(buffer)
        # Sync the rollout policy toward the current generator weights.
        g_beta.update_params()
        # todo: Train the discriminator
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
        # Periodically decode the latest fake samples back to words.
        if total_batch % 10 == 0 or total_batch == TOTAL_BATCH - 1:
            out_file = out_negative_file + str(total_batch) + ".txt"
            transform_file(negative_file, wordVocab, out_file)
    # Final generation pass: emit `need_generated_samples` sequences and
    # write the decoded result next to the source file as "<source>.GEN".
    generate_samples(sess, generator, BATCH_SIZE, need_generated_samples,
                     negative_file)
    transform_file(negative_file, wordVocab, source_file + ".GEN")
def main():
    """Train a SeqGAN on token sequences read from `true_file`.

    Builds a vocabulary from the corpus, MLE-pretrains the generator (plotting
    the loss curve), pretrains the discriminator, then runs adversarial
    training, checking the syntax of generated code samples along the way.

    Fixes vs. previous revision:
    - `str(datetime.datetime.now)` was missing the call parentheses, so the
      output filename embedded a function repr instead of a timestamp.
    - the target-params pickle is now opened via a context manager.
    - the log file is now closed at the end (the close was commented out).
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    # ---- Build the vocabulary from the real corpus -------------------------
    with open(true_file, 'r') as f_pos:
        file_contents = f_pos.read().splitlines()
    file_contents = [content.split() for content in file_contents]
    tokens = set([item for sublist in file_contents for item in sublist])
    pad_idx = len(tokens)          # padding gets the last id
    vocab_size = pad_idx + 1
    token2idx = dict((token, i) for i, token in enumerate(tokens))
    idx2token = dict((i, token) for i, token in enumerate(tokens))
    idx2token[pad_idx] = " "       # padding decodes to a space
    load_positive(true_file, positive_file, token2idx, pad_idx)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)

    # Load oracle params, then re-randomize the vocab-sized layers so the
    # oracle matches this corpus's vocabulary.
    with open('save/target_params.pkl', 'rb') as f:
        target_params = cPickle.load(f, encoding='latin1')
    target_params[0] = np.random.random([vocab_size, 32]).astype(np.float32)
    target_params[13] = np.random.random([32, vocab_size]).astype(np.float32)
    target_params[14] = np.random.random([vocab_size, ]).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    gen_data_loader.create_batches(positive_file, SEQ_LENGTH)

    # log file that stores progress
    log = open('save/experiment-log.txt', 'w')

    # ---- Pre-train generator (MLE) -----------------------------------------
    print('Start pre-training...')
    log.write('pre-training...\n')
    all_pre_train_losses = []
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        all_pre_train_losses.append(loss)
    plt.plot(all_pre_train_losses)
    plt.savefig('pre_train_losses_plot.png')
    gen_outfile = 'save/generated_by_generator_after_' + str(
        PRE_EPOCH_NUM) + '_' + str(datetime.datetime.now()) + '_epochs.txt'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)
    checksyntax.check_code(log, gen_outfile)

    # ---- Pre-train discriminator -------------------------------------------
    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for i in range(50):
        print("discriminator pre train epoch : ", i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file,
                                        SEQ_LENGTH)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)
    # BUG FIX: datetime.datetime.now() was previously referenced without the
    # call parentheses, putting a function repr into the filename.
    gen_outfile = ('save/generated_by_generator_after_discriminator_training_'
                   + str(datetime.datetime.now()) + '.txt')
    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)
    checksyntax.check_code(log, gen_outfile)

    # ---- Adversarial training ----------------------------------------------
    rollout = ROLLOUT(generator, 0.8)
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("total_batch : ", total_batch)
        # Periodic snapshot of generated output for syntax checking.
        if total_batch % 20 == 0:
            file_name = 'save/output_batch_' + str(total_batch) + '.txt'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name, idx2token)
            checksyntax.check_code(log, file_name)
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # Update roll-out parameters
        rollout.update_params()
        # Train the discriminator
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file,
                                            SEQ_LENGTH)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    final_gen_file = 'save/final_output.txt'
    generate_samples(sess, generator, BATCH_SIZE, generated_num,
                     final_gen_file, idx2token)
    checksyntax.check_code(log, final_gen_file)
    # FIX: close the log file (the close call was previously commented out).
    log.close()
def main(FLAGS):
    """Run SeqGAN training driven entirely by command-line FLAGS.

    Supports two modes: real-world text data (text8/ptb/toy/wt2) with a
    token dictionary, or the synthetic oracle-LSTM setup. Pretrains the
    generator and discriminator, then runs adversarial training, saving
    checkpoints periodically.
    """
    #########################################################################################
    #  Generator  Hyper-parameters
    ######################################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32 # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32 # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20 # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.gen_pretrain_epoch_num  # 120 # supervise (maximum likelihood estimation) epochs for generator
    DISC_PRE_EPOCH_NUM = FLAGS.dis_pretrain_epoch_num  # 50 # supervise (maximum likelihood estimation) epochs for descriminator
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  #64
    gen_dropout_keep_prob = FLAGS.gen_dropout_keep_prob  # 0.75
    gen_num_recurrent_layers = FLAGS.gen_num_recurrent_layers  # 1
    gen_learning_rate = FLAGS.gen_learning_rate

    #########################################################################################
    #  Discriminator  Hyper-parameters
    #########################################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    dis_dropout_keep_prob = 0.75
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  #64

    #########################################################################################
    #  Basic Training Parameters
    #########################################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 200 #num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000

    #########################################################################################
    #  Data configurations
    #########################################################################################
    use_real_world_data = True
    real_data_file_path = FLAGS.dataset_path  # './data/text8/text8'
    dataset_name = os.path.basename(real_data_file_path)
    base_token = FLAGS.base_token  # 'char'

    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:
        # Derive the train/valid/test splits and the token dictionary path
        # from the dataset base path; split the corpus on first use.
        real_data_train_file = real_data_file_path + '-train'
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-{}-dict.json'.format(
            base_token)
        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)
        # NOTE(review): `map` shadows the builtin; it is the token->id dict,
        # `inv_map` the inverse.
        map, inv_map = create_real_data_dict(real_data_train_file,
                                             real_data_dict_file, base_token)
        vocab_size = len(map)
        # Sanity-check vocab size per known dataset/tokenization pair.
        if dataset_name == 'text8' and base_token == 'char':
            assert vocab_size == 27  # SORRY FOR THE HARD CODING
        elif dataset_name == 'ptb' and base_token == 'word':
            assert vocab_size == 10001  # SORRY FOR THE HARD CODING
        elif dataset_name == 'toy' and base_token == 'word':
            assert vocab_size == 8  # SORRY FOR THE HARD CODING
        elif dataset_name == 'wt2' and base_token == 'word':
            assert vocab_size == 33279  # SORRY FOR THE HARD CODING
        else:
            raise TypeError
        gen_data_loader = Gen_Data_loader_text(BATCH_SIZE,
                                               map,
                                               inv_map,
                                               seq_len=SEQ_LENGTH,
                                               token_type=base_token)
        dis_data_loader = Dis_dataloader_text(BATCH_SIZE,
                                              map,
                                              inv_map,
                                              seq_len=SEQ_LENGTH,
                                              token_type=base_token)
    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
        vocab_size = 5000
        dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size,
                          BATCH_SIZE,
                          EMB_DIM,
                          HIDDEN_DIM,
                          SEQ_LENGTH,
                          START_TOKEN,
                          dropout_keep_prob=gen_dropout_keep_prob,
                          num_recurrent_layers=gen_num_recurrent_layers)

    if not use_real_world_data:
        # NOTE(review): open() without 'rb' breaks pickle.load on Python 3,
        # and the file handle is never closed — confirm target interpreter.
        target_params = pickle.load(open('save/target_params.pkl'))
        target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                  SEQ_LENGTH, START_TOKEN,
                                  target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        pass
    else:
        # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file)
        gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)
        # update learning rate: decay after the first 5 epochs
        if epoch > 5:
            gen_learning_rate /= FLAGS.gen_learning_decay * 1.
        if epoch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch,
                EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch)
            saver.save(sess, save_file)
        if use_real_world_data:
            # Re-sample a limited batch set from the training corpus each epoch.
            gen_data_loader.create_batches(real_data_train_file,
                                           limit_num_samples=generated_num)
        loss = pre_train_epoch(sess, generator, gen_data_loader,
                               gen_learning_rate)
        if epoch % 1 == 0:
            if use_real_world_data:
                generate_real_data_samples(
                    sess, generator, BATCH_SIZE, generated_num,
                    eval_file + "_epoch_%0d.txt" % epoch, inv_map, base_token)
                test_loss = 0  # FIXME - TEMP
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for epoch in range(DISC_PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)
        if use_real_world_data:
            generate_real_data_samples(sess, generator, BATCH_SIZE,
                                       generated_num, negative_file, inv_map,
                                       base_token)
            dis_data_loader.load_train_data(real_data_train_file,
                                            negative_file)
        else:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)
        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {
                generator.x: samples,
                generator.rewards: rewards,
                generator.learning_rate: 0.01
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test: NLL against the oracle is only available in synthetic mode.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            if not use_real_world_data:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(5):
            if use_real_world_data:
                generate_real_data_samples(sess, generator, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_map, base_token)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 negative_file)
                dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    # # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text(BATCH_SIZE,map,inv_map)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)

    log.close()
def main():
    """SeqGAN training loop against a real-data corpus (`real_file`).

    Pretrains the generator by MLE and the discriminator on real-vs-fake
    data, then alternates g_steps of policy-gradient generator updates with
    d_steps of discriminator updates, logging progress via `logger`.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    # TODO: I changed this. Why was this asserted? Was it just to ensure the replication
    # of results? Or is zero important otherwise?
    # Changed because 0 is a bad start token for our data. (cannot have home label=0)
    # assert START_TOKEN == 0

    # set up logging
    log_fpath = logger.get_experiment_log_filepath()

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)  # For testing
    vocab_size = VOCAB_SIZE
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    if not USE_GPU:
        # Prevent the environment from seeing the available GPUs (to avoid error on matlaber cluster)
        import os
        os.environ["CUDA_VISIBLE_DEVICES"]="-1"

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    gen_data_loader.create_batches(real_file)

    # pre-train generator
    logger.write_log(log_fpath, 'pre-training generator...')
    for epoch in xrange(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            logger.write_log(log_fpath, 'generator loss:')
            logger.log_progress(log_fpath, epoch, loss)
            # Dump eval samples every 5 epochs to track pretraining quality.
            generate_samples(sess, generator, BATCH_SIZE, eval_generated_num,
                             eval_file.format('pretrain'))

    logger.write_log(log_fpath, 'Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for i in range(50):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         fake_file)
        dis_data_loader.load_train_data(real_file, fake_file)
        # dis_data_loader.load_train_data(positive_file, negative_file)
        logger.write_log(log_fpath, 'epoch iterator: %s / 50' % i)
        for j in range(3):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _d_train_output = sess.run(discriminator.train_op, feed)
    logger.write_log(log_fpath, 'finished pre-training discriminator')

    rollout = ROLLOUT(generator, 0.8)

    logger.write_log(log_fpath, 'Start Adversarial Training...')
    # Adversarial schedule: g_steps generator updates, then d_steps
    # discriminator refreshes of k epochs each, per outer batch.
    g_steps = 1
    d_steps = 1
    k = 10
    for batch in range(TOTAL_BATCH):
        buff = 'batch %s/%s' % (batch, TOTAL_BATCH)
        logger.write_log(log_fpath, buff)
        # Train the generator for one step
        for it in range(g_steps):
            samples = generator.generate(sess)
            rollout_num = 16  # TODO: experiment with this value
            rewards = rollout.get_reward(sess, samples, rollout_num,
                                         discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if batch % 5 == 0 or batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, eval_generated_num,
                             eval_file.format(batch))
            logger.write_log(log_fpath, 'generated some more eval samples...')

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(d_steps):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             fake_file)
            dis_data_loader.load_train_data(real_file, fake_file)
            for _ in range(k):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    logger.write_log(log_fpath, 'I\'M DONE')
def main(unused_argv):
    """SeqGAN with explicit rollout graph and config-object hyperparameters.

    Builds generator + rollout + oracle (TARGET_LSTM) + discriminator,
    pretrains generator (MLE) and discriminator, then runs adversarial
    training where rewards are computed by Monte-Carlo rollouts scored by
    the discriminator.
    """
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()
    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    #Build dataloader for generaotr, testing and discriminator
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    #Build generator and its rollout
    # The generator builds its three sub-networks.
    generator = Generator(config=config_gen)
    generator.build()
    # The rollout network completes partially-generated sequences so rewards
    # can be computed before the full sequence is produced.
    rollout_gen = rollout(config=config_gen)

    #Build target LSTM
    # NOTE(review): open() without 'rb' — Python 2 only; confirm interpreter.
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(config=config_gen,
                              params=target_params)  # The oracle model

    #Build discriminator
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    #Build optimizer op for pretraining
    pretrained_optimizer = tf.train.AdamOptimizer(
        config_train.gen_learning_rate)
    # Collect all 'teller' variables (shared by generator and rollout nets).
    var_pretrained = [
        v for v in tf.trainable_variables() if 'teller' in v.name
    ]  #Using name 'teller' here to prevent name collision of target LSTM
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(
        generator.pretrained_loss, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(
        zip(gradients, variables))

    #Initialize all variables
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    # Use target_lstm to generate "real" data into positive_file (utils.py).
    generate_samples(sess, target_lstm, config_train.batch_size,
                     config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    #Start pretraining
    log = open('save/experiment-log.txt', 'w')
    print 'Start pre-training generator...'
    log.write('pre-training...\n')
    for epoch in xrange(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in xrange(gen_data_loader.num_batch):
            # Batches come from the target_lstm's samples — supervised MLE
            # pretraining of the generator on "real" sequences.
            batch = gen_data_loader.next_batch()
            _, g_loss = sess.run([gen_pre_upate, generator.pretrained_loss],
                                 feed_dict={generator.input_seqs_pre: batch,
                                            generator.input_seqs_mask: np.ones_like(batch)})
        if epoch % config_train.test_per_epoch == 0:
            # Measure how close generator samples are to the oracle
            # distribution (NLL under target_lstm).
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print 'Start pre-training discriminator...'
    for t in range(config_train.dis_update_time_pre):
        print "Times: " + str(t)
        # Mix generator fakes with target_lstm "real" data for training.
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num,
                         config_train.negative_file)
        dis_data_loader.load_train_data(config_train.positive_file,
                                        config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob:
                    config_dis.dis_dropout_keep_prob
                }
                # Cross-entropy minimization — trains the scoring network
                # that will provide rewards to the generator.
                _ = sess.run(discriminator.train_op, feed)

    #Build optimizer op for adversarial training
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(
        generator.gen_loss_adv, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    #Initialize global variables of optimizer for adversarial training
    uninitialized_var = [
        e for e in tf.global_variables() if e not in tf.trainable_variables()
    ]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    #Start adversarial training
    for total_batch in xrange(config_train.total_batch):
        for iter_gen in xrange(config_train.gen_update_time):
            # Sample full sequences from the generator LSTM.
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            #calcuate the reward given in the specific stpe t by roll out
            for iter_roll in xrange(config_train.rollout_num):
                # Feed the sampled words to the rollout network.
                # NOTE(review): samples appear to be complete sequences here
                # (unlike the paper) — unclear why rollout is still needed.
                rollout_list = sess.run(rollout_gen.sample_rollout_step,
                                        feed_dict=feed)
                rollout_list_stack = np.vstack(
                    rollout_list
                )  #shape: #batch_size * #rollout_step, #sequence length
                # Monte-Carlo: score the rolled-out sequences with the
                # discriminator to obtain per-step rewards.
                reward_rollout_seq = sess.run(
                    discriminator.ypred_for_auc,
                    feed_dict={
                        discriminator.input_x: rollout_list_stack,
                        discriminator.dropout_keep_prob: 1.0
                    })
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={
                                               discriminator.input_x: samples,
                                               discriminator.dropout_keep_prob:
                                               1.0
                                           })
                reward_allseq = np.concatenate(
                    (reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                for r in xrange(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(
                        r,
                        config_gen.gen_batch_size * config_gen.sequence_length,
                        config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            # Average reward over rollouts.
            rewards = np.sum(reward_rollout,
                             axis=0) / config_train.rollout_num
            # Policy-gradient update of the generator using the rewards.
            _, gen_loss = sess.run([train_adv_update, generator.gen_loss_adv],
                                   feed_dict={generator.input_seqs_adv: samples,
                                              generator.rewards: rewards})

        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            # After adversarial updates, compare generator samples against
            # the oracle (target_lstm) again.
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file,
                                            config_train.negative_file)
            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob:
                        config_dis.dis_dropout_keep_prob
                    }
                    # Keep training the scoring (reward) network.
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
def main():
    """Train a kigo-conditioned SeqGAN on a haiku corpus.

    Pipeline: MLE pre-training of the generator, discriminator pre-training
    on real-vs-generated haiku, then adversarial training with policy-gradient
    rewards from Monte-Carlo rollouts.

    Side effects: reads corpus pickles from data/, writes
    save/experiment-log.txt and periodic sample files under result/.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    # Load the real haiku corpus and the word index (built with word2vec).
    with open('data/ihaiku.pickle', 'rb') as f:
        haiku_list = pickle.load(f)
    # usew2v ------------------------------------------------------------------
    with open('data/index.pickle', 'rb') as f:
        index = pickle.load(f)
    vocab_size = len(index)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # NOTE(review): sequence_length is hard-coded to 20 here while the
    # generator uses SEQ_LENGTH — confirm the two agree.
    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    # Grow GPU memory on demand instead of grabbing it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    log = open('save/experiment-log.txt', 'w')

    # ----- Pre-train the generator (MLE) -----
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        # Re-sample a fresh positive set from the real corpus each epoch.
        select_haikus(haiku_list, generated_num, positive_file)
        gen_data_loader.create_batches(positive_file)
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            print('pre-train epoch ', epoch)
            buffer = 'epoch:\t' + str(epoch) + '\n'
            log.write(buffer)

    # ----- Pre-train the discriminator -----
    print('Start pre-training discriminator...')
    # Train 3 epochs on freshly generated data, repeated 50 times.
    for _ in range(50):
        select_haikus(haiku_list, generated_num, positive_file)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step with rollout rewards.
        for it in range(1):
            kigos = select_kigos(kigo_list, BATCH_SIZE)
            samples, rate = generator.generate_with_rate(sess, kigos)
            rewards = rollout.get_reward(sess, samples, 16, discriminator, rate)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator: 5 fresh data sets x 3 epochs each.
        for _ in range(5):
            select_haikus(haiku_list, generated_num, positive_file)
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

        # Periodic sample dump + logging.
        print('total_batch:', total_batch)
        # BUGFIX: the original condition `total_batch - 1 % 50 == 0` parses as
        # `total_batch - (1 % 50) == 0` (i.e. it fired only at total_batch==1).
        # Parenthesize so samples are written every 50 batches.
        if (total_batch - 1) % 50 == 0:
            output_file = 'result/result_{0:04d}_epoch.txt'.format(total_batch)
            generate_samples_with_pred(sess, generator, discriminator,
                                       BATCH_SIZE, generated_num, output_file)
        buffer = 'epoch:\t' + str(total_batch) + '\n'
        log.write(buffer)
    log.close()
def main():
    """Train SeqGAN end to end, checkpointing the best models.

    Stages: generator MLE pre-training, discriminator pre-training, and
    adversarial training; the best generator (lowest loss) and best
    discriminator (highest accuracy) are saved, plus periodic adversarial
    checkpoints.

    Side effects: writes save/experiment-log.txt, checkpoints under model/,
    and a final sample file <generate_file>.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=num_classes,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)
    #target_params = cPickle.load(open('save/target_params.pkl'))
    #target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model

    # Avoid occupying all of the GPU memory.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Savers for the three training stages.
    saver_gen = tf.train.Saver()
    saver_dis = tf.train.Saver()
    saver_seqgan = tf.train.Saver()

    # Load the real (positive) data.
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')

    # ----- Pre-train generator (MLE) -----
    print('Start pre-training Generator...')
    log.write('pre-training generator...\n')
    # BUGFIX: `best` was re-initialized to 1000 inside the loop, so the
    # "best" checkpoint was overwritten every epoch regardless of loss.
    # Track the minimum loss across all epochs instead.
    best_loss = float('inf')
    for epoch in range(PRE_GEN_EPOCH_NUM):
        s = time.time()
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        # detect best model
        if loss < best_loss:
            best_loss = loss
            saver_gen.save(sess, "model/pretrain_gen_best")
        if epoch % 5 == 0:
            print('pre-train epoch: ', epoch, 'loss: ', loss, "time: ", time.time() - s)
            log.write('epoch:\t' + str(epoch) + '\tloss:\t' + str(loss) + '\n')

    # ----- Pre-train discriminator -----
    print('Start pre-training discriminator...')
    log.write('pre-training discriminator...\n')
    # BUGFIX: `best` was reset to 0 immediately before each comparison, so a
    # checkpoint was saved on virtually every mini-batch.  Track the best
    # accuracy across the whole pre-training stage instead.
    best_acc = 0.0
    # Train 3 epochs on the generated data each round.
    for epoch in range(PRE_DIS_EPOCH_NUM):
        s = time.time()
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, acc = sess.run([discriminator.train_op, discriminator.accuracy], feed)
                if acc > best_acc:
                    saver_dis.save(sess, "./model/pretrain_dis_best")
                    best_acc = acc
        print("pre-train epoch: ", epoch, " acc: ", acc, " time: ", time.time() - s)
        log.write("epoch:\t" + str(epoch) + "\tacc:\t" + str(acc) + "\n")

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # ----- Train the generator -----
        s = time.time()
        for it in range(ADV_GEN_TIME):
            samples = generator.generate(sess)  # one batch of sequences
            rewards = rollout.get_reward(sess, samples, 16, discriminator)  # MC search
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)  # do policy gradient

        # Periodic logging / checkpointing.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            # Per-token average reward over the last generator batch.
            avg = np.mean(np.sum(rewards, axis=1), axis=0) / SEQ_LENGTH
            log.write('epoch:\t' + str(total_batch) + '\treward:\t' + str(avg) + '\n')
            saver_seqgan.save(sess, "./model/seq_gan", global_step=total_batch)

        # Update roll-out parameters
        rollout.update_params()

        # ----- Train the discriminator -----
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
        # NOTE(review): `avg` is only refreshed every 5 batches, so this line
        # prints a stale value in between refreshes.
        print('epoch: ', total_batch, 'average reward: ', avg, " time: ", time.time() - s)
    log.close()

    # generate examples
    print("Training Finished, starting to generating test")
    generate_samples(sess, generator, BATCH_SIZE, test_num, generate_file)
    print("Finish")
def main():
    """Train a generator with a learned Rewarder (MaxEnt-style policy gradient).

    Optionally restores a pre-trained checkpoint (``restore`` flag); otherwise
    pre-trains the generator by MLE and the rewarder on real-vs-generated data,
    then alternates off-policy generator updates with rewarder updates.
    Python 2 code (``print`` statements, ``xrange``).

    Side effects: writes sample files, a per-run log under save/, and
    checkpoints via ``saver``.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    vocab_size = 4839  # NOTE(review): hard-coded vocabulary size — presumably matches the corpus; verify.
    dis_data_loader = Dis_dataloader(re_batch_size)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN, MID_LAYER_G)
    # Rewarder uses wider embedding/hidden sizes (x2) than the generator.
    rewarder = Rewarder(vocab_size, BATCH_SIZE, EMB_DIM * 2, HIDDEN_DIM * 2,
                        SEQ_LENGTH, START_TOKEN, MID_LAYER_R,
                        l2_reg_lambda=re_l2_reg_lambda)

    # Grow GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    gen_data_loader.create_batches(positive_file)

    # One log file per entropy weight setting.
    log = open('save/experiment-log-' + str(ent_w) + '.txt', 'w')
    # pre-train generator
    if restore is False:
        print 'Start pre-training...'
        log.write('pre-training...\n')
        for epoch in xrange(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                print 'pre-train epoch ', epoch, 'test_loss ', loss
                buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                    loss) + '\n'
                log.write(buffer)
            # Dump intermediate samples every 20 epochs (skip epoch 0).
            if epoch % 20 == 0 and epoch > 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 pretrain_file_prefix + str(epoch))
        print 'Start pre-training rewarder...'
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    # Uniform example weights (np.ones) during pre-training;
                    # final arg 0.01 is the learning/regularization rate passed
                    # to the rewarder step — TODO confirm its exact meaning.
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, np.ones(BATCH_SIZE), 1.0,
                        re_dropout_keep_prob, 0.01)
                    r_losses.append(r_loss)
                print 'reward_loss', np.mean(r_losses)
        speed = time.time() - start
        print 'Reward pre_training Speed:{:.3f}'.format(speed)
        checkpoint_path = os.path.join('save', 'exper_40.ckpt')
        saver.save(sess, checkpoint_path)
    else:
        print 'Restore pretrained model ...'
        log.write('Restore pre-trained model...\n')
        ckpt = tf.train.get_checkpoint_state('save')
        saver.restore(sess, ckpt.model_checkpoint_path)

    # by setting the parameters to 0.0 and 1.0, we didn't use the mixed policy RL training in SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        # Dump evaluation samples every 5 rounds and on the last round.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file_prefix + str(total_batch))
        # Train the generator for one step
        start = time.time()
        g_losses = []
        # Draw off-policy samples (and their probabilities) via the rollout net.
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE,
                                                    off_num)
        avg_reward = []
        for it in range(off_num // BATCH_SIZE):
            # 4 rollouts per position to estimate intermediate rewards.
            rewards = rollout.get_reward(sess, off_samples[it], 4, rewarder)
            avg_reward.append(rewards)
        baseline = np.zeros(SEQ_LENGTH)  # zero baseline (no variance reduction)
        for it in range(1):
            for it2 in range(off_num // BATCH_SIZE):
                _, g_loss = generator.rl_train_step(
                    sess, off_samples[it2], avg_reward[it2], baseline,
                    off_probs[it2], ent_w)
                g_losses.append(g_loss)
        speed = time.time() - start
        print 'MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, np.mean(g_losses))

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder
        start = time.time()
        r_loss_list = []
        for _ in range(8):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    # Importance weights supplied by the rewarder itself.
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    # Learning rate decays exponentially every R_decay rounds.
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, weights, 1, re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)))
                    r_loss_list.append(r_loss)
        avg_loss = np.mean(r_loss_list)
        speed = time.time() - start
        print 'Reward training {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, avg_loss)
    log.close()
def init_data_loader(positive_file): dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH) gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH) gen_data_loader.create_batches(positive_file) return gen_data_loader, dis_data_loader
def main():
    """Train LeakGAN against a synthetic oracle (TARGET_LSTM).

    Depending on FLAGS.restore / FLAGS.resD, either restores a checkpoint or
    pre-trains the discriminator and generator in interleaved rounds, then
    runs adversarial training (manager + worker updates).

    NOTE(review): the exact nesting of the restore/pretrain branches was
    reconstructed from whitespace-mangled source — verify against the
    original LeakGAN Main.py before relying on it.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)  # For testing
    vocab_size = 5000
    # NOTE(review): `file` shadows the builtin and the handle is never closed.
    file = open('save/target_params.pkl', 'rb')
    target_params = cPickle.load(file)
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)
    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)
    # Pick the oracle variant matching the configured sequence length.
    if SEQ_LENGTH == 40:
        target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                  SEQ_LENGTH, START_TOKEN)  # The oracle model
    else:
        target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                    SEQ_LENGTH, START_TOKEN, target_params)

    # Cap GPU memory at 50% and grow on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Positive examples come from the oracle's data distribution.
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
    # Smoke-test one generator sample before training.
    for a in range(1):
        g = sess.run(leakgan.gen_x, feed_dict={leakgan.drop_out: 0.8, leakgan.train: 1})
        print(g)
        print("epoch:", a, "  ")

    log = open('save/experiment-log.txt', 'w')
    gen_data_loader.create_batches(positive_file)
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        # NOTE(review): this condition is a non-empty string concatenation and
        # is therefore ALWAYS truthy — the `else` branch below is dead code.
        # Probably intended `if FLAGS.model:` or an existence check.
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        # NOTE(review): same always-truthy string pattern as above; this
        # effectively reduces to `if FLAGS.resD:`.
        if FLAGS.resD and model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    # NLL of generator samples under the oracle.
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
                    likelihood_data_loader.create_batches(eval_file)
                    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
                    log.write(buffer)
                    # Ground-truth reference: oracle samples scored by itself.
                    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
                    likelihood_data_loader.create_batches(eval_file)
                    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                    print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            # Interleaved pre-training: 10 rounds of discriminator data
            # rounds followed by a slice of generator MLE epochs.
            print('Start pre-training discriminator...')
            for i in range(10):
                for _ in range(5):
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
                    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
                    # gen_data_loader.create_batches(positive_file)
                    dis_data_loader.load_train_data(positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob: dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
                            print("D_loss: ", D_loss)
                    # Refresh the leaked feature extractor from the updated
                    # discriminator.
                    leakgan.update_feature_function(discriminator)  ## todo: is important
                saver.save(sess, model_path + '/leakgan_preD')

                print('Start pre-training generator...')
                log.write('pre-training...\n')
                # NOTE(review): `PRE_EPOCH_NUM / 10` is integer division only
                # on Python 2; on Python 3 this is a float and range() raises.
                for epoch in range(PRE_EPOCH_NUM / 10):
                    loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                    if epoch % 5 == 0:
                        print("MLE Generator Loss: ", loss)
                        # generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
                        # likelihood_data_loader.create_batches(eval_file)
                        # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                        # print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                        # buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
                        # log.write(buffer)
                        # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
                        # likelihood_data_loader.create_batches(eval_file)
                        # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                        # print("Groud-Truth:", test_loss)
                saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    # print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4, dis_dropout_keep_prob)
                feed = {leakgan.x: samples, leakgan.reward: rewards, leakgan.drop_out: 0.5}
                # Joint manager (goal) and worker updates.
                _, _, g_loss, w_loss = sess.run(
                    [leakgan.manager_updates, leakgan.worker_updates, leakgan.goal_loss, leakgan.worker_loss],
                    feed_dict=feed)
                print('total_batch: ', total_batch, "  ", g_loss, "  ", w_loss)

        # Test
        # if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
        #     generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
        #     likelihood_data_loader.create_batches(eval_file)
        #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #     buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
        #     print('total_batch: ', total_batch, 'test_loss: ', test_loss)
        #     log.write(buffer)
        #     generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
        #     likelihood_data_loader.create_batches(eval_file)
        #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #     print("Groud-Truth:", test_loss)

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
            generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
            leakgan.update_feature_function(discriminator)
    log.close()
return sample_result ################################## main() ######################################### # 시간측정 start_time = time.time() tf.reset_default_graph() random.seed(SEED) np.random.seed(SEED) gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH) vocab_size = len(vocab_to_int) # 6390 print(vocab_size) dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH) generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, TYPE_SIZE) discriminator = Discriminator(sequence_length=SEQ_LENGTH, batch_size=BATCH_SIZE, num_classes=2, word_embedding_matrix=word_embedding_matrix, embedding_size=dis_embedding_dim, filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, type_size=TYPE_SIZE, l2_reg_lambda=dis_l2_reg_lambda) config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) saver = tf.train.Saver() sess.run(tf.global_variables_initializer()) # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
def main():
    """Train SeqGAN against a pre-trained oracle LSTM.

    Stages: generator MLE pre-training (NLL scored by the oracle),
    discriminator pre-training on real-vs-generated data, then adversarial
    training with rollout rewards.

    Side effects: reads save/target_params.pkl and <positive_file>, writes
    save/experiment-log.txt and the eval/negative sample files.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    #
    # Declare data loader
    # ----------------------------------------------------------------------------
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # ----------------------------------------------------------------------------

    #
    # Declare Generator & Discriminator
    # ----------------------------------------------------------------------------
    # declare: generator
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # BUGFIX: the original `cPickle.load(open('save/target_params.pkl'))`
    # leaked the file handle and opened a pickle in text mode; open it in
    # binary mode inside a context manager (consistent with how the rest of
    # this file loads the same pickle).
    with open('save/target_params.pkl', 'rb') as f:
        target_params = cPickle.load(f)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)  # The oracle model

    # declare: discriminator
    # NOTE(review): sequence_length is hard-coded to 20 while the generator
    # uses SEQ_LENGTH — confirm the two agree.
    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)
    # ----------------------------------------------------------------------------

    #
    # Set the session <sess>
    # ----------------------------------------------------------------------------
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    # ----------------------------------------------------------------------------

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate samples by using <target_lstm> and write the samples to file <positive_file>
    #generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')

    #
    # Pre-train <generator> by using <gen_data_loader>,
    # and then compute the <test_loss> of <target_lstm> and <likelihood_data_loader>
    # ----------------------------------------------------------------------------
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # generate samples by using <generator> and write the samples to file <eval_file>
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            # load samples from file <eval_file>
            likelihood_data_loader.create_batches(eval_file)
            # compute <test_loss> of <target_lstm>, with input <likelihood_data_loader>
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)
    # ----------------------------------------------------------------------------

    #
    # Pre-train <discriminator> by using <generator>
    # ----------------------------------------------------------------------------
    print('Start pre-training discriminator...')
    # Generate data and train 3 epoch on the generated data, which will be done for 50 times
    for _ in range(50):
        # generate samples by using <generator> and write the samples to file <negative_file>
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        # load samples from file <negative_file>
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                _ = sess.run(discriminator.train_op, feed_dict=feed)
    # ----------------------------------------------------------------------------

    rollout = ROLLOUT(generator, 0.8)

    #
    # Start seqGAN, train <discriminator> and <generator>
    # ----------------------------------------------------------------------------
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # ----- Train the generator for one step -----------------
        for it in range(G_STEPS):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, ROLLOUT_NUM,
                                         discriminator, SEQ_LENGTH)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # --------------------------------------------------------

        # Update roll-out parameters
        rollout.update_params()

        # ----- Train the discriminator -------------------------
        for _ in range(D_STEPS):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                    _ = sess.run(discriminator.train_op, feed_dict=feed)
        # --------------------------------------------------------
    # ----------------------------------------------------------------------------
    log.close()
def main():
    """Train a (optionally kigo-conditional) SeqGAN on haiku data.

    The `-c/--conditional` CLI flag switches between unconditional and
    condition-vector (kigo) training.  Python 2 code (``print`` statements,
    ``xrange``).

    Side effects: writes id-encoded corpora, the token2id pickle, the
    experiment log, generated-sample files, and a final generator checkpoint.
    """
    clock = Clock()
    clock.start()
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    # CLI: -c 1 enables conditional (kigo-seeded) training.
    parser = argparse.ArgumentParser(description='conditional SeqGAN')
    parser.add_argument('--conditional', '-c', type=int, default=0,
                        help='If you make SeqGAN conditional, set `-c` 1.')
    args = parser.parse_args()
    cond = args.conditional

    # Build the vocabulary from the parsed haiku file and encode the corpus.
    vocab = Vocab()
    vocab.construct(parsed_haiku_file)
    vocab.word2id(parsed_haiku_file, positive_file)
    UNK = vocab.dic.token2id[u'<UNK>']
    COMMA = vocab.dic.token2id[u',']

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH, COND_LENGTH, UNK)
    # likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH, COND_LENGTH, UNK) # For testing
    vocab_size = len(vocab.dic.token2id)
    # Persist the token mapping for later decoding.
    # NOTE(review): pickle.dump to a text-mode file — works on Python 2
    # (protocol 0) but would need 'wb' on Python 3.
    with open(output_token2id, 'w') as f:
        pickle.dump(vocab.dic.token2id, f)
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH, UNK)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, COND_LENGTH, START_TOKEN, is_cond=cond)
    # target_params = cPickle.load(open('save/target_params.pkl'))
    # target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  cond_length=COND_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  batch_size=BATCH_SIZE,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda,
                                  is_cond=cond)

    # Grow GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)
    if cond:
        # Encode and load the kigo (season-word) conditions.
        vocab.word2id(parsed_kigo_file, positive_condition_file)
        vocab.load_cond(positive_condition_file, COND_LENGTH, UNK)
        gen_data_loader.create_cond_batches(positive_condition_file)

    log = open('save/experiment-log.txt', 'w')
    # pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_GEN_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader, cond=cond)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file, cond, vocab)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            # buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            # log.write(buffer)
    clock.check_HMS()

    print 'Start pre-training discriminator...'
    # Train 3 epoch on the generated data and do this for PRE_EPOCH_DIS_NUM times
    for _ in range(PRE_EPOCH_DIS_NUM):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file, cond, vocab)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)
    clock.check_HMS()

    rollout = ROLLOUT(generator, 0.8, SEQ_LENGTH)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            if cond:
                # Sample a batch of conditions and thread them through
                # generation, reward estimation and the update feed.
                cond_batch = vocab.choice_cond(BATCH_SIZE)
                samples = generator.generate(sess, cond=cond_batch)
                rewards = rollout.get_reward(sess, samples, 16, discriminator,
                                             cond=cond_batch)
            else:
                samples = generator.generate(sess)
                rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            if cond:
                feed[generator.cond] = cond_batch
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file, cond, vocab)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            # print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            # log.write(buffer)
        # Decode ids back to words every 20 batches (and at the end).
        if total_batch % 20 == 0 or total_batch == TOTAL_BATCH - 1:
            if cond:
                vocab.id2word(
                    eval_file,
                    generated_haiku_with_kigo_file.format(total_batch))
            else:
                vocab.id2word(eval_file,
                              generated_haiku_file.format(total_batch))

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file, cond, vocab)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    clock.check_HMS()

    # Persist the trained generator.
    saver = tf.train.Saver()
    saver.save(sess, output_generator)
    log.close()
def main(FLAGS):
    """Pre-train and adversarially train LeakGAN.

    With ``use_real_world_data`` (hard-coded True below) training data is the
    text8 character corpus; otherwise positive examples are sampled from a
    synthetic oracle LSTM (``TARGET_LSTM``).  All tunables come from ``FLAGS``.
    Checkpoints are written under ``./ckpts`` and ``./ckp``.
    """
    #########################################################################################
    #  Generator  Hyper-parameters
    ######################################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32 # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32 # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20 # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.pretrain_epoch_num  # 80 # supervise (maximum likelihood estimation) epochs for generator(X1) & descriminator(X5)
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  # 64
    LEARNING_RATE = 0.01
    GOAL_SIZE = 16
    STEP_SIZE = 4
    #########################################################################################
    #  Discriminator  Hyper-parameters
    #########################################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    # Defaults below are immediately overwritten by the seq_len branch that follows.
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    if FLAGS.seq_len == 20:
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
        ]
        LEARNING_RATE = 0.0015
        # EMB_DIM = 32  # embedding dimension
        # HIDDEN_DIM = 32  # hidden state dimension of lstm cell
    elif FLAGS.seq_len == 40:
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40]
        # NOTE(review): 14 filter sizes but only 13 filter counts here — the
        # last size (40) would have no matching count if the discriminator
        # zips these lists. Verify against the Discriminator implementation.
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160, 160
        ]
        LEARNING_RATE = 0.0005
        # EMB_DIM = 64
        # HIDDEN_DIM = 64
    else:
        # Only sequence lengths 20 and 40 are supported.
        exit(0)
    print(SEQ_LENGTH)

    # Size of the feature vector "leaked" from the discriminator to the manager.
    GOAL_OUT_SIZE = sum(dis_num_filters)

    # dis_dropout_keep_prob = 0.75
    dis_dropout_keep_prob = 1.0
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  # 64

    #########################################################################################
    #  Basic Training Parameters
    #########################################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 800 # num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000
    model_path = './ckpts'

    #########################################################################################
    #  Data configurations
    #########################################################################################
    use_real_world_data = True
    real_data_file_path = './data/text8'

    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:
        vocab_size = 27  # 26 letters + space in text8
        # split to train-valid-test
        real_data_train_file = real_data_file_path + '-train'
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-dict.json'
        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)
        charmap, inv_charmap = create_real_data_dict(real_data_train_file,
                                                     real_data_dict_file)
        gen_data_loader = Gen_Data_loader_text8(BATCH_SIZE, charmap,
                                                inv_charmap,
                                                seq_len=SEQ_LENGTH)
        dis_data_loader = Dis_dataloader_text8(BATCH_SIZE, charmap,
                                               inv_charmap,
                                               seq_len=SEQ_LENGTH)
        # TODO
    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)  # For testing
        vocab_size = 5000
        file = open('save/target_params.pkl', 'rb')
        target_params = pickle.load(file)
        dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)
    if not use_real_world_data:
        if SEQ_LENGTH == 40:
            target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM,
                                      HIDDEN_DIM, SEQ_LENGTH,
                                      START_TOKEN)  # The oracle model
        else:
            target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM,
                                        HIDDEN_DIM, SEQ_LENGTH, START_TOKEN,
                                        target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    # NOTE: this saver is replaced below by one over tf.global_variables().
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        gen_data_loader.create_batches(real_data_train_file,
                                       limit_num_samples=generated_num)
        pass
    else:
        # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file, 0)
        gen_data_loader.create_batches(positive_file)

    # Smoke-test: run the (untrained) generator once and print its samples.
    for a in range(1):
        g = sess.run(leakgan.gen_x,
                     feed_dict={
                         leakgan.drop_out: 0.8,
                         leakgan.train: 1
                     })
        print(g)
        print("epoch:", a, " ")

    log = open('save/experiment-log.txt', 'w')
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        # NOTE(review): this condition tests a non-empty string, so it is
        # always true when FLAGS.model is set — the else branch is dead
        # unless FLAGS.model is empty. Confirm intended restore logic.
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        # if FLAGS.resD and model_path + '/' + FLAGS.model:
        if False:  # default of resD
            # Restore-discriminator path (disabled): restore D, then MLE
            # pre-train the generator for the full PRE_EPOCH_NUM epochs.
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    if use_real_world_data:
                        generate_real_data_samples(
                            sess, leakgan, BATCH_SIZE, generated_num,
                            eval_file + "_epoch_%0d.txt" % epoch, inv_charmap)
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
            if use_real_world_data:
                test_loss = 0  # FIXME - TEMP
            else:
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            print('Start pre-training discriminator...')
            # Train 3 epoch on the generated data and do this for 50 times
            for i in range(10):
                for _ in range(5):
                    if use_real_world_data:
                        generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                                   generated_num,
                                                   negative_file, inv_charmap)
                        dis_data_loader.load_train_data(
                            real_data_train_file, negative_file)
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, negative_file, 0)
                        generate_samples(sess, target_lstm, BATCH_SIZE,
                                         generated_num, positive_file, 0)
                        # gen_data_loader.create_batches(positive_file)
                        dis_data_loader.load_train_data(
                            positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob:
                                dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([
                                discriminator.D_loss, discriminator.D_train_op
                            ], feed)
                            # # print 'D_loss ', D_loss
                            # buffer =  str(D_loss) + '\n'
                            # log.write(buffer)
                # Refresh the feature extractor the generator "leaks" from.
                leakgan.update_feature_function(discriminator)
            saver.save(sess, model_path + '/leakgan_preD')
            # saver.save(sess, model_path + '/leakgan')
            # pre-train generator
            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM // 10):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    if use_real_world_data:
                        generate_real_data_samples(
                            sess, leakgan, BATCH_SIZE, generated_num,
                            eval_file + "_epoch_%0d.txt" % epoch, inv_charmap)
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
            if use_real_world_data:
                test_loss = 0  # FIXME - TEMP
            else:
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
            print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    #
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)
        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)
        for it in range(1):
            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4,
                                     dis_dropout_keep_prob)
                feed = {
                    leakgan.x: samples,
                    leakgan.reward: rewards,
                    leakgan.drop_out: 1.0
                }
                _, _, g_loss, w_loss = sess.run([
                    leakgan.manager_updates, leakgan.worker_updates,
                    leakgan.goal_loss, leakgan.worker_loss
                ], feed_dict=feed)
                print('total_batch: ', total_batch, " ", g_loss, " ", w_loss)

        # Test: oracle NLL is only available in the synthetic-data setting.
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            if not use_real_world_data:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print("Groud-Truth:", test_loss)

        # Train the discriminator
        for _ in range(5):
            if use_real_world_data:
                generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_charmap)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 negative_file, 0)
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 positive_file, 0)
                dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op],
                        feed)
                    # print 'D_loss ', D_loss
            leakgan.update_feature_function(discriminator)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    # # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text8(BATCH_SIZE,charmap,inv_charmap)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)

    log.close()
def main():
    """SeqGAN training loop: MLE pre-train G, pre-train D, then adversarial
    training with policy-gradient rewards from the G_beta rollout network.

    Relies on module-level constants (SEED, BATCH_SIZE, SEQ_LENGTH, ...) and
    file paths (positive_file, negative_file); writes logs to
    ./experiment-log.txt and final samples to ./final2.txt.
    """
    # set random seed (may important to the result)
    np.random.seed(SEED)
    random.seed(SEED)

    # data loader
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    D = Discriminator(SEQ_LENGTH, num_class, vocab_size, dis_emb_size,
                      dis_filter_sizes, dis_num_filters, 0.2)
    G = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH,
                  START_TOKEN)

    # avoid occupy all the memory of the GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # sess
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # change the train data to real poems to be done
    gen_data_loader.create_batches(positive_file)

    log = open('./experiment-log.txt', 'w')

    # pre-train generator with maximum-likelihood estimation
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        start = time.time()
        loss = pre_train_epoch(sess, G, gen_data_loader)
        print("Epoch ", epoch, " loss: ", loss)
        print("per epoch time consumed: ", time.time() - start)
        # if epoch % 5 == 0:
        #     generate_samples(sess, G, BATCH_SIZE, generated_num, eval_file)
        #     likelihood_data_loader.create_batches(eval_file)
        #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #     print('pre-train epoch ', epoch, 'test_loss ', test_loss)
        #     buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
        #     log.write(buffer)

    # pre-train discriminator: 50 rounds of fresh negatives x 3 epochs each
    print("Start pretraining the discriminator")
    for _ in range(50):
        generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    D.input_x: x_batch,
                    D.input_y: y_batch,
                    D.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(D.train_op, feed)

    # rollout policy: a slowly-updated copy of G used for Monte-Carlo rewards
    g_beta = G_beta(G, update_rate=0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        # train generator once
        for it in range(1):
            samples = G.generate(sess)
            rewards = g_beta.get_reward(sess, samples, sample_time, D)
            feed = {G.x: samples, G.rewards: rewards}
            _ = sess.run(G.g_update, feed_dict=feed)

        # Test: log the last reward matrix every 5 batches
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            # generate_samples(sess, G, BATCH_SIZE, generated_num, eval_file)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\treward:\t' + str(
                rewards) + '\n'
            print('total_batch: ', total_batch, 'reward: ', rewards)
            log.write(buffer)

        # update G_beta with weight decay
        g_beta.update_params()

        # train the discriminator: 10 rounds of fresh negatives x 3 epochs
        for it in range(10):
            generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for batch in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        D.input_x: x_batch,
                        D.input_y: y_batch,
                        D.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(D.train_op, feed_dict=feed)

    # final generation
    print("Wrting final results to test file")
    test_file = "./final2.txt"
    generate_samples(sess, G, BATCH_SIZE, generated_num, test_file)
    print("Finished")
def main():
    """Conditional SeqGAN with a rhyme table: pre-train (or restore) G and D,
    adversarially train, and generate test samples.

    Fix applied: ``best`` (best pre-train loss / best pre-train accuracy) was
    re-initialized *inside* each epoch loop, so the comparison against it was
    always true and the ``*_best`` checkpoints were overwritten every epoch.
    Both trackers are now initialized once, before their loops.
    """
    # load rhyme table
    table = np.load("./data/table.npy")
    np.random.seed(SEED)
    random.seed(SEED)

    # data loader
    # gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    input_data_loader = Input_Data_loader(BATCH_SIZE)
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    D = Discriminator(SEQ_LENGTH, num_class, vocab_size, dis_emb_size,
                      dis_filter_sizes, dis_num_filters, 0.2)
    G = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH,
                  START_TOKEN, table, has_input=True)

    # avoid occupy all the memory of the GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # savers for different models
    saver_gen = tf.train.Saver()
    saver_dis = tf.train.Saver()
    saver_seqgan = tf.train.Saver()

    # gen_data_loader.create_batches(positive_file)
    input_data_loader.create_batches(x_file, y_file)

    log = open('./experiment-log.txt', 'w')

    # pre-train generator (or restore a previously pre-trained one)
    if pre_train_gen_path:
        print("loading pretrain generator model...")
        log.write("loading pretrain generator model...")
        restore_model(G, sess, saver_gen, pre_train_gen_path)
        print("loaded")
    else:
        log.write('pre-training generator...\n')
        print('Start pre-training...')
        best = 1000  # FIX: initialize once, not every epoch
        for epoch in range(PRE_GEN_NUM):
            s = time.time()
            # loss = pre_train_epoch(sess, G, gen_data_loader)
            loss = pre_train_epoch(sess, G, input_data_loader)
            print("Epoch ", epoch, " loss: ", loss)
            log.write("Epoch:\t" + str(epoch) + "\tloss:\t" + str(loss) + "\n")
            print("pre-train generator epoch time: ", time.time() - s, " s")
            if loss < best:
                # checkpoint only when the MLE loss actually improves
                saver_gen.save(sess, "./model/pre_gen/pretrain_gen_best")
                best = loss

    dev_loader = Input_Data_loader(BATCH_SIZE)
    dev_loader.create_batches(dev_x, dev_y)

    # pre-train discriminator (or restore a previously pre-trained one)
    if pre_train_dis_path:
        print("loading pretrain discriminator model...")
        log.write("loading pretrain discriminator model...")
        restore_model(D, sess, saver_dis, pre_train_dis_path)
        print("loaded")
    else:
        log.write('pre-training discriminator...\n')
        print("Start pre-train the discriminator")
        s = time.time()
        best = 0  # FIX: initialize once, not every epoch
        for epoch in range(PRE_DIS_NUM):
            # generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
            generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file,
                             input_data_loader)
            # dis_data_loader.load_train_data(positive_file, negative_file)
            dis_data_loader.load_train_data(y_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        D.input_x: x_batch,
                        D.input_y: y_batch,
                        D.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, acc = sess.run([D.train_op, D.accuracy], feed)
            print("Epoch ", epoch, " Accuracy: ", acc)
            log.write("Epoch:\t" + str(epoch) + "\tAccuracy:\t" + str(acc) +
                      "\n")
            # if epoch % 20 == 0 or epoch == PRE_DIS_NUM -1:
            #     print("saving at epoch: ", epoch)
            #     saver_dis.save(sess, "./model/per_dis/pretrain_dis", global_step=epoch)
            if acc > best:
                # checkpoint only when the accuracy actually improves
                saver_dis.save(sess, "./model/pre_dis/pretrain_dis_best")
                best = acc
        print("pre-train discriminator: ", time.time() - s, " s")

    # rollout policy for Monte-Carlo reward estimation
    g_beta = G_beta(G, update_rate=0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('Start adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        s = time.time()
        for it in range(ADV_GEN_TIME):
            for i in range(input_data_loader.num_batch):
                input_x, target = input_data_loader.next_batch()
                samples = G.generate(sess, input_x)
                rewards = g_beta.get_reward(sess, samples, input_x,
                                            sample_time, D)
                avg = np.mean(np.sum(rewards, axis=1), axis=0) / SEQ_LENGTH
                print(" epoch : %d time : %di: %d avg %f" %
                      (total_batch, it, i, avg))
                feed = {G.x: samples, G.rewards: rewards, G.inputs: input_x}
                _ = sess.run(G.g_update, feed_dict=feed)

        # Test: log the average per-token reward every 5 batches
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            avg = np.mean(np.sum(rewards, axis=1), axis=0) / SEQ_LENGTH
            buffer = 'epoch:\t' + str(total_batch) + '\treward:\t' + str(avg) + '\n'
            print('total_batch: ', total_batch, 'average reward: ', avg)
            log.write(buffer)
            saver_seqgan.save(sess, "./model/seq_gan/seq_gan",
                              global_step=total_batch)

        g_beta.update_params()

        # train the discriminator
        for it in range(ADV_GEN_TIME // GEN_VS_DIS_TIME):
            # generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
            generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file,
                             input_data_loader)
            dis_data_loader.load_train_data(y_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for batch in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        D.input_x: x_batch,
                        D.input_y: y_batch,
                        D.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(D.train_op, feed_dict=feed)
        print("Adversarial Epoch consumed: ", time.time() - s, " s")

    # final generation
    print("Finished")
    log.close()
    # save model
    print("Training Finished, starting to generating test ")
    test_loader = Input_Data_loader(batch_size=BATCH_SIZE)
    test_loader.create_batches(test_x, test_y)
    generate_samples(sess, G, BATCH_SIZE, test_num, test_file + "_final.txt",
                     test_loader)
def main():
    """LeakGAN training driver (movie-dialogue variant), ported to Python 3.

    Fix applied: this function used Python 2 syntax (``print`` statements,
    ``xrange``, ``cPickle``) while the rest of the file uses Python 3
    ``print(...)`` calls, so the module could not run under one interpreter.
    Ported mechanically: ``print`` statements -> ``print()`` calls,
    ``xrange`` -> ``range``, ``cPickle`` -> ``pickle`` (binary-mode open).
    No other behavior changed.
    """
    import pickle  # local import: replaces the Python-2-only cPickle

    print('start time : ')
    print(datetime.now())
    random.seed(SEED)
    np.random.seed(SEED)
    # metadata pickle carries (.., .., .., SEQ_LENGTH, vocab_size)
    _, _, _, SEQ_LENGTH, vocab_size = pickle.load(open(pickle_loc, 'rb'))
    print('SEQ_LENGTH', SEQ_LENGTH, 'vocab_size', vocab_size)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    discriminator = Discriminator(SEQ_LENGTH, num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH, num_classes=2, vocab_size=vocab_size,
                      emb_dim=EMB_DIM, dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters, batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM, start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE, goal_size=GOAL_SIZE,
                      step_size=4, D_model=discriminator)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 1
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Smoke-test: sample once from the untrained generator.
    for a in range(1):
        g = sess.run(leakgan.gen_x,
                     feed_dict={leakgan.drop_out: 0.8, leakgan.train: 1})
        print(g)
        print("epoch:", a, " ")

    log = open('save/experiment-log.txt', 'w')
    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
    gen_data_loader.create_batches(positive_file)
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables, max_to_keep=maxModelSave)

    if FLAGS.restore and FLAGS.model:
        # NOTE(review): tests a non-empty string, so the else branch only
        # triggers when FLAGS.model is empty — confirm intended logic.
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            print('input all arguments, "restore" and "model"')
            exit()
    else:
        if FLAGS.resD and model_path + '/' + FLAGS.model:
            # restore a pre-trained discriminator, then MLE pre-train G
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                     negative_file)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(loss) + '\n'
                    log.write(buffer)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            # pre-train generator
            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                     negative_file, 0)
                    print('pre-train epoch ', epoch, 'test_loss ', loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(loss) + '\n'
                    log.write(buffer)

            print('Start pre-training discriminator...')
            # Train 3 epoch on the generated data and do this for 80 times
            for _ in range(PRE_EPOCH_NUM):
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 negative_file, 0)
                dis_data_loader.load_train_data(positive_file, negative_file)
                for _ in range(3):
                    dis_data_loader.reset_pointer()
                    # NOTE(review): only one batch is consumed per pass here
                    # (no loop over dis_data_loader.num_batch) — preserved
                    # as-is; confirm whether a full-epoch loop was intended.
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op], feed)
                    buffer = str(D_loss) + '\n'
                    log.write(buffer)
                leakgan.update_feature_function(discriminator)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    print('#########################################################################')
    print('Start Adversarial Training...')
    print('start Adv time : ')
    print(datetime.now())
    log.write('adversarial training...\n')

    # Outer loop counts blocks of 10 adversarial epochs; iter1 indexes within.
    for total_batch in range(TOTAL_BATCH // 10):
        for iter1 in range(10):
            # Train the generator for one step
            for it in range(1):
                for gi in range(gencircle):
                    samples = leakgan.generate(sess, 1.0, 1)
                    rewards = get_reward(leakgan, discriminator, sess, samples,
                                         4, dis_dropout_keep_prob,
                                         (total_batch * 10 + iter1),
                                         gen_data_loader)
                    feed = {
                        leakgan.x: samples,
                        leakgan.reward: rewards,
                        leakgan.drop_out: 1.0
                    }
                    _, _, g_loss, w_loss = sess.run([
                        leakgan.manager_updates, leakgan.worker_updates,
                        leakgan.goal_loss, leakgan.worker_loss
                    ], feed_dict=feed)
                    print('total_batch: ', (total_batch * 10 + iter1), " ",
                          g_loss, " ", w_loss)

            # Test: dump samples and convert ids back to text
            testFileName = "./save/movie_" + str((total_batch * 10 + iter1)) + ".txt"
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                             testFileName, 0)
            convertor(testFileName, filedir='save/')
            if iter1 == 1 or (total_batch * 10 + iter1) == TOTAL_BATCH - 1:
                saver.save(sess, model_path + '/leakgan',
                           global_step=(total_batch * 10 + iter1))

            # Train the discriminator on fresh negatives
            for _ in range(15):
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 negative_file, 0)
                dis_data_loader.load_train_data(positive_file, negative_file)
                for _ in range(3):
                    dis_data_loader.reset_pointer()
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op], feed)
                    buffer = str(D_loss) + '\n'
                    log.write(buffer)
                leakgan.update_feature_function(discriminator)

        # Interleave: a few MLE epochs for the generator ("teacher forcing")
        for epoch in range(5):
            loss = pre_train_epoch(sess, leakgan, gen_data_loader)

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                             negative_file, 0)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.D_input_x: x_batch,
                    discriminator.D_input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                D_loss, _ = sess.run(
                    [discriminator.D_loss, discriminator.D_train_op], feed)
                buffer = str(D_loss) + '\n'
                log.write(buffer)
            leakgan.update_feature_function(discriminator)

    log.close()
    print('end time : ')
    print(datetime.now())
def main():
    """Input-conditioned SeqGAN: MLE pre-train G (v2 loaders), pre-train D,
    adversarial training, with BLEU evaluation on a dev set and a final test
    generation pass.

    Relies on module-level constants/paths (SEED, BATCH_SIZE, x_file, y_file,
    dev_x, dev_y, test_x, test_y, ...); logs to ./experiment-log.txt.
    """
    # set random seed (may important to the result)
    np.random.seed(SEED)
    random.seed(SEED)

    # data loader
    # gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    input_data_loader = Input_Data_loader(BATCH_SIZE)
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    D = Discriminator(SEQ_LENGTH, num_class, vocab_size, dis_emb_size,
                      dis_filter_sizes, dis_num_filters, 0.2)
    G = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH,
                  START_TOKEN, has_input=True)

    # avoid occupy all the memory of the GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # sess
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # change the train data to real poems to be done
    # gen_data_loader.create_batches(positive_file)
    input_data_loader.create_batches(x_file, y_file)

    log = open('./experiment-log.txt', 'w')

    # pre-train generator with maximum-likelihood estimation
    print('Start pre-training...')
    log.write('pre-training generator...\n')
    for epoch in range(PRE_EPOCH_NUM):
        s = time.time()
        # loss = pre_train_epoch(sess, G, gen_data_loader)
        loss = pre_train_epoch_v2(sess, G, input_data_loader)
        print("Epoch ", epoch, " loss: ", loss)
        print("pre-train generator epoch time: ", time.time() - s, " s")

    # BLEU of the purely MLE-trained generator on the dev set (baseline)
    dev_loader = Input_Data_loader(BATCH_SIZE)
    dev_loader.create_batches(dev_x, dev_y)
    generate_samples_v2(sess, G, BATCH_SIZE, dev_num,
                        dev_file + "_no_adv" + ".txt", dev_loader)
    bleu = calc_bleu(dev_y, dev_file + "_no_adv.txt")
    print("pre-train bleu: ", bleu)
    log.write("pre-train bleu: %f " % bleu)

    # pre-train discriminator
    print("Start pre-train the discriminator")
    s = time.time()
    for _ in range(PRE_DIS_NUM):
        # generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
        generate_samples_v2(sess, G, BATCH_SIZE, generated_num, negative_file,
                            input_data_loader)
        # dis_data_loader.load_train_data(positive_file, negative_file)
        dis_data_loader.load_train_data(y_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    D.input_x: x_batch,
                    D.input_y: y_batch,
                    D.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, acc = sess.run([D.train_op, D.accuracy], feed)
        # last-batch accuracy of this round
        # NOTE(review): placement reconstructed from mangled source — confirm.
        print(acc)
    print("pretrain discriminator: ", time.time() - s, " s")

    # rollout policy for Monte-Carlo reward estimation
    g_beta = G_beta(G, update_rate=0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        # train generator once
        s = time.time()
        for it in range(1):
            # samples = G.generate(sess)
            # print(input_data_loader.get_all().shape)
            # input_data_loader.reset_pointer()
            # samples = []
            # for i in range(input_data_loader.num_batch):
            input_x = input_data_loader.next_batch()[0]
            samples = G.generate_v2(sess, input_x)
            # print(sample)
            # print(samples)
            rewards = g_beta.get_reward(sess, samples, sample_time, D)
            feed = {G.x: samples, G.rewards: rewards, G.inputs: input_x}
            _ = sess.run(G.g_update, feed_dict=feed)

        # Test: average per-token reward and dev-set BLEU every 5 batches
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            # generate_samples(sess, G, BATCH_SIZE, generated_num, eval_file)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            avg = np.mean(np.sum(rewards, axis=1), axis=0) / SEQ_LENGTH
            buffer = 'epoch:\t' + str(total_batch) + '\treward:\t' + str(
                avg) + '\n'
            print('total_batch: ', total_batch, 'average reward: ', avg)
            log.write(buffer)
            print("generating dev sentences")
            generate_samples_v2(sess, G, BATCH_SIZE, dev_num,
                                dev_file + "_" + str(total_batch) + ".txt",
                                dev_loader)
            bleu = calc_bleu(dev_y, dev_file + "_" + str(total_batch) + ".txt")
            print("dev bleu: ", bleu)
            log.write("bleu: %.5f \n" % bleu)

        # update G_beta with weight decay
        g_beta.update_params()

        # train the discriminator
        for it in range(DIS_VS_GEN_TIME):
            # generate_samples(sess, G, BATCH_SIZE, generated_num, negative_file)
            generate_samples_v2(sess, G, BATCH_SIZE, generated_num,
                                negative_file, input_data_loader)
            dis_data_loader.load_train_data(y_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for batch in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        D.input_x: x_batch,
                        D.input_y: y_batch,
                        D.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(D.train_op, feed_dict=feed)
        print("Adversarial Epoch consumed: ", time.time() - s, " s")

    # finnal generation
    print("Finished")
    log.close()
    # save model
    print("Training Finished, starting to generating test ")
    test_loader = Input_Data_loader(batch_size=BATCH_SIZE)
    test_loader.create_batches(test_x, test_y)
    generate_samples_v2(sess, G, BATCH_SIZE, test_num,
                        test_file + "_final.txt", test_loader)
def main():
    """SeqGAN training driver (TF1): MLE pre-train the generator with early
    stopping, pre-train the discriminator on real-vs-generated data, then run
    adversarial training with rollout policy-gradient rewards, checkpointing
    every adversarial batch.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    # vocab_size = 97
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN, learning_rate=0.01)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # don't grab all GPU memory up front
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=10)

    # First, use the oracle model to provide the positive examples, which are
    # sampled from the oracle data distribution.
    gen_data_loader.create_batches(positive_file)

    # ---- pre-train generator (MLE) with a sliding early-stop window ----
    print('Start pre-training...')
    early_stop_buffer = [10.] * 5
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 2 == 0:
            # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_real_file)
            test_loss = target_loss(sess, generator, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            # Stop once the oldest tracked loss is smaller than every newer
            # one, i.e. no improvement over the whole window.
            early_stop_buffer = early_stop_buffer[1:]
            early_stop_buffer.append(test_loss)
            if all(early_stop_buffer[0] < np.asarray(early_stop_buffer[1:])):
                break

    # ---- pre-train discriminator ----
    print('Start pre-training discriminator...')
    # Train 3 epochs on freshly generated negatives and do this 50 times.
    for e in range(50):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)
        print('Epoch {}'.format(e))

    rollout = ROLLOUT_OLD(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')

    # Pick the first unused ./Model/model<i>/ directory for checkpoints and
    # create a matching ./log/model<i>/ directory.
    model_idx = 1
    fname = 'model' + str(model_idx)
    model_save_path = './Model/' + fname + '/'
    while os.path.exists(model_save_path):
        model_idx += 1
        fname = 'model' + str(model_idx)
        model_save_path = './Model/' + fname + '/'
    os.makedirs(model_save_path)
    os.makedirs(os.path.join('./log', fname))

    early_stop_buffer = [10.] * 4
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step on rollout rewards.
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, SAMP_NUM,
                                         discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Periodic evaluation with the same early-stop window as pre-training.
        if total_batch % 3 == 0 or total_batch == TOTAL_BATCH - 1:
            # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_real_file)
            test_loss = target_loss(sess, generator, likelihood_data_loader)
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            early_stop_buffer = early_stop_buffer[1:]
            early_stop_buffer.append(test_loss)
            if all(early_stop_buffer[0] < np.asarray(early_stop_buffer[1:])):
                break

        # Update roll-out parameters.
        rollout.update_params()

        # Train the discriminator on one fresh batch of negatives.
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(1):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

        # Checkpoint weights every batch; export the meta-graph only once.
        saver.save(sess, os.path.join(model_save_path, fname),
                   global_step=total_batch, write_meta_graph=False)
        metagraph_filename = os.path.join(model_save_path, fname + '.meta')
        if not os.path.exists(metagraph_filename):
            saver.export_meta_graph(metagraph_filename)
def main():
    """Conditional SeqGAN training driver (TF1) for midi generation.

    Optionally pre-trains generator and discriminator (config['pretrain']),
    saves/loads a checkpoint, then runs adversarial training with rollout
    rewards, logging BLEU against a validation set and restarting from the
    pre-trained checkpoint when mode collapse is detected.
    """
    random.seed(SEED)
    np.random.seed(SEED)

    # data loaders declaration
    # loaders for generator, discriminator, and additional validation data
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    eval_data_loader = Gen_Data_loader(BATCH_SIZE)

    # define generator and discriminator
    # general structures are same with the original model
    # learning rates for generator needs heavy tuning for general use
    # l2 reg for D & G also affects performance
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN, GENERATOR_LR, REWARD_GAMMA)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    # VRAM limitation for efficient deployment
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    sess.run(tf.global_variables_initializer())

    # define saver
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=1)

    # generate real data from the true dataset
    gen_data_loader.create_batches(positive_file)
    # generate real validation data from true validation dataset
    eval_data_loader.create_batches(valid_file)

    # NOTE: named 'timestamp' (not 'time') so the time module is not shadowed.
    timestamp = str(datetime.datetime.now())[:-7]
    log = open('save/experiment-log-conditional' + timestamp + '.txt', 'w')
    log.write(str(config) + '\n')
    log.write('D loss: original\n')
    log.flush()
    #summary_writer = tf.summary.FileWriter('save/tensorboard/', graph=tf.get_default_graph())

    if config['pretrain']:
        # ---- pre-train generator ----
        print('Start pre-training...')
        log.write('pre-training...\n')
        for epoch in range(PRE_GEN_EPOCH):
            # calculate the loss by running an epoch
            loss = pre_train_epoch_condtional(sess, generator, gen_data_loader)
            # measure bleu score with the validation set
            bleu_score = calculate_bleu(sess, generator, eval_data_loader)
            # since the real data is the true data distribution, only evaluate
            # the pretraining loss (no oracle model for general use)
            buffer = 'pre-train epoch: ' + str(
                epoch) + ' pretrain_loss: ' + str(loss) + ' bleu: ' + str(
                    bleu_score)
            print(buffer)
            log.write(buffer + '\n')
            log.flush()
            # generate 5 test samples at the first and last pre-train epoch;
            # samples are post-processed to midi files in the output folder
            if epoch == 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 negative_file)
                POST.main(negative_file, 5, str(-1) + '_vanilla_',
                          'midi_conditional')
            elif epoch == PRE_GEN_EPOCH - 1:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 negative_file)
                POST.main(negative_file, 5, str(-PRE_GEN_EPOCH) + '_vanilla_',
                          'midi_conditional')

        # ---- pre-train discriminator ----
        print('Start pre-training discriminator...')
        # Train 3 epoch on the generated data and do this for PRE_DIS_EPOCH
        # times; this trick follows the original work but needs tuning
        for epochs in range(PRE_DIS_EPOCH):
            generate_samples_conditional_v2(sess, gen_data_loader, generator,
                                            BATCH_SIZE, generated_num,
                                            negative_file)
            D_loss = 0
            for _ in range(3):
                dis_data_loader.load_train_data(positive_file, negative_file)
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
                    D_loss += discriminator.loss.eval(feed, session=sess)
            buffer = 'epoch: ' + str(epochs + 1) + ' D loss: ' + str(
                D_loss / dis_data_loader.num_batch / 3)
            print(buffer)
            log.write(buffer + '\n')
            log.flush()

        # save the pre-trained checkpoint for future use
        # if one wants adv. training only, comment out the pre-training
        # section after the save
        save_checkpoint(sess, saver, PRE_GEN_EPOCH, PRE_DIS_EPOCH)

    # define rollout target object; the second parameter is the target update
    # rate — higher rates make the rollout "conservative" and stabilize
    # learning by constraining divergence of the generator
    rollout = ROLLOUT(generator, ROLLOUT_UPDATE_RATE)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')

    if not config['pretrain']:
        # load checkpoint of pre-trained model
        load_checkpoint(sess, saver)

    # 0.001 to 0.01
    if config['x10adv_g']:
        generator.learning_rate *= 10

    for total_batch in range(TOTAL_BATCH):
        G_loss = 0
        # Train the generator for one step
        for it in range(epochs_generator):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, config['rollout_num'],
                                         discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
            G_loss += generator.g_loss.eval(feed, session=sess)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        D_loss = 0
        for _ in range(epochs_discriminator):
            generate_samples_conditional_v2(sess, gen_data_loader, generator,
                                            BATCH_SIZE, generated_num,
                                            negative_file)
            for _ in range(config['epochs_discriminator_multiplier']):
                dis_data_loader.load_train_data(positive_file, negative_file)
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
                    D_loss += discriminator.loss.eval(feed, session=sess)

        # measure stability and performance evaluation with bleu score
        bleu_score = calculate_bleu(sess, generator, eval_data_loader)
        buffer = 'epoch: ' + str(total_batch + 1) + \
                 ', G_adv_loss: %.12f' % (G_loss / epochs_generator) + \
                 ', D loss: %.12f' % (D_loss / epochs_discriminator / config['epochs_discriminator_multiplier']) + \
                 ', bleu score: %.12f' % bleu_score
        print(buffer)
        log.write(buffer + '\n')
        log.flush()

        # restart from the pre-trained checkpoint on suspected mode collapse
        if config['infinite_loop'] is True:
            if bleu_score < config['loop_threshold']:
                buffer = 'Mode collapse detected, restarting from pretrained model...'
                print(buffer)
                log.write(buffer + '\n')
                log.flush()
                load_checkpoint(sess, saver)

        # generate test samples conditionally and postprocess to midi files
        #generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        # rng = np.random.randint(0, high=gen_data_loader.num_batch, size=1)
        # random_batch = np.squeeze(gen_data_loader.sequence_batch[rng])
        generate_samples_conditional_v2(sess, gen_data_loader, generator,
                                        BATCH_SIZE, generated_num,
                                        negative_file)
        POST.main(negative_file, 5, str(total_batch) + '_vanilla_',
                  'midi_conditional')

    log.close()
def main():
    """SeqGAN training driver (PyTorch).

    Uses a second Generator instance (oracle=True) as the target oracle,
    MLE pre-trains the generator against oracle samples, pre-trains the
    discriminator, then alternates policy-gradient generator updates
    (rewards from Monte-Carlo rollouts) with discriminator retraining.
    Progress (oracle NLL, Self-BLEU, D loss) is appended to
    'save/experiment-log.txt'.
    """
    # Seed every RNG in play for reproducibility.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 2000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    # The oracle is the same Generator architecture with oracle=True;
    # presumably it gets fixed random weights — TODO confirm in Generator.
    generator = Generator(vocab_size, EMB_DIM, HIDDEN_DIM, 1, START_TOKEN,
                          SEQ_LENGTH).to(device)
    target_lstm = Generator(vocab_size, EMB_DIM, HIDDEN_DIM, 1, START_TOKEN,
                            SEQ_LENGTH, oracle=True).to(device)
    discriminator = Discriminator(vocab_size, dis_embedding_dim,
                                  dis_filter_sizes, dis_num_filters,
                                  dis_dropout).to(device)

    # Positive examples are sampled from the oracle's distribution.
    generate_samples(target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    # Separate optimizers for pre-training and adversarial generator updates;
    # the discriminator shares one optimizer across both phases.
    pre_gen_opt = torch.optim.Adam(generator.parameters(), 1e-2)
    adv_gen_opt = torch.optim.Adam(generator.parameters(), 1e-2)
    dis_opt = torch.optim.Adam(discriminator.parameters(), 1e-4)
    dis_criterion = nn.NLLLoss()

    log = open('save/experiment-log.txt', 'w')

    # ---- pre-train generator (MLE), evaluating oracle NLL every 5 epochs ----
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(generator, pre_gen_opt, gen_data_loader)
        if (epoch + 1) % 5 == 0:
            generate_samples(generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch + 1, '\tnll:\t', test_loss)
            buffer = 'epoch:\t' + str(epoch + 1) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    # ---- pre-train discriminator ----
    print('Start pre-training discriminator...')
    # Train 3 epoch on the generated data and do this for 50 times
    for e in range(50):
        generate_samples(generator, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        d_total_loss = []
        for _ in range(3):
            dis_data_loader.reset_pointer()
            total_loss = []
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                dis_output = discriminator(x_batch.detach())
                d_loss = dis_criterion(dis_output, y_batch.detach())
                dis_opt.zero_grad()
                d_loss.backward()
                dis_opt.step()
                total_loss.append(d_loss.data.cpu().numpy())
            d_total_loss.append(np.mean(total_loss))
        if (e + 1) % 5 == 0:
            buffer = 'Epoch [{}], discriminator loss [{:.4f}]\n'.format(
                e + 1, np.mean(d_total_loss))
            print(buffer)
            log.write(buffer)

    # Rollout shadows the generator with a 0.8 weight-decay update rate.
    rollout = Rollout(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    gan_loss = GANLoss()

    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step; D is frozen (eval mode) while it
        # is used only to score rollouts.
        discriminator.eval()
        for it in range(1):
            samples, _ = generator.sample(num_samples=BATCH_SIZE)
            # 16 Monte-Carlo rollouts per position for the reward estimate.
            rewards = rollout.get_reward(samples, 16, discriminator)
            prob = generator(samples.detach())
            adv_loss = gan_loss(prob, samples.detach(), rewards.detach())
            adv_gen_opt.zero_grad()
            adv_loss.backward()
            # Clip gradients to stabilize the policy-gradient update.
            nn.utils.clip_grad_norm_(generator.parameters(), 5.0)
            adv_gen_opt.step()

        # Test: oracle NLL and Self-BLEU every 5 adversarial batches.
        if (total_batch + 1) % 5 == 0:
            generate_samples(generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(target_lstm, likelihood_data_loader)
            self_bleu_score = self_bleu(generator)
            buffer = 'epoch:\t' + str(total_batch + 1) + '\tnll:\t' + str(
                test_loss) + '\tSelf Bleu:\t' + str(self_bleu_score) + '\n'
            print(buffer)
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator: 5 rounds of fresh negatives, 3 epochs each.
        discriminator.train()
        for _ in range(5):
            generate_samples(generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            d_total_loss = []
            for _ in range(3):
                dis_data_loader.reset_pointer()
                total_loss = []
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    x_batch = x_batch.to(device)
                    y_batch = y_batch.to(device)
                    dis_output = discriminator(x_batch.detach())
                    d_loss = dis_criterion(dis_output, y_batch.detach())
                    dis_opt.zero_grad()
                    d_loss.backward()
                    dis_opt.step()
                    total_loss.append(d_loss.data.cpu().numpy())
                d_total_loss.append(np.mean(total_loss))
            if (total_batch + 1) % 5 == 0:
                buffer = 'Epoch [{}], discriminator loss [{:.4f}]\n'.format(
                    total_batch + 1, np.mean(d_total_loss))
                print(buffer)
                log.write(buffer)

    log.close()
# vocab.append('<u_k_n_o_w_n>') # embd.append(['0' for _ in range(embedding_size)]) # src_vocab_size = len(vocab) # embedding = np.asarray(embd) #vocab to int # vocab_to_int = {} # for i in range(src_vocab_size): # vocab_to_int[vocab[i]] = i print('Glove vector loaded. Total vocab: ', src_vocab_size, '. embedding_size: ', embedding_size) gen_data_loader = Gen_Data_loader(BATCH_SIZE) #likelihood_data_loader = Gen_Data_loader(BATCH_SIZE) # For testing dis_data_loader = Dis_dataloader(BATCH_SIZE) generator = Generator(src_vocab_size, BATCH_SIZE, embedding_size, HIDDEN_DIM, embedding, SEQ_LENGTH, START_TOKEN, gen_filter_sizes, gen_num_filters) # target_params = cPickle.load(open('save/target_params_py3.pkl', 'rb')) # target_lstm = TARGET_LSTM(src_vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model #TODO change discriminator's embedding layer discriminator = Discriminator(sequence_length=SEQ_LENGTH, num_classes=2, vocab_size=src_vocab_size, embedding_size=embedding_size, filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)