Example #1
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    stringGenerator = TextGenerator('../corpus/index2word.pickle',
                                    '../corpus/word2index.pickle',
                                    '../corpus/all.code')

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    #vocab_size = 5000
    vocab_size = len(stringGenerator.index2Word)

    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_params[0] = np.random.rand(vocab_size, 32).astype(np.float32)
    target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32)
    target_params[-1] = np.random.rand(vocab_size).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.initialize_all_variables())

    #generate_samples(sess, target_lstm, 64, 10000, positive_file)
    stringGenerator.saveSamplesToFile(20, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            stringGenerator.saveSamplesToFile(20, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    stringGenerator.saveSamplesToFile(20, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    log.close()
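All of these examples lean on module-level imports and hyperparameters defined elsewhere in their projects. As a rough, hedged sketch, the following is what Example #1 appears to assume; the numeric values are inferred from the literals in the snippet (the 64, 32, 32, 20, 0 passed to TARGET_LSTM, the assert START_TOKEN == 0) and from the commented defaults in Example #6, while the file paths are placeholders rather than the project's real names.

# Hypothetical module-level setup for Example #1 (inferred, not the original file).
import random
import cPickle
import numpy as np
import tensorflow as tf
# project-specific classes (TextGenerator, Gen_Data_loader, Likelihood_data_loader,
# TARGET_LSTM, get_trainable_model, ...) come from the repo's own modules

SEED = 88                 # Example #6 hard-codes 88; any fixed seed works
START_TOKEN = 0           # asserted in the snippet
BATCH_SIZE = 64           # matches TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, ...)
PRE_EPOCH_NUM = 120       # supervised pre-training epochs (120 in Example #6's comments)
generated_num = 10000     # samples written per evaluation (10000 in Example #6)
positive_file = 'save/real_data.txt'   # placeholder path
eval_file = 'save/eval_file.txt'       # placeholder path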
Example #2
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    # load data (likelihood?)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 68


    # load generator with parameters
    generator = get_trainable_model(vocab_size)
    # target_params = cPickle.load(open('save/target_params.pkl'))
    target_params = initialize_parameters(68)

    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate synthetic data that serves as the original (positive) data
    # generate_samples(sess, target_lstm, 64, 100, positive_file)


    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    print 'final pre-train epoch ', 'test_loss ', test_loss
    buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    log.close()
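Examples #2 and #4 (and most of the later ones) call a generate_samples(sess, model, batch_size, generated_num, output_file) helper that is never defined in these snippets. Below is a minimal sketch of the file-writing behaviour the call sites imply, assuming the model exposes a generate(sess) method returning a batch of token-id sequences; the projects' actual helpers may differ in details.

def generate_samples(sess, trainable_model, batch_size, generated_num, output_file):
    # Assumed helper: sample generated_num sequences batch by batch and write
    # one space-separated line of token ids per sequence.
    generated_samples = []
    for _ in range(int(generated_num / batch_size)):
        generated_samples.extend(trainable_model.generate(sess))
    with open(output_file, 'w') as fout:
        for sample in generated_samples:
            fout.write(' '.join(str(token) for token in sample) + '\n')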
Example #3
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    print(gen_data_loader)
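Example #3 passes a target_params variable to TARGET_LSTM without defining it in the snippet. Other examples (#1, #11 and #12) load the oracle parameters from a pickle file, so the missing line is presumably something along the lines of this sketch:

# Assumed: oracle parameters pickled by the project, as loaded in Examples #1 and #12.
import cPickle
target_params = cPickle.load(open('save/target_params.pkl'))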
Example #4
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    best_score = 9.5

    generator = get_trainable_model(vocab_size)
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start scheduled sampling training...'
    log.write('scheduled sampling training...\n')
    curriculum_rate = 1.0
    for epoch in xrange(EPOCH_NUM):
        curriculum_rate = max(0.0, curriculum_rate - 0.002)
        loss = pre_train_epoch(sess, generator, gen_data_loader, curriculum_rate)
        generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
        likelihood_data_loader.create_batches(eval_file)
        test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        print 'pre-train epoch ', epoch, 'curriculum rate:', curriculum_rate, 'test_loss ', test_loss
        buffer = str(epoch) + ' ' + str(curriculum_rate) + ' ' + str(test_loss) + '\n'
        log.write(buffer)

        if test_loss < best_score:
            best_score = test_loss
            print 'best score: ', test_loss
            generate_samples(sess, generator, BATCH_SIZE, 100000, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            significance_test(sess, target_lstm, likelihood_data_loader, 'significance/schedule_sampling.txt')

    log.close()
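Example #4, like most of the others, scores the generator with target_loss(sess, target_lstm, likelihood_data_loader), which is not shown in any snippet. A sketch of what such an oracle-NLL evaluation typically looks like in SeqGAN-style code, under the assumption that the oracle exposes a pretrain_loss tensor and an input placeholder x:

def target_loss(sess, target_lstm, data_loader):
    # Assumed helper: average oracle negative log-likelihood over every batch
    # produced by the data loader.
    nll = []
    data_loader.reset_pointer()
    for _ in range(data_loader.num_batch):
        batch = data_loader.next_batch()
        loss = sess.run(target_lstm.pretrain_loss, {target_lstm.x: batch})
        nll.append(loss)
    return np.mean(nll)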
Example #5
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    # assert START_TOKEN == 0

    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM,
                          HIDDEN_DIM, MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE,
                              EMB_DIM, HIDDEN_DIM, MAX_LENGTH, 0)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(
            sequence_length=MAX_LENGTH,
            num_classes=2,
            vocab_size=vocab_size,
            embedding_size=dis_embedding_dim,
            filter_sizes=dis_filter_sizes,
            num_filters=dis_num_filters,
            l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [param for param in tf.trainable_variables()
                  if 'discriminator' in param.name]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(
        cnn.loss, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(
        dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        if D_WEIGHT == 0:
            return 0, 0

        negative_samples = generate_samples(
            sess, generator, BATCH_SIZE, POSITIVE_NUM)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_samples, negative_samples)
        dis_batches = dis_data_loader.batch_iter(
            zip(dis_x_train, dis_y_train), dis_batch_size, dis_num_epochs
        )

        for batch in dis_batches:
            x_batch, y_batch = zip(*batch)
            feed = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dis_dropout_keep_prob
            }
            _, step, loss, accuracy = sess.run(
                [dis_train_op, dis_global_step, cnn.loss, cnn.accuracy], feed)
        print('\tD loss  :   {}'.format(loss))
        print('\tAccuracy: {}'.format(accuracy))
        return loss, accuracy

    # Pretrain is checkpointed and only executes if we don't find a checkpoint
    saver = tf.train.Saver()
    ckpt_dir = 'checkpoints/{}_pretrain'.format(PREFIX)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
    if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
        saver.restore(sess, ckpt_file)
        print('Pretrain loaded from previous checkpoint {}'.format(ckpt_file))
    else:
        sess.run(tf.global_variables_initializer())
        pretrain(sess, generator, target_lstm, train_discriminator)
        path = saver.save(sess, ckpt_file)
        print('Pretrain finished and saved at {}'.format(path))

    # create reward function
    batch_reward = make_reward(train_samples)

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Reinforcement Training Generator...')
    results_rows = []
    for nbatch in range(TOTAL_BATCH):
        results = OrderedDict({'exp_name': PREFIX})
        if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
            print('* Making samples')
            if nbatch % 10 == 0:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, BIG_SAMPLE_NUM)
            else:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(gen_samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('batch_num: {}'.format(nbatch))
            print('test_loss: {}'.format(test_loss))
            results['Batch'] = nbatch
            results['test_loss'] = test_loss

            if test_loss < best_score:
                best_score = test_loss
                print('best score: %f' % test_loss)

            # results
            mm.compute_results(gen_samples, train_samples, ord_dict, results)

        print('#########################################################################')
        print('-> Training generator with RL.')
        print('G Epoch {}'.format(nbatch))

        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(
                sess, samples, 16, cnn, batch_reward, D_WEIGHT)
            print('Rewards be like...')
            print(rewards)
            nll = generator.generator_step(sess, samples, rewards)

            print('neg-loglike: {}'.format(nll))
            results['neg-loglike'] = nll
        rollout.update_params()

        # generate for discriminator
        print('-> Training Discriminator')
        for i in range(D):
            print('D_Epoch {}'.format(i))
            d_loss, accuracy = train_discriminator()
            results['D_loss_{}'.format(i)] = d_loss
            results['Accuracy_{}'.format(i)] = accuracy
        print('results')
        results_rows.append(results)
        if nbatch % params["EPOCH_SAVES"] == 0:
            save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    # write results
    save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    print('\n:*** FINISHED ***')
    return
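Note that Example #5 uses generate_samples differently from Examples #1-#4: no output file is passed, and the return value is handed straight to likelihood_data_loader.create_batches(...) and to the discriminator loader. The variant implied by these call sites returns the samples in memory, roughly as in this sketch (an assumption, not the project's exact helper):

def generate_samples(sess, trainable_model, batch_size, generated_num):
    # Assumed in-memory variant: return a list of sampled token-id sequences
    # instead of writing them to a file.
    generated_samples = []
    for _ in range(int(generated_num / batch_size)):
        generated_samples.extend(trainable_model.generate(sess))
    return generated_samples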
Example #6
def main(FLAGS):
    #########################################################################################
    #  Generator  Hyper-parameters
    ######################################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32  # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32  # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20  # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.gen_pretrain_epoch_num  # 120 # supervise (maximum likelihood estimation) epochs for generator
    DISC_PRE_EPOCH_NUM = FLAGS.dis_pretrain_epoch_num  # 50 # supervise (maximum likelihood estimation) epochs for discriminator
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  #64
    gen_dropout_keep_prob = FLAGS.gen_dropout_keep_prob  # 0.75
    gen_num_recurrent_layers = FLAGS.gen_num_recurrent_layers  # 1
    gen_learning_rate = FLAGS.gen_learning_rate

    #########################################################################################
    #  Discriminator  Hyper-parameters
    #########################################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    dis_dropout_keep_prob = 0.75
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  #64

    #########################################################################################
    #  Basic Training Parameters
    #########################################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 200 #num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000

    #########################################################################################
    #  Data configurations
    #########################################################################################
    use_real_world_data = True
    real_data_file_path = FLAGS.dataset_path  # './data/text8/text8'
    dataset_name = os.path.basename(real_data_file_path)
    base_token = FLAGS.base_token  # 'char'

    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:

        real_data_train_file = real_data_file_path + '-train'
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-{}-dict.json'.format(
            base_token)

        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)

        map, inv_map = create_real_data_dict(real_data_train_file,
                                             real_data_dict_file, base_token)
        vocab_size = len(map)

        if dataset_name == 'text8' and base_token == 'char':
            assert vocab_size == 27  # SORRY FOR THE HARD CODING
        elif dataset_name == 'ptb' and base_token == 'word':
            assert vocab_size == 10001  # SORRY FOR THE HARD CODING
        elif dataset_name == 'toy' and base_token == 'word':
            assert vocab_size == 8  # SORRY FOR THE HARD CODING
        elif dataset_name == 'wt2' and base_token == 'word':
            assert vocab_size == 33279  # SORRY FOR THE HARD CODING
        else:
            raise TypeError

        gen_data_loader = Gen_Data_loader_text(BATCH_SIZE,
                                               map,
                                               inv_map,
                                               seq_len=SEQ_LENGTH,
                                               token_type=base_token)
        dis_data_loader = Dis_dataloader_text(BATCH_SIZE,
                                              map,
                                              inv_map,
                                              seq_len=SEQ_LENGTH,
                                              token_type=base_token)

    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
        vocab_size = 5000
        dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size,
                          BATCH_SIZE,
                          EMB_DIM,
                          HIDDEN_DIM,
                          SEQ_LENGTH,
                          START_TOKEN,
                          dropout_keep_prob=gen_dropout_keep_prob,
                          num_recurrent_layers=gen_num_recurrent_layers)

    if not use_real_world_data:
        target_params = pickle.load(open('save/target_params.pkl'))
        target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                                  SEQ_LENGTH, START_TOKEN,
                                  target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        pass
    else:
        # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file)
        gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)

        # update learning rate
        if epoch > 5:
            gen_learning_rate /= FLAGS.gen_learning_decay * 1.

        if epoch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch,
                EXPERIMENT_NAME + '_pretrain_epoch_%0d' % epoch)
            saver.save(sess, save_file)

        if use_real_world_data:
            gen_data_loader.create_batches(real_data_train_file,
                                           limit_num_samples=generated_num)

        loss = pre_train_epoch(sess, generator, gen_data_loader,
                               gen_learning_rate)
        if epoch % 1 == 0:
            if use_real_world_data:
                generate_real_data_samples(
                    sess, generator, BATCH_SIZE, generated_num,
                    eval_file + "_epoch_%0d.txt" % epoch, inv_map, base_token)
                test_loss = 0  # FIXME - TEMP
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)

            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print('Start pre-training discriminator...')
    # Train 3 epochs on the generated data; repeat this for DISC_PRE_EPOCH_NUM rounds
    for epoch in range(DISC_PRE_EPOCH_NUM):
        print("start epoch %0d" % epoch)
        if use_real_world_data:
            generate_real_data_samples(sess, generator, BATCH_SIZE,
                                       generated_num, negative_file, inv_map,
                                       base_token)
            dis_data_loader.load_train_data(real_data_train_file,
                                            negative_file)
        else:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)

        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)

        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {
                generator.x: samples,
                generator.rewards: rewards,
                generator.learning_rate: 0.01
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            if not use_real_world_data:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(5):

            if use_real_world_data:
                generate_real_data_samples(sess, generator, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_map, base_token)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 negative_file)
                dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    #
    # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text(BATCH_SIZE,map,inv_map)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)

    log.close()
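Example #6's adversarial step hinges on rewards = rollout.get_reward(sess, samples, 16, discriminator). In SeqGAN-style code this is a Monte-Carlo rollout: each prefix of a sampled sequence is completed rollout_num times by the rollout policy, and the discriminator's probability of "real" is averaged into a per-timestep reward. The sketch below illustrates that idea as a standalone function; the attribute names (rollout.x, rollout.given_num, rollout.gen_x, discriminator.ypred_for_auc) are assumptions about the project's classes, not confirmed API.

def monte_carlo_rewards(sess, rollout, discriminator, input_x, rollout_num, seq_length):
    # Illustrative sketch of the rollout reward, not the project's ROLLOUT.get_reward.
    rewards = np.zeros((len(input_x), seq_length))
    for _ in range(rollout_num):
        for given_num in range(1, seq_length):
            # complete each sequence from position given_num onwards
            samples = sess.run(rollout.gen_x,
                               {rollout.x: input_x, rollout.given_num: given_num})
            ypred = sess.run(discriminator.ypred_for_auc,
                             {discriminator.input_x: samples,
                              discriminator.dropout_keep_prob: 1.0})
            rewards[:, given_num - 1] += np.array([p[1] for p in ypred])
        # the finished sequence is scored directly by the discriminator
        ypred = sess.run(discriminator.ypred_for_auc,
                         {discriminator.input_x: input_x,
                          discriminator.dropout_keep_prob: 1.0})
        rewards[:, seq_length - 1] += np.array([p[1] for p in ypred])
    return rewards / float(rollout_num)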
Example #7
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(re_batch_size)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN, MID_LAYER_G)
    rewarder = Rewarder(vocab_size,
                        BATCH_SIZE,
                        EMB_DIM * 4,
                        HIDDEN_DIM * 4,
                        SEQ_LENGTH,
                        START_TOKEN,
                        MID_LAYER_R,
                        l2_reg_lambda=re_l2_reg_lambda)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    # ground_loss = target_loss(sess, target_lstm, gen_data_loader)
    # print 'Ground-Truth:', ground_loss

    log = open('save/experiment-ent' + str(entropy_w), 'w')
    #  pre-train generator
    if restore is False:
        print 'Start pre-training...'
        log.write('pre-training...\n')
        for epoch in xrange(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num,
                                 eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print 'pre-train epoch ', epoch, 'test_loss ', test_loss
                buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                log.write(buffer)

        print 'Start pre-training rewarder...'
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, np.ones(BATCH_SIZE), 1.0,
                        re_dropout_keep_prob, 0.01)
                    r_losses.append(r_loss)
                print 'reward_loss', np.mean(r_losses)
        speed = time.time() - start
        print 'Reward pre_training Speed:{:.3f}'.format(speed)

        checkpoint_path = os.path.join('save', 'exper_40.ckpt')
        saver.save(sess, checkpoint_path)
    else:
        print 'Restore pretrained model ...'
        log.write('Restore pre-trained model...\n')
        ckpt = tf.train.get_checkpoint_state('save')
        saver.restore(sess, ckpt.model_checkpoint_path)

    # by setting the parameters to 0.0 and 1.0, we didn't use the mixed policy RL training in SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):

        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        # Train the generator for one step
        start = time.time()
        g_losses = []
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE,
                                                    off_num)
        avg_reward = []
        for g_it in range(1):
            for it in range(off_num // BATCH_SIZE):
                rewards = rollout.get_reward(sess, off_samples[it], 8,
                                             rewarder)
                avg_reward.append(rewards)
            baseline = np.zeros(SEQ_LENGTH)
            for it in range(1):
                for it2 in range(off_num // BATCH_SIZE):
                    _, g_loss = generator.rl_train_step(
                        sess, off_samples[it2], avg_reward[it2], baseline,
                        off_probs[it2], entropy_w, G_rate)
                    g_losses.append(g_loss)
        speed = time.time() - start
        print 'MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, np.mean(g_losses))

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder
        start = time.time()
        r_loss_list = []
        for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    _, r_loss = rewarder.reward_train_step(
                        sess, x_text, weights, 1, re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)))
                    r_loss_list.append(r_loss)
        speed = time.time() - start
        print 'Reward training {} round, Speed:{:.3f}, Loss:{:.3f}'.format(
            total_batch, speed, np.mean(r_loss_list))

    log.close()
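Every example pre-trains the generator through pre_train_epoch(sess, generator, gen_data_loader); Example #4 additionally passes a curriculum rate and Example #6 a learning rate. The plain MLE version usually looks like the sketch below, assuming the generator exposes a pretrain_step(sess, batch) method; the extra-argument variants would simply thread their parameter through that call.

def pre_train_epoch(sess, trainable_model, data_loader):
    # Assumed helper: one epoch of maximum-likelihood pre-training,
    # returning the mean supervised loss over all batches.
    supervised_losses = []
    data_loader.reset_pointer()
    for _ in range(data_loader.num_batch):
        batch = data_loader.next_batch()
        _, g_loss = trainable_model.pretrain_step(sess, batch)
        supervised_losses.append(g_loss)
    return np.mean(supervised_losses)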
Example #8
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    # assert START_TOKEN == 0

    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM,
                          HIDDEN_DIM, MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE,
                              EMB_DIM, HIDDEN_DIM, MAX_LENGTH, 0)

    with tf.variable_scope('discriminator'):
        # batch_size=dis_batch_size
        cnn = GraphConvTensorGraph(n_tasks=1, batch_size=dis_batch_size, mode='classification')
        if not cnn.built:
            cnn.build()

    # with cnn._get_tf("Graph").as_default():
    #     manager = cnn._get_tf("Graph").as_default()
    #     manager.__enter__()
    #     train_op = cnn._get_tf('train_op')

    # Define Discriminator Training procedure
    cnn_params = [param for param in tf.trainable_variables() if 'discriminator' in param.name]

    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(
        cnn.loss.out_tensor, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(
        dis_grads_and_vars, global_step=dis_global_step)



    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        output_tensors = [cnn.outputs[0].out_tensor]  # added from deepchem
        fetches = output_tensors + [dis_train_op, cnn.loss.out_tensor]
        if D_WEIGHT == 0:
            return 0, 0

        negative_samples = generate_samples(
            sess, generator, BATCH_SIZE, POSITIVE_NUM)

        #  train discriminator
        # pos_smiles = [mm.decode(sample, ord_dict) for sample in positive_samples]  # already defined in intro
        neg_smiles = [mm.decode(sample, ord_dict) for sample in negative_samples]
        # df_pos = pd.DataFrame({'real/fake': [1] * len(pos_smiles), 'smiles': pos_smiles})  # already defined in intro
        df_neg = pd.DataFrame({'real/fake': [0] * len(neg_smiles), 'smiles': neg_smiles})
        # df_total = df_pos.append(df_neg)
        df_total = pd.concat([df_pos, df_neg], ignore_index=True)

        def shuffle_by_batches(df, batch_size=dis_batch_size):
            # Shuffle rows batch-wise so each mini-batch stays all-real or all-fake.
            l = len(df)
            l_batches = range(l)
            num_chunks = int(np.ceil(l / float(batch_size)))
            l_batches = [l_batches[x * batch_size:(x + 1) * batch_size] for x in range(num_chunks)]
            permut = np.random.permutation(len(l_batches))
            l_new = [l_batches[n] for n in permut]
            l_new = [item for sublist in l_new for item in sublist]  # flatten l_new
            df = df.reindex(np.array(l_new))
            df = df.reset_index(drop=True)
            return df

        # shuffle rows by batch, see Tip 4: https://github.com/soumith/ganhacks
        df_total = shuffle_by_batches(df_total, batch_size=dis_batch_size)

        loader = deepchem.data.PandasLoader(tasks=['real/fake'], smiles_field="smiles",
                                            featurizer=deepchem.feat.ConvMolFeaturizer())
        train_dataset = loader.featurize(df_total, shard_size=8192)
        feed_dict_generator = cnn.default_generator(train_dataset, epochs=1)

        def create_feed_dict():
            for d in feed_dict_generator:
                feed_dict = {k.out_tensor: v for k, v in six.iteritems(d)}
                feed_dict[cnn._training_placeholder] = 1.0
                yield feed_dict

        def select_label(feed_dict):
            # Select the output label layer in the feed dictionary.
            newkeys = []
            for k in feed_dict.keys():
                if 'Label' in k.name:
                    newkeys.append(k)
            return newkeys[0]

        avg_loss, avg_acc, n_batches = 0.0, 0.0, 0

        for feed_dict in create_feed_dict():
            fetched_values = sess.run(fetches, feed_dict=feed_dict)
            loss = fetched_values[-1]
            predicted_results = [np.argmax(x) for x in fetched_values[0]]
            ground_truth = [np.argmax(x) for x in feed_dict[select_label(feed_dict)]]
            results = [predicted_results[n] == ground_truth[n] for n in range(len(predicted_results))]
            accuracy = float(sum(results)) / len(results)
            n_batches += 1
            ratio = 1.0 / n_batches
            avg_loss = (1 - ratio) * avg_loss + ratio * loss
            avg_acc = (1 - ratio) * avg_acc + ratio * accuracy
        print('\tD loss  :   {}'.format(avg_loss))
        print('\tAccuracy: {}'.format(avg_acc))
        return avg_loss, avg_acc

    # Pretrain is checkpointed and only executes if we don't find a checkpoint
    saver = tf.train.Saver()
    ckpt_dir = 'checkpoints_new/{}_pretrain'.format(PREFIX)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
    if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
        saver.restore(sess, ckpt_file)
        print('Pretrain loaded from previous checkpoint {}'.format(ckpt_file))
    else:
        if params["LOAD_PRETRAIN"]:
            print('\t* No pre-training data found as {:s}.'.format(ckpt_file))
        else:
            print('\t* LOAD_PRETRAIN was set to false.')
        cnn._initialize_weights(sess, saver)  # added from deepchem
        sess.run(tf.global_variables_initializer())
        pretrain(sess, generator, target_lstm, train_discriminator)
        path = saver.save(sess, ckpt_file)
        print('Pretrain finished and saved at {}'.format(path))

    # create reward function
    batch_reward = make_reward(train_samples)

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Reinforcement Training Generator...')
    results_rows = []
    for nbatch in tqdm(range(TOTAL_BATCH)):
        results = OrderedDict({'exp_name': PREFIX})
        if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
            print('* Making samples')
            if nbatch % 10 == 0:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, BIG_SAMPLE_NUM)
            else:
                gen_samples = generate_samples(
                    sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(gen_samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('batch_num: {}'.format(nbatch))
            print('test_loss: {}'.format(test_loss))
            results['Batch'] = nbatch
            results['test_loss'] = test_loss

            if test_loss < best_score:
                best_score = test_loss
                print('best score: %f' % test_loss)

            # results
            mm.compute_results(gen_samples, train_samples, ord_dict, results)

        print('#########################################################################')
        print('-> Training generator with RL.')
        print('G Epoch {}'.format(nbatch))

        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(
                sess, samples, 16, cnn,  ord_dict, batch_reward, D_WEIGHT)
            nll = generator.generator_step(sess, samples, rewards)
            # results
            print_rewards(rewards)
            print('neg-loglike: {}'.format(nll))
            results['neg-loglike'] = nll
        rollout.update_params()

        # generate for discriminator
        print('-> Training Discriminator')
        for i in range(D):
            print('D_Epoch {}'.format(i))
            d_loss, accuracy = train_discriminator()
            results['D_loss_{}'.format(i)] = d_loss
            results['Accuracy_{}'.format(i)] = accuracy
        print('results')
        results_rows.append(results)
        if nbatch % params["EPOCH_SAVES"] == 0:
            save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    # write results
    save_results(sess, PREFIX, PREFIX + '_model', results_rows)

    print('\n:*** FINISHED ***')
    return
Example #9
def main():
    random.seed(SEED)
    np.random.seed(SEED)


    # prepare data
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    dis_data_loader = Dis_Data_loader(BATCH_SIZE)


    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)


    # target_params's size: [15 * 5000 * 32]
    target_params = pickle.load(open('./save/target_params_py3.pkl', 'rb'))
    # The oracle model
    target_lstm = TARGET_LSTM(5000, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, 20, 0, target_params)

    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples_from_target(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    # print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    #
    # likelihood_data_loader.create_batches(positive_file)
    # for i in range(100):
    #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    #     print('my step ', i, 'test_loss ', test_loss)
    #     input("next:")
    # input("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')
    ans_file = open("learning_cure.txt", 'w')
    for epoch in range(120):  # 120
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)
            ans_file.write("%s\n" % str(test_loss))

    buffer = 'Start pre-training discriminator...'
    print(buffer)
    log.write(buffer)
    for _ in range(10):   # 10
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob,
                }
                d_loss, d_acc, _ = sess.run([discriminator.loss, discriminator.accuracy, discriminator.train_op], feed)
        buffer = "discriminator loss %f acc %f\n" % (d_loss, d_acc)
        print(buffer)

        log.write(buffer)
    ans_file.write("==========\n")
    print("Start Adversarial Training...")
    log.write('adversarial training...')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator
        for it in range(1):
            samples = generator.generate(sess)
            rewards = generator.get_reward(sess, samples, 16, discriminator, START_TOKEN)
            a = str(samples[0])
            b = str(rewards[0])
            buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            log.write(buffer)
            rewards_loss = generator.update_with_rewards(sess, samples, rewards, START_TOKEN)

            # good rewards
            # good_samples = gen_data_loader.next_batch()
            # rewards = np.array([[1.0] * SEQ_LENGTH] * BATCH_SIZE)
            # a = str(good_samples[0])
            # b = str(rewards[0])
            # buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            # log.write(buffer)
            # rewards_loss = generator.update_with_rewards(sess, good_samples, rewards, START_TOKEN)

            # little1 good reward
            # litter1_samples = gen_data_loader.next_batch()
            # rewards = generator.get_reward(sess, litter1_samples, 16, discriminator, START_TOKEN)
            # a = str(little1 good reward[0])
            # b = str(rewards[0])
            # buffer = "%s\n%s\n\n" % (a, b)
            # print(buffer)
            # log.write(buffer)
            # rewards_loss = generator.update_with_rewards(sess, litter1_samples, rewards, START_TOKEN)


        # Test
        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'reward-train epoch %s train loss %s test_loss %s\n' % (str(total_batch), str(rewards_loss), str(test_loss))
            print(buffer)
            log.write(buffer)
            ans_file.write("%s\n" % str(test_loss))

        # Train the discriminator
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob,
                    }
                    d_loss, d_acc, _ = sess.run([discriminator.loss, discriminator.accuracy, discriminator.train_op], feed)
            if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
                buffer = "discriminator loss %f acc %f\n" % (d_loss, d_acc)
                print(buffer)
                log.write(buffer)
Example #10
def construct_gold_generator(vocab_size):
    file_obj = open('save/target_params_py3.pkl', 'rb')
    target_params = pickle.load(file_obj, encoding='utf8')
    #target_params = pickle.load(open('save/target_params_py3.pkl'))
    return TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH,
                       START_TOKEN, target_params)  # The oracle model
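Example #9 pairs the oracle built by construct_gold_generator with a generate_samples_from_target helper (called near the top of its main) that is not shown. Presumably it mirrors the file-writing generate_samples but samples from the oracle LSTM to produce the "real" training data; a hedged sketch:

def generate_samples_from_target(sess, target_lstm, batch_size, generated_num, output_file):
    # Assumed: same as generate_samples, but sampling from the oracle model.
    samples = []
    for _ in range(int(generated_num / batch_size)):
        samples.extend(target_lstm.generate(sess))
    with open(output_file, 'w') as fout:
        for s in samples:
            fout.write(' '.join(str(token) for token in s) + '\n')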
Example #11
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    #
    # Declare data loader
    # ----------------------------------------------------------------------------
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE) # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # ----------------------------------------------------------------------------


    #
    # Declare Generator & Discriminator
    # ----------------------------------------------------------------------------
    # declare: generator
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model

    # declare: discriminator
    discriminator = Discriminator(sequence_length=20, num_classes=2,
                                   vocab_size=vocab_size, embedding_size=dis_embedding_dim,
                                   filter_sizes=dis_filter_sizes, num_filters=dis_num_filters,
                                   l2_reg_lambda=dis_l2_reg_lambda)
    # ----------------------------------------------------------------------------

    #
    # Set the session <sess>
    # ----------------------------------------------------------------------------
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    # ----------------------------------------------------------------------------

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate samples by using <target_lstm> and write the samples to file <positive_file>
    #generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')


    #
    # Pre-train <generator> by using <gen_data_loader>,
    # and then compute the <test_loss> of <target_lstm> and <likelihood_data_loader>
    # ----------------------------------------------------------------------------
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # generate samples by using <generator> and write the samples to file <eval_file>
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)

            # load samples from file <eval_file>
            likelihood_data_loader.create_batches(eval_file)

            # compute <test_loss> of <target_lstm>, with input <likelihood_data_loader>
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)

            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)
    # ----------------------------------------------------------------------------


    #
    # Pre-train <discriminator> by using <generator>
    # ----------------------------------------------------------------------------
    print('Start pre-training discriminator...')
    # Generate data and train 3 epochs on it; this is repeated 50 times
    for _ in range(50):
        # generate samples by using <generator> and write the samples to file <negative_file>
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

        # load samples from file <negative_file>
        dis_data_loader.load_train_data(positive_file, negative_file)

        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                _ = sess.run(discriminator.train_op, feed_dict=feed)
    # ----------------------------------------------------------------------------

    rollout = ROLLOUT(generator, 0.8)

    #
    # Start seqGAN, train <discriminator> and <generator>
    # ----------------------------------------------------------------------------
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):

        # ----- Train the generator for one step -----------------
        for it in range(G_STEPS):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, ROLLOUT_NUM, discriminator, SEQ_LENGTH)
            feed = {generator.x: samples,
                    generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # --------------------------------------------------------

        # Update roll-out parameters
        rollout.update_params()

        # ----- Train the discriminator -------------------------
        for _ in range(D_STEPS):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob: dis_dropout_keep_prob}
                    _ = sess.run(discriminator.train_op, feed_dict=feed)
        # --------------------------------------------------------
    # ----------------------------------------------------------------------------

    log.close()
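Example #11 drives its adversarial loop with G_STEPS, D_STEPS, ROLLOUT_NUM and TOTAL_BATCH, none of which are defined in the snippet. The block below is purely hypothetical, shown only to make the snippet self-contained and chosen to match the vanilla SeqGAN schedule (one generator step, a few discriminator rounds, 16 rollouts); the project's real values may differ.

# Hypothetical settings for Example #11 (not taken from the original project).
G_STEPS = 1        # generator policy-gradient steps per adversarial batch
D_STEPS = 5        # discriminator retraining rounds per adversarial batch
ROLLOUT_NUM = 16   # Monte-Carlo rollouts per reward estimate
TOTAL_BATCH = 200  # number of adversarial epochs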
Example #12
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    stringGenerator = TextGenerator('../corpus/index2word.pickle',
                                    '../corpus/word2index.pickle',
                                    '../corpus/all.code')

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = len(stringGenerator.index2Word)
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_params[0] = np.random.rand(vocab_size, 32).astype(np.float32)
    target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32)
    target_params[-1] = np.random.rand(vocab_size).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.initialize_all_variables())

    #generate_samples(sess, target_lstm, 64, 10000, positive_file)
    stringGenerator.saveSamplesToFile(20, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
Exemple #13
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    best_score = 9.1
    generator = get_trainable_model(vocab_size)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    ################################################################
    gen_data_loader.create_batches(positive_file)
    references = load_references(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    rollout = ROLLOUT(generator, references)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            print 'start calculating BLEU...'
            rewards = rollout.get_reward(sess, samples, 1,
                                         (1.0 / 3, 1.0 / 3, 1.0 / 3))
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)
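            # In this PG-BLEU variant the reward for each sampled sequence is a
            # sentence-level BLEU score against the reference corpus, with uniform
            # weights over 1- to 3-grams (the (1/3, 1/3, 1/3) tuple above). A rough
            # sketch of such a scoring step using NLTK (an illustration, not this
            # file's ROLLOUT.get_reward implementation):
            #
            #     from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
            #     smooth = SmoothingFunction().method1
            #     def bleu_reward(sample, references, weights=(1/3., 1/3., 1/3.)):
            #         hypothesis = [str(tok) for tok in sample]
            #         refs = [[str(tok) for tok in ref] for ref in references]
            #         return sentence_bleu(refs, hypothesis, weights=weights,
            #                              smoothing_function=smooth)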

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/pg_bleu.txt')

        rollout.update_params()

    log.close()
Exemple #14
0
def main():
    assert START_TOKEN == 0
    assert ((FLAGS.use_oracle_data or FLAGS.use_natural_data) == True)

    assert ((FLAGS.use_character_level_model == True) if
            (FLAGS.use_onehot_embeddings == True) else
            (FLAGS.use_character_level_model == False))

    if FLAGS.use_natural_data:
        print("WARNING: " + \
              "since FLAGS.use_natural_data is set to True, " + \
              "we must are setting FLAGS.use_oracle_data to False.")
        FLAGS.use_oracle_data = False

    if FLAGS.use_oracle_data:
        print("WARNING: " + \
              "since FLAGS.use_oracle_data is set to True, " + \
              "we must are setting FLAGS.use_character_level_model to False.")
        FLAGS.use_character_level_model = False

    vocab_dict = None
    vocab_size = oracle_vocab_size
    EMB_DIM = WORD_EMB_DIM
    dis_embedding_dim = dis_word_embedding_dim

    if FLAGS.use_natural_data:
        vocab_dict = VocabDictionary(
            data_fp=positive_file,
            max_seq_length=SEQ_LENGTH,
            character_level_model_bool=FLAGS.use_character_level_model,
            drop_freq_thresh=10)
        print(vocab_dict.vocab_dict)
        print(vocab_dict.int_to_token_dict)

        vocab_size = vocab_dict.get_length()

        if FLAGS.use_onehot_embeddings:
            # if we're using one-hot encodings,
            # the embedding dim must be the same as the number of possible tokens:
            EMB_DIM = vocab_size
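            # A one-hot "embedding" is simply the identity matrix, so the lookup
            # returns the one-hot vector for each token id. A minimal sketch of the
            # equivalence (illustrative only; the Generator presumably handles this
            # internally when use_onehot_embeddings is set):
            #
            #     onehot_emb = tf.eye(vocab_size)                     # [V, V]
            #     embedded = tf.nn.embedding_lookup(onehot_emb, ids)  # == tf.one_hot(ids, vocab_size)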

    # Data loaders
    gen_data_loader = Gen_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)

    likelihood_data_loader = Gen_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)

    dis_data_loader = Dis_Dataloader(
        BATCH_SIZE,
        vocab_dictionary=vocab_dict,
        max_seq_length=SEQ_LENGTH,
        character_level_model_bool=FLAGS.use_character_level_model)

    # Gen, Dis, and Oracle Models

    generator = Generator(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        go_token=START_TOKEN,
        eos_token=EOS_TOKEN,
        pad_token=(PAD_TOKEN if vocab_dict is not None else None),
        use_onehot_embeddings=FLAGS.use_onehot_embeddings)

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  dropout_keep_prob=dis_dropout_keep_prob)

    target_params = []
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    if FLAGS.use_oracle_data:
        generate_samples(sess,
                         target_lstm,
                         BATCH_SIZE,
                         generated_num,
                         positive_file,
                         vocab_dict=vocab_dict,
                         char_level_bool=FLAGS.use_character_level_model)

    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w+')

    print('Starting pre-training for the generator')
    log.write('pre-training...\n')

    for epoch in range(PRE_EPOCH_NUM):
        pretrain_cross_entropy_loss = pre_train_epoch(sess, generator,
                                                      gen_data_loader)

        if epoch % 5 == 0 or FLAGS.show_every_epoch:

            if (FLAGS.use_natural_data == False):
                generate_samples(
                    sess,
                    generator,
                    BATCH_SIZE,
                    generated_num,
                    eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)

                likelihood_data_loader.create_batches(eval_file)

                oracle_nll_loss = compute_oracle_loss(sess, target_lstm,
                                                      likelihood_data_loader)

                print(
                    'generator pre-train epoch {}... oracle_nll {}... training set cross entropy loss {}... datetime {}'
                    .format(epoch,
                            oracle_nll_loss, pretrain_cross_entropy_loss,
                            datetime.datetime.now()))
                buffer = 'epoch:\t' + str(epoch) + '\t' + \
                         'oracle_nll:\t' + str(oracle_nll_loss) + '\n'
                log.write(buffer)
            else:
                generate_samples(
                    sess,
                    generator,
                    BATCH_SIZE,
                    generated_num,
                    eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)

                dis_data_loader.load_train_data(positive_file, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                gen_data_loader.create_batches(positive_file)

                logging_prefix_string = 'generator pre-train epoch {}\n\t token_cross_entropy_loss: {}'.format(
                    epoch, pretrain_cross_entropy_loss)

                log_all_the_things(sess=sess,
                                   discriminator=discriminator,
                                   mixed_data_loader=dis_data_loader,
                                   fake_data_loader=likelihood_data_loader,
                                   real_data_loader=gen_data_loader,
                                   logging_prefix_string=logging_prefix_string)

                buffer = 'epoch:\t' + str(epoch) + '\t' + \
                         'pretrain_cross_entropy_loss:\t' + str(pretrain_cross_entropy_loss) + '\n'
                log.write(buffer)

    print('Starting pre-training for the discriminator...')
    for epoch in range(dis_pre_epoch_num):
        generate_samples(sess,
                         generator,
                         BATCH_SIZE,
                         generated_num,
                         negative_file,
                         vocab_dict=vocab_dict,
                         char_level_bool=FLAGS.use_character_level_model)
        dis_data_loader.load_train_data(positive_file, negative_file)

        for _ in range(FLAGS.k_steps):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.training_mode: True
                }
                _ = sess.run(discriminator.train_op, feed)

        if epoch % 5 == 0 or FLAGS.show_every_epoch:
            generate_samples(sess,
                             generator,
                             BATCH_SIZE,
                             generated_num,
                             eval_file,
                             vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            gen_data_loader.create_batches(positive_file)

            logging_prefix_string = 'discriminator pre-train epoch {}... '.format(
                epoch)

            log_all_the_things(sess=sess,
                               discriminator=discriminator,
                               mixed_data_loader=dis_data_loader,
                               fake_data_loader=likelihood_data_loader,
                               real_data_loader=gen_data_loader,
                               logging_prefix_string=logging_prefix_string)

    rollout = ROLLOUT(generator, 0.0)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(FLAGS.g_steps):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, rollout_branch_factor,
                                         discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

            # Update roll-out parameters
            if FLAGS.update_rollout_every_gstep:
                rollout.update_params()

        # Evaluate the generator
        if (total_batch % 5 == 0) or (total_batch == TOTAL_BATCH -
                                      1) or FLAGS.show_every_epoch:

            if (FLAGS.use_natural_data == False):

                generate_samples(
                    sess,
                    generator,
                    BATCH_SIZE,
                    generated_num,
                    eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)

                likelihood_data_loader.create_batches(eval_file)
                oracle_nll_loss = compute_oracle_loss(sess, target_lstm,
                                                      likelihood_data_loader)
                print(
                    'epoch: {}\t generator training... oracle_nll: {}\t datetime: {}'
                    .format(total_batch, oracle_nll_loss,
                            datetime.datetime.now()))

                buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                         'oracle_nll:\t' + str(oracle_nll_loss) + '\n'
                log.write(buffer)
            else:
                generate_samples(
                    sess,
                    generator,
                    BATCH_SIZE,
                    generated_num,
                    eval_file,
                    vocab_dict=vocab_dict,
                    char_level_bool=FLAGS.use_character_level_model)

                dis_data_loader.load_train_data(positive_file, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                gen_data_loader.create_batches(positive_file)

                logging_prefix_string = 'adversarial epoch: {}\n\t generator training... '.format(
                    total_batch)

                kv = log_all_the_things(
                    sess=sess,
                    discriminator=discriminator,
                    mixed_data_loader=dis_data_loader,
                    fake_data_loader=likelihood_data_loader,
                    real_data_loader=gen_data_loader,
                    logging_prefix_string=logging_prefix_string)

                g_loss = kv['g_loss']

                buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                         'g_loss:\t' + str(g_loss) + '\n'
                log.write(buffer)

        # Update roll-out parameters, if we didn't already do so
        if not FLAGS.update_rollout_every_gstep:
            rollout.update_params()

        # Train the discriminator
        for _ in range(FLAGS.d_steps):
            generate_samples(sess,
                             generator,
                             BATCH_SIZE,
                             generated_num,
                             negative_file,
                             vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(FLAGS.k_steps):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.training_mode: True
                    }
                    _ = sess.run(discriminator.train_op, feed)

        # Test
        if (total_batch % 5 == 0) or (total_batch == TOTAL_BATCH -
                                      1) or FLAGS.show_every_epoch:
            generate_samples(sess,
                             generator,
                             BATCH_SIZE,
                             generated_num,
                             eval_file,
                             vocab_dict=vocab_dict,
                             char_level_bool=FLAGS.use_character_level_model)
            dis_data_loader.load_train_data(positive_file, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            gen_data_loader.create_batches(positive_file)

            logging_prefix_string = 'adversarial epoch: {}\n\t discriminator training... '.format(
                total_batch)

            kv = log_all_the_things(
                sess=sess,
                discriminator=discriminator,
                mixed_data_loader=dis_data_loader,
                fake_data_loader=likelihood_data_loader,
                real_data_loader=gen_data_loader,
                logging_prefix_string=logging_prefix_string)

            d_loss = kv['d_loss']

            buffer = 'epoch:\t' + str(total_batch) + '\t' + \
                     'd_loss:\t' + str(d_loss) + '\n'
            log.write(buffer)

    log.close()
Exemple #15
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE) # For testing
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = pickle.load(open(TARGET_PARAMS, 'rb'))
    target_lstm = TARGET_LSTM(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model

    discriminator = Discriminator(sequence_length=20, num_classes=2, vocab_size=VOCAB_SIZE, embedding_size=dis_embedding_dim, 
                                filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)


    saver = tf.train.Saver() # saver
    # Start the TensorFlow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)
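    # generate_samples is the usual SeqGAN utility: sample generated_num sequences
    # from the model batch by batch and write the space-separated token ids to a
    # file. Roughly (a sketch based on the reference SeqGAN helper; the version
    # used here may differ slightly):
    #
    #     def generate_samples(sess, trainable_model, batch_size, generated_num, output_file):
    #         samples = []
    #         for _ in range(int(generated_num / batch_size)):
    #             samples.extend(trainable_model.generate(sess))
    #         with open(output_file, 'w') as fout:
    #             for seq in samples:
    #                 fout.write(' '.join(str(tok) for tok in seq) + '\n')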

    log = open(LOG_FILE, 'w')
    #  pre-train generator
    print ('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_GEN_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print ('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
            log.write(buffer)

    print ('Start pre-training discriminator...')
    # Train 3 epochs on the generated data and do this 50 times
    ## Split both the real data and the generated data into segments to train the discriminator
    split_sentence_file(positive_file, positive_file_split)
    for epoch in range(PRE_DIS_EPOCH_NUM): 
        print("EPOCH : %d  $$$$$$$$$$$" % epoch)
        print("Generating and Spliting Negative file.......")
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
        split_sentence_file(negative_file, negative_file_split)
        print("Load file to loader.....")
        dis_data_loader.load_train_data(positive_file_split, negative_file_split)
        print("Start training ...... ")
        for ep in range(IN_DIS_EPOCH): # 3 --> 1
            print("inner epoch: %d :" % ep)
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                ## Fetch the discriminator loss to monitor training progress
                loss, _ = sess.run([discriminator.loss, discriminator.train_op], feed)
                if it % 1000 == 0:
                    print (f'Total Epoch {epoch}, Gen Epoch {ep}, steps {it}, loss {loss}')

    print ('#########################################################################')
    print ('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print(f"Total batch {total_batch} ------------------------------------------")
        # Train the generator for one step
        for it in range(ADV_GEN_EPOCH_NUM):
            samples = generator.generate(sess)
            # Changed reward computation: get per-segment rewards directly from the discriminator
            rewards = get_rewords_from_discriminator(sess, samples, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print ('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Train the discriminator
        for epoch in range(ADV_DIS_EPOCH_NUM): # 5 --> 1
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            split_sentence_file(negative_file, negative_file_split)
            dis_data_loader.load_train_data(positive_file_split, negative_file_split)
            for ep in range(IN_DIS_EPOCH): # 3 --> 1
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    ## Fetch the discriminator loss to monitor training progress
                    loss, _ = sess.run([discriminator.loss, discriminator.train_op], feed)
                    if it % 1000 == 0:
                        print (f'Total Epoch {epoch}, Gen Epoch {ep}, steps {it}, loss {loss}')
        # Save model 
        path = os.path.join(save_path, 'after-epoch')
        saver.save(sess, path, global_step=total_batch+1)
    log.close()
Exemple #16
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0
    # gen_data_loader reads the real (oracle-generated) sample set
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    # likelihood_data_loader reads the evaluation sample set
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    # dis_data_loader mixes real and generated data for the discriminator
    dis_data_loader = Dis_dataloader(BATCH_SIZE)
    # Define the generator model
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    # pickle.load(open("./dataset/atis.pkl",  "rb"),  encoding='iso-8859-1')
    pickle_pack = open('save/target_params_py3.pkl', 'rb')
    target_params = pickle.load(pickle_pack)
    # Define the oracle model
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model
    # Define the discriminator model
    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5  # allocate 50% of GPU memory
    config.gpu_options.allow_growth = True  # grow GPU memory allocation as needed
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # Use the oracle model to generate generated_num "real" sequences, i.e. generated_num/BATCH_SIZE batches
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)

    print('begin to record save/experiment-log.txt')
    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    # print('Start pre-training generator...')
    # log.write('pre-training...\n')
    # for epoch in range(PRE_EPOCH_NUM):
    #     # Train the generator model
    #     loss = pre_train_epoch(sess, generator, gen_data_loader)
    #     if epoch % 5 == 0:
    #         # Use the generator to generate data and write it to eval_file
    #         generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    #         likelihood_data_loader.create_batches(eval_file)
    #         # Evaluate the generated data with the oracle model
    #         test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    #         print('pre-train epoch ', epoch, 'test_loss ', test_loss)
    #         buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
    #         log.write(buffer)
    #
    # print('Start pre-training discriminator...')
    # # Train 3 epoch on the generated data and do this for 50 times
    # for _ in range(50):
    #     generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
    #     dis_data_loader.load_train_data(positive_file, negative_file)
    #     for _ in range(3):
    #         dis_data_loader.reset_pointer()  # set next_batch pointer to 0
    #         for it in range(dis_data_loader.num_batch):
    #             x_batch, y_batch = dis_data_loader.next_batch()
    #             feed = {
    #                 discriminator.input_x: x_batch,
    #                 discriminator.input_y: y_batch,
    #                 discriminator.dropout_keep_prob: dis_dropout_keep_prob
    #             }
    #             _ = sess.run(discriminator.train_op, feed)

    print('define a rollout object')
    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print('begin to train generator with rollout policy')
        for it in range(1):
            samples = generator.generate(sess)
            print(
                'start a rollout and get rewards from the discriminator (rollout number is 16)...'
            )
            print('rollout samples shape is', samples.shape)
            print(samples[:5])
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Update roll-out parameters using exponentially weighted averages beta=0.8
        rollout.update_params()
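        # In the standard SeqGAN ROLLOUT class, update_params() moves the rollout
        # network's weights toward the current generator weights with an exponential
        # moving average controlled by the update rate passed to ROLLOUT (0.8 here).
        # Schematically (an assumption about this repository's ROLLOUT):
        #
        #     theta_rollout <- 0.8 * theta_rollout + 0.2 * theta_generator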

        # Train the discriminator
        print(
            'begin to train discriminator with positive and negative samples')
        for _ in range(5):
            print(
                'generate %d negative samples from the generator and write them to %s'
                % (int(generated_num / BATCH_SIZE) * BATCH_SIZE, negative_file))
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            print('load pos and neg samples and shuffle and bootstrap')
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    log.close()
Exemple #17
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    with open(true_file, 'r') as f_pos:
        file_contents = f_pos.read().splitlines()
        file_contents = [content.split() for content in file_contents]
        tokens = set([item for sublist in file_contents for item in sublist])
        # tokens = set(file_contents)

    pad_idx = len(tokens)
    vocab_size = pad_idx + 1

    token2idx = dict((token, i) for i, token in enumerate(tokens))
    idx2token = dict((i, token) for i, token in enumerate(tokens))
    idx2token[pad_idx] = " "
    load_positive(true_file, positive_file, token2idx, pad_idx)
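    # load_positive is assumed to encode the raw text file into token ids with
    # token2idx, pad each line to SEQ_LENGTH with pad_idx, and write the result to
    # positive_file. A hypothetical sketch (not this repository's implementation):
    #
    #     def load_positive(src, dst, token2idx, pad_idx, seq_length=SEQ_LENGTH):
    #         with open(src) as fin, open(dst, 'w') as fout:
    #             for line in fin:
    #                 ids = [token2idx[tok] for tok in line.split()][:seq_length]
    #                 ids += [pad_idx] * (seq_length - len(ids))
    #                 fout.write(' '.join(str(i) for i in ids) + '\n')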

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing

    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'),
                                 encoding='latin1')
    target_params[0] = np.random.random([vocab_size, 32]).astype(np.float32)
    target_params[13] = np.random.random([32, vocab_size]).astype(np.float32)
    target_params[14] = np.random.random([
        vocab_size,
    ]).astype(np.float32)

    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    gen_data_loader.create_batches(positive_file, SEQ_LENGTH)

    # log file that stores progress
    log = open('save/experiment-log.txt', 'w')

    #  pre-train generator
    print('Start pre-training...')
    log.write('pre-training...\n')

    all_pre_train_losses = []
    for epoch in range(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        all_pre_train_losses.append(loss)

    plt.plot(all_pre_train_losses)
    plt.savefig('pre_train_losses_plot.png')

    gen_outfile = 'save/generated_by_generator_after_' + str(
        PRE_EPOCH_NUM) + '_' + str(datetime.datetime.now()) + '_epochs.txt'

    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)

    checksyntax.check_code(log, gen_outfile)

    # if epoch % 5 == 0:
    #     generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    #     likelihood_data_loader.create_batches(eval_file, SEQ_LENGTH)
    #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    #     print('pre-train epoch ', epoch, 'test_loss ', test_loss)
    #     buffer = 'epoch:\t'+ str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
    #     log.write(buffer)

    print('Start pre-training discriminator...')
    # Train 3 epochs on the generated data and do this 50 times
    for i in range(50):
        print("discriminator pre train epoch : ", i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file,
                                        SEQ_LENGTH)
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):

                x_batch, y_batch = dis_data_loader.next_batch()

                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    gen_outfile = 'save/generated_by_generator_after_discriminator_training_' + str(
        datetime.datetime.now()) + '.txt'

    generate_samples(sess, generator, BATCH_SIZE, generated_num, gen_outfile,
                     idx2token)

    checksyntax.check_code(log, gen_outfile)

    rollout = ROLLOUT(generator, 0.8)

    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("total_batch : ", total_batch)
        if total_batch % 20 == 0:
            file_name = 'save/output_batch_' + str(total_batch) + '.txt'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name, idx2token)

            checksyntax.check_code(log, file_name)

        # Train the generator for one step
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # Test
    #     if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
    #         generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    #         likelihood_data_loader.create_batches(eval_file)
    #         test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    #         buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
    #         print('total_batch: ', total_batch, 'test_loss: ', test_loss)
    #         log.write(buffer)
    #
    # Update roll-out parameters
        rollout.update_params()
        #
        # Train the discriminator
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file,
                                            SEQ_LENGTH)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
        final_gen_file = 'save/final_output.txt'
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         final_gen_file, idx2token)

        checksyntax.check_code(log, final_gen_file)

    #     with open('save/output.txt','r') as f:
    #         with open('save/output_word.txt','w') as fout:
    #             for line in f:
    #                 line = line.strip()
    #                 line = line.split()
    #                 word_line = ''.join([idx2token[int(x)] for x in line])
    #                 fout.write(word_line + '\n')
    #
    log.close()
Exemple #18
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    assert START_TOKEN == 0

    vocab_size = 5000

    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if len(physical_devices) > 0:
        for dev in physical_devices:
            tf.config.experimental.set_memory_growth(dev, True)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_params = pickle.load(open('save/target_params_py3.pkl', 'rb'))
    target_lstm = TARGET_LSTM(BATCH_SIZE, SEQ_LENGTH, START_TOKEN, target_params) # The oracle model

    discriminator = Discriminator(sequence_length=SEQ_LENGTH, num_classes=2, vocab_size=vocab_size, embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, dropout_keep_prob=dis_dropout_keep_prob,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    if not os.path.exists(positive_file):
        target_lstm.generate_samples(generated_num // BATCH_SIZE, positive_file)
    gen_dataset = dataset_for_generator(positive_file, BATCH_SIZE)
    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    if not os.path.exists("generator_pretrained.h5"):
        print('Start pre-training...')
        log.write('pre-training...\n')
        generator.pretrain(gen_dataset, target_lstm, PRE_EPOCH_NUM, generated_num // BATCH_SIZE, eval_file)
        generator.save("generator_pretrained.h5")
    else:
        generator.load("generator_pretrained.h5")

    if not os.path.exists("discriminator_pretrained.h5"):
        print('Start pre-training discriminator...')
        # Train 3 epochs on the generated data and do this 50 times
        for _ in range(50):
            print("Dataset", _)
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)
        discriminator.save("discriminator_pretrained.h5")
    else:
        discriminator.load("discriminator_pretrained.h5")

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        print("Generator", total_batch)
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate_one_batch()
            rewards = rollout.get_reward(samples, 16, discriminator)
            generator.train_step(samples, rewards)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generator.generate_samples(generated_num // BATCH_SIZE, eval_file)
            likelihood_dataset = dataset_for_generator(eval_file, BATCH_SIZE)
            test_loss = target_lstm.target_loss(likelihood_dataset)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        print("Discriminator", total_batch)
        for _ in range(5):
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)
    generator.save("generator.h5")
    discriminator.save("discriminator.h5")

    log.close()
Exemple #19
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)  # For testing
    vocab_size = 5000
    file = open('save/target_params.pkl', 'rb')
    target_params = cPickle.load(file)

    dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)
    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)
    if SEQ_LENGTH == 40:
        target_lstm = TARGET_LSTM(vocab_size,
                                  BATCH_SIZE,
                                  EMB_DIM,
                                  HIDDEN_DIM,
                                  SEQ_LENGTH,
                                  START_TOKEN)  # The oracle model
    else:
        target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
    for a in range(1):
        g = sess.run(leakgan.gen_x, feed_dict={leakgan.drop_out: 0.8, leakgan.train: 1})
        print(g)
        print("epoch:", a, "  ")

    log = open('save/experiment-log.txt', 'w')
    gen_data_loader.create_batches(positive_file)
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        if model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        if FLAGS.resD and model_path + '/' + FLAGS.model:
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
                    likelihood_data_loader.create_batches(eval_file)
                    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
                    log.write(buffer)
                    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
                    likelihood_data_loader.create_batches(eval_file)
                    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                    print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            print('Start pre-training discriminator...')
            for i in range(10):
                for _ in range(5):
                    generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
                    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
                    # gen_data_loader.create_batches(positive_file)
                    dis_data_loader.load_train_data(positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x: x_batch,
                                discriminator.D_input_y: y_batch,
                                discriminator.dropout_keep_prob: dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
                    print ("D_loss: ", D_loss)
                    leakgan.update_feature_function(discriminator)  ## todo: this is important
                saver.save(sess, model_path + '/leakgan_preD')

                print('Start pre-training generator...')
                log.write('pre-training...\n')
                for epoch in range(PRE_EPOCH_NUM // 10):
                    loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                    if epoch % 5 == 0:
                        print ("MLE Generator Loss: ", loss)
                        # generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
                        # likelihood_data_loader.create_batches(eval_file)
                        # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                        # print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                        # buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
                        # log.write(buffer)
                        # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
                        # likelihood_data_loader.create_batches(eval_file)
                        # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                        # print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    #
    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan,
                                     discriminator,
                                     sess,
                                     samples,
                                     4,
                                     dis_dropout_keep_prob)
                feed = {leakgan.x: samples,
                        leakgan.reward: rewards,
                        leakgan.drop_out: 0.5}
                _, _, g_loss, w_loss = sess.run(
                    [leakgan.manager_updates, leakgan.worker_updates, leakgan.goal_loss, leakgan.worker_loss],
                    feed_dict=feed)
                print('total_batch: ', total_batch, "  ", g_loss, "  ", w_loss)

        # Test
        # if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
        #     generate_samples(sess, leakgan, BATCH_SIZE, generated_num, eval_file, 0)
        #     likelihood_data_loader.create_batches(eval_file)
        #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #     buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
        #     print('total_batch: ', total_batch, 'test_loss: ', test_loss)
        #     log.write(buffer)
        #     generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file, 0)
        #     likelihood_data_loader.create_batches(eval_file)
        #     test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        #     print("Groud-Truth:", test_loss)

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, leakgan, BATCH_SIZE, generated_num, negative_file, 0)
            generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file, 0)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run([discriminator.D_loss, discriminator.D_train_op], feed)
            leakgan.update_feature_function(discriminator)
    log.close()
Exemple #20
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(re_batch_size)

    # TODO: Reimplement this class with the same interface.
    # generator = GeneratorTransformer(
    #     vocab_size,
    #     BATCH_SIZE,
    #     SEQ_LENGTH,
    #     START_TOKEN
    # )
    generator = Generator(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_G,
    )
    # TODO: Reimplement this class with the same interface.
    rewarder = Rewarder(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM * 4,
        HIDDEN_DIM * 4,
        SEQ_LENGTH,
        START_TOKEN,
        MID_LAYER_R,
        l2_reg_lambda=re_l2_reg_lambda,
    )
    target_params = pickle.load(open("save/target_params.pkl", "rb"), encoding="latin1")
    # TODO: Reimplement this class with the same interface. (target_transformer)
    # I think we leave this as is, since it's the distribution we're trying to match? (Cailin)
    target_lstm = TARGET_LSTM(
        vocab_size,
        BATCH_SIZE,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        target_params,
    )  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # First, use the oracle model to provide the positive examples,
    #   which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)
    # ground_loss = target_loss(sess, target_lstm, gen_data_loader)
    # print('Ground-Truth:', ground_loss)

    log = open("save/experiment-ent" + str(entropy_w), "w")
    #  pre-train generator
    if restore is False:
        print("Start pre-training...")
        log.write("pre-training...\n")
        for epoch in range(PRE_EPOCH_NUM):
            loss = pre_train_epoch(sess, generator, gen_data_loader)
            if epoch % 5 == 0:
                generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
                print("pre-train epoch ", epoch, "test_loss ", test_loss)
                buffer = "epoch:\t" + str(epoch) + "\tnll:\t" + str(test_loss) + "\n"
                log.write(buffer)

        print("Start pre-training rewarder...")
        start = time.time()
        for _ in range(1):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(1):
                dis_data_loader.reset_pointer()
                r_losses = []
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        np.ones(BATCH_SIZE),
                        1.0,
                        re_dropout_keep_prob,
                        0.01,
                    )
                    r_losses.append(r_loss)
                print("reward_loss", np.mean(r_losses))
        speed = time.time() - start
        print("Reward pre_training Speed:{:.3f}".format(speed))

        checkpoint_path = os.path.join("save", "exper_40.ckpt")
        saver.save(sess, checkpoint_path)
    else:
        print("Restore pretrained model ...")
        log.write("Restore pre-trained model...\n")
        ckpt = tf.train.get_checkpoint_state("save")
        saver.restore(sess, ckpt.model_checkpoint_path)

    # By setting these parameters to 0.0 and 1.0, we do not use the mixed-policy RL training from SeqGAN
    rollout = ROLLOUT(generator, 0.0, 1.0)

    print("#########################################################################")
    print("Start Adversarial Training...")
    log.write("adversarial training...\n")
    for total_batch in range(TOTAL_BATCH):

        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = "epoch:\t" + str(total_batch) + "\tnll:\t" + str(test_loss) + "\n"
            print("total_batch: ", total_batch, "test_loss: ", test_loss)
            log.write(buffer)

        # Train the generator for one step
        start = time.time()
        g_losses = []
        # Draw trajectories (sequences) from generator
        off_samples, off_probs = off_policy_samples(sess, rollout, BATCH_SIZE, off_num)
        avg_reward = []
        for g_it in range(1):
            # Compute the Monte Carlo roll-out reward for each trajectory
            for it in range(off_num // BATCH_SIZE):
                rewards = rollout.get_reward(sess, off_samples[it], 8, rewarder)
                avg_reward.append(rewards)
            # Perform gradient update for generator
            baseline = np.zeros(SEQ_LENGTH)
            for it in range(1):
                for it2 in range(off_num // BATCH_SIZE):
                    _, g_loss = generator.rl_train_step(
                        sess,
                        off_samples[it2],
                        avg_reward[it2],
                        baseline,
                        off_probs[it2],
                        entropy_w,
                        G_rate,
                    )
                    g_losses.append(g_loss)
        speed = time.time() - start
        print(
            "MaxentPolicy Gradient {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(g_losses)
            )
        )

        # Update roll-out parameters
        rollout.update_params()

        # Train the rewarder
        start = time.time()
        r_loss_list = []
        for _ in range(2):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)
            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_text = dis_data_loader.next_batch()
                    weights = rewarder.reward_weight(sess, x_text, generator)
                    _, r_loss = rewarder.reward_train_step(
                        sess,
                        x_text,
                        weights,
                        1,
                        re_dropout_keep_prob,
                        R_rate * np.exp(-(total_batch // R_decay)),
                    )
                    r_loss_list.append(r_loss)
        speed = time.time() - start
        print(
            "Reward training {} round, Speed:{:.3f}, Loss:{:.3f}".format(
                total_batch, speed, np.mean(r_loss_list)
            )
        )

    log.close()
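
The generator step above is an off-policy, entropy-regularized policy gradient: trajectories come from the roll-out policy, rewards are Monte Carlo roll-out scores from the rewarder, and rl_train_step consumes the behavior-policy probabilities, a per-timestep baseline, and entropy_w. The internals of rl_train_step are not shown here, so the following is only a minimal NumPy sketch of the surrogate objective such an update typically maximizes; every name in it is illustrative, not the repo's API.

import numpy as np

def maxent_pg_objective(logp_new, logp_old, rewards, baseline, entropy_w):
    """Off-policy REINFORCE surrogate with an entropy bonus (a sketch).
    logp_new / logp_old: log-probabilities of the sampled tokens under the
    current and behavior policies, shape [batch, seq_len].
    rewards: roll-out rewards per token, shape [batch, seq_len].
    baseline: per-timestep baseline, shape [seq_len]."""
    importance = np.exp(logp_new - logp_old)          # corrects for off-policy sampling
    advantage = rewards - baseline[None, :]           # centre the rewards
    policy_term = importance * advantage * logp_new   # REINFORCE surrogate term
    entropy_term = -logp_new                          # Monte Carlo estimate of the policy entropy
    return np.mean(policy_term + entropy_w * entropy_term)

# Toy usage with random numbers standing in for model outputs.
rng = np.random.default_rng(0)
batch, seq_len = 4, 20
logp_new = np.log(rng.uniform(0.1, 1.0, size=(batch, seq_len)))
logp_old = np.log(rng.uniform(0.1, 1.0, size=(batch, seq_len)))
rewards = rng.uniform(size=(batch, seq_len))
print(maxent_pg_objective(logp_new, logp_old, rewards, np.zeros(seq_len), entropy_w=0.01))

In the listing itself TensorFlow performs the differentiation of the corresponding loss, with G_rate as the learning rate.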
Exemple #21
0
def main(FLAGS):

    #########################################################################################
    #  Generator  Hyper-parameters
    ######################################################################################
    EMB_DIM = FLAGS.gen_emb_dim  # 32  # embedding dimension
    HIDDEN_DIM = FLAGS.gen_hidden_dim  # 32  # hidden state dimension of lstm cell
    SEQ_LENGTH = FLAGS.seq_len  # 20  # sequence length
    START_TOKEN = 0
    PRE_EPOCH_NUM = FLAGS.pretrain_epoch_num  # 80 # supervised (maximum likelihood estimation) epochs for generator (x1) & discriminator (x5)
    SEED = 88
    BATCH_SIZE = FLAGS.batch_size  #64
    LEARNING_RATE = 0.01
    GOAL_SIZE = 16
    STEP_SIZE = 4

    #########################################################################################
    #  Discriminator  Hyper-parameters
    #########################################################################################
    dis_embedding_dim = FLAGS.dis_emb_dim  # 64
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    if FLAGS.seq_len == 20:
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
        ]
        LEARNING_RATE = 0.0015
        # EMB_DIM = 32  # embedding dimension
        # HIDDEN_DIM = 32  # hidden state dimension of lstm cell
    elif FLAGS.seq_len == 40:
        dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40]
        # Note: 13 entries here vs. 14 filter sizes above; the two lists are
        # expected to have the same length.
        dis_num_filters = [
            100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160, 160
        ]
        LEARNING_RATE = 0.0005
        # EMB_DIM = 64
        # HIDDEN_DIM = 64
    else:
        exit(0)
    print(SEQ_LENGTH)

    GOAL_OUT_SIZE = sum(dis_num_filters)

    # dis_dropout_keep_prob = 0.75
    dis_dropout_keep_prob = 1.0
    dis_l2_reg_lambda = 0.2
    dis_batch_size = FLAGS.batch_size  #64

    #########################################################################################
    #  Basic Training Parameters
    #########################################################################################
    EXPERIMENT_NAME = FLAGS.experiment_name
    TOTAL_BATCH = FLAGS.num_epochs  # 800 #num of adversarial epochs
    positive_file = 'save/real_data_%0s.txt' % EXPERIMENT_NAME
    negative_file = 'save/generator_sample_%0s.txt' % EXPERIMENT_NAME
    eval_file = "save/eval_file_%0s" % EXPERIMENT_NAME
    generated_num = 10000  # 10000
    model_path = './ckpts'

    #########################################################################################
    #  Data configurations
    #########################################################################################
    use_real_world_data = True
    real_data_file_path = './data/text8'
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    if use_real_world_data:
        vocab_size = 27
        # split to train-valid-test
        real_data_train_file = real_data_file_path + '-train'
        real_data_valid_file = real_data_file_path + '-valid'
        real_data_test_file = real_data_file_path + '-test'
        real_data_dict_file = real_data_file_path + '-dict.json'
        if not os.path.exists(real_data_train_file):
            split_text8(real_data_file_path)
        charmap, inv_charmap = create_real_data_dict(real_data_train_file,
                                                     real_data_dict_file)
        gen_data_loader = Gen_Data_loader_text8(BATCH_SIZE,
                                                charmap,
                                                inv_charmap,
                                                seq_len=SEQ_LENGTH)
        dis_data_loader = Dis_dataloader_text8(BATCH_SIZE,
                                               charmap,
                                               inv_charmap,
                                               seq_len=SEQ_LENGTH)
        #TODO
    else:
        gen_data_loader = Gen_Data_loader(BATCH_SIZE, FLAGS.length)
        likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                                 FLAGS.length)  # For testing
        vocab_size = 5000
        file = open('save/target_params.pkl', 'rb')
        target_params = pickle.load(file)

        dis_data_loader = Dis_dataloader(BATCH_SIZE, SEQ_LENGTH)

    discriminator = Discriminator(SEQ_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  dis_emb_dim=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  batch_size=BATCH_SIZE,
                                  hidden_dim=HIDDEN_DIM,
                                  start_token=START_TOKEN,
                                  goal_out_size=GOAL_OUT_SIZE,
                                  step_size=4)
    leakgan = LeakGAN(SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      emb_dim=EMB_DIM,
                      dis_emb_dim=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      batch_size=BATCH_SIZE,
                      hidden_dim=HIDDEN_DIM,
                      start_token=START_TOKEN,
                      goal_out_size=GOAL_OUT_SIZE,
                      goal_size=GOAL_SIZE,
                      step_size=4,
                      D_model=discriminator,
                      learning_rate=LEARNING_RATE)

    if not use_real_world_data:
        if SEQ_LENGTH == 40:
            target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM,
                                      HIDDEN_DIM, SEQ_LENGTH,
                                      START_TOKEN)  # The oracle model
        else:
            target_lstm = TARGET_LSTM20(vocab_size, BATCH_SIZE, EMB_DIM,
                                        HIDDEN_DIM, SEQ_LENGTH, START_TOKEN,
                                        target_params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=config)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=999999)
    sess.run(tf.global_variables_initializer())

    if use_real_world_data:
        # gen_data_loader.create_batches(real_data_train_file)
        gen_data_loader.create_batches(real_data_train_file,
                                       limit_num_samples=generated_num)
        pass
    else:
        # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
        generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                         positive_file, 0)
        gen_data_loader.create_batches(positive_file)

    # Sanity check: draw one batch of samples from the (still untrained) generator.
    for a in range(1):
        g = sess.run(leakgan.gen_x,
                     feed_dict={
                         leakgan.drop_out: 0.8,
                         leakgan.train: 1
                     })
        print(g)

        print("epoch:", a, "  ")

    log = open('save/experiment-log.txt', 'w')
    saver_variables = tf.global_variables()
    saver = tf.train.Saver(saver_variables)
    model = tf.train.latest_checkpoint(model_path)
    print(model)
    if FLAGS.restore and model:
        # model = tf.train.latest_checkpoint(model_path)
        # if model and FLAGS.restore:
        if FLAGS.model:  # restore a specific checkpoint if a model name was given
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)
        else:
            saver.restore(sess, model)
    else:
        # if FLAGS.resD and model_path + '/' + FLAGS.model:
        if False:  #default of resD
            print(model_path + '/' + FLAGS.model)
            saver.restore(sess, model_path + '/' + FLAGS.model)

            print('Start pre-training...')
            log.write('pre-training...\n')
            for epoch in range(PRE_EPOCH_NUM):
                loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                if epoch % 5 == 0:
                    if use_real_world_data:
                        generate_real_data_samples(
                            sess, leakgan, BATCH_SIZE, generated_num,
                            eval_file + "_epoch_%0d.txt" % epoch, inv_charmap)
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)

                    print('pre-train epoch ', epoch, 'test_loss ', test_loss)
                    buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                        test_loss) + '\n'
                    log.write(buffer)
                    if use_real_world_data:
                        test_loss = 0  # FIXME - TEMP
                    else:
                        generate_samples(sess, target_lstm, BATCH_SIZE,
                                         generated_num, eval_file, 0)
                        likelihood_data_loader.create_batches(eval_file)
                        test_loss = target_loss(sess, target_lstm,
                                                likelihood_data_loader)
                    print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')
        else:
            print('Start pre-training discriminator...')
            # Alternate pre-training: 10 rounds; in each round the discriminator is
            # trained for 3 epochs on freshly generated data (5 regenerations), then
            # the generator is pre-trained for PRE_EPOCH_NUM // 10 epochs.
            for i in range(10):
                for _ in range(5):
                    if use_real_world_data:
                        generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                                   generated_num,
                                                   negative_file, inv_charmap)
                        dis_data_loader.load_train_data(
                            real_data_train_file, negative_file)
                    else:
                        generate_samples(sess, leakgan, BATCH_SIZE,
                                         generated_num, negative_file, 0)
                        generate_samples(sess, target_lstm, BATCH_SIZE,
                                         generated_num, positive_file, 0)
                        # gen_data_loader.create_batches(positive_file)
                        dis_data_loader.load_train_data(
                            positive_file, negative_file)
                    for _ in range(3):
                        dis_data_loader.reset_pointer()
                        for it in range(dis_data_loader.num_batch):
                            x_batch, y_batch = dis_data_loader.next_batch()
                            feed = {
                                discriminator.D_input_x:
                                x_batch,
                                discriminator.D_input_y:
                                y_batch,
                                discriminator.dropout_keep_prob:
                                dis_dropout_keep_prob
                            }
                            D_loss, _ = sess.run([
                                discriminator.D_loss, discriminator.D_train_op
                            ], feed)
                            # # print 'D_loss ', D_loss
                            # buffer =  str(D_loss) + '\n'
                            # log.write(buffer)
                    leakgan.update_feature_function(discriminator)
                saver.save(sess, model_path + '/leakgan_preD')

                # saver.save(sess, model_path + '/leakgan')
                #  pre-train generator
                print('Start pre-training...')
                log.write('pre-training...\n')
                for epoch in range(PRE_EPOCH_NUM // 10):
                    loss = pre_train_epoch(sess, leakgan, gen_data_loader)
                    if epoch % 5 == 0:
                        if use_real_world_data:
                            generate_real_data_samples(
                                sess, leakgan, BATCH_SIZE, generated_num,
                                eval_file + "_epoch_%0d.txt" % epoch,
                                inv_charmap)
                            test_loss = 0  # FIXME - TEMP
                        else:
                            generate_samples(sess, leakgan, BATCH_SIZE,
                                             generated_num, eval_file, 0)
                            likelihood_data_loader.create_batches(eval_file)
                            test_loss = target_loss(sess, target_lstm,
                                                    likelihood_data_loader)
                        print('pre-train epoch ', epoch, 'test_loss ',
                              test_loss)
                        buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                            test_loss) + '\n'
                        log.write(buffer)
                        if use_real_world_data:
                            test_loss = 0  # FIXME - TEMP
                        else:
                            generate_samples(sess, target_lstm, BATCH_SIZE,
                                             generated_num, eval_file, 0)
                            likelihood_data_loader.create_batches(eval_file)
                            test_loss = target_loss(sess, target_lstm,
                                                    likelihood_data_loader)
                        print("Groud-Truth:", test_loss)
            saver.save(sess, model_path + '/leakgan_pre')

    gencircle = 1
    #
    print(
        '#########################################################################'
    )
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        print("start epoch %0d" % total_batch)

        if total_batch % FLAGS.save_each_epochs == 0:
            print(
                '#########################################################################'
            )
            print('saving model...')
            save_file = os.path.join(
                '.', 'ckp', EXPERIMENT_NAME + '_epoch_%0d' % total_batch,
                EXPERIMENT_NAME + '_epoch_%0d' % total_batch)
            saver.save(sess, save_file)
        for it in range(1):

            for gi in range(gencircle):
                samples = leakgan.generate(sess, 1.0, 1)
                rewards = get_reward(leakgan, discriminator, sess, samples, 4,
                                     dis_dropout_keep_prob)
                feed = {
                    leakgan.x: samples,
                    leakgan.reward: rewards,
                    leakgan.drop_out: 1.0
                }
                _, _, g_loss, w_loss = sess.run([
                    leakgan.manager_updates, leakgan.worker_updates,
                    leakgan.goal_loss, leakgan.worker_loss
                ],
                                                feed_dict=feed)
                print('total_batch: ', total_batch, "  ", g_loss, "  ", w_loss)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            if not use_real_world_data:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                    test_loss) + '\n'
                print('total_batch: ', total_batch, 'test_loss: ', test_loss)
                log.write(buffer)

                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 eval_file, 0)
                likelihood_data_loader.create_batches(eval_file)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print("Groud-Truth:", test_loss)

        # Train the discriminator
        for _ in range(5):
            if use_real_world_data:
                generate_real_data_samples(sess, leakgan, BATCH_SIZE,
                                           generated_num, negative_file,
                                           inv_charmap)
                dis_data_loader.load_train_data(real_data_train_file,
                                                negative_file)
            else:
                generate_samples(sess, leakgan, BATCH_SIZE, generated_num,
                                 negative_file, 0)
                generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                                 positive_file, 0)
                dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.D_input_x: x_batch,
                        discriminator.D_input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    D_loss, _ = sess.run(
                        [discriminator.D_loss, discriminator.D_train_op], feed)
                    # print 'D_loss ', D_loss
            leakgan.update_feature_function(discriminator)

    print(
        '#########################################################################'
    )
    print('saving model...')
    save_file = os.path.join('.', 'ckp', EXPERIMENT_NAME, EXPERIMENT_NAME)
    saver.save(sess, save_file)

    #
    # print '#########################################################################'
    # print 'Start Language Model Evaluation...'
    # test_data_loader = Gen_Data_loader_text8(BATCH_SIZE,charmap,inv_charmap)
    # test_data_loader.create_batches(real_data_test_file)
    # language_model_evaluation(sess,generator, test_data_loader)
    log.close()
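
When use_real_world_data is set, the vocabulary is the 27 characters of text8 (space plus 'a'–'z'), and split_text8 / create_real_data_dict (not shown in this listing) prepare the train/valid/test splits and the character map. As a rough, self-contained stand-in for that preprocessing, the character encoding could look like the sketch below; the helper names are assumptions, not the repo's API.

import numpy as np

def build_charmap():
    # 27-symbol vocabulary: space plus the lowercase letters, matching vocab_size = 27
    chars = [' '] + [chr(c) for c in range(ord('a'), ord('z') + 1)]
    charmap = {ch: i for i, ch in enumerate(chars)}
    inv_charmap = {i: ch for ch, i in charmap.items()}
    return charmap, inv_charmap

def encode_fixed_length(text, charmap, seq_len):
    # Map characters to ids and cut the stream into non-overlapping sequences.
    ids = [charmap[ch] for ch in text if ch in charmap]
    n = len(ids) // seq_len
    return np.array(ids[:n * seq_len]).reshape(n, seq_len)

charmap, inv_charmap = build_charmap()
seqs = encode_fixed_length("anarchism originated as a term of abuse", charmap, seq_len=20)
print(seqs.shape)                                    # (1, 20)
print(''.join(inv_charmap[i] for i in seqs[0]))      # first 20 characters back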
Exemple #22
0
def main():
    opt = Options()
    create_logging(FLAGS)

    random.seed(SEED)
    np.random.seed(SEED)
    # data loader
    gen_data_loader = Gen_Data_loader(FLAGS.gen_pre_batch_size)
    likelihood_data_loader = Gen_Data_loader(
        FLAGS.gen_pre_batch_size)  # For testing
    rank_data_loader = Rank_Data_loader(FLAGS.rank_batch_size, FLAGS.ref_size)
    # network initialization
    generator = Generator(opt, FLAGS, pretrain=True)
    target_params = cPickle.load(open(opt.target_path))
    target_lstm = TARGET_LSTM(opt, FLAGS, target_params,
                              pretrain=True)  # The oracle model
    ranker = Ranker(opt, FLAGS)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # create positive files for MLE training
    generate_samples(sess, target_lstm, FLAGS.gen_pre_batch_size,
                     opt.generated_num, opt.positive_file)
    gen_data_loader.create_batches(opt.positive_file)

    #################################################################pretraining with MLE
    # pre-train generator
    logging.info('Start pre-training generator')
    for epoch in xrange(FLAGS.pre_g_epoch):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, FLAGS.gen_pre_batch_size,
                             opt.generated_num, opt.eval_file)
            likelihood_data_loader.create_batches(opt.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            logging.info("Pretrain generator epoch: %d, test_loss: %0.4f" %
                         (epoch, test_loss))

    logging.info('Start pre-training ranker')
    # In each pre-training round, regenerate negative data and train the ranker on it for 3 epochs
    for epoch in range(FLAGS.pre_r_epoch):
        generate_samples(sess, generator, FLAGS.gen_pre_batch_size,
                         opt.generated_num, opt.negative_file)
        rank_data_loader.load_train_data(opt.positive_file, opt.negative_file)
        for _ in range(3):
            rank_data_loader.reset_pointer()
            for it in xrange(rank_data_loader.num_batch):
                x_batch, y_batch, ref = rank_data_loader.next_batch()
                feed = {
                    ranker.input_x: x_batch,
                    ranker.input_y: y_batch,
                    ranker.input_ref: ref,
                    ranker.dropout_keep_prob: opt.dropout_ratio
                }
                _, loss = sess.run([ranker.train_op, ranker.loss], feed)
        if epoch % 5 == 0:
            logging.info("Pretrain ranker epoch: %d, training loss: %0.4f" %
                         (epoch, loss))

    # # # Save all params to disk.
    save_path = saver.save(sess, "./save/pre_model.ckpt")
    print("pretrain Model saved in file: %s" % save_path)

    # modify generator batch size for adversarial training
    tf.reset_default_graph()
    generator = Generator(opt, FLAGS, pretrain=False)
    ranker = Ranker(opt, FLAGS)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(opt, FLAGS, target_params,
                              pretrain=False)  # The oracle model

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # load parameters
    saver.restore(sess, "./save/pre_model.ckpt")
    likelihood_data_loader = Gen_Data_loader(
        FLAGS.gen_batch_size)  # For testing
    print("Model restored.")
    rollout = ROLLOUT(generator, FLAGS.rollout_ratio, FLAGS.rollout_num)

    logging.info(
        '#########################################################################'
    )
    logging.info('Start adversarial training.')
    for epoch in range(FLAGS.epoch):
        # Train the generator for one step
        for it in range(FLAGS.g_step):
            samples = generator.generate(sess)
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.negative_file)
            rank_data_loader.load_train_data(opt.positive_file,
                                             opt.negative_file)
            rewards = rollout.get_reward(sess, samples, FLAGS.rollout_num,
                                         ranker, rank_data_loader)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Testing
        if epoch % 5 == 0 or epoch == FLAGS.epoch - 1:
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.eval_file)
            likelihood_data_loader.create_batches(opt.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            logging.info("Epoch: %d, test_loss: %0.4f" % (epoch, test_loss))

        # Update roll-out parameters
        rollout.update_params()

        # Train the ranker
        for idx in range(FLAGS.r_step):
            generate_samples(sess, generator, FLAGS.gen_batch_size,
                             opt.generated_num, opt.negative_file)
            rank_data_loader.load_train_data(opt.positive_file,
                                             opt.negative_file)

            for it in xrange(rank_data_loader.num_batch):
                x_batch, y_batch, ref = rank_data_loader.next_batch()
                feed = {
                    ranker.input_x: x_batch,
                    ranker.input_y: y_batch,
                    ranker.input_ref: ref,
                    ranker.dropout_keep_prob: opt.dropout_ratio
                }
                _ = sess.run(ranker.train_op, feed)
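
The Ranker's loss and reward are not shown in this listing. In the RankGAN formulation such a ranker scores a sentence by how similar its feature embedding is to a set of human-written reference sentences, then turns the relative scores into a ranking distribution. The sketch below only illustrates that idea with plain NumPy and made-up embeddings; it is not the Ranker class's implementation.

import numpy as np

def rank_scores(candidate_feats, reference_feats, gamma=1.0):
    """candidate_feats: [n, d] sentence features to rank; reference_feats: [m, d]
    features of human reference sentences. Returns a ranking distribution over
    the n candidates (higher = judged closer to the references)."""
    def l2_normalize(x):
        return x / np.linalg.norm(x, axis=-1, keepdims=True)
    # Mean cosine similarity of each candidate to the reference set.
    relevance = (l2_normalize(candidate_feats) @ l2_normalize(reference_feats).T).mean(axis=1)
    scores = np.exp(gamma * relevance)
    return scores / scores.sum()

rng = np.random.default_rng(1)
cands, refs = rng.normal(size=(4, 8)), rng.normal(size=(16, 8))
print(rank_scores(cands, refs))      # four scores summing to 1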
Exemple #23
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    val_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                             SEQ_LENGTH)  # For testing
    vocab_size = 5000

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    target_params = pickle.load(open('save/target_params_py3.pkl', 'rb'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model

    mediator = Generator(vocab_size,
                         BATCH_SIZE * 2,
                         EMB_DIM * 2,
                         HIDDEN_DIM * 2,
                         SEQ_LENGTH,
                         START_TOKEN,
                         name="mediator",
                         dropout_rate=M_DROPOUT_RATE)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file)
    val_data_loader.create_batches(eval_file)

    log = open('save/experiment-log.txt', 'w')
    log_nll = open('save/experiment-log-nll.txt', 'w')
    log_jsd = open('save/experiment-log-jsd.txt', 'w')
    #  pre-train generator (default 0 epochs)(not recommended)
    print('Start pre-training...')
    log.write('pre-training...\n')
    for epoch in range(PRE_EPOCH_NUM):
        loss = mle_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'nll_oracle ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        if epoch % 1 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('pre-train epoch ', epoch, 'nll_test ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

    print(
        '#########################################################################'
    )
    print('Start Cooperative Training...')
    for iter_idx in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(2):
            samples = generator.generate(sess)
            rewards = mediator.get_reward(
                sess, np.concatenate([samples, samples], axis=0))
            feed = {
                generator.x: samples,
                generator.rewards: rewards[0:BATCH_SIZE]
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # Test
        if iter_idx % 100 == 0 or iter_idx == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            print('batch: ', iter_idx, 'nll_oracle: ', test_loss)
            log_nll.write(buffer)
        if iter_idx % 100 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('batch:\t', iter_idx, 'nll_test ', test_loss)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        # Train the mediator
        for _ in range(1):
            bnll_ = []
            collected_x = []
            ratio = 2
            for it in range(ratio):
                if it % 2 == 0:
                    x_batch = gen_data_loader.next_batch()
                else:
                    x_batch = generator.generate(sess)
                collected_x.append(x_batch)
            collected_x = np.reshape(collected_x, [-1, SEQ_LENGTH])
            np.random.shuffle(collected_x)
            collected_x = np.reshape(collected_x,
                                     [-1, BATCH_SIZE * 2, SEQ_LENGTH])
            for it in range(1):
                feed = {
                    mediator.x: collected_x[it],
                }
                bnll = sess.run(mediator.likelihood_loss, feed)
                bnll_.append(bnll)
                # sess.run(mediator.dropout_on)
                _ = sess.run(mediator.likelihood_updates, feed)
                # sess.run(mediator.dropout_off)
        if (iter_idx * 4) % gen_data_loader.num_batch == 0:
            bnll = np.mean(bnll_)
            gnll = sess.run(
                mediator.likelihood_loss,
                feed_dict={
                    mediator.x:
                    np.reshape(
                        [generator.generate(sess),
                         generator.generate(sess)],
                        [BATCH_SIZE * 2, SEQ_LENGTH])
                })
            print("mediator cooptrain iter#%d, balanced_nll %f, g_nll %f" %
                  (iter_idx, bnll, gnll))
            log.write("%d\t%f\n" % (iter_idx, bnll))
        if iter_idx % gen_data_loader.num_batch == 0:
            jsd = jsd_calculate(sess, generator, target_lstm)
            print('cooptrain epoch#', iter_idx // gen_data_loader.num_batch,
                  'jsd ', jsd)
            log_jsd.write("%d\t%f\n" %
                          (iter_idx // gen_data_loader.num_batch, jsd))

    log.close()
    log_nll.close()
    log_jsd.close()
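
jsd_calculate is not defined in this listing. The Jensen-Shannon divergence it reports can be estimated from samples once the per-sequence log-likelihoods under both the oracle p and the generator g are available, because the mixture m = (p + g) / 2 appears in both KL terms. Below is a minimal NumPy sketch of that estimator; it is an assumption about what jsd_calculate computes, not its actual code.

import numpy as np

def jsd_estimate(logp_on_p, logg_on_p, logp_on_g, logg_on_g):
    """Monte Carlo JSD estimate from per-sequence log-likelihoods.
    logp_on_p / logg_on_p: log p(x) and log g(x) for samples x drawn from p (the oracle);
    logp_on_g / logg_on_g: the same two quantities for samples drawn from g (the generator)."""
    def log_mixture(lp, lg):
        # log m(x) = log((p(x) + g(x)) / 2), computed stably in log space
        return np.logaddexp(lp, lg) - np.log(2.0)
    term_p = np.mean(logp_on_p - log_mixture(logp_on_p, logg_on_p))   # ~ KL(p || m)
    term_g = np.mean(logg_on_g - log_mixture(logp_on_g, logg_on_g))   # ~ KL(g || m)
    return 0.5 * (term_p + term_g)

# Identical models give a JSD of 0; the value is bounded above by log 2 (~0.693 nats).
same = np.log(np.full(1000, 1e-3))
print(jsd_estimate(same, same, same, same))   # 0.0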
Exemple #24
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    # assert START_TOKEN == 0

    vocab_size = NUM_EMB
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          MAX_LENGTH, START_TOKEN)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              MAX_LENGTH, 0)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=MAX_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def train_discriminator():
        if D_WEIGHT == 0:
            return 0, 0, 0  # loss, accuracy, mean ypred (the call site unpacks three values)

        negative_samples = generate_samples(sess, generator, BATCH_SIZE,
                                            POSITIVE_NUM)

        #        global positive_samples
        #        pos_new=positive_samples
        # random 10% of positive samples are labeled negatively to weaken generator and avoid collapsing training
        #        random.shuffle(pos_new)
        #        length=len(pos_new)
        #        fake_neg_number= int(0.05*length)
        #        fake_neg= pos_new[:fake_neg_number]
        #        pos_new=pos_new[fake_neg_number:]

        #       negative_samples+=fake_neg
        #      random.shuffle(negative_samples)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_samples, negative_samples)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        ypred = 0
        counter = 0
        for batch in dis_batches:
            x_batch, y_batch = zip(*batch)
            feed = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dis_dropout_keep_prob
            }
            _, step, loss, accuracy, ypred_for_auc = sess.run([
                dis_train_op, dis_global_step, cnn.loss, cnn.accuracy,
                cnn.ypred_for_auc
            ], feed)
            ypred_vect = np.array([item[1] for item in ypred_for_auc])
            ypred += np.mean(ypred_vect)
            counter += 1
        ypred = float(ypred) / counter
        print('\tD loss  :   {}'.format(loss))
        print('\tAccuracy: {}'.format(accuracy))
        print('\tMean ypred: {}'.format(ypred))
        return loss, accuracy, ypred

    # We check for a previous session; pre-training is checkpointed and only
    # executes if we don't find a checkpoint.
    saver = tf.train.Saver()

    #check previous session
    prev_sess = False
    ckpt_dir = 'checkpoints/mingan'

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
#    ckpt_file = os.path.join(ckpt_dir, ckpt_dir + '_model')   # old checkpoint
    ckpt_file = os.path.join(
        ckpt_dir, 'drd2_new' + '_model_'
    )  # new checkpoint prefix; iterate over saved checkpoints to find the largest batch index

    nbatches_max = 0
    for i in range(500):  # maximum number of batch iterations is 500
        if os.path.isfile(ckpt_file + str(i) +
                          '.meta'):  #and params["LOAD_PREV_SESS"]
            nbatches_max = i

# end of search for the latest checkpoint
    ckpt_file = ckpt_file + str(nbatches_max) + '.meta'
    if params["LOAD_PREV_SESS"]:  # and os.path.isfile(ckpt_file):
        #        saver_test = tf.train.import_meta_graph(ckpt_file)
        #        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))

        #        saver.restore(sess, ckpt_file)
        print('Previous session loaded from previous checkpoint {}'.format(
            ckpt_file))
        prev_sess = True
    else:
        if params["LOAD_PREV_SESS"]:
            print('\t* No previous session data found as {:s}.'.format(
                ckpt_file))
        else:
            print('\t* LOAD_PREV_SESS was set to false.')

#        sess.run(tf.global_variables_initializer())
#     pretrain(sess, generator, target_lstm, train_discriminator)
#     path = saver.save(sess, ckpt_file)
#    print('Pretrain finished and saved at {}'.format(path))

    if not prev_sess:
        #check pretraining
        ckpt_dir = 'checkpoints/{}_pretrain'.format(PREFIX)
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        ckpt_file = os.path.join(ckpt_dir, 'pretrain_ckpt')
        if os.path.isfile(ckpt_file + '.meta') and params["LOAD_PRETRAIN"]:
            saver.restore(sess, ckpt_file)
            print('Pretrain loaded from previous checkpoint {}'.format(
                ckpt_file))
        else:
            if params["LOAD_PRETRAIN"]:
                print('\t* No pre-training data found as {:s}.'.format(
                    ckpt_file))
            else:
                print('\t* LOAD_PRETRAIN was set to false.')

            sess.run(tf.global_variables_initializer())
            pretrain(sess, generator, target_lstm, train_discriminator)
            path = saver.save(sess, ckpt_file)
            print('Pretrain finished and saved at {}'.format(path))


#end loading previous session or pre-training

# create reward function
    batch_reward = make_reward(train_samples)

    rollout = ROLLOUT(generator, 0.8)

    #    nbatches_max= 30

    print(
        '#########################################################################'
    )
    print('Start Reinforcement Training Generator...')
    results_rows = []

    if nbatches_max + 1 > TOTAL_BATCH:
        print(
            ' We already trained that many batches: Check the Checkpoints folder or take a larger TOTAL_BATCH'
        )
    else:
        for nbatch in tqdm(range(nbatches_max + 1, TOTAL_BATCH)):

            #for nbatch in tqdm(range(TOTAL_BATCH)):
            results = OrderedDict({'exp_name': PREFIX})
            if nbatch % 1 == 0 or nbatch == TOTAL_BATCH - 1:
                print('* Making samples')
                if nbatch % 10 == 0:
                    gen_samples = generate_samples(sess, generator, BATCH_SIZE,
                                                   BIG_SAMPLE_NUM)
                else:
                    gen_samples = generate_samples(sess, generator, BATCH_SIZE,
                                                   SAMPLE_NUM)
                likelihood_data_loader.create_batches(gen_samples)
                test_loss = target_loss(sess, target_lstm,
                                        likelihood_data_loader)
                print('batch_num: {}'.format(nbatch))
                print('test_loss: {}'.format(test_loss))
                results['Batch'] = nbatch
                results['test_loss'] = test_loss

                if test_loss < best_score:
                    best_score = test_loss
                    print('best score: %f' % test_loss)

                # results
                mm.compute_results(gen_samples, train_samples, ord_dict,
                                   results)

            print(
                '#########################################################################'
            )
            print('-> Training generator with RL.')
            print('G Epoch {}'.format(nbatch))

            for it in range(TRAIN_ITER):
                samples = generator.generate(sess)
                rewards = rollout.get_reward(sess, samples, 16, cnn,
                                             batch_reward, D_WEIGHT)
                nll = generator.generator_step(sess, samples, rewards)
                # results
                print_rewards(rewards)
                print('neg-loglike: {}'.format(nll))
                results['neg-loglike'] = nll
            rollout.update_params()

            # generate for discriminator
            print('-> Training Discriminator')
            for i in range(D):
                print('D_Epoch {}'.format(i))
                d_loss, accuracy, ypred = train_discriminator()
                results['D_loss_{}'.format(i)] = d_loss
                results['Accuracy_{}'.format(i)] = accuracy
                results['Mean_ypred_{}'.format(i)] = ypred
            print('results')
            results_rows.append(results)
            if nbatch % params["EPOCH_SAVES"] == 0:
                save_results(sess, PREFIX, PREFIX + '_model_' + str(nbatch),
                             results_rows)

    # write results
        save_results(sess, PREFIX, PREFIX + '_model_' + str(nbatch),
                     results_rows)

    print('\n:*** FINISHED ***')
    return
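
get_reward here receives both the discriminator (cnn) and a task-specific batch_reward together with the weight D_WEIGHT, which suggests the objective-reinforced blend R(x) = lambda * D(x) + (1 - lambda) * O(x) used in ORGAN-style training. Since make_reward and get_reward are not shown, the following is only a hedged sketch of that blending; the function and argument names are illustrative.

import numpy as np

def blended_reward(d_probs, objective_scores, d_weight):
    """d_probs: discriminator P(real) per sample, in [0, 1].
    objective_scores: task-specific metric per sample, rescaled to [0, 1].
    d_weight: weight on the adversarial signal (D_WEIGHT above)."""
    d_probs = np.asarray(d_probs, dtype=float)
    objective_scores = np.asarray(objective_scores, dtype=float)
    return d_weight * d_probs + (1.0 - d_weight) * objective_scores

print(blended_reward([0.9, 0.2], [0.3, 0.8], d_weight=0.5))   # [0.6 0.5]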
Exemple #25
0
def main(unused_argv):
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()
    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    #Build data loaders for the generator, testing, and the discriminator
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    #Build generator and its rollout
    generator = Generator(config=config_gen)
    # build the generator's three neural networks
    generator.build()
    # roll-out network: completes a partially generated sequence so the remaining tokens can be predicted; used to compute rewards
    rollout_gen = rollout(config=config_gen)

    #Build target LSTM
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(config=config_gen,
                              params=target_params)  # The oracle model

    #Build discriminator
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    #Build optimizer op for pretraining
    pretrained_optimizer = tf.train.AdamOptimizer(
        config_train.gen_learning_rate)
    # collect all 'teller' variables; 'teller' lives in both the generator and the rollout network
    var_pretrained = [
        v for v in tf.trainable_variables() if 'teller' in v.name
    ]  #Using name 'teller' here to prevent name collision of target LSTM
    # zip pairs the gradients with their corresponding variables
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(
        generator.pretrained_loss, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(
        zip(gradients, variables))

    #Initialize all variables
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    #Initialize the generator's data loader (defined in utils.py)
    #   the target_lstm network generates the real data and writes it to config_train.positive_file
    generate_samples(sess, target_lstm, config_train.batch_size,
                     config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    #Start pretraining
    log = open('save/experiment-log.txt', 'w')
    print 'Start pre-training generator...'
    log.write('pre-training...\n')
    for epoch in xrange(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in xrange(gen_data_loader.num_batch):
            # load the data generated by the target_lstm network (see above) to pre-train the generator on real samples
            batch = gen_data_loader.next_batch()
            # supervised (MLE) training of the generator on real data
            _, g_loss = sess.run([gen_pre_upate, generator.pretrained_loss], feed_dict={generator.input_seqs_pre:batch,\
                                                                                    generator.input_seqs_mask:np.ones_like(batch)})
        if epoch % config_train.test_per_epoch == 0:
            # measure how close the generator's samples are to the real data
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            # evaluate generation quality
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print 'Start pre-training discriminator...'
    for t in range(config_train.dis_update_time_pre):
        print "Times: " + str(t)
        # fake data from the generator plus real data from target_lstm, used for training
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num,
                         config_train.negative_file)
        # mix the real and fake data
        dis_data_loader.load_train_data(config_train.positive_file,
                                        config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x:
                    x_batch,
                    discriminator.input_y:
                    y_batch,
                    discriminator.dropout_keep_prob:
                    config_dis.dis_dropout_keep_prob
                }
                # minimize cross-entropy; trains the scoring network that provides rewards for the generator
                _ = sess.run(discriminator.train_op, feed)

    #Build optimizer op for adversarial training
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(
        generator.gen_loss_adv, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    #Initialize global variables of optimizer for adversarial training
    uninitialized_var = [
        e for e in tf.global_variables() if e not in tf.trainable_variables()
    ]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    #Start adversarial training
    for total_batch in xrange(config_train.total_batch):
        for iter_gen in xrange(config_train.gen_update_time):

            # sample sequences from the generator (LSTM)
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            # calculate the reward at a specific step t by roll-out
            # use the rollout network to compute the return of each chosen action
            for iter_roll in xrange(config_train.rollout_num):

                # pass the generator's sampled tokens to the rollout network.
                # Open question from the author: samples already look like complete sequences (unlike the paper), so why roll out?
                rollout_list = sess.run(rollout_gen.sample_rollout_step,
                                        feed_dict=feed)

                rollout_list_stack = np.vstack(
                    rollout_list
                )  #shape: #batch_size * #rollout_step, #sequence length
                # Monte Carlo roll-out into full sequences; rewards computed in a Bellman-style fashion
                reward_rollout_seq = sess.run(
                    discriminator.ypred_for_auc,
                    feed_dict={
                        discriminator.input_x: rollout_list_stack,
                        discriminator.dropout_keep_prob: 1.0
                    })
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={
                                               discriminator.input_x: samples,
                                               discriminator.dropout_keep_prob:
                                               1.0
                                           })
                reward_allseq = np.concatenate(
                    (reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                for r in xrange(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(
                        r,
                        config_gen.gen_batch_size * config_gen.sequence_length,
                        config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            # compute the rewards
            rewards = np.sum(reward_rollout, axis=0) / config_train.rollout_num
            # use the rewards to guide the generator's gradient update
            _, gen_loss = sess.run([train_adv_update, generator.gen_loss_adv], feed_dict={generator.input_seqs_adv:samples,\
                                                                                        generator.rewards:rewards})
        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            # after adversarial training, generate samples again and compare them against the oracle (target_lstm, i.e. the real data)
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            # defined in util.py
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file,
                                            config_train.negative_file)

            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x:
                        x_batch,
                        discriminator.input_y:
                        y_batch,
                        discriminator.dropout_keep_prob:
                        config_dis.dis_dropout_keep_prob
                    }
                    # train the scoring network (discriminator)
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
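
The inner loop above estimates per-timestep rewards by Monte Carlo roll-out: for every prefix length the roll-out network completes the sequence, the discriminator scores the completions, and the scores are averaged over rollout_num passes, with the full sequence scored directly for the final step. Below is a schematic NumPy version of that bookkeeping; score_fn and rollout_fn are stand-ins for the discriminator and the roll-out network, not the classes used above.

import numpy as np

def rollout_rewards(samples, score_fn, rollout_fn, rollout_num, seq_length):
    """samples: [batch, seq_length] complete sequences from the generator.
    rollout_fn(samples, t): keeps the first t tokens and re-samples the rest.
    score_fn(x): discriminator P(real) for each sequence, shape [batch].
    Returns per-timestep rewards of shape [batch, seq_length]."""
    rewards = np.zeros(samples.shape, dtype=float)
    for _ in range(rollout_num):
        for t in range(1, seq_length):                    # intermediate prefixes
            rewards[:, t - 1] += score_fn(rollout_fn(samples, t))
        rewards[:, seq_length - 1] += score_fn(samples)   # full sequence scored directly
    return rewards / rollout_num

# Toy usage: identity "roll-out" and a random discriminator stand-in.
rng = np.random.default_rng(0)
toy_samples = rng.integers(0, 27, size=(2, 5))
print(rollout_rewards(toy_samples,
                      score_fn=lambda x: rng.uniform(size=x.shape[0]),
                      rollout_fn=lambda x, t: x,
                      rollout_num=3, seq_length=5))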
Exemple #26
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000
    dis_data_loader = Dis_dataloader()

    best_score = 1000

    # initialize an LSTM object and use it to initialize the PoemGen object
    generator = get_trainable_model(vocab_size)

    # cPickle is an object serialization library

    # the loaded pickle object will be an array of numbers;
    # later, these params will be used to initialize the target LSTM
    target_params = cPickle.load(open('save/target_params.pkl'))
    # print target_params

    # time.sleep(1000)  # leftover debugging pause, disabled so the run proceeds

    # This is the oracle (target) LSTM, an RNN that provides the real data distribution
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    # This is the discriminator which uses CNN
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.initialize_all_variables())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    # Initialize the generator with MLE estimators
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # The trainable model 'generator' is an RNN model from PoemGen

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        # update the rollout policy's parameters toward the current generator
        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
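
The call rollout.get_reward(sess, samples, 16, cnn) above hides the Monte Carlo reward computation. As a hedged, framework-free sketch (monte_carlo_rewards, rollout_fn and disc_score_fn are hypothetical names, not part of this code), the per-timestep reward is estimated by completing each prefix with the rollout policy and averaging the discriminator's scores over several rollouts:

import numpy as np


def monte_carlo_rewards(samples, rollout_fn, disc_score_fn, rollout_num=16):
    # samples       : (batch_size, seq_length) array of sampled token ids
    # rollout_fn    : completes each prefix samples[:, :t] to a full sequence
    # disc_score_fn : returns P(real) for a batch of full sequences
    batch_size, seq_length = samples.shape
    rewards = np.zeros((batch_size, seq_length))
    for _ in range(rollout_num):
        for t in range(1, seq_length):
            completed = rollout_fn(samples, t)
            rewards[:, t - 1] += disc_score_fn(completed)
        # the reward for the last token scores the finished sample itself
        rewards[:, seq_length - 1] += disc_score_fn(samples)
    return rewards / rollout_num
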
Exemple #27
0
def main(unused_argv):
    config_train = training_config()
    config_gen = generator_config()
    config_dis = discriminator_config()
    np.random.seed(config_train.seed)
    assert config_train.start_token == 0

    # Build data loaders for the generator, testing, and the discriminator
    gen_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    likelihood_data_loader = Gen_Data_loader(config_gen.gen_batch_size)
    dis_data_loader = Dis_dataloader(config_dis.dis_batch_size)

    #Build generator and its rollout
    generator = Generator(config=config_gen)
    generator.build()
    rollout_gen = rollout(config=config_gen)

    #Build target LSTM
    target_params = cPickle.load(StrToBytes(open('save/target_params.pkl')),
                                 encoding='bytes')
    target_lstm = TARGET_LSTM(config=config_gen,
                              params=target_params)  # The oracle model

    #Build discriminator
    discriminator = Discriminator(config=config_dis)
    discriminator.build_discriminator()

    #Build optimizer op for pretraining
    pretrained_optimizer = tf.train.AdamOptimizer(
        config_train.gen_learning_rate)
    var_pretrained = [
        v for v in tf.trainable_variables() if 'teller' in v.name
    ]  # The scope name 'teller' prevents a name collision with the target LSTM
    gradients, variables = zip(*pretrained_optimizer.compute_gradients(
        generator.pretrained_loss, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    gen_pre_upate = pretrained_optimizer.apply_gradients(
        zip(gradients, variables))

    #Initialize all variables
    sess = tf.Session(config=config_hardware)
    sess.run(tf.global_variables_initializer())

    # Initialize the generator's data loader
    # generate_samples(sess, target_lstm, config_train.batch_size, config_train.generated_num, config_train.positive_file)
    gen_data_loader.create_batches(config_train.positive_file)

    #Start pretraining
    log = open('save/experiment-log.txt', 'w')
    print('Start pre-training generator...')
    log.write('pre-training...\n')
    for epoch in range(config_train.pretrained_epoch_num):
        gen_data_loader.reset_pointer()
        for it in range(gen_data_loader.num_batch):
            batch = gen_data_loader.next_batch()
            _, g_loss = sess.run(
                [gen_pre_upate, generator.pretrained_loss],
                feed_dict={
                    generator.input_seqs_pre: batch,
                    generator.input_seqs_mask: np.ones_like(batch)
                })
        if epoch % config_train.test_per_epoch == 0:
            # generate_samples(sess, generator, config_train.batch_size, config_train.generated_num, config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'test_loss ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print('Start pre-training discriminator...')
    for t in range(config_train.dis_update_time_pre):
        print("Times: " + str(t))
        generate_samples(sess, generator, config_train.batch_size,
                         config_train.generated_num,
                         config_train.negative_file)
        dis_data_loader.load_train_data(config_train.positive_file,
                                        config_train.negative_file)
        for _ in range(config_train.dis_update_epoch_pre):
            dis_data_loader.reset_pointer()
            for it in range(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x:
                    x_batch,
                    discriminator.input_y:
                    y_batch,
                    discriminator.dropout_keep_prob:
                    config_dis.dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    #Build optimizer op for adversarial training
    train_adv_opt = tf.train.AdamOptimizer(config_train.gen_learning_rate)
    gradients, variables = zip(*train_adv_opt.compute_gradients(
        generator.gen_loss_adv, var_list=var_pretrained))
    gradients, _ = tf.clip_by_global_norm(gradients, config_train.grad_clip)
    train_adv_update = train_adv_opt.apply_gradients(zip(gradients, variables))

    #Initialize global variables of optimizer for adversarial training
    uninitialized_var = [
        e for e in tf.global_variables() if e not in tf.trainable_variables()
    ]
    init_vars_uninit_op = tf.variables_initializer(uninitialized_var)
    sess.run(init_vars_uninit_op)

    #Start adversarial training
    for total_batch in range(config_train.total_batch):
        for iter_gen in range(config_train.gen_update_time):
            samples = sess.run(generator.sample_word_list_reshape)

            feed = {"pred_seq_rollout:0": samples}
            reward_rollout = []
            # calculate the reward at each specific step t by rollout
            for iter_roll in range(config_train.rollout_num):
                rollout_list = sess.run(rollout_gen.sample_rollout_step,
                                        feed_dict=feed)
                rollout_list_stack = np.vstack(
                    rollout_list
                )  # shape: (batch_size * rollout_step, sequence_length)
                reward_rollout_seq = sess.run(
                    discriminator.ypred_for_auc,
                    feed_dict={
                        discriminator.input_x: rollout_list_stack,
                        discriminator.dropout_keep_prob: 1.0
                    })
                reward_last_tok = sess.run(discriminator.ypred_for_auc,
                                           feed_dict={
                                               discriminator.input_x: samples,
                                               discriminator.dropout_keep_prob:
                                               1.0
                                           })
                reward_allseq = np.concatenate(
                    (reward_rollout_seq, reward_last_tok), axis=0)[:, 1]
                reward_tmp = []
                for r in range(config_gen.gen_batch_size):
                    reward_tmp.append(reward_allseq[range(
                        r,
                        config_gen.gen_batch_size * config_gen.sequence_length,
                        config_gen.gen_batch_size)])
                reward_rollout.append(np.array(reward_tmp))
            rewards = np.sum(reward_rollout, axis=0) / config_train.rollout_num
            _, gen_loss = sess.run(
                [train_adv_update, generator.gen_loss_adv],
                feed_dict={
                    generator.input_seqs_adv: samples,
                    generator.rewards: rewards
                })
        if total_batch % config_train.test_per_epoch == 0 or total_batch == config_train.total_batch - 1:
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.eval_file)
            likelihood_data_loader.create_batches(config_train.eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print('total_batch: ', total_batch, 'test_loss: ', test_loss)
            log.write(buffer)

        for _ in range(config_train.dis_update_time_adv):
            generate_samples(sess, generator, config_train.batch_size,
                             config_train.generated_num,
                             config_train.negative_file)
            dis_data_loader.load_train_data(config_train.positive_file,
                                            config_train.negative_file)

            for _ in range(config_train.dis_update_epoch_adv):
                dis_data_loader.reset_pointer()
                for it in range(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x:
                        x_batch,
                        discriminator.input_y:
                        y_batch,
                        discriminator.dropout_keep_prob:
                        config_dis.dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)
    log.close()
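
The strided indexing reward_allseq[range(r, gen_batch_size * sequence_length, gen_batch_size)] in the adversarial loop above works because np.vstack(rollout_list) stacks the rollout scores step-major: rows 0..batch_size-1 hold the scores for rollouts cut at step 1, the next batch_size rows for step 2, and so on, with the scores of the finished samples appended as the last block. A tiny runnable illustration with toy numbers (not from this listing):

import numpy as np

batch_size, seq_len = 2, 3
# Stand-in for reward_allseq: row index r + batch_size * t holds the score of
# example r when the rollout was cut at step t.
reward_allseq = np.arange(batch_size * seq_len, dtype=float)

per_example = [reward_allseq[range(r, batch_size * seq_len, batch_size)]
               for r in range(batch_size)]
print(np.array(per_example))
# [[0. 2. 4.]   example 0: rewards at steps 1..3
#  [1. 3. 5.]]  example 1: rewards at steps 1..3
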
Exemple #28
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 68
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    # load generator with parameters
    generator = get_trainable_model(vocab_size)
    target_params = initialize_parameters(vocab_size)

    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    # CNNs
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open(logpath, 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            file_name = 'target_generate/pretrain_epoch' + str(epoch) + '.pkl'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

            if epoch % 100 != 0:
                os.remove(file_name)

    file_name = 'target_generate/pretrain_finished.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    file_name = 'target_generate/supervise.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    os.remove(file_name)

    print 'Start training discriminator...'
    for i in range(dis_alter_epoch):
        print 'dis_alter_epoch : ' + str(i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:

            file_name = 'target_generate/reinforce_batch' + str(
                total_batch) + '.pkl'

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if total_batch % 50 != 0:
                os.remove(file_name)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            # for _ in range(2):

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
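
dis_data_loader.load_train_data(positive_file, negative_file) is used throughout to build the discriminator's labeled training set, but its body is not shown here. A plausible minimal sketch (assumed, with read_sequences as a hypothetical helper): real sequences get the one-hot label [0, 1], generated ones [1, 0], and the two sets are shuffled together.

import numpy as np


def load_train_data(positive_file, negative_file):
    def read_sequences(path):
        with open(path) as fin:
            return [[int(tok) for tok in line.split()] for line in fin]

    positive_examples = read_sequences(positive_file)
    negative_examples = read_sequences(negative_file)
    sentences = np.array(positive_examples + negative_examples)

    # one-hot labels: [0, 1] = real (oracle/corpus) data, [1, 0] = generated data
    labels = np.concatenate([[[0, 1]] * len(positive_examples),
                             [[1, 0]] * len(negative_examples)], 0)

    shuffle = np.random.permutation(len(labels))
    return sentences[shuffle], labels[shuffle]
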
Exemple #29
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)  # For testing
    vocab_size = 5000
    dis_data_loader = Dis_dataloader(BATCH_SIZE)

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    target_params = cPickle.load(open('save/target_params.pkl'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN,
                              target_params)  # The oracle model

    discriminator = Discriminator(sequence_length=20,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    # generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(
                test_loss) + '\n'
            log.write(buffer)

    print 'Start pre-training discriminator...'
    # Train 3 epochs on the generated data and repeat this 50 times
    for epoch in range(50):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)
        dis_data_loader.load_train_data(positive_file, negative_file)
        if epoch % 5 == 0:
            print 'pre-train discriminator epoch ', epoch
        for _ in range(3):
            dis_data_loader.reset_pointer()
            for it in xrange(dis_data_loader.num_batch):
                x_batch, y_batch = dis_data_loader.next_batch()
                feed = {
                    discriminator.input_x: x_batch,
                    discriminator.input_y: y_batch,
                    discriminator.dropout_keep_prob: dis_dropout_keep_prob
                }
                _ = sess.run(discriminator.train_op, feed)

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Adversarial Training...'
    log.write('adversarial training...\n')
    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, discriminator)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(
                test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            dis_data_loader.load_train_data(positive_file, negative_file)

            for _ in range(3):
                dis_data_loader.reset_pointer()
                for it in xrange(dis_data_loader.num_batch):
                    x_batch, y_batch = dis_data_loader.next_batch()
                    feed = {
                        discriminator.input_x: x_batch,
                        discriminator.input_y: y_batch,
                        discriminator.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _ = sess.run(discriminator.train_op, feed)

    log.close()
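
The generator update sess.run(generator.g_updates, feed_dict={generator.x: samples, generator.rewards: rewards}) implements a REINFORCE-style policy gradient. A minimal sketch of that loss (assumed; seqgan_pg_loss and its arguments are illustrative names, not the repo's API): the log-probability of each sampled token is weighted by its rollout reward and the sum is negated.

import tensorflow as tf


def seqgan_pg_loss(log_probs, x, rewards, vocab_size):
    # log_probs: (batch, seq_len, vocab) log-softmax outputs of the generator
    # x:         (batch, seq_len) sampled token ids
    # rewards:   (batch, seq_len) per-timestep rewards from the rollout
    chosen_log_prob = tf.reduce_sum(tf.one_hot(x, vocab_size) * log_probs, axis=-1)
    return -tf.reduce_sum(chosen_log_prob * rewards)
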
Exemple #30
0
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    gan_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    val_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH)
    likelihood_data_loader = Gen_Data_loader(BATCH_SIZE,
                                             SEQ_LENGTH)  # For testing
    vocab_size = 5000

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    target_params = pickle.load(open('save/target_params_py3.pkl', 'rb'))
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model

    mediator = Mediator(vocab_size,
                        BATCH_SIZE,
                        EMB_DIM * 2,
                        HIDDEN_DIM * 2,
                        SEQ_LENGTH,
                        START_TOKEN,
                        name="mediator",
                        dropout_rate=M_DROPOUT_RATE,
                        learning_rate=3e-3,
                        with_professor_forcing=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First, use the oracle model to provide the positive examples, which are sampled from the oracle data distribution
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num,
                     positive_file)
    gen_data_loader.create_batches(positive_file)
    gan_data_loader.create_batches(positive_file)
    generate_samples(sess, target_lstm, BATCH_SIZE, generated_num, eval_file)
    val_data_loader.create_batches(eval_file)

    log = open('save/experiment-log.txt', 'w')
    log_nll = open('save/experiment-log-nll.txt', 'w')
    log_jsd = open('save/experiment-log-jsd.txt', 'w')
    #  pre-train generator (default 0 epochs; not recommended)
    print('Start pre-training...')
    log.write('pre-training...\n')
    saver = tf.train.Saver(tf.global_variables())
    if RESTORE:
        saver.restore(sess, "saved_model/CoT")
    for epoch in range(PRE_EPOCH_NUM):
        loss = mle_epoch(sess, generator, gen_data_loader)
        if epoch % 1 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('pre-train epoch ', epoch, 'nll_oracle ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        if epoch % 1 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('pre-train epoch ', epoch, 'nll_test ', test_loss)
            buffer = 'epoch:\t' + str(epoch) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)

    print(
        '#########################################################################'
    )
    print('Start Cooperative Training...')
    for iter_idx in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate(sess)
            rewards = mediator.get_reward(sess, samples)
            feed = {generator.x: samples, generator.rewards: rewards}
            _ = sess.run(generator.g_updates, feed_dict=feed)
        # Test
        if iter_idx % 100 == 0 or iter_idx == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)
            likelihood_data_loader.create_batches(negative_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_oracle:\t' + str(
                test_loss) + '\n'
            print('batch: ', iter_idx, 'nll_oracle: ', test_loss)
            log_nll.write(buffer)
        if iter_idx % 100 == 0:
            test_loss = target_loss(sess, generator, val_data_loader)
            print('batch:\t', iter_idx, 'nll_test ', test_loss)
            buffer = 'batch:\t' + str(iter_idx) + '\tnll_test:\t' + str(
                test_loss) + '\n'
            log_nll.write(buffer)
        # Train the mediator
        for _ in range(1):
            bnll_ = []
            """
            d_loss_ = []
            for it in range(3):
                feed = {
                    mediator.x0: gan_data_loader.next_batch(),
                    mediator.x1: generator.generate(sess)
                }
                d_loss, _ = sess.run([mediator.d_loss, mediator.d_update], feed)
                d_loss_.append(d_loss)
            """
            for it in range(1):
                feed = {
                    mediator.x0: gen_data_loader.next_batch(),
                    mediator.x1: generator.generate(sess)
                }
                bnll = sess.run(mediator.likelihood_loss, feed)
                bnll_.append(bnll)
                sess.run(mediator.dropout_on)
                _ = sess.run(mediator.likelihood_updates, feed)
                sess.run(mediator.dropout_off)
            if iter_idx % 10 == 0:
                bnll = np.mean(bnll_)
                print("mediator cooptrain iter#%d, balanced_nll %f" %
                      (iter_idx, bnll))
                log.write("%d\t%f\n" % (iter_idx, bnll))
        if iter_idx % gen_data_loader.num_batch == 0:
            jsd = jsd_calculate(sess, generator, target_lstm)
            print('cooptrain epoch#', iter_idx // gen_data_loader.num_batch,
                  'jsd ', jsd)
            log_jsd.write("%d\t%f\n" %
                          (iter_idx // gen_data_loader.num_batch, jsd))
            saver.save(sess, "saved_model/CoT")
    log.close()
    log_nll.close()
    log_jsd.close()
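
jsd_calculate(sess, generator, target_lstm) is not shown above. As a heavily hedged sketch of one standard way such an estimate can be formed (generic math, not necessarily this repo's implementation): given per-sequence log-likelihoods of both models on samples drawn from each side, the Jensen-Shannon divergence is the average of the two KL divergences to the mixture.

import numpy as np


def jsd_estimate(logp_on_p, logq_on_p, logq_on_q, logp_on_q):
    # logp_on_p / logq_on_p: log-likelihoods under P (oracle) and Q (generator)
    #                        evaluated on samples drawn from P
    # logq_on_q / logp_on_q: the same quantities on samples drawn from Q
    def kl_to_mixture(log_self, log_other):
        # Monte Carlo estimate of KL(self || (self + other) / 2)
        log_m = np.logaddexp(log_self, log_other) - np.log(2.0)
        return np.mean(log_self - log_m)

    return 0.5 * (kl_to_mixture(logp_on_p, logq_on_p) +
                  kl_to_mixture(logq_on_q, logp_on_q))
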