Exemple #1
0
def run_experiment(option, use_basic_dataset):
    """Build and train the attention1 auto-encoder selected by `option`.

    The function wires a queue-fed batch of one-hot token sequences through
    an encoder, a resampling step and an attention decoder, minimises the sum
    of the mean KL term and the mean sequence cross-entropy with Adam (1e-3),
    and trains under a `Supervisor` until it asks to stop.

    Args:
        option: Experiment name. Must start with 'attention1'; the text after
            the last underscore is parsed as the integer latent size.
        use_basic_dataset: When truthy, BASIC_DATASET_ARGS is passed to the
            batcher, sequences are capped at 56 tokens and the log directory
            gets a 'basic_' prefix; otherwise the cap is 130.

    Raises:
        SystemExit: If `option` does not start with 'attention1'.
        ValueError: If the suffix of `option` is not an integer.
    """
    print('Setting up data pipeline...')
    if use_basic_dataset:
        sequence_cap = 56
        dataset_tag = 'basic'
        logdir_prefix = 'basic_'
        huzzer_kwargs = BASIC_DATASET_ARGS
    else:
        sequence_cap = 130
        dataset_tag = 'standard'
        logdir_prefix = ''
        huzzer_kwargs = {}

    cache_path = 'attention_models_{}_{}_{}'.format(
        dataset_tag, NUMBER_BATCHES, BATCH_SIZE)
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path=cache_path,
        huzzer_kwargs=huzzer_kwargs)

    batch_shape = (BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE)
    queue = build_single_output_queue(
        datasource, output_shape=batch_shape, type=tf.uint8)

    # The queue yields uint8 batches: an int32 view is used to derive the
    # lengths, a float32 view is what the model consumes.
    raw_batch = queue.dequeue(name='input_sequence')
    sequence_lengths = get_sequence_lengths(tf.cast(raw_batch, tf.int32))
    input_sequences = tf.cast(raw_batch, tf.float32)

    print('Building model..')
    if not option.startswith('attention1'):
        print('INVALID OPTION')
        exit(1)

    # The latent size is encoded as the last '_'-separated field of `option`.
    z_size = int(option.rsplit('_', 1)[-1])
    encoder_output = build_single_program_encoder(
        input_sequences, sequence_lengths, z_size)
    z_resampled = resampling(encoder_output)
    decoder_output, _ = build_attention1_decoder(
        z_resampled, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE)

    per_example_ce = ce_loss_for_sequence_batch(
        decoder_output, input_sequences, sequence_lengths, sequence_cap)
    cross_entropy_loss = tf.reduce_mean(per_example_ce)
    kl_loss = tf.reduce_mean(kl_divergence(encoder_output))

    total_loss_op = kl_loss + cross_entropy_loss
    for tag, tensor in (('cross_entropy_loss', cross_entropy_loss),
                        ('kl_loss', kl_loss),
                        ('total_loss', total_loss_op)):
        tf.summary.scalar(tag, tensor)

    logdir = os.path.join(BASEDIR, logdir_prefix + option)

    optimizer = tf.train.AdamOptimizer(1e-3)
    print('creating train op...')
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)

    print('starting supervisor...')
    supervisor = Supervisor(
        logdir=logdir, save_model_secs=300, save_summaries_secs=60)

    print('training...')
    with supervisor.managed_session() as sess:
        # The fetched loss value is not used; the run is for its side effects.
        while not supervisor.should_stop():
            sess.run([total_loss_op, train_op])
def run_experiment(option, use_basic_dataset):
    """Build and train the auto-encoding GAN selected by `option`.

    A generator (an attention1 decoder fed with random noise) emits token
    distributions. A discriminator (program encoder followed by an attention1
    decoder) reconstructs both the generated and the real programs. The
    discriminator minimises ``real_loss - k_t * generator_loss`` and the
    generator minimises the reconstruction loss of its own samples; ``k_t``
    is nudged by ``LAMBDA * (GAMMA * real_loss - generator_loss)`` after every
    step and clipped to [0, 1]. (This looks like the BEGAN equilibrium
    scheme -- confirm against the intended reference.)

    Args:
        option: Experiment name. Must start with 'attention1_gan_no_pretrain';
            the text after the last underscore is parsed as the integer
            latent size.
        use_basic_dataset: When truthy, BASIC_DATASET_ARGS is passed to the
            batcher, sequences are capped at 56 tokens and the log directory
            gets a 'basic_' prefix; otherwise the cap is 130.

    Raises:
        AssertionError: If BASEDIR/pretrained_weights is not a directory.
        SystemExit: If `option` is not a recognised experiment name.
        ValueError: If the suffix of `option` is not an integer.
    """
    # NOTE(review): the only supported option is the "no_pretrain" variant and
    # the restore code below is commented out, so this check may be stale.
    assert os.path.isdir(os.path.join(BASEDIR, 'pretrained_weights')), 'weights files are missing'

    sequence_cap = 56 if use_basic_dataset else 130
    print('Setting up data pipeline...')

    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    real_sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    real_input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1_gan_no_pretrain'):
        z_size = int(option.split('_')[-1])

        random_vector = tf.random_normal(
            dtype=tf.float32,
            shape=[BATCH_SIZE, z_size],
            mean=0,
            stddev=0.1  # because that is what we will use when generating
        )

        # we do not know the length of the generated code beforehand, so we pass in
        # sequence lengths of `sequence_cap`
        # NOTE(review): lengths are declared float32 here; confirm that
        # build_attention1_decoder accepts non-integer sequence lengths.
        full_lengths = tf.constant(
            [sequence_cap for _ in range(BATCH_SIZE)],
            dtype=tf.float32,
            name='generator_lengths'
        )

        # create the scaling const. k_t
        k_t = tf.Variable(0., trainable=False, name='k_t')

        # Generator and discriminator live in separate variable scopes; the
        # (currently commented-out) pretrained-weight restore further down
        # looks the variables up by these scope prefixes.
        with tf.variable_scope('generator'):
            unnormalized_generated_programs, _ = build_attention1_decoder(
                random_vector, full_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            generated_programs = tf.nn.softmax(
                unnormalized_generated_programs, dim=-1, name='generated_programs'
            )
            generated_lengths = get_sequence_lengths(generated_programs, epsilon=0.01)

        with tf.variable_scope('discriminator'):
            # Generated and real batches are stacked along axis 0 so a single
            # encoder/decoder pass handles both; they are split again below.
            sequence_lengths = tf.concat([generated_lengths, real_sequence_lengths], axis=0)
            encoder_output = build_single_program_encoder(
                tf.concat([generated_programs, real_input_sequences], axis=0),
                sequence_lengths,
                z_size
            )
            # get the values corresponding to mus from the encoder output
            assert encoder_output.get_shape()[1].value == 2 * z_size
            encoded_v = encoder_output[:, :z_size]
            reconstructed, _ = build_attention1_decoder(
                encoded_v, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            # these are the unnormalized_token_probs for g and d
            generated_reconstructed = reconstructed[:BATCH_SIZE]
            real_reconstructed = reconstructed[BATCH_SIZE:]

        generator_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=generated_reconstructed,
                input_sequences=generated_programs,
                sequence_lengths=generated_lengths,
                max_length=sequence_cap
            )
        )
        # The real batch must be scored against its own lengths; the previous
        # code passed `generated_lengths` here, which belong to the generated
        # half of the stacked batch.
        real_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=real_reconstructed,
                input_sequences=real_input_sequences,
                sequence_lengths=real_sequence_lengths,
                max_length=sequence_cap
            )
        )
        discriminator_loss = real_loss - (k_t * generator_loss)

        # NOTE(review): neither train op restricts its variable list, so each
        # presumably updates every trainable variable (generator and
        # discriminator alike) -- confirm this is intended.
        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating discriminator train op...')
        d_train_op = slim.learning.create_train_op(discriminator_loss, optimizer)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating generator train op...')
        g_train_op = slim.learning.create_train_op(generator_loss, optimizer)

        balance = GAMMA * real_loss - generator_loss
        measure = real_loss + tf.abs(balance)

        # update k_t only after both train ops of this step have run
        with tf.control_dependencies([d_train_op, g_train_op]):
            k_update = tf.assign(
                k_t, tf.clip_by_value(k_t + LAMBDA * balance, 0, 1))

        # example_summary_op = tf.summary.merge([
        #     tf.summary.image("G", tf.expand_dims(generated_programs, -1)),
        #     tf.summary.image("AE_G", tf.expand_dims(
        #         tf.nn.softmax(generated_reconstructed, dim=-1), axis=-1
        #     )),
        #     tf.summary.image("AE_x", tf.expand_dims(
        #         tf.nn.softmax(real_reconstructed, dim=-1), axis=-1
        #     ))
        # ])

        perf_summary_op = tf.summary.merge([
            tf.summary.scalar("loss/discriminator_loss", discriminator_loss),
            tf.summary.scalar("loss/real_loss", real_loss),
            tf.summary.scalar("loss/generator_loss", generator_loss),
            tf.summary.scalar("misc/measure", measure),
            tf.summary.scalar("misc/k_t", k_t),
            tf.summary.scalar("misc/balance", balance),
        ])

    else:
        print('INVALID OPTION')
        exit(1)

    logdir = os.path.join(BASEDIR, ('basic_' if use_basic_dataset else '') + option + '_gan')

    # build the model and initialise weights so supervisor can start where we left off
    # if not os.path.isdir(logdir):
    #     mkdir_p(logdir)
    #     with tf.Session() as sess:
    #         print('saving initial pretrained weights')
    #         with tf.variable_scope('', reuse=True):
    #             discriminator_vars = [
    #                 tf.get_variable('discriminator/decoder_fully_connected/bias'),
    #                 tf.get_variable('discriminator/decoder_fully_connected/weights'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/simple_attention/bias'),
    #                 tf.get_variable('discriminator/simple_attention/weights'),
    #             ]
    #             generator_vars = [
    #                 tf.get_variable('generator/decoder_fully_connected/bias'),
    #                 tf.get_variable('generator/decoder_fully_connected/weights'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('generator/simple_attention/bias'),
    #                 tf.get_variable('generator/simple_attention/weights'),
    #             ]
    #
    #         discriminator_saver = tf.train.Saver(
    #             discriminator_vars
    #         )
    #         generator_saver = tf.train.Saver(
    #             generator_vars
    #         )
    #         sess.run(tf.global_variables_initializer())
    #         discriminator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'discriminator_weights.cpkt')
    #         )
    #         generator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'generator_weights.cpkt')
    #         )
    #
    #         saver = tf.train.Saver()
    #         saver.save(sess, os.path.join(logdir, 'model.cpkt-0'))

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60,
        summary_op=perf_summary_op
    )
    print('training...')
    with sv.managed_session() as sess:
        # The fetch set never changes, so it is built once. The fetched
        # values are not used; each run is for its side effects. To log
        # example images, add the (commented-out) example_summary_op to the
        # fetches every N steps.
        fetches = {
            'k_update': k_update,
            'measure': measure,
            'd_train_op': d_train_op,
            'g_train_op': g_train_op,
            'global_step': sv.global_step
        }
        while not sv.should_stop():
            sess.run(fetches)
import tensorflow as tf
from os.path import join

from models import build_single_program_encoder, build_attention1_decoder
build_attention1_decoder

# Location of a trained model, relative to the working directory.
# NOTE(review): not referenced in the lines visible here -- presumably used
# further down to restore a checkpoint; confirm.
path_to_model = 'experiments/RVAE_attention/basic_attention1_128'

# Latent size; matches the trailing "128" in the model path above.
z_size = 128
max_len = 4
# All-zero placeholder inputs of shape (1, 4, 54) with a zero sequence length.
# The encoder/decoder appear to be built on dummy data only so that their
# variables exist in the graph for the reuse-scope lookups that follow.
input_sequences = tf.zeros((1, 4, 54))
sequence_lengths = tf.zeros((1), tf.int32)
encoder_output = build_single_program_encoder(input_sequences,
                                              sequence_lengths, z_size)
# Only the first `z_size` columns of the encoder output are fed to the decoder.
# NOTE(review): the result is bound without unpacking, so `decoder_output`
# holds whatever build_attention1_decoder returns as a whole.
decoder_output = build_attention1_decoder(encoder_output[:, :z_size],
                                          sequence_lengths, max_len, 54)

with tf.variable_scope('', reuse=True):
    discriminator_var_list = {
        'discriminator/decoder_fully_connected/bias':
        tf.get_variable('decoder_fully_connected/bias', [54]),
        'discriminator/decoder_fully_connected/weights':
        tf.get_variable('decoder_fully_connected/weights', [128, 54]),
        'discriminator/decoder_rnn/lstm_cell/biases':
        tf.get_variable('decoder_rnn/lstm_cell/biases', [512]),
        'discriminator/decoder_rnn/lstm_cell/weights':
        tf.get_variable('decoder_rnn/lstm_cell/weights', [256, 512]),
        'discriminator/rnn/lstm_cell/biases':
        tf.get_variable('rnn/lstm_cell/biases', [1024]),
        'discriminator/rnn/lstm_cell/weights':
        tf.get_variable('rnn/lstm_cell/weights', [310, 1024]),
Exemple #4
0
def analyze_model(option, use_basic_dataset):
    """Sample programs from a trained generator and dump them to disk.

    The generator decoder is rebuilt under the 'generator' variable scope, the
    latest checkpoint found in ``<BASEDIR>/[basic_]<option>`` is restored, and
    NUMBER_OF_EXAMPLES latent vectors are drawn from N(0, 0.1). For every
    sample a directory ``<BASEDIR>/[basic_]<option>_examples/generated/<i>``
    is created containing ``z.png``, the attention-weight visualisation,
    ``decoder_output.png`` and ``generated_code.hs``.

    Args:
        option: Experiment name. Must start with 'attention1_gan'; the text
            after the last underscore is parsed as the integer latent size.
        use_basic_dataset: When truthy, the sequence cap is 56 and directory
            names get a 'basic_' prefix; otherwise the cap is 130.

    Raises:
        SystemExit: If `option` does not start with 'attention1_gan'.
        ValueError: If the suffix of `option` is not an integer.
    """
    BASEDIR = os.path.dirname(os.path.realpath(__file__))
    sequence_cap = 56 if use_basic_dataset else 130
    TOKEN_EMB_SIZE = 54
    NUMBER_OF_EXAMPLES = 1000

    if option.startswith('attention1_gan'):
        z_size = int(option.split('_')[-1])
        directory = '{}{}'.format(
            'basic_' if use_basic_dataset else '',
            option
        )
        # input_sequence_t = tf.placeholder(
        #     shape=[1, sequence_cap, TOKEN_EMB_SIZE],
        #     dtype=tf.float32,
        #     name='input_sequence'
        # )
        # sequence_lengths_t = get_sequence_lengths(
        #     tf.cast(input_sequence_t, tf.int32)
        # )
        # mus_and_log_sigs = build_single_program_encoder(
        #     input_sequence_t,
        #     sequence_lengths_t,
        #     z_size
        # )
        # z = mus_and_log_sigs[:, :z_size]
        decoder_input = tf.placeholder(
            shape=[1, z_size],
            dtype=tf.float32,
            name='generator_input'
        )
        # The generated length is unknown up front, so decode the full cap.
        sequence_lengths_t = tf.constant([sequence_cap])
        with tf.variable_scope('generator'):
            decoder_output, attention_weights_t = build_attention1_decoder(
                decoder_input,
                sequence_lengths_t,
                sequence_cap,
                TOKEN_EMB_SIZE
            )
        token_probs_t = tf.nn.softmax(decoder_output)

        print('z_size={}'.format(z_size))
    else:
        exit('invalid option')

    # huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}

    # print('Setting up data pipeline...')
    # dataset = one_hot_token_dataset(
    #     batch_size=1,
    #     number_of_batches=1000,
    #     cache_path='{}model_analysis_attention'.format(
    #         'basic_' if use_basic_dataset else ''
    #     ),
    #     length=sequence_cap,
    #     huzzer_kwargs=huzzer_kwargs
    # )
    #
    # def get_input():
    #     return np.squeeze(dataset()[0], axis=0).astype(np.float32)

    path = join(BASEDIR, directory)

    saver = tf.train.Saver()

    # examples = [get_input() for i in range(NUMBER_OF_EXAMPLES)]

    # The session is used as a context manager so it is closed (and its
    # resources released) even if restoring or generation raises.
    with tf.Session() as sess:
        print('Restoring variables...')

        saver.restore(
            sess, tf.train.latest_checkpoint(path, 'checkpoint.txt')
        )
        examples_dir = join(BASEDIR, ('basic_' if use_basic_dataset else '') + option + '_examples')
        mkdir_p(examples_dir)

        # # Autoencode_bit
        # autoencoded_examples_path = join(examples_dir, 'autoencoded')
        # message = 'Autencoding {} examples'.format(
        #     len(examples),
        # )
        # for i, input_sequence in enumerate(tqdm(
        #     examples, desc=message, total=len(examples)
        # )):
        #     dir_for_example = join(autoencoded_examples_path, str(i))
        #     mkdir_p(dir_for_example)
        #
        #     z_mus, length = sess.run([z, sequence_lengths_t], feed_dict={
        #         input_sequence_t: np.expand_dims(input_sequence, 0),
        #     })
        #
        #     token_probs, attention_weights = sess.run(
        #         [token_probs_t, attention_weights_t],
        #         feed_dict={
        #             decoder_input: z_mus
        #         }
        #     )
        #     length = np.squeeze(length)
        #     token_probs = np.squeeze(token_probs)[:length]
        #     attention_weights = attention_weights[:length]
        #     input_sequence = input_sequence[:length]
        #
        #     input_code = example_to_code(input_sequence)
        #     output_code = example_to_code(token_probs)
        #     visualize_attention_weights(output_code, attention_weights, join(dir_for_example, 'attention_weights'))
        #
        #     input_code = example_to_code(input_sequence)
        #     write_to_file(join(dir_for_example, 'input.hs'), input_code)
        #     imsave(join(dir_for_example, 'input.png'), input_sequence.T)
        #     imsave(join(dir_for_example, 'z.png'), z_mus.reshape((z_mus.size // 32, 32)))
        #
        #     imsave(join(dir_for_example, 'decoder_output.png'), token_probs.T)
        #     write_to_file(join(dir_for_example, 'autoencoded_code.hs'), output_code)

        # generate_bit
        generated_examples_path = join(examples_dir, 'generated')
        message = 'Generating {} examples'.format(
            NUMBER_OF_EXAMPLES,
        )
        for i in tqdm(
            range(NUMBER_OF_EXAMPLES), desc=message, total=NUMBER_OF_EXAMPLES
        ):
            dir_for_example = join(generated_examples_path, str(i))
            mkdir_p(dir_for_example)

            z_gen = np.random.normal(0, 0.1, z_size)
            # The latent vector is saved as a 32-column image, so z_size must
            # be a multiple of 32 for the reshape to succeed.
            imsave(join(dir_for_example, 'z.png'), z_gen.reshape((z_gen.size // 32, 32)))
            token_probs, attention_weights = sess.run(
                [token_probs_t, attention_weights_t],
                feed_dict={
                    decoder_input: np.expand_dims(z_gen, 0)
                }
            )
            token_probs = np.squeeze(token_probs)
            tokens = np.argmax(token_probs, axis=-1)
            # Cut at the first position whose most likely token is 0; argmax
            # yields 0 when there is none, which falls back to the full cap.
            # NOTE(review): a token 0 at position 0 also yields 0 and so keeps
            # the whole sequence instead of producing an empty program --
            # confirm this is acceptable.
            end_of_code = np.argmax(tokens == 0) or sequence_cap
            token_probs = token_probs[:end_of_code]
            attention_weights = attention_weights[:end_of_code]
            output_code = example_to_code(token_probs)

            visualize_attention_weights(output_code, attention_weights, join(dir_for_example, 'attention_weights'))

            imsave(join(dir_for_example, 'decoder_output.png'), token_probs.T)
            # Reuse the code decoded above rather than decoding it a second time.
            write_to_file(join(dir_for_example, 'generated_code.hs'), output_code)