Example #1
def run_experiment(option, use_basic_dataset):
    TOKEN_EMB_SIZE = 54
    BATCH_SIZE = 128
    if use_basic_dataset:
        sequence_cap = 56
    else:
        sequence_cap = 130

    X_SHAPE = (sequence_cap, TOKEN_EMB_SIZE)

    # set up pipeline
    print('Setting up data pipeline')
    NUMBER_BATCHES = 1000
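    # huzzer is the project's Haskell program generator; BASIC_DATASET_ARGS
    # presumably restricts it to the simpler grammar used for the basic dataset.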
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='simple_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard', NUMBER_BATCHES,
            BATCH_SIZE),
        huzzer_kwargs=huzzer_kwargs)
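    # build_single_output_queue is a project helper that presumably wraps a
    # TensorFlow input queue fed from the datasource callable.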
    queue = build_single_output_queue(datasource,
                                      output_shape=(BATCH_SIZE, sequence_cap,
                                                    TOKEN_EMB_SIZE),
                                      type=tf.uint8)
    raw_input_sequences = queue.dequeue(name='encoder_input')
    input_sequences = tf.cast(raw_input_sequences, tf.float32)
    if option.startswith('simple_'):
        z_size = int(option.split('_')[-1])
        build_simple_network2(input_sequences,
                              X_SHAPE,
                              latent_dim=z_size,
                              kl_limit=0.0)
    elif option == 'conv':
        z_size = 128
        build_special_conv4_final(
            input_sequences,
            X_SHAPE,
            z_size,
            filter_length=3,
            num_filters=128,
        )
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}{}'.format(
        'basic_' if use_basic_dataset else '', option)

    sv = Supervisor(logdir=logdir, save_summaries_secs=10, save_model_secs=120)
    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        for i in range(20000):
            if sv.should_stop():
                exit()
            sess.run('train_on_batch')
Example #2
def run_experiment(option, use_basic_dataset):
    sequence_cap = 56 if use_basic_dataset else 130
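    # BATCH_SIZE, NUMBER_BATCHES, TOKEN_EMB_SIZE and BASEDIR are module-level
    # constants defined elsewhere in the module (not shown in this snippet).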
    print('Setting up data pipeline...')

    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard', NUMBER_BATCHES,
            BATCH_SIZE),
        huzzer_kwargs=huzzer_kwargs)
    queue = build_single_output_queue(datasource,
                                      output_shape=(BATCH_SIZE, sequence_cap,
                                                    TOKEN_EMB_SIZE),
                                      type=tf.uint8)
    raw_input_sequences = queue.dequeue(name='input_sequence')
    sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32))
    input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model...')
    if option.startswith('attention1'):
        z_size = int(option.split('_')[-1])
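        # Standard VAE wiring: encode the batch, reparameterize to z, then decode
        # back to per-token logits; encoder_output presumably holds the means and
        # log-variances consumed by resampling() and kl_divergence().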
        encoder_output = build_single_program_encoder(input_sequences,
                                                      sequence_lengths, z_size)
        z_resampled = resampling(encoder_output)
        decoder_output, _ = build_attention1_decoder(z_resampled,
                                                     sequence_lengths,
                                                     sequence_cap,
                                                     TOKEN_EMB_SIZE)
        cross_entropy_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(decoder_output, input_sequences,
                                       sequence_lengths, sequence_cap))
        kl_loss = tf.reduce_mean(kl_divergence(encoder_output))
    else:
        print('INVALID OPTION')
        exit(1)

    total_loss_op = kl_loss + cross_entropy_loss
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('total_loss', total_loss_op)
    logdir = os.path.join(BASEDIR,
                          ('basic_' if use_basic_dataset else '') + option)

    optimizer = tf.train.AdamOptimizer(1e-3)
    print('creating train op...')
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)
    print('starting supervisor...')
    sv = Supervisor(logdir=logdir, save_model_secs=300, save_summaries_secs=60)
    print('training...')
    with sv.managed_session() as sess:
        while not sv.should_stop():
            total_loss, _ = sess.run([total_loss_op, train_op])
Example #3
def run_experiment(option):
    BATCH_SIZE = 128
    X_SHAPE = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    data_pipeline = one_hot_token_pipeline(for_cnn=False, length=128)

    # Function to pass into queue
    batch_index = 0

    def get_batch():
        nonlocal batch_index

        if batch_index % 100 == 0:
            logging.info('{} examples used'.format(batch_index * BATCH_SIZE))
        code_seeds = [
            str(i) for i in range(batch_index * BATCH_SIZE, (batch_index + 1) *
                                  BATCH_SIZE)
        ]

        batch = np.array(data_pipeline[code_seeds])
        batch_index += 1
        return batch

    # use the queue for training
    queue = build_single_output_queue(get_batch, (BATCH_SIZE, *X_SHAPE))
    x = queue.dequeue(name='encoder_input')
    if option == 'simple':
        print('this no longer works - a small refactor would make it work')
        tensor_names = build_simple_network(x, BATCH_SIZE, (256, 54))
    elif option == 'conv1':
        tensor_names = build_conv1(x, (128, 54))
    elif option == 'conv2':
        tensor_names = build_conv2(x, (128, 54), 32)
    elif option == 'conv3':
        tensor_names = build_conv3(x, (128, 54), 64)
    elif option == 'conv4':
        tensor_names = build_conv4(x, (128, 54), 64)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}'.format(option)

    sv = Supervisor(logdir=logdir, save_summaries_secs=20, save_model_secs=120)
    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        while not sv.should_stop():
            sess.run('train_on_batch')
Example #4
def run_experiment(option):
    BATCH_SIZE = 32

    if option == 'mnist_digits':
        gen = mnist_unlabeled_generator(BATCH_SIZE, for_cnn=True)
        batch_shape = gen()[0].shape

        print('batch shape is : {}'.format(batch_shape))
        get_batch = lambda: gen()[0]

        queue = build_single_output_queue(get_batch, batch_shape)
        x = queue.dequeue(name='real_input')
        training_ops = build_mnist_gan_for_training(x)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/GAN_baseline/{}'.format(option)

    sv = Supervisor(logdir=logdir, save_summaries_secs=20, save_model_secs=120)
    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.

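        # Balancing heuristic: only run a discriminator update when its loss
        # exceeds the generator's; the commented-out lines below are earlier
        # variants of this schedule.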
        d_loss = 5
        g_loss = 4
        steps_without_d_training = 0
        i = 0
        while not sv.should_stop():
            # if d_loss > 0.5 or steps_without_d_training > 50:
            if d_loss > g_loss or i > 6000:
                steps_without_d_training = 0
                d_loss = sess.run(training_ops['train_discriminator'])
            # else:
            #     steps_without_d_training += 1

            # sess.run(training_ops['train_discriminator'])
            g_loss = sess.run(training_ops['train_generator'])
Example #5
def run_experiment(option):
    BATCH_SIZE = 128
    X_SHAPE = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    NUMBER_BATCHES = 500
    dataset = one_hot_token_dataset(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=128,
        cache_path='one_hot_token_haskell_batch{}_number{}'.format(BATCH_SIZE, NUMBER_BATCHES)
    )

    def get_batch():
        return dataset()[0]


    # use the queue for training
    queue = build_single_output_queue(get_batch, (BATCH_SIZE, *X_SHAPE))
    x = queue.dequeue(name='encoder_input')
    if option == 'simple':
        tensor_names = build_simple_network2(x, X_SHAPE, 32)
    elif option == 'simple_double_latent':
        tensor_names = build_simple_network2(x, X_SHAPE, 64)
    elif option == 'simple_256':
        tensor_names = build_simple_network2(x, X_SHAPE, 256)
    elif option == 'simple_1024':
        tensor_names = build_simple_network2(x, X_SHAPE, 1024)
    elif option == 'simple_8192':
        tensor_names = build_simple_network2(x, X_SHAPE, 8192)
    elif option == 'conv_special':
        tensor_names = build_special_conv(x, X_SHAPE, 64)
    elif option == 'conv_special_low_kl':
        tensor_names = build_special_conv_low_kl(x, X_SHAPE, 64)
    elif option == 'conv_special2':
        tensor_names = build_special_conv2(x, X_SHAPE, 64)
    elif option == 'conv_special2_l1':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 64)
    elif option == 'conv_special2_l1_128':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 128)

    # conv3 is conv2 but with initial filter length of 5 instead of 1
    elif option == 'conv_special3_l1_128':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 128, filter_length=5)
    elif option == 'conv_special3_l1_256':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 256, filter_length=5)
    elif option == 'conv_special3_l1_128f_256':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 256, filter_length=5, num_filters=128)
    elif option == 'conv_special3_big_l1_512':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 512, filter_length=10)
    elif option == 'conv_special4_l1_1024':
        tensor_names = build_special_conv4_l1(x, X_SHAPE, 1024, filter_length=3, num_filters=256)
    elif option == 'conv_special4_l1_2048_f5':
        tensor_names = build_special_conv4_l1(x, X_SHAPE, 1024, filter_length=5, num_filters=256)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}_sss'.format(option)

    sv = Supervisor(
        logdir=logdir,
        save_summaries_secs=20,
        save_model_secs=120
    )
    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        while not sv.should_stop():
            sess.run('train_on_batch')
Example #6
def run_experiment(option, use_basic_dataset):
    assert os.path.isdir(os.path.join(BASEDIR, 'pretrained_weights')), 'weights files are missing'

    sequence_cap = 56 if use_basic_dataset else 130
    print('Setting up data pipeline...')

    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    real_sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    real_input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model...')
    if option.startswith('attention1_gan_no_pretrain'):
        z_size = int(option.split('_')[-1])

        random_vector = tf.random_normal(
            dtype=tf.float32,
            shape=[BATCH_SIZE, z_size],
            mean=0,
            stddev=0.1  # because that is what we will use when generating
        )

        # we do not know the length of the generated code beforehand, so we pass in
        # sequence lengths of `sequence_cap`
        full_lengths = tf.constant(
            [sequence_cap for _ in range(BATCH_SIZE)],
            dtype=tf.float32,
            name='generator_lengths'
        )

        # create the scaling const. k_t
        k_t = tf.Variable(0., trainable=False, name='k_t')

        # generator gets restored weights, and so does the discriminator
        with tf.variable_scope('generator'):
            unnormalized_generated_programs, _ = build_attention1_decoder(
                random_vector, full_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            generated_programs = tf.nn.softmax(
                unnormalized_generated_programs, dim=-1, name='generated_programs'
            )
            generated_lengths = get_sequence_lengths(generated_programs, epsilon=0.01)

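        # BEGAN-style discriminator: an autoencoder that reconstructs both the
        # generated and the real programs; its reconstruction errors provide the
        # adversarial losses below.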
        with tf.variable_scope('discriminator'):
            sequence_lengths = tf.concat([generated_lengths, real_sequence_lengths], axis=0)
            encoder_output = build_single_program_encoder(
                tf.concat([generated_programs, real_input_sequences], axis=0),
                sequence_lengths,
                z_size
            )
            # get the values corresponding to the mus from the encoder output
            assert encoder_output.get_shape()[1].value == 2 * z_size
            encoded_v = encoder_output[:, :z_size]
            reconstructed, _ = build_attention1_decoder(
                encoded_v, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            # these are the unnormalized_token_probs for g and d
            generated_reconstructed = reconstructed[:BATCH_SIZE]
            real_reconstructed = reconstructed[BATCH_SIZE:]

        generator_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=generated_reconstructed,
                input_sequences=generated_programs,
                sequence_lengths=generated_lengths,
                max_length=sequence_cap
            )
        )
        real_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=real_reconstructed,
                input_sequences=real_input_sequences,
                sequence_lengths=real_sequence_lengths,
                max_length=sequence_cap
            )
        )
        discriminator_loss = real_loss - (k_t * generator_loss)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating discriminator train op...')
        d_train_op = slim.learning.create_train_op(discriminator_loss, optimizer)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating generator train op...')
        g_train_op = slim.learning.create_train_op(generator_loss, optimizer)

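        # BEGAN equilibrium terms: GAMMA (diversity ratio) and LAMBDA (update rate
        # for k_t) are assumed to be module-level constants not shown here;
        # `measure` is the BEGAN convergence measure.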
        balance = GAMMA * real_loss - generator_loss
        measure = real_loss + tf.abs(balance)

        # update k_t
        with tf.control_dependencies([d_train_op, g_train_op]):
            k_update = tf.assign(
                k_t, tf.clip_by_value(k_t + LAMBDA * balance, 0, 1))

        # example_summary_op = tf.summary.merge([
        #     tf.summary.image("G", tf.expand_dims(generated_programs, -1)),
        #     tf.summary.image("AE_G", tf.expand_dims(
        #         tf.nn.softmax(generated_reconstructed, dim=-1), axis=-1
        #     )),
        #     tf.summary.image("AE_x", tf.expand_dims(
        #         tf.nn.softmax(real_reconstructed, dim=-1), axis=-1
        #     ))
        # ])

        perf_summary_op = tf.summary.merge([
            tf.summary.scalar("loss/discriminator_loss", discriminator_loss),
            tf.summary.scalar("loss/real_loss", real_loss),
            tf.summary.scalar("loss/generator_loss", generator_loss),
            tf.summary.scalar("misc/measure", measure),
            tf.summary.scalar("misc/k_t", k_t),
            tf.summary.scalar("misc/balance", balance),
        ])

    else:
        print('INVALID OPTION')
        exit(1)

    logdir = os.path.join(BASEDIR, ('basic_' if use_basic_dataset else '') + option + '_gan')

    # build the model and initialise weights so supervisor can start where we left off
    # if not os.path.isdir(logdir):
    #     mkdir_p(logdir)
    #     with tf.Session() as sess:
    #         print('saving initial pretrained weights')
    #         with tf.variable_scope('', reuse=True):
    #             discriminator_vars = [
    #                 tf.get_variable('discriminator/decoder_fully_connected/bias'),
    #                 tf.get_variable('discriminator/decoder_fully_connected/weights'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/simple_attention/bias'),
    #                 tf.get_variable('discriminator/simple_attention/weights'),
    #             ]
    #             generator_vars = [
    #                 tf.get_variable('generator/decoder_fully_connected/bias'),
    #                 tf.get_variable('generator/decoder_fully_connected/weights'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('generator/simple_attention/bias'),
    #                 tf.get_variable('generator/simple_attention/weights'),
    #             ]
    #
    #         discriminator_saver = tf.train.Saver(
    #             discriminator_vars
    #         )
    #         generator_saver = tf.train.Saver(
    #             generator_vars
    #         )
    #         sess.run(tf.global_variables_initializer())
    #         discriminator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'discriminator_weights.cpkt')
    #         )
    #         generator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'generator_weights.cpkt')
    #         )
    #
    #         saver = tf.train.Saver()
    #         saver.save(sess, os.path.join(logdir, 'model.cpkt-0'))

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60,
        summary_op=perf_summary_op
    )
    print('training...')
    with sv.managed_session() as sess:

        global_step = -1
        while not sv.should_stop():
            ops = {
                'k_update': k_update,
                'measure': measure,
                'd_train_op': d_train_op,
                'g_train_op': g_train_op,
                'global_step': sv.global_step
            }
            # if global_step % 200 == 0:
                # ops.update({'images': example_summary_op})

            results = sess.run(ops)