Exemple #1
0
def run_experiment(option, use_basic_dataset):
    """Build the VAE baseline graph selected by ``option`` and train it.

    Args:
        option: ``'conv'``, or a string starting with ``'simple_'`` whose last
            ``_``-separated field is parsed with ``int`` as the latent size.
            Anything else prints ``INVALID OPTION`` and exits with status 1
            (after the data pipeline has already been created).
        use_basic_dataset: If truthy, sequences are capped at 56 tokens and
            ``BASIC_DATASET_ARGS`` is forwarded to the batcher; otherwise the
            cap is 130 and no extra batcher arguments are passed.
    """
    token_emb_size = 54
    batch_size = 128
    sequence_cap = 56 if use_basic_dataset else 130
    x_shape = (sequence_cap, token_emb_size)

    # Data pipeline: random batcher -> queue -> float32 input tensor.
    print('Setting up data pipeline')
    number_batches = 1000
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    dataset_label = 'basic' if use_basic_dataset else 'standard'
    cache_path = 'simple_models_{}_{}_{}'.format(
        dataset_label, number_batches, batch_size)
    datasource = one_hot_token_random_batcher(
        batch_size,
        number_batches,
        length=sequence_cap,
        cache_path=cache_path,
        huzzer_kwargs=huzzer_kwargs)
    queue = build_single_output_queue(
        datasource,
        output_shape=(batch_size,) + x_shape,
        type=tf.uint8)
    # The queue carries uint8 batches; the networks are fed float32.
    input_sequences = tf.cast(
        queue.dequeue(name='encoder_input'), tf.float32)

    # Model selection. The builders' return values are not used here.
    if option.startswith('simple_'):
        latent_size = int(option.rsplit('_', 1)[-1])
        build_simple_network2(
            input_sequences, x_shape, latent_dim=latent_size, kl_limit=0.0)
    elif option == 'conv':
        build_special_conv4_final(
            input_sequences, x_shape, 128, filter_length=3, num_filters=128)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}{}'.format(
        'basic_' if use_basic_dataset else '', option)

    supervisor = Supervisor(
        logdir=logdir, save_summaries_secs=10, save_model_secs=120)
    # The supervisor owns the session for the whole training run.
    with supervisor.managed_session() as sess:
        # At most 20000 steps; the process exits as soon as the supervisor
        # asks to stop.
        for _ in range(20000):
            if supervisor.should_stop():
                exit()
            # Fetched by name; presumably registered by the builder above.
            sess.run('train_on_batch')
Exemple #2
0
def run_experiment(option, use_basic_dataset):
    """Build and train the attention-decoder VAE selected by ``option``.

    Args:
        option: Must start with ``'attention1'``; its last ``_``-separated
            field is parsed with ``int`` as the latent size. Anything else
            prints ``INVALID OPTION`` and exits with status 1.
        use_basic_dataset: If truthy, sequences are capped at 56 tokens and
            ``BASIC_DATASET_ARGS`` is forwarded to the batcher; otherwise the
            cap is 130.

    Relies on the module-level ``BATCH_SIZE``, ``NUMBER_BATCHES``,
    ``TOKEN_EMB_SIZE`` and ``BASEDIR``.
    """
    sequence_cap = 56 if use_basic_dataset else 130
    print('Setting up data pipeline...')

    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    dataset_label = 'basic' if use_basic_dataset else 'standard'
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            dataset_label, NUMBER_BATCHES, BATCH_SIZE),
        huzzer_kwargs=huzzer_kwargs)
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8)
    raw_batch = queue.dequeue(name='input_sequence')
    # Lengths are derived from the int32 view, the model input is float32.
    sequence_lengths = get_sequence_lengths(tf.cast(raw_batch, tf.int32))
    input_sequences = tf.cast(raw_batch, tf.float32)

    print('Building model..')
    if not option.startswith('attention1'):
        print('INVALID OPTION')
        exit(1)

    z_size = int(option.rsplit('_', 1)[-1])
    encoder_output = build_single_program_encoder(
        input_sequences, sequence_lengths, z_size)
    z_resampled = resampling(encoder_output)
    decoder_output, _ = build_attention1_decoder(
        z_resampled, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE)
    per_example_ce = ce_loss_for_sequence_batch(
        decoder_output, input_sequences, sequence_lengths, sequence_cap)
    cross_entropy_loss = tf.reduce_mean(per_example_ce)
    kl_loss = tf.reduce_mean(kl_divergence(encoder_output))

    # Objective: KL term plus reconstruction cross-entropy, each logged.
    total_loss_op = kl_loss + cross_entropy_loss
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('total_loss', total_loss_op)

    run_name = ('basic_' if use_basic_dataset else '') + option
    logdir = os.path.join(BASEDIR, run_name)

    optimizer = tf.train.AdamOptimizer(1e-3)
    print('creating train op...')
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)
    print('starting supervisor...')
    supervisor = Supervisor(
        logdir=logdir, save_model_secs=300, save_summaries_secs=60)
    print('training...')
    with supervisor.managed_session() as sess:
        # Train until the supervisor requests a stop.
        while not supervisor.should_stop():
            sess.run([total_loss_op, train_op])
Exemple #3
0
def run_experiment(option):
    """Build the VAE baseline named by ``option`` and train it until stopped.

    Args:
        option: One of ``'simple'``, ``'conv1'``, ``'conv2'``, ``'conv3'`` or
            ``'conv4'``. Anything else prints ``INVALID OPTION`` and exits
            with status 1.
    """
    batch_size = 128
    x_shape = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    data_pipeline = one_hot_token_pipeline(for_cnn=False, length=128)

    # Number of batches handed to the queue so far; advanced by get_batch.
    batches_served = 0

    def get_batch():
        """Return the next batch, indexing the pipeline by stringified seeds."""
        nonlocal batches_served

        first_seed = batches_served * batch_size
        if batches_served % 100 == 0:
            logging.info('{} examples used'.format(first_seed))
        code_seeds = [
            str(seed) for seed in range(first_seed, first_seed + batch_size)
        ]

        batch = np.array(data_pipeline[code_seeds])
        batches_served += 1
        return batch

    # use the queue for training
    queue = build_single_output_queue(get_batch, (batch_size,) + x_shape)
    x = queue.dequeue(name='encoder_input')

    # Lambdas keep builder lookup lazy: only the chosen one is resolved.
    conv_builders = {
        'conv1': lambda: build_conv1(x, (128, 54)),
        'conv2': lambda: build_conv2(x, (128, 54), 32),
        'conv3': lambda: build_conv3(x, (128, 54), 64),
        'conv4': lambda: build_conv4(x, (128, 54), 64),
    }
    if option == 'simple':
        print('this no longer works: - a small refactor would work')
        build_simple_network(x, batch_size, (256, 54))
    else:
        try:
            build = conv_builders[option]
        except (KeyError, TypeError):
            print('INVALID OPTION')
            exit(1)
        else:
            build()

    logdir = 'experiments/VAE_baseline/{}'.format(option)

    supervisor = Supervisor(
        logdir=logdir, save_summaries_secs=20, save_model_secs=120)
    # The supervisor owns the session for the whole training run.
    with supervisor.managed_session() as sess:
        while not supervisor.should_stop():
            # Fetched by name; presumably registered by the builder above.
            sess.run('train_on_batch')
def run_experiment(option):
    """Train the MNIST GAN baseline selected by ``option`` until stopped.

    Args:
        option: Only ``'mnist_digits'`` is supported. Anything else prints
            ``INVALID OPTION`` and exits with status 1.

    The generator is trained on every step. The discriminator is trained
    only while its last loss exceeds the generator's last loss, or
    unconditionally once more than 6000 steps have been taken.
    """
    BATCH_SIZE = 32

    if option == 'mnist_digits':
        gen = mnist_unlabeled_generator(BATCH_SIZE, for_cnn=True)
        # One batch is drawn up front only to learn the shape for the queue.
        batch_shape = gen()[0].shape

        print('batch shape is : {}'.format(batch_shape))

        def get_batch():
            """Return the first element of the next generator output."""
            return gen()[0]

        queue = build_single_output_queue(get_batch, batch_shape)
        x = queue.dequeue(name='real_input')
        training_ops = build_mnist_gan_for_training(x)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/GAN_baseline/{}'.format(option)

    sv = Supervisor(logdir=logdir, save_summaries_secs=20, save_model_secs=120)
    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Initial losses are chosen so the discriminator trains on step one.
        d_loss = 5
        g_loss = 4
        i = 0
        while not sv.should_stop():
            if d_loss > g_loss or i > 6000:
                d_loss = sess.run(training_ops['train_discriminator'])

            g_loss = sess.run(training_ops['train_generator'])
            # Advance the step counter; without this the `i > 6000` clause
            # above could never become true.
            i += 1
def run_experiment(option):
    """Build the VAE variant named by ``option`` and train it until stopped.

    Args:
        option: One of the keys of the builder table below (``'simple'``,
            ``'simple_256'``, ``'conv_special2_l1'``, ...). Anything else
            prints ``INVALID OPTION`` and exits with status 1.
    """
    batch_size = 128
    x_shape = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    number_batches = 500
    dataset = one_hot_token_dataset(
        batch_size,
        number_batches,
        length=128,
        cache_path='one_hot_token_haskell_batch{}_number{}'.format(
            batch_size, number_batches)
    )

    def get_batch():
        """Return the first element of the next dataset output."""
        return dataset()[0]

    # use the queue for training
    queue = build_single_output_queue(get_batch, (batch_size,) + x_shape)
    x = queue.dequeue(name='encoder_input')

    # Lambdas keep builder lookup lazy: only the chosen one is resolved.
    builders = {
        'simple': lambda: build_simple_network2(x, x_shape, 32),
        'simple_double_latent': lambda: build_simple_network2(x, x_shape, 64),
        'simple_256': lambda: build_simple_network2(x, x_shape, 256),
        'simple_1024': lambda: build_simple_network2(x, x_shape, 1024),
        'simple_8192': lambda: build_simple_network2(x, x_shape, 8192),
        'conv_special': lambda: build_special_conv(x, x_shape, 64),
        'conv_special_low_kl':
            lambda: build_special_conv_low_kl(x, x_shape, 64),
        'conv_special2': lambda: build_special_conv2(x, x_shape, 64),
        'conv_special2_l1': lambda: build_special_conv2_l1(x, x_shape, 64),
        'conv_special2_l1_128':
            lambda: build_special_conv2_l1(x, x_shape, 128),
        # conv3 is conv2 but with initial filter length of 5 instead of 1
        'conv_special3_l1_128':
            lambda: build_special_conv2_l1(x, x_shape, 128, filter_length=5),
        'conv_special3_l1_256':
            lambda: build_special_conv2_l1(x, x_shape, 256, filter_length=5),
        'conv_special3_l1_128f_256':
            lambda: build_special_conv2_l1(
                x, x_shape, 256, filter_length=5, num_filters=128),
        'conv_special3_big_l1_512':
            lambda: build_special_conv2_l1(x, x_shape, 512, filter_length=10),
        'conv_special4_l1_1024':
            lambda: build_special_conv4_l1(
                x, x_shape, 1024, filter_length=3, num_filters=256),
        # NOTE(review): the option name says 2048 but 1024 is passed --
        # confirm which one is intended.
        'conv_special4_l1_2048_f5':
            lambda: build_special_conv4_l1(
                x, x_shape, 1024, filter_length=5, num_filters=256),
    }
    try:
        build = builders[option]
    except (KeyError, TypeError):
        print('INVALID OPTION')
        exit(1)
    else:
        build()

    logdir = 'experiments/VAE_baseline/{}_sss'.format(option)

    supervisor = Supervisor(
        logdir=logdir, save_summaries_secs=20, save_model_secs=120)
    # The supervisor owns the session for the whole training run.
    with supervisor.managed_session() as sess:
        while not supervisor.should_stop():
            # Fetched by name; presumably registered by the builder above.
            sess.run('train_on_batch')
Exemple #6
0
def run(hps):
    """Train a ``CVAE1`` model on the training images until stopped or NaN.

    Args:
        hps: Hyper-parameter object. ``hps.batch_size`` is read here, and
            ``hps.image_size`` and ``hps.num_gpus`` are overwritten.

    Reads ``FLAGS.num_gpus``, ``FLAGS.logdir`` and ``FLAGS.hpconfig``. A
    checkpoint is written every 3000 local steps, including step 0.
    """
    train_images, _ = images(hps)
    hps.image_size = validate_and_get_image_size(train_images)

    # Feed the images through a placeholder rather than embedding them as a
    # constant, to avoid the GraphDef exceeding 2GB
    # (https://www.tensorflow.org/guide/datasets#consuming_numpy_arrays).
    images_placeholder = tf.placeholder(train_images.dtype, train_images.shape)

    dataset = tf.data.Dataset.from_tensor_slices(images_placeholder)
    dataset = dataset.shuffle(10000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size=hps.batch_size)
    iterator = dataset.make_initializable_iterator()

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        x = tf.reshape(iterator.get_next(), (-1, 3, *hps.image_size))

        # Data-dependent initialization causes freeze during cycle detection (TF bug?):
        # hps.num_gpus = 1
        # init_x = x[:hps.batch_size, :, :, :]
        # init_model = CVAE1(hps, "init", init_x)

        # vs.reuse_variables()
        hps.num_gpus = FLAGS.num_gpus
        model = CVAE1(hps, "train", x)

    saver = tf.train.Saver(max_to_keep=2)

    # Count scalar parameters across all trainable variables.
    total_size = sum(
        np.prod([int(dim) for dim in var.get_shape()])
        for var in tf.trainable_variables())
    print("Num trainable variables: %d" % total_size)

    init_op = tf.global_variables_initializer()

    def init_fn(ses):
        """Initialize the dataset iterator, then all global variables."""
        print("Initializing parameters.")
        ses.run(iterator.initializer,
                feed_dict={images_placeholder: train_images})
        # XXX(rafal): TensorFlow bug?? Default initializer should handle things well..
        # ses.run(init_model.h_top.initializer)
        ses.run(init_op)
        print("Initialized!")

    train_logdir = FLAGS.logdir + "/train/{}_{}".format(
        strftime('%Y%m%d-%H%M%S'), FLAGS.hpconfig)
    sv = Supervisor(
        is_chief=True,
        logdir=train_logdir,
        summary_op=None,  # Automatic summaries don't work with placeholders.
        saver=saver,
        global_step=model.global_step,
        save_summaries_secs=120,
        save_model_secs=0,
        init_op=None,
        init_fn=init_fn)

    print("starting training")
    local_step = 0
    begin = time.time()

    config = tf.ConfigProto(allow_soft_placement=True)
    with sv.managed_session(config=config) as sess:
        print("Running first iteration!")
        while not sv.should_stop():
            # Summaries are computed manually on every 20th local step.
            should_compute_summary = (local_step % 20 == 19)

            fetches = [
                model.bits_per_dim, model.global_step, model.dec_log_stdv,
                model.train_op
            ]
            if should_compute_summary:
                fetches.append(model.summary_op)

            fetched = sess.run(fetches)
            bits_per_dim = fetched[0]
            step = fetched[1]
            dec_log_stdv = fetched[2]

            if should_compute_summary:
                sv.summary_computed(sess, fetched[-1])

            # Log the first ten steps, then only the summary steps.
            if local_step < 10 or should_compute_summary:
                print(
                    "Iteration %d, time = %.2fs, train bits_per_dim = %.4f, dec_log_stdv = %.4f"
                    % (step, time.time() - begin, bits_per_dim, dec_log_stdv))
                begin = time.time()
            if np.isnan(bits_per_dim):
                print("NAN detected!")
                break
            if local_step % 3000 == 0:
                saver.save(sess,
                           sv.save_path,
                           global_step=sv.global_step,
                           write_meta_graph=False)

            local_step += 1
        sv.stop()
def run_experiment(option, use_basic_dataset):
    """Build and train the attention-decoder GAN selected by ``option``.

    The generator decodes random vectors into token distributions. The
    discriminator is an encoder/decoder pair that reconstructs both the
    generated and the real programs. The discriminator loss is
    ``real_loss - k_t * generator_loss``, and ``k_t`` is updated after both
    train ops as ``clip(k_t + LAMBDA * balance, 0, 1)`` with
    ``balance = GAMMA * real_loss - generator_loss``.

    Args:
        option: Must start with ``'attention1_gan_no_pretrain'``; its last
            ``_``-separated field is parsed with ``int`` as the latent size.
            Anything else prints ``INVALID OPTION`` and exits with status 1.
        use_basic_dataset: If truthy, sequences are capped at 56 tokens and
            ``BASIC_DATASET_ARGS`` is forwarded to the batcher; otherwise the
            cap is 130.

    Raises:
        AssertionError: If ``BASEDIR/pretrained_weights`` is not a directory,
            or the encoder output width is not ``2 * z_size``.

    Relies on the module-level ``BATCH_SIZE``, ``NUMBER_BATCHES``,
    ``TOKEN_EMB_SIZE``, ``BASEDIR``, ``GAMMA`` and ``LAMBDA``.
    """
    assert os.path.isdir(os.path.join(BASEDIR, 'pretrained_weights')), 'weights files are missing'

    sequence_cap = 56 if use_basic_dataset else 130
    print('Setting up data pipeline...')

    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    real_sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    real_input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1_gan_no_pretrain'):
        z_size = int(option.split('_')[-1])

        random_vector = tf.random_normal(
            dtype=tf.float32,
            shape=[BATCH_SIZE, z_size],
            mean=0,
            stddev=0.1  # because that is what we will use when generating
        )

        # we do not know the length of the generated code beforehand, so we pass in
        # sequence lengths of `sequence_cap`
        full_lengths = tf.constant(
            [sequence_cap] * BATCH_SIZE,
            dtype=tf.float32,
            name='generator_lengths'
        )

        # create the scaling const. k_t
        k_t = tf.Variable(0., trainable=False, name='k_t')

        with tf.variable_scope('generator'):
            unnormalized_generated_programs, _ = build_attention1_decoder(
                random_vector, full_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            generated_programs = tf.nn.softmax(
                unnormalized_generated_programs, dim=-1, name='generated_programs'
            )
            generated_lengths = get_sequence_lengths(generated_programs, epsilon=0.01)

        with tf.variable_scope('discriminator'):
            # Generated programs occupy the first BATCH_SIZE rows of the
            # combined batch, real programs the rest.
            sequence_lengths = tf.concat([generated_lengths, real_sequence_lengths], axis=0)
            encoder_output = build_single_program_encoder(
                tf.concat([generated_programs, real_input_sequences], axis=0),
                sequence_lengths,
                z_size
            )
            # Only the first z_size columns of the encoder output are used
            # (the mus, per the original comment).
            assert encoder_output.get_shape()[1].value == 2 * z_size
            encoded_v = encoder_output[:, :z_size]
            reconstructed, _ = build_attention1_decoder(
                encoded_v, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            # these are the unnormalized_token_probs for g and d
            generated_reconstructed = reconstructed[:BATCH_SIZE]
            real_reconstructed = reconstructed[BATCH_SIZE:]

        generator_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=generated_reconstructed,
                input_sequences=generated_programs,
                sequence_lengths=generated_lengths,
                max_length=sequence_cap
            )
        )
        # The real reconstruction loss uses the real sequences' own lengths,
        # not the generated ones.
        real_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=real_reconstructed,
                input_sequences=real_input_sequences,
                sequence_lengths=real_sequence_lengths,
                max_length=sequence_cap
            )
        )
        discriminator_loss = real_loss - (k_t * generator_loss)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating discriminator train op...')
        d_train_op = slim.learning.create_train_op(discriminator_loss, optimizer)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating generator train op...')
        g_train_op = slim.learning.create_train_op(generator_loss, optimizer)

        balance = GAMMA * real_loss - generator_loss
        measure = real_loss + tf.abs(balance)

        # update k_t only after both train ops have run
        with tf.control_dependencies([d_train_op, g_train_op]):
            k_update = tf.assign(
                k_t, tf.clip_by_value(k_t + LAMBDA * balance, 0, 1))

        # example_summary_op = tf.summary.merge([
        #     tf.summary.image("G", tf.expand_dims(generated_programs, -1)),
        #     tf.summary.image("AE_G", tf.expand_dims(
        #         tf.nn.softmax(generated_reconstructed, dim=-1), axis=-1
        #     )),
        #     tf.summary.image("AE_x", tf.expand_dims(
        #         tf.nn.softmax(real_reconstructed, dim=-1), axis=-1
        #     ))
        # ])

        perf_summary_op = tf.summary.merge([
            tf.summary.scalar("loss/discriminator_loss", discriminator_loss),
            tf.summary.scalar("loss/real_loss", real_loss),
            tf.summary.scalar("loss/generator_loss", generator_loss),
            tf.summary.scalar("misc/measure", measure),
            tf.summary.scalar("misc/k_t", k_t),
            tf.summary.scalar("misc/balance", balance),
        ])

    else:
        print('INVALID OPTION')
        exit(1)

    logdir = os.path.join(BASEDIR, ('basic_' if use_basic_dataset else '') + option + '_gan')

    # build the model and initialise weights so supervisor can start where we left off
    # if not os.path.isdir(logdir):
    #     mkdir_p(logdir)
    #     with tf.Session() as sess:
    #         print('saving initial pretrained weights')
    #         with tf.variable_scope('', reuse=True):
    #             discriminator_vars = [
    #                 tf.get_variable('discriminator/decoder_fully_connected/bias'),
    #                 tf.get_variable('discriminator/decoder_fully_connected/weights'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/simple_attention/bias'),
    #                 tf.get_variable('discriminator/simple_attention/weights'),
    #             ]
    #             generator_vars = [
    #                 tf.get_variable('generator/decoder_fully_connected/bias'),
    #                 tf.get_variable('generator/decoder_fully_connected/weights'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('generator/simple_attention/bias'),
    #                 tf.get_variable('generator/simple_attention/weights'),
    #             ]
    #
    #         discriminator_saver = tf.train.Saver(
    #             discriminator_vars
    #         )
    #         generator_saver = tf.train.Saver(
    #             generator_vars
    #         )
    #         sess.run(tf.global_variables_initializer())
    #         discriminator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'discriminator_weights.cpkt')
    #         )
    #         generator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'generator_weights.cpkt')
    #         )
    #
    #         saver = tf.train.Saver()
    #         saver.save(sess, os.path.join(logdir, 'model.cpkt-0'))

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60,
        summary_op=perf_summary_op
    )
    print('training...')
    with sv.managed_session() as sess:
        # The fetch set is the same on every step, so build it once.
        ops = {
            'k_update': k_update,
            'measure': measure,
            'd_train_op': d_train_op,
            'g_train_op': g_train_op,
            'global_step': sv.global_step
        }
        while not sv.should_stop():
            sess.run(ops)
Exemple #8
0
def __run(build_model):
    """Build the train/validation graphs and run training or validation.

    Configuration is read from ``gflags.cfg``. This function also writes
    ``cfg.global_step``, ``cfg.Optimizer`` (replaced by an optimizer
    instance), ``cfg.sv`` and ``cfg.sess``.

    Args:
        build_model: Model-building callable forwarded to ``build_graph``.

    Returns:
        The result of ``main_loop`` when ``cfg.do_validation_only`` is false.
        In validation-only mode nothing is returned.

    Raises:
        NotImplementedError: If ``cfg.lr_decay`` is not one of the supported
            schedules.
    """
    cfg = gflags.cfg

    # ============ Class balance
    # assert class_balance in [None, 'median_freq_cost', 'rare_freq_cost'], (
    #     'The balance class method is not implemented')

    # if class_balance in ['median_freq_cost', 'rare_freq_cost']:
    #     if not hasattr(Dataset, 'class_freqs'):
    #         raise RuntimeError('class_freqs is missing for dataset '
    #                            '{}'.format(Dataset.name))
    #     freqs = Dataset.class_freqs

    #     if class_balance == 'median_freq_cost':
    #         w_freq = np.median(freqs) / freqs
    #     elif class_balance == 'rare_freq_cost':
    #         w_freq = 1 / (cfg.nclasses * freqs)

    #     tf.logging.info("Class balance weights", w_freq)
    #     cfg.class_balance = w_freq

    # ============ Train/validation
    # Load data
    # init_epoch = 0
    # prev_history = None
    # best_loss = np.Inf
    # best_val = np.Inf if early_stop_strategy == 'min' else -np.Inf
    # val_metrics_ext = ['val_' + m for m in val_metrics]
    # history_path = tmp_path + save_name + '.npy'
    # if cfg.reload_weights:
    #     # Reload weights
    #     pass

    # BUILD GRAPH
    tf_config = tf.ConfigProto(allow_soft_placement=True)

    tf.logging.info("Building the model ...")
    # with graph:
    with tf.Graph().as_default() as graph:
        cfg.global_step = tf.Variable(0, trainable=False, name='global_step',
                                      dtype='int32')

        # Create a list of input placeholders for each GPU.
        # When the batchsize is not big enough to fill all of them we
        # would want to use a subset of the placeholders, but TF raises
        # a 'negative shape error' if a placeholder is not fed. Instead,
        # we provide all of them with values but we use num_splits to
        # select which of the inputs to process (and perform gradient
        # descent on) and which to ignore.
        # At runtime, we replicate the input data to feed all the
        # placeholders (even if it's internally ignored). We could use
        # placeholder_with_default to assign a value to its input but
        # the batch_size might change dynamically, so we rather
        # replicate the input at runtime.
        inputs_per_gpu = []
        val_inputs_per_gpu = []
        labels_per_gpu = []
        num_splits = tf.placeholder(np.int32, shape=None, name='num_splits')
        num_batches = tf.placeholder(np.int32, shape=None, name='num_batches')
        for i in range(cfg.num_splits):
            inputs_per_gpu.append(tf.placeholder(
                dtype=cfg._FLOATX,
                shape=cfg.input_shape,
                name='inputs_per_gpu_%i' % i))
            val_inputs_per_gpu.append(tf.placeholder(
                dtype=cfg._FLOATX,
                shape=cfg.val_input_shape,
                name='val_inputs_per_gpu_%i' % i))
            labels_per_gpu.append(tf.placeholder(
                dtype=np.int32,
                shape=[None],  # flattened
                name='labels_per_gpu_%i' % i))
        prev_err = tf.placeholder(shape=(), dtype=cfg._FLOATX, name='prev_err')
        placeholders = [inputs_per_gpu, labels_per_gpu, num_splits,
                        num_batches, prev_err]
        val_placeholders = [val_inputs_per_gpu, labels_per_gpu, num_splits,
                            num_batches]

        # Learning rate schedule
        if cfg.lr_decay is None:
            lr = cfg.lr
        elif cfg.lr_decay == 'exp':
            lr = tf.train.exponential_decay(cfg.lr,
                                            cfg.global_step,
                                            cfg.decay_steps,
                                            cfg.decay_rate,
                                            staircase=cfg.staircase)
        elif cfg.lr_decay == 'piecewise':
            lr = tf.train.piecewise_constant(cfg.global_step,
                                             cfg.lr_boundaries,
                                             cfg.lr_values)
        elif cfg.lr_decay == 'polynomial':
            lr = tf.train.polynomial_decay(cfg.lr,
                                           cfg.global_step,
                                           cfg.decay_steps,
                                           end_learning_rate=cfg.end_lr,
                                           power=cfg.power,
                                           cycle=cfg.staircase)

        elif cfg.lr_decay == 'natural_exp':
            lr = tf.train.natural_exp_decay(cfg.lr,
                                            cfg.global_step,
                                            cfg.decay_steps,
                                            cfg.decay_rate,
                                            staircase=cfg.staircase)
        elif cfg.lr_decay == 'inverse_time':
            lr = tf.train.inverse_time_decay(cfg.lr,
                                             cfg.global_step,
                                             cfg.decay_steps,
                                             cfg.decay_rate,
                                             staircase=cfg.staircase)
        else:
            raise NotImplementedError()
        # cfg.Optimizer starts as a factory and is replaced by the instance.
        cfg.Optimizer = cfg.Optimizer(learning_rate=lr, **cfg.optimizer_params)

        # Model compilation
        # -----------------
        # Model parameters on the FIRST device specified in cfg.devices
        # Gradient Average and the rest of the operations are on CPU
        with tf.device('/cpu:0'):
            # Build the training graph
            train_outs, train_summary_ops, _ = build_graph(
                placeholders,
                cfg.input_shape,
                build_model,
                'train')

            # Build the validation graphs (reusing variables)
            val_outs = {}
            val_summary_ops = {}
            val_reset_cm_ops = {}
            for s in ['eval_' + v for v in cfg.val_on_sets]:
                ret = build_graph(
                    val_placeholders,
                    cfg.val_input_shape,
                    build_model,
                    s)
                val_outs[s], val_summary_ops[s], val_reset_cm_ops[s] = ret

            # Add the hyperparameters summaries
            if cfg.hyperparams_summaries is not None:
                sum_text = []
                # `.items()` works on both Python 2 and 3, unlike
                # `.iteritems()`.
                for (key_header,
                     list_value) in cfg.hyperparams_summaries.items():

                    header_list = []
                    text_list = []
                    for v in list_value:
                        header_list.append('**'+v+'**')
                        text_list.append(str(getattr(cfg, v)))
                    header_tensor = tf.constant(header_list)
                    text_tensor = tf.constant(text_list)

                    # Two-row table: bold headers above their values.
                    sum_text.append(tf.summary.text(
                        key_header, tf.reshape(
                            tf.concat([header_tensor, text_tensor], axis=0),
                            [2, -1])))
                sum_text_op = tf.summary.merge(sum_text)

            # Group global and local init into one op. Could be split into
            # two different ops and passed to `init_op` and `local_init_op`
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            saver = tf.train.Saver(max_to_keep=cfg.checkpoints_to_keep)

        # Start the session
        # ------------------
        sv = Supervisor(
            graph=graph,
            init_op=init_op,
            summary_op=None,
            global_step=cfg.global_step,
            # TODO add option to restore best rather than last?
            logdir=cfg.checkpoints_dir,
            checkpoint_basename=cfg.model_name,
            saver=saver,
            # session_manager
            # summary_writer
            save_model_secs=300)
        cfg.sv = sv

        with sv.managed_session(cfg.supervisor_master, tf_config) as sess:
            if cfg.debug:
                from tensorflow.python import debug as tf_debug
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
                sess.add_tensor_filter("has_inf_or_nan",
                                       tf_debug.has_inf_or_nan)
            # Publish the session only after the optional debug wrapping, so
            # everything that goes through cfg.sess actually uses the
            # debugger when cfg.debug is set.
            cfg.sess = sess

            if cfg.hyperparams_summaries is not None:
                # write Hyper parameters text summaries
                summary_str = cfg.sess.run(sum_text_op)
                sv.summary_computed(cfg.sess, summary_str)

            if not cfg.do_validation_only:
                # Start training loop
                main_loop_kwags = {'placeholders': placeholders,
                                   'val_placeholders': val_placeholders,
                                   'train_outs': train_outs,
                                   'train_summary_ops': train_summary_ops,
                                   'val_outs': val_outs,
                                   'val_summary_ops': val_summary_ops,
                                   'val_reset_cm_ops': val_reset_cm_ops,
                                   'loss_fn': cfg.loss_fn,
                                   'Dataset': cfg.Dataset,
                                   'dataset_params': cfg.dataset_params,
                                   'valid_params': cfg.valid_params,
                                   'sv': sv,
                                   'saver': saver}
                return main_loop(**main_loop_kwags)
            else:
                # Perform validation only
                # NOTE(review): mean_iou is filled in but never returned or
                # otherwise used here -- confirm whether callers need it.
                from validate import validate
                mean_iou = {}
                for s in cfg.val_on_sets:
                    mean_iou[s] = validate(
                        val_placeholders,
                        val_outs['eval_' + s],
                        val_summary_ops['eval_' + s],
                        val_reset_cm_ops['eval_' + s],
                        which_set='eval_' + s)
Exemple #9
0
def run_experiment(option, use_basic_dataset):
    """Build and train a token-level recurrent VAE selected by ``option``.

    The model is built first, then a one-example-at-a-time dataset is wrapped
    in a generator, compiled with TensorFlow Fold and trained under a
    ``Supervisor`` until the total loss (KL + cross entropy) has failed to
    improve for ``NUM_STEPS_TO_STOP_IF_NO_IMPROVEMENT`` consecutive steps.

    Args:
        option: Model selector. Supported forms are
            ``'single_layer_gru_blind_<num_grus>'`` (look-behind of 0) and
            ``'single_layer_gru_look_behind_<look_behind>_<num_grus>'``.
            Any other prefix prints ``INVALID OPTION`` and exits with
            status 1.
        use_basic_dataset: When true, ``BASIC_DATASET_ARGS`` are passed as
            the huzzer kwargs and the log directory is prefixed with
            ``basic_``; otherwise no extra huzzer kwargs are used.

    Raises:
        SystemExit: On an unrecognised ``option`` (code 1) or when early
            stopping triggers.
        ValueError: If the numeric fields of ``option`` cannot be parsed as
            integers.
    """
    # The site-provided ``exit`` helper is meant for the interactive shell
    # and is not guaranteed to exist; use ``sys.exit`` instead.
    import sys

    BATCH_SIZE = 128
    NUMBER_BATCHES = 1000

    print('Building model..')
    if option.startswith('single_layer_gru_blind_'):
        look_behind = 0
        num_grus = int(option.split('_')[-1])
        network_block = build_token_level_RVAE(num_grus,
                                               TOKEN_EMB_SIZE,
                                               look_behind_length=look_behind)
        train_block = build_train_graph_for_RVAE(network_block)
    elif option.startswith('single_layer_gru_look_behind_'):
        # Option layout: ..._<look_behind>_<num_grus>
        num_grus = int(option.split('_')[-1])
        look_behind = int(option.split('_')[-2])
        network_block = build_token_level_RVAE(num_grus, TOKEN_EMB_SIZE,
                                               look_behind)
        train_block = build_train_graph_for_RVAE(network_block, look_behind)
    else:
        print('INVALID OPTION')
        sys.exit(1)

    print('Setting up data pipeline...')
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    # the generator for fold needs one example at a time,
    # so the dataset is built with batch size 1 and BATCH_SIZE times as many
    # "batches"; Fold does the real batching further down.
    dataset = one_hot_variable_length_token_dataset(
        batch_size=1,
        number_of_batches=BATCH_SIZE * NUMBER_BATCHES,
        cache_path=
        'one_hot_token_variable_length_haskell_batch{}_number{}_lookbehind{}'.
        format(1, NUMBER_BATCHES * BATCH_SIZE, look_behind),
        zero_front_pad=look_behind,
        huzzer_kwargs=huzzer_kwargs)

    # Generator that gets examples
    def get_example():
        """Yield examples forever, dropping the leading size-1 axis."""
        while True:
            yield np.squeeze(dataset()[0], axis=0)

    logdir = 'experiments/Recurrent_VAE_baseline/{}{}'.format(
        'basic_' if use_basic_dataset else '', option)

    # compile and build the train op
    compiler = td.Compiler.create(train_block)

    metrics = compiler.metric_tensors
    kl_loss = tf.reduce_mean(metrics['kl_loss'])
    cross_entropy_loss = tf.reduce_mean(metrics['cross_entropy_loss'])
    total_loss_op = kl_loss + cross_entropy_loss
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('total_loss', total_loss_op)

    optimizer = tf.train.AdamOptimizer(1e-3)
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)
    summary_op = tf.summary.merge_all()

    # summary_op=None: summaries are written manually inside the loop.
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=60,
        summary_op=None,
    )
    print('training...')
    with sv.managed_session() as sess:

        batcher = compiler.build_loom_input_batched(get_example(), BATCH_SIZE)

        steps_per_summary = 10
        # Start from +inf so the first step always records a real best loss;
        # a fixed starting value would let early stopping trigger without any
        # recorded improvement whenever the loss starts above it.
        best_loss_so_far = float('inf')
        num_steps_until_best = 0

        # The fetched tensors never change between steps: look them up once.
        encoder_sequence_length_t = compiler.metric_tensors[
            'encoder_sequence_length']
        decoder_sequence_length_t = compiler.metric_tensors[
            'decoder_sequence_length']
        fetches = [
            encoder_sequence_length_t, decoder_sequence_length_t,
            summary_op, sv.global_step, total_loss_op, train_op
        ]

        for i, batch in enumerate(batcher):
            if sv.should_stop():
                break

            le, ld, summary, global_step, total_loss, _ = sess.run(
                fetches,
                feed_dict={compiler.loom_input_tensor: batch})
            # Sanity check on the compiled graph.
            assert all(le == ld), \
                'the encoder is folding over a different length sequence to the decoder'
            if i % steps_per_summary == 0:
                sv.summary_computed(sess, summary, global_step)

            # Stop if loss does not improve after some steps
            if total_loss < best_loss_so_far:
                best_loss_so_far = total_loss
                num_steps_until_best = 0
            else:
                num_steps_until_best += 1
                if num_steps_until_best == NUM_STEPS_TO_STOP_IF_NO_IMPROVEMENT:
                    sys.exit()