def run_experiment(option, use_basic_dataset):
    sequence_cap = 56 if use_basic_dataset else 130

    print('Setting up data pipeline...')
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1'):
        z_size = int(option.split('_')[-1])
        encoder_output = build_single_program_encoder(
            input_sequences, sequence_lengths, z_size
        )
        z_resampled = resampling(encoder_output)
        decoder_output, _ = build_attention1_decoder(
            z_resampled, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
        )
        cross_entropy_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                decoder_output, input_sequences, sequence_lengths, sequence_cap
            )
        )
        kl_loss = tf.reduce_mean(kl_divergence(encoder_output))
    else:
        print('INVALID OPTION')
        exit(1)

    total_loss_op = kl_loss + cross_entropy_loss
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('total_loss', total_loss_op)

    logdir = os.path.join(
        BASEDIR, ('basic_' if use_basic_dataset else '') + option
    )

    optimizer = tf.train.AdamOptimizer(1e-3)
    print('creating train op...')
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60
    )
    print('training...')
    with sv.managed_session() as sess:
        while not sv.should_stop():
            total_loss, _ = sess.run([total_loss_op, train_op])
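# For reference, a minimal sketch of what `resampling` and `kl_divergence` are
# assumed to do above: the encoder packs mus and log-sigmas into one tensor of
# width 2 * z_size (consistent with the `mus_and_log_sigs` slicing elsewhere
# in this repo), `resampling` applies the VAE reparameterization trick, and
# `kl_divergence` is the analytic KL against a standard normal prior. This is
# an illustrative assumption, not the project's actual implementation.
import tensorflow as tf


def resampling_sketch(mus_and_log_sigs):
    z_size = mus_and_log_sigs.get_shape()[1].value // 2
    mus = mus_and_log_sigs[:, :z_size]
    log_sigs = mus_and_log_sigs[:, z_size:]
    # z = mu + sigma * epsilon, with epsilon ~ N(0, 1)
    epsilon = tf.random_normal(tf.shape(mus))
    return mus + tf.exp(log_sigs) * epsilon


def kl_divergence_sketch(mus_and_log_sigs):
    z_size = mus_and_log_sigs.get_shape()[1].value // 2
    mus = mus_and_log_sigs[:, :z_size]
    log_sigs = mus_and_log_sigs[:, z_size:]
    # KL(N(mu, sigma^2) || N(0, 1)), summed over latent dimensions:
    # -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2)
    return -0.5 * tf.reduce_sum(
        1 + 2 * log_sigs - tf.square(mus) - tf.exp(2 * log_sigs),
        axis=1
    )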
def run_experiment(option, use_basic_dataset):
    assert os.path.isdir(os.path.join(BASEDIR, 'pretrained_weights')), \
        'weights files are missing'
    sequence_cap = 56 if use_basic_dataset else 130

    print('Setting up data pipeline...')
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    real_sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    real_input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1_gan_no_pretrain'):
        z_size = int(option.split('_')[-1])
        random_vector = tf.random_normal(
            dtype=tf.float32,
            shape=[BATCH_SIZE, z_size],
            mean=0,
            stddev=0.1  # because that is what we will use when generating
        )
        # we do not know the length of the generated code beforehand, so we
        # pass in sequence lengths of `sequence_cap`
        full_lengths = tf.constant(
            [sequence_cap for _ in range(BATCH_SIZE)],
            dtype=tf.float32,
            name='generator_lengths'
        )

        # create the scaling constant k_t
        k_t = tf.Variable(0., trainable=False, name='k_t')

        # the generator gets restored weights, and so does the discriminator
        with tf.variable_scope('generator'):
            unnormalized_generated_programs, _ = build_attention1_decoder(
                random_vector, full_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            generated_programs = tf.nn.softmax(
                unnormalized_generated_programs, dim=-1,
                name='generated_programs'
            )
            generated_lengths = get_sequence_lengths(
                generated_programs, epsilon=0.01
            )

        with tf.variable_scope('discriminator'):
            sequence_lengths = tf.concat(
                [generated_lengths, real_sequence_lengths], axis=0
            )
            encoder_output = build_single_program_encoder(
                tf.concat([generated_programs, real_input_sequences], axis=0),
                sequence_lengths,
                z_size
            )
            # get the values corresponding to mus from the encoder output
            assert encoder_output.get_shape()[1].value == 2 * z_size
            encoded_v = encoder_output[:, :z_size]
            reconstructed, _ = build_attention1_decoder(
                encoded_v, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            # these are the unnormalized token probs for g and d
            generated_reconstructed = reconstructed[:BATCH_SIZE]
            real_reconstructed = reconstructed[BATCH_SIZE:]

        generator_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=generated_reconstructed,
                input_sequences=generated_programs,
                sequence_lengths=generated_lengths,
                max_length=sequence_cap
            )
        )
        real_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=real_reconstructed,
                input_sequences=real_input_sequences,
                sequence_lengths=real_sequence_lengths,
                max_length=sequence_cap
            )
        )
        discriminator_loss = real_loss - (k_t * generator_loss)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating discriminator train op...')
        d_train_op = slim.learning.create_train_op(
            discriminator_loss, optimizer
        )

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating generator train op...')
        g_train_op = slim.learning.create_train_op(generator_loss, optimizer)

        balance = GAMMA * real_loss - generator_loss
        measure = real_loss + tf.abs(balance)

        # update k_t
        with tf.control_dependencies([d_train_op, g_train_op]):
            k_update = tf.assign(
                k_t, tf.clip_by_value(k_t + LAMBDA * balance, 0, 1)
            )
        # example_summary_op = tf.summary.merge([
        #     tf.summary.image("G", tf.expand_dims(generated_programs, -1)),
        #     tf.summary.image("AE_G", tf.expand_dims(
        #         tf.nn.softmax(generated_reconstructed, dim=-1), axis=-1
        #     )),
        #     tf.summary.image("AE_x", tf.expand_dims(
        #         tf.nn.softmax(real_reconstructed, dim=-1), axis=-1
        #     ))
        # ])
        perf_summary_op = tf.summary.merge([
            tf.summary.scalar("loss/discriminator_loss", discriminator_loss),
            tf.summary.scalar("loss/real_loss", real_loss),
            tf.summary.scalar("loss/generator_loss", generator_loss),
            tf.summary.scalar("misc/measure", measure),
            tf.summary.scalar("misc/k_t", k_t),
            tf.summary.scalar("misc/balance", balance),
        ])
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = os.path.join(
        BASEDIR,
        ('basic_' if use_basic_dataset else '') + option + '_gan'
    )

    # build the model and initialise weights so the supervisor can start
    # where we left off
    # if not os.path.isdir(logdir):
    #     mkdir_p(logdir)
    #     with tf.Session() as sess:
    #         print('saving initial pretrained weights')
    #         with tf.variable_scope('', reuse=True):
    #             discriminator_vars = [
    #                 tf.get_variable('discriminator/decoder_fully_connected/bias'),
    #                 tf.get_variable('discriminator/decoder_fully_connected/weights'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/simple_attention/bias'),
    #                 tf.get_variable('discriminator/simple_attention/weights'),
    #             ]
    #             generator_vars = [
    #                 tf.get_variable('generator/decoder_fully_connected/bias'),
    #                 tf.get_variable('generator/decoder_fully_connected/weights'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('generator/simple_attention/bias'),
    #                 tf.get_variable('generator/simple_attention/weights'),
    #             ]
    #
    #         discriminator_saver = tf.train.Saver(discriminator_vars)
    #         generator_saver = tf.train.Saver(generator_vars)
    #         sess.run(tf.global_variables_initializer())
    #         discriminator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'discriminator_weights.cpkt')
    #         )
    #         generator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'generator_weights.cpkt')
    #         )
    #
    #         saver = tf.train.Saver()
    #         saver.save(sess, os.path.join(logdir, 'model.cpkt-0'))

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60,
        summary_op=perf_summary_op
    )
    print('training...')
    with sv.managed_session() as sess:
        global_step = -1
        while not sv.should_stop():
            ops = {
                'k_update': k_update,
                'measure': measure,
                'd_train_op': d_train_op,
                'g_train_op': g_train_op,
                'global_step': sv.global_step
            }
            # if global_step % 200 == 0:
            #     ops.update({'images': example_summary_op})
            results = sess.run(ops)
            global_step = results['global_step']
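# `get_sequence_lengths` is used above both on hard one-hot batches and, with
# a small epsilon, on softmax outputs. A plausible sketch of the idea, under
# the assumption that token 0 is the pad token (consistent with the
# `tokens == 0` end-of-code test in the analysis code below); an
# illustration, not the project's actual implementation:
import tensorflow as tf


def get_sequence_lengths_sketch(one_hot_sequences, epsilon=0.5):
    # probability mass on the (assumed) pad token at each timestep
    pad_probs = one_hot_sequences[:, :, 0]
    # a timestep counts towards the length while the pad token has not
    # (approximately) won the softmax: pad_prob < 1 - epsilon
    is_real_token = tf.cast(pad_probs < 1.0 - epsilon, tf.int32)
    return tf.reduce_sum(is_real_token, axis=1)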
import tensorflow as tf
from os.path import join

from models import build_single_program_encoder, build_attention1_decoder

path_to_model = 'experiments/RVAE_attention/basic_attention1_128'
z_size = 128
max_len = 4
input_sequences = tf.zeros((1, 4, 54))
sequence_lengths = tf.zeros((1), tf.int32)
encoder_output = build_single_program_encoder(
    input_sequences, sequence_lengths, z_size
)
decoder_output, _ = build_attention1_decoder(
    encoder_output[:, :z_size], sequence_lengths, max_len, 54
)

with tf.variable_scope('', reuse=True):
    discriminator_var_list = {
        'discriminator/decoder_fully_connected/bias':
            tf.get_variable('decoder_fully_connected/bias', [54]),
        'discriminator/decoder_fully_connected/weights':
            tf.get_variable('decoder_fully_connected/weights', [128, 54]),
        'discriminator/decoder_rnn/lstm_cell/biases':
            tf.get_variable('decoder_rnn/lstm_cell/biases', [512]),
        'discriminator/decoder_rnn/lstm_cell/weights':
            tf.get_variable('decoder_rnn/lstm_cell/weights', [256, 512]),
        'discriminator/rnn/lstm_cell/biases':
            tf.get_variable('rnn/lstm_cell/biases', [1024]),
        'discriminator/rnn/lstm_cell/weights':
            tf.get_variable('rnn/lstm_cell/weights', [310, 1024]),
        # under reuse=True the shape argument can be omitted, so the
        # remaining simple_attention variables are fetched by name alone
        'discriminator/simple_attention/bias':
            tf.get_variable('simple_attention/bias'),
        'discriminator/simple_attention/weights':
            tf.get_variable('simple_attention/weights'),
    }
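# In `tf.train.Saver`, a dict `var_list` maps names as they appear in the
# checkpoint (here, the 'discriminator/'-scoped names written during GAN
# training) to variables in the current, unscoped graph. A usage sketch,
# assuming `path_to_model` above holds a checkpoint saved with those names:
saver = tf.train.Saver(var_list=discriminator_var_list)
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint(path_to_model))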
def analyze_model(option, use_basic_dataset):
    BASEDIR = os.path.dirname(os.path.realpath(__file__))
    sequence_cap = 56 if use_basic_dataset else 130
    TOKEN_EMB_SIZE = 54
    NUMBER_OF_EXAMPLES = 1000

    if option.startswith('attention1_gan'):
        z_size = int(option.split('_')[-1])
        directory = '{}{}'.format(
            'basic_' if use_basic_dataset else '',
            option
        )
        # input_sequence_t = tf.placeholder(
        #     shape=[1, sequence_cap, TOKEN_EMB_SIZE],
        #     dtype=tf.float32,
        #     name='input_sequence'
        # )
        # sequence_lengths_t = get_sequence_lengths(
        #     tf.cast(input_sequence_t, tf.int32)
        # )
        # mus_and_log_sigs = build_single_program_encoder(
        #     input_sequence_t,
        #     sequence_lengths_t,
        #     z_size
        # )
        # z = mus_and_log_sigs[:, :z_size]

        decoder_input = tf.placeholder(
            shape=[1, z_size],
            dtype=tf.float32,
            name='generator_input'
        )
        sequence_lengths_t = tf.constant([sequence_cap])
        with tf.variable_scope('generator'):
            decoder_output, attention_weights_t = build_attention1_decoder(
                decoder_input,
                sequence_lengths_t,
                sequence_cap,
                TOKEN_EMB_SIZE
            )
            token_probs_t = tf.nn.softmax(decoder_output)

        print('z_size={}'.format(z_size))
    else:
        exit('invalid option')

    # huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    # print('Setting up data pipeline...')
    # dataset = one_hot_token_dataset(
    #     batch_size=1,
    #     number_of_batches=1000,
    #     cache_path='{}model_analysis_attention'.format(
    #         'basic_' if use_basic_dataset else ''
    #     ),
    #     length=sequence_cap,
    #     huzzer_kwargs=huzzer_kwargs
    # )
    #
    # def get_input():
    #     return np.squeeze(dataset()[0], axis=0).astype(np.float32)

    path = join(BASEDIR, directory)
    saver = tf.train.Saver()
    # examples = [get_input() for i in range(NUMBER_OF_EXAMPLES)]

    sess = tf.Session()
    print('Restoring variables...')
    saver.restore(
        sess, tf.train.latest_checkpoint(path, 'checkpoint.txt')
    )

    examples_dir = join(
        BASEDIR,
        ('basic_' if use_basic_dataset else '') + option + '_examples'
    )
    mkdir_p(examples_dir)

    # # Autoencode bit
    # autoencoded_examples_path = join(examples_dir, 'autoencoded')
    # message = 'Autoencoding {} examples'.format(
    #     len(examples),
    # )
    # for i, input_sequence in enumerate(tqdm(
    #     examples, desc=message, total=len(examples)
    # )):
    #     dir_for_example = join(autoencoded_examples_path, str(i))
    #     mkdir_p(dir_for_example)
    #
    #     z_mus, length = sess.run([z, sequence_lengths_t], feed_dict={
    #         input_sequence_t: np.expand_dims(input_sequence, 0),
    #     })
    #
    #     token_probs, attention_weights = sess.run(
    #         [token_probs_t, attention_weights_t],
    #         feed_dict={
    #             decoder_input: z_mus
    #         }
    #     )
    #     length = np.squeeze(length)
    #     token_probs = np.squeeze(token_probs)[:length]
    #     attention_weights = attention_weights[:length]
    #     input_sequence = input_sequence[:length]
    #
    #     input_code = example_to_code(input_sequence)
    #     output_code = example_to_code(token_probs)
    #     visualize_attention_weights(
    #         output_code,
    #         attention_weights,
    #         join(dir_for_example, 'attention_weights')
    #     )
    #
    #     write_to_file(join(dir_for_example, 'input.hs'), input_code)
    #     imsave(join(dir_for_example, 'input.png'), input_sequence.T)
    #     imsave(
    #         join(dir_for_example, 'z.png'),
    #         z_mus.reshape((z_mus.size // 32, 32))
    #     )
    #     imsave(join(dir_for_example, 'decoder_output.png'), token_probs.T)
    #     write_to_file(join(dir_for_example, 'autoencoded_code.hs'), output_code)

    # generate bit
    generated_examples_path = join(examples_dir, 'generated')
    message = 'Generating {} examples'.format(
        NUMBER_OF_EXAMPLES,
    )
    for i in tqdm(
        range(NUMBER_OF_EXAMPLES), desc=message, total=NUMBER_OF_EXAMPLES
    ):
        dir_for_example = join(generated_examples_path, str(i))
        mkdir_p(dir_for_example)
        z_gen = np.random.normal(0, 0.1, z_size)
        imsave(
            join(dir_for_example, 'z.png'),
            z_gen.reshape((z_gen.size // 32, 32))
        )
        token_probs, attention_weights = sess.run(
            [token_probs_t, attention_weights_t],
            feed_dict={
                decoder_input: np.expand_dims(z_gen, 0)
            }
        )
        token_probs = np.squeeze(token_probs)
        tokens = np.argmax(token_probs, axis=-1)
        # truncate at the first occurrence of token 0 (the pad/end token);
        # np.argmax returns 0 when no such token exists, in which case the
        # `or` falls back to the full sequence_cap
        end_of_code = np.argmax(tokens == 0) or sequence_cap
        token_probs = token_probs[:end_of_code]
        attention_weights = attention_weights[:end_of_code]

        output_code = example_to_code(token_probs)
        visualize_attention_weights(
            output_code,
            attention_weights,
            join(dir_for_example, 'attention_weights')
        )
        imsave(join(dir_for_example, 'decoder_output.png'), token_probs.T)
        write_to_file(
            join(dir_for_example, 'generated_code.hs'),
            example_to_code(token_probs)
        )
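# A sketch of how these entry points might be driven from the command line;
# the flag names here are assumptions, not the project's actual CLI:
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('option', help='e.g. attention1_gan_128')
    parser.add_argument('--use-basic-dataset', action='store_true')
    args = parser.parse_args()
    analyze_model(args.option, args.use_basic_dataset)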