def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in multi_gpu.data_parallel(
            batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(dev_input_x, is_training=is_training,
                                   channels_last=multi_gpu.channels_last(dev))
                dev_cls_loss = tf.losses.sparse_softmax_cross_entropy(dev_input_y, dev_logits)
                dev_loss = dev_cls_loss + tf.losses.get_regularization_loss()
                dev_y = spt.ops.softmax_classification_output(dev_logits)
                dev_acc = spt.ops.classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape, normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop, train_op, [input_x, input_y], train_flow,
                                  feed_dict={is_training: True},
                                  metrics={'loss': loss, 'acc': acc})
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      feed_dict={is_training: False},
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
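The script above defines `main()` but does not show an entry point at this position in the collection. A minimal sketch of how it would typically be launched, mirroring the `if __name__ == '__main__'` guard used by the other snippets below (this wiring is an assumption for illustration, not part of the original file):

if __name__ == '__main__':
    # `main()` parses sys.argv and reads the module-level `config` itself,
    # so the guard only needs to invoke it.
    main()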
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr, config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    test_nlls = []
    test_lbs = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([p_net, q_net], is_training=is_training, channels_last=True):
                    _ = q_net(dev_input_x).chain(p_net, latent_names=['z'],
                                                 observed={'x': dev_input_x})
            else:
                with arg_scope([p_net, q_net], is_training=is_training,
                               channels_last=multi_gpu.channels_last(dev)):
                    # derive the loss and lower-bound for training
                    train_q_net = q_net(dev_input_x)
                    train_chain = train_q_net.chain(p_net, latent_names=['z'], latent_axis=0,
                                                    observed={'x': dev_input_x})
                    dev_vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
                    dev_loss = dev_vae_loss + regularization_loss()
                    losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                    test_chain = test_q_net.chain(p_net, latent_names=['z'], latent_axis=0,
                                                  observed={'x': dev_input_x})
                    dev_test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
                    dev_test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())
                    test_nlls.append(dev_test_nll)
                    test_lbs.append(dev_test_lb)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, test_lb, test_nll] = \
        multi_gpu.average([losses, test_lbs, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        plot_p_net = p_net(n_z=100, is_training=is_training,
                           channels_last=multi_gpu.channels_last(work_dev))
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(loop.epoch),
                                   grid_size=(10, 10), results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['q_net', 'p_net'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x], train_flow,
                              feed_dict={learning_rate: learning_rate_var, is_training: True},
                              metrics={'loss': loss})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_nll': test_nll, 'test_lb': test_lb},
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32, name='learning_rate')
    learning_rate_var = AnnealingDynamicValue(config.initial_lr, config.lr_anneal_factor)

    # build the model
    with arg_scope([q_net, p_net], is_training=is_training):
        # derive the loss and lower-bound for training
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(p_net, latent_names=['y', 'z'], latent_axis=0,
                                        observed={'x': input_x})

        if config.train_n_samples is None:
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(train_chain.vi.training.reinforce(baseline=baseline))
        else:
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + regularization_loss()

        # derive the nll and logits output for testing
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(p_net, latent_names=['y', 'z'], latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits, axis=-1)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plot_x'):
        plot_p_net = p_net(observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
                           n_z=10, is_training=is_training)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(loop.epoch),
                                   grid_size=(config.n_clusters, 10), results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(x_train, config.batch_size, shuffle=False, skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(outputs=[q_y_given_x], inputs=[input_x], data_flow=df,
                                       feed_dict={is_training: False})
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(outputs=[q_y_given_x], inputs=[input_x],
                                       data_flow=test_flow,
                                       feed_dict={is_training: False})
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x], train_flow,
                              feed_dict={learning_rate: learning_rate_var, is_training: True},
                              metrics={'loss': loss})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_nll': test_nll},
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
    var_dict = get_variables_as_dict(model_vs)
    saver = VariableSaver(var_dict, config.save_dir)
    saver.save()
    print('=' * 30 + 'result' + '=' * 30)
    pprint(best_valid_metrics)


if __name__ == '__main__':
    # get config obj
    config = ExpConfig()

    # parse the arguments
    arg_parser = ArgumentParser()
    register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])
    config.x_dim = get_data_dim(config.dataset)
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories if specified
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs(config.save_dir, exist_ok=True)

    with warnings.catch_warnings():
        # suppress DeprecationWarning from NumPy caused by codes in TensorFlow-Probability
        warnings.filterwarnings("ignore", category=DeprecationWarning, module='numpy')
        main()
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr, config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = DataFlow.arrays([x_train, y_train], config.batch_size,
                                 shuffle=True, skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x, input_y], train_flow,
                              feed_dict={learning_rate: learning_rate_var, is_training: True},
                              metrics={'loss': loss, 'acc': acc})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_acc': acc},
                                  inputs=[input_x, input_y],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
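Unlike the argparse-driven scripts in this collection, this variant expects the caller to supply both the config object and the result directory. A hedged sketch of a possible entry point follows; the `ExpConfig` name and the result path are assumptions for illustration only:

if __name__ == '__main__':
    # hypothetical wiring: build the experiment config and pick a result directory
    config = ExpConfig()
    main(config, result_dir='./results/mlp_cifar10')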
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(p_net, latent_axis=0, observed={'x': input_x})

        if config.vi_algorithm == 'reinforce':
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(train_chain.vi.training.reinforce(baseline=baseline))
        else:
            assert (config.vi_algorithm == 'vimco')
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits, axis=-1,
                                name='q_y_given_x')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(observed={'y': tf.range(config.n_clusters, dtype=tf.int32)}, n_z=10)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(loop.epoch),
                                   grid_size=(config.n_clusters, 10), results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(x_train, config.batch_size, shuffle=False, skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(loop, train_op, [input_x], train_flow,
                                  metrics={'loss': loss},
                                  summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_nll': test_nll},
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.1, dtype=tf.float32, name='beta', trainable=True)
    beta = tf.clip_by_value(beta, config.beta, 1.0)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, n_z=config.train_n_qz)
        init_p_net = p_net(observed={'x': input_x, 'z': init_q_net['z']},
                           n_z=config.train_n_qz, beta=beta)
        init_loss = get_all_loss(init_q_net, init_p_net)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
            arg_scope([batch_norm], training=True):
        train_q_net = q_net(input_x, n_z=config.train_n_qz)
        train_p_net = p_net(observed={'x': input_x, 'z': train_q_net['z']},
                            n_z=config.train_n_qz, beta=beta)
        VAE_loss = get_all_loss(train_q_net, train_p_net)
        VAE_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_qz)
        test_chain = test_q_net.chain(p_net, observed={'x': input_x}, n_z=config.test_n_qz,
                                      latent_axis=0, beta=beta)
        test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(test_chain.vi.evaluation.is_loglikelihood(),
                           (-1, config.test_x_samples)),
                axis=-1)
        ) + config.x_shape_multiple * np.log(128.0)
        test_recon = tf.reduce_mean(test_chain.model['x'].log_prob())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(test_chain.model['x'] * 128 + 127.5)) ** 2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(tf.reshape(test_mse, (-1, config.test_x_samples)), axis=-1))

    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables('G_theta') + \
            tf.trainable_variables('beta')
        print("========VAE_params=========")
        print(VAE_params)
        with tf.variable_scope('VAE_optimizer'):
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
            VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = 256.0 * tf.reshape(
            p_net(n_z=100, mcmc_iterator=0, beta=beta)['x'].distribution.mean,
            (-1,) + config.x_shape) / 2 + 127.5
        reconstruct_q_net = q_net(input_x)
        reconstruct_z = reconstruct_q_net['z']
        reconstruct_plots = 256.0 * tf.reshape(
            p_net(observed={'z': reconstruct_z}, beta=beta)['x'],
            (-1,) + config.x_shape) / 2 + 127.5
        x_plots = tf.clip_by_value(x_plots, 0, 255)
        reconstruct_plots = tf.clip_by_value(reconstruct_plots, 0, 255)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            # plot samples
            images = session.run(x_plots)
            # pyplot.scatter(z_points[:, 0], z_points[:, 1], s=5)
            # pyplot.savefig(results.system_path('plotting/z_plot/{}.pdf'.format(loop.epoch)))
            # pyplot.close()
            # print(images)
            try:
                print(np.max(images), np.min(images))
                images = np.round(images)
                save_images_collection(
                    images=images,
                    filename='plotting/sample/{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )

                # plot reconstructs
                for [x] in reconstruct_train_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/train.reconstruct/{}.png'.format(loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break

                # plot reconstructs
                for [x] in reconstruct_test_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/test.reconstruct/{}.png'.format(loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break
            except Exception as e:
                print(e)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float)
    train_flow = spt.DataFlow.arrays([x_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    train_flow = train_flow.map(uniform_sampler)
    reconstruct_train_flow = spt.DataFlow.arrays([x_train], 50, shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test], 50, shuffle=True,
                                                skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [np.repeat(x_test, config.test_x_samples, axis=0)], config.test_batch_size)
    test_flow = test_flow.map(uniform_sampler)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch + 1,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
                restore_checkpoint="/mnt/mfs/mlstorage-experiments/cwx17/83/19/6f3b6c3ef49d6d6c81d5/checkpoint/checkpoint/checkpoint.dat-585000"
        ) as loop:
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_nll': test_nll,
                                               'test_lb': test_lb,
                                               'test_recon': test_recon,
                                               'test_mse': test_mse},
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            for epoch in epoch_iterator:
                plot_samples(loop)
                dataset_img = _x_train
                mala_img = []
                for i in range(config.fid_samples // config.sample_n_z):
                    mala_images = session.run(x_plots)
                    mala_img.append(mala_images)
                    print('{}-th sample finished...'.format(i))
                mala_img = np.concatenate(mala_img, axis=0).astype('uint8')
                mala_img = np.asarray(mala_img)
                np.savez('sample_store', mala_img=mala_img)

                FID = get_fid(mala_img, dataset_img)
                IS_mean, IS_std = get_inception_score(mala_img)
                loop.collect_metrics(FID=FID)
                loop.collect_metrics(IS=IS_mean)

                # ori_img = np.concatenate(ori_img, axis=0).astype('uint8')
                # ori_img = np.asarray(ori_img)
                # FID = get_fid_google(ori_img, dataset_img)
                # IS_mean, IS_std = get_inception_score(ori_img)
                # loop.collect_metrics(FID_ori=FID)
                # loop.collect_metrics(IS_ori=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers,
                                                         name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    input_origin_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape,
                                    name='input_origin_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True)

    # derive the nll and logits output for testing
    with tf.name_scope('testing'), \
            arg_scope([batch_norm], training=True):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z())
        test_p_net = p_net(observed={'z': test_q_net['z']}, n_z=config.test_n_qz,
                           beta=beta, log_Z=get_log_Z())
        pn_abs = tf.abs(tf.reduce_mean(D_psi(test_pn_net['x']), axis=0) -
                        D_psi(test_pn_net['x'].distribution.mean))
        print(pn_abs)
        pn_abs = tf.reduce_mean(pn_abs)
        p_abs = tf.abs(tf.reduce_mean(D_psi(test_p_net['x']), axis=0) -
                       D_psi(test_p_net['x'].distribution.mean))
        p_abs = tf.reduce_mean(p_abs)
        xi_node = get_var('p_net/xi')

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = spt.datasets.load_cifar10(x_shape=config.x_shape)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    # uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float)
    train_flow = spt.DataFlow.arrays([x_train, x_train], config.test_batch_size)
    random_train_flow = spt.DataFlow.arrays([x_train, x_train], config.test_batch_size,
                                            shuffle=True)
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)
    test_flow = spt.DataFlow.arrays([x_test, x_test], config.test_batch_size)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/3d/0c/d445f4f80a9fee59aed5/checkpoint/checkpoint/checkpoint.dat-312000'

        # train the network
        with spt.TrainLoop(tf.trainable_variables(),
                           var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta',
                                       'D_psi', 'G_omega', 'D_kappa'],
                           max_epoch=config.max_epoch + 10,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False,
                           checkpoint_dir=results.system_path('checkpoint'),
                           checkpoint_epoch_freq=100,
                           restore_checkpoint=restore_checkpoint) as loop:
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            evaluator = spt.Evaluator(loop,
                                      metrics={'pn_abs': pn_abs, 'p_abs': p_abs},
                                      inputs=[input_x, input_origin_x],
                                      data_flow=train_flow,
                                      time_metric_name='test_time')

            # adversarial training
            for epoch in epoch_iterator:
                evaluator.run()
                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers,
                                                         name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32, shape=(None,) + config.x_shape, name='input_x')
    input_origin_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape,
                                    name='input_origin_x')
    warm = tf.placeholder(dtype=tf.float32, shape=(), name='warm')
    mcmc_alpha = tf.placeholder(dtype=tf.float32, shape=(1,), name='mcmc_alpha')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True)

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_origin_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z())
        test_chain = test_q_net.chain(p_net, observed={'x': tf.to_float(input_x)},
                                      n_z=config.test_n_qz, latent_axis=0,
                                      beta=beta, log_Z=get_log_Z())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 255.0) -
             tf.round(tf.to_float(test_chain.model['x']) * 255.0)) ** 2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(tf.reshape(test_mse, (-1, config.test_x_samples)), axis=-1))
        test_nll = -tf.reduce_mean(
            tf.reshape(test_chain.vi.evaluation.is_loglikelihood(),
                       (-1, config.test_x_samples)))
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        test_recon = test_chain.model['x'].log_prob()
        p_z = test_chain.model['z'].distribution.log_prob(
            test_chain.model['z'], group_ndims=1, y=test_chain.model['x']).log_energy_prob
        q_z_given_x = test_q_net['z'].log_prob()
        vi = spt.VariationalInference(
            log_joint=test_recon + p_z,
            latent_log_probs=[q_z_given_x],
            axis=0)
        test_recon = tf.reduce_mean(test_recon)
        adv_test_nll = -tf.reduce_mean(
            tf.reshape(vi.evaluation.is_loglikelihood(), (-1, config.test_x_samples)))
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) *
            tf.exp(test_pn_net['z'].log_prob().log_energy_prob - test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob())

        p_z_energy = test_chain.model['z'].log_prob().energy
        another_log_Z_compute_op = spt.ops.log_mean_exp(
            -p_z_energy - q_z_given_x + np.log(config.len_train))
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() - test_pn_net['z'].log_prob().log_energy_prob)

    xi_node = get_var('p_net/xi')

    # derive the optimizer

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.fashion_mnist(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = _x_train / 255.0
    x_test = _x_test / 255.0
    bernouli_sampler = BernoulliSampler()
    train_flow = spt.DataFlow.arrays([x_train, x_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    train_flow = train_flow.map(lambda x, y: [bernouli_sampler.sample(x), y])
    Z_compute_flow = spt.DataFlow.arrays([x_train, x_train], config.test_batch_size,
                                         shuffle=True, skip_incomplete=True)
    Z_compute_flow = Z_compute_flow.map(lambda x, y: [bernouli_sampler.sample(x), y])
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)
    test_flow = spt.DataFlow.arrays([x_test, x_test], config.test_batch_size)
    test_flow = test_flow.map(lambda x, y: [bernouli_sampler.sample(x), y])
    # mapped_test_flow = test_flow.to_arrays_flow(config.test_batch_size).map(bernouli_sampler)
    # gathered_flow = spt.DataFlow.gather([test_flow, mapped_test_flow])

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/b9/1c/d445f4f80a9f8ab3c0e5/checkpoint/checkpoint/checkpoint.dat-936000'

        # train the network
        with spt.TrainLoop(tf.trainable_variables(),
                           var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta',
                                       'D_psi', 'G_omega', 'D_kappa'],
                           max_epoch=config.max_epoch + 10,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False,
                           checkpoint_dir=results.system_path('checkpoint'),
                           checkpoint_epoch_freq=100,
                           restore_checkpoint=restore_checkpoint
                           ) as loop:
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb,
                         'adv_test_nll': adv_test_nll, 'adv_test_lb': adv_test_lb,
                         'reconstruct_energy': reconstruct_energy,
                         'real_energy': real_energy,
                         'pd_energy': pd_energy, 'pn_energy': pn_energy,
                         'test_recon': test_recon,
                         'kl_adv_and_gaussian': kl_adv_and_gaussian,
                         'test_mse': test_mse},
                inputs=[input_x, input_origin_x],
                data_flow=test_flow,
                time_metric_name='test_time'
            )
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            n_critical = config.n_critical
            all_nll_list = []
            all_log_Z_list = []

            # adversarial training
            for epoch in epoch_iterator:
                with loop.timeit('compute_Z_time'):
                    # log_Z_list = []
                    # for i in range(config.log_Z_times):
                    #     log_Z_list.append(session.run(log_Z_compute_op))
                    # from scipy.misc import logsumexp
                    # log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(len(log_Z_list))
                    # print('log_Z_list:{}'.format(log_Z_list))
                    # print('log_Z:{}'.format(log_Z))
                    log_Z_list = []
                    for i in range(config.log_Z_times):
                        for [batch_x, batch_origin_x] in Z_compute_flow:
                            log_Z_list.append(session.run(
                                another_log_Z_compute_op,
                                feed_dict={input_x: batch_x,
                                           input_origin_x: batch_origin_x}))
                    from scipy.misc import logsumexp
                    another_log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(len(log_Z_list))
                    # print('log_Z_list:{}'.format(log_Z_list))
                    print('another_log_Z:{}'.format(another_log_Z))
                    # final_log_Z = logsumexp(np.asarray([log_Z, another_log_Z])) - np.log(2)
                    final_log_Z = another_log_Z  # TODO
                    get_log_Z().set(final_log_Z)

                with loop.timeit('eval_time'):
                    evaluator.run()

                all_nll_list.append(loop._epoch_metrics.metrics['adv_test_nll'].mean)
                all_log_Z_list.append(final_log_Z)
                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

            all_nll_list = np.asarray(all_nll_list)
            all_log_Z_list = np.asarray(all_log_Z_list)
            print('NLL: {} ± {}'.format(np.mean(all_nll_list), np.std(all_nll_list)))
            print('log_Z: {} ± {}'.format(np.mean(all_log_Z_list), np.std(all_log_Z_list)))

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
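The log Z estimate in the loop above is a log-mean-exp over per-batch estimates, computed with logsumexp minus the log of the sample count. For reference, a minimal standalone sketch of that reduction (assuming only NumPy and SciPy, and independent of the TensorFlow graph; note that `scipy.misc.logsumexp`, used in the original script, was removed in newer SciPy releases in favor of `scipy.special.logsumexp`):

import numpy as np
from scipy.special import logsumexp

def log_mean_exp(values):
    # numerically stable log(mean(exp(values)))
    values = np.asarray(values, dtype=np.float64)
    return logsumexp(values) - np.log(values.size)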
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers,
                                                         name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    warm = tf.placeholder(dtype=tf.float32, shape=(), name='warm')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        init_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        init_p_net = p_net(observed={'x': input_x, 'z': init_q_net['z']},
                           n_z=config.train_n_qz, beta=beta)
        init_loss = sum(get_all_loss(init_q_net, init_p_net, init_pn_net))

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
            arg_scope([batch_norm, dropout], training=True):
        train_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        train_log_Z = spt.ops.log_mean_exp(
            -train_pn_net['z'].log_prob().energy - train_pn_net['z'].log_prob())
        train_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        train_p_net = p_net(observed={'x': input_x, 'z': train_q_net['z']},
                            n_z=config.train_n_qz, beta=beta, log_Z=train_log_Z)

        VAE_loss, D_loss, G_loss, debug = get_all_loss(train_q_net, train_p_net,
                                                       train_pn_net, warm)
        VAE_loss += tf.losses.get_regularization_loss()
        D_loss += tf.losses.get_regularization_loss()
        G_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z())
        test_chain = test_q_net.chain(p_net, observed={'x': input_x}, n_z=config.test_n_qz,
                                      latent_axis=0, beta=beta, log_Z=get_log_Z())
        test_recon = tf.reduce_mean(test_chain.model['x'].log_prob())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(test_chain.model['x'] * 128 + 127.5)) ** 2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(tf.reshape(test_mse, (-1, config.test_x_samples)), axis=-1))
        test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(test_chain.vi.evaluation.is_loglikelihood(),
                           (-1, config.test_x_samples)),
                axis=-1)
        ) + config.x_shape_multiple * np.log(128.0)
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        vi = spt.VariationalInference(
            log_joint=test_chain.model['x'].log_prob() +
                      test_chain.model['z'].distribution.log_prob(
                          test_chain.model['z'], group_ndims=1,
                          y=test_chain.model['x']).log_energy_prob,
            latent_log_probs=[test_q_net['z'].log_prob()],
            axis=0)
        adv_test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(vi.evaluation.is_loglikelihood(),
                           (-1, config.test_x_samples)),
                axis=-1)
        ) + config.x_shape_multiple * np.log(128.0)
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) *
            tf.exp(test_pn_net['z'].log_prob().log_energy_prob - test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob())
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() - test_pn_net['z'].log_prob().log_energy_prob)

    xi_node = get_var('p_net/xi')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables('G_theta') + \
            tf.trainable_variables('beta') + tf.trainable_variables('p_net/xi') + \
            tf.trainable_variables('posterior_flow')
        D_params = tf.trainable_variables('D_psi')
        G_params = tf.trainable_variables('G_theta')
        print("========VAE_params=========")
        print(VAE_params)
        print("========D_params=========")
        print(D_params)
        print("========G_params=========")
        print(G_params)

        with tf.variable_scope('VAE_optimizer'):
            _VAE_grads = tf.gradients(VAE_loss, G_params)
            VAE_grad = []
            for grad in _VAE_grads:
                VAE_grad.append(tf.reshape(grad, (-1,)))
            VAE_grad = tf.concat(VAE_grad, axis=0)
            # above is working for get the gradient for G_theta
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
            VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)
        with tf.variable_scope('D_optimizer'):
            D_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999)
            D_grads = D_optimizer.compute_gradients(D_loss, D_params)
        with tf.variable_scope('G_optimizer'):
            G_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999)
            G_grads = G_optimizer.compute_gradients(G_loss, G_params)
            _G_grads = tf.gradients(G_loss, G_params)
            G_grad = [tf.reshape(grad, (-1,)) for grad in _G_grads]
            G_grad = tf.concat(G_grad, axis=0)

        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)
            G_train_op = G_optimizer.apply_gradients(G_grads)
            D_train_op = D_optimizer.apply_gradients(D_grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = 256.0 * tf.reshape(
            p_net(n_z=100, mcmc_iterator=0, beta=beta)['x'].distribution.mean,
            (-1,) + config.x_shape) / 2 + 127.5
        reconstruct_q_net = q_net(input_x, posterior_flow)
        reconstruct_z = reconstruct_q_net['z']
        reconstruct_plots = 256.0 * tf.reshape(
            p_net(observed={'z': reconstruct_z}, beta=beta)['x'],
            (-1,) + config.x_shape) / 2 + 127.5
        x_plots = tf.clip_by_value(x_plots, 0, 255)
        reconstruct_plots = tf.clip_by_value(reconstruct_plots, 0, 255)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            # plot samples
            images = session.run(x_plots)
            # pyplot.scatter(z_points[:, 0], z_points[:, 1], s=5)
            # pyplot.savefig(results.system_path('plotting/z_plot/{}.pdf'.format(loop.epoch)))
            # pyplot.close()
            # print(images)
            try:
                print(np.max(images), np.min(images))
                images = np.round(images)
                save_images_collection(
                    images=images,
                    filename='plotting/sample/{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )

                # plot reconstructs
                for [x] in reconstruct_train_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/train.reconstruct/{}.png'.format(loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break

                # plot reconstructs
                for [x] in reconstruct_test_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/test.reconstruct/{}.png'.format(loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break
            except Exception as e:
                print(e)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float)
    train_flow = spt.DataFlow.arrays([x_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    train_flow = train_flow.map(uniform_sampler)
    gan_train_flow = spt.DataFlow.arrays(
        [np.concatenate([x_train, x_test], axis=0)], config.batch_size,
        shuffle=True, skip_incomplete=True)
    gan_train_flow = gan_train_flow.map(uniform_sampler)
    reconstruct_train_flow = spt.DataFlow.arrays([x_train], 50, shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test], 50, shuffle=True,
                                                skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [np.repeat(x_test, config.test_x_samples, axis=0)], config.test_batch_size)
    test_flow = test_flow.map(uniform_sampler)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta', 'D_psi'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
        ) as loop:
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_nll': test_nll, 'test_lb': test_lb,
                                               'adv_test_nll': adv_test_nll,
                                               'adv_test_lb': adv_test_lb,
                                               'reconstruct_energy': reconstruct_energy,
                                               'real_energy': real_energy,
                                               'pd_energy': pd_energy,
                                               'pn_energy': pn_energy,
                                               'test_recon': test_recon,
                                               'kl_adv_and_gaussian': kl_adv_and_gaussian,
                                               'test_mse': test_mse},
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            n_critical = config.n_critical

            # adversarial training
            for epoch in epoch_iterator:
                step_iterator = MyIterator(train_flow)
                while step_iterator.has_next:
                    if epoch <= config.warm_up_start:
                        # generator training x
                        [_, batch_G_loss] = session.run([G_train_op, G_loss], feed_dict={})
                        loop.collect_metrics(G_loss=batch_G_loss)

                    # vae training
                    for step, [x] in loop.iter_steps(limited(step_iterator, n_critical)):
                        if epoch <= config.warm_up_start:
                            # discriminator training
                            [_, batch_D_loss, debug_loss] = session.run(
                                [D_train_op, D_loss, debug],
                                feed_dict={input_x: x})
                            loop.collect_metrics(D_loss=batch_D_loss)
                            loop.collect_metrics(debug_loss=debug_loss)
                        else:
                            [_, batch_VAE_loss, beta_value, xi_value, batch_train_recon,
                             train_reconstruct_energy_value, training_D_loss] = session.run(
                                [VAE_train_op, VAE_loss, beta, xi_node, train_recon,
                                 train_reconstruct_energy, D_loss],
                                feed_dict={
                                    input_x: x,
                                    warm: min(1.0,
                                              1.0 * (epoch - config.warm_up_start) /
                                              config.warm_up_epoch)
                                })
                            loop.collect_metrics(batch_VAE_loss=batch_VAE_loss)
                            loop.collect_metrics(xi=xi_value)
                            loop.collect_metrics(beta=beta_value)
                            loop.collect_metrics(train_recon=batch_train_recon)
                            loop.collect_metrics(
                                train_reconstruct_energy=train_reconstruct_energy_value)
                            loop.collect_metrics(training_D_loss=training_D_loss)
                        # loop.print_logs()

                if epoch in config.lr_anneal_epoch_freq:
                    learning_rate.anneal()

                if epoch == config.warm_up_start:
                    learning_rate.set(config.initial_lr)

                if epoch % config.plot_epoch_freq == 0:
                    plot_samples(loop)

                if epoch % config.test_epoch_freq == 0:
                    log_Z_list = []
                    for i in range(config.log_Z_times):
                        log_Z_list.append(session.run(log_Z_compute_op))
                    from scipy.misc import logsumexp
                    log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(config.log_Z_times)
                    get_log_Z().set(log_Z)
                    print('log_Z_list:{}'.format(log_Z_list))
                    print('log_Z:{}'.format(log_Z))
                    with loop.timeit('eval_time'):
                        evaluator.run()

                if epoch == config.max_epoch:
                    dataset_img = np.concatenate([_x_train, _x_test], axis=0)
                    sample_img = []
                    for i in range((len(x_train) + len(x_test)) // 100 + 1):
                        sample_img.append(session.run(x_plots))
                    sample_img = np.concatenate(sample_img, axis=0).astype('uint8')
                    sample_img = sample_img[:len(dataset_img)]
                    sample_img = np.asarray(sample_img)  # turn to numpy array
                    FID = get_fid(sample_img, dataset_img)
                    IS_mean, IS_std = get_inception_score(sample_img)
                    loop.collect_metrics(FID=FID)
                    loop.collect_metrics(IS=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x)
    cls_loss = tf.losses.sparse_softmax_cross_entropy(input_y, logits)
    loss = cls_loss + tf.losses.get_regularization_loss()
    y = spt.ops.softmax_classification_output(logits)
    acc = spt.ops.classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop, train_op, [input_x, input_y], train_flow,
                                  metrics={'loss': loss, 'acc': acc},
                                  summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main(): # parse the arguments arg_parser = ArgumentParser() spt.register_config_arguments(config, arg_parser, title='Model options') spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet', title='TFSnippet options') arg_parser.parse_args(sys.argv[1:]) # print the config print_with_title('Configurations', pformat(config.to_dict()), after='\n') # open the result object and prepare for result directories results = MLResults(config.result_dir) results.save_config(config) # save experiment settings for review results.make_dirs('plotting/sample', exist_ok=True) results.make_dirs('plotting/z_plot', exist_ok=True) results.make_dirs('plotting/train.reconstruct', exist_ok=True) results.make_dirs('plotting/test.reconstruct', exist_ok=True) results.make_dirs('train_summary', exist_ok=True) posterior_flow = spt.layers.planar_normalizing_flows( config.nf_layers, name='posterior_flow') # input placeholders input_x = tf.placeholder( dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x') warm = tf.placeholder( dtype=tf.float32, shape=(), name='warm') learning_rate = spt.AnnealingVariable( 'learning_rate', config.initial_lr, config.lr_anneal_factor) beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True) # derive the loss for initializing with tf.name_scope('initialization'), \ arg_scope([spt.layers.act_norm], initializing=True), \ spt.utils.scoped_set_config(spt.settings, auto_histogram=False): init_pn_net = p_net(n_z=config.train_n_pz, beta=beta) init_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz) init_p_net = p_net(observed={'x': input_x, 'z': init_q_net['z']}, n_z=config.train_n_qz, beta=beta) init_loss = sum(get_all_loss(init_q_net, init_p_net, init_pn_net)) # derive the loss and lower-bound for training with tf.name_scope('training'), \ arg_scope([batch_norm], training=True): train_pn_theta = p_net(n_z=config.train_n_pz, beta=beta) train_pn_omega = p_omega_net(n_z=config.train_n_pz, beta=beta) train_log_Z = spt.ops.log_mean_exp(-train_pn_theta['z'].log_prob().energy - train_pn_theta['z'].log_prob()) train_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz) train_p_net = p_net(observed={'x': input_x, 'z': train_q_net['z']}, n_z=config.train_n_qz, beta=beta, log_Z=train_log_Z) VAE_loss, _, VAE_G_loss, VAE_D_real = get_all_loss(train_q_net, train_p_net, train_pn_theta, warm) _, D_loss, G_loss, D_real = get_all_loss(train_q_net, train_p_net, train_pn_omega, warm) VAE_loss += tf.losses.get_regularization_loss() VAE_G_loss += tf.losses.get_regularization_loss() D_loss += tf.losses.get_regularization_loss() G_loss += tf.losses.get_regularization_loss() # derive the nll and logits output for testing with tf.name_scope('testing'): test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz) # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z()) test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z()) test_chain = test_q_net.chain(p_net, observed={'x': input_x}, n_z=config.test_n_qz, latent_axis=0, beta=beta, log_Z=get_log_Z()) ele_test_recon = test_chain.model['x'].log_prob() ele_test_recon = tf.reduce_mean(ele_test_recon, axis=0) print(ele_test_recon.shape) test_recon = tf.reduce_mean(ele_test_recon) ''' test_mse = tf.reduce_sum( (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) - tf.round( test_chain.model['x'] * 128 + 127.5)) ** 2, axis=[-1, -2, -3]) # (sample_dim, batch_dim, x_sample_dim) test_mse = tf.reduce_min(test_mse, 
axis=[0]) test_mse = tf.reduce_mean(tf.reduce_mean(tf.reshape( test_mse, (-1, config.test_x_samples,) ), axis=-1)) ''' test_nll = -tf.reduce_mean( test_chain.vi.evaluation.is_loglikelihood() ) test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo()) vi = spt.VariationalInference( log_joint=test_chain.model['x'].log_prob() + test_chain.model['z'].distribution.log_prob( test_chain.model['z'], group_ndims=1, y=test_chain.model['x'] ).log_energy_prob, latent_log_probs=[test_q_net['z'].log_prob()], axis=0 ) ele_grad = tf.gradients(D_psi(input_x), [input_x])[0] ele_grad_norm = tf.reduce_sum(tf.square(ele_grad), axis=[-1, -2, -3]) ele_adv_test_nll = -vi.evaluation.is_loglikelihood() print(ele_adv_test_nll.shape) adv_test_nll = tf.reduce_mean(ele_adv_test_nll) ele_adv_test_lb = vi.lower_bound.elbo() print(ele_adv_test_lb.shape) adv_test_lb = tf.reduce_mean(ele_adv_test_lb) ele_real_energy = D_psi(test_chain.model['x']) real_energy = tf.reduce_mean(D_psi(input_origin_x)) reconstruct_energy = tf.reduce_mean(D_psi(test_chain.model['x'].distribution.mean)) pd_energy = tf.reduce_mean( D_psi(test_pn_net['x'].distribution.mean) * tf.exp( test_pn_net['z'].log_prob().log_energy_prob - test_pn_net['z'].log_prob())) pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean)) log_Z_compute_op = spt.ops.log_mean_exp( -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob()) another_log_Z_compute_op = spt.ops.log_mean_exp( -test_chain.model['z'].log_prob().energy - test_q_net['z'].log_prob() + np.log(config.len_train) ) kl_adv_and_gaussian = tf.reduce_mean( test_pn_net['z'].log_prob() - test_pn_net['z'].log_prob().log_energy_prob ) xi_node = get_var('p_net/xi') # derive the optimizer with tf.name_scope('optimizing'): VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables('G_theta') + tf.trainable_variables( 'beta') + tf.trainable_variables('posterior_flow') + tf.trainable_variables('p_net/xi') D_params = tf.trainable_variables('D_psi') VAE_G_params = tf.trainable_variables('G_theta') G_params = tf.trainable_variables('G_omega') print("========VAE_params=========") print(VAE_params) print("========D_params=========") print(D_params) print("========G_params=========") print(G_params) with tf.variable_scope('VAE_optimizer'): VAE_optimizer = tf.train.AdamOptimizer(learning_rate) VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params) with tf.variable_scope('VAE_G_optimizer'): VAE_G_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999) VAE_G_grads = VAE_G_optimizer.compute_gradients(VAE_G_loss, VAE_G_params) with tf.variable_scope('D_optimizer'): D_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999) D_grads = D_optimizer.compute_gradients(D_loss, D_params) with tf.variable_scope('G_optimizer'): G_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999) G_grads = G_optimizer.compute_gradients(G_loss, G_params) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads) VAE_G_train_op = VAE_optimizer.apply_gradients(VAE_G_grads) G_train_op = G_optimizer.apply_gradients(G_grads) D_train_op = D_optimizer.apply_gradients(D_grads) # prepare for training and testing data (_x_train, _y_train), (_x_test, _y_test) = spt.datasets.load_cifar10(x_shape=config.x_shape) x_train = (_x_train - 127.5) / 256.0 * 2 x_test = (_x_test - 127.5) / 256.0 * 2 # uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float) train_flow = 
spt.DataFlow.arrays([x_train], config.test_batch_size) reconstruct_train_flow = spt.DataFlow.arrays( [x_train], 100, shuffle=True, skip_incomplete=False) reconstruct_test_flow = spt.DataFlow.arrays( [x_test], 100, shuffle=True, skip_incomplete=False) test_flow = spt.DataFlow.arrays( [x_test], config.test_batch_size) (svhn_train, _), (svhn_test, __) = load_svhn(config.x_shape) svhn_train = (svhn_train - 127.5) / 256.0 * 2 svhn_test = (svhn_test - 127.5) / 256.0 * 2 svhn_train_flow = spt.DataFlow.arrays([svhn_train], config.test_batch_size) svhn_test_flow = spt.DataFlow.arrays([svhn_test], config.test_batch_size) with spt.utils.create_session().as_default() as session, \ train_flow.threaded(5) as train_flow: spt.utils.ensure_variables_initialized() # initialize the network # for [x, origin_x] in train_flow: # print('Network initialized, first-batch loss is {:.6g}.\n'. # format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x}))) # break # if config.z_dim == 512: # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000' # elif config.z_dim == 1024: # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000' # elif config.z_dim == 2048: # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000' # elif config.z_dim == 3072: # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000' # else: # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/93/0c/d434dabfcaecd3b5bed5/checkpoint/checkpoint/checkpoint.dat-195000' restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/24/29/6fc8930042bc9bab75d5/checkpoint/checkpoint/checkpoint.dat-195000' # train the network with spt.TrainLoop(tf.trainable_variables(), var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta', 'D_psi', 'G_omega', 'D_kappa'], max_epoch=config.max_epoch + 1, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), early_stopping=False, checkpoint_dir=results.system_path('checkpoint'), checkpoint_epoch_freq=100, restore_checkpoint=restore_checkpoint ) as loop: loop.print_training_summary() spt.utils.ensure_variables_initialized() def evaluator_generate(flow, preffix=''): return spt.Evaluator( loop, metrics={preffix + 'nll': test_nll, preffix + 'lb': test_lb, preffix + 'adv_nll': adv_test_nll, preffix + 'adv_lb': adv_test_lb, preffix + 'reconstruct_energy': reconstruct_energy, preffix + 'real_energy': real_energy, preffix + 'pd_energy': pd_energy, preffix + 'pn_energy': pn_energy, preffix + 'recon': test_recon, preffix + 'kl_adv_and_gaussian': kl_adv_and_gaussian}, # preffix + 'mse': test_mse}, inputs=[input_x], data_flow=flow, time_metric_name=preffix + 'time' ) cifar_train_evaluator = evaluator_generate(train_flow, 'cifar_train') cifar_test_evaluator = evaluator_generate(test_flow, 'cifar_test') svhn_train_evaluator = evaluator_generate(svhn_train_flow, 'svhn_train') svhn_test_evaluator = evaluator_generate(svhn_test_flow, 'svhn_test') epoch_iterator = loop.iter_epochs() # adversarial training for epoch in epoch_iterator: with loop.timeit('out_of_distribution_test'): def get_ele(ops, flow): packs = [] for [batch_x] in flow: pack = session.run( ops, feed_dict={ input_x: batch_x }) # [3, batch_size] pack = 
np.transpose(np.asarray(pack), (1, 0)) # [batch_size, 3] packs.append(pack) packs = np.concatenate(packs, axis=0) # [len_of_flow, 3] packs = np.transpose(np.asarray(packs), (1, 0)) # [3, len_of_flow] return packs cifar_train_nll, cifar_train_lb, cifar_train_recon, cifar_train_energy, cifar_train_norm = get_ele( [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], train_flow) # print(cifar_train_nll.shape, cifar_train_lb.shape, cifar_train_recon.shape) cifar_test_nll, cifar_test_lb, cifar_test_recon, cifar_test_energy, cifar_test_norm = get_ele( [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], test_flow) svhn_train_nll, svhn_train_lb, svhn_train_recon, svhn_train_energy, svhn_train_norm = get_ele( [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], svhn_train_flow) svhn_test_nll, svhn_test_lb, svhn_test_recon, svhn_test_energy, svhn_test_norm = get_ele( [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], svhn_test_flow) def draw_nll(nll, color, label): nll = list(nll) # print(nll) # print(nll.shape) n, bins, patches = pyplot.hist(nll, 40, normed=True, facecolor=color, alpha=0.4, label=label) index = [] for i in range(len(bins) - 1): index.append((bins[i] + bins[i + 1]) / 2) def smooth(c, N=5): weights = np.hanning(N) return np.convolve(weights / weights.sum(), c)[N - 1:-N + 1] n[2:-2] = smooth(n) pyplot.plot(index, n, color=color) pyplot.legend() print('%s done.' % label) # Draw the histogram or exrta the data here def plot_fig(data_list, color_list, label_list, x_label, fig_name): pyplot.cla() pyplot.plot() pyplot.grid(c='silver', ls='--') pyplot.xlabel(x_label) spines = pyplot.gca().spines for sp in spines: spines[sp].set_color('silver') def draw_nll(nll, color, label): nll = list(nll) # print(nll) # print(nll.shape) n, bins, patches = pyplot.hist(nll, 40, normed=True, facecolor=color, alpha=0.4, label=label) index = [] for i in range(len(bins) - 1): index.append((bins[i] + bins[i + 1]) / 2) def smooth(c, N=5): weights = np.hanning(N) return np.convolve(weights / weights.sum(), c)[N - 1:-N + 1] n[2:-2] = smooth(n) pyplot.plot(index, n, color=color) pyplot.legend() print('%s done.' 
% label) for i in range(len(data_list)): draw_nll(data_list[i], color_list[i], label_list[i]) pyplot.savefig('plotting/wgan/%s.jpg' % fig_name) def draw_curve(cifar_test, svhn_test, fig_name): label = np.concatenate(([1] * len(cifar_test), [-1] * len(svhn_test))) score = np.concatenate((cifar_test, svhn_test)) fpr, tpr, thresholds = roc_curve(label, score) precision, recall, thresholds = precision_recall_curve(label, score) pyplot.plot(recall, precision) pyplot.plot(fpr, tpr) print('%s auc: %4f, ap: %4f' % (fig_name, auc(fpr, tpr), average_precision_score(label, score))) pyplot.cla() pyplot.plot() draw_curve(data_list[1], data_list[3], fig_name) pyplot.savefig('plotting/wgan/%s_curve.jpg' % fig_name) plot_fig([cifar_train_energy, cifar_test_energy, svhn_train_energy, svhn_test_energy], ['red', 'salmon', 'green', 'lightgreen'], ['CIFAR-10 Train', 'CIFAR-10 Test', 'SVHN Train', 'SVHN Test'], 'energy', 'out_of_distribution_energy') plot_fig([cifar_train_norm, cifar_test_norm, svhn_train_norm, svhn_test_norm], ['red', 'salmon', 'green', 'lightgreen'], ['CIFAR-10 Train', 'CIFAR-10 Test', 'SVHN Train', 'SVHN Test'], 'log(bits/dim)', 'out_of_distribution_norm') with loop.timeit('eval_time'): cifar_train_evaluator.run() cifar_test_evaluator.run() svhn_train_evaluator.run() svhn_test_evaluator.run() loop.collect_metrics(lr=learning_rate.get()) loop.print_logs() # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close()
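# Standalone sketch: draw_curve above treats CIFAR-10 scores as positives and
# SVHN scores as negatives before computing ROC / precision-recall metrics.
# The snippet below reproduces only that metric computation with synthetic
# scores; the Gaussian samples stand in for real model outputs.
import numpy as np
from sklearn.metrics import roc_curve, auc, average_precision_score

rng = np.random.RandomState(0)
in_dist_scores = rng.normal(loc=1.0, size=1000)    # stand-in for CIFAR-10 scores
out_dist_scores = rng.normal(loc=-1.0, size=1000)  # stand-in for SVHN scores

label = np.concatenate(([1] * len(in_dist_scores), [-1] * len(out_dist_scores)))
score = np.concatenate((in_dist_scores, out_dist_scores))

fpr, tpr, _ = roc_curve(label, score)
print('auc: %.4f, ap: %.4f' % (auc(fpr, tpr), average_precision_score(label, score)))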
def main(config, result_dir): # print the config print_with_title('Configurations', config.format_config(), after='\n') # open the result object and prepare for result directories results = MLResults(result_dir) results.make_dirs('plotting', exist_ok=True) results.make_dirs('train_summary', exist_ok=True) # input placeholders input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x') is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training') learning_rate = tf.placeholder(shape=(), dtype=tf.float32) learning_rate_var = AnnealingDynamicValue(config.initial_lr, config.lr_anneal_factor) # build the model with arg_scope([q_net, p_net], is_training=is_training): # derive the loss and lower-bound for training train_q_net = q_net(input_x) train_chain = train_q_net.chain(p_net, latent_names=['z'], latent_axis=0, observed={'x': input_x}) baseline = baseline_net(input_x) cost, baseline_cost = \ train_chain.vi.training.reinforce(baseline=baseline) loss = regularization_loss() + tf.reduce_mean(cost + baseline_cost) # derive the nll and logits output for testing test_q_net = q_net(input_x, n_z=config.test_n_z) test_chain = test_q_net.chain(p_net, latent_names=['z'], latent_axis=0, observed={'x': input_x}) test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood()) test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo()) # derive the optimizer optimizer = tf.train.AdamOptimizer(learning_rate) params = tf.trainable_variables() grads = optimizer.compute_gradients(loss, var_list=params) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.apply_gradients(grads) # derive the plotting function with tf.name_scope('plot_x'): plot_p_net = p_net(n_z=100, is_training=is_training) x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28)) def plot_samples(loop): with loop.timeit('plot_time'): session = get_default_session_or_error() images = session.run(x_plots, feed_dict={is_training: False}) save_images_collection(images=images, filename='plotting/{}.png'.format( loop.epoch), grid_size=(10, 10), results=results) # prepare for training and testing data (x_train, y_train), (x_test, y_test) = load_mnist() train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True) test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True) with create_session().as_default(): # train the network with TrainLoop(params, max_epoch=config.max_epoch, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), early_stopping=False) as loop: trainer = Trainer(loop, train_op, [input_x], train_flow, feed_dict={ learning_rate: learning_rate_var, is_training: True }, metrics={'loss': loss}) trainer.anneal_after(learning_rate_var, epochs=config.lr_anneal_epoch_freq, steps=config.lr_anneal_step_freq) evaluator = Evaluator(loop, metrics={ 'test_nll': test_nll, 'test_lb': test_lb }, inputs=[input_x], data_flow=test_flow, feed_dict={is_training: False}, time_metric_name='test_time') evaluator.after_run.add_hook( lambda: results.update_metrics(evaluator.last_metrics_dict)) trainer.evaluate_after_epochs(evaluator, freq=10) trainer.evaluate_after_epochs(functools.partial( plot_samples, loop), freq=10) trainer.log_after_epochs(freq=1) trainer.run() # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close()
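# Standalone sketch: the script above trains the discrete-latent model with the
# REINFORCE estimator plus a learned baseline (vi.training.reinforce).  The toy
# NumPy loop below shows the score-function gradient with a running-average
# baseline on a single Bernoulli parameter; it is purely illustrative and not
# TFSnippet's implementation.
import numpy as np

rng = np.random.RandomState(0)
theta = 0.3        # Bernoulli parameter being optimised
baseline = 0.0     # running-average baseline for variance reduction
lr, momentum = 0.05, 0.9


def reward(z):
    """Toy objective that prefers z == 1."""
    return float(z)


for step in range(200):
    z = rng.rand() < theta
    r = reward(z)
    # d/dtheta log p(z; theta) for a Bernoulli sample
    score = 1.0 / theta if z else -1.0 / (1.0 - theta)
    theta = np.clip(theta + lr * (r - baseline) * score, 1e-3, 1.0 - 1e-3)
    baseline = momentum * baseline + (1.0 - momentum) * r

print('theta after training: %.3f' % theta)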
def main(): # parse the arguments arg_parser = ArgumentParser() spt.register_config_arguments(config, arg_parser, title='Model options') spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet', title='TFSnippet options') arg_parser.parse_args(sys.argv[1:]) # print the config print_with_title('Configurations', pformat(config.to_dict()), after='\n') # open the result object and prepare for result directories results = MLResults(config.result_dir) results.save_config(config) # save experiment settings for review results.make_dirs('plotting', exist_ok=True) results.make_dirs('train_summary', exist_ok=True) # input placeholders input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x') learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr, config.lr_anneal_factor) # derive the output for initialization with tf.name_scope('initialization'), \ spt.utils.scoped_set_config(spt.settings, auto_histogram=False): init_q_net = q_net(input_x, is_initializing=True) init_chain = init_q_net.chain(p_net, observed={'x': input_x}, is_initializing=True) init_lb = tf.reduce_mean(init_chain.vi.lower_bound.elbo()) # derive the loss and lower-bound for training with tf.name_scope('training'): train_q_net = q_net(input_x) train_chain = train_q_net.chain(p_net, observed={'x': input_x}) vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb()) loss = vae_loss + tf.losses.get_regularization_loss() # derive the nll and logits output for testing with tf.name_scope('testing'): test_q_net = q_net(input_x, n_z=config.test_n_z) test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': input_x}) test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood()) test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo()) # derive the optimizer with tf.name_scope('optimizing'): optimizer = tf.train.AdamOptimizer(learning_rate) params = tf.trainable_variables() grads = optimizer.compute_gradients(loss, var_list=params) with tf.control_dependencies(tf.get_collection( tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.apply_gradients(grads) # derive the plotting function with tf.name_scope('plotting'): plot_p_net = p_net(n_z=100) x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28)) def plot_samples(loop): with loop.timeit('plot_time'): images = session.run(x_plots) save_images_collection(images=images, filename='plotting/{}.png'.format( loop.epoch), grid_size=(10, 10), results=results) # prepare for training and testing data (x_train, y_train), (x_test, y_test) = \ spt.datasets.load_mnist(x_shape=[784]) train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True) test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True) with spt.utils.create_session().as_default() as session, \ train_flow.threaded(5) as train_flow: spt.utils.ensure_variables_initialized() # initialize the network for [x] in train_flow: print('Network initialized, first-batch loss is {:.6g}.\n'.format( session.run(init_lb, feed_dict={input_x: x}))) break # train the network with spt.TrainLoop(params, var_groups=['q_net', 'p_net'], max_epoch=config.max_epoch, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), early_stopping=False) as loop: trainer = spt.Trainer(loop, train_op, [input_x], train_flow, metrics={'loss': loss}, summaries=tf.summary.merge_all( spt.GraphKeys.AUTO_HISTOGRAM)) trainer.anneal_after(learning_rate, 
epochs=config.lr_anneal_epoch_freq, steps=config.lr_anneal_step_freq) evaluator = spt.Evaluator(loop, metrics={ 'test_nll': test_nll, 'test_lb': test_lb }, inputs=[input_x], data_flow=test_flow, time_metric_name='test_time') evaluator.events.on( spt.EventKeys.AFTER_EXECUTION, lambda e: results.update_metrics(evaluator.last_metrics_dict)) trainer.evaluate_after_epochs(evaluator, freq=10) trainer.evaluate_after_epochs(functools.partial( plot_samples, loop), freq=10) trainer.log_after_epochs(freq=1) trainer.run() # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close()
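# Standalone sketch: test_nll above comes from vi.evaluation.is_loglikelihood(),
# an importance-sampling estimate log p(x) ~= log mean_k exp(log p(x, z_k) - log q(z_k | x)).
# The NumPy snippet below computes that estimate from placeholder per-sample terms.
import numpy as np
from scipy.special import logsumexp


def is_loglikelihood(log_joint, log_q):
    """Importance-sampling estimate of log p(x) from K posterior samples."""
    log_w = np.asarray(log_joint) - np.asarray(log_q)
    return logsumexp(log_w) - np.log(log_w.size)


log_joint = np.array([-90.2, -88.7, -91.5])  # placeholder log p(x, z_k)
log_q = np.array([-3.1, -2.8, -3.4])         # placeholder log q(z_k | x)
print(is_loglikelihood(log_joint, log_q))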
def main(): # parse the arguments arg_parser = ArgumentParser() spt.register_config_arguments(config, arg_parser, title='Model options') spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet', title='TFSnippet options') arg_parser.parse_args(sys.argv[1:]) # print the config print_with_title('Configurations', pformat(config.to_dict()), after='\n') # open the result object and prepare for result directories model_file = config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] + "_" + \ str(config.noExp) + ".model" dirName = os.path.basename(__file__).split(".py")[0] + "_" + str( config.noExp) results = MLResults(os.path.join(config.result_dir, dirName)) results.save_config(config) # save experiment settings results.make_dirs('train_summary', exist_ok=True) results.make_dirs('result_summary', exist_ok=True) results.make_dirs('mid_summary', exist_ok=True) # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number # input placeholders input_x = tf.placeholder(dtype=tf.float32, shape=(None, ) + config.x_shape, name='input_x') learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr, config.lr_anneal_factor, min_value=1e-6) multi_gpu = MultiGPU(disable_prebuild=True) # multi_gpu = MultiGPU() # derive the training operation gradses = [] grad_vars = [] train_losses = [] BATCH_SIZE = get_batch_size(input_x) for dev, pre_build, [dev_input_x ] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]): with tf.device(dev), multi_gpu.maybe_name_scope(dev): # derive the loss for initializing with tf.name_scope('initialization'), \ arg_scope([p_net, q_net], is_initializing=True), \ spt.utils.scoped_set_config(spt.settings, auto_histogram=False): init_q_net = q_net(dev_input_x, n_z=config.train_n_samples) init_chain = init_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) init_loss = tf.reduce_mean(init_chain.vi.training.vimco()) # derive the loss and lower-bound for training with tf.name_scope('training'), \ arg_scope([p_net, q_net], is_training=True): train_q_net = q_net(dev_input_x, n_z=config.train_n_samples) train_chain = train_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) + tf.losses.get_regularization_loss()) train_losses.append(train_loss) # derive the logits output for testing with tf.name_scope('testing'): test_q_net = q_net(dev_input_x, n_z=config.test_n_z) test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) # log_prob of X and each univariate time series of X log_prob = tf.reduce_mean( test_chain.model['x'].distribution.log_prob(dev_input_x), 0) log_prob_per_element = tf.reduce_sum(log_prob) log_prob_per_element_univariate_TS = tf.reduce_sum( log_prob, [0, 1, 3]) log_prob_per_element_univariate_TS_All = tf.reduce_sum( log_prob, [1, 3]) # derive the optimizer with tf.name_scope('optimizing'): params = tf.trainable_variables() optimizer = tf.train.AdamOptimizer(learning_rate) grads = optimizer.compute_gradients(train_loss, params) for grad, var in grads: if grad is not None and var is not None: if config.grad_clip_norm: grad = tf.clip_by_norm(grad, config.grad_clip_norm) if config.check_numerics: grad = tf.check_numerics( grad, 'gradient for {} has numeric issue'.format( var.name)) grad_vars.append((grad, var)) gradses.append(grad_vars) # merge multi-gpu outputs and operations [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE) train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses), optimizer=optimizer, 
control_inputs=tf.get_collection( tf.GraphKeys.UPDATE_OPS)) # sort the contribution of each univariate_TS of input SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32, shape=(None, None), name='SORT_UNIVARIATE_TS_INPUT') SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT, k=config.metricNumber).indices + 1 # load the training and testing data print("=" * 10 + "Shape of Input data" + "=" * 10) x, time_indexs, x_test, time_indexs2 = load_matrix_allData( config.dataReadformat, config.datapathForTrain, config.datapathForTest, config.timeLength, config.metricNumber, "TrainFileNameList.txt", "TestFileNameList.txt", results, config.norm) x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1]) print("Test:", x_test.shape) if config.batchTest: test_flow = DataFlow.arrays( [x_test], config.test_batch_size) # DataFlow is iterator del x_test x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION) x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1]) x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1]) train_flow = DataFlow.arrays([x_train], config.batch_size, shuffle=False, skip_incomplete=True) val_flow = DataFlow.arrays([x_val], config.test_batch_size) print("Note:", config.x_dim, ", x_dim = size of datapoint = timeLength * metricNumber") print("Input data shape:", x.shape, "Train data shape:", x_train.shape, "Validation data shape:", x_val.shape) del x_train, x_val, x # training part with spt.utils.create_session().as_default() as session: spt.utils.ensure_variables_initialized() saver = CheckpointSaver(tf.trainable_variables(), model_file) if os.path.exists(model_file): # load the parameters of trained model saver.restore_latest() else: # initialize the network while True: breakFlag = 0 for [x] in train_flow: INITLOSS = session.run(init_loss, feed_dict={input_x: x}) print('Network initialized, first-batch loss is {:.6g}.'. 
format(INITLOSS)) if np.isnan(INITLOSS) or np.isinf( INITLOSS) or INITLOSS > 10**5: pass else: breakFlag = 1 break if breakFlag: break # train the network with train_flow.threaded(10) as train_flow: with spt.TrainLoop( params, var_groups=['q_net', 'p_net'], max_epoch=config.max_epoch, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), early_stopping=True) as loop: trainer = spt.Trainer(loop, train_op, [input_x], train_flow, metrics={'loss': train_loss}, summaries=tf.summary.merge_all( spt.GraphKeys.AUTO_HISTOGRAM)) # anneal the learning rate trainer.anneal_after(learning_rate, epochs=config.lr_anneal_epoch_freq, steps=config.lr_anneal_step_freq) validator = spt.Validator( loop, train_loss, [input_x], val_flow, ) trainer.evaluate_after_epochs(validator, freq=10) trainer.log_after_epochs(freq=1) trainer.run() saver.save() # save the training infomation firWrite = True num = 0 time0 = time.time() for [x_train] in train_flow: if config.savetrainDS: # log prob of each metric of each instance log_prob_per_element_univariate_TS_list_item_Train = ( session.run(log_prob_per_element_univariate_TS_All, feed_dict={input_x: x_train})) log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train log_prob_per_element_list_Train = np.sum(np.array( log_prob_per_element_univariate_TS_list_item_Train), axis=1).tolist() if firWrite: save_file( results.system_path("train_summary"), "OutlierScores_metric.txt", log_prob_per_element_univariate_TS_list_Train) save_file(results.system_path("train_summary"), "OutlierScores.txt", log_prob_per_element_list_Train) else: save_file( results.system_path("train_summary"), "OutlierScores_metric.txt", log_prob_per_element_univariate_TS_list_Train, "\n", "a") save_file(results.system_path("train_summary"), "OutlierScores.txt", log_prob_per_element_list_Train, "\n", "a") firWrite = False num += 1 if num % 1000 == 0: print( "-----Train %s >>>>>:Sum time of batch instances:%s" % (num, float(time.time() - time0) / float(num))) del train_flow, val_flow # online test time2 = time.time() log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], [] if config.batchTest: num = 0 for [x_test] in test_flow: if config.savetestDS: # log prob of each metric of each instance log_prob_per_element_univariate_TS_list_item = ( session.run(log_prob_per_element_univariate_TS_All, feed_dict={input_x: x_test})) log_prob_per_element_univariate_TS_list += log_prob_per_element_univariate_TS_list_item.tolist( ) log_prob_per_element_list += np.sum( np.array(log_prob_per_element_univariate_TS_list_item), axis=1).tolist() num += 1 if num % 200 == 0: print("-----Test %s >>>>>:Sum time of batch instances:%s" % (num, float(time.time() - time2) / float(num))) else: num = 1 for batch_x in x_test: if config.savetestTS: log_prob_per_element_list_item = (session.run( log_prob_per_element, feed_dict={input_x: [batch_x]})) log_prob_per_element_list.append( log_prob_per_element_list_item) if config.savetestDS: log_prob_per_element_univariate_TS_list_item = ( session.run(log_prob_per_element_univariate_TS, feed_dict={input_x: [batch_x]})) log_prob_per_element_univariate_TS_list.append( log_prob_per_element_univariate_TS_list_item) log_prob_per_element_list.append( sum(log_prob_per_element_univariate_TS_list_item)) if num % 200 == 0: print( "-----Test>>>>>:%d, average time of each instance:%s" % (num, float(time.time() - time2) / float(num))) num += 1 # get the lable 
file name and its line cnt number allLabelFileNameLineCntList = get_machineID(results, config.labelpath) print("No of OutlierScores for all dataPoint:(%s):" % len(log_prob_per_element_list)) if config.savetestDS: save_file( results.system_path("result_summary"), "OutlierScores_metric.txt", cat_List(allLabelFileNameLineCntList, log_prob_per_element_univariate_TS_list)) save_file( results.system_path("result_summary"), "OutlierScores.txt", cat_List(allLabelFileNameLineCntList, log_prob_per_element_list)) if config.evaluation: # Preparation for the history two-metric results twoMetricScore = read_file(results.system_path("train_summary"), "OutlierScores_metric.txt") ave_twoMetricScore = np.mean(np.array(twoMetricScore), axis=0).tolist() save_file(results.system_path("result_summary"), "PRF.txt", ["Average score of each univariate time series", "\n"], ",") save_file(results.system_path("result_summary"), "PRF.txt", ave_twoMetricScore + ["\n"], ",", "a") save_file(results.system_path("result_summary"), "PRF.txt", [ "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n" ], ",", "a") # get the sorted items of each metric by change score twoMetricScoreList = cal_scoreChanges( log_prob_per_element_list, ave_twoMetricScore, log_prob_per_element_univariate_TS_list) MetricResult = session.run( SORT_UNIVARIATE_TS, feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList}) save_file(results.system_path("result_summary"), "MetricResult.txt", cat_List(allLabelFileNameLineCntList, MetricResult)) # POT evaluation POT_TH = pot_eval( read_file(results.system_path("train_summary"), "OutlierScores.txt", "float"), config.q, config.level) resultArray, outlierLabelfileNameLineCntList = cal_binaryResult( log_prob_per_element_list, POT_TH, time_indexs2, config.saveMetricInfo, allLabelFileNameLineCntList) evaluate(results, config.labelpath, resultArray, time_indexs2, POT_TH) # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close() interpretation_hit_ratio(truth_filepath=config.interpret_filepath, prediction_filepath=os.path.join( config.result_dir, dirName, "result_summary", "MetricResult.txt"))
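# Standalone sketch: cal_binaryResult / evaluate above turn per-window outlier
# scores into binary predictions at the POT threshold and report
# precision/recall/F1.  The snippet below shows only the threshold-and-score
# step with made-up scores and labels; pot_eval itself is project-specific and
# not reproduced here.
import numpy as np


def prf_at_threshold(scores, labels, threshold):
    """Flag a window as an outlier when its log-likelihood score falls below threshold."""
    pred = (np.asarray(scores) < threshold).astype(int)
    labels = np.asarray(labels)
    tp = int(np.sum((pred == 1) & (labels == 1)))
    fp = int(np.sum((pred == 1) & (labels == 0)))
    fn = int(np.sum((pred == 0) & (labels == 1)))
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1


scores = [-120.0, -30.5, -28.1, -250.7, -33.2]  # placeholder outlier scores
labels = [1, 0, 0, 1, 0]                        # placeholder ground truth
print(prf_at_threshold(scores, labels, threshold=-100.0))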
def main(): # parse the arguments arg_parser = ArgumentParser() spt.register_config_arguments(config, arg_parser) arg_parser.parse_args(sys.argv[1:]) # print the config print_with_title('Configurations', pformat(config.to_dict()), after='\n') # open the result object and prepare for result directories results = MLResults(config.result_dir) results.save_config(config) # save experiment settings for review results.make_dirs('plotting', exist_ok=True) results.make_dirs('train_summary', exist_ok=True) # input placeholders input_x = tf.placeholder(dtype=tf.int32, shape=(None, config.x_dim), name='input_x') learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr, config.lr_anneal_factor) # build the posterior flow with tf.variable_scope('posterior_flow'): flows = [] for i in range(config.n_flows): flows.append(spt.layers.ActNorm()) flows.append( spt.layers.CouplingLayer(tf.make_template( 'coupling', coupling_layer_shift_and_scale, create_scope_now_=True), scale_type='exp')) flows.append(spt.layers.InvertibleDense()) posterior_flow = spt.layers.SequentialFlow(flows=flows) # derive the initialization op with tf.name_scope('initialization'), \ arg_scope([spt.layers.act_norm], initializing=True): init_q_net = q_net(input_x, posterior_flow) init_chain = init_q_net.chain(p_net, latent_axis=0, observed={'x': input_x}) init_loss = tf.reduce_mean(init_chain.vi.training.sgvb()) # derive the loss and lower-bound for training with tf.name_scope('training'): train_q_net = q_net(input_x, posterior_flow) train_chain = train_q_net.chain(p_net, latent_axis=0, observed={'x': input_x}) vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb()) loss = vae_loss + tf.losses.get_regularization_loss() # derive the nll and logits output for testing with tf.name_scope('testing'): test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z) test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': input_x}) test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood()) test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo()) # derive the optimizer with tf.name_scope('optimizing'): optimizer = tf.train.AdamOptimizer(learning_rate) params = tf.trainable_variables() grads = optimizer.compute_gradients(loss, var_list=params) with tf.control_dependencies(tf.get_collection( tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.apply_gradients(grads) # derive the plotting function with tf.name_scope('plotting'): plot_p_net = p_net(n_z=100) x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28)) def plot_samples(loop): with loop.timeit('plot_time'): images = session.run(x_plots) save_images_collection(images=images, filename='plotting/{}.png'.format( loop.epoch), grid_size=(10, 10)) # prepare for training and testing data (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist() train_flow = bernoulli_flow(x_train, config.batch_size, shuffle=True, skip_incomplete=True) test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True) with spt.utils.create_session().as_default() as session, \ train_flow.threaded(5) as train_flow: # initialize the network spt.utils.ensure_variables_initialized() for [batch_x] in train_flow: print('Network initialization loss: {:.6g}'.format( session.run(init_loss, {input_x: batch_x}))) print('') break # train the network with spt.TrainLoop(params, var_groups=['p_net', 'q_net', 'posterior_flow'], max_epoch=config.max_epoch, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), 
summary_graph=tf.get_default_graph(), early_stopping=False) as loop: trainer = spt.Trainer(loop, train_op, [input_x], train_flow, metrics={'loss': loss}) trainer.anneal_after(learning_rate, epochs=config.lr_anneal_epoch_freq, steps=config.lr_anneal_step_freq) evaluator = spt.Evaluator(loop, metrics={ 'test_nll': test_nll, 'test_lb': test_lb }, inputs=[input_x], data_flow=test_flow, time_metric_name='test_time') evaluator.after_run.add_hook( lambda: results.update_metrics(evaluator.last_metrics_dict)) trainer.evaluate_after_epochs(evaluator, freq=10) trainer.evaluate_after_epochs(functools.partial( plot_samples, loop), freq=10) trainer.log_after_epochs(freq=1) trainer.run() # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close()
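# Standalone sketch: the posterior flow above stacks ActNorm, affine coupling
# and invertible dense layers, each contributing a log-det-Jacobian term to the
# posterior density.  The toy NumPy coupling transform below illustrates that
# bookkeeping; the shift/scale network is a stand-in, not spt.layers.CouplingLayer.
import numpy as np


def affine_coupling_forward(z, shift_and_scale):
    """Transform the second half of z conditioned on the first half."""
    d = z.shape[-1] // 2
    z1, z2 = z[..., :d], z[..., d:]
    shift, log_scale = shift_and_scale(z1)
    y2 = z2 * np.exp(log_scale) + shift
    log_det = log_scale.sum(axis=-1)  # log |det dy/dz| of the transform
    return np.concatenate([z1, y2], axis=-1), log_det


def toy_shift_and_scale(z1):
    # any function of z1 keeps the coupling transform invertible
    return np.tanh(z1), 0.5 * np.tanh(z1)


z = np.random.RandomState(0).normal(size=(4, 8))
y, log_det = affine_coupling_forward(z, toy_shift_and_scale)
print(y.shape, log_det)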
def main(): # parse the arguments arg_parser = ArgumentParser() spt.register_config_arguments(config, arg_parser, title='Model options') spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet', title='TFSnippet options') arg_parser.parse_args(sys.argv[1:]) # print the config print_with_title('Configurations', pformat(config.to_dict()), after='\n') # open the result object and prepare for result directories results = MLResults(config.result_dir) results.save_config(config) # save experiment settings for review results.make_dirs('plotting', exist_ok=True) results.make_dirs('train_summary', exist_ok=True) # input placeholders input_x = tf.placeholder(dtype=tf.int32, shape=(None, ) + config.x_shape, name='input_x') input_origin_x = tf.placeholder(dtype=tf.float32, shape=(None, ) + config.x_shape, name='input_origin_x') learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr, config.lr_anneal_factor) input_x = tf.to_float(input_x) # derive the loss for initializing with tf.name_scope('initialization'), \ arg_scope([p_net, q_net], is_initializing=True), \ spt.utils.scoped_set_config(spt.settings, auto_histogram=False): init_q_net = q_net( input_origin_x if config.use_q_z_given_e else input_x) init_chain = init_q_net.chain( p_net, observed={ 'x': input_origin_x if config.use_origin_x_as_observe else input_x }) init_loss = tf.reduce_mean(init_chain.vi.training.sgvb()) # derive the loss and lower-bound for training with tf.name_scope('training'), \ arg_scope([p_net, q_net], is_training=True): train_q_net = q_net( input_origin_x if config.use_q_z_given_e else input_x) train_chain = train_q_net.chain( p_net, observed={ 'x': input_origin_x if config.use_origin_x_as_observe else input_x }) train_loss = (tf.reduce_mean(train_chain.vi.training.sgvb()) + tf.losses.get_regularization_loss()) # derive the nll and logits output for testing with tf.name_scope('testing'): test_q_net = q_net( input_origin_x if config.use_q_z_given_e else input_x, n_z=config.test_n_z) test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': tf.to_float(input_x)}) test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood()) test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo()) test_mse = tf.reduce_sum( (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) - tf.round(input_origin_x * 128 + 127.5))**2, axis=[-1, -2, -3]) # (sample_dim, batch_dim) test_mse = tf.reduce_min(test_mse, axis=[0]) test_mse = tf.reduce_mean(test_mse) # derive the optimizer with tf.name_scope('optimizing'): params = tf.trainable_variables() optimizer = tf.train.AdamOptimizer(learning_rate) grads = optimizer.compute_gradients(train_loss, params) with tf.control_dependencies(tf.get_collection( tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.apply_gradients(grads) # derive the plotting function with tf.name_scope('plotting'): x_plots = tf.reshape(bernoulli_as_pixel(p_net(n_z=100)['x']), (-1, ) + config.x_shape) def plot_samples(loop): with loop.timeit('plot_time'): images = session.run(x_plots) save_images_collection( images=images, filename='plotting/{}.png'.format(loop.epoch), grid_size=(10, 10), results=results, channels_last=config.channels_last, ) # prepare for training and testing data (_x_train, _y_train), (_x_test, _y_test) = \ spt.datasets.load_mnist(x_shape=config.x_shape) # train_flow = bernoulli_flow( # x_train, config.batch_size, shuffle=True, skip_incomplete=True) x_train = _x_train / 255.0 x_test = _x_test / 255.0 bernouli_sampler = BernoulliSampler() train_flow = 
spt.DataFlow.arrays([x_train, x_train], config.batch_size, shuffle=True, skip_incomplete=True) train_flow = train_flow.map(lambda x, y: [bernouli_sampler.sample(x), y]) Z_compute_flow = spt.DataFlow.arrays([x_train, x_train], config.test_batch_size, shuffle=True, skip_incomplete=True) Z_compute_flow = Z_compute_flow.map( lambda x, y: [bernouli_sampler.sample(x), y]) reconstruct_train_flow = spt.DataFlow.arrays([x_train], 100, shuffle=True, skip_incomplete=False) reconstruct_test_flow = spt.DataFlow.arrays([x_test], 100, shuffle=True, skip_incomplete=False) test_flow = spt.DataFlow.arrays([x_test, x_test], config.test_batch_size) test_flow = test_flow.map(lambda x, y: [bernouli_sampler.sample(x), y]) with spt.utils.create_session().as_default() as session, \ train_flow.threaded(5) as train_flow: spt.utils.ensure_variables_initialized() # initialize the network for [x, ox] in train_flow: print('Network initialized, first-batch loss is {:.6g}.\n'.format( session.run(init_loss, feed_dict={ input_x: x, input_origin_x: ox }))) break # train the network with spt.TrainLoop( params, var_groups=['q_net', 'p_net'], max_epoch=config.max_epoch + 1, max_step=config.max_step, summary_dir=(results.system_path('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), checkpoint_dir=results.system_path('checkpoint'), checkpoint_epoch_freq=100, early_stopping=False, restore_checkpoint= "/mnt/mfs/mlstorage-experiments/cwx17/10/1c/d4e63c432be97afba7e5/checkpoint/checkpoint/checkpoint.dat-140400" ) as loop: loop.print_training_summary() spt.utils.ensure_variables_initialized() epoch_iterator = loop.iter_epochs() for epoch in epoch_iterator: dataset_img = np.tile(_x_train, (1, 1, 1, 3)) mala_img = [] for i in range(config.fid_samples // config.sample_n_z): mala_images = session.run(x_plots) mala_img.append(mala_images) print('{}-th sample finished...'.format(i)) mala_img = np.concatenate(mala_img, axis=0).astype('uint8') mala_img = np.asarray(mala_img) mala_img = np.tile(mala_img, (1, 1, 1, 3)) np.savez('sample_store', mala_img=mala_img) FID = get_fid(mala_img, dataset_img) IS_mean, IS_std = get_inception_score(mala_img) loop.collect_metrics(FID=FID) loop.collect_metrics(IS=IS_mean) # ori_img = np.concatenate(ori_img, axis=0).astype('uint8') # ori_img = np.asarray(ori_img) # FID = get_fid_google(ori_img, dataset_img) # IS_mean, IS_std = get_inception_score(ori_img) # loop.collect_metrics(FID_ori=FID) # loop.collect_metrics(IS_ori=IS_mean) loop.collect_metrics(lr=learning_rate.get()) loop.print_logs() # print the final metrics and close the results object print_with_title('Results', results.format_metrics(), before='\n') results.close()
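# Standalone sketch: get_fid above compares generated samples with the dataset
# in Inception feature space.  The snippet below shows only the Frechet
# distance between two feature sets, ||mu_a - mu_b||^2 + Tr(Ca + Cb - 2 (Ca Cb)^(1/2));
# the random features are placeholders for real Inception activations.
import numpy as np
from scipy import linalg


def frechet_distance(feat_a, feat_b):
    """Frechet distance between Gaussians fitted to two feature sets."""
    mu_a, mu_b = feat_a.mean(axis=0), feat_b.mean(axis=0)
    cov_a = np.cov(feat_a, rowvar=False)
    cov_b = np.cov(feat_b, rowvar=False)
    covmean, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    diff = mu_a - mu_b
    return float(diff.dot(diff) + np.trace(cov_a + cov_b - 2.0 * covmean))


rng = np.random.RandomState(0)
print(frechet_distance(rng.normal(size=(256, 64)), rng.normal(size=(256, 64))))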