def main():
    """Train and evaluate a VAE whose posterior is refined by a normalizing
    flow (ActNorm -> affine coupling -> invertible dense, repeated
    ``config.n_flows`` times) on binarized MNIST.

    Reports test NLL (importance-sampled log-likelihood) and test ELBO,
    and saves a 10x10 grid of samples every 10 epochs.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # build the posterior flow
    # NOTE(review): each flow step is ActNorm -> coupling -> invertible
    # dense; the per-step grouping is reconstructed from the collapsed
    # source text — confirm against the original layout.
    with tf.variable_scope('posterior_flow'):
        flows = []
        for i in range(config.n_flows):
            flows.append(spt.layers.ActNorm())
            flows.append(spt.layers.CouplingLayer(
                tf.make_template('coupling',
                                 coupling_layer_shift_and_scale,
                                 create_scope_now_=True),
                scale_type='exp'))
            flows.append(spt.layers.InvertibleDense())
        posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op (data-dependent init for act_norm)
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        # run any UPDATE_OPS (e.g. batch-norm moving averages) before the
        # gradient application
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(
            bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        # `session` is bound later by the enclosing `with ... as session:`
        # block; this closure is only invoked during training.
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # initialize the network on one batch (data-dependent init)
        spt.utils.ensure_variables_initialized()
        for [batch_x] in train_flow:
            print('Network initialization loss: {:.6g}'.format(
                session.run(init_loss, {input_x: batch_x})))
            print('')
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'posterior_flow'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss})
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    """Train a CIFAR-10 classifier with data-parallel multi-GPU support.

    Each worker device builds its own tower of ``model`` on a slice of the
    batch; per-device losses/accuracies are averaged and gradients merged
    into a single training op. Test accuracy is evaluated every 5 epochs.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, ) + config.x_shape, name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []       # per-device gradient lists
    losses = []      # per-device losses
    y_list = []      # per-device predicted labels
    acc_list = []    # per-device accuracies
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in \
            multi_gpu.data_parallel(batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                # pre-build pass: create variables only, discard outputs
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(
                    dev_input_x, is_training=is_training,
                    channels_last=multi_gpu.channels_last(dev))
                dev_cls_loss = tf.losses.sparse_softmax_cross_entropy(
                    dev_input_y, dev_logits)
                dev_loss = dev_cls_loss + \
                    tf.losses.get_regularization_loss()
                dev_y = spt.ops.softmax_classification_output(dev_logits)
                dev_acc = spt.ops.classification_accuracy(
                    dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape, normalize_x=True)
    train_flow = spt.DataFlow.arrays(
        [x_train, y_train], config.batch_size,
        shuffle=True, skip_incomplete=True)
    test_flow = spt.DataFlow.arrays(
        [x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(
                params,
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={is_training: True},
                metrics={'loss': loss, 'acc': acc})
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    """Train and evaluate a plain VAE on binarized MNIST (newer tfsnippet
    API: ``is_initializing`` flags, ``scoped_set_config``, event hooks).

    Reports test NLL and test ELBO every 10 epochs, and saves a 10x10
    sample grid alongside.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the output for initialization (histogram summaries disabled
    # so the init pass stays lightweight)
    with tf.name_scope('initialization'), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, is_initializing=True)
        init_chain = init_q_net.chain(
            p_net, observed={'x': input_x}, is_initializing=True)
        init_lb = tf.reduce_mean(init_chain.vi.lower_bound.elbo())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(p_net, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(
            bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        # `session` is bound by the enclosing `with ... as session:` below.
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network with the first training batch
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.
                  format(session.run(init_lb, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss},
                summaries=tf.summary.merge_all(
                    spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(
                    evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    """Train a convolutional VAE on binarized MNIST with data-parallel
    multi-GPU towers.

    Each device builds training/testing sub-graphs under a shared
    ``arg_scope``; losses, NLLs and ELBOs are averaged across devices and
    gradients merged into one training op. Samples are plotted on the
    first work device.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []        # per-device gradient lists
    losses = []       # per-device training losses
    test_nlls = []    # per-device test NLLs
    test_lbs = []     # per-device test lower bounds
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in \
            multi_gpu.data_parallel(batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                # pre-build pass: create variables only, discard outputs
                with arg_scope([p_net, q_net], is_training=is_training,
                               channels_last=True):
                    _ = q_net(dev_input_x).chain(
                        p_net, observed={'x': dev_input_x})
            else:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=multi_gpu.channels_last(dev)):
                    # derive the loss and lower-bound for training
                    with tf.name_scope('training'):
                        train_q_net = q_net(dev_input_x)
                        train_chain = train_q_net.chain(
                            p_net, latent_axis=0,
                            observed={'x': dev_input_x})
                        dev_vae_loss = tf.reduce_mean(
                            train_chain.vi.training.sgvb())
                        dev_loss = dev_vae_loss + \
                            tf.losses.get_regularization_loss()
                        losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    with tf.name_scope('testing'):
                        test_q_net = q_net(
                            dev_input_x, n_z=config.test_n_z)
                        test_chain = test_q_net.chain(
                            p_net, latent_axis=0,
                            observed={'x': dev_input_x})
                        dev_test_nll = -tf.reduce_mean(
                            test_chain.vi.evaluation.is_loglikelihood())
                        dev_test_lb = tf.reduce_mean(
                            test_chain.vi.lower_bound.elbo())
                        test_nlls.append(dev_test_nll)
                        test_lbs.append(dev_test_lb)

                    # derive the optimizer
                    with tf.name_scope('optimizing'):
                        params = tf.trainable_variables()
                        grads.append(optimizer.compute_gradients(
                            dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    with tf.name_scope('optimizing'):
        [loss, test_lb, test_nll] = \
            multi_gpu.average([losses, test_lbs, test_nlls], batch_size)
        train_op = multi_gpu.apply_grads(
            grads=multi_gpu.average_grads(grads),
            optimizer=optimizer,
            control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function (on the first work device)
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plotting'):
        plot_p_net = p_net(
            n_z=100, is_training=is_training,
            channels_last=multi_gpu.channels_last(work_dev))
        x_plots = tf.reshape(
            bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        # `session` is bound by the enclosing `with ... as session:` below.
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                feed_dict={is_training: True},
                metrics={'loss': loss})
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    """Train a Gaussian-mixture VAE for unsupervised clustering of MNIST.

    The discrete latent is trained with REINFORCE (with baseline) or
    VIMCO depending on ``config.vi_algorithm``. A cluster-to-digit
    classifier is fit on q(y|x) predictions and evaluated against the
    true labels; per-cluster sample grids are plotted periodically.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        if config.vi_algorithm == 'reinforce':
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            assert (config.vi_algorithm == 'vimco')
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x): most likely cluster per input
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits,
                                axis=-1, name='q_y_given_x')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function: 10 samples for each cluster y
    with tf.name_scope('plotting'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        # `session` is bound by the enclosing `with ... as session:` below.
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(config.n_clusters, 10),
                results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        # refit the cluster->label mapping on the full (unshuffled)
        # training set
        df = bernoulli_flow(x_train, config.batch_size,
                            shuffle=False, skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss},
                summaries=tf.summary.merge_all(
                    spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(
                    evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    """Train (or restore) a VIMCO-trained VAE on multivariate time-series
    data and score instances by log-likelihood for anomaly detection.

    Pipeline: build multi-GPU training/testing graphs -> load matrix
    data -> train or restore from checkpoint -> dump per-instance and
    per-metric outlier scores for train and test sets -> threshold with
    POT and evaluate -> compute interpretation hit ratio.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    model_file = config.result_dir + "/" + \
        os.path.basename(__file__).split(".py")[0] + "_" + \
        str(config.noExp) + ".model"
    dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(
        config.noExp)
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)
    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, ) + config.x_shape, name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor,
        min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    # NOTE(review): `grad_vars` is created once outside the device loop and
    # the same list object is appended to `gradses` for every device, so
    # with multiple devices all (grad, var) pairs accumulate into one
    # shared list — confirm this is intended for the multi-GPU case.
    grad_vars = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)

    for dev, pre_build, [dev_input_x] in \
            multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings,
                                                auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(
                    p_net, latent_axis=0, observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x,
                                    n_z=config.train_n_samples)
                train_chain = train_q_net.chain(
                    p_net, latent_axis=0, observed={'x': dev_input_x})
                train_loss = (
                    tf.reduce_mean(train_chain.vi.training.vimco()) +
                    tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(
                    p_net, latent_axis=0, observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(
                        dev_input_x), 0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(
                    log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(
                    log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                # optionally clip gradients and guard against NaN/Inf
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(
                                grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad,
                                'gradient for {} has numeric issue'.format(
                                    var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(gradses),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    # (1-based indices of metrics, ranked by score)
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(
        dtype=tf.float32, shape=(None, None),
        name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(
        SORT_UNIVARIATE_TS_INPUT, k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain,
        config.datapathForTest, config.timeLength, config.metricNumber,
        "TrainFileNameList.txt", "TestFileNameList.txt", results,
        config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays(
            [x_test], config.test_batch_size)  # DataFlow is iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape(
        [-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays(
        [x_train], config.batch_size, shuffle=False, skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim,
          ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network; retry until the first-batch loss is
            # finite and not absurdly large
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss,
                                           feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is '
                          '{:.6g}.'.format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(
                            INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(
                        loop, train_op, [input_x], train_flow,
                        metrics={'loss': train_loss},
                        summaries=tf.summary.merge_all(
                            spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(
                        learning_rate,
                        epochs=config.lr_anneal_epoch_freq,
                        steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(
                        loop, train_loss, [input_x], val_flow,
                    )
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
            saver.save()

        # save the training information (per-instance outlier scores on
        # the training set, used later as the POT reference)
        firWrite = True
        num = 0
        time0 = time.time()
        for [x_train] in train_flow:
            if config.savetrainDS:
                # log prob of each metric of each instance
                log_prob_per_element_univariate_TS_list_item_Train = (
                    session.run(log_prob_per_element_univariate_TS_All,
                                feed_dict={input_x: x_train}))
                log_prob_per_element_univariate_TS_list_Train = \
                    log_prob_per_element_univariate_TS_list_item_Train
                log_prob_per_element_list_Train = np.sum(np.array(
                    log_prob_per_element_univariate_TS_list_item_Train),
                    axis=1).tolist()
                # first batch overwrites, later batches append
                if firWrite:
                    save_file(
                        results.system_path("train_summary"),
                        "OutlierScores_metric.txt",
                        log_prob_per_element_univariate_TS_list_Train)
                    save_file(results.system_path("train_summary"),
                              "OutlierScores.txt",
                              log_prob_per_element_list_Train)
                else:
                    save_file(
                        results.system_path("train_summary"),
                        "OutlierScores_metric.txt",
                        log_prob_per_element_univariate_TS_list_Train,
                        "\n", "a")
                    save_file(results.system_path("train_summary"),
                              "OutlierScores.txt",
                              log_prob_per_element_list_Train, "\n", "a")
                firWrite = False
                num += 1
                if num % 1000 == 0:
                    print(
                        "-----Train %s >>>>>:Sum time of batch instances:%s"
                        % (num, float(time.time() - time0) / float(num)))
        del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, \
            log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(
                            log_prob_per_element_univariate_TS_All,
                            feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += \
                        log_prob_per_element_univariate_TS_list_item.tolist()
                    log_prob_per_element_list += np.sum(
                        np.array(
                            log_prob_per_element_univariate_TS_list_item),
                        axis=1).tolist()
                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances"
                          ":%s" %
                          (num, float(time.time() - time2) / float(num)))
        else:
            # instance-by-instance testing
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (session.run(
                        log_prob_per_element,
                        feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(
                        log_prob_per_element_list_item)
                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))
                if num % 200 == 0:
                    print(
                        "-----Test>>>>>:%d, average time of each instance"
                        ":%s" %
                        (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the label file name and its line cnt number
        allLabelFileNameLineCntList = get_machineID(results,
                                                    config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" %
              len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(
                results.system_path("result_summary"),
                "OutlierScores_metric.txt",
                cat_List(allLabelFileNameLineCntList,
                         log_prob_per_element_univariate_TS_list))
        save_file(
            results.system_path("result_summary"), "OutlierScores.txt",
            cat_List(allLabelFileNameLineCntList,
                     log_prob_per_element_list))

        if config.evaluation:
            # preparation for the history two-metric results
            twoMetricScore = read_file(
                results.system_path("train_summary"),
                "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore),
                                         axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series",
                       "\n"], ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt", [
                "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN",
                "\n"
            ], ",", "a")

            # get the sorted item each metric by change score
            twoMetricScoreList = cal_scoreChanges(
                log_prob_per_element_list, ave_twoMetricScore,
                log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(
                SORT_UNIVARIATE_TS,
                feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"),
                      "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evaluation: derive the anomaly threshold from the
            # training-set score distribution
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"),
                          "OutlierScores.txt", "float"),
                config.q, config.level)
            resultArray, outlierLabelfileNameLineCntList = \
                cal_binaryResult(log_prob_per_element_list, POT_TH,
                                 time_indexs2, config.saveMetricInfo,
                                 allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2,
                     POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(
        truth_filepath=config.interpret_filepath,
        prediction_filepath=os.path.join(config.result_dir, dirName,
                                         "result_summary",
                                         "MetricResult.txt"))
def main():
    """Train and evaluate a softmax image classifier on CIFAR-10.

    Builds a TF1 static graph (logits via the project-level ``model``
    network), trains it with Adam under a tfsnippet ``TrainLoop``, and
    periodically evaluates test accuracy.  Results and summaries are
    written through an ``MLResults`` object rooted at ``config.result_dir``.

    NOTE(review): relies on module-level names (``config``, ``model``,
    ``spt``, ``print_with_title``, ``MLResults``) defined elsewhere in
    this file.
    """
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    # presumably x is a flattened image of length config.x_dim and y an
    # integer class label — TODO confirm against `model` and the data loader
    input_x = tf.placeholder(dtype=tf.float32, shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    # learning rate variable that `trainer.anneal_after` decays in-graph
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x)
    cls_loss = tf.losses.sparse_softmax_cross_entropy(input_y, logits)
    # regularization losses (e.g. weight decay) registered by the model
    loss = cls_loss + tf.losses.get_regularization_loss()
    y = spt.ops.softmax_classification_output(logits)
    acc = spt.ops.classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    # run UPDATE_OPS (e.g. batch-norm moving averages) before each step
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train], config.batch_size,
                                     shuffle=True, skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop, train_op, [input_x, input_y],
                                  train_flow,
                                  metrics={'loss': loss, 'acc': acc},
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            # push each evaluation's metrics into the results object so the
            # final `format_metrics()` below reflects the last test accuracy
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main(trainpath, normalpath, abnormalpath, outputpath):
    """Train a VAE (optionally with a normalizing posterior flow) and score data.

    Reads train/normal/abnormal CSV-like inputs via ``get_data_vae``,
    trains the ``q_net``/``p_net`` VAE with early stopping on a held-out
    validation split, then writes per-sample importance-sampled
    log-likelihood scores (divided by ``config.x_dim``) for the test and
    validation sets into ``webankdata/``.

    Args:
        trainpath: path of the training data file.
        normalpath: path of the normal (negative) test data file.
        abnormalpath: path of the abnormal (positive) test data file.
        outputpath: base name used to derive output csv / model file names.

    NOTE(review): relies on module-level names (``config``, ``spt``,
    ``q_net``, ``p_net``, ``coupling_layer_shift_and_scale``,
    ``get_data_vae``, ``collect_outputs``) defined elsewhere in this file.
    """
    # debug_level -1: disable assertions entirely; 1: enable numeric checks
    if config.debug_level == -1:
        spt.utils.set_assertion_enabled(False)
    elif config.debug_level == 1:
        spt.utils.set_check_numerics(True)
        #spt.utils.set_assertion_enabled(False)

    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # input and output file
    train_file = trainpath
    normal_file = normalpath
    abnormal_file = abnormalpath
    # output names encode the flow type, e.g. 'rnvp_<out>.csv' or 'vae_<out>.csv'
    output_file = os.path.join('webankdata',
                               '{}_{}.csv'.format(config.flow_type or 'vae',
                                                  outputpath))
    valid_file = os.path.join('webankdata',
                              'v{}_{}.csv'.format(config.flow_type or 'vae',
                                                  outputpath))
    # you can change it by yourself

    # read data
    (x_train, y_train), (x_test, y_test), flows_test = \
        get_data_vae(train_file, normal_file, abnormal_file)
    # the model input width is taken from the data, not from the config file
    config.x_dim = x_train.shape[1]
    #config.z_dim = get_z_dim(x_train.shape[1])
    all_len = x_train.shape[0]
    print('origin data: %s' % all_len)
    # debug dump of the first 30 training rows
    for i in range(30):
        print(list(x_train[i]))

    # hold out 10% of the training data for validation / early stopping
    valid_rate = 0.1
    x_train, x_valid = train_test_split(x_train, test_size=valid_rate)
    # x_valid = x_train
    print('%s for validation, %s for training v2' % (x_valid.shape[0],
                                                     x_train.shape[0]))
    print('%s for test' % x_test.shape[0])
    print('x_dim: %s z_dim: %s' % (config.x_dim, config.z_dim))
    # change it by yourself

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # build the posterior flow:
    #   None      -> plain mean-field VAE posterior
    #   planar_nf -> stack of planar normalizing flows
    #   rnvp      -> RealNVP-style stack of ActNorm + coupling + invertible dense
    if config.flow_type is None:
        posterior_flow = None
    elif config.flow_type == 'planar_nf':
        posterior_flow = \
            spt.layers.planar_normalizing_flows(config.n_planar_nf_layers)
    else:
        assert(config.flow_type == 'rnvp')
        with tf.variable_scope('posterior_flow'):
            flows = []
            for i in range(config.n_rnvp_layers):
                flows.append(spt.layers.ActNorm())
                # make_template shares the coupling net's variables across
                # every invocation of this layer
                flows.append(spt.layers.CouplingLayer(
                    tf.make_template(
                        'coupling',
                        coupling_layer_shift_and_scale,
                        create_scope_now_=True
                    ),
                    scale_type='sigmoid'
                ))
                flows.append(spt.layers.InvertibleDense(
                    strict_invertible=True))
            posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op (data-dependent init for ActNorm layers)
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        # test_n_z importance samples per data point
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        # per-sample importance-sampled log-likelihood (the anomaly score)
        test_logp = test_chain.vi.evaluation.is_loglikelihood()
        test_nll = -tf.reduce_mean(test_logp)
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        # clip each gradient's norm when config.norm_clip is set
        cliped_grad = []
        for grad, var in grads:
            if grad is not None and var is not None:
                if config.norm_clip is not None:
                    grad = tf.clip_by_norm(grad, config.norm_clip)
                cliped_grad.append((grad, var))

        # run UPDATE_OPS (e.g. moving averages) before each training step
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(cliped_grad)

    train_flow = spt.DataFlow.arrays([x_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    valid_flow = spt.DataFlow.arrays([x_valid], config.test_batch_size)
    test_flow = spt.DataFlow.arrays([x_test], config.test_batch_size)

    # model_file
    #model_name = ''
    model_name = os.path.join(
        'webankdata',
        'md_{}_{}.model'.format(
            config.flow_type or 'vae',
            outputpath.split('.')[0]
        )
    )

    with spt.utils.create_session().as_default() as session:
        var_dict = spt.utils.get_variables_as_dict()
        saver = spt.VariableSaver(var_dict, model_name)
        #if os.path.exists(model_name):
        # NOTE(review): restore-from-checkpoint is deliberately disabled
        # (`if False`); training always runs from scratch.  Re-enable the
        # commented os.path.exists() condition to reuse a saved model.
        if False:
            print('%s exists' % model_name)
            saver.restore()
        else:
            print('no model here, and start training')
            # initialize the network: one batch through init_loss performs
            # the data-dependent ActNorm initialization
            spt.utils.ensure_variables_initialized()
            for [batch_x] in train_flow:
                print('Network initialization loss: {:.6g}'.
                      format(session.run(init_loss, {input_x: batch_x})))
                print('')
                break

            # train the network, early-stopping on validation loss
            with spt.TrainLoop(params,
                               var_groups=['p_net',
                                           'q_net',
                                           'posterior_flow'],
                               max_epoch=config.max_epoch,
                               max_step=config.max_step,
                               early_stopping=True,
                               valid_metric_name='valid_loss',
                               valid_metric_smaller_is_better=True) as loop:
                trainer = spt.Trainer(
                    loop, train_op, [input_x], train_flow,
                    metrics={'loss': loss}
                )
                trainer.anneal_after(
                    learning_rate,
                    epochs=config.lr_anneal_epoch_freq,
                    steps=config.lr_anneal_step_freq
                )
                evaluator = spt.Evaluator(
                    loop,
                    metrics={'valid_loss': test_nll},
                    inputs=[input_x],
                    data_flow=valid_flow,
                    time_metric_name='valid_time'
                )
                trainer.evaluate_after_epochs(evaluator, freq=10)
                trainer.log_after_epochs(freq=1)
                trainer.run()
            saver.save()

        # get the answer: per-element log-likelihood normalized by dimension
        print('start testing')
        start = time.time()
        test_ans = collect_outputs([test_logp], [input_x], test_flow)[0] \
            / config.x_dim
        end = time.time()
        print("test time: ", end-start)
        pd.DataFrame(
            {'id': flows_test, 'label': y_test, 'score': test_ans}) \
            .to_csv(output_file, index=False)
        # also score the validation split (useful for threshold selection)
        valid_ans = collect_outputs([test_logp], [input_x], valid_flow)[0] \
            / config.x_dim
        pd.DataFrame({'score': valid_ans}).to_csv(valid_file, index=False)