def train(loss, input_x, x_data, max_epoch, batch_size, valid_portion):
    train_x, valid_x = split_numpy_array(x_data, portion=valid_portion)
    train_flow = DataFlow.arrays([train_x], batch_size=batch_size,
                                 shuffle=True, skip_incomplete=True)
    valid_flow = DataFlow.arrays([valid_x], batch_size=batch_size)

    # derive the optimizer
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(0.001, 0.99995)
    params = tf.trainable_variables()
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(loss, var_list=params)

    # run the training loop
    with TrainLoop(params, max_epoch=max_epoch, early_stopping=True) as loop:
        trainer = LossTrainer(loop, loss, train_op, [input_x], train_flow)
        trainer.anneal_after_steps(learning_rate_var, freq=1)
        trainer.validate_after_epochs(
            Validator(loop, loss, [input_x], valid_flow), freq=1)
        trainer.after_epochs.add_hook(
            lambda: trainer.loop.collect_metrics(lr=learning_rate_var),
            freq=1)
        trainer.log_after_epochs(freq=1)
        trainer.run(feed_dict={learning_rate: learning_rate_var})
def test_auto_init(self):
    epoch_counter = [0]

    seq_flow = DataFlow.seq(0, 10, batch_size=2)
    map_flow = seq_flow.map(lambda x: (x + epoch_counter[0] * 10,))

    def make_iterator():
        epoch_counter[0] += 1
        return map_flow

    it_flow = DataFlow.iterator_factory(make_iterator)
    flow = it_flow.threaded(3)

    batches = [b[0] for b in flow]
    np.testing.assert_array_equal(
        [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], batches)

    batches = [b[0] for b in flow]
    np.testing.assert_array_equal(
        [[20, 21], [22, 23], [24, 25], [26, 27], [28, 29]], batches)

    flow.close()

    batches = [b[0] for b in flow]
    np.testing.assert_array_equal(
        [[40, 41], [42, 43], [44, 45], [46, 47], [48, 49]], batches)
    flow.close()
def test_iterator_factory(self):
    x_flow = DataFlow.arrays([np.arange(5)], batch_size=3)
    y_flow = DataFlow.arrays([np.arange(5, 10)], batch_size=3)
    flow = DataFlow.iterator_factory(
        lambda: ((x, y) for (x,), (y,) in zip(x_flow, y_flow)))

    b = list(flow)
    self.assertEquals(2, len(b))
    self.assertEquals(2, len(b[0]))
    np.testing.assert_array_equal([0, 1, 2], b[0][0])
    np.testing.assert_array_equal([5, 6, 7], b[0][1])
    np.testing.assert_array_equal([3, 4], b[1][0])
    np.testing.assert_array_equal([8, 9], b[1][1])
def _create_sampled_dataflow(arrays, sampler, sample_now, **kwargs):
    if sample_now:
        arrays = sampler(*arrays)
    df = DataFlow.arrays(arrays, **kwargs)
    if not sample_now:
        df = df.map(sampler)
    return df
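# A minimal usage sketch of `_create_sampled_dataflow` (the Bernoulli sampler
# and `x_train` below are hypothetical, not part of the original code):
# with `sample_now=True` the arrays are sampled once up front, while with
# `sample_now=False` every batch is re-sampled lazily via `DataFlow.map`.
#
#     def bernoulli_sampler(x):
#         return ((np.random.uniform(size=x.shape) < x).astype(np.int32),)
#
#     df = _create_sampled_dataflow(
#         [x_train], bernoulli_sampler, sample_now=False,
#         batch_size=64, shuffle=True)
#     for [batch_x] in df:
#         ...  # each batch is freshly sampled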
def test_select(self):
    x = np.arange(5)
    y = np.arange(5, 10)
    z = np.arange(10, 15)
    flow = DataFlow.arrays([x, y, z], batch_size=5).select([0, 2, 0])
    self.assertEquals(1, len(list(flow)))
    for b in flow:
        np.testing.assert_equal([x, z, x], b)
def test_errors(self):
    source = DataFlow.arrays([np.arange(5), np.arange(5, 10)], batch_size=4)
    df = source.map(lambda x, y: x + y)
    with pytest.raises(TypeError,
                       match='The output of the ``mapper`` is expected to '
                             'be a tuple or a list, but got a'):
        _ = list(df)
def test_run(self):
    with self.test_session() as session:
        df = DataFlow.arrays([np.arange(6, dtype=np.float32)], batch_size=4)

        def log_message(m):
            logged_messages.append(m)
        logged_messages = []

        # test default loss weight and merged feed dict
        with TrainLoop([], max_epoch=2) as loop:
            t = BaseTrainer(loop)
            t._run_step = Mock(return_value=None)
            t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))
            t.before_epochs.add_hook(
                functools.partial(log_message, 'before_epoch'))
            t.before_steps.add_hook(
                functools.partial(log_message, 'before_step'))
            t.after_steps.add_hook(
                functools.partial(log_message, 'after_step'))
            t.after_epochs.add_hook(
                functools.partial(log_message, 'after_epoch'))

            t.run()
            self.assertEquals(4, len(t._run_step.call_args_list))
            for i, call_args in enumerate(t._run_step.call_args_list[:-2]):
                call_session, call_payload = call_args[0]
                self.assertIs(session, call_session)
                self.assertEquals(i + 1, call_payload[0])
                self.assertIsInstance(call_payload[1], tuple)
                self.assertEquals(1, len(call_payload[1]))
                np.testing.assert_equal(
                    np.arange(6, dtype=np.float32)[i * 4: (i + 1) * 4],
                    call_payload[1][0])
            self.assertEquals([
                'before_epoch', 'before_step', 'after_step',
                'before_step', 'after_step', 'after_epoch'
            ] * 2, logged_messages)

        # test re-entrant error
        with TrainLoop([], max_epoch=1) as loop:
            t = BaseTrainer(loop)
            t._run_step = Mock(return_value=None)
            t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))

            def reentrant_error():
                with pytest.raises(
                        RuntimeError,
                        match=r'`run\(\)` is not re-entrant'):
                    t.run()
            reentrant_error = Mock(wraps=reentrant_error)
            t.after_steps.add_hook(reentrant_error)
            t.run()
            self.assertTrue(reentrant_error.called)
def train_classifier(loop):
    df = DataFlow.arrays([x_train], batch_size=config.batch_size). \
        map(input_x_sampler)
    with loop.timeit('cls_train_time'):
        [c_pred] = collect_outputs(outputs=[y_given_x],
                                   inputs=[input_x],
                                   data_flow=df,
                                   feed_dict={is_training: False})
        c_classifier.fit(c_pred, y_train)
        print(c_classifier.describe())
def test_run(self):
    with self.test_session() as session:
        df = DataFlow.arrays([np.arange(6, dtype=np.float32)], batch_size=4)

        ph = tf.placeholder(tf.float32, shape=[None])
        ph2 = tf.placeholder(tf.float32, shape=[])
        ph3 = tf.placeholder(tf.float32, shape=[])

        # test default loss weight and merged feed dict
        with TrainLoop([], max_epoch=1) as loop:
            v = Evaluator(loop, tf.reduce_mean(ph), [ph], df,
                          feed_dict={ph2: 34})
            v._run_batch = Mock(wraps=v._run_batch)

            for epoch in loop.iter_epochs():
                v.run({ph3: 56})

            np.testing.assert_almost_equal(
                2.5, loop._epoch_metrics._metrics['valid_loss'].mean)
            np.testing.assert_almost_equal(
                2.5, v.last_metrics_dict['valid_loss'])
            self.assertIn('eval_time', loop._epoch_metrics._metrics)

            self.assertEquals(2, len(v._run_batch.call_args_list))
            for i, call_args in enumerate(v._run_batch.call_args_list):
                call_session, call_feed_dict = call_args[0]
                self.assertIs(session, call_session)
                np.testing.assert_equal(
                    np.arange(6, dtype=np.float32)[i * 4: (i + 1) * 4],
                    call_feed_dict[ph])
                self.assertEquals(34, call_feed_dict[ph2])
                self.assertEquals(56, call_feed_dict[ph3])

        # test None loss weight and None time metric and override feed dict
        with TrainLoop([], max_epoch=1) as loop:
            v = Evaluator(loop, {'valid_loss_x': tf.reduce_mean(ph)},
                          [ph], df, feed_dict={ph2: 34},
                          batch_weight_func=None, time_metric_name=None)
            v._run_batch = Mock(wraps=v._run_batch)

            for epoch in loop.iter_epochs():
                v.run({ph2: 56})

            np.testing.assert_almost_equal(
                3.0, loop._epoch_metrics._metrics['valid_loss_x'].mean)
            np.testing.assert_almost_equal(
                3.0, v.last_metrics_dict['valid_loss_x'])
            self.assertNotIn('eval_time', loop._epoch_metrics._metrics)

            for i, call_args in enumerate(v._run_batch.call_args_list):
                call_session, call_feed_dict = call_args[0]
                self.assertEquals(56, call_feed_dict[ph2])
                self.assertNotIn(ph3, call_feed_dict)
def test_arrays(self):
    arrays = [np.arange(5), np.arange(10).reshape([5, 2])]
    df = DataFlow.arrays(arrays, 4, shuffle=False, skip_incomplete=False)
    self.assertIsInstance(df, ArrayFlow)
    for i, arr in enumerate(arrays):
        self.assertIs(arr, df.the_arrays[i])
    self.assertEquals(2, df.array_count)
    self.assertEquals(5, df.data_length)
    self.assertEquals(((), (2,)), df.data_shapes)
    self.assertFalse(df.is_shuffled)
    self.assertFalse(df.skip_incomplete)
def test_map_to_tuple(self):
    source = DataFlow.arrays([np.arange(5), np.arange(5, 10)], batch_size=4)
    df = source.map(lambda x, y: (x + y,))
    self.assertIs(df.source, source)

    b = list(df)
    self.assertEquals(2, len(b))
    self.assertEquals(1, len(b[0]))
    np.testing.assert_array_equal([5, 7, 9, 11], b[0][0])
    np.testing.assert_array_equal([13], b[1][0])
def test_get_arrays(self):
    with pytest.raises(ValueError, match='empty, cannot convert to arrays'):
        _ = DataFlow.arrays([np.arange(0)], batch_size=5).get_arrays()

    # test one batch
    df = DataFlow.arrays([np.arange(5), np.arange(5, 10)], batch_size=6)
    arrays = df.get_arrays()
    np.testing.assert_equal(np.arange(5), arrays[0])
    np.testing.assert_equal(np.arange(5, 10), arrays[1])

    # test two batches
    df = DataFlow.arrays([np.arange(10), np.arange(10, 20)], batch_size=6)
    arrays = df.get_arrays()
    np.testing.assert_equal(np.arange(10), arrays[0])
    np.testing.assert_equal(np.arange(10, 20), arrays[1])

    # test to_arrays_flow
    df2 = df.to_arrays_flow(batch_size=6)
    self.assertIsInstance(df2, ArrayFlow)
def test_implicit_iterator(self):
    df = DataFlow.arrays([np.arange(3)], batch_size=2)
    self.assertIsNone(df.current_batch)

    np.testing.assert_equal([[0, 1]], df.next_batch())
    np.testing.assert_equal([[0, 1]], df.current_batch)
    np.testing.assert_equal([[2]], df.next_batch())
    np.testing.assert_equal([[2]], df.current_batch)
    with pytest.raises(StopIteration):
        _ = df.next_batch()
    self.assertIsNone(df.current_batch)

    np.testing.assert_equal([[0, 1]], df.next_batch())
    np.testing.assert_equal([[0, 1]], df.current_batch)
def test_seq(self):
    df = DataFlow.seq(1, 9, 2, batch_size=3, shuffle=False,
                      skip_incomplete=False, dtype=np.int64)
    self.assertIsInstance(df, SeqFlow)
    self.assertEquals(1, df.array_count)
    self.assertEquals(4, df.data_length)
    self.assertEquals(((),), df.data_shapes)
    self.assertEquals(3, df.batch_size)
    self.assertFalse(df.is_shuffled)
    self.assertFalse(df.skip_incomplete)
    self.assertEquals(1, df.start)
    self.assertEquals(9, df.stop)
    self.assertEquals(2, df.step)
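# For reference, a minimal sketch of what the flow above yields, following the
# properties asserted in the test (the sequence np.arange(1, 9, 2) split into
# batches of 3):
#
#     for [batch] in DataFlow.seq(1, 9, 2, batch_size=3, dtype=np.int64):
#         print(batch)  # -> [1 3 5], then [7]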
def test_run(self):
    ph = tf.placeholder(tf.int32, [5])
    var = tf.get_variable('var', shape=[5], dtype=tf.int32,
                          initializer=tf.zeros_initializer())
    train_op = tf.assign(var, ph)
    df = DataFlow.arrays([np.arange(10, 15, dtype=np.int32)], batch_size=5)

    with self.test_session() as session, \
            TrainLoop([var], max_epoch=1, early_stopping=False) as loop:
        loop.collect_metrics = Mock(wraps=loop.collect_metrics)
        t = Trainer(loop, train_op, [ph], df,
                    metrics={'loss_x': tf.reduce_sum(ph)})
        ensure_variables_initialized()
        t.run()

        self.assertEquals({'loss_x': 60},
                          loop.collect_metrics.call_args_list[0][0][0])
        np.testing.assert_equal([10, 11, 12, 13, 14], session.run(var))
def main():
    # load mnist data
    (train_x, train_y), (test_x, test_y) = datasets.load_mnist()

    # the parameters of this experiment
    x_dim = train_x.shape[1]
    z_dim = 2
    max_epoch = 10
    batch_size = 256
    valid_portion = 0.2

    # construct the graph
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        input_x = tf.placeholder(dtype=tf.float32, shape=(None, x_dim),
                                 name='input_x')
        x_binarized = tf.stop_gradient(sample_input_x(input_x))
        batch_size_tensor = tf.shape(input_x)[0]

        # derive the VAE
        z_shape = tf.stack([batch_size_tensor, z_dim])
        vae = VAE(
            p_z=Normal(mean=tf.zeros(z_shape), std=tf.ones(z_shape)),
            p_x_given_z=Bernoulli,
            q_z_given_x=Normal,
            h_for_p_x=Sequential([
                K.layers.Dense(100, activation=tf.nn.relu),
                K.layers.Dense(100, activation=tf.nn.relu),
                DictMapper(
                    {'logits': K.layers.Dense(x_dim, name='x_logits')})
            ]),
            h_for_q_z=Sequential([
                tf.to_float,
                K.layers.Dense(100, activation=tf.nn.relu),
                K.layers.Dense(100, activation=tf.nn.relu),
                DictMapper({
                    'mean': K.layers.Dense(z_dim, name='z_mean'),
                    'logstd': K.layers.Dense(z_dim, name='z_logstd'),
                })
            ])
        )

        # train the network
        train(vae.get_training_loss(x_binarized), input_x, train_x,
              max_epoch, batch_size, valid_portion)

        # plot the latent space
        q_net = vae.variational(x_binarized)
        z_posterior = q_net['z']
        z_predict = []

        for [batch_x] in DataFlow.arrays([test_x], batch_size=batch_size):
            z_predict.append(
                session.run(z_posterior, feed_dict={input_x: batch_x}))

        z_predict = np.concatenate(z_predict, axis=0)
        plt.figure(figsize=(8, 6))
        plt.scatter(z_predict[:, 0], z_predict[:, 1], c=test_y)
        plt.colorbar()
        plt.grid()
        plt.show()
def main():
    # load cifar10 data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(dtype=np.float32, normalize=True)
    print(x_train.shape)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + x_train.shape[1:], name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in multi_gpu.data_parallel(
            batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(
                    dev_input_x,
                    is_training=is_training,
                    channels_last=multi_gpu.channels_last(dev)
                )
                dev_softmax_loss = \
                    softmax_classification_loss(dev_logits, dev_input_y)
                dev_loss = dev_softmax_loss + regularization_loss()
                dev_y = softmax_classification_output(dev_logits)
                dev_acc = classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    )

    # prepare for training and testing data
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True
    )
    test_flow = DataFlow.arrays([x_test, y_test], config.batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       summary_commit_freqs={'loss': 10, 'acc': 10},
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            anneal_after(
                trainer, learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # save test result
    results.commit_and_print(evaluator.last_metrics_dict)
def main():
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[config.x_dim], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None,) + x_train.shape[1:],
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32,
                                   name='learning_rate')
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    tau_p = tf.placeholder(shape=(), dtype=tf.float32, name='tau_p')
    tau_p_var = AnnealingDynamicValue(config.initial_tau_p,
                                      config.tau_p_anneal_factor,
                                      config.min_tau_p)
    tau_q = tf.placeholder(shape=(), dtype=tf.float32, name='tau_q')
    tau_q_var = AnnealingDynamicValue(config.initial_tau_q,
                                      config.tau_q_anneal_factor,
                                      config.min_tau_q)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    test_nlls = []
    y_given_x_list = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([q_net, p_net], is_training=is_training):
                    _ = q_net(dev_input_x).chain(
                        p_net, latent_names=['y', 'z'],
                        observed={'x': dev_input_x})
            else:
                with arg_scope([q_net, p_net], is_training=is_training):
                    # derive the loss and lower-bound for training
                    train_n_samples = (
                        config.train_n_samples_for_concrete
                        if config.use_concrete_distribution
                        else config.train_n_samples
                    )
                    train_q_net = q_net(dev_input_x,
                                        n_samples=train_n_samples,
                                        tau=tau_q)
                    train_chain = train_q_net.chain(
                        p_net, latent_names=['y', 'z'], latent_axis=0,
                        observed={'x': dev_input_x}, tau=tau_p)

                    if config.use_concrete_distribution:
                        if train_n_samples is None:
                            dev_vae_loss = tf.reduce_mean(
                                train_chain.vi.training.sgvb())
                        else:
                            dev_vae_loss = tf.reduce_mean(
                                train_chain.vi.training.iwae())
                    else:
                        if train_n_samples is None:
                            dev_baseline = reinforce_baseline_net(dev_input_x)
                            dev_vae_loss = tf.reduce_mean(
                                train_chain.vi.training.reinforce(
                                    baseline=dev_baseline))
                        else:
                            dev_vae_loss = tf.reduce_mean(
                                train_chain.vi.training.vimco())
                    dev_loss = dev_vae_loss + regularization_loss()
                    dev_loss = add_p_z_given_y_reg_loss(dev_loss)
                    losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    test_q_net = q_net(dev_input_x,
                                       n_samples=config.test_n_samples)
                    test_chain = test_q_net.chain(
                        p_net, latent_names=['y', 'z'], latent_axis=0,
                        observed={'x': dev_input_x})
                    dev_test_nll = -tf.reduce_mean(
                        test_chain.vi.evaluation.is_loglikelihood())
                    test_nlls.append(dev_test_nll)

                    # derive the classifier via q(y|x)
                    dev_q_y_given_x = tf.argmax(
                        test_q_net['y'].distribution.logits, axis=-1)
                    y_given_x_list.append(dev_q_y_given_x)

                    # derive the optimizer
                    params = tf.trainable_variables()
                    grads.append(
                        optimizer.compute_gradients(dev_loss,
                                                    var_list=params))

    # merge multi-gpu outputs and operations
    [loss, test_nll] = \
        multi_gpu.average([losses, test_nlls], batch_size)
    [y_given_x] = multi_gpu.concat([y_given_x_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10,
            is_training=is_training)
        x = tf.cast(255 * tf.sigmoid(plot_p_net['x'].distribution.logits),
                    dtype=tf.uint8)
        x_plots = tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, 28, 28])

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename=results.prepare_parent(
                    'plotting/{}.png'.format(loop.epoch)),
                grid_size=(config.n_clusters, 10))

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)
    test_metrics = {}

    def train_classifier(loop):
        df = DataFlow.arrays([x_train], batch_size=config.batch_size). \
            map(input_x_sampler)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(outputs=[y_given_x],
                                       inputs=[input_x],
                                       data_flow=df,
                                       feed_dict={is_training: False})
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(outputs=[y_given_x],
                                       inputs=[input_x],
                                       data_flow=test_flow,
                                       feed_dict={is_training: False})
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            test_metrics.update(cls_metrics)

    # prepare for training and testing data
    def input_x_sampler(x):
        return session.run([sampled_x], feed_dict={sample_input_x: x})

    with tf.device('/device:CPU:0'):
        sample_input_x = tf.placeholder(dtype=tf.float32,
                                        shape=(None, config.x_dim),
                                        name='sample_input_x')
        sampled_x = sample_from_probs(sample_input_x)

    train_flow = DataFlow.arrays([x_train], config.batch_size,
                                 shuffle=True,
                                 skip_incomplete=True).map(input_x_sampler)
    test_flow = DataFlow.arrays([x_test], config.test_batch_size). \
        map(input_x_sampler)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # fix the testing flow, reducing the testing time
        test_flow = test_flow.to_arrays_flow(
            batch_size=config.test_batch_size)

        # train the network
        with TrainLoop(params,
                       var_groups=['p_net', 'q_net',
                                   'gaussian_mixture_prior'],
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       summary_commit_freqs={'loss': 10},
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x], train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  tau_p: tau_p_var,
                                  tau_q: tau_q_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            anneal_after(trainer, learning_rate_var,
                         epochs=config.lr_anneal_epoch_freq,
                         steps=config.lr_anneal_step_freq)
            anneal_after(trainer, tau_p_var,
                         epochs=config.tau_p_anneal_epoch_freq,
                         steps=config.tau_p_anneal_step_freq)
            anneal_after(trainer, tau_q_var,
                         epochs=config.tau_q_anneal_epoch_freq,
                         steps=config.tau_q_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_nll': test_nll},
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # write the final results
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    test_metrics.update(evaluator.last_metrics_dict)
    results.commit_and_print(test_metrics)
def test_counters(self):
    # test loop with configured `max_epoch`
    with TrainLoop([], max_epoch=2) as loop:
        epoch_counter = 0
        step_counter = 0
        for epoch in loop.iter_epochs():
            epoch_counter += 1
            self.assertEqual(epoch, epoch_counter)
            x_ans = 0
            for step, [x] in \
                    loop.iter_steps(DataFlow.arrays([np.arange(4)], 1)):
                self.assertEqual(step, loop.step)
                self.assertEqual(epoch, loop.epoch)
                self.assertEqual(x, x_ans)
                x_ans += 1
                step_counter += 1
                self.assertEqual(step, step_counter)
            self.assertEqual(step_counter, loop.step)
            self.assertEqual(epoch, loop.epoch)
        self.assertEqual(epoch_counter, 2)
        self.assertEqual(step_counter, 8)

    # test loop with configured `max_step`
    with TrainLoop([], max_step=10) as loop:
        epoch_counter = 0
        step_counter = 0
        for epoch in loop.iter_epochs():
            epoch_counter += 1
            self.assertEqual(epoch, epoch_counter)
            for step in loop.iter_steps():
                step_counter += 1
                self.assertEqual(step, step_counter)
        self.assertEqual(epoch_counter, 1)
        self.assertEqual(step_counter, 10)

    # test loop with configured `max_step` with payload
    with TrainLoop([], max_step=10) as loop:
        epoch_counter = 0
        step_counter = 0
        for epoch in loop.iter_epochs():
            epoch_counter += 1
            self.assertEqual(epoch, epoch_counter)
            x_ans = 0
            for step, x in loop.iter_steps(np.arange(4)):
                self.assertEqual(x, x_ans)
                x_ans += 1
                step_counter += 1
                self.assertEqual(step, step_counter)
        self.assertEqual(epoch_counter, 3)
        self.assertEqual(step_counter, 10)

    # test loop with configured `max_step` and `max_epoch`,
    # while `max_epoch` finishes first
    with TrainLoop([], max_step=10, max_epoch=2) as loop:
        epoch_counter = 0
        step_counter = 0
        for epoch in loop.iter_epochs():
            epoch_counter += 1
            self.assertEqual(epoch, epoch_counter)
            for step, _ in loop.iter_steps(np.arange(4)):
                step_counter += 1
                self.assertEqual(step, step_counter)
        self.assertEqual(epoch_counter, 2)
        self.assertEqual(step_counter, 8)

    # test loop with configured `max_step` and `max_epoch`,
    # while `max_step` finishes first
    with TrainLoop([], max_step=10, max_epoch=3) as loop:
        epoch_counter = 0
        step_counter = 0
        for epoch in loop.iter_epochs():
            epoch_counter += 1
            self.assertEqual(epoch, epoch_counter)
            for step, _ in loop.iter_steps(np.arange(4)):
                step_counter += 1
                self.assertEqual(step, step_counter)
        self.assertEqual(epoch_counter, 3)
        self.assertEqual(step_counter, 10)
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = DataFlow.arrays([x_train, y_train], config.batch_size,
                                 shuffle=True, skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x, input_y], train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={
                                  'loss': loss,
                                  'acc': acc
                              })
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_acc': acc},
                                  inputs=[input_x, input_y],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[config.x_dim], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None,) + x_train.shape[1:],
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    vae = VAE(
        p_z=Bernoulli(tf.zeros([1, config.z_dim])),
        p_x_given_z=Bernoulli,
        q_z_given_x=Bernoulli,
        h_for_p_x=functools.partial(h_for_p_x, is_training=is_training),
        h_for_q_z=functools.partial(h_for_q_z, is_training=is_training),
    )

    grads = []
    losses = []
    lower_bounds = []
    test_nlls = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([h_for_q_z, h_for_p_x]):
                    _ = vae.chain(dev_input_x)
            else:
                # derive the loss and lower-bound for training
                train_chain = vae.chain(dev_input_x)
                dev_baseline = baseline_net(dev_input_x)
                dev_cost, dev_baseline_cost = \
                    train_chain.vi.training.reinforce(baseline=dev_baseline)
                dev_loss = regularization_loss() + \
                    tf.reduce_mean(dev_cost + dev_baseline_cost)
                dev_lower_bound = \
                    tf.reduce_mean(train_chain.vi.lower_bound.elbo())
                losses.append(dev_loss)
                lower_bounds.append(dev_lower_bound)

                # derive the nll and logits output for testing
                test_chain = vae.chain(dev_input_x, n_z=config.test_n_z)
                dev_test_nll = -tf.reduce_mean(
                    test_chain.vi.evaluation.is_loglikelihood())
                test_nlls.append(dev_test_nll)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, lower_bound, test_nll] = \
        multi_gpu.average([losses, lower_bounds, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'), \
            arg_scope([h_for_q_z, h_for_p_x],
                      channels_last=multi_gpu.channels_last(work_dev)):
        x_plots = tf.reshape(
            tf.cast(
                255 * tf.sigmoid(vae.model(n_z=100)['x'].distribution.logits),
                dtype=tf.uint8),
            [-1, 28, 28])

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            session = get_default_session_or_error()
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename=results.prepare_parent(
                    'plotting/{}.png'.format(loop.epoch)),
                grid_size=(10, 10))

    # prepare for training and testing data
    def input_x_sampler(x):
        sess = get_default_session_or_error()
        return sess.run([sampled_x], feed_dict={sample_input_x: x})

    with tf.device('/device:CPU:0'):
        sample_input_x = tf.placeholder(dtype=tf.float32,
                                        shape=(None, config.x_dim),
                                        name='sample_input_x')
        sampled_x = sample_from_probs(sample_input_x)

    train_flow = DataFlow.arrays([x_train], config.batch_size,
                                 shuffle=True,
                                 skip_incomplete=True).map(input_x_sampler)
    test_flow = DataFlow.arrays([x_test], config.test_batch_size). \
        map(input_x_sampler)

    with create_session().as_default():
        # fix the testing flow, reducing the testing time
        test_flow = test_flow.to_arrays_flow(
            batch_size=config.test_batch_size)

        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x], train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            anneal_after(trainer, learning_rate_var,
                         epochs=config.lr_anneal_epoch_freq,
                         steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': lower_bound
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # write the final test_nll and test_lb
    results.commit(evaluator.last_metrics_dict)
def iter_steps(self, data_generator=None):
    """
    Iterate through the steps.

    This method can only be called when no other step loop is being
    iterated, and an epoch loop is active.

    Args:
        data_generator: Optional iterable data to be yielded at every
            step.  This is required if `max_step` is not configured,
            so as to prevent an infinite step loop.

    Yields:
        int or (int, any): The global step counter (starting from 1), or
            the tuple of ``(step counter, batch data)`` if
            `data_generator` is specified.
    """
    def loop_condition():
        return self._max_step is None or self._step < self._max_step

    self._require_entered()
    if not self._within_epoch:
        raise RuntimeError('Step loop must be opened within active epoch '
                           'loop')
    if self._within_step:
        raise RuntimeError('Another step loop has been opened')
    if self._max_step is None and data_generator is None:
        raise RuntimeError('`data_generator` is required when `max_step` '
                           'is not configured, so as to prevent an '
                           'unstoppable step loop')

    try:
        if data_generator is not None:
            if isinstance(data_generator, DataFlow):
                data_flow = data_generator
            else:
                def iter_factory():
                    if data_gen[0] is not None:
                        for batch in data_gen[0]:
                            yield batch
                        data_gen[0] = None  # force to use data_generator once

                data_gen = [data_generator]
                data_flow = DataFlow.iterator_factory(iter_factory)
            self._data_flow = data_flow

        while loop_condition():
            # prepare for the step data
            if self._data_flow is None:
                yield_obj = self._step + 1
            else:
                try:
                    step_data = self._data_flow.next_batch()
                except StopIteration:
                    break
                yield_obj = self._step + 1, step_data

            # yield this step
            self._step += 1
            self._within_step = True
            self._step_start_time = time.time()
            try:
                yield yield_obj
            except StopIteration:  # pragma: no cover
                # might be caused by call to ``data_flow.next_batch()``
                break
            self._commit_step_stop_time()
    finally:
        self._within_step = False
        self._step_start_time = None
        self._data_flow = None
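# A minimal usage sketch of `iter_steps` (the arrays and loop configuration are
# illustrative, assuming the TrainLoop / DataFlow API shown elsewhere in this
# section): feeding a DataFlow yields ``(step, batch_data)`` tuples, while
# calling it without data, when `max_step` is configured, yields bare step
# counters.
#
#     flow = DataFlow.arrays([np.arange(8)], batch_size=2)
#     with TrainLoop([], max_epoch=1) as loop:
#         for epoch in loop.iter_epochs():
#             for step, [x] in loop.iter_steps(flow):
#                 ...  # run one training step on `x`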
def test_iterator(self):
    epoch_counter = [0]
    external_counter = [1]

    seq_flow = DataFlow.seq(0, 10, batch_size=2)
    map_flow = seq_flow.map(
        lambda x: (x + epoch_counter[0] * 10 + external_counter[0] * 100,))

    def make_iterator():
        epoch_counter[0] += 1
        return map_flow

    it_flow = DataFlow.iterator_factory(make_iterator)
    with it_flow.threaded(prefetch=2) as flow:
        # the first epoch, expect 0 .. 10
        np.testing.assert_array_equal(
            [[110, 111], [112, 113], [114, 115], [116, 117], [118, 119]],
            [a[0] for a in flow])
        time.sleep(.1)
        external_counter[0] += 1

        # the second epoch, the epoch counter should affect more than
        # the external counter
        np.testing.assert_array_equal(
            # having `prefetch = 2` should affect 3 items, because
            # while the queue size is 2, there are 1 additional prefetched
            # item waiting to be enqueued
            [[120, 121], [122, 123], [124, 125], [226, 227], [228, 229]],
            [a[0] for a in flow])
        time.sleep(.1)
        external_counter[0] += 1

        # the third epoch, we shall carry out an incomplete epoch by break
        for a in flow:
            np.testing.assert_array_equal([230, 231], a[0])
            break
        time.sleep(.1)
        external_counter[0] += 1

        # verify that the epoch counter increases after break
        for i, (a,) in enumerate(flow):
            # because the interruption is not well-predictable under
            # multi-threading context, we shall have a weaker verification
            # than the above
            self.assertTrue((340 + i * 2 == a[0]) or (440 + i * 2 == a[0]))
            self.assertTrue((341 + i * 2 == a[1]) or (441 + i * 2 == a[1]))
        time.sleep(.1)
        external_counter[0] += 1

        # carry out the fourth, incomplete epoch by error
        try:
            for a in flow:
                np.testing.assert_array_equal([450, 451], a[0])
                raise _MyError()
        except _MyError:
            pass
        time.sleep(.1)
        external_counter[0] += 1

        # verify that the epoch counter increases after error
        for i, (a,) in enumerate(flow):
            self.assertTrue((560 + i * 2 == a[0]) or (660 + i * 2 == a[0]))
            self.assertTrue((561 + i * 2 == a[1]) or (661 + i * 2 == a[1]))
def main():
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[config.x_dim], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None,) + x_train.shape[1:],
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    lower_bounds = []
    test_nlls = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([p_net, q_net], is_training=is_training):
                    _ = q_net(dev_input_x).chain(
                        p_net, latent_names=['z'],
                        observed={'x': dev_input_x})
            else:
                with arg_scope([q_net, p_net], is_training=is_training):
                    # derive the loss and lower-bound for training
                    train_q_net = q_net(dev_input_x)
                    train_chain = train_q_net.chain(
                        p_net, latent_names=['z'], latent_axis=0,
                        observed={'x': dev_input_x})
                    dev_vae_loss = tf.reduce_mean(
                        train_chain.vi.training.sgvb())
                    dev_loss = dev_vae_loss + regularization_loss()
                    dev_lower_bound = -dev_vae_loss
                    losses.append(dev_loss)
                    lower_bounds.append(dev_lower_bound)

                    # derive the nll and logits output for testing
                    test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                    test_chain = test_q_net.chain(
                        p_net, latent_names=['z'], latent_axis=0,
                        observed={'x': dev_input_x})
                    dev_test_nll = -tf.reduce_mean(
                        test_chain.vi.evaluation.is_loglikelihood())
                    test_nlls.append(dev_test_nll)

                    # derive the optimizer
                    params = tf.trainable_variables()
                    grads.append(
                        optimizer.compute_gradients(dev_loss,
                                                    var_list=params))

    # merge multi-gpu outputs and operations
    [loss, lower_bound, test_nll] = \
        multi_gpu.average([losses, lower_bounds, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        plot_p_net = p_net(n_z=100, is_training=is_training)
        x = tf.cast(255 * tf.sigmoid(plot_p_net['x'].distribution.logits),
                    dtype=tf.uint8)
        x_plots = tf.reshape(x, [-1, 28, 28])

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename=results.prepare_parent(
                    'plotting/{}.png'.format(loop.epoch)),
                grid_size=(10, 10))

    # prepare for training and testing data
    def input_x_sampler(x):
        return session.run([sampled_x], feed_dict={sample_input_x: x})

    with tf.device('/device:CPU:0'):
        sample_input_x = tf.placeholder(dtype=tf.float32,
                                        shape=(None, config.x_dim),
                                        name='sample_input_x')
        sampled_x = sample_from_probs(sample_input_x)

    train_flow = DataFlow.arrays([x_train], config.batch_size,
                                 shuffle=True,
                                 skip_incomplete=True).map(input_x_sampler)
    test_flow = DataFlow.arrays([x_test], config.test_batch_size). \
        map(input_x_sampler)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # fix the testing flow, reducing the testing time
        test_flow = test_flow.to_arrays_flow(
            batch_size=config.test_batch_size)

        # train the network
        with TrainLoop(params,
                       var_groups=['p_net', 'q_net', 'posterior_flow'],
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x], train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            anneal_after(trainer, learning_rate_var,
                         epochs=config.lr_anneal_epoch_freq,
                         steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': lower_bound
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # write the final test_nll and test_lb
    results.commit_and_print(evaluator.last_metrics_dict)
def test_errors(self):
    with pytest.raises(ValueError,
                       match='`prefetch_num` must be at least 1'):
        _ = ThreadingFlow(DataFlow.arrays([np.arange(10)], batch_size=2),
                          prefetch=0)
def main():
    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[784], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + x_train.shape[1:], name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True
    )
    test_flow = DataFlow.arrays([x_test, y_test], config.batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       summary_commit_freqs={'loss': 10, 'acc': 10},
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            anneal_after(
                trainer, learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # save test result
    results.commit(evaluator.last_metrics_dict)
def test_threaded(self):
    flow = DataFlow.arrays([np.arange(10)], batch_size=2). \
        threaded(prefetch=3)
    self.assertIsInstance(flow, ThreadingFlow)
    self.assertEquals(3, flow.prefetch_num)