def main():
    # load cifar-10 data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(dtype=np.float32, normalize=True)
    print(x_train.shape)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + x_train.shape[1:], name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in multi_gpu.data_parallel(
            batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(
                    dev_input_x,
                    is_training=is_training,
                    channels_last=multi_gpu.channels_last(dev)
                )
                dev_softmax_loss = \
                    softmax_classification_loss(dev_logits, dev_input_y)
                dev_loss = dev_softmax_loss + regularization_loss()
                dev_y = softmax_classification_output(dev_logits)
                dev_acc = classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    )

    # prepare for training and testing data
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True
    )
    test_flow = DataFlow.arrays([x_test, y_test], config.batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       summary_commit_freqs={'loss': 10, 'acc': 10},
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            anneal_after(
                trainer, learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # save test result
    results.commit_and_print(evaluator.last_metrics_dict)
def main():
    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[784], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + x_train.shape[1:], name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True
    )
    test_flow = DataFlow.arrays([x_test, y_test], config.batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       summary_commit_freqs={'loss': 10, 'acc': 10},
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            anneal_after(
                trainer, learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # save test result
    results.commit(evaluator.last_metrics_dict)
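# --- Illustrative only: not part of the original examples. ---
# The two `main()` functions above read `config` (and `results`) as
# module-level objects that are defined elsewhere in their scripts.  The
# sketch below only mirrors the attributes those functions actually access;
# the class name and the default values are assumptions, not the original
# configuration code.
class ExpConfig(object):
    initial_lr = 0.001             # config.initial_lr
    lr_anneal_factor = 0.5         # config.lr_anneal_factor
    lr_anneal_epoch_freq = 100     # config.lr_anneal_epoch_freq
    lr_anneal_step_freq = None     # config.lr_anneal_step_freq
    batch_size = 64                # config.batch_size
    max_epoch = 500                # config.max_epoch

# A module-level `config = ExpConfig()` (and a `results` object) would be
# expected before either `main()` above is called.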
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = DataFlow.arrays([x_train, y_train], config.batch_size,
                                 shuffle=True, skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop, train_op, [input_x, input_y], train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={
                                  'loss': loss,
                                  'acc': acc
                              })
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_acc': acc},
                                  inputs=[input_x, input_y],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
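# --- Illustrative only: not part of the original example. ---
# A minimal, hypothetical entry point for the `main(config, result_dir)`
# variant above.  The real script presumably has its own configuration class
# and command-line handling; the class name, field defaults and the
# `format_config()` implementation below are assumptions that merely cover
# the attributes `main()` reads.
class DenseConfig(object):
    x_dim = 3 * 32 * 32               # flattened CIFAR-10 images
    initial_lr = 0.001
    lr_anneal_factor = 0.5
    lr_anneal_epoch_freq = 100
    lr_anneal_step_freq = None
    batch_size = 64
    test_batch_size = 256
    max_epoch = 500
    max_step = None
    write_summary = False

    def format_config(self):
        # render the public attributes as "key: value" lines for printing
        return '\n'.join(
            '{}: {}'.format(k, getattr(self, k))
            for k in sorted(dir(self))
            if not k.startswith('_') and k != 'format_config'
        )


if __name__ == '__main__':
    # result directory name is a placeholder, not the original default
    main(DenseConfig(), result_dir='./results')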