def main(_):
  tf.enable_v2_behavior()

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyper parameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  train_dataset, eval_dataset = emnist_ae_dataset.get_centralized_emnist_datasets(
      batch_size=FLAGS.batch_size, only_digits=False)

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()
  model = emnist_ae_models.create_autoencoder_model()
  model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      optimizer=optimizer,
      metrics=[tf.keras.metrics.MeanSquaredError()])

  logging.info('Training model:')
  logging.info(model.summary())

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)

  # Reduce the learning rate after a fixed number of epochs.
  def decay_lr(epoch, learning_rate):
    if (epoch + 1) % FLAGS.decay_epochs == 0:
      return learning_rate * FLAGS.lr_decay
    else:
      return learning_rate

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      train_dataset,
      validation_data=eval_dataset,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'mean_squared_error']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)
def test_writes_dict_as_csv(self):
  tmpdir = self.get_temp_dir()
  logger = keras_callbacks.AtomicCSVLogger(tmpdir)
  logger.on_epoch_end(epoch=0, logs={'value': 0, 'value_1': 'a'})
  logger.on_epoch_end(epoch=1, logs={'value': 2, 'value_1': 'b'})
  logger.on_epoch_end(epoch=2, logs={'value': 3, 'value_1': 'c'})
  # Re-logging an earlier epoch should overwrite that row and discard any
  # rows for later epochs.
  logger.on_epoch_end(epoch=1, logs={'value': 4, 'value_1': 'd'})
  read_logs = pd.read_csv(
      os.path.join(tmpdir, 'metric_results.csv'),
      index_col=0,
      header=0,
      engine='c')
  self.assertNotEmpty(read_logs)
  pd.testing.assert_frame_equal(
      read_logs,
      pd.DataFrame({
          'value': [0, 4],
          'value_1': ['a', 'd'],
      }))
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  tf.compat.v1.enable_v2_behavior()

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyper parameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  train_client_data, test_client_data = (
      tff.simulation.datasets.shakespeare.load_data())

  def preprocess(ds):
    return dataset.convert_snippets_to_character_sequence_examples(
        ds, FLAGS.batch_size, epochs=1).cache()

  train_dataset = train_client_data.create_tf_dataset_from_all_clients()
  if FLAGS.shuffle_train_data:
    train_dataset = train_dataset.shuffle(buffer_size=10000)
  train_dataset = preprocess(train_dataset)

  eval_dataset = preprocess(
      test_client_data.create_tf_dataset_from_all_clients())

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  # Vocabulary with one OOV ID and zero for the mask.
  vocab_size = len(dataset.CHAR_VOCAB) + 2
  model = models.create_recurrent_model(
      vocab_size=vocab_size, batch_size=FLAGS.batch_size)
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          keras_metrics.FlattenedCategoricalAccuracy(
              vocab_size=vocab_size, mask_zero=True)
      ])

  logging.info('Training model:')
  logging.info(model.summary())

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)

  # Reduce the learning rate every 20 epochs.
  def decay_lr(epoch, lr):
    if (epoch + 1) % 20 == 0:
      return lr * 0.1
    else:
      return lr

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      train_dataset,
      validation_data=eval_dataset,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'accuracy']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)
def test_initializes(self):
  tmpdir = self.get_temp_dir()
  logger = keras_callbacks.AtomicCSVLogger(tmpdir)
  self.assertIsInstance(logger, tf.keras.callbacks.Callback)
def run_experiment():
  """Runs the training experiment."""
  (_, stackoverflow_validation,
   stackoverflow_test) = dataset.construct_word_level_datasets(
       FLAGS.vocab_size, FLAGS.batch_size, 1, FLAGS.sequence_length, -1,
       FLAGS.num_validation_examples)
  centralized_train = dataset.get_centralized_train_dataset(
      FLAGS.vocab_size, FLAGS.batch_size, FLAGS.sequence_length,
      FLAGS.shuffle_buffer_size)

  def _lstm_fn(latent_size):
    return tf.keras.layers.LSTM(latent_size, return_sequences=True)

  model = models.create_recurrent_model(
      FLAGS.vocab_size,
      _lstm_fn,
      'stackoverflow-lstm',
      shared_embedding=FLAGS.shared_embedding)
  logging.info('Training model: %s', model.summary())

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()
  pad_token, oov_token, _, eos_token = dataset.get_special_tokens(
      FLAGS.vocab_size)
  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      optimizer=optimizer,
      metrics=[
          # Plus 4 for the pad, OOV, BOS, and EOS tokens.
          keras_metrics.FlattenedCategoricalAccuracy(
              vocab_size=FLAGS.vocab_size + 4,
              name='accuracy_with_oov',
              masked_tokens=pad_token),
          keras_metrics.FlattenedCategoricalAccuracy(
              vocab_size=FLAGS.vocab_size + 4,
              name='accuracy_no_oov',
              masked_tokens=[pad_token, oov_token]),
          keras_metrics.FlattenedCategoricalAccuracy(
              vocab_size=FLAGS.vocab_size + 4,
              name='accuracy_no_oov_or_eos',
              masked_tokens=[pad_token, oov_token, eos_token]),
      ])

  train_results_path = os.path.join(FLAGS.root_output_dir, 'train_results',
                                    FLAGS.experiment_name)
  test_results_path = os.path.join(FLAGS.root_output_dir, 'test_results',
                                   FLAGS.experiment_name)

  train_csv_logger = keras_callbacks.AtomicCSVLogger(train_results_path)
  test_csv_logger = keras_callbacks.AtomicCSVLogger(test_results_path)

  log_dir = os.path.join(FLAGS.root_output_dir, 'logdir',
                         FLAGS.experiment_name)
  try:
    tf.io.gfile.makedirs(log_dir)
    tf.io.gfile.makedirs(train_results_path)
    tf.io.gfile.makedirs(test_results_path)
  except tf.errors.OpError:
    pass  # Directories already exist.

  train_tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir,
      write_graph=True,
      update_freq=FLAGS.tensorboard_update_frequency)
  test_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

  # Write the hyperparameters to a CSV.
  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparams_file = os.path.join(FLAGS.root_output_dir, FLAGS.experiment_name,
                              'hparams.csv')
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  model.fit(
      centralized_train,
      epochs=FLAGS.epochs,
      verbose=0,
      validation_data=stackoverflow_validation,
      callbacks=[train_csv_logger, train_tensorboard_callback])
  score = model.evaluate(
      stackoverflow_test,
      verbose=0,
      callbacks=[test_csv_logger, test_tensorboard_callback])
  logging.info('Final test loss: %.4f', score[0])
  logging.info('Final test accuracy: %.4f', score[1])
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyper parameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  train_dataset, eval_dataset = stackoverflow_lr_dataset.get_centralized_stackoverflow_datasets(
      batch_size=FLAGS.batch_size,
      vocab_tokens_size=FLAGS.vocab_tokens_size,
      vocab_tags_size=FLAGS.vocab_tags_size)

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  model = stackoverflow_lr_models.create_logistic_model(
      vocab_tokens_size=FLAGS.vocab_tokens_size,
      vocab_tags_size=FLAGS.vocab_tags_size)
  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(
          from_logits=False, reduction=tf.keras.losses.Reduction.SUM),
      optimizer=optimizer,
      metrics=[
          tf.keras.metrics.Precision(),
          tf.keras.metrics.Recall(top_k=5)
      ])

  logging.info('Training model:')
  logging.info(model.summary())

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)

  # Reduce the learning rate after a fixed number of epochs.
  def decay_lr(epoch, learning_rate):
    if (epoch + 1) % FLAGS.decay_epochs == 0:
      return learning_rate * FLAGS.lr_decay
    else:
      return learning_rate

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      train_dataset,
      validation_data=eval_dataset,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'precision', 'recall']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  tf.compat.v1.enable_v2_behavior()

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyper parameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  cifar_train, cifar_test = dataset.get_centralized_cifar100(
      train_batch_size=FLAGS.batch_size, crop_shape=CROP_SHAPE)

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()
  model = resnet_models.create_resnet18(
      input_shape=CROP_SHAPE, num_classes=NUM_CLASSES)
  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      optimizer=optimizer,
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  logging.info('Training model:')
  logging.info(model.summary())

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)

  # Reduce the learning rate after a fixed number of epochs.
  def decay_lr(epoch, learning_rate):
    if (epoch + 1) % FLAGS.decay_epochs == 0:
      return learning_rate * FLAGS.lr_decay
    else:
      return learning_rate

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      cifar_train,
      validation_data=cifar_test,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'sparse_categorical_accuracy']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyper parameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  train_dataset, eval_dataset = emnist_dataset.get_centralized_emnist_datasets(
      batch_size=FLAGS.batch_size, only_digits=False)

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  if FLAGS.model == 'cnn':
    model = emnist_models.create_conv_dropout_model(only_digits=False)
  elif FLAGS.model == '2nn':
    model = emnist_models.create_two_hidden_layer_model(only_digits=False)
  else:
    raise ValueError('Cannot handle model flag [{!s}].'.format(FLAGS.model))

  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      optimizer=optimizer,
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  logging.info('Training model:')
  logging.info(model.summary())

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)

  # Reduce the learning rate after a fixed number of epochs.
  def decay_lr(epoch, learning_rate):
    if (epoch + 1) % FLAGS.decay_epochs == 0:
      return learning_rate * FLAGS.lr_decay
    else:
      return learning_rate

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      train_dataset,
      validation_data=eval_dataset,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'sparse_categorical_accuracy']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)
def run(keras_model: tf.keras.Model,
        train_dataset: tf.data.Dataset,
        experiment_name: str,
        root_output_dir: str,
        num_epochs: int,
        hparams_dict: Optional[Dict[str, Any]] = None,
        decay_epochs: Optional[int] = None,
        lr_decay: Optional[float] = None,
        validation_dataset: Optional[tf.data.Dataset] = None,
        test_dataset: Optional[tf.data.Dataset] = None
       ) -> tf.keras.callbacks.History:
  """Run centralized training for a given compiled `tf.keras.Model`.

  Args:
    keras_model: A compiled `tf.keras.Model`.
    train_dataset: The `tf.data.Dataset` to be used for training.
    experiment_name: Name of the experiment, used as part of the name of the
      output directory.
    root_output_dir: The top-level output directory. The directory
      `root_output_dir/experiment_name` will contain TensorBoard logs, metrics
      CSVs and other outputs.
    num_epochs: How many training epochs to perform.
    hparams_dict: An optional dict specifying hyperparameters. If provided, the
      hyperparameters will be written to CSV.
    decay_epochs: Number of training epochs before decaying the learning rate.
    lr_decay: How much to decay the learning rate by every `decay_epochs`.
    validation_dataset: An optional `tf.data.Dataset` used for validation
      during training.
    test_dataset: An optional `tf.data.Dataset` used for testing after all
      training has completed.

  Returns:
    A `tf.keras.callbacks.History` object.
  """
  tensorboard_dir = os.path.join(root_output_dir, 'logdir', experiment_name)
  results_dir = os.path.join(root_output_dir, 'results', experiment_name)

  for path in [root_output_dir, tensorboard_dir, results_dir]:
    tf.io.gfile.makedirs(path)

  if hparams_dict:
    hparams_file = os.path.join(results_dir, 'hparams.csv')
    logging.info('Saving hyper parameters to: [%s]', hparams_file)
    hparams_df = pd.DataFrame(hparams_dict, index=[0])
    utils_impl.atomic_write_to_csv(hparams_df, hparams_file)

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)
  training_callbacks = [tensorboard_callback, csv_logger_callback]

  if decay_epochs is not None and decay_epochs > 0:
    # Reduce the learning rate after a fixed number of epochs.
    def decay_lr(epoch, learning_rate):
      if (epoch + 1) % decay_epochs == 0:
        return learning_rate * lr_decay
      else:
        return learning_rate

    lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)
    training_callbacks.append(lr_callback)

  logging.info('Training model:')
  logging.info(keras_model.summary())

  history = keras_model.fit(
      train_dataset,
      validation_data=validation_dataset,
      epochs=num_epochs,
      callbacks=training_callbacks)

  logging.info('Final training metrics:')
  for metric in keras_model.metrics:
    name = metric.name
    metric = history.history[name][-1]
    logging.info('\t%s: %.4f', name, metric)

  if validation_dataset:
    logging.info('Final validation metrics:')
    for metric in keras_model.metrics:
      name = metric.name
      metric = history.history['val_{}'.format(name)][-1]
      logging.info('\t%s: %.4f', name, metric)

  if test_dataset:
    test_metrics = keras_model.evaluate(test_dataset, return_dict=True)
    logging.info('Test metrics:')
    for metric in keras_model.metrics:
      name = metric.name
      metric = test_metrics[name]
      logging.info('\t%s: %.4f', name, metric)

  return history
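# Usage sketch for `run` (illustrative only, not part of the library):
# `create_compiled_model` and `load_centralized_datasets` below are
# hypothetical placeholders standing in for a task-specific compiled Keras
# model and its `tf.data.Dataset` pipelines; only `run` above is assumed here.
def example_centralized_training():
  model = create_compiled_model()
  train_ds, val_ds, test_ds = load_centralized_datasets()
  # Decay the learning rate by 10x every 5 epochs, and evaluate on the test
  # set once training completes.
  return run(
      keras_model=model,
      train_dataset=train_ds,
      experiment_name='example_experiment',
      root_output_dir='/tmp/centralized_training',
      num_epochs=10,
      hparams_dict={'batch_size': 32, 'learning_rate': 0.01},
      decay_epochs=5,
      lr_decay=0.1,
      validation_dataset=val_ds,
      test_dataset=test_ds)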