def main(_):
    """Fit a ``LinearModel`` to synthetic data and print the learned parameters.

    Eager execution is enabled first. The data set is produced by
    ``synthetic_dataset`` from fixed ground-truth weights/bias, and training
    is delegated to ``fit`` on the GPU when ``tfe.num_gpus()`` is non-zero,
    otherwise on the CPU.

    Args:
        _: Unused positional argument.
    """
    tf.enable_eager_execution()

    # Parameters the synthetic samples are generated from.
    true_w = [[-2.0], [4.0], [1.0]]
    true_b = [0.5]
    noise_level = 0.01

    # Optimisation hyper-parameters.
    batch_size = 64
    learning_rate = 0.1

    print("True w: %s" % true_w)
    print("True b: %s\n" % true_b)

    linear_model = LinearModel()
    samples = synthetic_dataset(true_w, true_b, noise_level, batch_size, 20)

    if tfe.num_gpus():
        target_device = "gpu:0"
    else:
        target_device = "cpu:0"
    print("Using device: %s" % target_device)

    with tf.device(target_device):
        sgd = tf.train.GradientDescentOptimizer(learning_rate)
        fit(linear_model, samples, sgd, verbose=True, logdir=FLAGS.logdir)

    print("\nAfter training: w = %s" % linear_model.variables[0].numpy())
    print("\nAfter training: b = %s" % linear_model.variables[1].numpy())
def main(_):
    """Train and evaluate a Keras MNIST model under ``MirroredStrategy``.

    Args:
        _: Unused positional argument.
    """
    tf.enable_eager_execution()

    # The helper returns the train/eval datasets plus the input shape, which
    # depends on the `image_data_format` (channels_first or channels_last).
    train_ds, eval_ds, input_shape = get_input_datasets()

    # The strategy is pinned to an explicit device list here.
    # TODO(priyag): Use `tf.distribute.MirroredStrategy` once available.
    device_list = ['/gpu:0', '/cpu:0']
    strategy = mirrored_strategy.MirroredStrategy(device_list)

    # Building and compiling inside the scope makes `fit`, `evaluate` and
    # `predict` run distributed according to the strategy.
    with strategy.scope():
        model = get_model(input_shape)
        rms_optimizer = rmsprop.RMSProp(learning_rate=0.001)
        compile_kwargs = dict(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer=rms_optimizer,
            metrics=['accuracy'],
        )
        model.compile(**compile_kwargs)

    # Training pass.
    model.fit(x=train_ds, epochs=20, steps_per_epoch=468)

    # Evaluation pass; index 0 is the loss, index 1 the accuracy metric.
    eval_metrics = model.evaluate(eval_ds, steps=10, verbose=0)
    print('Test loss:', eval_metrics[0])
    print('Test accuracy:', eval_metrics[1])
def main(_):
    """Train the PTB model, checkpointing whenever the eval loss improves.

    After every epoch the model is evaluated. If the loss is the best seen so
    far a checkpoint is written (when ``FLAGS.logdir`` is set); otherwise the
    learning rate is divided by 4 for the next epoch.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.data_path`` is empty.
    """
    tf.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")
    corpus = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tf.device("/device:GPU:0" if have_gpu else None):
        # Make learning_rate a Variable so it can be included in the
        # checkpoint and we can resume training with the last saved
        # learning_rate.
        learning_rate = tfe.Variable(20.0, name="learning_rate")
        model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                         FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                         use_cudnn_rnn)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        checkpoint = tfe.Checkpoint(
            learning_rate=learning_rate,
            model=model,
            # GradientDescentOptimizer has no state to checkpoint, but noting
            # it here lets us swap in an optimizer that does.
            optimizer=optimizer)
        # Restore existing variables now (learning_rate), and restore new
        # variables on creation if a checkpoint exists.
        checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss = evaluate(model, eval_data)
            # Compare against None explicitly: a best loss of exactly 0 is a
            # valid value and must not be mistaken for "no loss recorded yet".
            if best_loss is None or eval_loss < best_loss:
                if FLAGS.logdir:
                    checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
                best_loss = eval_loss
            else:
                learning_rate.assign(learning_rate / 4.0)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n" %
                    learning_rate.numpy())
import tensorflow as tf
import tensorflow.feature_column as fc

tf.enable_eager_execution()  # Enable eager execution so operations run immediately and can be inspected.
import matplotlib as mpl
import numpy as np
from PIL import Image
import time
import functools
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from tensorflow.python.keras.preprocessing import image as kp_image
from tensorflow.python.keras import models, losses, layers
from tensorflow.python.keras import backend as K

# Eager execution is switched on at import time, before any function runs.
tf.enable_eager_execution()


def load_image(path_to_image):
    """Load an image, rescale it, and add a leading axis to the array.

    The image is resized so that its longest side equals ``max_dim`` (512),
    converted to an array with ``kp_image.img_to_array`` and expanded with a
    new axis 0.

    Args:
        path_to_image: Path handed to ``Image.open``.

    NOTE(review): no ``return`` statement is visible in this view, so the
    prepared array is not handed back to the caller as written -- the
    function may be truncated here; confirm against the full file.
    """
    max_dim = 512
    img = Image.open(path_to_image)
    # Scale factor that maps the longest side onto max_dim.
    long = max(img.size)
    scale = max_dim/long
    img = img.resize((round(img.size[0]*scale), round(img.size[1]*scale)), Image.ANTIALIAS)
    img = kp_image.img_to_array(img)
    # Insert a new leading axis (presumably the batch dimension -- TODO confirm).
    img = np.expand_dims(img, axis=0)
from tensorflow.keras.models import Model


def fit_keras_model():
    """Build, compile and fit a small functional-API classifier on random data.

    The network is Input(32) -> Dense(64, relu) -> Dense(64, relu) ->
    Dense(10, softmax), adapted from
    https://keras.io/getting-started/functional-api-guide/

    Returns:
        The fitted ``Model`` instance.
    """
    features = np.random.random((1000, 32))
    targets = np.random.random((1000, 10))

    # `Input` yields a tensor; each layer instance is callable on a tensor
    # and returns a tensor.
    inputs = Input(shape=(32,))
    hidden = inputs
    for _ in range(2):
        hidden = Dense(64, activation='relu')(hidden)
    predictions = Dense(10, activation='softmax')(hidden)

    # The model spans the Input layer and the three Dense layers.
    classifier = Model(inputs=inputs, outputs=predictions)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    classifier.fit(features, targets)  # starts training
    return classifier


if __name__ == '__main__':
    # Passing "eager" as the first CLI argument turns on eager execution.
    if sys.argv[1:2] == ['eager']:
        tf.enable_eager_execution()
    # fails with eager execution enabled
    fit_keras_model()
    print('success')
def main():
    """Debugging driver: build the labelled Reddit input pipeline and print samples.

    Reads base propensity scores from a TSV file, builds a propensity-based
    simulated labeler, constructs the input function over the TFRecord file
    and prints the ``outcome`` and ``treatment`` entries of 25 batches.
    """
    tf.enable_eager_execution()

    # parser = argparse.ArgumentParser()
    # parser.add_argument('--shuffle_buffer_size', type=int, default=100)
    # parser.add_argument('--batch_size', type=int, default=16)
    # parser.add_argument('--max_abs_len', type=int, default=250)
    #
    # args = parser.parse_args()

    # for easy debugging
    # tfrecord_file = '../dat/reddit/proc.tf_record'
    tfrecord_file = '../dat/reddit/proc.tf_record'
    vocab_file = "../../bert/pre-trained/uncased_L-12_H-768_A-12/vocab.txt"
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=True)

    # labeler = make_subreddit_based_simulated_labeler(1.0, 1.0, 1.0, setting="simple", seed=0)
    # labeler = make_log_scores()

    base_propensities_path = '~/reddit_logs/prop_sim/propensity_source/test_results_keto.tsv'
    # Pass the separator by keyword: newer pandas versions no longer accept
    # it positionally, while the keyword form works on old and new versions.
    output = pd.read_csv(base_propensities_path, sep='\t')
    base_propensity_scores = output['treatment_probability'].values
    example_indices = output['index'].values

    labeler = make_propensity_based_simulated_labeler(
        1.0, 1.0, 1.0,
        base_propensity_scores,
        example_indices,
        exogeneous_con=0.,
        setting="simple",
        seed=42)

    input_fn = make_input_fn_from_file(
        input_files_or_glob=tfrecord_file,
        seq_length=128,
        num_splits=10,
        dev_splits=1,
        test_splits=2,
        tokenizer=tokenizer,
        is_training=True,
        filter_test=False,
        # subreddits=[13, 8, 6],
        subreddits=[13],
        shuffle_buffer_size=int(
            1e6),  # note: bert hardcoded this, and I'm following suit
        seed=0,
        labeler=labeler)

    # input_fn = make_input_fn_from_tfrecord(tokenizer=tokenizer, tfrecord=tfrecord_file)

    params = {'batch_size': 64}
    dataset = input_fn(params)
    sampler = dataset.make_one_shot_iterator()

    for _ in range(25):
        sample = sampler.get_next()
        # print(sample)
        # print('Subreddit: {}'.format(sample['subreddit']))
        # print('index: {}'.format(sample['index']))
        # print('Outcome: {}'.format(sample['outcome']))
        # print('score: {}'.format(sample['score']))
        # print('score: {}'.format(tf.reduce_mean(sample['score'])))
        # log_score = sample['log_score'].numpy()
        # print('log_score: {}'.format(log_score.std()))
        # in_train = sample['in_train']
        # in_dev = sample['in_dev']
        # norm_score = (tf.cast(score, tf.float32)-24.)/167.2
        # print('in_dev: {}'.format(in_dev))
        # print('in_train: {}'.format(in_train))

        outcome = sample['outcome']
        print('outcome: {}'.format(outcome))

        treatment = sample['treatment']
        print('treatment: {}'.format(treatment))
def __init__(self, sess, args):
    """Copy hyper-parameters from ``args``, build the model and set up saving.

    Args:
        sess: Session used to initialise variables and restore checkpoints.
        args: Namespace-like object carrying the hyper-parameters read below
            (``layers``, ``rnn_size``, ``loss``, ``final_act``, ...).

    Raises:
        NotImplementedError: If ``args.hidden_act`` or ``args.loss`` is not
            one of the supported names.
        Exception: If ``args.checkpoint_dir`` is not an existing directory.
    """
    self.sess = sess
    self.is_training = args.is_training

    self.layers = args.layers
    self.rnn_size = args.rnn_size
    self.n_epochs = args.n_epochs
    self.batch_size = args.batch_size
    self.dropout_p_hidden = args.dropout_p_hidden
    self.learning_rate = args.learning_rate
    self.decay = args.decay
    self.decay_steps = args.decay_steps
    self.sigma = args.sigma
    self.init_as_normal = args.init_as_normal
    self.reset_after_session = args.reset_after_session
    self.session_key = args.session_key
    self.item_key = args.item_key
    self.time_key = args.time_key
    self.grad_cap = args.grad_cap
    self.n_items = args.n_items
    self.predict_state = [
        np.zeros([self.batch_size, self.rnn_size], dtype=np.float32)
        for _ in range(self.layers)
    ]

    if args.hidden_act == 'tanh':
        self.hidden_act = self.tanh
    elif args.hidden_act == 'relu':
        self.hidden_act = self.relu
    else:
        raise NotImplementedError

    if args.loss == 'cross-entropy':
        if args.final_act == 'tanh':
            self.final_activation = self.softmaxth
        else:
            self.final_activation = self.softmax
        self.loss_function = self.cross_entropy
    elif args.loss == 'bpr':
        if args.final_act == 'linear':
            self.final_activation = self.linear
        elif args.final_act == 'relu':
            self.final_activation = self.relu
        else:
            self.final_activation = self.tanh
        self.loss_function = self.bpr
    elif args.loss == 'top1':
        if args.final_act == 'linear':
            self.final_activation = self.linear
        elif args.final_act == 'relu':
            # Was misspelled `final_activatin`, which left `final_activation`
            # unset for the top1 + relu combination.
            self.final_activation = self.relu
        else:
            self.final_activation = self.tanh
        self.loss_function = self.top1
    else:
        raise NotImplementedError

    self.checkpoint_dir = args.checkpoint_dir
    if not os.path.isdir(self.checkpoint_dir):
        raise Exception("[!] Checkpoint Dir not found")

    # NOTE(review): eager execution is enabled here although the model is
    # driven through `self.sess` below -- confirm this call is intended.
    tf.enable_eager_execution()
    self.build_model()
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    # Training needs nothing further; the rest prepares prediction mode.
    if self.is_training:
        return

    # use self.predict_state to hold hidden states during prediction.
    self.predict_state = [
        np.zeros([self.batch_size, self.rnn_size], dtype=np.float32)
        for _ in range(self.layers)
    ]
    # restore checkpoint
    ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(
            sess, '{}/gru-model-{}'.format(self.checkpoint_dir,
                                           args.test_model))
def run_keras_model_benchmark(_):
    """Train a Keras application model on synthetic data and log the results.

    The model named by ``FLAGS.model`` is built without pretrained weights,
    optionally wrapped for a distribution strategy or multi-GPU execution,
    fitted, and its per-epoch validation metrics are sent to the benchmark
    logger.

    Args:
        _: Unused positional argument.

    Raises:
        AssertionError: If ``FLAGS.model`` is not a key of ``MODELS``.
        ValueError: If ``FLAGS.use_synthetic_data`` is false.
    """
    # Reject unknown model names up front.
    if FLAGS.model not in MODELS.keys():
        raise AssertionError("The --model command line argument should "
                             "be a key in the `MODELS` dictionary.")

    # Eager mode is opt-in.
    if FLAGS.eager:
        tf.logging.info("Eager execution is enabled...")
        tf.enable_eager_execution()

    # Instantiate the requested architecture with random weights.
    tf.logging.info("Benchmark on {} model...".format(FLAGS.model))
    model = MODELS[FLAGS.model](weights=None)

    # Only synthetic input is available.
    dataset_name = "ImageNet"
    if not FLAGS.use_synthetic_data:
        raise ValueError("Only synthetic dataset is supported!")
    tf.logging.info("Using synthetic dataset...")
    dataset_name += "_Synthetic"
    train_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, FLAGS.batch_size)
    val_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, FLAGS.batch_size)

    gpu_count = flags_core.get_num_gpus(FLAGS)

    strategy = None
    if FLAGS.dist_strat:
        # Distribution strategy takes precedence over multi_gpu_model.
        strategy = distribution_utils.get_distribution_strategy(
            num_gpus=gpu_count)
    elif gpu_count > 1:
        # With eager execution only one GPU is utilized even if several are
        # provided, so warn before wrapping the model.
        if FLAGS.eager:
            tf.logging.warning(
                "{} GPUs are provided, but only one GPU is utilized as "
                "eager execution is enabled.".format(gpu_count))
        model = tf.keras.utils.multi_gpu_model(model, gpus=gpu_count)

    # Adam optimizer and some other optimizers doesn't work well with
    # distribution strategy (b/113076709), hence plain gradient descent.
    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"],
                  distribute=strategy)

    # Describe the run to the benchmark logger.
    run_params = {
        "batch_size": FLAGS.batch_size,
        "synthetic_data": FLAGS.use_synthetic_data,
        "train_epochs": FLAGS.train_epochs,
        "num_train_images": FLAGS.num_train_images,
        "num_eval_images": FLAGS.num_eval_images,
    }
    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info(
        model_name=FLAGS.model,
        dataset_name=dataset_name,
        run_params=run_params,
        test_id=FLAGS.benchmark_test_id)

    # Callbacks that report metric values during training and evaluation.
    metric_callbacks = model_callbacks.get_model_callbacks(
        FLAGS.callbacks,
        batch_size=FLAGS.batch_size,
        metric_logger=benchmark_logger)

    # Fit with per-epoch validation.
    train_steps = int(np.ceil(FLAGS.num_train_images / FLAGS.batch_size))
    eval_steps = int(np.ceil(FLAGS.num_eval_images / FLAGS.batch_size))
    fit_history = model.fit(
        train_dataset,
        epochs=FLAGS.train_epochs,
        callbacks=metric_callbacks,
        validation_data=val_dataset,
        steps_per_epoch=train_steps,
        validation_steps=eval_steps,
    )

    tf.logging.info("Logging the evaluation results...")
    for epoch_index in range(FLAGS.train_epochs):
        steps_so_far = (epoch_index + 1) * np.ceil(
            FLAGS.num_eval_images/FLAGS.batch_size)
        benchmark_logger.log_evaluation_result({
            "accuracy": fit_history.history["val_acc"][epoch_index],
            "loss": fit_history.history["val_loss"][epoch_index],
            tf.GraphKeys.GLOBAL_STEP: steps_so_far,
        })

    # Clear the session explicitly to avoid session delete error
    tf.keras.backend.clear_session()
def create_image(content_path, style_path, show_output=False, save_output=True):
    """Run style transfer and plot/save the comparison, result, timeline and loss.

    Four figures are produced: content/style/result side by side, the result
    alone, a 2x5 grid of intermediate images, and the loss curves.

    Args:
        content_path: Path of the content image.
        style_path: Path of the style image.
        show_output: When true, each figure is shown with ``plt.show()``.
        save_output: When true, the figures are written below ``../output``,
            ``../compare``, ``../timeline`` and ``../loss``, named after the
            two input files.
    """
    tf.enable_eager_execution()
    print("Eager execution: {}".format(tf.executing_eagerly()))

    # Run model ---------------------------------------------
    (best_img, best_loss, timeline_imgs,
     total_loss, content_loss, style_loss) = model.run_style_transfer(
         content_path, style_path, num_iterations=1000)

    # Plot outputs ------------------------------------------
    content = img.load(content_path)
    style = img.load(style_path)

    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    img.show(content)
    plt.xticks([])
    plt.yticks([])

    plt.subplot(1, 3, 2)
    img.show(style)
    plt.xticks([])
    plt.yticks([])

    plt.subplot(1, 3, 3)
    plt.imshow(best_img)
    plt.xticks([])
    plt.yticks([])

    fig_compare = plt.gcf()
    if show_output:
        plt.show()

    plt.figure(figsize=(10, 10))
    plt.imshow(best_img)
    plt.xticks([])
    plt.yticks([])
    fig_output = plt.gcf()
    if show_output:
        plt.show()

    num_rows = 2
    num_cols = 5
    plt.figure(figsize=(14, 4))
    for i, image in enumerate(timeline_imgs):
        plt.subplot(num_rows, num_cols, i + 1)
        plt.imshow(image)
        plt.xticks([])
        plt.yticks([])
    fig_timeline = plt.gcf()
    if show_output:
        plt.show()

    # Start a dedicated figure for the loss curves. Without it the curves are
    # drawn onto the last timeline subplot and `fig_loss` would be the very
    # same figure object as `fig_timeline`.
    plt.figure()
    x = np.arange(len(total_loss))
    plt.gca().set_ylim([10e4, 10e7])
    plt.plot(x, np.array(total_loss), label='total loss')
    plt.plot(x, np.array(content_loss), label='content loss')
    plt.plot(x, np.array(style_loss), label='style loss')
    plt.yscale('log')
    plt.xlabel('epoc')
    plt.ylabel('loss')
    plt.legend(loc='upper right')
    fig_loss = plt.gcf()
    if show_output:
        plt.show()

    # Save Images ---------------------------------------------
    if save_output:
        # "<content stem> <style stem>.jpg"
        output_name = os.path.basename(content_path).split('.')[0] + ' ' + \
            os.path.basename(style_path).split('.')[0] + '.jpg'

        fig_output.savefig(os.path.join('..', 'output', output_name),
                           bbox_inches='tight')
        fig_compare.savefig(os.path.join('..', 'compare', output_name),
                            bbox_inches='tight')
        fig_timeline.savefig(os.path.join('..', 'timeline', output_name),
                             bbox_inches='tight')
        fig_loss.savefig(os.path.join('..', 'loss', output_name),
                         bbox_inches='tight')
def main(_):
    """Eager execution workflow with RevNet trained on CIFAR-10.

    Builds the datasets and model from the selected config, optionally wraps
    the hot functions with ``tfe.defun``, optionally restores the latest
    checkpoint, and then iterates over the training set. Every
    ``config.log_every`` steps the model is evaluated (and summaries written
    when ``FLAGS.train_dir`` is set); every ``config.save_every`` steps a
    checkpoint is saved when ``FLAGS.train_dir`` is set.

    Args:
        _: Unused positional argument.
    """
    tf.enable_eager_execution()

    config = get_config(config_name=FLAGS.config, dataset=FLAGS.dataset)
    ds_train, ds_train_one_shot, ds_validation, ds_test = get_datasets(
        data_dir=FLAGS.data_dir, config=config)
    model = revnet.RevNet(config=config)
    global_step = tf.train.get_or_create_global_step(
    )  # Ensure correct summary
    global_step.assign(1)
    # Learning rate follows a piecewise-constant schedule keyed on global_step.
    learning_rate = tf.train.piecewise_constant(global_step,
                                                config.lr_decay_steps,
                                                config.lr_list)
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           momentum=config.momentum)
    checkpointer = tf.train.Checkpoint(optimizer=optimizer,
                                       model=model,
                                       optimizer_step=global_step)

    # Optionally wrap the model methods and the module-level
    # `apply_gradients` with tfe.defun.
    if FLAGS.use_defun:
        model.call = tfe.defun(model.call)
        model.compute_gradients = tfe.defun(model.compute_gradients)
        model.get_moving_stats = tfe.defun(model.get_moving_stats)
        model.restore_moving_stats = tfe.defun(model.restore_moving_stats)
        global apply_gradients  # pylint:disable=global-variable-undefined
        apply_gradients = tfe.defun(apply_gradients)

    # The summary writer only exists when a training directory is given;
    # restoring is only attempted in that case as well.
    if FLAGS.train_dir:
        summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
        if FLAGS.restore:
            latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
            checkpointer.restore(latest_path)
            print("Restored latest checkpoint at path:\"{}\" "
                  "with global_step: {}".format(latest_path,
                                                global_step.numpy()))
            sys.stdout.flush()

    for x, y in ds_train:
        train_one_iter(model, x, y, optimizer, global_step=global_step)

        # Periodic evaluation and logging.
        if global_step.numpy() % config.log_every == 0:
            acc_test, loss_test = evaluate(model, ds_test)

            if FLAGS.validate:
                acc_train, loss_train = evaluate(model, ds_train_one_shot)
                acc_validation, loss_validation = evaluate(
                    model, ds_validation)
                print("Iter {}, "
                      "training set accuracy {:.4f}, loss {:.4f}; "
                      "validation set accuracy {:.4f}, loss {:.4f}; "
                      "test accuracy {:.4f}, loss {:.4f}".format(
                          global_step.numpy(), acc_train, loss_train,
                          acc_validation, loss_validation, acc_test,
                          loss_test))
            else:
                print("Iter {}, test accuracy {:.4f}, loss {:.4f}".format(
                    global_step.numpy(), acc_test, loss_test))
            sys.stdout.flush()

            # Mirror the printed metrics into summaries.
            if FLAGS.train_dir:
                with summary_writer.as_default():
                    with tf.contrib.summary.always_record_summaries():
                        tf.contrib.summary.scalar("Test accuracy", acc_test)
                        tf.contrib.summary.scalar("Test loss", loss_test)
                        if FLAGS.validate:
                            tf.contrib.summary.scalar("Training accuracy",
                                                      acc_train)
                            tf.contrib.summary.scalar("Training loss",
                                                      loss_train)
                            tf.contrib.summary.scalar("Validation accuracy",
                                                      acc_validation)
                            tf.contrib.summary.scalar("Validation loss",
                                                      loss_validation)

        # Periodic checkpointing.
        if global_step.numpy() % config.save_every == 0 and FLAGS.train_dir:
            saved_path = checkpointer.save(
                file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
            print("Saved checkpoint at path: \"{}\" "
                  "with global_step: {}".format(saved_path,
                                                global_step.numpy()))
            sys.stdout.flush()
from datetime import datetime
import multiprocessing as mp
import os
import os.path as osp
import sys
import tensorflow as tf
import utils
from trainer import trainer
from train_manager import train_manager
from player import player
from player_manager import player_manager

# Enable eager execution at import time with GPU memory growth turned on.
config_proto = tf.ConfigProto()
config_proto.gpu_options.allow_growth = True
tf.enable_eager_execution(config=config_proto)


def main():
    """Parse the command line, validate the game mode and normalise GPU ids.

    Exits the process via ``sys.exit()`` after printing a message when
    ``args.mode`` is not among the modes reported by ``utils``.

    NOTE(review): the function may continue beyond what is visible here.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        sys.exit()

    # Convert each GPU id entry to int.
    args.gpu_id = [int(x) for x in args.gpu_id]
def main(unused_argv):
    """Run a CNN model on MNIST data to demonstrate DistributedStrategies.

    The device list is derived from ``FLAGS.num_gpus``: ``None`` lets the
    strategy choose, ``0`` pins the CPU, otherwise that many GPUs are listed.
    Each epoch runs a training pass followed by a test pass and prints the
    accumulated loss/accuracy metrics before resetting them.

    Args:
        unused_argv: Unused positional argument.
    """
    tf.enable_eager_execution()

    num_gpus = FLAGS.num_gpus
    if num_gpus is None:
        devices = None
    elif num_gpus == 0:
        devices = ["/device:CPU:0"]
    else:
        devices = ["/device:GPU:{}".format(i) for i in range(num_gpus)]
    strategy = tf.distribute.MirroredStrategy(devices)

    with strategy.scope():
        train_ds, test_ds = mnist_datasets()
        train_ds = train_ds.shuffle(NUM_TRAIN_IMAGES).batch(FLAGS.batch_size)
        test_ds = test_ds.batch(FLAGS.batch_size)

        model = create_model()
        optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate,
                                               FLAGS.momentum)
        # Running metrics; reset after each epoch's report.
        training_loss = tf.keras.metrics.Mean("training_loss",
                                              dtype=tf.float32)
        training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            "training_accuracy", dtype=tf.float32)
        test_loss = tf.keras.metrics.Mean("test_loss", dtype=tf.float32)
        test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            "test_accuracy", dtype=tf.float32)

        def train_step(inputs):
            """Run one optimisation step and update the training metrics."""
            images, labels = inputs
            with tf.GradientTape() as tape:
                logits = model(images, training=True)
                loss = compute_loss(logits, labels)
            grads = tape.gradient(loss, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))
            training_loss.update_state(loss)
            training_accuracy.update_state(labels, logits)

        def test_step(inputs):
            """Run a forward pass and update the test metrics."""
            images, labels = inputs
            logits = model(images, training=False)
            loss = compute_loss(logits, labels)
            test_loss.update_state(loss)
            test_accuracy.update_state(labels, logits)

        train_iterator = strategy.make_dataset_iterator(train_ds)
        test_iterator = strategy.make_dataset_iterator(test_ds)

        for epoch in range(0, FLAGS.num_epochs):
            # Train
            print("Starting epoch {}".format(epoch))
            train_iterator.initialize()
            for _ in range(NUM_TRAIN_IMAGES // FLAGS.batch_size):
                strategy.experimental_run(train_step, train_iterator)
            print("Training loss: {:0.4f}, accuracy: {:0.2f}%".format(
                training_loss.result(), training_accuracy.result() * 100))
            training_loss.reset_states()
            training_accuracy.reset_states()

            # Test
            test_iterator.initialize()
            for _ in range(NUM_TEST_IMAGES // FLAGS.batch_size):
                strategy.experimental_run(test_step, test_iterator)
            print("Test loss: {:0.4f}, accuracy: {:0.2f}%".format(
                test_loss.result(), test_accuracy.result() * 100))
            test_loss.reset_states()
            test_accuracy.reset_states()
def enable_tf_eager(tf):
    """Turn on eager execution for the given module and report its state.

    Args:
        tf: Object exposing ``enable_eager_execution()`` and
            ``executing_eagerly()`` (the TensorFlow module in practice).
    """
    tf.enable_eager_execution()
    eager_state = tf.executing_eagerly()
    print("Eager execution: {}".format(eager_state))
#
# works on
#
# tensorboard 1.14.0
# tensorflow 1.14.0
# tensorflow-estimator 1.14.0
#
# Will give warnings but that's fine
#
import tensorflow as tf
import pandas as pd
import numpy as np

tf.enable_eager_execution()  # eager

# Source locations of the Iris training and test CSV files.
train_ds_url = "http://download.tensorflow.org/data/iris_training.csv"
test_ds_url = "http://download.tensorflow.org/data/iris_test.csv"
ds_columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Plants']
# Use the builtin `object` dtype: the `np.object` alias is deprecated and
# removed in newer NumPy releases, while `object` works on every version.
species = np.array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)

# The label to predict
categories = 'Plants'

# Download the files (named after the last URL path component).
train_path = tf.keras.utils.get_file(train_ds_url.split('/')[-1], train_ds_url)
test_path = tf.keras.utils.get_file(test_ds_url.split('/')[-1], test_ds_url)

# pop removes the Plants columns
def run_keras_model_benchmark(_):
    """Fit a Keras application model on synthetic data and log the metrics.

    Args:
        _: Unused positional argument.

    Raises:
        AssertionError: If ``FLAGS.model`` is not a key of ``MODELS``.
        ValueError: If ``FLAGS.use_synthetic_data`` is false.
    """
    # The model name must be registered.
    if FLAGS.model not in MODELS.keys():
        raise AssertionError("The --model command line argument should "
                             "be a key in the `MODELS` dictionary.")

    # Switch to eager mode when requested.
    if FLAGS.eager:
        tf.logging.info("Eager execution is enabled...")
        tf.enable_eager_execution()

    # Build the network with random weights.
    tf.logging.info("Benchmark on {} model...".format(FLAGS.model))
    model_factory = MODELS[FLAGS.model]
    model = model_factory(weights=None)

    # Input data: synthetic only.
    dataset_name = "ImageNet"
    if not FLAGS.use_synthetic_data:
        raise ValueError("Only synthetic dataset is supported!")
    tf.logging.info("Using synthetic dataset...")
    dataset_name += "_Synthetic"
    train_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, FLAGS.batch_size)
    val_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, FLAGS.batch_size)

    # Multi-GPU wrapping. Under eager execution only one GPU is utilized
    # even if several are provided, hence the warning.
    gpu_count = flags_core.get_num_gpus(FLAGS)
    if gpu_count > 1:
        if FLAGS.eager:
            tf.logging.warning(
                "{} GPUs are provided, but only one GPU is utilized as "
                "eager execution is enabled.".format(gpu_count))
        model = tf.keras.utils.multi_gpu_model(model, gpus=gpu_count)

    model.compile(loss="categorical_crossentropy",
                  optimizer=tf.train.AdamOptimizer(),
                  metrics=["accuracy"])

    # Register the run with the benchmark logger.
    run_params = {
        "batch_size": FLAGS.batch_size,
        "synthetic_data": FLAGS.use_synthetic_data,
        "train_epochs": FLAGS.train_epochs,
        "num_train_images": FLAGS.num_images,
        "num_eval_images": FLAGS.num_images,
    }
    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info(model_name=FLAGS.model,
                                  dataset_name=dataset_name,
                                  run_params=run_params,
                                  test_id=FLAGS.benchmark_test_id)

    # Metric-logging callbacks for training and evaluation.
    metric_callbacks = model_callbacks.get_model_callbacks(
        FLAGS.callbacks,
        batch_size=FLAGS.batch_size,
        metric_logger=benchmark_logger)

    # Training and validation use the same number of steps.
    steps = int(np.ceil(FLAGS.num_images / FLAGS.batch_size))
    fit_history = model.fit(
        train_dataset,
        epochs=FLAGS.train_epochs,
        callbacks=metric_callbacks,
        validation_data=val_dataset,
        steps_per_epoch=steps,
        validation_steps=steps)

    tf.logging.info("Logging the evaluation results...")
    for epoch_index in range(FLAGS.train_epochs):
        steps_so_far = (epoch_index + 1) * np.ceil(
            FLAGS.num_images / FLAGS.batch_size)
        benchmark_logger.log_evaluation_result({
            "accuracy": fit_history.history["val_acc"][epoch_index],
            "loss": fit_history.history["val_loss"][epoch_index],
            tf.GraphKeys.GLOBAL_STEP: steps_so_far,
        })

    # Clear the session explicitly to avoid session delete error
    tf.keras.backend.clear_session()
import tensorflow tensorflow.enable_eager_execution() batch_size = 128 epochs = 10 (x_train, y_train), (x_test, y_test) = tensorflow.keras.datasets.mnist.load_data() x_train = x_train.reshape(-1, 28, 28, 1) x_test = x_test.reshape(-1, 28, 28, 1) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 y_train = tensorflow.keras.utils.to_categorical(y_train, 10) y_test = tensorflow.keras.utils.to_categorical(y_test, 10) model = tensorflow.keras.Sequential([ tensorflow.keras.layers.Conv2D(32, 5, padding='same', activation='relu', input_shape=(28, 28, 1)), tensorflow.keras.layers.MaxPooling2D((2, 2), (2, 2), padding='same'), tensorflow.keras.layers.BatchNormalization(), tensorflow.keras.layers.Conv2D(64, 5, padding='same', activation='relu'), tensorflow.keras.layers.MaxPooling2D((2, 2), (2, 2), padding='same'), tensorflow.keras.layers.Flatten(), tensorflow.keras.layers.Dense(1024, activation='relu'), tensorflow.keras.layers.Dropout(0.4),
def setUp(self):
    """Enable TensorFlow eager execution as part of the fixture setup."""
    tf.enable_eager_execution()
def run(flags_obj):
    """Run ResNet ImageNet training and eval loop using native Keras APIs.

    Args:
      flags_obj: An object containing parsed flag values.

    Raises:
      ValueError: If fp16 is passed as it is not currently supported.

    Returns:
      The stats object produced by ``keras_common.build_stats`` from the
      training history, the evaluation output (``None`` when evaluation is
      skipped) and the time callback.
    """
    if flags_obj.enable_eager:
        tf.enable_eager_execution()

    # NOTE(review): this compares the result of `get_tf_dtype` with the
    # string 'fp16'; if that helper returns a dtype object rather than a
    # string the check may never trigger -- confirm.
    dtype = flags_core.get_tf_dtype(flags_obj)
    if dtype == 'fp16':
        raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                         'value(fp32).')

    # Default the image layout from the CUDA build status when unset.
    data_format = flags_obj.data_format
    if data_format is None:
        data_format = ('channels_first'
                       if tf.test.is_built_with_cuda() else 'channels_last')
    tf.keras.backend.set_image_data_format(data_format)

    # pylint: disable=protected-access
    if flags_obj.use_synthetic_data:
        input_fn = keras_common.get_synth_input_fn(
            height=imagenet_main.DEFAULT_IMAGE_SIZE,
            width=imagenet_main.DEFAULT_IMAGE_SIZE,
            num_channels=imagenet_main.NUM_CHANNELS,
            num_classes=imagenet_main.NUM_CLASSES,
            dtype=flags_core.get_tf_dtype(flags_obj))
    else:
        input_fn = imagenet_main.input_fn

    train_input_dataset = input_fn(is_training=True,
                                   data_dir=flags_obj.data_dir,
                                   batch_size=flags_obj.batch_size,
                                   num_epochs=flags_obj.train_epochs,
                                   parse_record_fn=parse_record_keras)

    eval_input_dataset = input_fn(is_training=False,
                                  data_dir=flags_obj.data_dir,
                                  batch_size=flags_obj.batch_size,
                                  num_epochs=flags_obj.train_epochs,
                                  parse_record_fn=parse_record_keras)

    strategy = distribution_utils.get_distribution_strategy(
        num_gpus=flags_obj.num_gpus,
        turn_off_distribution_strategy=flags_obj.turn_off_distribution_strategy)

    strategy_scope = keras_common.get_strategy_scope(strategy)

    # Model and optimizer are created and compiled inside the strategy scope.
    with strategy_scope:
        optimizer = keras_common.get_optimizer()
        model = resnet_model.resnet50(num_classes=imagenet_main.NUM_CLASSES)

        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['sparse_categorical_accuracy'])

    time_callback, tensorboard_callback, lr_callback = keras_common.get_callbacks(
        learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])

    train_steps = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
    train_epochs = flags_obj.train_epochs

    # An explicit step limit caps the steps and collapses training to a
    # single epoch.
    if flags_obj.train_steps:
        train_steps = min(flags_obj.train_steps, train_steps)
        train_epochs = 1

    num_eval_steps = (imagenet_main.NUM_IMAGES['validation'] //
                      flags_obj.batch_size)

    validation_data = eval_input_dataset
    if flags_obj.skip_eval:
        # Only build the training graph. This reduces memory usage introduced by
        # control flow ops in layers that have different implementations for
        # training and inference (e.g., batch norm).
        tf.keras.backend.set_learning_phase(1)
        num_eval_steps = None
        validation_data = None

    history = model.fit(train_input_dataset,
                        epochs=train_epochs,
                        steps_per_epoch=train_steps,
                        callbacks=[
                            time_callback,
                            lr_callback,
                            tensorboard_callback
                        ],
                        validation_steps=num_eval_steps,
                        validation_data=validation_data,
                        verbose=1)

    # Final evaluation is skipped together with per-epoch validation.
    eval_output = None
    if not flags_obj.skip_eval:
        eval_output = model.evaluate(eval_input_dataset,
                                     steps=num_eval_steps,
                                     verbose=1)
    stats = keras_common.build_stats(history, eval_output, time_callback)
    return stats
def run_process(
        state_size,
        action_size,
        global_model,
        opt,
        result_queue,
        worker_idx,
        save_lock,
        high_score,
        global_episode,
        global_moving_average_reward,
        game_name='bombermandiehard-v0',
        save_dir='/tmp'):
    """Worker loop: play episodes locally and push gradients to the global model.

    A local ``ActorCriticModel`` selects actions. Every ``args.update_freq``
    steps, or when an episode ends, the loss is computed on the local model,
    its gradients are applied to ``global_model`` through ``opt``, and the
    local weights are re-synchronised from the global model.

    Args:
        state_size: Passed to ``ActorCriticModel.initialize``.
        action_size: Number of discrete actions to sample from.
        global_model: Shared model that receives the gradients.
        opt: Optimizer exposing ``apply_gradients``.
        result_queue: Queue handed to ``record``; receives ``'stop'`` at exit.
        worker_idx: Identifier of this worker, used in log output.
        save_lock: Lock guarding weight saving and the related print.
        high_score: Shared value (``.value``) holding the best episode score.
        global_episode: Shared value (``.value``) counting finished episodes.
        global_moving_average_reward: Shared value updated from ``record``.
        game_name: Gym environment id.
        save_dir: Directory the best weights are written to.

    Returns:
        0 once ``global_episode.value`` reaches ``args.max_eps``.
    """
    local_model = ActorCriticModel()
    local_model.initialize(state_size, action_size)
    env = gym.make(game_name).unwrapped
    ep_loss = 0.0
    best_score = high_score

    total_step = 1
    mem = Memory()

    # Imported inside the worker (kept as in the original code).
    import tensorflow as tf
    from tensorflow.python import keras
    from tensorflow.python.keras import layers, Model
    from tensorflow.python.keras.layers import Dense, Flatten, Activation
    import keras.backend as K
    tf.enable_eager_execution()
    K.set_session(tf.Session())

    while global_episode.value < args.max_eps:
        current_state = generate_state(env.reset(), WINDOW_LENGTH)
        mem.clear()
        ep_reward = 0.
        ep_steps = 0
        ep_loss = 0

        time_count = 0
        done = False
        while not done:
            # Sample an action from the local policy.
            logits, _ = local_model(
                tf.convert_to_tensor(current_state[None, :],
                                     dtype=tf.float32))
            probs = tf.nn.softmax(logits)
            action = np.random.choice(action_size, p=probs.numpy()[0])
            new_state, reward, done, _ = env.step(action)
            new_state = push_state(new_state, current_state)
            if done:
                reward = -1
            ep_reward += reward
            mem.store(current_state, action, reward)

            if time_count == args.update_freq or done:
                # Calculate gradient wrt to local model. We do so by tracking
                # the variables involved in computing the loss by using
                # tf.GradientTape
                with tf.GradientTape() as tape:
                    total_loss = compute_loss(done,
                                              new_state,
                                              mem,
                                              local_model,
                                              args.gamma)
                ep_loss += total_loss
                # Calculate local gradients
                grads = tape.gradient(total_loss,
                                      local_model.trainable_weights)
                # Push local gradients to global model. `trainable_weights`
                # is an attribute (as used on local_model above), not a
                # method, so it must not be called.
                opt.apply_gradients(zip(grads,
                                        global_model.trainable_weights))
                # Update local model with new weights
                local_model.set_weights(global_model.get_weights())

                mem.clear()
                time_count = 0

                if done:  # done and print information
                    global_moving_average_reward.value = \
                        record(global_episode, ep_reward, worker_idx,
                               global_moving_average_reward, result_queue,
                               ep_loss, ep_steps)
                    # We must use a lock to save our model and to print to
                    # prevent data races.
                    if ep_reward > best_score.value:
                        with save_lock:
                            print("Saving best model to {}, "
                                  "episode score: {}".format(save_dir,
                                                             ep_reward))
                            global_model.save_weights(
                                os.path.join(save_dir,
                                             'model_{}.h5'.format(game_name))
                            )
                            best_score.value = ep_reward
                    global_episode.value = global_episode.value + 1
            ep_steps += 1

            time_count += 1
            current_state = new_state
            total_step += 1
    print("Stopped {}".format(worker_idx))
    result_queue.put('stop')
    return 0
def main(_):
  """Train a small MNIST convnet eagerly, data-parallel through Horovod.

  Reads `threads`, `pools` and `args` from module scope. Every process
  trains on its own shuffled stream; rank 0 writes a checkpoint at the end.
  """
  # Horovod: initialize Horovod.
  hvd.init()

  os.environ.update({
      'KMP_SETTINGS': str(1),
      'KMP_BLOCKTIME': str(0),
      'OMP_NUM_THREADS': str(threads),
      'KMP_AFFINITY': 'granularity=fine,compact,1,0',
  })

  session_config = tf.ConfigProto(
      intra_op_parallelism_threads=threads,
      inter_op_parallelism_threads=pools)
  session_config.gpu_options.visible_device_list = str(hvd.local_rank())
  tf.enable_eager_execution(config=session_config)

  net = tf.keras.Sequential([
      tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
      tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
      tf.keras.layers.GlobalAveragePooling2D(),
      tf.keras.layers.Dense(10)
  ])

  # Horovod: adjust learning rate based on number of GPUs.
  optimizer = tf.train.RMSPropOptimizer(0.001 * hvd.size())

  mnist_path = os.path.join(args.datadir, 'mnist.npz')
  (train_images, train_labels), _ = tf.keras.datasets.mnist.load_data(
      path=mnist_path)

  scaled_images = tf.cast(train_images[..., tf.newaxis] / 255.0, tf.float32)
  int_labels = tf.cast(train_labels, tf.int64)
  batches = tf.data.Dataset.from_tensor_slices((scaled_images, int_labels))
  batches = batches.shuffle(1000).batch(args.batch_size)

  ckpt_path = os.path.join(args.modeldir, 'checkpoints')
  step_counter = tf.train.get_or_create_global_step()
  saver = tf.train.Checkpoint(model=net, optimizer=optimizer,
                              step_counter=step_counter)

  # Horovod: adjust number of steps based on number of GPUs.
  num_steps = 2000 // hvd.size()
  for step, (images, labels) in enumerate(batches.take(num_steps)):
    with tf.GradientTape() as tape:
      logits = net(images, training=True)
      loss_value = tf.losses.sparse_softmax_cross_entropy(labels, logits)

    # Horovod: broadcast initial variable states from rank 0 to all other
    # processes. This is necessary to ensure consistent initialization of
    # all workers when training is started with random weights or restored
    # from a checkpoint.
    if step == 0:
      hvd.broadcast_variables(net.variables, root_rank=0)

    # Horovod: add Horovod Distributed GradientTape.
    tape = hvd.DistributedGradientTape(tape)

    gradients = tape.gradient(loss_value, net.variables)
    optimizer.apply_gradients(
        zip(gradients, net.variables),
        global_step=tf.train.get_or_create_global_step())

    should_log = step % 10 == 0 and hvd.local_rank() == 0
    if should_log:
      print('Step #%d\tLoss: %.6f' % (step, loss_value))

  # Horovod: save checkpoints only on worker 0 to prevent other workers from
  # corrupting it.
  if hvd.rank() == 0:
    saver.save(ckpt_path)
def main_process():
    """Evaluate the latest checkpoint on the test split and print metrics.

    Prepares the datasets under `args.data_dir` if the test images file is
    missing, restores the newest checkpoint from `args.chkpt_dir`, predicts
    on the test images and prints binary accuracy, precision, recall, the
    derived F1 score and the binary cross-entropy loss.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    args = manage_arguments()

    # Build every path with os.path.join so the existence check and the
    # loaders agree, whether or not `data_dir` ends with a separator.
    test_images_path = os.path.join(args.data_dir, 'Test_Images.TFRecord')
    test_labels_path = os.path.join(args.data_dir, 'Test_Labels.TFRecord')
    label_info_path = os.path.join(args.data_dir, 'label_info.csv')

    # prepare the dataset if necessary
    if not os.path.isfile(test_images_path):
        tf.gfile.MakeDirs(args.data_dir)
        dataloader.prepare_datasets(args.data_dir)

    # Map class index -> human readable label.
    real_labels = {}
    with open(label_info_path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            # Skip blank rows and the '#'-prefixed header.
            if not row or row[0].strip().startswith('#'):
                continue
            real_labels[int(row[0])] = row[1]

    tf.enable_eager_execution()

    # define the model
    conv_list = [(2, 64), (2, 128), (3, 256)]
    dense_list = [args.fc_size] * args.num_fc
    dense_list.append(17)

    network_model = model.create_model(input_shape=(224, 224, 3),
                                       conv_list=conv_list,
                                       dense_list=dense_list,
                                       kernel_size=args.kernel_size,
                                       strides=args.strides,
                                       pool_size=args.pool_size,
                                       dropout_rate=args.dropout_rate,
                                       output_activation='sigmoid',
                                       layer_activation='relu')
    network_model.summary()

    ckpt = tf.train.Checkpoint(model=network_model)
    status = ckpt.restore(tf.train.latest_checkpoint(args.chkpt_dir))
    status.assert_existing_objects_matched()

    # define the test dataloaders
    ds_test_x = dataloader.input_dataset_fn(
        batch_size=args.batch_size,
        image_file=test_images_path,
        label_file=None,
        repeat=False,
        shuffle=False,
        drop_remainder=False,
        data_augmentation=False,
    )

    predictions = network_model.predict(ds_test_x)
    num_elements = len(predictions)

    # Load all labels as a single batch so they line up with `predictions`.
    ds_test_y = dataloader.input_dataset_fn(
        batch_size=num_elements,
        image_file=None,
        label_file=test_labels_path,
        repeat=False,
        shuffle=False,
        drop_remainder=False,
        data_augmentation=False,
    )
    ground_truth = next(iter(ds_test_y))

    # define the loss function and metrics
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    metrics = [
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall()
    ]

    loss_test = loss_fn(ground_truth, predictions)
    for metric in metrics:
        metric.update_state(ground_truth, predictions)

    metric_values = {
        'test_' + metric.name: metric.result().numpy() for metric in metrics
    }
    metric_values['loss_test'] = loss_test.numpy()

    test_precision = metric_values.get('test_precision', 0.0)
    test_recall = metric_values.get('test_recall', 0.0)
    denom = test_precision + test_recall
    if denom <= 0:
        # Avoid dividing by zero when nothing was predicted positive.
        denom = 1e-5
    metric_values['test_f1'] = 2 * (test_precision * test_recall) / denom

    # Group the predicted label names (score > 0.5) by sample index.
    item_classes = {}
    for index, a_class in np.argwhere(predictions > 0.5):
        item_classes.setdefault(index, []).append(real_labels[a_class])

    print("\n\n")
    print(metric_values)
    print("\n\n")
model.add(tf.keras.layers.Dense(206,activation='sigmoid',input_shape=(features,))) model.compile(optimizer='rmsprop', loss='binary_crossentropy') return model p_min = 0.001 p_max = 0.999 def logloss(y_true, y_pred): y_pred = tf.clip_by_value(y_pred,p_min,p_max) return -backend.mean(y_true*backend.log(y_pred) + (1-y_true)*backend.log(1-y_pred)) # pd.set_option('display.max_columns', None) # pd.set_option('display.max_rows', None) # np.set_printoptions(threshold=np.inf) ''' tf.enable_eager_execution() label = [[0,0,0,1]] inP = [[0,0,0,0.8]] m = tf.keras.metrics.Accuracy() m.update_state(label, inP) print(m.result().numpy()) ''' # This is made by sihyun's feature selection. drop_columns = ['sig_id'] train_input = pd.read_csv('./lish-moa/train_features.csv') train_output = pd.read_csv('./lish-moa/train_targets_scored.csv') test_input = pd.read_csv('./lish-moa/test_features.csv') print('Done Input') test_id = test_input['sig_id']
def main(_):
  """Enable eager execution with soft placement and run `train_eval`."""
  tf.logging.set_verbosity(tf.logging.INFO)

  session_config = tf.ConfigProto(allow_soft_placement=True)
  tf.enable_eager_execution(config=session_config)

  train_eval(FLAGS.root_dir, num_iterations=FLAGS.num_iterations)
def main(_):
  """Eager execution workflow with RevNet trained on CIFAR-10.

  Builds the train/validation/test datasets from TFRecords under
  `FLAGS.data_dir`, trains with a momentum optimizer, evaluates and logs
  every `config.log_every` steps, and checkpoints into `FLAGS.train_dir`
  every `config.save_every` steps.

  Raises:
    ValueError: If `FLAGS.data_dir` is unset or does not exist.
  """
  if FLAGS.data_dir is None:
    raise ValueError("No supplied data directory")

  if not os.path.exists(FLAGS.data_dir):
    raise ValueError("Data directory {} does not exist".format(FLAGS.data_dir))

  tf.enable_eager_execution()
  config = config_.get_hparams_cifar_38()
  model = revnet.RevNet(config=config)

  ds_train = cifar_input.get_ds_from_tfrecords(
      data_dir=FLAGS.data_dir,
      split="train",
      data_aug=True,
      batch_size=config.batch_size,
      epochs=config.epochs,
      shuffle=config.shuffle,
      data_format=config.data_format,
      dtype=config.dtype,
      prefetch=config.prefetch)

  ds_validation = cifar_input.get_ds_from_tfrecords(
      data_dir=FLAGS.data_dir,
      split="validation",
      data_aug=False,
      batch_size=config.eval_batch_size,
      epochs=1,
      data_format=config.data_format,
      dtype=config.dtype,
      prefetch=config.prefetch)

  ds_test = cifar_input.get_ds_from_tfrecords(
      data_dir=FLAGS.data_dir,
      split="test",
      data_aug=False,
      batch_size=config.eval_batch_size,
      epochs=1,
      data_format=config.data_format,
      dtype=config.dtype,
      prefetch=config.prefetch)

  global_step = tfe.Variable(1, trainable=False)

  def learning_rate():  # TODO(lxuechen): Remove once cl/201089859 is in place
    return tf.train.piecewise_constant(global_step, config.lr_decay_steps,
                                       config.lr_list)

  optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
  checkpoint = tf.train.Checkpoint(
      optimizer=optimizer, model=model, optimizer_step=global_step)

  if FLAGS.train_dir:
    summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
    if FLAGS.restore:
      latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
      checkpoint.restore(latest_path)

  for x, y in ds_train:
    loss = train_one_iter(model, x, y, optimizer, global_step=global_step)

    # Compare the concrete step value, not a tensor expression, so the
    # logging condition matches the checkpointing condition below.
    if global_step.numpy() % config.log_every == 0:
      it_validation = ds_validation.make_one_shot_iterator()
      it_test = ds_test.make_one_shot_iterator()
      acc_validation = evaluate(model, it_validation)
      acc_test = evaluate(model, it_test)
      print("Iter {}, "
            "train loss {}, "
            "validation accuracy {}, "
            "test accuracy {}".format(global_step.numpy(), loss,
                                      acc_validation, acc_test))

      if FLAGS.train_dir:
        with summary_writer.as_default():
          with tf.contrib.summary.always_record_summaries():
            tf.contrib.summary.scalar("Validation accuracy", acc_validation)
            tf.contrib.summary.scalar("Test accuracy", acc_test)
            tf.contrib.summary.scalar("Training loss", loss)

    if global_step.numpy() % config.save_every == 0 and FLAGS.train_dir:
      # Join with a path separator so the checkpoint lands inside
      # `train_dir`, where `latest_checkpoint(FLAGS.train_dir)` looks.
      checkpoint.save(file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
def run_mnist_eager(flags_obj):
  """Train and evaluate the MNIST model with eager execution.

  Args:
    flags_obj: An object containing parsed flag values.
  """
  tf.enable_eager_execution()
  model_helpers.apply_clean(flags.FLAGS)

  # Pick the device automatically, with the matching default data_format.
  if flags_obj.no_gpu or not tf.test.is_gpu_available():
    device, data_format = '/cpu:0', 'channels_last'
  else:
    device, data_format = '/gpu:0', 'channels_first'
  # An explicit data_format flag wins over the automatic choice.
  if flags_obj.data_format is not None:
    data_format = flags_obj.data_format
  print('Using device %s, and data format %s.' % (device, data_format))

  # Input pipelines.
  batch_size = flags_obj.batch_size
  train_ds = mnist_dataset.train(flags_obj.data_dir)
  train_ds = train_ds.shuffle(60000).batch(batch_size)
  test_ds = mnist_dataset.test(flags_obj.data_dir).batch(batch_size)

  # Model and optimizer.
  model = mnist.create_model(data_format)
  optimizer = tf.train.MomentumOptimizer(flags_obj.lr, flags_obj.momentum)

  # TensorBoard summaries go to <output_dir>/train and <output_dir>/eval;
  # inspect them with `tensorboard --logdir=<output_dir>`.
  train_dir = None
  test_dir = None
  if flags_obj.output_dir:
    train_dir = os.path.join(flags_obj.output_dir, 'train')
    test_dir = os.path.join(flags_obj.output_dir, 'eval')
    tf.gfile.MakeDirs(flags_obj.output_dir)

  summary_writer = tf.contrib.summary.create_file_writer(
      train_dir, flush_millis=10000)
  test_summary_writer = tf.contrib.summary.create_file_writer(
      test_dir, flush_millis=10000, name='test')

  # Checkpoint object; restores lazily if a checkpoint exists in model_dir.
  checkpoint_prefix = os.path.join(flags_obj.model_dir, 'ckpt')
  step_counter = tf.train.get_or_create_global_step()
  checkpoint = tf.train.Checkpoint(
      model=model, optimizer=optimizer, step_counter=step_counter)
  checkpoint.restore(tf.train.latest_checkpoint(flags_obj.model_dir))

  # One train pass, one test pass and one checkpoint per epoch.
  with tf.device(device):
    for _ in range(flags_obj.train_epochs):
      epoch_start = time.time()
      with summary_writer.as_default():
        train(model, optimizer, train_ds, step_counter,
              flags_obj.log_interval)
      epoch_end = time.time()

      epoch_number = checkpoint.save_counter.numpy() + 1
      print('\nTrain time for epoch #%d (%d total steps): %f' %
            (epoch_number, step_counter.numpy(), epoch_end - epoch_start))

      with test_summary_writer.as_default():
        test(model, test_ds)
      checkpoint.save(checkpoint_prefix)
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import numpy as np

# Minimal walkthrough of eager execution: enable it, then run a few ops and
# print their concrete results.
""" To start eager execution, add tf.enable_eager_execution() to the beginning of the program or console session. Do not add this operation to other modules that the program calls. """
# NOTE(review): this prints the return value of enable_eager_execution(),
# presumably None -- confirm the print is intentional.
print(tf.enable_eager_execution())

# Returns True if the current thread has eager execution enabled.
print(tf.executing_eagerly())

# Ops run immediately and return concrete values.
x = [[2.]]
m = tf.matmul(x, x)
print("hello, {}".format(m))

a = tf.constant([[1, 2], [3, 4]])
print(a)

# Broadcasting support
b = tf.add(a, 1)
print(b)

# Operator overloading is supported
print(a * b)

# The tensors are passed directly to a NumPy function.
c = np.multiply(a, b)
print(c)
import networkx as nx
import numpy as np
import tensorflow as tf
# import torch as th

# Let TensorFlow grow GPU memory on demand instead of reserving it up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.enable_eager_execution(config)


def read_edgelist(f, create_using=None):
    """Read an integer edge list and make its node ids contiguous.

    Every integer between the smallest and the largest node id found in the
    file is added as an (isolated) node if it is not already present.

    Args:
        f: File or path accepted by `networkx.read_edgelist`.
        create_using: Graph to populate; a new `nx.Graph` when None.

    Returns:
        The populated graph. An empty edge list yields an empty graph.
    """
    if create_using is None:
        create_using = nx.Graph()

    g = nx.read_edgelist(f, create_using=create_using, nodetype=int)
    if g.number_of_nodes() == 0:
        # Nothing to fill in; min()/max() below would raise on no nodes.
        return g

    lowest, highest = min(g.nodes()), max(g.nodes())
    if g.number_of_nodes() < highest - lowest + 1:
        # Adding an existing node is a no-op, so only the gaps are created.
        g.add_nodes_from(range(lowest, highest + 1))

    return g


def sparse_sp2tf(matrix):
    """Convert a SciPy-style sparse matrix into a `tf.SparseTensor`.

    Args:
        matrix: Object exposing `tocoo()`, whose result provides `row`,
            `col`, `data` and `shape`.

    Returns:
        A `tf.SparseTensor` with one `[row, col]` index per stored entry.
    """
    coo = matrix.tocoo()
    idx = [[i, j] for i, j in zip(coo.row, coo.col)]
    return tf.SparseTensor(idx, coo.data.tolist(), coo.shape)
def run(flags_obj):
  """Train and evaluate ResNet-50 on ImageNet with the native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.
  """
  if flags_obj.enable_eager:
    tf.enable_eager_execution()

  # NOTE(review): `get_tf_dtype` presumably returns a tf.DType; if so this
  # comparison against the string 'fp16' can never be true -- confirm.
  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == 'fp16':
    raise ValueError(
        'dtype fp16 is not supported in Keras. Use the default '
        'value(fp32).')

  per_device_batch_size = distribution_utils.per_device_batch_size(
      flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    input_fn = keras_common.get_synth_input_fn(
        height=imagenet_main.DEFAULT_IMAGE_SIZE,
        width=imagenet_main.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_main.NUM_CHANNELS,
        num_classes=imagenet_main.NUM_CLASSES,
        dtype=flags_core.get_tf_dtype(flags_obj))
  else:
    input_fn = imagenet_main.input_fn

  # Both pipelines share everything except the training switch.
  shared_input_args = dict(
      data_dir=flags_obj.data_dir,
      batch_size=per_device_batch_size,
      num_epochs=flags_obj.train_epochs,
      parse_record_fn=parse_record_keras)
  train_input_dataset = input_fn(is_training=True, **shared_input_args)
  eval_input_dataset = input_fn(is_training=False, **shared_input_args)

  optimizer = keras_common.get_optimizer()
  strategy = distribution_utils.get_distribution_strategy(
      flags_obj.num_gpus, flags_obj.turn_off_distribution_strategy)

  model = resnet_model.resnet50(num_classes=imagenet_main.NUM_CLASSES)
  model.compile(loss='sparse_categorical_crossentropy',
                optimizer=optimizer,
                metrics=['sparse_categorical_accuracy'],
                distribute=strategy)

  time_callback, tensorboard_callback, lr_callback = (
      keras_common.get_callbacks(learning_rate_schedule,
                                 imagenet_main.NUM_IMAGES['train']))

  # A `train_steps` flag caps the run to a single, possibly shorter, epoch.
  steps_per_epoch = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
  epochs = flags_obj.train_epochs
  if flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    epochs = 1

  eval_steps = imagenet_main.NUM_IMAGES['validation'] // flags_obj.batch_size
  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    eval_steps = None
    validation_data = None

  model.fit(train_input_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            callbacks=[time_callback, lr_callback, tensorboard_callback],
            validation_steps=eval_steps,
            validation_data=validation_data,
            verbose=1)

  if flags_obj.skip_eval:
    return
  model.evaluate(eval_input_dataset, steps=eval_steps, verbose=1)
def main(_):
  """Eager execution workflow with RevNet trained on CIFAR-10.

  With `FLAGS.validate` the model trains on the "train" split and is also
  evaluated on "validation"; otherwise it trains on "train_all". Loss and
  accuracy on the whole training and test sets are reported every
  `config.log_every` steps, and a checkpoint is written to
  `FLAGS.train_dir` every `config.save_every` steps.

  Raises:
    ValueError: If `FLAGS.data_dir` is unset or does not exist.
  """
  if FLAGS.data_dir is None:
    raise ValueError("No supplied data directory")

  if not os.path.exists(FLAGS.data_dir):
    raise ValueError("Data directory {} does not exist".format(FLAGS.data_dir))

  tf.enable_eager_execution()
  config = config_.get_hparams_cifar_38()

  if FLAGS.validate:
    # 40k Training set
    ds_train = cifar_input.get_ds_from_tfrecords(
        data_dir=FLAGS.data_dir,
        split="train",
        data_aug=True,
        batch_size=config.batch_size,
        epochs=config.epochs,
        shuffle=config.shuffle,
        data_format=config.data_format,
        dtype=config.dtype,
        prefetch=config.batch_size)
    # 10k Training set
    ds_validation = cifar_input.get_ds_from_tfrecords(
        data_dir=FLAGS.data_dir,
        split="validation",
        data_aug=False,
        batch_size=config.eval_batch_size,
        epochs=1,
        shuffle=False,
        data_format=config.data_format,
        dtype=config.dtype,
        prefetch=config.eval_batch_size)
  else:
    # 50k Training set
    ds_train = cifar_input.get_ds_from_tfrecords(
        data_dir=FLAGS.data_dir,
        split="train_all",
        data_aug=True,
        batch_size=config.batch_size,
        epochs=config.epochs,
        shuffle=config.shuffle,
        data_format=config.data_format,
        dtype=config.dtype,
        prefetch=config.batch_size)

  # Always compute loss and accuracy on whole training and test set
  ds_train_one_shot = cifar_input.get_ds_from_tfrecords(
      data_dir=FLAGS.data_dir,
      split="train_all",
      data_aug=False,
      batch_size=config.eval_batch_size,
      epochs=1,
      shuffle=False,
      data_format=config.data_format,
      dtype=config.dtype,
      prefetch=config.eval_batch_size)

  ds_test = cifar_input.get_ds_from_tfrecords(
      data_dir=FLAGS.data_dir,
      split="test",
      data_aug=False,
      batch_size=config.eval_batch_size,
      epochs=1,
      shuffle=False,
      data_format=config.data_format,
      dtype=config.dtype,
      prefetch=config.eval_batch_size)

  model = revnet.RevNet(config=config)
  global_step = tfe.Variable(1, trainable=False)
  learning_rate = tf.train.piecewise_constant(
      global_step, config.lr_decay_steps, config.lr_list)
  optimizer = tf.train.MomentumOptimizer(
      learning_rate, momentum=config.momentum)
  checkpointer = tf.train.Checkpoint(
      optimizer=optimizer, model=model, optimizer_step=global_step)

  if FLAGS.train_dir:
    summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
    if FLAGS.restore:
      latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
      checkpointer.restore(latest_path)
      print("Restored latest checkpoint at path:\"{}\" "
            "with global_step: {}".format(latest_path, global_step.numpy()))
      sys.stdout.flush()

  warmup(model, config)

  for x, y in ds_train:
    loss = train_one_iter(model, x, y, optimizer, global_step=global_step)

    if global_step.numpy() % config.log_every == 0:
      it_train = ds_train_one_shot.make_one_shot_iterator()
      acc_train, loss_train = evaluate(model, it_train)
      it_test = ds_test.make_one_shot_iterator()
      acc_test, loss_test = evaluate(model, it_test)

      if FLAGS.validate:
        it_validation = ds_validation.make_one_shot_iterator()
        acc_validation, loss_validation = evaluate(model, it_validation)
        # The validation-loss spec used to be the invalid "{:4.f}", which
        # raised ValueError here; the "; " separator was missing as well.
        print("Iter {}, "
              "training set accuracy {:.4f}, loss {:.4f}; "
              "validation set accuracy {:.4f}, loss {:.4f}; "
              "test accuracy {:.4f}, loss {:.4f}".format(
                  global_step.numpy(), acc_train, loss_train, acc_validation,
                  loss_validation, acc_test, loss_test))
      else:
        print("Iter {}, "
              "training set accuracy {:.4f}, loss {:.4f}; "
              "test accuracy {:.4f}, loss {:.4f}".format(
                  global_step.numpy(), acc_train, loss_train, acc_test,
                  loss_test))
      sys.stdout.flush()

      if FLAGS.train_dir:
        with summary_writer.as_default():
          with tf.contrib.summary.always_record_summaries():
            tf.contrib.summary.scalar("Training loss", loss)
            tf.contrib.summary.scalar("Test accuracy", acc_test)
            if FLAGS.validate:
              tf.contrib.summary.scalar("Validation accuracy",
                                        acc_validation)

    if global_step.numpy() % config.save_every == 0 and FLAGS.train_dir:
      saved_path = checkpointer.save(
          file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
      print("Saved checkpoint at path: \"{}\" "
            "with global_step: {}".format(saved_path, global_step.numpy()))
      sys.stdout.flush()
def __init__(self):
    """Initialize the base class and switch TensorFlow to eager execution."""
    super(datareader, self).__init__()
    # NOTE(review): this enables eager mode as a side effect of constructing
    # the reader; presumably it must run before any graph ops exist -- confirm.
    tf.enable_eager_execution()
def main(argv):
  """Train the disentangled sequential VAE on the sprites dataset, eagerly.

  Seeds TensorFlow, fills the timestamp into `FLAGS.logdir` and
  `FLAGS.model_dir`, restores the latest checkpoint if one exists, then
  runs up to `FLAGS.max_steps` optimization steps that maximize the ELBO.
  A checkpoint and a qualitative analysis are written on every log step
  and on the final step.

  Args:
    argv: Unused.
  """
  del argv  # unused

  tf.enable_eager_execution()
  tf.set_random_seed(FLAGS.seed)
  # Fill the "{timestamp}" placeholder of both directories with the start time.
  timestamp = datetime.strftime(datetime.today(), "%y%m%d_%H%M%S")
  FLAGS.logdir = FLAGS.logdir.format(timestamp=timestamp)
  FLAGS.model_dir = FLAGS.model_dir.format(timestamp=timestamp)
  if not tf.gfile.Exists(FLAGS.model_dir):
    tf.gfile.MakeDirs(FLAGS.model_dir)

  sprites_data = sprites_dataset.SpritesDataset(fake_data=FLAGS.fake_data)

  model = DisentangledSequentialVAE(
      latent_size_static=FLAGS.latent_size_static,
      latent_size_dynamic=FLAGS.latent_size_dynamic,
      hidden_size=FLAGS.hidden_size, channels=sprites_data.channels,
      latent_posterior=FLAGS.latent_posterior)

  global_step = tf.train.get_or_create_global_step()
  # Adam whose learning rate follows a cosine decay over `max_steps`.
  optimizer = tf.train.AdamOptimizer(
      tf.train.cosine_decay(FLAGS.learning_rate, global_step, FLAGS.max_steps))

  checkpoint = tf.train.Checkpoint(model=model, global_step=global_step,
                                   optimizer=optimizer)
  checkpoint_manager = tf.contrib.checkpoint.CheckpointManager(
      checkpoint, directory=FLAGS.model_dir, max_to_keep=5)
  checkpoint.restore(checkpoint_manager.latest_checkpoint)

  writer = tf.contrib.summary.create_file_writer(FLAGS.logdir)
  writer.set_as_default()

  # Keep only the first element of every training example, then batch and
  # cap the stream at `max_steps` batches.
  dataset = sprites_data.train.map(lambda *x: x[0]).shuffle(1000).repeat()
  dataset = dataset.batch(FLAGS.batch_size).take(FLAGS.max_steps)
  for inputs in dataset.prefetch(buffer_size=None):
    with tf.contrib.summary.record_summaries_every_n_global_steps(
        FLAGS.log_steps, global_step=global_step):
      if FLAGS.enable_debug_logging:
        tf.contrib.summary.histogram("image", inputs)

      with tf.GradientTape() as tape:
        features = model.compressor(inputs)  # (batch, timesteps, hidden)
        static_sample, static_posterior = model.sample_static_posterior(
            features, FLAGS.num_samples)  # (samples, batch, latent)
        dynamic_sample, dynamic_posterior = model.sample_dynamic_posterior(
            features, FLAGS.num_samples, static_sample)  # (sampl, N, T, latent)
        likelihood = model.decoder((dynamic_sample, static_sample))

        reconstruction = tf.reduce_mean(  # integrate samples
            likelihood.mean()[:FLAGS.num_reconstruction_samples], axis=0)
        visualize_reconstruction(inputs, reconstruction,
                                 name="train_reconstruction")

        static_prior = model.static_prior()
        _, dynamic_prior = model.sample_dynamic_prior(
            FLAGS.num_samples, FLAGS.batch_size, sprites_data.length)

        if FLAGS.enable_debug_logging:
          summarize_dist_params(static_prior, "static_prior")
          summarize_dist_params(static_posterior, "static_posterior")
          summarize_dist_params(dynamic_prior, "dynamic_prior")
          summarize_dist_params(dynamic_posterior, "dynamic_posterior")
          summarize_dist_params(likelihood, "likelihood")

        static_prior_log_prob = static_prior.log_prob(static_sample)
        static_posterior_log_prob = static_posterior.log_prob(static_sample)
        dynamic_prior_log_prob = tf.reduce_sum(
            dynamic_prior.log_prob(dynamic_sample), axis=-1)  # sum time
        dynamic_posterior_log_prob = tf.reduce_sum(
            dynamic_posterior.log_prob(dynamic_sample), axis=-1)  # sum time
        likelihood_log_prob = tf.reduce_sum(
            likelihood.log_prob(inputs), axis=-1)  # sum time

        if FLAGS.enable_debug_logging:
          with tf.name_scope("log_probs"):
            summarize_mean_in_nats_and_bits(
                static_prior_log_prob, FLAGS.latent_size_static, "static_prior")
            summarize_mean_in_nats_and_bits(
                static_posterior_log_prob, FLAGS.latent_size_static,
                "static_posterior")
            summarize_mean_in_nats_and_bits(
                dynamic_prior_log_prob, FLAGS.latent_size_dynamic *
                sprites_data.length, "dynamic_prior")
            summarize_mean_in_nats_and_bits(
                dynamic_posterior_log_prob, FLAGS.latent_size_dynamic *
                sprites_data.length, "dynamic_posterior")
            summarize_mean_in_nats_and_bits(
                likelihood_log_prob, sprites_data.frame_size ** 2 *
                sprites_data.channels * sprites_data.length, "likelihood")

        # ELBO: prior minus posterior log-probabilities of both latents,
        # plus the data log-likelihood; the loss is its negation.
        elbo = tf.reduce_mean(static_prior_log_prob -
                              static_posterior_log_prob +
                              dynamic_prior_log_prob -
                              dynamic_posterior_log_prob +
                              likelihood_log_prob)
        loss = -elbo
        tf.contrib.summary.scalar("elbo", elbo)

      grads = tape.gradient(loss, model.variables)
      # Clip by global norm; `global_norm` is the norm before clipping.
      grads, global_norm = tf.clip_by_global_norm(grads, FLAGS.clip_norm)
      grads_and_vars = list(zip(grads, model.variables))  # allow reuse in py3
      if FLAGS.enable_debug_logging:
        with tf.name_scope("grads"):
          tf.contrib.summary.scalar("global_norm_grads", global_norm)
          tf.contrib.summary.scalar("global_norm_grads_clipped",
                                    tf.global_norm(grads))
        for grad, var in grads_and_vars:
          with tf.name_scope("grads"):
            tf.contrib.summary.histogram("{}/grad".format(var.name), grad)
          with tf.name_scope("vars"):
            tf.contrib.summary.histogram(var.name, var)
      optimizer.apply_gradients(grads_and_vars, global_step)

    # Checkpoint and visualize on every log step and on the final step.
    is_log_step = global_step.numpy() % FLAGS.log_steps == 0
    is_final_step = global_step.numpy() == FLAGS.max_steps
    if is_log_step or is_final_step:
      checkpoint_manager.save()
      print("ELBO ({}/{}): {}".format(global_step.numpy(), FLAGS.max_steps,
                                      elbo.numpy()))
      with tf.contrib.summary.always_record_summaries():
        # Draw one batch of 3 from the first 20 test examples.
        val_data = sprites_data.test.take(20)
        inputs = next(iter(val_data.shuffle(20).batch(3)))[0]
        visualize_qualitative_analysis(inputs, model,
                                       FLAGS.num_reconstruction_samples)

  writer.flush()
def main(_):
  """Train (unless restoring) and evaluate an L2HMC sampler eagerly.

  After the optional training phase a fresh set of chains is simulated and
  used to report the auto-correlation spectrum and the effective sample
  size; with `FLAGS.train_dir` set, the spectrum is also written as
  summaries and, if matplotlib is available, one chain is plotted.
  """
  tf.enable_eager_execution()
  global_step = tf.train.get_or_create_global_step()
  global_step.assign(1)

  available_energy_fns = {
      "scg": l2hmc.get_scg_energy_fn(),
      "rw": l2hmc.get_rw_energy_fn()
  }
  energy_fn, mean, covar = available_energy_fns[FLAGS.energy_fn]

  # Fixed experiment settings.
  x_dim = 2
  train_iters = 5000
  eval_iters = 2000
  eps = 0.1
  n_steps = 10  # Chain length
  n_samples = 200
  record_loss_every = 100

  dynamics = l2hmc.Dynamics(
      x_dim=x_dim, minus_loglikelihood_fn=energy_fn, n_steps=n_steps, eps=eps)
  learning_rate = tf.train.exponential_decay(
      1e-3, global_step, 1000, 0.96, staircase=True)
  optimizer = tf.train.AdamOptimizer(learning_rate)
  checkpointer = tf.train.Checkpoint(
      optimizer=optimizer, dynamics=dynamics, global_step=global_step)

  if FLAGS.train_dir:
    summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
    if FLAGS.restore:
      latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
      checkpointer.restore(latest_path)
      print("Restored latest checkpoint at path:\"{}\" ".format(latest_path))
      sys.stdout.flush()

  if not FLAGS.restore:
    # Training
    # Use `tfe.defun` to boost performance when there are lots of small ops
    loss_fn = (tfe.defun(l2hmc.compute_loss)
               if FLAGS.use_defun else l2hmc.compute_loss)

    samples = tf.random_normal(shape=[n_samples, x_dim])
    for iteration in range(1, train_iters + 1):
      loss, samples, accept_prob = train_one_iter(
          dynamics,
          samples,
          optimizer,
          loss_fn=loss_fn,
          global_step=global_step)

      if iteration % record_loss_every != 0:
        continue

      print("Iteration {}, loss {:.4f}, x_accept_prob {:.4f}".format(
          iteration, loss.numpy(), accept_prob.numpy().mean()))
      if FLAGS.train_dir:
        with summary_writer.as_default():
          with tf.contrib.summary.always_record_summaries():
            tf.contrib.summary.scalar("Training loss", loss, step=global_step)

    print("Training complete.")
    sys.stdout.flush()

    if FLAGS.train_dir:
      saved_path = checkpointer.save(
          file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
      print("Saved checkpoint at path: \"{}\" ".format(saved_path))
      sys.stdout.flush()

  # Evaluation
  # Use tfe.defun to boost performance when there are lots of small ops
  apply_transition = (tfe.defun(dynamics.apply_transition)
                      if FLAGS.use_defun else dynamics.apply_transition)

  samples = tf.random_normal(shape=[n_samples, x_dim])
  history = []
  for _ in range(eval_iters):
    history.append(samples.numpy())
    _, _, _, samples = apply_transition(samples)
  samples_history = np.array(history)
  print("Sampling complete.")
  sys.stdout.flush()

  # Mean and covariance of target distribution
  mean = mean.numpy()
  covar = covar.numpy()
  ac_spectrum = compute_ac_spectrum(samples_history, mean, covar)
  print("First 25 entries of the auto-correlation spectrum: {}".format(
      ac_spectrum[:25]))
  ess = compute_ess(ac_spectrum)
  print("Effective sample size per Metropolis-Hastings step: {}".format(ess))
  sys.stdout.flush()

  if FLAGS.train_dir:
    # Plot autocorrelation spectrum in tensorboard
    plot_step = tfe.Variable(1, trainable=False, dtype=tf.int64)

    for ac in ac_spectrum:
      with summary_writer.as_default():
        with tf.contrib.summary.always_record_summaries():
          tf.contrib.summary.scalar("Autocorrelation", ac, step=plot_step)
      plot_step.assign(plot_step + n_steps)

    if HAS_MATPLOTLIB:
      # Choose a single chain and plot the trajectory
      first_chain = samples_history[:, 0, :]
      xs = first_chain[:100, 0]
      ys = first_chain[:100, 1]
      plt.figure()
      plt.plot(xs, ys, color="orange", marker="o", alpha=0.6)  # Trained chain
      plt.savefig(os.path.join(FLAGS.train_dir, "single_chain.png"))
def initialize():
    """Switch TensorFlow to eager execution."""
    tf.enable_eager_execution()
import os os.environ['OMP_NUM_THREADS'] = str(NUM_THREADS) import tensorflow as tf import copy import numpy as np import time import pickle import ncon as ncon import misc_mera from sys import stdout config = tf.ConfigProto() config.intra_op_parallelism_threads = NUM_THREADS config.inter_op_parallelism_threads = NUM_THREADS tf.enable_eager_execution(config=config) tf.enable_v2_behavior() @tf.contrib.eager.defun def ascending_super_operator(hamAB, hamBA, w_isometry, v_isometry, unitary, refsym): """ ascending super operator for a modified binary MERA ascends 'hamAB' and 'hamBA' up one layer Parameters: ------------------------- hamAB, hamBA: tf.Tensor local Hamiltonian terms w_isometry: tf.Tensor v_isometry: tf.Tensor
def eager_val():
    """Enable eager execution and run `predict` on ten queued batches.

    Each item taken from the module-level queue `q` is unpacked into five
    fields; only the images are passed on to `predict`.
    """
    tf.enable_eager_execution()
    num_batches = 10
    for _ in range(num_batches):
        batch = q.get()
        images, _boxes, _label, _rpn_match, _rpn_bbox = batch
        predict(images)
import os import time import numpy as np import tensorflow as tf import tensorflow.contrib.eager as tfe tf.enable_eager_execution(device_policy=tfe.DEVICE_PLACEMENT_SILENT) print("TensorFlow version: {}".format(tf.VERSION)) print("Eager execution: {}".format(tf.executing_eagerly())) user_names = [ 'U12', 'U13', 'U24', 'U78', 'U207', 'U293', 'U453', 'U679', 'U1289', 'U1480' ] users_indir = '../data/users_feats' users_lossdir = '../data/users_loss' users_modeldir = '../data/users_model' max_len = 120 # max length of sentence num_chars = 128 # our vocabulary, i.e. unique characters in text. We'll just use the first 128 (half ASCII) # transform character-based input into equivalent numerical versions def encode_data(text, num_chars, max_length): # create empty vessels for one-hot encoded input X = np.zeros((len(text), max_length, num_chars), dtype=np.float32) y = np.zeros((len(text), max_length, num_chars), dtype=np.float32) # loop over inputs and tranform and store in X
to Python. tf.Tensor objects reference concrete values instead of symbolic handles to nodes in a computational graph. Since there isn't a computational graph to build and run later in a session, it's easy to inspect results using print() or a debugger. Evaluating, printing, and checking tensor values does not break the flow for computing gradients. The tf.contrib.eager module contains symbols available to both eager and graph execution environments and is useful for writing code to work with graphs For small graphs, eager execution runs a lot slower """ import tensorflow as tf import tensorflow.contrib.eager as tfe tf.enable_eager_execution() # start eager execution #print(tf.executing_eagerly()) # ==> True # try tfe.Variable W_ih = tf.get_variable(name = "W_ih", initializer=tf.random_uniform([2,3], -1, 1)) W_ho = tf.get_variable(name = "W_ho", initializer=tf.random_uniform([3,1], -1, 1)) b_h = tf.get_variable(name = "b_h", initializer=tf.zeros([3])) b_o = tf.get_variable(name = "b_o", initializer=tf.zeros([1])) def nn(X): Z = tf.tanh(tf.matmul(X, W_ih) + b_h) # Hidden layer output = tf.sigmoid(tf.matmul(Z, W_ho) + b_o) # Output layer return output def bin_xentropy(output, y):
def setUp(self) -> None:
    """Reset the default graph, enable eager mode and report its status."""
    tf.reset_default_graph()
    tf.enable_eager_execution()
    eager_enabled = tf.executing_eagerly()
    print("Eager Execution:", eager_enabled)
def enable_eager_execution(gpu=0, gpu_frac=0.3):
    """Enable eager execution with the session config from `config_gpu`.

    Args:
        gpu: Forwarded to `config_gpu` as `gpu`.
        gpu_frac: Forwarded to `config_gpu` as `gpu_frac`.
    """
    session_config = config_gpu(gpu=gpu, gpu_frac=gpu_frac)
    # A device policy could be passed as well, e.g.
    # device_policy=tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT
    tf.enable_eager_execution(config=session_config)
def run(flags_obj):
    """Train and evaluate ResNet-56 on CIFAR-10 through the Keras API.

    Args:
        flags_obj: Parsed flag values. The attributes read here are
            `enable_eager`, `data_format`, `use_synthetic_data`, `data_dir`,
            `batch_size`, `train_epochs`, `train_steps`, `num_gpus`,
            `turn_off_distribution_strategy` and `skip_eval`.

    Raises:
        ValueError: If the resolved dtype compares equal to 'fp16'.

    Returns:
        The stats built by `keras_common.build_stats` from the fit history,
        the evaluation output and the time callback.
    """
    if flags_obj.enable_eager:
        tf.enable_eager_execution()

    # NOTE(review): this compares the helper's return value with the string
    # 'fp16'; confirm that get_tf_dtype can actually return that form.
    dtype = flags_core.get_tf_dtype(flags_obj)
    if dtype == 'fp16':
        raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                         'value(fp32).')

    # Without an explicit flag, pick the layout based on whether this
    # TensorFlow build has CUDA support.
    image_format = flags_obj.data_format
    if image_format is None:
        if tf.test.is_built_with_cuda():
            image_format = 'channels_first'
        else:
            image_format = 'channels_last'
    tf.keras.backend.set_image_data_format(image_format)

    if not flags_obj.use_synthetic_data:
        input_fn = cifar_main.input_fn
    else:
        input_fn = keras_common.get_synth_input_fn(
            height=cifar_main.HEIGHT,
            width=cifar_main.WIDTH,
            num_channels=cifar_main.NUM_CHANNELS,
            num_classes=cifar_main.NUM_CLASSES,
            dtype=flags_core.get_tf_dtype(flags_obj))

    # Training and evaluation pipelines differ only in `is_training`.
    train_input_dataset = input_fn(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
        parse_record_fn=parse_record_keras)
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
        parse_record_fn=parse_record_keras)

    strategy = distribution_utils.get_distribution_strategy(
        num_gpus=flags_obj.num_gpus,
        turn_off_distribution_strategy=flags_obj.turn_off_distribution_strategy)

    # Model and optimizer are created and compiled inside the strategy scope.
    with keras_common.get_strategy_scope(strategy):
        optimizer = keras_common.get_optimizer()
        model = resnet_cifar_model.resnet56(classes=cifar_main.NUM_CLASSES)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['categorical_accuracy'])

    callbacks = keras_common.get_callbacks(
        learning_rate_schedule, cifar_main.NUM_IMAGES['train'])
    time_callback, tensorboard_callback, lr_callback = callbacks

    steps_per_epoch = cifar_main.NUM_IMAGES['train'] // flags_obj.batch_size
    epochs = flags_obj.train_epochs
    if flags_obj.train_steps:
        # An explicit step budget caps the epoch length and forces one epoch.
        steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
        epochs = 1

    num_eval_steps = cifar_main.NUM_IMAGES['validation'] // flags_obj.batch_size
    validation_data = eval_input_dataset
    if flags_obj.skip_eval:
        tf.keras.backend.set_learning_phase(1)
        num_eval_steps, validation_data = None, None

    history = model.fit(train_input_dataset,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[time_callback, lr_callback, tensorboard_callback],
                        validation_steps=num_eval_steps,
                        validation_data=validation_data,
                        verbose=1)

    if flags_obj.skip_eval:
        eval_output = None
    else:
        eval_output = model.evaluate(eval_input_dataset,
                                     steps=num_eval_steps,
                                     verbose=1)

    return keras_common.build_stats(history, eval_output, time_callback)
def main():
    """Train and evaluate the MNIST model under eager execution.

    Parses `--job-dir` and `--seed`, seeds the Python, NumPy and TensorFlow
    RNGs, builds the train/test datasets, restores the latest checkpoint in
    the job directory when one exists, then runs a training pass and an
    evaluation pass per epoch while writing summaries and checkpoints.

    NOTE(review): the original indentation of this function was lost. The
    nesting below is reconstructed from data flow -- confirm that the summary
    blocks and the checkpoint save are meant to run once per epoch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--job-dir', required=True)
    parser.add_argument('--seed', default=67, type=int)
    args = parser.parse_args()
    print('args:', args)

    # create a job directory if it doesn't already exist
    if not os.path.exists(args.job_dir):
        os.makedirs(args.job_dir)

    # enable eager execution
    tf.enable_eager_execution()

    # set random seeds for consistent execution
    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # define hyperparameters
    params = Params()
    print('params:', params)

    # load MNIST dataset
    ((images_train, labels_train),
     (images_test, labels_test)) = tf.keras.datasets.mnist.load_data()

    # prepare the images by casting and rescaling
    images_train = prep_images(images_train)
    images_test = prep_images(images_test)

    # compute statistics from the training set
    images_loc = images_train.mean()
    images_scale = images_train.std()

    # define datasets for sampling batches
    dataset_train = get_dataset((images_train, labels_train),
                                batch_size=params.batch_size,
                                shuffle=True)
    dataset_test = get_dataset((images_test, labels_test),
                               batch_size=params.batch_size)

    # model / optimization
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    model = Model(inputs_loc=images_loc,
                  inputs_scale=images_scale,
                  inputs_shape=[28, 28, 1])
    # Prior over the 2-dimensional latent: zero mean, identity scale.
    latent_prior = tfp.distributions.MultivariateNormalDiag(
        loc=tf.zeros(shape=[2], dtype=tf.float32),
        scale_identity_multiplier=1.0)

    # checkpoints
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     global_step=global_step)
    checkpoint_path = tf.train.latest_checkpoint(args.job_dir)
    if checkpoint_path is not None:
        checkpoint.restore(checkpoint_path).assert_consumed()

    # summaries
    summary_writer = tf.contrib.summary.create_file_writer(args.job_dir,
                                                           max_queue=1,
                                                           flush_millis=1000)
    summary_writer.set_as_default()

    with trange(params.epochs) as pbar:
        for epoch in pbar:
            # A fresh mean-loss metric is created for every epoch.
            loss_train = tfe.metrics.Mean(name='loss/train')
            for images, labels in dataset_train:
                with tf.GradientTape() as tape:
                    outputs_dist, z_dist, z = model(images,
                                                    labels,
                                                    training=True)
                    loss = losses.variational(outputs_dist, z_dist, images,
                                              latent_prior)
                    loss_train(loss)

                grads = tape.gradient(loss, model.trainable_variables)
                grads_and_vars = zip(grads, model.trainable_variables)
                optimizer.apply_gradients(grads_and_vars,
                                          global_step=global_step)

            # The summaries below use `grads`, `images` and `outputs_dist`
            # as left by the last training batch.
            with tf.contrib.summary.always_record_summaries():
                # Return value is discarded; presumably called so the metric
                # writes its summary -- TODO confirm against tfe.metrics.
                loss_train.result()
                tf.contrib.summary.scalar(name='grad_norm',
                                          tensor=tf.global_norm(grads))
                tf.contrib.summary.image(name='image/train',
                                         tensor=images,
                                         max_images=1,
                                         step=global_step)
                tf.contrib.summary.image(name='outputs/train',
                                         tensor=outputs_dist.mean(),
                                         max_images=1,
                                         step=global_step)

            loss_test = tfe.metrics.Mean(name='loss/eval')
            for images, labels in dataset_test:
                # Evaluation pass: no `training=True`, no gradient step.
                outputs_dist, z_dist, z = model(images, labels)
                loss = losses.variational(outputs_dist, z_dist, images,
                                          latent_prior)
                loss_test(loss)

            with tf.contrib.summary.always_record_summaries():
                loss_test.result()
                tf.contrib.summary.image(name='image/eval',
                                         tensor=images,
                                         max_images=1,
                                         step=global_step)
                tf.contrib.summary.image(name='outputs/eval',
                                         tensor=outputs_dist.mean(),
                                         max_images=1,
                                         step=global_step)

            pbar.set_description('loss (train): {}, loss (eval): {}'.format(
                loss_train.result().numpy(),
                loss_test.result().numpy()))

            checkpoint_prefix = os.path.join(args.job_dir, 'ckpt')
            checkpoint.save(checkpoint_prefix)
def main(_):
    """Eager execution workflow with RevNet trained on CIFAR-10.

    Builds the datasets, model, optimizer and checkpointer, optionally
    restores the latest checkpoint from `FLAGS.train_dir`, then iterates over
    the training set. Every `config.log_every` steps the model is evaluated
    (and summaries are written when `FLAGS.train_dir` is set); every
    `config.save_every` steps a checkpoint is saved when `FLAGS.train_dir`
    is set.

    Args:
        _: Unused positional argument.
    """
    tf.enable_eager_execution()

    config = get_config(config_name=FLAGS.config, dataset=FLAGS.dataset)
    ds_train, ds_train_one_shot, ds_validation, ds_test = get_datasets(
        data_dir=FLAGS.data_dir, config=config)
    model = revnet.RevNet(config=config)
    global_step = tf.train.get_or_create_global_step()  # Ensure correct summary
    global_step.assign(1)
    learning_rate = tf.train.piecewise_constant(
        global_step, config.lr_decay_steps, config.lr_list)
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=config.momentum)
    checkpointer = tf.train.Checkpoint(
        optimizer=optimizer, model=model, optimizer_step=global_step)

    if FLAGS.use_defun:
        model.call = tfe.defun(model.call)

    if FLAGS.train_dir:
        summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
        if FLAGS.restore:
            latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
            if latest_path is None:
                # latest_checkpoint returns None when the directory holds no
                # checkpoint; say so instead of claiming a restore happened.
                print("No checkpoint found in \"{}\"; "
                      "starting from global_step: {}".format(
                          FLAGS.train_dir, global_step.numpy()))
            else:
                checkpointer.restore(latest_path)
                print("Restored latest checkpoint at path:\"{}\" "
                      "with global_step: {}".format(latest_path,
                                                    global_step.numpy()))
            sys.stdout.flush()

    for x, y in ds_train:
        train_one_iter(model, x, y, optimizer, global_step=global_step)

        if global_step.numpy() % config.log_every == 0:
            it_test = ds_test.make_one_shot_iterator()
            acc_test, loss_test = evaluate(model, it_test)

            if FLAGS.validate:
                it_train = ds_train_one_shot.make_one_shot_iterator()
                it_validation = ds_validation.make_one_shot_iterator()
                acc_train, loss_train = evaluate(model, it_train)
                acc_validation, loss_validation = evaluate(model, it_validation)
                print("Iter {}, "
                      "training set accuracy {:.4f}, loss {:.4f}; "
                      "validation set accuracy {:.4f}, loss {:.4f}; "
                      "test accuracy {:.4f}, loss {:.4f}".format(
                          global_step.numpy(), acc_train, loss_train,
                          acc_validation, loss_validation, acc_test,
                          loss_test))
            else:
                print("Iter {}, test accuracy {:.4f}, loss {:.4f}".format(
                    global_step.numpy(), acc_test, loss_test))
            sys.stdout.flush()

            # summary_writer only exists when a train_dir was given.
            if FLAGS.train_dir:
                with summary_writer.as_default():
                    with tf.contrib.summary.always_record_summaries():
                        tf.contrib.summary.scalar("Test accuracy", acc_test)
                        tf.contrib.summary.scalar("Test loss", loss_test)
                        if FLAGS.validate:
                            tf.contrib.summary.scalar("Training accuracy",
                                                      acc_train)
                            tf.contrib.summary.scalar("Training loss",
                                                      loss_train)
                            tf.contrib.summary.scalar("Validation accuracy",
                                                      acc_validation)
                            tf.contrib.summary.scalar("Validation loss",
                                                      loss_validation)

        if global_step.numpy() % config.save_every == 0 and FLAGS.train_dir:
            saved_path = checkpointer.save(
                file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
            print("Saved checkpoint at path: \"{}\" "
                  "with global_step: {}".format(saved_path,
                                                global_step.numpy()))
            sys.stdout.flush()
def main():
    """Simulate treatments/outcomes on the Pokec graph and run an Estimator.

    Steps:
      1. Parse arguments and load the graph data.
      2. Simulate treatments and outcomes according to `args.simulated`
         ('attribute' or 'propensity') with a fixed NumPy seed, then save
         the simulated arrays to `<output_dir>/simulated_data.npz`.
      3. Build the estimator and, depending on `do_train`, `do_eval` and
         `do_predict`, train it, write `eval_results.txt` and/or write
         `test_results.tsv` into `args.output_dir`.

    Raises:
        ValueError: If `args.simulated` is not a supported mode, or if none
            of `do_train`, `do_eval`, `do_predict` is set.
    """
    tf.enable_eager_execution()
    tf.logging.set_verbosity(tf.logging.INFO)

    parser = add_parser_model_arguments()
    args = parser.parse_args()

    print("load the data")
    graph_data, _ = load_data_pokec(args.data_dir)
    print("Loaded data with {} vertices and {} edges".format(
        graph_data.num_vertices, graph_data.edge_list.shape[0]))

    np.random.seed(42)  # use consistent seed for simulation
    if args.simulated == 'attribute':
        treatments, outcomes, y_0, y_1, t_prob = \
            simulate_from_pokec_covariate(args.data_dir,
                                          covariate=args.covariate,
                                          beta0=1.0,
                                          beta1=args.beta1,
                                          gamma=1.0)
    elif args.simulated == 'propensity':
        # `sep` is passed by keyword: newer pandas releases no longer accept
        # it positionally.
        output = pd.read_csv(args.base_propensities_path, sep='\t')
        base_propensity_scores = output['treatment_probability'].values
        treatments, outcomes, y_0, y_1, t_prob = \
            simulate_exogeneity_experiment(base_propensity_scores,
                                           exogeneous_con=args.exogeneity,
                                           beta0=1.0,
                                           beta1=args.beta1,
                                           gamma=1.0)
    else:
        # Previously this fell through and failed later with a NameError on
        # `treatments`; fail early with an actionable message instead.
        raise ValueError(
            "Unsupported `simulated` mode {!r}; expected 'attribute' or "
            "'propensity'.".format(args.simulated))

    # but let it change for data splitting and initialization
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed + 42)

    os.makedirs(args.output_dir, exist_ok=True)
    np.savez(os.path.join(args.output_dir, 'simulated_data'),
             treatments=treatments,
             outcomes=outcomes,
             y_0=y_0,
             y_1=y_1,
             t_prob=t_prob)

    treatment_cat = True
    # Outcomes are treated as categorical unless they are float32.
    outcome_cat = not outcomes.dtype == np.float32

    if not outcome_cat:
        # rescale outcome to reduce the sensitivity of training to optimization parameters
        outcomes = (outcomes - outcomes.mean()) / outcomes.std()

    if not args.do_train and not args.do_eval and not args.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be True."
        )

    tf.gfile.MakeDirs(args.output_dir)

    session_config = tf.ConfigProto(intra_op_parallelism_threads=0,
                                    inter_op_parallelism_threads=4)
    if args.use_xla:
        session_config.graph_options.optimizer_options.global_jit_level = \
            tf.OptimizerOptions.ON_1

    run_config = tf.estimator.RunConfig(
        log_step_count_steps=10,
        model_dir=args.output_dir,
        save_checkpoints_steps=args.save_checkpoints_steps,
        keep_checkpoint_max=args.keep_checkpoints,
        # save_checkpoints_steps=None,
        # save_checkpoints_secs=None,
        save_summary_steps=10,
        session_config=session_config)

    # estimator setup
    num_train_steps = args.num_train_steps

    vertex_embedding_params = {
        'embedding_dim': args.embedding_dim,
        'embedding_trainable': _str2bool(args.embedding_trainable)
    }

    model_fn = treatment_response_model_fn_builder(
        label_task_weight=args.label_task_weight,
        init_checkpoint=args.init_checkpoint,
        label_pred=args.label_pred,
        unsupervised=args.unsupervised,
        global_optimizer=_make_global_optimizer(args),
        embedding_optimizer=_make_local_optimizer(args),
        regularization=None,
        treatment_cat=treatment_cat,
        outcome_cat=outcome_cat,
        polyak_train=True)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        params={
            **vertex_embedding_params,
            'num_vertices': graph_data.num_vertices,
            'batch_size': args.batch_size
        },
        model_dir=args.output_dir,
        config=run_config)

    if args.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", args.batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)

        # subsample and process the data
        with tf.name_scope("training_data"):
            dataset_fn_train = get_dataset_fn(args.sampler, args)
            train_input_fn = make_input_fn(graph_data, args, treatments,
                                           outcomes, dataset_fn_train)

        # additional logging
        # NOTE(review): `hooks` is built but never handed to
        # `estimator.train`; kept as-is because wiring it in would change
        # training behavior -- confirm intent.
        hooks = [
            tf.train.LoggingTensorHook({'loss': 'loss'}, every_n_iter=100)
        ]
        if args.label_pred:
            hooks += [
                tf.train.LoggingTensorHook(
                    {
                        # 'token_ids': 'token_ids',
                        # 'token_mask': 'token_mask',
                        # 'label_ids': 'label_ids',
                        # 'pred_in': 'summary/in_split/predictions',
                        # 'pred_out': 'summary/out_split/predictions',
                        # 'ra_in': 'summary/in_split/labels/kappa/batch_random_agreement/random_agreement',
                        # 'ra_out': 'summary/out_split/labels/kappa/batch_random_agreement/random_agreement',
                    },
                    every_n_iter=1000)
            ]

        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if args.do_train and (args.do_eval or args.do_predict):
        # reload the model to get rid of unsupervised parts of the model
        trained_model_checkpoint = tf.train.latest_checkpoint(args.output_dir)
        model_fn = treatment_response_model_fn_builder(
            label_task_weight=args.label_task_weight,
            init_checkpoint=trained_model_checkpoint,
            label_pred=True,
            unsupervised=False,
            treatment_cat=treatment_cat,
            outcome_cat=outcome_cat,
            polyak_train=False,
            polyak_restore=False)

        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            params={
                **vertex_embedding_params,
                'num_vertices': graph_data.num_vertices,
                'batch_size': args.batch_size
            },
            model_dir=args.output_dir,
            config=run_config)

    if args.do_eval:
        tf.logging.info("***** Running evaluation *****")
        # tf.logging.info(" Num examples = %d", len(eval_examples))
        tf.logging.info(" Batch size = %d", args.batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None

        with tf.name_scope("evaluation_data"):
            eval_input_fn = make_no_graph_input_fn(graph_data,
                                                   args,
                                                   treatments,
                                                   outcomes,
                                                   filter_test=True)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if args.do_predict:
        tf.logging.info("***** Running prediction*****")

        with tf.name_scope("evaluation_data"):
            predict_input_fn = make_no_graph_input_fn(graph_data, args,
                                                      treatments, outcomes)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(args.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")

            # Column order of the TSV; each prediction is indexed by these keys.
            attribute_names = [
                'vertex_index', 'in_test', 'treatment_probability',
                'expected_outcome_st_treatment',
                'expected_outcome_st_no_treatment', 'outcome', 'treatment'
            ]

            writer.write("\t".join(attribute_names) + "\n")
            for prediction in result:
                output_line = "\t".join(
                    str(prediction[attribute_name])
                    for attribute_name in attribute_names) + "\n"
                writer.write(output_line)
[time_steps for _ in range(batch_size)], dtype=tf.int64)
    labels = tf.random_normal([batch_size, LABEL_DIMENSION])
    return tf.data.Dataset.from_tensors((labels, chars, sequence_length))


class RNNColorbotTest(tf.test.TestCase):
    """Smoke tests for the `rnn_colorbot` training and evaluation entry points."""

    def testTrainOneEpoch(self):
        """Run `train_one_epoch` once on a random dataset with a 3-layer model."""
        model = rnn_colorbot.RNNColorbot(
            rnn_cell_sizes=[256, 128, 64],
            label_dimension=LABEL_DIMENSION,
            keep_prob=1.0)
        optimizer = tf.train.AdamOptimizer(learning_rate=.01)
        dataset = random_dataset()
        with test_util.use_gpu():
            rnn_colorbot.train_one_epoch(model, optimizer, dataset)

    def testTest(self):
        """Run `rnn_colorbot.test` once on a random dataset with a 1-layer model."""
        model = rnn_colorbot.RNNColorbot(
            rnn_cell_sizes=[256],
            label_dimension=LABEL_DIMENSION,
            keep_prob=1.0)
        dataset = random_dataset()
        with test_util.use_gpu():
            rnn_colorbot.test(model, dataset)


if __name__ == "__main__":
    # Eager execution is enabled before the test runner starts.
    tf.enable_eager_execution()
    tf.test.main()
def build_model(self):
    """Build the GRU graph: inputs, recurrent stack, outputs and train op.

    This method constructs a TF1 graph (placeholders, `tf.get_variable`), so
    eager execution must not be switched on here: `tf.enable_eager_execution`
    has to be called at program startup and raises once the default graph
    has been used, and placeholders are not usable in eager mode.

    Attributes set:
        X, Y: int32 placeholders of shape [batch_size].
        state: list of `layers` float32 placeholders [batch_size, rnn_size].
        global_step: non-trainable step variable.
        final_state: state returned by the stacked cell.
        yhat: `final_activation` applied to the logits.
        cost, lr, train_op: only when `self.is_training` is true.
    """
    print('build_model check context eager: ', context.executing_eagerly())

    self.X = tf.placeholder(tf.int32, [self.batch_size], name='input')
    self.Y = tf.placeholder(tf.int32, [self.batch_size], name='output')
    self.state = [
        tf.placeholder(tf.float32, [self.batch_size, self.rnn_size],
                       name='rnn_state') for _ in range(self.layers)
    ]
    self.global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('gru_layer'):
        # With sigma == 0, fall back to sqrt(6 / (n_items + rnn_size)).
        sigma = self.sigma if self.sigma != 0 else np.sqrt(
            6.0 / (self.n_items + self.rnn_size))
        if self.init_as_normal:
            initializer = tf.random_normal_initializer(mean=0, stddev=sigma)
        else:
            initializer = tf.random_uniform_initializer(minval=-sigma,
                                                        maxval=sigma)

        embedding = tf.get_variable(name='embedding',
                                    shape=[self.n_items, self.rnn_size],
                                    initializer=initializer)
        softmax_W = tf.get_variable(name='softmax_w',
                                    shape=[self.n_items, self.rnn_size],
                                    initializer=initializer)
        softmax_b = tf.get_variable(
            name='softmax_b',
            shape=[self.n_items],
            initializer=tf.constant_initializer(0.0))

        cell = rnn_cell.GRUCell(self.rnn_size, activation=self.hidden_act)
        # cell = rnn_cell.BasicRNNCell(self.rnn_size, activation=self.hidden_act)
        drop_cell = rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.dropout_p_hidden)
        # NOTE(review): cell_atten is built but not used below -- confirm
        # whether the attention wrapper was meant to feed the stack.
        cell_atten = tf.contrib.rnn.AttentionCellWrapper(
            drop_cell, attn_length=128, state_is_tuple=True)
        # NOTE(review): the same cell object is repeated for every layer;
        # verify this is intended when self.layers > 1.
        stacked_cell = rnn_cell.MultiRNNCell(
            [drop_cell] * self.layers, state_is_tuple=True)

        inputs = tf.nn.embedding_lookup(embedding, self.X)  # params, ids
        output, state = stacked_cell(inputs=inputs, state=tuple(self.state))
        self.final_state = state

    if not self.is_training:
        # Inference: score against the full output matrix; no loss, no
        # optimizer.
        logits = tf.matmul(output, softmax_W, transpose_b=True) + softmax_b
        self.yhat = self.final_activation(logits)
        return

    # Use other examples of the minibatch as negative samples.
    sampled_W = tf.nn.embedding_lookup(softmax_W, self.Y)  # params, ids
    sampled_b = tf.nn.embedding_lookup(softmax_b, self.Y)
    logits = tf.matmul(output, sampled_W, transpose_b=True) + sampled_b
    self.yhat = self.final_activation(logits)
    self.cost = self.loss_function(self.yhat)

    # Exponentially decayed learning rate, floored at 1e-5.
    self.lr = tf.maximum(
        1e-5,
        tf.train.exponential_decay(self.learning_rate,
                                   self.global_step,
                                   self.decay_steps,
                                   self.decay,
                                   staircase=True))

    # Try different optimizers.
    # optimizer = tf.train.AdagradOptimizer(self.lr)
    optimizer = tf.train.AdamOptimizer(self.lr)
    # optimizer = tf.train.AdadeltaOptimizer(self.lr)
    # optimizer = tf.train.RMSPropOptimizer(self.lr)

    tvars = tf.trainable_variables()
    gvs = optimizer.compute_gradients(self.cost, tvars)
    if self.grad_cap > 0:
        # Clip each gradient's norm to grad_cap.
        capped_gvs = [(tf.clip_by_norm(grad, self.grad_cap), var)
                      for grad, var in gvs]
    else:
        capped_gvs = gvs
    self.train_op = optimizer.apply_gradients(capped_gvs,
                                              global_step=self.global_step)