def testCallingEnableEagerExecutionMoreThanOnce(self):
    """Check that a repeated enable_eager_execution() call raises ValueError."""
    # eager.test.main() has already invoked enable_eager_execution(), so the
    # call made below is the second one in this process.
    function_name = tfe.enable_eager_execution.__name__
    expected_message = (
        r"Do not call tfe\.%s more than once in the same process"
        % function_name)
    with self.assertRaisesRegexp(ValueError, expected_message):
        tfe.enable_eager_execution()
def main(_):
    """Run td3/ddpg evaluation.

    Builds the gym environment and an actor, measures a baseline reward with
    `sample_random=True` rollouts, and then loops forever: wait for the next
    checkpoint in FLAGS.load_dir, restore it, roll the actor out with
    `noise_scale=0.0` and write reward/length summaries to FLAGS.log_dir.

    Args:
        _: unused.
    """
    contrib_eager_python_tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        # The device scope is entered and never exited here, so it stays
        # active for everything that follows.
        tf.device('/device:GPU:0').__enter__()

    tf.gfile.MakeDirs(FLAGS.log_dir)
    writer = contrib_summary.create_file_writer(
        FLAGS.log_dir, flush_millis=10000)

    env = gym.make(FLAGS.env)
    if FLAGS.wrap_for_absorbing:
        env = lfd_envs.AbsorbingWrapper(env)

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    with tf.variable_scope('actor'):
        actor = Actor(obs_dim, act_dim)

    # Baseline reward used to normalise the 'reward/scaled' summary.
    random_reward, _ = do_rollout(
        env, actor, None, num_trajectories=10, sample_random=True)

    reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
    saver = contrib_eager_python_tfe.Saver(actor.variables + [reward_scale])

    last_checkpoint = tf.train.latest_checkpoint(FLAGS.load_dir)
    with writer.as_default():
        while True:
            last_checkpoint = wait_for_next_checkpoint(
                FLAGS.load_dir, last_checkpoint)
            # The step count is the text after the last '-' in the path.
            step = int(last_checkpoint.rsplit('-', 1)[-1])
            saver.restore(last_checkpoint)

            mean_reward, mean_length = do_rollout(
                env,
                actor,
                None,
                noise_scale=0.0,
                num_trajectories=FLAGS.num_trials)

            logging.info(
                'Evaluation: average episode length %d, average episode reward %f',
                mean_length, mean_reward)
            message = ('Evaluation: average episode length {}, average episode reward {}'.
                       format(mean_length, mean_reward))
            print(message)

            with contrib_summary.always_record_summaries():
                scale = reward_scale.numpy()
                # A scale of exactly 1.0 is the variable's initial value; the
                # normalised reward is only reported once it differs.
                if scale != 1.0:
                    scaled_reward = (
                        (mean_reward - random_reward) / (scale - random_reward))
                    contrib_summary.scalar(
                        'reward/scaled', scaled_reward, step=step)
                contrib_summary.scalar('reward', mean_reward, step=step)
                contrib_summary.scalar('length', mean_length, step=step)
def main(_):
    """Train a PTBModel eagerly, checkpointing whenever the eval loss improves.

    For each of FLAGS.epoch epochs the model is trained on the training split
    and evaluated on the validation split. When the evaluation loss improves
    (or on the first epoch) a checkpoint is written to FLAGS.logdir, if set;
    otherwise the learning rate is divided by 4.

    Args:
        _: unused.

    Raises:
        ValueError: if FLAGS.data_path is empty.
    """
    tfe.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")
    corpus = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tf.device("/device:GPU:0" if have_gpu else None):
        # Make learning_rate a Variable so it can be included in the
        # checkpoint and we can resume training with the last saved
        # learning_rate.
        learning_rate = tfe.Variable(20.0, name="learning_rate")
        model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                         FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                         use_cudnn_rnn)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        checkpoint = tfe.Checkpoint(
            learning_rate=learning_rate,
            model=model,
            # GradientDescentOptimizer has no state to checkpoint, but noting
            # it here lets us swap in an optimizer that does.
            optimizer=optimizer)
        # Restore existing variables now (learning_rate), and restore new
        # variables on creation if a checkpoint exists.
        checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss = evaluate(model, eval_data)
            # Test for None explicitly: a truthiness check would also treat a
            # best loss of exactly 0.0 as "no best loss recorded yet".
            if best_loss is None or eval_loss < best_loss:
                if FLAGS.logdir:
                    checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
                best_loss = eval_loss
            else:
                learning_rate.assign(learning_rate / 4.0)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n" %
                    learning_rate.numpy())
def main(_):
    """Train a PTBModel eagerly, checkpointing whenever the eval loss improves.

    For each of FLAGS.epoch epochs the model is trained on the training split
    and evaluated on the validation split. When the evaluation loss improves
    (or on the first epoch) a checkpoint is written to FLAGS.logdir, if set;
    otherwise the learning rate is divided by 4.

    Args:
        _: unused.

    Raises:
        ValueError: if FLAGS.data_path is empty.
    """
    tfe.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")
    corpus = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tf.device("/device:GPU:0" if have_gpu else None):
        # Make learning_rate a Variable so it can be included in the
        # checkpoint and we can resume training with the last saved
        # learning_rate.
        learning_rate = tfe.Variable(20.0, name="learning_rate")
        model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                         FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                         use_cudnn_rnn)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        checkpoint = tfe.Checkpoint(
            learning_rate=learning_rate,
            model=model,
            # GradientDescentOptimizer has no state to checkpoint, but noting
            # it here lets us swap in an optimizer that does.
            optimizer=optimizer)
        # Restore existing variables now (learning_rate), and restore new
        # variables on creation if a checkpoint exists.
        checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss = evaluate(model, eval_data)
            # Test for None explicitly: a truthiness check would also treat a
            # best loss of exactly 0.0 as "no best loss recorded yet".
            if best_loss is None or eval_loss < best_loss:
                if FLAGS.logdir:
                    checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
                best_loss = eval_loss
            else:
                learning_rate.assign(learning_rate / 4.0)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n" %
                    learning_rate.numpy())
def main(_):
    """Train a PTBModel eagerly, saving variables when the eval loss improves.

    Variables are restored on creation from the latest checkpoint found in
    FLAGS.logdir. For each of FLAGS.epoch epochs the model is trained and then
    evaluated; when the evaluation loss improves (or on the first epoch) the
    model's trainable weights and the learning rate are saved to FLAGS.logdir,
    if set; otherwise the learning rate is divided by 4.

    Args:
        _: unused.

    Raises:
        ValueError: if FLAGS.data_path is empty.
    """
    tfe.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data_path")
    corpus = Corpus(FLAGS.data_path)
    # TODO(ashankar): Remove _batchify and _get_batch and use the Datasets API
    # instead.
    train_data = _batchify(corpus.train, FLAGS.batch_size)
    eval_data = _batchify(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    # The whole training loop runs inside the restore scope so variables
    # created at any point during training are covered by it.
    with tfe.restore_variables_on_create(
            tf.train.latest_checkpoint(FLAGS.logdir)):
        with tf.device("/device:GPU:0" if have_gpu else None):
            # Make learning_rate a Variable so it can be included in the
            # checkpoint and we can resume training with the last saved
            # learning_rate.
            learning_rate = tfe.Variable(20.0, name="learning_rate")
            sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())
            model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                             FLAGS.hidden_dim, FLAGS.num_layers,
                             FLAGS.dropout, use_cudnn_rnn)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)

            best_loss = None
            for _ in range(FLAGS.epoch):
                train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
                eval_loss = evaluate(model, eval_data)
                # Test for None explicitly: a truthiness check would also
                # treat a best loss of exactly 0.0 as "no best loss yet".
                if best_loss is None or eval_loss < best_loss:
                    if FLAGS.logdir:
                        tfe.Saver(model.trainable_weights +
                                  [learning_rate]).save(
                                      os.path.join(FLAGS.logdir, "ckpt"))
                    best_loss = eval_loss
                else:
                    learning_rate.assign(learning_rate / 4.0)
                    sys.stderr.write(
                        "eval_loss did not reduce in this epoch, "
                        "changing learning rate to %f for the next epoch\n" %
                        learning_rate.numpy())
import tensorflow as tf
import numpy as np
import sys

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.tpu import tpu_trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics

# Enable TF Eager execution. This runs at import time, before any of the
# setup below.
from tensorflow.contrib.eager.python import tfe
tfe.enable_eager_execution()

# Other setup
Modes = tf.estimator.ModeKeys

# Three positional command-line arguments are required; the script raises
# IndexError if fewer are given.
# NOTE(review): fin_name / fout_name are presumably input and output file
# names -- their use is not visible in this chunk, confirm.
ckpt_path = sys.argv[1]
fin_name = sys.argv[2]
fout_name = sys.argv[3]

# Fetch the problem registered under the name "translate_ende_wmt32k".
ende_problem = problems.problem("translate_ende_wmt32k")

# Get the encoders from the problem.
# NOTE(review): ckpt_path is passed as the sole argument to feature_encoders;
# confirm that this directory is what the problem expects here.
encoders = ende_problem.feature_encoders(ckpt_path)

# Setup helper functions for encoding and decoding
def encode(input_str, output_str=None):
RuntimeError, r'add_check_numerics_ops\(\) is not compatible with eager execution'): numerics.add_check_numerics_ops() def testClassicSummaryOpsErrorOut(self): x = constant_op.constant(42) x_summary = summary.scalar('x', x) y = constant_op.constant([1, 3, 3, 7]) y_summary = summary.histogram('hist', y) with self.assertRaisesRegexp( RuntimeError, r'Merging tf\.summary\.\* ops is not compatible with eager execution'): summary.merge([x_summary, y_summary]) with self.assertRaisesRegexp( RuntimeError, r'Merging tf\.summary\.\* ops is not compatible with eager execution'): summary.merge_all() def testClassicSummaryFileWriterErrorsOut(self): with self.assertRaisesRegexp( RuntimeError, r'tf\.summary\.FileWriter is not compatible with eager execution'): writer.FileWriter(tempfile.mkdtemp()) if __name__ == '__main__': tfe.enable_eager_execution() test.main()
def main(_):
    """Run td3/ddpg training.

    Seeds TensorFlow, numpy and Python's `random`, builds the gym environment
    and a DDPG/TD3 model, resumes from the latest checkpoint in FLAGS.save_dir
    when one exists, and then alternates rollouts with model updates until
    FLAGS.training_steps environment steps have been collected. Full training
    checkpoints go to FLAGS.save_dir; actor-only checkpoints for evaluation go
    to FLAGS.eval_save_dir.

    Args:
        _: unused.
    """
    contrib_eager_python_tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        # The device scope is entered and never exited here, so it stays
        # active for everything that follows.
        tf.device('/device:GPU:0').__enter__()

    tf.gfile.MakeDirs(FLAGS.log_dir)
    summary_writer = contrib_summary.create_file_writer(FLAGS.log_dir,
                                                        flush_millis=10000)

    # Seed every random number source with the same flag value.
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    random.seed(FLAGS.seed)

    env = gym.make(FLAGS.env)
    env.seed(FLAGS.seed)

    # NOTE(review): rand_actions is forwarded to do_rollout; presumably the
    # number of initial random actions -- confirm against do_rollout.
    if FLAGS.env in ['HalfCheetah-v2', 'Ant-v1']:
        rand_actions = int(1e4)
    else:
        rand_actions = int(1e3)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    # The two algorithms differ only in the use_td3 flag, the policy update
    # frequency and the actor learning rate.
    if FLAGS.algo == 'td3':
        model = ddpg_td3.DDPG(obs_shape[0],
                              act_shape[0],
                              use_td3=True,
                              policy_update_freq=2,
                              actor_lr=1e-3)
    else:
        model = ddpg_td3.DDPG(obs_shape[0],
                              act_shape[0],
                              use_td3=False,
                              policy_update_freq=1,
                              actor_lr=1e-4)

    # String variables that carry the pickled replay buffer and the pickled
    # gym/numpy/python RNG states through the checkpoint.
    replay_buffer_var = contrib_eager_python_tfe.Variable('',
                                                          name='replay_buffer')
    gym_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='gym_random_state')
    np_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='np_random_state')
    py_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='py_random_state')

    # Full training state: model variables plus the buffer and RNG states.
    saver = contrib_eager_python_tfe.Saver(
        model.variables + [replay_buffer_var] +
        [gym_random_state_var, np_random_state_var, py_random_state_var])
    tf.gfile.MakeDirs(FLAGS.save_dir)

    # Evaluation checkpoints only hold the actor variables and reward_scale.
    reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
    eval_saver = contrib_eager_python_tfe.Saver(model.actor.variables +
                                                [reward_scale])
    tf.gfile.MakeDirs(FLAGS.eval_save_dir)

    last_checkpoint = tf.train.latest_checkpoint(FLAGS.save_dir)
    if last_checkpoint is None:
        # Fresh start: empty buffer and zeroed counters.
        replay_buffer = ReplayBuffer()
        total_numsteps = 0
        prev_save_timestep = 0
        prev_eval_save_timestep = 0
    else:
        # Resume: restore variables, then rebuild the Python-side state.
        # NOTE(review): pickle.loads executes arbitrary code from the
        # checkpoint contents; only safe when FLAGS.save_dir is trusted.
        saver.restore(last_checkpoint)
        replay_buffer = pickle.loads(zlib.decompress(
            replay_buffer_var.numpy()))
        # The step count is the text after the last '-' in the checkpoint
        # path (checkpoints are saved with global_step=total_numsteps below).
        total_numsteps = int(last_checkpoint.split('-')[-1])
        assert len(replay_buffer) == total_numsteps
        prev_save_timestep = total_numsteps
        prev_eval_save_timestep = total_numsteps
        # Restore the RNG states saved with the checkpoint so the run
        # continues with the same random streams.
        env.unwrapped.np_random.set_state(
            pickle.loads(gym_random_state_var.numpy()))
        np.random.set_state(pickle.loads(np_random_state_var.numpy()))
        random.setstate(pickle.loads(py_random_state_var.numpy()))

    with summary_writer.as_default():
        while total_numsteps < FLAGS.training_steps:
            # Collect one rollout into the replay buffer.
            rollout_reward, rollout_timesteps = do_rollout(
                env,
                model.actor,
                replay_buffer,
                noise_scale=FLAGS.exploration_noise,
                rand_actions=rand_actions)
            total_numsteps += rollout_timesteps

            logging.info('Training: total timesteps %d, episode reward %f',
                         total_numsteps, rollout_reward)
            print('Training: total timesteps {}, episode reward {}'.format(
                total_numsteps, rollout_reward))

            with contrib_summary.always_record_summaries():
                contrib_summary.scalar('reward',
                                       rollout_reward,
                                       step=total_numsteps)
                contrib_summary.scalar('length',
                                       rollout_timesteps,
                                       step=total_numsteps)

            if len(replay_buffer) >= FLAGS.min_samples_to_start:
                # One model update per environment step just collected.
                for _ in range(rollout_timesteps):
                    time_step = replay_buffer.sample(
                        batch_size=FLAGS.batch_size)
                    # Transpose the sampled time steps into one TimeStep of
                    # per-field tuples.
                    batch = TimeStep(*zip(*time_step))
                    model.update(batch)

                if total_numsteps - prev_save_timestep >= FLAGS.save_interval:
                    # Snapshot the buffer and RNG states into their
                    # variables before writing the checkpoint.
                    replay_buffer_var.assign(
                        zlib.compress(pickle.dumps(replay_buffer)))
                    gym_random_state_var.assign(
                        pickle.dumps(env.unwrapped.np_random.get_state()))
                    np_random_state_var.assign(
                        pickle.dumps(np.random.get_state()))
                    py_random_state_var.assign(pickle.dumps(random.getstate()))
                    saver.save(os.path.join(FLAGS.save_dir, 'checkpoint'),
                               global_step=total_numsteps)
                    prev_save_timestep = total_numsteps

                if total_numsteps - prev_eval_save_timestep >= FLAGS.eval_save_interval:
                    eval_saver.save(os.path.join(FLAGS.eval_save_dir,
                                                 'checkpoint'),
                                    global_step=total_numsteps)
                    prev_eval_save_timestep = total_numsteps
def main(_):
    """Run td3/ddpg training with a GAIL-provided reward.

    Seeds TensorFlow, numpy and Python's `random`, builds the gym environment,
    a GAIL object and a DDPG/TD3 model whose `get_reward` comes from GAIL.
    On a fresh start the expert replay buffer is loaded from FLAGS.expert_dir
    and subsampled; otherwise the full state is restored from the latest
    checkpoint in FLAGS.save_dir. The loop then alternates rollouts, GAIL
    updates and model updates until FLAGS.training_steps environment steps
    have been collected.

    Args:
        _: unused.
    """
    contrib_eager_python_tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        # The device scope is entered and never exited here, so it stays
        # active for everything that follows.
        tf.device('/device:GPU:0').__enter__()

    tf.gfile.MakeDirs(FLAGS.log_dir)
    summary_writer = contrib_summary.create_file_writer(
        FLAGS.log_dir, flush_millis=10000)

    # Seed every random number source with the same flag value.
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    random.seed(FLAGS.seed)

    env = gym.make(FLAGS.env)
    env.seed(FLAGS.seed)
    if FLAGS.learn_absorbing:
        env = lfd_envs.AbsorbingWrapper(env)

    # NOTE(review): rand_actions is forwarded to do_rollout; presumably the
    # number of initial random actions -- confirm against do_rollout.
    if FLAGS.env in ['HalfCheetah-v2', 'Ant-v1']:
        rand_actions = int(1e4)
    else:
        rand_actions = int(1e3)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    subsampling_rate = env._max_episode_steps // FLAGS.trajectory_size  # pylint: disable=protected-access
    # GAIL is constructed with the sum of the first observation and action
    # dimensions as its first argument.
    lfd = gail.GAIL(
        obs_shape[0] + act_shape[0],
        subsampling_rate=subsampling_rate,
        gail_loss=FLAGS.gail_loss)

    # The two algorithms differ only in use_td3 and policy_update_freq.
    if FLAGS.algo == 'td3':
        model = ddpg_td3.DDPG(
            obs_shape[0],
            act_shape[0],
            use_td3=True,
            policy_update_freq=2,
            actor_lr=FLAGS.actor_lr,
            get_reward=lfd.get_reward,
            use_absorbing_state=FLAGS.learn_absorbing)
    else:
        model = ddpg_td3.DDPG(
            obs_shape[0],
            act_shape[0],
            use_td3=False,
            policy_update_freq=1,
            actor_lr=FLAGS.actor_lr,
            get_reward=lfd.get_reward,
            use_absorbing_state=FLAGS.learn_absorbing)

    # Baseline reward used to normalise the 'reward/scaled' summary.
    random_reward, _ = do_rollout(
        env, model.actor, None, num_trajectories=10, sample_random=True)

    # String variables that carry the pickled replay buffers through the
    # checkpoint.
    replay_buffer_var = contrib_eager_python_tfe.Variable(
        '', name='replay_buffer')
    expert_replay_buffer_var = contrib_eager_python_tfe.Variable(
        '', name='expert_replay_buffer')

    # Save and restore random states of gym/numpy/python.
    # If the job is preempted, it guarantees that it won't affect the results.
    # And the results will be deterministic (on CPU) and reproducible.
    gym_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='gym_random_state')
    np_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='np_random_state')
    py_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='py_random_state')

    reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')

    # Full training state: model and GAIL variables, both buffers, the reward
    # scale and the RNG states.
    saver = contrib_eager_python_tfe.Saver(
        model.variables + lfd.variables +
        [replay_buffer_var, expert_replay_buffer_var, reward_scale] +
        [gym_random_state_var, np_random_state_var, py_random_state_var])

    tf.gfile.MakeDirs(FLAGS.save_dir)

    # Evaluation checkpoints only hold the actor variables and reward_scale.
    eval_saver = contrib_eager_python_tfe.Saver(model.actor.variables +
                                                [reward_scale])
    tf.gfile.MakeDirs(FLAGS.eval_save_dir)

    last_checkpoint = tf.train.latest_checkpoint(FLAGS.save_dir)
    if last_checkpoint is None:
        # Fresh start: load only the expert replay buffer from
        # FLAGS.expert_dir. It is unpickled directly here (no zlib), unlike
        # the buffers restored from training checkpoints in the else branch.
        # NOTE(review): pickle.loads executes arbitrary code from the
        # checkpoint contents; only safe when FLAGS.expert_dir is trusted.
        expert_saver = contrib_eager_python_tfe.Saver([expert_replay_buffer_var])
        last_checkpoint = os.path.join(FLAGS.expert_dir, 'expert_replay_buffer')
        expert_saver.restore(last_checkpoint)
        expert_replay_buffer = pickle.loads(expert_replay_buffer_var.numpy())
        expert_reward = expert_replay_buffer.get_average_reward()

        logging.info('Expert reward %f', expert_reward)
        print('Expert reward {}'.format(expert_reward))

        # The expert's average reward becomes the normalisation scale.
        reward_scale.assign(expert_reward)
        expert_replay_buffer.subsample_trajectories(FLAGS.num_expert_trajectories)
        if FLAGS.learn_absorbing:
            expert_replay_buffer.add_absorbing_states(env)

        # Subsample after adding absorbing states, because otherwise we can
        # lose final states.
        print('Original dataset size {}'.format(len(expert_replay_buffer)))
        expert_replay_buffer.subsample_transitions(subsampling_rate)
        print('Subsampled dataset size {}'.format(len(expert_replay_buffer)))
        replay_buffer = ReplayBuffer()
        total_numsteps = 0
        prev_save_timestep = 0
        prev_eval_save_timestep = 0
    else:
        # Resume: restore variables, then rebuild the Python-side state.
        # NOTE(review): pickle.loads executes arbitrary code from the
        # checkpoint contents; only safe when FLAGS.save_dir is trusted.
        saver.restore(last_checkpoint)
        replay_buffer = pickle.loads(zlib.decompress(replay_buffer_var.numpy()))
        expert_replay_buffer = pickle.loads(
            zlib.decompress(expert_replay_buffer_var.numpy()))
        # The step count is the text after the last '-' in the checkpoint
        # path (checkpoints are saved with global_step=total_numsteps below).
        total_numsteps = int(last_checkpoint.split('-')[-1])
        prev_save_timestep = total_numsteps
        prev_eval_save_timestep = total_numsteps
        # Restore the RNG states saved with the checkpoint.
        env.unwrapped.np_random.set_state(
            pickle.loads(gym_random_state_var.numpy()))
        np.random.set_state(pickle.loads(np_random_state_var.numpy()))
        random.setstate(pickle.loads(py_random_state_var.numpy()))

    with summary_writer.as_default():
        while total_numsteps < FLAGS.training_steps:
            # Decay helps to make the model more stable.
            # The actor learning rate is halved every 100000 total steps.
            # TODO(agrawalk): Use tf.train.exponential_decay
            model.actor_lr.assign(
                model.initial_actor_lr * pow(0.5, total_numsteps // 100000))
            logging.info('Learning rate %f', model.actor_lr.numpy())
            # Collect one rollout; random sampling is requested while the
            # actor step counter is still zero.
            rollout_reward, rollout_timesteps = do_rollout(
                env,
                model.actor,
                replay_buffer,
                noise_scale=FLAGS.exploration_noise,
                rand_actions=rand_actions,
                sample_random=(model.actor_step.numpy() == 0),
                add_absorbing_state=FLAGS.learn_absorbing)
            total_numsteps += rollout_timesteps

            logging.info('Training: total timesteps %d, episode reward %f',
                         total_numsteps, rollout_reward)
            print('Training: total timesteps {}, episode reward {}'.format(
                total_numsteps, rollout_reward))

            with contrib_summary.always_record_summaries():
                # NOTE(review): the denominator is zero when reward_scale
                # equals random_reward -- confirm this cannot happen.
                contrib_summary.scalar(
                    'reward/scaled',
                    (rollout_reward - random_reward) /
                    (reward_scale.numpy() - random_reward),
                    step=total_numsteps)
                contrib_summary.scalar('reward', rollout_reward, step=total_numsteps)
                contrib_summary.scalar('length', rollout_timesteps, step=total_numsteps)

            if len(replay_buffer) >= FLAGS.min_samples_to_start:
                # GAIL updates: one per environment step just collected, each
                # using a policy batch and an expert batch.
                for _ in range(rollout_timesteps):
                    time_step = replay_buffer.sample(batch_size=FLAGS.batch_size)
                    batch = TimeStep(*zip(*time_step))

                    time_step = expert_replay_buffer.sample(batch_size=FLAGS.batch_size)
                    expert_batch = TimeStep(*zip(*time_step))

                    lfd.update(batch, expert_batch)

                # Model updates: FLAGS.updates_per_step per environment step.
                # The actor is only updated once the critic step counter has
                # reached FLAGS.policy_updates_delay.
                for _ in range(FLAGS.updates_per_step * rollout_timesteps):
                    time_step = replay_buffer.sample(batch_size=FLAGS.batch_size)
                    batch = TimeStep(*zip(*time_step))
                    model.update(
                        batch,
                        update_actor=model.critic_step.numpy() >=
                        FLAGS.policy_updates_delay)

                if total_numsteps - prev_save_timestep >= FLAGS.save_interval:
                    # Snapshot both buffers and the RNG states into their
                    # variables before writing the checkpoint.
                    replay_buffer_var.assign(zlib.compress(pickle.dumps(replay_buffer)))
                    expert_replay_buffer_var.assign(
                        zlib.compress(pickle.dumps(expert_replay_buffer)))
                    gym_random_state_var.assign(
                        pickle.dumps(env.unwrapped.np_random.get_state()))
                    np_random_state_var.assign(pickle.dumps(np.random.get_state()))
                    py_random_state_var.assign(pickle.dumps(random.getstate()))
                    saver.save(
                        os.path.join(FLAGS.save_dir, 'checkpoint'),
                        global_step=total_numsteps)
                    prev_save_timestep = total_numsteps

                if total_numsteps - prev_eval_save_timestep >= FLAGS.eval_save_interval:
                    eval_saver.save(
                        os.path.join(FLAGS.eval_save_dir, 'checkpoint'),
                        global_step=total_numsteps)
                    prev_eval_save_timestep = total_numsteps
from __future__ import print_function
import keras
from tensorflow.contrib.eager.python import tfe

# Switch for eager mode; when True, eager execution is enabled here, before
# the remaining imports below are executed.
eager = True
if eager:
    tfe.enable_eager_execution()
import tensorflow as tf
from keras.datasets import cifar10
import tfl
import numpy as np
import os
from collections import OrderedDict

# NOTE(review): these environment variables are set after tensorflow and
# keras have been imported and eager execution enabled -- confirm they still
# take effect at this point.
os.environ["CUDA_VISIBLE_DEVICES"]= "0"
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# --------------------- TRAINING PARAMETERS----------------------------------
iterations = 20000
data_augmentation = False
subtract_pixel_mean = True
n = 6
# depth is derived from n (56 for n = 6).
depth = n * 9 + 2
original_num_classes = 10
# Seven classes are added on top of the original ones.
# NOTE(review): the meaning of the extra classes is not visible here.
num_classes = original_num_classes + 7
use_logic = True
transductive = True
minibatch_size = 20
supervided_size = 1000 # -1 means all of them
def main(_):
    """Generate an expert replay buffer from the best expert checkpoint.

    Every checkpoint listed in FLAGS.expert_dir is restored into an actor and
    scored with 10 noise-free rollouts. The best-scoring checkpoint is then
    rolled out for FLAGS.num_expert_trajectories trajectories into a fresh
    ReplayBuffer, which is pickled into a string variable and saved under
    FLAGS.save_dir as 'expert_replay_buffer'.

    Args:
        _: unused.

    Raises:
        ValueError: if FLAGS.expert_dir does not contain the environment name,
            or if no expert checkpoints are found in FLAGS.expert_dir.
    """
    tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        # The device scope is entered and never exited here, so it stays
        # active for everything that follows.
        tf.device('/device:GPU:0').__enter__()

    if FLAGS.env not in FLAGS.expert_dir:
        raise ValueError('Expert directory must contain the environment name')

    # Seed every random number source with the same flag value.
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    random.seed(FLAGS.seed)

    env = gym.make(FLAGS.env)
    env.seed(FLAGS.seed)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    # String variable that carries the pickled buffer into the checkpoint.
    expert_replay_buffer_var = tfe.Variable('', name='expert_replay_buffer')

    saver = tfe.Saver([expert_replay_buffer_var])
    tf.gfile.MakeDirs(FLAGS.save_dir)

    with tf.variable_scope('actor'):
        actor = Actor(obs_shape[0], act_shape[0])
    expert_saver = tfe.Saver(actor.variables)

    checkpoint_state = tf.train.get_checkpoint_state(FLAGS.expert_dir)
    # get_checkpoint_state returns None when the directory has no valid
    # checkpoint state; fail with a clear message instead of an
    # AttributeError (or a later restore(None)) further down.
    if (checkpoint_state is None or
            not checkpoint_state.all_model_checkpoint_paths):
        raise ValueError(
            'No expert checkpoints found in {}'.format(FLAGS.expert_dir))

    # Pick the checkpoint with the highest average noise-free reward.
    best_checkpoint = None
    best_reward = float('-inf')
    for checkpoint in checkpoint_state.all_model_checkpoint_paths:
        expert_saver.restore(checkpoint)
        expert_reward, _ = do_rollout(
            env, actor, replay_buffer=None, noise_scale=0.0,
            num_trajectories=10)

        if expert_reward > best_reward:
            best_reward = expert_reward
            best_checkpoint = checkpoint

    expert_saver.restore(best_checkpoint)

    # Record the expert's trajectories into a fresh buffer.
    expert_replay_buffer = ReplayBuffer()
    expert_reward, _ = do_rollout(
        env,
        actor,
        replay_buffer=expert_replay_buffer,
        noise_scale=0.0,
        num_trajectories=FLAGS.num_expert_trajectories)

    logging.info('Expert reward %f', expert_reward)
    print('Expert reward {}'.format(expert_reward))

    expert_replay_buffer_var.assign(pickle.dumps(expert_replay_buffer))
    saver.save(os.path.join(FLAGS.save_dir, 'expert_replay_buffer'))