def main(_):
    """Compute and save video metrics for the decodes found in FLAGS.output_dir.

    The frame shape is taken from the problem attached to the hparams and the
    number of evaluated frames from ``hparams.video_num_target_frames``.

    Args:
        _: unused positional argument.
    """
    hparams = t2t_decoder.create_hparams()
    video_problem = hparams.problem
    height = video_problem.frame_height
    width = video_problem.frame_width
    channels = video_problem.num_channels
    video_metrics.compute_and_save_video_metrics(
        FLAGS.output_dir,
        FLAGS.problem,
        hparams.video_num_target_frames,
        [height, width, channels],
    )
def __init__(self, savedir, phorizon, cem_samples, cem_iters, cost='pixel'):
    """Store the planner settings and restore an SV2P model in its own graph.

    Args:
        savedir: stored as ``self.savedir``.
        phorizon: stored as ``self.phorizon``; also used as the number of
            target frames of the video model.
        cem_samples: stored as ``self.cem_samples``; also the leading
            dimension of every placeholder fed to the model.
        cem_iters: stored as ``self.cem_iters``.
        cost: stored as ``self.cost``.
    """
    # Plain configuration / bookkeeping attributes.
    self.savedir = savedir
    self.phorizon = phorizon
    self.cem_samples = cem_samples
    self.cem_iters = cem_iters
    self.cost = cost
    self.num_acts = 5
    self.verbose = False
    self.eps = 0
    self.planstep = 0

    # LOADING SV2P: the flags below are read by create_hparams().
    FLAGS.data_dir = args.root + 'data/'
    FLAGS.problem = args.problem
    FLAGS.hparams = 'video_num_input_frames=5,video_num_target_frames=15'
    FLAGS.hparams_set = 'next_frame_sv2p'
    FLAGS.model = 'next_frame_sv2p'

    hparams = create_hparams()
    # Override the frame counts that came from the FLAGS.hparams string.
    hparams.video_num_input_frames = 1
    hparams.video_num_target_frames = self.phorizon

    batch = self.cem_samples
    frame_shape = hparams.problem.frame_shape
    action_shape = [self.num_acts]

    sv2p_graph = tf.Graph()
    with sv2p_graph.as_default():
        self.forward_sess = tf.Session()

        in_prefix = [batch, hparams.video_num_input_frames]
        out_prefix = [batch, hparams.video_num_target_frames]
        inputs_ph = tf.placeholder(tf.float32, in_prefix + frame_shape)
        input_action_ph = tf.placeholder(tf.float32, in_prefix + action_shape)
        targets_ph = tf.placeholder(tf.float32, out_prefix + frame_shape)
        target_action_ph = tf.placeholder(
            tf.float32, out_prefix + action_shape)
        self.forward_placeholders = {
            'inputs': inputs_ph,
            'input_action': input_action_ph,
            'targets': targets_ph,
            'target_action': target_action_ph,
        }

        # Build the model in PREDICT mode and wire it to the placeholders.
        model_cls = registry.model(FLAGS.model)
        sv2p = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
        self.forward_prediction_ops, _ = sv2p(self.forward_placeholders)

        # Restore the weights from the checkpoint given by args.model_dir.
        tf.train.Saver().restore(self.forward_sess, args.model_dir)
        print('LOADED SV2P!')
def main(_):
    """Run the model over the PREDICT split and print reward confusion matrices.

    Two matrices are accumulated, indexed as ``[ground_truth, predicted]``:
    one over every entry along axis 1 of the reward tensors ("per-frame") and
    one using only index ``[i, 0, 0]`` of each example ("next-frame").

    Args:
        _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    hparams = create_hparams()
    hparams.force_full_predict = True
    batch_size = hparams.batch_size

    # Iterate over the dev/test partition of the data, in file order.
    # Change the data partition here if necessary.
    examples = registry.problem(FLAGS.problem).dataset(
        tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams)
    batches = examples.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))
    data = batches.make_one_shot_iterator().get_next()
    model_inputs = {
        key: data[key] for key in data.keys() if key.startswith("input")
    }

    # Build the model in PREDICT mode.
    model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(model_inputs)

    # Confusion matrices, one row/column per reward value.
    num_rewards = hparams.problem.num_rewards
    cm_per_frame = np.zeros((num_rewards, num_rewards), dtype=np.uint64)
    cm_next_frame = np.zeros((num_rewards, num_rewards), dtype=np.uint64)

    saver = tf.train.Saver()
    with tf.train.SingularMonitoredSession() as sess:
        # Load the latest checkpoint from the output directory.
        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.output_dir)
        saver.restore(sess.raw_session(),
                      checkpoint_state.model_checkpoint_path)

        batches_seen = 0
        while not sess.should_stop():
            batches_seen += 1
            print(batches_seen)

            # Fetch predicted and ground-truth rewards for one batch.
            predicted, truth = sess.run(
                [prediction_ops["target_reward"], data["target_reward"]])

            for idx in range(batch_size):
                cm_next_frame[truth[idx, 0, 0], predicted[idx, 0, 0]] += 1
                for true_r, pred_r in zip(truth[idx], predicted[idx]):
                    cm_per_frame[true_r, pred_r] += 1

    print_confusion_matrix("Per-frame Confusion Matrix", cm_per_frame)
    print_confusion_matrix("Next-frame Confusion Matrix", cm_next_frame)
def main(_):
    """Evaluate reward predictions and print two confusion matrices.

    Rows are ground-truth rewards and columns are predicted rewards. The
    "per-frame" matrix counts every entry along axis 1 of each example; the
    "next-frame" matrix counts only index ``[i, 0, 0]`` of each example.

    Args:
        _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    hparams = create_hparams()
    hparams.force_full_predict = True
    batch_size = hparams.batch_size

    # Dev/test partition of the data, unshuffled; edit the mode below to
    # evaluate on a different partition.
    problem = registry.problem(FLAGS.problem)
    dataset = problem.dataset(
        tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams)
    dataset = dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))
    data = dataset.make_one_shot_iterator().get_next()

    # Only features whose name starts with "input" are fed to the model.
    input_data = {}
    for name in data.keys():
        if name.startswith("input"):
            input_data[name] = data[name]

    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(input_data)

    n_rewards = hparams.problem.num_rewards
    matrix_shape = (n_rewards, n_rewards)
    cm_per_frame = np.zeros(matrix_shape, dtype=np.uint64)
    cm_next_frame = np.zeros(matrix_shape, dtype=np.uint64)

    saver = tf.train.Saver()
    with tf.train.SingularMonitoredSession() as sess:
        # Restore the most recent checkpoint recorded in FLAGS.output_dir.
        latest = tf.train.get_checkpoint_state(
            FLAGS.output_dir).model_checkpoint_path
        saver.restore(sess.raw_session(), latest)

        step = 0
        while not sess.should_stop():
            step += 1
            print(step)

            rew_pd, rew_gt = sess.run(
                [prediction_ops["target_reward"], data["target_reward"]])

            for row in range(batch_size):
                gt_row = rew_gt[row]
                pd_row = rew_pd[row]
                cm_next_frame[rew_gt[row, 0, 0], rew_pd[row, 0, 0]] += 1
                for gt_val, pd_val in zip(gt_row, pd_row):
                    cm_per_frame[gt_val, pd_val] += 1

    print_confusion_matrix("Per-frame Confusion Matrix", cm_per_frame)
    print_confusion_matrix("Next-frame Confusion Matrix", cm_next_frame)
def main():
    """Build an estimator for FLAGS.model and run decoding with TPU disabled."""
    tf.logging.set_verbosity(tf.logging.INFO)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Decoding is not supported on TPU, so force it off before building hparams.
    FLAGS.use_tpu = False

    hp = create_hparams()
    decode_hp = create_decode_hparams()
    run_config = t2t_trainer.create_run_config(hp)

    estimator = trainer_lib.create_estimator(
        FLAGS.model,
        hp,
        run_config,
        decode_hparams=decode_hp,
        use_tpu=False,
    )
    decode(estimator, hp, decode_hp)
def main(_):
    """Seed the run, build an estimator for FLAGS.model and decode with it.

    Args:
        _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    hp = t2t_decoder.create_hparams()
    decode_hp = t2t_decoder.create_decode_hparams()
    run_config = t2t_trainer.create_run_config(hp)

    estimator = trainer_lib.create_estimator(
        FLAGS.model,
        hp,
        run_config,
        decode_hparams=decode_hp,
        use_tpu=FLAGS.use_tpu,
    )
    decode(estimator, hp, decode_hp)
def main(_):
    """Compute and save video metrics across all decode sub-directories.

    One directory named ``decode_NNNNN`` under FLAGS.output_dir is passed to
    the metrics routine for each of ``decode_hp.num_decodes`` decodes.

    Args:
        _: unused positional argument.
    """
    hparams = t2t_decoder.create_hparams()
    video_problem = hparams.problem
    frame_shape = [
        video_problem.frame_height,
        video_problem.frame_width,
        video_problem.num_channels,
    ]

    decode_hp = t2t_decoder.create_decode_hparams()
    output_dirs = []
    for decode_id in range(decode_hp.num_decodes):
        subdir = "decode_%05d" % decode_id
        output_dirs.append(os.path.join(FLAGS.output_dir, subdir))

    video_metrics.compute_and_save_video_metrics(
        output_dirs,
        FLAGS.problem,
        hparams.video_num_target_frames,
        frame_shape,
    )
def main(_):
    """Compute and save video metrics for every ``decode_NNNNN`` directory.

    Args:
        _: unused positional argument.
    """
    hparams = t2t_decoder.create_hparams()
    decode_hp = t2t_decoder.create_decode_hparams()

    # Frame geometry comes from the problem attached to the hparams.
    prob = hparams.problem
    frame_shape = [prob.frame_height, prob.frame_width, prob.num_channels]

    def _decode_dir(decode_id):
        # Directory holding the outputs of a single decode run.
        return os.path.join(FLAGS.output_dir, "decode_%05d" % decode_id)

    output_dirs = list(map(_decode_dir, range(decode_hp.num_decodes)))

    video_metrics.compute_and_save_video_metrics(
        output_dirs, FLAGS.problem, hparams.video_num_target_frames,
        frame_shape)
def main(_):
    """Run an interactive prediction loop driven by text typed by the user.

    Each text input is converted to action steps and streamed to
    ``StepLoop().predict``. Inputs that fail to convert are reported (message
    plus traceback) and skipped, so the loop keeps running.

    Args:
        _: unused positional argument.
    """
    t2t_decoder.trainer_lib.set_random_seed(FLAGS.random_seed)
    t2t_decoder.usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    hparams = t2t_decoder.create_hparams()
    t2t_decoder.trainer_lib.add_problem_hparams(hparams, FLAGS.problem)
    estimator = create_estimator(FLAGS.model, hparams, FLAGS.checkpoint_path)

    def user_action_steps_generator():
        """Yield action steps for each user input, skipping failed ones."""
        for raw_text in interactive_text_inputs():
            try:
                yield convert_text_inputs_to_action_steps(raw_text)
            except Exception as err:
                # Best effort: report the failure and move on to the next input.
                print(err)
                traceback.print_exc()

    step_source = user_action_steps_generator()
    StepLoop().predict(estimator, step_source)
def main(_):
    """Roll the video model forward with random actions and save the result.

    The rollout is seeded with one batch of real frames, actions and rewards
    from the TRAIN split. At every step the model predicts the next frame and
    reward from a sliding window of the last ``video_num_input_frames``
    entries; the prediction and a random action are pushed back into the
    window. Every frame (seed and predicted) is written to the video writer,
    whose output path is FLAGS.output_gif.

    Args:
        _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Create hparams
    hparams = create_hparams()
    hparams.force_full_predict = True
    hparams.scheduled_sampling_k = -1

    # Params
    num_agents = 1  # TODO(mbz): fix the code for more agents
    num_steps = FLAGS.num_steps
    num_actions = hparams.problem.num_actions
    num_history = hparams.video_num_input_frames
    frame_shape = hparams.problem.frame_shape
    if hparams.preprocess_resize_frames is not None:
        # Build a fresh list instead of `+=` on the hparams value: the in-place
        # form appended the channel count to hparams.preprocess_resize_frames
        # itself, and the same hparams object is handed to the dataset below.
        frame_shape = (list(hparams.preprocess_resize_frames) +
                       [hparams.problem.num_channels])

    dataset = registry.problem(FLAGS.problem).dataset(
        tf.estimator.ModeKeys.TRAIN, shuffle_files=True, hparams=hparams)
    dataset = dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(num_agents))
    data = dataset.make_one_shot_iterator().get_next()

    # Setup input placeholders
    input_size = [num_agents, num_history]
    placeholders = {
        "inputs": tf.placeholder(tf.float32, input_size + frame_shape),
        "input_action": tf.placeholder(tf.int64, input_size + [1]),
        "input_reward": tf.placeholder(tf.int64, input_size + [1]),
    }

    # Create model
    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(placeholders)

    # Fixed-size FIFO windows over the most recent frames/actions/rewards.
    states_q = Queue(maxsize=num_history)
    actions_q = Queue(maxsize=num_history)
    rewards_q = Queue(maxsize=num_history)
    all_qs = (states_q, actions_q, rewards_q)

    writer = common_video.WholeVideoWriter(
        fps=10, output_path=FLAGS.output_gif)

    saver = tf.train.Saver()
    with tf.train.SingularMonitoredSession() as sess:
        # Load latest checkpoint
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.output_dir).model_checkpoint_path
        saver.restore(sess.raw_session(), ckpt)

        # Get init frames from the dataset; split along axis 1 so each queue
        # receives one entry per input frame.
        data_np = sess.run(data)

        frames = np.split(data_np["inputs"], num_history, 1)
        for frame in frames:
            frame = np.squeeze(frame, 1)
            states_q.put(frame)
            writer.write(frame[0].astype(np.uint8))

        actions = np.split(data_np["input_action"], num_history, 1)
        for action in actions:
            actions_q.put(np.squeeze(action, 1))

        rewards = np.split(data_np["input_reward"], num_history, 1)
        for reward in rewards:
            rewards_q.put(np.squeeze(reward, 1))

        for step in range(num_steps):
            print(">>>>>>> ", step)

            # NOTE(review): np.random.randint(high) excludes `high`, so the
            # action with index num_actions - 1 is never sampled and
            # num_actions == 1 raises. Kept as-is; confirm whether
            # np.random.randint(num_actions) was intended.
            random_actions = np.random.randint(num_actions - 1)
            random_actions = np.expand_dims(random_actions, 0)
            random_actions = np.tile(random_actions, (num_agents, 1))

            # Shape inputs and targets
            inputs, input_action, input_reward = (
                np.stack(list(q.queue), axis=1) for q in all_qs)

            # Predict next frames
            feed = {
                placeholders["inputs"]: inputs,
                placeholders["input_action"]: input_action,
                placeholders["input_reward"]: input_reward,
            }
            predictions = sess.run(prediction_ops, feed_dict=feed)

            predicted_states = predictions["targets"][:, 0]
            predicted_reward = predictions["target_reward"][:, 0]

            # Update queues: drop the oldest entry, append the newest.
            new_data = (predicted_states, random_actions, predicted_reward)
            for q, d in zip(all_qs, new_data):
                q.get()
                q.put(d.copy())

            writer.write(np.round(predicted_states[0]).astype(np.uint8))

        video = writer.finish()
        writer.save_to_disk(video)
def __init__(self, difficulty, modeltype, cost, numsg=1, savedir='/tmp/',
             envtype='maze', phorizon=5, parallel=0, tdmdir='/tmp/mazetdm/',
             vaedir='/tmp/mazevae'):
    """Create the environment and load the VAE, TDM and SV2P models.

    Each model is restored into its own ``tf.Graph`` / ``tf.Session`` pair.

    Args:
        difficulty: passed to ``environment.Environment``.
        modeltype: stored as ``self.modeltype``.
        cost: stored as ``self.cost``.
        numsg: stored as ``self.numsg``.
        savedir: stored as ``self.savedir``.
        envtype: only ``'maze'`` is supported.
        phorizon: stored as ``self.phorizon``; also used as the number of
            target frames of the SV2P model.
        parallel: stored as ``self.parallel``.
        tdmdir: directory containing the ``256TDM`` checkpoint folder.
        vaedir: directory containing the ``256_8`` checkpoint folder.

    Raises:
        NotImplementedError: if ``envtype`` is not ``'maze'``.
    """
    # Local import keeps this block self-contained; only used for paths.
    import os

    self.parallel = parallel
    self.envtype = envtype
    self.cost = cost
    self.modeltype = modeltype

    # Only use maze env
    if self.envtype == 'maze':
        self.env = environment.Environment(difficulty=difficulty)
        self.num_acts = 2
    else:
        raise NotImplementedError

    self.numsg = numsg
    self.eps = 0
    self.savedir = savedir
    self.planstep = 0
    self.phorizon = phorizon

    # LOADING VAE
    self.it_graph = tf.Graph()
    with self.it_graph.as_default():
        self.itsess = tf.Session()
        self.it = vae.ImageTransformSC(8)
        outall = self.it(bs=1)
        self.out, _, _, _ = outall
        itsaver = tf.train.Saver()
        # os.path.join instead of string concatenation: the default vaedir has
        # no trailing slash, so `vaedir + '256_8/'` produced
        # '/tmp/mazevae256_8/'. Paths that already end in '/' are unchanged.
        vae_ckpt = os.path.join(vaedir, '256_8', 'model-0')
        # Restore variables from disk.
        itsaver.restore(self.itsess, vae_ckpt)
        print('LOADED VAE!')

    # LOADING TDM
    self.tdm_graph = tf.Graph()
    with self.tdm_graph.as_default():
        self.tdmsess = tf.Session()
        self.tdm = tdm.TemporalModel()
        self.s1 = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
        self.s2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
        self.tdout = tf.nn.softmax(self.tdm(self.s1, self.s2))
        tdmsaver = tf.train.Saver()
        tdm_ckpt = os.path.join(tdmdir, '256TDM', 'model-0')
        # Restore variables from disk.
        tdmsaver.restore(self.tdmsess, tdm_ckpt)
        print('LOADED TDM!')

    # LOADING SV2P (Modify this to your path and problem)
    homedir = '/usr/local/google/home/nairsuraj'
    FLAGS.data_dir = homedir + '/data/maze3/'
    FLAGS.output_dir = homedir + '/models/rs=6.3/maze3/checkpoint'
    FLAGS.problem = 'video_bair_robot_pushing'
    FLAGS.hparams = 'video_num_input_frames=1,video_num_target_frames=10'
    FLAGS.hparams_set = 'next_frame_sv2p'
    FLAGS.model = 'next_frame_sv2p'

    # Create hparams; the target-frame count from the FLAGS.hparams string is
    # overridden with the planning horizon.
    hparams = create_hparams()
    hparams.video_num_input_frames = 1
    hparams.video_num_target_frames = self.phorizon

    # Params
    num_replicas = 200
    frame_shape = hparams.problem.frame_shape

    forward_graph = tf.Graph()
    with forward_graph.as_default():
        self.forward_sess = tf.Session()
        input_size = [num_replicas, hparams.video_num_input_frames]
        target_size = [num_replicas, hparams.video_num_target_frames]
        self.forward_placeholders = {
            'inputs':
                tf.placeholder(tf.float32, input_size + frame_shape),
            'input_action':
                tf.placeholder(tf.float32, input_size + [self.num_acts]),
            'targets':
                tf.placeholder(tf.float32, target_size + frame_shape),
            'target_action':
                tf.placeholder(tf.float32, target_size + [self.num_acts]),
        }
        # Create model
        forward_model_cls = registry.model(FLAGS.model)
        forward_model = forward_model_cls(hparams,
                                          tf_estimator.ModeKeys.PREDICT)
        self.forward_prediction_ops, _ = forward_model(
            self.forward_placeholders)
        forward_saver = tf.train.Saver()
        # NOTE(review): weights are restored from a 'maze6' checkpoint while
        # FLAGS.data_dir / FLAGS.output_dir point at 'maze3' — confirm.
        forward_saver.restore(self.forward_sess,
                              homedir + '/models/rs=6.3/maze6/model.ckpt-0')
        print('LOADED SV2P!')

    _, self.state = self.env.get_observation()