def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         use_rgb=True)
        driver = rldriver.RLDriver(MODELS,
                                   init_state,
                                   view_config,
                                   config.SV_VISCFG,
                                   config.MV_VISCFG,
                                   use_rgb=True,
                                   master_driver=masterdriver,
                                   grads_applier=grad_applier)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 32
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored, global_step {}'.format(epoch))
            while epoch < 100 * 1000:
                driver.train_a3c(sess)
                epoch += 1
                sess.run(increment_global_step)
                if epoch % 1000 == 0 or time.time() - last_time >= 10 * 60:
                    print("Saving checkpoint")
                    fn = saver.save(sess, ckpt_dir + ckpt_prefix, global_step=global_step)
                    print("Saved checkpoint to {}".format(fn))
                    last_time = time.time()
                print("Epoch {}".format(epoch))

'''
def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            view_config,
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True)
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            view_config,
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True,
            master_driver=masterdriver,
            grads_applier=grad_applier)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 2
        global_step = tf.contrib.framework.get_or_create_global_step()
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Restored!')
            epoch = 0
            while True:
                driver.train_a3c(sess)
                epoch += 1
                if epoch % 1000 == 0 or time.time() - last_time >= 10:
                    print("Saving checkpoint")
                    saver.save(sess, ckpt_dir, global_step=global_step)
                    last_time = time.time()
                print("Epoch {}".format(epoch))
'''

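# Several functions in this file reference module-level globals that are
# defined elsewhere (not shown in this excerpt). A sketch of their likely
# values: MODELS, init_state, and view_config are reconstructed from the
# literal arguments used at other call sites below, while the checkpoint
# paths, RMSProp constants, and device string are assumptions based on
# common A3C settings, not confirmed values:
#
#   MODELS = ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj']
#   init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
#   view_config = [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)]
#   ckpt_dir, ckpt_prefix = 'ckpt/', 'torus'                     # assumption
#   RMSP_ALPHA, RMSP_EPSILON, GRAD_NORM_CLIP = 0.99, 0.1, 40.0   # assumption
#   device = '/cpu:0'                                            # assumption
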
def test_rldriver_main():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    g = tf.Graph()
    with g.as_default():
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            thread = threading.Thread(target=test_rldriver_worker, args=(dpy, glctx, driver, g))
            thread.start()
            r = driver.renderer
            w = r.pbufferWidth
            h = r.pbufferHeight
            r.state = init_state
            r.render_mvrgbd()
            img = r.mvrgb.reshape(w * r.views.shape[0], h, 3)
            dep = r.mvdepth.reshape(w * r.views.shape[0], h)
            depimg = Image.fromarray(dep)
            imsave('mvrgb-master.png', img)
            depimg.save('mvdepth-master.tiff')
            thread.join()

def __init__(self, global_step, ckpt_dir='./cat/ckpt', data_dir='./cat/depth_data'):
    self.ckpt_dir = ckpt_dir
    self.data_dir = data_dir
    self.images, self.labels = distorted_inputs(self.data_dir)
    self.driver = rldriver.RLDriver(
        ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
        np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32),
        [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
        config.SV_VISCFG,
        config.MV_VISCFG,
        58,
        input_tensor=self.images)
    print('sv_colorfv = {}'.format(self.driver.sv_colorfv.shape))
    print('mv_colorfv = {}'.format(self.driver.mv_colorfv.shape))
    print('final = {}'.format(self.driver.final.shape))
    self.logits = self.driver.final
    self.global_step = global_step
    print('(sparse) labels {}'.format(self.labels.shape))
    print('logits {}'.format(self.logits.shape))
    self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.labels,
        logits=self.logits,
        name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(self.cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    opt = tf.train.AdamOptimizer(1e-4)
    self.train_op = opt.minimize(loss=self.loss, global_step=self.global_step)

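# A minimal driving loop for the trainer above (a sketch: 'CatTrainer' stands
# in for the enclosing class, whose name does not appear in this excerpt, and
# the step count is illustrative). distorted_inputs() presumably builds a
# queue-based input pipeline, so the queue runners must be started before
# train_op can dequeue a batch:
#
#   with tf.Graph().as_default():
#       global_step = tf.contrib.framework.get_or_create_global_step()
#       trainer = CatTrainer(global_step)
#       with tf.Session() as sess:
#           sess.run(tf.global_variables_initializer())
#           coord = tf.train.Coordinator()
#           queue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#           for _ in range(10000):
#               _, loss_value = sess.run([trainer.train_op, trainer.loss])
#           coord.request_stop()
#           coord.join(queue_threads)
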
def torus_master():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         use_rgb=True)
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        with tf.Session() as sess:
            threads = []
            for i in range(THREAD):
                thread_args = (i, dpy, glctx, masterdriver, g, grad_applier,
                               learning_rate_input, global_step,
                               increment_global_step, sess, saver)
                thread = threading.Thread(target=torus_worker, args=thread_args)
                thread.start()
                graph_completes[i].wait()
                threads.append(thread)
            '''
            We need to run the initializer here because only the master's
            variables are stored in the checkpoint.
            '''
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored, global_step {}'.format(epoch))
            init_done.set()
            for thread in threads:
                thread.join()
            print("Saving final checkpoint")
            fn = saver.save(sess, ckpt_dir + ckpt_prefix, global_step=global_step)
            print("Saved checkpoint to {}".format(fn))

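# torus_master() and torus_worker() coordinate through module-level
# synchronization objects defined elsewhere in this file. A minimal sketch of
# what the call sites imply (THREAD's value is an assumption):
#
#   THREAD = 4
#   graph_completes = [threading.Event() for _ in range(THREAD)]
#   init_done = threading.Event()
#
# Each worker sets graph_completes[index] once its per-thread graph nodes have
# been created, so the master only runs tf.global_variables_initializer()
# after the full graph exists; the workers then block on init_done until the
# master has initialized (or restored) all variables.
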
def train_puzzle():
    pyosr.init()
    pyosr.create_gl_context(pyosr.create_display())  # FIXME: Each thread has one ctx
    init_state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    with tf.Graph().as_default():
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            init_state,
            [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True)

def test_rldriver_worker(dpy, glctx, masterdriver, tfgraph):
    pyosr.create_gl_context(dpy, glctx)  # OpenGL context for the current thread.
    with tfgraph.as_default():
        driver = rldriver.RLDriver(
            ['../res/alpha/env-1.2.obj', '../res/alpha/robot.obj'],
            np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32),
            [(30.0, 12), (-30.0, 12), (0, 4), (90, 1), (-90, 1)],
            config.SV_VISCFG,
            config.MV_VISCFG,
            use_rgb=True,
            master_driver=masterdriver)
        sync_op = driver.get_sync_from_master_op()
    r = driver.renderer
    w = r.pbufferWidth
    h = r.pbufferHeight
    r.state = np.array([0.2, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5], dtype=np.float32)
    r.render_mvrgbd()
    print('worker mvrgb shape {}'.format(r.mvrgb.shape))
    img = r.mvrgb.reshape(w * r.views.shape[0], h, 3)
    dep = r.mvdepth.reshape(w * r.views.shape[0], h)
    depimg = Image.fromarray(dep)
    imsave('mvrgb-worker.png', img)
    depimg.save('mvdepth-worker.tiff')
    with tfgraph.as_default():
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            value_before = np.array(sess.run(driver.get_nn_args()[0][0][0]), dtype=np.float32)
            print('Before {}'.format(value_before))
            sess.run(sync_op)
            value_after = np.array(sess.run(driver.get_nn_args()[0][0][0]), dtype=np.float32)
            print('After {}'.format(value_after))
            print('Delta {}'.format(np.linalg.norm(value_before - value_after)))

def torus_worker(index, dpy, glctx, masterdriver, tfgraph, grad_applier,
                 lrtensor, global_step, increment_global_step, sess, saver):
    global graph_completes
    global init_done
    pyosr.create_gl_context(dpy, glctx)  # OpenGL context for the current thread.
    with tfgraph.as_default():
        driver = rldriver.RLDriver(MODELS,
                                   init_state,
                                   view_config,
                                   config.SV_VISCFG,
                                   config.MV_VISCFG,
                                   use_rgb=True,
                                   master_driver=masterdriver,
                                   grads_applier=grad_applier,
                                   worker_thread_index=index)
        print("THREAD {} DRIVER CREATED".format(index))
        driver.epsilon = 1.0 - (index + 1) * (1.0 / (THREAD + 1))
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = lrtensor
        # driver.a3c_local_t = 32
        graph_completes[index].set()
        init_done.wait()
        '''
        if index == 0:
            for i in range(1, 4):
                graph_completes[i].wait()
                print("Graph {} waited".format(i))
            sess.run(tf.global_variables_initializer())
            init_done.set()
        else:
            graph_completes[index].set()
            print("Graph {} Set".format(index))
            init_done.wait()
            print("Init_done on thread {}, continuing".format(index))
        '''
        last_time = time.time()
        # FIXME: ttorus/ckpt-mt-5 is not loading global_step into epoch
        epoch = 0
        driver.verbose_training = True
        while epoch < 100 * 1000:
        # while epoch < 2 * 1000:
            driver.epsilon = random.choice(POLICIES)
            driver.train_a3c(sess)
            epoch += 1
            sess.run(increment_global_step)
            if index == 0 and time.time() - last_time >= 60 * 10:
                print("Saving checkpoint")
                fn = saver.save(sess, ckpt_dir + ckpt_prefix, global_step=global_step)
                print("Saved checkpoint to {}".format(fn))
                last_time = time.time()
            print("[{}] Epoch {}, global_step {}".format(index, epoch, sess.run(global_step)))
            if index == 0:
                sess.run(driver.get_sync_from_master_op())
                driver.renderer.state = init_state
                _, value, _, _ = driver.evaluate(sess)
                print("Master Driver V for init_state {}".format(value))
            # Choose some random initial conf for better training
            driver.restart_epoch()

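# POLICIES is another module-level global not shown in this excerpt. From the
# call site above it must be a sequence of epsilon values sampled once per
# training iteration; something like POLICIES = [0.05, 0.25, 0.5, 0.75, 0.95]
# would fit, but the actual contents are an assumption.
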
def show_torus_ring():
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         use_rgb=True)
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            policy_before, value_before, _, _ = masterdriver.evaluate(sess)
            # print("Last b before {}".format(sess.run(masterdriver.get_nn_args()[-2])))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored, global_step {}'.format(epoch))
            else:
                print('Cannot find checkpoint at {}'.format(ckpt_dir))
                return
            policy_after, value_after, _, _ = masterdriver.evaluate(sess)
            print("Value before restoring {} and after {}".format(value_before, value_after))
            # print("Last b {}".format(sess.run(masterdriver.get_nn_args()[-2])))
            driver = masterdriver
            r = masterdriver.renderer
            fig = plt.figure()

            class ReAnimator(object):
                reaching_terminal = False
                driver = None
                im = None
                sess = None

                def __init__(self, driver, sess):
                    self.driver = driver
                    self.sess = sess

                def perform(self, framedata):
                    driver = self.driver
                    r = driver.renderer
                    sess = self.sess
                    if not self.reaching_terminal:
                        policy, value, img, dep = driver.evaluate(sess)
                        policy = policy.reshape(driver.action_size)
                        action = driver.make_decision(policy, sess)
                        nstate, reward, self.reaching_terminal = driver.get_reward(action)
                        valid = r.is_valid_state(nstate)
                        print('Current Value {} Policy {} Action {} Reward {}'.format(value, policy, action, reward))
                        print('\tNew State {} Collision Free ? {}'.format(nstate, valid))
                        # print('Action {}, New State {}'.format(action, nstate))
                        rgb = img[0, 0, :, :, :]
                        if self.im is None:
                            print('rgb {}'.format(rgb.shape))
                            self.im = plt.imshow(rgb)
                        else:
                            self.im.set_array(rgb)
                        r.state = nstate

            ra = ReAnimator(driver, sess)
            ani = animation.FuncAnimation(fig, ra.perform)
            plt.show()

def aa_train_main(args):
    ckpt_dir = args.ckptdir
    ckpt_prefix = args.ckptprefix
    device = args.device
    pyosr.init()
    dpy = pyosr.create_display()
    glctx = pyosr.create_gl_context(dpy)
    g = tf.Graph()
    util.mkdir_p(ckpt_dir)
    with g.as_default():
        learning_rate_input = tf.placeholder(tf.float32)
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=RMSP_EPSILON,
                                      clip_norm=GRAD_NORM_CLIP,
                                      device=device)
        masterdriver = rldriver.RLDriver(MODELS,
                                         init_state,
                                         view_config,
                                         config.SV_VISCFG,
                                         config.MV_VISCFG,
                                         output_number=AA_OUTPUT_NUMBER,
                                         use_rgb=True,
                                         continuous_policy_loss=True)
        driver = rldriver.RLDriver(MODELS,
                                   init_state,
                                   view_config,
                                   config.SV_VISCFG,
                                   config.MV_VISCFG,
                                   output_number=AA_OUTPUT_NUMBER,
                                   use_rgb=True,
                                   master_driver=masterdriver,
                                   grads_applier=grad_applier,
                                   continuous_policy_loss=True)
        driver.get_sync_from_master_op()
        driver.get_apply_grads_op()
        driver.learning_rate_input = learning_rate_input
        driver.a3c_local_t = 32
        global_step = tf.contrib.framework.get_or_create_global_step()
        increment_global_step = tf.assign_add(global_step, 1, name='increment_global_step')
        saver = tf.train.Saver(masterdriver.get_nn_args() + [global_step])
        last_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir=ckpt_dir)
            print('ckpt {}'.format(ckpt))
            epoch = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                epoch = sess.run(global_step)
                print('Restored, global_step {}'.format(epoch))
            while epoch < args.iter:
                fn = "{}/{}{:06d}.npz".format(args.path, args.prefix, epoch % args.gtnumber)
                dic = np.load(fn)
                driver.train_from_gt(sess, dic['KEYS'], dic['TR'], dic['ROT'], dic['DIST'])
                epoch += 1
                sess.run(increment_global_step)
                if epoch % 1000 == 0 or time.time() - last_time >= 10 * 60:
                    print("Saving checkpoint")
                    fn = saver.save(sess, ckpt_dir + ckpt_prefix, global_step=global_step)
                    print("Saved checkpoint to {}".format(fn))
                    last_time = time.time()
                print("Epoch {}".format(epoch))

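# aa_train_main() expects an argparse.Namespace providing ckptdir, ckptprefix,
# device, iter, path, prefix, and gtnumber. A parser that satisfies those
# accesses might look like the sketch below (make_aa_argparser is a
# hypothetical helper and the defaults are illustrative assumptions):
#
#   def make_aa_argparser():
#       parser = argparse.ArgumentParser(description='Train from ground-truth .npz samples')
#       parser.add_argument('--ckptdir', default='ckpt/')
#       parser.add_argument('--ckptprefix', default='aa')
#       parser.add_argument('--device', default='/cpu:0')
#       parser.add_argument('--iter', type=int, default=100000)
#       parser.add_argument('--path', default='.')
#       parser.add_argument('--prefix', default='aa-gt-')
#       parser.add_argument('--gtnumber', type=int, default=1)
#       return parser
#
#   aa_train_main(make_aa_argparser().parse_args())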