def testRecoverSessionNoChkptStillRunsLocalInitOp(self):
    # This test checks for backwards compatibility.
    # In particular, we continue to ensure that recover_session will execute
    # local_init_op exactly once, regardless of whether the session was
    # successfully recovered.
    with tf.Graph().as_default():
        w = tf.Variable(
            1,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=None,
            local_init_op=w.initializer)
        # Try to recover a session from None.
        sess, initialized = sm2.recover_session(
            "", saver=None, checkpoint_dir=None)
        # Succeeds because recover_session still runs local_init_op.
        self.assertFalse(initialized)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
        self.assertEqual(1, sess.run(w))
def testRecoverSession(self):
    # Create a checkpoint.
    checkpoint_dir = os.path.join(self.get_temp_dir(), "recover_session")
    try:
        gfile.DeleteRecursively(checkpoint_dir)
    except OSError:
        pass  # Ignore
    gfile.MakeDirs(checkpoint_dir)
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        sm = tf.train.SessionManager(
            ready_op=tf.assert_variables_initialized())
        saver = tf.train.Saver({"v": v})
        sess, initialized = sm.recover_session(
            "", saver=saver, checkpoint_dir=checkpoint_dir)
        self.assertFalse(initialized)
        sess.run(v.initializer)
        self.assertEqual(1, sess.run(v))
        saver.save(sess,
                   os.path.join(checkpoint_dir, "recover_session_checkpoint"))
    # Create a new Graph and SessionManager and recover.
    with tf.Graph().as_default():
        v = tf.Variable(2, name="v")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.assert_variables_initialized())
        saver = tf.train.Saver({"v": v})
        sess, initialized = sm2.recover_session(
            "", saver=saver, checkpoint_dir=checkpoint_dir)
        self.assertTrue(initialized)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
        self.assertEqual(1, sess.run(v))
def testWaitForSessionLocalInit(self):
    server = tf.train.Server.create_local_server()
    with tf.Graph().as_default() as graph:
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        sm = tf.train.SessionManager(
            graph=graph,
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=tf.report_uninitialized_variables(
                tf.all_variables()),
            local_init_op=w.initializer)

        # Initialize v but not w.
        s = tf.Session(server.target, graph=graph)
        s.run(v.initializer)

        sess = sm.wait_for_session(server.target, max_wait_secs=3)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
        self.assertEqual(1, sess.run(v))
        self.assertEqual(1, sess.run(w))
def testIsVariableInitialized(self):
    for use_gpu in [True, False]:
        with self.test_session(use_gpu=use_gpu):
            v0 = state_ops.variable_op([1, 2], tf.float32)
            self.assertEqual(False, tf.is_variable_initialized(v0).eval())
            tf.assign(v0, [[2.0, 3.0]]).eval()
            self.assertEqual(True, tf.is_variable_initialized(v0).eval())
def testPrepareSessionWithReadyForLocalInitOp(self):
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=tf.report_uninitialized_variables(
                tf.all_variables()),
            local_init_op=w.initializer)
        sess = sm2.prepare_session("", init_op=v.initializer)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
        self.assertEqual(1, sess.run(v))
        self.assertEqual(1, sess.run(w))
def testRecoverSessionWithReadyForLocalInitOpFailsToReadyLocal(self):
    # We use ready_for_local_init_op=tf.report_uninitialized_variables(),
    # which causes recover_session to not run local_init_op, and to return
    # initialized=False.

    # Create a checkpoint.
    checkpoint_dir = os.path.join(
        self.get_temp_dir(),
        "recover_session_ready_for_local_init_fails_to_ready_local")
    try:
        gfile.DeleteRecursively(checkpoint_dir)
    except errors.OpError:
        pass  # Ignore
    gfile.MakeDirs(checkpoint_dir)
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        sm = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables())
        saver = tf.train.Saver({"v": v})
        sess, initialized = sm.recover_session(
            "", saver=saver, checkpoint_dir=checkpoint_dir)
        self.assertFalse(initialized)
        sess.run(v.initializer)
        self.assertEqual(1, sess.run(v))
        saver.save(sess,
                   os.path.join(checkpoint_dir, "recover_session_checkpoint"))
    # Create a new Graph and SessionManager and recover.
    with tf.Graph().as_default():
        v = tf.Variable(2, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=tf.report_uninitialized_variables(),
            local_init_op=w.initializer)
        saver = tf.train.Saver({"v": v})
        sess, initialized = sm2.recover_session(
            "", saver=saver, checkpoint_dir=checkpoint_dir)
        self.assertFalse(initialized)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
        self.assertEqual(
            False,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
        self.assertEqual(1, sess.run(v))
def testPrepareSessionFails(self):
    checkpoint_dir = os.path.join(self.get_temp_dir(), "prepare_session")
    checkpoint_dir2 = os.path.join(self.get_temp_dir(), "prepare_session2")
    try:
        gfile.DeleteRecursively(checkpoint_dir)
        gfile.DeleteRecursively(checkpoint_dir2)
    except OSError:
        pass  # Ignore
    gfile.MakeDirs(checkpoint_dir)

    with tf.Graph().as_default():
        v = tf.Variable([1.0, 2.0, 3.0], name="v")
        sm = tf.train.SessionManager(
            ready_op=tf.assert_variables_initialized())
        saver = tf.train.Saver({"v": v})
        sess = sm.prepare_session(
            "",
            init_op=tf.initialize_all_variables(),
            saver=saver,
            checkpoint_dir=checkpoint_dir)
        self.assertAllClose([1.0, 2.0, 3.0], sess.run(v))
        checkpoint_filename = os.path.join(checkpoint_dir,
                                           "prepare_session_checkpoint")
        saver.save(sess, checkpoint_filename)

    # Create a new Graph and SessionManager and recover.
    with tf.Graph().as_default():
        # Rename the checkpoint directory.
        os.rename(checkpoint_dir, checkpoint_dir2)
        gfile.MakeDirs(checkpoint_dir)
        v = tf.Variable([6.0, 7.0, 8.0], name="v")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
        sm = tf.train.SessionManager(
            ready_op=tf.assert_variables_initialized())
        saver = tf.train.Saver({"v": v})
        # This should fail as there's no checkpoint within 2 seconds.
        with self.assertRaisesRegexp(RuntimeError,
                                     "no init_op or init_fn was given"):
            sess = sm.prepare_session(
                "",
                init_op=None,
                saver=saver,
                checkpoint_dir=checkpoint_dir,
                wait_for_checkpoint=True,
                max_wait_secs=2)
        # Rename the checkpoint directory back.
        gfile.DeleteRecursively(checkpoint_dir)
        os.rename(checkpoint_dir2, checkpoint_dir)
        # This should succeed as there's a checkpoint.
        sess = sm.prepare_session(
            "",
            init_op=None,
            saver=saver,
            checkpoint_dir=checkpoint_dir,
            wait_for_checkpoint=True,
            max_wait_secs=2)
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
def testPrepareSessionDidNotInitLocalVariable(self):
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables())
        with self.assertRaisesRegexp(
                RuntimeError, "Init operations did not make model ready"):
            sm2.prepare_session("", init_op=v.initializer)
def initialize_uninitialized(self, sess):
    global_vars = tf.global_variables()
    # One boolean per variable: True if it is already initialized.
    init_flags = sess.run(
        [tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [
        v for (v, f) in zip(global_vars, init_flags) if not f
    ]
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
def initialize_uninitialized(sess):
    global_vars = tf.global_variables()
    # One boolean per variable: True if it is already initialized.
    init_flags = sess.run(
        [tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [
        v for (v, f) in zip(global_vars, init_flags) if not f
    ]
    print([str(i.name) for i in not_initialized_vars])  # only for testing
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
def _build(self):
    unconstrained = self._build_parameter()
    constrained = self._build_constrained(unconstrained)
    prior = self._build_prior(unconstrained, constrained)
    self._is_initialized_tensor = tf.is_variable_initialized(unconstrained)
    self._unconstrained_tensor = unconstrained
    self._constrained_tensor = constrained
    self._prior_tensor = prior
def testPrepareSessionWithInsufficientReadyForLocalInitCheck(self):
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        w = tf.Variable(
            v,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=None,
            local_init_op=w.initializer)
        with self.assertRaisesRegexp(
                tf.errors.FailedPreconditionError,
                "Attempting to use uninitialized value v"):
            sm2.prepare_session("", init_op=None)
def testRecoverSessionFailsStillRunsLocalInitOp(self):
    # Create a checkpoint directory (but no checkpoint to recover from).
    checkpoint_dir = os.path.join(
        self.get_temp_dir(),
        "recover_session_ready_for_local_init_fails_still_run")
    try:
        gfile.DeleteRecursively(checkpoint_dir)
    except errors.OpError:
        pass  # Ignore
    gfile.MakeDirs(checkpoint_dir)

    # Create a new Graph and SessionManager and recover.
    with tf.Graph().as_default():
        v = tf.Variable(2, name="v")
        w = tf.Variable(
            1,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES],
            name="w")
        with self.test_session():
            self.assertEqual(False, tf.is_variable_initialized(v).eval())
            self.assertEqual(False, tf.is_variable_initialized(w).eval())
        sm2 = tf.train.SessionManager(
            ready_op=tf.report_uninitialized_variables(),
            ready_for_local_init_op=None,
            local_init_op=w.initializer)
        saver = tf.train.Saver({"v": v})
        sess, initialized = sm2.recover_session(
            "", saver=saver, checkpoint_dir=checkpoint_dir,
            wait_for_checkpoint=False)
        self.assertFalse(initialized)
        self.assertEqual(
            False,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
        self.assertEqual(
            True,
            tf.is_variable_initialized(
                sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
        self.assertEqual(1, sess.run(w))
def initialize_uninitialized(sess=None):
    """
    Initialize uninitialized variables; doesn't affect those already
    initialized.

    :param sess: in which session to initialize stuff.
                 Defaults to tf.get_default_session().
    """
    sess = sess or tf.get_default_session()
    global_vars = tf.global_variables()
    # One boolean per variable: True if it is already initialized.
    init_flags = sess.run(
        [tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [
        v for (v, f) in zip(global_vars, init_flags) if not f
    ]
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
def save(self, save_file_name=None):
    variables = []
    for i in self._vars:
        if tf.is_variable_initialized(i).eval():
            try:
                variables.append(
                    (self.removeUUIDandColon(i.name), i.value().eval()))
            except Exception:
                # TODO: Don't do this. Limit exceptions to known expected ones.
                pass
    if save_file_name is None:
        save_file_name = self._file_name
    with open(save_file_name, "wb") as file:
        pkl.dump(variables, file)
def testPrepareSessionReadyWithInit(self):
    with tf.Graph().as_default():
        v = tf.Variable(1, name="v")
        is_v_initialized = tf.is_variable_initialized(v)
        with self.test_session():
            self.assertEqual(False, is_v_initialized.eval())
        sm = tf.train.SessionManager()
        # prepare_session returns a session even though v is not initialized,
        # because no ready_op was provided, so the model is trivially ready.
        sess = sm.prepare_session("")
        self.assertEqual(False, sess.run(is_v_initialized))
        sess.run(v.initializer)
        self.assertEqual(True, sess.run(is_v_initialized))
def init_uninited_vars(vars=None):
    if vars is None:
        vars = tf.global_variables()
    test_vars = []
    test_ops = []
    with tf.control_dependencies(None):  # ignore surrounding control_dependencies
        for var in vars:
            assert is_tf_expression(var)
            try:
                tf.get_default_graph().get_tensor_by_name(
                    var.name.replace(':0', '/IsVariableInitialized:0'))
            except KeyError:
                # Op does not exist => variable may be uninitialized.
                test_vars.append(var)
                with absolute_name_scope(var.name.split(':')[0]):
                    test_ops.append(tf.is_variable_initialized(var))
    init_vars = [var for var, inited in zip(test_vars, run(test_ops))
                 if not inited]
    run([var.initializer for var in init_vars])
def initialize_uninitialized_variables(sess):
    """
    Only initialize the weights that have not yet been initialized by other
    means, such as importing a metagraph and a checkpoint. It's useful when
    extending an existing model.
    """
    uninit_vars = []
    uninit_tensors = []
    for var in tf.global_variables():
        uninit_vars.append(var)
        uninit_tensors.append(tf.is_variable_initialized(var))
    uninit_bools = sess.run(uninit_tensors)
    uninit = zip(uninit_bools, uninit_vars)
    uninit = [var for init, var in uninit if not init]
    sess.run(tf.variables_initializer(uninit))
def _create_autosummary_var(name, value_expr):
    assert not _autosummary_finalized
    v = tf.cast(value_expr, tf.float32)
    if v.shape.ndims == 0:
        v = [v, np.float32(1.0)]
    elif v.shape.ndims == 1:
        v = [tf.reduce_sum(v), tf.cast(tf.shape(v)[0], tf.float32)]
    else:
        v = [tf.reduce_sum(v),
             tf.reduce_prod(tf.cast(tf.shape(v), tf.float32))]
    v = tf.cond(tf.is_finite(v[0]), lambda: tf.stack(v), lambda: tf.zeros(2))
    with tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(2))  # [numerator, denominator]
    update_op = tf.cond(tf.is_variable_initialized(var),
                        lambda: tf.assign_add(var, v),
                        lambda: tf.assign(var, v))
    if name in _autosummary_vars:
        _autosummary_vars[name].append(var)
    else:
        _autosummary_vars[name] = [var]
    return update_op
def initialize_uninitialized_global_variables(sess):
    """
    Only initializes the variables of a TensorFlow session that were not
    already initialized.
    :param sess: the TensorFlow session
    :return:
    """
    # List all global variables.
    global_vars = tf.global_variables()

    # Find initialized status for all variables.
    is_var_init = [tf.is_variable_initialized(var) for var in global_vars]
    is_initialized = sess.run(is_var_init)

    # List all variables that were not initialized previously.
    not_initialized_vars = [var for (var, init) in
                            zip(global_vars, is_initialized) if not init]

    # Initialize all uninitialized variables found, if any.
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
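# --- Usage sketch (not from the original sources): the helper above is
# typically called right after a partial restore. TF1-style; `build_model`
# and the checkpoint path below are hypothetical stand-ins.
def restore_then_fill_in(checkpoint_path='/tmp/pretrained.ckpt'):
    build_model()  # hypothetical: builds the graph and creates variables
    saver = tf.train.Saver(tf.trainable_variables())
    with tf.Session() as sess:
        # restore() initializes only the variables present in the checkpoint...
        saver.restore(sess, checkpoint_path)
        # ...and this fills in whatever the restore did not cover
        # (e.g. a new classification head or optimizer slot variables).
        initialize_uninitialized_global_variables(sess)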
def train(self, mnist, expected_steps=1000):
    if self.train_mode is False:
        raise Exception("Sorry, I can't train it...")
    if self.finetune is True:
        to_do_var_list = []
        var_list = tf.global_variables() + tf.local_variables()
        for one_var in var_list:
            if not self.sess.run(tf.is_variable_initialized(one_var)):
                to_do_var_list.append(one_var)
        self.sess.run(tf.variables_initializer(to_do_var_list))
    else:
        self.sess.run(tf.global_variables_initializer())
    for i in range(expected_steps):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = self.accuracy.eval(
                session=self.sess,
                feed_dict={self.x: batch[0], self.y_: batch[1],
                           self.keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        self.train_step.run(
            session=self.sess,
            feed_dict={self.x: batch[0], self.y_: batch[1],
                       self.keep_prob: 0.5})
    print("test accuracy %g" % self.accuracy.eval(
        session=self.sess,
        feed_dict={self.x: mnist.test.images, self.y_: mnist.test.labels,
                   self.keep_prob: 1.0}))
# Output of the NN.
curr_output = nn_policy(inputs_placeholder, inputSize, outputSize,
                        num_fc_layers, depth_fc_layers, tf_datatype)

# Define training.
theta = tf.trainable_variables()
loss = tf.reduce_mean(tf.square(curr_output - labels_placeholder))
opt = tf.train.AdamOptimizer(learning_rate)
gv = [(g, v) for g, v in opt.compute_gradients(loss, theta) if g is not None]
train_step = opt.apply_gradients(gv)

# Get all the uninitialized variables (i.e. right now, all of them).
list_vars = []
for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
    if not tf.is_variable_initialized(var).eval():
        list_vars.append(var)
sess.run(tf.variables_initializer(list_vars))

# Aggregation iterations.
for agg_iter in range(num_agg_iters):
    print("ON AGGREGATION ITERATION ", agg_iter)
    rewards_for_this_iter = []
    plot_trainingloss_x = []
    plot_trainingloss_y = []
    plot_validloss_x = []
    plot_validloss_y = []
    for i in range(nEpoch):
def train_model(self, needInit=True, loadmodelType=1, epochs=2):
    input_x = self.inputParams['input_x']
    input_y = self.inputParams['input_y']
    keep_prob = self.inputParams['keep_prob']
    loss = self.outputParams['loss']
    accuracy = self.outputParams['accuracy']
    train_op = self.outputParams['train_op']
    merged_summary = self.summaryParams['merged_summary']

    with tf.Session() as sess:
        default_graph = sess.graph
        if needInit:
            init = tf.global_variables_initializer()
            sess.run(init)
        elif loadmodelType == 'all':
            # 'all' means: load all variables.
            init_saver = tf.train.Saver()
            init_saver.restore(sess,
                               os.path.join(MODEL_PATH, TENSORFLOW_MODEL_DIR))
        else:
            # Otherwise, load only the word embeddings.
            init_saver = tf.train.Saver({
                "bert/embeddings/word_embeddings":
                    default_graph.get_tensor_by_name(
                        "embedding/encoder_embedding:0")})
            # The model must be downloaded with this helper, then loaded.
            path = remote_helper.get_remote_date(
                "https://www.flyai.com/m/chinese_L-12_H-768_A-12.zip")
            init_saver.restore(sess, path)
            # For local testing:
            # init_saver.restore(sess, os.path.join(
            #     os.getcwd(), 'chinese_L-12_H-768_A-12', 'bert_model.ckpt'))

        global_vars = tf.global_variables()
        # One boolean per variable: True if it is already initialized.
        init_flags = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, init_flags) if not f
        ]
        # print([str(i.name) for i in not_initialized_vars])  # only for testing
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

        filters = default_graph.get_tensor_by_name("cnn/filters:0")
        fc1 = default_graph.get_tensor_by_name('fc1/fc1:0')
        bias1 = default_graph.get_tensor_by_name('fc1/bias1:0')
        fc2 = default_graph.get_tensor_by_name('fc2/fc2:0')
        bias2 = default_graph.get_tensor_by_name('fc2/bias2:0')
        # partial_init = default_graph.get_operation_by_name('partial_init')
        # sess.run(partial_init)

        train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph)
        # dataset = Dataset(train_batch=128, val_batch=64, split_ratio=0.9)
        # epochs = 2
        # step = math.ceil(self.data.get_train_length() / min(256,))
        max_acc = 0
        min_loss = 0
        save_saver = tf.train.Saver()
        for j in range(self.data.get_step()):
            x_train, y_train = self.data.next_train_batch()
            fetches = [loss, accuracy, train_op]
            feed_dict = {input_x: x_train, input_y: y_train, keep_prob: 0.8}
            loss_, accuracy_, _ = sess.run(fetches, feed_dict=feed_dict)

            if j % 100 == 0 or j == self.data.get_step() - 1:
                summary_train = sess.run(merged_summary, feed_dict=feed_dict)
                train_writer.add_summary(summary_train, j)
                nSmp_val = 0
                nCount = 0
                ave_loss = 0
                for i in range(10):
                    x_val, y_val = self.data.next_validation_batch()
                    summary_val = sess.run(
                        [loss, accuracy],
                        feed_dict={input_x: x_val, input_y: y_val,
                                   keep_prob: 1.0})
                    nSmp_val += x_val.shape[0]
                    nCount += summary_val[1] * x_val.shape[0]
                    ave_loss += summary_val[0]
                val_accuracy = nCount / nSmp_val
                ave_loss = ave_loss / 10
                print('step: {} | train loss: {} | train accuracy: {} | '
                      'validation loss: {} | validation accuracy: {}'.format(
                          j, loss_, accuracy_, ave_loss, val_accuracy))
                if (val_accuracy > max_acc
                        or (val_accuracy == max_acc and ave_loss < min_loss)):
                    max_acc, min_loss = val_accuracy, ave_loss
                    save_path = save_saver.save(
                        sess, os.path.join(MODEL_PATH, TENSORFLOW_MODEL_DIR))
                    print("Model saved in path: %s" % save_path)
def _build(self):
    tensor = self._build_parameter()
    self._dataholder_tensor = tensor
    self._is_initialized_tensor = tf.is_variable_initialized(tensor)
def init_uninitialized():
    for var in tf.all_variables():
        if not sess.run(tf.is_variable_initialized(var)):
            sess.run(tf.variables_initializer([var]))
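# --- Variant sketch (not from the original sources): the loop above issues
# one sess.run per variable, which is slow over a remote session. The checks
# can be batched into a single call and the initializers grouped into one op:
def init_uninitialized_batched(sess):
    all_vars = tf.all_variables()
    init_flags = sess.run([tf.is_variable_initialized(v) for v in all_vars])
    sess.run(tf.variables_initializer(
        [v for v, inited in zip(all_vars, init_flags) if not inited]))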
def run_worker():
    """Main worker loop."""
    # todo: rename "config" into distributed_config
    config = load_config()
    cluster_spec = config.cluster_spec
    # import pdb; pdb.set_trace()
    ps_tasks = len(cluster_spec['ps'])
    assert ps_tasks >= 0

    # Returns a device like /job:worker/task:0.
    worker_device = ''
    assert config.task_type == 'worker'
    if config.task_id == 1:
        time.sleep(60)  # slow down second worker
    worker_device = get_worker_device(config.task_id)
    ps_device = get_ps_device(0)

    # todo: replace with int64
    # todo: replace with varscope.getvariable like in alextp suggestion
    with timeit("worker graph create"):
        params = make_params()
        with tf.device(worker_device):
            val = tf.ones((), dtype=params.dtype)
            grads = tf.fill([params.shape[0]], val)
        # todo: add two-way communication
        with tf.device(ps_device):
            update = params.assign_add(grads)
        params0 = params[0]
        # uninitialized_op = tf.report_uninitialized_variables()
        initialized_op = tf.is_variable_initialized(params)
        # todo: check how estimator does it

    # TODO: retries for errors during server creation?
    # It can fail if the assigned port is unavailable.
    with timeit("worker server start"):
        server = tf.train.Server(cluster_spec, config=session_config(),
                                 job_name=config.task_type,
                                 task_index=config.task_id)

    # Follow the logic in prepare_session:
    # https://github.com/tensorflow/tensorflow/blob/22586bdf900640217deac6dc826054bc6e785518/tensorflow/python/training/session_manager.py#L71
    def create_session():
        # uninited_list = ['somevariable']
        is_initialized = False
        while not is_initialized:
            try:
                with timeit("session creation"):
                    sess = tf.InteractiveSession(server.target,
                                                 config=session_config())
                with timeit("sessrun"):
                    # uninited_list = sessrun(uninitialized_op)
                    is_initialized = sessrun(initialized_op)
            except Exception as e:
                print("Initialization failed with %s, retrying" % (e,))
            if not is_initialized:
                print(("Model not initialized, "
                       "retrying in %.1f seconds" % (RETRY_DELAY_SEC,)))
                time.sleep(RETRY_DELAY_SEC)
        return sess

    # Are there failures in creating the session?
    with timeit('create session'):
        sess = tf.InteractiveSession(server.target, config=session_config())

    # Only run initialization on worker task 0.
    if config.task_id == 0:
        sess_run_succeeded = False
        while not sess_run_succeeded:
            try:
                with timeit('initialize vars'):
                    sessrun(params.initializer)
                sess_run_succeeded = True
            except Exception as e:
                print("Initialization failed with %s, retrying "
                      "in %.1f sec" % (e, RETRY_DELAY_SEC))
                # This can fail if workers took too long to come up and
                # sessrun failed with DeadlineExceeded.
                time.sleep(RETRY_DELAY_SEC)

    for step in range(FLAGS.iters):
        start_time = time.time()
        for i in range(FLAGS.iters_per_step):
            sess_run_succeeded = False
            while not sess_run_succeeded:
                try:
                    sessrun(update)
                    sess_run_succeeded = True
                except Exception as e:
                    # Exception when the ps restarts; need to recreate the session.
                    print(("sess run failed with %s, "
                           "retrying in %.1f seconds" % (e, RETRY_DELAY_SEC,)))
                    time.sleep(RETRY_DELAY_SEC)
                    sess = create_session()
        elapsed_time = time.time() - start_time
        rate = float(FLAGS.iters_per_step) * FLAGS.data_mb / elapsed_time
        event = write_event('rate', rate, step)
        print('%.2f MB/s' % (rate,))
def learn(self, total_timesteps, callback=None, log_interval=100,
          tb_log_name="ACKTR", reset_num_timesteps=True):
    new_tb_log = self._init_num_timesteps(reset_num_timesteps)
    callback = self._init_callback(callback)

    with SetVerbosity(self.verbose), \
            TensorboardWriter(self.graph, self.tensorboard_log, tb_log_name,
                              new_tb_log) as writer:
        self._setup_learn()
        self.n_batch = self.n_envs * self.n_steps

        self.learning_rate_schedule = Scheduler(
            initial_value=self.learning_rate,
            n_values=total_timesteps,
            schedule=self.lr_schedule)

        # The FIFO queue of the q_runner thread is closed at the end of the
        # learn function. As a result, it needs to be redefined at every call.
        with self.graph.as_default():
            with tf.variable_scope(
                    "kfac_apply",
                    reuse=self.trained,
                    custom_getter=tf_util.outer_scope_getter("kfac_apply")):
                # Some of the variables are not in a scope when they are
                # created, so we make a note of any previously uninitialized
                # variables.
                tf_vars = tf.global_variables()
                is_uninitialized = self.sess.run(
                    [tf.is_variable_initialized(var) for var in tf_vars])
                old_uninitialized_vars = [
                    v for (v, f) in zip(tf_vars, is_uninitialized) if not f
                ]

                self.train_op, self.q_runner = self.optim.apply_gradients(
                    list(zip(self.grads_check, self.params)))

                # Then we check for new uninitialized variables and
                # initialize them.
                tf_vars = tf.global_variables()
                is_uninitialized = self.sess.run(
                    [tf.is_variable_initialized(var) for var in tf_vars])
                new_uninitialized_vars = [
                    v for (v, f) in zip(tf_vars, is_uninitialized)
                    if not f and v not in old_uninitialized_vars
                ]

                if len(new_uninitialized_vars) != 0:
                    self.sess.run(
                        tf.variables_initializer(new_uninitialized_vars))

        self.trained = True

        t_start = time.time()
        coord = tf.train.Coordinator()
        if self.q_runner is not None:
            enqueue_threads = self.q_runner.create_threads(
                self.sess, coord=coord, start=True)
        else:
            enqueue_threads = []

        callback.on_training_start(locals(), globals())

        for update in range(1, total_timesteps // self.n_batch + 1):

            callback.on_rollout_start()

            # pytype:disable=bad-unpacking
            # true_reward is the reward without discount.
            if isinstance(self.runner, PPO2Runner):
                # We are using GAE.
                rollout = self.runner.run(callback)
                obs, returns, masks, actions, values, _, states, ep_infos, true_reward = rollout
            else:
                rollout = self.runner.run(callback)
                obs, states, returns, masks, actions, values, ep_infos, true_reward = rollout
            # pytype:enable=bad-unpacking

            callback.on_rollout_end()

            # Early stopping due to the callback.
            if not self.runner.continue_training:
                break

            self.ep_info_buf.extend(ep_infos)

            policy_loss, value_loss, policy_entropy = self._train_step(
                obs, states, returns, masks, actions, values,
                self.num_timesteps // (self.n_batch + 1), writer)
            n_seconds = time.time() - t_start
            fps = int((update * self.n_batch) / n_seconds)

            if writer is not None:
                total_episode_reward_logger(
                    self.episode_reward,
                    true_reward.reshape((self.n_envs, self.n_steps)),
                    masks.reshape((self.n_envs, self.n_steps)),
                    writer, self.num_timesteps)

            if self.verbose >= 1 and (update % log_interval == 0
                                      or update == 1):
                explained_var = explained_variance(values, returns)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", self.num_timesteps)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("policy_loss", float(policy_loss))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance",
                                      float(explained_var))
                if len(self.ep_info_buf) > 0 and len(self.ep_info_buf[0]) > 0:
                    logger.logkv(
                        'ep_reward_mean',
                        safe_mean([ep_info['r']
                                   for ep_info in self.ep_info_buf]))
                    logger.logkv(
                        'ep_len_mean',
                        safe_mean([ep_info['l']
                                   for ep_info in self.ep_info_buf]))
                logger.dump_tabular()

        coord.request_stop()
        coord.join(enqueue_threads)

    callback.on_training_end()
    return self
# Save model objects to serialized format.
saver.save(sess, "./model_ckpt_dense_pruned")

# Retrain networks.
cross_entropy = -tf.reduce_sum(
    y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
trainer = tf.train.AdamOptimizer(1e-4)
grads_and_vars = trainer.compute_gradients(cross_entropy)
grads_and_vars = apply_prune_on_grads(grads_and_vars, dict_nzidx)
train_step = trainer.apply_gradients(grads_and_vars)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Initialize freshly touched variables (mostly from the accuracy calc.).
for var in tf.all_variables():
    if not tf.is_variable_initialized(var).eval():
        sess.run(tf.initialize_variables([var]))

# Train x epochs additionally.
for i in range(papl.config.retrain_iterations):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Save retrained variables to a dense form.
# key = check_file_exists("model_ckpt_dense_retrained")
# saver.save(sess, key)
saver.save(sess, "model_ckpt_dense_retrained")
def train(self):
    """
    Trains policy on env using algo

    Pseudocode:
        for itr in n_itr:
            for step in num_inner_grad_steps:
                sampler.sample()
                algo.compute_updated_dists()
            algo.optimize_policy()
            sampler.update_goals()
    """
    with self.sess.as_default() as sess:
        # Initialize uninitialized vars (only vars that were not loaded).
        uninit_vars = [
            var for var in tf.global_variables()
            if not sess.run(tf.is_variable_initialized(var))
        ]
        sess.run(tf.variables_initializer(uninit_vars))

        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            self.sampler.update_tasks()
            itr_start_time = time.time()
            logger.log(
                "\n ---------------- Iteration %d ----------------" % itr)
            logger.log("Sampling set of tasks/goals for this meta-batch...")

            """ -------------------- Sampling --------------------------"""
            logger.log("Obtaining samples...")
            time_env_sampling_start = time.time()
            paths = self.sampler.obtain_samples(log=True, log_prefix='train-')
            sampling_time = time.time() - time_env_sampling_start

            """ ----------------- Processing Samples ---------------------"""
            logger.log("Processing samples...")
            time_proc_samples_start = time.time()
            samples_data = self.sample_processor.process_samples(
                paths, log='all', log_prefix='train-')
            proc_samples_time = time.time() - time_proc_samples_start

            if type(paths) is list:
                self.log_diagnostics(paths, prefix='train-')
            else:
                self.log_diagnostics(sum(paths.values(), []), prefix='train-')

            """ ------------------ Policy Update ---------------------"""
            logger.log("Optimizing policy...")
            # This needs to take all samples_data so that it can construct
            # the graph for meta-optimization.
            time_optimization_step_start = time.time()
            self.algo.optimize_policy(samples_data)

            """ ------------------- Logging Stuff --------------------------"""
            logger.logkv('Itr', itr)
            logger.logkv('n_timesteps', self.sampler.total_timesteps_sampled)
            logger.logkv('Time-Optimization',
                         time.time() - time_optimization_step_start)
            logger.logkv('Time-SampleProc', np.sum(proc_samples_time))
            logger.logkv('Time-Sampling', sampling_time)
            logger.logkv('Time', time.time() - start_time)
            logger.logkv('ItrTime', time.time() - itr_start_time)

            logger.log("Saving snapshot...")
            params = self.get_itr_snapshot(itr)
            logger.save_itr_params(itr, params)
            logger.log("Saved")

            logger.dumpkvs()
            if itr == 0:
                sess.graph.finalize()

        logger.log("Training finished")
    self.sess.close()
def main(_):
    # Import data.
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    y_train = np.eye(10)[y_train]
    y_test = np.eye(10)[y_test]
    train_ind = 0
    test_ind = 0

    model = create_model()
    x = model.x
    y_ = model.y_
    cross_entropy = model.cross_entropy
    layers = model.layers
    logits = model.logits

    solver = create_admm_solver(model)
    keep_prob = model.keep_prob
    train_step = solver.train_step
    train_step1 = solver.train_step1

    W_conv1 = model.W_conv1
    W_conv2 = model.W_conv2
    W_fc1 = model.W_fc1
    W_fc2 = model.W_fc2

    A = solver.A
    B = solver.B
    C = solver.C
    D = solver.D
    E = solver.E
    F = solver.F
    G = solver.G
    H = solver.H

    my_trainer = tf.train.AdamOptimizer(1e-3)
    grads = my_trainer.compute_gradients(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            # batch = mnist.train.next_batch(50)
            batch = (x_train[train_ind:train_ind + 50].reshape(-1, 784),
                     y_train[train_ind:train_ind + 50])
            train_ind = (train_ind + 50) % len(x_train)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: x_test.reshape(-1, 784), y_: y_test, keep_prob: 1.0}))

        Z1 = sess.run(W_conv1)
        Z1 = projection(Z1, percent=P1)
        U1 = np.zeros_like(Z1)

        Z2 = sess.run(W_conv2)
        Z2 = projection(Z2, percent=P2)
        U2 = np.zeros_like(Z2)

        Z3 = sess.run(W_fc1)
        Z3 = projection(Z3, percent=P3)
        U3 = np.zeros_like(Z3)

        Z4 = sess.run(W_fc2)
        Z4 = projection(Z4, percent=P4)
        U4 = np.zeros_like(Z4)

        for j in range(30):
            for i in range(5000):
                # batch = mnist.train.next_batch(50)
                batch = (x_train[train_ind:train_ind + 50].reshape(-1, 784),
                         y_train[train_ind:train_ind + 50])
                train_ind = (train_ind + 50) % len(x_train)
                if i % 100 == 0:
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0], y_: batch[1], keep_prob: 1.0})
                    print('step %d, training accuracy %g' % (i, train_accuracy))
                train_step1.run(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0,
                    A: Z1, B: U1, C: Z2, D: U2,
                    E: Z3, F: U3, G: Z4, H: U4})

            Z1 = sess.run(W_conv1) + U1
            Z1 = projection(Z1, percent=P1)
            U1 = U1 + sess.run(W_conv1) - Z1

            Z2 = sess.run(W_conv2) + U2
            Z2 = projection(Z2, percent=P2)
            U2 = U2 + sess.run(W_conv2) - Z2

            Z3 = sess.run(W_fc1) + U3
            Z3 = projection(Z3, percent=P3)
            U3 = U3 + sess.run(W_fc1) - Z3

            Z4 = sess.run(W_fc2) + U4
            Z4 = projection(Z4, percent=P4)
            U4 = U4 + sess.run(W_fc2) - Z4

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: x_test.reshape(-1, 784), y_: y_test, keep_prob: 1.0}))

        print(LA.norm(sess.run(W_conv1) - Z1))
        print(LA.norm(sess.run(W_conv2) - Z2))
        print(LA.norm(sess.run(W_fc1) - Z3))
        print(LA.norm(sess.run(W_fc2) - Z4))

        dense_w['conv1/W_conv1'] = W_conv1
        dense_w['conv2/W_conv2'] = W_conv2
        dense_w['fc1/W_fc1'] = W_fc1
        dense_w['fc2/W_fc2'] = W_fc2

        dict_nzidx = apply_prune(dense_w, sess)
        print("checking space dictionary")
        print(dict_nzidx.keys())

        grads = apply_prune_on_grads(grads, dict_nzidx)
        apply_gradient_op = my_trainer.apply_gradients(grads)

        # Initialize any variables created by apply_gradients
        # (e.g. Adam slot variables).
        for var in tf.global_variables():
            if not tf.is_variable_initialized(var).eval():
                sess.run(tf.variables_initializer([var]))

        print("start retraining after pruning")
        for i in range(20000):
            # batch = mnist.train.next_batch(50)
            batch = (x_train[train_ind:train_ind + 50].reshape(-1, 784),
                     y_train[train_ind:train_ind + 50])
            train_ind = (train_ind + 50) % len(x_train)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            apply_gradient_op.run(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: x_test.reshape(-1, 784), y_: y_test, keep_prob: 1.0}))

        print(np.sum(sess.run(W_conv1) != 0))
        print(np.sum(sess.run(W_conv2) != 0))
        print(np.sum(sess.run(W_fc1) != 0))
        print(np.sum(sess.run(W_fc2) != 0))

        # Do the saving.
        saver = tf.train.Saver()
        saver.save(sess, "./lenet_5_pruned_model.ckpt")
def are_variables_initialized(vars):
    """Op which is True iff all vars are initialized."""
    result = True
    for var in vars:
        result = tf.logical_and(tf.is_variable_initialized(var), result)
    return result
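# --- Equivalent sketch (not from the original sources): the Python-side fold
# above can be expressed as a single graph op with tf.stack/tf.reduce_all;
# the result should match are_variables_initialized for a non-empty var list.
def are_variables_initialized_v2(var_list):
    return tf.reduce_all(
        tf.stack([tf.is_variable_initialized(v) for v in var_list]))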
def initialize_uninitialized_vars(session):
    uninitialized = [
        var for var in tf.all_variables()
        if not session.run(tf.is_variable_initialized(var))
    ]
    session.run(tf.initialize_variables(uninitialized))
def __init__(self, clf, height=28, width=28, colors=1, n_classes=10,
             learning_rate=0.001, optimizer='Adam', model_path=None,
             sampling=100):
    """
    A classifier model made to fit on mnist-like datasets.
    """
    # Input dimension parameters, designed for the mnist dataset.
    self.h_input = height
    self.w_input = width
    self.input_channels = colors
    self.n_classes = n_classes
    self.sampling = sampling

    # Placeholder definition.
    self.X = tf.placeholder(
        dtype=tf.float32,
        shape=(None, self.h_input, self.w_input, self.input_channels),
        name='input')
    self.Y = tf.placeholder(dtype=tf.int32, shape=(None, n_classes),
                            name='ground_truth')
    self.lr = tf.get_variable("learning_rate", initializer=learning_rate,
                              trainable=False)

    # Architectures.
    self.lenet = clf

    # Tensors for objectives.
    self.Y_pred = self.lenet(self.X)
    self.Y_predcat = tf.cast(
        tf.argmax(self.Y_pred, axis=1, name='classes'), tf.int32)
    self.Y_cat = tf.cast(tf.argmax(self.Y, axis=1, name='classes'), tf.int32)

    # Objectives.
    self.fY = tf.cast(self.Y, tf.float32)
    self.loss = tf.reduce_mean(
        tf.keras.backend.categorical_crossentropy(self.fY, self.Y_pred,
                                                  from_logits=False))
    # self.loss = - tf.reduce_mean(self.fY * tf.log(self.Y_pred + 1e-10)
    #     + (1. - self.fY) * tf.log((1. - self.Y_pred) + 1e-10))
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.Y_predcat, self.Y_cat), tf.float32))

    # Optimization.
    if optimizer == 'Adam':
        self.optimizer = tf.train.AdamOptimizer(self.lr)
    elif optimizer == 'SGD':
        self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
    elif optimizer == 'RMS':
        self.optimizer = tf.train.RMSPropOptimizer(self.lr)

    # Training procedures.
    self.training = self.optimizer.minimize(
        self.loss, var_list=self.lenet.trainable_weights)

    # At the end, do what all models do with the computation graph.
    self.saver = tf.train.Saver(var_list=self.lenet.trainable_weights)
    self.sess = tf.Session()

    # Graph initialization; handles the case where we update a previous graph.
    if model_path is None:
        self.sess.run(tf.global_variables_initializer())
    else:
        print("\nLoading weights from a previous trained model at "
              + model_path + " !!!")
        self.saver.restore(self.sess, model_path)
        # Hunt down not-initialized variables.
        global_vars = tf.global_variables()
        init_flags = self.sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, init_flags) if not f
        ]
        if len(not_initialized_vars):
            self.sess.run(tf.variables_initializer(not_initialized_vars))
def __init__(self, sess, max_len, num_classes, vocab_size, batch_size,
             dim_word, filter_sizes, num_filters, source_dict, target_dict,
             gpu_device, positive_data, negative_data, source_data,
             vocab_size_s=None, dev_positive_data=None,
             dev_negative_data=None, dev_source_data=None, max_epoches=10,
             dispFreq=1, saveFreq=10, devFreq=1000, clip_c=1.0,
             optimizer='adadelta', saveto='discriminator', reload=False,
             reshuffle=False, l2_reg_lambda=0.0, scope='discnn',
             init_device="/cpu:0", reuse_var=False):
    self.sess = sess
    self.max_len = max_len
    self.num_classes = num_classes
    self.vocab_size = vocab_size
    self.dim_word = dim_word
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.l2_reg_lambda = l2_reg_lambda
    self.num_filters_total = sum(self.num_filters)
    self.scope = scope

    self.positive_data = positive_data
    self.negative_data = negative_data
    self.source_data = source_data
    self.dev_positive_data = dev_positive_data
    self.dev_negative_data = dev_negative_data
    self.dev_source_data = dev_source_data
    self.reshuffle = reshuffle
    self.batch_size = batch_size
    self.max_epoches = max_epoches
    self.dispFreq = dispFreq
    self.saveFreq = saveFreq
    self.devFreq = devFreq
    self.clip_c = clip_c
    self.saveto = saveto
    self.reload = reload

    if vocab_size_s is None:
        self.vocab_size_s = self.vocab_size
    else:
        self.vocab_size_s = vocab_size_s

    print('num_filters_total is ', self.num_filters_total)

    if optimizer == 'adam':
        self.optimizer = tf.train.AdamOptimizer()
        print("using adam as the optimizer for the discriminator")
    elif optimizer == 'adadelta':
        self.optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=1., rho=0.95, epsilon=1e-6)
        print("using adadelta as the optimizer for the discriminator")
    elif optimizer == 'sgd':
        self.optimizer = tf.train.GradientDescentOptimizer(0.0001)
        print("using sgd as the optimizer for the discriminator")
    elif optimizer == 'rmsprop':
        self.optimizer = tf.train.RMSPropOptimizer(0.0001)
        print("using rmsprop as the optimizer for the discriminator")
    else:
        raise ValueError("optimizer must be adam, adadelta or sgd.")

    dictionaries = []
    dictionaries.append(source_dict)
    dictionaries.append(target_dict)
    self.dictionaries = dictionaries

    gpu_string = gpu_device
    gpu_devices = gpu_string.split('-')
    self.gpu_devices = gpu_devices[1:]
    self.gpu_num = len(self.gpu_devices)
    # print('the gpu_num is ', self.gpu_num)

    self.build_placeholder()

    if reuse_var == False:
        with tf.variable_scope(self.scope or 'disCNN'):
            with tf.variable_scope('model_self'):
                with tf.device(init_device):
                    embeddingtable = tf.get_variable(
                        'embeddingtable',
                        initializer=tf.random_uniform(
                            [self.vocab_size, self.dim_word], -1.0, 1.0))
                    embeddingtable_s = tf.get_variable(
                        'embeddingtable_s',
                        initializer=tf.random_uniform(
                            [self.vocab_size_s, self.dim_word], -1.0, 1.0))
                    W = tf.get_variable(
                        'W',
                        initializer=tf.truncated_normal(
                            [self.num_filters_total * 2, self.num_classes],
                            stddev=0.1))
                    b = tf.get_variable(
                        'b',
                        initializer=tf.constant(0.1,
                                                shape=[self.num_classes]))

    # Build the model.
    print('building train model')
    self.build_train_model()
    print('done')

    print('build_discriminate ')
    # self.build_discriminate(gpu_device=self.gpu_devices[-1])
    self.build_discriminator_model(dis_devices=self.gpu_devices)
    print('done')

    params = [param for param in tf.global_variables()
              if self.scope in param.name]
    if not self.sess.run(tf.is_variable_initialized(params[0])):
        init_op = tf.variables_initializer(params)
        self.sess.run(init_op)

    saver = tf.train.Saver(params)
    self.saver = saver

    if self.reload:
        # ckpt = tf.train.get_checkpoint_state('./')
        # if ckpt and ckpt.model_checkpoint_path:
        #     print('reloading file from %s' % ckpt.model_checkpoint_path)
        #     self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        # else:
        print('reloading file from %s' % self.saveto)
        self.saver.restore(self.sess, self.saveto)
        print('reloading file done')
def learn(self, total_timesteps, callback=None, seed=None, log_interval=100):
    with SetVerbosity(self.verbose):
        self._setup_learn(seed)
        self.n_batch = self.n_envs * self.n_steps

        self.learning_rate_schedule = Scheduler(
            initial_value=self.learning_rate,
            n_values=total_timesteps,
            schedule=self.lr_schedule)

        # The FIFO queue of the q_runner thread is closed at the end of the
        # learn function. As a result, it needs to be redefined at every call.
        with self.graph.as_default():
            # Some of the variables are not in a scope when they are created,
            # so we make a note of any previously uninitialized variables.
            tf_vars = tf.global_variables()
            is_uninitialized = self.sess.run(
                [tf.is_variable_initialized(var) for var in tf_vars])
            old_uninitialized_vars = [
                v for (v, f) in zip(tf_vars, is_uninitialized) if not f
            ]

            self.train_op, self.q_runner = self.optim.apply_gradients(
                list(zip(self.grads_check, self.params)))

            # Then we check for new uninitialized variables and initialize
            # them.
            tf_vars = tf.global_variables()
            is_uninitialized = self.sess.run(
                [tf.is_variable_initialized(var) for var in tf_vars])
            new_uninitialized_vars = [
                v for (v, f) in zip(tf_vars, is_uninitialized)
                if not f and v not in old_uninitialized_vars
            ]

            if len(new_uninitialized_vars) != 0:
                self.sess.run(
                    tf.variables_initializer(new_uninitialized_vars))

        runner = A2CRunner(self.env, self, n_steps=self.n_steps,
                           gamma=self.gamma)

        t_start = time.time()
        coord = tf.train.Coordinator()
        enqueue_threads = self.q_runner.create_threads(self.sess, coord=coord,
                                                       start=True)
        for update in range(1, total_timesteps // self.n_batch + 1):
            obs, states, rewards, masks, actions, values = runner.run()
            policy_loss, value_loss, policy_entropy = self._train_step(
                obs, states, rewards, masks, actions, values)
            n_seconds = time.time() - t_start
            fps = int((update * self.n_batch) / n_seconds)

            if callback is not None:
                callback(locals(), globals())

            if self.verbose >= 1 and (update % log_interval == 0
                                      or update == 1):
                explained_var = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * self.n_batch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("policy_loss", float(policy_loss))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance",
                                      float(explained_var))
                logger.dump_tabular()

        coord.request_stop()
        coord.join(enqueue_threads)
    return self
    return net


target_x = tf.placeholder(dtype=tf.float32, shape=[1, 28, 28, 1],
                          name='target_x')
target_z = tf.get_variable('anogan/target_z', shape=[1, z_dim],
                           initializer=tf.random_uniform_initializer(-1, 1),
                           trainable=True)

mapped_x = generator(target_z)
target_d_feature = get_discriminator_feature(target_x)
mapped_d_feature = get_discriminator_feature(mapped_x)

lam = 0.7
anogan_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='anogan')

residual_loss = tf.reduce_mean(tf.abs(target_x - mapped_x), axis=[1, 2, 3])
discrimination_loss = tf.reduce_mean(
    tf.abs(target_d_feature - mapped_d_feature), axis=[1, 2, 3])
mapping_loss = (1 - lam) * residual_loss + lam * discrimination_loss

mapping_loss_opt1 = tf.train.AdamOptimizer(
    learning_rate=1E-1, name='mapping-optimizer-1').minimize(
        loss=mapping_loss, var_list=anogan_var)
mapping_loss_opt2 = tf.train.AdamOptimizer(
    learning_rate=1E-2, name='mapping-optimizer-2').minimize(
        loss=mapping_loss, var_list=anogan_var)

uninitialized_variables = [var for var in tf.global_variables()
                           if not sess.run(tf.is_variable_initialized(var))]
sess.run(tf.variables_initializer(uninitialized_variables))

query_x = mnist.test.images[2].reshape(1, 28, 28, 1)
sess.run(tf.variables_initializer(anogan_var))

mapping_loss_traj = []
mapping_loss_opt = mapping_loss_opt1
for i in range(150):
    if i == 50:
        mapping_loss_opt = mapping_loss_opt2
    loss, _ = sess.run([mapping_loss, mapping_loss_opt],
                       feed_dict={target_x: query_x, is_train: False})
    mapping_loss_traj.extend(loss)

anomaly_score = mapping_loss_traj[-1]

### Comparison of Query Image and Mapped Image
# Increment the variables. Note that assign_add is a specific function
# implemented only for variables. Variables also inherit all the functions of
# a tensor; thus, we can use variables as input to operators.
assign1 = X.assign_add(b)
assign2 = Y.assign_add(b)

config = tf.ConfigProto(log_device_placement=True)
with tf.Session("grpc://vm-32-%d:2222" % (FLAGS.task_index + 1),
                config=config) as sess:
    tf.train.SummaryWriter("%s/asyncsgd" % (os.environ.get("TF_LOG_DIR")),
                           sess.graph)

    # Variables need to be initialized, if they are not already. If you
    # re-initialize the variables, all previously stored data from other
    # sessions will be lost.
    if (not tf.is_variable_initialized(X).eval()
            or not tf.is_variable_initialized(Y).eval()):
        sess.run(tf.initialize_all_variables())

    # Feel free to increase the loop count if you want to observe the
    # variables for longer durations.
    for i in range(0, 100):
        # A session can be run to compute the values of multiple
        # tensors/variables. The variables of interest are provided as a list
        # when invoking run.
        sess.run([assign1, assign2])
        # Observe the values in the variables.
        print(X.eval(), Y.eval())
    sess.close()
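# --- Tiny standalone illustration (not from the original sources) of the
# point in the comment above: ordinary tensor ops merely read a variable,
# while assign_add mutates it when run.
v = tf.Variable(1.0)
read_plus_one = v + 1.0   # variable used as a tensor input; v is unchanged
bump = v.assign_add(1.0)  # op that mutates v each time it is run

with tf.Session() as demo_sess:
    demo_sess.run(tf.initialize_all_variables())
    print(demo_sess.run(read_plus_one))  # 2.0 -- v is still 1.0
    print(demo_sess.run(bump))           # 2.0 -- v is now 2.0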
print("Initialize model from pretrained_model") all_vars = tf.global_variables() # load pretrained_model checkpoint assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint( tvars=all_vars, init_checkpoint=config.pretrained_model ) for var in initialized_variable_names: print(str(var) + " *INIT FROM CKPT* ") print("Total {:g} variables are restored from ckpt : {}".format( len(initialized_variable_names), str(config.pretrained_model))) tf.train.init_from_checkpoint( config.pretrained_model, assignment_map) # find uninitialized variables and initialize it is_initialized = sess.run([tf.is_variable_initialized(var) for var in all_vars]) not_initialized_vars = [var for (var, f) in zip(all_vars, is_initialized) if not f] if len(not_initialized_vars): sess.run(tf.variables_initializer(not_initialized_vars)) saver = tf.train.Saver() bind_model(sess) if config.pause: nsml.paused(scope=locals()) bTrainmode = False if config.mode == 'train':
def fit_stage(self, x, batch_size, stage_num=-1, alpha_scheduler=None,
              learning_rate_scheduler=None, folder=None, save_epoch=1,
              seed_noise=None, seed_labels=None):
    assert self.optimizer_g  # You must first compile the model.
    self.batch_size = batch_size
    alpha_scheduler = alpha_scheduler or (lambda _: 1.0)
    learning_rate_scheduler = learning_rate_scheduler or (lambda _: self._lr)

    # Resize train samples to the specified stage size.
    X_train = x[0] if isinstance(x, (tuple, list)) else x
    X_train = resize_images_tf(X_train,
                               self.train_stages[stage_num]['size'][:2],
                               sess=self.sess)
    x = (X_train, x[1]) if self.labels_emb_size else X_train

    # Create the dataset object.
    dataset = tf.data.Dataset.from_tensor_slices(x).shuffle(
        X_train.shape[0]).batch(batch_size, drop_remainder=True)
    if self.buffer_size:
        # We shuffled the buffer dataset earlier when we created the buffer
        # generator.
        buffer_dataset = self._get_buffer_dataset(X_train.shape).batch(
            batch_size // 2, drop_remainder=True)
        dataset = tf.data.Dataset.zip((dataset, buffer_dataset))

    if self.tpu_strategy:
        train_iterator = self.tpu_strategy.make_dataset_iterator(dataset)
        train_iterator_init = train_iterator.initialize()
        train_samples = next(train_iterator)
    else:
        train_iterator = dataset.make_initializable_iterator()
        train_iterator_init = train_iterator.initializer
        train_samples = train_iterator.get_next()

    if self.tpu_strategy:
        buffer_values_replica, dist_train_gen_replica, dist_train_dis_replica = \
            self.tpu_strategy.experimental_run_v2(self.train_step,
                                                  args=(train_samples,))
        dist_train_gen, dist_train_dis = (dist_train_gen_replica.values,
                                          dist_train_dis_replica.values)
    else:
        buffer_values, dist_train_gen, dist_train_dis = self.train_step(
            train_samples)

    # Initialize uninitialized variables only.
    all_variables = tf.global_variables()
    uninit_variables = [
        var for var in all_variables
        if not self.sess.run(tf.is_variable_initialized(var))
    ]
    self.sess.run(tf.variables_initializer(uninit_variables))

    # Used to track training progress.
    losses = []
    inputs = [seed_noise, seed_labels] if self.labels_emb_size else seed_noise
    generated_images = self.generator(inputs, stage_num, alpha=self.alpha,
                                      training=False)

    for epoch in range(self.train_stages[stage_num]['train_epochs']):
        epoch += 1
        print('\n Processing epoch: {} =========================================='
              .format(epoch))
        start = time.time()

        # Set up the transition coefficient with `alpha_scheduler`.
        new_alpha = alpha_scheduler(epoch - 1)  # *0 + 1
        tf.keras.backend.set_value(self.alpha, new_alpha)
        # A tricky way to set up the learning rate on the fly during training
        # with `learning_rate_scheduler`.
        new_lr = learning_rate_scheduler(epoch)
        tf.keras.backend.set_value(self.learning_rate, new_lr)

        # Train loop.
        self.sess.run(train_iterator_init)
        train_steps = X_train.shape[0] // (
            batch_size * (self.dis_train_iters + self.gen_train_iters))
        # Set `proba` to 1 so that the buffer stores every generated sample
        # until it is full.
        proba = (self.buffer_store_proba
                 if self.buffer_size and self.buffer.is_full else 1)
        for step in range(train_steps):
            try:
                # Discriminator training loop.
                for _ in range(self.dis_train_iters):
                    loss_d = self.sess.run(dist_train_dis)
                # Generator training loop.
                for _ in range(self.gen_train_iters):
                    loss_g = self.sess.run(dist_train_gen)
                if self.buffer_size and random.random() < proba:
                    self.buffer.store(*self.sess.run(buffer_values))
            except (StopIteration, tf.errors.OutOfRangeError):
                break

        loss_d, loss_g = np.mean(loss_d), np.mean(loss_g)
        losses.append([loss_d, loss_g])
        print(' Epoch: {}; Alpha: {}; D_loss: {:.4}; G_loss: {:.4}'.format(
            epoch, new_alpha, loss_d, loss_g))
        print(" Train Epoch time: %.3f s" % (time.time() - start))

        if epoch % save_epoch == 0:
            # Save the weights.
            self.save_weights('{}/weights'.format(folder),
                              tpu=self.tpu_strategy is not None)
            samples = self.sess.run(generated_images)
            fig = plot(samples, 10, 10,
                       title='stage:{} epoch:{}'.format(
                           stage_num, str(epoch).zfill(3)))
            plt.savefig('{}/progress/{}_{}_{}.png'.format(
                folder, stage_num, self.gan_mode, str(epoch).zfill(3)),
                bbox_inches='tight')
            plt.close(fig)

    fig = plt.figure()
    plt.plot(losses)
    plt.savefig('{}/losses/{}_{}_{}.jpeg'.format(folder, self.gan_mode,
                                                 'losses', stage_num))
    plt.close(fig)
Variables can be saved selectively with a different name.
'''
v1 = tf.get_variable("v1", shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5],
                     initializer=tf.random_normal_initializer)

inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

# Only save v2, under the name v3.
saver = tf.train.Saver({"v3": v2})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Run the operations the tensors come from.
    inc_v1.op.run()
    dec_v2.op.run()
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("model saved in file %s" % save_path)

v3 = tf.get_variable("v3", shape=[3], initializer=tf.zeros_initializer)

print("------------ restore variables--------------")
with tf.Session() as sess:
    saver.restore(sess, "/tmp/model.ckpt")
    for x in tf.global_variables():
        # v2 is initialized, but v1 is not.
        print(x.name, "is initialized or not: ",
              sess.run(tf.is_variable_initialized(x)))
def model_creator(batch_size, name="default", dtype=np.float32):
  """Create MNIST autoencoder model. Dataset is part of model."""

  model = Model(name)

  def get_batch_size(data):
    if isinstance(data, IndexedGrad):
      return int(data.live[0].shape[1])
    else:
      return int(data.shape[1])

  init_dict = {}
  global_vars = []
  local_vars = []

  # TODO: factor out to reuse between scripts
  # TODO: change feed_dict logic to reuse value provided to VarStruct.
  # The current setup makes reinitialization of a global variable change
  # its value, which is counterintuitive.
  def init_var(val, name, is_global=False):
    """Helper to create variables with numpy or TF initial values."""
    if isinstance(val, tf.Tensor):
      var = u.get_variable(name=name, initializer=val, reuse=is_global)
    else:
      val = np.array(val)
      assert u.is_numeric(val), "Non-numeric type."
      var_struct = u.get_var(name=name, initializer=val, reuse=is_global)
      holder = var_struct.val_
      init_dict[holder] = val
      var = var_struct.var
    if is_global:
      global_vars.append(var)
    else:
      local_vars.append(var)
    return var

  # TODO: get rid of purely_relu
  def nonlin(x):
    if purely_relu:
      return tf.nn.relu(x)
    elif purely_linear:
      return tf.identity(x)
    else:
      return tf.sigmoid(x)

  # TODO: rename into "nonlin_d"
  def d_nonlin(y):
    if purely_relu:
      return u.relu_mask(y)
    elif purely_linear:
      return 1
    else:
      return y*(1-y)

  patches = train_images[:, :args.batch_size]
  test_patches = test_images[:, :args.batch_size]

  if args.dataset == 'cifar':
    input_dim = 3*32*32
  elif args.dataset == 'mnist':
    input_dim = 28*28
  else:
    assert False

  fs = [args.batch_size, input_dim, 1024, 1024, 1024, 196, 1024, 1024, 1024,
        input_dim]

  def f(i):
    return fs[i+1]  # W[i] has shape f[i] x f[i-1]

  n = len(fs) - 2

  # Full dataset from which new batches are sampled
  X_full = init_var(train_images, "X_full", is_global=True)

  X = init_var(patches, "X", is_global=False)  # stores local batch per model
  W = [None]*n
  W.insert(0, X)
  A = [None]*(n+2)
  A[1] = W[0]
  for i in range(1, n+1):
    init_val = ng_init(f(i), f(i-1)).astype(dtype)
    W[i] = init_var(init_val, "W_%d" % (i,), is_global=True)
    A[i+1] = nonlin(kfac_lib.matmul(W[i], A[i]))
  err = A[n+1] - A[1]
  model.loss = u.L2(err) / (2 * get_batch_size(err))

  # create test error eval
  layer0 = init_var(test_patches, "X_test", is_global=True)
  layer = layer0
  for i in range(1, n+1):
    layer = nonlin(W[i] @ layer)
  verr = (layer - layer0)
  model.vloss = u.L2(verr) / (2 * get_batch_size(verr))

  # manually compute backprop to use for sanity checking
  B = [None]*(n+1)
  B2 = [None]*(n+1)
  B[n] = err*d_nonlin(A[n+1])
  _sampled_labels_live = tf.random_normal((f(n), f(-1)), dtype=dtype, seed=0)
  if args.fixed_labels:
    _sampled_labels_live = tf.ones(shape=(f(n), f(-1)), dtype=dtype)
  _sampled_labels = init_var(_sampled_labels_live, "to_be_deleted",
                             is_global=False)
  B2[n] = _sampled_labels*d_nonlin(A[n+1])
  for i in range(n-1, -1, -1):
    backprop = t(W[i+1]) @ B[i+1]
    B[i] = backprop*d_nonlin(A[i+1])
    backprop2 = t(W[i+1]) @ B2[i+1]
    B2[i] = backprop2*d_nonlin(A[i+1])

  cov_A = [None]*(n+1)   # covariance of activations[i]
  cov_B2 = [None]*(n+1)  # covariance of synthetic backprops[i]
  vars_svd_A = [None]*(n+1)
  vars_svd_B2 = [None]*(n+1)
  dW = [None]*(n+1)
  dW2 = [None]*(n+1)
  pre_dW = [None]*(n+1)  # preconditioned dW
  # TODO: decouple initial value from covariance update;
  # may need to start with identity and do a running average
  for i in range(1, n+1):
    if regularized_svd:
      cov_A[i] = init_var(A[i]@t(A[i])/args.batch_size +
                          args.Lambda*u.Identity(f(i-1)), "cov_A%d" % (i,))
      cov_B2[i] = init_var(B2[i]@t(B2[i])/args.batch_size +
                           args.Lambda*u.Identity(f(i)), "cov_B2%d" % (i,))
    else:
      cov_A[i] = init_var(A[i]@t(A[i])/args.batch_size, "cov_A%d" % (i,))
      cov_B2[i] = init_var(B2[i]@t(B2[i])/args.batch_size, "cov_B2%d" % (i,))
    vars_svd_A[i] = u.SvdWrapper(cov_A[i], "svd_A_%d" % (i,))
    vars_svd_B2[i] = u.SvdWrapper(cov_B2[i], "svd_B2_%d" % (i,))
    if use_tikhonov:
      whitened_A = u.regularized_inverse3(vars_svd_A[i], L=args.Lambda) @ A[i]
      whitened_B2 = u.regularized_inverse3(vars_svd_B2[i], L=args.Lambda) @ B[i]
    else:
      whitened_A = u.pseudo_inverse2(vars_svd_A[i]) @ A[i]
      whitened_B2 = u.pseudo_inverse2(vars_svd_B2[i]) @ B[i]
    dW[i] = (B[i] @ t(A[i]))/args.batch_size
    dW2[i] = B[i] @ t(A[i])
    pre_dW[i] = (whitened_B2 @ t(whitened_A))/args.batch_size

  sampled_labels_live = A[n+1] + tf.random_normal((f(n), f(-1)),
                                                  dtype=dtype, seed=0)
  if args.fixed_labels:
    sampled_labels_live = A[n+1] + tf.ones(shape=(f(n), f(-1)), dtype=dtype)
  sampled_labels = init_var(sampled_labels_live, "sampled_labels",
                            is_global=False)
  err2 = A[n+1] - sampled_labels
  model.loss2 = u.L2(err2) / (2 * args.batch_size)

  model.global_vars = global_vars
  model.local_vars = local_vars
  model.trainable_vars = W[1:]

  # TODO: we have 3 places where the model step is tracked; reduce
  model.step = init_var(u.as_int32(0), "step", is_global=False)
  advance_step_op = model.step.assign_add(1)
  assert get_batch_size(X_full) % args.batch_size == 0
  batches_per_dataset = (get_batch_size(X_full) // args.batch_size)
  batch_idx = tf.mod(model.step, batches_per_dataset)
  start_idx = batch_idx * args.batch_size
  advance_batch_op = X.assign(X_full[:, start_idx:start_idx+args.batch_size])

  def advance_batch():
    print("Step for model(%s) is %s" % (model.name, u.eval(model.step)))
    sess = u.get_default_session()
    # TODO: get rid of _sampled_labels
    sessrun([sampled_labels.initializer, _sampled_labels.initializer])
    if args.advance_batch:
      with u.timeit("advance_batch"):
        sessrun(advance_batch_op)
    sessrun(advance_step_op)

  model.advance_batch = advance_batch

  # TODO: refactor this to take initial values out of Var struct
  # global_init_op = tf.group(*[v.initializer for v in global_vars])
  global_init_ops = [v.initializer for v in global_vars]
  global_init_op = tf.group(*[v.initializer for v in global_vars])
  global_init_query_ops = [tf.logical_not(tf.is_variable_initialized(v))
                           for v in global_vars]

  def initialize_global_vars(verbose=False, reinitialize=False):
    """If reinitialize is False, will not reinitialize variables that are
    already initialized."""
    sess = u.get_default_session()
    if not reinitialize:
      uninited = sessrun(global_init_query_ops)
      # use numpy boolean indexing to select the list of initializers to run
      to_initialize = list(np.asarray(global_init_ops)[uninited])
    else:
      to_initialize = global_init_ops
    if verbose:
      print("Initializing the following:")
      for v in to_initialize:
        print("   " + v.name)
    sessrun(to_initialize, feed_dict=init_dict)

  model.initialize_global_vars = initialize_global_vars

  # This didn't quite work (likely can't initialize a var in the same run
  # call as its deps). Intended to enforce that the batch is initialized
  # before everything except the fake labels:
  # for v in local_vars:
  #   if v != X and v != sampled_labels and v != _sampled_labels:
  #     print("Adding dep %s on %s"%(v.initializer.name, X.initializer.name))
  #     u.add_dep(v.initializer, on_op=X.initializer)

  local_init_op = tf.group(*[v.initializer for v in local_vars],
                           name="%s_localinit" % (model.name))
  print("Local vars:")
  for v in local_vars:
    print(v.name)

  def initialize_local_vars():
    sess = u.get_default_session()
    sessrun(_sampled_labels.initializer, feed_dict=init_dict)
    sessrun(X.initializer, feed_dict=init_dict)
    sessrun(local_init_op, feed_dict=init_dict)

  model.initialize_local_vars = initialize_local_vars

  return model
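The query-then-filter pattern inside initialize_global_vars above is reusable on its own. A minimal standalone sketch in plain TF 1.x (no u.* or sessrun helpers; the function name is mine, not from the original):

def initialize_selectively(sess, variables):
    # One is_variable_initialized op per variable, all evaluated in a
    # single session.run call.
    needs_init = sess.run(
        [tf.logical_not(tf.is_variable_initialized(v)) for v in variables])
    # Keep only the initializers that still need to run.
    to_run = [v.initializer for v, flag in zip(variables, needs_init) if flag]
    if to_run:
        sess.run(to_run)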
def test_initialize_variables():
  # NOTE: keep the `K.get_session()` call up here to ensure that the
  # `initialize_variables` function is working properly.
  tf.reset_default_graph()
  # K.manual_variable_initialization(True)
  K.clear_session()  # needed if we want to create sessions at the beginning
  sess = K.get_session()

  # Create a model with a mix of pretrained and new weights.
  # NOTE: the pretrained layers will be initialized by Keras on creation,
  # while the new Dense layer will remain uninitialized.
  input_shape = (224, 224, 3)
  inputs = Input(shape=input_shape)
  model_base = VGG16(include_top=False, input_shape=input_shape,
                     input_tensor=inputs)
  x = model_base.output
  x = GlobalAveragePooling2D()(x)
  logits = Dense(1)(x)
  model = Model(inputs=inputs, outputs=logits, name="model")

  # Check that the pretrained model is initialized.
  # NOTE: this occurs because using pretrained weights ends up calling
  # `K.batch_set_value`, which creates assignment ops and calls
  # `K.get_session()` to get the session and then run the assignment ops.
  # The `K.get_session()` call initializes the model variables to random
  # values and sets the `_keras_initialized` attribute to True for each
  # variable. Then the assignment ops run and actually set the variables to
  # the pretrained values. Without pretrained weights, the `K.get_session()`
  # function is not called upon model creation, and thus these variables
  # will remain uninitialized. Furthermore, if we set
  # `K.manual_variable_initialization(True)`, the pretrained weights will be
  # loaded, but there will be no indication that those variables were
  # already initialized, and thus we will end up reinitializing them to
  # random values. This is all a byproduct of using Keras + TensorFlow in a
  # hybrid setup, and we should look into making this less brittle.
  for v in model_base.weights:
    assert hasattr(v, '_keras_initialized') and v._keras_initialized
    assert sess.run(tf.is_variable_initialized(v))

  # The new dense layer is not initialized yet.
  # with pytest.raises(AssertionError):
  assert len(model.layers[-1].weights) == 2
  for v in model.layers[-1].weights:
    assert not getattr(v, '_keras_initialized', False)
    assert not sess.run(tf.is_variable_initialized(v))

  # Initialize variables, including marking them with the
  # `_keras_initialized` attribute.
  initialize_variables(sess)

  # Check that everything is initialized and marked with the
  # `_keras_initialized` attribute.
  # NOTE: this is important for a hybrid Keras & TensorFlow setup where Keras
  # is being used for the model creation part, and raw TensorFlow is being
  # used for the rest. If variables are not initialized *and* marked with the
  # special Keras attribute, then certain Keras functions will end up
  # accidentally reinitializing variables when they use `K.get_session()`
  # internally. In a pure Keras setup, this would not happen since the model
  # would be initialized at the proper times. In a Keras & TensorFlow hybrid
  # setup, this can cause issues. By encapsulating this nonsense in a
  # function, we can avoid these problems.
  for v in tf.global_variables():
    assert hasattr(v, '_keras_initialized') and v._keras_initialized
    assert sess.run(tf.is_variable_initialized(v))
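The test above never shows initialize_variables itself. A hedged sketch of what such a helper might look like, judging only from the behaviour the test asserts (initialize whatever is uninitialized, then mark everything with _keras_initialized); this is an assumption, not the project's actual implementation:

def initialize_variables_sketch(sess):
    # Hypothetical reconstruction; the real initialize_variables is not shown.
    variables = tf.global_variables()
    init_flags = sess.run([tf.is_variable_initialized(v) for v in variables])
    uninitialized = [v for v, flag in zip(variables, init_flags) if not flag]
    if uninitialized:
        sess.run(tf.variables_initializer(uninitialized))
    for v in variables:
        # Mark the variable so Keras will not reinitialize it behind our back
        # when something calls K.get_session().
        v._keras_initialized = True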
def train(self, X_train, y_train, X_val=None, y_val=None, is_print=True):
    '''Train an ensemble of NNs.'''

    # Note we use different notation in this file, so b_1 is the first
    # bias -- elsewhere we call this b_0.
    if self.activation_fn == 'relu' or self.activation_fn == 'softplus' or self.activation_fn == 'Lrelu':
        init_stddev_1_w = np.sqrt(self.w_0_var)  # /np.sqrt(self.hidden_size)
        init_stddev_1_b = np.sqrt(self.b_0_var)  # /np.sqrt(self.hidden_size)
        init_stddev_2_w = 1.0 / np.sqrt(self.hidden_size)  # *np.sqrt(10) # 2nd layer init. dist
        lambda_anchor = self.data_noise / (np.array(
            [init_stddev_1_w, init_stddev_1_b, init_stddev_2_w * 1])**2)  # /X_train.shape[0]
        # lambda_anchor = [0.,0.,0.]
    elif self.activation_fn == 'tanh' or self.activation_fn == 'erf':
        init_stddev_1_w = np.sqrt(self.w_0_var)  # 1st layer init. dist for weights
        init_stddev_1_b = np.sqrt(self.b_0_var)  # for bias
        init_stddev_2_w = 1.0 / np.sqrt(self.hidden_size)  # 2nd layer init. dist
        # lambda for weight layer 1, bias layer 1, weight layer 2
        # lambda_anchor = [0.,0.,0.]
        lambda_anchor = self.data_noise / (np.array(
            [init_stddev_1_w, init_stddev_1_b, init_stddev_2_w])**2)
    elif self.activation_fn == 'rbf':
        init_stddev_1_w = np.sqrt(self.u_var)  # centres = sig_u
        init_stddev_1_b = np.sqrt(self.g_var)  # fixed /beta
        init_stddev_2_w = 1.0 / np.sqrt(self.hidden_size)  # 2nd layer init. dist
        lambda_anchor = self.data_noise / (np.array(
            [init_stddev_1_w, init_stddev_1_b, init_stddev_2_w])**2)

    n = X_train.shape[0]
    X_dim = X_train.shape[1]
    y_dim = 1  # y_train.shape[1]
    # batch_size = n

    # --- ensembles w/ proper anchoring! ---
    NNs = []
    y_pred = []
    y_prior = []
    tf.reset_default_graph()
    sess = tf.Session()

    for ens in range(0, self.n_ensembles):
        if is_print:
            print('\n\n-- working on ensemble number ' +
                  str(self.total_trained + ens) + ' ---')
        else:
            print('-- working on ensemble number ' +
                  str(self.total_trained + ens) + ' ---', end='\r')

        # create a NN
        NNs.append(
            NN(self.activation_fn, X_dim, y_dim, self.hidden_size,
               init_stddev_1_w, init_stddev_1_b, init_stddev_2_w,
               self.optimiser_in, n, self.learning_rate,
               decay_rate=self.decay_rate, drop_out=self.drop_out,
               deep_NN=self.deep_NN))

        # sess.run(tf.global_variables_initializer())  # must do this after NN created
        # sess.run(tf.initialize_variables([NNs[ens].layer_1_w.kernel, NNs[ens].layer_1_w.bias, NNs[ens].output_w.kernel]))

        # initialise only uninitialized variables
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

        # set anchoring
        if self.deep_NN == False:
            NNs[ens].anchor(sess, lambda_anchor, regularise=self.regularise,
                            unconstrain=self.unconstrain)
        else:
            NNs[ens].anchor_deep(sess, lambda_anchor,
                                 regularise=self.regularise,
                                 unconstrain=self.unconstrain)

        # prior
        # if X_val != None:
        y_prior.append(NNs[ens].predict(X_val, sess))

        # do training
        feed = {}
        feed[NNs[ens].inputs] = X_train
        feed[NNs[ens].y_target] = y_train
        # feed[NNs[ens].l_rate_in] = 0.1

        # if (X_val!=None)[0,0]:
        feed_val = {}
        feed_val[NNs[ens].inputs] = X_val
        feed_val[NNs[ens].y_target] = y_val
        # feed_val[NNs[ens].l_rate_in] = self.learning_rate

        ep_ = 0
        train_complete = False
        while not train_complete:  # for ep_ in range(0, self.n_epochs):
            if False and ep_ == 0:
                # view training as it progresses
                y_pred_temp = NNs[ens].predict(X_val, sess)
                plot_1d_grid(X_val, y_pred_temp, 0.01, X_train, y_train,
                             title='ep ' + str(ep_))

            # blank = sess.run(NNs[ens].optimizer, feed_dict=feed)

            # train in batches
            perm = np.random.permutation(X_train.shape[0])
            X_train_shuff = X_train[perm]
            y_train_shuff = y_train[perm]
            n_batches = int(np.ceil(X_train.shape[0] / self.batch_size))
            for b in range(0, n_batches):
                # if not the final batch
                # (bug fix: was `b != n_batches`, which is always true, so
                # the "use whatever is left" branch never ran)
                if b != n_batches - 1:
                    X_train_b = X_train_shuff[b * self.batch_size:(b + 1) * self.batch_size]
                    y_train_b = y_train_shuff[b * self.batch_size:(b + 1) * self.batch_size]
                else:  # use whatever is left
                    X_train_b = X_train_shuff[b * self.batch_size:]
                    y_train_b = y_train_shuff[b * self.batch_size:]
                feed_b = {}
                feed_b[NNs[ens].inputs] = X_train_b
                feed_b[NNs[ens].y_target] = y_train_b
                blank = sess.run(NNs[ens].optimizer, feed_dict=feed_b)

            if ep_ % self.cycle_print == 0 or ep_ == self.n_epochs - 1:
                if False:
                    # view training as it progresses
                    y_pred_temp = NNs[ens].predict(X_val, sess)
                    plot_1d_grid(X_val, y_pred_temp, 0.01, X_train, y_train,
                                 title='ep ' + str(ep_))
                loss = sess.run(NNs[ens].loss_, feed_dict=feed)
                # if (X_val!=None)[0,0]:
                loss_val = sess.run(NNs[ens].mse_loss, feed_dict=feed_val)
                l_rate_curr = sess.run(NNs[ens].l_rate_decay,
                                       feed_dict=feed_val)
                if is_print:
                    print('ep:', ep_, '/', self.n_epochs - 1,
                          'train:', np.round(loss, 5),
                          'val mse:', np.round(loss_val, 5),
                          'lr', np.round(l_rate_curr, 5))  # , end='\r')

                # Useful to do a stability check here: if one NN doesn't
                # train perfectly it can mess up the whole ensemble.
                if ep_ == self.n_epochs - 1:  # if last run
                    # train further if the loss increased since last check
                    if (loss - loss_old) / loss > 0.02:
                        # bug fix: was np.max(ep_ - ..., 0), where the 0 is
                        # interpreted as an axis; the builtin max reflects
                        # the intent
                        ep_ = max(ep_ - int(self.n_epochs / 10), 0)
                        print(' !!! one was unstable !!!, continuing training')
                        continue
                loss_old = loss.copy()

            ep_ += 1
            if ep_ == self.n_epochs:
                train_complete = True

        # make prediction -- used to do it here so we didn't worry about
        # reinitialising other NNs, but we found a way to only reinit new
        # variables now
        # y_pred.append(NNs[ens].predict(x_s))

    self.NNs = NNs
    self.sess = sess

    # priors
    # if X_val != None:
    y_priors = np.array(y_prior)
    y_priors = y_priors[:, :, 0]
    y_prior_mu = np.mean(y_prior, axis=0)
    y_prior_std = np.std(y_prior, axis=0, ddof=1)

    # -- last-layer inference here -- the above is the same as NN_ens
    # (except the nn_last_layer fns)

    # first need the input to the last layer
    if not self.deep_NN:
        w1, b1, w2 = NNs[0].get_weights(sess)
        X_last = self.nn_last_layer_np(X_train, w1, w2, b1)
    else:
        w1, b1, w2, b2, w3 = NNs[0].get_weights_deep(sess)
        X_last = self.nn_last_layer_np_deep(X_train, w1, w2, w3, b1, b2)

    # could inflate this artificially to get closer to the true posterior
    w_last_prior_var = init_stddev_2_w**2
    print('w_last_prior_var', w_last_prior_var)
    w_last_post_cov = np.linalg.inv(
        np.matmul(X_last.T, X_last) / self.data_noise +
        np.identity(self.hidden_size) / w_last_prior_var)
    w_last_post_mu = np.matmul(np.matmul(w_last_post_cov, X_last.T),
                               y_train) / self.data_noise

    # print('\nX_last\n',X_last)
    # print('\nw_last_post_cov\n',w_last_post_cov)
    # print('\nw_last_post_mu\n',w_last_post_mu)
    # print('\nw1\n',w1)
    # print('\nb1\n',b1)
    # print('\nw2\n',w2)

    self.w_last_post_cov = w_last_post_cov
    self.w_last_post_mu = w_last_post_mu

    return y_priors, y_prior_mu, y_prior_std
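For reference (not stated in the original source): the closed-form last-layer update above is standard Bayesian linear regression with noise variance sigma^2 = data_noise and prior variance sigma_w^2 = w_last_prior_var, i.e. w_last_post_cov = (X^T X / sigma^2 + I / sigma_w^2)^{-1} and w_last_post_mu = w_last_post_cov @ X^T y / sigma^2, which is exactly what the two np.linalg.inv / np.matmul lines compute.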
def make_safe_initializer(var):
    """Returns an initializer op that only runs for uninitialized variables."""
    return tf.cond(tf.is_variable_initialized(var),
                   make_noop,
                   make_initializer(var),
                   name="safe_init_" + var.op.name).op
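make_noop and make_initializer are not defined in this snippet. Since tf.cond consumes both branches as callables returning matching structures, they plausibly look like the following; these are hypothetical reconstructions, not the original helpers:

def make_noop():
    # Dummy scalar; both branches of tf.cond must return the same structure.
    return tf.constant(0)

def make_initializer(var):
    # Factory returning a callable, matching how it is invoked above.
    def init_fn():
        with tf.control_dependencies([tf.assign(var, var.initial_value)]):
            return tf.constant(0)
    return init_fn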
def learn(self, total_timesteps, callback=None, seed=None, log_interval=100,
          tb_log_name="ACKTR", reset_num_timesteps=True):

    new_tb_log = self._init_num_timesteps(reset_num_timesteps)

    with SetVerbosity(self.verbose), TensorboardWriter(
            self.graph, self.tensorboard_log, tb_log_name, new_tb_log) as writer:
        self._setup_learn(seed)
        self.n_batch = self.n_envs * self.n_steps

        self.learning_rate_schedule = Scheduler(
            initial_value=self.learning_rate, n_values=total_timesteps,
            schedule=self.lr_schedule)

        # The FIFO queue of the q_runner thread is closed at the end of the
        # learn function. As a result, it needs to be redefined at every call.
        with self.graph.as_default():
            with tf.variable_scope(
                    "kfac_apply", reuse=self.trained,
                    custom_getter=tf_util.outer_scope_getter("kfac_apply")):
                # Some of the variables are not in a scope when they are
                # created, so we make a note of any previously uninitialized
                # variables.
                tf_vars = tf.global_variables()
                is_uninitialized = self.sess.run(
                    [tf.is_variable_initialized(var) for var in tf_vars])
                old_uninitialized_vars = [
                    v for (v, f) in zip(tf_vars, is_uninitialized) if not f
                ]

                self.train_op, self.q_runner = self.optim.apply_gradients(
                    list(zip(self.grads_check, self.params)))

                # Then we check for new uninitialized variables and
                # initialize them.
                tf_vars = tf.global_variables()
                is_uninitialized = self.sess.run(
                    [tf.is_variable_initialized(var) for var in tf_vars])
                new_uninitialized_vars = [
                    v for (v, f) in zip(tf_vars, is_uninitialized)
                    if not f and v not in old_uninitialized_vars
                ]

                if len(new_uninitialized_vars) != 0:
                    self.sess.run(
                        tf.variables_initializer(new_uninitialized_vars))

        self.trained = True

        runner = A2CRunner(self.env, self, n_steps=self.n_steps,
                           gamma=self.gamma)
        self.episode_reward = np.zeros((self.n_envs,))

        t_start = time.time()
        coord = tf.train.Coordinator()
        if self.q_runner is not None:
            enqueue_threads = self.q_runner.create_threads(
                self.sess, coord=coord, start=True)
        else:
            enqueue_threads = []

        # Training stats (when using Monitor wrapper)
        ep_info_buf = deque(maxlen=100)

        for update in range(1, total_timesteps // self.n_batch + 1):
            # true_reward is the reward without discount
            obs, states, rewards, masks, actions, values, action_masks, \
                ep_infos, true_reward = runner.run()
            ep_info_buf.extend(ep_infos)

            policy_loss, value_loss, policy_entropy = self._train_step(
                obs, states, rewards, masks, actions, values,
                self.num_timesteps // (self.n_batch + 1), writer,
                action_masks)
            n_seconds = time.time() - t_start
            fps = int((update * self.n_batch) / n_seconds)

            if writer is not None:
                self.episode_reward = total_episode_reward_logger(
                    self.episode_reward,
                    true_reward.reshape((self.n_envs, self.n_steps)),
                    masks.reshape((self.n_envs, self.n_steps)),
                    writer, self.num_timesteps)

            if callback is not None:
                # Only stop training if the return value is False, not when
                # it is None. This is for backwards compatibility with
                # callbacks that have no return statement.
                if callback(locals(), globals()) is False:
                    break

            if self.verbose >= 1 and (update % log_interval == 0 or update == 1):
                explained_var = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", self.num_timesteps)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("policy_loss", float(policy_loss))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance",
                                      float(explained_var))
                if len(ep_info_buf) > 0 and len(ep_info_buf[0]) > 0:
                    logger.logkv('ep_reward_mean', safe_mean(
                        [ep_info['r'] for ep_info in ep_info_buf]))
                    logger.logkv('ep_len_mean', safe_mean(
                        [ep_info['l'] for ep_info in ep_info_buf]))
                logger.dump_tabular()

            self.num_timesteps += self.n_batch + 1

        coord.request_stop()
        coord.join(enqueue_threads)

    return self
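The snapshot-before/after trick above generalizes beyond ACKTR. A minimal sketch, under the assumption that the graph-mutating step is wrapped in a callable (the names are mine, not from stable-baselines):

def run_and_initialize_new_vars(sess, build_fn):
    """Call build_fn (which may add variables to the graph), then initialize
    only the still-uninitialized variables it created."""
    before = set(tf.global_variables())
    result = build_fn()
    candidates = tf.global_variables()
    flags = sess.run([tf.is_variable_initialized(v) for v in candidates])
    fresh = [v for v, f in zip(candidates, flags)
             if not f and v not in before]
    if fresh:
        sess.run(tf.variables_initializer(fresh))
    return result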
def is_inited(self, sess, var_object):
    '''Returns True if `var_object` is initialized in `sess`, else False.'''
    return sess.run(tf.is_variable_initialized(var_object))
def train(self):
    """
    Trains policy on env using algo

    Pseudocode::

        for itr in n_itr:
            for step in num_inner_grad_steps:
                sampler.sample()
                algo.compute_updated_dists()
            algo.optimize_policy()
            sampler.update_goals()
    """
    with self.sess.as_default() as sess:
        # initialize uninitialized vars (only initialize vars that were not loaded)
        uninit_vars = [
            var for var in tf.global_variables()
            if not sess.run(tf.is_variable_initialized(var))
        ]
        sess.run(tf.variables_initializer(uninit_vars))

        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            logger.log(
                "\n ---------------- Iteration %d ----------------" % itr)
            logger.log("Sampling set of tasks/goals for this meta-batch...")

            self.sampler.update_tasks()
            self.policy.switch_to_pre_update()  # Switch to pre-update policy

            all_samples_data, all_paths = [], []
            list_sampling_time, list_inner_step_time, list_outer_step_time, \
                list_proc_samples_time = [], [], [], []
            start_total_inner_time = time.time()
            for step in range(self.num_inner_grad_steps + 1):
                logger.log('** Step ' + str(step) + ' **')

                """ -------------------- Sampling --------------------------"""
                logger.log("Obtaining samples...")
                time_env_sampling_start = time.time()
                paths = self.sampler.obtain_samples(
                    log=True, log_prefix='Step_%d-' % step)
                list_sampling_time.append(
                    time.time() - time_env_sampling_start)
                all_paths.append(paths)

                """ ----------------- Processing Samples ---------------------"""
                logger.log("Processing samples...")
                time_proc_samples_start = time.time()
                # TODO: processing samples of each task with a different
                # baseline could be better?
                samples_data = self.sample_processor.process_samples(
                    paths, log='all', log_prefix='Step_%d-' % step)
                all_samples_data.append(samples_data)
                list_proc_samples_time.append(
                    time.time() - time_proc_samples_start)

                self.log_diagnostics(sum(list(paths.values()), []),
                                     prefix='Step_%d-' % step)

                """ ------------------- Inner Policy Update --------------------"""
                time_inner_step_start = time.time()
                if step < self.num_inner_grad_steps:
                    logger.log("Computing inner policy updates...")
                    self.algo._adapt(samples_data)
                list_inner_step_time.append(
                    time.time() - time_inner_step_start)
            total_inner_time = time.time() - start_total_inner_time

            time_maml_opt_start = time.time()
            """ ------------------ Outer Policy Update ---------------------"""
            logger.log("Optimizing policy...")
            # This needs to take all samples_data so that it can construct
            # the graph for meta-optimization.
            time_outer_step_start = time.time()
            self.algo.optimize_policy(all_samples_data)

            """ ------------------- Logging Stuff --------------------------"""
            logger.logkv('Itr', itr)
            logger.logkv('n_timesteps', self.sampler.total_timesteps_sampled)

            logger.logkv('Time-OuterStep',
                         time.time() - time_outer_step_start)
            logger.logkv('Time-TotalInner', total_inner_time)
            logger.logkv('Time-InnerStep', np.sum(list_inner_step_time))
            logger.logkv('Time-SampleProc', np.sum(list_proc_samples_time))
            logger.logkv('Time-Sampling', np.sum(list_sampling_time))

            logger.logkv('Time', time.time() - start_time)
            logger.logkv('ItrTime', time.time() - itr_start_time)
            logger.logkv('Time-MAMLSteps', time.time() - time_maml_opt_start)

            logger.log("Saving snapshot...")
            params = self.get_itr_snapshot(itr)
            logger.save_itr_params(itr, params)
            logger.log("Saved")

            logger.dumpkvs()

        logger.log("Training finished")
    self.sess.close()
def predict(self, target, data, reset_tf_vars=False):
    # check learnt
    local_hidden = [z for z in target.get_local_hidden()
                    if z not in data.keys()]
    global_hidden = [h for h in self.latent_vars
                     if h not in local_hidden and h not in data.keys()]
    other_observed = [a for a in self.observed_vars
                      if a not in data.keys() and a != target]

    # add the posterior of the latent variables
    for h in global_hidden:
        if h not in data.keys():
            data.update({h: self.posterior(h)})

    data_ed = {}
    for (key, value) in iteritems(data):
        data_ed.update({
            key.dist if isinstance(key, inf.models.RandomVariable) else key:
            value.dist if isinstance(value, inf.models.RandomVariable) else value
        })

    q_target = inf.Qmodel.new_qvar(target, check_observed=False)
    latent_vars_ed = {target.dist: q_target.dist}

    for z in local_hidden:
        qz = inf.Qmodel.new_qvar(z, check_observed=False)
        latent_vars_ed.update({z.dist: qz.dist})

    for a in other_observed:
        qa = inf.Qmodel.new_qvar(a, check_observed=False)
        latent_vars_ed.update({a.dist: qa.dist})

    inference_pred = ed.ReparameterizationKLqp(latent_vars_ed, data=data_ed)
    # inference_pred.run()
    inference_pred.initialize()

    sess = inf.util.Runtime.tf_sess
    if reset_tf_vars:
        tf.global_variables_initializer().run()
    else:
        for t in tf.global_variables():
            if not sess.run(tf.is_variable_initialized(t)):
                sess.run(tf.variables_initializer([t]))

    for _ in range(inference_pred.n_iter):
        info_dict = inference_pred.update()
        inference_pred.print_progress(info_dict)

    inference_pred.finalize()

    # tf.graph_util.convert_variables_to_constants(inf.util.Runtime.tf_sess, tf.get_default_graph())

    return q_target
def train(self):
    """
    Trains policy on env using algo

    Pseudocode:

        for itr in n_itr:
            for step in num_inner_grad_steps:
                sampler.sample()
                algo.compute_updated_dists()
            algo.optimize_policy()
            sampler.update_goals()
    """
    with self.sess.as_default() as sess:
        # initialize uninitialized vars (only initialize vars that were not loaded)
        uninit_vars = [
            var for var in tf.global_variables()
            if not sess.run(tf.is_variable_initialized(var))
        ]
        sess.run(tf.variables_initializer(uninit_vars))

        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            logger.log(
                "\n ---------------- Iteration %d ----------------" % itr)

            time_env_sampling_start = time.time()
            logger.log("Obtaining samples from the environment using the policy...")
            env_paths = self.sampler.obtain_samples(log=True, log_prefix='')
            logger.record_tabular('Time-EnvSampling',
                                  time.time() - time_env_sampling_start)

            logger.log("Processing environment samples...")
            # first processing just for logging purposes
            time_env_samp_proc = time.time()
            samples_data = self.sample_processor.process_samples(
                env_paths, log=True, log_prefix='EnvTrajs-')
            logger.record_tabular('Time-EnvSampleProc',
                                  time.time() - time_env_samp_proc)

            ''' --------------- fit dynamics model --------------- '''
            time_fit_start = time.time()
            logger.log("Training dynamics model for %i epochs ..."
                       % self.dynamics_model_max_epochs)
            self.dynamics_model.fit(samples_data['observations'],
                                    samples_data['actions'],
                                    samples_data['next_observations'],
                                    epochs=self.dynamics_model_max_epochs,
                                    verbose=False, log_tabular=True,
                                    early_stopping=True,
                                    compute_normalization=False)

            logger.log("Training the value function for %i epochs ..."
                       % self.vfun_max_epochs)
            self.value_function.fit(samples_data['observations'],
                                    samples_data['returns'],
                                    epochs=self.vfun_max_epochs,
                                    verbose=False, log_tabular=True,
                                    compute_normalization=False)

            logger.log("Training the policy ...")
            self.algo.optimize_policy(samples_data)

            logger.record_tabular('Time-ModelFit',
                                  time.time() - time_fit_start)

            """ ------------------- Logging Stuff --------------------------"""
            logger.logkv('Itr', itr)
            logger.logkv('n_timesteps', self.sampler.total_timesteps_sampled)
            logger.logkv('Time', time.time() - start_time)
            logger.logkv('ItrTime', time.time() - itr_start_time)

            logger.log("Saving snapshot...")
            params = self.get_itr_snapshot(itr)
            self.log_diagnostics(env_paths, '')
            logger.save_itr_params(itr, params)
            logger.log("Saved")

            logger.dumpkvs()
            if itr == 0:
                sess.graph.finalize()

        logger.log("Training finished")
    self.sess.close()
def initialize_uninitialized(sess):
    global_vars = tf.global_variables()
    # NB: these flags are True for variables that ARE initialized; the
    # (misleadingly named) list below keeps those where the flag is False.
    is_not_initialized = sess.run(
        [tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized)
                            if not f]
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
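Typical call pattern for the helper above (a sketch; the graph-building step stands in for whatever model code precedes it):

sess = tf.Session()
# ... build the graph, possibly restoring some variables from a checkpoint ...
initialize_uninitialized(sess)  # safe to call repeatedly: variables that are
                                # already initialized keep their values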
def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl, masks_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize. It tells the
            # optimizer to helpfully increment the 'batch' parameter for you
            # every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            print("--- Get model and loss")
            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl, is_training_pl,
                                               bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, masks_pl, end_points)
            tf.summary.scalar('loss_old_model', loss)

            print("--- Get training operator")
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            # train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            # variables_can_be_restored = list(set(tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)).intersection(tf.train.list_variables(MODEL_PATH)))
            # saver = tf.train.Saver(variables_can_be_restored)
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        saver.restore(sess, MODEL_PATH)

        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [v for (v, f) in zip(global_vars,
                                                    is_not_initialized)
                                if not f]
        print("================")
        print("NOT INITIALIZED VARS (should be [])")
        print(not_initialized_vars)
        print("================")

        print("Adding extra layers")
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pred, end_points = MODEL.get_flow_refine_model(
                pointclouds_pl, pred, is_training_pl, bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, masks_pl, end_points)
            tf.summary.scalar('loss_new_model', loss)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        epe, acc1, acc2 = scene_flow_EPE_np(pred, labels_pl, masks_pl)
        tf.summary.scalar("epe", epe)
        tf.summary.scalar("acc 5%", acc1)
        tf.summary.scalar("acc 10%", acc2)
        merged_test = tf.summary.merge_all()
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'),
                                            sess.graph)

        print("Getting uninitialized vars")
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [v for (v, f) in zip(global_vars,
                                                    is_not_initialized)
                                if not f]
        print("================")
        print("NOT INITIALIZED VARS (should be some)")
        print(not_initialized_vars)
        print("================")

        # Only train the newly added (still uninitialized) variables.
        with tf.device('/gpu:' + str(GPU_INDEX)):
            train_op = optimizer.minimize(loss, global_step=batch,
                                          var_list=not_initialized_vars)
        saver = tf.train.Saver()

        print("Initializing them")
        global_vars2 = tf.global_variables()
        is_not_initialized2 = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars2])
        not_initialized_vars2 = [v for (v, f) in zip(global_vars2,
                                                     is_not_initialized2)
                                 if not f]
        print("================")
        print("NOT INITIALIZED VARS (should be a couple more)")
        print(not_initialized_vars2)
        print("================")
        if len(not_initialized_vars2):
            sess.run(tf.variables_initializer(not_initialized_vars2))
        # init = tf.global_variables_initializer()
        # sess.run(init)
        print("Done")

        ops = {'pointclouds_pl': pointclouds_pl,
               'labels_pl': labels_pl,
               'masks_pl': masks_pl,
               'is_training_pl': is_training_pl,
               'pred': pred,
               'loss': loss,
               'train_op': train_op,
               'merged': merged,
               'merged_test': merged_test,
               'step': batch,
               'end_points': end_points}

        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
def __init__(self, ref, dep, coeff=0.5, height=28, width=28, colors=1,
             n_classes=10, learning_rate=0.001, ref_path=None, dep_path=None,
             optimizer='Adam'):
    """A classifier model made to fit on MNIST-like datasets."""

    # input dimension parameters, designed for the MNIST dataset
    self.h_input = height
    self.w_input = width
    self.input_channels = colors
    self.n_classes = n_classes

    # placeholder definition
    self.X = tf.placeholder(dtype=tf.float32,
                            shape=(None, self.h_input, self.w_input,
                                   self.input_channels),
                            name='input')
    self.Y = tf.placeholder(dtype=tf.int32, shape=(None, n_classes),
                            name='ground_truth')
    self.lr = tf.get_variable("learning_rate", initializer=learning_rate,
                              trainable=False)

    # architectures
    self.reference = ref
    self.dependency = dep

    # predictions
    self.Y_refpred = self.reference(self.X)
    self.Y_deppred = self.dependency(self.X)
    stacked_pred = tf.stack([self.Y_refpred, self.Y_deppred], axis=0)
    bary_pred = tf.reduce_mean(stacked_pred, 0)
    self.Y_ensemblepred = tf.nn.softmax(bary_pred)

    # categories; cast because argmax returns tf.int64 by default
    self.Y_cat = tf.cast(tf.argmax(self.Y, axis=1, name='classes'), tf.int32)
    self.Y_deppredcat = tf.cast(
        tf.argmax(self.Y_deppred, axis=1, name='depclasses'), tf.int32)
    self.Y_refpredcat = tf.cast(
        tf.argmax(self.Y_refpred, axis=1, name='refclasses'), tf.int32)
    self.Y_ensemblepredcat = tf.cast(
        tf.argmax(self.Y_ensemblepred, axis=1, name='ensembleclasses'),
        tf.int32)

    # objectives, defined to train the dependency network;
    # need the ground truth for classification
    self.floatY = tf.cast(self.Y, tf.float32)
    # need the reference as one-hot to compute disagreement
    self.one_hot_Y_refpred = tf.one_hot(self.Y_refpredcat, n_classes)
    # from_logits=False because the network output has been through softmax
    self.classifloss = tf.reduce_mean(
        tf.keras.backend.categorical_crossentropy(self.floatY,
                                                  self.Y_deppred,
                                                  from_logits=False))
    self.disagreementloss = tf.reduce_mean(
        tf.keras.backend.categorical_crossentropy(self.one_hot_Y_refpred,
                                                  self.Y_deppred,
                                                  from_logits=False))
    self.loss = self.classifloss - coeff * self.disagreementloss

    # accuracies
    self.refaccuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.Y_refpredcat, self.Y_cat), tf.float32))
    self.depaccuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.Y_deppredcat, self.Y_cat), tf.float32))
    self.ensembleaccuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.Y_ensemblepredcat, self.Y_cat), tf.float32))

    # optimization
    if optimizer == 'Adam':
        self.optimizer = tf.train.AdamOptimizer(self.lr)
    elif optimizer == 'SGD':
        self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
    elif optimizer == 'RMS':
        self.optimizer = tf.train.RMSPropOptimizer(self.lr)

    # training procedure
    self.training = self.optimizer.minimize(
        self.loss, var_list=self.dependency.trainable_weights)

    # At the end, do what all models do with the computation graph.
    self.refsaver = tf.train.Saver(
        var_list=self.reference.trainable_weights)
    self.depsaver = tf.train.Saver(
        var_list=self.dependency.trainable_weights)
    self.sess = tf.Session()

    # graph initialization
    if ref_path is not None:
        print("\nLoading weights from a previous REFERENCE trained model at "
              + ref_path + " !!!")
        self.refsaver.restore(self.sess, ref_path)
    if dep_path is not None:
        print("\nLoading weights from a previous DEPENDENCY trained model at "
              + dep_path + " !!!")
        self.depsaver.restore(self.sess, dep_path)

    # hunt down variables that are still uninitialized
    global_vars = tf.global_variables()
    is_not_initialized = self.sess.run(
        [tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [v for (v, f) in zip(global_vars,
                                                is_not_initialized)
                            if not f]
    if len(not_initialized_vars):
        self.sess.run(tf.variables_initializer(not_initialized_vars))
def tf_parcial_initializer(session):
    # bug fix: assuming `session` is a tf.Session, it must be invoked via
    # session.run(...), not called as a function
    for x in tf.global_variables():
        if not session.run(tf.is_variable_initialized(x)):
            session.run(x.initializer)
if (args.prune == True):
    saver = tf.train.Saver()
    saver.restore(sess, args.checkpoint)

    th_fc1 = get_th(dense_w["w_fc1"], percentage=0.9)
    th_fc2 = get_th(dense_w["w_fc2"], percentage=0.9)
    sp_w_fc1, idx_fc1 = prune(dense_w["w_fc1"], th_fc1, name="sp_w_fc1")
    sp_w_fc2, idx_fc2 = prune(dense_w["w_fc2"], th_fc2, name="sp_w_fc2")
    dense_w["w_fc1"] = sp_w_fc1
    dense_w["w_fc2"] = sp_w_fc2

    x = tf.placeholder(tf.float32, [None, 784], name="x")
    y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    # initialize any variables that the pruning ops introduced
    for var in tf.all_variables():
        if sess.run(tf.is_variable_initialized(var)) == False:
            sess.run(var.initializer)

    useless1, useless2, logit = dense_cnn_model(x, dense_w, keep_prob)
    test_acc = test(logit)
    print("test acc after pruning %g" % test_acc)
    saver.save(sess, "./model_ckpt_dense_pruned")

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit,
                                                            labels=y_)
    trainer = tf.train.AdamOptimizer(1e-4)
    grads = trainer.compute_gradients(cross_entropy)
    delete_none_grads(grads)
    count = 0
def tfvar_is_initialized_in(tf_var, session):
    return session.run(tf.is_variable_initialized(tf_var))
def model_creator(batch_size, name="default", dtype=np.float32):
  """Create MNIST autoencoder model. Dataset is part of model."""

  model = Model(name)

  def get_batch_size(data):
    if isinstance(data, IndexedGrad):
      return int(data.live[0].shape[1])
    else:
      return int(data.shape[1])

  init_dict = {}
  global_vars = []
  local_vars = []

  # TODO: factor out to reuse between scripts
  # TODO: change feed_dict logic to reuse value provided to VarStruct.
  # The current setup makes reinitialization of a global variable change
  # its value, which is counterintuitive.
  def init_var(val, name, is_global=False):
    """Helper to create variables with numpy or TF initial values."""
    if isinstance(val, tf.Tensor):
      var = u.get_variable(name=name, initializer=val, reuse=is_global)
    else:
      val = np.array(val)
      assert u.is_numeric(val), "Non-numeric type."
      var_struct = u.get_var(name=name, initializer=val, reuse=is_global)
      holder = var_struct.val_
      init_dict[holder] = val
      var = var_struct.var
    if is_global:
      global_vars.append(var)
    else:
      local_vars.append(var)
    return var

  # TODO: get rid of purely_relu
  def nonlin(x):
    if purely_relu:
      return tf.nn.relu(x)
    elif purely_linear:
      return tf.identity(x)
    else:
      return tf.sigmoid(x)

  # TODO: rename into "nonlin_d"
  def d_nonlin(y):
    if purely_relu:
      return u.relu_mask(y)
    elif purely_linear:
      return 1
    else:
      return y * (1 - y)

  patches = train_images[:, :args.batch_size]
  test_patches = test_images[:, :args.batch_size]

  if args.dataset == 'cifar':
    input_dim = 3 * 32 * 32
  elif args.dataset == 'mnist':
    input_dim = 28 * 28
  else:
    assert False

  if release_name == 'kfac_tiny':
    fs = [args.batch_size, input_dim, 196, input_dim]
  else:
    fs = [args.batch_size, input_dim, 1024, 1024, 1024, 196, 1024, 1024,
          1024, input_dim]

  def f(i):
    return fs[i + 1]  # W[i] has shape f[i] x f[i-1]

  n = len(fs) - 2

  # Full dataset from which new batches are sampled
  X_full = init_var(train_images, "X_full", is_global=True)

  X = init_var(patches, "X", is_global=False)  # stores local batch per model
  W = [None] * n
  W.insert(0, X)
  A = [None] * (n + 2)
  A[1] = W[0]
  for i in range(1, n + 1):
    init_val = ng_init(f(i), f(i - 1)).astype(dtype)
    W[i] = init_var(init_val, "W_%d" % (i,), is_global=True)
    A[i + 1] = nonlin(kfac_lib.matmul(W[i], A[i]))
  err = A[n + 1] - A[1]
  model.loss = u.L2(err) / (2 * get_batch_size(err))

  # create test error eval
  layer0 = init_var(test_patches, "X_test", is_global=True)
  layer = layer0
  for i in range(1, n + 1):
    layer = nonlin(W[i] @ layer)
  verr = (layer - layer0)
  model.vloss = u.L2(verr) / (2 * get_batch_size(verr))

  # manually compute backprop to use for sanity checking
  B = [None] * (n + 1)
  B2 = [None] * (n + 1)
  B[n] = err * d_nonlin(A[n + 1])
  _sampled_labels_live = tf.random_normal((f(n), f(-1)), dtype=dtype, seed=0)
  if args.fixed_labels:
    _sampled_labels_live = tf.ones(shape=(f(n), f(-1)), dtype=dtype)
  _sampled_labels = init_var(_sampled_labels_live, "to_be_deleted",
                             is_global=False)
  B2[n] = _sampled_labels * d_nonlin(A[n + 1])
  for i in range(n - 1, -1, -1):
    backprop = t(W[i + 1]) @ B[i + 1]
    B[i] = backprop * d_nonlin(A[i + 1])
    backprop2 = t(W[i + 1]) @ B2[i + 1]
    B2[i] = backprop2 * d_nonlin(A[i + 1])

  # cov_A = [None]*(n+1)    # covariance of activations[i]
  # cov_B2 = [None]*(n+1)   # covariance of synthetic backprops[i]
  # vars_svd_A = [None]*(n+1)
  # vars_svd_B2 = [None]*(n+1)
  # dW = [None]*(n+1)
  # pre_dW = [None]*(n+1)   # preconditioned dW
  # TODO: decouple initial value from covariance update;
  # may need to start with identity and do a running average
  # for i in range(1,n+1):
  #   if regularized_svd:
  #     cov_A[i] = init_var(A[i]@t(A[i])/args.batch_size+args.Lambda*u.Identity(f(i-1)), "cov_A%d"%(i,))
  #     cov_B2[i] = init_var(B2[i]@t(B2[i])/args.batch_size+args.Lambda*u.Identity(f(i)), "cov_B2%d"%(i,))
  #   else:
  #     cov_A[i] = init_var(A[i]@t(A[i])/args.batch_size, "cov_A%d"%(i,))
  #     cov_B2[i] = init_var(B2[i]@t(B2[i])/args.batch_size, "cov_B2%d"%(i,))
  #   vars_svd_A[i] = u.SvdWrapper(cov_A[i],"svd_A_%d"%(i,), do_inverses=False)
  #   vars_svd_B2[i] = u.SvdWrapper(cov_B2[i],"svd_B2_%d"%(i,), do_inverses=False)
  #   whitened_A = u.cached_inverse(vars_svd_A[i], args.Lambda) @ A[i]
  #   whitened_B = u.cached_inverse(vars_svd_B2[i], args.Lambda) @ B[i]
  #   dW[i] = (B[i] @ t(A[i]))/args.batch_size
  #   pre_dW[i] = (whitened_B @ t(whitened_A))/args.batch_size

  sampled_labels_live = A[n + 1] + tf.random_normal((f(n), f(-1)),
                                                    dtype=dtype, seed=0)
  if args.fixed_labels:
    sampled_labels_live = A[n + 1] + tf.ones(shape=(f(n), f(-1)),
                                             dtype=dtype)
  sampled_labels = init_var(sampled_labels_live, "sampled_labels",
                            is_global=False)
  err2 = A[n + 1] - sampled_labels
  model.loss2 = u.L2(err2) / (2 * args.batch_size)

  model.global_vars = global_vars
  model.local_vars = local_vars
  model.trainable_vars = W[1:]

  # TODO: we have 3 places where the model step is tracked; reduce
  model.step = init_var(u.as_int32(0), "step", is_global=False)
  advance_step_op = model.step.assign_add(1)
  assert get_batch_size(X_full) % args.batch_size == 0
  batches_per_dataset = (get_batch_size(X_full) // args.batch_size)
  batch_idx = tf.mod(model.step, batches_per_dataset)
  start_idx = batch_idx * args.batch_size
  advance_batch_op = X.assign(X_full[:, start_idx:start_idx + args.batch_size])

  def advance_batch():
    # print("Step for model(%s) is %s"%(model.name, u.eval(model.step)))
    sess = u.get_default_session()
    # TODO: get rid of _sampled_labels
    sessrun([sampled_labels.initializer, _sampled_labels.initializer])
    if args.advance_batch:
      sessrun(advance_batch_op)
    sessrun(advance_step_op)

  model.advance_batch = advance_batch

  # TODO: refactor this to take initial values out of Var struct
  # global_init_op = tf.group(*[v.initializer for v in global_vars])
  global_init_ops = [v.initializer for v in global_vars]
  global_init_op = tf.group(*[v.initializer for v in global_vars])
  global_init_query_ops = [tf.logical_not(tf.is_variable_initialized(v))
                           for v in global_vars]

  def initialize_global_vars(verbose=False, reinitialize=False):
    """If reinitialize is False, will not reinitialize variables that are
    already initialized."""
    sess = u.get_default_session()
    if not reinitialize:
      uninited = sessrun(global_init_query_ops)
      # use numpy boolean indexing to select the list of initializers to run
      to_initialize = list(np.asarray(global_init_ops)[uninited])
    else:
      to_initialize = global_init_ops
    if verbose:
      print("Initializing the following:")
      for v in to_initialize:
        print("   " + v.name)
    sessrun(to_initialize, feed_dict=init_dict)

  model.initialize_global_vars = initialize_global_vars

  # This didn't quite work (likely can't initialize a var in the same run
  # call as its deps). Intended to enforce that the batch is initialized
  # before everything except the fake labels:
  # for v in local_vars:
  #   if v != X and v != sampled_labels and v != _sampled_labels:
  #     print("Adding dep %s on %s"%(v.initializer.name, X.initializer.name))
  #     u.add_dep(v.initializer, on_op=X.initializer)

  local_init_op = tf.group(*[v.initializer for v in local_vars],
                           name="%s_localinit" % (model.name))
  print("Local vars:")
  for v in local_vars:
    print(v.name)

  def initialize_local_vars():
    sess = u.get_default_session()
    sessrun(_sampled_labels.initializer, feed_dict=init_dict)
    sessrun(X.initializer, feed_dict=init_dict)
    sessrun(local_init_op, feed_dict=init_dict)

  model.initialize_local_vars = initialize_local_vars

  return model
def init_rest():
    # NB: the per-variable .eval() calls require a default session
    all_vars = tf.global_variables()
    vars_to_init = [x for x in all_vars
                    if not tf.is_variable_initialized(x).eval()]
    return tf.variables_initializer(vars_to_init)
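Since init_rest evaluates one is_variable_initialized tensor per variable via .eval(), it must run while a default session is active; a sketch of the intended usage:

with tf.Session() as sess:
    # ... build the graph and/or restore a checkpoint ...
    sess.run(init_rest())  # initializes only the still-uninitialized variables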
def get_uninitialized_variables(session):
    variables = tf.global_variables()
    init_flag = session.run(
        [tf.is_variable_initialized(v) for v in variables])
    return [v for v, f in zip(variables, init_flag) if not f]
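The helper pairs naturally with tf.variables_initializer; a minimal usage sketch:

with tf.Session() as sess:
    # ... build the graph ...
    uninitialized = get_uninitialized_variables(sess)
    if uninitialized:
        sess.run(tf.variables_initializer(uninitialized))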