def _batch_norm(self, x, name): """Batch Normalization""" with tf.variable_scope(name): params_shape = [x.get_shape()[-1]] beta = tf.get_variable( 'beta', params_shape, tf.float64, norm_init(0.0, stddev=0.1, dtype=tf.float64)) gamma = tf.get_variable('gamma', params_shape, tf.float64, unif_init(0.1, 0.5, dtype=tf.float64)) mv_mean = tf.get_variable('moving_mean', params_shape, tf.float64, const_init(0.0, tf.float64), trainable=False) mv_var = tf.get_variable('moving_variance', params_shape, tf.float64, const_init(1.0, tf.float64), trainable=False) # Training Ops mean, variance = tf.nn.moments(x, [0], name='moments') hoge = assign_moving_average(mv_mean, mean, 0.99) piyo = assign_moving_average(mv_var, variance, 0.99) self._extra_train_ops.extend([hoge, piyo]) mean, variance = control_flow_ops.cond(self.is_training, lambda: (mean, variance), lambda: (mv_mean, mv_var)) y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-6) y.set_shape(x.get_shape()) return y
def _batch_norm(self, x, name): """ Batch normalization """ with tf.variable_scope(name): params_shape = [x.get_shape()[-1]] beta = tf.get_variable( 'beta', params_shape, tf.float64, norm_init(0.0, stddev=0.1, dtype=tf.float64)) print(self.sess.run(tf.shape(beta))) gamma = tf.get_variable('gamma', params_shape, tf.float64, unif_init(0.1, 0.5, dtype=tf.float64)) mv_mean = tf.get_variable('moving_mean', params_shape, tf.float64, const_init(0.0, tf.float64), trainable=False) mv_var = tf.get_variable('moving_variance', params_shape, tf.float64, const_init(1.0, tf.float64), trainable=False) # These ops will only be preformed when training mean, variance = tf.nn.moments(x, [0], name='moments') self._extra_train_ops.append( assign_moving_average(mv_mean, mean, 0.99)) self._extra_train_ops.append( assign_moving_average(mv_var, variance, 0.99)) mean, variance = control_flow_ops.cond(self.is_training, lambda: (mean, variance), lambda: (mv_mean, mv_var)) y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-6) y.set_shape(x.get_shape()) return y
def _batch_norm(self, x, name):
    """Batch normalization"""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]
        beta = tf.get_variable(
            'beta', params_shape, tf.float64,
            norm_init(0.0, stddev=0.1, dtype=tf.float64))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float64,
            unif_init(0.1, 0.5, dtype=tf.float64))
        mv_mean = tf.get_variable(
            'moving_mean', params_shape, tf.float64,
            const_init(0.0, tf.float64), trainable=False)
        mv_var = tf.get_variable(
            'moving_variance', params_shape, tf.float64,
            const_init(1.0, tf.float64), trainable=False)
        mean, variance = tf.nn.moments(x, [0])
        self._extra_train_ops.append(
            assign_moving_average(mv_mean, mean, 0.99))
        self._extra_train_ops.append(
            assign_moving_average(mv_var, variance, 0.99))
        mean, variance = control_flow_ops.cond(
            self.is_training,
            lambda: (mean, variance),
            lambda: (mv_mean, mv_var))
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-6)
        y.set_shape(x.get_shape())
        return y
def _batch_norm(self, x, name): """Batch normalization""" with tf.variable_scope(name): params_shape = [x.get_shape()[-1]] beta = tf.get_variable(name='beta', shape=params_shape, dtype=tf.float64, initializer=norm_init(mean=0.0, stddev=0.1, dtype=tf.float64), trainable=True) gamma = tf.get_variable(name='gamma', shape=params_shape, dtype=tf.float64, initializer=unif_init(minval=0.1, maxval=0.5, dtype=tf.float64), trainable=True) mv_mean = tf.get_variable(name='moving_mean', shape=params_shape, dtype=tf.float64, initializer=const_init(value=0.0, dtype=tf.float64), trainable=False) mv_var = tf.get_variable(name='moving_variance', shape=params_shape, dtype=tf.float64, initializer=const_init(value=1.0, dtype=tf.float64), trainable=False) # These ops will only be performed when training: mean, variance = tf.nn.moments(x=x, axes=[0], name='moments') self._extra_train_ops.append( assign_moving_average(variable=mv_mean, value=mean, decay=0.99)) self._extra_train_ops.append( assign_moving_average(variable=mv_var, value=variance, decay=0.99)) mean, variance = control_flow_ops.cond(pred=self.is_training, true_fn=lambda: (mean, variance), false_fn=lambda: (mv_mean, mv_var)) y = tf.nn.batch_normalization(x=x, mean=mean, variance=variance, offset=beta, scale=gamma, variance_epsilon=1e-6) y.set_shape(x.get_shape()) return y
def train(self):
    start_time = time.time()
    # train operations
    self.global_step = tf.get_variable('global_step', [],
                                       initializer=const_init(1),
                                       trainable=False, dtype=tf.int32)
    trainable_vars = tf.trainable_variables()
    grads = tf.gradients(self.loss, trainable_vars)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    apply_op = optimizer.apply_gradients(zip(grads, trainable_vars),
                                         global_step=self.global_step)
    train_ops = [apply_op] + self._extra_train_ops
    self.train_op = tf.group(*train_ops)
    self.loss_history = []
    self.init_history = []
    # validation paths
    dW_valid, X_valid = self.sample_path(self.valid_size)
    feed_dict_valid = {
        self.dW: dW_valid,
        self.X: X_valid,
        self.is_training: False
    }
    # initialization
    step = 1
    sess = self.sess
    sess.run(tf.global_variables_initializer())
    temp_loss = sess.run(self.loss, feed_dict=feed_dict_valid)
    temp_init = self.Y0.eval()[0]
    self.loss_history.append(temp_loss)
    self.init_history.append(temp_init)
    if self.equation == 'Burger':
        # evaluate the reference solution at t = 0, x = 0
        print("u(0,x) = %f" %
              sess.run(self.u_fn(0, tf.constant(value=np.zeros([self.d])))))
    print(" step : %5u , loss : %.4e , " % (0, temp_loss) +
          " Y0 : %.4e , runtime : %4u " %
          (temp_init, time.time() - start_time + self.t_bd))
    # SGD iterations
    for i in range(self.n_maxstep + 1):
        step = sess.run(self.global_step)
        dW_train, X_train = self.sample_path(self.batch_size)
        sess.run(self.train_op,
                 feed_dict={
                     self.dW: dW_train,
                     self.X: X_train,
                     self.is_training: True
                 })
        if step % self.n_displaystep == 0:
            temp_loss = sess.run(self.loss, feed_dict=feed_dict_valid)
            temp_init = self.Y0.eval()[0]
            self.loss_history.append(temp_loss)
            self.init_history.append(temp_init)
            print(" step : %5u , loss : %.4e , " % (step, temp_loss) +
                  " Y0 : %.4e , runtime : %4u " %
                  (temp_init, time.time() - start_time + self.t_bd))
        step += 1
    end_time = time.time()
    print(" running time : %.3fs " % (end_time - start_time + self.t_bd))
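# Both train() variants above depend on a path sampler (sample_path /
# sample_path_multivar) that is not shown in this section. A hypothetical
# sketch of such a sampler for a driftless forward SDE discretized with the
# Euler-Maruyama scheme; the attributes self.h (time step) and self.sigma and
# the constant-diffusion dynamics are assumptions, not the model used above:
def sample_path(self, n_sample):
    """Return Brownian increments dW and the simulated forward paths X."""
    dW = np.random.normal(size=[n_sample, self.d, self.N]) * np.sqrt(self.h)
    X = np.zeros([n_sample, self.d, self.N + 1])
    X[:, :, 0] = self.Xinit  # broadcast the initial state to every path
    for n in range(self.N):
        # Euler-Maruyama step with zero drift and constant diffusion sigma
        X[:, :, n + 1] = X[:, :, n] + self.sigma * dW[:, :, n]
    return dW, X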
def train(self, sess):
    start_time = time.time()
    # train operations
    self.global_step = tf.get_variable(name='global_step', shape=[],
                                       initializer=const_init(value=1,
                                                              dtype=tf.int32),
                                       trainable=False, dtype=tf.int32)
    trainable_vars = tf.trainable_variables()
    grads = tf.gradients(self.loss, trainable_vars)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    apply_op = optimizer.apply_gradients(zip(grads, trainable_vars),
                                         global_step=self.global_step)
    train_ops = [apply_op] + self._extra_train_ops
    self.train_op = tf.group(*train_ops)
    self.loss_history = []
    self.init_history = []
    # for validation
    dS_valid, S_valid = self.sample_path_multivar(self.valid_size)
    # initialization
    step = 1
    sess.run(tf.global_variables_initializer())
    self._validate_and_print(sess, dS_valid, S_valid, step, start_time)
    # begin SGD iteration
    for _ in range(self.n_maxstep + 1):
        step = sess.run(self.global_step)
        dS_train, S_train = self.sample_path_multivar(self.batch_size)
        feed_dict_train = {
            self.dS: dS_train,
            self.S: S_train,
            self.is_training: True
        }
        sess.run(self.train_op, feed_dict=feed_dict_train)
        if step % self.n_displaystep == 0:
            print(step)
            self._validate_and_print(sess, dS_valid, S_valid, step, start_time)
        step += 1  # redundant: step is re-read from global_step each iteration
    end_time = time.time()
    print("running time: %.3f s" % (end_time - start_time + self.t_bd))
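# The train(self, sess) variant above calls self._validate_and_print, which is
# not shown in this section. A hypothetical sketch of such a helper, mirroring
# the inline validation logic of the first train() variant; the method name and
# placeholder names come from the call sites, the body is an assumption:
def _validate_and_print(self, sess, dS_valid, S_valid, step, start_time):
    """Evaluate the loss and Y0 on the validation paths and log them."""
    feed_dict_valid = {
        self.dS: dS_valid,
        self.S: S_valid,
        self.is_training: False
    }
    temp_loss = sess.run(self.loss, feed_dict=feed_dict_valid)
    temp_init = sess.run(self.Y0)[0]
    self.loss_history.append(temp_loss)
    self.init_history.append(temp_init)
    print(" step : %5u , loss : %.4e ,  Y0 : %.4e , runtime : %4u " %
          (step, temp_loss, temp_init, time.time() - start_time + self.t_bd))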
def train(self):
    sess = self.sess
    trainable_variables = tf.trainable_variables()
    self.global_step = tf.get_variable('global_step', [],
                                       initializer=const_init(1),
                                       trainable=False, dtype=tf.int32)
    learning_rate = tf.train.exponential_decay(1.0, self.global_step,
                                               decay_steps=300, decay_rate=0.5,
                                               staircase=False)
    grads = tf.gradients(self.loss, trainable_variables)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    apply_op = optimizer.apply_gradients(zip(grads, trainable_variables),
                                         global_step=self.global_step,
                                         name='train_step')
    train_ops = [apply_op] + self._extra_train_ops
    train_op = tf.group(*train_ops)
    multiplier = 30
    sess.run(tf.global_variables_initializer())
    self.num_path = 100 * multiplier
    fetches = [self.global_step, self.loss, self.X_hist, self.W_hist,
               self.pi_hist, self.pi_plus_hist, self.dX_hist, self.dW_hist,
               self.lower_bound_hist, self.upper_bound_hist]
    # evaluate the known optimal policy as a benchmark
    step, loss, X_hist, W_hist, pi_hist, pi_plus_hist, dX_hist, dW_hist, \
        lower_bound_hist, upper_bound_hist = sess.run(
            fetches,
            feed_dict={
                self.X: np.ones([self.num_path, 1]).dot(self.Xinit.reshape([1, -1])),
                self.W: np.ones([self.num_path, 1]) * self.Winit,
                self.pi: np.zeros([self.num_path, self.d]),
                self.dZ: np.random.normal(size=[self.num_path, self.d, self.N]),
                self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                self.Play_Opt_Policy: 1.0,
                self.is_back_test: False,
                self.is_training: False})
    print('Playing Optimal Policy: %f' %
          np.mean(sess.run(self.Utility(W_hist[-1]))))
    # SGD iterations
    for i in range(self.n_steps):
        sess.run(train_op,
                 feed_dict={
                     self.X: np.ones([self.num_path, 1]).dot(self.Xinit.reshape([1, -1])),
                     self.W: np.ones([self.num_path, 1]) * self.Winit,
                     self.pi: np.zeros([self.num_path, self.d]),
                     self.dZ: np.random.normal(size=[self.num_path, self.d, self.N]),
                     self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                     self.Play_Opt_Policy: 0.0,
                     self.is_back_test: False,
                     self.is_training: True})
        if i % 50 == 0 or i == self.n_steps - 1:
            step, loss, X_hist, W_hist, pi_hist, pi_plus_hist, dX_hist, dW_hist, \
                lower_bound_hist, upper_bound_hist = sess.run(
                    fetches,
                    feed_dict={
                        self.X: np.ones([self.num_path, 1]).dot(self.Xinit.reshape([1, -1])),
                        self.W: np.ones([self.num_path, 1]) * self.Winit,
                        self.pi: np.zeros([self.num_path, self.d]),
                        self.dZ: np.random.normal(size=[self.num_path, self.d, self.N]),
                        self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                        self.Play_Opt_Policy: 0.0,
                        self.is_back_test: False,
                        self.is_training: False})
            print('step = %d, loss = %f' % (step, loss))
            if loss < 800:
                print('good result!')
    # back test on a single recorded path
    self.num_path = 1
    step, loss, X_hist, W_hist, pi_hist, pi_plus_hist, dX_hist, dW_hist, \
        lower_bound_hist, upper_bound_hist = sess.run(
            fetches,
            feed_dict={
                self.X: np.ones([self.num_path, 1]).dot(self.Xinit.reshape([1, -1])),
                self.W: np.ones([self.num_path, 1]) * self.Winit,
                self.pi: np.zeros([self.num_path, self.d]),
                self.dZ: np.expand_dims(self.back_test_dX, axis=0),
                self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                self.Play_Opt_Policy: 0.0,
                self.is_back_test: True,
                self.is_training: False})
    self.process_save_data(X_hist, W_hist, pi_hist, pi_plus_hist, dX_hist,
                           lower_bound_hist, upper_bound_hist)
    print("finished")
def train(self):
    sess = self.sess
    trainable_variables = tf.trainable_variables()
    self.global_step = tf.get_variable('global_step', [],
                                       initializer=const_init(1),
                                       trainable=False, dtype=tf.int32)
    learning_rate = tf.train.exponential_decay(1.0, self.global_step,
                                               decay_steps=500, decay_rate=0.5,
                                               staircase=False)
    grads = tf.gradients(self.loss, trainable_variables)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    apply_op = optimizer.apply_gradients(zip(grads, trainable_variables),
                                         global_step=self.global_step,
                                         name='train_step')
    train_ops = [apply_op] + self._extra_train_ops
    train_op = tf.group(*train_ops)
    multiplier = 10
    sess.run(tf.global_variables_initializer())
    limit = self.limit
    sess.run(self.global_step.initializer)
    self.num_path = 100 * multiplier
    for i in range(self.n_steps):
        sess.run(train_op,
                 feed_dict={
                     self.X: np.ones([self.num_path, 1]) * self.Xinit,
                     self.W: np.ones([self.num_path, 1]) * self.Winit,
                     self.pi: np.zeros([self.num_path, 1]),
                     self.dZ: np.random.normal(size=[self.num_path, self.N]),
                     self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                     self.upper_limit: np.ones([1, 1], dtype=float) * limit,
                     self.lower_limit: np.ones([1, 1], dtype=float) * (-limit),
                     self.is_training: True})
        if (i % 50 == 0) or (i == self.n_steps - 1):
            # evaluate on a larger set of paths
            self.num_path = 10000
            step, loss, X_hist, W_hist, pi_hist, pi_plus_hist, dX_hist, \
                lower_bound_hist, upper_bound_hist = sess.run(
                    [self.global_step, self.loss, self.X_hist, self.W_hist,
                     self.pi_hist, self.pi_plus_hist, self.dX_hist,
                     self.lower_bound_hist, self.upper_bound_hist],
                    feed_dict={
                        self.X: np.ones([self.num_path, 1]) * self.Xinit,
                        self.W: np.ones([self.num_path, 1]) * self.Winit,
                        self.pi: np.zeros([self.num_path, 1]),
                        self.dZ: np.random.normal(size=[self.num_path, self.N]),
                        self.W_pos: np.ones([self.num_path, 1], dtype=bool),
                        self.upper_limit: np.ones([1, 1], dtype=float) * limit,
                        self.lower_limit: np.ones([1, 1], dtype=float) * (-limit),
                        self.is_training: False})
            self.process_save_data(limit, X_hist, W_hist, pi_hist, pi_plus_hist,
                                   dX_hist, lower_bound_hist, upper_bound_hist,
                                   make_plot=(i == self.n_steps - 1))
            print('limit = %f, step = %d, loss = %f' % (limit, step, loss))
            # restore the training batch size
            self.num_path = 100 * multiplier
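# For context, a minimal sketch of how _batch_norm is typically wired into one
# fully connected hidden layer of the approximating subnetwork; the layer name,
# weight initialization, and activation choice here are assumptions, not part
# of the code above:
def _dense_batch_layer(self, x, out_dim, name, activation=tf.nn.relu):
    """Affine map followed by batch normalization and an optional activation."""
    with tf.variable_scope(name):
        in_dim = int(x.get_shape()[-1])
        weights = tf.get_variable(
            'weights', [in_dim, out_dim], tf.float64,
            norm_init(0.0, stddev=np.sqrt(2.0 / (in_dim + out_dim)),
                      dtype=tf.float64))
        hidden = tf.matmul(x, weights)
        hidden = self._batch_norm(hidden, name='bn')
        return activation(hidden) if activation is not None else hidden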