def _apply_dense_on_grassmann_with_noise(self, grad, var, seed):
    g = gutils.grassmann_project(var, grad)
    g_norm = gutils.norm(g)
    # g_norm is a scalar tensor, so the branch must be built with tf.cond
    # rather than a Python `if`, which would fail in graph mode.
    a = tf.cond(
        g_norm >= 1 / self._times,
        lambda: 1 - 1 / (tf.square(self._times) * tf.square(g_norm)),
        lambda: 1 / tf.square(self._times))
    b = 1 / tf.square(self._times)
    dim = grad.get_shape()[0]
    # Noise is drawn with the same [dim, 1] shape as the flattened variable;
    # a [dim, dim] draw would broadcast h to a matrix and break the update.
    noise = tf.truncated_normal([dim, 1], mean=0.0, stddev=1.0,
                                dtype=tf.float32, seed=seed,
                                name="random_noise")
    h = -self._learning_rate_t * (a * g + b * noise)
    if self._grad_clip is not None:
        h = gutils.clip_by_norm(h, self._grad_clip_t)
    var_new = gutils.grassmann_retrction(var, h)
    return var_new
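# The routine above leans on a handful of gutils helpers. Below is a minimal
# sketch of what they are assumed to compute for a unit-norm column vector x;
# this is a reconstruction for readability, not the project's actual gutils
# module (the retraction keeps the spelling used at the call sites).
import tensorflow as tf

def grassmann_project(x, g):
    # Project g onto the tangent space at x: g - x (x^T g).
    return g - tf.matmul(x, tf.matmul(x, g, transpose_a=True))

def norm(g):
    return tf.sqrt(tf.reduce_sum(tf.square(g)))

def clip_by_norm(h, clip):
    return tf.clip_by_norm(h, clip)

def grassmann_retrction(x, h):
    # Retract: step along h, then renormalize back onto the manifold.
    y = x + h
    return y / (norm(y) + 1e-12)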
def _apply_dense_on_grassmann_with_noise(grad_clip, grad, var, seed,
                                         learning_rate, times):
    g = gutils.grassmann_project(var, grad)
    g_norm = gutils.norm(g)
    # a = tf.minimum(1 - 1 / (tf.square(times + 1) * tf.square(g_norm) + 1e-5),
    #                1 / tf.square(times + 1))
    a = 1.0
    b = 1 / tf.square(times + 1)
    dim = tf.convert_to_tensor(grad.get_shape()[0], dtype=tf.int32)
    noise = tf.truncated_normal([dim, 1], mean=0.0, stddev=0.0001,
                                dtype=tf.float32, seed=seed,
                                name="random_noise")
    h = -learning_rate * (a * g + b * noise)
    if grad_clip is not None:
        h = gutils.clip_by_norm(h, grad_clip)
    var_new = gutils.grassmann_retrction(var, h)
    return var_new
def _apply_dense_on_grassmann_with_noise(grad_clip, grad, var, learning_rate,
                                         times, variance):
    g = gutils.grassmann_project(var, grad)
    # g_norm = gutils.norm(g)
    # a = tf.minimum(1 - 1 / (tf.square(times + 1) * tf.square(g_norm) + 1e-5),
    #                1 / tf.square(times + 1))
    a = 1.0
    b = 1 / torch.square(times + 1)
    noise = variance * gutils.grassmann_project(var,
                                                torch.randn(var.size()[0]))
    # Both the clipped and the unclipped branch use the same step direction;
    # the original erroneously referenced gutils.noise in one branch.
    h = -learning_rate * (a * g + b * noise)
    if grad_clip is not None:
        h = gutils.clip_by_norm(h, grad_clip)
    var_new = gutils.grassmann_retrction(var, h)
    return var_new
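# The PyTorch variant above assumes torch-side analogues of the same helpers.
# A sketch under the same assumptions (vector arguments, unit-norm x); again a
# reconstruction, not the shipped gutils module.
import torch

def grassmann_project(x, g):
    # g - x (x^T g), written for 1-D or [n, 1] tensors.
    return g - x * torch.sum(x * g)

def clip_by_norm(h, clip):
    n = torch.norm(h)
    return h * (clip / n) if n > clip else h

def grassmann_retrction(x, h):
    y = x + h
    return y / (torch.norm(y) + 1e-12)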
def _apply_dense_on_grasssmann(self, grad_on_grassmann, grad_on_oblique, var):
    a = tf.maximum(self._delta_t, 1 / tf.square(self._times))
    # Component of the oblique gradient in the Grassmann tangent space at var
    # (the original dropped the `var` argument in the b_2 projection).
    oblique_tangent = gutils.grassmann_project(var, grad_on_oblique)
    b_1 = 2 * (1 - a) * tf.matmul(tf.transpose(grad_on_grassmann),
                                  oblique_tangent)
    b_2 = gutils.norm(oblique_tangent)
    b = b_1 / b_2
    h = -self._learning_rate_t * (a * grad_on_grassmann + b * oblique_tangent)
    if self._grad_clip is not None:
        h = gutils.clip_by_norm(h, self._grad_clip_t)
    var_update = gutils.grassmann_retrction(var, h)
    return var_update
def apply_dense_on_grasssmann(grad_clip, grad_on_grassmann, grad_on_oblique,
                              var, learning_rate, times, delta):
    a = tf.maximum(delta, 1 / tf.log(tf.log(times + 2)))
    # Direction of the projected oblique gradient, rescaled to the norm of
    # the Grassmann gradient.
    n = gutils.unit(gutils.grassmann_project(var, grad_on_oblique)) * \
        gutils.norm(grad_on_grassmann)
    b_1 = 2 * (1 - a) * gutils.xTy(grad_on_grassmann, n)
    b_2 = gutils.norm(grad_on_grassmann)
    b = b_1 / (b_2 + 1e-5)
    h = -learning_rate * (a * grad_on_grassmann + b * n)
    if grad_clip is not None:
        h = gutils.clip_by_norm(h, grad_clip)
    var_update = gutils.grassmann_retrction(var, h)
    return var_update
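# For orientation, a toy invocation of the functional variant above. The
# values are arbitrary and only illustrate shapes; evaluate the result inside
# a tf.Session as usual. gutils.unit is assumed to return its input scaled to
# unit norm and gutils.xTy the inner product x^T y.
var_toy = tf.constant([[0.5], [0.5], [0.5], [0.5]])      # unit-norm point
g_grass_toy = tf.constant([[0.1], [0.2], [-0.1], [0.0]])
g_obl_toy = tf.constant([[0.0], [0.1], [0.3], [-0.2]])
new_var_toy = apply_dense_on_grasssmann(None, g_grass_toy, g_obl_toy, var_toy,
                                        learning_rate=0.1, times=1.0,
                                        delta=0.5)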
def train(mnist, LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH):
    file_path_loss_grassmann = os.path.join(
        FILE_SAVE_PATH, 'loss_grassmann_' + str(LEARNING_RATE_BASE) + '.txt')
    file_path_loss_oblique = os.path.join(
        FILE_SAVE_PATH, 'loss_oblique_' + str(LEARNING_RATE_OBLIQUE) + '.txt')
    file1_path_grassmann = os.path.join(
        FILE_SAVE_PATH,
        'accuracy_grassmann_' + str(LEARNING_RATE_BASE) + '.txt')
    file1_path_oblique = os.path.join(
        FILE_SAVE_PATH,
        'accuracy_oblique' + str(LEARNING_RATE_OBLIQUE) + '.txt')
    file1_path_ensemble = os.path.join(
        FILE_SAVE_PATH, 'accuracy_ensemble' + str(LEARNING_RATE_BASE) + '.txt')
    file_path_norm_grassmann = os.path.join(
        FILE_SAVE_PATH, 'norm_grassmann' + str(LEARNING_RATE_BASE) + '.txt')
    file_path_norm_oblique = os.path.join(
        FILE_SAVE_PATH, 'norm_oblique' + str(LEARNING_RATE_OBLIQUE) + '.txt')
    file_loss_grassmann = open(file_path_loss_grassmann, 'w')
    file_loss_oblique = open(file_path_loss_oblique, 'w')
    file_accuracy_grassmann = open(file1_path_grassmann, 'w')
    file_accuracy_oblique = open(file1_path_oblique, 'w')
    file_accuracy_ensemble = open(file1_path_ensemble, 'w')
    file_norm_grassmann = open(file_path_norm_grassmann, 'w')
    file_norm_oblique = open(file_path_norm_oblique, 'w')

    x = tf.placeholder(tf.float32, shape=[None, LeNet5.INPUT_NODE],
                       name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[None, LeNet5.OUTPUT_NODE],
                        name="y-output")
    x_reshaped = tf.reshape(
        x, [-1, LeNet5.IMAGE_SIZE, LeNet5.IMAGE_SIZE, LeNet5.NUM_CHANNELS])
    times = tf.placeholder(tf.float32, shape=None, name="times")
    # GRAD_CLIP = tf.constant(1.0, dtype=tf.float32)
    # L2 regularization.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y_g_grassmann = LeNet5_grassmann.inference(x_reshaped, False, regularizer)
    y_o_oblique = LeNet5_oblique.inference(x_reshaped, False, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the losses, the moving-average op, etc.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy_g_grassmann = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_g_grassmann)
    cross_entropy_o_oblique = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_o_oblique)
    cross_entropy_mean_g_grassmann = tf.reduce_mean(cross_entropy_g_grassmann)
    cross_entropy_mean_o_oblique = tf.reduce_mean(cross_entropy_o_oblique)
    # Total loss; the disabled term sums the per-layer regularization
    # collection (a list of tensors).
    loss_g_grassmann = cross_entropy_mean_g_grassmann  # + tf.add_n(tf.get_collection('losses_g_grassmann'))
    loss_o_oblique = cross_entropy_mean_o_oblique  # + tf.add_n(tf.get_collection('losses_o_oblique'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    learning_rate_o = tf.train.exponential_decay(
        LEARNING_RATE_OBLIQUE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    # learning_rate = LEARNING_RATE_BASE
    # Parameter update:
    # train_step = tf.train.GradientDescentOptimizer(
    #     learning_rate).minimize(loss, global_step=global_step)
    # Run the moving-average update alongside the training step:
    # with tf.control_dependencies([train_step, variable_averages_op]):
    #     train_op = tf.no_op(name='train')
    correct_prediction_grassmann = tf.equal(tf.argmax(y_, 1),
                                            tf.argmax(y_g_grassmann, 1))
    correct_prediction_oblique = tf.equal(tf.argmax(y_, 1),
                                          tf.argmax(y_o_oblique, 1))
    correct_prediction_ensemble = tf.equal(
        tf.argmax(y_, 1), tf.argmax(tf.add(y_g_grassmann, y_o_oblique), 1))
    accuracy_grassmann = tf.reduce_mean(
        tf.cast(correct_prediction_grassmann, tf.float32))
    accuracy_oblique = tf.reduce_mean(
        tf.cast(correct_prediction_oblique, tf.float32))
    accuracy_ensemble = tf.reduce_mean(
        tf.cast(correct_prediction_ensemble, tf.float32))

    ###########################################################################
    with tf.variable_scope('layer1-conv1_grassmann', reuse=True):
        conv1_weights_g = tf.get_variable("weight_g")
        conv1_biases_g = tf.get_variable('biases_g')
        conv1_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        conv1_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")
        weights_grad_g_base_g_layer1 = tf.gradients(
            loss_g_grassmann, conv1_weights_g, stop_gradients=conv1_weights_g)
        weights_grad_g_base_g_biases_layer1 = tf.gradients(
            loss_g_grassmann, conv1_biases_g, stop_gradients=conv1_biases_g)
        weights_g = tf.reshape(conv1_weights_g, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_g_layer1[0], dtype=tf.float32)
        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer1[0],
                                    shape=[-1, 1])
        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)
        weights_g_layer1 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 100, learning_rate, times)
        # weights_g_layer1 = weights_g - learning_rate * weights_grad_g
        weights_biases_grassmann_layer1 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_g_base_g_biases_layer1[0], tf.float32),
            conv1_biases_g)
        norm_g_1 = tf.square(gutils.norm(grad_on_grassmann))
    with tf.variable_scope('layer3-conv2_grassmann', reuse=True):
        conv2_weights_g = tf.get_variable("weight_g")
        conv2_biases_g = tf.get_variable('biases_g')
        conv2_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        conv2_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")
        weights_grad_g_base_g_layer3 = tf.gradients(
            loss_g_grassmann, conv2_weights_g, stop_gradients=conv2_weights_g)
        weights_grad_g_base_g_biases_layer3 = tf.gradients(
            loss_g_grassmann, conv2_biases_g, stop_gradients=conv2_biases_g)
        weights_g = tf.reshape(conv2_weights_g, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_g_layer3[0], dtype=tf.float32)
        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer3[0],
                                    shape=[-1, 1])
        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)
        weights_g_layer3 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 101, learning_rate, times)
        # weights_g_layer3 = weights_g - learning_rate * weights_grad_g
        weights_biases_grassmann_layer3 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_g_base_g_biases_layer3[0], tf.float32),
            conv2_biases_g)
        norm_g_3 = tf.square(gutils.norm(grad_on_grassmann))
    with tf.variable_scope('layer5-fc1_grassmann', reuse=True):
        fc1_weights_g = tf.get_variable("weight_g")
        fc1_biases_g = tf.get_variable("biases_g")
        fc1_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        fc1_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")
        weights_grad_g_base_g_layer5 = tf.gradients(
            loss_g_grassmann, fc1_weights_g, stop_gradients=fc1_weights_g)
        weights_grad_g_base_biases_g_layer5 = tf.gradients(
            loss_g_grassmann, fc1_biases_g, stop_gradients=fc1_biases_g)
        weights_g = tf.reshape(fc1_weights_g, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_g_layer5[0], dtype=tf.float32)
        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer5[0],
                                    shape=[-1, 1])
        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)
        weights_g_layer5 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 102, learning_rate, times)
        # weights_g_layer5 = weights_g - learning_rate * weights_grad_g
        weights_biases_grassmann_layer5 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_g_base_biases_g_layer5[0], tf.float32),
            fc1_biases_g)
        norm_g_5 = tf.square(gutils.norm(grad_on_grassmann))
    with tf.variable_scope('layer6-fc2_grassmann', reuse=True):
        fc2_weights_g = tf.get_variable("weight_g")
        fc2_biases_g = tf.get_variable("biases_g")
        fc2_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        fc2_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")
        weights_grad_g_base_g_layer6 = tf.gradients(
            loss_g_grassmann, fc2_weights_g, stop_gradients=fc2_weights_g)
        weights_grad_g_base_biases_g_layer6 = tf.gradients(
            loss_g_grassmann, fc2_biases_g, stop_gradients=fc2_biases_g)
        weights_g = tf.reshape(fc2_weights_g, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_g_layer6[0], dtype=tf.float32)
        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer6[0],
                                    shape=[-1, 1])
        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)
        weights_g_layer6 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 103, learning_rate, times)
        # weights_g_layer6 = weights_g - learning_rate * weights_grad_g
        weights_biases_grassmann_layer6 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_g_base_biases_g_layer6[0], tf.float32),
            fc2_biases_g)
        norm_g_6 = tf.square(gutils.norm(grad_on_grassmann))
    ###########################################################################
    with tf.variable_scope('layer1-conv1_oblique', reuse=True):
        conv1_weights_o = tf.get_variable("weight_o")
        conv1_biases_o = tf.get_variable('biases_o')
        dim_layer1 = conv1_weights_o.get_shape()
        conv1_weights_o_tmp = tf.get_variable("weight_o_tmp")
        conv1_biases_o_tmp = tf.get_variable("biases_o_tmp")
        weights_grad_o_base_layer1_o = tf.gradients(
            loss_o_oblique, conv1_weights_o, stop_gradients=conv1_weights_o)
        weights_grad_o_base_biases_layer1_o = tf.gradients(
            loss_o_oblique, conv1_biases_o, stop_gradients=conv1_biases_o)
        weights_o = tf.reshape(conv1_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_o_base_layer1_o[0], dtype=tf.float32)
        weights_grad_o = tf.reshape(weights_grad_o_base_layer1_o[0],
                                    shape=[-1, 1])
        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)
        weights_o_layer1_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 104, learning_rate_o, times)
        # weights_o_layer1_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer1 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer1_o[0], tf.float32),
            conv1_biases_o)
        norm_o_1 = tf.square(gutils.norm(grad_on_oblique))
    with tf.variable_scope('layer3-conv2_oblique', reuse=True):
        conv2_weights_o = tf.get_variable("weight_o")
        conv2_biases_o = tf.get_variable('biases_o')
        conv2_weights_o_tmp = tf.get_variable("weight_o_tmp")
        conv2_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer3 = conv2_weights_o.get_shape()
        weights_grad_o_base_layer3_o = tf.gradients(
            loss_o_oblique, conv2_weights_o, stop_gradients=conv2_weights_o)
        weights_grad_o_base_biases_layer3_o = tf.gradients(
            loss_o_oblique, conv2_biases_o, stop_gradients=conv2_biases_o)
        weights_o = tf.reshape(conv2_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_o_base_layer3_o[0], dtype=tf.float32)
        weights_grad_o = tf.reshape(weights_grad_o_base_layer3_o[0],
                                    shape=[-1, 1])
        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)
        weights_o_layer3_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 105, learning_rate_o, times)
        # weights_o_layer3_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer3 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer3_o[0], tf.float32),
            conv2_biases_o)
        norm_o_3 = tf.square(gutils.norm(grad_on_oblique))
    with tf.variable_scope('layer5-fc1_oblique', reuse=True):
        fc1_weights_o = tf.get_variable("weight_o")
        fc1_biases_o = tf.get_variable("biases_o")
        fc1_weights_o_tmp = tf.get_variable("weight_o_tmp")
        fc1_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer5 = fc1_weights_o.get_shape()
        weights_grad_o_base_layer5_o = tf.gradients(
            loss_o_oblique, fc1_weights_o, stop_gradients=fc1_weights_o)
        weights_grad_o_base_biases_layer5_o = tf.gradients(
            loss_o_oblique, fc1_biases_o, stop_gradients=fc1_biases_o)
        weights_o = tf.reshape(fc1_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_o_base_layer5_o[0], dtype=tf.float32)
        weights_grad_o = tf.reshape(weights_grad_o_base_layer5_o[0],
                                    shape=[-1, 1])
        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)
        weights_o_layer5_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 106, learning_rate_o, times)
        # weights_o_layer5_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer5 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer5_o[0], tf.float32),
            fc1_biases_o)
        norm_o_5 = tf.square(gutils.norm(grad_on_oblique))
    with tf.variable_scope('layer6-fc2_oblique', reuse=True):
        fc2_weights_o = tf.get_variable("weight_o")
        fc2_biases_o = tf.get_variable("biases_o")
        fc2_weights_o_tmp = tf.get_variable("weight_o_tmp")
        fc2_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer6 = fc2_weights_o.get_shape()
        weights_grad_o_base_layer6_o = tf.gradients(
            loss_o_oblique, fc2_weights_o, stop_gradients=fc2_weights_o)
        weights_grad_o_base_biases_layer6_o = tf.gradients(
            loss_o_oblique, fc2_biases_o, stop_gradients=fc2_biases_o)
        weights_o = tf.reshape(fc2_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_o_base_layer6_o[0], dtype=tf.float32)
        weights_grad_o = tf.reshape(weights_grad_o_base_layer6_o[0],
                                    shape=[-1, 1])
        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)
        weights_o_layer6_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 107, learning_rate_o, times)
        # weights_o_layer6_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer6 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer6_o[0], tf.float32),
            fc2_biases_o)
        norm_o_6 = tf.square(gutils.norm(grad_on_oblique))
    ###########################################################################
    # Stage the updated (flattened) weights into the *_tmp variables.
    _1 = tf.assign(conv1_weights_g_tmp_grassmann,
                   tf.reshape(weights_g_layer1, shape=dim_layer1))
    _2 = tf.assign(conv1_weights_o_tmp,
                   tf.reshape(weights_o_layer1_o, shape=dim_layer1))
    _3 = tf.assign(conv2_weights_g_tmp_grassmann,
                   tf.reshape(weights_g_layer3, shape=dim_layer3))
    _4 = tf.assign(conv2_weights_o_tmp,
                   tf.reshape(weights_o_layer3_o, shape=dim_layer3))
    _5 = tf.assign(fc1_weights_g_tmp_grassmann,
                   tf.reshape(weights_g_layer5, shape=dim_layer5))
    _6 = tf.assign(fc1_weights_o_tmp,
                   tf.reshape(weights_o_layer5_o, shape=dim_layer5))
    _7 = tf.assign(fc2_weights_g_tmp_grassmann,
                   tf.reshape(weights_g_layer6, shape=dim_layer6))
    _8 = tf.assign(fc2_weights_o_tmp,
                   tf.reshape(weights_o_layer6_o, shape=dim_layer6))
    _11 = tf.assign(conv1_biases_g_tmp_grassmann,
                    weights_biases_grassmann_layer1)
    _12 = tf.assign(conv1_biases_o_tmp, weights_biases_oblique_layer1)
    _13 = tf.assign(conv2_biases_g_tmp_grassmann,
                    weights_biases_grassmann_layer3)
    _14 = tf.assign(conv2_biases_o_tmp, weights_biases_oblique_layer3)
    _15 = tf.assign(fc1_biases_g_tmp_grassmann,
                    weights_biases_grassmann_layer5)
    _16 = tf.assign(fc1_biases_o_tmp, weights_biases_oblique_layer5)
    _17 = tf.assign(fc2_biases_g_tmp_grassmann,
                    weights_biases_grassmann_layer6)
    _18 = tf.assign(fc2_biases_o_tmp, weights_biases_oblique_layer6)
    # Copy the staged values back into the live variables.
    _21 = tf.assign(conv1_weights_g, conv1_weights_g_tmp_grassmann)
    _22 = tf.assign(conv1_weights_o, conv1_weights_o_tmp)
    _23 = tf.assign(conv2_weights_g, conv2_weights_g_tmp_grassmann)
    _24 = tf.assign(conv2_weights_o, conv2_weights_o_tmp)
    _25 = tf.assign(fc1_weights_g, fc1_weights_g_tmp_grassmann)
    _26 = tf.assign(fc1_weights_o, fc1_weights_o_tmp)
    _27 = tf.assign(fc2_weights_g, fc2_weights_g_tmp_grassmann)
    _28 = tf.assign(fc2_weights_o, fc2_weights_o_tmp)
    _31 = tf.assign(conv1_biases_g, conv1_biases_g_tmp_grassmann)
    _32 = tf.assign(conv1_biases_o, conv1_biases_o_tmp)
    _33 = tf.assign(conv2_biases_g, conv2_biases_g_tmp_grassmann)
    _34 = tf.assign(conv2_biases_o, conv2_biases_o_tmp)
    _35 = tf.assign(fc1_biases_g, fc1_biases_g_tmp_grassmann)
    _36 = tf.assign(fc1_biases_o, fc1_biases_o_tmp)
    _37 = tf.assign(fc2_biases_g, fc2_biases_g_tmp_grassmann)
    _38 = tf.assign(fc2_biases_o, fc2_biases_o_tmp)

    # Initialize the persistence (saver) class.
    # saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Train the model; checkpoints could be saved periodically (disabled).
        for u in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            loss_value_g_grassmann, loss_value_o_oblique, \
                accuracy_g_grassmann_value, accuracy_o_oblique_value, \
                accuracy_ensemble_value, step = sess.run(
                    [loss_g_grassmann, loss_o_oblique, accuracy_grassmann,
                     accuracy_oblique, accuracy_ensemble, global_step],
                    feed_dict={x: xs, y_: ys})
            # ****************************************************************
            sess.run([_1, _2, _3, _4, _5, _6, _7, _8],
                     feed_dict={x: xs, y_: ys, times: float(u)})
            sess.run([_11, _12, _13, _14, _15, _16, _17, _18],
                     feed_dict={x: xs, y_: ys, times: float(u)})
            sess.run([_21, _22, _23, _24, _25, _26, _27, _28])
            sess.run([_31, _32, _33, _34, _35, _36, _37, _38])
            ##################################################################
            file_loss_grassmann.write(
                str(u) + ' ' + str(loss_value_g_grassmann) + '\n')
            file_loss_oblique.write(
                str(u) + ' ' + str(loss_value_o_oblique) + '\n')
            file_accuracy_grassmann.write(
                str(u) + ' ' + str(accuracy_g_grassmann_value) + '\n')
            file_accuracy_oblique.write(
                str(u) + ' ' + str(accuracy_o_oblique_value) + '\n')
            file_accuracy_ensemble.write(
                str(u) + ' ' + str(accuracy_ensemble_value) + '\n')
            # file_norm_grassmann.write(str(u) + ' ' + str(n_g) + '\n')
            # file_norm_oblique.write(str(u) + ' ' + str(n_o) + '\n')
            if u % 100 == 0:
                print("After %d training steps, accuracy_grassmann, "
                      "accuracy_oblique and accuracy_ensemble on the training "
                      "batch are %g, %g and %g" %
                      (u, accuracy_g_grassmann_value,
                       accuracy_o_oblique_value, accuracy_ensemble_value))
                print("After %d training steps, loss_g and loss_o on the "
                      "training batch are %g and %g" %
                      (u, loss_value_g_grassmann, loss_value_o_oblique))
                print(time.localtime(time.time()))
                # model_name = MODEL_NAME + "_" + str(LEARNING_RATE_BASE) + ".ckpt"
                # saver.save(sess, os.path.join(MODEL_SAVE_PATH, model_name),
                #            global_step=global_step)
        xs = mnist.validation.images
        ys = mnist.validation.labels
        loss_value_g_grassmann, loss_value_o_oblique, \
            accuracy_ensemble_value = sess.run(
                [loss_g_grassmann, loss_o_oblique, accuracy_ensemble],
                feed_dict={x: xs, y_: ys})
        print("The loss_g, loss_o and accuracy on the validation set are "
              "%g, %g and %g" %
              (loss_value_g_grassmann, loss_value_o_oblique,
               accuracy_ensemble_value))
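# A minimal launcher for the routine above might look like this. This is a
# sketch: it assumes the standard TF1 MNIST reader with one-hot labels (which
# matches the tf.argmax(y_, 1) usage), and the data path is a placeholder.
from tensorflow.examples.tutorials.mnist import input_data

def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/mnist_data", one_hot=True)
    train(mnist, LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH)

if __name__ == '__main__':
    main()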
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()
    for group in self.param_groups:
        # momentum = group['momentum']
        manifold = group['manifold']
        learning_rate = group['lr']
        variance = group['variance']
        times = group['times']
        if manifold != 'None':
            grad_clip = group['grad_clip']
            # The first half of group['params'] holds the Grassmann copies and
            # the second half the oblique copies; walk them in pairs. Integer
            # division is required for the pairing index.
            half = len(group['params']) // 2
            for i in range(half):
                p_grassmann = group['params'][i]
                p_oblique = group['params'][i + half]
                if p_grassmann.grad is None or p_oblique.grad is None:
                    continue
                unity_grassmann, _ = gutils.unit(
                    p_grassmann.data.view(p_grassmann.size()[0], -1))
                unity_oblique, _ = gutils.unit(
                    p_oblique.data.view(p_oblique.size()[0], -1))
                grad_grassmann = p_grassmann.grad.data.view(
                    p_grassmann.size()[0], -1)
                grad_oblique = p_oblique.grad.data.view(
                    p_oblique.size()[0], -1)
                # if omega != 0:
                #     L = |Y'Y - I|^2 / 2 = |YY' - I|^2 / 2 + c
                #     dL/dY = 2(YY'Y - Y)
                #     g.add_(2 * omega,
                #            torch.mm(torch.mm(unity, unity.t()), unity) - unity)
                h_grassmann = gutils.grassmann_project(unity_grassmann,
                                                       grad_grassmann)
                h_oblique = gutils.oblique_project(unity_oblique, grad_oblique)
                # param_state = self.state[p]
                # if 'momentum_buffer' not in param_state:
                #     param_state['momentum_buffer'] = torch.zeros(h_hat.size())
                #     if p.is_cuda:
                #         param_state['momentum_buffer'] = \
                #             param_state['momentum_buffer'].cuda()
                # mom = param_state['momentum_buffer']
                # mom_new = momentum * mom - group['lr'] * h_hat
                p_grassmann.data.copy_(
                    _apply_dense_on_grassmann_with_noise(
                        grad_clip, h_grassmann, unity_grassmann,
                        learning_rate, times,
                        variance).view(p_grassmann.size()))
                p_oblique.data.copy_(
                    _apply_dense_on_oblique_with_noise(
                        grad_clip, h_oblique, unity_oblique, learning_rate,
                        times, variance).view(p_oblique.size()))
        elif manifold == "None":
            # This routine is from
            # https://github.com/pytorch/pytorch/blob/master/torch/optim/sgd.py
            weight_decay = group['weight_decay']
            # dampening = group['dampening']
            # nesterov = group['nesterov']
            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                # if momentum != 0:
                #     param_state = self.state[p]
                #     if 'momentum_buffer' not in param_state:
                #         buf = param_state['momentum_buffer'] = d_p.clone()
                #     else:
                #         buf = param_state['momentum_buffer']
                #         buf.mul_(momentum).add_(1 - dampening, d_p)
                #     if nesterov:
                #         d_p = d_p.add(momentum, buf)
                #     else:
                #         d_p = buf
                p.data.add_(-group['lr'], d_p)
        else:
            raise ValueError("There is no such manifold")
    return loss
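# For context, a hypothetical way to drive step() in its Euclidean branch.
# `ManifoldSGD` is an illustrative name for the torch.optim.Optimizer subclass
# that owns step(); the group keys mirror the ones step() reads above.
import torch

model = torch.nn.Linear(10, 2)
# The string 'None' selects the plain-SGD branch of step().
opt = ManifoldSGD([{'params': list(model.parameters()),
                    'manifold': 'None', 'lr': 0.1, 'variance': 0.0,
                    'times': 1, 'weight_decay': 0.0}])
loss = model(torch.randn(4, 10)).sum()
loss.backward()
opt.step()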
def train(LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH):
    data, labels = reader.unpickle(reader.file)
    file_path_loss_g = os.path.join(
        FILE_SAVE_PATH, 'loss_g_' + str(LEARNING_RATE_GRASSMANN) + '.txt')
    file_path_loss_o = os.path.join(
        FILE_SAVE_PATH, 'loss_o_' + str(LEARNING_RATE_OBLIQUE) + '.txt')
    file_path_norm = os.path.join(FILE_SAVE_PATH,
                                  'norm' + str(LEARNING_RATE_BASE) + '.txt')
    file1_path = os.path.join(FILE_SAVE_PATH,
                              'accuracy_' + str(LEARNING_RATE_BASE) + '.txt')
    file1_path_g = os.path.join(
        FILE_SAVE_PATH, 'accuracy_g_' + str(LEARNING_RATE_GRASSMANN) + '.txt')
    file1_path_o = os.path.join(
        FILE_SAVE_PATH, 'accuracy_o_' + str(LEARNING_RATE_OBLIQUE) + '.txt')
    file_loss_g = open(file_path_loss_g, 'w')
    file_loss_o = open(file_path_loss_o, 'w')
    file_norm = open(file_path_norm, 'w')
    file_accuracy = open(file1_path, 'w')
    file_accuracy_g = open(file1_path_g, 'w')
    file_accuracy_o = open(file1_path_o, 'w')

    x = tf.placeholder(tf.float32, shape=[None, cifar10_ensemble.INPUT_NODE],
                       name="x-input")
    y_ = tf.placeholder(tf.float32,
                        shape=[None, cifar10_ensemble.OUTPUT_NODE],
                        name="y-output")
    x_reshaped = tf.reshape(x, [
        -1, cifar10_ensemble.IMAGE_SIZE, cifar10_ensemble.IMAGE_SIZE,
        cifar10_ensemble.NUM_CHANNELS
    ])
    times = tf.placeholder(tf.float32, shape=None, name="times")
    # GRAD_CLIP = tf.constant(1.0, dtype=tf.float32)
    # L2 regularization.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y_g, y_o = cifar10_ensemble.inference(x_reshaped, False, regularizer)
    global_step = tf.Variable(0, trainable=False)
    # Define the losses, the moving-average op, etc.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    # variable_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy_g = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_g)
    cross_entropy_o = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_o)
    cross_entropy_mean_g = tf.reduce_mean(cross_entropy_g)
    cross_entropy_mean_o = tf.reduce_mean(cross_entropy_o)
    # Total loss; the disabled term sums the per-layer regularization
    # collection (a list of tensors).
    loss_g = cross_entropy_mean_g  # + tf.add_n(tf.get_collection('losses_g'))
    loss_o = cross_entropy_mean_o  # + tf.add_n(tf.get_collection('losses_o'))
    learning_rate_g = tf.train.exponential_decay(LEARNING_RATE_GRASSMANN,
                                                 global_step,
                                                 50000 / BATCH_SIZE,
                                                 LEARNING_RATE_DECAY)
    learning_rate_o = tf.train.exponential_decay(LEARNING_RATE_OBLIQUE,
                                                 global_step,
                                                 50000 / BATCH_SIZE,
                                                 LEARNING_RATE_DECAY)
    # learning_rate = LEARNING_RATE_BASE
    # Parameter update:
    # train_step = tf.train.GradientDescentOptimizer(
    #     learning_rate).minimize(loss, global_step=global_step)
    # Run the moving-average update alongside the training step:
    # with tf.control_dependencies([train_step, variable_averages_op]):
    #     train_op = tf.no_op(name='train')
    correct_prediction_g = tf.equal(tf.argmax(y_, 1), tf.argmax(y_g, 1))
    correct_prediction_o = tf.equal(tf.argmax(y_, 1), tf.argmax(y_o, 1))
    correct_prediction = tf.equal(tf.argmax(y_, 1),
                                  tf.argmax(tf.add(y_g, y_o), 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy_g = tf.reduce_mean(tf.cast(correct_prediction_g, tf.float32))
    accuracy_o = tf.reduce_mean(tf.cast(correct_prediction_o, tf.float32))

    ###########################################################################
    with tf.variable_scope('layer1-conv1', reuse=True):
        conv1_weights_g = tf.get_variable("weight_g")
        conv1_biases_g = tf.get_variable('biases_g')
        conv1_weights_o = tf.get_variable("weight_o")
        conv1_biases_o = tf.get_variable('biases_o')
        conv1_weights_g_tmp_layer1 = tf.get_variable("weight_g_tmp")
        conv1_weights_o_tmp_layer1 = tf.get_variable("weight_o_tmp")
        conv1_biases_g_tmp = tf.get_variable("biases_g_tmp")
        conv1_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer1 = conv1_weights_g.get_shape()
        weights_grad_g_base_layer1 = tf.gradients(
            loss_g, conv1_weights_g, stop_gradients=conv1_weights_g)
        weights_grad_o_base_layer1 = tf.gradients(
            loss_o, conv1_weights_o, stop_gradients=conv1_weights_o)
        weights_grad_g_base_biases_layer1 = tf.gradients(
            loss_g, conv1_biases_g, stop_gradients=conv1_biases_g)
        weights_grad_o_base_biases_layer1 = tf.gradients(
            loss_o, conv1_biases_o, stop_gradients=conv1_biases_o)
        weights_g_1 = tf.reshape(conv1_weights_g, shape=[-1, 1])
        weights_o_1 = tf.reshape(conv1_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_layer1[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_layer1[0], dtype=tf.float32)
        weights_grad_g_base_1 = tf.reshape(weights_grad_g_base_layer1[0],
                                           shape=[-1, 1])
        weights_grad_o_base_l = tf.reshape(weights_grad_o_base_layer1[0],
                                           shape=[-1, 1])
        grad_on_grassmann_1 = gutils.grassmann_project(weights_g_1,
                                                       weights_grad_g_base_1)
        grad_on_oblique_1 = gutils.oblique_project(weights_o_1,
                                                   weights_grad_o_base_l)
        weights_g_layer1 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_1, grad_on_oblique_1, weights_g_1,
            learning_rate_g, times, DELTA)
        weights_o_layer1 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_1, grad_on_oblique_1, weights_o_1,
            learning_rate_o, times, DELTA)
        # weights_g_layer1 = weights_g_1 - learning_rate_g * weights_grad_g_base_1
        # weights_o_layer1 = weights_o_1 - learning_rate_o * weights_grad_o_base_l
        weights_biases_g_layer1 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer1[0], tf.float32),
            conv1_biases_g)
        weights_biases_o_layer1 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer1[0], tf.float32),
            conv1_biases_o)
        norm_g_1 = tf.square(gutils.norm(grad_on_grassmann_1))
        norm_o_1 = tf.square(gutils.norm(grad_on_oblique_1))
    with tf.variable_scope('layer3-conv2', reuse=True):
        conv2_weights_g = tf.get_variable("weight_g")
        conv2_biases_g = tf.get_variable('biases_g')
        conv2_weights_o = tf.get_variable("weight_o")
        conv2_biases_o = tf.get_variable('biases_o')
        conv2_weights_g_tmp_layer3 = tf.get_variable("weight_g_tmp")
        conv2_weights_o_tmp_layer3 = tf.get_variable("weight_o_tmp")
        conv2_biases_g_tmp = tf.get_variable("biases_g_tmp")
        conv2_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer3 = conv2_weights_g.get_shape()
        weights_grad_g_base_3 = tf.gradients(loss_g, conv2_weights_g,
                                             stop_gradients=conv2_weights_g)
        weights_grad_o_base_3 = tf.gradients(loss_o, conv2_weights_o,
                                             stop_gradients=conv2_weights_o)
        weights_grad_g_base_biases_layer3 = tf.gradients(
            loss_g, conv2_biases_g, stop_gradients=conv2_biases_g)
        weights_grad_o_base_biases_layer3 = tf.gradients(
            loss_o, conv2_biases_o, stop_gradients=conv2_biases_o)
        weights_g_3 = tf.reshape(conv2_weights_g, shape=[-1, 1])
        weights_o_3 = tf.reshape(conv2_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_3[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_3[0], dtype=tf.float32)
        weights_grad_g_3 = tf.reshape(weights_grad_g_base_3[0], shape=[-1, 1])
        weights_grad_o_3 = tf.reshape(weights_grad_o_base_3[0], shape=[-1, 1])
        grad_on_grassmann_3 = gutils.grassmann_project(weights_g_3,
                                                       weights_grad_g_3)
        grad_on_oblique_3 = gutils.oblique_project(weights_o_3,
                                                   weights_grad_o_3)
        weights_g_layer3 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_3, grad_on_oblique_3, weights_g_3,
            learning_rate_g, times, DELTA)
        weights_o_layer3 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_3, grad_on_oblique_3, weights_o_3,
            learning_rate_o, times, DELTA)
        # weights_g_layer3 = weights_g_3 - learning_rate_g * weights_grad_g_3
        # weights_o_layer3 = weights_o_3 - learning_rate_o * weights_grad_o_3
        weights_biases_g_layer3 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer3[0], tf.float32),
            conv2_biases_g)
        weights_biases_o_layer3 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer3[0], tf.float32),
            conv2_biases_o)
        norm_g_3 = tf.square(gutils.norm(grad_on_grassmann_3))
        norm_o_3 = tf.square(gutils.norm(grad_on_oblique_3))
    with tf.variable_scope('layer5-fc1', reuse=True):
        fc1_weights_g = tf.get_variable("weight_g")
        fc1_biases_g = tf.get_variable("biases_g")
        fc1_weights_o = tf.get_variable("weight_o")
        fc1_biases_o = tf.get_variable("biases_o")
        fc1_weights_g_tmp_layer5 = tf.get_variable("weight_g_tmp")
        fc1_weights_o_tmp_layer5 = tf.get_variable("weight_o_tmp")
        fc1_biases_g_tmp = tf.get_variable("biases_g_tmp")
        fc1_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer5 = fc1_weights_g.get_shape()
        weights_grad_g_base_5 = tf.gradients(loss_g, fc1_weights_g,
                                             stop_gradients=fc1_weights_g)
        weights_grad_o_base_5 = tf.gradients(loss_o, fc1_weights_o,
                                             stop_gradients=fc1_weights_o)
        weights_grad_g_base_biases_layer5 = tf.gradients(
            loss_g, fc1_biases_g, stop_gradients=fc1_biases_g)
        weights_grad_o_base_biases_layer5 = tf.gradients(
            loss_o, fc1_biases_o, stop_gradients=fc1_biases_o)
        weights_g_5 = tf.reshape(fc1_weights_g, shape=[-1, 1])
        weights_o_5 = tf.reshape(fc1_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_5[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_5[0], dtype=tf.float32)
        weights_grad_g_5 = tf.reshape(weights_grad_g_base_5[0], shape=[-1, 1])
        weights_grad_o_5 = tf.reshape(weights_grad_o_base_5[0], shape=[-1, 1])
        grad_on_grassmann_5 = gutils.grassmann_project(weights_g_5,
                                                       weights_grad_g_5)
        grad_on_oblique_5 = gutils.oblique_project(weights_o_5,
                                                   weights_grad_o_5)
        weights_g_layer5 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_5, grad_on_oblique_5, weights_g_5,
            learning_rate_g, times, DELTA)
        weights_o_layer5 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_5, grad_on_oblique_5, weights_o_5,
            learning_rate_o, times, DELTA)
        # weights_g_layer5 = weights_g_5 - learning_rate_g * weights_grad_g_5
        # weights_o_layer5 = weights_o_5 - learning_rate_o * weights_grad_o_5
        weights_biases_g_layer5 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer5[0], tf.float32),
            fc1_biases_g)
        weights_biases_o_layer5 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer5[0], tf.float32),
            fc1_biases_o)
        norm_g_5 = tf.square(gutils.norm(grad_on_grassmann_5))
        norm_o_5 = tf.square(gutils.norm(grad_on_oblique_5))
    with tf.variable_scope('layer6-fc2', reuse=True):
        fc2_weights_g = tf.get_variable("weight_g")
        fc2_biases_g = tf.get_variable("biases_g")
        fc2_weights_o = tf.get_variable("weight_o")
        fc2_biases_o = tf.get_variable("biases_o")
        fc2_weights_g_tmp_layer6 = tf.get_variable("weight_g_tmp")
        fc2_weights_o_tmp_layer6 = tf.get_variable("weight_o_tmp")
        fc2_biases_g_tmp = tf.get_variable("biases_g_tmp")
        fc2_biases_o_tmp = tf.get_variable("biases_o_tmp")
        dim_layer6 = fc2_weights_g.get_shape()
        # stop_gradients added for consistency with the other layers.
        weights_grad_g_base_6 = tf.gradients(loss_g, fc2_weights_g,
                                             stop_gradients=fc2_weights_g)
        weights_grad_o_base_6 = tf.gradients(loss_o, fc2_weights_o,
                                             stop_gradients=fc2_weights_o)
        weights_grad_g_base_biases_layer6 = tf.gradients(
            loss_g, fc2_biases_g, stop_gradients=fc2_biases_g)
        weights_grad_o_base_biases_layer6 = tf.gradients(
            loss_o, fc2_biases_o, stop_gradients=fc2_biases_o)
        weights_g_6 = tf.reshape(fc2_weights_g, shape=[-1, 1])
        weights_o_6 = tf.reshape(fc2_weights_o, shape=[-1, 1])
        tf.convert_to_tensor(weights_grad_g_base_6[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_6[0], dtype=tf.float32)
        weights_grad_g = tf.reshape(weights_grad_g_base_6[0], shape=[-1, 1])
        weights_grad_o = tf.reshape(weights_grad_o_base_6[0], shape=[-1, 1])
        grad_on_grassmann_6 = gutils.grassmann_project(weights_g_6,
                                                       weights_grad_g)
        grad_on_oblique_6 = gutils.oblique_project(weights_o_6, weights_grad_o)
        weights_g_layer6 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_6, grad_on_oblique_6, weights_g_6,
            learning_rate_g, times, DELTA)
        weights_o_layer6 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_6, grad_on_oblique_6, weights_o_6,
            learning_rate_o, times, DELTA)
        # weights_g_layer6 = weights_g_6 - learning_rate_g * weights_grad_g
        # weights_o_layer6 = weights_o_6 - learning_rate_o * weights_grad_o
        weights_biases_g_layer6 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer6[0], tf.float32),
            fc2_biases_g)
        weights_biases_o_layer6 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer6[0], tf.float32),
            fc2_biases_o)
        norm_g_6 = tf.square(gutils.norm(grad_on_grassmann_6))
        norm_o_6 = tf.square(gutils.norm(grad_on_oblique_6))
    n = (norm_g_1 + norm_g_3 + norm_g_5 + norm_g_6 +
         norm_o_1 + norm_o_3 + norm_o_5 + norm_o_6)
    # Stage the updated (flattened, re-normalized) weights in *_tmp variables.
    _1 = tf.assign(conv1_weights_g_tmp_layer1,
                   gutils.unit(tf.reshape(weights_g_layer1, shape=dim_layer1)))
    _2 = tf.assign(conv1_weights_o_tmp_layer1,
                   gutils.unit(tf.reshape(weights_o_layer1, shape=dim_layer1)))
    _3 = tf.assign(conv2_weights_g_tmp_layer3,
                   gutils.unit(tf.reshape(weights_g_layer3, shape=dim_layer3)))
    _4 = tf.assign(conv2_weights_o_tmp_layer3,
                   gutils.unit(tf.reshape(weights_o_layer3, shape=dim_layer3)))
    _5 = tf.assign(fc1_weights_g_tmp_layer5,
                   gutils.unit(tf.reshape(weights_g_layer5, shape=dim_layer5)))
    _6 = tf.assign(fc1_weights_o_tmp_layer5,
                   gutils.unit(tf.reshape(weights_o_layer5, shape=dim_layer5)))
    _7 = tf.assign(fc2_weights_g_tmp_layer6,
                   gutils.unit(tf.reshape(weights_g_layer6, shape=dim_layer6)))
    _8 = tf.assign(fc2_weights_o_tmp_layer6,
                   gutils.unit(tf.reshape(weights_o_layer6, shape=dim_layer6)))
    _11 = tf.assign(conv1_biases_g_tmp, weights_biases_g_layer1)
    _12 = tf.assign(conv1_biases_o_tmp, weights_biases_o_layer1)
    _13 = tf.assign(conv2_biases_g_tmp, weights_biases_g_layer3)
    _14 = tf.assign(conv2_biases_o_tmp, weights_biases_o_layer3)
    _15 = tf.assign(fc1_biases_g_tmp, weights_biases_g_layer5)
    _16 = tf.assign(fc1_biases_o_tmp, weights_biases_o_layer5)
    _17 = tf.assign(fc2_biases_g_tmp, weights_biases_g_layer6)
    _18 = tf.assign(fc2_biases_o_tmp, weights_biases_o_layer6)
    # Copy the staged values back into the live variables.
    _21 = tf.assign(conv1_weights_g, conv1_weights_g_tmp_layer1)
    _22 = tf.assign(conv1_weights_o, conv1_weights_o_tmp_layer1)
    _23 = tf.assign(conv2_weights_g, conv2_weights_g_tmp_layer3)
    _24 = tf.assign(conv2_weights_o, conv2_weights_o_tmp_layer3)
    _25 = tf.assign(fc1_weights_g, fc1_weights_g_tmp_layer5)
    _26 = tf.assign(fc1_weights_o, fc1_weights_o_tmp_layer5)
    _27 = tf.assign(fc2_weights_g, fc2_weights_g_tmp_layer6)
    _28 = tf.assign(fc2_weights_o, fc2_weights_o_tmp_layer6)
    _31 = tf.assign(conv1_biases_g, conv1_biases_g_tmp)
    _32 = tf.assign(conv1_biases_o, conv1_biases_o_tmp)
    _33 = tf.assign(conv2_biases_g, conv2_biases_g_tmp)
    _34 = tf.assign(conv2_biases_o, conv2_biases_o_tmp)
    _35 = tf.assign(fc1_biases_g, fc1_biases_g_tmp)
    _36 = tf.assign(fc1_biases_o, fc1_biases_o_tmp)
    _37 = tf.assign(fc2_biases_g, fc2_biases_g_tmp)
    _38 = tf.assign(fc2_biases_o, fc2_biases_o_tmp)
    norm_1 = gutils.norm(conv1_weights_g)
    ###########################################################################
    # Initialize the persistence (saver) class.
    # saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Train the model; checkpoints could be saved periodically (disabled).
        i = 0
        while i <= EPOCH:
            for u in range(TRAINING_STEPS):
                if u * BATCH_SIZE >= 50000:
                    print("run out of all data")
                    break
                xs = data[(u * BATCH_SIZE):((u + 1) * BATCH_SIZE)]
                ys = labels[(u * BATCH_SIZE):((u + 1) * BATCH_SIZE)]
                loss_value_g, loss_value_o, accuracy_value, \
                    accuracy_g_value, accuracy_o_value, step = sess.run(
                        [loss_g, loss_o, accuracy, accuracy_g, accuracy_o,
                         global_step],
                        feed_dict={x: xs, y_: ys})
                # ************************************************************
                # print(sess.run(norm_1,
                #                feed_dict={x: xs, y_: ys, times: float(u)}))
                sess.run([_1, _2, _3, _4, _5, _6, _7, _8],
                         feed_dict={x: xs, y_: ys, times: float(u)})
                sess.run([_11, _12, _13, _14, _15, _16, _17, _18],
                         feed_dict={x: xs, y_: ys, times: float(u)})
                sess.run([_21, _22, _23, _24, _25, _26, _27, _28])
                sess.run([_31, _32, _33, _34, _35, _36, _37, _38])
                n_value = sess.run(n, feed_dict={x: xs, y_: ys,
                                                 times: float(u)})
                # print(n_value)
                ##############################################################
                file_loss_g.write(str(u) + ' ' + str(loss_value_g) + '\n')
                file_loss_o.write(str(u) + ' ' + str(loss_value_o) + '\n')
                file_accuracy.write(str(u) + ' ' + str(accuracy_value) + '\n')
                file_accuracy_g.write(
                    str(u) + ' ' + str(accuracy_g_value) + '\n')
                file_accuracy_o.write(
                    str(u) + ' ' + str(accuracy_o_value) + '\n')
                # Write the evaluated norm, not the tensor object.
                file_norm.write(str(u) + ' ' + str(n_value) + '\n')
                if u % 100 == 0:
                    print("After %d training steps, loss_g and loss_o on the "
                          "training batch are %g and %g; accuracy is %g" %
                          (u, loss_value_g, loss_value_o, accuracy_value))
                    print("After %d training steps, accuracy_g and accuracy_o "
                          "on the training batch are %g and %g" %
                          (u, accuracy_g_value, accuracy_o_value))
                    print(time.localtime(time.time()))
            i = i + 1
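# As with the MNIST script, a minimal entry point for this routine could be
# the following sketch; the constants are the module-level hyperparameters
# this file already assumes.
def main(argv=None):
    train(LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH)

if __name__ == '__main__':
    main()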