Example 1
    def _apply_dense_on_grassmann_with_noise(self, grad, var, seed):
        # Project the Euclidean gradient onto the tangent space, then
        # anneal the gradient/noise mixing weights with the step counter.
        g = gutils.grassmann_project(var, grad)
        g_norm = gutils.norm(g)
        if g_norm >= 1 / self._times:
            a = 1 - 1 / (tf.square(self._times) * tf.square(g_norm))
        else:
            a = 1 / tf.square(self._times)
        b = 1 / tf.square(self._times)

        dim = grad.get_shape()[0]
        noise = tf.truncated_normal([dim, dim],
                                    mean=0.0,
                                    stddev=1.0,
                                    dtype=tf.float32,
                                    seed=seed,
                                    name="random_noise")

        h = -self._learning_rate_t * (a * g + b * noise)
        if self._grad_clip is not None:
            h = gutils.clip_by_norm(h, self._grad_clip_t)

        var_new = gutils.grassmann_retrction(var, h)

        return var_new
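
All of these examples lean on a project-local gutils module whose source is not shown on this page. As a point of reference, here is a minimal TF1 sketch of what the helpers called above might compute, assuming the standard formulas for unit-norm column vectors; the repo's real gutils may differ, and the names (including the misspelled grassmann_retrction) simply follow the calls above. For matrix-valued points a QR-based retraction would be more usual than plain renormalization.

# Hypothetical TF1 stand-ins for the gutils calls used in these examples.
import tensorflow as tf

def norm(x):
    # Frobenius norm.
    return tf.sqrt(tf.reduce_sum(tf.square(x)))

def grassmann_project(var, grad):
    # Tangent-space projection at var: grad - var (var^T grad).
    return grad - tf.matmul(var, tf.matmul(var, grad, transpose_a=True))

def grassmann_retrction(var, h):
    # First-order retraction: step along h, then renormalize onto the manifold.
    new = var + h
    return new / (norm(new) + 1e-12)

def clip_by_norm(x, clip):
    return tf.clip_by_norm(x, clip)
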
Example 2
def _apply_dense_on_grassmann_with_noise(grad_clip, grad, var, seed,
                                         learning_rate, times):
    g = gutils.grassmann_project(var, grad)
    g_norm = gutils.norm(g)

    #a=tf.minimum(1-1/(tf.square(times+1)*tf.square(g_norm)+1e-5),1/tf.square(times+1))
    a = 1.0

    b = 1 / tf.square(times + 1)

    dim = tf.convert_to_tensor(grad.get_shape()[0], dtype=tf.int32)

    noise = tf.truncated_normal([dim, 1],
                                mean=0.0,
                                stddev=0.0001,
                                dtype=tf.float32,
                                seed=seed,
                                name="random_noise")

    h = -learning_rate * (a * g + b * noise)
    if grad_clip is not None:
        h = gutils.clip_by_norm(h, grad_clip)

    var_new = gutils.grassmann_retrction(var, h)
    return var_new
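
A hedged call-site sketch for the standalone function above; the shape, toy objective, seed, and learning rate are illustrative assumptions, not values from the source repo:

# Hypothetical usage: update one flattened [dim, 1] weight column.
import tensorflow as tf

v0 = tf.random_normal([256, 1])
var = tf.Variable(v0 / tf.sqrt(tf.reduce_sum(tf.square(v0))))  # unit norm
loss = tf.reduce_sum(tf.square(var - 0.1))                     # toy objective
grad = tf.gradients(loss, var)[0]
times = tf.placeholder(tf.float32, shape=None)  # annealing counter, fed each step
var_new = _apply_dense_on_grassmann_with_noise(
    grad_clip=None, grad=grad, var=var, seed=42,
    learning_rate=0.05, times=times)
update_op = tf.assign(var, var_new)
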
Example 3

def _apply_dense_on_grassmann_with_noise(grad_clip, grad, var, learning_rate,
                                         times, variance):
    g = gutils.grassmann_project(var, grad)
    #g_norm=gutils.norm(g)

    #a=tf.minimum(1-1/(tf.square(times+1)*tf.square(g_norm)+1e-5),1/tf.square(times+1))
    a = 1.0

    b = 1 / (times + 1) ** 2  # times is a plain number here, not a tensor

    noise = variance * gutils.grassmann_project(var, torch.randn(
        var.size()[0]))

    h = -learning_rate * (a * g + b * noise)
    if grad_clip is not None:
        h = gutils.clip_by_norm(h, grad_clip)
    var_new = gutils.grassmann_retrction(var, h)
    return var_new
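
The PyTorch variant above calls the same helper names. A minimal PyTorch sketch under the same unit-vector assumptions; again, the project's real gutils may differ:

# Hypothetical PyTorch stand-ins for the gutils calls in the variant above.
import torch

def grassmann_project(var, grad):
    # Tangent-space projection at a unit vector: grad - var <var, grad>.
    return grad - var * torch.sum(var * grad)

def grassmann_retrction(var, h):
    # Step along h, then renormalize onto the manifold.
    new = var + h
    return new / (new.norm() + 1e-12)

def clip_by_norm(x, clip):
    # Scale x down so its norm does not exceed clip.
    n = x.norm()
    return x * (clip / n) if n > clip else x
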
Example 4
    def _apply_dense_on_grasssmann(self, grad_on_grassmann, grad_on_oblique,
                                   var):
        a = tf.maximum(self._delta_t, 1 / tf.square(self._times))
        b_1 = 2 * (1 - a) * tf.matmul(
            tf.transpose(grad_on_grassmann),
            gutils.grassmann_project(var, grad_on_oblique))
        b_2 = gutils.norm(gutils.grassmann_project(var, grad_on_oblique))
        b = b_1 / b_2

        if self._grad_clip is not None:
            h = self._learning_rate_t * (
                a * grad_on_grassmann +
                b * gutils.grassmann_project(var, grad_on_oblique))
            h = -gutils.clip_by_norm(h, self._grad_clip_t)
        else:
            h = -self._learning_rate_t * (
                a * grad_on_grassmann +
                b * gutils.grassmann_project(var, grad_on_oblique))

        var_update = gutils.grassmann_retrction(var, h)
        return var_update
Example 5
def apply_dense_on_grasssmann(grad_clip, grad_on_grassmann, grad_on_oblique,
                              var, learning_rate, times, delta):
    a = tf.maximum(delta, 1 / tf.log(tf.log(times + 2)))
    n = gutils.unit(gutils.grassmann_project(
        var, grad_on_oblique)) * gutils.norm(grad_on_grassmann)
    b_1 = 2 * (1 - a) * gutils.xTy(grad_on_grassmann, n)
    b_2 = gutils.norm(grad_on_grassmann)
    b = b_1 / (b_2 + 1e-5)

    if grad_clip is not None:
        h = learning_rate * (a * grad_on_grassmann + b * n)
        h = -gutils.clip_by_norm(h, grad_clip)
    else:
        h = -learning_rate * (a * grad_on_grassmann + b * n)

    var_update = gutils.grassmann_retrction(var, h)
    return var_update
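
Examples 4 and 5 mix the Grassmann gradient with the projected Oblique direction; they differ mainly in how the mixing weight a is annealed (1/times² with a delta floor in Example 4 versus 1/log log(times + 2) here). A small illustration of how the Example 5 schedule decays; the delta value is made up:

# How Example 5's mixing weight a falls off with the step counter `times`.
import math

delta = 0.1  # hypothetical floor; the real DELTA is not shown on this page
for times in (1.0, 10.0, 100.0, 1000.0):
    a = max(delta, 1.0 / math.log(math.log(times + 2)))
    print("times=%g  a=%.3f" % (times, a))

Early on a exceeds 1, so the raw Grassmann direction dominates; it then decays slowly toward the delta floor, giving the cross-manifold term b relatively more weight.
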
Example 6
def train(mnist, LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH):
    file_path_loss_grassmann = os.path.join(FILE_SAVE_PATH, ('loss_grassmann_' + str(LEARNING_RATE_BASE) + '.txt'))
    file_path_loss_oblique = os.path.join(FILE_SAVE_PATH, ('loss_oblique_' + str(LEARNING_RATE_OBLIQUE) + '.txt'))

    file1_path_grassmann = os.path.join(FILE_SAVE_PATH, ('accuracy_grassmann_' + str(LEARNING_RATE_BASE) + '.txt'))
    file1_path_oblique = os.path.join(FILE_SAVE_PATH, ('accuracy_oblique' + str(LEARNING_RATE_OBLIQUE) + '.txt'))
    file1_path_ensemble = os.path.join(FILE_SAVE_PATH, ('accuracy_ensemble' + str(LEARNING_RATE_BASE) + '.txt'))

    file_path_norm_grassmann = os.path.join(FILE_SAVE_PATH, ('norm_grassmann' + str(LEARNING_RATE_BASE) + '.txt'))
    file_path_norm_oblique = os.path.join(FILE_SAVE_PATH, ('norm_oblique' + str(LEARNING_RATE_OBLIQUE) + '.txt'))

    file_loss_grassmann = open(file_path_loss_grassmann, 'w')
    file_loss_oblique = open(file_path_loss_oblique, 'w')

    file_accuracy_grassmann = open(file1_path_grassmann, 'w')
    file_accuracy_oblique = open(file1_path_oblique, 'w')
    file_accuracy_ensemble = open(file1_path_ensemble, 'w')

    file_norm_grassmann = open(file_path_norm_grassmann, 'w')
    file_norm_oblique = open(file_path_norm_oblique, 'w')

    x = tf.placeholder(tf.float32, shape=[None, LeNet5.INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[None, LeNet5.OUTPUT_NODE], name="y-output")
    x_reshaped = tf.reshape(x, [-1, LeNet5.IMAGE_SIZE, LeNet5.IMAGE_SIZE, LeNet5.NUM_CHANNELS])
    times = tf.placeholder(tf.float32, shape=None, name="times")

    #GRAD_CLIP=tf.constant(1.0,dtype=tf.float32)

    # L2 regularization
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)

    y_g_grassmann = LeNet5_grassmann.inference(x_reshaped, False, regularizer)
    y_o_oblique = LeNet5_oblique.inference(x_reshaped, False, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, moving-average ops, etc.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    cross_entropy_g_grassmann = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y_g_grassmann)
    cross_entropy_o_oblique = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y_o_oblique)

    cross_entropy_mean_g_grassmann = tf.reduce_mean(cross_entropy_g_grassmann)
    cross_entropy_mean_o_oblique = tf.reduce_mean(cross_entropy_o_oblique)

    # Loss function; this involves summing the elements of a collection (each element itself a list).

    loss_g_grassmann = cross_entropy_mean_g_grassmann #+ tf.add_n(tf.get_collection('losses_g_grassmann'))
    loss_o_oblique = cross_entropy_mean_o_oblique #+ tf.add_n(tf.get_collection('losses_o_oblique'))

    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                               mnist.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY)
    learning_rate_o = tf.train.exponential_decay(LEARNING_RATE_OBLIQUE, global_step,
                                                 mnist.train.num_examples / BATCH_SIZE,
                                                 LEARNING_RATE_DECAY)
    #learning_rate=LEARNING_RATE_BASE
    # Update the parameters.

    #train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
    # Run the moving average in parallel:
    #with tf.control_dependencies([train_step,variable_averages_op]):
        #train_op=tf.no_op(name='train')
    correct_prediction_grassmann = tf.equal(tf.argmax(y_, 1), tf.argmax(y_g_grassmann, 1))
    correct_prediction_oblique = tf.equal(tf.argmax(y_, 1), tf.argmax(y_o_oblique, 1))

    correct_prediction_ensemble = tf.equal(tf.argmax(y_, 1), tf.argmax(tf.add(y_g_grassmann, y_o_oblique), 1))

    accuracy_grassmann = tf.reduce_mean(tf.cast(correct_prediction_grassmann, tf.float32))
    accuracy_oblique = tf.reduce_mean(tf.cast(correct_prediction_oblique, tf.float32))
    accuracy_ensemble = tf.reduce_mean(tf.cast(correct_prediction_ensemble, tf.float32))
###########################################################################################################
    with tf.variable_scope('layer1-conv1_grassmann', reuse=True):
        conv1_weights_g = tf.get_variable("weight_g")
        conv1_biases_g = tf.get_variable('biases_g')

        conv1_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        conv1_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")

        weights_grad_g_base_g_layer1 = tf.gradients(loss_g_grassmann, conv1_weights_g, stop_gradients=conv1_weights_g)

        weights_grad_g_base_g_biases_layer1 = tf.gradients(loss_g_grassmann, conv1_biases_g, stop_gradients=conv1_biases_g)

        weights_g = tf.reshape(conv1_weights_g, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_g_layer1[0], dtype=tf.float32)

        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer1[0], shape=[-1, 1])

        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)

        weights_g_layer1 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 100, learning_rate, times)
        #weights_g_layer1=weights_g-learning_rate*weights_grad_g

        weights_biases_grassmann_layer1 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_g_base_g_biases_layer1[0], tf.float32), conv1_biases_g)

        norm_g_1 = tf.square(gutils.norm(grad_on_grassmann))

    with tf.variable_scope('layer3-conv2_grassmann', reuse=True):
        conv2_weights_g = tf.get_variable("weight_g")
        conv2_biases_g = tf.get_variable('biases_g')

        conv2_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        conv2_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")

        weights_grad_g_base_g_layer3 = tf.gradients(loss_g_grassmann, conv2_weights_g,stop_gradients=conv2_weights_g)

        weights_grad_g_base_g_biases_layer3 = tf.gradients(loss_g_grassmann, conv2_biases_g,
                                                           stop_gradients=conv2_biases_g)

        weights_g = tf.reshape(conv2_weights_g, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_g_layer3[0], dtype=tf.float32)

        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer3[0], shape=[-1, 1])

        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)

        weights_g_layer3 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 101, learning_rate, times)

        #weights_g_layer3 = weights_g - learning_rate * weights_grad_g

        weights_biases_grassmann_layer3 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_g_base_g_biases_layer3[0], tf.float32),
            conv2_biases_g)
        norm_g_3 = tf.square(gutils.norm(grad_on_grassmann))

    with tf.variable_scope('layer5-fc1_grassmann', reuse=True):
        fc1_weights_g = tf.get_variable("weight_g")
        fc1_biases_g = tf.get_variable("biases_g")

        fc1_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        fc1_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")

        weights_grad_g_base_g_layer5 = tf.gradients(loss_g_grassmann, fc1_weights_g, stop_gradients=fc1_weights_g)

        weights_grad_g_base_biases_g_layer5 = tf.gradients(loss_g_grassmann, fc1_biases_g, stop_gradients=fc1_biases_g)

        weights_g = tf.reshape(fc1_weights_g, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_g_layer5[0], dtype=tf.float32)

        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer5[0], shape=[-1, 1])

        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)

        weights_g_layer5 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 102, learning_rate, times)

        #weights_g_layer5 = weights_g - learning_rate * weights_grad_g

        weights_biases_grassmann_layer5 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_g_base_biases_g_layer5[0], tf.float32),
            fc1_biases_g)
        norm_g_5 = tf.square(gutils.norm(grad_on_grassmann))

    with tf.variable_scope('layer6-fc2_grassmann', reuse=True):
        fc2_weights_g = tf.get_variable("weight_g")
        fc2_biases_g = tf.get_variable("biases_g")

        fc2_weights_g_tmp_grassmann = tf.get_variable("weight_g_tmp")
        fc2_biases_g_tmp_grassmann = tf.get_variable("biases_g_tmp")

        weights_grad_g_base_g_layer6 = tf.gradients(loss_g_grassmann, fc2_weights_g , stop_gradients=fc2_weights_g)

        weights_grad_g_base_biases_g_layer6 = tf.gradients(loss_g_grassmann, fc2_biases_g, stop_gradients=fc2_biases_g)

        weights_g = tf.reshape(fc2_weights_g, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_g_layer6[0], dtype=tf.float32)

        weights_grad_g = tf.reshape(weights_grad_g_base_g_layer6[0], shape=[-1, 1])

        grad_on_grassmann = gutils.grassmann_project(weights_g, weights_grad_g)

        weights_g_layer6 = optimize_function._apply_dense_on_grassmann_with_noise(
            GRAD_CLIP, grad_on_grassmann, weights_g, 103, learning_rate, times)

        #weights_g_layer6 = weights_g-learning_rate*weights_grad_g

        weights_biases_grassmann_layer6 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_g_base_biases_g_layer6[0], tf.float32),
            fc2_biases_g)
        norm_g_6 = tf.square(gutils.norm(grad_on_grassmann))

############################################################################################################
    with tf.variable_scope('layer1-conv1_oblique', reuse=True):

        conv1_weights_o = tf.get_variable("weight_o")
        conv1_biases_o = tf.get_variable('biases_o')

        dim_layer1 = conv1_weights_o.get_shape()

        conv1_weights_o_tmp = tf.get_variable("weight_o_tmp")
        conv1_biases_o_tmp = tf.get_variable("biases_o_tmp")

        weights_grad_o_base_layer1_o = tf.gradients(loss_o_oblique, conv1_weights_o, stop_gradients=conv1_weights_o)

        weights_grad_o_base_biases_layer1_o = tf.gradients(loss_o_oblique, conv1_biases_o, stop_gradients=conv1_biases_o)

        weights_o = tf.reshape(conv1_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_o_base_layer1_o[0], dtype=tf.float32)

        weights_grad_o = tf.reshape(weights_grad_o_base_layer1_o[0], shape=[-1, 1])

        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)

        weights_o_layer1_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 104, learning_rate_o, times)

        #weights_o_layer1_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer1 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_o_base_biases_layer1_o[0], tf.float32),
            conv1_biases_o)
        norm_o_1 = tf.square(gutils.norm(grad_on_oblique))

    with tf.variable_scope('layer3-conv2_oblique', reuse=True):
        conv2_weights_o = tf.get_variable("weight_o")
        conv2_biases_o = tf.get_variable('biases_o')

        conv2_weights_o_tmp = tf.get_variable("weight_o_tmp")
        conv2_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer3 = conv2_weights_o.get_shape()

        weights_grad_o_base_layer3_o = tf.gradients(loss_o_oblique, conv2_weights_o , stop_gradients=conv2_weights_o)

        weights_grad_o_base_biases_layer3_o = tf.gradients(loss_o_oblique, conv2_biases_o, stop_gradients=conv2_biases_o)

        weights_o = tf.reshape(conv2_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_o_base_layer3_o[0], dtype=tf.float32)

        weights_grad_o = tf.reshape(weights_grad_o_base_layer3_o[0], shape=[-1, 1])

        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)

        weights_o_layer3_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 105, learning_rate_o, times)

        #weights_o_layer3_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer3 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_o_base_biases_layer3_o[0], tf.float32),
            conv2_biases_o)
        norm_o_3 = tf.square(gutils.norm(grad_on_oblique))

    with tf.variable_scope('layer5-fc1_oblique', reuse=True):
        fc1_weights_o = tf.get_variable("weight_o")
        fc1_biases_o = tf.get_variable("biases_o")

        fc1_weights_o_tmp = tf.get_variable("weight_o_tmp")
        fc1_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer5 = fc1_weights_o.get_shape()

        weights_grad_o_base_layer5_o = tf.gradients(loss_o_oblique, fc1_weights_o ,stop_gradients=fc1_weights_o)

        weights_grad_o_base_biases_layer5_o = tf.gradients(loss_o_oblique, fc1_biases_o, stop_gradients=fc1_biases_o)

        weights_o = tf.reshape(fc1_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_o_base_layer5_o[0], dtype=tf.float32)

        weights_grad_o = tf.reshape(weights_grad_o_base_layer5_o[0], shape=[-1, 1])

        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)

        weights_o_layer5_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 106, learning_rate_o, times)

        #weights_o_layer5_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer5 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_o_base_biases_layer5_o[0], tf.float32),
            fc1_biases_o)
        norm_o_5 = tf.square(gutils.norm(grad_on_oblique))

    with tf.variable_scope('layer6-fc2_oblique', reuse=True):
        fc2_weights_o = tf.get_variable("weight_o")
        fc2_biases_o = tf.get_variable("biases_o")

        fc2_weights_o_tmp = tf.get_variable("weight_o_tmp")
        fc2_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer6 = fc2_weights_o.get_shape()

        weights_grad_o_base_layer6_o = tf.gradients(loss_o_oblique, fc2_weights_o , stop_gradients=fc2_weights_o)

        weights_grad_o_base_biases_layer6_o = tf.gradients(loss_o_oblique, fc2_biases_o, stop_gradients=fc2_biases_o)

        weights_o = tf.reshape(fc2_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_o_base_layer6_o[0], dtype=tf.float32)

        weights_grad_o = tf.reshape(weights_grad_o_base_layer6_o[0], shape=[-1, 1])

        grad_on_oblique = gutils.oblique_project(weights_o, weights_grad_o)

        weights_o_layer6_o = optimize_function._apply_dense_on_oblique_with_noise(
            GRAD_CLIP, grad_on_oblique, weights_o, 107, learning_rate_o, times)

        #weights_o_layer6_o = weights_o - learning_rate * weights_grad_o
        weights_biases_oblique_layer6 = tf.add(
            -1 * learning_rate * tf.convert_to_tensor(weights_grad_o_base_biases_layer6_o[0], tf.float32),
            fc2_biases_o)
        norm_o_6 = tf.square(gutils.norm(grad_on_oblique))
######################################################################################################################
        _1 = tf.assign(conv1_weights_g_tmp_grassmann, tf.reshape(weights_g_layer1, shape=dim_layer1))
        _2 = tf.assign(conv1_weights_o_tmp, tf.reshape(weights_o_layer1_o, shape=dim_layer1))
        _3 = tf.assign(conv2_weights_g_tmp_grassmann, tf.reshape(weights_g_layer3, shape=dim_layer3))
        _4 = tf.assign(conv2_weights_o_tmp, tf.reshape(weights_o_layer3_o, shape=dim_layer3))
        _5 = tf.assign(fc1_weights_g_tmp_grassmann, tf.reshape(weights_g_layer5, shape=dim_layer5))
        _6 = tf.assign(fc1_weights_o_tmp, tf.reshape(weights_o_layer5_o, shape=dim_layer5))
        _7 = tf.assign(fc2_weights_g_tmp_grassmann, tf.reshape(weights_g_layer6, shape=dim_layer6))
        _8 = tf.assign(fc2_weights_o_tmp, tf.reshape(weights_o_layer6_o, shape=dim_layer6))

        _11 = tf.assign(conv1_biases_g_tmp_grassmann, weights_biases_grassmann_layer1)
        _12 = tf.assign(conv1_biases_o_tmp, weights_biases_oblique_layer1)
        _13 = tf.assign(conv2_biases_g_tmp_grassmann, weights_biases_grassmann_layer3)
        _14 = tf.assign(conv2_biases_o_tmp, weights_biases_oblique_layer3)
        _15 = tf.assign(fc1_biases_g_tmp_grassmann, weights_biases_grassmann_layer5)
        _16 = tf.assign(fc1_biases_o_tmp, weights_biases_oblique_layer5)
        _17 = tf.assign(fc2_biases_g_tmp_grassmann, weights_biases_grassmann_layer6)
        _18 = tf.assign(fc2_biases_o_tmp, weights_biases_oblique_layer6)

        _21 = tf.assign(conv1_weights_g, conv1_weights_g_tmp_grassmann)
        _22 = tf.assign(conv1_weights_o, conv1_weights_o_tmp)
        _23 = tf.assign(conv2_weights_g, conv2_weights_g_tmp_grassmann)
        _24 = tf.assign(conv2_weights_o, conv2_weights_o_tmp)
        _25 = tf.assign(fc1_weights_g, fc1_weights_g_tmp_grassmann)
        _26 = tf.assign(fc1_weights_o, fc1_weights_o_tmp)
        _27 = tf.assign(fc2_weights_g, fc2_weights_g_tmp_grassmann)
        _28 = tf.assign(fc2_weights_o, fc2_weights_o_tmp)

        _31 = tf.assign(conv1_biases_g, conv1_biases_g_tmp_grassmann)
        _32 = tf.assign(conv1_biases_o, conv1_biases_o_tmp)
        _33 = tf.assign(conv2_biases_g, conv2_biases_g_tmp_grassmann)
        _34 = tf.assign(conv2_biases_o, conv2_biases_o_tmp)
        _35 = tf.assign(fc1_biases_g, fc1_biases_g_tmp_grassmann)
        _36 = tf.assign(fc1_biases_o, fc1_biases_o_tmp)
        _37 = tf.assign(fc2_biases_g, fc2_biases_g_tmp_grassmann)
        _38 = tf.assign(fc2_biases_o, fc2_biases_o_tmp)
    # Initialize the Saver for persistence.
    #saver=tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Train the model, saving results periodically.
        for u in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            loss_value_g_grassmann,loss_value_o_oblique,\
            accuracy_g_grassmann_value,accuracy_o_oblique_value,accuracy_ensemble_value,\
            step=sess.run([loss_g_grassmann,loss_o_oblique,accuracy_grassmann,accuracy_oblique,accuracy_ensemble,
                           global_step],feed_dict={x:xs,y_:ys})

#****************************************************************
            sess.run([_1, _2, _3, _4, _5, _6, _7, _8], feed_dict={x: xs, y_: ys, times: float(u)})
            sess.run([_11, _12, _13, _14, _15, _16, _17, _18], feed_dict={x: xs, y_: ys, times: float(u)})
            sess.run([_21, _22, _23, _24, _25, _26, _27, _28])
            sess.run([_31, _32, _33, _34, _35, _36, _37, _38])

##########################################################################################################
            file_loss_grassmann.write("%d %s\n" % (u, loss_value_g_grassmann))
            file_loss_oblique.write("%d %s\n" % (u, loss_value_o_oblique))

            file_accuracy_grassmann.write("%d %s\n" % (u, accuracy_g_grassmann_value))
            file_accuracy_oblique.write("%d %s\n" % (u, accuracy_o_oblique_value))
            file_accuracy_ensemble.write("%d %s\n" % (u, accuracy_ensemble_value))
            #file_norm_grassmann.write("%d %s\n" % (u, n_g))
            #file_norm_oblique.write("%d %s\n" % (u, n_o))

            if u % 100 == 0:
                print("After %d training steps, accuracy_grassmann, accuracy_oblique and accuracy_ensemble on the training batch are %g, %g and %g" % (
                    u, accuracy_g_grassmann_value, accuracy_o_oblique_value, accuracy_ensemble_value))
                print("After %d training steps, loss_g and loss_o on the training batch are %g and %g" % (
                    u, loss_value_g_grassmann, loss_value_o_oblique))

                print(time.localtime(time.time()))
                #model_name=MODEL_NAME+"_"+str(LEARNING_RATE_BASE)+".ckpt"
                #saver.save(sess,os.path.join(MODEL_SAVE_PATH,model_name),global_step=global_step)
        xs = mnist.validation.images
        ys = mnist.validation.labels
        loss_value_g_grassmann, loss_value_o_oblique, accuracy_ensemble_value = sess.run(
            [loss_g_grassmann, loss_o_oblique, accuracy_ensemble], feed_dict={x: xs, y_: ys})

        print("The loss_g, loss_o and accuracy on validation is %g %g and %g" % (
        loss_value_g_grassmann, loss_value_o_oblique, accuracy_ensemble_value))
Example 7

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            #momentum = group['momentum']
            manifold = group['manifold']
            learning_rate = group['lr']
            variance = group['variance']
            times = group['times']

            if manifold != 'None':
                grad_clip = group['grad_clip']

                # The first half of 'params' is updated on the Grassmann
                # manifold, the second half on the Oblique manifold.
                length = len(group['params'])

                for i in range(length // 2):

                    p_grassmann = group['params'][i]
                    p_oblique = group['params'][i + length // 2]

                    if p_grassmann.grad is None or p_oblique.grad is None:
                        continue

                    unity_grassmann, _ = gutils.unit(
                        p_grassmann.data.view(p_grassmann.size()[0], -1))
                    unity_oblique, _ = gutils.unit(
                        p_oblique.data.view(p_oblique.size()[0], -1))

                    grad_grassmann = p_grassmann.grad.data.view(
                        p_grassmann.size()[0], -1)
                    grad_oblique = p_oblique.grad.data.view(
                        p_oblique.size()[0], -1)

                    # if omega != 0:
                    # L=|Y'Y-I|^2/2=|YY'-I|^2/2+c
                    # dL/dY=2(YY'Y-Y)
                    # g.add_(2*omega, torch.mm(torch.mm(unity, unity.t()), unity) - unity)

                    h_grassmann = gutils.grassmann_project(
                        unity_grassmann, grad_grassmann)
                    h_oblique = gutils.oblique_project(unity_oblique,
                                                       grad_oblique)

                    # param_state = self.state[p]
                    # if 'momentum_buffer' not in param_state:
                    #    param_state['momentum_buffer'] = torch.zeros(h_hat.size())
                    #    if p.is_cuda:
                    #      param_state['momentum_buffer'] = param_state['momentum_buffer'].cuda()

                    # mom = param_state['momentum_buffer']
                    # mom_new = momentum*mom - group['lr']*h_hat

                    p_grassmann.data.copy_(
                        _apply_dense_on_grassmann_with_noise(
                            grad_clip, h_grassmann, unity_grassmann,
                            learning_rate, times,
                            variance).view(p_grassmann.size()))

                    p_oblique.data.copy_(
                        _apply_dense_on_oblique_with_noise(
                            grad_clip, h_oblique, unity_oblique, learning_rate,
                            times, variance).view(p_oblique.size()))

            elif manifold == "None":
                # This routine is from https://github.com/pytorch/pytorch/blob/master/torch/optim/sgd.py
                weight_decay = group['weight_decay']
                # dampening = group['dampening']
                # nesterov = group['nesterov']
                for p in group['params']:
                    if p.grad is None:
                        continue
                    d_p = p.grad.data
                    if weight_decay != 0:
                        d_p.add_(weight_decay, p.data)
                    # if momentum != 0:
                    #    param_state = self.state[p]
                    #    if 'momentum_buffer' not in param_state:
                    #        buf = param_state['momentum_buffer'] = d_p.clone()
                    #    else:
                    #        buf = param_state['momentum_buffer']
                    #        buf.mul_(momentum).add_(1 - dampening, d_p)
                    #    if nesterov:
                    #        d_p = d_p.add(momentum, buf)
                    #    else:
                    #        d_p = buf

                    p.data.add_(-group['lr'], d_p)

            else:
                raise ValueError("No such manifold: %s" % manifold)

        return loss
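
A hedged sketch of the param group this step() expects. The key names are taken from the reads above; the values and the half-and-half layout are assumptions inferred from the params[i] / params[i + length // 2] pairing:

# Hypothetical param-group layout consumed by step().
import torch

g_params = [torch.nn.Parameter(torch.randn(8, 4))]  # Grassmann half
o_params = [torch.nn.Parameter(torch.randn(8, 4))]  # Oblique half
group = {
    'params': g_params + o_params,    # equal halves, paired by index
    'manifold': 'grassmann/oblique',  # any value other than 'None' takes the manifold branch
    'lr': 0.05,
    'variance': 1e-4,                 # noise scale for the noisy updates
    'times': 0,                       # annealing counter, advanced by the caller
    'grad_clip': None,
    'weight_decay': 0.0,
}
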
Example 8
def train(LEARNING_RATE_BASE, MODEL_SAVE_PATH, FILE_SAVE_PATH):
    data, labels = reader.unpickle(reader.file)
    file_path_loss_g = os.path.join(
        FILE_SAVE_PATH, ('loss_g_' + str(LEARNING_RATE_GRASSMANN) + '.txt'))
    file_path_loss_o = os.path.join(
        FILE_SAVE_PATH, ('loss_o_' + str(LEARNING_RATE_OBLIQUE) + '.txt'))

    file_path_norm = os.path.join(FILE_SAVE_PATH,
                                  ('norm' + str(LEARNING_RATE_BASE) + '.txt'))

    file1_path = os.path.join(FILE_SAVE_PATH,
                              ('accuracy_' + str(LEARNING_RATE_BASE) + '.txt'))
    file1_path_g = os.path.join(
        FILE_SAVE_PATH,
        ('accuracy_g_' + str(LEARNING_RATE_GRASSMANN) + '.txt'))
    file1_path_o = os.path.join(
        FILE_SAVE_PATH, ('accuracy_o_' + str(LEARNING_RATE_OBLIQUE) + '.txt'))

    file_loss_g = open(file_path_loss_g, 'w')
    file_loss_o = open(file_path_loss_o, 'w')

    file_norm = open(file_path_norm, 'w')

    file_accuracy = open(file1_path, 'w')
    file_accuracy_g = open(file1_path_g, 'w')
    file_accuracy_o = open(file1_path_o, 'w')

    x = tf.placeholder(tf.float32,
                       shape=[None, cifar10_ensemble.INPUT_NODE],
                       name="x-input")
    y_ = tf.placeholder(tf.float32,
                        shape=[None, cifar10_ensemble.OUTPUT_NODE],
                        name="y-output")
    x_reshaped = tf.reshape(x, [
        -1, cifar10_ensemble.IMAGE_SIZE, cifar10_ensemble.IMAGE_SIZE,
        cifar10_ensemble.NUM_CHANNELS
    ])
    times = tf.placeholder(tf.float32, shape=None, name="times")

    #GRAD_CLIP=tf.constant(1.0,dtype=tf.float32)

    # L2 regularization
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)

    y_g, y_o = cifar10_ensemble.inference(x_reshaped, False, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, moving-average ops, etc.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    #variable_averages_op=variable_averages.apply(tf.trainable_variables())
    cross_entropy_g = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_g)
    cross_entropy_o = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y_o)

    cross_entropy_mean_g = tf.reduce_mean(cross_entropy_g)
    cross_entropy_mean_o = tf.reduce_mean(cross_entropy_o)

    # Loss function; this involves summing the elements of a collection (each element itself a list).
    loss_g = cross_entropy_mean_g  #+tf.add_n(tf.get_collection('losses_g'))
    loss_o = cross_entropy_mean_o  #+tf.add_n(tf.get_collection('losses_o'))

    learning_rate_g = tf.train.exponential_decay(LEARNING_RATE_GRASSMANN,
                                                 global_step,
                                                 50000 / BATCH_SIZE,
                                                 LEARNING_RATE_DECAY)
    learning_rate_o = tf.train.exponential_decay(LEARNING_RATE_OBLIQUE,
                                                 global_step,
                                                 50000 / BATCH_SIZE,
                                                 LEARNING_RATE_DECAY)
    #learning_rate=LEARNING_RATE_BASE
    # Update the parameters.

    #train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
    # Run the moving average in parallel:
    #with tf.control_dependencies([train_step,variable_averages_op]):
    #train_op=tf.no_op(name='train')
    correct_prediction_g = tf.equal(tf.argmax(y_, 1), tf.argmax(y_g, 1))
    correct_prediction_o = tf.equal(tf.argmax(y_, 1), tf.argmax(y_o, 1))

    correct_prediction = tf.equal(tf.argmax(y_, 1),
                                  tf.argmax(tf.add(y_g, y_o), 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy_g = tf.reduce_mean(tf.cast(correct_prediction_g, tf.float32))
    accuracy_o = tf.reduce_mean(tf.cast(correct_prediction_o, tf.float32))
    #########################################################################################################
    with tf.variable_scope('layer1-conv1', reuse=True):
        conv1_weights_g = tf.get_variable("weight_g")
        conv1_biases_g = tf.get_variable('biases_g')

        conv1_weights_o = tf.get_variable("weight_o")
        conv1_biases_o = tf.get_variable('biases_o')

        conv1_weights_g_tmp_layer1 = tf.get_variable("weight_g_tmp")
        conv1_weights_o_tmp_layer1 = tf.get_variable("weight_o_tmp")

        conv1_biases_g_tmp = tf.get_variable("biases_g_tmp")
        conv1_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer1 = conv1_weights_g.get_shape()

        weights_grad_g_base_layer1 = tf.gradients(
            loss_g, conv1_weights_g, stop_gradients=conv1_weights_g)
        weights_grad_o_base_layer1 = tf.gradients(
            loss_o, conv1_weights_o, stop_gradients=conv1_weights_o)

        weights_grad_g_base_biases_layer1 = tf.gradients(
            loss_g, conv1_biases_g, stop_gradients=conv1_biases_g)
        weights_grad_o_base_biases_layer1 = tf.gradients(
            loss_o, conv1_biases_o, stop_gradients=conv1_biases_o)

        weights_g_1 = tf.reshape(conv1_weights_g, shape=[-1, 1])
        weights_o_1 = tf.reshape(conv1_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_layer1[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_layer1[0], dtype=tf.float32)

        weights_grad_g_base_1 = tf.reshape(weights_grad_g_base_layer1[0],
                                           shape=[-1, 1])
        weights_grad_o_base_l = tf.reshape(weights_grad_o_base_layer1[0],
                                           shape=[-1, 1])

        grad_on_grassmann_1 = gutils.grassmann_project(weights_g_1,
                                                       weights_grad_g_base_1)
        grad_on_oblique_1 = gutils.oblique_project(weights_o_1,
                                                   weights_grad_o_base_l)

        weights_g_layer1 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_1, grad_on_oblique_1, weights_g_1,
            learning_rate_g, times, DELTA)

        weights_o_layer1 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_1, grad_on_oblique_1, weights_o_1,
            learning_rate_o, times, DELTA)

        #weights_g_layer1 = weights_g_1 - learning_rate_g * weights_grad_g_base_1
        #weights_o_layer1 = weights_o_1 - learning_rate_o * weights_grad_o_base_l

        weights_biases_g_layer1 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer1[0], tf.float32),
            conv1_biases_g)
        weights_biases_o_layer1 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer1[0], tf.float32),
            conv1_biases_o)

        norm_g_1 = tf.square(gutils.norm(grad_on_grassmann_1))
        norm_o_1 = tf.square(gutils.norm(grad_on_oblique_1))

    with tf.variable_scope('layer3-conv2', reuse=True):
        conv2_weights_g = tf.get_variable("weight_g")
        conv2_biases_g = tf.get_variable('biases_g')
        conv2_weights_o = tf.get_variable("weight_o")
        conv2_biases_o = tf.get_variable('biases_o')

        conv2_weights_g_tmp_layer3 = tf.get_variable("weight_g_tmp")
        conv2_weights_o_tmp_layer3 = tf.get_variable("weight_o_tmp")

        conv2_biases_g_tmp = tf.get_variable("biases_g_tmp")
        conv2_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer3 = conv2_weights_g.get_shape()

        weights_grad_g_base_3 = tf.gradients(loss_g,
                                             conv2_weights_g,
                                             stop_gradients=conv2_weights_g)
        weights_grad_o_base_3 = tf.gradients(loss_o,
                                             conv2_weights_o,
                                             stop_gradients=conv2_weights_o)

        weights_grad_g_base_biases_layer3 = tf.gradients(
            loss_g, conv2_biases_g, stop_gradients=conv2_biases_g)
        weights_grad_o_base_biases_layer3 = tf.gradients(
            loss_o, conv2_biases_o, stop_gradients=conv2_biases_o)

        weights_g_3 = tf.reshape(conv2_weights_g, shape=[-1, 1])
        weights_o_3 = tf.reshape(conv2_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_3[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_3[0], dtype=tf.float32)

        weights_grad_g_3 = tf.reshape(weights_grad_g_base_3[0], shape=[-1, 1])
        weights_grad_o_3 = tf.reshape(weights_grad_o_base_3[0], shape=[-1, 1])

        grad_on_grassmann_3 = gutils.grassmann_project(weights_g_3,
                                                       weights_grad_g_3)
        grad_on_oblique_3 = gutils.oblique_project(weights_o_3,
                                                   weights_grad_o_3)

        weights_g_layer3 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_3, grad_on_oblique_3, weights_g_3,
            learning_rate_g, times, DELTA)
        weights_o_layer3 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_3, grad_on_oblique_3, weights_o_3,
            learning_rate_o, times, DELTA)

        #weights_g_layer3 = weights_g_3 - learning_rate_g * weights_grad_g_3
        #weights_o_layer3 = weights_o_3 - learning_rate_o * weights_grad_o_3

        weights_biases_g_layer3 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer3[0], tf.float32),
            conv2_biases_g)
        weights_biases_o_layer3 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer3[0], tf.float32),
            conv2_biases_o)

        norm_g_3 = tf.square(gutils.norm(grad_on_grassmann_3))
        norm_o_3 = tf.square(gutils.norm(grad_on_oblique_3))

    with tf.variable_scope('layer5-fc1', reuse=True):
        fc1_weights_g = tf.get_variable("weight_g")
        fc1_biases_g = tf.get_variable("biases_g")
        fc1_weights_o = tf.get_variable("weight_o")
        fc1_biases_o = tf.get_variable("biases_o")

        fc1_weights_g_tmp_layer5 = tf.get_variable("weight_g_tmp")
        fc1_weights_o_tmp_layer5 = tf.get_variable("weight_o_tmp")

        fc1_biases_g_tmp = tf.get_variable("biases_g_tmp")
        fc1_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer5 = fc1_weights_g.get_shape()

        weights_grad_g_base_5 = tf.gradients(loss_g,
                                             fc1_weights_g,
                                             stop_gradients=fc1_weights_g)
        weights_grad_o_base_5 = tf.gradients(loss_o,
                                             fc1_weights_o,
                                             stop_gradients=fc1_weights_o)

        weights_grad_g_base_biases_layer5 = tf.gradients(
            loss_g, fc1_biases_g, stop_gradients=fc1_biases_g)
        weights_grad_o_base_biases_layer5 = tf.gradients(
            loss_o, fc1_biases_o, stop_gradients=fc1_biases_o)

        weights_g_5 = tf.reshape(fc1_weights_g, shape=[-1, 1])
        weights_o_5 = tf.reshape(fc1_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_5[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_5[0], dtype=tf.float32)

        weights_grad_g_5 = tf.reshape(weights_grad_g_base_5[0], shape=[-1, 1])
        weights_grad_o_5 = tf.reshape(weights_grad_o_base_5[0], shape=[-1, 1])

        grad_on_grassmann_5 = gutils.grassmann_project(weights_g_5,
                                                       weights_grad_g_5)
        grad_on_oblique_5 = gutils.oblique_project(weights_o_5,
                                                   weights_grad_o_5)

        weights_g_layer5 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_5, grad_on_oblique_5, weights_g_5,
            learning_rate_g, times, DELTA)
        weights_o_layer5 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_5, grad_on_oblique_5, weights_o_5,
            learning_rate_o, times, DELTA)

        #weights_g_layer5 = weights_g_5 - learning_rate_g * weights_grad_g_5
        #weights_o_layer5 = weights_o_5 - learning_rate_o * weights_grad_o_5

        weights_biases_g_layer5 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer5[0], tf.float32),
            fc1_biases_g)
        weights_biases_o_layer5 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer5[0], tf.float32),
            fc1_biases_o)

        norm_g_5 = tf.square(gutils.norm(grad_on_grassmann_5))
        norm_o_5 = tf.square(gutils.norm(grad_on_oblique_5))

    with tf.variable_scope('layer6-fc2', reuse=True):
        fc2_weights_g = tf.get_variable("weight_g")
        fc2_biases_g = tf.get_variable("biases_g")
        fc2_weights_o = tf.get_variable("weight_o")
        fc2_biases_o = tf.get_variable("biases_o")

        fc2_weights_g_tmp_layer6 = tf.get_variable("weight_g_tmp")
        fc2_weights_o_tmp_layer6 = tf.get_variable("weight_o_tmp")

        fc2_biases_g_tmp = tf.get_variable("biases_g_tmp")
        fc2_biases_o_tmp = tf.get_variable("biases_o_tmp")

        dim_layer6 = fc2_weights_g.get_shape()

        weights_grad_g_base_6 = tf.gradients(loss_g, fc2_weights_g,
                                             stop_gradients=fc2_weights_g)
        weights_grad_o_base_6 = tf.gradients(loss_o, fc2_weights_o,
                                             stop_gradients=fc2_weights_o)

        weights_grad_g_base_biases_layer6 = tf.gradients(
            loss_g, fc2_biases_g, stop_gradients=fc2_biases_g)
        weights_grad_o_base_biases_layer6 = tf.gradients(
            loss_o, fc2_biases_o, stop_gradients=fc2_biases_o)

        weights_g_6 = tf.reshape(fc2_weights_g, shape=[-1, 1])
        weights_o_6 = tf.reshape(fc2_weights_o, shape=[-1, 1])

        tf.convert_to_tensor(weights_grad_g_base_6[0], dtype=tf.float32)
        tf.convert_to_tensor(weights_grad_o_base_6[0], dtype=tf.float32)

        weights_grad_g = tf.reshape(weights_grad_g_base_6[0], shape=[-1, 1])
        weights_grad_o = tf.reshape(weights_grad_o_base_6[0], shape=[-1, 1])

        grad_on_grassmann_6 = gutils.grassmann_project(weights_g_6,
                                                       weights_grad_g)
        grad_on_oblique_6 = gutils.oblique_project(weights_o_6, weights_grad_o)

        weights_g_layer6 = optimize_function.apply_dense_on_grasssmann(
            GRAD_CLIP, grad_on_grassmann_6, grad_on_oblique_6, weights_g_6,
            learning_rate_g, times, DELTA)
        weights_o_layer6 = optimize_function._apply_dense_on_oblique(
            GRAD_CLIP, grad_on_grassmann_6, grad_on_oblique_6, weights_o_6,
            learning_rate_o, times, DELTA)

        #weights_g_layer6 = weights_g_6 - learning_rate_g * weights_grad_g
        #weights_o_layer6 = weights_o_6 - learning_rate_o * weights_grad_o

        weights_biases_g_layer6 = tf.add(
            -1 * learning_rate_g * tf.convert_to_tensor(
                weights_grad_g_base_biases_layer6[0], tf.float32),
            fc2_biases_g)
        weights_biases_o_layer6 = tf.add(
            -1 * learning_rate_o * tf.convert_to_tensor(
                weights_grad_o_base_biases_layer6[0], tf.float32),
            fc2_biases_o)

        norm_g_6 = tf.square(gutils.norm(grad_on_grassmann_6))
        norm_o_6 = tf.square(gutils.norm(grad_on_oblique_6))

    n = norm_g_1 + norm_g_3 + norm_g_5 + norm_g_6 + norm_o_1 + norm_o_3 + norm_o_5 + norm_o_6

    _1 = tf.assign(conv1_weights_g_tmp_layer1,
                   gutils.unit(tf.reshape(weights_g_layer1, shape=dim_layer1)))
    _2 = tf.assign(conv1_weights_o_tmp_layer1,
                   gutils.unit(tf.reshape(weights_o_layer1, shape=dim_layer1)))
    _3 = tf.assign(conv2_weights_g_tmp_layer3,
                   gutils.unit(tf.reshape(weights_g_layer3, shape=dim_layer3)))
    _4 = tf.assign(conv2_weights_o_tmp_layer3,
                   gutils.unit(tf.reshape(weights_o_layer3, shape=dim_layer3)))
    _5 = tf.assign(fc1_weights_g_tmp_layer5,
                   gutils.unit(tf.reshape(weights_g_layer5, shape=dim_layer5)))
    _6 = tf.assign(fc1_weights_o_tmp_layer5,
                   gutils.unit(tf.reshape(weights_o_layer5, shape=dim_layer5)))
    _7 = tf.assign(fc2_weights_g_tmp_layer6,
                   gutils.unit(tf.reshape(weights_g_layer6, shape=dim_layer6)))
    _8 = tf.assign(fc2_weights_o_tmp_layer6,
                   gutils.unit(tf.reshape(weights_o_layer6, shape=dim_layer6)))

    _11 = tf.assign(conv1_biases_g_tmp, weights_biases_g_layer1)
    _12 = tf.assign(conv1_biases_o_tmp, weights_biases_o_layer1)
    _13 = tf.assign(conv2_biases_g_tmp, weights_biases_g_layer3)
    _14 = tf.assign(conv2_biases_o_tmp, weights_biases_o_layer3)
    _15 = tf.assign(fc1_biases_g_tmp, weights_biases_g_layer5)
    _16 = tf.assign(fc1_biases_o_tmp, weights_biases_o_layer5)
    _17 = tf.assign(fc2_biases_g_tmp, weights_biases_g_layer6)
    _18 = tf.assign(fc2_biases_o_tmp, weights_biases_o_layer6)

    _21 = tf.assign(conv1_weights_g, conv1_weights_g_tmp_layer1)
    _22 = tf.assign(conv1_weights_o, conv1_weights_o_tmp_layer1)
    _23 = tf.assign(conv2_weights_g, conv2_weights_g_tmp_layer3)
    _24 = tf.assign(conv2_weights_o, conv2_weights_o_tmp_layer3)
    _25 = tf.assign(fc1_weights_g, fc1_weights_g_tmp_layer5)
    _26 = tf.assign(fc1_weights_o, fc1_weights_o_tmp_layer5)
    _27 = tf.assign(fc2_weights_g, fc2_weights_g_tmp_layer6)
    _28 = tf.assign(fc2_weights_o, fc2_weights_o_tmp_layer6)

    _31 = tf.assign(conv1_biases_g, conv1_biases_g_tmp)
    _32 = tf.assign(conv1_biases_o, conv1_biases_o_tmp)
    _33 = tf.assign(conv2_biases_g, conv2_biases_g_tmp)
    _34 = tf.assign(conv2_biases_o, conv2_biases_o_tmp)
    _35 = tf.assign(fc1_biases_g, fc1_biases_g_tmp)
    _36 = tf.assign(fc1_biases_o, fc1_biases_o_tmp)
    _37 = tf.assign(fc2_biases_g, fc2_biases_g_tmp)
    _38 = tf.assign(fc2_biases_o, fc2_biases_o_tmp)

    norm_1 = gutils.norm(conv1_weights_g)
    ######################################################################################################################
    # Initialize the Saver for persistence.
    #saver=tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Train the model, saving results periodically.
        i = 0
        while i <= EPOCH:
            for u in range(TRAINING_STEPS):
                if u * BATCH_SIZE >= 50000:
                    print("run out of all data")
                    break
                xs = data[(u * BATCH_SIZE):((u + 1) * BATCH_SIZE)]
                ys = labels[(u * BATCH_SIZE):((u + 1) * BATCH_SIZE)]
                loss_value_g, loss_value_o, \
                         accuracy_value, accuracy_g_value, accuracy_o_value, step = sess.run(
                    [loss_g, loss_o, accuracy, accuracy_g, accuracy_o,
                     global_step], feed_dict={x: xs, y_: ys})
                #****************************************************************
                #print(sess.run(norm_1,feed_dict={x: xs, y_: ys, times: float(u)}))

                sess.run([_1, _2, _3, _4, _5, _6, _7, _8],
                         feed_dict={
                             x: xs,
                             y_: ys,
                             times: float(u)
                         })
                sess.run([_11, _12, _13, _14, _15, _16, _17, _18],
                         feed_dict={
                             x: xs,
                             y_: ys,
                             times: float(u)
                         })
                sess.run([_21, _22, _23, _24, _25, _26, _27, _28])
                sess.run([_31, _32, _33, _34, _35, _36, _37, _38])

                n_value = sess.run(n,
                                   feed_dict={
                                       x: xs,
                                       y_: ys,
                                       times: float(u)
                                   })

                #print(n_value)
                ##########################################################################################################
                file_loss_g.write("%d %s\n" % (u, loss_value_g))
                file_loss_o.write("%d %s\n" % (u, loss_value_o))

                file_accuracy.write("%d %s\n" % (u, accuracy_value))
                file_accuracy_g.write("%d %s\n" % (u, accuracy_g_value))
                file_accuracy_o.write("%d %s\n" % (u, accuracy_o_value))
                # Write the fetched value, not the tensor object `n`.
                file_norm.write("%d %s\n" % (u, n_value))

                if u % 100 == 0:
                    print(
                        "After %d training steps, loss_g and loss_o on the training batch are %g and %g; accuracy is %g"
                        % (u, loss_value_g, loss_value_o, accuracy_value))

                    print(
                        "After %d training steps, accuracy_g and accuracy_o on the training batch are %g and %g"
                        % (u, accuracy_g_value, accuracy_o_value))
                    print(time.localtime(time.time()))
            i = i + 1
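
Both training scripts drive their updates through the same two-phase assign pattern: stage every layer's new weights into the *_tmp variables (ops _1 ... _18), then commit them back into the live variables (ops _21 ... _38), so each step's updates are all computed from the pre-step weights. A minimal single-variable sketch of that pattern, assuming plain TF1 semantics:

# Minimal two-phase assign, mirroring the _1/_21 op pairs above.
import tensorflow as tf

w = tf.Variable(tf.ones([3]))
w_tmp = tf.Variable(tf.zeros([3]))

new_w = 0.9 * w                   # stands in for the manifold update
stage = tf.assign(w_tmp, new_w)   # phase 1: stage the update
commit = tf.assign(w, w_tmp)      # phase 2: commit it

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(stage)   # all stage ops run before any commit op
    sess.run(commit)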