def train(self, dataset_flow): with tf.variable_scope(tf.get_variable_scope()): #global_step= tf.Variable(initial_value=tf.constant(0), trainable=False, collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES], name='global_step') ## this is ok, I will use the format global_step as follow # global_step = tf.train.get_or_create_global_step() #print 'tf.global_variables()', tf.global_variables() #print 'tf.train.get_global_step()', tf.train.get_global_step() in_x, in_y = dataset_flow lr_decay = tf.train.exponential_decay(learning_rate=0.008, global_step=global_step, decay_steps=1000, decay_rate=0.99, staircase = True) optimizer = tf.train.AdamOptimizer(learning_rate = lr_decay) x_list = tf.split(in_x, num_or_size_splits=4, axis=0) y_list = tf.split(in_y, num_or_size_splits=4, axis=0) tower_grads = [] tower_logits= [] for i in xrange(4): with tf.device('/gpu:' +str(i)): with tf.name_scope('name_scope-'+str(i)) as scope: logits = self.logits(x_list[i]) tf.losses.softmax_cross_entropy(onehot_labels=y_list[i], logits=logits) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print 'update_ops', update_ops with tf.control_dependencies(update_ops): losses = tf.get_collection(tf.GraphKeys.LOSSES, scope) print 'gpu:', i, 'losses is :', losses total_loss = tf.add_n(losses, name='total_loss') grads = optimizer.compute_gradients(total_loss) tf.summary.scalar('loss', total_loss) ## 这里对每个name_scope下的loss都做了记录 tower_grads.append(grads) tower_logits.append(logits) grads = average_gradients(tower_grads) train_op = optimizer.apply_gradients(grads, global_step=global_step) ## 每次执行到这里,会对变量global_step自增1 # merged_summary = tf.summary.merge_all() saver = tf.train.Saver() config = tf.ConfigProto(gpu_options = tf.GPUOptions(allow_growth=True), device_count = {'GPU':4}, allow_soft_placement = True) with tf.Session(config=config) as sess: writer = tf.summary.FileWriter(self.model_dir, sess.graph) sess.run(tf.global_variables_initializer()) sess.run(tf.tables_initializer()) start_time = time.time() while True: try: _, loss_ = sess.run([train_op, total_loss]) except tf.errors.OutOfRangeError: print 'train end' break cur_step = tf.train.global_step(sess, global_step) if cur_step % 100== 0: start_time = time.time() if cur_step % 100== 1: accu, _ = sess.run(self.eval(in_x, in_y)) summary_res = sess.run(merged_summary) writer.add_summary(summary_res, cur_step) duration = time.time() - start_time print 'iter:\t', cur_step, '\tloss:\t', loss_, '\taccuracy:\t', accu, '\ttime cost(sec):\t', duration if cur_step % 10000==1: print 'save model into path:', self.model_dir, cur_step saver.save(sess, self.model_dir+'/ckpt', global_step=cur_step)
def train(self, dataset_flow): with tf.variable_scope(tf.get_variable_scope()): ema = tf.train.ExponentialMovingAverage(decay=0.999) in_x, in_y = dataset_flow optimizer = tf.train.AdamOptimizer(learning_rate = 0.008) x_list = tf.split(in_x, num_or_size_splits=4, axis=0) y_list = tf.split(in_y, num_or_size_splits=4, axis=0) tower_grads = [] tower_logits= [] for i in xrange(4): with tf.device('/gpu:' +str(i)): with tf.name_scope('name_scope-'+str(i)) as scope: logits = self.logits(x_list[i]) tf.losses.softmax_cross_entropy(onehot_labels=y_list[i], logits=logits) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) with tf.control_dependencies(update_ops): losses = tf.get_collection(tf.GraphKeys.LOSSES, scope) print 'gpu:', i, 'losses is :', losses total_loss = tf.add_n(losses, name='total_loss') grads = optimizer.compute_gradients(total_loss) tf.summary.scalar('loss', total_loss) ## 这里对每个name_scope下的loss都做了记录 tower_grads.append(grads) tower_logits.append(logits) grads = average_gradients(tower_grads) train_op = optimizer.apply_gradients(grads) train_avg = ema.apply(tf.trainable_variables()) ## 对指定的变量做滑动平均:ema.apply([self.w1,self.w2]) ## train_op_move = tf.group(train_op, train_avg) ## 将两组操作绑定到一起, 所有动作都是完成的,当整个op完成时 merged_summary = tf.summary.merge_all() saver = tf.train.Saver(tf.global_variables()) ## 这里保存要保存全部变量 ##才可以将所有的包括move_avg ## #saver = tf.train.Saver(em.variables_to_restore()) ## 这样好像也是可以的 ## config = tf.ConfigProto(gpu_options = tf.GPUOptions(allow_growth=True), device_count = {'GPU':4}, allow_soft_placement = True) with tf.Session(config=config) as sess: writer = tf.summary.FileWriter(self.model_dir, sess.graph) sess.run(tf.global_variables_initializer()) sess.run(tf.tables_initializer()) iter_num = 0 while True: try: _, loss_ = sess.run([train_op_move, total_loss]) ## 这里需要是对gropu的操作优化 ## except tf.errors.OutOfRangeError: print 'train end' break if iter_num == 0: start_time = time.time() if iter_num % 100== 1: accu, _ = sess.run(self.eval(in_x, in_y)) summary_res = sess.run(merged_summary) writer.add_summary(summary_res, iter_num) duration = time.time() - start_time start_time = time.time() print 'iter:\t', iter_num, '\tloss:\t', loss_, '\taccuracy:\t', accu, '\ttime cost(sec):\t', duration if iter_num % 10000==1: print 'save model into path:', self.model_dir, iter_num saver.save(sess, self.model_dir+'/ckpt', global_step=iter_num) iter_num += 1
def mode_mine(features, labels, mode, params): net_used = net_model() x = tf.feature_column.input_layer(features=features, feature_columns=params['columns']) #print '-'*10, '#debug in mode_mine as input-x:', x.get_shape() #print '-'*10, '#debug in mode_mine as input-y:', labels.get_shape() ## predict mode ## if mode == tf.estimator.ModeKeys.PREDICT: logits = net_used.output(x) #prob = tf.nn.softmax(logits, dim=1) prob = tf.nn.sigmoid(logits) #prob_class = tf.argmax(prob, axis=1) prob_class = tf.cast(prob > 0.5, tf.int32) predictions_op = {'prob': prob, 'prob_class': prob_class} return tf.estimator.EstimatorSpec(mode, predictions=predictions_op) ## train and eval mode using GPU ## #print '#debug current dataset batch_size:', x.get_shape().as_list(), x # tf.1.3无法处理最后的batch不等于batch_size的情况,这里tf.split会报错,那怎么办呢? # tf.1.3+可以用dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size)) 来做丢弃 x_list = tf.split(x, num_or_size_splits=params['gpu_num'], axis=0) y_list = tf.split(labels, num_or_size_splits=params['gpu_num'], axis=0) tower_grads = [] tower_logits = [] optimizer = tf.train.AdamOptimizer(learning_rate=0.002) print '#debug, tf.get_variable_scope():', tf.get_variable_scope() print '#debug, tf.variable_scope(tf.get_variable_scope()):', tf.variable_scope( tf.get_variable_scope()) with tf.variable_scope(tf.get_variable_scope()): for i in xrange(params['gpu_num']): with tf.device('/gpu:' + str(i)): ## 指定在不同的GPU上,设置对应的操作 ## with tf.name_scope('classification-' + str(i)) as scope: # model and loss # name_scope是用来做什么的呢? #print '#debug, net_used.w1.name: ', net_used.w1.name #print '#debug, tf.get_collection(TRAINABLE_VARIABLES)', tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) #print '#debug, tf.get_variable_scope():', tf.get_variable_scope() ## 发现collection里面的可训练变量只有 layer-1/w1:0 这样的模型参数 ## logits = net_used.output(x_list[i]) #print '#debug, here scope:', scope, logits loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast( y_list[i], tf.float32), logits=logits)) tf.add_to_collection(tf.GraphKeys.LOSSES, loss) #tf.losses.softmax_cross_entropy(onehot_labels=y_list[i], logits=logits) ## 疑问,上面这句话为什么要单独执行 ? ## tf.losses.softmax_cross_entrypy will do what ? ## 1) create a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits; ## 2) notice: loss_collection=tf.GraphKeys.LOSSES 这个参数 ## 表示:collection to which the loss will be added ## 将这个GPU上的计算loss结果add到tf.GraphKeys.LOSSES(也即losses) 里. ## 其实如果有其他自定义的loss,也可以通过tf.losses.add_loss添加到collection的损失里. ## 背后都是用ops.add_to_collection(GraphKeys.LOSSES, loss) ## 3) 在来看下tf.Graph.add_to_collection(name, value)是干什么的? ## store the value in the collection given by name ## 查看代码,最重要的一句: self._collections[name].append(value) ## 于是我们知道了collections是一个map,key=tf.GraphKeys, value是通过add_to_collection追加的. ## 难道是是为了在这个GPU下执行计算一遍loss,方便后面采集loss ## ## tf.losses.softmax_cross_entropy 与 tf.nn.softmax_cross_entropy_with_logits的区别 ## 前者处理onehot_labels 后者更适合处理2分类 ## update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) ## 这句话,做了什么? 从collections的map里,取出了name=Update_ops in scope下的[value] ## 采集 需要update的操作 ## update_ops in combination with reuse varscope ## explain the tf.GraphKeys.UPDATE_OPS ## Custom functions to update some variables can be added to UPDATE_OPS ## and, separately run at each iteration using sess.run(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) ## In this case, these variables are set trainable=False to avoid being updated by gradient descent. with tf.control_dependencies(update_ops): ## 这里是做什么用的 ?## losses = tf.get_collection(tf.GraphKeys.LOSSES, scope) print 'gpu:', i, 'losses', losses total_loss = tf.add_n(losses, name='total_loss') ## tf.control_depencies表示 with段内的操作是在updates_op执行之后,再执行的 ## 控制了图的执行顺序 ## ## notice: 这里get_collection都使用了 scope来过滤,保证 操作和结果 都是本scope-GPU内的. ## 在本GPU上,根据切片输入,计算了logits,并将loss追加到collection里面;然后执行有update var的操作,再获取整个GPU上计算的loss,合起来,作为本GPU本切片的loss. ## 如何将对应的正则loss也提取出来 ? ## ## 如果前面在变量定义时,已经用regularizer设定了,则会自动被收集到colleciton的key=regularizer_loss里面 ## 疑惑,tf.GraphKeys.LOSSES里的值会在下次计算的时候更新么?还是继续追加呢,感觉有清理机制 ## ##reuse var tf.get_variable_scope().reuse_variables( ) ## 将当前变量空间,设置为其中变量可以重复使用 ## ## name_space 并不会影响 variable_space ## ## 当我的变量在定义时,都在variable_scope(reuse=tf.AUTO_REUSE)设置下,这里岂不是就可以不用了?是的 ## ## 这里共享的到底是谁?哪些变量?用tf.get_variable_scope来查看,发现是4个GPU下都是一样的variable_scope ## ## 4个GPU在同一个variable_scope下,是因为最开始的with tf.variable_scope(tf.get_variable_scope) ## 第一个GPU使用的trainable_variable,会由于 reuse=True的设置,允许后面GPU在使用tf.get_variable时,同名的变量是一样的/共享的. ## 比如,这个GPU下使用了变量layer-1/w1:0来计算loss,那么后面的GPU在使用变量layer-1/w1:0来计算loss时,是用同一个变量layer-1/w1:0 ## 疑惑,为什么要将当前变量空间搞成变量可共享呢?都有什么变量呢? 感觉是将w1,w2,b1,b2共享 ## print '-' * 10, '#debug, tf.get_variable_scope', tf.get_variable_scope( ), tf.get_variable_scope().name # grad compute grads = optimizer.compute_gradients(total_loss) #print '-'*10, '#debug, compute_gradients', grads ## this is the first part of minimize() ## ## optimizer.compute_gradients(loss, var_list=None, gate_gradients=GATE_OP, aggregation_method=None, colocate_gradients_with_ops=False, grad_loss=None) ## what will do this operation ? ## ## compute gradients of loss for the variables in var_list(default=tf.GraphKeys.TRAINABLE_VARIABLES) ## return a list of (gradient, variable) pair tower_grads.append(grads) tower_logits.append(logits) # we must calculate the mean of each gradient, notice: this is synchronization across all tower # grads = average_gradients(tower_grads) ## apply the gradients to adjust the sared variables. train_op = optimizer.apply_gradients( grads, global_step=tf.train.get_global_step()) #prob = tf.nn.softmax(tf.concat(tower_logits, 0), dim=1) prob = tf.nn.sigmoid(tf.concat(tower_logits, 0)) #prob_class = tf.argmax(prob, axis=1) prob_class = tf.cast(prob > 0.5, tf.int32) accuracy = tf.metrics.accuracy(labels=labels, predictions=prob_class) ## train mode and eval mode ## if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode, loss=total_loss, train_op=train_op, eval_metric_ops={'accuracy': accuracy})
print 'gpu:======', i, 'losses is :', losses regular_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES ) ## 疑问?这里没法在scope获取到权重的正则化值 ##是因为大家都是共享的权重,其当前的正则化应该也是一致的才对 # print 'gpu:======', i, 'regular loss is:', regular_losses total_loss = tf.add_n(losses + regular_losses, name='total_loss') grads = optimizer.compute_gradients(total_loss) #gradients, variables = zip(*optimizer.compute_gradients(total_loss)) #gradients, _ = tf.clip_by_global_norm(t_list=gradients, clip_norm=100.0) ## clip ## #grads = zip(gradients, variables) tf.summary.scalar('loss', total_loss) ## 这里对每个name_scope下的loss都做了记录 tower_grads.append(grads) tower_logits.append(logits) grads = average_gradients(tower_grads) train_op = optimizer.apply_gradients( grads, global_step=global_step) ## 每次执行到这里,会对变量global_step自增1 # merged_summary = tf.summary.merge_all() saver = tf.train.Saver() config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True), device_count={'GPU': 4}, allow_soft_placement=True) with tf.Session(config=config) as sess: writer = tf.summary.FileWriter(model_dir, sess.graph) sess.run(tf.global_variables_initializer()) sess.run(tf.tables_initializer()) start_time = time.time() sess.graph.finalize() while True:
def mode_mine(features, labels, mode, params): net_used = net_model() print 'before tf.feature_column.input_layer time:', time.ctime() x = tf.feature_column.input_layer(features=features, feature_columns=params['columns']) print 'after tf.feature_column.input_layer time:', time.ctime() #print '-'*10, '#debug in mode_mine as input-x:', x.get_shape() #print '-'*10, '#debug in mode_mine as input-y:', labels.get_shape() ## predict mode ## if mode == tf.estimator.ModeKeys.PREDICT: logits = net_used.output(x) prob = tf.nn.softmax(logits, dim=1) prob_class = tf.argmax(prob, axis=1) predictions_op = {'prob': prob, 'prob_class': prob_class} return tf.estimator.EstimatorSpec(mode, predictions=predictions_op) ## train and eval mode using GPU ## #print '#debug current dataset batch_size:', x.get_shape().as_list(), x # tf.1.3无法处理最后的batch不等于batch_size的情况,这里tf.split会报错,那怎么办呢? # tf.1.3+可以用dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size)) 来做丢弃 print 'before tf.split time:', time.ctime() x_list = tf.split(x, num_or_size_splits=params['gpu_num'], axis=0) y_list = tf.split(labels, num_or_size_splits=params['gpu_num'], axis=0) print 'after tf.split time:', time.ctime() tower_grads = [] tower_logits = [] optimizer = tf.train.AdamOptimizer(learning_rate=0.002) print '#debug, before gpu, tf.get_variable_scope():', tf.get_variable_scope( ) print '#debug, before gpu, tf.variable_scope(tf.get_variable_scope()):', tf.variable_scope( tf.get_variable_scope()) ## you will find, here tf.get_variable_scope 与 net_used 时,是同一个variable_scope with tf.variable_scope(tf.get_variable_scope()): ## what is happening tf.variable_scope, why the same tf.get_variable_scope as input get diff result ## tf.variable_scope : A context manager for defining ops that creates variables (layers) ## for i in xrange(params['gpu_num']): with tf.device('/gpu:' + str(i)): ## 指定在不同的GPU上,设置对应的操作 ## with tf.name_scope('classification-' + str(i)) as scope: # model and loss # name_scope是用来做什么的呢? #print '#debug, net_used.w1.name: ', net_used.w1.name #print '#debug, tf.get_collection(TRAINABLE_VARIABLES)', tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) #print '#debug, tf.get_variable_scope():', tf.get_variable_scope() ## 发现collection里面的可训练变量只有 layer-1/w1:0 这样的模型参数 ## #print 'gpu:', str(i), 'before net_use.output time:', time.ctime() logits = net_used.output(x_list[i]) print 'gpu:', str(i), 'logits.name', logits.name #print '#debug, here scope:', scope, logits #print 'gpu:', str(i), 'before tf.losses time:', time.ctime() loss = tf.losses.softmax_cross_entropy( onehot_labels=y_list[i], logits=logits) print 'gpu:', str(i), 'loss.name', loss.name print 'gpu:', str( i), 'trainable_variabels', tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES) ## 疑问,上面这句话为什么要单独执行 ? 将当前name_scope的loss-opertion保存起来 ## ## tf.losses.softmax_cross_entrypy will do what ? ## 1) create a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits; ## 2) notice: loss_collection=tf.GraphKeys.LOSSES 这个参数 ## 表示:collection to which the loss will be added ## 将这个GPU上的计算loss结果add到tf.GraphKeys.LOSSES(也即losses) 里. ## 其实如果有其他自定义的loss,也可以通过tf.losses.add_loss添加到collection的损失里. ## 背后都是用ops.add_to_collection(GraphKeys.LOSSES, loss) ## 3) 在来看下tf.Graph.add_to_collection(name, value)是干什么的? ## store the value in the collection given by name ## 查看代码,最重要的一句: self._collections[name].append(value) ## 于是我们知道了collections是一个map,key=tf.GraphKeys, value是通过add_to_collection追加的. ## 难道是是为了在这个GPU下执行计算一遍loss,方便后面采集loss ## ## tf.losses.softmax_cross_entropy 与 tf.nn.softmax_cross_entropy_with_logits的区别 ## 前者处理onehot_labels 后者更适合处理2分类,且没有自动添加到tf.GraphKeys.LOSSES的动作 ## 前者处理多分类 ## ## 也有类似的tf.losses.sigmoid_cross_entropy 处理2分类,且将loss自动添加到tf.GraphKeys.LOSSES里 ## ## 也可以使用 tf.nn.softmax_cross_entropy() 和 tf.add_to_collection(tf.GraphKeys.LOSSES, loss)来完成同样的动作 ## ## 疑问,为什么不直接执行 loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits) ?而要多此一举,将loss添加到tf.GraphKeys.LOSSSE里面 ## 是因为,想要控制图的执行顺序,下面的update_ops必须先执行,再执行计算总loss的动作tf.add_n(loss_cur_gpu) ## 这个控制在BN里是非常重要的 ## ## 实际上,这里并不需要控制update_ops在total_loss前执行,所以可以删掉控制流逻辑的diam,total_loss直接用tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits())来计算 ## update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) #print 'gpu:', str(i), 'update_ops.name in scop', update_ops[0].name #print 'gpu:', str(i), 'update_ops scope', update_ops #print 'gpu:', str(i), 'update_ops all', tf.get_collection(tf.GraphKeys.UPDATE_OPS) ## 这句话,做了什么? 从collections的map里,取出了name=Update_ops in scope下的[value] ## 采集 需要update的操作 ## update_ops in combination with reuse varscope ## explain the tf.GraphKeys.UPDATE_OPS is What. ## Custom functions to update some variables can be added to UPDATE_OPS ## and, separately run at each iteration using sess.run(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) ## In this case, these variables are set trainable=False to avoid being updated by gradient descent. ## 注意这里是很重要的,会将trainable=True所涉及到的会change-variable值的操作都添加到tf.GraphKeys.UPDATE_OPS里面, ## 是不是,意味着tf的opt.apply_gradient是会被添加到update_ops里面?然而并不能在执行前后,发现tf.GraphKeys.UPDATE_OPS与内容,都是空的 ##?常用的是BN对参数的更新在这里更新 ## ## on earth, what is tf.GraphKeys.UPDATE_OPS ? with tf.control_dependencies( update_ops ): ## 这里是做什么用的 ?## 控制流程执行顺序,update_ops先执行完毕,再计算总loss值。 losses = tf.get_collection(tf.GraphKeys.LOSSES, scope) total_loss = tf.add_n( losses, name='total_loss') ## 为什么没有除以数量,求均值 ? ## print '#debug# ---losses in gpu:', str(i), losses print '#debug# ---losses all :', str( i), tf.get_collection(tf.GraphKeys.LOSSES) print '#debug# ---trainable var:', str( i), tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES) print '#total_loss ----- here :', str( i ), total_loss ## 这个GPU上的总loss ## 难道这里上面的loss 不是这个GPU上的总loss么,因为那个loss尺寸是[N, 1]=y.shape,并没有加和。 #print '#debug# --- update_ops :', str(i), update_ops ## tf.control_depencies表示 with段内的操作是在updates_op执行之后,再执行的 ## 控制了图的执行顺序 ## ## notice: 这里get_collection都使用了 scope来过滤,保证 操作和结果 都是本scope-GPU内的. ## 在本GPU上,根据切片输入,计算了logits,并将loss追加到collection里面;然后执行有update var的操作,再获取整个GPU上计算的loss,合起来,作为本GPU本切片的loss. ## notice: 这里的total_loss是定义计算loss操作 ## ## 如何将对应的正则loss也提取出来 ? ## ## 如果前面在变量定义时,已经用regularizer设定了,则会自动被收集到colleciton的key=regularizer_loss里面 ## 疑惑,tf.GraphKeys.LOSSES里的值会在下次计算的时候更新么?还是继续追加呢,感觉有清理机制 ## ## reuse variable ## tf.get_variable_scope().reuse_variables( ) ## 将当前变量空间,设置为其中变量可以重复使用,必须配合tf.get_variable来使用 ## gpu上的操作都在同一个变量空间内,后面的gpu:1/2/3都可以复用gpu:0时使用的变量 ## name_space 并不会影响 variable_space ## ## 当我的变量在定义时,都在variable_scope(reuse=tf.AUTO_REUSE)设置下,这里岂不是就可以不用了?是的 ## 当每个用到了tf.get_variable的variable_scope下,都是共享参数的 ## ## 这里共享的到底是谁?哪些变量?用tf.get_variable_scope来查看,发现是4个GPU下都是一样的variable_scope ## ## 4个GPU在同一个variable_scope下,是因为最开始的with tf.variable_scope(tf.get_variable_scope) ## 第一个GPU使用的trainable_variable,会由于 reuse=True的设置,允许后面GPU在使用tf.get_variable时,同名的变量是一样的/共享的. ## 比如,这个GPU下使用了变量layer-1/w1:0来计算loss,那么后面的GPU在使用变量layer-1/w1:0来计算loss时,是用同一个变量layer-1/w1:0 ## 疑惑,为什么要将当前变量空间搞成变量可共享呢?都有什么变量呢? 感觉是将w1,w2,b1,b2共享 ## yes, 就是这个样子的,如果将变量定义的地方设置为variable_scope(reuse=tf.AUTO_REUSE),这里就省了 ## print '-' * 10, 'gpu:', i, '#debug, tf.get_variable_scope', tf.get_variable_scope( ) # grad compute print 'gpu:', i, 'before compute_gradients time:', time.ctime( ) grads = optimizer.compute_gradients(total_loss) #print '-'*10, '#debug, compute_gradients', grads ## this is the first part of minimize() ## ## optimizer.compute_gradients(loss, var_list=None, gate_gradients=GATE_OP, aggregation_method=None, colocate_gradients_with_ops=False, grad_loss=None) ## what will do this operation ? ## ## compute gradients of loss for the variables in var_list(default=tf.GraphKeys.TRAINABLE_VARIABLES) ## return a list of (gradient, variable) pair tower_grads.append(grads) tower_logits.append(logits) # we must calculate the mean of each gradient, notice: this is synchronization across all tower # print 'before average_gradient time:', time.ctime() grads = average_gradients(tower_grads) ## apply the gradients to adjust the sared variables. print 'before apply_gradients time:', time.ctime() train_op = optimizer.apply_gradients( grads, global_step=tf.train.get_global_step()) print 'before tf.nn.softmax time:', time.ctime() prob = tf.nn.softmax(tf.concat(tower_logits, 0), dim=1) print 'before tf.argmax time:', time.ctime() prob_class = tf.argmax(prob, axis=1) print 'before tf.metrics.accuracy time:', time.ctime() accuracy = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=prob_class) ## train mode and eval mode ## if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: print 'before tf.estiamtor.EstimatorSpe time:', time.ctime() return tf.estimator.EstimatorSpec( mode, loss=total_loss, train_op=train_op, eval_metric_ops={'accuracy': accuracy})