def _model(self, images, is_training, reuse=False):
  with tf.variable_scope(self.name, reuse=reuse):
    layers = []

    out_filters = self.out_filters
    with tf.variable_scope("stem_conv"):
      w = create_weight("w", [3, 3, 3, out_filters])
      x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME",
                       data_format=self.data_format)
      x = batch_norm(x, is_training, data_format=self.data_format)
      layers.append(x)

    if self.whole_channels:
      start_idx = 0
    else:
      start_idx = self.num_branches
    for layer_id in range(self.num_layers):
      with tf.variable_scope("layer_{0}".format(layer_id)):
        if self.fixed_arc is None:
          x = self._enas_layer(layer_id, layers, start_idx, out_filters,
                               is_training)
        else:
          x = self._fixed_layer(layer_id, layers, start_idx, out_filters,
                                is_training)
        layers.append(x)
        if layer_id in self.pool_layers:
          if self.fixed_arc is not None:
            out_filters *= 2
          with tf.variable_scope("pool_at_{0}".format(layer_id)):
            pooled_layers = []
            for i, layer in enumerate(layers):
              with tf.variable_scope("from_{0}".format(i)):
                x = self._factorized_reduction(
                  layer, out_filters, 2, is_training)
              pooled_layers.append(x)
            layers = pooled_layers
      if self.whole_channels:
        start_idx += 1 + layer_id
      else:
        start_idx += 2 * self.num_branches + layer_id
      print(layers[-1])

    x = global_avg_pool(x, data_format=self.data_format)
    if is_training:
      x = tf.nn.dropout(x, self.keep_prob)
    with tf.variable_scope("fc"):
      if self.data_format == "NHWC":
        inp_c = x.get_shape()[3].value
      elif self.data_format == "NCHW":
        inp_c = x.get_shape()[1].value
      else:
        raise ValueError("Unknown data_format {0}".format(self.data_format))
      w = create_weight("w", [inp_c, 10])
      x = tf.matmul(x, w)
  return x
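# Illustration, not part of the model: how `start_idx` above walks the flat
# architecture sequence sampled by the controller. In whole-channel mode each
# layer consumes one op id plus `layer_id` skip-connection bits; in branch
# mode it consumes two numbers per branch plus the skip bits. A minimal
# sketch (the helper name `arc_slices` is ours, not from the codebase):
def arc_slices(num_layers, whole_channels=True, num_branches=6):
  """Yield (layer_id, start_idx, length) for each layer's slice of the arc."""
  start_idx = 0 if whole_channels else num_branches
  for layer_id in range(num_layers):
    length = (1 + layer_id) if whole_channels else (
      2 * num_branches + layer_id)
    yield layer_id, start_idx, length
    start_idx += length

# e.g. for a 4-layer whole-channel search space:
#   layer 0 reads arc[0:1], layer 1 reads arc[1:3],
#   layer 2 reads arc[3:6], layer 3 reads arc[6:10]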
def _model(self, images, is_training, reuse=False):
  layers = []
  with tf.variable_scope(self.name, reuse=reuse):
    out_filters = self.out_filters
    with tf.variable_scope("stem_conv"):
      x = qmodules.conv(images, 3, out_filters, stride=1, padding='SAME',
                        data_format=self.data_format)
      x = batch_norm(x, is_training, data_format=self.data_format)
      layers.append(x)

    if self.whole_channels:
      start_idx = 0
    else:
      start_idx = self.num_branches
    for layer_id in range(self.num_layers):
      with tf.variable_scope("layer_{0}".format(layer_id)):
        if self.fixed_arc is None:
          x = self._enas_layer(layer_id, layers, start_idx, out_filters,
                               is_training)
        else:
          x = self._fixed_layer(layer_id, layers, start_idx, out_filters,
                                is_training)
        layers.append(x)
        if layer_id in self.pool_layers:
          if self.fixed_arc is not None:
            out_filters *= 2
          with tf.variable_scope("pool_at_{0}".format(layer_id)):
            pooled_layers = []
            for i, layer in enumerate(layers):
              with tf.variable_scope("from_{0}".format(i)):
                x = self._factorized_reduction(
                  layer, out_filters, 2, is_training)
              pooled_layers.append(x)
            layers = pooled_layers
      if self.whole_channels:
        start_idx += 1 + layer_id
      else:
        start_idx += 2 * self.num_branches + layer_id
      print(layers[-1])

    x = global_avg_pool(x, data_format=self.data_format)
    # TODO: does WAGE training need dropout?
    # if is_training:
    #   x = tf.nn.dropout(x, self.keep_prob)
    with tf.variable_scope("fc"):
      x = qmodules.fc(x, 10)
    # Quantize the error input of the last layer (the first layer in
    # backprop).
    with tf.variable_scope('lastQE'):
      if self.bitsE <= 16:
        x = Quantize.E(x)
        # TODO: does this need self.H?
        # self.H.append(x)
  return x, layers
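# Illustration only, not the real `Quantize` module: WAGE-style error
# quantization (`Quantize.E` above) touches only the backward pass, so a
# common TF1 pattern is tf.custom_gradient -- identity on the forward pass,
# quantized gradient on the backward pass. The rounding scheme below is a
# placeholder assumption; the actual Quantize.E may scale and clip
# differently.
import tensorflow as tf

def quantize_error_sketch(x, bits=8):
  @tf.custom_gradient
  def _identity_forward(t):
    def grad(dy):
      scale = 2.0 ** (bits - 1)
      return tf.round(dy * scale) / scale  # placeholder fixed-point rounding
    return tf.identity(t), grad
  return _identity_forward(x)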
def _model(self, images, is_training, reuse=False):
  """Compute the logits given the images."""

  if self.fixed_arc is None:
    is_training = True

  with tf.variable_scope(self.name, reuse=reuse):
    # the first two inputs
    with tf.variable_scope("stem_conv"):
      w = create_weight("w", [3, 3, 3, self.out_filters * 3])
      x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME",
                       data_format=self.data_format)
      x = batch_norm(x, is_training, data_format=self.data_format)
    if self.data_format == "NHWC":
      split_axis = 3
    elif self.data_format == "NCHW":
      split_axis = 1
    else:
      raise ValueError("Unknown data_format '{0}'".format(self.data_format))
    layers = [x, x]

    # building layers in the micro space
    out_filters = self.out_filters
    for layer_id in range(self.num_layers + 2):
      with tf.variable_scope("layer_{0}".format(layer_id)):
        if layer_id not in self.pool_layers:
          if self.fixed_arc is None:
            x = self._enas_layer(
              layer_id, layers, self.normal_arc, out_filters)
          else:
            x = self._fixed_layer(
              layer_id, layers, self.normal_arc, out_filters, 1, is_training,
              normal_or_reduction_cell="normal")
        else:
          out_filters *= 2
          if self.fixed_arc is None:
            x = self._factorized_reduction(x, out_filters, 2, is_training)
            layers = [layers[-1], x]
            x = self._enas_layer(
              layer_id, layers, self.reduce_arc, out_filters)
          else:
            x = self._fixed_layer(
              layer_id, layers, self.reduce_arc, out_filters, 2, is_training,
              normal_or_reduction_cell="reduction")
        print("Layer {0:>2d}: {1}".format(layer_id, x))
        layers = [layers[-1], x]

      # auxiliary heads
      self.num_aux_vars = 0
      if (self.use_aux_heads and
          layer_id in self.aux_head_indices and
          is_training):
        print("Using aux_head at layer {0}".format(layer_id))
        with tf.variable_scope("aux_head"):
          aux_logits = tf.nn.relu(x)
          aux_logits = tf.layers.average_pooling2d(
            aux_logits, [5, 5], [3, 3], "VALID",
            data_format=self.actual_data_format)
          with tf.variable_scope("proj"):
            inp_c = self._get_C(aux_logits)
            w = create_weight("w", [1, 1, inp_c, 128])
            aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME",
                                      data_format=self.data_format)
            aux_logits = batch_norm(aux_logits, is_training=True,
                                    data_format=self.data_format)
            aux_logits = tf.nn.relu(aux_logits)

          with tf.variable_scope("avg_pool"):
            inp_c = self._get_C(aux_logits)
            hw = self._get_HW(aux_logits)
            w = create_weight("w", [hw, hw, inp_c, 768])
            aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME",
                                      data_format=self.data_format)
            aux_logits = batch_norm(aux_logits, is_training=True,
                                    data_format=self.data_format)
            aux_logits = tf.nn.relu(aux_logits)

          with tf.variable_scope("fc"):
            aux_logits = global_avg_pool(aux_logits,
                                         data_format=self.data_format)
            inp_c = aux_logits.get_shape()[1].value
            w = create_weight("w", [inp_c, 10])
            aux_logits = tf.matmul(aux_logits, w)
            self.aux_logits = aux_logits

        aux_head_variables = [
          var for var in tf.trainable_variables()
          if (var.name.startswith(self.name) and "aux_head" in var.name)]
        self.num_aux_vars = count_model_params(aux_head_variables)
        print("Aux head uses {0} params".format(self.num_aux_vars))

    x = tf.nn.relu(x)
    x = global_avg_pool(x, data_format=self.data_format)
    if is_training and self.keep_prob is not None and self.keep_prob < 1.0:
      x = tf.nn.dropout(x, self.keep_prob)
    with tf.variable_scope("fc"):
      inp_c = self._get_C(x)
      w = create_weight("w", [inp_c, 10])
      x = tf.matmul(x, w)
  return x
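# Illustration, not part of the model: both `_enas_layer` and `_fixed_layer`
# below read the cell description as four integers per cell: arc[4*i] and
# arc[4*i + 1] pick the input and op for branch x, arc[4*i + 2] and
# arc[4*i + 3] for branch y. Op ids follow the order stacked in `_enas_cell`.
# A small decoder sketch (the helper name `decode_cell_arc` is ours):
OP_NAMES = ["sep_conv_3x3", "sep_conv_5x5", "avg_pool_3x3", "max_pool_3x3",
            "identity"]

def decode_cell_arc(arc, num_cells):
  """Print a readable form of a micro-search architecture."""
  for cell_id in range(num_cells):
    x_id, x_op, y_id, y_op = arc[4 * cell_id:4 * cell_id + 4]
    print("cell {}: x = {}(layer {}), y = {}(layer {})".format(
      cell_id, OP_NAMES[x_op], x_id, OP_NAMES[y_op], y_id))

# decode_cell_arc([0, 0, 1, 3, 0, 2, 2, 4], num_cells=2)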
def _fixed_conv(self, x, f_size, out_filters, stride, is_training,
                stack_convs=2):
  """Apply a fixed convolution.

  Args:
    stack_convs: number of separable convs to apply.
  """

  for conv_id in range(stack_convs):
    inp_c = self._get_C(x)
    if conv_id == 0:
      strides = self._get_strides(stride)
    else:
      strides = [1, 1, 1, 1]

    with tf.variable_scope("sep_conv_{}".format(conv_id)):
      w_depthwise = create_weight("w_depth", [f_size, f_size, inp_c, 1])
      w_pointwise = create_weight("w_point", [1, 1, inp_c, out_filters])
      x = tf.nn.relu(x)
      x = tf.nn.separable_conv2d(
        x,
        depthwise_filter=w_depthwise,
        pointwise_filter=w_pointwise,
        strides=strides, padding="SAME", data_format=self.data_format)
      x = batch_norm(x, is_training, data_format=self.data_format)

  return x

def _fixed_combine(self, layers, used, out_filters, is_training,
                   normal_or_reduction_cell="normal"):
  """Adjust the spatial sizes if necessary.

  Args:
    layers: a list of tf tensors of size [NHWC] or [NCHW].
    used: a numpy tensor, [0] means not used.
  """

  out_hw = min([self._get_HW(layer)
                for i, layer in enumerate(layers) if used[i] == 0])
  out = []

  with tf.variable_scope("final_combine"):
    for i, layer in enumerate(layers):
      if used[i] == 0:
        hw = self._get_HW(layer)
        if hw > out_hw:
          assert hw == out_hw * 2, ("i_hw={0} != {1}=o_hw".format(hw, out_hw))
          with tf.variable_scope("calibrate_{0}".format(i)):
            x = self._factorized_reduction(layer, out_filters, 2, is_training)
        else:
          x = layer
        out.append(x)
    if self.data_format == "NHWC":
      out = tf.concat(out, axis=3)
    elif self.data_format == "NCHW":
      out = tf.concat(out, axis=1)
    else:
      raise ValueError("Unknown data_format '{0}'".format(self.data_format))

  return out

def _fixed_layer(self, layer_id, prev_layers, arc, out_filters, stride,
                 is_training, normal_or_reduction_cell="normal"):
  """
  Args:
    prev_layers: cache of previous layers, for skip connections.
    is_training: for batch_norm.
  """

  assert len(prev_layers) == 2
  layers = [prev_layers[0], prev_layers[1]]
  layers = self._maybe_calibrate_size(layers, out_filters,
                                      is_training=is_training)

  with tf.variable_scope("layer_base"):
    x = layers[1]
    inp_c = self._get_C(x)
    w = create_weight("w", [1, 1, inp_c, out_filters])
    x = tf.nn.relu(x)
    x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                     data_format=self.data_format)
    x = batch_norm(x, is_training, data_format=self.data_format)
    layers[1] = x

  used = np.zeros([self.num_cells + 2], dtype=np.int32)
  f_sizes = [3, 5]
  for cell_id in range(self.num_cells):
    with tf.variable_scope("cell_{}".format(cell_id)):
      x_id = arc[4 * cell_id]
      used[x_id] += 1
      x_op = arc[4 * cell_id + 1]
      x = layers[x_id]
      x_stride = stride if x_id in [0, 1] else 1
      with tf.variable_scope("x_conv"):
        if x_op in [0, 1]:
          f_size = f_sizes[x_op]
          x = self._fixed_conv(x, f_size, out_filters, x_stride, is_training)
        elif x_op in [2, 3]:
          inp_c = self._get_C(x)
          if x_op == 2:
            x = tf.layers.average_pooling2d(
              x, [3, 3], [x_stride, x_stride], "SAME",
              data_format=self.actual_data_format)
          else:
            x = tf.layers.max_pooling2d(
              x, [3, 3], [x_stride, x_stride], "SAME",
              data_format=self.actual_data_format)
          if inp_c != out_filters:
            w = create_weight("w", [1, 1, inp_c, out_filters])
            x = tf.nn.relu(x)
            x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)
        else:
          inp_c = self._get_C(x)
          if x_stride > 1:
            assert x_stride == 2
            x = self._factorized_reduction(x, out_filters, 2, is_training)
          if inp_c != out_filters:
            w = create_weight("w", [1, 1, inp_c, out_filters])
            x = tf.nn.relu(x)
            x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)
        if (x_op in [0, 1, 2, 3] and
            self.drop_path_keep_prob is not None and
            is_training):
          x = self._apply_drop_path(x, layer_id)
      y_id = arc[4 * cell_id + 2]
      used[y_id] += 1
      y_op = arc[4 * cell_id + 3]
      y = layers[y_id]
      y_stride = stride if y_id in [0, 1] else 1
      with tf.variable_scope("y_conv"):
        if y_op in [0, 1]:
          f_size = f_sizes[y_op]
          y = self._fixed_conv(y, f_size, out_filters, y_stride, is_training)
        elif y_op in [2, 3]:
          inp_c = self._get_C(y)
          if y_op == 2:
            y = tf.layers.average_pooling2d(
              y, [3, 3], [y_stride, y_stride], "SAME",
              data_format=self.actual_data_format)
          else:
            y = tf.layers.max_pooling2d(
              y, [3, 3], [y_stride, y_stride], "SAME",
              data_format=self.actual_data_format)
          if inp_c != out_filters:
            w = create_weight("w", [1, 1, inp_c, out_filters])
            y = tf.nn.relu(y)
            y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            y = batch_norm(y, is_training, data_format=self.data_format)
        else:
          inp_c = self._get_C(y)
          if y_stride > 1:
            assert y_stride == 2
            y = self._factorized_reduction(y, out_filters, 2, is_training)
          if inp_c != out_filters:
            w = create_weight("w", [1, 1, inp_c, out_filters])
            y = tf.nn.relu(y)
            y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            y = batch_norm(y, is_training, data_format=self.data_format)
        if (y_op in [0, 1, 2, 3] and
            self.drop_path_keep_prob is not None and
            is_training):
          y = self._apply_drop_path(y, layer_id)

      out = x + y
      layers.append(out)

  out = self._fixed_combine(layers, used, out_filters, is_training,
                            normal_or_reduction_cell)

  return out

def _enas_cell(self, x, curr_cell, prev_cell, op_id, out_filters):
  """Performs an enas operation specified by op_id."""

  num_possible_inputs = curr_cell + 1

  with tf.variable_scope("avg_pool"):
    avg_pool = tf.layers.average_pooling2d(
      x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format)
    avg_pool_c = self._get_C(avg_pool)
    if avg_pool_c != out_filters:
      with tf.variable_scope("conv"):
        w = create_weight(
          "w", [num_possible_inputs, avg_pool_c * out_filters])
        w = w[prev_cell]
        w = tf.reshape(w, [1, 1, avg_pool_c, out_filters])
        avg_pool = tf.nn.relu(avg_pool)
        avg_pool = tf.nn.conv2d(avg_pool, w, strides=[1, 1, 1, 1],
                                padding="SAME",
                                data_format=self.data_format)
        avg_pool = batch_norm(avg_pool, is_training=True,
                              data_format=self.data_format)

  with tf.variable_scope("max_pool"):
    max_pool = tf.layers.max_pooling2d(
      x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format)
    max_pool_c = self._get_C(max_pool)
    if max_pool_c != out_filters:
      with tf.variable_scope("conv"):
        w = create_weight(
          "w", [num_possible_inputs, max_pool_c * out_filters])
        w = w[prev_cell]
        w = tf.reshape(w, [1, 1, max_pool_c, out_filters])
        max_pool = tf.nn.relu(max_pool)
        max_pool = tf.nn.conv2d(max_pool, w, strides=[1, 1, 1, 1],
                                padding="SAME",
                                data_format=self.data_format)
        max_pool = batch_norm(max_pool, is_training=True,
                              data_format=self.data_format)

  x_c = self._get_C(x)
  if x_c != out_filters:
    with tf.variable_scope("x_conv"):
      w = create_weight("w", [num_possible_inputs, x_c * out_filters])
      w = w[prev_cell]
      w = tf.reshape(w, [1, 1, x_c, out_filters])
      x = tf.nn.relu(x)
      x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME",
                       data_format=self.data_format)
      x = batch_norm(x, is_training=True, data_format=self.data_format)

  out = [
    self._enas_conv(x, curr_cell, prev_cell, 3, out_filters),
    self._enas_conv(x, curr_cell, prev_cell, 5, out_filters),
    avg_pool,
    max_pool,
    x,
  ]

  out = tf.stack(out, axis=0)
  out = out[op_id, :, :, :, :]
  return out

def _enas_conv(self, x, curr_cell, prev_cell, filter_size, out_filters,
               stack_conv=2):
  """Performs an enas convolution specified by the relevant parameters."""

  with tf.variable_scope("conv_{0}x{0}".format(filter_size)):
    num_possible_inputs = curr_cell + 2
    for conv_id in range(stack_conv):
      with tf.variable_scope("stack_{0}".format(conv_id)):
        # create params and pick the correct path
        inp_c = self._get_C(x)
        w_depthwise = create_weight(
          "w_depth", [num_possible_inputs, filter_size * filter_size * inp_c])
        w_depthwise = w_depthwise[prev_cell, :]
        w_depthwise = tf.reshape(
          w_depthwise, [filter_size, filter_size, inp_c, 1])

        w_pointwise = create_weight(
          "w_point", [num_possible_inputs, inp_c * out_filters])
        w_pointwise = w_pointwise[prev_cell, :]
        w_pointwise = tf.reshape(w_pointwise, [1, 1, inp_c, out_filters])

        with tf.variable_scope("bn"):
          zero_init = tf.initializers.zeros(dtype=tf.float32)
          one_init = tf.initializers.ones(dtype=tf.float32)
          offset = create_weight(
            "offset", [num_possible_inputs, out_filters],
            initializer=zero_init)
          scale = create_weight(
            "scale", [num_possible_inputs, out_filters],
            initializer=one_init)
          offset = offset[prev_cell]
          scale = scale[prev_cell]

        # the computations
        x = tf.nn.relu(x)
        x = tf.nn.separable_conv2d(
          x,
          depthwise_filter=w_depthwise,
          pointwise_filter=w_pointwise,
          strides=[1, 1, 1, 1], padding="SAME",
          data_format=self.data_format)
        x, _, _ = tf.nn.fused_batch_norm(
          x, scale, offset, epsilon=1e-5, data_format=self.data_format,
          is_training=True)
  return x

def _enas_layer(self, layer_id, prev_layers, arc, out_filters):
  assert len(prev_layers) == 2, "need exactly 2 inputs"
  layers = [prev_layers[0], prev_layers[1]]
  layers = self._maybe_calibrate_size(layers, out_filters, is_training=True)
  used = []
  for cell_id in range(self.num_cells):
    prev_layers = tf.stack(layers, axis=0)
    with tf.variable_scope("cell_{0}".format(cell_id)):
      with tf.variable_scope("x"):
        x_id = arc[4 * cell_id]
        x_op = arc[4 * cell_id + 1]
        x = prev_layers[x_id, :, :, :, :]
        x = self._enas_cell(x, cell_id, x_id, x_op, out_filters)
        x_used = tf.one_hot(x_id, depth=self.num_cells + 2, dtype=tf.int32)

      with tf.variable_scope("y"):
        y_id = arc[4 * cell_id + 2]
        y_op = arc[4 * cell_id + 3]
        y = prev_layers[y_id, :, :, :, :]
        y = self._enas_cell(y, cell_id, y_id, y_op, out_filters)
        y_used = tf.one_hot(y_id, depth=self.num_cells + 2, dtype=tf.int32)

      out = x + y
      used.extend([x_used, y_used])
      layers.append(out)

  used = tf.add_n(used)
  indices = tf.where(tf.equal(used, 0))
  indices = tf.to_int32(indices)
  indices = tf.reshape(indices, [-1])
  num_outs = tf.size(indices)
  out = tf.stack(layers, axis=0)
  out = tf.gather(out, indices, axis=0)

  inp = prev_layers[0]
  if self.data_format == "NHWC":
    N = tf.shape(inp)[0]
    H = tf.shape(inp)[1]
    W = tf.shape(inp)[2]
    C = tf.shape(inp)[3]
    out = tf.transpose(out, [1, 2, 3, 0, 4])
    out = tf.reshape(out, [N, H, W, num_outs * out_filters])
  elif self.data_format == "NCHW":
    N = tf.shape(inp)[0]
    C = tf.shape(inp)[1]
    H = tf.shape(inp)[2]
    W = tf.shape(inp)[3]
    out = tf.transpose(out, [1, 0, 2, 3, 4])
    out = tf.reshape(out, [N, num_outs * out_filters, H, W])
  else:
    raise ValueError("Unknown data_format '{0}'".format(self.data_format))

  with tf.variable_scope("final_conv"):
    w = create_weight("w", [self.num_cells + 2, out_filters * out_filters])
    w = tf.gather(w, indices, axis=0)
    w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters])
    out = tf.nn.relu(out)
    out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME",
                       data_format=self.data_format)
    out = batch_norm(out, is_training=True, data_format=self.data_format)

  out = tf.reshape(out, tf.shape(prev_layers[0]))
  return out

def _build_train(self):
  logits = self._model(self.x_train, is_training=True)
  log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=self.y_train)
  self.loss = tf.reduce_mean(log_probs)

  if self.use_aux_heads:
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=self.aux_logits, labels=self.y_train)
    self.aux_loss = tf.reduce_mean(log_probs)
    train_loss = self.loss + 0.4 * self.aux_loss
  else:
    train_loss = self.loss

  self.train_preds = tf.argmax(logits, axis=1)
  self.train_preds = tf.to_int32(self.train_preds)
  self.train_acc = tf.equal(self.train_preds, self.y_train)
  self.train_acc = tf.to_int32(self.train_acc)
  self.train_acc = tf.reduce_sum(self.train_acc)

  tf_variables = [
    var for var in tf.trainable_variables()
    if (var.name.startswith(self.name) and "aux_head" not in var.name)]
  self.num_vars = count_model_params(tf_variables)
  print("Model has {0} params".format(self.num_vars))

  if self.l2_reg > 0:
    l2_losses = []
    for var in tf_variables:
      l2_losses.append(tf.reduce_sum(var ** 2))
    l2_loss = tf.add_n(l2_losses)
    train_loss += self.l2_reg * l2_loss

  grads = tf.gradients(train_loss, tf_variables)
  self.grad_norm = tf.global_norm(grads)

  grad_norms = {}
  for v, g in zip(tf_variables, grads):
    if v is None or g is None:
      continue
    if isinstance(g, tf.IndexedSlices):
      grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
    else:
      grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))

  clipped = []
  for g in grads:
    if isinstance(g, tf.IndexedSlices):
      c_g = tf.clip_by_norm(g.values, self.grad_bound)
      c_g = tf.IndexedSlices(c_g, g.indices)
    else:
      c_g = tf.clip_by_norm(g, self.grad_bound)
    clipped.append(c_g)
  grads = clipped

  # NOTE: the lr and grad-clip hyperparameters (self.lr_init,
  # self.grad_bound, ...) are assumed to be stored on self by the
  # constructor, as the other hyperparameters in this class are.
  self.learning_rate = tf.train.exponential_decay(
    self.lr_init, tf.maximum(self.global_step - self.lr_dec_start, 0),
    self.lr_dec_every, self.lr_dec_rate, staircase=True)
  self.optimizer = tf.train.GradientDescentOptimizer(
    self.learning_rate, use_locking=True)
  self.train_op = self.optimizer.apply_gradients(
    zip(grads, tf_variables), global_step=self.global_step)

def _build_valid(self):
  if self.x_valid is not None:
    logits = self._model(self.x_valid, False, reuse=True)
    self.valid_preds = tf.argmax(logits, axis=1)
    self.valid_preds = tf.to_int32(self.valid_preds)
    self.valid_acc = tf.equal(self.valid_preds, self.y_valid)
    self.valid_acc = tf.to_int32(self.valid_acc)
    self.valid_acc = tf.reduce_sum(self.valid_acc)

def _build_test(self):
  logits = self._model(self.x_test, False, reuse=True)
  self.test_preds = tf.argmax(logits, axis=1)
  self.test_preds = tf.to_int32(self.test_preds)
  self.test_acc = tf.equal(self.test_preds, self.y_test)
  self.test_acc = tf.to_int32(self.test_acc)
  self.test_acc = tf.reduce_sum(self.test_acc)

def build_model(self, shuffle=False):
  with tf.device("/cpu:0"):
    x_valid_shuffle, y_valid_shuffle = tf.train.shuffle_batch(
      [self.images["valid_original"], self.labels["valid_original"]],
      batch_size=self.batch_size,
      capacity=25000,
      enqueue_many=True,
      min_after_dequeue=0,
      num_threads=16,
      seed=self.seed,
      allow_smaller_final_batch=True,
    )

    def _pre_process(x):
      x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
      x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
      x = tf.image.random_flip_left_right(x, seed=self.seed)
      return x

    if shuffle:
      x_valid_shuffle = tf.map_fn(
        _pre_process, x_valid_shuffle, back_prop=False)

  logits = self._model(x_valid_shuffle, is_training=True, reuse=True)
  valid_shuffle_preds = tf.argmax(logits, axis=1)
  valid_shuffle_preds = tf.to_int32(valid_shuffle_preds)
  self.valid_shuffle_acc = tf.equal(valid_shuffle_preds, y_valid_shuffle)
  self.valid_shuffle_acc = tf.to_int32(self.valid_shuffle_acc)
  self.valid_shuffle_acc = tf.reduce_sum(self.valid_shuffle_acc)

def connect_controller(self, controller_model):
  self.normal_arc, self.reduce_arc = controller_model.sample_arc
  self._build_train()
  self._build_valid()
  self._build_test()
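# Illustration (hypothetical implementation): `_apply_drop_path`, used by
# `_fixed_layer` above, is not shown in this excerpt. A common TF1 version of
# NASNet-style drop path, without the per-layer/per-step keep_prob schedule
# that the real method also applies, looks like:
import tensorflow as tf

def drop_path_sketch(x, keep_prob):
  """Drop an entire residual path per example, rescaling the survivors."""
  batch_size = tf.shape(x)[0]
  noise = keep_prob + tf.random_uniform([batch_size, 1, 1, 1], dtype=x.dtype)
  binary_mask = tf.floor(noise)  # 1 with probability keep_prob, else 0
  return (x / keep_prob) * binary_mask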
        out_filters *= 2
        # implementation of the pool_at layers
        with tf.variable_scope("pool_at_{0}".format(layer_id)):
          pooled_layers = []
          for i, layer in enumerate(layers):
            with tf.variable_scope("from_{0}".format(i)):
              x = self._factorized_reduction(
                layer, out_filters, 2, is_training)
            pooled_layers.append(x)
          layers = pooled_layers
      if self.whole_channels:
        start_idx += 1 + layer_id
      else:
        start_idx += 2 * self.num_branches + layer_id
      print(layers[-1])

    # the main operation here is a mean over the spatial dimensions
    x = global_avg_pool(x, data_format=self.data_format)
    if is_training:
      x = tf.nn.dropout(x, self.keep_prob)
    with tf.variable_scope("fc"):
      if self.data_format == "NHWC":
        inp_c = x.get_shape()[3].value
      elif self.data_format == "NCHW":
        inp_c = x.get_shape()[1].value
      else:
        raise ValueError("Unknown data_format {0}".format(self.data_format))
      # 10 is the number of classes; change this when migrating to another
      # dataset.
      w = create_weight("w", [inp_c, 10])
      x = tf.matmul(x, w)
  return x

def _enas_layer(self, layer_id, prev_layers, start_idx, out_filters,
                is_training):