def inception_v3_parameters(weight_decay=0.00004, stddev=0.1,
                            batch_norm_decay=0.9997, batch_norm_epsilon=0.001):
  """Yields the scope with the default parameters for inception_v3.

  Args:
    weight_decay: the weight decay for weights variables.
    stddev: standard deviation of the truncated Gaussian weight distribution.
    batch_norm_decay: decay for the moving averages in batch_norm.
    batch_norm_epsilon: small float added to variance to avoid dividing by zero.

  Yields:
    an arg_scope with the parameters needed for inception_v3.
  """
  # Set weight_decay for weights in Conv and FC layers.
  with scopes.arg_scope([ops.conv2d, ops.fc], weight_decay=weight_decay):
    # Set stddev, activation and parameters for batch_norm.
    with scopes.arg_scope([ops.conv2d],
                          stddev=stddev,
                          activation=tf.nn.relu,
                          batch_norm_params={
                              'decay': batch_norm_decay,
                              'epsilon': batch_norm_epsilon}) as arg_scope:
      yield arg_scope
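# Usage sketch (not part of the original file).  Because the body yields, the
# function is assumed to be exposed as a context manager (e.g. wrapped with
# contextlib.contextmanager in the defining module).  Inside the scope, every
# ops.conv2d / ops.fc call picks up the weight decay, initializer stddev and
# batch_norm defaults configured above.
with inception_v3_parameters(weight_decay=0.00004, stddev=0.1):
  net = ops.conv2d(images, 32, [3, 3], stride=2, scope='conv0')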
def inference(images, num_classes, for_training=False, restore_logits=True,
              scope=None, restore_new_layers=False):
  """Build Inception v3 model architecture.

  See here for reference: http://arxiv.org/abs/1512.00567

  Args:
    images: Images returned from inputs() or distorted_inputs().
    num_classes: number of classes.
    for_training: If set to `True`, build the inference model for training.
      Kernels that operate differently for inference during training
      e.g. dropout, are appropriately configured.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: optional prefix string identifying the ImageNet tower.
    restore_new_layers: not used by this function.

  Returns:
    Logits. 2-D float Tensor.
    Auxiliary Logits. 2-D float Tensor of side-head. Used for training only.
  """
  # Parameters for BatchNorm.
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': BATCHNORM_MOVING_AVERAGE_DECAY,
      # epsilon to prevent 0s in variance.
      'epsilon': 0.001,
  }
  # Set weight_decay for weights in Conv and FC layers.
  with scopes.arg_scope([ops.conv2d, ops.fc], weight_decay=0.00004):
    with scopes.arg_scope([ops.conv2d],
                          stddev=0.1,
                          activation=tf.nn.relu,
                          batch_norm_params=batch_norm_params):
      logits, endpoints = inception_v3(images,
                                       dropout_keep_prob=0.8,
                                       num_classes=num_classes,
                                       is_training=for_training,
                                       restore_logits=restore_logits,
                                       scope=scope)

  # Add summaries for viewing model statistics on TensorBoard.
  _activation_summaries(endpoints)

  # Grab the logits associated with the side head. Employed during training.
  auxiliary_logits = endpoints['aux_logits']

  return logits, auxiliary_logits
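# Usage sketch (not part of the original file): how the two heads are
# typically combined into a training loss.  `images_ph` and `labels_ph` are
# assumed placeholders (images of size [batch, 299, 299, 3], integer labels),
# and the auxiliary head is down-weighted (0.4 here) as is customary.
logits, aux_logits = inference(images_ph, num_classes=1000, for_training=True)
main_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                   labels=labels_ph))
aux_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=aux_logits,
                                                   labels=labels_ph))
total_loss = main_loss + 0.4 * aux_loss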
def _grad(self):
  """Builds the tower on each GPU."""
  global batchnorm_updates
  tower_grads = []     # Stores the gradients from each GPU
  self.var_names = []  # Stores the input placeholder names from each tower
  # For each GPU
  for _gpu in self.gpu_ids:
    # Build the tower
    with tf.device('/gpu:{0}'.format(_gpu)):
      with tf.name_scope('tower_{0}'.format(_gpu)) as scope:
        # Here we put all the variables on 'variables_on_device'
        with scopes.arg_scope([variables.variable],
                              device=self.variables_on_device):
          loss, input_var_names, tower_log = self.tower_handle(scope)
          self.var_names = self.var_names + input_var_names
          num_placeholders_per_tower = len(input_var_names)
        # Re-use variables for the next tower
        tf.get_variable_scope().reuse_variables()
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
        if self.batch_norm_in_use:
          batchnorm_updates = tf.get_collection(ops.UPDATE_OPS_COLLECTION,
                                                scope)
        grads = self.opt.compute_gradients(loss)
        tower_grads.append(grads)
  # Average the gradients from the GPUs. This is where GPU synchronization
  # happens.
  grads = _average_gradients(tower_grads)
  self.grads = grads
  self.summaries = summaries  # Summaries from the last tower
  self.tower_log = tower_log  # Log from the last tower
  self.loss = loss            # Loss on the last tower
  self.num_placeholders = num_placeholders_per_tower  # Placeholders per tower
  if self.batch_norm_in_use:
    self.batchnorm_updates = batchnorm_updates
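# The method above relies on a module-level `_average_gradients` helper.  A
# minimal sketch of the usual implementation (the standard multi-GPU
# gradient-averaging pattern) is given below; the real helper may differ in
# detail.
def _average_gradients(tower_grads):
  """Averages gradients over towers.

  Args:
    tower_grads: a list with one entry per tower, each entry being the list of
      (gradient, variable) pairs returned by Optimizer.compute_gradients().

  Returns:
    A single list of (gradient, variable) pairs, averaged across all towers.
  """
  average_grads = []
  for grad_and_vars in zip(*tower_grads):
    # grad_and_vars looks like ((grad0_gpu0, var0), (grad0_gpu1, var0), ...).
    grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
    grad = tf.reduce_mean(tf.concat(0, grads), 0)
    # Variables are shared across towers, so the first tower's pointer is
    # sufficient.
    average_grads.append((grad, grad_and_vars[0][1]))
  return average_grads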
def fc(inputs,
       num_units_out,
       activation=tf.nn.relu,
       stddev=0.01,
       bias=0.0,
       weight_decay=0,
       batch_norm_params=None,
       is_training=True,
       trainable=True,
       restore=True,
       scope=None,
       reuse=None):
  """Adds a fully connected layer followed by an optional batch_norm layer.

  FC creates a variable called 'weights', representing the fully connected
  weight matrix, that is multiplied by the input. If `batch_norm_params` is
  None, a second variable called 'biases' is added to the result of the
  initial vector-matrix multiplication.

  Args:
    inputs: a [B x N] tensor where B is the batch size and N is the number of
      input units in the layer.
    num_units_out: the number of output units in the layer.
    activation: activation function.
    stddev: the standard deviation for the weights.
    bias: the initial value of the biases.
    weight_decay: the weight decay.
    batch_norm_params: parameters for the batch_norm. If None, batch_norm is
      not used.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
    restore: whether or not the variables should be marked for restore.
    scope: Optional scope for variable_op_scope.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.

  Returns:
    the tensor variable representing the result of the series of operations.
  """
  with tf.variable_op_scope([inputs], scope, 'FC', reuse=reuse):
    num_units_in = inputs.get_shape()[1]
    weights_shape = [num_units_in, num_units_out]
    # `seed` is assumed to be a module-level constant used to make the weight
    # initialization reproducible.
    weights_initializer = tf.truncated_normal_initializer(stddev=stddev,
                                                          seed=seed)
    l2_regularizer = None
    if weight_decay and weight_decay > 0:
      l2_regularizer = losses.l2_regularizer(weight_decay)
    weights = variables.variable('weights',
                                 shape=weights_shape,
                                 initializer=weights_initializer,
                                 regularizer=l2_regularizer,
                                 trainable=trainable,
                                 restore=restore)
    if batch_norm_params is not None:
      outputs = tf.matmul(inputs, weights)
      with scopes.arg_scope([batch_norm],
                            is_training=is_training,
                            trainable=trainable,
                            restore=restore):
        outputs = batch_norm(outputs, **batch_norm_params)
    else:
      bias_shape = [num_units_out]
      bias_initializer = tf.constant_initializer(bias)
      biases = variables.variable('biases',
                                  shape=bias_shape,
                                  initializer=bias_initializer,
                                  trainable=trainable,
                                  restore=restore)
      outputs = tf.nn.xw_plus_b(inputs, weights, biases)
    if activation:
      outputs = activation(outputs)
    return outputs
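# Usage sketch (not part of the original file): a small classification head
# built from the op above.  `features` is assumed to be a [batch, 2048] float
# tensor.
hidden = fc(features, 512, weight_decay=0.00004, scope='hidden')
class_logits = fc(hidden, 10, activation=None, scope='class_logits')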
def conv2d(inputs,
           num_filters_out,
           kernel_size,
           stride=1,
           padding='SAME',
           activation=tf.nn.relu,
           stddev=0.01,
           bias=0.0,
           weight_decay=0,
           batch_norm_params=None,
           is_training=True,
           trainable=True,
           restore=True,
           scope=None,
           reuse=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  conv2d creates a variable called 'weights', representing the convolutional
  kernel, that is convolved with the input. If `batch_norm_params` is None, a
  second variable called 'biases' is added to the result of the convolution
  operation.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_filters_out: the number of output filters.
    kernel_size: a list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    stride: a list of length 2: [stride_height, stride_width]. Can be an int
      if both strides are the same. Note that presently both strides must
      have the same value.
    padding: one of 'VALID' or 'SAME'.
    activation: activation function.
    stddev: standard deviation of the truncated Gaussian weight distribution.
    bias: the initial value of the biases.
    weight_decay: the weight decay.
    batch_norm_params: parameters for the batch_norm. If None, batch_norm is
      not used.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
    restore: whether or not the variables should be marked for restore.
    scope: Optional scope for variable_op_scope.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.

  Returns:
    a tensor representing the output of the operation.
  """
  with tf.variable_op_scope([inputs], scope, 'Conv', reuse=reuse):
    kernel_h, kernel_w = _two_element_tuple(kernel_size)
    stride_h, stride_w = _two_element_tuple(stride)
    num_filters_in = inputs.get_shape()[-1]
    weights_shape = [kernel_h, kernel_w, num_filters_in, num_filters_out]
    # `seed` is assumed to be a module-level constant used to make the weight
    # initialization reproducible.
    weights_initializer = tf.truncated_normal_initializer(stddev=stddev,
                                                          seed=seed)
    l2_regularizer = None
    if weight_decay and weight_decay > 0:
      l2_regularizer = losses.l2_regularizer(weight_decay)
    weights = variables.variable('weights',
                                 shape=weights_shape,
                                 initializer=weights_initializer,
                                 regularizer=l2_regularizer,
                                 trainable=trainable,
                                 restore=restore)
    conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
                        padding=padding)
    if batch_norm_params is not None:
      with scopes.arg_scope([batch_norm],
                            is_training=is_training,
                            trainable=trainable,
                            restore=restore):
        outputs = batch_norm(conv, **batch_norm_params)
    else:
      bias_shape = [num_filters_out]
      bias_initializer = tf.constant_initializer(bias)
      biases = variables.variable('biases',
                                  shape=bias_shape,
                                  initializer=bias_initializer,
                                  trainable=trainable,
                                  restore=restore)
      outputs = tf.nn.bias_add(conv, biases)
    if activation:
      outputs = activation(outputs)
    return outputs
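# Usage sketch (not part of the original file): a strided convolution followed
# by a batch-normalized one, mirroring how the Inception stem uses this op.
# `images` is assumed to be a [batch, height, width, 3] float tensor.
net = conv2d(images, 32, [3, 3], stride=2, padding='VALID', scope='conv0')
net = conv2d(net, 64, [3, 3],
             batch_norm_params={'decay': 0.9997, 'epsilon': 0.001},
             scope='conv1')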
def inception_v3(inputs,
                 dropout_keep_prob=0.8,
                 num_classes=1000,
                 is_training=True,
                 restore_logits=True,
                 scope=''):
  """Latest Inception from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"
  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    dropout_keep_prob: dropout keep_prob.
    num_classes: number of predicted classes.
    is_training: whether the model is being trained.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: Optional scope for op_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes].
    end_points: a dictionary of relevant activations, including 'aux_logits'
      and 'predictions'.
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}
  with tf.op_scope([inputs], scope, 'inception_v3'):
    with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                          is_training=is_training):
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='VALID'):
        # 299 x 299 x 3
        end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2,
                                         scope='conv0')
        # 149 x 149 x 32
        end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3],
                                         scope='conv1')
        # 147 x 147 x 32
        end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3],
                                         padding='SAME', scope='conv2')
        # 147 x 147 x 64
        end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
                                           stride=2, scope='pool1')
        # 73 x 73 x 64
        end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1],
                                         scope='conv3')
        # 73 x 73 x 80.
        end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3],
                                         scope='conv4')
        # 71 x 71 x 192.
        end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
                                           stride=2, scope='pool2')
        # 35 x 35 x 192.
        net = end_points['pool2']
      # Inception blocks
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='SAME'):
        # mixed: 35 x 35 x 256.
        with tf.variable_scope('mixed_35x35x256a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
          net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x256a'] = net
        # mixed_1: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288a'] = net
        # mixed_2: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288b'] = net
        # mixed_3: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID')
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3],
                                      stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
          net = tf.concat(3, [branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_17x17x768a'] = net
        # mixed4: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 128, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 128, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768b'] = net
        # mixed_5: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768c'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768c'] = net
        # mixed_6: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768d'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768d'] = net
        # mixed_7: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768e'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 192, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 192, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768e'] = net
        # Auxiliary Head logits
        aux_logits = tf.identity(end_points['mixed_17x17x768e'])
        with tf.variable_scope('aux_logits'):
          aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
                                    padding='VALID')
          aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj')
          # Shape of feature map before the final layer.
          shape = aux_logits.get_shape()
          aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01,
                                  padding='VALID')
          aux_logits = ops.flatten(aux_logits)
          aux_logits = ops.fc(aux_logits, num_classes, activation=None,
                              stddev=0.001, restore=restore_logits)
          end_points['aux_logits'] = aux_logits
        # mixed_8: 8 x 8 x 1280.
        # Note that the scope below is left unchanged so as not to invalidate
        # previously trained checkpoints.
        # (TODO) Fix the scope when appropriate.
        with tf.variable_scope('mixed_17x17x1280a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 192, [1, 1])
            branch3x3 = ops.conv2d(branch3x3, 320, [3, 3],
                                   stride=2, padding='VALID')
          with tf.variable_scope('branch7x7x3'):
            branch7x7x3 = ops.conv2d(net, 192, [1, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3],
                                     stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
          net = tf.concat(3, [branch3x3, branch7x7x3, branch_pool])
          end_points['mixed_17x17x1280a'] = net
        # mixed_9: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat(3, [ops.conv2d(branch3x3, 384, [1, 3]),
                                      ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat(3,
                                     [ops.conv2d(branch3x3dbl, 384, [1, 3]),
                                      ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048a'] = net
        # mixed_10: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat(3, [ops.conv2d(branch3x3, 384, [1, 3]),
                                      ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat(3,
                                     [ops.conv2d(branch3x3dbl, 384, [1, 3]),
                                      ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(3, [branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048b'] = net
        # Final pooling and prediction
        with tf.variable_scope('logits'):
          shape = net.get_shape()
          net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
          # 1 x 1 x 2048
          net = ops.dropout(net, dropout_keep_prob, scope='dropout')
          net = ops.flatten(net, scope='flatten')
          # 2048
          logits = ops.fc(net, num_classes, activation=None, scope='logits',
                          restore=restore_logits)
          # 1000
          end_points['logits'] = logits
          end_points['predictions'] = tf.nn.softmax(logits, name='predictions')
      return logits, end_points
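# Usage sketch (not part of the original file).  `images` is assumed to be a
# [batch, 299, 299, 3] float tensor; the softmax recorded in end_points gives
# per-class probabilities.
logits, end_points = inception_v3(images, num_classes=1000, is_training=False)
probabilities = end_points['predictions']   # [batch, num_classes]
aux_logits = end_points['aux_logits']       # auxiliary head, used in training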