def __init__(self, filter_count_values=None, initial_conv=None,
             num_classes=40, depth_multiplier=1):
    """Create the layers of the network.

    Args:
        filter_count_values: sequence of four values, one per ``Slim``
            stage. Defaults to ``[16, 32, 48, 64]``.
        initial_conv: sequence of three values forwarded to ``conv_2d`` for
            the first convolution as ``conv_2d(3, v[0], v[1], stride=v[2])``.
            Defaults to ``[96, 7, 2]``.
        num_classes: second argument of the final ``nn.Linear`` layer.
        depth_multiplier: only recorded in ``self.hyper_params``; it is not
            used anywhere else in this constructor.
    """
    super().__init__()

    # Build fresh default lists on every call: a list literal in the
    # signature is created once and would be shared by every instance
    # (and by whatever zip_params stores in hyper_params).
    if filter_count_values is None:
        filter_count_values = [16, 32, 48, 64]
    if initial_conv is None:
        initial_conv = [96, 7, 2]

    # Store architecture hyper_params for model persistence / loading
    self.hyper_params = zip_params(filter_count_values, initial_conv,
                                   num_classes, depth_multiplier)

    self.conv1 = conv_2d(3, initial_conv[0], initial_conv[1],
                         stride=initial_conv[2])
    self.max_pool1 = nn.MaxPool2d(3, 2)

    # Each later stage is fed 3x the previous stage's filter count
    # (presumably Slim emits 3 * filters channels -- confirm against Slim).
    self.slim1 = Slim(initial_conv[0], filter_count_values[0])
    self.max_pool2 = nn.MaxPool2d(3, 2)

    self.slim2 = Slim(filter_count_values[0] * 3, filter_count_values[1])
    self.max_pool3 = nn.MaxPool2d(3, 2)

    self.slim3 = Slim(filter_count_values[1] * 3, filter_count_values[2])
    self.max_pool4 = nn.MaxPool2d(3, 2)

    self.slim4 = Slim(filter_count_values[2] * 3, filter_count_values[3])
    self.max_pool5 = nn.MaxPool2d(3, 2)

    self.global_pool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(filter_count_values[3] * 3, num_classes)

    # Only the first convolution and the classifier are initialised here;
    # the Slim stages are not passed through init_weights by this method.
    for module in (self.conv1, self.fc):
        module.apply(init_weights)
def conv_block(x, num_filters=32, filter_dims=None, fc_size=1024,
               scope='conv_block', batch_size=4):
    """Stack seven convolutions and one fully connected layer on top of `x`.

    Args:
        x: input tensor handed to the first convolution.
        num_filters: filter count for conv1/conv2. It is doubled for
            conv3/conv4 and doubled again for conv5/conv6. conv7 always
            uses 32 filters, independent of this argument.
        filter_dims: kernel dims passed to conv2..conv7 (conv1 always uses
            [7, 7]). Defaults to [5, 5].
        fc_size: `output_units` of the final fully connected layer.
        scope: name of the enclosing variable scope.
        batch_size: leading dimension used when flattening the conv output
            with `tf.reshape(a, shape=[batch_size, -1])`.

    Returns:
        The output of the fully connected layer.
    """
    # Fresh list per call instead of a shared mutable default; kept as a
    # list (not a tuple) because conv_2d receives it unchanged.
    if filter_dims is None:
        filter_dims = [5, 5]

    with tf.variable_scope(scope):
        # downsample image with stride [3, 3]
        a = conv_2d(x, dims=[7, 7], filters=num_filters, strides=[3, 3],
                    std='xavier', padding='VALID', activation=tf.nn.relu,
                    scope='conv1')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1],
                    std='xavier', padding='SAME', activation=tf.nn.relu,
                    scope='conv2')

        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2],
                    std='xavier', padding='VALID', activation=tf.nn.relu,
                    scope='conv3')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1],
                    std='xavier', padding='SAME', activation=tf.nn.relu,
                    scope='conv4')

        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2],
                    std='xavier', padding='VALID', activation=tf.nn.relu,
                    scope='conv5')
        # no downsampling with stride [1, 1]
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[1, 1],
                    std='xavier', padding='SAME', activation=tf.nn.relu,
                    scope='conv6')

        # downsample image with stride [2, 2]
        # NOTE(review): the filter count is reset to a fixed 32 here,
        # ignoring the caller's num_filters -- confirm this is intended.
        num_filters = 32
        a = conv_2d(a, filter_dims, filters=num_filters, strides=[2, 2],
                    std='xavier', padding='VALID', activation=tf.nn.relu,
                    scope='conv7')

        # Convert to vector with fully connected layer
        a = tf.reshape(a, shape=[batch_size, -1])
        a = fully_connected(a, output_units=fc_size, activation=tf.nn.relu,
                            std='xavier', scope='fc')

        # Parenthesised single-argument form: valid on Python 3 and prints
        # the same text under the Python 2 print statement.
        print("output vector of conv_block is: {}".format(a))
        return a
def process(inputs, bypass, name, skip, config, is_training):
    """Build the keypoint branch and return its outputs in a dictionary.

    inputs: input to the network
    bypass: gt to be used when trying to bypass
    name: name of the siamese branch; "img" means the whole image
    skip: whether to apply the bypass information
    config: configuration object; this function reads kp_filter_size,
        kp_input_size, desc_input_size, kp_com_strength and
        kp_scoremap_softmax_strength from it
    is_training: not used by the active code of this function

    Returns
    -------
    If `skip` is true, whatever `bypass_kp(bypass)` returns. Otherwise a
    dict that always holds "scoremap-uncut" and, unless name == "img",
    also "scoremap" (centre crop), "xyz" and "score".
    """

    # let's look at the inputs that get fed into this layer except when we
    # are looking at the whole image
    if name != "img":
        image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_kp(bypass)

    # we always expect a dictionary as return value to be more explicit
    res = {}

    # now abuse cur_in so that we can simply copy paste
    cur_in = inputs

    with tf.variable_scope("conv-ghh-1"):
        nu = 1
        ns = 4
        nm = 4
        cur_in = conv_2d(cur_in, config.kp_filter_size,
                         nu * ns * nm, 1, "VALID")
        cur_in = ghh(cur_in, ns, nm)

    res["scoremap-uncut"] = cur_in

    # ---------------------------------------------------------------------
    # Check how much we need to cut
    kp_input_size = config.kp_input_size
    patch_size = get_patch_size_no_aug(config)
    desc_input_size = config.desc_input_size
    rf = float(kp_input_size) / float(patch_size)

    input_shape = get_tensor_shape(inputs)
    uncut_shape = get_tensor_shape(cur_in)
    req_boundary = np.ceil(rf * np.sqrt(2) * desc_input_size / 2.0).astype(int)
    # Border already lost between the input and the uncut score map
    cur_boundary = (input_shape[2] - uncut_shape[2]) // 2
    crop_size = req_boundary - cur_boundary

    # Stop building the network outputs if we are building for the full image
    if name == "img":
        return res

    # Crop center. A zero crop must be skipped: the slice 0:-0 is 0:0 and
    # would produce an empty score map instead of leaving it untouched.
    # NOTE(review): a negative crop_size still goes through the slice
    # exactly as before -- confirm whether that case can occur.
    if crop_size != 0:
        cur_in = cur_in[:, crop_size:-crop_size, crop_size:-crop_size, :]
    res["scoremap"] = cur_in

    # ---------------------------------------------------------------------
    # Mapping layer to x,y,z
    com_strength = config.kp_com_strength
    scoremap_shape = get_tensor_shape(cur_in)
    od = len(scoremap_shape)
    reduce_axes = list(range(1, od))

    # CoM to get the coordinates
    pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32)
    pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32)

    out = cur_in
    # Subtract the per-sample maximum before exponentiating so the largest
    # exponent is exactly zero.
    max_out = tf.reduce_max(out, axis=reduce_axes, keep_dims=True)
    o = tf.exp(com_strength * (out - max_out))
    sum_o = tf.reduce_sum(o, axis=reduce_axes, keep_dims=True)
    x = tf.reduce_sum(
        o * tf.reshape(pos_array_x, [1, 1, -1, 1]),
        axis=reduce_axes, keep_dims=True
    ) / sum_o
    y = tf.reduce_sum(
        o * tf.reshape(pos_array_y, [1, -1, 1, 1]),
        axis=reduce_axes, keep_dims=True
    ) / sum_o

    # Remove the unnecessary dimensions (i.e. flatten them)
    x = tf.reshape(x, (-1, ))
    y = tf.reshape(y, (-1, ))

    # --------------
    # Turn x, and y into range -1 to 1, where the patch size is
    # mapped to -1 and 1
    orig_patch_width = (
        scoremap_shape[2] + np.cast["float32"](req_boundary * 2.0))
    orig_patch_height = (
        scoremap_shape[1] + np.cast["float32"](req_boundary * 2.0))

    x = ((x + np.cast["float32"](req_boundary)) / np.cast["float32"](
        (orig_patch_width - 1.0) * 0.5) -
        np.cast["float32"](1.0))
    y = ((y + np.cast["float32"](req_boundary)) / np.cast["float32"](
        (orig_patch_height - 1.0) * 0.5) -
        np.cast["float32"](1.0))

    # --------------
    # No movement in z direction
    z = tf.zeros_like(x)

    res["xyz"] = tf.stack([x, y, z], axis=1)

    # ---------------------------------------------------------------------
    # Score: softmax helper applied to the cropped score map over all
    # non-batch axes
    res["score"] = softmax(
        res["scoremap"], axis=reduce_axes,
        softmax_strength=config.kp_scoremap_softmax_strength)

    return res
def process(inputs, bypass, name, skip, config, is_training):
    """Build the orientation branch and return its outputs in a dictionary.

    inputs: input to the network
    bypass: gt to be used when trying to bypass
    name: name of the siamese branch
    skip: whether to apply the bypass information
    config: configuration object; this function reads
        use_input_batch_norm, use_batch_norm, ori_activation and
        use_dropout_ori from it
    is_training: forwarded to every batch_norm call as `training`

    Returns
    -------
    If `skip` is true, whatever `bypass_ori(bypass)` returns. Otherwise a
    dict with the single key "cs": the final activations reshaped to
    (-1, 2) after normalisation.

    Raises
    ------
    RuntimeError: if config.ori_activation is neither 'ghh' nor 'tanh',
        or if config.use_dropout_ori is set (dropout is disabled here).
    """

    def _activate(tensor, ns, nm):
        # Shared rectifier selection for both fully connected stages.
        if config.ori_activation == 'ghh':
            return ghh(tensor, ns, nm)
        elif config.ori_activation == 'tanh':
            return tf.nn.tanh(tensor)
        raise RuntimeError("Bad orientation rectifier")

    # let's look at the inputs that get fed into this layer
    image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_ori(bypass)

    # we always expect a dictionary as return value to be more explicit
    res = {}

    # now abuse cur_in so that we can simply copy paste
    cur_in = inputs

    # lets apply batch normalization on the input - we did not normalize the
    # input range!
    with tf.variable_scope("input-bn"):
        if config.use_input_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)

    with tf.variable_scope("conv-act-pool-1"):
        cur_in = conv_2d(cur_in, 5, 10, 1, "VALID")
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = tf.nn.relu(cur_in)
        cur_in = pool_max(cur_in, 2, 2, "VALID")

    with tf.variable_scope("conv-act-pool-2"):
        cur_in = conv_2d(cur_in, 5, 20, 1, "VALID")
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = tf.nn.relu(cur_in)
        cur_in = pool_max(cur_in, 2, 2, "VALID")

    with tf.variable_scope("conv-act-pool-3"):
        cur_in = conv_2d(cur_in, 3, 50, 1, "VALID")
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = tf.nn.relu(cur_in)
        cur_in = pool_max(cur_in, 2, 2, "VALID")

    with tf.variable_scope("fc-ghh-drop-4"):
        nu = 100
        ns = 4
        nm = 4
        cur_in = fc(cur_in, nu * ns * nm)
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = _activate(cur_in, ns, nm)
        if config.use_dropout_ori:
            # Dropout is deliberately refused. The tf.nn.dropout call that
            # used to follow this raise could never execute and was removed.
            raise RuntimeError('Dropout not working properly!')

    with tf.variable_scope("fc-ghh-5"):
        nu = 2
        ns = 4
        nm = 4
        cur_in = fc(cur_in, nu * ns * nm)
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        # NOTE(review): with 'tanh' the fc width nu * ns * nm is presumably
        # kept as-is, so the reshape to (-1, 2) below would fold the extra
        # columns into rows -- confirm that 'tanh' is really supported here.
        cur_in = _activate(cur_in, ns, nm)

    with tf.variable_scope("cs-norm"):
        eps = 1e-10
        # First, normalize according to the maximum of the two
        cur_in_abs_max = tf.reduce_max(tf.abs(cur_in), axis=1, keep_dims=True)
        cur_in = cur_in / tf.maximum(eps, cur_in_abs_max)
        # Add an epsilon to avoid singularity: push every entry away from
        # zero, in the direction of its sign
        eps = 1e-3
        cur_in += tf.to_float(cur_in >= 0) * eps - tf.to_float(cur_in < 0) * eps
        # Now make norm one without worrying about div by zero
        cur_in_norm = tf.sqrt(tf.reduce_sum(tf.square(
            cur_in), axis=1, keep_dims=True))
        cur_in /= cur_in_norm

    res["cs"] = tf.reshape(cur_in, (-1, 2))

    return res
def process(inputs, bypass, name, skip, config, is_training):
    """Build the descriptor branch and return {"desc": ...}.

    inputs: input to the network
    bypass: gt to be used when trying to bypass (not read by this function)
    name: name of the siamese branch
    skip: whether to apply the bypass information; must be False here
    config: configuration object; this function reads desc_activ,
        desc_pool, use_input_batch_norm and use_batch_norm from it
    is_training: forwarded to every batch_norm call as `training`

    Note
    ----
    We don't have to worry about the reuse flag here, since it is already
    dealt with in the higher level. We just need to inherit it.
    """

    # The descriptor is never skipped
    assert skip is False

    # a dictionary is always returned, to be explicit about the outputs
    outputs = {}

    # summarise what is being fed into this branch
    image_summary_nhwc(name + "-input", inputs)

    # Pick the non-linearity applied after every convolution
    if config.desc_activ == "tanh":
        nonlinearity = tf.nn.tanh
    elif config.desc_activ == "relu":
        nonlinearity = tf.nn.relu
    else:
        raise RuntimeError('Unknown activation type')

    def _downsample(tensor, kind, size):
        # Non-overlapping pooling: window and stride are both `size`
        window = (1, size, size, 1)
        if kind == "l2_pool":
            return pool_l2(tensor, size, size, "VALID")
        if kind == "max_pool":
            return tf.nn.max_pool(tensor, window, window, "VALID")
        if kind == "avg_pool":
            return tf.nn.avg_pool(tensor, window, window, "VALID")
        raise RuntimeError('Unknown pooling type')

    net = inputs

    # Batch-normalise the raw input when requested, since the input range
    # was not normalised beforehand
    with tf.variable_scope("input-bn"):
        if config.use_input_batch_norm:
            net = batch_norm(net, training=is_training)

    # (variable scope, conv kernel size, conv output channels, pool size)
    stages = (
        ("conv-act-pool-norm-1", 7, 32, 2),
        ("conv-act-pool-norm-2", 6, 64, 3),
        ("conv-act-pool-3", 5, 128, 4),
    )
    for scope_name, kernel, channels, pool_size in stages:
        with tf.variable_scope(scope_name):
            net = conv_2d(net, kernel, channels, 1, "VALID")
            if config.use_batch_norm:
                net = batch_norm(net, training=is_training)
            net = nonlinearity(net)
            net = _downsample(net, config.desc_pool, pool_size)

    outputs["desc"] = tf.reshape(net, (-1, 128))

    return outputs