import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF


def test_FLOPsEstimator():
    # FLOPsEstimator is assumed to be imported from the module under test.
    x = nn.Variable((1, 3, 12, 12))
    y = PF.depthwise_convolution(x, kernel=(5, 5), with_bias=True)
    t = PF.fused_batch_normalization(y)
    z = F.relu6(F.sigmoid(PF.affine(t, (3, 3), base_axis=2) + 3))
    z = F.global_average_pooling(z)

    est = FLOPsEstimator()
    assert est.predict(z) == 17644
def global_average_pooling_data_grad_backward(inputs):
    """
    Args:
        inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.

    Returns:
        nn.Variable: The gradient w.r.t. the input of the corresponding function.
    """
    gdx = inputs[0]
    gdy = F.global_average_pooling(gdx)
    return gdy
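# A minimal check sketch (assumes nnabla is installed): the data-grad backward
# of global average pooling is itself a global average pooling, so feeding a
# dummy upstream gradient through the function above should average it per
# channel. The input shape below is arbitrary, chosen for illustration.
import numpy as np


def _check_global_average_pooling_data_grad_backward():
    gdx = nn.Variable.from_numpy_array(
        np.arange(48, dtype=np.float32).reshape(1, 3, 4, 4))
    gdy = global_average_pooling_data_grad_backward([gdx])
    gdy.forward()
    # Each channel collapses to its mean: output shape is (1, 3, 1, 1).
    assert gdy.d.shape == (1, 3, 1, 1)
    assert np.allclose(gdy.d.flatten(), gdx.d.reshape(3, -1).mean(axis=1))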
def construct_architecture(image, num_class, num_cells, num_nodes, both_archs, output_filter, test):
    """
    Construct an architecture based on the given lists.
    Note that the first 2 layers are stem convolutions and have nothing to do
    with node operations.
    """
    conv_arch, reduc_arch = both_archs

    aux_logits = None
    used_weights = set()

    pool_distance = num_cells // 3
    pool_layers = [pool_distance - 1, 2*pool_distance - 1]
    pool_layers = [_ for _ in pool_layers if _ > 0]

    if len(pool_layers) > 0:
        aux_head_indices = [pool_layers[-1] + 1]
    else:
        # this must not happen, since num_cells needs to be more than 3.
        aux_head_indices = [1]

    ref_groups, required_indices = get_reference_layers(num_cells, pool_layers)
    prev_layers = [list() for _ in range(ref_groups[-1] + 1)]

    # Note that this implementation is slightly different from the TensorFlow one.
    if not test:
        image = F.image_augmentation(
            image, angle=0.25, flip_lr=True)  # random_crop, min_scale
        image.need_grad = False
    x = image

    # --------------------------------------- 1st cell ---------------------------------------
    with nn.parameter_scope("stem_conv1"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv1/conv/W", "stem_conv1/bn/gamma", "stem_conv1/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[0])):
        curr_scope = "stem1_reduc{}".format(i)
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)
        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # --------------------------------------- 2nd cell ---------------------------------------
    with nn.parameter_scope("stem_conv2"):
        x = PF.convolution(
            prev_layers[0][-1], output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv2/conv/W", "stem_conv2/bn/gamma", "stem_conv2/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[1])):
        curr_scope = "stem2_reduc{}".format(i)
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)
        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # ------------------------------- Normal / Reduction cells -------------------------------
    for layer_id in range(2, num_cells):
        using_layer_index = ref_groups[layer_id]
        required_index = list(required_indices[layer_id])
        required_index.sort()
        scope = 'w{}'.format(layer_id)

        if layer_id in pool_layers:
            architecture = reduc_arch
        else:
            architecture = conv_arch

        previous_outputs = prev_layers[using_layer_index]
        x, local_used_weights = construct_cell(
            previous_outputs, architecture, num_nodes,
            previous_outputs[-1].shape[1], scope, test)
        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        required_index.remove(using_layer_index)  # discard the index used above

        # if this output (x) is reused as an input in other cells and
        # its shape needs to be changed, apply downsampling in advance
        for i in required_index:
            curr_scope = "scope{0}_reduc{1}".format(layer_id, i)
            x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)
            local_used_weights = get_factorized_weights_name(curr_scope)
            used_weights.update(local_used_weights)
            prev_layers[i].append(x)

        # auxiliary head, to use the intermediate output for training
        if layer_id in aux_head_indices and not test:
            print("Using aux_head at layer {}".format(layer_id))
            aux_logits = F.relu(x)
            aux_logits = F.average_pooling(aux_logits, (5, 5), (3, 3))

            with nn.parameter_scope("proj"):
                aux_logits = PF.convolution(
                    aux_logits, 128, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"proj/conv/W", "proj/bn/gamma", "proj/bn/beta"})

            with nn.parameter_scope("avg_pool"):
                aux_logits = PF.convolution(
                    aux_logits, 768, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"avg_pool/conv/W", "avg_pool/bn/gamma", "avg_pool/bn/beta"})

            with nn.parameter_scope("additional_fc"):
                aux_logits = F.global_average_pooling(aux_logits)
                aux_logits = PF.affine(aux_logits, num_class, with_bias=False)
            used_weights.update({"additional_fc/affine/W"})

    x = F.global_average_pooling(prev_layers[-1][-1])

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
    used_weights.add("fc/affine/W")

    return pred, aux_logits, used_weights
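# Sketch of how the auxiliary logits returned above are typically consumed
# (an assumption here, following the standard NASNet/ENAS recipe of weighting
# the auxiliary cross-entropy by a small factor; `aux_weight=0.4` and the
# function name are illustrative, not taken from this codebase).
def training_loss_with_aux(pred, aux_logits, label, aux_weight=0.4):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    if aux_logits is not None:
        # mix in the auxiliary classifier's loss during training
        loss += aux_weight * F.mean(F.softmax_cross_entropy(aux_logits, label))
    return loss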
def construct_architecture(image, num_class, operations, output_filter, test, connect_patterns):
    """
    Construct an architecture based on the given operations and connection patterns.
    """
    ops = {
        0: conv3x3,
        1: conv5x5,
        2: depthwise_separable_conv3x3,
        3: depthwise_separable_conv5x5,
        4: max_pool,
        5: average_pool,
    }

    used_weights = set()

    pool_distance = len(operations) // 3
    pool_layers = [pool_distance - 1, 2 * pool_distance - 1]
    # exclude negative indices
    pool_layers = [idx for idx in pool_layers if idx > 0]

    ref_groups = len(operations) * [0]
    tmp_list = pool_layers + [len(operations) - 1]
    index = 0
    for n in range(len(operations)):
        if n <= tmp_list[index]:
            ref_groups[n] = index
        else:
            index += 1
            ref_groups[n] = index

    # elements in ref_groups tell you how many times you need to do pooling.
    # e.g. [0, 0, 0, 1, 1, 1, ..., 2]: the 1st layer needs no pooling,
    # but the last needs 2 poolings to get spatially reduced variables.

    # required_indices = get_requirement_soft(ref_groups)
    required_indices = get_requirement_strict(
        ref_groups, connect_patterns, pool_layers)

    num_of_pooling = len(pool_layers)
    normal_layers = [list()]
    pooled_layers = [list() for j in range(num_of_pooling)]

    prev_layers = normal_layers + pooled_layers
    # prev_layers consists of:
    # [[initial_size_layers], [1x pooled_layers], [2x pooled_layers], ...]

    if not test:
        image = F.image_augmentation(image, angle=0.25, flip_lr=True)
        image.need_grad = False
    x = image

    # next comes the basic operation. for the first layer, just apply a
    # convolution (to make the size of the input the same as that of its successors)
    with nn.parameter_scope("stem_conv"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv/conv/W", "stem_conv/bn/gamma", "stem_conv/bn/beta"})
    prev_layers[0].append(x)
    # the "unpooled" variable is stored in normal_layers (prev_layers[0]).

    # then apply factorized reduction (a kind of pooling),
    # but ONLY IF the spatially-reduced variable is required,
    # for example, when this layer has a skip connection with latter layers.
    for j in range(1, len(prev_layers)):
        if required_indices[0][j]:
            nested_scope = "stem_pool_{}".format(j)
            reduced_var = factorized_reduction(
                prev_layers[j - 1][-1], output_filter, nested_scope, test)
            used_weights.update(get_factorized_weights_name(nested_scope))
        else:
            # dummy variable. Should never be used.
            reduced_var = nn.Variable([1, 1, 1, 1])
        prev_layers[j].append(reduced_var)
    # the reduced (or "pooled") variable is stored in pooled_layers (prev_layers[1:]).

    # basically, repeat the same process for all remaining layers.
    for i, elem in enumerate(operations):
        scope = 'w{}_{}'.format(i, elem)

        # basic operation (and connect it with previous layers if it has skip connections)
        using_layer_index = ref_groups[i]
        connect_pattern = connect_patterns[i]
        x, local_used_weights = apply_ops_and_connect(
            prev_layers[using_layer_index][-1],
            prev_layers[using_layer_index],
            connect_pattern, ops, elem, output_filter, scope, test)

        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        # factorized reduction
        for j in range(using_layer_index + 1, len(prev_layers)):
            if required_indices[i + 1][j]:
                nested_scope = "{0}_pool{1}".format(scope, j)
                reduced_var = factorized_reduction(
                    prev_layers[j - 1][-1], output_filter, nested_scope, test)
                used_weights.update(get_factorized_weights_name(nested_scope))
            else:
                reduced_var = nn.Variable([1, 1, 1, 1])  # dummy variable.
            prev_layers[j].append(reduced_var)

    x = F.global_average_pooling(x)

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
    used_weights.add("fc/affine/W")

    return pred, used_weights
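# Hypothetical invocation sketch. The op ids index the `ops` table above
# (0: conv3x3 ... 5: average_pool). The format of connect_patterns is an
# assumption here: connect_patterns[i] is taken to be a 0/1 list over the i
# preceding layers. The helper functions (conv3x3, apply_ops_and_connect,
# factorized_reduction, ...) are assumed to be defined in this module.
def _build_example_macro_net():
    image = nn.Variable((32, 3, 32, 32))
    operations = [0, 2, 4, 1, 3, 5]  # one op id per layer
    connect_patterns = [[0] * i for i in range(len(operations))]  # no skips
    return construct_architecture(
        image, num_class=10, operations=operations,
        output_filter=36, test=True, connect_patterns=connect_patterns)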
def call(self, input):
    """Collapse the spatial dimensions of an NCHW variable to 1x1."""
    return F.global_average_pooling(input)
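# Self-contained sketch of the kind of module this `call` presumably belongs
# to (the class name and plain-object base are assumptions for illustration;
# in the original codebase the base would likely be a Module class).
class GlobalAvgPool:
    def call(self, input):
        return F.global_average_pooling(input)


def _check_global_avg_pool_module():
    x = nn.Variable((8, 64, 7, 7))
    y = GlobalAvgPool().call(x)
    assert y.shape == (8, 64, 1, 1)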
def resnet56_prediction(image, test=False, ncls=10, nmaps=64, act=F.relu, seed=0):
    """
    Construct ResNet-56.
    """
    channels = [16, 32, 64]

    # Residual Unit
    def res_unit(x, scope_name, c, i):
        subsampling = i == 0 and c > 16
        strides = (2, 2) if subsampling else (1, 1)
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, c, kernel=(3, 3), pad=(1, 1),
                                   stride=strides, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = act(h)
            # Conv -> BN
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, c, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            if subsampling:
                # Conv -> BN (projection shortcut for the subsampled stage)
                with nn.parameter_scope("conv3"):
                    x = PF.convolution(x, c, kernel=(1, 1), pad=(0, 0),
                                       stride=(2, 2), with_bias=False)
            # Residual -> Nonlinear
            h = act(F.add2(h, x))
        return h

    # Conv -> BN -> Nonlinear
    with nn.parameter_scope("conv1"):
        # Preprocess
        if not test:
            image = F.image_augmentation(image, min_scale=0.8, max_scale=1.2,
                                         flip_lr=True, seed=seed)
            image.need_grad = False
        h = PF.convolution(image, channels[0], kernel=(3, 3), pad=(1, 1),
                           with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)

    for c in channels:
        h = res_unit(h, f"{c}_conv2", c, 0)
        h = res_unit(h, f"{c}_conv3", c, 1)
        h = res_unit(h, f"{c}_conv4", c, 2)
        h = res_unit(h, f"{c}_conv5", c, 3)
        h = res_unit(h, f"{c}_conv6", c, 4)
        h = res_unit(h, f"{c}_conv7", c, 5)
        h = res_unit(h, f"{c}_conv8", c, 6)
        h = res_unit(h, f"{c}_conv9", c, 7)
        h = res_unit(h, f"{c}_conv10", c, 8)

    h = F.global_average_pooling(h)  # -> 1x1
    if test:
        h.need_grad = False
    pred = PF.affine(h, ncls)
    return pred, h
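# Usage sketch: build the inference graph for a CIFAR-10-sized input and check
# the output shapes (batch of 4 images, 10 classes; the pooled feature map of
# ResNet-56 has 64 channels).
def _check_resnet56_shapes():
    image = nn.Variable((4, 3, 32, 32))
    pred, pooled = resnet56_prediction(image, test=True, ncls=10)
    assert pred.shape == (4, 10)
    assert pooled.shape == (4, 64, 1, 1)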