Example #1
def test_FLOPsEstimator():
    x = nn.Variable((1, 3, 12, 12))
    y = PF.depthwise_convolution(x, kernel=(5, 5), with_bias=True)
    t = PF.fused_batch_normalization(y)
    z = F.relu6(F.sigmoid(PF.affine(t, (3, 3), base_axis=2) + 3))
    z = F.global_average_pooling(z)

    est = FLOPsEstimator()
    assert est.predict(z) == 17644
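The snippets on this page omit their imports; they all assume the standard nnabla aliases shown below (the import for FLOPsEstimator in Example #1 depends on where that helper is defined in the surrounding project and is not reproduced here).

# Standard nnabla import aliases assumed by the examples on this page.
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF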
Example #2
def global_average_pooling_data_grad_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming gradients and inputs of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Returns:
      list of nn.Variable: Gradients with respect to the inputs of the corresponding function.
    """
    gdx = inputs[0]
    gdy = F.global_average_pooling(gdx)
    return gdy
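Since global average pooling is linear, the backward of its data gradient is again a global average pooling of the incoming gradient, which is exactly what the helper above returns. A minimal calling sketch, assuming the nn/F aliases listed earlier; the shape is purely illustrative:

gdx = nn.Variable((1, 8, 4, 4))                          # incoming gradient; shape is illustrative
gdy = global_average_pooling_data_grad_backward([gdx])   # returns F.global_average_pooling(gdx)
print(gdy.shape)                                         # -> (1, 8, 1, 1), same layout as the forward output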
Example #3
def construct_architecture(image, num_class, num_cells, num_nodes, both_archs, output_filter, test):
    """
        Construct an architecture based on the given lists.
        Note that the first 2 layers are stem convolutions and have nothing to do with the node operations.
    """
    conv_arch, reduc_arch = both_archs

    aux_logits = None
    used_weights = set()

    pool_distance = num_cells // 3
    pool_layers = [pool_distance - 1, 2*pool_distance - 1]
    pool_layers = [_ for _ in pool_layers if _ > 0]

    if len(pool_layers) > 0:
        aux_head_indices = [pool_layers[-1] + 1]
    else:
        # This should never happen, since num_cells needs to be greater than 3.
        aux_head_indices = [1]

    ref_groups, required_indices = get_reference_layers(num_cells, pool_layers)
    prev_layers = [list() for _ in range(ref_groups[-1] + 1)]

    # Note that this implementation is slightly different from the TensorFlow one.
    if not test:
        image = F.image_augmentation(
            image, angle=0.25, flip_lr=True)  # random_crop, min_scale
        image.need_grad = False
    x = image

    # --------------------------------------- 1st cell ---------------------------------------
    with nn.parameter_scope("stem_conv1"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv1/conv/W", "stem_conv1/bn/gamma", "stem_conv1/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[0])):
        curr_scope = "stem1_reduc{}".format(i)
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)

        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # --------------------------------------- 2nd cell ---------------------------------------
    with nn.parameter_scope("stem_conv2"):
        x = PF.convolution(
            prev_layers[0][-1], output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv2/conv/W", "stem_conv2/bn/gamma", "stem_conv2/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[1])):
        curr_scope = "stem2_reduc{}".format(i)
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)

        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # ------------------------------- Normal / Reduction cells -------------------------------
    for layer_id in range(2, num_cells):
        using_layer_index = ref_groups[layer_id]
        required_index = list(required_indices[layer_id])
        required_index.sort()
        scope = 'w{}'.format(layer_id)

        if layer_id in pool_layers:
            architecture = reduc_arch
        else:
            architecture = conv_arch

        previous_outputs = prev_layers[using_layer_index]
        x, local_used_weights = construct_cell(
            previous_outputs, architecture, num_nodes, previous_outputs[-1].shape[1], scope, test)
        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        required_index.remove(using_layer_index)  # discard an index used above

        # if this output (x) is reused as an input in other cells and
        # its shape needs to be changed, apply downsampling in advance
        for i in required_index:
            curr_scope = "scope{0}_reduc{1}".format(layer_id, i)
            x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)
            local_used_weights = get_factorized_weights_name(curr_scope)
            used_weights.update(local_used_weights)
            prev_layers[i].append(x)

        # auxiliary head, to use the intermediate output for training
        if layer_id in aux_head_indices and not test:
            print("Using aux_head at layer {}".format(layer_id))
            aux_logits = F.relu(x)
            aux_logits = F.average_pooling(aux_logits, (5, 5), (3, 3))

            with nn.parameter_scope("proj"):
                aux_logits = PF.convolution(
                    aux_logits, 128, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"proj/conv/W", "proj/bn/gamma", "proj/bn/beta"})

            with nn.parameter_scope("avg_pool"):
                aux_logits = PF.convolution(
                    aux_logits, 768, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"avg_pool/conv/W", "avg_pool/bn/gamma", "avg_pool/bn/beta"})

            with nn.parameter_scope("additional_fc"):
                aux_logits = F.global_average_pooling(aux_logits)
                aux_logits = PF.affine(aux_logits, num_class, with_bias=False)
            used_weights.update({"additional_fc/affine/W"})

    x = F.global_average_pooling(prev_layers[-1][-1])

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
        used_weights.add("fc/affine/W")

    return pred, aux_logits, used_weights
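A hedged sketch of how such a constructed network is typically wired into a training graph. The batch size, image size, cell counts, and the 0.4 auxiliary-loss weight are illustrative assumptions, and conv_arch / reduc_arch are assumed to be the architecture lists produced by the search step; none of this is part of the original code.

image = nn.Variable((16, 3, 32, 32))
label = nn.Variable((16, 1))
pred, aux_logits, used_weights = construct_architecture(
    image, num_class=10, num_cells=6, num_nodes=5,
    both_archs=(conv_arch, reduc_arch), output_filter=36, test=False)
loss = F.mean(F.softmax_cross_entropy(pred, label))
if aux_logits is not None:
    # The auxiliary head only exists during training; weight its loss down.
    loss += 0.4 * F.mean(F.softmax_cross_entropy(aux_logits, label))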
Example #4
def construct_architecture(image, num_class, operations, output_filter, test,
                           connect_patterns):
    """
        Construct an architecture from the given operations and connection patterns.
    """
    ops = {
        0: conv3x3,
        1: conv5x5,
        2: depthwise_separable_conv3x3,
        3: depthwise_separable_conv5x5,
        4: max_pool,
        5: average_pool
    }

    used_weights = set()

    pool_distance = len(operations) // 3
    pool_layers = [pool_distance - 1, 2 * pool_distance - 1]
    # keep only positive indices
    pool_layers = [idx for idx in pool_layers if idx > 0]

    ref_groups = len(operations) * [0]
    tmp_list = pool_layers + [len(operations) - 1]
    index = 0
    for n in range(len(operations)):
        if n <= tmp_list[index]:
            ref_groups[n] = index
        else:
            index += 1
            ref_groups[n] = index

    # Elements in ref_groups tell you how many times each layer needs pooling,
    # e.g. [0, 0, 0, 1, 1, 1, ..., 2]: the 1st layer needs no pooling,
    # but the last needs 2 poolings to get a spatially reduced variable.

    #required_indices = get_requirement_soft(ref_groups)
    required_indices = get_requirement_strict(ref_groups, connect_patterns,
                                              pool_layers)

    num_of_pooling = len(pool_layers)
    normal_layers = [list()]
    pooled_layers = [list() for j in range(num_of_pooling)]

    prev_layers = normal_layers + pooled_layers
    # prev_layer consists of: [[initial_size_layers], [1x pooled_layers], [2x pooled_layers], ...]

    if not test:
        image = F.image_augmentation(image, angle=0.25, flip_lr=True)
        image.need_grad = False
    x = image

    # Next comes the basic operation. For the first layer,
    # just apply a convolution (to make the input the same size as its successors).

    with nn.parameter_scope("stem_conv"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
        used_weights.update(
            {"stem_conv/conv/W", "stem_conv/bn/gamma", "stem_conv/bn/beta"})
    prev_layers[0].append(x)
    # "unpooled" variable is stored in normal_layers (prev_layers[0]).

    # Then apply factorized reduction (a kind of pooling),
    # but ONLY IF the spatially reduced variable is required,
    # for example, when this layer has a skip connection with later layers.

    for j in range(1, len(prev_layers)):
        if required_indices[0][j]:
            nested_scope = "stem_pool_{}".format(j)
            reduced_var = factorized_reduction(prev_layers[j - 1][-1],
                                               output_filter, nested_scope,
                                               test)
            used_weights.update(get_factorized_weights_name(nested_scope))
        else:
            # dummy variable. Should never be used.
            reduced_var = nn.Variable([1, 1, 1, 1])
        prev_layers[j].append(reduced_var)

    # reduced (or "pooled") variable is stored in pooled_layers (prev_layers[1:]).

    # Basically, repeat the same process for all layers.
    for i, elem in enumerate(operations):
        scope = 'w{}_{}'.format(i, elem)

        # Basic operation (and connect it with previous layers if it has skip connections).

        using_layer_index = ref_groups[i]
        connect_pattern = connect_patterns[i]
        x, local_used_weights = apply_ops_and_connect(
            prev_layers[using_layer_index][-1], prev_layers[using_layer_index],
            connect_pattern, ops, elem, output_filter, scope, test)

        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        # factorized reduction

        for j in range(using_layer_index + 1, len(prev_layers)):
            if required_indices[i + 1][j]:
                nested_scope = "{0}_pool{1}".format(scope, j)
                reduced_var = factorized_reduction(prev_layers[j - 1][-1],
                                                   output_filter, nested_scope,
                                                   test)
                used_weights.update(get_factorized_weights_name(nested_scope))
            else:
                reduced_var = nn.Variable([1, 1, 1, 1])  # dummy variable.
            prev_layers[j].append(reduced_var)

    x = F.global_average_pooling(x)

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
        used_weights.add("fc/affine/W")

    return pred, used_weights
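For concreteness, a standalone trace of the grouping logic used above (illustrative only, not part of the original code): with 12 operations, pool_distance is 4, pool_layers is [3, 7], and each layer is assigned the number of poolings it needs.

operations = list(range(12))                               # 12 dummy layers
pool_distance = len(operations) // 3                       # 4
pool_layers = [pool_distance - 1, 2 * pool_distance - 1]   # [3, 7]
tmp_list = pool_layers + [len(operations) - 1]
ref_groups, index = [], 0
for n in range(len(operations)):
    if n > tmp_list[index]:
        index += 1
    ref_groups.append(index)
print(ref_groups)  # [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]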
Example #5
0
    def call(self, input):
        return F.global_average_pooling(input)
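The call above simply averages over the spatial dimensions. A quick shape check, assuming the nn/F aliases listed earlier and an illustrative input shape:

x = nn.Variable((4, 64, 7, 7))     # (batch, channels, H, W); shape is illustrative
y = F.global_average_pooling(x)    # averages over H and W
print(y.shape)                     # -> (4, 64, 1, 1)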
Example #6
def resnet56_prediction(image,
                        test=False,
                        ncls=10,
                        nmaps=64,
                        act=F.relu,
                        seed=0):
    """
    Construct ResNet 56
    """

    channels = [16, 32, 64]

    # Residual Unit
    def res_unit(x, scope_name, c, i):
        subsampling = i == 0 and c > 16
        strides = (2, 2) if subsampling else (1, 1)

        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x,
                                   c,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   stride=strides,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = act(h)
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h,
                                   c,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)

            if subsampling:
                # Conv -> BN
                with nn.parameter_scope("conv3"):
                    x = PF.convolution(x,
                                       c,
                                       kernel=(1, 1),
                                       pad=(0, 0),
                                       stride=(2, 2),
                                       with_bias=False)
            # Residual -> Nonlinear
            h = act(F.add2(h, x))

            return h

    # Conv -> BN -> Nonlinear
    with nn.parameter_scope("conv1"):
        # Preprocess
        if not test:
            image = F.image_augmentation(image,
                                         min_scale=0.8,
                                         max_scale=1.2,
                                         flip_lr=True,
                                         seed=seed)

            image.need_grad = False
        h = PF.convolution(image,
                           channels[0],
                           kernel=(3, 3),
                           pad=(1, 1),
                           with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)

    for c in channels:
        h = res_unit(h, f"{c}_conv2", c, 0)
        h = res_unit(h, f"{c}_conv3", c, 1)
        h = res_unit(h, f"{c}_conv4", c, 2)
        h = res_unit(h, f"{c}_conv5", c, 3)
        h = res_unit(h, f"{c}_conv6", c, 4)
        h = res_unit(h, f"{c}_conv7", c, 5)
        h = res_unit(h, f"{c}_conv8", c, 6)
        h = res_unit(h, f"{c}_conv9", c, 7)
        h = res_unit(h, f"{c}_conv10", c, 8)

    h = F.global_average_pooling(h)  # -> 1x1
    if test:
        h.need_grad = False
    pred = PF.affine(h, ncls)

    return pred, h
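An illustrative way to use resnet56_prediction for CIFAR-10-sized inputs; the batch size and the loss wiring are assumptions, not taken from the original code.

image = nn.Variable((64, 3, 32, 32))
label = nn.Variable((64, 1))
pred, hidden = resnet56_prediction(image, test=False, ncls=10)
loss = F.mean(F.softmax_cross_entropy(pred, label))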