Example #1
0
def setLayers(data_source,
              batch_size,
              layername,
              kernel,
              stride,
              outCH,
              label_name,
              transform_param_in,
              deploy=False):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to creat a workaround
    # producing training and testing prototxt files is pretty straight forward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for deploy net
    if deploy == False:
        n.data, n.tops['label'] = L.CPMData(cpmdata_param=dict(
            backend=1, source=data_source, batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]] = L.Slice(
            n.label, slice_param=dict(axis=1, slice_point=15), ntop=2)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.pool_center_lower = L.Pooling(n.center_map,
                                    kernel_size=9,
                                    stride=8,
                                    pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            if stage == 1:
                lr_m = 5
            else:
                lr_m = 1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] != 'L':
                if (state == 'image'):
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])

            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer],
                n.tops[last_connect],
                n.pool_center_lower,
                concat_param=dict(axis=1))
            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point

    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
Example #2
0
def setLayers_twoBranches(data_source,
                          batch_size,
                          layername,
                          kernel,
                          stride,
                          outCH,
                          label_name,
                          transform_param_in,
                          deploy=False,
                          batchnorm=0,
                          lr_mult_distro=[1, 1, 1]):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to creat a workaround
    # producing training and testing prototxt files is pretty straight forward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source),
                                                       ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source),
                ntop=3)
    # produce data definition for deploy net
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1,
                            source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)

    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print ReLUname
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                if stage == 1:
                    lr_m = lr_mult_distro[1]
                else:
                    lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)

            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                #last_layer = ReLUname
                print ReLUname

            #conv_counter += 1
            local_counter += 1

        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    if stage == 1:
                        lr_m = lr_mult_distro[1]
                    else:
                        lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19

                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]],
                    kernel_size=kernel[l],
                    num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print '%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m)

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print ReLUname

            conv_counter += 1
            local_counter += 1

        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print last_layer[0]

        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])

            print 'loss %d' % stage
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L2':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d' % stage],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])

                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L3':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           n.tops[label_name[2]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]],
                n.tops[last_layer[1]],
                n.tops[share_point],
                concat_param=dict(axis=1))

            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print last_layer
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print 'share'

    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
Example #3
0
def setLayers(data_source,
              batch_size,
              layername,
              kernel,
              stride,
              outCH,
              label_name,
              transform_param_in,
              deploy=False):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to creat a workaround
    # producing training and testing prototxt files is pretty straight forward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for deploy net
    if deploy == False:
        # here we will return the new structure for loading h36m dataset
        n.data, n.tops['label'] = L.CPMData(cpmdata_param=dict(
            backend=1, source=data_source, batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]], n.tops[
            label_name[2]] = L.Slice(n.label,
                                     slice_param=dict(axis=1,
                                                      slice_point=[18, 36]),
                                     ntop=3)
        n.image, n.center_map = L.Slice(n.data,
                                        slice_param=dict(axis=1,
                                                         slice_point=3),
                                        ntop=2)
    else:
        input = "data"
        dim1 = 18
        dim2 = 5
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()
        # Slice layer slices input layer to multiple output along a given dimension
        # axis: 1 define in which dimension to slice
        # slice_point: 3 define the index in the selected dimension (the number of
        # indices must be equal to the number of top blobs minus one)
        # Considering input Nx3x1x1, by slice_point = 2
        # top1 : Nx2x1x1
        # top2 : Nx1x1x1
        n.image, n.center_map, n.tops[label_name[2]] = L.Slice(
            n.data, slice_param=dict(axis=1, slice_point=[3, 4]), ntop=3)

    n.pool_center_lower = L.Pooling(n.center_map,
                                    kernel_size=9,
                                    stride=8,
                                    pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    last_manifold = 'NONE'
    last_merg = 'NONE'
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0
    manifold_current_stage = False
    merge_init_avg = False

    for l in range(0, len(layername)):
        decay_mult = 1

        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            #if stage == 1:
            #    lr_m = 5
            #else:
            #    lr_m = 1
            lr_m = 1e-3  # 1e-3 (best res so far)
            if ((stage == 1 and conv_counter == 7) or
                (stage > 1 and state != 'image' and (conv_counter in [1, 5]))):
                conv_name = '%s_new' % conv_name
                lr_m = 1  #1e-2
                decay_mult = 1

#            if (stage <= 4):
#                lr_m = 0
#                decay_mult = 0
# additional for python layer
#            if (stage > 1 and state != 'image' and (conv_counter == 1)):
#                conv_name = '%s_mf' % conv_name
#                lr_m = 1 #1e-1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=decay_mult),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if not (layername[l + 1] == 'L' or layername[l + 1] == 'M'):
                if (state == 'image'):
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'M':
            last_manifold = 'manifolds_stage%d' % stage
            last_merg = 'merge_hm_stage%d' % stage
            debug_mode = 0
            #            if (stage == 5):
            #                debug_mode = 1
            manifold_current_stage = True
            if (stage >= 4):
                merge_init_avg = True
            # TODO: change it back


#            if stage == 4:
#                debug_mode = 4
            parameters = '{"njoints": 17,"sigma": 1, "debug_mode": %r, "max_area": 100, "percentage_max": 3, "train": %u, "Lambda": %.3f }' % (
                debug_mode, not deploy, 0.05)
            # DONE: change it back
            # if manifold_current_stage:
            n.tops[last_manifold] = L.Python(
                n.tops[last_layer],
                n.tops[label_name[2]],
                python_param=dict(module='newheatmaps',
                                  layer='MyCustomLayer',
                                  param_str=parameters))  #,loss_weight=1)
            #            n.tops[last_manifold] = L.Python(n.tops[label_name[1]],n.tops[label_name[2]],python_param=dict(module='newheatmaps',layer='MyCustomLayer',param_str=parameters))#,loss_weight=1)
            init_str = 'zero'
            if merge_init_avg:
                init_str = 'avg'
            merge_lr = 5e-2
            parameters = '{"init": %r, "learning_rate": %r}' % (init_str,
                                                                merge_lr)
            n.tops[last_merg] = L.Python(n.tops[last_layer],
                                         n.tops[last_manifold],
                                         python_param=dict(
                                             module='processheatmaps',
                                             layer='MergeHeatMaps',
                                             param_str=parameters))
        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])

            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            # DONE: change it back
            #  no this          n.tops['concat_stage%d' % stage] = L.Concat(n.tops[last_layer], n.tops[last_connect], n.pool_center_lower, n.tops[label_name[1]], concat_param=dict(axis=1))
            #            n.tops['concat_stage%d' % stage] = L.Concat(n.tops[last_layer], n.tops[last_connect], n.pool_center_lower, n.tops[last_manifold], concat_param=dict(axis=1))
            if manifold_current_stage:
                n.tops['concat_stage%d' % stage] = L.Concat(
                    n.tops[last_layer],
                    n.tops[last_merg],
                    n.pool_center_lower,
                    concat_param=dict(axis=1))
            else:
                n.tops['concat_stage%d' % stage] = L.Concat(
                    n.tops[last_layer],
                    n.tops[last_connect],
                    n.pool_center_lower,
                    concat_param=dict(axis=1))

            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point
    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])