Example #1
0
    def __init__(self):
        """Tiny-YOLO (VOC, 20-class) backbone built as a single nn.Sequential.

        The final 1x1 conv emits (5 + num_classes) * num_anchors channels per
        grid cell: (x, y, w, h, objectness) plus class scores for each anchor.
        """
        super(TinyYoloNet, self).__init__()
        self.num_classes = 20
        # Flat list of (w, h) anchor pairs, in grid-cell units.
        self.anchors = [
            1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52
        ]
        # Floor division: '/' yields a float on Python 3, which would make
        # num_output a float and crash the nn.Conv2d output layer below.
        self.num_anchors = len(self.anchors) // 2
        num_output = (5 + self.num_classes) * self.num_anchors

        self.cnn = nn.Sequential(
            OrderedDict([
                # conv1: 3 -> 16, spatial /2
                ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
                ('bn1', nn.BatchNorm2d(16)),
                ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
                ('pool1', nn.MaxPool2d(2, 2)),

                # conv2: 16 -> 32, spatial /2
                ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
                ('bn2', nn.BatchNorm2d(32)),
                ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
                ('pool2', nn.MaxPool2d(2, 2)),

                # conv3: 32 -> 64, spatial /2
                ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
                ('bn3', nn.BatchNorm2d(64)),
                ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
                ('pool3', nn.MaxPool2d(2, 2)),

                # conv4: 64 -> 128, spatial /2
                ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
                ('bn4', nn.BatchNorm2d(128)),
                ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
                ('pool4', nn.MaxPool2d(2, 2)),

                # conv5: 128 -> 256, spatial /2
                ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
                ('bn5', nn.BatchNorm2d(256)),
                ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
                ('pool5', nn.MaxPool2d(2, 2)),

                # conv6: 256 -> 512; stride-1 max pool preserves spatial size
                ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
                ('bn6', nn.BatchNorm2d(512)),
                ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
                ('pool6', MaxPoolStride1()),

                # conv7: 512 -> 1024
                ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
                ('bn7', nn.BatchNorm2d(1024)),
                ('leaky7', nn.LeakyReLU(0.1, inplace=True)),

                # conv8: 1024 -> 1024
                ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)),
                ('bn8', nn.BatchNorm2d(1024)),
                ('leaky8', nn.LeakyReLU(0.1, inplace=True)),

                # output: 1x1 conv down to the per-anchor detection channels
                ('output', nn.Conv2d(1024, num_output, 1, 1, 0)),
            ]))
    def create_network(self, net_info):
        """Translate a parsed Caffe prototxt description into PyTorch modules.

        Args:
            net_info: dict with 'props' (network-level properties including
                the input shape) and 'layers' (ordered list of layer dicts
                as parsed from the prototxt; scalar values are strings).

        Returns:
            OrderedDict mapping layer name -> nn.Module, in network order.

        Side effects:
            A 'Region' layer emits no module; instead its parameters are
            recorded on self (anchors, num_anchors, anchor_step, num_classes).
        """
        models = OrderedDict()
        # Per-blob bookkeeping so each layer knows its input geometry.
        # A width/height of -1 marks a flattened (fully-connected) blob.
        blob_channels = dict()
        blob_width = dict()
        blob_height = dict()

        layers = net_info['layers']
        props = net_info['props']
        layer_num = len(layers)

        # Input geometry comes from 'input_shape' or the legacy 'input_dim'.
        # (dict.has_key was removed in Python 3 -- use the 'in' operator.)
        if 'input_shape' in props:
            blob_channels['data'] = int(props['input_shape']['dim'][1])
            blob_height['data'] = int(props['input_shape']['dim'][2])
            blob_width['data'] = int(props['input_shape']['dim'][3])
        else:
            blob_channels['data'] = int(props['input_dim'][1])
            blob_height['data'] = int(props['input_dim'][2])
            blob_width['data'] = int(props['input_dim'][3])
        i = 0
        while i < layer_num:
            layer = layers[i]
            lname = layer['name']
            ltype = layer['type']
            if ltype == 'Data':
                i = i + 1
                continue
            bname = layer['bottom']
            tname = layer['top']
            if ltype == 'Convolution':
                convolution_param = layer['convolution_param']
                channels = blob_channels[bname]
                out_filters = int(convolution_param['num_output'])
                kernel_size = int(convolution_param['kernel_size'])
                stride = int(convolution_param['stride']
                             ) if 'stride' in convolution_param else 1
                pad = int(convolution_param['pad']
                          ) if 'pad' in convolution_param else 0
                group = int(convolution_param['group']
                            ) if 'group' in convolution_param else 1
                bias = convolution_param.get('bias_term') != 'false'
                # 'groups' must be passed by keyword: the sixth positional
                # argument of nn.Conv2d is dilation, not groups.
                models[lname] = nn.Conv2d(channels,
                                          out_filters,
                                          kernel_size,
                                          stride,
                                          pad,
                                          groups=group,
                                          bias=bias)
                blob_channels[tname] = out_filters
                # Floor division keeps blob sizes integral on Python 3.
                blob_width[tname] = (blob_width[bname] + 2 * pad -
                                     kernel_size) // stride + 1
                blob_height[tname] = (blob_height[bname] + 2 * pad -
                                      kernel_size) // stride + 1
                i = i + 1
            elif ltype == 'BatchNorm':
                # Caffe splits normalization into BatchNorm + Scale; PyTorch's
                # BatchNorm2d covers both, so the Scale layer is consumed here.
                assert (i + 1 < layer_num)
                assert (layers[i + 1]['type'] == 'Scale')
                momentum = 0.9
                if 'batch_norm_param' in layer and 'moving_average_fraction' \
                        in layer['batch_norm_param']:
                    momentum = float(
                        layer['batch_norm_param']['moving_average_fraction'])
                channels = blob_channels[bname]
                models[lname] = nn.BatchNorm2d(channels, momentum=momentum)
                # The fused pair outputs to the Scale layer's top blob.
                tname = layers[i + 1]['top']
                blob_channels[tname] = channels
                blob_width[tname] = blob_width[bname]
                blob_height[tname] = blob_height[bname]
                i = i + 2
            elif ltype == 'ReLU':
                # Caffe in-place layers have bottom == top.
                inplace = (bname == tname)
                if 'relu_param' in layer and 'negative_slope' in layer[
                        'relu_param']:
                    negative_slope = float(
                        layer['relu_param']['negative_slope'])
                    models[lname] = nn.LeakyReLU(negative_slope=negative_slope,
                                                 inplace=inplace)
                else:
                    models[lname] = nn.ReLU(inplace=inplace)
                blob_channels[tname] = blob_channels[bname]
                blob_width[tname] = blob_width[bname]
                blob_height[tname] = blob_height[bname]
                i = i + 1
            elif ltype == 'Pooling':
                kernel_size = int(layer['pooling_param']['kernel_size'])
                stride = int(layer['pooling_param']['stride'])
                padding = 0
                if 'pad' in layer['pooling_param']:
                    padding = int(layer['pooling_param']['pad'])
                pool_type = layer['pooling_param']['pool']
                if pool_type == 'MAX' and kernel_size == 2 and stride == 1:  # for tiny-yolo-voc
                    # Special size-preserving pool used by tiny-yolo-voc.
                    models[lname] = MaxPoolStride1()
                    blob_width[tname] = blob_width[bname]
                    blob_height[tname] = blob_height[bname]
                else:
                    if pool_type == 'MAX':
                        models[lname] = nn.MaxPool2d(kernel_size,
                                                     stride,
                                                     padding=padding)
                    elif pool_type == 'AVE':
                        models[lname] = nn.AvgPool2d(kernel_size,
                                                     stride,
                                                     padding=padding)

                    # Floor division keeps blob sizes integral on Python 3.
                    if stride > 1:
                        blob_width[tname] = (blob_width[bname] - kernel_size +
                                             1) // stride + 1
                        blob_height[tname] = (blob_height[bname] -
                                              kernel_size + 1) // stride + 1
                    else:
                        blob_width[tname] = blob_width[bname] - kernel_size + 1
                        blob_height[
                            tname] = blob_height[bname] - kernel_size + 1
                blob_channels[tname] = blob_channels[bname]
                i = i + 1
            elif ltype == 'Eltwise':
                operation = 'SUM'
                if 'eltwise_param' in layer and 'operation' in layer[
                        'eltwise_param']:
                    operation = layer['eltwise_param']['operation']
                # Eltwise has two bottoms; both are assumed to share a shape.
                bname0 = bname[0]
                bname1 = bname[1]
                models[lname] = Eltwise(operation)
                blob_channels[tname] = blob_channels[bname0]
                blob_width[tname] = blob_width[bname0]
                blob_height[tname] = blob_height[bname0]
                i = i + 1
            elif ltype == 'InnerProduct':
                filters = int(layer['inner_product_param']['num_output'])
                if blob_width[bname] != -1 or blob_height[bname] != -1:
                    # Spatial input: flatten (FCView) before the linear layer.
                    channels = blob_channels[bname] * blob_width[
                        bname] * blob_height[bname]
                    models[lname] = nn.Sequential(FCView(),
                                                  nn.Linear(channels, filters))
                else:
                    channels = blob_channels[bname]
                    models[lname] = nn.Linear(channels, filters)
                blob_channels[tname] = filters
                blob_width[tname] = -1
                blob_height[tname] = -1
                i = i + 1
            elif ltype == 'Softmax':
                models[lname] = nn.Softmax()
                blob_channels[tname] = blob_channels[bname]
                blob_width[tname] = -1
                blob_height[tname] = -1
                i = i + 1
            elif ltype == 'SoftmaxWithLoss':
                # NOTE(review): the loss module is created but never stored in
                # models, and blob_channels[tname] is left unset -- confirm
                # this is intentional before relying on a loss layer here.
                loss = nn.CrossEntropyLoss()
                blob_width[tname] = -1
                blob_height[tname] = -1
                i = i + 1
            elif ltype == 'Region':
                anchors = layer['region_param']['anchors'].strip('"').split(
                    ',')
                self.anchors = [float(j) for j in anchors]
                self.num_anchors = int(layer['region_param']['num'])
                # Floor division: anchor_step indexes into self.anchors.
                self.anchor_step = len(self.anchors) // self.num_anchors
                self.num_classes = int(layer['region_param']['classes'])
                i = i + 1
            else:
                print('create_network: unknown type #%s#' % ltype)
                i = i + 1
        return models
    def __init__(self):
        """Tiny-YOLO (COCO, 80-class) backbone with a RegionLoss head.

        Same layout as the VOC variant except conv8 narrows to 512 channels
        and the 1x1 output conv reads from 512. The output conv emits
        (5 + num_classes) * num_anchors channels per grid cell.
        """
        super(TinyYoloNet, self).__init__()
        # Number of training images seen, used for checkpointing/schedules.
        self.seen = 0
        self.num_classes = 80
        # Flat list of (w, h) anchor pairs, in grid-cell units.
        self.anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.1682
        ]
        # Floor division keeps num_anchors an int on Python 3, so it is safe
        # both for RegionLoss below and for the Conv2d channel count.
        self.num_anchors = len(self.anchors) // 2
        num_output = (5 + self.num_classes) * self.num_anchors
        self.width = 416
        self.height = 416

        self.loss = RegionLoss(self.num_classes, self.anchors,
                               self.num_anchors)
        self.cnn = nn.Sequential(
            OrderedDict([
                # conv1: 3 -> 16, spatial /2
                ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
                ('bn1', nn.BatchNorm2d(16)),
                ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
                ('pool1', nn.MaxPool2d(2, 2)),

                # conv2: 16 -> 32, spatial /2
                ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
                ('bn2', nn.BatchNorm2d(32)),
                ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
                ('pool2', nn.MaxPool2d(2, 2)),

                # conv3: 32 -> 64, spatial /2
                ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
                ('bn3', nn.BatchNorm2d(64)),
                ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
                ('pool3', nn.MaxPool2d(2, 2)),

                # conv4: 64 -> 128, spatial /2
                ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
                ('bn4', nn.BatchNorm2d(128)),
                ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
                ('pool4', nn.MaxPool2d(2, 2)),

                # conv5: 128 -> 256, spatial /2
                ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
                ('bn5', nn.BatchNorm2d(256)),
                ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
                ('pool5', nn.MaxPool2d(2, 2)),

                # conv6: 256 -> 512; stride-1 max pool preserves spatial size
                ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
                ('bn6', nn.BatchNorm2d(512)),
                ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
                ('pool6', MaxPoolStride1()),

                # conv7: 512 -> 1024
                ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
                ('bn7', nn.BatchNorm2d(1024)),
                ('leaky7', nn.LeakyReLU(0.1, inplace=True)),

                # conv8: 1024 -> 512 (narrower head than the VOC variant)
                ('conv8', nn.Conv2d(1024, 512, 3, 1, 1, bias=False)),
                ('bn8', nn.BatchNorm2d(512)),
                ('leaky8', nn.LeakyReLU(0.1, inplace=True)),

                # output: 1x1 conv down to the per-anchor detection channels
                ('output', nn.Conv2d(512, num_output, 1, 1, 0)),
            ]))