def __init__(self):
    super(TinyYoloNet, self).__init__()
    self.num_classes = 20
    # Anchor (width, height) pairs in grid-cell units (tiny-yolo-voc values)
    self.anchors = [
        1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52
    ]
    # Integer division: '/' would produce a float under Python 3
    self.num_anchors = len(self.anchors) // 2
    # Each anchor predicts 4 box coordinates + 1 objectness + class scores
    num_output = (5 + self.num_classes) * self.num_anchors
    self.cnn = nn.Sequential(
        OrderedDict([
            # conv1
            ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
            ('bn1', nn.BatchNorm2d(16)),
            ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
            ('pool1', nn.MaxPool2d(2, 2)),
            # conv2
            ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
            ('bn2', nn.BatchNorm2d(32)),
            ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
            ('pool2', nn.MaxPool2d(2, 2)),
            # conv3
            ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
            ('bn3', nn.BatchNorm2d(64)),
            ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
            ('pool3', nn.MaxPool2d(2, 2)),
            # conv4
            ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
            ('bn4', nn.BatchNorm2d(128)),
            ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
            ('pool4', nn.MaxPool2d(2, 2)),
            # conv5
            ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
            ('bn5', nn.BatchNorm2d(256)),
            ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
            ('pool5', nn.MaxPool2d(2, 2)),
            # conv6
            ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
            ('bn6', nn.BatchNorm2d(512)),
            ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
            # 2x2 max pool with stride 1: keeps the spatial size
            ('pool6', MaxPoolStride1()),
            # conv7
            ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
            ('bn7', nn.BatchNorm2d(1024)),
            ('leaky7', nn.LeakyReLU(0.1, inplace=True)),
            # conv8
            ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)),
            ('bn8', nn.BatchNorm2d(1024)),
            ('leaky8', nn.LeakyReLU(0.1, inplace=True)),
            # output
            ('output', nn.Conv2d(1024, num_output, 1, 1, 0)),
        ]))
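# A minimal shape check for the VOC variant above (a sketch; assumes the
# 416x416 input that tiny-yolo-voc uses and that torch is imported). With
# 20 classes and 5 anchors the head emits (5 + 20) * 5 = 125 channels, and
# the five stride-2 pools plus the stride-1 pool6 leave a 13x13 grid:
#
#   net = TinyYoloNet()
#   out = net.cnn(torch.zeros(1, 3, 416, 416))
#   # out.shape == (1, 125, 13, 13)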
def create_network(self, net_info):
    models = OrderedDict()
    blob_channels = dict()
    blob_width = dict()
    blob_height = dict()

    layers = net_info['layers']
    props = net_info['props']
    layer_num = len(layers)
    # The input shape comes either from 'input_shape' or the older
    # 'input_dim' prototxt syntax (dict.has_key was removed in Python 3,
    # so membership tests use 'in' throughout)
    if 'input_shape' in props:
        blob_channels['data'] = int(props['input_shape']['dim'][1])
        blob_height['data'] = int(props['input_shape']['dim'][2])
        blob_width['data'] = int(props['input_shape']['dim'][3])
    else:
        blob_channels['data'] = int(props['input_dim'][1])
        blob_height['data'] = int(props['input_dim'][2])
        blob_width['data'] = int(props['input_dim'][3])

    i = 0
    while i < layer_num:
        layer = layers[i]
        lname = layer['name']
        ltype = layer['type']
        if ltype == 'Data':
            i = i + 1
            continue
        bname = layer['bottom']
        tname = layer['top']
        if ltype == 'Convolution':
            convolution_param = layer['convolution_param']
            channels = blob_channels[bname]
            out_filters = int(convolution_param['num_output'])
            kernel_size = int(convolution_param['kernel_size'])
            stride = int(convolution_param['stride']) if 'stride' in convolution_param else 1
            pad = int(convolution_param['pad']) if 'pad' in convolution_param else 0
            group = int(convolution_param['group']) if 'group' in convolution_param else 1
            bias = True
            if 'bias_term' in convolution_param and convolution_param['bias_term'] == 'false':
                bias = False
            # 'groups' must be passed by keyword: the sixth positional
            # argument of nn.Conv2d is dilation, not groups
            models[lname] = nn.Conv2d(channels, out_filters, kernel_size,
                                      stride, pad, groups=group, bias=bias)
            blob_channels[tname] = out_filters
            blob_width[tname] = (blob_width[bname] + 2 * pad - kernel_size) // stride + 1
            blob_height[tname] = (blob_height[bname] + 2 * pad - kernel_size) // stride + 1
            i = i + 1
        elif ltype == 'BatchNorm':
            # Caffe splits batch norm into BatchNorm + Scale; fuse the pair
            # into a single affine nn.BatchNorm2d and skip the Scale layer
            assert (i + 1 < layer_num)
            assert (layers[i + 1]['type'] == 'Scale')
            momentum = 0.9
            if 'batch_norm_param' in layer and 'moving_average_fraction' in layer['batch_norm_param']:
                momentum = float(layer['batch_norm_param']['moving_average_fraction'])
            channels = blob_channels[bname]
            models[lname] = nn.BatchNorm2d(channels, momentum=momentum)
            tname = layers[i + 1]['top']
            blob_channels[tname] = channels
            blob_width[tname] = blob_width[bname]
            blob_height[tname] = blob_height[bname]
            i = i + 2
        elif ltype == 'ReLU':
            inplace = (bname == tname)
            if 'relu_param' in layer and 'negative_slope' in layer['relu_param']:
                negative_slope = float(layer['relu_param']['negative_slope'])
                models[lname] = nn.LeakyReLU(negative_slope=negative_slope,
                                             inplace=inplace)
            else:
                models[lname] = nn.ReLU(inplace=inplace)
            blob_channels[tname] = blob_channels[bname]
            blob_width[tname] = blob_width[bname]
            blob_height[tname] = blob_height[bname]
            i = i + 1
        elif ltype == 'Pooling':
            kernel_size = int(layer['pooling_param']['kernel_size'])
            stride = int(layer['pooling_param']['stride'])
            padding = 0
            if 'pad' in layer['pooling_param']:
                padding = int(layer['pooling_param']['pad'])
            pool_type = layer['pooling_param']['pool']
            if pool_type == 'MAX' and kernel_size == 2 and stride == 1:
                # for tiny-yolo-voc: 2x2 max pool with stride 1 keeps the
                # spatial size
                models[lname] = MaxPoolStride1()
                blob_width[tname] = blob_width[bname]
                blob_height[tname] = blob_height[bname]
            else:
                if pool_type == 'MAX':
                    models[lname] = nn.MaxPool2d(kernel_size, stride,
                                                 padding=padding)
                elif pool_type == 'AVE':
                    models[lname] = nn.AvgPool2d(kernel_size, stride,
                                                 padding=padding)
                # Track the output size including padding; the '+ stride - 1'
                # term reproduces Caffe's ceil rounding of the pooled size
                if stride > 1:
                    blob_width[tname] = (blob_width[bname] + 2 * padding - kernel_size + stride - 1) // stride + 1
                    blob_height[tname] = (blob_height[bname] + 2 * padding - kernel_size + stride - 1) // stride + 1
                else:
                    blob_width[tname] = blob_width[bname] + 2 * padding - kernel_size + 1
                    blob_height[tname] = blob_height[bname] + 2 * padding - kernel_size + 1
            blob_channels[tname] = blob_channels[bname]
            i = i + 1
        elif ltype == 'Eltwise':
            operation = 'SUM'
            if 'eltwise_param' in layer and 'operation' in layer['eltwise_param']:
                operation = layer['eltwise_param']['operation']
            # Eltwise takes two bottoms; the output shape follows the first
            bname0 = bname[0]
            models[lname] = Eltwise(operation)
            blob_channels[tname] = blob_channels[bname0]
            blob_width[tname] = blob_width[bname0]
            blob_height[tname] = blob_height[bname0]
            i = i + 1
        elif ltype == 'InnerProduct':
            filters = int(layer['inner_product_param']['num_output'])
            if blob_width[bname] != -1 or blob_height[bname] != -1:
                # Spatial input: flatten before the linear layer
                channels = blob_channels[bname] * blob_width[bname] * blob_height[bname]
                models[lname] = nn.Sequential(FCView(),
                                              nn.Linear(channels, filters))
            else:
                channels = blob_channels[bname]
                models[lname] = nn.Linear(channels, filters)
            blob_channels[tname] = filters
            blob_width[tname] = -1
            blob_height[tname] = -1
            i = i + 1
        elif ltype == 'Softmax':
            models[lname] = nn.Softmax(dim=1)
            blob_channels[tname] = blob_channels[bname]
            blob_width[tname] = -1
            blob_height[tname] = -1
            i = i + 1
        elif ltype == 'SoftmaxWithLoss':
            # Store the loss in models; the original assigned it to a dead
            # local variable
            models[lname] = nn.CrossEntropyLoss()
            blob_width[tname] = -1
            blob_height[tname] = -1
            i = i + 1
        elif ltype == 'Region':
            anchors = layer['region_param']['anchors'].strip('"').split(',')
            self.anchors = [float(j) for j in anchors]
            self.num_anchors = int(layer['region_param']['num'])
            self.anchor_step = len(self.anchors) // self.num_anchors
            self.num_classes = int(layer['region_param']['classes'])
            i = i + 1
        else:
            print('create_network: unknown type #%s#' % ltype)
            i = i + 1
    return models
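# A sketch of the net_info structure create_network expects, assuming a
# prototxt parser that returns plain dicts of strings (the keys below mirror
# the ones read above; the layer names and values are made up):
#
#   net_info = {
#       'props': {'input_dim': ['1', '3', '416', '416']},
#       'layers': [
#           {'name': 'conv1', 'type': 'Convolution',
#            'bottom': 'data', 'top': 'conv1',
#            'convolution_param': {'num_output': '16', 'kernel_size': '3',
#                                  'stride': '1', 'pad': '1',
#                                  'bias_term': 'false'}},
#           {'name': 'relu1', 'type': 'ReLU',
#            'bottom': 'conv1', 'top': 'conv1'},
#       ],
#   }
#   models = self.create_network(net_info)
#   # models['conv1'] is an nn.Conv2d(3, 16, 3, 1, 1, bias=False)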
def __init__(self):
    super(TinyYoloNet, self).__init__()
    self.seen = 0
    self.num_classes = 80
    # Anchor (width, height) pairs in grid-cell units (COCO values)
    self.anchors = [
        0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
        3.52778, 9.77052, 9.1682
    ]
    # Integer division already yields an int, so no int() cast is needed
    self.num_anchors = len(self.anchors) // 2
    num_output = (5 + self.num_classes) * self.num_anchors
    self.width = 416
    self.height = 416
    self.loss = RegionLoss(self.num_classes, self.anchors, self.num_anchors)
    self.cnn = nn.Sequential(
        OrderedDict([
            # conv1
            ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
            ('bn1', nn.BatchNorm2d(16)),
            ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
            ('pool1', nn.MaxPool2d(2, 2)),
            # conv2
            ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
            ('bn2', nn.BatchNorm2d(32)),
            ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
            ('pool2', nn.MaxPool2d(2, 2)),
            # conv3
            ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
            ('bn3', nn.BatchNorm2d(64)),
            ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
            ('pool3', nn.MaxPool2d(2, 2)),
            # conv4
            ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
            ('bn4', nn.BatchNorm2d(128)),
            ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
            ('pool4', nn.MaxPool2d(2, 2)),
            # conv5
            ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
            ('bn5', nn.BatchNorm2d(256)),
            ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
            ('pool5', nn.MaxPool2d(2, 2)),
            # conv6
            ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
            ('bn6', nn.BatchNorm2d(512)),
            ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
            # 2x2 max pool with stride 1: keeps the spatial size
            ('pool6', MaxPoolStride1()),
            # conv7
            ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
            ('bn7', nn.BatchNorm2d(1024)),
            ('leaky7', nn.LeakyReLU(0.1, inplace=True)),
            # conv8: this variant narrows back to 512 channels before the head
            ('conv8', nn.Conv2d(1024, 512, 3, 1, 1, bias=False)),
            ('bn8', nn.BatchNorm2d(512)),
            ('leaky8', nn.LeakyReLU(0.1, inplace=True)),
            # output
            ('output', nn.Conv2d(512, num_output, 1, 1, 0)),
        ]))
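# The same smoke test for the COCO variant (a sketch; assumes torch is
# imported). With 80 classes and 5 anchors the head emits
# (5 + 80) * 5 = 425 channels over the 13x13 grid, which RegionLoss then
# decodes per anchor:
#
#   net = TinyYoloNet()
#   out = net.cnn(torch.zeros(1, 3, net.height, net.width))
#   # out.shape == (1, 425, 13, 13)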