def FireNet_generic(FireNet_module_func, choose_num_output_func, batch_size, pool_after, s):
    print s

    n = NetSpec()
    FireNet_data_layer(n, batch_size) #add data layer to the net

    layer_idx=1 #e.g. conv1, fire2, etc. 
    n.conv1 = L.Convolution(n.data, kernel_size=7, num_output=96, stride=2, weight_filler=dict(type='xavier'))
    curr_bottom = 'conv1'
    n.tops['relu_conv1'] = L.ReLU(n.tops[curr_bottom], in_place=True)

    if curr_bottom in pool_after:
        curr_bottom = FireNet_pooling_layer(n, curr_bottom, pool_after[curr_bottom], layer_idx)

    for layer_idx in xrange(2,10):
        firenet_dict = choose_num_output_func(layer_idx-2, s)
        print firenet_dict
        curr_bottom = FireNet_module_func(n, curr_bottom, firenet_dict, layer_idx) 

        if curr_bottom in pool_after:
            curr_bottom = FireNet_pooling_layer(n, curr_bottom, pool_after[curr_bottom], layer_idx) 

    n.tops['drop'+str(layer_idx)] = L.Dropout(n.tops[curr_bottom], dropout_ratio=0.5, in_place=True)
    n.tops['conv_final'] = L.Convolution(n.tops[curr_bottom], kernel_size=1, num_output=1000, weight_filler=dict(type='gaussian', std=0.01, mean=0.0)) 
    n.tops['relu_conv_final'] = L.ReLU(n.tops['conv_final'], in_place=True) 
    n.tops['pool_final'] = L.Pooling(n.tops['conv_final'], global_pooling=1, pool=P.Pooling.AVE)
 
    if phase == 'trainval': #'phase' is a module-level global
        n.loss = L.SoftmaxWithLoss(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TRAIN))
        n.accuracy = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST))
        n.accuracy_top5 = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST), top_k=5) 
    return n.to_proto()
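
FireNet_data_layer is called but never defined in these examples. A minimal sketch of what it might look like, assuming an LMDB source; the source path and transform parameters below are placeholders, not the original values:

def FireNet_data_layer(n, batch_size):
    #hypothetical sketch: an LMDB data layer producing the (data, label) tops
    #that the generators above and below consume.
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                             source='imagenet_train_lmdb', ntop=2,
                             transform_param=dict(crop_size=227, mirror=True))
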
Example 2
    def get_phocnet(self, word_image_lmdb_path, phoc_lmdb_path,
                    phoc_size=604, generate_deploy=False):
        '''
        Returns a NetSpec definition of the PHOCNet. The definition can then be transformed
        into a protobuffer message by casting it into a str.
        '''
        n = NetSpec()
        # Data
        self.set_phocnet_data(n=n, generate_deploy=generate_deploy,
                              word_image_lmdb_path=word_image_lmdb_path,
                              phoc_lmdb_path=phoc_lmdb_path)

        # Conv Part
        self.set_phocnet_conv_body(n=n, relu_in_place=True)

        # FC Part
        n.spp5 = L.SPP(n.relu4_3, spp_param=dict(pool=P.SPP.MAX, pyramid_height=3, engine=self.spp_engine))
        n.fc6, n.relu6, n.drop6 = self.fc_relu(bottom=n.spp5, layer_size=4096,
                                               dropout_ratio=0.5, relu_in_place=True)
        n.fc7, n.relu7, n.drop7 = self.fc_relu(bottom=n.drop6, layer_size=4096,
                                               dropout_ratio=0.5, relu_in_place=True)
        n.fc8 = L.InnerProduct(n.drop7, num_output=phoc_size,
                               weight_filler=dict(type=self.initialization),
                               bias_filler=dict(type='constant'))
        n.sigmoid = L.Sigmoid(n.fc8, include=dict(phase=self.phase_test))

        # output part
        if not generate_deploy:
            n.silence = L.Silence(n.sigmoid, ntop=0, include=dict(phase=self.phase_test))
            n.loss = L.SigmoidCrossEntropyLoss(n.fc8, n.phocs)

        return n.to_proto()
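
As the docstring says, the NetSpec definition becomes a protobuffer message by casting it to a str. A hypothetical usage, where `builder` stands in for an instance of the surrounding class:

net_proto = builder.get_phocnet(word_image_lmdb_path='word_images_lmdb',
                                phoc_lmdb_path='phocs_lmdb')
with open('phocnet_train.prototxt', 'w') as f:
    f.write(str(net_proto))
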
Example 3
    def val_tail(self, last_top, stage=None):
        n = NetSpec()

        include_param = dict(phase=caffe.TEST)
        if stage is not None:
            include_param['stage'] = stage

        if stage is None:
            n.loss = L.SoftmaxWithLoss(bottom=[last_top, "label"])
        n.accuracy = L.Accuracy(bottom=[last_top, "label"],
                                include=include_param)
        return n.to_proto()
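
The include dict becomes a Caffe NetStateRule: the accuracy layer runs only in the TEST phase and, when a stage is given, only when the net's state carries that stage (in which case the loss layer is omitted). A hypothetical call, with `builder` again standing in for an instance of the surrounding class:

tail_proto = builder.val_tail(last_top='fc8', stage='test-on-val')
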
Example 4
def NiN(opts):

    n = NetSpec()
    FireNet_data_layer(n, batch_size) #add data layer to the net
    curr_bottom = 'data'

    #TODO: possibly rename layers to conv1.1, 1.2, 1.3; 2.1, 2.2, etc.

    curr_bottom = conv_relu_xavier(n, 11, 96, str(1), 4, 0, curr_bottom) #args: net, ksize, nfilt, layer_str, stride, pad, bottom
    if 'pool1' in opts:
        curr_bottom = NiN_pool(n, str(1), curr_bottom) #optional pooling directly after conv1 (str(1) avoids colliding with the pool after conv3)
    curr_bottom = conv_relu_xavier(n, 1,  96, str(2), 1, 0, curr_bottom)
    curr_bottom = conv_relu_xavier(n, 1,  96, str(3), 1, 0, curr_bottom)
    curr_bottom = NiN_pool(n, str(3), curr_bottom)

    curr_bottom = conv_relu_xavier(n, 5, 256, str(4), 1, 2, curr_bottom)
    curr_bottom = conv_relu_xavier(n, 1, 256, str(5), 1, 0, curr_bottom)
    curr_bottom = conv_relu_xavier(n, 1, 256, str(6), 1, 0, curr_bottom)
    curr_bottom = NiN_pool(n, str(6), curr_bottom)

    #conv8 and conv9 are the least computationally intensive layers
    curr_bottom = conv_relu_xavier(n, 3, 384, str(7), 1, 1, curr_bottom) 
    conv8_nfilt = get_conv8_nfilt(opts)
    curr_bottom = conv_relu_xavier(n, 1, conv8_nfilt, str(8), 1, 0, curr_bottom)
    curr_bottom = conv_relu_xavier(n, 1, 384, str(9), 1, 0, curr_bottom)
    curr_bottom = NiN_pool(n, str(9), curr_bottom)
    n.tops['drop9'] = L.Dropout(n.tops[curr_bottom], dropout_ratio=0.5, in_place=True)

    curr_bottom = conv_relu_xavier(n, 3, 1024, str(10), 1, 1, curr_bottom)
    curr_bottom = conv_relu_xavier(n, 1, 1024, str(11), 1, 0, curr_bottom)

    num_output=1000
    if 'out10k' in opts:
        num_output=10000

    n.tops['conv_12'] = L.Convolution(n.tops[curr_bottom], kernel_size=1, num_output=num_output, weight_filler=dict(type='gaussian', std=0.01, mean=0.0))
    n.tops['relu_conv_12'] = L.ReLU(n.tops['conv_12'], in_place=True)
    n.tops['pool_12'] = L.Pooling(n.tops['conv_12'], global_pooling=1, pool=P.Pooling.AVE)

    if phase == 'trainval':
        n.loss = L.SoftmaxWithLoss(n.tops['pool_12'], n.label, include=dict(phase=caffe_pb2.TRAIN))
        n.accuracy = L.Accuracy(n.tops['pool_12'], n.label, include=dict(phase=caffe_pb2.TEST))
        n.accuracy_top5 = L.Accuracy(n.tops['pool_12'], n.label, include=dict(phase=caffe_pb2.TEST), top_k=5) 

    out_dir = 'nets/NiN_' + '_'.join(opts)
    return [n.to_proto(), out_dir]
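
conv_relu_xavier and NiN_pool are assumed helpers. Minimal sketches consistent with how they are called above; the generated top names are assumptions:

def conv_relu_xavier(n, ksize, nfilt, layer_str, stride, pad, bottom):
    #hypothetical sketch: Xavier-initialized convolution plus in-place ReLU.
    conv_name = 'conv' + layer_str
    n.tops[conv_name] = L.Convolution(n.tops[bottom], kernel_size=ksize,
                                      num_output=nfilt, stride=stride, pad=pad,
                                      weight_filler=dict(type='xavier'))
    n.tops['relu_' + conv_name] = L.ReLU(n.tops[conv_name], in_place=True)
    return conv_name

def NiN_pool(n, layer_str, bottom):
    #hypothetical sketch: 3x3 stride-2 max pooling named after the layer it follows.
    pool_name = 'pool_' + layer_str
    n.tops[pool_name] = L.Pooling(n.tops[bottom], kernel_size=3, stride=2,
                                  pool=P.Pooling.MAX)
    return pool_name
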
Example 5
def FireNet(batch_size, pool_after, s, c1):
    print s

    n = NetSpec()
    FireNet_data_layer(n, batch_size) #add data layer to the net

    layer_idx=1 #e.g. conv1, fire2, etc. 
    n.conv1 = L.Convolution(n.data, kernel_size=c1['dim'], num_output=c1['nfilt'], stride=2, weight_filler=dict(type='xavier'))
    curr_bottom = 'conv1'
    n.tops['relu_conv1'] = L.ReLU(n.tops[curr_bottom], in_place=True)

    #if curr_bottom in pool_after.keys():
    #    curr_bottom = FireNet_pooling_layer(n, curr_bottom, pool_after[curr_bottom], layer_idx)

    if layer_idx in pool_after:
        n.tops['pool1'] = L.Pooling(n.tops[curr_bottom], kernel_size=3, stride=2, pool=P.Pooling.MAX)
        curr_bottom = 'pool1'    

    for layer_idx in xrange(2, s['n_layers']+2):
        firenet_dict = choose_num_output(layer_idx-2, s)
        print firenet_dict
        curr_bottom = FireNet_module(n, curr_bottom, firenet_dict, layer_idx) 

        if layer_idx in pool_after:
            next_bottom = 'pool%d' %layer_idx
            n.tops[next_bottom] = L.Pooling(n.tops[curr_bottom], kernel_size=3, stride=2, pool=P.Pooling.MAX)
            curr_bottom = next_bottom

    n.tops['drop'+str(layer_idx)] = L.Dropout(n.tops[curr_bottom], dropout_ratio=0.5, in_place=True)

    #optional pre_conv_final (w/ appropriate CEratio)
    #n.pre_conv_final = L.Convolution(n.tops[curr_bottom], kernel_size=1, num_output=int(1000*s['CEratio']), stride=1, weight_filler=dict(type='xavier'))
    #n.tops['relu_pre_conv_final'] = L.ReLU(n.tops['pre_conv_final'], in_place=True)
    #curr_bottom='pre_conv_final'

    n.tops['conv_final'] = L.Convolution(n.tops[curr_bottom], kernel_size=1, num_output=1000, weight_filler=dict(type='gaussian', std=0.01, mean=0.0)) 
    n.tops['relu_conv_final'] = L.ReLU(n.tops['conv_final'], in_place=True) 
    n.tops['pool_final'] = L.Pooling(n.tops['conv_final'], global_pooling=1, pool=P.Pooling.AVE)
 
    if phase == 'trainval':
        n.loss = L.SoftmaxWithLoss(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TRAIN))
        n.accuracy = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST))
        n.accuracy_top5 = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST), top_k=5) 
    return n.to_proto()
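
FireNet_module is not shown either. Its call signature and the fire2..fire9 naming match the Fire module of SqueezeNet (a 1x1 squeeze convolution feeding parallel 1x1 and 3x3 expand convolutions whose outputs are concatenated), so a sketch along those lines; the firenet_dict keys 's1x1', 'e1x1', 'e3x3' are assumptions:

def FireNet_module(n, bottom, firenet_dict, layer_idx):
    #hypothetical sketch of a SqueezeNet-style Fire module:
    #squeeze (1x1) -> expand (1x1 and 3x3 in parallel) -> concat.
    prefix = 'fire%d' % layer_idx
    n.tops[prefix + '/squeeze1x1'] = L.Convolution(
        n.tops[bottom], kernel_size=1, num_output=firenet_dict['s1x1'],
        weight_filler=dict(type='xavier'))
    n.tops[prefix + '/relu_squeeze1x1'] = L.ReLU(n.tops[prefix + '/squeeze1x1'],
                                                 in_place=True)
    n.tops[prefix + '/expand1x1'] = L.Convolution(
        n.tops[prefix + '/squeeze1x1'], kernel_size=1,
        num_output=firenet_dict['e1x1'], weight_filler=dict(type='xavier'))
    n.tops[prefix + '/relu_expand1x1'] = L.ReLU(n.tops[prefix + '/expand1x1'],
                                                in_place=True)
    n.tops[prefix + '/expand3x3'] = L.Convolution(
        n.tops[prefix + '/squeeze1x1'], kernel_size=3, pad=1,
        num_output=firenet_dict['e3x3'], weight_filler=dict(type='xavier'))
    n.tops[prefix + '/relu_expand3x3'] = L.ReLU(n.tops[prefix + '/expand3x3'],
                                                in_place=True)
    n.tops[prefix + '/concat'] = L.Concat(n.tops[prefix + '/expand1x1'],
                                          n.tops[prefix + '/expand3x3'])
    return prefix + '/concat'
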
Example 6
def lenet(lmdbData, lmdbLabel, batch_size):
    n = NetSpec()
    
    n.data  = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdbData,
                    transform_param=dict(scale=1./255), ntop=1)
    
    n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdbLabel,
                    transform_param=dict(scale=1./255), ntop=1)

    n.conv1 = L.Convolution(n.data, kernel_size=4, num_output=200, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=3, num_output=50, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=1, pool=P.Pooling.MAX)
    n.fc1   = L.InnerProduct(n.pool2, num_output=200, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.fc1, in_place=True)
    n.score = L.InnerProduct(n.relu1, num_output=1200, weight_filler=dict(type='xavier'))
    n.loss  = L.Python(n.score, n.label, module='pyloss', layer='EuclideanLossLayer')

    return n.to_proto()
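
The loss is a Python layer loaded from a module named pyloss. That module is not included here, but Caffe ships an EuclideanLossLayer example Python layer (examples/pycaffe/layers/pyloss.py) with exactly this interface, which such a module would resemble:

# pyloss.py -- following the EuclideanLossLayer example shipped with Caffe.
import caffe
import numpy as np

class EuclideanLossLayer(caffe.Layer):
    """Sum of squares of element-wise differences, as a Python layer."""

    def setup(self, bottom, top):
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        top[0].reshape(1)  #loss output is a scalar

    def forward(self, bottom, top):
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff ** 2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        for i in range(2):
            if not propagate_down[i]:
                continue
            sign = 1 if i == 0 else -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num
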
Example 7
def gen_net(batch_size=512):
    n = NetSpec()
    n.data = L.DummyData(shape={"dim": [batch_size, 3, 96, 96]})
    n.select1 = L.DummyData(shape={"dim": [2]})
    n.select2 = L.DummyData(shape={"dim": [2]})
    n.label = L.DummyData(shape={"dim": [2]})
    caffenet_stack(n.data, n)
    n.first = L.BatchReindex(n.fc6, n.select1)
    n.second = L.BatchReindex(n.fc6, n.select2)
    n.fc6_concat = L.Concat(n.first, n.second)

    n.fc7, n.bn7, n.relu7 = fc_relu(n.fc6_concat, 4096, batchnorm=True)
    n.fc8, n.relu8 = fc_relu(n.relu7, 4096)
    n.fc9 = L.InnerProduct(n.relu8, num_output=8,
                           weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.fc9, n.label, loss_param=dict(normalization=P.Loss.NONE))

    prot = n.to_proto()
    prot.debug_info = True
    return prot
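
caffenet_stack and fc_relu are assumed helpers here. A sketch of fc_relu consistent with both call signatures above (the BatchNorm placement in the batchnorm branch is an assumption):

def fc_relu(bottom, nout, batchnorm=False):
    #hypothetical sketch: fully connected layer, optional batch norm, ReLU.
    fc = L.InnerProduct(bottom, num_output=nout,
                        weight_filler=dict(type='xavier'))
    if batchnorm:
        bn = L.BatchNorm(fc, in_place=True)
        relu = L.ReLU(bn, in_place=True)
        return fc, bn, relu
    relu = L.ReLU(fc, in_place=True)
    return fc, relu
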
Example 8
    def train_tail(self, last_top):
        n = NetSpec()
        n.loss = L.SoftmaxWithLoss(bottom=[last_top, "label"])
        return n.to_proto()
Example 9
    def get_phocnet(self,
                    word_image_lmdb_path,
                    phoc_lmdb_path,
                    phoc_size=604,
                    generate_deploy=False):
        '''
        Returns a NetSpec definition of the PHOCNet. The definition can then be transformed
        into a protobuffer message by casting it into a str.
        '''
        n = NetSpec()
        relu_in_place = True
        # Data
        if generate_deploy:
            n.word_images = L.Input(shape=dict(dim=[1, 1, 100, 250]))
            relu_in_place = False
        else:
            n.word_images, n.label = L.Data(batch_size=1,
                                            backend=P.Data.LMDB,
                                            source=word_image_lmdb_path,
                                            prefetch=20,
                                            transform_param=dict(
                                                mean_value=255,
                                                scale=-1. / 255,
                                            ),
                                            ntop=2)
            n.phocs, n.label_phocs = L.Data(batch_size=1,
                                            backend=P.Data.LMDB,
                                            source=phoc_lmdb_path,
                                            prefetch=20,
                                            ntop=2)
        # Conv Part
        n.conv1_1, n.relu1_1 = self.conv_relu(n.word_images,
                                              nout=64,
                                              relu_in_place=relu_in_place)
        n.conv1_2, n.relu1_2 = self.conv_relu(n.relu1_1,
                                              nout=64,
                                              relu_in_place=relu_in_place)
        n.pool1 = L.Pooling(n.relu1_2,
                            pooling_param=dict(pool=P.Pooling.MAX,
                                               kernel_size=2,
                                               stride=2))

        n.conv2_1, n.relu2_1 = self.conv_relu(n.pool1,
                                              nout=128,
                                              relu_in_place=relu_in_place)
        n.conv2_2, n.relu2_2 = self.conv_relu(n.relu2_1,
                                              nout=128,
                                              relu_in_place=relu_in_place)
        n.pool2 = L.Pooling(n.relu2_2,
                            pooling_param=dict(pool=P.Pooling.MAX,
                                               kernel_size=2,
                                               stride=2))

        n.conv3_1, n.relu3_1 = self.conv_relu(n.pool2,
                                              nout=256,
                                              relu_in_place=relu_in_place)
        n.conv3_2, n.relu3_2 = self.conv_relu(n.relu3_1,
                                              nout=256,
                                              relu_in_place=relu_in_place)
        n.conv3_3, n.relu3_3 = self.conv_relu(n.relu3_2,
                                              nout=256,
                                              relu_in_place=relu_in_place)
        n.conv3_4, n.relu3_4 = self.conv_relu(n.relu3_3,
                                              nout=256,
                                              relu_in_place=relu_in_place)
        n.conv3_5, n.relu3_5 = self.conv_relu(n.relu3_4,
                                              nout=256,
                                              relu_in_place=relu_in_place)
        n.conv3_6, n.relu3_6 = self.conv_relu(n.relu3_5,
                                              nout=256,
                                              relu_in_place=relu_in_place)

        n.conv4_1, n.relu4_1 = self.conv_relu(n.relu3_6,
                                              nout=512,
                                              relu_in_place=relu_in_place)
        n.conv4_2, n.relu4_2 = self.conv_relu(n.relu4_1,
                                              nout=512,
                                              relu_in_place=relu_in_place)
        n.conv4_3, n.relu4_3 = self.conv_relu(n.relu4_2,
                                              nout=512,
                                              relu_in_place=relu_in_place)

        # FC Part
        n.spp5 = L.SPP(n.relu4_3,
                       spp_param=dict(pool=P.SPP.MAX,
                                      pyramid_height=3,
                                      engine=self.spp_engine))
        n.fc6, n.relu6, n.drop6 = self.fc_relu(bottom=n.spp5,
                                               layer_size=4096,
                                               dropout_ratio=0.5,
                                               relu_in_place=relu_in_place)
        n.fc7, n.relu7, n.drop7 = self.fc_relu(bottom=n.drop6,
                                               layer_size=4096,
                                               dropout_ratio=0.5,
                                               relu_in_place=relu_in_place)
        n.fc8 = L.InnerProduct(n.drop7,
                               num_output=phoc_size,
                               weight_filler=dict(type=self.initialization),
                               bias_filler=dict(type='constant'))
        n.sigmoid = L.Sigmoid(n.fc8, include=dict(phase=self.phase_test))

        # output part
        if not generate_deploy:
            n.silence = L.Silence(n.sigmoid,
                                  ntop=0,
                                  include=dict(phase=self.phase_test))
            n.loss = L.SigmoidCrossEntropyLoss(n.fc8, n.phocs)

        return n.to_proto()
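
conv_relu is again an assumed helper. Given the VGG-style body above, a plausible sketch is a 3x3, pad-1 convolution (so spatial size is preserved between poolings) followed by a ReLU; the kernel size and filler choice are assumptions:

    def conv_relu(self, bottom, nout, relu_in_place=True):
        #hypothetical sketch: spatial-size-preserving 3x3 convolution plus ReLU.
        conv = L.Convolution(bottom, kernel_size=3, pad=1, num_output=nout,
                             weight_filler=dict(type=self.initialization))
        return conv, L.ReLU(conv, in_place=relu_in_place)
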
Example 10
def StickNet(batch_size, s):
    inImgH = 224 #TODO: put inImg{H,W} into 's' if necessary.
    inImgW = 224
    round_to_nearest = 4

    n = NetSpec()
    FireNet_data_layer(n, batch_size) #add data layer to the net
    curr_bottom='data'

    #layer-to-layer counters
    _totalStride = 1 #note that, using 1x1 conv, our (stride>1) is only in pooling layers.
    _numPoolings = 1 #for indexing 'conv2_1', etc.
    _ch=3
    [activH, activW] = est_activ_size(inImgH, inImgW, _totalStride)
    n_filt = choose_num_output(1, 1, _ch, activH, activW, s['mflop_per_img_target'], s['n_layers']) #only using this for conv1 to avoid oscillations.
    n_filt = round_to(n_filt, round_to_nearest) #make divisible by round_to_nearest

    #FIXME: somehow account for num_output produced by conv1 when selecting number of filters for conv2. (else, conv2 goes way over budget on flops.)
    # perhaps we need to find the number N such that N^2*activations = mflop_per_img_target?

    idx_minor = 1
    idx_major = 1

    #this goes to (n_layers-1) ... then we do conv_final separately because it has a different weight init.
    for layer_idx in xrange(1, s['n_layers']):

        layer_str = '%d.%d' %(idx_major, idx_minor)

        #select number of filters in this layer:
        #[activH, activW] = est_activ_size(inImgH, inImgW, _totalStride)
        #n_filt = choose_num_output(1, 1, _ch, activH, activW, s['mflop_per_img_target'], s['n_layers']) 
        #TODO: to avoid oscillations, perhaps just use choose_num_output for conv1, 
        #      and then just double n_filt whenever we do stride=2.

        #generate layer
        ksize=1
        stride=1
        pad=0
        curr_bottom = conv_relu_xavier(n, ksize, n_filt, layer_str, stride, pad, curr_bottom)
        _ch = n_filt #for next layer

        if layer_idx in s['pool_after']:
            pinfo = s['pool_after'][layer_idx]

            #next_bottom = 'pool%d' %layer_idx
            next_bottom = 'pool_' + layer_str
            n.tops[next_bottom] = L.Pooling(n.tops[curr_bottom], kernel_size=pinfo['kernel_size'], stride=pinfo['stride'], pool=P.Pooling.MAX)
            curr_bottom = next_bottom

            _totalStride = _totalStride * pinfo['stride']
            _numPoolings = _numPoolings + 1

            n_filt = n_filt * pinfo['stride'] #to keep (most) layers at roughly the same complexity-per-layer

            idx_major = idx_major + 1
            idx_minor = 1

        else:
            idx_minor = idx_minor + 1

    n.tops['drop'+str(layer_idx)] = L.Dropout(n.tops[curr_bottom], dropout_ratio=0.5, in_place=True)

    n.tops['conv_final'] = L.Convolution(n.tops[curr_bottom], kernel_size=1, num_output=1000, weight_filler=dict(type='gaussian', std=0.01, mean=0.0)) 
    n.tops['relu_conv_final'] = L.ReLU(n.tops['conv_final'], in_place=True) 
    n.tops['pool_final'] = L.Pooling(n.tops['conv_final'], global_pooling=1, pool=P.Pooling.AVE)
 
    if phase == 'trainval':
        n.loss = L.SoftmaxWithLoss(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TRAIN))
        n.accuracy = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST))
        n.accuracy_top5 = L.Accuracy(n.tops['pool_final'], n.label, include=dict(phase=caffe_pb2.TEST), top_k=5) 
    return n.to_proto()
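
choose_num_output is described only by the surrounding comments: it picks num_output so each layer lands near an equal share of the per-image FLOP budget. A hypothetical back-of-the-envelope version matching the StickNet call signature:

def choose_num_output(ksize_h, ksize_w, ch_in, activH, activW,
                      mflop_per_img_target, n_layers):
    #hypothetical sketch: a ksize_h x ksize_w conv with ch_in input channels
    #and n_filt filters over an activH x activW grid costs roughly
    #ksize_h*ksize_w*ch_in*n_filt*activH*activW multiply-accumulates per image,
    #so divide one layer's share of the budget by the cost per filter.
    flop_budget_per_layer = mflop_per_img_target * 1e6 / n_layers
    cost_per_filter = ksize_h * ksize_w * ch_in * activH * activW
    return max(1, int(flop_budget_per_layer / cost_per_filter))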