Ejemplo n.º 1
0
    def get_phocnet(self, word_image_lmdb_path, phoc_lmdb_path,
                    phoc_size=604, generate_deploy=False):
        '''
        Assemble the PHOCNet layer graph and return the result of
        NetSpec.to_proto() for it.

        The data layers and the convolutional body are added by
        self.set_phocnet_data and self.set_phocnet_conv_body. This method
        then stacks an SPP layer on relu4_3, two 4096-wide FC/ReLU/Dropout
        blocks, the fc8 output layer and a test-phase Sigmoid.

        Args:
            word_image_lmdb_path: forwarded unchanged to set_phocnet_data.
            phoc_lmdb_path: forwarded unchanged to set_phocnet_data.
            phoc_size: number of outputs of the final InnerProduct layer (fc8).
            generate_deploy: forwarded to set_phocnet_data; when true, the
                Silence and SigmoidCrossEntropyLoss layers are left out.

        Returns:
            Whatever NetSpec.to_proto() yields for the assembled network.
        '''
        net = NetSpec()

        # Data
        self.set_phocnet_data(n=net, generate_deploy=generate_deploy,
                              word_image_lmdb_path=word_image_lmdb_path,
                              phoc_lmdb_path=phoc_lmdb_path)

        # Conv Part (expected to register relu4_3 on the net, which is read below)
        self.set_phocnet_conv_body(n=net, relu_in_place=True)

        # FC Part
        net.spp5 = L.SPP(
            net.relu4_3,
            spp_param=dict(pool=P.SPP.MAX, pyramid_height=3, engine=self.spp_engine),
        )

        # fc6 and fc7 are identical blocks chained one after the other; each
        # block feeds its dropout top into the next layer.
        bottom = net.spp5
        for index in (6, 7):
            fc, relu, drop = self.fc_relu(bottom=bottom, layer_size=4096,
                                          dropout_ratio=0.5, relu_in_place=True)
            setattr(net, 'fc%d' % index, fc)
            setattr(net, 'relu%d' % index, relu)
            setattr(net, 'drop%d' % index, drop)
            bottom = drop

        net.fc8 = L.InnerProduct(
            bottom,
            num_output=phoc_size,
            weight_filler=dict(type=self.initialization),
            bias_filler=dict(type='constant'),
        )
        net.sigmoid = L.Sigmoid(net.fc8, include=dict(phase=self.phase_test))

        # output part
        if generate_deploy:
            # Deploy definitions stop at the sigmoid.
            return net.to_proto()

        # The Silence layer consumes the sigmoid top in the test phase; the
        # loss is computed on the raw fc8 output against the phocs top.
        net.silence = L.Silence(net.sigmoid, ntop=0,
                                include=dict(phase=self.phase_test))
        net.loss = L.SigmoidCrossEntropyLoss(net.fc8, net.phocs)
        return net.to_proto()
Ejemplo n.º 2
0
def gen_net(batch_size=512):
    '''
    Build a network fed by DummyData layers and return its protobuf message.

    The graph consists of a DummyData image input, the layers added by
    caffenet_stack, two BatchReindex selections of fc6 that are concatenated,
    three fully connected layers (fc7 with batchnorm, fc8, fc9 with 8
    outputs) and a SoftmaxWithLoss layer with normalization set to NONE.

    Args:
        batch_size: first dimension of the DummyData "data" blob.

    Returns:
        The message produced by NetSpec.to_proto(), with debug_info set
        to True.
    '''
    net = NetSpec()

    # Inputs: one image blob plus two index blobs and a label blob of dim [2].
    net.data = L.DummyData(shape=dict(dim=[batch_size, 3, 96, 96]))
    net.select1 = L.DummyData(shape=dict(dim=[2]))
    net.select2 = L.DummyData(shape=dict(dim=[2]))
    net.label = L.DummyData(shape=dict(dim=[2]))

    # Shared trunk; presumably this is what registers net.fc6 -- confirm
    # against caffenet_stack.
    caffenet_stack(net.data, net)

    # Re-index fc6 with each selector and join the two results.
    features = net.fc6
    net.first = L.BatchReindex(features, net.select1)
    net.second = L.BatchReindex(features, net.select2)
    net.fc6_concat = L.Concat(net.first, net.second)

    # Fully connected head.
    fc7, bn7, relu7 = fc_relu(net.fc6_concat, 4096, batchnorm=True)
    net.fc7 = fc7
    net.bn7 = bn7
    net.relu7 = relu7

    fc8, relu8 = fc_relu(net.relu7, 4096)
    net.fc8 = fc8
    net.relu8 = relu8

    net.fc9 = L.InnerProduct(
        net.relu8,
        num_output=8,
        weight_filler=dict(type='xavier'),
    )
    net.loss = L.SoftmaxWithLoss(
        net.fc9,
        net.label,
        loss_param=dict(normalization=P.Loss.NONE),
    )

    message = net.to_proto()
    message.debug_info = True
    return message
Ejemplo n.º 3
0
    def get_phocnet(self,
                    word_image_lmdb_path,
                    phoc_lmdb_path,
                    phoc_size=604,
                    generate_deploy=False):
        '''
        Assemble the complete PHOCNet layer graph and return the result of
        NetSpec.to_proto() for it.

        Args:
            word_image_lmdb_path: LMDB source of the Data layer that produces
                the word_images and label tops (unused when generate_deploy
                is true).
            phoc_lmdb_path: LMDB source of the Data layer that produces the
                phocs and label_phocs tops (unused when generate_deploy is
                true).
            phoc_size: number of outputs of the final InnerProduct layer (fc8).
            generate_deploy: when true, the net starts from an Input layer of
                dim [1, 1, 100, 250], no ReLU is run in place and the
                Silence/loss layers are omitted.

        Returns:
            Whatever NetSpec.to_proto() yields for the assembled network.
        '''
        net = NetSpec()
        # ReLUs run in place except in the deploy definition.
        in_place = not generate_deploy

        # Data
        if generate_deploy:
            net.word_images = L.Input(shape=dict(dim=[1, 1, 100, 250]))
        else:
            net.word_images, net.label = L.Data(
                batch_size=1,
                backend=P.Data.LMDB,
                source=word_image_lmdb_path,
                prefetch=20,
                transform_param=dict(mean_value=255, scale=-1. / 255),
                ntop=2,
            )
            net.phocs, net.label_phocs = L.Data(
                batch_size=1,
                backend=P.Data.LMDB,
                source=phoc_lmdb_path,
                prefetch=20,
                ntop=2,
            )

        def max_pool(bottom):
            # 2x2 max pooling with stride 2.
            return L.Pooling(bottom,
                             pooling_param=dict(pool=P.Pooling.MAX,
                                                kernel_size=2,
                                                stride=2))

        # Conv Part
        # Each entry: (stage number, conv layers in the stage, nout,
        #              whether a pooling layer follows the stage).
        conv_stages = (
            (1, 2, 64, True),
            (2, 2, 128, True),
            (3, 6, 256, False),
            (4, 3, 512, False),
        )
        bottom = net.word_images
        for stage, depth, nout, pooled in conv_stages:
            for index in range(1, depth + 1):
                conv, relu = self.conv_relu(bottom,
                                            nout=nout,
                                            relu_in_place=in_place)
                # Register the tops as conv<stage>_<index> / relu<stage>_<index>.
                setattr(net, 'conv%d_%d' % (stage, index), conv)
                setattr(net, 'relu%d_%d' % (stage, index), relu)
                bottom = relu
            if pooled:
                bottom = max_pool(bottom)
                setattr(net, 'pool%d' % stage, bottom)

        # FC Part
        # At this point bottom is the relu4_3 top.
        net.spp5 = L.SPP(
            bottom,
            spp_param=dict(pool=P.SPP.MAX,
                           pyramid_height=3,
                           engine=self.spp_engine),
        )

        # fc6 and fc7 are identical blocks; each feeds its dropout top onward.
        bottom = net.spp5
        for index in (6, 7):
            fc, relu, drop = self.fc_relu(bottom=bottom,
                                          layer_size=4096,
                                          dropout_ratio=0.5,
                                          relu_in_place=in_place)
            setattr(net, 'fc%d' % index, fc)
            setattr(net, 'relu%d' % index, relu)
            setattr(net, 'drop%d' % index, drop)
            bottom = drop

        net.fc8 = L.InnerProduct(
            bottom,
            num_output=phoc_size,
            weight_filler=dict(type=self.initialization),
            bias_filler=dict(type='constant'),
        )
        net.sigmoid = L.Sigmoid(net.fc8, include=dict(phase=self.phase_test))

        # output part
        if generate_deploy:
            # Deploy definitions stop at the sigmoid.
            return net.to_proto()

        # The Silence layer consumes the sigmoid top in the test phase; the
        # loss is computed on the raw fc8 output against the phocs top.
        net.silence = L.Silence(net.sigmoid,
                                ntop=0,
                                include=dict(phase=self.phase_test))
        net.loss = L.SigmoidCrossEntropyLoss(net.fc8, net.phocs)
        return net.to_proto()