def get_phocnet(self, word_image_lmdb_path, phoc_lmdb_path, phoc_size=604,
                generate_deploy=False):
    '''
    Returns a NetSpec definition of the PHOCNet. The definition can then be
    transformed into a protobuffer message by casting it into a str.
    '''
    n = NetSpec()
    # Data
    self.set_phocnet_data(n=n, generate_deploy=generate_deploy,
                          word_image_lmdb_path=word_image_lmdb_path,
                          phoc_lmdb_path=phoc_lmdb_path)

    # Conv Part
    self.set_phocnet_conv_body(n=n, relu_in_place=True)

    # FC Part
    n.spp5 = L.SPP(n.relu4_3,
                   spp_param=dict(pool=P.SPP.MAX, pyramid_height=3,
                                  engine=self.spp_engine))
    n.fc6, n.relu6, n.drop6 = self.fc_relu(bottom=n.spp5, layer_size=4096,
                                           dropout_ratio=0.5,
                                           relu_in_place=True)
    n.fc7, n.relu7, n.drop7 = self.fc_relu(bottom=n.drop6, layer_size=4096,
                                           dropout_ratio=0.5,
                                           relu_in_place=True)
    n.fc8 = L.InnerProduct(n.drop7, num_output=phoc_size,
                           weight_filler=dict(type=self.initialization),
                           bias_filler=dict(type='constant'))
    n.sigmoid = L.Sigmoid(n.fc8, include=dict(phase=self.phase_test))

    # output part
    if not generate_deploy:
        n.silence = L.Silence(n.sigmoid, ntop=0,
                              include=dict(phase=self.phase_test))
        n.loss = L.SigmoidCrossEntropyLoss(n.fc8, n.phocs)

    return n.to_proto()
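# Usage sketch (an assumption, not part of the original source): get_phocnet
# is a method, so `generator` below stands for an already-constructed instance
# of the class that owns it. As the docstring says, the returned message is
# turned into prototxt text by casting it to str.
def write_phocnet_prototxt(generator, word_image_lmdb_path, phoc_lmdb_path,
                           out_path='phocnet_train_val.prototxt'):
    proto = generator.get_phocnet(word_image_lmdb_path=word_image_lmdb_path,
                                  phoc_lmdb_path=phoc_lmdb_path,
                                  phoc_size=604, generate_deploy=False)
    with open(out_path, 'w') as proto_file:
        proto_file.write(str(proto))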
def train_head(self, subset):
    n = NetSpec()
    # train
    image_data_param = dict(source=subset.get_list_absolute_path(),
                            batch_size=self.batch_sizes[0],
                            new_width=self.infmt.new_width,
                            new_height=self.infmt.new_height,
                            rand_skip=self.batch_size,
                            shuffle=True)
    if subset.root_folder is not None:
        image_data_param['root_folder'] = subset.root_folder
    transform_param = dict(
        mirror=self.infmt.mirror,
        crop_size=self.infmt.crop_size,
        # mean_value=self.infmt.mean_pixel,
    )
    if self.infmt.scale is not None:
        transform_param['scale'] = self.infmt.scale
    if self.infmt.mean_file is not None:
        transform_param['mean_file'] = self.infmt.mean_file
    elif self.infmt.mean_pixel is not None:
        transform_param['mean_value'] = self.infmt.mean_pixel
    n.data, n.label = L.ImageData(ntop=2,
                                  image_data_param=image_data_param,
                                  transform_param=transform_param,
                                  include=dict(phase=caffe.TRAIN))
    net = n.to_proto()
    net.name = self.name
    return net
def __init__(self, number_of_neighbors=6, inner_product_output=100,
             weight_lr_mult=1, weight_decay_mult=1, b_lr_mult=2,
             b_decay_mult=0):
    self.number_of_neighbors = number_of_neighbors
    # self.netP =
    self.net = NetSpec()
    self.shared_weight_counter = 0
    self.num_output = inner_product_output
    self.weight_lr_mult = weight_lr_mult
    self.weight_decay_mult = weight_decay_mult
    self.b_lr_mult = b_lr_mult
    self.b_decay_mult = b_decay_mult
def val_tail(self, last_top, stage=None):
    n = NetSpec()
    include_param = dict(phase=caffe.TEST)
    if stage is not None:
        include_param['stage'] = stage
    if stage is None:
        n.loss = L.SoftmaxWithLoss(bottom=[last_top, "label"])
    n.accuracy = L.Accuracy(bottom=[last_top, "label"], include=include_param)
    return n.to_proto()
def val_head(self, subset, stage=None):
    image_data_param = dict(
        source=subset.get_list_absolute_path(),
        batch_size=self.batch_sizes[1],
        # root_folder=subset.root_folder,
        rand_skip=self.batch_sizes[1],
        shuffle=True,
        # new_width,
        # new_height
    )
    transform_param = dict(
        mirror=False,
        # crop_size = self.infmt.crop_size,
        # mean_value = self.infmt.mean_pixel,
        # mean_file,
        # scale,
    )
    if subset.root_folder is not None:
        image_data_param['root_folder'] = subset.root_folder
    if self.crop_on_test:
        image_data_param['new_width'] = self.infmt.new_width
        image_data_param['new_height'] = self.infmt.new_height
        transform_param['crop_size'] = self.infmt.crop_size
    else:
        image_data_param['new_width'] = self.infmt.crop_size
        image_data_param['new_height'] = self.infmt.crop_size
    if self.infmt.scale is not None:
        transform_param['scale'] = self.infmt.scale
    if self.infmt.mean_file is not None:
        transform_param['mean_file'] = self.infmt.mean_file
    elif self.infmt.mean_pixel is not None:
        transform_param['mean_value'] = self.infmt.mean_pixel
    include_param = dict(phase=caffe.TEST)
    if stage is not None:
        include_param['stage'] = stage
    n = NetSpec()
    n.data, n.label = L.ImageData(ntop=2,
                                  image_data_param=image_data_param,
                                  transform_param=transform_param,
                                  include=include_param)
    net = n.to_proto()
    net.name = self.name
    return net
def extract_head(self, subset):
    image_data_param = dict(
        source=subset.get_list_absolute_path(),
        batch_size=self.batch_size,
        root_folder=subset.root_folder,
        # new_width,
        # new_height
    )
    transform_param = dict(
        mirror=False,
        # crop_size = self.infmt.crop_size,
        # mean_value = self.infmt.mean_pixel,
        # mean_file,
        # scale,
    )
    if self.crop_on_test:
        image_data_param['new_width'] = self.infmt.new_width
        image_data_param['new_height'] = self.infmt.new_height
        transform_param['crop_size'] = self.infmt.crop_size
    else:
        image_data_param['new_width'] = self.infmt.crop_size
        image_data_param['new_height'] = self.infmt.crop_size
    if self.infmt.scale is not None:
        transform_param['scale'] = self.infmt.scale
    if self.infmt.mean_file is not None:
        transform_param['mean_file'] = self.infmt.mean_file
    elif self.infmt.mean_pixel is not None:
        transform_param['mean_value'] = self.infmt.mean_pixel
    n = NetSpec()
    n.data, n.label = L.ImageData(ntop=2,
                                  image_data_param=image_data_param,
                                  transform_param=transform_param)
    # , include=dict(phase=caffe.TEST))
    net = n.to_proto()
    net.name = self.name
    return net
def main():
    from argparse import ArgumentParser
    from os import path
    parser = ArgumentParser()
    parser.add_argument('prototxt')
    parser.add_argument('-l', '--load', help='Load a caffemodel')
    parser.add_argument('-d', '--data', default=None,
                        help='Image list to use [default prototxt data]')
    # parser.add_argument('-q', action='store_true', help='Quiet execution')
    parser.add_argument('-sm', action='store_true', help='Summary only')
    parser.add_argument('-q', action='store_true', help='Quiet execution')
    parser.add_argument('-a', '--all', action='store_true',
                        help='Show the statistic for all layers')
    parser.add_argument('-nc', action='store_true', help='Do not use color')
    parser.add_argument('-s', type=float, default=1.0,
                        help='Scale the input [only custom data "-d"]')
    parser.add_argument('-bs', type=int, default=16,
                        help='Batch size [only custom data "-d"]')
    parser.add_argument('-nit', type=int, default=10,
                        help='Number of iterations')
    parser.add_argument('--gpu', type=int, default=0,
                        help='What gpu to run it on?')
    args = parser.parse_args()

    if args.q:
        from os import environ
        environ['GLOG_minloglevel'] = '2'
    import caffe, load
    from caffe import NetSpec, layers as L
    caffe.set_mode_gpu()
    if args.gpu is not None:
        caffe.set_device(args.gpu)

    if args.data is not None:
        model = load.ProtoDesc(args.prototxt)
        net = NetSpec()
        fl = getFileList(args.data)
        if len(fl) == 0:
            print("Unknown data type for '%s'" % args.data)
            exit(1)
        from tempfile import NamedTemporaryFile
        f = NamedTemporaryFile('w')
        f.write('\n'.join([path.abspath(i) + ' 0' for i in fl]))
        f.flush()
        net.data, net.label = L.ImageData(
            source=f.name, batch_size=args.bs,
            new_width=model.input_dim[-1], new_height=model.input_dim[-1],
            transform_param=dict(mean_value=[104, 117, 123], scale=args.s),
            ntop=2)
        net.out = model(data=net.data, label=net.label)
        n = netFromString('force_backward:true\n' + str(net.to_proto()),
                          caffe.TRAIN)
    else:
        n = caffe.Net(args.prototxt, caffe.TRAIN)

    if args.load is not None:
        n.copy_from(args.load)

    cvar = printMeanStddev(n, NIT=args.nit, show_all=args.all,
                           show_color=not args.nc, quiet=args.sm)
    cv, gr = computeGradientRatio(n, NIT=args.nit)
    print()
    print(' Summary ')
    print('-----------')
    print()
    print('layer name                       out cvar  rate cvar  rate mean')
    for l in n._layer_names:
        if l in cvar and l in cv and l in gr:
            print('%-30s %10.2f %10.2f %10.2f' % (l, cvar[l], cv[l], gr[l]))
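# Example invocation (hedged: the script filename is an assumption, since the
# surrounding module is not shown). It loads a prototxt plus optional weights
# and prints per-layer output and gradient statistics:
#
#   python measure_stat.py deploy.prototxt -l weights.caffemodel -nit 10 --gpu 0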
def test_tail(self, last_top):
    n = NetSpec()
    n.accuracy = L.Accuracy(bottom=[last_top, "label"],
                            include=dict(phase=caffe.TEST))
    return n.to_proto()
def train_tail(self, last_top):
    n = NetSpec()
    n.loss = L.SoftmaxWithLoss(bottom=[last_top, "label"])
    return n.to_proto()
def deploy_tail(self, last_top):
    n = NetSpec()
    n.score = L.Softmax(bottom=last_top)
    return n.to_proto()
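# Sketch of how the head and tail builders above could be combined into a
# single prototxt (an assumption about the surrounding workflow, which is not
# shown in this excerpt). Concatenating text-format NetParameter messages is
# expected to work because 'layer' is a repeated field; `body_proto_str` is a
# hypothetical string holding the trunk layers that produce `last_top`.
def assemble_net_prototxt(head_proto, body_proto_str, tail_proto, out_path):
    with open(out_path, 'w') as f:
        f.write(str(head_proto))   # data layers (carries the net name)
        f.write(body_proto_str)    # trunk layers producing `last_top`
        f.write(str(tail_proto))   # loss / accuracy / softmax layers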
def main():
    from argparse import ArgumentParser
    from os import path
    import numpy as np
    parser = ArgumentParser()
    parser.add_argument('prototxt')
    parser.add_argument('output_caffemodel')
    parser.add_argument('-l', '--load',
                        help='Load a pretrained model and rescale it '
                             '[bias and type are not supported]')
    parser.add_argument('-d', '--data', default=None,
                        help='Image list to use [default prototxt data]')
    parser.add_argument('-b', '--bias', type=float, default=0.1, help='Bias')
    parser.add_argument('-t', '--type', default='elwise',
                        help='Type: elwise, pca, zca, kmeans, rand (random '
                             'input patches). Add fast_ to speed up the '
                             'initialization, but you might lose in precision.')
    parser.add_argument('--zero_from', default=None,
                        help='Zero weights starting from this layer and '
                             'reinitialize')
    parser.add_argument('-z', action='store_true',
                        help='Zero all weights and reinitialize')
    parser.add_argument('--post_zero_from', default=None,
                        help='AFTER everything else, zero weights starting '
                             'from this layer (they will NOT be reinitialized)')
    parser.add_argument('-cs', action='store_true', help='Correct for scaling')
    parser.add_argument('-q', action='store_true', help='Quiet execution')
    parser.add_argument('-s', type=float, default=1.0,
                        help='Scale the input [only custom data "-d"]')
    parser.add_argument('-bs', type=int, default=16,
                        help='Batch size [only custom data "-d"]')
    parser.add_argument('-nit', type=int, default=10,
                        help='Number of iterations')
    parser.add_argument('--mem-limit', type=int, default=500,
                        help='How much memory should we use for the data '
                             'buffer (MB)?')
    parser.add_argument('--gpu', type=int, default=0,
                        help='What gpu to run it on?')
    args = parser.parse_args()

    if args.q:
        from os import environ
        environ['GLOG_minloglevel'] = '2'
    import caffe, load
    from caffe import NetSpec, layers as L
    caffe.set_mode_gpu()
    if args.gpu is not None:
        caffe.set_device(args.gpu)

    if args.data is not None:
        model = load.ProtoDesc(args.prototxt)
        net = NetSpec()
        fl = getFileList(args.data)
        if len(fl) == 0:
            print("Unknown data type for '%s'" % args.data)
            exit(1)
        from tempfile import NamedTemporaryFile
        f = NamedTemporaryFile('w')
        f.write('\n'.join([path.abspath(i) + ' 0' for i in fl]))
        f.flush()
        net.data, net.label = L.ImageData(
            source=f.name, batch_size=args.bs,
            new_width=model.input_dim[-1], new_height=model.input_dim[-1],
            transform_param=dict(mean_value=[104, 117, 123], scale=args.s),
            ntop=2)
        net.out = model(data=net.data, label=net.label)
        n = netFromString('force_backward:true\n' + str(net.to_proto()),
                          caffe.TRAIN)
    else:
        n = caffe.Net(args.prototxt, caffe.TRAIN)

    if args.load is not None:
        n.copy_from(args.load)
    # Rescale existing layers?
    # if args.fix:
    #     magicFix(n, args.nit)

    if args.z or args.zero_from:
        zeroLayers(n, start=args.zero_from)
    if any([np.abs(l.blobs[0].data).sum() < 1e-10 for l in n.layers
            if len(l.blobs) > 0]):
        print([m for l, m in zip(n.layers, n._layer_names)
               if len(l.blobs) > 0 and np.abs(l.blobs[0].data).sum() < 1e-10])
        magicInitialize(n, args.bias, NIT=args.nit, type=args.type,
                        max_data=args.mem_limit * 1024 * 1024 / 4)
    else:
        print("Network already initialized, skipping magic init")
    if args.cs:
        # A simple helper function that lets you figure out which layers are
        # not homogeneous
        # print(estimateHomogenety(n))
        print('Calibrating gradient ratio')
        calibrateGradientRatio(n)
    if args.post_zero_from:
        zeroLayers(n, start=args.post_zero_from)
    n.save(args.output_caffemodel)
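# Example invocation (hedged: the script filename is an assumption). This runs
# the data-driven initialization with the default 'elwise' scheme and writes
# the freshly initialized weights to init.caffemodel:
#
#   python magic_init.py train_val.prototxt init.caffemodel -nit 10 -t elwise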
def get_phocnet(self, word_image_lmdb_path, phoc_lmdb_path, phoc_size=604,
                generate_deploy=False):
    '''
    Returns a NetSpec definition of the PHOCNet. The definition can then be
    transformed into a protobuffer message by casting it into a str.
    '''
    n = NetSpec()
    relu_in_place = True

    # Data
    if generate_deploy:
        n.word_images = L.Input(shape=dict(dim=[1, 1, 100, 250]))
        relu_in_place = False
    else:
        n.word_images, n.label = L.Data(batch_size=1, backend=P.Data.LMDB,
                                        source=word_image_lmdb_path,
                                        prefetch=20,
                                        transform_param=dict(mean_value=255,
                                                             scale=-1. / 255),
                                        ntop=2)
        n.phocs, n.label_phocs = L.Data(batch_size=1, backend=P.Data.LMDB,
                                        source=phoc_lmdb_path, prefetch=20,
                                        ntop=2)

    # Conv Part
    n.conv1_1, n.relu1_1 = self.conv_relu(n.word_images, nout=64,
                                          relu_in_place=relu_in_place)
    n.conv1_2, n.relu1_2 = self.conv_relu(n.relu1_1, nout=64,
                                          relu_in_place=relu_in_place)
    n.pool1 = L.Pooling(n.relu1_2, pooling_param=dict(pool=P.Pooling.MAX,
                                                      kernel_size=2, stride=2))
    n.conv2_1, n.relu2_1 = self.conv_relu(n.pool1, nout=128,
                                          relu_in_place=relu_in_place)
    n.conv2_2, n.relu2_2 = self.conv_relu(n.relu2_1, nout=128,
                                          relu_in_place=relu_in_place)
    n.pool2 = L.Pooling(n.relu2_2, pooling_param=dict(pool=P.Pooling.MAX,
                                                      kernel_size=2, stride=2))
    n.conv3_1, n.relu3_1 = self.conv_relu(n.pool2, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv3_2, n.relu3_2 = self.conv_relu(n.relu3_1, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv3_3, n.relu3_3 = self.conv_relu(n.relu3_2, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv3_4, n.relu3_4 = self.conv_relu(n.relu3_3, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv3_5, n.relu3_5 = self.conv_relu(n.relu3_4, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv3_6, n.relu3_6 = self.conv_relu(n.relu3_5, nout=256,
                                          relu_in_place=relu_in_place)
    n.conv4_1, n.relu4_1 = self.conv_relu(n.relu3_6, nout=512,
                                          relu_in_place=relu_in_place)
    n.conv4_2, n.relu4_2 = self.conv_relu(n.relu4_1, nout=512,
                                          relu_in_place=relu_in_place)
    n.conv4_3, n.relu4_3 = self.conv_relu(n.relu4_2, nout=512,
                                          relu_in_place=relu_in_place)

    # FC Part
    n.spp5 = L.SPP(n.relu4_3,
                   spp_param=dict(pool=P.SPP.MAX, pyramid_height=3,
                                  engine=self.spp_engine))
    n.fc6, n.relu6, n.drop6 = self.fc_relu(bottom=n.spp5, layer_size=4096,
                                           dropout_ratio=0.5,
                                           relu_in_place=relu_in_place)
    n.fc7, n.relu7, n.drop7 = self.fc_relu(bottom=n.drop6, layer_size=4096,
                                           dropout_ratio=0.5,
                                           relu_in_place=relu_in_place)
    n.fc8 = L.InnerProduct(n.drop7, num_output=phoc_size,
                           weight_filler=dict(type=self.initialization),
                           bias_filler=dict(type='constant'))
    n.sigmoid = L.Sigmoid(n.fc8, include=dict(phase=self.phase_test))

    # output part
    if not generate_deploy:
        n.silence = L.Silence(n.sigmoid, ntop=0,
                              include=dict(phase=self.phase_test))
        n.loss = L.SigmoidCrossEntropyLoss(n.fc8, n.phocs)

    return n.to_proto()
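# The conv_relu and fc_relu helpers called above are not part of this excerpt.
# The sketch below is an assumption inferred only from how they are called
# (VGG-style 3x3 convolutions with padding 1 are assumed); it is not the
# original implementation.
def conv_relu(self, bottom, nout, kernel_size=3, pad=1, relu_in_place=True):
    # Convolution followed by ReLU; fillers mirror those used for fc8 above.
    conv = L.Convolution(bottom, kernel_size=kernel_size, pad=pad,
                         num_output=nout,
                         weight_filler=dict(type=self.initialization),
                         bias_filler=dict(type='constant'))
    return conv, L.ReLU(conv, in_place=relu_in_place)

def fc_relu(self, bottom, layer_size, dropout_ratio=0.0, relu_in_place=True):
    # InnerProduct followed by ReLU and Dropout, matching the three tops
    # (fc, relu, drop) unpacked by the caller.
    fc = L.InnerProduct(bottom, num_output=layer_size,
                        weight_filler=dict(type=self.initialization),
                        bias_filler=dict(type='constant'))
    relu = L.ReLU(fc, in_place=relu_in_place)
    drop = L.Dropout(relu, dropout_ratio=dropout_ratio, in_place=True)
    return fc, relu, drop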
def __init__(self, number_of_neighbors, num_output=100, lr_mult=1):
    # lr_mult is accepted but not stored by this constructor.
    self.number_of_neighbors = number_of_neighbors
    # self.netP =
    self.net = NetSpec()
    self.shared_weight_counter = 0
    self.num_output = num_output