def _net_output_for_word_list(self, word_list, cnn, min_img_width_height=32, image_size=None, max_pixel=50000, input_layer='word_images', output_layer='sigmoid', suppress_caffe_output=False):
    '''
    Forwards every word image in word_list through the given PHOCNet and
    stacks the flattened activations of output_layer into one matrix.

    Args:
        word_list: sequence of word objects providing get_word_image()
        cnn: a caffe.Net in TEST phase
        min_img_width_height: kept for interface compatibility (sizing is
            delegated to self._check_size here)
        image_size: target size passed through to self._check_size
        max_pixel: kept for interface compatibility (unused in this body)
        input_layer: name of the net's input blob
        output_layer: name of the blob whose output is collected
        suppress_caffe_output: if True, silence Caffe's console chatter
            during the forward pass

    Returns:
        np.ndarray of shape (len(word_list), output_dim) with one row
        per word image.
    '''
    outputs = []
    n_words = len(word_list)
    for idx, word in enumerate(word_list):
        # scale to correct pixel values (0 = background, 1 = text)
        img = word.get_word_image().astype(np.float32)
        img -= 255.0
        img /= -255.0
        # resize to the expected input geometry if necessary
        img, _ = self._check_size(img, image_size)
        img = img.reshape((1, 1,) + img.shape).astype(np.float32)
        # adapt the PHOCNet's input blob to the current image shape
        cnn.blobs[input_layer].reshape(*img.shape)
        cnn.reshape()
        # push the word image through the net
        cnn.blobs[input_layer].data[...] = img
        if suppress_caffe_output:
            with Suppressor():
                phoc = cnn.forward()[output_layer].flatten()
        else:
            phoc = cnn.forward()[output_layer].flatten()
        outputs.append(phoc)
        # progress log every 100 words and on the final word
        if (idx + 1) % 100 == 0 or idx + 1 == n_words:
            self.logger.info(' [ %*d / %d ]', len(str(n_words)), idx + 1, n_words)
    return np.vstack(outputs)
def __solver_step(self, solver, steps):
    '''
    Advances the given Caffe solver by the requested number of
    iterations, hiding Caffe's console output unless debug mode is on.
    '''
    if self.debug_mode:
        solver.step(steps)
    else:
        # silence Caffe's per-iteration chatter
        with Suppressor():
            solver.step(steps)
def _load_pretrained_dense_net(self, phocnet_bin_path, gpu_id, dense_net, debug_mode):
    '''
    Instantiates a Dense PHOCNet in TEST phase from a deploy proto and a
    pretrained weights file.

    Args:
        phocnet_bin_path: path to the pretrained .caffemodel/.binaryproto
        gpu_id: unused in this body (kept for interface compatibility)
        dense_net: path to the dense net's deploy prototxt
        debug_mode: if False, Caffe's loading output is suppressed

    Returns:
        the loaded caffe.Net
    '''
    self.logger.info('Creating Dense PHOCNet...')
    if debug_mode:
        return caffe.Net(dense_net, phocnet_bin_path, caffe.TEST)
    # hide Caffe's model-loading chatter when not debugging
    with Suppressor():
        return caffe.Net(dense_net, phocnet_bin_path, caffe.TEST)
def __get_solver(self, solver_proto_path):
    '''
    Returns a caffe.SGDSolver for the given protofile path, ignoring
    Caffe command line chatter if debug mode is not set to True.
    '''
    if self.debug_mode:
        return caffe.SGDSolver(solver_proto_path)
    # disable Caffe init chatter when not in debug
    with Suppressor():
        return caffe.SGDSolver(solver_proto_path)
def _load_pretrained_phocnet(self, phocnet_bin_path, gpu_id, debug_mode, deploy_proto_path, phoc_size):
    '''
    Generates a deploy prototxt for a PHOCNet, writes it to disk and
    loads the pretrained weights into a TEST-phase net.

    Args:
        phocnet_bin_path: path to the pretrained weights file
        gpu_id: if not None, the generated proto uses the cuDNN engine
        debug_mode: if False, Caffe's loading output is suppressed
        deploy_proto_path: where the generated deploy proto is saved
        phoc_size: dimensionality of the PHOC output layer

    Returns:
        the loaded caffe.Net
    '''
    # generate and persist the deploy proto file
    self.logger.info('Saving PHOCNet deploy proto file to %s...', deploy_proto_path)
    proto_generator = ModelProtoGenerator(initialization='msra',
                                          use_cudnn_engine=gpu_id is not None)
    deploy_proto = proto_generator.get_phocnet(word_image_lmdb_path=None,
                                               phoc_lmdb_path=None,
                                               phoc_size=phoc_size,
                                               generate_deploy=True)
    with open(deploy_proto_path, 'w') as proto_file:
        proto_file.write(str(deploy_proto))
    # load the pretrained weights into the freshly generated net
    self.logger.info('Creating PHOCNet...')
    if debug_mode:
        return caffe.Net(deploy_proto_path, phocnet_bin_path, caffe.TEST)
    with Suppressor():
        return caffe.Net(deploy_proto_path, phocnet_bin_path, caffe.TEST)
def main(): parser = argparse.ArgumentParser() # required training parameters parser.add_argument('--net_file', '-f', action='store', type=str, help='The location of the net file.') parser.add_argument('--dense', '-d', action='store_true', help='dense network') params = vars(parser.parse_args()) caffe.set_mode_cpu() deploy_file = params["net_file"] print "Net: " + deploy_file with Suppressor(): net = caffe.Net(deploy_file, caffe.TEST) print "Layer-wise parameters: " pprint([(k, v[0].data.shape, prod(v[0].data.shape) ) for k, v in net.params.items()]) total = sum([prod(v[0].data.shape) for k, v in net.params.items()]) if params["dense"]: fc = sum([prod(net.params["fc6_d"][0].data.shape),prod(net.params["fc7_d"][0].data.shape),prod(net.params["fc8_d"][0].data.shape)]) else: fc = sum([prod(net.params["fc6"][0].data.shape),prod(net.params["fc7"][0].data.shape),prod(net.params["fc8"][0].data.shape)]) conv = total-fc print "Total number of parameters: " + str(total) print "Convolutional: " + str(conv) print "FC: " + str(fc) if params["dense"]: print "Depth at Tpp: " + str(net.params["fc6_d"][0].data.shape[1]/15) else: print "Depth at Tpp: " + str(net.params["fc6"][0].data.shape[1]/15)
def _net_output_for_word_list(self, word_list, cnn, min_img_width_height=26, input_layer='word_images', output_layer='sigmoid', suppress_caffe_output=False):
    '''
    Forwards every word image in word_list through the given PHOCNet and
    stacks the flattened activations of output_layer into one matrix.

    Args:
        word_list: sequence of word objects providing get_word_image()
        cnn: a caffe.Net in TEST phase
        min_img_width_height: images whose smaller side is below this
            value are upscaled before the forward pass
        input_layer: name of the net's input blob
        output_layer: name of the blob whose output is collected
        suppress_caffe_output: if True, silence Caffe's console chatter
            during the forward pass

    Returns:
        np.ndarray of shape (len(word_list), output_dim) with one row
        per word image.
    '''
    n_words = len(word_list)
    outputs = []
    for idx, word in enumerate(word_list):
        # scale to correct pixel values (0 = background, 1 = text)
        img = word.get_word_image().astype(np.float32)
        img -= 255.0
        img /= -255.0
        # upscale images whose smaller side falls below the minimum
        smallest_side = np.amin(img.shape[:2])
        if smallest_side < min_img_width_height:
            scale = float(min_img_width_height + 1) / float(smallest_side)
            new_shape = (int(scale * img.shape[0]), int(scale * img.shape[1]))
            img = resize(image=img, output_shape=new_shape)
        img = img.reshape((1, 1,) + img.shape).astype(np.float32)
        # adapt the PHOCNet's input blob to the current image shape
        cnn.blobs[input_layer].reshape(*img.shape)
        cnn.reshape()
        # push the word image through the net
        cnn.blobs[input_layer].data[...] = img
        if suppress_caffe_output:
            with Suppressor():
                phoc = cnn.forward()[output_layer].flatten()
        else:
            phoc = cnn.forward()[output_layer].flatten()
        outputs.append(phoc)
        # progress log every 100 words and on the final word
        if (idx + 1) % 100 == 0 or idx + 1 == n_words:
            self.logger.info(' [ %*d / %d ]', len(str(n_words)), idx + 1, n_words)
    return np.vstack(outputs)