def loadStateDict(self):
    """Copy ``self.state_dict`` into ``self.net`` and prepare the net for inference.

    When ``self.state_dict`` is falsy nothing is loaded and a log line is
    emitted instead. Otherwise the weights are loaded non-strictly, the
    network is switched to eval mode and rebound to its copy on
    ``self.device``.
    """
    if self.state_dict:
        # strict=False: mismatching keys are tolerated by load_state_dict.
        self.net.load_state_dict(self.state_dict, strict=False)
        self.net.eval()
        self.net = self.net.to(self.device)
        return

    LOG.logI("self.state_dict not initialized, omit loadStateDict()")
def process(self):
    """Send every dataset image to the remote service and record the outcome.

    For each path yielded by ``self.dataset()`` a task is prepared with
    ``self.getTask``, the request is submitted through ``self.clt`` and the
    JSON response is decoded. Failed requests, failed per-image results and
    images whose parent directory name is not in ``config.label_list`` are
    logged and skipped. Successful predictions are added to ``self.report``
    as (ground-truth index, predicted index).
    """
    label_list = config.label_list

    for image_path in self.dataset():
        self.getTask(image_path)
        payload = HttpContentHelper.toValue({
            "tasks": [self.task],
            "scenes": [config.scenes]
        })
        self.request.set_content(payload)

        result = json.loads(self.clt.do_action_with_exception(self.request))

        # Top-level status covers the request as a whole.
        if result['code'] != 200:
            LOG.logE(
                f"got request error! please checkout your AccessKeyId and Secret"
            )
            continue

        # Per-image status lives in the first entry of 'data'.
        if result['data'][0]['code'] != 200:
            LOG.logE(
                f"got error! [file: {image_path}, msg: {result['data'][0]['msg']}]"
            )
            continue

        pred = result['data'][0]['results'][0]['label']
        # Ground truth is the name of the directory containing the image.
        label = image_path.split('/')[-2]

        if label not in label_list:
            LOG.logE(f"got error! test file structure not standard")
            continue

        LOG.logI(
            f"got Success! [file: {image_path}, label: {label}, pred: {pred}]"
        )
        # NOTE(review): index(pred) raises ValueError if the service returns a
        # label that is not in label_list - confirm this cannot happen.
        truth_idx = label_list.index(label)
        pred_idx = label_list.index(pred)
        self.report.add(truth_idx, pred_idx)
def process(self):
    """Run the train / validate / accept cycle for every remaining epoch.

    ``self.iter`` is reset to 0, then epochs from the current ``self.epoch``
    up to (but excluding) ``self.conf.epoch_num`` are executed; ``self.epoch``
    is updated at the start of each one.
    """
    self.iter = 0
    first_epoch = self.epoch
    last_epoch = self.conf.epoch_num

    for current in range(first_epoch, last_epoch):
        self.epoch = current
        LOG.logI('Epoch {} started...'.format(self.epoch))
        self.processTrain()
        self.processVal()
        self.processAccept()
def initOutputDir(self):
    """Compute ``self.output_dir`` and make sure the directory exists.

    The directory is ``<conf.output_dir>/<branch>``. A warning is logged when
    ``conf.output_dir`` is anything other than ``'output'``.
    """
    if self.conf.output_dir != 'output':
        LOG.logW("According deepvac standard, you should save model files to output directory.")

    self.output_dir = '{}/{}'.format(self.conf.output_dir, self.branch)
    LOG.logI('model save dir: {}'.format(self.output_dir))

    # exist_ok=True removes the check-then-create race: several processes
    # (e.g. one per rank) may reach this point at the same time, and a
    # separate os.path.exists() test could let two of them call makedirs.
    os.makedirs(self.output_dir, exist_ok=True)
def exportONNX(self, img):
    """Export ``self.net`` to an ONNX file using ``img`` as the example input.

    Does nothing when ``self.conf.onnx_output_model_path`` is not set.

    Args:
        img: example input passed to the exporter to run the model.
    """
    if not self.conf.onnx_output_model_path:
        return

    # Use the public exporter entry point; the underscore-prefixed
    # torch.onnx._export is a private API and not guaranteed to exist.
    torch.onnx.export(self.net,
                      img,
                      self.conf.onnx_output_model_path,
                      export_params=True)
    LOG.logI(
        "Pytorch model convert to ONNX model succeed, save model in {}".
        format(self.conf.onnx_output_model_path))
def initDDP(self):
    """Parse the DDP command-line options and join the process group.

    Reads ``--gpu`` and ``--rank`` (both default to -1) into ``self.args``,
    stores a ``'cuda:0' -> 'cuda:<rank>'`` mapping in ``self.map_location``,
    initialises the NCCL process group from ``self.conf.dist_url`` /
    ``self.conf.world_size`` and selects the CUDA device given by ``--gpu``.
    """
    cli = argparse.ArgumentParser(description='DeepvacDDP')
    cli.add_argument("--gpu", default=-1, type=int, help="gpu")
    cli.add_argument('--rank',
                     default=-1,
                     type=int,
                     help='node rank for distributed training')
    self.args = cli.parse_args()

    # presumably handed to torch.load as map_location by the caller - confirm
    source_device = 'cuda:%d' % 0
    target_device = 'cuda:%d' % self.args.rank
    self.map_location = {source_device: target_device}

    LOG.logI("Start dist.init_process_group {} {}@{} on {}".format(
        self.conf.dist_url, self.args.rank, self.conf.world_size - 1,
        self.args.gpu))
    dist.init_process_group(backend='nccl',
                            init_method=self.conf.dist_url,
                            world_size=self.conf.world_size,
                            rank=self.args.rank)
    torch.cuda.set_device(self.args.gpu)
def exportNCNN(self, img):
    """Export ``self.net`` to NCNN param/bin files via ONNX and ``onnx2ncnn``.

    Does nothing unless both ``conf.ncnn_param_output_path`` and
    ``conf.ncnn_bin_output_path`` are set. The model is first exported to
    ONNX (to a temporary file when ``conf.onnx_output_model_path`` is unset),
    then converted with the ``conf.onnx2ncnn`` executable. If the converter
    writes anything to stderr, the ONNX model is simplified with ``onnxsim``
    and the conversion is run once more.

    Args:
        img: example input forwarded to ``exportONNX``.
    """
    if not self.conf.ncnn_param_output_path or not self.conf.ncnn_bin_output_path:
        return
    if not self.conf.onnx2ncnn:
        LOG.logE(
            "You must set the onnx2ncnn executable program path in config file. If you want to compile onnx2ncnn tools, reference https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux-x86 ",
            exit=True)

    import onnx
    import subprocess
    import tempfile
    from onnxsim import simplify

    # Keep a reference to the temporary file for the whole function: it is
    # removed as soon as the object is closed or garbage collected.
    onnx_tmp_file = None
    if not self.conf.onnx_output_model_path:
        onnx_tmp_file = tempfile.NamedTemporaryFile()
        self.conf.onnx_output_model_path = onnx_tmp_file.name
    self.exportONNX(img)

    # NOTE(review): the command is a shell string built from config values;
    # kept as-is for compatibility, but paths with spaces or shell
    # metacharacters will not survive. Consider an argument list.
    cmd = self.conf.onnx2ncnn + " " + self.conf.onnx_output_model_path + " " + self.conf.ncnn_param_output_path + " " + self.conf.ncnn_bin_output_path

    def run_converter():
        # run() waits for completion and drains both pipes; stderr is
        # captured exactly once so the text is still available for logging.
        finished = subprocess.run(cmd,
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        return finished.stderr

    first_err = run_converter()
    if first_err != b"":
        LOG.logE(
            first_err +
            b". Error occured when export ncnn model. We try to simplify the model first"
        )
        model_op, check_ok = simplify(self.conf.onnx_output_model_path,
                                      check_n=3,
                                      perform_optimization=True,
                                      skip_fuse_bn=True,
                                      skip_shape_inference=False)
        onnx.save(model_op, self.conf.onnx_output_model_path)
        if not check_ok:
            LOG.logE(
                "Maybe something wrong when simplify the model, we can't guarantee generate model is right"
            )
        else:
            LOG.logI("Simplify model succeed")

        # Retry on the simplified model and inspect *this* run's stderr.
        retry_err = run_converter()
        if retry_err != b"":
            LOG.logE(retry_err +
                     b". we can't guarantee generate model is right")

    LOG.logI(
        "Pytorch model convert to NCNN model succeed, save ncnn param file in {}, save ncnn bin file in {}"
        .format(self.conf.ncnn_param_output_path,
                self.conf.ncnn_bin_output_path))
def initCheckpoint(self, map_location=None):
    """Restore network and optimizer state from ``self.output_dir``.

    Files are expected at ``<output_dir>/model:<suffix>`` and
    ``<output_dir>/optimizer:<suffix>`` where ``<suffix>`` is
    ``self.conf.checkpoint_suffix``. Nothing is loaded when the suffix is
    unset or empty.

    Args:
        map_location: forwarded unchanged to ``torch.load``.
    """
    # Read the suffix from the same config object the guard checks; the
    # path construction previously used a bare ``conf`` name instead.
    suffix = self.conf.checkpoint_suffix
    if not suffix:
        LOG.logI('Omit the checkpoint file since not specified...')
        return

    LOG.logI('Load checkpoint from {} folder'.format(self.output_dir))
    model_state = torch.load(self.output_dir + '/model:{}'.format(suffix),
                             map_location=map_location)
    self.net.load_state_dict(model_state)
    optimizer_state = torch.load(self.output_dir +
                                 '/optimizer:{}'.format(suffix),
                                 map_location=map_location)
    self.optimizer.load_state_dict(optimizer_state)
def initStateDict(self):
    """Load ``self.model_path`` into ``self.state_dict`` and audit its keys.

    The file is loaded onto the current CUDA device. If the loaded object has
    a ``"state_dict"`` entry, that entry is used; otherwise the object itself
    is. A leading ``'module.'`` is stripped from every key. The resulting keys
    are then compared with ``self.net.state_dict()`` and the counts of
    missing / unused / used keys are logged.

    Raises:
        AssertionError: if no key matches the network, or if the network has
            keys that the loaded state dict lacks.
    """
    LOG.logI('Loading State Dict from {}'.format(self.model_path))
    device = torch.cuda.current_device()
    self.state_dict = torch.load(
        self.model_path,
        map_location=lambda storage, loc: storage.cuda(device))

    # remove prefix begin
    prefix = 'module.'

    def strip_prefix(key):
        if key.startswith(prefix):
            return key[len(prefix):]
        return key

    if "state_dict" in self.state_dict:
        raw_items = self.state_dict['state_dict']
    else:
        raw_items = self.state_dict
    self.state_dict = {
        strip_prefix(key): value
        for key, value in raw_items.items()
    }
    # remove prefix end

    # just do audit on model file
    loaded_keys = set(self.state_dict.keys())
    net_keys = set(self.net.state_dict().keys())
    shared = net_keys & loaded_keys
    surplus = loaded_keys - net_keys
    absent = net_keys - loaded_keys

    LOG.logI('Missing keys:{}'.format(len(absent)))
    LOG.logI('Unused checkpoint keys:{}'.format(len(surplus)))
    LOG.logI('Used keys:{}'.format(len(shared)))
    assert len(shared) > 0, 'load NONE from pretrained model'
    assert len(absent) == 0, 'Net mismatched with pretrained model'
def processVal(self):
    """Run one validation pass over ``self.loader`` and save the state.

    The validation context is set up first; the whole pass (pre-epoch hook,
    per-batch forward + loss, post-epoch hook) runs with gradients disabled.
    ``saveState`` is called afterwards with the value of ``getTime()``.
    """
    self.setValContext()
    LOG.logI('Phase {} started...'.format(self.phase))

    with torch.no_grad():
        self.preEpoch()
        for batch_no, batch in enumerate(self.loader):
            img, idx = batch
            self.idx = idx
            self.img = img

            self.preIter()
            self.doForward()
            self.doLoss()
            LOG.logI('{}: [{}][{}/{}]'.format(self.phase, self.epoch,
                                              batch_no, len(self.loader)))
            self.postIter()
        self.postEpoch()

    self.saveState(self.getTime())
def __init__(self, deepvac_config):
    """Read the file-line index and build the sample list.

    Every line of ``deepvac_config.fileline_path`` is converted to a sample
    by ``self._buildLabelFromLine``; element ``[1]`` of each sample is used
    to count the distinct classes.

    Args:
        deepvac_config: object providing ``fileline_data_path_prefix``,
            ``fileline_path`` and ``transform``.
    """
    self.path_prefix = deepvac_config.fileline_data_path_prefix
    self.fileline_path = deepvac_config.fileline_path
    self.transform = deepvac_config.transform

    self.samples = []
    with open(self.fileline_path) as index_file:
        self.samples.extend(
            self._buildLabelFromLine(raw) for raw in index_file)

    class_marks = [sample[1] for sample in self.samples]
    self.len = len(self.samples)
    self.class_num = len(np.unique(class_marks))
    LOG.logI('FileLineDataset size: {} / {}'.format(
        self.len, self.class_num))
def __init__(self, deepvac_config):
    """Read the file-line index and build the sample list.

    Each non-blank line of ``deepvac_config.dataset.fileline_path`` must hold
    at least two whitespace-separated fields: the first is stored as-is, the
    second is converted to ``int`` and used as the class label. Further
    fields are ignored.

    Args:
        deepvac_config: object whose ``dataset`` attribute provides
            ``fileline_data_path_prefix``, ``fileline_path`` and
            ``transform``.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if the second field is not an integer.
    """
    dataset_conf = deepvac_config.dataset
    self.path_prefix = dataset_conf.fileline_data_path_prefix
    self.fileline_path = dataset_conf.fileline_path
    self.transform = dataset_conf.transform

    self.samples = []
    with open(self.fileline_path) as index_file:
        for raw in index_file:
            # split() with no argument collapses runs of spaces/tabs, so
            # "a.jpg  3" and "a.jpg\t3" parse the same as "a.jpg 3".
            fields = raw.split()
            if not fields:
                # Blank lines (typically a trailing newline at end of file)
                # carry no sample.
                continue
            self.samples.append([fields[0], int(fields[1])])

    self.len = len(self.samples)
    self.class_num = len({label for _, label in self.samples})
    LOG.logI('FileLineDataset size: {} / {}'.format(
        self.len, self.class_num))
def separateBN4OptimizerPG(self, modules):
    """Split parameters into batch-norm and non-batch-norm groups.

    Only sub-modules whose qualified name contains ``"module"`` are visited;
    all others are logged and skipped. A module counts as batch-norm when
    ``"batchnorm"`` occurs in the string form of its class. Each parameter
    object is placed in at most one list, even if shared between modules.

    Args:
        modules: object exposing ``parameters()`` and ``named_modules()``.

    Returns:
        ``(paras_only_bn, paras_wo_bn)`` - two lists of parameters.
    """
    paras_only_bn = []
    paras_wo_bn = []
    memo = set()

    gemfield_set = set(modules.parameters())
    LOG.logI("separateBN4OptimizerPG set len: {}".format(
        len(gemfield_set)))

    for module_prefix, module in modules.named_modules(prefix=''):
        # NOTE(review): this also skips the root module (empty prefix) and
        # every module of a net not wrapped under a "module" attribute -
        # presumably intended for DataParallel-style wrappers; confirm.
        if "module" not in module_prefix:
            LOG.logI(
                "separateBN4OptimizerPG skip {}".format(module_prefix))
            continue

        # The class test does not depend on the parameter, so do it once.
        target = (paras_only_bn
                  if "batchnorm" in str(module.__class__) else paras_wo_bn)
        for v in module._parameters.values():
            if v is None or v in memo:
                continue
            memo.add(v)
            target.append(v)

    LOG.logI("separateBN4OptimizerPG param len: {} - {}".format(
        len(paras_wo_bn), len(paras_only_bn)))
    return paras_only_bn, paras_wo_bn
def processTrain(self):
    """Run one training epoch over ``self.loader``.

    For every batch the hooks ``preIter`` / ``doForward`` / ``doLoss`` /
    ``doBackward`` / ``doOptimize`` / ``postIter`` run in that order. Loss
    and learning rate are logged every ``conf.log_every`` steps. At the
    steps listed in ``self.save_list`` a validation pass (``processVal``)
    is run and the training context is restored afterwards.

    ``self.save_list`` holds the interior multiples of
    ``len(loader) // conf.save_num`` (first and last entries dropped).
    """
    self.setTrainContext()
    self.step = 0
    LOG.logI('Phase {} started...'.format(self.phase))
    self.preEpoch()

    loader_len = len(self.loader)
    save_num = self.conf.save_num
    if save_num > 0:
        # Clamp to 1: when save_num exceeds the number of batches the
        # integer division yields 0, which range() rejects as a step.
        save_every = max(1, loader_len // save_num)
        save_list = list(range(0, loader_len + 1, save_every))
    else:
        # Zero (or negative) save_num: no intermediate checkpoints,
        # instead of a ZeroDivisionError.
        save_list = []
    self.save_list = save_list[1:-1]
    LOG.logI('SAVE LIST: {}'.format(self.save_list))

    for i, (img, idx) in enumerate(self.loader):
        self.step = i
        self.iter += 1
        self.idx = idx
        self.img = img

        self.preIter()
        self.doForward()
        self.doLoss()
        self.doBackward()
        self.doOptimize()

        if i % self.conf.log_every == 0:
            LOG.logI('{}: [{}][{}/{}] [Loss:{} Lr:{}]'.format(
                self.phase, self.epoch, self.step, loader_len,
                self.loss.item(), self.optimizer.param_groups[0]['lr']))
        self.postIter()

        if self.step in self.save_list:
            self.processVal()
            # processVal switched to the validation context; switch back.
            self.setTrainContext()

    self.postEpoch()
def __call__(self):
    """Run OCR on every file in ``self.test_dataset`` and print the report.

    Each image is sent to ``self.client.basicAccurate``. The stripped
    ``'words'`` of all ``'words_result'`` entries are concatenated into the
    prediction and compared with ``self.labels[img_name]`` through
    ``OcrReport``. A response containing ``'error_code'`` is recorded as an
    empty prediction. After each successful request the loop sleeps for one
    second.
    """
    img_names = os.listdir(self.test_dataset)
    ocr_report = OcrReport(ds_name='deepvac_ocr1.0_test_plate',
                           total_num=len(img_names))

    for img_name in img_names:
        img_path = os.path.join(self.test_dataset, img_name)
        image = self.get_file_content(img_path)

        # High-accuracy general OCR API
        res = self.client.basicAccurate(image)
        # General OCR API
        # res = self.client.basicGeneral(image)

        if 'error_code' in res:
            # Failed request: score it as an empty prediction.
            ocr_report.add(self.labels[img_name], '')
            LOG.logI('error code:{}'.format(res))
            continue

        pred = ''.join(dic['words'].strip() for dic in res['words_result'])
        ocr_report.add(self.labels[img_name], pred)
        LOG.logI('{}: {}'.format(img_name, res))
        time.sleep(1)

    ocr_report()