def recognition(config, img, model, device):
    """Run CRNN text recognition on a single BGR image and return the text.

    The image is resized to fit inside the model's input size while keeping
    the text aspect ratio, padded onto a white canvas, converted to a
    normalized tensor, and greedily CTC-decoded.

    Args:
        config: project config node; reads MODEL.IMAGE_SIZE.{H, W} and
            DATASET.ALPHABETS.
        img: BGR image ndarray of shape (H, W, 3) — assumes 3 channels.
        model: CRNN network producing per-timestep class logits (T, N, C).
        device: kept for interface compatibility; tensors are actually moved
            to GPU via ``.cuda()`` when CUDA is available.

    Returns:
        str: the decoded prediction.
    """
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)

    # PIL Image / ndarray (H, W, C) in [0, 255] -> tensor (C, H, W) in [0.0, 1.0].
    to_tensor = transforms.Compose([
        transforms.ToTensor()
    ])

    # Scale the crop so it fits inside the model input, preserving the
    # width/height ratio of the text.
    h, w, c = img.shape
    r = w * 1.0 / h
    standard_ratio = config.MODEL.IMAGE_SIZE.W * 1.0 / config.MODEL.IMAGE_SIZE.H
    if r > standard_ratio:
        resized_width = config.MODEL.IMAGE_SIZE.W
        resized_height = int(config.MODEL.IMAGE_SIZE.W / r)
    else:
        resized_height = config.MODEL.IMAGE_SIZE.H
        resized_width = int(config.MODEL.IMAGE_SIZE.H * r)
    image = cv2.resize(img, (0, 0), fx=resized_width / w, fy=resized_height / h,
                       interpolation=cv2.INTER_CUBIC)
    image = image.reshape((resized_height, resized_width, 3))

    # Paste the resized crop onto a white canvas of the exact input size.
    bg = np.zeros((config.MODEL.IMAGE_SIZE.H, config.MODEL.IMAGE_SIZE.W, 3), dtype='uint8')
    bg[:] = 255
    bg[:resized_height, :resized_width, :] = image
    image = Image.fromarray(cv2.cvtColor(bg, cv2.COLOR_BGR2RGB))

    image = to_tensor(image)
    image = image.view(1, *image.size())  # add batch dimension
    image = Variable(image)
    if torch.cuda.is_available():
        image = image.cuda()

    # BUG FIX: the original never switched the network to inference mode
    # (the model.eval() call was in commented-out dead code), so dropout /
    # batch-norm layers ran in training mode during prediction; also disable
    # autograd, since no gradients are needed at inference time.
    model.eval()
    with torch.no_grad():
        preds = model(image)

    # Greedy CTC decode: argmax over classes per timestep, then collapse
    # repeats and blanks in the converter.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
def recognize(self, img):
    """Recognize the text in a single BGR image with the CRNN model.

    Args:
        img: BGR image ndarray of shape (H, W, 3).

    Returns:
        str: the decoded prediction.
    """
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    converter = utils.strLabelConverter(self.config.DATASET.ALPHABETS)

    h, w = img.shape
    # First step: resize the image height to the model input height,
    # scaling the width by the same factor to keep the aspect ratio.
    img = cv2.resize(img, (0, 0), fx=self.config.MODEL.IMAGE_SIZE.H / h,
                     fy=self.config.MODEL.IMAGE_SIZE.H / h,
                     interpolation=cv2.INTER_CUBIC)

    # Second step: stretch to the exact training input size so the text
    # ratio matches what the network saw during training.
    h, w = img.shape
    inp_h = self.config.MODEL.IMAGE_SIZE.H
    inp_w = self.config.MODEL.IMAGE_SIZE.W
    # BUG FIX: removed dead local `w_cur` (computed from IMAGE_SIZE.OW but
    # never used by the resize below).
    img = cv2.resize(img, (0, 0), fx=inp_w / w, fy=inp_h / h,
                     interpolation=cv2.INTER_CUBIC)
    img = np.reshape(img, (inp_h, inp_w, 1))

    # Normalize with the dataset mean/std.
    img = img.astype(np.float32)
    img = (img / 255. - self.config.DATASET.MEAN) / self.config.DATASET.STD
    img = img.transpose([2, 0, 1])  # HWC -> CHW

    img = torch.from_numpy(img)
    img = img.to(self.device)
    img = img.view(1, *img.size())  # add batch dimension

    preds = self.model(img)

    # Greedy CTC decode.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
def main():
    """Run CRNN text recognition through a TensorRT engine on one image."""
    raw = cv2.imread(image_path)
    gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
    converter = utils.strLabelConverter(alphabets.alphabet)

    # fisrt step: resize the height and width of image to (32, x)
    height, width = gray.shape
    gray = cv2.resize(gray, (0, 0), fx=32 / height, fy=32 / height,
                      interpolation=cv2.INTER_CUBIC)

    # second step: keep the ratio of image's text same with training
    height, width = gray.shape
    target_w = int(gray.shape[1] / (280 / 160))
    gray = cv2.resize(gray, (0, 0), fx=target_w / width, fy=1.0,
                      interpolation=cv2.INTER_CUBIC)
    net_input = np.reshape(gray, (32, target_w, 1))

    # normalize with the dataset mean/std, then HWC -> CHW
    net_input = net_input.astype(np.float32)
    net_input = (net_input / 255. - 0.588) / 0.193
    net_input = net_input.transpose([2, 0, 1])

    with get_engine1(engine_file) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)
        inputs[0].host = net_input
        trt_outputs = do_inference_v2(context, bindings=bindings,
                                      inputs=inputs, outputs=outputs,
                                      stream=stream)

        # Reshape the flat engine output to (T, N, C) logits and CTC-decode.
        logits = torch.from_numpy(trt_outputs[0].reshape((124, 1, -1)))
        _, preds = logits.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('results: {0}'.format(sim_pred))
def main():
    """Evaluate the CRNN model on the validation split and report accuracy."""
    config, args = parse_arg()

    model = crnn.get_crnn(config)
    criterion = torch.nn.CTCLoss()

    # Use the configured GPU when CUDA is present, otherwise the CPU.
    device = (torch.device("cuda:{}".format(config.GPUID))
              if torch.cuda.is_available() else torch.device("cpu:0"))
    model = model.to(device)

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    acc = validate(config, val_loader, val_dataset, converter, model,
                   criterion, device)
def main():
    """Train the CRNN model: build the network, optionally resume from a
    checkpoint, then run the train/validate loop, checkpointing each epoch."""
    # load config
    config = parse_arg()

    # create output folder for checkpoints and tensorboard logs
    output_dict = utils.create_log_folder(config, phase='train')

    # cudnn
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # writer dict
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # construct the recognition network
    model = crnn.get_crnn(config)

    # get device
    if torch.cuda.is_available():
        device = torch.device("cuda:{}".format(config.GPUID))
    else:
        device = torch.device("cpu:0")
    model = model.to(device)

    # define loss function
    criterion = torch.nn.CTCLoss()

    optimizer = utils.get_optimizer(config, model)

    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        # BUG FIX: the original only printed a warning on an empty path and
        # then crashed inside torch.load('') with a confusing error; fail
        # fast with a clear exception instead.
        if model_state_file == '':
            raise FileNotFoundError(" => no checkpoint found")
        checkpoint = torch.load(model_state_file, map_location='cpu')
        model.load_state_dict(checkpoint['state_dict'])
        last_epoch = checkpoint['epoch']

    # LR_STEP may be a milestone list (MultiStepLR) or a single step (StepLR).
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1
        )

    train_dataset = get_dataset(config)(config, is_train=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    best_acc = 0.5
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        function.train(config, train_loader, train_dataset, converter, model,
                       criterion, optimizer, device, epoch, writer_dict,
                       output_dict)
        lr_scheduler.step()

        acc = function.validate(config, val_loader, val_dataset, converter,
                                model, criterion, device, epoch, writer_dict,
                                output_dict)

        is_best = acc > best_acc
        best_acc = max(acc, best_acc)

        print("is best:", is_best)
        print("best acc is:", best_acc)
        # save checkpoint
        torch.save(
            {
                "state_dict": model.state_dict(),
                "epoch": epoch + 1,
                "best_acc": best_acc,
            },
            os.path.join(output_dict['chs_dir'],
                         "checkpoint_{}_acc_{:.4f}.pth".format(epoch, acc))
        )

    writer_dict['writer'].close()
# NOTE(review): the three statements below are the tail of a function whose
# definition starts outside this view; `preds` and `converter` are bound
# there — do not read them as module-level code.
preds_size = Variable(torch.IntTensor([preds.size(0)]))
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
print('results: {0}'.format(sim_pred))

# Script entry point: load config + checkpoint, read one image, and time a
# single recognition pass.
if __name__ == '__main__':
    config, args = parse_arg()

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    model = crnn.get_crnn(config).to(device)
    print('loading pretrained model from {0}'.format(args.checkpoint))
    checkpoint = torch.load(args.checkpoint)
    # Checkpoints saved by the trainer wrap weights in a 'state_dict' key;
    # bare weight files are loaded directly.
    if 'state_dict' in checkpoint.keys():
        model.load_state_dict(checkpoint['state_dict'])
    else:
        model.load_state_dict(checkpoint)

    started = time.time()
    img = cv2.imread(args.image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    # NOTE(review): this call passes 5 arguments, but the `recognition`
    # defined elsewhere in this file takes 4 (config, img, model, device) —
    # confirm which signature this script targets before running.
    recognition(config, img, model, converter, device)
    finished = time.time()
    print('elapsed time: {0}'.format(finished - started))
def main():
    """Train CRNN with optional CNN-only finetuning or a full resume.

    Finetune mode loads only the CNN backbone weights from a checkpoint
    (optionally freezing them); resume mode restores the whole model and the
    last epoch. Then runs the train/validate loop, checkpointing each epoch.
    """
    # load config
    config = parse_arg()

    # create output folder for checkpoints and tensorboard logs
    output_dict = utils.create_log_folder(config, phase='train')

    # cudnn
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # writer dict
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # construct the recognition network
    model = crnn.get_crnn(config)

    # get device
    if torch.cuda.is_available():
        device = torch.device("cuda:{}".format(config.GPUID))
    else:
        device = torch.device("cpu:0")
    model = model.to(device)

    # define loss function (this script uses the external CTCLoss binding,
    # not torch.nn.CTCLoss)
    criterion = CTCLoss()

    last_epoch = config.TRAIN.BEGIN_EPOCH
    optimizer = utils.get_optimizer(config, model)
    # LR_STEP may be a milestone list (MultiStepLR) or a single step (StepLR).
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1)

    if config.TRAIN.FINETUNE.IS_FINETUNE:
        # (sic) 'FINETUNE_CHECKPOINIT' is the key name defined in the config.
        model_state_file = config.TRAIN.FINETUNE.FINETUNE_CHECKPOINIT
        # BUG FIX: the original only printed a warning on an empty path and
        # then crashed inside torch.load('') with a confusing error; fail
        # fast with a clear exception instead.
        if model_state_file == '':
            raise FileNotFoundError(" => no checkpoint found")
        checkpoint = torch.load(model_state_file, map_location='cpu')
        if 'state_dict' in checkpoint.keys():
            checkpoint = checkpoint['state_dict']

        # Keep only CNN-backbone weights, stripping the leading 'cnn.' prefix
        # so the tensors match model.cnn's own parameter names.
        from collections import OrderedDict
        model_dict = OrderedDict()
        for k, v in checkpoint.items():
            if 'cnn' in k:
                model_dict[k[4:]] = v
        model.cnn.load_state_dict(model_dict)
        if config.TRAIN.FINETUNE.FREEZE:
            for p in model.cnn.parameters():
                p.requires_grad = False

    elif config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        # BUG FIX: same empty-path handling as the finetune branch above.
        if model_state_file == '':
            raise FileNotFoundError(" => no checkpoint found")
        checkpoint = torch.load(model_state_file, map_location='cpu')
        if 'state_dict' in checkpoint.keys():
            model.load_state_dict(checkpoint['state_dict'])
            last_epoch = checkpoint['epoch']
        else:
            model.load_state_dict(checkpoint)

    model_info(model)
    train_dataset = get_dataset(config)(config, is_train=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    best_acc = 0.5
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        function.train(config, train_loader, train_dataset, converter, model,
                       criterion, optimizer, device, epoch, writer_dict,
                       output_dict)
        lr_scheduler.step()

        acc = function.validate(config, val_loader, val_dataset, converter,
                                model, criterion, device, epoch, writer_dict,
                                output_dict)

        is_best = acc > best_acc
        best_acc = max(acc, best_acc)

        print("is best:", is_best)
        print("best acc is:", best_acc)
        # save checkpoint
        torch.save(
            {
                "state_dict": model.state_dict(),
                "epoch": epoch + 1,
                "best_acc": best_acc,
            },
            os.path.join(output_dict['chs_dir'],
                         "checkpoint_{}_acc_{:.4f}.pth".format(epoch, acc)))

    writer_dict['writer'].close()