def main(args):
    """Train (or, with --evaluate, only test) a text-recognition model.

    Side effects: seeds all RNGs, optionally redirects stdout to a log file,
    writes the config and checkpoints under ``args.logs_dir``.
    """
    import ast  # local import: the file's top-of-file import block is outside this chunk

    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Redirect print to both console and log file.
    if not args.evaluate:
        # make symlink so the real storage location is reachable next to logs_dir
        if not os.path.exists(args.logs_dir):
            os.makedirs(args.logs_dir)
        make_symlink_if_not_exists(osp.join(args.real_logs_dir, args.logs_dir),
                                   osp.dirname(osp.normpath(args.logs_dir)))
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
        train_tfLogger = TFLogger(osp.join(args.logs_dir, 'train'))
        eval_tfLogger = TFLogger(osp.join(args.logs_dir, 'eval'))

    # Save the args to disk for later inspection.
    if not args.evaluate:
        cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
        cfgs = vars(args)
        with open(cfg_save_path, 'w') as f:
            for k, v in cfgs.items():
                f.write('{}: {}\n'.format(k, v))

    # Create data loaders.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)
    print('height:', args.height, ' width: ', args.width)

    if not args.evaluate:
        train_dataset, train_loader = \
            get_data(args.train_data_dir, args.voc_type, args.max_len, args.num_train,
                     args.height, args.width, args.batch_size, args.workers, True,
                     args.keep_ratio, n_max_samples=args.n_max_samples)
    test_dataset, test_loader = \
        get_data(args.test_data_dir, args.voc_type, args.max_len, args.num_test,
                 args.height, args.width, args.batch_size, args.workers, False,
                 args.keep_ratio)

    if args.evaluate:
        max_len = test_dataset.max_len
    else:
        # Train and test must agree on one label length for the decoder.
        max_len = max(train_dataset.max_len, test_dataset.max_len)
        train_dataset.max_len = test_dataset.max_len = max_len

    # Create model.
    model = ModelBuilder(arch=args.arch, rec_num_classes=test_dataset.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim,
                         max_len_labels=max_len,
                         eos=test_dataset.char2id[test_dataset.EOS],
                         args=args, STN_ON=args.STN_ON)

    params_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # FIX: report FLOPs at the configured input size instead of a hard-coded
    # (3, 32, 100) so the number stays correct for non-default resolutions.
    encoder_flops, _ = get_model_complexity_info(
        model.encoder, input_res=(3, args.height, args.width), as_strings=False)
    print('num of parameters: ', params_num)
    print('encoder flops: ', encoder_flops)

    # Initialise the best-result tracker according to the metric direction.
    if args.evaluation_metric == 'accuracy':
        best_res = 0
    elif args.evaluation_metric == 'editdistance':
        best_res = math.inf
    else:
        raise ValueError("Unsupported evaluation metric:", args.evaluation_metric)

    # Load from checkpoint.
    start_epoch = 0
    start_iters = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        # compatibility with the epoch-wise evaluation version
        if 'epoch' in checkpoint.keys():
            start_epoch = checkpoint['epoch']
        else:
            start_iters = checkpoint['iters']
            start_epoch = int(start_iters // len(train_loader)) if not args.evaluate else 0
        best_res = checkpoint['best_res']
        print("=> Start iters {} best res {:.1%}".format(start_iters, best_res))

    if args.cuda:
        device = torch.device("cuda")
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluator
    evaluator = Evaluator(model, args.evaluation_metric, args.cuda)

    if args.evaluate:
        print('Test on {0}:'.format(args.test_data_dir))
        if len(args.vis_dir) > 0:
            vis_dir = osp.join(args.logs_dir, args.vis_dir)
            if not osp.exists(vis_dir):
                os.makedirs(vis_dir)
        else:
            vis_dir = None
        start = time.time()
        evaluator.evaluate(test_loader, dataset=test_dataset, vis_dir=vis_dir)
        print('it took {0} s.'.format(time.time() - start))
        return

    # Optimizer: only parameters that still require gradients.
    param_groups = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    # FIX: parse the milestone list safely; eval() on a CLI-supplied string can
    # execute arbitrary code. literal_eval accepts the same "[e1, e2]" syntax.
    milestones = (args.milestones if isinstance(args.milestones, (list, tuple))
                  else ast.literal_eval(args.milestones))
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    # Trainer
    loss_weights = {'loss_rec': 1.}
    if args.debug:
        args.print_freq = 1
    trainer = Trainer(model, args.evaluation_metric, args.logs_dir,
                      iters=start_iters, best_res=best_res, grad_clip=args.grad_clip,
                      use_cuda=args.cuda, loss_weights=loss_weights)

    # Baseline evaluation before the first epoch.
    evaluator.evaluate(test_loader, step=0, tfLogger=eval_tfLogger, dataset=test_dataset)
    for epoch in range(start_epoch, args.epochs):
        scheduler.step(epoch)
        current_lr = optimizer.param_groups[0]['lr']
        trainer.train(epoch, train_loader, optimizer, current_lr,
                      print_freq=args.print_freq, train_tfLogger=train_tfLogger,
                      is_debug=args.debug, evaluator=evaluator,
                      test_loader=test_loader, eval_tfLogger=eval_tfLogger,
                      test_dataset=test_dataset)

    # Final test with the best checkpoint.
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    # FIX: the model is only wrapped in DataParallel on the CUDA path; guard
    # the .module access so a CPU run does not crash with AttributeError.
    target = model.module if isinstance(model, nn.DataParallel) else model
    target.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(test_loader, dataset=test_dataset)

    # Close the tensorboard loggers.
    train_tfLogger.close()
    eval_tfLogger.close()
def main(args):
    """Run single-image inference with a rectification-based recognizer.

    Prints the decoded string and the forward-pass time, and saves the
    rectified image to ``rec.png`` and the full model to ``model.pth``.
    """
    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Default recognition resolution.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model.
    model = ModelBuilder(arch=args.arch, rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON, encoder_block=4, decoder_block=4)

    # Load from checkpoint.
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    # FIX: always define `device`; previously it only existed on the CUDA path,
    # so a CPU run crashed with NameError at img.to(device).
    device = torch.device("cuda" if args.cuda else "cpu")
    model = model.to(device)
    if args.cuda:
        model = nn.DataParallel(model)

    # Save model (note: pickles the full module, DataParallel wrapper included).
    torch.save(model, "model.pth")

    # Evaluation
    model.eval()
    img = image_process(args.image_path)
    with torch.no_grad():
        img = img.to(device)
        input_dict = {}
        input_dict['images'] = img.unsqueeze(0)
        # TODO: testing should be more clean.
        # To stay compatible with the lmdb-based testing we construct dummy
        # recognition targets of the right shape, ending in EOS.
        rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
        rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
        input_dict['rec_targets'] = rec_targets
        input_dict['rec_lengths'] = [args.max_len]

        # FIX: timeit.timeit() with no arguments benchmarks a `pass` statement
        # a million times, so `end - start` was meaningless noise. Use the
        # monotonic wall-clock timer the timeit module exposes instead.
        start = timeit.default_timer()
        output_dict = model(input_dict)
        end = timeit.default_timer()

    pred_rec = output_dict['output']['pred_rec']

    import cv2
    # Undo the (x * 0.5 + 0.5) normalisation, go CHW -> HWC, and save.
    rec_im = output_dict['output']['rectified_images'].squeeze().transpose(2, 0)
    rec_im = rec_im.transpose(1, 0)
    rec_im = (rec_im * 0.5 + 0.5) * 255
    rec_im = rec_im.cpu().detach().numpy()
    print(rec_im.shape)
    # NOTE(review): cv2.imwrite expects BGR channel order; if image_process
    # produced RGB the saved file has swapped channels — confirm upstream.
    cv2.imwrite("rec.png", rec_im)

    pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'], dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
    print('{:f}'.format(end - start))
def main(args):
    """Recognise the text in ``args.image_path`` with a pretrained model and
    print the decoded string.
    """
    # Seed every RNG source identically.
    for seed_fn in (np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default recognition resolution.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Build the recognizer.
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Restore weights if a checkpoint was given.
    if args.resume:
        model.load_state_dict(load_checkpoint(args.resume)['state_dict'])

    if args.cuda:
        device = torch.device("cuda")
        model = nn.DataParallel(model.to(device))
    device = torch.device('cpu')  # TODO: delete after mapping on CUDA

    # Inference on a single image.
    model.eval()
    image = image_process(args.image_path)
    with torch.no_grad():
        image = image.to(device)
        inputs = {}
        inputs['images'] = image.unsqueeze(0)
        # TODO: testing should be more clean.
        # To stay compatible with the lmdb-based testing we construct
        # meaningless recognition targets of the right shape.
        dummy_targets = torch.IntTensor(1, args.max_len).fill_(1)
        dummy_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
        inputs['rec_targets'] = dummy_targets
        inputs['rec_lengths'] = [args.max_len]
        output_dict = model(inputs)

    decoded = output_dict['output']['pred_rec']
    pred_str, _ = get_str_list(decoded, inputs['rec_targets'], dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
# Set feature extractor to inference mode #feature_extractor.eval() # Losses criterion_GAN = torch.nn.MSELoss() criterion_content = torch.nn.L1Loss() if cuda: generator = generator.cuda(0) discriminator = discriminator.cuda(0) #feature_extractor = feature_extractor.cuda() criterion_GAN = criterion_GAN.cuda(0) criterion_content = criterion_content.cuda(0) aster = aster.cuda(0) # load pretrained aster checkpoint = load_checkpoint(args.resume) model.load_state_dict(checkpoint['state_dict']) if epoch != 0: # Load pretrained generator and discriminator generator.load_state_dict(torch.load("saved_models/generator_%d.pth")) discriminator.load_state_dict(torch.load("saved_models/discriminator_%d.pth")) # Optimizers optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2)) optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2)) Tensor = torch.cuda.FloatTensor if cuda else torch.Tensor dataloader = DataLoader( ImageDataset(dataset_name, hr_shape=hr_shape),
def main():
    """Evaluate the best re-id checkpoint on the query/gallery splits."""
    args = parse_args()
    update_config(args.cfg_file)

    # Command-line overrides: a GPU list enables CUDA, otherwise force CPU.
    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED
    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')

    # Redirect print to both console and log file.
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log-eval.txt'))

    # Data: deterministic test-time transform shared by both splits.
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)
    eval_transform = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])

    def build_loader(split, subdir):
        # One evaluation loader: fixed order, pinned memory.
        return DataLoader(
            Preprocessor(split,
                         root=osp.join(dataset.images_dir, subdir),
                         transform=eval_transform),
            batch_size=config.TEST.BATCH_SIZE,
            num_workers=config.WORKERS,
            shuffle=False,
            pin_memory=True)

    query_loader = build_loader(dataset.query, dataset.query_path)
    gallery_loader = build_loader(dataset.gallery, dataset.gallery_path)

    # Model, restored (non-strictly) from the best checkpoint.
    model = models.create(config.MODEL.NAME)
    checkpoint = load_checkpoint(config.TEST.MODEL_FILE)
    print('best model at epoch: {}'.format(checkpoint['epoch']))
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model = nn.DataParallel(model).to(device)

    print('Test with best model:')
    evaluator = Evaluator(model)
    evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                       dataset.gallery, config.TEST.OUTPUT_FEATURES)
def main(args):
    """Train (or, with --evaluate, only test) an image classifier trained
    with an MSE objective (resnet18/34 or mobilenet_v2 backbone).
    """
    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not args.evaluate:
        # make symlink so the real storage location is reachable next to logs_dir
        make_symlink_if_not_exists(osp.join(args.real_logs_dir, args.logs_dir),
                                   osp.dirname(osp.normpath(args.logs_dir)))

    # Save the args to disk for later inspection.
    if not args.evaluate:
        cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
        cfgs = vars(args)
        with open(cfg_save_path, 'w') as f:
            for k, v in cfgs.items():
                f.write('{}: {}\n'.format(k, v))

    # Create data loaders.
    if args.height is None or args.width is None:
        args.height, args.width = (128, 128)
    if not args.evaluate:
        train_dataset, train_loader = \
            get_data(args.synthetic_train_data_dir, args.num_train,
                     args.height, args.width, args.batch_size, args.workers, True)
    test_dataset, test_loader = \
        get_data(args.test_data_dir, args.num_test, args.height, args.width,
                 args.batch_size, args.workers, False)

    # Create model.
    if args.model_arch == 'resnet34':
        model = resnet.resnet34(num_classes=args.num_class)
        print('########## Using resnet34')
    elif args.model_arch == 'resnet18':
        model = resnet.resnet18(num_classes=args.num_class)
        print('########## Using resnet18')
    elif args.model_arch == 'mobilenet_v2':
        model = torchvision.models.mobilenet_v2(num_classes=args.num_class)
        print('########## Using mobilenet_v2')
    else:
        # FIX: fail loudly instead of printing and silently returning, which
        # let a typo'd arch name look like a successful no-op run. Consistent
        # with the ValueError raised for an unsupported metric below.
        raise ValueError('Wrong Model! Unsupported model_arch: {}'.format(args.model_arch))

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    # Initialise the best-result tracker.
    if args.evaluation_metric == 'accuracy':
        best_res = 0
    else:
        raise ValueError("Unsupported evaluation metric:", args.evaluation_metric)

    # Load from checkpoint.
    start_epoch = 0
    start_iters = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        # Only the weights are restored: the original code deliberately reset
        # start_epoch/start_iters/best_res to zero after loading (the
        # epoch/iters bookkeeping in the checkpoint is ignored).
        start_iters = 0
        start_epoch = 0
        best_res = 0
        print("=> Start iters {} best res {:.1%}".format(start_iters, best_res))

    if args.cuda:
        device = torch.device("cuda")
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluator
    evaluator = Evaluator(model, args.evaluation_metric, args.logs_dir,
                          criterion, args.cuda)

    if args.evaluate:
        print('Test on {0}:'.format(args.test_data_dir))
        if len(args.vis_dir) > 0:
            vis_dir = osp.join(args.logs_dir, args.vis_dir)
            if not osp.exists(vis_dir):
                os.makedirs(vis_dir)
        else:
            vis_dir = None
        start = time.time()
        evaluator.evaluate(test_loader, dataset=test_dataset, vis_dir=vis_dir)
        print('it took {0} s.'.format(time.time() - start))
        return

    # Optimizer: only parameters that still require gradients.
    param_groups = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones,
                                               gamma=0.1)

    # Trainer
    loss_weights = {'loss_rec': 1.}
    if args.debug:
        args.print_freq = 1
    trainer = Trainer(model, args.evaluation_metric, args.logs_dir, criterion,
                      iters=start_iters, best_res=best_res,
                      grad_clip=args.grad_clip, use_cuda=args.cuda)

    # Baseline evaluation before the first epoch.
    evaluator.evaluate(test_loader, step=0, dataset=test_dataset)
    for epoch in range(start_epoch, args.epochs):
        scheduler.step(epoch)
        current_lr = optimizer.param_groups[0]['lr']
        trainer.train(epoch, train_loader, optimizer, current_lr,
                      print_freq=args.print_freq, is_debug=args.debug,
                      evaluator=evaluator, test_loader=test_loader,
                      test_dataset=test_dataset, test_freq=args.test_freq)

    # Final test with the best checkpoint.
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    # FIX: .module only exists when wrapped in DataParallel (CUDA path); guard
    # the access so a CPU run does not crash with AttributeError.
    target = model.module if isinstance(model, nn.DataParallel) else model
    target.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(test_loader, dataset=test_dataset)
def main(args):
    """Run the recognizer over every image in ``args.images_path``, pairing
    each image with the same-named .txt box file from ``args.box_path``.
    """
    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Default recognition resolution.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model.
    model = ModelBuilder(arch=args.arch, rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint.
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    # FIX: `device` is passed to recognizer() below, so it must exist on the
    # CPU path too; previously it was only defined when args.cuda was True.
    device = torch.device("cuda" if args.cuda else "cpu")
    model = model.to(device)
    if args.cuda:
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()
    images_path = args.images_path
    box_path = args.box_path
    for img_name in os.listdir(images_path):
        image_path = os.path.join(images_path, img_name)
        print("Image path:", image_path)
        # FIX: str.replace('jpg', 'txt') rewrote EVERY 'jpg' substring in the
        # file name (e.g. 'jpg_shop.jpg' -> 'txt_shop.txt'); swap only the
        # extension instead.
        gt_name = os.path.splitext(img_name)[0] + '.txt'
        gt_path = os.path.join(box_path, gt_name)
        recognizer(image_path, gt_path, model, device, dataset_info,
                   savedir="outputs/", only_price=False)
def main(args):
    """Train (or, with --evaluate, only test) a text recognizer whose decoder
    parameters are frozen; optimises with Adam + cosine-annealing LR.

    Side effects: seeds all RNGs, redirects stdout to a log file, writes the
    config and checkpoints under ``args.logs_dir``.
    """
    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    print(torch.cuda.is_available())
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Redirect print to both console and log file.
    if not args.evaluate:
        # make symlink so the real storage location is reachable next to logs_dir
        make_symlink_if_not_exists(osp.join(args.real_logs_dir, args.logs_dir),
                                   osp.dirname(osp.normpath(args.logs_dir)))
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
        train_tfLogger = TFLogger(osp.join(args.logs_dir, 'train'))
        eval_tfLogger = TFLogger(osp.join(args.logs_dir, 'eval'))

    # Save the args to disk for later inspection.
    if not args.evaluate:
        cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
        cfgs = vars(args)
        with open(cfg_save_path, 'w') as f:
            for k, v in cfgs.items():
                f.write('{}: {}\n'.format(k, v))

    # Create data loaders.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)
    if not args.evaluate:
        train_dataset, train_loader = \
            get_data(args.synthetic_train_data_dir, args.voc_type, args.max_len,
                     args.num_train, args.height, args.width, args.batch_size,
                     args.workers, True, args.keep_ratio)
        # Leftover vocabulary bookkeeping from a character-frequency experiment;
        # only `voc`/`id2char`/`char2number` are built, the counting loop stayed
        # commented out:
        #   for _, label, _ in train_dataset:
        #       for i in label:
        #           if not id2char[i] in ['EOS', 'PADDING', 'UNKNOWN']:
        #               char2number[id2char[i]] += 1
        voc = get_vocabulary('ALLCASES_SYMBOLS', EOS='EOS', PADDING='PADDING',
                             UNKNOWN='UNKNOWN')
        id2char = dict(zip(range(len(voc)), voc))
        char2number = dict(zip(voc, [0]*len(voc)))
    test_dataset, test_loader = \
        get_data(args.test_data_dir, args.voc_type, args.max_len, args.num_test,
                 args.height, args.width, args.batch_size, args.workers, False,
                 args.keep_ratio)
    # print("len(trainset) ", len(train_dataset))

    if args.evaluate:
        max_len = test_dataset.max_len
    else:
        # Train and test must agree on one label length for the decoder.
        max_len = max(train_dataset.max_len, test_dataset.max_len)
        train_dataset.max_len = test_dataset.max_len = max_len

    # Create model.
    model = ModelBuilder(arch=args.arch, rec_num_classes=test_dataset.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim,
                         max_len_labels=max_len,
                         eos=test_dataset.char2id[test_dataset.EOS],
                         STN_ON=args.STN_ON,
                         encoder_block=args.encoder_block,
                         decoder_block=args.decoder_block)

    # Freeze the decoder; only encoder (and STN) weights keep training.
    # NOTE(review): parameters() always yields Parameter objects, so the
    # isinstance check is a no-op guard.
    for param in model.decoder.parameters():
        if isinstance(param, Parameter):
            param.requires_grad = False
    # Encoder / STN freezing was tried and disabled:
    # for param in model.encoder.parameters(): param.requires_grad = False
    # for param in model.stn_head.parameters(): param.requires_grad = False

    # Initialise the best-result tracker according to the metric direction.
    if args.evaluation_metric == 'accuracy':
        best_res = 0
    elif args.evaluation_metric == 'editdistance':
        best_res = math.inf
    else:
        raise ValueError("Unsupported evaluation metric:", args.evaluation_metric)

    # Load from checkpoint.
    start_epoch = 0
    start_iters = 0
    if args.resume:
        print("args.resume: ",args.resume)
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        # compatibility with the epoch-wise evaluation version
        if 'epoch' in checkpoint.keys():
            start_epoch = checkpoint['epoch']
        else:
            start_iters = checkpoint['iters']
            start_epoch = int(start_iters // len(train_loader)) if not args.evaluate else 0
        # checkpoint['best_res'] = 0.802  (manual override, kept disabled)
        best_res = checkpoint['best_res']
        print("=> Start iters {} best res {:.1%}"
              .format(start_iters, best_res))

    if args.cuda:
        device = torch.device("cuda")
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluator
    evaluator = Evaluator(model, args.evaluation_metric, args.cuda)

    if args.evaluate:
        print('Test on {0}:'.format(args.test_data_dir))
        if len(args.vis_dir) > 0:
            vis_dir = osp.join(args.logs_dir, args.vis_dir)
            if not osp.exists(vis_dir):
                os.makedirs(vis_dir)
        else:
            vis_dir = None
        start = time.time()
        evaluator.evaluate(test_loader, dataset=test_dataset, vis_dir=vis_dir)
        print('it took {0} s.'.format(time.time() - start))
        return

    # Optimizer: only parameters that still require gradients (decoder is
    # frozen above). Adam was chosen over several alternatives that were
    # tried and left commented out (Adadelta, SGD, AdamW, ASGD, Adagrad).
    param_groups = model.parameters()
    param_groups = filter(lambda p: p.requires_grad, param_groups)
    optimizer = optim.Adam(param_groups, lr=args.lr, betas=(0.9, 0.98), eps=1e-09,
                           weight_decay=args.weight_decay, amsgrad=False)
    # NOTE(review): T_max is one epoch's worth of iterations, but step() below
    # is called once per EPOCH with the epoch index — confirm this is the
    # intended annealing period.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader))

    # Trainer
    loss_weights = {}
    loss_weights['loss_rec'] = 1.
    if args.debug:
        args.print_freq = 1
    trainer = Trainer(model, args.evaluation_metric, args.logs_dir,
                      iters=start_iters, best_res=best_res, grad_clip=args.grad_clip,
                      use_cuda=args.cuda, loss_weights=loss_weights)

    # Start training (the pre-training baseline evaluation was disabled):
    # evaluator.evaluate(test_loader, step=0, tfLogger=eval_tfLogger, dataset=test_dataset)
    for epoch in range(start_epoch, args.epochs):
        scheduler.step(epoch)
        current_lr = optimizer.param_groups[0]['lr']
        # A Noam-style warmup schedule was tried and disabled:
        # current_lr = (1.0/(512.0**0.5))*min(1.0/float(trainer.iters + 1)**0.5,
        #                                     float(trainer.iters+1)*1.0/16000.0**1.5)
        # optimizer.param_groups[0]['lr'] = current_lr
        trainer.train(epoch, train_loader, optimizer, current_lr,
                      print_freq=args.print_freq, train_tfLogger=train_tfLogger,
                      is_debug=args.debug, evaluator=evaluator,
                      test_loader=test_loader, eval_tfLogger=eval_tfLogger,
                      test_dataset=test_dataset)

    # Final test with the best checkpoint.
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    # NOTE(review): on the CUDA path `model` is DataParallel-wrapped here, so
    # this relies on the checkpoint's keys carrying the 'module.' prefix
    # (sibling scripts load into model.module instead) — confirm against how
    # Trainer saves model_best.pth.tar.
    model.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(test_loader, dataset=test_dataset)

    # Close the tensorboard loggers.
    train_tfLogger.close()
    eval_tfLogger.close()
def main():
    """Train a person re-id model with a CamStyle-augmented loader and an
    exemplar memory network, checkpointing the best rank-1 model.

    Side effects: mutates the global ``config``, seeds all RNGs, redirects
    stdout to a log file, writes checkpoints under ``config.OUTPUT_DIR``.
    """
    args = parse_args()
    update_config(args.cfg_file)
    # Command-line overrides: a GPU list enables CUDA, otherwise force CPU.
    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)

    # Seed every RNG source; pick a random seed when none was supplied.
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if config.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED
    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')

    # Redirect print to both console and log file.
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log.txt'))

    # Create data loaders: augmented transform for training, deterministic
    # resize-only transform for query/gallery evaluation.
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.RandomSizedRectCrop(*config.MODEL.IMAGE_SIZE),
        T.RandomHorizontalFlip(),
        T.RandomRotation(10),
        T.ColorJitter(0.2, 0.2, 0.2),
        T.ToTensor(),
        normalizer,
        T.RandomErasing(EPSILON=config.DATASET.RE),
    ])
    test_transformer = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        normalizer,
    ])
    # Training loader mixes original images with GAN-generated CamStyle images.
    train_loader = DataLoader(
        UnsupervisedCamStylePreprocessor(
            dataset.train,
            root=osp.join(dataset.images_dir, dataset.train_path),
            camstyle_root=osp.join(dataset.images_dir, dataset.train_camstyle_path),
            num_cam=dataset.num_cam, use_gan=True, transform=train_transformer),
        batch_size=config.TRAIN.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=config.TRAIN.SHUFFLE, pin_memory=True, drop_last=False)
    query_loader = DataLoader(
        Preprocessor(dataset.query,
                     root=osp.join(dataset.images_dir, dataset.query_path),
                     transform=test_transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)
    gallery_loader = DataLoader(
        Preprocessor(dataset.gallery,
                     root=osp.join(dataset.images_dir, dataset.gallery_path),
                     transform=test_transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)

    # Create model.
    model = models.create(config.MODEL.NAME, pretrained=config.MODEL.PRETRAINED,
                          num_classes=dataset.num_train_ids)

    # Memory Network: one slot per training image.
    num_tgt = len(dataset.train)
    memory = models.create('memory', config.MODEL.FEATURES, num_tgt)

    # Load from checkpoint (non-strict: tolerates missing/extra keys).
    if config.TRAIN.RESUME:
        checkpoint = load_checkpoint(config.TRAIN.CHECKPOINT)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        memory.load_state_dict(checkpoint['state_dict_memory'], strict=False)
        print("=> Start epoch {} ".format(checkpoint['epoch']))

    # Set model on the target device.
    model = nn.DataParallel(model).to(device)
    memory = memory.to(device)

    # Optimizer: pretrained backbone ("base") trains at 0.1x the LR of the
    # freshly initialised layers.
    base_param_ids = set(map(id, model.module.base.parameters()))
    base_params_need_for_grad = filter(lambda p: p.requires_grad,
                                       model.module.base.parameters())
    new_params = [p for p in model.parameters() if id(p) not in base_param_ids]
    param_groups = [{
        'params': base_params_need_for_grad,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]
    optimizer = get_optimizer(config, param_groups)

    # Trainer
    trainer = Trainer(config, model, memory)

    def adjust_lr(epoch):
        # Step decay: LR * LR_FACTOR^(epoch // LR_STEP), scaled per group.
        step_size = config.TRAIN.LR_STEP
        lr = config.TRAIN.LR * (config.TRAIN.LR_FACTOR**(epoch // step_size))
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    best_r1 = 0.0
    # Start training: evaluate after every epoch and keep the best rank-1.
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # lr_scheduler.step()
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, optimizer)
        print('Test with latest model:')
        evaluator = Evaluator(model)
        r1 = evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                                dataset.gallery, config.TEST.OUTPUT_FEATURES)
        if r1 > best_r1:
            best_r1 = r1
            # Persist the unwrapped model state plus the memory bank.
            save_checkpoint(
                {
                    'state_dict': model.module.state_dict(),
                    'state_dict_memory': memory.state_dict(),
                    'epoch': epoch + 1,
                },
                fpath=osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))
        print('\n * Finished epoch {:3d} \n'.format(epoch))

    # Final test with the best saved model.
    print('Test with best model:')
    evaluator = Evaluator(model)
    checkpoint = load_checkpoint(
        osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))
    print('best model at epoch: {}'.format(checkpoint['epoch']))
    model.module.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                       dataset.gallery, config.TEST.OUTPUT_FEATURES)
def main(args):
    """Compare ASTER and Tesseract recognition accuracy on up to 100 images.

    The ground-truth string is parsed from each file name (second-to-last
    '_'-separated field). Prints per-image results and final accuracies.
    """
    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Default recognition resolution.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)
    dataset_info = DataInfo(args.voc_type)
    print(dataset_info.char2id)

    # Create model.
    model = ModelBuilder(arch=args.arch, rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint.
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    # FIX: `device` is used below on both paths, so define it for CPU too;
    # previously a CPU run crashed with NameError at img.to(device).
    device = torch.device("cuda" if args.cuda else "cpu")
    model = model.to(device)
    if args.cuda:
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()

    # Prefer the annotation list; fall back to a plain directory listing.
    try:
        with open(os.path.join(args.image_path, 'annotation_test.txt'), 'r') as test_list_file:
            test_list = test_list_file.read().splitlines()
    except IOError:
        test_list = os.listdir(args.image_path)

    data_n = min(100, len(test_list))
    # FIX: guard against an empty directory/annotation file, which previously
    # raised ZeroDivisionError in the accuracy prints below.
    if data_n == 0:
        print('No test images found in {}'.format(args.image_path))
        return

    aster_correct_cnt = 0
    tesseract_correct_cnt = 0
    custom_oem_psm_config = '--oem 3 --psm 7'
    for test_name in tqdm(test_list[:data_n]):
        # Annotation lines may carry extra fields after a space; keep the path.
        img_path = os.path.join(args.image_path, test_name).split(' ')[0]
        # assumes names look like ..._<label>_<idx>.<ext> — TODO confirm format
        target_str = img_path.split('_')[-2]
        print(img_path, target_str)

        img = image_process(img_path)
        with torch.no_grad():
            img = img.to(device)
            input_dict = {}
            input_dict['images'] = img.unsqueeze(0)
            # Dummy targets to stay compatible with the lmdb-based test path.
            rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
            rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
            input_dict['rec_targets'] = rec_targets
            input_dict['rec_lengths'] = [args.max_len]
            output_dict = model(input_dict)

        pred_rec = output_dict['output']['pred_rec']
        pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'],
                                   dataset=dataset_info, lower_flag=False)
        if pred_str[0] == target_str:
            aster_correct_cnt += 1

        # Tesseract baseline on the same image.
        img = load_image_in_PIL(img_path).convert('RGB')
        detected_str = pytesseract.image_to_string(img, config=custom_oem_psm_config)
        if detected_str == target_str:
            tesseract_correct_cnt += 1
        print(f'GT: {target_str}, ASTER: {pred_str[0]}, Tesseract: {detected_str}')
        if detected_str == target_str:
            print('===================== correct')

    print(f'Aster acc: {aster_correct_cnt} / {data_n}. {aster_correct_cnt/data_n}')
    print(f'Tesseract acc: {tesseract_correct_cnt} / {data_n}. {tesseract_correct_cnt/data_n}')