Beispiel #1
0
def main(args):
    """Recognize the text in a single image and print the prediction.

    Seeds the random number generators, builds the recognition model
    described by ``args``, optionally restores weights from ``args.resume``,
    runs one forward pass on the image at ``args.image_path`` and prints the
    decoded string.

    Args:
        args: Parsed command-line namespace. Reads ``seed``, ``cuda``,
            ``height``, ``width``, ``voc_type``, ``arch``, ``decoder_sdim``,
            ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
            ``image_path``. ``cuda``, ``height`` and ``width`` are updated in
            place.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is needed below even without CUDA
    # (previously it was only defined inside the CUDA branch, so CPU runs
    # failed with a NameError when moving the image).
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input resolution when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint (into the bare model, before any wrapping).
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()
    img = image_process(args.image_path).to(device)

    # TODO: testing should be more clean.
    # To stay compatible with the lmdb-based testing the model still expects
    # targets and lengths, so placeholder values are constructed: all ones,
    # terminated by the EOS id.
    rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
    rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
    input_dict = {
        'images': img.unsqueeze(0),
        'rec_targets': rec_targets,
        'rec_lengths': [args.max_len],
    }

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        output_dict = model(input_dict)

    pred_rec = output_dict['output']['pred_rec']
    pred_str, _ = get_str_list(pred_rec,
                               input_dict['rec_targets'],
                               dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
Beispiel #2
0
def main(args):
    """Train the recognition model, or only evaluate it with ``args.evaluate``.

    In training mode the function sets up logging under ``args.logs_dir``,
    builds train/test loaders, trains for ``args.epochs`` epochs, then
    reloads ``model_best.pth.tar`` from the log directory and evaluates it.
    In evaluation mode it only builds the test loader, runs the evaluator
    once and returns.

    Args:
        args: Parsed command-line namespace; ``cuda``, ``height``, ``width``
            and (in debug mode) ``print_freq`` are updated in place.

    Raises:
        ValueError: If ``args.evaluation_metric`` is neither ``'accuracy'``
            nor ``'editdistance'``.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not args.evaluate:
        # Redirect print to both console and log file.
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(args.logs_dir, exist_ok=True)
        make_symlink_if_not_exists(osp.join(args.real_logs_dir, args.logs_dir),
                                   osp.dirname(osp.normpath(args.logs_dir)))
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

        train_tfLogger = TFLogger(osp.join(args.logs_dir, 'train'))
        eval_tfLogger = TFLogger(osp.join(args.logs_dir, 'eval'))

        # Save the args to disk
        cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
        with open(cfg_save_path, 'w') as f:
            for k, v in vars(args).items():
                f.write('{}: {}\n'.format(k, v))

    # Create data loaders
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)
    print('height:', args.height, ' width: ', args.width)

    if not args.evaluate:
        train_dataset, train_loader = \
          get_data(args.train_data_dir, args.voc_type, args.max_len, args.num_train,
                   args.height, args.width, args.batch_size, args.workers, True, args.keep_ratio, n_max_samples=args.n_max_samples)
    test_dataset, test_loader = \
      get_data(args.test_data_dir, args.voc_type, args.max_len, args.num_test,
               args.height, args.width, args.batch_size, args.workers, False, args.keep_ratio)

    if args.evaluate:
        max_len = test_dataset.max_len
    else:
        # Both datasets share the larger of the two maximum label lengths.
        max_len = max(train_dataset.max_len, test_dataset.max_len)
        train_dataset.max_len = test_dataset.max_len = max_len

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=test_dataset.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=max_len,
                         eos=test_dataset.char2id[test_dataset.EOS],
                         args=args,
                         STN_ON=args.STN_ON)
    params_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # NOTE(review): the complexity probe uses a fixed 3x32x100 input rather
    # than args.height / args.width -- confirm that this is intended.
    encoder_flops, _ = get_model_complexity_info(model.encoder,
                                                 input_res=(3, 32, 100),
                                                 as_strings=False)
    print('num of parameters: ', params_num)
    print('encoder flops: ', encoder_flops)

    # Initial "best" value: the worst possible score for the chosen metric.
    if args.evaluation_metric == 'accuracy':
        best_res = 0
    elif args.evaluation_metric == 'editdistance':
        best_res = math.inf
    else:
        raise ValueError("Unsupported evaluation metric:",
                         args.evaluation_metric)

    # Load from checkpoint
    start_epoch = 0
    start_iters = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

        # compatibility with the epoch-wise evaluation version
        if 'epoch' in checkpoint.keys():
            start_epoch = checkpoint['epoch']
        else:
            start_iters = checkpoint['iters']
            # train_loader only exists in training mode.
            start_epoch = int(start_iters //
                              len(train_loader)) if not args.evaluate else 0
        best_res = checkpoint['best_res']
        print("=> Start iters {}  best res {:.1%}".format(
            start_iters, best_res))

    if args.cuda:
        device = torch.device("cuda")
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluator
    evaluator = Evaluator(model, args.evaluation_metric, args.cuda)

    if args.evaluate:
        print('Test on {0}:'.format(args.test_data_dir))
        if len(args.vis_dir) > 0:
            vis_dir = osp.join(args.logs_dir, args.vis_dir)
            os.makedirs(vis_dir, exist_ok=True)
        else:
            vis_dir = None

        start = time.time()
        evaluator.evaluate(test_loader, dataset=test_dataset, vis_dir=vis_dir)
        print('it took {0} s.'.format(time.time() - start))
        return

    # Optimizer: only parameters that still require gradients are optimized.
    param_groups = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(param_groups,
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    # SECURITY NOTE(review): eval() executes whatever string was passed as
    # args.milestones. If the value is always a literal list, consider
    # ast.literal_eval instead.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=eval(
                                                   args.milestones),
                                               gamma=0.1)

    # Trainer
    loss_weights = {'loss_rec': 1.}
    if args.debug:
        args.print_freq = 1
    trainer = Trainer(model,
                      args.evaluation_metric,
                      args.logs_dir,
                      iters=start_iters,
                      best_res=best_res,
                      grad_clip=args.grad_clip,
                      use_cuda=args.cuda,
                      loss_weights=loss_weights)

    # Start training (with a baseline evaluation before the first epoch).
    evaluator.evaluate(test_loader,
                       step=0,
                       tfLogger=eval_tfLogger,
                       dataset=test_dataset)
    for epoch in range(start_epoch, args.epochs):
        scheduler.step(epoch)
        current_lr = optimizer.param_groups[0]['lr']
        trainer.train(epoch,
                      train_loader,
                      optimizer,
                      current_lr,
                      print_freq=args.print_freq,
                      train_tfLogger=train_tfLogger,
                      is_debug=args.debug,
                      evaluator=evaluator,
                      test_loader=test_loader,
                      eval_tfLogger=eval_tfLogger,
                      test_dataset=test_dataset)

    # Final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    # The model is wrapped in DataParallel only when CUDA is used, so load
    # into the underlying module when wrapped and into the model itself
    # otherwise (accessing `.module` unconditionally fails on CPU).
    bare_model = model.module if isinstance(model, nn.DataParallel) else model
    bare_model.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(test_loader, dataset=test_dataset)

    # Close the tensorboard logger
    train_tfLogger.close()
    eval_tfLogger.close()
Beispiel #3
0
def main(args):
    """Recognize one image, dump the rectified image and time the forward pass.

    Builds the recognition model with four encoder and four decoder blocks,
    optionally restores weights from ``args.resume``, pickles the whole model
    to ``model.pth``, runs a single forward pass on ``args.image_path``,
    writes the rectified image to ``rec.png`` and prints the recognized text
    followed by the elapsed forward time in seconds.

    Args:
        args: Parsed command-line namespace. Reads ``seed``, ``cuda``,
            ``height``, ``width``, ``voc_type``, ``arch``, ``decoder_sdim``,
            ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
            ``image_path``.
    """
    import time

    import cv2

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths; it used to be defined only under CUDA,
    # which made the CPU path fail with a NameError.
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input resolution when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON,
                         encoder_block=4,
                         decoder_block=4)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Save model (the whole object, including the DataParallel wrapper when
    # CUDA is used).
    torch.save(model, "model.pth")

    # Evaluation
    model.eval()
    img = image_process(args.image_path).to(device)

    # TODO: testing should be more clean.
    # Placeholder targets/lengths keep the call compatible with the
    # lmdb-based testing path: all ones, terminated by the EOS id.
    rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
    rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
    input_dict = {
        'images': img.unsqueeze(0),
        'rec_targets': rec_targets,
        'rec_lengths': [args.max_len],
    }

    # Time the forward pass with a wall-clock timer. The previous
    # `timeit.timeit()` calls benchmarked an empty statement and returned
    # durations, so their difference said nothing about the model.
    if args.cuda:
        torch.cuda.synchronize()  # do not count previously queued GPU work
    start = time.perf_counter()
    with torch.no_grad():
        output_dict = model(input_dict)
    if args.cuda:
        torch.cuda.synchronize()  # wait for the asynchronous kernels
    end = time.perf_counter()

    pred_rec = output_dict['output']['pred_rec']

    # Reorder the squeezed image from (dim0, dim1, dim2) to
    # (dim1, dim2, dim0) -- presumably CHW -> HWC -- and map values from
    # [-1, 1] to [0, 255].
    rec_im = output_dict['output']['rectified_images'].squeeze().permute(
        1, 2, 0)
    rec_im = (rec_im * 0.5 + 0.5) * 255
    rec_im = rec_im.cpu().detach().numpy()
    print(rec_im.shape)
    # NOTE(review): cv2.imwrite interprets channels as BGR; confirm the
    # channel order of the rectified image.
    cv2.imwrite("rec.png", rec_im)

    pred_str, _ = get_str_list(pred_rec,
                               input_dict['rec_targets'],
                               dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
    print('{:f}'.format(end - start))
Beispiel #4
0
    self.UNKNOWN = 'UNKNOWN'
    self.voc = get_vocabulary(voc_type, EOS=self.EOS, PADDING=self.PADDING, UNKNOWN=self.UNKNOWN)
    self.char2id = dict(zip(self.voc, range(len(self.voc))))
    self.id2char = dict(zip(range(len(self.voc)), self.voc))

    self.rec_num_classes = len(self.voc)

# Module-level setup: build the generator, the discriminator and the ASTER
# recognition model, create the loss functions, and move the GAN parts to
# GPU 0 when CUDA is available.
# NOTE(review): `hr_height`, `hr_width`, `channels`, `args` and `max_len`
# are not defined in this excerpt; they must exist at module level before
# this code runs.
cuda = torch.cuda.is_available()

# High-resolution image size as (height, width).
hr_shape = (hr_height, hr_width)
# Initialize generator and discriminator and aster model
generator = GeneratorResNet()
discriminator = Discriminator(input_shape=(channels, hr_shape[0], hr_shape[1]))
dataset_info = DataInfo(args.voc_type)
# `aster` and `model` are two names for the same model object.
aster = model = ModelBuilder(arch=args.arch, rec_num_classes=dataset_info.rec_num_classes,
                       sDim=args.decoder_sdim, attDim=args.attDim, max_len_labels=max_len,
                       eos=dataset_info.char2id[dataset_info.EOS], STN_ON=args.STN_ON)
# The feature extractor is currently disabled:
#feature_extractor = FeatureExtractor()
# Set feature extractor to inference mode
#feature_extractor.eval()

# Losses
criterion_GAN = torch.nn.MSELoss()
criterion_content = torch.nn.L1Loss()

# Only the generator, the discriminator and the two criteria are moved to
# GPU 0 here; the ASTER model is not moved in this block.
if cuda:
    generator = generator.cuda(0)
    discriminator = discriminator.cuda(0)
    #feature_extractor = feature_extractor.cuda()
    criterion_GAN = criterion_GAN.cuda(0)
    criterion_content = criterion_content.cuda(0)
def main(args):
    """Run recognition over an image folder, then start an architecture search.

    Phase 1 builds the recognition model, optionally restores weights from
    ``args.resume`` and calls ``recognizer`` for every entry of
    ``args.images_path`` together with its ground-truth file from
    ``args.box_path``; results go to ``outputs/``.

    Phase 2 writes the configuration to ``<logs_dir>/cfg.txt``, builds the
    train/eval loaders, creates a fresh model on the GPU and hands
    everything to ``ArchSearchManager.start_search``. This phase moves the
    model to CUDA unconditionally.

    NOTE(review): the two phases look like separate scripts joined into one
    function; the structure is kept as found.

    Args:
        args: Parsed command-line namespace; ``cuda``, ``height`` and
            ``width`` are updated in place.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is passed to `recognizer` below even
    # without CUDA (previously this raised a NameError on CPU).
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input resolution when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()
    images_path = args.images_path
    box_path = args.box_path

    for img in os.listdir(images_path):
        image_path = os.path.join(images_path, img)

        print("Image path:", image_path)

        # Swap only the file extension for ".txt". A plain
        # str.replace('jpg', 'txt') would also rewrite "jpg" occurring
        # inside the base name and would leave other extensions untouched.
        gt_name = os.path.splitext(img)[0] + '.txt'
        gt_path = os.path.join(box_path, gt_name)

        recognizer(image_path,
                   gt_path,
                   model,
                   device,
                   dataset_info,
                   savedir="outputs/",
                   only_price=False)

    # ---- Phase 2: architecture search ----
    # Save the args to disk
    cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
    with open(cfg_save_path, 'w') as f:
        for k, v in vars(args).items():
            f.write('{}: {}\n'.format(k, v))

    # Both loaders are created with the same flag value (True) in the ninth
    # positional argument.
    train_dataset, train_loader = get_data(
        args.train_data_dir, args.voc_type, args.max_len, args.num_train,
        args.height, args.width, args.batch_size, args.workers, True, args.keep_ratio)
    eval_dataset, eval_loader = get_data(
        args.eval_data_dir, args.voc_type, args.max_len, args.num_eval,
        args.height, args.width, args.batch_size, args.workers, True, args.keep_ratio)
    assert train_dataset is not None and eval_dataset is not None
    rec_num_classes = train_dataset.rec_num_classes
    max_len = train_dataset.max_len
    eos = train_dataset.char2id[train_dataset.EOS]

    print('arch: ', args.arch)
    # This replaces the model used in phase 1 with a new one sized from the
    # training dataset.
    model = ModelBuilder(arch=args.arch, rec_num_classes=rec_num_classes,
                        sDim=args.decoder_sdim, attDim=args.attDim, max_len_labels=max_len,
                        eos=eos,  args=args, STN_ON=args.STN_ON)
    model = model.cuda()
    model = nn.DataParallel(model)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    manager = ArchSearchManager(
        model, train_dataset, train_loader, eval_dataset, eval_loader, args)
    manager.start_search()
Beispiel #7
0
def main(args):
  """Fine-tune the recognizer with a frozen decoder, or only evaluate it.

  In training mode the function sets up logging under ``args.logs_dir``,
  builds loaders for the synthetic training data and the test data, freezes
  every decoder parameter, trains with Adam and a cosine-annealing schedule
  for ``args.epochs`` epochs, then reloads ``model_best.pth.tar`` and
  evaluates it. With ``args.evaluate`` set it only evaluates on the test
  data and returns.

  Args:
    args: Parsed command-line namespace; ``cuda``, ``height``, ``width`` and
      (in debug mode) ``print_freq`` are updated in place.

  Raises:
    ValueError: If ``args.evaluation_metric`` is neither ``'accuracy'`` nor
      ``'editdistance'``.
  """
  np.random.seed(args.seed)
  torch.manual_seed(args.seed)
  torch.cuda.manual_seed(args.seed)
  torch.cuda.manual_seed_all(args.seed)
  cudnn.benchmark = True
  torch.backends.cudnn.deterministic = True

  args.cuda = args.cuda and torch.cuda.is_available()
  print(torch.cuda.is_available())
  if args.cuda:
    print('using cuda.')
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
  else:
    torch.set_default_tensor_type('torch.FloatTensor')

  if not args.evaluate:
    # Redirect print to both console and log file
    make_symlink_if_not_exists(osp.join(args.real_logs_dir, args.logs_dir), osp.dirname(osp.normpath(args.logs_dir)))
    sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
    train_tfLogger = TFLogger(osp.join(args.logs_dir, 'train'))
    eval_tfLogger = TFLogger(osp.join(args.logs_dir, 'eval'))

    # Save the args to disk
    cfg_save_path = osp.join(args.logs_dir, 'cfg.txt')
    with open(cfg_save_path, 'w') as f:
      for k, v in vars(args).items():
        f.write('{}: {}\n'.format(k, v))

  # Create data loaders
  if args.height is None or args.width is None:
    args.height, args.width = (32, 100)

  if not args.evaluate:
    train_dataset, train_loader = \
      get_data(args.synthetic_train_data_dir, args.voc_type, args.max_len, args.num_train,
               args.height, args.width, args.batch_size, args.workers, True, args.keep_ratio)

  test_dataset, test_loader = \
    get_data(args.test_data_dir, args.voc_type, args.max_len, args.num_test,
             args.height, args.width, args.batch_size, args.workers, False, args.keep_ratio)

  if args.evaluate:
    max_len = test_dataset.max_len
  else:
    # Both datasets share the larger of the two maximum label lengths.
    max_len = max(train_dataset.max_len, test_dataset.max_len)
    train_dataset.max_len = test_dataset.max_len = max_len

  # Create model
  model = ModelBuilder(arch=args.arch, rec_num_classes=test_dataset.rec_num_classes,
                       sDim=args.decoder_sdim, attDim=args.attDim, max_len_labels=max_len,
                       eos=test_dataset.char2id[test_dataset.EOS], STN_ON=args.STN_ON,
                       encoder_block=args.encoder_block, decoder_block=args.decoder_block)

  # Freeze the decoder: its parameters are excluded from optimization by the
  # requires_grad filter further down.
  for param in model.decoder.parameters():
    if isinstance(param, Parameter):
      param.requires_grad = False

  # Initial "best" value: the worst possible score for the chosen metric.
  if args.evaluation_metric == 'accuracy':
    best_res = 0
  elif args.evaluation_metric == 'editdistance':
    best_res = math.inf
  else:
    raise ValueError("Unsupported evaluation metric:", args.evaluation_metric)

  # Load from checkpoint
  start_epoch = 0
  start_iters = 0
  if args.resume:
    print("args.resume: ",args.resume)
    checkpoint = load_checkpoint(args.resume)
    model.load_state_dict(checkpoint['state_dict'])

    # compatibility with the epoch-wise evaluation version
    if 'epoch' in checkpoint.keys():
      start_epoch = checkpoint['epoch']
    else:
      start_iters = checkpoint['iters']
      # train_loader only exists in training mode.
      start_epoch = int(start_iters // len(train_loader)) if not args.evaluate else 0
    best_res = checkpoint['best_res']
    print("=> Start iters {}  best res {:.1%}"
          .format(start_iters, best_res))

  if args.cuda:
    device = torch.device("cuda")
    model = model.to(device)
    model = nn.DataParallel(model)

  # Evaluator
  evaluator = Evaluator(model, args.evaluation_metric, args.cuda)

  if args.evaluate:
    print('Test on {0}:'.format(args.test_data_dir))
    if len(args.vis_dir) > 0:
      vis_dir = osp.join(args.logs_dir, args.vis_dir)
      if not osp.exists(vis_dir):
        os.makedirs(vis_dir)
    else:
      vis_dir = None

    start = time.time()
    evaluator.evaluate(test_loader, dataset=test_dataset, vis_dir=vis_dir)
    print('it took {0} s.'.format(time.time() - start))
    return

  # Optimizer: only parameters that still require gradients are optimized.
  param_groups = filter(lambda p: p.requires_grad, model.parameters())
  optimizer = optim.Adam(param_groups, lr=args.lr, betas=(0.9, 0.98), eps=1e-09, weight_decay=args.weight_decay, amsgrad=False)
  # NOTE(review): T_max is the number of batches per epoch, but the
  # scheduler is stepped once per epoch below -- confirm this is intended.
  scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader))

  # Trainer
  loss_weights = {'loss_rec': 1.}
  if args.debug:
    args.print_freq = 1
  trainer = Trainer(model, args.evaluation_metric, args.logs_dir,
                    iters=start_iters, best_res=best_res, grad_clip=args.grad_clip,
                    use_cuda=args.cuda, loss_weights=loss_weights)

  # Start training
  for epoch in range(start_epoch, args.epochs):
    scheduler.step(epoch)
    current_lr = optimizer.param_groups[0]['lr']
    trainer.train(epoch, train_loader, optimizer, current_lr,
                  print_freq=args.print_freq,
                  train_tfLogger=train_tfLogger,
                  is_debug=args.debug,
                  evaluator=evaluator,
                  test_loader=test_loader,
                  eval_tfLogger=eval_tfLogger,
                  test_dataset=test_dataset)

  # Final test
  print('Test with best model:')
  checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
  # The model may be wrapped in DataParallel (CUDA) or bare (CPU), and the
  # resume path above loads checkpoints into the bare model. Load into the
  # underlying module and tolerate an optional 'module.' key prefix so the
  # reload works in either configuration.
  bare_model = model.module if isinstance(model, nn.DataParallel) else model
  prefix = 'module.'
  state_dict = {
      (key[len(prefix):] if key.startswith(prefix) else key): value
      for key, value in checkpoint['state_dict'].items()
  }
  bare_model.load_state_dict(state_dict)
  evaluator.evaluate(test_loader, dataset=test_dataset)

  # Close the tensorboard logger
  train_tfLogger.close()
  eval_tfLogger.close()
Beispiel #8
0
def detect_NSyolov3(save_txt=False, save_img=True):
    """Detect text-based traffic signs and recognize the text on them.

    Pipeline per input image or frame:
      1. the Darknet (NSYOLOv3) detector proposes sign boxes,
      2. the EAST model finds text quadrilaterals inside each sign crop,
      3. each quadrilateral is perspective-warped and read by ASTER.

    Recognized strings and absolute box coordinates are printed, optionally
    appended to ``<output>/results.txt``, and the annotated image is
    optionally saved to the output folder. The output folder is deleted and
    recreated on every call.

    Args:
        save_txt: Kept for interface compatibility; the value is overridden
            by ``opt.save_txt``.
        save_img: Kept for interface compatibility; the value is overridden
            by ``opt.save_img``.
    """
    img_size = (960, 960) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half = opt.output, opt.source, opt.weights, opt.half
    # The command-line options take precedence over the keyword arguments.
    save_img, save_txt = opt.save_img, opt.save_txt
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)
    print('Load NSYOLOv3 Model ...')
    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)
    # Eval mode
    model.to(device).eval()
    print('NSYOLOv3 加载成功!')
    model_TSEAST = EAST_PVANet(inception_mid = False,inception_end = True,version=1,conv1_5=False,acb_block = False,dcn =False,with_modulated_dcn=True).to(device)
    print('Load  TSEAST Model ...')
    model_TSEAST.load_state_dict(torch.load('pths/TSEAST.pth'))
    model_TSEAST.to(device).eval()
    print('TSEAST 加载成功!')

    np.random.seed(1001)
    torch.manual_seed(1001)
    torch.cuda.manual_seed(1001)
    torch.cuda.manual_seed_all(1001)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    dataset_info = DataInfo('Traffic_Sign')
    print('Load  ASTER Model ...')
    # Create model
    model_ASTER = ModelBuilder(arch='ResNet_ASTER', rec_num_classes=dataset_info.rec_num_classes,
                        sDim=512, attDim=512, max_len_labels=22,
                        eos=dataset_info.char2id[dataset_info.EOS], STN_ON=True)
    model_ASTER.load_state_dict(torch.load('pths/ASTER.pth'))
    # ASTER always runs on CUDA. Use a dedicated name so the device selected
    # for the detector and EAST (where those models already live) is not
    # overwritten.
    aster_device = torch.device("cuda")
    model_ASTER = model_ASTER.to(aster_device)
    model_ASTER = nn.DataParallel(model_ASTER)
    model_ASTER.eval()
    print('ASTER 加载成功!')
    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'pths/export.onnx', verbose=True)
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = opt.save_img
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors
    classes = ['Text-Based Traffic Sign']#load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

    # Stays None when the dataset yields nothing, so the summary below does
    # not hit an unbound name.
    save_path = None

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred, _ = model(img)

        if opt.half:
            pred = pred.float()

        for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s
            print(s)
            image_ori_PIL = Image.fromarray(cv2.cvtColor(im0,cv2.COLOR_BGR2RGB))
            plot_img = image_ori_PIL
            save_path = str(Path(out) / Path(p).name)
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '检测到 %g %s' % (n, '个文字类型交通标志')  # add to string
                print(s)
                # Write results
                for *xyxy, conf, _, cls in det:
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    img_east = image_ori_PIL.crop(list(map(int,xyxy)))
                    boxes = detect_TSEAST(img_east, model_TSEAST, device)
                    plot_img = plot_boxes(plot_img,xyxy,boxes)  # draw the boxes
                    if boxes is not None and xyxy is not None:
                        # Distinct loop names: reusing `i` and `img` here
                        # would clobber the batch index and the detector
                        # input whose shape is needed to rescale the boxes
                        # of the next image in the batch.
                        for box_idx, box in enumerate(boxes):
                            # Quadrilateral corners shifted from crop
                            # coordinates to full-image coordinates.
                            pts1 = np.float32([[box[0]+xyxy[0], box[1]+xyxy[1]], [box[2]+xyxy[0], box[3]+xyxy[1]], [box[4]+xyxy[0], box[5]+xyxy[1]], [box[6]+xyxy[0], box[7]+xyxy[1]]])
                            # Target size: mean of the two horizontal and
                            # the two vertical coordinate extents.
                            w1 = np.sqrt(np.sum((box[2]-box[0])**2))
                            w2 = np.sqrt(np.sum((box[6]-box[4])**2))
                            h1 = np.sqrt(np.sum((box[7]-box[1])**2))
                            h2 = np.sqrt(np.sum((box[5]-box[3])**2))
                            w = int((w1+w2)//2)
                            h = int((h1+h2)//2)
                            pts2 = np.float32(([0,0],[w,0],[w,h],[0,h]))
                            M = cv2.getPerspectiveTransform(pts1,pts2)
                            dst = cv2.warpPerspective(im0,M,(w,h))
                            rec_img = image_process(dst).to(aster_device)
                            # Placeholder targets/lengths expected by the
                            # ASTER forward pass: all ones, ending in EOS.
                            rec_targets = torch.IntTensor(1, 22).fill_(1)
                            rec_targets[:,22-1] = dataset_info.char2id[dataset_info.EOS]
                            input_dict = {
                                'images': rec_img.unsqueeze(0),
                                'rec_targets': rec_targets,
                                'rec_lengths': [22],
                            }
                            output_dict = model_ASTER(input_dict)
                            pred_rec = output_dict['output']['pred_rec']
                            pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'], dataset=dataset_info)
                            print('Recognition result: {0} '.format(pred_str[0]),end=' ')
                            abs_box = list(map(int,[box[0]+xyxy[0], box[1]+xyxy[1], box[2]+xyxy[0], box[3]+xyxy[1], box[4]+xyxy[0], box[5]+xyxy[1], box[6]+xyxy[0], box[7]+xyxy[1]]))
                            print(abs_box,sep=',')
                            if save_txt:  # Write to file
                                with open(str(Path(out))+'/'  + 'results.txt', 'a') as file:
                                    file.write(('%s %s %g %g %g %g %g %g %g %g '  + '\n') % (path,pred_str[0]  ,*abs_box))
                if save_img:
                    plot_img.save(save_path)
            else:
                print('图片中 未检测 到文字型交通标志 !', end = ' ')

            print('Done. (%.3fs)' % (time.time() - t))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin' and save_path is not None:  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('All Done. (%.3fs)' % (time.time() - t0))
Beispiel #9
0
    def __init__(self, opt, dataset=None):
        """Build the generator/discriminator, losses, optimizers and schedulers.

        Args:
            opt: Option mapping. This method reads ``opt['dist']`` and
                ``opt['train']``; from the latter it reads the loss weights
                and criteria, ``gan_type``/``gan_weight``, the D update
                settings, the Adam hyper-parameters for G and D, the LR
                scheme settings and (when the text branch is enabled)
                ``text_model``.
            dataset: Optional object describing the text vocabulary. When
                truthy, the text-recognition branch is enabled and, in
                training mode, ``dataset.rec_num_classes``,
                ``dataset.char2id`` and ``dataset.EOS`` are used to build
                the recognizer.

        Raises:
            NotImplementedError: If the pixel/feature criterion is neither
                ``'l1'`` nor ``'l2'``, or if ``lr_scheme`` is neither
                ``'MultiStepLR'`` nor ``'CosineAnnealingLR_Restart'``.
        """
        super(SRGANModel, self).__init__(opt)
        # NOTE(review): self.device, self.is_train, self.optimizers and
        # self.schedulers are presumably set up by the base class -- confirm.

        # Always define the flag: it is read unconditionally further down, so
        # leaving it unset when no dataset is passed would raise
        # AttributeError in training mode.
        self.cri_text = bool(dataset)

        if opt['dist']:
            self.rank = torch.distributed.get_rank()
        else:
            self.rank = -1  # non dist training
        train_opt = opt['train']

        # define networks and load pretrained models
        self.netG = networks.define_G(opt).to(self.device)
        if opt['dist']:
            self.netG = DistributedDataParallel(
                self.netG, device_ids=[torch.cuda.current_device()])
        else:
            self.netG = DataParallel(self.netG)
        if self.is_train:
            self.netD = networks.define_D(opt).to(self.device)
            if opt['dist']:
                self.netD = DistributedDataParallel(
                    self.netD, device_ids=[torch.cuda.current_device()])
            else:
                self.netD = DataParallel(self.netD)

            self.netG.train()
            self.netD.train()

        # define losses, optimizer and scheduler
        if self.is_train:
            # G pixel loss
            if train_opt['pixel_weight'] > 0:
                l_pix_type = train_opt['pixel_criterion']
                if l_pix_type == 'l1':
                    self.cri_pix = nn.L1Loss().to(self.device)
                elif l_pix_type == 'l2':
                    self.cri_pix = nn.MSELoss().to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_pix_type))
                self.l_pix_w = train_opt['pixel_weight']
            else:
                logger.info('Remove pixel loss.')
                self.cri_pix = None

            # G feature loss
            if train_opt['feature_weight'] > 0:
                l_fea_type = train_opt['feature_criterion']
                if l_fea_type == 'l1':
                    self.cri_fea = nn.L1Loss().to(self.device)
                elif l_fea_type == 'l2':
                    self.cri_fea = nn.MSELoss().to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_fea_type))
                self.l_fea_w = train_opt['feature_weight']
            else:
                logger.info('Remove feature loss.')
                self.cri_fea = None
            if self.cri_fea:  # load VGG perceptual loss
                self.netF = networks.define_F(opt,
                                              use_bn=False).to(self.device)
                if opt['dist']:
                    pass  # do not need to use DistributedDataParallel for netF
                else:
                    self.netF = DataParallel(self.netF)
            if self.cri_text:
                # Text-recognition network, built with fixed ASTER settings
                # and kept in eval mode.
                from lib.models.model_builder import ModelBuilder
                self.netT = ModelBuilder(
                    arch="ResNet_ASTER",
                    rec_num_classes=dataset.rec_num_classes,
                    sDim=512,
                    attDim=512,
                    max_len_labels=100,
                    eos=dataset.char2id[dataset.EOS],
                    STN_ON=True).to(self.device)

                self.netT = DataParallel(self.netT)
                self.netT.eval()
                from lib.util.serialization import load_checkpoint
                checkpoint = load_checkpoint(train_opt['text_model'])
                # NOTE(review): the state dict is loaded into the
                # DataParallel wrapper, whose parameter names carry a
                # 'module.' prefix -- confirm the checkpoint was saved from a
                # wrapped model, otherwise the keys will not match.
                self.netT.load_state_dict(checkpoint['state_dict'])

            # GD gan loss
            self.cri_gan = GANLoss(train_opt['gan_type'], 1.0,
                                   0.0).to(self.device)
            self.l_gan_w = train_opt['gan_weight']
            # D_update_ratio and D_init_iters (falsy settings fall back to
            # 1 and 0 respectively)
            self.D_update_ratio = train_opt['D_update_ratio'] if train_opt[
                'D_update_ratio'] else 1
            self.D_init_iters = train_opt['D_init_iters'] if train_opt[
                'D_init_iters'] else 0

            # optimizers
            # G
            wd_G = train_opt['weight_decay_G'] if train_opt[
                'weight_decay_G'] else 0
            optim_params = []
            for k, v in self.netG.named_parameters(
            ):  # can optimize for a part of the model
                if v.requires_grad:
                    optim_params.append(v)
                else:
                    # Only rank <= 0 logs, so the warning is not repeated by
                    # every distributed worker.
                    if self.rank <= 0:
                        logger.warning(
                            'Params [{:s}] will not optimize.'.format(k))
            self.optimizer_G = torch.optim.Adam(optim_params,
                                                lr=train_opt['lr_G'],
                                                weight_decay=wd_G,
                                                betas=(train_opt['beta1_G'],
                                                       train_opt['beta2_G']))
            self.optimizers.append(self.optimizer_G)
            # D
            wd_D = train_opt['weight_decay_D'] if train_opt[
                'weight_decay_D'] else 0
            self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
                                                lr=train_opt['lr_D'],
                                                weight_decay=wd_D,
                                                betas=(train_opt['beta1_D'],
                                                       train_opt['beta2_D']))
            self.optimizers.append(self.optimizer_D)

            # schedulers: one per optimizer
            if train_opt['lr_scheme'] == 'MultiStepLR':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_scheduler.MultiStepLR_Restart(
                            optimizer,
                            train_opt['lr_steps'],
                            restarts=train_opt['restarts'],
                            weights=train_opt['restart_weights'],
                            gamma=train_opt['lr_gamma'],
                            clear_state=train_opt['clear_state']))
            elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_scheduler.CosineAnnealingLR_Restart(
                            optimizer,
                            train_opt['T_period'],
                            eta_min=train_opt['eta_min'],
                            restarts=train_opt['restarts'],
                            weights=train_opt['restart_weights']))
            else:
                raise NotImplementedError(
                    'MultiStepLR learning rate scheme is enough.')

            self.log_dict = OrderedDict()

        self.print_network()  # print network
        self.load()  # load G and D if needed
Beispiel #10
0
def main(args):
    """Compare ASTER and Tesseract recognition accuracy on a set of images.

    Builds the ASTER recognizer described by ``args``, optionally restores
    it from ``args.resume``, then runs both ASTER and Tesseract on at most
    100 images and prints per-image results and the two accuracies.

    The image list is read from ``annotation_test.txt`` inside
    ``args.image_path``; if that file cannot be opened, the directory
    listing of ``args.image_path`` is used instead. For each entry only the
    text before the first space is used as the relative image path, and the
    ground-truth label is the second-to-last ``'_'``-separated field of the
    resulting path (e.g. ``466_MONIKER_49537.jpg`` -> ``MONIKER``).

    Args:
        args: Parsed command-line namespace. Reads ``seed``, ``cuda``,
            ``height``, ``width``, ``voc_type``, ``arch``, ``decoder_sdim``,
            ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
            ``image_path``; ``cuda``, ``height`` and ``width`` may be
            overwritten.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
    # Always bind a device so the CPU path does not hit a NameError later.
    device = torch.device("cuda" if args.cuda else "cpu")

    # Create data loaders
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)
    print(dataset_info.char2id)

    # Create model
    model = ModelBuilder(arch=args.arch, rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim, attDim=args.attDim, max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS], STN_ON=args.STN_ON)

    # Load from checkpoint (before wrapping in DataParallel)
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()

    # Prefer the annotation file; fall back to a plain directory listing.
    try:
        with open(os.path.join(args.image_path, 'annotation_test.txt'), 'r') as test_list_file:
            test_list = test_list_file.read().splitlines()
    except IOError:
        test_list = os.listdir(args.image_path)

    data_n = min(100, len(test_list))
    aster_correct_cnt = 0
    tesseract_correct_cnt = 0

    # --psm 7: treat the image as a single text line.
    custom_oem_psm_config = '--oem 3 --psm 7'

    for test_name in tqdm(test_list[:data_n]):

        # Split the annotation entry, not the joined path, so that a space in
        # args.image_path does not truncate the path.
        img_path = os.path.join(args.image_path, test_name.split(' ')[0])
        target_str = img_path.split('_')[-2]
        print(img_path, target_str)

        img = image_process(img_path)
        # TODO: testing should be more clean.
        # to be compatible with the lmdb-based testing, need to construct some meaningless variables.
        rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
        rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]
        # Run the whole forward pass without autograd bookkeeping.
        with torch.no_grad():
            input_dict = {
                'images': img.to(device).unsqueeze(0),
                'rec_targets': rec_targets,
                'rec_lengths': [args.max_len],
            }
            output_dict = model(input_dict)
        pred_rec = output_dict['output']['pred_rec']
        pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'], dataset=dataset_info, lower_flag=False)
        if pred_str[0] == target_str:
            aster_correct_cnt += 1

        pil_img = load_image_in_PIL(img_path).convert('RGB')
        # Tesseract output ends with newline/form-feed characters; strip them
        # so the comparison with the label can actually succeed.
        detected_str = pytesseract.image_to_string(pil_img, config=custom_oem_psm_config).strip()

        print(f'GT: {target_str}, ASTER: {pred_str[0]}, Tesseract: {detected_str}')
        if detected_str == target_str:
            tesseract_correct_cnt += 1
            print('===================== correct')

    # Guard against an empty test list to avoid ZeroDivisionError.
    aster_acc = aster_correct_cnt / data_n if data_n else 0.0
    tesseract_acc = tesseract_correct_cnt / data_n if data_n else 0.0
    print(f'Aster acc: {aster_correct_cnt} / {data_n}. {aster_acc}')
    print(f'Tesseract acc: {tesseract_correct_cnt} / {data_n}. {tesseract_acc}')