Esempio n. 1
0
    add_argument("--use_new_phoc", action="store_true")
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("tr_img_dir", help="Directory containing word images")
    add_argument("tr_txt_table",
                 help="Character transcriptions of each training image")
    add_argument("va_txt_table",
                 help="Character transcriptions of each validation image")
    args = args()

    laia.common.random.manual_seed(args.seed)

    syms = laia.utils.SymbolsTable(args.syms)

    phoc_size = sum(args.phoc_levels) * len(syms)
    model = DortmundPHOCNet(phoc_size=phoc_size,
                            tpp_levels=args.tpp_levels,
                            spp_levels=args.spp_levels)
    if args.load_checkpoint:
        model_ckpt = torch.load(args.load_checkpoint)
        model.load_state_dict(model_ckpt)
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    logger.info("PHOC embedding size = {}", phoc_size)
    logger.info(
        "Model has {} parameters",
        sum(param.data.numel() for param in model.parameters()),
    )

    if args.use_adam_optim:
        logger.info(
            "Using ADAM optimizer with learning rate = {:g} and weight decay = {:g}",
            args.learning_rate,
Esempio n. 2
0
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("candidates", help="Transcription of each candidate image")
    add_argument("queries", help="Transcription of each query image")
    add_argument("model_checkpoint", help="Filepath of the model checkpoint")
    add_argument("output",
                 type=argparse.FileType("w"),
                 help="Filepath of the output file")
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = DortmundPHOCNet(phoc_size)
    log.info(
        "Model has {} parameters",
        sum(param.data.numel() for param in model.parameters()),
    )
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    candidates_dataset = TextImageFromTextTableDataset(
        args.candidates, args.img_dir, img_transform=ImageToTensor())
    candidates_loader = DataLoader(candidates_dataset)

    queries_dataset = TextImageFromTextTableDataset(
        args.queries, args.img_dir, img_transform=ImageToTensor())
    queries_loader = DataLoader(queries_dataset)
Esempio n. 3
0
 def test_batch_output_size(self):
     """A batch of 3 unpadded images should produce a (3, phoc_size) output."""
     net = DortmundPHOCNet(phoc_size=125, tpp_levels=[1, 2, 3])
     batch = torch.randn(3, 1, 93, 30)
     output = net(batch)
     self.assertEqual([3, 125], list(output.size()))
Esempio n. 4
0
 def test_padded_batch_output_size_tpp_and_spp(self):
     """A padded batch fed to a model with both TPP and SPP levels should
     still produce a (batch, phoc_size) output."""
     net = DortmundPHOCNet(phoc_size=40, tpp_levels=[1, 2, 3], spp_levels=[1, 2])
     images = torch.randn(3, 1, 93, 30)
     # Per-sample (height, width) of the valid region inside each padded image.
     sizes = torch.tensor([[93, 30], [40, 30], [93, 20]])
     output = net(PaddedTensor(images, sizes))
     self.assertEqual([3, 40], list(output.size()))
Esempio n. 5
0
 def test_number_parameters(self):
     """The TPP-only model's total parameter count should match the known value."""
     net = DortmundPHOCNet(phoc_size=540, tpp_levels=[1, 2, 3, 4, 5])
     total_params = sum(param.numel() for param in net.parameters())
     self.assertEqual(59859420, total_params)