def test_filename(tmpdir):
    """Configuring the logger with a relative filepath creates the file in the CWD."""
    with tmpdir.as_cwd():
        name = "test"
        log.config(filepath=name)
        log.info("test!")
        log.clear()
        # the relative path must have been resolved against the CWD
        assert tmpdir.join(name).exists()
def common_main(args: Dict[str, Any]) -> Dict[str, Any]:
    """Common CLI post-processing shared by the entry-point scripts.

    Drops the "config" entry, configures the project logger from the
    "logging" sub-dict (resolving a relative log filepath against the
    experiment directory), logs the remaining arguments plus the installed
    package versions, and returns the pruned ``args``.

    Args:
        args: parsed CLI arguments; mutated in place ("config" and
            "logging" entries are removed).

    Returns:
        The same ``args`` dict, without the "config" and "logging" entries.
    """
    del args["config"]
    # configure logging. Renamed the local from `logging` so it no longer
    # shadows the stdlib `logging` module.
    logging_args = args.pop("logging")
    if logging_args["filepath"] is not None:
        # place the log file inside the experiment directory
        logging_args["filepath"] = join(
            args["common"].experiment_dirpath, logging_args["filepath"]
        )
    log.config(**logging_args)
    log.info(f"Arguments: {args}")
    versions = get_installed_versions()
    if versions:
        log.info("Installed:", versions)
    return args
def run(
    syms: str,
    fixed_input_height: Optional[NonNegativeInt] = 0,
    adaptive_pooling: str = "avgpool-16",
    common: Optional[CommonArgs] = None,
    crnn: Optional[CreateCRNNArgs] = None,
    save_model: bool = False,
) -> LaiaCRNN:
    """Create (and optionally save) a LaiaCRNN model.

    Args:
        syms: path to the symbols table mapping strings to integers; its
            size determines the number of output labels.
        fixed_input_height: if non-zero, input images are assumed to have
            this fixed height and a fixed-size image sequencer is used.
        adaptive_pooling: sequencer spec used when the input height varies.
        common: common experiment arguments; a fresh ``CommonArgs()`` is
            built when omitted.
        crnn: model hyper-parameters; a fresh ``CreateCRNNArgs()`` is
            built when omitted.
        save_model: whether to persist the model arguments with ModelSaver.

    Returns:
        The instantiated LaiaCRNN model.
    """
    # The defaults used to be shared mutable instances (`common=CommonArgs()`,
    # `crnn=CreateCRNNArgs()`) that this function mutates, leaking state
    # across calls. Build fresh instances per call instead.
    if common is None:
        common = CommonArgs()
    if crnn is None:
        crnn = CreateCRNNArgs()
    seed_everything(common.seed)
    crnn.num_output_labels = len(SymbolsTable(syms))
    # NOTE: the old `if crnn is not None:` guard was dead code — `crnn` was
    # already dereferenced on the line above — so it has been removed.
    if fixed_input_height:
        # the image height is fixed, so the feature size after the conv
        # stack is known and a fixed-size sequencer can be used
        conv_output_size = LaiaCRNN.get_conv_output_size(
            size=(fixed_input_height, fixed_input_height),
            cnn_kernel_size=crnn.cnn_kernel_size,
            cnn_stride=crnn.cnn_stride,
            cnn_dilation=crnn.cnn_dilation,
            cnn_poolsize=crnn.cnn_poolsize,
        )
        fixed_size_after_conv = conv_output_size[1 if crnn.vertical_text else 0]
        assert (
            fixed_size_after_conv > 0
        ), "The image size is too small for the CNN architecture"
        crnn.image_sequencer = f"none-{fixed_size_after_conv}"
    else:
        crnn.image_sequencer = adaptive_pooling
    # resolve string names into the actual torch.nn classes
    crnn.rnn_type = getattr(nn, crnn.rnn_type)
    crnn.cnn_activation = [getattr(nn, act) for act in crnn.cnn_activation]
    model = LaiaCRNN(**vars(crnn))
    log.info(
        "Model has {} parameters",
        sum(param.numel() for param in model.parameters()),
    )
    if save_model:
        ModelSaver(common.train_path, common.model_filename).save(
            LaiaCRNN, **vars(crnn)
        )
    return model
default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('queries', help='Transcription of each query image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=argparse.FileType('w'), help='Filepath of the output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters', sum(param.data.numel() for param in model.parameters())) model.load_state_dict(torch.load(args.model_checkpoint)) model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu() model.eval() queries_dataset = TextImageFromTextTableDataset( args.queries, args.img_dir, img_transform=ImageToTensor()) queries_loader = DataLoader(queries_dataset) def process_image(sample): sample = Variable(sample, requires_grad=False) sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu() phoc = torch.nn.functional.logsigmoid(model(sample)) return phoc.data.cpu().squeeze() # Predict PHOC vectors
if __name__ == '__main__': add_defaults('gpu') add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('gt_txt', help='Transcription of each image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=argparse.FileType('w'), help='Filepath of the output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters', sum(param.data.numel() for param in model.parameters())) model.load_state_dict(torch.load(args.model_checkpoint)) model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu() model.eval() dataset = TextImageFromTextTableDataset( args.gt_txt, args.img_dir, img_transform=ImageToTensor()) loader = DataLoader(dataset) def process_image(sample): sample = Variable(sample, requires_grad=False) sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu() phoc = torch.nn.functional.sigmoid(model(sample)) return phoc.data.cpu().numpy() # Predict PHOC vectors
default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('--distractors', help='Transcription of each distractor image') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('queries', help='Transcription of each query image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=FileType('w'), help='Output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters', sum(param.data.numel() for param in model.parameters())) model.load_state_dict(torch.load(args.model_checkpoint)) model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu() model.eval() def process_image(sample): sample = Variable(sample, requires_grad=False) sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu() phoc = torch.nn.functional.sigmoid(model(sample)) return phoc.data.cpu().numpy() def process_dataset(filename): dataset = TextImageFromTextTableDataset(filename, args.img_dir, img_transform=ImageToTensor()) data_loader = DataLoader(dataset)
def run(
    syms: str,
    img_dirs: List[str],
    tr_txt_table: str,
    va_txt_table: str,
    common: CommonArgs = CommonArgs(),
    train: TrainArgs = TrainArgs(),
    optimizer: OptimizerArgs = OptimizerArgs(),
    scheduler: SchedulerArgs = SchedulerArgs(),
    data: DataArgs = DataArgs(),
    trainer: TrainerArgs = TrainerArgs(),
):
    """Train an HTR model with PyTorch Lightning.

    Loads a previously-created model, wraps it in an HTREngineModule,
    builds the data module from the train/validation tables, registers
    checkpointing/early-stopping/progress callbacks, runs the fit loop,
    and logs a training summary.

    Args:
        syms: path to the symbols table mapping strings to integers.
        img_dirs: directories containing the images.
        tr_txt_table: transcription table for the training set.
        va_txt_table: transcription table for the validation set.
        common: common experiment arguments (paths, seed, monitor metric).
        train: training arguments (resume, delimiters, early stopping, ...).
        optimizer: optimizer arguments.
        scheduler: learning-rate scheduler arguments.
        data: data-loading arguments (batch size, color mode).
        trainer: arguments forwarded to ``pl.Trainer``.
    """
    pl.seed_everything(common.seed)
    loader = ModelLoader(common.train_path, filename=common.model_filename, device="cpu")
    # maybe load a checkpoint
    checkpoint = None
    if train.resume:
        checkpoint = loader.prepare_checkpoint(
            common.checkpoint, common.experiment_dirpath, common.monitor
        )
        # extend the epoch budget by the number of resumed epochs
        trainer.max_epochs = torch.load(checkpoint)["epoch"] + train.resume
        log.info(f'Using checkpoint "{checkpoint}"')
        log.info(f"Max epochs set to {trainer.max_epochs}")
    # load the non-pytorch_lightning model
    model = loader.load()
    assert (
        model is not None
    ), "Could not find the model. Have you run pylaia-htr-create-model?"
    # prepare the symbols
    syms = SymbolsTable(syms)
    for d in train.delimiters:
        assert d in syms, f'The delimiter "{d}" is not available in the symbols file'
    # prepare the engine
    engine_module = HTREngineModule(
        model,
        [syms[d] for d in train.delimiters],
        optimizer=optimizer,
        scheduler=scheduler,
        batch_input_fn=Compose([ItemFeeder("img"), ImageFeeder()]),
        batch_target_fn=ItemFeeder("txt"),
        batch_id_fn=ItemFeeder("id"),  # Used to print image ids on exception
    )
    # prepare the data
    data_module = DataModule(
        syms=syms,
        img_dirs=img_dirs,
        tr_txt_table=tr_txt_table,
        va_txt_table=va_txt_table,
        batch_size=data.batch_size,
        color_mode=data.color_mode,
        shuffle_tr=not bool(trainer.limit_train_batches),
        augment_tr=train.augment_training,
        stage="fit",
    )
    # prepare the training callbacks
    # TODO: save on lowest_va_wer and every k epochs https://github.com/PyTorchLightning/pytorch-lightning/issues/2908
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=common.experiment_dirpath,
        filename="{epoch}-lowest_" + common.monitor,
        monitor=common.monitor,
        verbose=True,
        save_top_k=train.checkpoint_k,
        mode="min",
        save_last=True,
    )
    checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last"
    early_stopping_callback = pl.callbacks.EarlyStopping(
        monitor=common.monitor,
        patience=train.early_stopping_patience,
        verbose=True,
        mode="min",
        strict=False,  # training_step may return None
    )
    # fix: `checkpoint_callback` was previously listed twice here, which
    # registered the checkpointing callback twice with the trainer
    callbacks = [
        ProgressBar(refresh_rate=trainer.progress_bar_refresh_rate),
        checkpoint_callback,
        early_stopping_callback,
    ]
    if train.gpu_stats:
        callbacks.append(ProgressBarGPUStats())
    if scheduler.active:
        callbacks.append(LearningRate(logging_interval="epoch"))
    # prepare the trainer
    trainer = pl.Trainer(
        default_root_dir=common.train_path,
        resume_from_checkpoint=checkpoint,
        callbacks=callbacks,
        logger=EpochCSVLogger(common.experiment_dirpath),
        checkpoint_callback=True,
        **vars(trainer),
    )
    # train!
    trainer.fit(engine_module, datamodule=data_module)
    # training is over
    if early_stopping_callback.stopped_epoch:
        log.info(
            "Early stopping triggered after epoch"
            f" {early_stopping_callback.stopped_epoch + 1} (waited for"
            f" {early_stopping_callback.wait_count} epochs). The best score was"
            f" {early_stopping_callback.best_score}"
        )
    log.info(
        f"Model has been trained for {trainer.current_epoch + 1} epochs"
        f" ({trainer.global_step + 1} steps)"
    )
    log.info(
        f"Best {checkpoint_callback.monitor}={checkpoint_callback.best_model_score} "
        f"obtained with model={checkpoint_callback.best_model_path}"
    )
nargs='+', help='PHOC levels used to encode the transcript') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('candidates', help='Transcription of each candidate image') add_argument('queries', help='Transcription of each query image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=argparse.FileType('w'), help='Filepath of the output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters', sum(param.data.numel() for param in model.parameters())) model.load_state_dict(torch.load(args.model_checkpoint)) model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu() model.eval() candidates_dataset = TextImageFromTextTableDataset( args.candidates, args.img_dir, img_transform=ImageToTensor()) candidates_loader = DataLoader(candidates_dataset) queries_dataset = TextImageFromTextTableDataset( args.queries, args.img_dir, img_transform=ImageToTensor()) queries_loader = DataLoader(queries_dataset) def process_image(sample): sample = Variable(sample, requires_grad=False) sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
def test_filepath(tmpdir):
    """Configuring the logger with an explicit filepath creates that file."""
    target = tmpdir / "test"
    log.config(filepath=target)
    log.info("test!")
    log.clear()
    # the log file must exist once the logger is torn down
    assert target.exists()
help='PHOC levels used to encode the transcript') add_argument('--distractors', help='Transcription of each distractor image') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('queries', help='Transcription of each query image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=argparse.FileType('w'), help='Filepath of the output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters', sum(param.data.numel() for param in model.parameters())) model.load_state_dict(torch.load(args.model_checkpoint)) model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu() model.eval() def process_image(sample): sample = Variable(sample, requires_grad=False) sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu() phoc = torch.nn.functional.logsigmoid(model(sample)) return phoc.data.cpu().squeeze() def process_dataset(filename): dataset = TextImageFromTextTableDataset(filename, args.img_dir, img_transform=ImageToTensor()) data_loader = DataLoader(dataset)