"valid_samples_per_epoch", "seed", "train_path", # Override default values for these arguments, but use the # same help/checks: learning_rate=0.0001, momentum=0.9, num_rolling_checkpoints=5, iterations_per_update=10, save_checkpoint_interval=5, show_progress_bar=True, use_distortions=True, weight_l2_penalty=0.00005, ) add_argument("--load_checkpoint", type=str, help="Path to the checkpoint to load.") add_argument("--continue_epoch", type=int) add_argument( "--phoc_levels", type=int, default=[1, 2, 3, 4, 5], nargs="+", help="PHOC levels used to encode the transcript", ) add_argument( "--tpp_levels", type=int, default=[1, 2, 3, 4, 5], nargs="*", help="Temporal Pyramid Pooling levels",
for t in range(output.size(0)):
    for k in range(output.size(1)):
        print(
            "{:d}\t{:d}\t{:d}\t0,{:.10g},{:d}".format(
                t, t + 1, k + 1, -float(output[t, k]), k + 1
            ),
            file=fileout,
        )
print(output.size(0), file=fileout)
print("", file=fileout)


if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--image_sequencer",
        type=str,
        default="avgpool-16",
        help="Average adaptive pooling of the images before the LSTM layers",
    )
    add_argument("--lstm_hidden_size", type=int, default=128)
    add_argument("--lstm_num_layers", type=int, default=1)
    add_argument("--add_softmax", action="store_true")
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("gt_file", help="Transcription of each image")
    add_argument("checkpoint", help="Filepath of the model checkpoint")
    add_argument("output", type=argparse.FileType("w"))
    args = args()

    # Build neural network
    syms = SymbolsTable(args.syms)
    model = DortmundCRNN(
"seed", "train_path", # Override default values for these arguments, but use the # same help/checks: batch_size=1, learning_rate=0.0001, momentum=0.9, num_rolling_checkpoints=5, iterations_per_update=10, save_checkpoint_interval=5, show_progress_bar=True, use_distortions=True, weight_l2_penalty=0.00005, ) add_argument("--load_checkpoint", type=str, help="Path to the checkpoint to load.") add_argument("--continue_epoch", type=int) add_argument( "--image_sequencer", type=str, default="avgpool-16", help="Average adaptive pooling of the images before the LSTM layers", ) add_argument( "--use_adam_optim", type=str2bool, nargs="?", const=True, default=False, help="If true, use Adam optimizer instead of SGD",
from torch.utils.data import DataLoader
from tqdm import tqdm

import laia
import laia.logging as log
from laia.data import TextImageFromTextTableDataset
from laia.models.kws.dortmund_phocnet import DortmundPHOCNet
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--phoc_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("candidates", help="Transcription of each candidate image")
    add_argument("queries", help="Transcription of each query image")
    add_argument("model_checkpoint", help="Filepath of the model checkpoint")
    add_argument(
        "output", type=argparse.FileType("w"), help="Filepath of the output file"
    )
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
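# Hedged illustration (not part of the original script): the PHOC vector length
# computed above is the total number of pyramid regions, sum(phoc_levels), times
# the alphabet size. The numbers below are made up for the example.
example_levels = [1, 2, 3, 4, 5]        # level L splits the word into L regions
example_alphabet_size = 36              # hypothetical alphabet: 26 letters + 10 digits
example_phoc_size = sum(example_levels) * example_alphabet_size
print(example_phoc_size)                # 15 regions * 36 symbols = 540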
from laia.data import ImageDataLoader
from laia.data import TextImageFromTextTableDataset
from laia.decoders import CTCGreedyDecoder
from laia.models.htr.dortmund_crnn import DortmundCRNN
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.plugins.arguments_types import str2bool
from laia.utils import ImageToTensor, TextToTensor
from laia.utils.symbols_table import SymbolsTable

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--output_symbols",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Print the output with symbols instead of integers",
    )
    add_argument(
        "--image_sequencer",
        type=str,
        default="avgpool-16",
        help="Average adaptive pooling of the images before the LSTM layers",
    )
    add_argument("--lstm_hidden_size", type=int, default=128)
    add_argument("--lstm_num_layers", type=int, default=1)
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("gt_file", help="Transcription of each image")
    add_argument("checkpoint", help="Filepath of the model checkpoint")
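# Hedged sketch (an assumption, not the laia implementation): greedy, best-path
# CTC decoding of the kind CTCGreedyDecoder presumably performs. It takes the
# arg-max label per frame, collapses consecutive repeats, and drops the blank
# label (assumed here to be index 0).
def greedy_ctc_decode(frame_scores, blank=0):
    # frame_scores: a list of per-frame score lists, one score per output label.
    best_path = [max(range(len(f)), key=f.__getitem__) for f in frame_scores]
    decoded, prev = [], None
    for label in best_path:
        if label != blank and label != prev:
            decoded.append(label)
        prev = label
    return decoded


# Example: the per-frame arg-max path [0, 1, 1, 0, 2] collapses to [1, 2].
assert greedy_ctc_decode(
    [[0.9, 0.1, 0.0], [0.1, 0.8, 0.1], [0.2, 0.7, 0.1], [0.9, 0.05, 0.05], [0.1, 0.1, 0.8]]
) == [1, 2]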
    # Ensure an explicit channel axis: (H, W) -> (H, W, 1).
    if x.ndim != 3:
        x = np.expand_dims(x, axis=-1)
    # Reorder to the (C, H, W) layout expected by PyTorch.
    x = np.transpose(x, (2, 0, 1))
    return torch.from_numpy(x)


if __name__ == "__main__":
    import matplotlib.pyplot as plt

    import laia.random
    from laia.data import TextImageFromTextTableDataset, ImageDataLoader
    from laia.plugins.arguments import add_argument, add_defaults, args

    add_defaults("seed")
    add_argument("--num_images", type=int, help="Show only this number of images")
    add_argument("--shuffle", action="store_true", help="Shuffle the list of images")
    add_argument("img_dir", help="Directory containing images")
    add_argument("txt_table", help="Transcriptions of each image")
    args = args()

    laia.random.manual_seed(args.seed)

    dataset = TextImageFromTextTableDataset(
        args.txt_table, args.img_dir, img_transform=DortmundImageToTensor()
    )
    dataset_loader = ImageDataLoader(
        dataset=dataset, image_channels=1, shuffle=args.shuffle
    )
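# Hedged illustration (assumed usage, not the original loop): how a single
# (1, H, W) image tensor, like the ones produced by DortmundImageToTensor
# above, can be displayed with matplotlib. A random tensor stands in for a
# real batch element, since the exact batch structure returned by
# ImageDataLoader is not shown here.
import matplotlib.pyplot as plt
import torch

example_img = torch.rand(1, 64, 256)             # stand-in for a (C, H, W) word image
plt.imshow(example_img[0].numpy(), cmap="gray")  # drop the channel axis for imshow
plt.title("example word image")
plt.show()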
from tqdm import tqdm

import laia
import laia.logging as log
from laia.data import TextImageFromTextTableDataset
from laia.models.kws.dortmund_phocnet import DortmundPHOCNet
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor
from laia.utils.phoc import pphoc

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--phoc_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument(
        "--tpp_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="*",
        help="Temporal Pyramid Pooling levels",
    )
    add_argument(
        "--spp_levels",
        type=int,
        default=None,
        nargs="*",