import os
import string

import h5py

# args, raw_path and source_path are defined earlier in the script;
# Dataset is a project-local class imported from the repository's data modules.

# Output locations for the HDF5/Keras variant of the pipeline.
output_path = os.path.join("..", "output", args.source, args.arch)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

input_size = (1024, 128, 1)
max_text_length = 128
charset_base = string.printable[:95]

if args.transform:
    assert os.path.exists(raw_path)
    print(f"The {args.source} dataset will be transformed...")

    ds = Dataset(source=raw_path, name=args.source)
    ds.read_partitions()

    print("Partitions will be preprocessed...")
    ds.preprocess_partitions(input_size=input_size)

    print("Partitions will be saved...")
    os.makedirs(os.path.dirname(source_path), exist_ok=True)

    # Each partition is written as a pair of gzip-compressed datasets:
    # f"{i}/dt" (preprocessed images) and f"{i}/gt" (ground-truth text).
    for i in ds.partitions:
        with h5py.File(source_path, "a") as hf:
            hf.create_dataset(f"{i}/dt", data=ds.dataset[i]['dt'],
                              compression="gzip", compression_opts=9)
            hf.create_dataset(f"{i}/gt", data=ds.dataset[i]['gt'],
                              compression="gzip", compression_opts=9)
            print(f"[OK] {i} partition.")
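# A minimal sketch (not part of the original script) of reading a saved
# partition back, useful for sanity-checking the transform step. It assumes
# the f"{i}/dt" / f"{i}/gt" layout written above, and that h5py returns the
# ground-truth entries as byte strings (its default for string data).
def load_partition(source_path, partition):
    """Return (images, labels) for one partition of the preprocessed HDF5 file."""
    with h5py.File(source_path, "r") as hf:
        dt = hf[f"{partition}/dt"][:]                          # image tensors
        gt = [x.decode() for x in hf[f"{partition}/gt"][:]]    # bytes -> str
    return dt, gt

# Example usage (hypothetical): images, labels = load_partition(source_path, "train")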
# PyTorch variant of the same setup: smaller input size, shorter max text
# length, and an explicit tokenizer built from the character set.
output_path = os.path.join("..", "output", args.source, args.arch)
target_path = os.path.join(output_path, "checkpoint_weights.pt")

input_size = (128, 128, 1)
max_text_length = 16
charset_base = string.printable[:95]
tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

if args.transform:
    print(f"{args.source} dataset will be transformed...")

    ds = Dataset(source=raw_path, name=args.source)
    ds.read_partitions()

    print("Partitions will be preprocessed...")
    ds.preprocess_partitions(input_size=input_size, no_aug=args.no_aug)

    print("Partitions will be saved...")
    os.makedirs(os.path.dirname(source_path), exist_ok=True)

    for i in ds.partitions:
        with h5py.File(source_path, "a") as hf:
            hf.create_dataset(f"{i}/dt", data=ds.dataset[i]['dt'],
                              compression="gzip", compression_opts=9)
            hf.create_dataset(f"{i}/gt", data=ds.dataset[i]['gt'],
                              compression="gzip", compression_opts=9)
            print(f"[OK] {i} partition.")
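# A hedged sketch of feeding the saved partitions to the PyTorch variant:
# wrap one HDF5 partition in a torch.utils.data.Dataset so it can be consumed
# by a DataLoader. The tokenizer.encode(text) call is an assumption about this
# project's Tokenizer API, not a confirmed interface; adjust to the real one.
import torch
from torch.utils.data import Dataset as TorchDataset

class HDF5Partition(TorchDataset):
    """Loads one preprocessed partition (dt/gt pair) into memory."""

    def __init__(self, source_path, partition, tokenizer):
        with h5py.File(source_path, "r") as hf:
            self.dt = hf[f"{partition}/dt"][:]
            self.gt = [x.decode() for x in hf[f"{partition}/gt"][:]]
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.dt)

    def __getitem__(self, idx):
        img = torch.from_numpy(self.dt[idx]).float()
        # Assumed API: encode() maps a string to a sequence of token ids.
        target = torch.tensor(self.tokenizer.encode(self.gt[idx]), dtype=torch.long)
        return img, target

# Example usage (hypothetical):
# loader = torch.utils.data.DataLoader(
#     HDF5Partition(source_path, "train", tokenizer), batch_size=16, shuffle=True)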