    # Fragment of a CLI entry point: `args`, `raw_path` and `source_path`
    # are defined by argparse setup truncated above this snippet.
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    input_size = (1024, 128, 1)           # input image shape: (width, height, channels)
    max_text_length = 128                 # maximum transcription length in characters
    charset_base = string.printable[:95]  # 95 printable ASCII chars (drops \t\n\r\x0b\x0c)

    if args.transform:
        assert os.path.exists(raw_path), f"raw dataset not found: {raw_path}"
        print(f"The {args.source} dataset will be transformed...")

        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()

        print("Partitions will be preprocessed...")
        ds.preprocess_partitions(input_size=input_size)

        print("Partitions will be saved...")
        os.makedirs(os.path.dirname(source_path), exist_ok=True)

        # Open the HDF5 file once and write each partition's images (dt)
        # and ground-truth texts (gt) as gzip-compressed datasets.
        with h5py.File(source_path, "a") as hf:
            for i in ds.partitions:
                hf.create_dataset(f"{i}/dt",
                                  data=ds.dataset[i]['dt'],
                                  compression="gzip",
                                  compression_opts=9)
                hf.create_dataset(f"{i}/gt",
                                  data=ds.dataset[i]['gt'],
                                  compression="gzip",
                                  compression_opts=9)
                print(f"[OK] {i} partition.")
Example #2
    # The opening lines of this example were truncated in the source;
    # the output_path assignment is reconstructed here to mirror Example #1.
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.pt")

    input_size = (128, 128, 1)            # input image shape: (width, height, channels)
    max_text_length = 16                  # maximum transcription length in characters
    charset_base = string.printable[:95]  # 95 printable ASCII chars (drops \t\n\r\x0b\x0c)
    tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

    if args.transform:
        print(f"{args.source} dataset will be transformed...")

        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()

        print("Partitions will be preprocessed...")
        ds.preprocess_partitions(input_size=input_size, no_aug=args.no_aug)  # no_aug presumably disables augmentation

        print("Partitions will be saved...")
        os.makedirs(os.path.dirname(source_path), exist_ok=True)

        # Open the HDF5 file once and write each partition's images (dt)
        # and ground-truth texts (gt) as gzip-compressed datasets.
        with h5py.File(source_path, "a") as hf:
            for i in ds.partitions:
                hf.create_dataset(f"{i}/dt",
                                  data=ds.dataset[i]['dt'],
                                  compression="gzip",
                                  compression_opts=9)
                hf.create_dataset(f"{i}/gt",
                                  data=ds.dataset[i]['gt'],
                                  compression="gzip",
                                  compression_opts=9)
                print(f"[OK] {i} partition.")