Example #1
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--N", type=int, default=2)
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw")
    data_path = os.path.join("..", "data")
    source_path = os.path.join(data_path, f"{args.source}.txt")
    output_path = os.path.join("..", "output", args.source, args.mode)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    max_text_length = 128
    charset_base = string.printable[:95]
    charset_special = """ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüý"""

    if args.transform:
        data = Dataset(source=os.path.join(raw_path, args.source))
        data.read_lines(maxlen=max_text_length)

        # add synthetic noise to the validation and test sentences
        valid_noised = pp.add_noise(data.dataset['valid'], max_text_length)
        test_noised = pp.add_noise(data.dataset['test'], max_text_length)

        # measure how far the noised sentences drift from the ground truth
        valid_metrics = ev.ocr_metrics(ground_truth=data.dataset['valid'],
                                       data=valid_noised)

        info = "\n".join([
            f"####",
            f"#### {args.source} partitions (number of sentences)",
            f"####",
            f"#### Total:      {data.size['total']}",
            f"####",
            f"#### Train:      {data.size['train']}",
Example #2
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw", args.source)
    source_path = os.path.join("..", "data", f"{args.source}.hdf5")
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    input_size = (1024, 128, 1)
    max_text_length = 128
    charset_base = string.printable[:95]

    if args.transform:
        assert os.path.exists(raw_path)
        print(f"The {args.source} dataset will be transformed...")

        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()

        print("Partitions will be preprocessed...")
        ds.preprocess_partitions(input_size=input_size)

        print("Partitions will be saved...")
        os.makedirs(os.path.dirname(source_path), exist_ok=True)

        # append each partition's images (dt) and labels (gt) to the HDF5 file,
        # compressed with gzip
        for i in ds.partitions:
            with h5py.File(source_path, "a") as hf:
                hf.create_dataset(f"{i}/dt",
                                  data=ds.dataset[i]['dt'],
                                  compression="gzip",
                                  compression_opts=9)
                hf.create_dataset(f"{i}/gt",
Example #3
    os.makedirs(output_path, exist_ok=True)

    if args.transform:

        # from data.create_hdf5 import Dataset

        # if os.path.isfile(source_path): 
        #     print("Dataset file already exists")
        # else:
        #     ds = Dataset()
        #     ds.save_partitions()

        from data.reader import Dataset
        print(f"iam dataset will be transformed...")
        ds = Dataset(source=os.path.join(raw_path, "iam"), name="iam")
        ds.read_partitions()
        ds.save_partitions(source_path, target_image_size, maxTextLength)

    elif args.predict:

        input_image_path = os.path.join(output_path, "prediction")
        output_image_path = os.path.join(input_image_path, "out")
        os.makedirs(output_image_path, exist_ok=True)

        # glob a user-specified image pattern if given, otherwise every PNG
        # in the prediction folder
        if args.image:
            images = sorted(glob(os.path.join(input_image_path, args.image)))
        else:
            images = sorted(glob(os.path.join(input_image_path, "*.png")))
        
        from network.model import MyModel
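The predict branch is cut off at the import above; as a rough sketch, the globbed PNGs might be loaded and batched along these lines, assuming grayscale inputs and a (width, height) target size. The paths and sizes here are placeholders, and MyModel's real preprocessing is not shown in the excerpt.

from glob import glob

import cv2
import numpy as np

input_image_path = "../output/iam/prediction"  # hypothetical example path
target_w, target_h = 1024, 128                 # hypothetical target size

batch = []
for path in sorted(glob(f"{input_image_path}/*.png")):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)  # single-channel read
    img = cv2.resize(img, (target_w, target_h))   # cv2.resize takes (width, height)
    batch.append(img[..., np.newaxis] / 255.0)    # add channel axis, scale to [0, 1]

batch = np.asarray(batch, dtype=np.float32)
# predictions = model.predict(batch)              # depends on how MyModel is built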
Example #4
    raw_path = os.path.join("raw")
    source_path = os.path.join("data", f"{ds_names_str}.hdf5")
    output_path = os.path.join("output", ds_names_str, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    if args.charset_path is not None:
        with open(args.charset_path, 'r') as f:
            # the first line of the file holds the full character set
            charset_base = f.readline().strip()
    else:
        charset_base = string.printable[:95]

    if args.transform:
        print(os.getcwd())
        print(f"{ds_names_str} dataset will be transformed...")
        ds = Dataset(source=raw_path, name=args.ds_names)
        ds.read_partitions()
        ds.save_partitions(source_path,
                           input_size,
                           max_text_length,
                           binarize=args.binarize)

    elif args.cv2:
        with h5py.File(source_path, "r") as hf:
            dt = hf['test']['dt'][:256]
            gt = hf['test']['gt'][:256]

        predict_file = os.path.join(output_path, "predict.txt")
        predicts = [''] * len(dt)

        if os.path.isfile(predict_file):
    parser.add_argument("--epochs", type=int, default=1000)
    parser.add_argument("--batch_size", type=int, default=16)
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw", args.source)
    source_path = os.path.join("..", "data", f"{args.source}.hdf5")
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    input_size = (1024, 128, 1)
    max_text_length = 128
    charset_base = string.printable[:95]

    if args.transform:
        print(f"{args.source} dataset will be transformed...")
        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()
        ds.save_partitions(source_path, input_size, max_text_length)

    elif args.cv2:
        with h5py.File(source_path, "r") as hf:
            dt = hf['test']['dt'][:256]
            gt = hf['test']['gt'][:256]

        predict_file = os.path.join(output_path, "predict.txt")
        predicts = [''] * len(dt)

        # reuse previously written predictions; line[5:] drops the "TE_P " prefix
        if os.path.isfile(predict_file):
            with open(predict_file, "r") as lg:
                predicts = [line[5:] for line in lg if line.startswith("TE_P")]
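For completeness, a small sketch of writing and re-reading such a predict.txt; only the "TE_P " prefix is confirmed by the parsing above, and the "TE_L " label prefix and sample data are assumptions.

# hypothetical ground-truth/prediction pairs; gt values read from the HDF5
# file may be byte strings and would need .decode() before writing
gt = ["hello world"]
predicts = ["helo world"]
predict_file = "predict.txt"  # stands in for the path built above

with open(predict_file, "w") as lg:
    for label, pred in zip(gt, predicts):
        lg.write(f"TE_L {label}\nTE_P {pred}\n")

with open(predict_file, "r") as lg:
    predicts = [line[5:].rstrip("\n") for line in lg if line.startswith("TE_P")]

print(predicts)  # ['helo world']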