Example #1
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--N", type=int, default=2)
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw")
    data_path = os.path.join("..", "data")
    source_path = os.path.join(data_path, f"{args.source}.txt")
    output_path = os.path.join("..", "output", args.source, args.mode)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    max_text_length = 128
    charset_base = string.printable[:95]
    charset_special = """ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüý"""

    if args.transform:
        data = Dataset(source=os.path.join(raw_path, args.source))
        data.read_lines(maxlen=max_text_length)

        # add synthetic noise to the validation and test sentences
        valid_noised = pp.add_noise(data.dataset['valid'], max_text_length)
        test_noised = pp.add_noise(data.dataset['test'], max_text_length)

        # measure how far the noised sentences drift from the ground truth
        valid_metrics = ev.ocr_metrics(ground_truth=data.dataset['valid'],
                                       data=valid_noised)

        info = "\n".join([
            f"####",
            f"#### {args.source} partitions (number of sentences)",
            f"####",
            f"#### Total:      {data.size['total']}",
            f"####",
            f"#### Train:      {data.size['train']}",
Example #2
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw", args.source)
    source_path = os.path.join("..", "data", f"{args.source}.hdf5")
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    input_size = (1024, 128, 1)
    max_text_length = 128
    charset_base = string.printable[:95]

    if args.transform:
        assert os.path.exists(raw_path)
        print(f"The {args.source} dataset will be transformed...")

        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()

        print("Partitions will be preprocessed...")
        ds.preprocess_partitions(input_size=input_size)

        print("Partitions will be saved...")
        os.makedirs(os.path.dirname(source_path), exist_ok=True)

        # append each partition's images (dt) and labels (gt) to the HDF5 file,
        # compressed with gzip
        for i in ds.partitions:
            with h5py.File(source_path, "a") as hf:
                hf.create_dataset(f"{i}/dt",
                                  data=ds.dataset[i]['dt'],
                                  compression="gzip",
                                  compression_opts=9)
                hf.create_dataset(f"{i}/gt",
Example #3
    os.makedirs(output_path, exist_ok=True)

    if args.transform:

        # from data.create_hdf5 import Dataset

        # if os.path.isfile(source_path): 
        #     print("Dataset file already exists")
        # else:
        #     ds = Dataset()
        #     ds.save_partitions()

        from data.reader import Dataset
        print(f"iam dataset will be transformed...")
        ds = Dataset(source=os.path.join(raw_path, "iam"), name="iam")
        ds.read_partitions()
        ds.save_partitions(source_path, target_image_size, maxTextLength)

    elif args.predict:

        input_image_path = os.path.join(output_path, "prediction")
        output_image_path = os.path.join(input_image_path, "out")
        os.makedirs(output_image_path, exist_ok=True)

        # glob a user-specified image pattern if given, otherwise every PNG
        # in the prediction folder
        if args.image:
            images = sorted(glob(os.path.join(input_image_path, args.image)))
        else:
            images = sorted(glob(os.path.join(input_image_path, "*.png")))
        
        from network.model import MyModel
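The predict branch is cut off at the import above; as a rough sketch, the globbed PNGs might be loaded and batched along these lines, assuming grayscale inputs and a (width, height) target size. The paths and sizes here are placeholders, and MyModel's real preprocessing is not shown in the excerpt.

from glob import glob

import cv2
import numpy as np

input_image_path = "../output/iam/prediction"  # hypothetical example path
target_w, target_h = 1024, 128                 # hypothetical target size

batch = []
for path in sorted(glob(f"{input_image_path}/*.png")):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)  # single-channel read
    img = cv2.resize(img, (target_w, target_h))   # cv2.resize takes (width, height)
    batch.append(img[..., np.newaxis] / 255.0)    # add channel axis, scale to [0, 1]

batch = np.asarray(batch, dtype=np.float32)
# predictions = model.predict(batch)              # depends on how MyModel is built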
Example #4
    raw_path = os.path.join("raw")
    source_path = os.path.join("data", f"{ds_names_str}.hdf5")
    output_path = os.path.join("output", ds_names_str, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    if args.charset_path is not None:
        with open(args.charset_path, 'r') as f:
            # the first line of the file holds the full character set
            charset_base = f.readline().strip()
    else:
        charset_base = string.printable[:95]

    if args.transform:
        print(os.getcwd())
        print(f"{ds_names_str} dataset will be transformed...")
        ds = Dataset(source=raw_path, name=args.ds_names)
        ds.read_partitions()
        ds.save_partitions(source_path,
                           input_size,
                           max_text_length,
                           binarize=args.binarize)

    elif args.cv2:
        with h5py.File(source_path, "r") as hf:
            dt = hf['test']['dt'][:256]
            gt = hf['test']['gt'][:256]

        predict_file = os.path.join(output_path, "predict.txt")
        predicts = [''] * len(dt)

        if os.path.isfile(predict_file):
    parser.add_argument("--epochs", type=int, default=1000)
    parser.add_argument("--batch_size", type=int, default=16)
    args = parser.parse_args()

    raw_path = os.path.join("..", "raw", args.source)
    source_path = os.path.join("..", "data", f"{args.source}.hdf5")
    output_path = os.path.join("..", "output", args.source, args.arch)
    target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

    input_size = (1024, 128, 1)
    max_text_length = 128
    charset_base = string.printable[:95]

    if args.transform:
        print(f"{args.source} dataset will be transformed...")
        ds = Dataset(source=raw_path, name=args.source)
        ds.read_partitions()
        ds.save_partitions(source_path, input_size, max_text_length)

    elif args.cv2:
        with h5py.File(source_path, "r") as hf:
            dt = hf['test']['dt'][:256]
            gt = hf['test']['gt'][:256]

        predict_file = os.path.join(output_path, "predict.txt")
        predicts = [''] * len(dt)

        # reuse previously written predictions; line[5:] drops the "TE_P " prefix
        if os.path.isfile(predict_file):
            with open(predict_file, "r") as lg:
                predicts = [line[5:] for line in lg if line.startswith("TE_P")]
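For completeness, a small sketch of writing and re-reading such a predict.txt; only the "TE_P " prefix is confirmed by the parsing above, and the "TE_L " label prefix and sample data are assumptions.

# hypothetical ground-truth/prediction pairs; gt values read from the HDF5
# file may be byte strings and would need .decode() before writing
gt = ["hello world"]
predicts = ["helo world"]
predict_file = "predict.txt"  # stands in for the path built above

with open(predict_file, "w") as lg:
    for label, pred in zip(gt, predicts):
        lg.write(f"TE_L {label}\nTE_P {pred}\n")

with open(predict_file, "r") as lg:
    predicts = [line[5:].rstrip("\n") for line in lg if line.startswith("TE_P")]

print(predicts)  # ['helo world']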