# --- CLI options and derived paths for the text/noise pipeline ---
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--N", type=int, default=2)
args = parser.parse_args()

# All data lives one level above the script directory.
raw_path = os.path.join("..", "raw")
data_path = os.path.join("..", "data")
source_path = os.path.join(data_path, f"{args.source}.txt")
output_path = os.path.join("..", "output", args.source, args.mode)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

max_text_length = 128
# First 95 printable ASCII chars, plus accented Latin characters.
charset_base = string.printable[:95]
charset_special = """ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüý"""

if args.transform:
    # Read raw sentences, add synthetic noise to valid/test partitions,
    # then score the noised valid set against the clean ground truth.
    data = Dataset(source=os.path.join(raw_path, args.source))
    data.read_lines(maxlen=max_text_length)

    valid_noised = pp.add_noise(data.dataset['valid'], max_text_length)
    test_noised = pp.add_noise(data.dataset['test'], max_text_length)
    valid_metrics = ev.ocr_metrics(ground_truth=data.dataset['valid'],
                                   data=valid_noised)

    info = "\n".join([
        f"####",
        f"#### {args.source} partitions (number of sentences)",
        f"####",
        f"#### Total: {data.size['total']}",
        f"####",
        f"#### Train: {data.size['train']}",
        # NOTE(review): source chunk is truncated here — the remaining
        # list items (valid/test counts, metrics) are not visible.
    ])
# --- CLI parsing, derived paths, and HDF5 dataset transformation ---
args = parser.parse_args()

raw_path = os.path.join("..", "raw", args.source)
source_path = os.path.join("..", "data", f"{args.source}.hdf5")
output_path = os.path.join("..", "output", args.source, args.arch)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

input_size = (1024, 128, 1)  # (width, height, channels) — TODO confirm order
max_text_length = 128
charset_base = string.printable[:95]  # first 95 printable ASCII chars

if args.transform:
    assert os.path.exists(raw_path)

    print(f"The {args.source} dataset will be transformed...")
    ds = Dataset(source=raw_path, name=args.source)
    ds.read_partitions()

    print("Partitions will be preprocessed...")
    ds.preprocess_partitions(input_size=input_size)

    print("Partitions will be saved...")
    os.makedirs(os.path.dirname(source_path), exist_ok=True)

    # Open the HDF5 file once instead of re-opening it per partition
    # (original re-entered the context manager on every loop iteration).
    with h5py.File(source_path, "a") as hf:
        for i in ds.partitions:
            hf.create_dataset(f"{i}/dt", data=ds.dataset[i]['dt'],
                              compression="gzip", compression_opts=9)
            # NOTE(review): source chunk is truncated here — the
            # arguments of the "gt" dataset are not visible; mirroring
            # the "dt" call is the likely intent. TODO confirm.
            hf.create_dataset(f"{i}/gt", data=ds.dataset[i]['gt'],
                              compression="gzip", compression_opts=9)
# --- IAM transform / prediction-image collection dispatch ---
os.makedirs(output_path, exist_ok=True)

if args.transform:
    from data.reader import Dataset

    print(f"iam dataset will be transformed...")
    ds = Dataset(source=os.path.join(raw_path, "iam"), name="iam")
    ds.read_partitions()
    ds.save_partitions(source_path, target_image_size, maxTextLength)

elif args.predict:
    # Images to predict are read from <output>/prediction; results are
    # written to <output>/prediction/out.
    input_image_path = os.path.join(output_path, "prediction")
    output_image_path = os.path.join(input_image_path, "out")
    os.makedirs(output_image_path, exist_ok=True)

    # A specific image (glob pattern) can be requested; default to all PNGs.
    if args.image:
        images = sorted(glob(os.path.join(input_image_path, args.image)))
    else:
        images = sorted(glob(os.path.join(input_image_path, "*.png")))

    from network.model import MyModel
# --- Paths, optional external charset, transform / cv2 inspection ---
raw_path = os.path.join("raw")
source_path = os.path.join("data", f"{ds_names_str}.hdf5")
output_path = os.path.join("output", ds_names_str, args.arch)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

# Charset comes from the first line of a user-supplied file, falling back
# to the first 95 printable ASCII characters.
if args.charset_path is not None:
    with open(args.charset_path, 'r') as f:
        charset_base = f.readline().strip()
else:
    charset_base = string.printable[:95]

if args.transform:
    print(os.getcwd())  # NOTE(review): looks like a debug print — confirm
    print(f"{ds_names_str} dataset will be transformed...")
    ds = Dataset(source=raw_path, name=args.ds_names)
    ds.read_partitions()
    ds.save_partitions(source_path, input_size, max_text_length,
                       binarize=args.binarize)

elif args.cv2:
    # Inspect up to the first 256 test samples against saved predictions.
    with h5py.File(source_path, "r") as hf:
        dt = hf['test']['dt'][:256]
        gt = hf['test']['gt'][:256]

    predict_file = os.path.join(output_path, "predict.txt")
    predicts = [''] * len(dt)

    if os.path.isfile(predict_file):
        # NOTE(review): source chunk is truncated here — the body that
        # loads predictions from predict_file is not visible.
        pass
# --- Training CLI options, derived paths, transform / cv2 inspection ---
parser.add_argument("--epochs", type=int, default=1000)
parser.add_argument("--batch_size", type=int, default=16)
args = parser.parse_args()

# All data lives one level above the script directory.
raw_path = os.path.join("..", "raw", args.source)
source_path = os.path.join("..", "data", f"{args.source}.hdf5")
output_path = os.path.join("..", "output", args.source, args.arch)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

input_size = (1024, 128, 1)  # (width, height, channels) — TODO confirm order
max_text_length = 128
charset_base = string.printable[:95]  # first 95 printable ASCII chars

if args.transform:
    print(f"{args.source} dataset will be transformed...")
    ds = Dataset(source=raw_path, name=args.source)
    ds.read_partitions()
    ds.save_partitions(source_path, input_size, max_text_length)

elif args.cv2:
    # Inspect up to the first 256 test samples against saved predictions.
    with h5py.File(source_path, "r") as hf:
        dt = hf['test']['dt'][:256]
        gt = hf['test']['gt'][:256]

    predict_file = os.path.join(output_path, "predict.txt")
    predicts = [''] * len(dt)

    # Prediction lines are prefixed "TE_P"; strip the 5-char prefix.
    if os.path.isfile(predict_file):
        with open(predict_file, "r") as lg:
            predicts = [line[5:] for line in lg if line.startswith("TE_P")]