Python main Examples, calamari_ocr.scripts.cross_fold_train.main Python Examples

Example #1

0

Show file

def run_for_single_line(args):
    # lines/network/pretraining as base dir
    args.base_dir = os.path.join(args.base_dir, "all" if args.n_lines < 0 else str(args.n_lines))
    pretrain_prefix = "scratch"
    if args.weights and len(args.weights) > 0:
        pretrain_prefix = ",".join([split_all_ext(os.path.basename(path))[0] for path in args.weights])

    args.base_dir = os.path.join(args.base_dir, args.network, pretrain_prefix)

    if not os.path.exists(args.base_dir):
        os.makedirs(args.base_dir)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    best_models_dir = os.path.join(args.base_dir, "models")
    if not os.path.exists(best_models_dir):
        os.makedirs(best_models_dir)

    prediction_dir = os.path.join(args.base_dir, "predictions")
    if not os.path.exists(prediction_dir):
        os.makedirs(prediction_dir)

    # select number of files
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    setattr(args, "max_parallel_models", args.max_parallel_models)
    setattr(args, "best_models_dir", best_models_dir)
    setattr(args, "temporary_dir", tmp_dir)
    setattr(args, "keep_temporary_files", False)
    setattr(args, "files", files)
    setattr(args, "text_files", None)
    setattr(args, "gt_extension", None)
    setattr(args, "dataset", DataSetType.FILE)
    setattr(args, "best_model_label", "{id}")
    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        if len(args.single_fold) > 0:
            models = [os.path.join(best_models_dir, "{}.ckpt.json".format(sf)) for sf in args.single_fold]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            models = [os.path.join(best_models_dir, d) for d in sorted(os.listdir(best_models_dir)) if d.endswith("json")]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        for line in run(prefix_run_command([
                "python3", "-u",
                predict_script_path,
                "-j", str(args.num_threads),
                "--batch_size", str(args.batch_size),
                "--dump", dump_file,
                "--eval_imgs"] + args.eval_files + [
                ] + (["--verbose"] if args.verbose else []) + [
                "--checkpoint"] + models + [
                ], args.run, {"threads": args.num_threads}), verbose=args.verbose):
            # Print the output of the thread
            if args.verbose:
                print(line)

    import pickle
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction

Example #2

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_pagexml_mixed_color_as_color(self):
     cfp = default_cross_fold_params(
         default_pagexml_trainer_params(img_suffix="*.png", channels=3))
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)

Example #3

0

Show file

File: experiment.py Project: AIRob/calamari

def run_for_single_line(args):
    # lines/network/pretraining as base dir
    args.base_dir = os.path.join(args.base_dir, "all" if args.n_lines < 0 else str(args.n_lines))
    pretrain_prefix = "scratch"
    if args.weights and len(args.weights) > 0:
        pretrain_prefix = ",".join([split_all_ext(os.path.basename(path))[0] for path in args.weights])

    args.base_dir = os.path.join(args.base_dir, args.network, pretrain_prefix)

    if not os.path.exists(args.base_dir):
        os.makedirs(args.base_dir)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    best_models_dir = os.path.join(args.base_dir, "models")
    if not os.path.exists(best_models_dir):
        os.makedirs(best_models_dir)

    prediction_dir = os.path.join(args.base_dir, "predictions")
    if not os.path.exists(prediction_dir):
        os.makedirs(prediction_dir)

    # select number of files
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    setattr(args, "max_parallel_models", args.max_parallel_models)
    setattr(args, "best_models_dir", best_models_dir)
    setattr(args, "temporary_dir", tmp_dir)
    setattr(args, "keep_temporary_files", False)
    setattr(args, "files", files)
    setattr(args, "best_model_label", "{id}")
    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        if len(args.single_fold) > 0:
            models = [os.path.join(best_models_dir, "{}.ckpt.json".format(sf)) for sf in args.single_fold]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            models = [os.path.join(best_models_dir, d) for d in sorted(os.listdir(best_models_dir)) if d.endswith("json")]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        for line in run(prefix_run_command([
                "python3", "-u",
                predict_script_path,
                "-j", str(args.num_threads),
                "--batch_size", str(args.batch_size),
                "--dump", dump_file,
                "--eval_imgs"] + args.eval_files + [
                ] + (["--verbose"] if args.verbose else []) + [
                "--checkpoint"] + models + [
                ], args.run, {"threads": args.num_threads}), verbose=args.verbose):
            # Print the output of the thread
            if args.verbose:
                print(line)

    import pickle
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction

Example #4

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_pagexml(self):
     cfp = default_cross_fold_params(default_pagexml_trainer_params())
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)

Example #5

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_files_no_preload(self):
     cfp = default_cross_fold_params(
         default_files_trainer_params(preload=False))
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)

Example #6

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_files_all_pretrained(self):
     cfp = default_cross_fold_params(default_files_trainer_params(),
                                     pretrained="all")
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)

Example #7

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_files_augmentation(self):
     cfp = default_cross_fold_params(default_files_trainer_params(),
                                     with_augmentation=True)
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)

Example #8

0

Show file

File: test_cross_fold_train.py Project: jacektl/calamari

 def test_on_files(self):
     cfp = default_cross_fold_params(default_files_trainer_params())
     cfp.run_split = ""
     with tempfile.TemporaryDirectory() as d:
         cfp.best_models_dir = d
         main(cfp)