def run_for_single_line(args):
    """Run cross-fold training and evaluation for one experiment setting.

    The working directory is derived from ``args`` as
    ``<base_dir>/<"all" | n_lines>/<network>/<pretrain prefix>`` and contains
    the sub-directories ``tmp``, ``models`` and ``predictions``.

    Args:
        args: Namespace-like object. Read here: ``base_dir``, ``n_lines``,
            ``weights``, ``network``, ``train_files``,
            ``max_parallel_models``, ``skip_train``, ``skip_eval``,
            ``single_fold``, ``n_folds``, ``num_threads``, ``batch_size``,
            ``eval_files``, ``verbose`` and ``run``. The object is mutated:
            ``base_dir`` is replaced by the derived directory and the
            attributes consumed by ``cross_fold_train.main`` are set on it.

    Returns:
        The object unpickled from ``<base_dir>/tmp/prediction.pkl``.

    Raises:
        Exception: If a model requested via ``args.single_fold`` does not
            exist, or if the number of ``*json`` files in the models
            directory differs from ``args.n_folds``.
    """
    import pickle

    # lines/network/pretraining as base dir
    lines_label = "all" if args.n_lines < 0 else str(args.n_lines)
    pretrain_prefix = "scratch"
    if args.weights:
        pretrain_prefix = ",".join(
            split_all_ext(os.path.basename(path))[0] for path in args.weights
        )
    args.base_dir = os.path.join(
        args.base_dir, lines_label, args.network, pretrain_prefix
    )

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several experiments
    # start concurrently on the same base directory.
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    # Self-assignment kept on purpose: it fails fast with AttributeError when
    # the caller did not provide max_parallel_models.
    setattr(args, "max_parallel_models", args.max_parallel_models)
    args.best_models_dir = best_models_dir
    args.temporary_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.text_files = None
    args.gt_extension = None
    args.dataset = DataSetType.FILE
    args.best_model_label = "{id}"

    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        # Truthiness check so that single_fold=None is treated like an empty
        # selection instead of raising TypeError on len(None).
        if args.single_fold:
            models = [
                os.path.join(best_models_dir, "{}.ckpt.json".format(sf))
                for sf in args.single_fold
            ]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            models = [
                os.path.join(best_models_dir, d)
                for d in sorted(os.listdir(best_models_dir))
                if d.endswith("json")
            ]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        command = [
            "python3", "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--eval_imgs", *args.eval_files,
            *(["--verbose"] if args.verbose else []),
            "--checkpoint", *models,
        ]
        for line in run(prefix_run_command(command, args.run, {"threads": args.num_threads}),
                        verbose=args.verbose):
            # Print the output of the thread
            if args.verbose:
                print(line)

    # NOTE(security): pickle.load executes arbitrary code from the file. The
    # dump is expected to be written by the eval script started above (or by
    # an earlier run when skip_eval is set); never point this at untrusted data.
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction
def test_on_pagexml_mixed_color_as_color(self):
    """Call ``main`` with PAGE-XML trainer params for ``*.png`` images and 3 channels."""
    trainer_params = default_pagexml_trainer_params(img_suffix="*.png", channels=3)
    params = default_cross_fold_params(trainer_params)
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)
def run_for_single_line(args):
    """Run cross-fold training and evaluation for one experiment setting.

    The working directory is derived from ``args`` as
    ``<base_dir>/<"all" | n_lines>/<network>/<pretrain prefix>`` and contains
    the sub-directories ``tmp``, ``models`` and ``predictions``.

    Args:
        args: Namespace-like object. Read here: ``base_dir``, ``n_lines``,
            ``weights``, ``network``, ``train_files``,
            ``max_parallel_models``, ``skip_train``, ``skip_eval``,
            ``single_fold``, ``n_folds``, ``num_threads``, ``batch_size``,
            ``eval_files``, ``verbose`` and ``run``. The object is mutated:
            ``base_dir`` is replaced by the derived directory and the
            attributes consumed by ``cross_fold_train.main`` are set on it.

    Returns:
        The object unpickled from ``<base_dir>/tmp/prediction.pkl``.

    Raises:
        Exception: If a model requested via ``args.single_fold`` does not
            exist, or if the number of ``*json`` files in the models
            directory differs from ``args.n_folds``.
    """
    import pickle

    # lines/network/pretraining as base dir
    lines_label = "all" if args.n_lines < 0 else str(args.n_lines)
    pretrain_prefix = "scratch"
    if args.weights:
        pretrain_prefix = ",".join(
            split_all_ext(os.path.basename(path))[0] for path in args.weights
        )
    args.base_dir = os.path.join(
        args.base_dir, lines_label, args.network, pretrain_prefix
    )

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several experiments
    # start concurrently on the same base directory.
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    # Self-assignment kept on purpose: it fails fast with AttributeError when
    # the caller did not provide max_parallel_models.
    setattr(args, "max_parallel_models", args.max_parallel_models)
    args.best_models_dir = best_models_dir
    args.temporary_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.best_model_label = "{id}"

    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        # Truthiness check so that single_fold=None is treated like an empty
        # selection instead of raising TypeError on len(None).
        if args.single_fold:
            models = [
                os.path.join(best_models_dir, "{}.ckpt.json".format(sf))
                for sf in args.single_fold
            ]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            models = [
                os.path.join(best_models_dir, d)
                for d in sorted(os.listdir(best_models_dir))
                if d.endswith("json")
            ]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        command = [
            "python3", "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--eval_imgs", *args.eval_files,
            *(["--verbose"] if args.verbose else []),
            "--checkpoint", *models,
        ]
        for line in run(prefix_run_command(command, args.run, {"threads": args.num_threads}),
                        verbose=args.verbose):
            # Print the output of the thread
            if args.verbose:
                print(line)

    # NOTE(security): pickle.load executes arbitrary code from the file. The
    # dump is expected to be written by the eval script started above (or by
    # an earlier run when skip_eval is set); never point this at untrusted data.
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction
def test_on_pagexml(self):
    """Call ``main`` with the default PAGE-XML trainer params."""
    trainer_params = default_pagexml_trainer_params()
    params = default_cross_fold_params(trainer_params)
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)
def test_on_files_no_preload(self):
    """Call ``main`` with file trainer params created with ``preload=False``."""
    trainer_params = default_files_trainer_params(preload=False)
    params = default_cross_fold_params(trainer_params)
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)
def test_on_files_all_pretrained(self):
    """Call ``main`` with default file trainer params and ``pretrained="all"``."""
    trainer_params = default_files_trainer_params()
    params = default_cross_fold_params(trainer_params, pretrained="all")
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)
def test_on_files_augmentation(self):
    """Call ``main`` with default file trainer params and ``with_augmentation=True``."""
    trainer_params = default_files_trainer_params()
    params = default_cross_fold_params(trainer_params, with_augmentation=True)
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)
def test_on_files(self):
    """Call ``main`` with default file trainer params and an empty ``run_split``."""
    trainer_params = default_files_trainer_params()
    params = default_cross_fold_params(trainer_params)
    params.run_split = ""
    # Best models go to a throw-away directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as models_dir:
        params.best_models_dir = models_dir
        main(params)