Exemple #1
0
 def test_simple_train(self):
     """Train once, clear the Keras session, then resume from checkpoint_0001."""
     params = uw3_trainer_params()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         train.main(params)
         # Discard the Keras state of the first run before resuming.
         keras.backend.clear_session()
         checkpoint = os.path.join(out_dir, "checkpoint", "checkpoint_0001")
         resume_training.main([checkpoint])
Exemple #2
0
 def test_train_without_center_normalizer(self):
     """Train with every CenterNormalizer pre-processor replaced by ScaleToHeight."""
     params = uw3_trainer_params(with_validation=False)
     pre_proc = params.scenario.data.pre_proc
     pre_proc.replace_all(CenterNormalizerProcessorParams,
                          ScaleToHeightProcessorParams())
     # Re-run the data params' post-init hook after editing the pre-processors.
     params.scenario.data.__post_init__()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #3
0
 def test_pure_lstm_architecture(self):
     """Train a model whose layers are two BiLSTM layers (10 and 20 nodes)."""
     params = uw3_trainer_params()
     lstm_stack = [
         BiLSTMLayerParams(hidden_nodes=10),
         BiLSTMLayerParams(hidden_nodes=20),
     ]
     params.scenario.model.layers = lstm_stack
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #4
0
 def test_dilated_block_architecture(self):
     """Train a conv / pool / two dilated blocks / conv layer stack."""
     params = uw3_trainer_params()
     layer_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         DilatedBlockLayerParams(filters=10),
         DilatedBlockLayerParams(filters=10),
         Conv2DLayerParams(filters=10),
     ]
     params.scenario.model.layers = layer_stack
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #5
0
 def test_pure_cnn_architecture(self):
     """Train a model built only from convolution and pooling layers."""
     params = uw3_trainer_params()
     cnn_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         Conv2DLayerParams(
             filters=20,
             strides=IntVec2D(2, 2),
             kernel_size=IntVec2D(4, 4),
         ),
         Conv2DLayerParams(filters=30),
     ]
     params.scenario.model.layers = cnn_stack
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
 def test_concat_cnn_architecture(self):
     """Compare both command-line spellings of the concat network, then train it."""
     params = uw3_trainer_params()
     layer_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         DilatedBlockLayerParams(filters=10),
         TransposedConv2DLayerParams(filters=10),
         # corresponds to output of first and fourth layer
         ConcatLayerParams(concat_indices=[1, 4]),
         Conv2DLayerParams(filters=10),
         BiLSTMLayerParams(hidden_nodes=10),
     ]
     params.scenario.model.layers = layer_stack
     post_init(params)

     # Compact "--network" string form.
     network_argv = [
         "--network",
         "conv=10,pool=2x2,db=10:2,tconv=10,concat=1:4,conv=10,lstm=10",
     ]
     from_network = parse_args(network_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_network.scenario.model.to_dict(),
     )

     # Explicit per-layer "--model.layers" form.
     layers_argv = ["--model.layers", "Conv", "Pool", "DilatedBlock",
                    "TConv", "Concat", "Conv", "BiLSTM"]
     layers_argv += ["--model.layers.0.filters", "10"]
     layers_argv += ["--model.layers.2.filters", "10"]
     layers_argv += ["--model.layers.3.filters", "10"]
     layers_argv += ["--model.layers.4.concat_indices", "1", "4"]
     layers_argv += ["--model.layers.5.filters", "10"]
     layers_argv += ["--model.layers.6.hidden_nodes", "10"]
     from_layers = parse_args(layers_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_layers.scenario.model.to_dict(),
     )

     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #7
0
 def test_concat_cnn_architecture(self):
     """Train a layer stack that contains a concat layer."""
     params = uw3_trainer_params()
     layer_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         DilatedBlockLayerParams(filters=10),
         TransposedConv2DLayerParams(filters=10),
         # corresponds to output of first and fourth layer
         ConcatLayerParams(concat_indices=[1, 4]),
         Conv2DLayerParams(filters=10),
         BiLSTMLayerParams(hidden_nodes=10),
     ]
     params.scenario.model.layers = layer_stack
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #8
0
 def test_pretraining_same_codec(self):
     """Train with the default uw3 params and require a val_CER below 0.002."""
     # Trained on uw3, applied on uw3
     params = default_uw3_trainer_params()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         train_logs = main(params)
         val_cer = train_logs["val_CER"]
         self.assertLess(val_cer, 0.002)
 def test_pure_cnn_architecture(self):
     """Compare both command-line spellings of the pure CNN, then train it."""
     params = uw3_trainer_params()
     cnn_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         Conv2DLayerParams(
             filters=20,
             strides=IntVec2D(2, 2),
             kernel_size=IntVec2D(4, 4),
         ),
         Conv2DLayerParams(filters=30),
     ]
     params.scenario.model.layers = cnn_stack
     post_init(params)

     # Compact "--network" string form.
     network_argv = ["--network", "conv=10,pool=2x2,conv=20:4x4:2x2,conv=30"]
     from_network = parse_args(network_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_network.scenario.model.to_dict(),
     )

     # Explicit per-layer "--model.layers" form.
     layers_argv = ["--model.layers", "Conv", "Pool", "Conv", "Conv"]
     layers_argv += ["--model.layers.0.filters", "10"]
     layers_argv += ["--model.layers.2.filters", "20"]
     layers_argv += ["--model.layers.2.kernel_size.x", "4"]
     layers_argv += ["--model.layers.2.kernel_size.y", "4"]
     layers_argv += ["--model.layers.2.strides.x", "2"]
     layers_argv += ["--model.layers.2.strides.y", "2"]
     layers_argv += ["--model.layers.3.filters", "30"]
     from_layers = parse_args(layers_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_layers.scenario.model.to_dict(),
     )

     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
 def test_dilated_block_architecture(self):
     """Compare both command-line spellings of the dilated-block net, then train it."""
     params = uw3_trainer_params()
     layer_stack = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(strides=IntVec2D(2, 2)),
         DilatedBlockLayerParams(filters=10),
         DilatedBlockLayerParams(filters=10),
         Conv2DLayerParams(filters=10),
     ]
     params.scenario.model.layers = layer_stack
     post_init(params)

     # Compact "--network" string form.
     network_argv = ["--network", "conv=10,pool=2x2:2x2,db=10:2,db=10:2,conv=10"]
     from_network = parse_args(network_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_network.scenario.model.to_dict(),
     )

     # Explicit per-layer "--model.layers" form.
     layers_argv = ["--model.layers", "Conv", "Pool",
                    "DilatedBlock", "DilatedBlock", "Conv"]
     layers_argv += ["--model.layers.0.filters", "10"]
     layers_argv += ["--model.layers.1.strides", "2", "2"]
     layers_argv += ["--model.layers.2.filters", "10"]
     layers_argv += ["--model.layers.3.filters", "10"]
     layers_argv += ["--model.layers.4.filters", "10"]
     from_layers = parse_args(layers_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_layers.scenario.model.to_dict(),
     )

     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
 def test_pure_lstm_architecture(self):
     """Compare both command-line spellings of the pure LSTM net, then train it."""
     params = uw3_trainer_params()
     lstm_stack = [
         BiLSTMLayerParams(hidden_nodes=10),
         BiLSTMLayerParams(hidden_nodes=20),
     ]
     params.scenario.model.layers = lstm_stack
     post_init(params)

     # Compact "--network" string form.
     from_network = parse_args(["--network", "lstm=10,lstm=20"])
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_network.scenario.model.to_dict(),
     )

     # Explicit per-layer "--model.layers" form.
     layers_argv = ["--model.layers", "BiLSTM", "BiLSTM"]
     layers_argv += ["--model.layers.0.hidden_nodes", "10"]
     layers_argv += ["--model.layers.1.hidden_nodes", "20"]
     from_layers = parse_args(layers_argv)
     self.assertDictEqual(
         params.scenario.model.to_dict(),
         from_layers.scenario.model.to_dict(),
     )

     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #12
0
 def test_train_abbyy_test_page(self):
     """Run training with default_trainer_params() into a temporary directory."""
     params = default_trainer_params()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #13
0
 def test_pretraining_with_codec_adaption_no_preload(self):
     """Run training on the PageXML default params with preload disabled."""
     params = default_pagexml_trainer_params(preload=False)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
 def test_default_architecture(self):
     """Train with the layer list returned by default_layers()."""
     params = uw3_trainer_params()
     params.scenario.model.layers = default_layers()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #15
0
 def test_simple_train_no_preload(self):
     """Run training with setup_trainer_params(preload=False)."""
     params = setup_trainer_params(preload=False)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #16
0
def run_for_single_line(args):
    """Train on a (sub)set of lines, evaluate the best model, return the predictions.

    ``args`` is modified in place: ``base_dir`` is extended to
    ``<base_dir>/<n_lines or "all">/<network>/<pretrain prefix>`` and several
    training options (output directories, files, dataset type, ...) are
    overwritten before ``train_script.main`` is called.

    Args:
        args: Attribute container (presumably an ``argparse.Namespace``).
            Reads ``base_dir``, ``n_lines``, ``weights``, ``network``,
            ``files``, ``skip_train``, ``skip_eval``, ``num_threads``,
            ``batch_size``, ``eval_files``, ``verbose`` and ``run``.

    Returns:
        The object unpickled from ``<base_dir>/tmp/prediction.pkl``.

    Raises:
        Exception: If evaluation is requested but ``best.ckpt.json`` does not
            exist in the models directory.
    """
    # lines/network/pretraining as base dir
    lines_label = "all" if args.n_lines < 0 else str(args.n_lines)
    if args.weights:
        pretrain_prefix = ",".join(
            split_all_ext(os.path.basename(path))[0] for path in args.weights
        )
    else:
        pretrain_prefix = "scratch"

    args.base_dir = os.path.join(args.base_dir, lines_label, args.network, pretrain_prefix)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several runs share a base dir.
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files
    files = args.files
    if args.n_lines > 0:
        all_files = glob_all(args.files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    args.early_stopping_best_model_output_dir = best_models_dir
    args.output_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.text_files = None
    args.gt_extension = None
    args.dataset = DataSetType.FILE
    args.best_model_label = "{id}"
    if not args.skip_train:
        train_script.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "predict_and_eval.py")

        model = os.path.join(best_models_dir, "best.ckpt.json")
        if not os.path.exists(model):
            raise Exception(f"Expected model at '{model}', but file does not exist")

        command = [
            "python3", "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--files", *args.eval_files,
        ]
        if args.verbose:
            command.append("--verbose")
        command += ["--checkpoint", model]

        wrapped_command = prefix_run_command(command, args.run, {"threads": args.num_threads})
        for line in run(wrapped_command, verbose=args.verbose):
            # Print the output of the thread
            if args.verbose:
                print(line)

    # NOTE(review): pickle.load can execute arbitrary code from the file. The
    # dump is expected to be written by predict_and_eval.py; with skip_eval set,
    # a previously existing file is loaded instead — confirm it is trusted.
    import pickle
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction
Exemple #17
0
 def test_simple_train(self):
     """Run training with default_trainer_params(with_validation=False)."""
     params = default_trainer_params(with_validation=False)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #18
0
 def test_train_with_val(self):
     """Run training with validation enabled and preload disabled."""
     params = default_trainer_params(with_validation=True, preload=False)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #19
0
 def test_pretraining_with_codec_adaption(self):
     """Run training with the unmodified default_trainer_params()."""
     params = default_trainer_params()
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #20
0
 def test_pretraining_without_codec_adaption(self):
     """Run training with codec.keep_loaded switched on."""
     params = default_trainer_params()
     params.codec.keep_loaded = True
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #21
0
 def test_train_from_files_file(self):
     """Run training with validation and from_files_file both enabled."""
     params = uw3_trainer_params(
         with_validation=True,
         from_files_file=True,
     )
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #22
0
 def test_train_split(self):
     """Run training with with_split enabled and preload disabled."""
     params = uw3_trainer_params(with_split=True, preload=False)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #23
0
 def test_augmentation_train_val(self):
     """Run training with default_trainer_params(with_validation=True)."""
     params = default_trainer_params(with_validation=True)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #24
0
 def test_augmentation_train_val_split(self):
     """Run training with preload disabled and with_split enabled."""
     params = default_trainer_params(preload=False, with_split=True)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)
Exemple #25
0
 def test_train_split(self):
     """Run training with default_trainer_params(with_split=True)."""
     params = default_trainer_params(with_split=True)
     with tempfile.TemporaryDirectory() as out_dir:
         params.output_dir = out_dir
         main(params)