Example #1
0
def default_layers():
    """Return the default Calamari model: two conv/pool stages, a BiLSTM, and dropout."""
    # Imported lazily (in original order) to avoid pulling in the model
    # package at module-import time.
    from calamari_ocr.ocr.model.layers.conv2d import Conv2DLayerParams
    from calamari_ocr.ocr.model.layers.pool2d import MaxPool2DLayerParams
    from calamari_ocr.ocr.model.layers.bilstm import BiLSTMLayerParams
    from calamari_ocr.ocr.model.layers.dropout import DropoutLayerParams

    layers = [Conv2DLayerParams(filters=40), MaxPool2DLayerParams()]
    layers += [Conv2DLayerParams(filters=60), MaxPool2DLayerParams()]
    layers += [BiLSTMLayerParams(), DropoutLayerParams(rate=0.5)]
    return layers
Example #2
0
 def test_dilated_block_architecture(self):
     """Run one training pass with a conv/pool front-end followed by two dilated blocks."""
     params = uw3_trainer_params()
     architecture = [Conv2DLayerParams(filters=10)]
     architecture.append(MaxPool2DLayerParams())
     architecture.append(DilatedBlockLayerParams(filters=10))
     architecture.append(DilatedBlockLayerParams(filters=10))
     architecture.append(Conv2DLayerParams(filters=10))
     params.scenario.model.layers = architecture
     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
Example #3
0
 def test_pure_cnn_architecture(self):
     """Run one training pass with a purely convolutional network (no recurrent layers)."""
     params = uw3_trainer_params()
     net = [Conv2DLayerParams(filters=10), MaxPool2DLayerParams()]
     # A strided 4x4 convolution further downsamples the feature map.
     net.append(Conv2DLayerParams(filters=20,
                                  strides=IntVec2D(2, 2),
                                  kernel_size=IntVec2D(4, 4)))
     net.append(Conv2DLayerParams(filters=30))
     params.scenario.model.layers = net
     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
 def test_concat_cnn_architecture(self):
     """A branch-and-merge (concat) network must be expressible via both CLI syntaxes, then train."""
     params = uw3_trainer_params()
     params.scenario.model.layers = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         DilatedBlockLayerParams(filters=10),
         TransposedConv2DLayerParams(filters=10),
         # Merge the outputs of the first and the fourth layer.
         ConcatLayerParams(concat_indices=[1, 4]),
         Conv2DLayerParams(filters=10),
         BiLSTMLayerParams(hidden_nodes=10),
     ]
     post_init(params)

     # The compact --network shorthand must produce the identical model.
     from_network = parse_args([
         "--network",
         "conv=10,pool=2x2,db=10:2,tconv=10,concat=1:4,conv=10,lstm=10",
     ])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_network.scenario.model.to_dict())

     # The verbose --model.layers form must as well.
     from_layers = parse_args([
         "--model.layers",
         "Conv", "Pool", "DilatedBlock", "TConv", "Concat", "Conv", "BiLSTM",
         "--model.layers.0.filters", "10",
         "--model.layers.2.filters", "10",
         "--model.layers.3.filters", "10",
         "--model.layers.4.concat_indices", "1", "4",
         "--model.layers.5.filters", "10",
         "--model.layers.6.hidden_nodes", "10",
     ])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_layers.scenario.model.to_dict())

     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
Example #5
0
 def test_concat_cnn_architecture(self):
     """Run one training pass with a branch-and-merge (concat) architecture."""
     params = uw3_trainer_params()
     params.scenario.model.layers = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         DilatedBlockLayerParams(filters=10),
         TransposedConv2DLayerParams(filters=10),
         # Merge the outputs of the first and the fourth layer.
         ConcatLayerParams(concat_indices=[1, 4]),
         Conv2DLayerParams(filters=10),
         BiLSTMLayerParams(hidden_nodes=10),
     ]
     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
 def default_trainer_params(cls):
     """Build a tiny single-epoch trainer configuration suitable for fast tests."""
     params = super().default_trainer_params()
     # Minimal network: one conv/pool stage, a small BiLSTM, and dropout.
     params.scenario.model.layers = [
         Conv2DLayerParams(filters=2),
         MaxPool2DLayerParams(pool_size=IntVec2D(4, 4)),
         BiLSTMLayerParams(hidden_nodes=2),
         DropoutLayerParams(rate=0.5),
     ]
     # Keep data loading serial and batches tiny so the run finishes quickly.
     for setup in (params.gen.setup.val, params.gen.setup.train):
         setup.batch_size = 1
         setup.num_processes = 1
     params.epochs = 1
     params.samples_per_epoch = 2
     params.scenario.data.pre_proc.run_parallel = False
     post_init(params)
     return params
 def test_pure_cnn_architecture(self):
     """A conv-only network must be expressible via both CLI syntaxes, then train."""
     params = uw3_trainer_params()
     params.scenario.model.layers = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(),
         # Strided 4x4 convolution for additional downsampling.
         Conv2DLayerParams(filters=20,
                           strides=IntVec2D(2, 2),
                           kernel_size=IntVec2D(4, 4)),
         Conv2DLayerParams(filters=30),
     ]
     post_init(params)

     # The compact --network shorthand must produce the identical model.
     from_network = parse_args(
         ["--network", "conv=10,pool=2x2,conv=20:4x4:2x2,conv=30"])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_network.scenario.model.to_dict())

     # The verbose --model.layers form must as well.
     from_layers = parse_args([
         "--model.layers",
         "Conv", "Pool", "Conv", "Conv",
         "--model.layers.0.filters", "10",
         "--model.layers.2.filters", "20",
         "--model.layers.2.kernel_size.x", "4",
         "--model.layers.2.kernel_size.y", "4",
         "--model.layers.2.strides.x", "2",
         "--model.layers.2.strides.y", "2",
         "--model.layers.3.filters", "30",
     ])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_layers.scenario.model.to_dict())

     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
 def test_dilated_block_architecture(self):
     """A dilated-block network must be expressible via both CLI syntaxes, then train."""
     params = uw3_trainer_params()
     params.scenario.model.layers = [
         Conv2DLayerParams(filters=10),
         MaxPool2DLayerParams(strides=IntVec2D(2, 2)),
         DilatedBlockLayerParams(filters=10),
         DilatedBlockLayerParams(filters=10),
         Conv2DLayerParams(filters=10),
     ]
     post_init(params)

     # The compact --network shorthand must produce the identical model.
     from_network = parse_args(
         ["--network", "conv=10,pool=2x2:2x2,db=10:2,db=10:2,conv=10"])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_network.scenario.model.to_dict())

     # The verbose --model.layers form must as well.
     from_layers = parse_args([
         "--model.layers",
         "Conv", "Pool", "DilatedBlock", "DilatedBlock", "Conv",
         "--model.layers.0.filters", "10",
         "--model.layers.1.strides", "2", "2",
         "--model.layers.2.filters", "10",
         "--model.layers.3.filters", "10",
         "--model.layers.4.filters", "10",
     ])
     self.assertDictEqual(params.scenario.model.to_dict(),
                          from_layers.scenario.model.to_dict())

     with tempfile.TemporaryDirectory() as workdir:
         params.output_dir = workdir
         main(params)
Example #9
0
def graph_params_from_definition_string(s: str) -> List[LayerParams]:
    """Parse a comma-separated network definition string into ``LayerParams``.

    Supported entries (``label=value``):
      * ``conv``/``cnn``/``conv2d``: ``filters[:HxW[:SXxSY]]``
      * ``tconv``/``tcnn``/``tconv2d``: ``filters[:SXxSY[:HxW]]``
      * ``db`` (dilated block): ``filters:depth[:H[xW]]``
      * ``pool``/``max_pool``/``pool2d``: ``H[xW][:SXxSY]``
      * ``concat``: ``index0:index1``
      * ``lstm``: ``hidden_nodes``
      * ``dropout``: ``rate``

    Raises ``Exception`` for unknown labels, malformed values, or when a
    2D (CNN/pool/concat) layer appears after an LSTM layer.
    """
    layers = []
    # Regexes for the value part of each layer kind.
    cnn_matcher = re.compile(
        r"^([\d]+)(:([\d]+)x([\d]+))?(:([\d]+)x([\d]+))?$")
    db_matcher = re.compile(r"^([\d]+):([\d]+)(:([\d]+)(x([\d]+))?)?$")
    concat_matcher = re.compile(r"^([\-\d]+):([\-\d]+)$")
    pool_matcher = re.compile(r"^([\d]+)(x([\d]+))?(:([\d]+)x([\d]+))?$")
    str_params = s.split(",")
    lstm_appeared = False

    def check_no_lstm():
        # 2D layers are not allowed once a recurrent layer has appeared,
        # because the graph cannot restore the 2D feature map afterwards.
        if lstm_appeared:
            raise Exception(
                "LSTM layers must be placed proceeding to CNN/Pool")

    for param in str_params:
        label, value = param.split("=")
        if label == "dropout":
            layers.append(DropoutLayerParams(rate=float(value)))
        elif label == "lstm":
            lstm_appeared = True
            layers.append(BiLSTMLayerParams(hidden_nodes=int(value)))
        elif label == "concat":
            check_no_lstm()
            match = concat_matcher.match(value)
            if match is None:
                raise Exception(
                    "Concat structure needs: concat=[index0]:[index1] but got concat={}"
                    .format(value))
            layers.append(
                ConcatLayerParams(
                    concat_indices=list(map(int, match.groups()))))
        elif label == "db":
            check_no_lstm()
            match = db_matcher.match(value)
            if match is None:
                raise Exception(
                    "Dilated block structure needs: db=[filters]:[depth>0]:[h]x[w]"
                )
            match = match.groups()
            # Kernel defaults to 3x3; "H" alone means a square HxH kernel.
            kernel_size = [3, 3]
            if match[2] is not None:
                kernel_size = [int(match[3])] * 2
            if match[4] is not None:
                kernel_size = [int(match[3]), int(match[5])]
            layers.append(
                DilatedBlockLayerParams(
                    filters=int(match[0]),
                    dilated_depth=int(match[1]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(1, 1),
                ))
        elif label in {"cnn", "conv", "conv2d"}:
            check_no_lstm()
            match = cnn_matcher.match(value)
            if match is None:
                raise Exception(
                    f"CNN structure needs: cnn=[filters]:[h]x[w]:[sx]x[sy] but got {value}"
                )
            match = match.groups()
            # Defaults: 3x3 kernel, stride 1; first optional pair is the
            # kernel, second is the stride.
            kernel_size = [3, 3]
            stride = [1, 1]
            if match[1] is not None:
                kernel_size = [int(match[2]), int(match[3])]
            if match[4] is not None:
                stride = [int(match[5]), int(match[6])]
            layers.append(
                Conv2DLayerParams(
                    filters=int(match[0]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(*stride),
                ))
        elif label in {"tcnn", "tconv", "tconv2d"}:
            check_no_lstm()
            match = cnn_matcher.match(value)
            if match is None:
                raise Exception(
                    "Transposed CNN structure needs: tcnn=[filters]:[sx]x[sy]")
            match = match.groups()
            # NOTE: for the transposed conv the FIRST optional pair is the
            # stride (default 2x2) and the second the kernel (default 3x3) —
            # the reverse of the plain conv syntax above.
            kernel_size = [3, 3]
            stride = [2, 2]
            if match[1] is not None:
                stride = [int(match[2]), int(match[3])]
            if match[4] is not None:
                kernel_size = [int(match[5]), int(match[6])]
            layers.append(
                TransposedConv2DLayerParams(
                    filters=int(match[0]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(*stride),
                ))
        elif label in {"pool", "max_pool", "pool2d"}:
            check_no_lstm()
            match = pool_matcher.match(value)
            if match is None:
                # Fixed error message: the accepted separator is "x", not ";"
                # (see pool_matcher above).
                raise Exception("Pool structure needs: pool=[h]x[w]")
            match = match.groups()
            # "H" alone means a square HxH pool window.
            kernel_size = [int(match[0])] * 2
            if match[1] is not None:
                kernel_size = [int(match[0]), int(match[2])]
            if match[3] is not None:
                stride = IntVec2D(int(match[4]), int(match[5]))
            else:
                # (-1, -1) is the sentinel for "no explicit stride";
                # presumably the layer then strides by the pool size — TODO confirm.
                stride = IntVec2D(-1, -1)
            layers.append(
                MaxPool2DLayerParams(pool_size=IntVec2D(*kernel_size),
                                     strides=stride))
        else:
            raise Exception("Unknown layer with name: {}".format(label))

    return layers