def test_dilated_block_architecture(self):
    """Smoke-test: a conv/pool stack with two dilated blocks trains end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(),
        DilatedBlockLayerParams(filters=10),
        DilatedBlockLayerParams(filters=10),
        Conv2DLayerParams(filters=10),
    ]
    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def default_layers():
    """Return the default Calamari layer stack: two conv/pool pairs
    followed by a bidirectional LSTM with dropout."""
    # Imported locally (as in the original) to avoid import cycles at module load.
    from calamari_ocr.ocr.model.layers.conv2d import Conv2DLayerParams
    from calamari_ocr.ocr.model.layers.pool2d import MaxPool2DLayerParams
    from calamari_ocr.ocr.model.layers.bilstm import BiLSTMLayerParams
    from calamari_ocr.ocr.model.layers.dropout import DropoutLayerParams

    feature_extractor = [
        Conv2DLayerParams(filters=40),
        MaxPool2DLayerParams(),
        Conv2DLayerParams(filters=60),
        MaxPool2DLayerParams(),
    ]
    sequence_head = [
        BiLSTMLayerParams(),
        DropoutLayerParams(rate=0.5),
    ]
    return feature_extractor + sequence_head
def test_pure_cnn_architecture(self):
    """Smoke-test: a recurrence-free (pure CNN) model trains end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(),
        Conv2DLayerParams(filters=20, strides=IntVec2D(2, 2), kernel_size=IntVec2D(4, 4)),
        Conv2DLayerParams(filters=30),
    ]
    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def test_concat_cnn_architecture(self):
    """A branching net (tconv + concat) must be expressible three equivalent
    ways — programmatically, via --network shorthand, and via individual
    --model.layers flags — and then train end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(),
        DilatedBlockLayerParams(filters=10),
        TransposedConv2DLayerParams(filters=10),
        # indices refer to the outputs of the first and fourth layer
        ConcatLayerParams(concat_indices=[1, 4]),
        Conv2DLayerParams(filters=10),
        BiLSTMLayerParams(hidden_nodes=10),
    ]
    post_init(params)
    expected = params.scenario.model.to_dict()

    via_network = parse_args([
        "--network",
        "conv=10,pool=2x2,db=10:2,tconv=10,concat=1:4,conv=10,lstm=10",
    ])
    self.assertDictEqual(expected, via_network.scenario.model.to_dict())

    via_flags = parse_args([
        "--model.layers",
        "Conv", "Pool", "DilatedBlock", "TConv", "Concat", "Conv", "BiLSTM",
        "--model.layers.0.filters", "10",
        "--model.layers.2.filters", "10",
        "--model.layers.3.filters", "10",
        "--model.layers.4.concat_indices", "1", "4",
        "--model.layers.5.filters", "10",
        "--model.layers.6.hidden_nodes", "10",
    ])
    self.assertDictEqual(expected, via_flags.scenario.model.to_dict())

    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def test_concat_cnn_architecture(self):
    """Smoke-test: a branching architecture (transposed conv + concat)
    trains end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(),
        DilatedBlockLayerParams(filters=10),
        TransposedConv2DLayerParams(filters=10),
        # outputs of the first and fourth layer are concatenated
        ConcatLayerParams(concat_indices=[1, 4]),
        Conv2DLayerParams(filters=10),
        BiLSTMLayerParams(hidden_nodes=10),
    ]
    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def test_pure_cnn_architecture(self):
    """The pure-CNN topology must be expressible three equivalent ways —
    programmatically, via --network shorthand, and via individual
    --model.layers flags — and then train end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(),
        Conv2DLayerParams(filters=20, strides=IntVec2D(2, 2), kernel_size=IntVec2D(4, 4)),
        Conv2DLayerParams(filters=30),
    ]
    post_init(params)
    expected = params.scenario.model.to_dict()

    via_network = parse_args(["--network", "conv=10,pool=2x2,conv=20:4x4:2x2,conv=30"])
    self.assertDictEqual(expected, via_network.scenario.model.to_dict())

    via_flags = parse_args([
        "--model.layers", "Conv", "Pool", "Conv", "Conv",
        "--model.layers.0.filters", "10",
        "--model.layers.2.filters", "20",
        "--model.layers.2.kernel_size.x", "4",
        "--model.layers.2.kernel_size.y", "4",
        "--model.layers.2.strides.x", "2",
        "--model.layers.2.strides.y", "2",
        "--model.layers.3.filters", "30",
    ])
    self.assertDictEqual(expected, via_flags.scenario.model.to_dict())

    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def test_dilated_block_architecture(self):
    """The dilated-block topology must be expressible three equivalent ways —
    programmatically, via --network shorthand, and via individual
    --model.layers flags — and then train end to end."""
    params = uw3_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=10),
        MaxPool2DLayerParams(strides=IntVec2D(2, 2)),
        DilatedBlockLayerParams(filters=10),
        DilatedBlockLayerParams(filters=10),
        Conv2DLayerParams(filters=10),
    ]
    post_init(params)
    expected = params.scenario.model.to_dict()

    via_network = parse_args(["--network", "conv=10,pool=2x2:2x2,db=10:2,db=10:2,conv=10"])
    self.assertDictEqual(expected, via_network.scenario.model.to_dict())

    via_flags = parse_args([
        "--model.layers", "Conv", "Pool", "DilatedBlock", "DilatedBlock", "Conv",
        "--model.layers.0.filters", "10",
        "--model.layers.1.strides", "2", "2",
        "--model.layers.2.filters", "10",
        "--model.layers.3.filters", "10",
        "--model.layers.4.filters", "10",
    ])
    self.assertDictEqual(expected, via_flags.scenario.model.to_dict())

    with tempfile.TemporaryDirectory() as tmp_dir:
        params.output_dir = tmp_dir
        main(params)
def default_trainer_params(cls):
    """Build a deliberately tiny trainer configuration for fast tests:
    a minimal model, batch size 1, single process, one epoch of two samples,
    and no parallel pre-processing."""
    params = super().default_trainer_params()
    params.scenario.model.layers = [
        Conv2DLayerParams(filters=2),
        MaxPool2DLayerParams(pool_size=IntVec2D(4, 4)),
        BiLSTMLayerParams(hidden_nodes=2),
        DropoutLayerParams(rate=0.5),
    ]
    # Keep both pipelines serial and minimal so the test runs quickly.
    for setup in (params.gen.setup.val, params.gen.setup.train):
        setup.batch_size = 1
        setup.num_processes = 1
    params.epochs = 1
    params.samples_per_epoch = 2
    params.scenario.data.pre_proc.run_parallel = False
    post_init(params)
    return params
def graph_params_from_definition_string(s: str) -> List[LayerParams]:
    """Parse a comma-separated network definition string into layer params.

    Grammar (one ``label=value`` item per layer, joined by commas):
      * ``conv=<filters>[:<h>x<w>[:<sx>x<sy>]]``  (aliases: ``cnn``, ``conv2d``);
        kernel defaults to 3x3, stride to 1x1
      * ``tconv=<filters>[:<sx>x<sy>[:<h>x<w>]]`` (aliases: ``tcnn``, ``tconv2d``);
        note: the FIRST pair is the stride (default 2x2), the second the kernel
      * ``db=<filters>:<depth>[:<h>[x<w>]]``      dilated block, kernel defaults 3x3
      * ``pool=<h>[x<w>][:<sx>x<sy>]``            (aliases: ``max_pool``, ``pool2d``);
        a missing stride is encoded as IntVec2D(-1, -1)
      * ``concat=<index0>:<index1>``              indices may be negative
      * ``lstm=<hidden_nodes>``
      * ``dropout=<rate>``

    LSTM layers must come last: any CNN/pool-type item after an ``lstm`` item
    raises.

    Args:
        s: the definition string, e.g. ``"conv=40,pool=2x2,lstm=200,dropout=0.5"``.

    Returns:
        The parsed layer params in definition order.

    Raises:
        Exception: on an unknown label, a malformed value, or a CNN/pool-type
            layer placed after an LSTM layer.
    """
    layers = []
    # Pre-compiled grammars for the value part of each label.
    cnn_matcher = re.compile(r"^([\d]+)(:([\d]+)x([\d]+))?(:([\d]+)x([\d]+))?$")
    db_matcher = re.compile(r"^([\d]+):([\d]+)(:([\d]+)(x([\d]+))?)?$")
    concat_matcher = re.compile(r"^([\-\d]+):([\-\d]+)$")
    pool_matcher = re.compile(r"^([\d]+)(x([\d]+))?(:([\d]+)x([\d]+))?$")
    str_params = s.split(",")
    lstm_appeared = False
    for param in str_params:
        label, value = tuple(param.split("="))
        if label == "dropout":
            layers.append(DropoutLayerParams(rate=float(value)))
        elif label == "lstm":
            lstm_appeared = True
            layers.append(BiLSTMLayerParams(hidden_nodes=int(value)))
        elif label == "concat":
            if lstm_appeared:
                raise Exception("LSTM layers must be placed proceeding to CNN/Pool")
            match = concat_matcher.match(value)
            if match is None:
                raise Exception(
                    "Concat structure needs: concat=[index0]:[index1] but got concat={}".format(value))
            match = match.groups()
            layers.append(ConcatLayerParams(concat_indices=list(map(int, match))))
        elif label == "db":
            if lstm_appeared:
                raise Exception("LSTM layers must be placed proceeding to CNN/Pool")
            match = db_matcher.match(value)
            if match is None:
                raise Exception("Dilated block structure needs: db=[filters]:[depth>0]:[h]x[w]")
            match = match.groups()
            kernel_size = [3, 3]
            if match[2] is not None:
                # only "<h>" given: square kernel
                kernel_size = [int(match[3])] * 2
            if match[4] is not None:
                # "<h>x<w>" given: rectangular kernel
                kernel_size = [int(match[3]), int(match[5])]
            layers.append(
                DilatedBlockLayerParams(
                    filters=int(match[0]),
                    dilated_depth=int(match[1]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(1, 1),
                ))
        elif label in {"cnn", "conv", "conv2d"}:
            if lstm_appeared:
                raise Exception("LSTM layers must be placed proceeding to CNN/Pool")
            match = cnn_matcher.match(value)
            if match is None:
                raise Exception(
                    f"CNN structure needs: cnn=[filters]:[h]x[w]:[sx]x[sy] but got {value}")
            match = match.groups()
            kernel_size = [3, 3]
            stride = [1, 1]
            if match[1] is not None:
                kernel_size = [int(match[2]), int(match[3])]
            if match[4] is not None:
                stride = [int(match[5]), int(match[6])]
            layers.append(
                Conv2DLayerParams(
                    filters=int(match[0]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(*stride),
                ))
        elif label in {"tcnn", "tconv", "tconv2d"}:
            if lstm_appeared:
                raise Exception("LSTM layers must be placed proceeding to CNN/Pool")
            match = cnn_matcher.match(value)
            if match is None:
                raise Exception("Transposed CNN structure needs: tcnn=[filters]:[sx]x[sy]")
            match = match.groups()
            kernel_size = [3, 3]
            stride = [2, 2]
            # NOTE: intentionally the reverse of the conv branch — the first
            # optional pair is the stride, the second the kernel size.
            if match[1] is not None:
                stride = [int(match[2]), int(match[3])]
            if match[4] is not None:
                kernel_size = [int(match[5]), int(match[6])]
            layers.append(
                TransposedConv2DLayerParams(
                    filters=int(match[0]),
                    kernel_size=IntVec2D(*kernel_size),
                    strides=IntVec2D(*stride),
                ))
        elif label in {"pool", "max_pool", "pool2d"}:
            if lstm_appeared:
                raise Exception("LSTM layers must be placed proceeding to CNN/Pool")
            match = pool_matcher.match(value)
            if match is None:
                # Fixed: the message previously claimed "pool=[h];[w]" although
                # the accepted syntax uses "x" as the separator.
                raise Exception("Pool structure needs: pool=[h]x[w]:[sx]x[sy] but got pool={}".format(value))
            match = match.groups()
            kernel_size = [int(match[0])] * 2
            if match[1] is not None:
                kernel_size = [int(match[0]), int(match[2])]
            if match[3] is not None:
                stride = IntVec2D(int(match[4]), int(match[5]))
            else:
                # (-1, -1) signals "no explicit stride" to the layer params.
                stride = IntVec2D(-1, -1)
            layers.append(MaxPool2DLayerParams(pool_size=IntVec2D(*kernel_size), strides=stride))
        else:
            raise Exception("Unknown layer with name: {}".format(label))
    return layers