def build_sequential(config, train_data):
    model = Sequential()

    input_width = train_data[config.data_name[0]].shape[1]

    model.add(build_embedding_layer(config, input_width=input_width))
    model.add(build_convolutional_layer(config))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(build_pooling_layer(config, input_width=input_width))
    model.add(Flatten())

    for i, n_hidden in enumerate(config.fully_connected):
        model.add(build_dense_layer(config, n_hidden=n_hidden))
        if config.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(build_dense_layer(config, n_hidden=2))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('softmax'))

    load_weights(config, model)

    optimizer = build_optimizer(config)
    model.compile(loss=config.loss, optimizer=optimizer)

    return model
def build_residual_block(args, name, input_shape, n_hidden, n_skip=2):
    """
    Rough sketch of building blocks of layers for residual learning.
    See http://arxiv.org/abs/1512.03385 for motivation.
    """
    block = Graph()
    input_name = 'x'
    block.add_input(input_name, input_shape=input_shape)

    # The current keras graph implementation doesn't allow you to connect
    # an input node to an output node.  Use Identity to work around that.
    block.add_node(Identity(), name=name+'identity', input=input_name)
    prev_layer = name+'identity'

    for i in range(n_skip):
        layer_name = 'h' + str(i)

        l = build_dense_layer(args, n_hidden=n_hidden)
        block.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name

        # Haven't gotten this to work yet.
        #bn = BatchNormalization()
        #block.add_node(bn, name=layer_name+'bn', input=prev_layer)
        #prev_layer = layer_name+'bn'

        # ReLU between layers inside the block; no activation on the last
        # layer before the residual sum.
        if i < n_skip - 1:
            block.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
            prev_layer = layer_name+'relu'

    block.add_output(name=name+'output',
            inputs=[name+'identity', prev_layer], merge_mode='sum')

    return block
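# Usage sketch for build_residual_block (not part of the original code; the
# `args` namespace, shapes, and optimizer below are hypothetical). The
# returned Graph already has an input named 'x' and an output named
# name+'output', so under the Keras 0.x Graph API it can be compiled and
# exercised on its own:
#
#   block = build_residual_block(args, 'res00', input_shape=(256,),
#                                n_hidden=256, n_skip=2)
#   block.compile(optimizer='sgd', loss={'res00output': 'mse'})
#   block.fit({'x': X, 'res00output': Y}, nb_epoch=1)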
def build_fully_connected_layers(graph, config, prev_non_word_layer, prev_context_layer):
    if config.pool_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    # Add some number of fully-connected layers without skip connections.
    prev_layer = None
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden,
                init=config.dense_init,
                max_norm=config.dense_max_norm)
        if i == 0:
            # The first dense layer merges the two pooled inputs.
            graph.add_node(l, name=layer_name,
                    inputs=[prev_non_word_layer, prev_context_layer],
                    merge_mode=config.pool_merge_mode,
                    dot_axes=dot_axes)
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name

        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'

        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'

        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    return prev_layer
def build_model(args):
    np.random.seed(args.seed)

    model = Sequential()

    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))

    model.add(build_convolutional_layer(args))
    if 'normalization' in args.regularization_layer:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if 'normalization' in args.regularization_layer:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if 'dropout' in args.regularization_layer:
            model.add(Dropout(args.dropout_p))

    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    for k, v in json.loads(model.to_json()).items():
        if k == 'layers':
            for l in v:
                print(' %s => %s' % (l['name'], l))

    return model
def build_ordinary_model(args):
    model = Sequential()

    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))

    model.add(build_convolutional_layer(args))
    if args.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if args.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if args.dropout_fc_p > 0.:
            model.add(Dropout(args.dropout_fc_p))

    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    if args.verbose:
        for k, v in json.loads(model.to_json()).items():
            if k == 'layers':
                for l in v:
                    print(' => %s' % l['name'])

    return model
def build_dictionary_model(args):
    model = Sequential()

    model.add(build_embedding_layer(args))
    model.add(build_convolutional_layer(args))
    model.add(build_pooling_layer(args))
    model.add(Flatten())
    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    return model
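# Usage sketch (not part of the original code): the Sequential builders above
# expect an argparse-style namespace; the attribute values and data below are
# hypothetical placeholders for the repo's real config and inputs.
#
#   from argparse import Namespace
#   args = Namespace(n_classes=10, loss='categorical_crossentropy', ...)
#   model = build_dictionary_model(args)
#   model.fit(X_train, y_train, batch_size=128, nb_epoch=10,
#             validation_data=(X_valid, y_valid))  # Keras 0.x uses nb_epoch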
def build_graph(config, train_data):
    graph = Graph()

    input_width = train_data[config.data_name[0]].shape[1]
    graph.add_input(config.data_name[0], input_shape=(input_width,), dtype='int')

    graph.add_node(build_embedding_layer(config, input_width=input_width),
            name='embedding', input=config.data_name[0])
    graph.add_node(build_convolutional_layer(config), name='conv', input='embedding')
    prev_layer = 'conv'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(config, input_width=input_width),
            name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(config, n_hidden=config.fully_connected[-1])
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if config.dropout_fc_p > 0.:
                graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(Dense(2), name='softmax_dense', input=prev_layer)
    prev_layer = 'softmax_dense'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input=prev_layer)
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss={config.target_name: config.loss}, optimizer=optimizer)

    return graph
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = "non_word_marked_chars"
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype="int")
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name="non_word_embedding", input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name="non_word_conv", input="non_word_embedding")
    non_word_prev_layer = add_bn_relu(graph, args, "non_word_conv")
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name="non_word_pool", input=non_word_prev_layer)
    graph.add_node(Flatten(), name="non_word_flatten", input="non_word_pool")

    # Add some number of fully-connected layers without skip connections.
    prev_layer = "non_word_flatten"
    for i in range(args.n_fully_connected):
        layer_name = "dense%02d" % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
            prev_layer = layer_name + "bn"
        if args.dropout_fc_p > 0.0:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
            prev_layer = layer_name + "do"

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = "%02d" % i

        graph.add_node(Identity(), name=block_name + "input", input=prev_layer)
        prev_layer = block_input_layer = block_name + "input"

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = "h%s%02d" % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
                prev_layer = layer_name + "bn"

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation("relu"), name=layer_name + "relu", input=prev_layer)
                prev_layer = layer_name + "relu"

            if args.dropout_fc_p > 0.0:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
                prev_layer = layer_name + "do"

        graph.add_node(Identity(), name=block_name + "output",
                inputs=[block_input_layer, prev_layer], merge_mode="sum")
        graph.add_node(Activation("relu"), name=block_name + "relu", input=block_name + "output")
        prev_layer = block_input_layer = block_name + "relu"

    n_classes = np.max(train_data["multiclass_correction_target"]) + 1
    if hasattr(args, "n_hsm_classes"):
        graph.add_node(build_hierarchical_softmax_layer(args), name="softmax", input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes, activation="softmax"),
                name="softmax", input=prev_layer)
    graph.add_output(name="multiclass_correction_target", input="softmax")

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={"multiclass_correction_target": args.loss}, optimizer=optimizer)

    return graph
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')
    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
            prev_layer = layer_name + 'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p), name=layer_name + 'do', input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name + 'relu', input=prev_layer)
                prev_layer = layer_name + 'relu'

            if 'dropout' in args.regularization_layer:
                graph.add_node(Dropout(args.dropout_p), name=layer_name + 'do', input=prev_layer)
                prev_layer = layer_name + 'do'

        graph.add_node(Identity(), name=block_name + 'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name + 'relu', input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    graph.add_node(build_dense_layer(args, args.n_classes, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
def build_model(args, train_data, validation_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    real_word_input = 'real_word_marked_chars'

    non_word_input_width = train_data.data[non_word_input].shape[1]
    real_word_input_width = train_data.data[real_word_input].shape[1]

    print('non_word_input_width', non_word_input_width)
    print('real_word_input_width', real_word_input_width)

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    graph.add_input(real_word_input, input_shape=(real_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=real_word_input_width),
            name='real_word_embedding', input=real_word_input)
    graph.add_node(build_convolutional_layer(args), name='real_word_conv', input='real_word_embedding')
    real_word_prev_layer = add_bn_relu(graph, args, 'real_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=real_word_input_width),
            name='real_word_pool', input=real_word_prev_layer)
    graph.add_node(Flatten(), name='real_word_flatten', input='real_word_pool')

    # Add some number of fully-connected layers without skip connections.
    # The placeholder name below is never used as an input: the first dense
    # layer merges the two flattened inputs directly.
    prev_layer = 'join_non_and_real'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        if i == 0:
            graph.add_node(l, name=layer_name, inputs=['non_word_flatten', 'real_word_flatten'])
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    #if hasattr(args, 'n_hsm_classes'):
    #    graph.add_node(build_hierarchical_softmax_layer(args),
    #            name='softmax', input=prev_layer)
    #else:
    graph.add_node(build_dense_layer(args, 2, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='binary_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'binary_target': args.loss}, optimizer=optimizer)

    return graph
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')
    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if 'dropout' in args.regularization_layer:
                graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')
    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if args.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    # Add some number of fully-connected layers without skip connections.
    prev_layer = 'non_word_flatten'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
            prev_layer = layer_name + 'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + 'do', input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name + 'relu', input=prev_layer)
                prev_layer = layer_name + 'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + 'do', input=prev_layer)
                prev_layer = layer_name + 'do'

        graph.add_node(Identity(), name=block_name + 'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name + 'relu', input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    n_classes = np.max(train_data['multiclass_correction_target']) + 1
    if hasattr(args, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(args), name='softmax', input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes, activation='softmax'),
                name='softmax', input=prev_layer)
    graph.add_output(name='multiclass_correction_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'multiclass_correction_target': args.loss}, optimizer=optimizer)

    return graph
def build_model(config):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_input(config.candidate_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')

    graph.add_shared_node(
        build_embedding_layer(config, input_width=config.model_input_width),
        name='embedding',
        inputs=[config.non_word_input_name, config.candidate_word_input_name],
        outputs=['non_word_embedding', 'candidate_word_embedding'])

    non_word_prev_layer = 'non_word_embedding'

    if config.dropout_embedding_p > 0.:
        graph.add_node(Dropout(config.dropout_embedding_p),
                name='non_word_embedding_do', input='non_word_embedding')
        non_word_prev_layer = 'non_word_embedding_do'

    # Add noise only to non-words.
    if config.gaussian_noise_sd > 0.:
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
                name='non_word_embedding_noise', input=non_word_prev_layer)
        non_word_prev_layer = 'non_word_embedding_noise'

    graph.add_shared_node(
        build_convolutional_layer(config),
        name='conv',
        inputs=[non_word_prev_layer, 'candidate_word_embedding'],
        outputs=['non_word_conv', 'candidate_word_conv'])

    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    candidate_word_prev_layer = add_bn_relu(graph, config, 'candidate_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='candidate_word_pool', input=candidate_word_prev_layer)
    graph.add_node(Flatten(), name='candidate_word_flatten', input='candidate_word_pool')

    # Compute similarity of the non-word and candidate.
    if config.char_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    char_merge_layer = Dense(config.char_merge_n_hidden,
            W_constraint=maxnorm(config.char_merge_max_norm))
    graph.add_node(char_merge_layer, name='char_merge',
            inputs=['non_word_flatten', 'candidate_word_flatten'],
            merge_mode=config.char_merge_mode, dot_axes=dot_axes)

    prev_char_layer = 'char_merge'
    if config.scale_char_merge_output:
        if config.char_merge_act == "sigmoid":
            lambda_layer = Lambda(lambda x: 12.*x-6.)
        elif config.char_merge_act == "tanh":
            lambda_layer = Lambda(lambda x: 6.*x-3.)
        else:
            lambda_layer = Lambda(lambda x: x)
        graph.add_node(lambda_layer, name='char_merge_scale', input='char_merge')
        prev_char_layer = 'char_merge_scale'

    # Add some number of fully-connected layers without skip connections.
    prev_layer = prev_char_layer
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(config, n_hidden=config.n_hidden_residual)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU between layers inside the block; the last layer's
            # activation is applied after the residual merge below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if config.dropout_fc_p > 0.:
                graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    # Save the name of the last dense layer for the distance and rank targets.
    last_dense_layer = prev_layer

    # Add softmax for binary prediction of whether the real word input
    # is the true correction for the non-word input.
    graph.add_node(Dense(2, W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax', inputs=[prev_char_layer, prev_layer], merge_mode='concat')
    prev_layer = 'softmax'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    lossdict = {}
    lossdict[config.target_name] = config.loss

    for distance_name in config.distance_targets:
        #add_linear_output_mlp(graph, ['non_word_flatten', 'candidate_word_flatten'],
        #        distance_name+'_first',
        #        config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, 'dense00', distance_name+'_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, distance_name+'_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    if config.use_rank_target:
        add_linear_output_mlp(graph, 'dense00', 'candidate_rank_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, 'candidate_rank_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss=lossdict, optimizer=optimizer)

    return graph
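# The add_linear_output_mlp helper called above is defined elsewhere in the
# repo. The sketch below is only a guess reconstructed from the call sites
# (an input node, an output name, two hidden-layer widths, a batch-norm flag,
# and the loss dictionary to update); it is not the project's implementation.
def add_linear_output_mlp(graph, input_name, output_name, n_hidden1, n_hidden2,
        batch_normalization, lossdict, loss='mse'):
    # Two ReLU hidden layers followed by a single linear output unit.
    h1 = output_name + '_h1'
    graph.add_node(Dense(n_hidden1), name=h1, input=input_name)
    prev_layer = h1
    if batch_normalization:
        graph.add_node(BatchNormalization(), name=h1+'bn', input=prev_layer)
        prev_layer = h1+'bn'
    graph.add_node(Activation('relu'), name=h1+'relu', input=prev_layer)
    h2 = output_name + '_h2'
    graph.add_node(Dense(n_hidden2), name=h2, input=h1+'relu')
    graph.add_node(Activation('relu'), name=h2+'relu', input=h2)
    graph.add_node(Dense(1), name=output_name+'_linear', input=h2+'relu')
    graph.add_output(name=output_name, input=output_name+'_linear')
    # Register a regression loss for this auxiliary target.
    lossdict[output_name] = loss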