def build_sequential(config, train_data):
    model = Sequential()

    input_width = train_data[config.data_name[0]].shape[1]

    model.add(build_embedding_layer(config, input_width=input_width))
    model.add(build_convolutional_layer(config))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(build_pooling_layer(config, input_width=input_width))
    model.add(Flatten())

    for i, n_hidden in enumerate(config.fully_connected):
        model.add(build_dense_layer(config, n_hidden=n_hidden))
        if config.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(build_dense_layer(config, n_hidden=2))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('softmax'))

    load_weights(config, model)

    optimizer = build_optimizer(config)
    model.compile(loss=config.loss, optimizer=optimizer)

    return model
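# For reference, a plausible set of imports for the builders in this file,
# which use the Keras 0.x Sequential and Graph APIs. Module paths follow
# Keras 0.3.x and may need adjusting for other versions. Project-specific
# helpers (build_embedding_layer, build_convolutional_layer,
# build_pooling_layer, build_dense_layer, build_optimizer, load_weights,
# build_hierarchical_softmax_layer, add_bn_relu, Identity, ...) are defined
# elsewhere in the repository.
import json

import numpy as np

from keras.models import Sequential, Graph
from keras.layers.core import Dense, Dropout, Activation, Flatten, Lambda
from keras.layers.normalization import BatchNormalization
from keras.layers.noise import GaussianNoise
from keras.constraints import maxnorm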
def build_char_model(graph, config):
    # Character-level input for the non-word error.
    graph.add_input(config.non_word_char_input_name,
            input_shape=(config.non_word_char_input_width,), dtype='int')
    non_word_embedding = build_embedding_layer(config,
            input_width=config.non_word_char_input_width,
            n_embeddings=config.n_char_embeddings,
            n_embed_dims=config.n_char_embed_dims)
    graph.add_node(non_word_embedding, name='non_word_embedding',
            input=config.non_word_char_input_name)
    graph.add_node(GaussianNoise(config.gaussian_noise_sd),
            name='non_word_embedding_noise', input='non_word_embedding')

    non_word_conv = build_convolutional_layer(config,
            n_filters=config.n_char_filters,
            filter_width=config.char_filter_width)
    non_word_conv.trainable = config.train_filters
    graph.add_node(non_word_conv, name='non_word_conv',
            input='non_word_embedding_noise')
    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')

    non_word_pool = build_pooling_layer(config,
            input_width=config.non_word_char_input_width,
            filter_width=config.char_filter_width)
    graph.add_node(non_word_pool, name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')
    prev_non_word_layer = 'non_word_flatten'

    return prev_non_word_layer
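# add_bn_relu is used throughout this file but not shown here. Judging from
# the inline batch-norm/ReLU pattern in the other builders, a minimal sketch
# could look like the following (the exact original may differ):
def add_bn_relu(graph, config, input_name):
    # Optionally batch-normalize the named node, then apply a ReLU.
    # Returns the name of the last node added so callers can chain on it.
    prev_layer = input_name
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name=input_name + 'bn',
                input=prev_layer)
        prev_layer = input_name + 'bn'
    graph.add_node(Activation('relu'), name=input_name + 'relu',
            input=prev_layer)
    return input_name + 'relu'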
def build_convolutional_context_model(config, n_classes):
    graph = Graph()

    prev_non_word_layer = build_char_model(graph, config)

    # Word-level input for the context of the non-word error.
    graph.add_input(config.context_input_name,
            input_shape=(config.context_input_width,), dtype='int')
    context_embedding = build_embedding_layer(config,
            input_width=config.context_input_width,
            n_embeddings=config.n_context_embeddings,
            n_embed_dims=config.n_context_embed_dims)
    graph.add_node(context_embedding, name='context_embedding',
            input=config.context_input_name)

    context_conv = build_convolutional_layer(config,
            n_filters=config.n_context_filters,
            filter_width=config.context_filter_width)
    context_conv.trainable = config.train_filters
    graph.add_node(context_conv, name='context_conv', input='context_embedding')
    context_prev_layer = add_bn_relu(graph, config, 'context_conv')

    context_pool = build_pooling_layer(config,
            input_width=config.context_input_width,
            filter_width=config.context_filter_width)
    graph.add_node(context_pool, name='context_pool', input=context_prev_layer)
    graph.add_node(Flatten(), name='context_flatten', input='context_pool')
    prev_context_layer = 'context_flatten'

    prev_layer = build_fully_connected_layers(graph, config,
            prev_non_word_layer, prev_context_layer)
    prev_layer = build_residual_blocks(graph, config, prev_layer)

    if hasattr(config, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(config),
                name='softmax', input=prev_layer)
    else:
        graph.add_node(Dense(n_classes, init=config.softmax_init,
                W_constraint=maxnorm(config.softmax_max_norm)),
                name='softmax', input=prev_layer)

    prev_layer = 'softmax'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name='multiclass_correction_target', input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss={'multiclass_correction_target': config.loss},
            optimizer=optimizer)

    return graph
def build_dictionary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    model.add(build_convolutional_layer(args))
    model.add(build_pooling_layer(args))
    model.add(Flatten())
    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    return model
def build_model(args):
    np.random.seed(args.seed)

    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if 'normalization' in args.regularization_layer:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))
    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if 'normalization' in args.regularization_layer:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if 'dropout' in args.regularization_layer:
            model.add(Dropout(args.dropout_p))

    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    for k, v in json.loads(model.to_json()).items():
        if k == 'layers':
            for l in v:
                print(' %s => %s' % (l['name'], l))

    return model
def build_ordinary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if args.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))
    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if args.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if args.dropout_fc_p > 0.:
            model.add(Dropout(args.dropout_fc_p))

    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    if args.verbose:
        for k, v in json.loads(model.to_json()).items():
            if k == 'layers':
                for l in v:
                    print(' => %s' % l['name'])

    return model
def build_graph(config, train_data):
    graph = Graph()

    input_width = train_data[config.data_name[0]].shape[1]
    graph.add_input(config.data_name[0], input_shape=(input_width,), dtype='int')
    graph.add_node(build_embedding_layer(config, input_width=input_width),
            name='embedding', input=config.data_name[0])
    graph.add_node(build_convolutional_layer(config), name='conv', input='embedding')
    prev_layer = 'conv'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)
    graph.add_node(build_pooling_layer(config, input_width=input_width),
            name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(config, n_hidden=config.fully_connected[-1])
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if config.dropout_fc_p > 0.:
                graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    graph.add_node(Dense(2), name='softmax_dense', input=prev_layer)
    prev_layer = 'softmax_dense'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input=prev_layer)
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss={config.target_name: config.loss}, optimizer=optimizer)

    return graph
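# Identity is used as a named pass-through node so that residual-block inputs
# and the summed block outputs get their own names in the Graph. It is not a
# stock Keras 0.x layer; one plausible definition, assuming the Keras 0.x
# Layer interface (get_input/get_output), is:
from keras.layers.core import Layer

class Identity(Layer):
    # Pass-through layer: returns its input unchanged.
    def get_output(self, train=False):
        return self.get_input(train)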
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv',
            input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    # Add some number of fully-connected layers without skip connections.
    prev_layer = 'non_word_flatten'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    n_classes = np.max(train_data['multiclass_correction_target']) + 1
    if hasattr(args, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(args),
                name='softmax', input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes, activation='softmax'),
                name='softmax', input=prev_layer)
    graph.add_output(name='multiclass_correction_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'multiclass_correction_target': args.loss}, optimizer=optimizer)

    return graph
def build_residual_model(args):
    graph = Graph()
    graph.add_input('input', input_shape=(args.input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args), name='embedding', input='input')
    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)
    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if 'dropout' in args.regularization_layer:
                graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
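# A hypothetical training call for the residual graph above, using the Keras
# 0.x Graph API, where inputs and targets are passed as a single dict keyed by
# the node names used in the builder. X_train, y_train_onehot, and the
# args.batch_size / args.n_epochs attributes are placeholders, not values
# defined in this file.
model = build_residual_model(args)
model.fit({'input': X_train, 'output': y_train_onehot},
        batch_size=args.batch_size,
        nb_epoch=args.n_epochs)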
def build_model(args, train_data, validation_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    real_word_input = 'real_word_marked_chars'

    non_word_input_width = train_data.data[non_word_input].shape[1]
    real_word_input_width = train_data.data[real_word_input].shape[1]

    print('non_word_input_width', non_word_input_width)
    print('real_word_input_width', real_word_input_width)

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv',
            input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    graph.add_input(real_word_input, input_shape=(real_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=real_word_input_width),
            name='real_word_embedding', input=real_word_input)
    graph.add_node(build_convolutional_layer(args), name='real_word_conv',
            input='real_word_embedding')
    real_word_prev_layer = add_bn_relu(graph, args, 'real_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=real_word_input_width),
            name='real_word_pool', input=real_word_prev_layer)
    graph.add_node(Flatten(), name='real_word_flatten', input='real_word_pool')

    # Add some number of fully-connected layers without skip connections.
    # The first dense layer joins the non-word and real-word branches; the
    # placeholder name below is overwritten as soon as that layer is added.
    prev_layer = 'join_non_and_real'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        if i == 0:
            graph.add_node(l, name=layer_name,
                    inputs=['non_word_flatten', 'real_word_flatten'])
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    #if hasattr(args, 'n_hsm_classes'):
    #    graph.add_node(build_hierarchical_softmax_layer(args),
    #            name='softmax', input=prev_layer)
    #else:
    graph.add_node(build_dense_layer(args, 2, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='binary_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'binary_target': args.loss}, optimizer=optimizer)

    return graph
def build_residual_model(args):
    graph = Graph()
    graph.add_input('input', input_shape=(args.input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args), name='embedding', input='input')
    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if args.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)
    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')
    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if args.dropout_fc_p > 0.:
                graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes, activation='softmax'),
            name='softmax', input=prev_layer)
    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
def build_model(config, n_classes):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(config, input_width=config.model_input_width),
            name='non_word_embedding', input=config.non_word_input_name)

    conv = build_convolutional_layer(config)
    conv.trainable = config.train_filters
    graph.add_node(conv, name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')
    prev_layer = 'non_word_flatten'

    if config.gaussian_noise_sd > 0.:
        print('gaussian noise %.02f' % config.gaussian_noise_sd)
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
                name='non_word_noise', input='non_word_flatten')
        prev_layer = 'non_word_noise'

    # Add some number of fully-connected layers without skip connections.
    last_dense_layer = None
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i

        l = Dense(n_hidden, init=config.dense_init,
                W_constraint=maxnorm(config.dense_max_norm))
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name

        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'

        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'

        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

        last_dense_layer = layer_name

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = Dense(config.n_hidden_residual, init=config.dense_init,
                    W_constraint=maxnorm(config.residual_max_norm))
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if config.dropout_fc_p > 0.:
                graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    if hasattr(config, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(config),
                name='softmax', input=prev_layer)
    else:
        graph.add_node(Dense(n_classes, init=config.dense_init,
                W_constraint=maxnorm(config.softmax_max_norm)),
                name='softmax', input=prev_layer)

    prev_layer = 'softmax'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name='multiclass_correction_target', input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss={'multiclass_correction_target': config.loss}, optimizer=optimizer)

    return graph
def build_model(config):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_input(config.candidate_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')

    graph.add_shared_node(
            build_embedding_layer(config, input_width=config.model_input_width),
            name='embedding',
            inputs=[config.non_word_input_name, config.candidate_word_input_name],
            outputs=['non_word_embedding', 'candidate_word_embedding'])

    non_word_prev_layer = 'non_word_embedding'

    if config.dropout_embedding_p > 0.:
        graph.add_node(Dropout(config.dropout_embedding_p),
                name='non_word_embedding_do', input='non_word_embedding')
        non_word_prev_layer = 'non_word_embedding_do'

    # Add noise only to non-words.
    if config.gaussian_noise_sd > 0.:
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
                name='non_word_embedding_noise', input=non_word_prev_layer)
        non_word_prev_layer = 'non_word_embedding_noise'

    graph.add_shared_node(
            build_convolutional_layer(config),
            name='conv',
            inputs=[non_word_prev_layer, 'candidate_word_embedding'],
            outputs=['non_word_conv', 'candidate_word_conv'])

    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    candidate_word_prev_layer = add_bn_relu(graph, config, 'candidate_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='candidate_word_pool', input=candidate_word_prev_layer)
    graph.add_node(Flatten(), name='candidate_word_flatten', input='candidate_word_pool')

    # Compute similarity of the non-word and candidate.
    if config.char_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    char_merge_layer = Dense(config.char_merge_n_hidden,
            W_constraint=maxnorm(config.char_merge_max_norm))
    graph.add_node(char_merge_layer, name='char_merge',
            inputs=['non_word_flatten', 'candidate_word_flatten'],
            merge_mode=config.char_merge_mode, dot_axes=dot_axes)

    prev_char_layer = 'char_merge'
    if config.scale_char_merge_output:
        if config.char_merge_act == 'sigmoid':
            lambda_layer = Lambda(lambda x: 12. * x - 6.)
        elif config.char_merge_act == 'tanh':
            lambda_layer = Lambda(lambda x: 6. * x - 3.)
        else:
            lambda_layer = Lambda(lambda x: x)
        graph.add_node(lambda_layer, name='char_merge_scale', input='char_merge')
        prev_char_layer = 'char_merge_scale'

    # Add some number of fully-connected layers without skip connections.
    prev_layer = prev_char_layer
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i

        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name

        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'

        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'

        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(config, n_hidden=config.n_hidden_residual)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'

            # ReLU on all but the block's last layer; a ReLU follows the merge.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'

            if config.dropout_fc_p > 0.:
                graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output',
                inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_name+'relu'

    # Save the name of the last dense layer for the distance and rank targets.
    last_dense_layer = prev_layer

    # Add softmax for binary prediction of whether the real word input
    # is the true correction for the non-word input.
    graph.add_node(Dense(2, W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax', inputs=[prev_char_layer, prev_layer],
            merge_mode='concat')
    prev_layer = 'softmax'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    lossdict = {}
    lossdict[config.target_name] = config.loss

    for distance_name in config.distance_targets:
        #add_linear_output_mlp(graph, ['non_word_flatten', 'candidate_word_flatten'],
        #        distance_name+'_first',
        #        config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, 'dense00', distance_name+'_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, distance_name+'_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    if config.use_rank_target:
        add_linear_output_mlp(graph, 'dense00', 'candidate_rank_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, 'candidate_rank_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    load_weights(config, graph)

    optimizer = build_optimizer(config)
    graph.compile(loss=lossdict, optimizer=optimizer)

    return graph
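# add_linear_output_mlp is called above but not defined in this file. A
# hypothetical sketch consistent with how it is called follows; the roles of
# the two size arguments, the node names, and the 'mse' loss are assumptions,
# not the original implementation.
def add_linear_output_mlp(graph, input_name, output_name, n_hidden1, n_hidden2,
        batch_normalization, lossdict):
    # Attach a small MLP with a single linear output unit to the graph and
    # register a regression loss for the new output in lossdict.
    dense1 = output_name + '_dense1'
    if isinstance(input_name, (list, tuple)):
        graph.add_node(Dense(n_hidden1), name=dense1, inputs=input_name,
                merge_mode='concat')
    else:
        graph.add_node(Dense(n_hidden1), name=dense1, input=input_name)
    prev = dense1
    if batch_normalization:
        graph.add_node(BatchNormalization(), name=dense1+'bn', input=prev)
        prev = dense1 + 'bn'
    graph.add_node(Activation('relu'), name=dense1+'relu', input=prev)
    graph.add_node(Dense(n_hidden2), name=output_name+'_dense2', input=dense1+'relu')
    graph.add_node(Activation('relu'), name=output_name+'_dense2relu',
            input=output_name+'_dense2')
    graph.add_node(Dense(1), name=output_name+'_linear',
            input=output_name+'_dense2relu')
    graph.add_output(name=output_name, input=output_name+'_linear')
    lossdict[output_name] = 'mse'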