Example #1
def build_sequential(config, train_data):
    model = Sequential()

    input_width = train_data[config.data_name[0]].shape[1]

    model.add(build_embedding_layer(config, input_width=input_width))
    model.add(build_convolutional_layer(config))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(build_pooling_layer(config, input_width=input_width))
    model.add(Flatten())

    for i,n_hidden in enumerate(config.fully_connected):
        model.add(build_dense_layer(config, n_hidden=n_hidden))
        if config.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(build_dense_layer(config, n_hidden=2))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('softmax'))

    load_weights(config, model)

    optimizer = build_optimizer(config)

    model.compile(loss=config.loss, optimizer=optimizer)

    return model
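All of these examples lean on a family of small builder helpers (build_embedding_layer, build_convolutional_layer, build_pooling_layer, build_dense_layer, build_optimizer) whose definitions are not shown. The sketch below is only an illustration of what they plausibly look like, written against the legacy Keras API (Sequential/Graph, init= and W_constraint= keywords) that the examples use; attribute names such as config.n_embeddings, config.filter_width and config.learning_rate are assumptions, not part of the original code, and some call sites pass extra options (e.g. dropout) that this sketch omits.

# Hypothetical sketches of the shared builder helpers; the real implementations
# are not part of these examples. Written for the legacy Keras API used above.
from keras.layers.embeddings import Embedding
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.layers.core import Dense
from keras.constraints import maxnorm
from keras.optimizers import Adam


def build_embedding_layer(config, input_width=None, n_embeddings=None, n_embed_dims=None):
    # Map integer indices (e.g. characters) to dense vectors.
    return Embedding(n_embeddings or config.n_embeddings,
                     n_embed_dims or config.n_embed_dims,
                     input_length=input_width or config.input_width)


def build_convolutional_layer(config, n_filters=None, filter_width=None):
    # 1-D convolution over the embedded sequence.
    return Convolution1D(n_filters or config.n_filters,
                         filter_width or config.filter_width,
                         border_mode='valid')


def build_pooling_layer(config, input_width=None, filter_width=None):
    # Max-pool over the whole post-convolution length, so Flatten() yields one
    # value per filter.
    input_width = input_width or config.input_width
    filter_width = filter_width or config.filter_width
    return MaxPooling1D(pool_length=input_width - filter_width + 1)


def build_dense_layer(config, n_hidden=None, activation='linear'):
    return Dense(n_hidden or config.n_hidden,
                 init=config.dense_init,
                 W_constraint=maxnorm(config.dense_max_norm),
                 activation=activation)


def build_optimizer(config):
    return Adam(lr=config.learning_rate)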
Example #2
def build_convolutional_context_model(config, n_classes):
    graph = Graph()
    prev_non_word_layer = build_char_model(graph, config)

    # Word-level input for the context of the non-word error.
    graph.add_input(config.context_input_name,
            input_shape=(config.context_input_width,), dtype='int')
    context_embedding = build_embedding_layer(config,
            input_width=config.context_input_width,
            n_embeddings=config.n_context_embeddings,
            n_embed_dims=config.n_context_embed_dims)
    graph.add_node(context_embedding,
            name='context_embedding', input=config.context_input_name)
    context_conv = build_convolutional_layer(config,
            n_filters=config.n_context_filters,
            filter_width=config.context_filter_width)
    context_conv.trainable = config.train_filters
    graph.add_node(context_conv, name='context_conv', input='context_embedding')
    context_prev_layer = add_bn_relu(graph, config, 'context_conv')
    context_pool = build_pooling_layer(config,
            input_width=config.context_input_width,
            filter_width=config.context_filter_width)
    graph.add_node(context_pool,
            name='context_pool', input=context_prev_layer)
    graph.add_node(Flatten(), name='context_flatten', input='context_pool')
    prev_context_layer = 'context_flatten'

    prev_layer = build_fully_connected_layers(graph, config,
            prev_non_word_layer,
            prev_context_layer)

    prev_layer = build_residual_blocks(graph, config, prev_layer)

    if hasattr(config, 'n_hsm_classes'):
        # Name this node to match the graph output added below.
        graph.add_node(build_hierarchical_softmax_layer(config),
            name='softmax_activation', input=prev_layer)
    else:
        graph.add_node(Dense(n_classes, init=config.softmax_init,
            W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax', input=prev_layer)
        prev_layer = 'softmax'
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
            prev_layer = 'softmax_bn'
        graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)

    graph.add_output(name='multiclass_correction_target', input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss={'multiclass_correction_target': config.loss}, optimizer=optimizer)

    return graph
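The Graph-based examples also call two helpers that are not shown here: add_bn_relu, which appends an optional BatchNormalization plus a ReLU after a named node and returns the name of the last node it added, and Identity, a pass-through layer used as a named merge point (for example when summing residual branches). A minimal sketch consistent with how they are called, though not necessarily the original implementation:

# Plausible stand-ins for the add_bn_relu and Identity helpers used above.
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization


def Identity():
    # Any pass-through layer works: the Graph API performs the merge (sum,
    # concat, ...) over `inputs` before the result reaches this layer.
    return Activation('linear')


def add_bn_relu(graph, config, input_name):
    # Optionally batch-normalize, then ReLU, the node named `input_name`.
    # Returns the name of the last node added so callers can chain from it.
    prev_layer = input_name
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name=input_name + '_bn', input=prev_layer)
        prev_layer = input_name + '_bn'
    graph.add_node(Activation('relu'), name=input_name + '_relu', input=prev_layer)
    return input_name + '_relu'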
Example #3
def build_model(args):
    np.random.seed(args.seed)

    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width, ), dtype='int')

    graph.add_node(build_embedding_layer(args),
                   input='input',
                   name='embedding')

    graph.add_node(LSTM(args.n_units,
                        truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='embedding',
                   name='lstm0')

    graph.add_node(LSTM(args.n_units,
                        truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='lstm0',
                   name='lstm1')

    # Attention module.
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='lstm1',
                   name='attention0')
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='attention0',
                   name='attention1')
    graph.add_node(TimeDistributedDense(args.n_units, activation='softmax'),
                   input='attention1',
                   name='attention2')

    # Apply mask from output of attention module to LSTM output.
    graph.add_node(TimeDistributedMerge(mode='sum'),
                   inputs=['lstm1', 'attention2'],
                   name='applyattn',
                   merge_mode='mul')

    graph.add_node(Dense(args.n_classes, activation='softmax'),
                   input='applyattn',
                   name='softmax')

    graph.add_output(input='softmax', name='output')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
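For context, a Graph compiled this way is trained and queried with dictionaries keyed by its input and output names. The snippet below is illustrative only; X_train, y_train and the batch/epoch settings are placeholders, not values from the original code.

# Illustrative usage of the legacy Graph API; X_* are integer matrices of shape
# (n_samples, args.input_width) and y_* are one-hot (n_samples, args.n_classes).
graph = build_model(args)
graph.fit({'input': X_train, 'output': y_train},
          batch_size=128, nb_epoch=10,
          validation_data={'input': X_val, 'output': y_val})
predictions = graph.predict({'input': X_test})['output']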
Example #4
def build_dictionary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    model.add(build_convolutional_layer(args))
    model.add(build_pooling_layer(args))
    model.add(Flatten())
    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    return model
Example #5
def build_model(args):
    np.random.seed(args.seed)

    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), 
            input='input', name='embedding')

    graph.add_node(LSTM(args.n_units,
        truncate_gradient=args.truncate_gradient,
        return_sequences=True),
        input='embedding', name='lstm0')

    graph.add_node(LSTM(args.n_units,
        truncate_gradient=args.truncate_gradient,
        return_sequences=True),
        input='lstm0', name='lstm1')

    # Attention module.
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
            input='lstm1', name='attention0')
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
            input='attention0', name='attention1')
    graph.add_node(TimeDistributedDense(args.n_units, activation='softmax'),
            input='attention1', name='attention2')

    # Apply mask from output of attention module to LSTM output.
    graph.add_node(TimeDistributedMerge(mode='sum'),
            inputs=['lstm1', 'attention2'],
            name='applyattn',
            merge_mode='mul')

    graph.add_node(Dense(args.n_classes, activation='softmax'),
            input='applyattn', name='softmax')

    graph.add_output(input='softmax', name='output')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #6
def build_model(args):
    np.random.seed(args.seed)

    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if 'normalization' in args.regularization_layer:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if 'normalization' in args.regularization_layer:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if 'dropout' in args.regularization_layer:
            model.add(Dropout(args.dropout_p))

    model.add(build_dense_layer(args, args.n_classes,
            activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)

    model.compile(loss=args.loss, optimizer=optimizer)

    for k,v in json.loads(model.to_json()).items():
        if k == 'layers':
            for l in v:
                print('  %s => %s' %(l['name'], l))

    return model
Example #7
def build_ordinary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if args.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if args.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if args.dropout_fc_p > 0.:
            model.add(Dropout(args.dropout_fc_p))

    model.add(build_dense_layer(args, args.n_classes,
            activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)

    model.compile(loss=args.loss, optimizer=optimizer)

    if args.verbose:
        for k,v in json.loads(model.to_json()).items():
            if k == 'layers':
                for l in v:
                    print('  => %s' % l['name'])

    return model
Example #8
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = "non_word_marked_chars"
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype="int")
    graph.add_node(
        build_embedding_layer(args, input_width=non_word_input_width), name="non_word_embedding", input=non_word_input
    )
    graph.add_node(build_convolutional_layer(args), name="non_word_conv", input="non_word_embedding")
    non_word_prev_layer = add_bn_relu(graph, args, "non_word_conv")
    graph.add_node(
        build_pooling_layer(args, input_width=non_word_input_width), name="non_word_pool", input=non_word_prev_layer
    )
    graph.add_node(Flatten(), name="non_word_flatten", input="non_word_pool")

    # Add some number of fully-connected layers without skip connections.
    prev_layer = "non_word_flatten"
    for i in range(args.n_fully_connected):
        layer_name = "dense%02d" % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
            prev_layer = layer_name + "bn"
        if args.dropout_fc_p > 0.0:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
            prev_layer = layer_name + "do"

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = "%02d" % i

        graph.add_node(Identity(), name=block_name + "input", input=prev_layer)
        prev_layer = block_input_layer = block_name + "input"

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = "h%s%02d" % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
                prev_layer = layer_name + "bn"

            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation("relu"), name=layer_name + "relu", input=prev_layer)
                prev_layer = layer_name + "relu"
                if args.dropout_fc_p > 0.0:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
                    prev_layer = layer_name + "do"

        graph.add_node(Identity(), name=block_name + "output", inputs=[block_input_layer, prev_layer], merge_mode="sum")
        graph.add_node(Activation("relu"), name=block_name + "relu", input=block_name + "output")
        prev_layer = block_input_layer = block_name + "relu"

    n_classes = np.max(train_data["multiclass_correction_target"]) + 1
    if hasattr(args, "n_hsm_classes"):
        graph.add_node(build_hierarchical_softmax_layer(args), name="softmax", input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes, activation="softmax"), name="softmax", input=prev_layer)

    graph.add_output(name="multiclass_correction_target", input="softmax")

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={"multiclass_correction_target": args.loss}, optimizer=optimizer)

    return graph
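Because train_data already holds both the 'non_word_marked_chars' input and the 'multiclass_correction_target' labels under the names the graph expects, training can pass it straight to Graph.fit, which picks out only the named inputs and outputs. A sketch, assuming train_data is a plain dict of NumPy arrays; the batch-size and epoch attribute names are assumptions.

# Hypothetical training call; args.batch_size and args.n_epochs are assumed names.
graph = build_model(args, train_data)
graph.fit(train_data, batch_size=args.batch_size, nb_epoch=args.n_epochs)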
Example #9
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width, ), dtype='int')

    graph.add_node(build_embedding_layer(args),
                   name='embedding',
                   input='input')

    graph.add_node(build_convolutional_layer(args),
                   name='conv',
                   input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(),
                           name=layer_name + 'bn',
                           input=prev_layer)
            prev_layer = layer_name + 'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p),
                           name=layer_name + 'do',
                           input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(),
                               name=layer_name + 'bn',
                               input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'),
                               name=layer_name + 'relu',
                               input=prev_layer)
                prev_layer = layer_name + 'relu'
                if 'dropout' in args.regularization_layer:
                    graph.add_node(Dropout(args.dropout_p),
                                   name=layer_name + 'do',
                                   input=prev_layer)
                    prev_layer = layer_name + 'do'

        graph.add_node(Identity(),
                       name=block_name + 'output',
                       inputs=[block_input_layer, prev_layer],
                       merge_mode='sum')
        graph.add_node(Activation('relu'),
                       name=block_name + 'relu',
                       input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    graph.add_node(build_dense_layer(args,
                                     args.n_classes,
                                     activation='softmax'),
                   name='softmax',
                   input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #10
def build_model(args, train_data, validation_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    real_word_input = 'real_word_marked_chars'

    non_word_input_width = train_data.data[non_word_input].shape[1]
    real_word_input_width = train_data.data[real_word_input].shape[1]

    print('non_word_input_width', non_word_input_width)
    print('real_word_input_width', real_word_input_width)

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    graph.add_input(real_word_input, input_shape=(real_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=real_word_input_width),
            name='real_word_embedding', input=real_word_input)
    graph.add_node(build_convolutional_layer(args), name='real_word_conv', input='real_word_embedding')
    real_word_prev_layer = add_bn_relu(graph, args, 'real_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=real_word_input_width),
            name='real_word_pool', input=real_word_prev_layer)
    graph.add_node(Flatten(), name='real_word_flatten', input='real_word_pool')

    # Add some number of fully-connected layers without skip connections.
    # The first dense layer merges the two flattened branches directly; this
    # placeholder name is overwritten on the first iteration and is never
    # added to the graph.
    prev_layer = 'join_non_and_real'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        if i == 0:
            graph.add_node(l, name=layer_name,
                inputs=['non_word_flatten', 'real_word_flatten'])
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    #if hasattr(args, 'n_hsm_classes'):
    #    graph.add_node(build_hierarchical_softmax_layer(args),
    #            name='softmax', input=prev_layer)
    #else:

    graph.add_node(build_dense_layer(args, 2,
        activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='binary_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'binary_target': args.loss}, optimizer=optimizer)

    return graph
Example #11
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')

    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if 'dropout' in args.regularization_layer:
                    graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes,
            activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #12
def build_flat_context_model(config, n_classes):
    graph = Graph()
    prev_non_word_layer = build_char_model(graph, config)

    context_embedding_inputs = []
    context_embedding_outputs = []
    context_reshape_outputs = []
    # One single-token input for each of the five context-word positions.
    for i in range(1, 6):
        name = '%s_%02d' % (config.context_input_name, i)
        graph.add_input(name, input_shape=(1,), dtype='int')
        context_embedding_inputs.append(name)
        context_embedding_outputs.append(
            'context_embedding_%02d' % i)
        context_reshape_outputs.append(   
            'context_reshape_%02d' % i)

    context_embedding = build_embedding_layer(config,
        input_width=1,
        n_embeddings=config.n_context_embeddings,
        n_embed_dims=config.n_context_embed_dims,
        dropout=config.dropout_embedding_p)

    graph.add_shared_node(context_embedding,
        name='context_embedding',
        inputs=context_embedding_inputs,
        outputs=context_embedding_outputs)

    graph.add_shared_node(Reshape((config.n_context_embed_dims,)),
        name='context_reshape',
        inputs=context_embedding_outputs,
        outputs=context_reshape_outputs)

    graph.add_node(Identity(),
        name='word_context',
        inputs=context_reshape_outputs,
        merge_mode='concat')

    prev_context_layer = 'word_context'

    prev_layer = build_fully_connected_layers(graph, config,
            prev_non_word_layer,
            prev_context_layer)

    prev_layer = build_residual_blocks(graph, config, prev_layer)

    if hasattr(config, 'n_hsm_classes'):
        # Name this node to match the graph output added below.
        graph.add_node(build_hierarchical_softmax_layer(config),
            name='softmax_activation', input=prev_layer)
    else:
        graph.add_node(Dense(n_classes, init=config.softmax_init,
            W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax', input=prev_layer)
        prev_layer = 'softmax'
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
            prev_layer = 'softmax_bn'
        graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)

    graph.add_output(name='multiclass_correction_target', input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss={'multiclass_correction_target': config.loss}, optimizer=optimizer)

    return graph
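Examples #2 and #12 delegate the dense stack and the residual blocks to build_fully_connected_layers and build_residual_blocks, which are not shown. Their bodies can be guessed from the inlined versions in the surrounding examples; the sketch below mirrors that pattern and is an approximation, not the original code (the Identity stand-in is the same one sketched after Example #2).

# Approximate reconstructions of the two helpers, following the inline pattern
# used in the other examples in this section.
from keras.layers.core import Activation, Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.constraints import maxnorm


def Identity():
    # Pass-through merge point (see the sketch after Example #2).
    return Activation('linear')


def build_fully_connected_layers(graph, config, *input_names):
    # Dense stack; the first layer joins all given branches by concatenation.
    prev_layer = input_names[0]
    for i, n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = Dense(n_hidden, init=config.dense_init,
                  W_constraint=maxnorm(config.dense_max_norm))
        if i == 0 and len(input_names) > 1:
            graph.add_node(l, name=layer_name, inputs=list(input_names),
                           merge_mode='concat')
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
            prev_layer = layer_name + 'bn'
        graph.add_node(Activation('relu'), name=layer_name + 'relu', input=prev_layer)
        prev_layer = layer_name + 'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name + 'do', input=prev_layer)
            prev_layer = layer_name + 'do'
    return prev_layer


def build_residual_blocks(graph, config, prev_layer):
    # Identity-skip blocks: a few dense layers, summed with the block input,
    # then a ReLU on the sum.
    for i in range(config.n_residual_blocks):
        block_name = '%02d' % i
        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'
        for layer_num in range(getattr(config, 'n_layers_per_residual_block', 2)):
            layer_name = 'h%s%02d' % (block_name, layer_num)
            graph.add_node(Dense(config.n_hidden_residual, init=config.dense_init),
                           name=layer_name, input=prev_layer)
            prev_layer = layer_name
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name + 'bn', input=prev_layer)
                prev_layer = layer_name + 'bn'
            graph.add_node(Activation('relu'), name=layer_name + 'relu', input=prev_layer)
            prev_layer = layer_name + 'relu'
        graph.add_node(Identity(), name=block_name + 'output',
                       inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name + 'relu', input=block_name + 'output')
        prev_layer = block_name + 'relu'
    return prev_layer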
Example #13
def build_model(config, n_classes):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(config, input_width=config.model_input_width),
            name='non_word_embedding', input=config.non_word_input_name)
    conv = build_convolutional_layer(config)
    conv.trainable = config.train_filters
    graph.add_node(conv, name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')
    prev_layer = 'non_word_flatten'
    if config.gaussian_noise_sd > 0.:
        print('gaussian noise %.02f' % config.gaussian_noise_sd)
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
                name="non_word_noise", input="non_word_flatten")
        prev_layer = 'non_word_noise'

    # Add some number of fully-connected layers without skip connections.
    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' %i
        l = Dense(n_hidden,
                init=config.dense_init,
                W_constraint=maxnorm(config.dense_max_norm))
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = Dense(config.n_hidden_residual,
                    init=config.dense_init,
                    W_constraint=maxnorm(config.residual_max_norm))
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if config.dropout_fc_p > 0.:
                    graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    if hasattr(config, 'n_hsm_classes'):
        # Name this node to match the graph output added below.
        graph.add_node(build_hierarchical_softmax_layer(config),
            name='softmax_activation', input=prev_layer)
    else:
        graph.add_node(Dense(n_classes,
                init=config.dense_init,
                W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax', input=prev_layer)
        prev_layer = 'softmax'
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name='softmax_bn', input='softmax')
            prev_layer = 'softmax_bn'
        graph.add_node(Activation('softmax'), name='softmax_activation', input=prev_layer)

    graph.add_output(name='multiclass_correction_target', input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss={'multiclass_correction_target': config.loss}, optimizer=optimizer)

    return graph
Example #14
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input,
                    input_shape=(non_word_input_width, ),
                    dtype='int')
    graph.add_node(build_embedding_layer(args,
                                         input_width=non_word_input_width),
                   name='non_word_embedding',
                   input=non_word_input)
    graph.add_node(build_convolutional_layer(args),
                   name='non_word_conv',
                   input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
                   name='non_word_pool',
                   input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    # Add some number of fully-connected layers without skip connections.
    prev_layer = 'non_word_flatten'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(),
                           name=layer_name + 'bn',
                           input=prev_layer)
            prev_layer = layer_name + 'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p),
                           name=layer_name + 'do',
                           input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(),
                               name=layer_name + 'bn',
                               input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'),
                               name=layer_name + 'relu',
                               input=prev_layer)
                prev_layer = layer_name + 'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p),
                                   name=layer_name + 'do',
                                   input=prev_layer)
                    prev_layer = layer_name + 'do'

        graph.add_node(Identity(),
                       name=block_name + 'output',
                       inputs=[block_input_layer, prev_layer],
                       merge_mode='sum')
        graph.add_node(Activation('relu'),
                       name=block_name + 'relu',
                       input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    n_classes = np.max(train_data['multiclass_correction_target']) + 1
    if hasattr(args, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(args),
                       name='softmax',
                       input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes,
                                         activation='softmax'),
                       name='softmax',
                       input=prev_layer)

    graph.add_output(name='multiclass_correction_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'multiclass_correction_target': args.loss},
                  optimizer=optimizer)

    return graph
Example #15
0
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')

    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if args.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes,
            activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #16
0
def build_graph(config, train_data):
    graph = Graph()

    input_width = train_data[config.data_name[0]].shape[1]

    graph.add_input(config.data_name[0], input_shape=(input_width,), dtype='int')

    graph.add_node(build_embedding_layer(config, input_width=input_width),
            name='embedding', input=config.data_name[0])

    graph.add_node(build_convolutional_layer(config), name='conv', input='embedding')
    prev_layer = 'conv'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(config, input_width=input_width),
            name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(config, n_hidden=config.fully_connected[-1])
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if config.dropout_fc_p > 0.:
                    graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(Dense(2), 
            name='softmax_dense', input=prev_layer)
    prev_layer = 'softmax_dense'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(),
                name='softmax_bn', input=prev_layer)
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'),
        name='softmax_activation', input=prev_layer)

    graph.add_output(name=config.target_name, input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss={config.target_name: config.loss}, optimizer=optimizer)

    return graph
Example #17
def build_model(config):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_input(config.candidate_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')

    graph.add_shared_node(
        build_embedding_layer(config, input_width=config.model_input_width),
        name='embedding',
        inputs=[config.non_word_input_name, config.candidate_word_input_name],
        outputs=['non_word_embedding', 'candidate_word_embedding'])

    non_word_prev_layer = 'non_word_embedding'

    if config.dropout_embedding_p > 0.:
        graph.add_node(Dropout(config.dropout_embedding_p),
                name='non_word_embedding_do', input='non_word_embedding')
        non_word_prev_layer = 'non_word_embedding_do'

    # Add noise only to non-words.
    if config.gaussian_noise_sd > 0.:
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
            name='non_word_embedding_noise', input=non_word_prev_layer)
        non_word_prev_layer = 'non_word_embedding_noise'

    graph.add_shared_node(
            build_convolutional_layer(config),
            name='conv',
            inputs=[non_word_prev_layer, 'candidate_word_embedding'],
            outputs=['non_word_conv', 'candidate_word_conv'])

    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    candidate_word_prev_layer = add_bn_relu(graph, config, 'candidate_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='candidate_word_pool', input=candidate_word_prev_layer)
    graph.add_node(Flatten(), name='candidate_word_flatten', input='candidate_word_pool')

    # Compute similarity of the non-word and candidate.
    if config.char_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    char_merge_layer = Dense(config.char_merge_n_hidden,
        W_constraint=maxnorm(config.char_merge_max_norm))
    graph.add_node(char_merge_layer,
        name='char_merge',
        inputs=['non_word_flatten', 'candidate_word_flatten'],
        merge_mode=config.char_merge_mode,
        dot_axes=dot_axes)

    prev_char_layer = 'char_merge'
    if config.scale_char_merge_output:
        if config.char_merge_act == "sigmoid":
            lambda_layer = Lambda(lambda x: 12.*x-6.)
        elif config.char_merge_act == "tanh":
            lambda_layer = Lambda(lambda x: 6.*x-3.)
        else:
            lambda_layer = Lambda(lambda x: x)
        graph.add_node(lambda_layer,
            name='char_merge_scale', input='char_merge')
        prev_char_layer = 'char_merge_scale'

    # Add some number of fully-connected layers without skip connections.
    prev_layer = prev_char_layer

    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        graph.add_node(Activation('relu'),
            name=layer_name+'relu', input=prev_layer)
        prev_layer=layer_name+'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(config, n_hidden=config.n_hidden_residual)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU (and dropout) after every layer except the last in the block.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if config.dropout_fc_p > 0.:
                    graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    # Save the name of the last dense layer for the distance and rank targets.
    last_dense_layer = prev_layer

    # Add softmax for binary prediction of whether the real word input
    # is the true correction for the non-word input.
    graph.add_node(Dense(2, W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax',
            inputs=[prev_char_layer, prev_layer],
            merge_mode='concat')
    prev_layer = 'softmax'

    if config.batch_normalization:
        graph.add_node(BatchNormalization(), 
                name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'),
            name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    lossdict = {}
    lossdict[config.target_name] = config.loss

    for distance_name in config.distance_targets:
        #add_linear_output_mlp(graph, ['non_word_flatten', 'candidate_word_flatten'],
        #        distance_name+'_first',
        #        config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, 'dense00', distance_name+'_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, distance_name+'_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    if config.use_rank_target:
        add_linear_output_mlp(graph, 'dense00', 'candidate_rank_first', 
                config.fully_connected[-1], 10,
                config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, 'candidate_rank_last', 
                config.fully_connected[-1], 10,
                config.batch_normalization, lossdict)

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss=lossdict, optimizer=optimizer)

    return graph
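add_linear_output_mlp, used above for the distance and rank targets, is also not shown. From its call sites it appears to bolt a small regression head onto the graph, register it as an output, and record a loss for it in lossdict. The sketch below is a guess at such a helper; in particular the meaning of the two numeric arguments (taken here as the input width and hidden size) and the 'mse' loss are assumptions.

# Hypothetical reconstruction of add_linear_output_mlp based on its call sites.
from keras.layers.core import Activation, Dense
from keras.layers.normalization import BatchNormalization


def add_linear_output_mlp(graph, inputs, name, n_input, n_hidden,
                          batch_normalization, lossdict, loss='mse'):
    # Attach a small MLP with a single linear output named `name` and register
    # it as an auxiliary regression target (e.g. an edit distance or a rank).
    # `n_input` only documents the expected input width; the layers here do
    # not need it.
    hidden = Dense(n_hidden)
    if isinstance(inputs, (list, tuple)):
        graph.add_node(hidden, name=name + '_hidden', inputs=list(inputs),
                       merge_mode='concat')
    else:
        graph.add_node(hidden, name=name + '_hidden', input=inputs)
    prev_layer = name + '_hidden'
    if batch_normalization:
        graph.add_node(BatchNormalization(), name=name + '_bn', input=prev_layer)
        prev_layer = name + '_bn'
    graph.add_node(Activation('relu'), name=name + '_relu', input=prev_layer)
    graph.add_node(Dense(1), name=name + '_out', input=name + '_relu')
    graph.add_output(name=name, input=name + '_out')
    lossdict[name] = loss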