Example #1
def build_sequential(config, train_data):
    model = Sequential()

    input_width = train_data[config.data_name[0]].shape[1]

    model.add(build_embedding_layer(config, input_width=input_width))
    model.add(build_convolutional_layer(config))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(build_pooling_layer(config, input_width=input_width))
    model.add(Flatten())

    for i,n_hidden in enumerate(config.fully_connected):
        model.add(build_dense_layer(config, n_hidden=n_hidden))
        if config.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(build_dense_layer(config, n_hidden=2))
    if config.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('softmax'))

    load_weights(config, model)

    optimizer = build_optimizer(config)

    model.compile(loss=config.loss, optimizer=optimizer)

    return model
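
A minimal usage sketch (hypothetical, not from the original source): these builders expect a config object with attribute access plus a dict of named input arrays, so an argparse.Namespace is enough for experimenting. Only the attributes read directly by build_sequential are shown; the build_* helpers and load_weights need their own.

from argparse import Namespace

config = Namespace(
    data_name=['marked_chars'],        # name of the input array in train_data
    batch_normalization=True,
    fully_connected=[256, 256],        # widths of the hidden dense layers
    loss='categorical_crossentropy',
)

# train_data maps input names to integer matrices of shape (n_examples, input_width).
# model = build_sequential(config, train_data)
# model.fit(train_data[config.data_name[0]], targets, nb_epoch=10)
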
Example #2
def build_residual_block(args, name, input_shape, n_hidden, n_skip=2):
    """
    Rough sketch of building blocks of layers for residual learning.
    See http://arxiv.org/abs/1512.03385 for motivation.
    """
    block = Graph()
    input_name = 'x'
    block.add_input(input_name, input_shape=input_shape)

    # The current keras graph implementation doesn't allow you to connect
    # an input node to an output node.  Use Identity to work around that.
    block.add_node(Identity(), name=name+'identity', input=input_name)
    prev_layer = name+'identity'

    for i in range(n_skip):
        layer_name = 'h' + str(i)

        l = build_dense_layer(args, n_hidden=n_hidden)
        block.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name

        # Haven't gotten this to work yet.
        #bn = BatchNormalization()
        #block.add_node(bn, name=layer_name+'bn', input=prev_layer)
        #prev_layer = layer_name+'bn'

        # Apply ReLU to every layer except the last one, so the residual sum
        # stays linear (an activation can follow the merged output).
        if i < n_skip - 1:
            a = Activation('relu')
            block.add_node(a, name=layer_name+'relu', input=prev_layer)
            prev_layer = layer_name+'relu'

    block.add_output(name=name+'output', inputs=[name+'identity', prev_layer], merge_mode='sum')

    return block
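
Identity is not a layer that shipped with the Keras of this era; these snippets presumably define their own passthrough layer. A minimal sketch under that assumption, using the old get_output/get_input layer API:

from keras.layers.core import Layer

class Identity(Layer):
    # Passthrough: returns its input unchanged. Needed because the old Graph
    # API cannot wire an input node directly into an output or merge node.
    def get_output(self, train=False):
        return self.get_input(train)
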
Example #3
def build_fully_connected_layers(graph, config, prev_non_word_layer, prev_context_layer):
    if config.pool_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    # Add some number of fully-connected layers without skip connections.
    prev_layer = None

    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(config, n_hidden=n_hidden,
                init=config.dense_init,
                max_norm=config.dense_max_norm)
        if i == 0:
            graph.add_node(l, name=layer_name,
                    inputs=[prev_non_word_layer, prev_context_layer],
                    merge_mode=config.pool_merge_mode,
                    dot_axes=dot_axes)
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
        prev_layer = layer_name+'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    return prev_layer
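
For reference, merge_mode='cos' with dot_axes=([1], [1]) reduces the two (batch, d) inputs to their per-example cosine similarity before the first dense layer (Keras may keep an extra singleton dimension). Equivalent numpy, as an illustration only:

import numpy as np

a = np.random.randn(4, 8)   # e.g. pooled non-word features, shape (batch, d)
b = np.random.randn(4, 8)   # e.g. pooled context features, shape (batch, d)
cos = np.sum(a * b, axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
print(cos.shape)            # (4,): one similarity per example
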
Example #4
def build_model(args):
    np.random.seed(args.seed)

    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if 'normalization' in args.regularization_layer:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if 'normalization' in args.regularization_layer:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if 'dropout' in args.regularization_layer:
            model.add(Dropout(args.dropout_p))

    model.add(build_dense_layer(args, args.n_classes,
            activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)

    model.compile(loss=args.loss, optimizer=optimizer)

    for k,v in json.loads(model.to_json()).items():
        if k == 'layers':
            for l in v:
                print('  %s => %s' %(l['name'], l))

    return model
Example #5
def build_ordinary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    if args.dropout_embedding_p > 0.:
        model.add(Dropout(args.dropout_embedding_p))
    model.add(build_convolutional_layer(args))
    if args.batch_normalization:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    if args.dropout_conv_p > 0.:
        model.add(Dropout(args.dropout_conv_p))

    model.add(build_pooling_layer(args))
    model.add(Flatten())

    for i in range(args.n_fully_connected):
        model.add(build_dense_layer(args))
        if args.batch_normalization:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        if args.dropout_fc_p > 0.:
            model.add(Dropout(args.dropout_fc_p))

    model.add(build_dense_layer(args, args.n_classes,
            activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)

    model.compile(loss=args.loss, optimizer=optimizer)

    if args.verbose:
        for k,v in json.loads(model.to_json()).items():
            if k == 'layers':
                for l in v:
                    print('  => %s' % l['name'])

    return model
Example #6
def build_dictionary_model(args):
    model = Sequential()
    model.add(build_embedding_layer(args))
    model.add(build_convolutional_layer(args))
    model.add(build_pooling_layer(args))
    model.add(Flatten())
    model.add(build_dense_layer(args, args.n_classes, activation='softmax'))

    load_weights(args, model)

    optimizer = build_optimizer(args)
    model.compile(loss=args.loss, optimizer=optimizer)

    return model
Example #7
def build_graph(config, train_data):
    graph = Graph()

    input_width = train_data[config.data_name[0]].shape[1]

    graph.add_input(config.data_name[0], input_shape=(input_width,), dtype='int')

    graph.add_node(build_embedding_layer(config, input_width=input_width),
            name='embedding', input=config.data_name[0])

    graph.add_node(build_convolutional_layer(config), name='conv', input='embedding')
    prev_layer = 'conv'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(config, input_width=input_width),
            name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(config, n_hidden=config.fully_connected[-1])
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if config.dropout_fc_p > 0.:
                    graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(Dense(2), 
            name='softmax_dense', input=prev_layer)
    prev_layer = 'softmax_dense'
    if config.batch_normalization:
        graph.add_node(BatchNormalization(),
                name='softmax_bn', input=prev_layer)
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'),
        name='softmax_activation', input=prev_layer)

    graph.add_output(name=config.target_name, input='softmax_activation')

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss={config.target_name: config.loss}, optimizer=optimizer)

    return graph
Example #8
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = "non_word_marked_chars"
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype="int")
    graph.add_node(
        build_embedding_layer(args, input_width=non_word_input_width), name="non_word_embedding", input=non_word_input
    )
    graph.add_node(build_convolutional_layer(args), name="non_word_conv", input="non_word_embedding")
    non_word_prev_layer = add_bn_relu(graph, args, "non_word_conv")
    graph.add_node(
        build_pooling_layer(args, input_width=non_word_input_width), name="non_word_pool", input=non_word_prev_layer
    )
    graph.add_node(Flatten(), name="non_word_flatten", input="non_word_pool")

    # Add some number of fully-connected layers without skip connections.
    prev_layer = "non_word_flatten"
    for i in range(args.n_fully_connected):
        layer_name = "dense%02d" % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
            prev_layer = layer_name + "bn"
        if args.dropout_fc_p > 0.0:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
            prev_layer = layer_name + "do"

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = "%02d" % i

        graph.add_node(Identity(), name=block_name + "input", input=prev_layer)
        prev_layer = block_input_layer = block_name + "input"

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = "h%s%02d" % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name + "bn", input=prev_layer)
                prev_layer = layer_name + "bn"

            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation("relu"), name=layer_name + "relu", input=prev_layer)
                prev_layer = layer_name + "relu"
                if args.dropout_fc_p > 0.0:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name + "do", input=prev_layer)
                    prev_layer = layer_name + "do"

        graph.add_node(Identity(), name=block_name + "output", inputs=[block_input_layer, prev_layer], merge_mode="sum")
        graph.add_node(Activation("relu"), name=block_name + "relu", input=block_name + "output")
        prev_layer = block_input_layer = block_name + "relu"

    n_classes = np.max(train_data["multiclass_correction_target"]) + 1
    if hasattr(args, "n_hsm_classes"):
        graph.add_node(build_hierarchical_softmax_layer(args), name="softmax", input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes, activation="softmax"), name="softmax", input=prev_layer)

    graph.add_output(name="multiclass_correction_target", input="softmax")

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={"multiclass_correction_target": args.loss}, optimizer=optimizer)

    return graph
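
The add_bn_relu helper used here (and in several later examples) is not shown. Judging from the call sites, it appends an optional BatchNormalization and a ReLU after the named node and returns the name of the last node added; a hypothetical reconstruction (the name suffixes are assumptions):

def add_bn_relu(graph, args, input_name):
    # Optional batch norm followed by ReLU; returns the last node's name so
    # the caller can keep threading layer names through the Graph.
    prev_layer = input_name
    if args.batch_normalization:
        graph.add_node(BatchNormalization(), name=input_name + '_bn', input=prev_layer)
        prev_layer = input_name + '_bn'
    graph.add_node(Activation('relu'), name=input_name + '_relu', input=prev_layer)
    return input_name + '_relu'
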
Example #9
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width, ), dtype='int')

    graph.add_node(build_embedding_layer(args),
                   name='embedding',
                   input='input')

    graph.add_node(build_convolutional_layer(args),
                   name='conv',
                   input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(),
                           name=layer_name + 'bn',
                           input=prev_layer)
            prev_layer = layer_name + 'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p),
                           name=layer_name + 'do',
                           input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(),
                               name=layer_name + 'bn',
                               input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'),
                               name=layer_name + 'relu',
                               input=prev_layer)
                prev_layer = layer_name + 'relu'
                if 'dropout' in args.regularization_layer:
                    graph.add_node(Dropout(args.dropout_p),
                                   name=layer_name + 'do',
                                   input=prev_layer)
                    prev_layer = layer_name + 'do'

        graph.add_node(Identity(),
                       name=block_name + 'output',
                       inputs=[block_input_layer, prev_layer],
                       merge_mode='sum')
        graph.add_node(Activation('relu'),
                       name=block_name + 'relu',
                       input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    graph.add_node(build_dense_layer(args,
                                     args.n_classes,
                                     activation='softmax'),
                   name='softmax',
                   input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #10
def build_model(args, train_data, validation_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    real_word_input = 'real_word_marked_chars'

    non_word_input_width = train_data.data[non_word_input].shape[1]
    real_word_input_width = train_data.data[real_word_input].shape[1]

    print('non_word_input_width', non_word_input_width)
    print('real_word_input_width', real_word_input_width)

    graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=non_word_input_width),
            name='non_word_embedding', input=non_word_input)
    graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    graph.add_input(real_word_input, input_shape=(real_word_input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args, input_width=real_word_input_width),
            name='real_word_embedding', input=real_word_input)
    graph.add_node(build_convolutional_layer(args), name='real_word_conv', input='real_word_embedding')
    real_word_prev_layer = add_bn_relu(graph, args, 'real_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=real_word_input_width),
            name='real_word_pool', input=real_word_prev_layer)
    graph.add_node(Flatten(), name='real_word_flatten', input='real_word_pool')

    # Add some number of fully-connected layers without skip connections.
    prev_layer = 'join_non_and_real'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        if i == 0:
            graph.add_node(l, name=layer_name,
                inputs=['non_word_flatten', 'real_word_flatten'])
        else:
            graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    #if hasattr(args, 'n_hsm_classes'):
    #    graph.add_node(build_hierarchical_softmax_layer(args),
    #            name='softmax', input=prev_layer)
    #else:

    graph.add_node(build_dense_layer(args, 2,
        activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='binary_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'binary_target': args.loss}, optimizer=optimizer)

    return graph
Example #11
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')

    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if 'normalization' in args.regularization_layer:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add two dense layers.
    for i in range(2):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_filters)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if 'normalization' in args.regularization_layer:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if 'dropout' in args.regularization_layer:
            graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        for layer_num in range(args.n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_filters)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if 'normalization' in args.regularization_layer:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < args.n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if 'dropout' in args.regularization_layer:
                    graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes,
            activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #12
def build_residual_model(args):
    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width,), dtype='int')

    graph.add_node(build_embedding_layer(args), name='embedding', input='input')

    graph.add_node(build_convolutional_layer(args), name='conv', input='embedding')
    prev_layer = 'conv'
    if args.batch_normalization:
        graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer)
        prev_layer = 'conv_bn'
    graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer)

    graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu')

    graph.add_node(Flatten(), name='flatten', input='pool')
    prev_layer = 'flatten'

    # Add some number of fully-connected layers without skip connections.
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' %i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'
    
    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if args.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    graph.add_node(build_dense_layer(args, args.n_classes,
            activation='softmax'), name='softmax', input=prev_layer)

    graph.add_output(name='output', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
Example #13
def build_model(args, train_data):
    np.random.seed(args.seed)

    graph = Graph()

    non_word_input = 'non_word_marked_chars'
    non_word_input_width = train_data[non_word_input].shape[1]

    graph.add_input(non_word_input,
                    input_shape=(non_word_input_width, ),
                    dtype='int')
    graph.add_node(build_embedding_layer(args,
                                         input_width=non_word_input_width),
                   name='non_word_embedding',
                   input=non_word_input)
    graph.add_node(build_convolutional_layer(args),
                   name='non_word_conv',
                   input='non_word_embedding')
    non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv')
    graph.add_node(build_pooling_layer(args, input_width=non_word_input_width),
                   name='non_word_pool',
                   input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    # Add some number of fully-connected layers without skip connections.
    prev_layer = 'non_word_flatten'
    for i in range(args.n_fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(args, n_hidden=args.n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if args.batch_normalization:
            graph.add_node(BatchNormalization(),
                           name=layer_name + 'bn',
                           input=prev_layer)
            prev_layer = layer_name + 'bn'
        if args.dropout_fc_p > 0.:
            graph.add_node(Dropout(args.dropout_fc_p),
                           name=layer_name + 'do',
                           input=prev_layer)
            prev_layer = layer_name + 'do'

    # Add sequence of residual blocks.
    for i in range(args.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name + 'input', input=prev_layer)
        prev_layer = block_input_layer = block_name + 'input'

        try:
            n_layers_per_residual_block = args.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)

            l = build_dense_layer(args, n_hidden=args.n_hidden)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name

            if args.batch_normalization:
                graph.add_node(BatchNormalization(),
                               name=layer_name + 'bn',
                               input=prev_layer)
                prev_layer = layer_name + 'bn'

            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'),
                               name=layer_name + 'relu',
                               input=prev_layer)
                prev_layer = layer_name + 'relu'
                if args.dropout_fc_p > 0.:
                    graph.add_node(Dropout(args.dropout_fc_p),
                                   name=layer_name + 'do',
                                   input=prev_layer)
                    prev_layer = layer_name + 'do'

        graph.add_node(Identity(),
                       name=block_name + 'output',
                       inputs=[block_input_layer, prev_layer],
                       merge_mode='sum')
        graph.add_node(Activation('relu'),
                       name=block_name + 'relu',
                       input=block_name + 'output')
        prev_layer = block_input_layer = block_name + 'relu'

    n_classes = np.max(train_data['multiclass_correction_target']) + 1
    if hasattr(args, 'n_hsm_classes'):
        graph.add_node(build_hierarchical_softmax_layer(args),
                       name='softmax',
                       input=prev_layer)
    else:
        graph.add_node(build_dense_layer(args, n_classes,
                                         activation='softmax'),
                       name='softmax',
                       input=prev_layer)

    graph.add_output(name='multiclass_correction_target', input='softmax')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'multiclass_correction_target': args.loss},
                  optimizer=optimizer)

    return graph
Example #14
def build_model(config):
    np.random.seed(config.seed)

    graph = Graph()

    graph.add_input(config.non_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')
    graph.add_input(config.candidate_word_input_name,
            input_shape=(config.model_input_width,), dtype='int')

    graph.add_shared_node(
        build_embedding_layer(config, input_width=config.model_input_width),
        name='embedding',
        inputs=[config.non_word_input_name, config.candidate_word_input_name],
        outputs=['non_word_embedding', 'candidate_word_embedding'])

    non_word_prev_layer = 'non_word_embedding'

    if config.dropout_embedding_p > 0.:
        graph.add_node(Dropout(config.dropout_embedding_p),
                name='non_word_embedding_do', input='non_word_embedding')
        non_word_prev_layer = 'non_word_embedding_do'

    # Add noise only to non-words.
    if config.gaussian_noise_sd > 0.:
        graph.add_node(GaussianNoise(config.gaussian_noise_sd),
            name='non_word_embedding_noise', input=non_word_prev_layer)
        non_word_prev_layer = 'non_word_embedding_noise'

    graph.add_shared_node(
            build_convolutional_layer(config),
            name='conv',
            inputs=[non_word_prev_layer, 'candidate_word_embedding'],
            outputs=['non_word_conv', 'candidate_word_conv'])

    non_word_prev_layer = add_bn_relu(graph, config, 'non_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='non_word_pool', input=non_word_prev_layer)
    graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool')

    candidate_word_prev_layer = add_bn_relu(graph, config, 'candidate_word_conv')
    graph.add_node(build_pooling_layer(config, input_width=config.model_input_width),
            name='candidate_word_pool', input=candidate_word_prev_layer)
    graph.add_node(Flatten(), name='candidate_word_flatten', input='candidate_word_pool')

    # Compute similarity of the non-word and candidate.
    if config.char_merge_mode == 'cos':
        dot_axes = ([1], [1])
    else:
        dot_axes = -1

    char_merge_layer = Dense(config.char_merge_n_hidden,
        W_constraint=maxnorm(config.char_merge_max_norm))
    graph.add_node(char_merge_layer,
        name='char_merge',
        inputs=['non_word_flatten', 'candidate_word_flatten'],
        merge_mode=config.char_merge_mode,
        dot_axes=dot_axes)

    prev_char_layer = 'char_merge'
    if config.scale_char_merge_output:
        if config.char_merge_act == "sigmoid":
            lambda_layer = Lambda(lambda x: 12.*x-6.)
        elif config.char_merge_act == "tanh":
            lambda_layer = Lambda(lambda x: 6.*x-3.)
        else:
            lambda_layer = Lambda(lambda x: x)
        graph.add_node(lambda_layer,
            name='char_merge_scale', input='char_merge')
        prev_char_layer = 'char_merge_scale'

    # Add some number of fully-connected layers without skip connections.
    prev_layer = prev_char_layer

    for i,n_hidden in enumerate(config.fully_connected):
        layer_name = 'dense%02d' % i
        l = build_dense_layer(config, n_hidden=n_hidden)
        graph.add_node(l, name=layer_name, input=prev_layer)
        prev_layer = layer_name
        if config.batch_normalization:
            graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
            prev_layer = layer_name+'bn'
        graph.add_node(Activation('relu'),
            name=layer_name+'relu', input=prev_layer)
        prev_layer=layer_name+'relu'
        if config.dropout_fc_p > 0.:
            graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
            prev_layer = layer_name+'do'

    # Add sequence of residual blocks.
    for i in range(config.n_residual_blocks):
        # Add a fixed number of layers per residual block.
        block_name = '%02d' % i

        graph.add_node(Identity(), name=block_name+'input', input=prev_layer)
        prev_layer = block_input_layer = block_name+'input'

        try:
            n_layers_per_residual_block = config.n_layers_per_residual_block
        except AttributeError:
            n_layers_per_residual_block = 2

        for layer_num in range(n_layers_per_residual_block):
            layer_name = 'h%s%02d' % (block_name, layer_num)
    
            l = build_dense_layer(config, n_hidden=config.n_hidden_residual)
            graph.add_node(l, name=layer_name, input=prev_layer)
            prev_layer = layer_name
    
            if config.batch_normalization:
                graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer)
                prev_layer = layer_name+'bn'
    
            # ReLU after every layer in the block except the last; the final
            # ReLU is applied after the residual sum below.
            if layer_num < n_layers_per_residual_block - 1:
                graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer)
                prev_layer = layer_name+'relu'
                if config.dropout_fc_p > 0.:
                    graph.add_node(Dropout(config.dropout_fc_p), name=layer_name+'do', input=prev_layer)
                    prev_layer = layer_name+'do'

        graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum')
        graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output')
        prev_layer = block_input_layer = block_name+'relu'

    # Save the name of the last dense layer for the distance and rank targets.
    last_dense_layer = prev_layer

    # Add softmax for binary prediction of whether the real word input
    # is the true correction for the non-word input.
    graph.add_node(Dense(2, W_constraint=maxnorm(config.softmax_max_norm)),
            name='softmax',
            inputs=[prev_char_layer, prev_layer],
            merge_mode='concat')
    prev_layer = 'softmax'

    if config.batch_normalization:
        graph.add_node(BatchNormalization(), 
                name='softmax_bn', input='softmax')
        prev_layer = 'softmax_bn'
    graph.add_node(Activation('softmax'),
            name='softmax_activation', input=prev_layer)
    graph.add_output(name=config.target_name, input='softmax_activation')

    lossdict = {}
    lossdict[config.target_name] = config.loss

    for distance_name in config.distance_targets:
        #add_linear_output_mlp(graph, ['non_word_flatten', 'candidate_word_flatten'],
        #        distance_name+'_first',
        #        config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, 'dense00', distance_name+'_first',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, distance_name+'_last',
                config.fully_connected[-1], 10, config.batch_normalization, lossdict)

    if config.use_rank_target:
        add_linear_output_mlp(graph, 'dense00', 'candidate_rank_first', 
                config.fully_connected[-1], 10,
                config.batch_normalization, lossdict)
        add_linear_output_mlp(graph, last_dense_layer, 'candidate_rank_last', 
                config.fully_connected[-1], 10,
                config.batch_normalization, lossdict)

    load_weights(config, graph)

    optimizer = build_optimizer(config)

    graph.compile(loss=lossdict, optimizer=optimizer)

    return graph
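
The add_linear_output_mlp helper is also not shown. From its call sites it attaches a small MLP with a single linear output to an existing node, registers a graph output under output_name, and records a loss for it in lossdict. A sketch under those assumptions (the layer sizes and the mse loss are guesses):

def add_linear_output_mlp(graph, input_name, output_name, n_input, n_hidden,
                          batch_normalization, lossdict):
    # Hypothetical reconstruction: one hidden layer, optional batch norm and
    # ReLU, then a single linear unit for an auxiliary regression target.
    # n_input is kept only for compatibility with the call sites above.
    prefix = output_name + '_mlp'
    graph.add_node(Dense(n_hidden), name=prefix, input=input_name)
    prev_layer = prefix
    if batch_normalization:
        graph.add_node(BatchNormalization(), name=prefix + '_bn', input=prev_layer)
        prev_layer = prefix + '_bn'
    graph.add_node(Activation('relu'), name=prefix + '_relu', input=prev_layer)
    graph.add_node(Dense(1), name=prefix + '_out', input=prefix + '_relu')
    graph.add_output(name=output_name, input=prefix + '_out')
    lossdict[output_name] = 'mse'   # assumed squared-error loss for a linear output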