Example 1
def get_model(encoding):

    return CSM(
        layers=[
            DictionaryEncoding(vocabulary=encoding),

            WordEmbedding(
                dimension={{embedding_dimension}},
                vocabulary_size=len(encoding),
                padding=encoding['PADDING']),

            {% for layer in word_layers %}
            {% set layer_index = loop.index0 %}

            SentenceConvolution(
                n_feature_maps={{layer.n_feature_maps}},
                kernel_width={{layer.kernel_width}},
                n_channels={{layer.n_channels}},
                n_input_dimensions=1),

            Bias(
                n_input_dims=1,
                n_feature_maps={{layer.n_feature_maps}}),

            KMaxPooling(k={{layer.k_pooling}}, k_dynamic={{ layer.k_dynamic if layer.k_dynamic > 0 else none }}),

            {{layer.nonlinearity}}(),

            {% endfor %}

            ReshapeForDocuments(),

            {% for layer in sentence_layers %}
            {% set layer_index = loop.index0 %}

            SentenceConvolution(
                n_feature_maps={{layer.n_feature_maps}},
                kernel_width={{layer.kernel_width}},
                n_channels={{layer.n_channels}},
                n_input_dimensions=1),

            Bias(
                n_input_dims=1,
                n_feature_maps={{layer.n_feature_maps}}),

            KMaxPooling(k={{layer.k_pooling}}, k_dynamic={{ layer.k_dynamic if layer.k_dynamic > 0 else none }}),

            {{layer.nonlinearity}}(),

            {% endfor %}

            {% if dropout %}
            Dropout(('b', 'd', 'f', 'w'), 0.5),
            {% endif %}

            Softmax(
                n_classes={{n_classes}},
                n_input_dimensions={{softmax_input_dimensions}}),
            ])
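
This builder is a Jinja2 template rather than plain Python: the {{...}} and {%...%} markers are substituted before the file is executed. A minimal sketch of rendering it with jinja2 follows, assuming the template is saved as model_template.py.j2; all context values below are illustrative placeholders, not settings from the original experiments.

# Sketch only: the file name and every context value are assumptions.
from jinja2 import Template

with open("model_template.py.j2") as f:
    template = Template(f.read())

source = template.render(
    embedding_dimension=42,
    word_layers=[dict(n_feature_maps=6, kernel_width=7, n_channels=1,
                      k_pooling=4, k_dynamic=0.5, nonlinearity="Tanh")],
    sentence_layers=[dict(n_feature_maps=5, kernel_width=3, n_channels=6,
                          k_pooling=2, k_dynamic=-1, nonlinearity="Tanh")],
    dropout=True,
    n_classes=2,
    softmax_input_dimensions=10)
print(source)  # rendered Python source defining get_model(encoding)
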
Example 2

def model_two_layer_small_embedding(alphabet):
    return CSM(layers=[
        DictionaryEncoding(vocabulary=alphabet),
        WordEmbedding(dimension=32, vocabulary_size=len(alphabet)),
        SentenceConvolution(n_feature_maps=5,
                            kernel_width=10,
                            n_channels=1,
                            n_input_dimensions=32),
        SumFolding(),
        KMaxPooling(k=7),
        Bias(n_input_dims=16, n_feature_maps=5),
        Tanh(),
        SentenceConvolution(n_feature_maps=5,
                            kernel_width=5,
                            n_channels=5,
                            n_input_dimensions=16),
        KMaxPooling(k=4),
        Bias(n_input_dims=16, n_feature_maps=5),
        Tanh(),
        Softmax(n_classes=2, n_input_dimensions=320),
    ])
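
The n_input_dimensions=320 passed to Softmax follows from the layer shapes above, assuming SumFolding halves the embedding dimension and KMaxPooling keeps k positions per feature map (behaviour inferred from the numbers in these snippets, not from library documentation).

# 32 embedding dims -> 16 after SumFolding; the final pooling keeps k=4
# positions for each of the 5 feature maps.
n_feature_maps, folded_dims, k = 5, 32 // 2, 4
assert n_feature_maps * folded_dims * k == 320  # Softmax input size
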
Example 3

def model_one_layer_variant_2(alphabet):
    return CSM(layers=[
        DictionaryEncoding(vocabulary=alphabet),
        WordEmbedding(dimension=42, vocabulary_size=len(alphabet)),
        SentenceConvolution(n_feature_maps=5,
                            kernel_width=6,
                            n_channels=1,
                            n_input_dimensions=42),
        SumFolding(),
        KMaxPooling(k=4),
        Bias(n_input_dims=21, n_feature_maps=5),
        Tanh(),
        Softmax(n_classes=2, n_input_dimensions=420),
    ])
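
A hypothetical usage sketch, assuming the builders above are importable and that an alphabet is a plain token-to-index dict (the format implied by len(alphabet) and encoding['PADDING'] in these examples).

# Assumption: model_one_layer_variant_2 is importable from the experiment
# module; the toy alphabet below is illustrative only.
alphabet = {ch: i for i, ch in enumerate("abcdefghijklmnopqrstuvwxyz ")}
model = model_one_layer_variant_2(alphabet)
print(model)  # the scripts below print models the same way
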
Example 4
    word_embedding = WordFromCharacterEmbedding(
        embedding_model=word_embedding_model, alphabet_encoding=alphabet)

    # print(word_embedding.fprop(X, meta))

    tweet_model = CSM(layers=[
        word_embedding,
        SentenceConvolution(n_feature_maps=5,
                            kernel_width=10,
                            n_channels=1,
                            n_input_dimensions=80),
        SumFolding(),
        KMaxPooling(k=7),
        Bias(n_input_dims=40, n_feature_maps=5),
        Tanh(),

        # Linear(
        #     n_input=1400,
        #     n_output=500),
        #
        # Tanh(),
        MaxFolding(),
        Softmax(n_classes=2, n_input_dimensions=700),
    ])

    print(tweet_model)
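
The n_input_dimensions=700 for the final Softmax is consistent with each folding layer halving the embedding dimension, whether it folds by sum or by max (a property inferred from the arithmetic here, not from library docs).

dims = 80          # WordFromCharacterEmbedding output per word (as above)
dims //= 2         # SumFolding: 80 -> 40
k = 7              # KMaxPooling(k=7) keeps 7 positions per feature map
dims //= 2         # MaxFolding: 40 -> 20
assert 5 * dims * k == 700  # 5 feature maps -> Softmax input size
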

    # X, Y, meta = train_data_provider.next_batch()
    # Y, meta, fprop_state = model.fprop(X, meta, return_state=True)

    tweet_model = CSM(layers=[
        # cpu.model.encoding.
        DictionaryEncoding(vocabulary=alphabet),

        # cpu.model.embedding.
        WordEmbedding(dimension=28, vocabulary_size=len(alphabet)),

        # HostToDevice(),
        SentenceConvolution(n_feature_maps=6,
                            kernel_width=7,
                            n_channels=1,
                            n_input_dimensions=28),
        Bias(n_input_dims=28, n_feature_maps=6),
        SumFolding(),
        KMaxPooling(k=4, k_dynamic=0.5),
        Tanh(),
        SentenceConvolution(n_feature_maps=14,
                            kernel_width=5,
                            n_channels=6,
                            n_input_dimensions=14),
        Bias(n_input_dims=14, n_feature_maps=14),
        SumFolding(),
        KMaxPooling(k=4),
        Tanh(),
        Softmax(n_classes=2, n_input_dimensions=392),
    ])

    print(tweet_model)
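
k_dynamic=0.5 presumably selects dynamic k-max pooling in the style of Kalchbrenner et al.'s DCNN, where the pooled k grows with the input length and k=4 acts as a floor. A sketch of that assumed rule follows; the library's exact formula may differ.

import math

def dynamic_k(input_length, k_min=4, fraction=0.5):
    # Assumed semantics: keep a fraction of the input positions per
    # feature map, but never fewer than k_min.
    return max(k_min, int(math.ceil(fraction * input_length)))

print(dynamic_k(20))  # 10
print(dynamic_k(5))   # 4: the floor kicks in on short inputs
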
Example 6

    ## BUILD THE MODEL

    model = CSM(
        layers=[
            WordEmbedding(dimension=embedding_dimension,
                          vocabulary_size=vocabulary_size),
            SentenceConvolution(n_feature_maps=n_feature_maps,
                                kernel_width=kernel_width,
                                n_channels=1,
                                n_input_dimensions=embedding_dimension),
            SumFolding(),
            KMaxPooling(k=pooling_size * 2),
            Bias(n_input_dims=embedding_dimension // 2,
                 n_feature_maps=n_feature_maps),
            Tanh(),

            # Softmax(
            #     n_classes=n_classes,
            #     n_input_dimensions=420),
            SentenceConvolution(n_feature_maps=n_feature_maps,
                                kernel_width=3,
                                n_channels=n_feature_maps,
                                n_input_dimensions=embedding_dimension // 2),
            KMaxPooling(k=pooling_size),
            Bias(n_input_dims=embedding_dimension // 2,
                 n_feature_maps=n_feature_maps),
            Tanh(),
            Softmax(n_classes=n_classes, n_input_dimensions=420),
        ],
    )
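
The hardcoded n_input_dimensions=420 sits oddly next to the otherwise parameterized sizes; it equals n_feature_maps * (embedding_dimension // 2) * pooling_size if this script used, for instance, embedding_dimension=42, n_feature_maps=5, pooling_size=4. Those values are an assumption, not settings recovered from the original script.

# Illustrative values only.
embedding_dimension, n_feature_maps, pooling_size = 42, 5, 4
assert n_feature_maps * (embedding_dimension // 2) * pooling_size == 420
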

    model = CSM(layers=[
        DictionaryEncoding(vocabulary=encoding),
        WordEmbedding(dimension=20,
                      vocabulary_size=len(encoding),
                      padding=encoding['PADDING']),
        Dropout(('b', 'w', 'f'), 0.2),
        SentenceConvolution(n_feature_maps=10,
                            kernel_width=15,
                            n_channels=20,
                            n_input_dimensions=1),
        Bias(n_input_dims=1, n_feature_maps=10),
        KMaxPooling(k=7, k_dynamic=0.5),
        Tanh(),
        SentenceConvolution(n_feature_maps=30,
                            kernel_width=9,
                            n_channels=10,
                            n_input_dimensions=1),
        Bias(n_input_dims=1, n_feature_maps=30),
        KMaxPooling(k=5),
        Tanh(),
        ReshapeForDocuments(),
        SentenceConvolution(n_feature_maps=20,
                            kernel_width=11,
                            n_channels=30 * 5,
                            n_input_dimensions=1),
        Bias(n_input_dims=1, n_feature_maps=20),