Code example #1
File: mocap_conv.py  Project: junbohuang/HearYou2.0
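The snippets below are shown without their import headers. A minimal sketch of the imports they appear to assume follows; the exact module paths are an assumption (the project may import from keras or tensorflow.keras), and top_k_accuracy and AttentionDecoder are project-level or third-party helpers, not standard Keras symbols.

# Assumed imports for the snippets below (sketch, not copied from the project):
from keras.layers import (Input, Dense, Dropout, Flatten, Reshape, Conv2D,
                          MaxPooling2D, BatchNormalization, Embedding, LSTM,
                          Bidirectional, concatenate)
from keras.models import Model
from keras.optimizers import Adam
# Project-specific helpers defined elsewhere in HearYou2.0:
#   top_k_accuracy()  - returns the metric list passed to model.compile()
#   AttentionDecoder  - custom recurrent attention layer
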
def load(feat_size):
    # Conv2D classifier over (200, 189, 1) mocap feature maps.
    # Note: feat_size is not used by this architecture.

    input_layer = Input(shape=(200, 189, 1))
    layer = input_layer

    layer = Conv2D(32, 3, strides=(2, 2), padding='same',
                   activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(64, 3, strides=(2, 2), padding='same',
                   activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(64, 3, strides=(2, 2), padding='same',
                   activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(128, 3, strides=(2, 2), padding='same',
                   activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Conv2D(128, 3, strides=(2, 2), padding='same',
                   activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Flatten()(layer)

    output_layer = Dense(4, activation='softmax')(layer)

    model = Model(inputs=input_layer, outputs=output_layer)
    metrics = top_k_accuracy()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=metrics)

    return model
Code example #2
def load(nb_words, g_word_embedding_matrix, feat_size):
    # Text-only model: pretrained word embeddings followed by stacked BiLSTMs.
    # Note: feat_size is not used by this architecture.

    input_layer = Input(shape=(500, ))
    layer = input_layer

    layer = Embedding(nb_words,
                      300,
                      weights=[g_word_embedding_matrix],
                      input_length=500,
                      trainable=True)(layer)
    layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(layer)
    layer = Dropout(0.2)(layer)
    layer = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(layer)
    layer = Dropout(0.2)(layer)
    layer = Dense(256, activation='relu')(layer)

    output_layer = Dense(4, activation='softmax')(layer)

    model = Model(inputs=input_layer, outputs=output_layer)
    metrics = top_k_accuracy()
    #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-6, amsgrad=False, clipnorm=3.0)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=metrics)
    return model
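
The arguments this text model expects are not shown in the snippet; a hypothetical illustration (names and shapes are assumptions, not taken from the project):

# nb_words                - vocabulary size seen by the tokenizer
# g_word_embedding_matrix - array of shape (nb_words, 300) holding pretrained
#                           (e.g. GloVe-style) word vectors used to seed Embedding
# Input sequences are integer word indices padded or truncated to length 500.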
Code example #3
File: speech_mocap.py  Project: junbohuang/HearYou2.0
def load(feat_size):
    # Two-branch model: a BiLSTM speech encoder and a Conv2D mocap encoder,
    # fused by concatenation before the softmax classifier.

    # Speech branch: stacked BiLSTMs over (100, feat_size) acoustic frames.
    speech_input_layer = Input(shape=(100, feat_size))
    speech_layer = speech_input_layer

    speech_layer = Bidirectional(LSTM(256, return_sequences=True))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Bidirectional(LSTM(256, return_sequences=False))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Dense(256, activation='relu')(speech_layer)

    # Mocap branch: strided Conv2D stack over (200, 189, 1) mocap maps.
    mocap_input_layer = Input(shape=(200, 189, 1))
    mocap_layer = mocap_input_layer

    mocap_layer = Conv2D(32, 3, strides=(2, 2), padding='same', activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64, 3, strides=(2, 2), padding='same', activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64, 3, strides=(2, 2), padding='same', activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(128, 3, strides=(2, 2), padding='same', activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Flatten()(mocap_layer)
    mocap_layer = Dense(256, activation='relu')(mocap_layer)

    # Fuse the two branches and classify.
    combined_layer = concatenate([speech_layer, mocap_layer])
    combined_layer = Dense(256, activation='relu')(combined_layer)

    output_layer = Dense(4, activation='softmax')(combined_layer)

    model = Model(inputs=[speech_input_layer, mocap_input_layer], outputs=output_layer)

    metrics = top_k_accuracy()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=metrics)

    return model
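
Because this model has two inputs, training data must be passed as a list in the same order as the inputs; a hypothetical call (array names are illustrative only):

# model.fit([speech_features, mocap_features], labels_one_hot, batch_size=..., epochs=...)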
Code example #4
def load(feat_size):
    # Speech-only model: two stacked BiLSTMs over (100, feat_size) acoustic frames.

    input_layer = Input(shape=(100, feat_size))
    layer = input_layer

    layer = Bidirectional(LSTM(256, return_sequences=True, recurrent_dropout=0.2))(layer)
    layer = Dropout(0.2)(layer)
    layer = Bidirectional(LSTM(256, return_sequences=False, recurrent_dropout=0.2))(layer)
    layer = Dropout(0.2)(layer)
    # layer = Dense(256, activation='relu')(layer)

    output_layer = Dense(4, activation='softmax')(layer)

    model = Model(inputs=input_layer, outputs=output_layer)

    metrics = top_k_accuracy()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=metrics)

    return model
Code example #5
File: text_speech.py  Project: junbohuang/HearYou2.0
def load(nb_words, g_word_embedding_matrix, feat_size):
    # Two-branch model: a BiLSTM text encoder over pretrained word embeddings and
    # a BiLSTM speech encoder, fused by concatenation before the softmax classifier.

    # Text branch.
    text_input_layer = Input(shape=(500, ))
    text_layer = text_input_layer
    text_layer = Embedding(nb_words,
                           300,
                           weights=[g_word_embedding_matrix],
                           input_length=500,
                           trainable=True)(text_layer)
    text_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    text_layer = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    text_layer = Dense(256, activation='relu')(text_layer)

    # Speech branch.
    speech_input_layer = Input(shape=(100, feat_size))
    speech_layer = speech_input_layer

    speech_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Dense(256, activation='relu')(speech_layer)

    # Fuse the two branches and classify.
    combined_layer = concatenate([text_layer, speech_layer])

    output_layer = Dense(4, activation='softmax')(combined_layer)

    model = Model(inputs=[text_input_layer, speech_input_layer],
                  outputs=output_layer)

    metrics = top_k_accuracy()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=metrics)

    return model
Code example #6
File: speech_dense.py  Project: junbohuang/HearYou2.0
def load(feat_size):
    # Fully connected model over (100, feat_size) acoustic input. The Dense layers
    # act on the last axis, so the final softmax is produced per frame.

    input_layer = Input(shape=(100, feat_size))
    layer = input_layer

    layer = Dense(1024, activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Dense(512, activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)
    layer = Dense(256, activation='relu')(layer)
    layer = BatchNormalization()(layer)
    layer = Dropout(0.2)(layer)

    output_layer = Dense(4, activation='softmax')(layer)

    model = Model(inputs=input_layer, outputs=output_layer)
    metrics = top_k_accuracy()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=metrics)

    return model
Code example #7
def load(feat_size):
    # Two-branch model: a Conv2D + BiLSTM + attention speech encoder over
    # (100, feat_size, 3) acoustic input, and a Conv2D mocap encoder.

    # Speech branch: Conv2D feature extractor.
    speech_input_layer = Input(shape=(100, feat_size, 3))
    speech_layer = speech_input_layer

    speech_layer = Conv2D(128,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = MaxPooling2D(padding='same')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Dense(512, activation="relu")(speech_layer)
    speech_layer = BatchNormalization()(speech_layer)
    # Reshape the conv feature maps into a 100-step sequence for the recurrent layers.
    speech_layer = Reshape((100, -1))(speech_layer)

    speech_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = AttentionDecoder(256, 256)(speech_layer)
    speech_layer = Flatten()(speech_layer)
    speech_layer = Dense(256, activation="relu")(speech_layer)

    # Mocap branch: strided Conv2D stack over (200, 189, 1) mocap maps.
    mocap_input_layer = Input(shape=(200, 189, 1))
    mocap_layer = mocap_input_layer

    mocap_layer = Conv2D(32,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(128,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Flatten()(mocap_layer)
    mocap_layer = Dense(256, activation='relu')(mocap_layer)

    # Fuse the two branches and classify.
    combined_layer = concatenate([speech_layer, mocap_layer])
    combined_layer = Dense(256, activation='relu')(combined_layer)

    output_layer = Dense(4, activation='softmax')(combined_layer)

    model = Model(inputs=[speech_input_layer, mocap_input_layer],
                  outputs=output_layer)

    metrics = top_k_accuracy()
    # Keras 2.x-style Adam arguments (lr, decay, epsilon=None); newer tf.keras
    # versions use learning_rate instead of lr.
    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=1e-6,
                amsgrad=False,
                clipnorm=3.0)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    return model
Code example #8
def load(feat_size):
    # Speech-only model: a Conv2D feature extractor over (100, feat_size, 3) input,
    # followed by stacked LSTMs and an attention decoder.

    input_layer = Input(shape=(100, feat_size, 3))
    layer = input_layer

    layer = Conv2D(128,
                   kernel_size=(5, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu')(layer)
    layer = MaxPooling2D(padding='same')(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(256,
                   kernel_size=(5, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu')(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(256,
                   kernel_size=(5, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu')(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(256,
                   kernel_size=(5, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu')(layer)
    layer = Dropout(0.2)(layer)
    layer = Conv2D(256,
                   kernel_size=(5, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu')(layer)
    layer = Dropout(0.2)(layer)
    layer = Dense(512, activation="relu")(layer)
    layer = BatchNormalization()(layer)
    # Reshape the conv feature maps into a 100-step sequence for the recurrent layers.
    layer = Reshape((100, -1))(layer)

    layer = LSTM(256, return_sequences=True, recurrent_dropout=0.2)(layer)
    layer = Dropout(0.2)(layer)
    layer = LSTM(256, return_sequences=True, recurrent_dropout=0.2)(layer)
    layer = Dropout(0.2)(layer)
    layer = AttentionDecoder(128, 128)(layer)
    layer = Flatten()(layer)
    layer = Dense(512, activation="relu")(layer)
    layer = BatchNormalization()(layer)
    output_layer = Dense(4, activation='softmax')(layer)

    model = Model(inputs=input_layer, outputs=output_layer)

    metrics = top_k_accuracy()
    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=1e-6,
                amsgrad=False)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    return model
Code example #9
def load(nb_words, g_word_embedding_matrix, feat_size):
    # Two-branch model: an LSTM text encoder over pretrained word embeddings and a
    # Conv2D + LSTM + attention speech encoder, fused by concatenation.

    # Text branch.
    text_input_layer = Input(shape=(500, ))
    text_layer = text_input_layer
    text_layer = Embedding(nb_words,
                           300,
                           weights=[g_word_embedding_matrix],
                           input_length=500,
                           trainable=True)(text_layer)
    text_layer = LSTM(256, return_sequences=True,
                      recurrent_dropout=0.2)(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    text_layer = LSTM(256, return_sequences=False,
                      recurrent_dropout=0.2)(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    #text_layer = AttentionDecoder(256, 256, name='AttentionDecoder_tx')(text_layer)
    #text_layer = Flatten()(text_layer)
    text_layer = Dense(256, activation='relu')(text_layer)

    # Speech branch: Conv2D feature extractor.
    speech_input_layer = Input(shape=(100, feat_size, 3))
    speech_layer = speech_input_layer

    speech_layer = Conv2D(128,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = MaxPooling2D(padding='same')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Conv2D(256,
                          kernel_size=(5, 3),
                          strides=(1, 1),
                          padding='same',
                          activation='relu')(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Dense(512, activation="relu")(speech_layer)
    speech_layer = BatchNormalization()(speech_layer)
    # Reshape the conv feature maps into a 100-step sequence for the recurrent layers.
    speech_layer = Reshape((100, -1))(speech_layer)

    speech_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = LSTM(256, return_sequences=True,
                        recurrent_dropout=0.2)(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = AttentionDecoder(256, 256,
                                    name='AttentionDecoder_sp')(speech_layer)
    speech_layer = Flatten()(speech_layer)
    speech_layer = Dense(256, activation="relu")(speech_layer)

    # Fuse the two branches and classify.
    combined_layer = concatenate([text_layer, speech_layer])

    output_layer = Dense(4, activation='softmax')(combined_layer)

    model = Model(inputs=[text_input_layer, speech_input_layer],
                  outputs=output_layer)

    metrics = top_k_accuracy()
    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=1e-6,
                amsgrad=False,
                clipnorm=4.0)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    return model
Code example #10
def load(nb_words, g_word_embedding_matrix, feat_size):
    # Three-branch model: a BiLSTM text encoder, a BiLSTM speech encoder and a
    # Conv2D mocap encoder, fused by concatenation before the softmax classifier.

    # Text branch.
    text_input_layer = Input(shape=(500, ))
    text_layer = text_input_layer
    text_layer = Embedding(nb_words,
                           300,
                           weights=[g_word_embedding_matrix],
                           input_length=500,
                           trainable=True)(text_layer)
    text_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    text_layer = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(text_layer)
    text_layer = Dropout(0.2)(text_layer)
    text_layer = Dense(256, activation='relu')(text_layer)

    # Speech branch.
    speech_input_layer = Input(shape=(100, feat_size))
    speech_layer = speech_input_layer

    speech_layer = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(speech_layer)
    speech_layer = Dropout(0.2)(speech_layer)
    speech_layer = Dense(256, activation='relu')(speech_layer)

    # Mocap branch.
    mocap_input_layer = Input(shape=(200, 189, 1))
    mocap_layer = mocap_input_layer

    mocap_layer = Conv2D(32,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(64,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Conv2D(128,
                         3,
                         strides=(2, 2),
                         padding='same',
                         activation='relu')(mocap_layer)
    mocap_layer = BatchNormalization()(mocap_layer)
    mocap_layer = Dropout(0.2)(mocap_layer)
    mocap_layer = Flatten()(mocap_layer)
    mocap_layer = Dense(256, activation='relu')(mocap_layer)

    # Fuse the three branches and classify.
    combined_layer = concatenate([text_layer, speech_layer, mocap_layer])

    combined_layer = Dense(256, activation='relu')(combined_layer)
    output_layer = Dense(4, activation='softmax')(combined_layer)

    model = Model(
        inputs=[text_input_layer, speech_input_layer, mocap_input_layer],
        outputs=output_layer)

    metrics = top_k_accuracy()
    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=0.0,
                amsgrad=False,
                clipnorm=3.0)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    return model
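
Every example above compiles with metrics = top_k_accuracy(), a helper defined elsewhere in HearYou2.0 and not shown on this page. A minimal, hypothetical sketch of what such a helper could return (an assumption, not the project's actual implementation):

from keras.metrics import top_k_categorical_accuracy

def top_k_accuracy():
    # Hypothetical sketch only: return the metric list handed to model.compile().
    def top_2_accuracy(y_true, y_pred):
        # Top-2 variant of categorical accuracy.
        return top_k_categorical_accuracy(y_true, y_pred, k=2)
    return ['accuracy', top_2_accuracy]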