Example #1
from tensorflow.keras.layers import (Input, LSTM, BatchNormalization,
                                     Concatenate, Conv1D, Activation,
                                     Permute, Flatten, Dense, Multiply)
from tensorflow.keras.models import Model

def UttrAtten_AttenVec(atten):
    time_step = 62  # number of frames within a chunk (i.e., m)
    feat_num = 130  # number of LLD features
    chunk_num = 11  # number of chunks split from a sentence (i.e., C)
    # Input & LSTM Layer
    inputs = Input((time_step, feat_num))
    encode = LSTM(units=feat_num,
                  activation='tanh',
                  dropout=0.5,
                  return_sequences=True)(inputs)
    encode = LSTM(units=feat_num,
                  activation='tanh',
                  dropout=0.5,
                  return_sequences=False)(encode)
    encode = BatchNormalization()(encode)
    # Uttr Attention Layer: slice the chunk-level batch back into utterances
    # (batch_size = number of utterances per batch, defined globally)
    batch_atten_out = []
    for uttr_idx in range(0, batch_size * chunk_num, chunk_num):
        _start = uttr_idx
        _end = uttr_idx + chunk_num
        encode_crop = crop(0, _start, _end)(encode)  # chunk embeddings of one utterance
        encode_crop = reshape()(encode_crop)         # one utterance as a C-chunk sequence
        atten_out = atten(encode_crop)               # attention over the C chunks
        batch_atten_out.append(atten_out)
    # Output-Layer
    concat_atten_out = Concatenate(axis=0)(batch_atten_out)
    outputs = output_net(feat_num)(concat_atten_out)
    outputs = repeat()(outputs)  # for matching the input batch size
    model = Model(inputs=inputs, outputs=outputs)
    return model
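
The helper factories crop, reshape, mean, repeat, and output_net used above are defined elsewhere in the project and are not shown here. For orientation only, the following is a minimal sketch of how they could be written as Keras Lambda factories; the exact shapes and the dense head in output_net are assumptions, not the original implementation.

import tensorflow as tf
from tensorflow.keras.layers import Lambda, Dense

def crop(dimension, start, end):
    # Slice a tensor along one dimension; with dimension=0 this cuts the
    # chunk-level batch axis into per-utterance groups of C chunks.
    def _slice(x):
        if dimension == 0:
            return x[start:end]
        if dimension == 1:
            return x[:, start:end]
        return x[:, :, start:end]
    return Lambda(_slice)

def reshape():
    # Assumed behavior: (C, feat_num) -> (1, C, feat_num), so one utterance
    # becomes a single attention "sequence" of C chunk embeddings.
    return Lambda(lambda x: tf.expand_dims(x, axis=0))

def mean():
    # Average the (weighted) chunk vectors over the chunk axis.
    return Lambda(lambda x: tf.reduce_mean(x, axis=1))

def repeat(chunk_num=11):
    # Tile each utterance-level output chunk_num times so the output batch
    # matches the chunk-level input batch (batch_size * chunk_num).
    return Lambda(lambda x: tf.repeat(x, repeats=chunk_num, axis=0))

def output_net(feat_num):
    # Hypothetical prediction head; layer sizes and output width are
    # assumptions only.
    def _net(x):
        x = Dense(feat_num, activation='relu')(x)
        return Dense(3, activation='linear')(x)  # e.g., attribute scores
    return _net
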
Example #2
def UttrAtten_GatedVec(atten):
    time_step = 62    # number of frames within a chunk (i.e., m)
    feat_num = 130    # number of LLD features
    chunk_num = 11    # number of chunks split from a sentence (i.e., C)
    # Input Layer
    inputs = Input((time_step, feat_num))
    cnn_inputs = Permute((2, 1))(inputs)  # (time, feat) -> (feat, time) for channels_first convs
    # cnn1: [128, 128]
    encode = Conv1D(filters=128, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(cnn_inputs)
    encode = BatchNormalization()(encode)
    encode = Activation('relu')(encode)
    encode = Conv1D(filters=128, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode)
    encode = BatchNormalization()(encode)
    encode = Activation('relu')(encode)
    # cnn2: [64, 64]
    encode = Conv1D(filters=64, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode)
    encode = BatchNormalization()(encode)
    encode = Activation('relu')(encode)
    encode = Conv1D(filters=64, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode)
    encode = BatchNormalization()(encode)
    encode = Activation('relu')(encode)
    # cnn3: [32]
    encode = Conv1D(filters=32, kernel_size=3, strides=2, dilation_rate=1, data_format='channels_first')(encode)
    encode = BatchNormalization()(encode)
    encode = Activation('relu')(encode)
    # cnn flatten output
    encode = Flatten()(encode)
    encode = Dense(units=feat_num, activation='relu')(encode)
    # Uttr Attention Layer: slice the chunk-level batch back into utterances
    batch_atten_out = []
    for uttr_idx in range(0, batch_size * chunk_num, chunk_num):
        _start = uttr_idx
        _end = uttr_idx + chunk_num
        encode_crop = crop(0, _start, _end)(encode)           # chunk embeddings of one utterance
        encode_crop = reshape()(encode_crop)                  # one utterance as a C-chunk sequence
        atten_weights = atten(encode_crop)                    # per-chunk gate weights
        atten_out = Multiply()([encode_crop, atten_weights])  # gate each chunk embedding
        atten_out = mean()(atten_out)                         # average over the C chunks
        batch_atten_out.append(atten_out)
    # Output-Layer
    concat_atten_out = Concatenate(axis=0)(batch_atten_out)
    outputs = output_net(feat_num)(concat_atten_out)
    outputs = repeat()(outputs)  # for matching the input batch size
    model = Model(inputs=inputs, outputs=outputs)
    return model
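
Both builders receive the attention sub-network through the atten argument; its definition is also not shown above. Below is a plausible sketch of the gated variant (atten_gated, which the next snippet calls directly), assuming it emits one sigmoid gate vector per chunk; the layer sizes are assumptions, not the original implementation.

from tensorflow.keras.layers import Dense, Input, TimeDistributed
from tensorflow.keras.models import Model

def atten_gated(feat_num=130, C=11):
    # Hypothetical gated attention: one sigmoid gate vector per chunk,
    # to be multiplied element-wise with the chunk embeddings and averaged.
    chunk_seq = Input((C, feat_num))  # C chunk embeddings of one utterance
    gates = TimeDistributed(Dense(feat_num, activation='sigmoid'))(chunk_seq)
    return Model(inputs=chunk_seq, outputs=gates)

# Usage sketch (assumes batch_size is defined globally):
model = UttrAtten_GatedVec(atten_gated(feat_num=130, C=11))
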
time_step = 62  # number of frames within a chunk (i.e., m)
feat_num = 130  # number of LLD features

if atten_type == 'GatedVec':
    # LSTM Layer
    inputs = Input((time_step, feat_num))
    encode = LSTM(units=feat_num,
                  activation='tanh',
                  dropout=0.5,
                  return_sequences=True)(inputs)
    encode = LSTM(units=feat_num,
                  activation='tanh',
                  dropout=0.5,
                  return_sequences=False)(encode)
    encode = BatchNormalization()(encode)
    encode = reshape()(encode)  # group the utterance's C chunk encodings into one sequence
    # Attention Layer
    a_weighted = atten_gated(feat_num=feat_num, C=11)(encode)
    attention_vector = Multiply()([encode, a_weighted])  # gate each chunk encoding
    attention_vector = mean()(attention_vector)          # average over the C chunks
    # Output Layer
    outputs = output_net(feat_num)(attention_vector)
    model = Model(inputs=inputs, outputs=outputs)

elif atten_type == 'RnnAttenVec':
    # LSTM Layer
    inputs = Input((time_step, feat_num))
    encode = LSTM(units=feat_num,
                  activation='tanh',
                  dropout=0.5,
                  return_sequences=True)(inputs)