Example #1
0
def DNNAFx(win_length, filters, kernel_size_1, learning_rate):
    """Build and compile the single-frame ``DNNAFx`` Keras model.

    The graph takes one ``(win_length, 1)`` input and applies the ``conv``
    layer. A second branch is derived from the absolute value of that
    output: ``conv_smoothing`` -> softplus -> max pooling -> two dense
    layers wrapped between ``toPermuteDimensions`` Lambdas -> upsampling.
    The ``conv`` output and the branch are multiplied element-wise and the
    product is passed to ``deconv``, a ``Conv1D_tied`` layer constructed
    from the ``conv`` layer.

    Args:
        win_length: Number of samples per input frame. The pooling factor,
            the upsampling factor and the dense-layer widths are all
            ``win_length // 64``.
        filters: Number of filters of ``conv`` and ``conv_smoothing``.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Model`` (MAE loss on the ``deconv`` output).
    """
    pool_factor = win_length // 64
    frame_shape = (win_length, 1)

    frame_in = Input(shape=frame_shape, name='input')

    # Layers that are referenced by name later on are created up front.
    conv_layer = Conv1D(filters,
                        kernel_size_1,
                        strides=1,
                        padding='same',
                        kernel_initializer='lecun_uniform',
                        input_shape=frame_shape,
                        name='conv')
    smoothing_layer = Conv1D_local(filters,
                                   kernel_size_1 * 2,
                                   strides=1,
                                   padding='same',
                                   name='conv_smoothing',
                                   kernel_initializer='lecun_uniform')
    local_dense = Dense_local(pool_factor,
                              activation='softplus',
                              name='dense_local_in')
    # NOTE(review): Conv1D_tied is given the `conv` layer object itself;
    # presumably it reuses that layer's kernel -- confirm in its definition.
    tied_deconv = Conv1D_tied(1,
                              kernel_size_1,
                              conv_layer,
                              padding='same',
                              name='deconv')

    conv_out = conv_layer(frame_in)

    # Branch computed from |conv_out|.
    branch = Activation(K.abs, name='conv_activation')(conv_out)
    branch = smoothing_layer(branch)
    branch = Activation('softplus',
                        name='conv_smoothing_activation')(branch)
    branch = MaxPooling1D(pool_size=pool_factor, name='max_pooling')(branch)

    # Dense layers operate between two toPermuteDimensions Lambdas.
    branch = Lambda(toPermuteDimensions,
                    name='permute_dimensions_dnn_in')(branch)
    branch = local_dense(branch)
    branch = TimeDistributed(Dense(pool_factor, activation='softplus'),
                             name='dense_out')(branch)
    branch = Lambda(toPermuteDimensions,
                    name='permute_dimensions_dnn_out')(branch)

    # Undo the pooling factor so the branch can be multiplied with conv_out.
    branch = UpSampling1D(size=pool_factor, name='up_sampling_naive')(branch)

    merged = Multiply(name='phase_unpool_multiplication')([conv_out, branch])
    output = tied_deconv(merged)

    model = Model(inputs=[frame_in], outputs=[output])
    model.compile(loss={'deconv': 'mae'},
                  loss_weights={'deconv': 1.0},
                  optimizer=Adam(lr=learning_rate))
    return model
Example #2
0
def pretrainingModel(win_length, filters, kernel_size_1, learning_rate):
    """Build and compile the ``pretrainingModel`` Keras model.

    The graph applies ``conv`` to a ``(win_length, 1)`` input, takes the
    absolute value, applies ``conv_smoothing`` and a softplus, then max-pools
    and immediately upsamples by ``win_length // 64``. The pre-pooling
    tensor and the upsampled tensor are both given to a ``BooleanMask``
    Lambda; its result is multiplied with the ``conv`` output and passed to
    ``deconv``, a ``Conv1D_tied`` layer constructed from the ``conv`` layer.

    Args:
        win_length: Number of samples per input frame; pooling and
            upsampling both use ``win_length // 64``.
        filters: Number of filters of ``conv`` and ``conv_smoothing``.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Model`` (MAE loss on the ``deconv`` output).
    """
    pool_factor = win_length // 64
    frame_shape = (win_length, 1)

    frame_in = Input(shape=frame_shape, name='input')

    conv_layer = Conv1D(filters,
                        kernel_size_1,
                        strides=1,
                        padding='same',
                        kernel_initializer='lecun_uniform',
                        input_shape=frame_shape,
                        name='conv')
    smoothing_layer = Conv1D_local(filters,
                                   kernel_size_1 * 2,
                                   strides=1,
                                   padding='same',
                                   kernel_initializer='lecun_uniform',
                                   name='conv_smoothing')
    tied_deconv = Conv1D_tied(1,
                              kernel_size_1,
                              conv_layer,
                              padding='same',
                              name='deconv')

    conv_out = conv_layer(frame_in)

    # Smoothed, softplus-activated version of |conv_out|.
    smoothed = Activation(K.abs, name='conv_activation')(conv_out)
    smoothed = smoothing_layer(smoothed)
    smoothed = Activation('softplus',
                          name='conv_smoothing_activation')(smoothed)

    # Pool and upsample by the same factor.
    pooled = MaxPooling1D(pool_size=pool_factor, name='max_pooling')(smoothed)
    unpooled = UpSampling1D(size=pool_factor,
                            name='up_sampling_naive')(pooled)

    # NOTE(review): BooleanMask is defined elsewhere; it receives the
    # pre-pooling tensor first and the upsampled tensor second.
    unpooled = Lambda(BooleanMask, name='boolean_mask')([smoothed, unpooled])

    merged = Multiply(name='phase_unpool_multiplication')([conv_out,
                                                           unpooled])
    output = tied_deconv(merged)

    model = Model(inputs=[frame_in], outputs=[output])
    model.compile(loss={'deconv': 'mae'},
                  loss_weights={'deconv': 1.0},
                  optimizer=Adam(lr=learning_rate))
    return model
Example #3
0
def CWAFx(win_length, filters, kernel_size_1, learning_rate, wavenetConfig):
    """Build and compile the multi-frame ``CWAFx`` Keras model.

    The input holds ``2 * kContext + 1`` frames of shape ``(win_length, 1)``
    (``kContext`` is fixed to 4 inside this function). ``conv``, the absolute
    value, ``conv_smoothing``, softplus and max pooling are applied per frame
    through ``TimeDistributed``. The pooled frames are unstacked along axis 1,
    concatenated along axis -2 and passed to ``wavenet``. After a dense layer
    and upsampling, the result is multiplied with the centre frame
    (index ``kContext``) of the ``conv`` output. A dense stack followed by
    ``SAAF`` and ``se_block`` is added back to that product before
    ``deconv``.

    Args:
        win_length: Number of samples per frame; pooling, upsampling and the
            ``dense_wn`` width use ``win_length // 64``.
        filters: Number of filters of ``conv`` and ``conv_smoothing``; also
            used as ``output_channels`` for ``wavenet`` and as the width of
            the outer layers of the dense stack.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.
        wavenetConfig: Passed through unchanged to ``wavenet``.

    Returns:
        The compiled ``Model`` (MAE loss on the ``deconv`` output).
    """
    kContext = 4  # past and subsequent frames
    num_frames = kContext * 2 + 1
    pool_factor = win_length // 64

    frames_in = Input(shape=(num_frames, win_length, 1), name='input')

    # Per-frame layers; they are wrapped in TimeDistributed when applied.
    conv_layer = Conv1D(filters,
                        kernel_size_1,
                        strides=1,
                        padding='same',
                        kernel_initializer='lecun_uniform',
                        input_shape=(win_length, 1))
    abs_layer = Activation(K.abs)
    softplus_layer = Activation('softplus')
    pool_layer = MaxPooling1D(pool_size=pool_factor)
    smoothing_layer = Conv1D_local(filters,
                                   kernel_size_1 * 2,
                                   strides=1,
                                   padding='same',
                                   kernel_initializer='lecun_uniform')
    tied_deconv = Conv1D_tied(1,
                              kernel_size_1,
                              conv_layer,
                              padding='same',
                              name='deconv')

    def split_frames(op_name):
        """Return a function that unstacks its input along axis 1."""
        return lambda inputs: tf.unstack(
            inputs, num=num_frames, axis=1, name=op_name)

    conv_out = TimeDistributed(conv_layer, name='conv')(frames_in)

    branch = TimeDistributed(abs_layer, name='conv_activation')(conv_out)
    branch = TimeDistributed(smoothing_layer, name='conv_smoothing')(branch)
    branch = TimeDistributed(softplus_layer,
                             name='conv_smoothing_activation')(branch)
    branch = TimeDistributed(pool_layer, name='max_pooling')(branch)

    # Turn the frame axis into a list of tensors and join them on axis -2.
    branch = Lambda(split_frames('unstack2'))(branch)
    branch = Concatenate(name='concatenate', axis=-2)(branch)

    branch = wavenet(branch,
                     wavenetConfig,
                     contextFrames=kContext,
                     output_channels=filters,
                     context=True)

    branch = Lambda(toPermuteDimensions, name='perm_1')(branch)
    branch = Dense(pool_factor, activation='tanh', name='dense_wn')(branch)
    branch = Lambda(toPermuteDimensions, name='perm_2')(branch)
    branch = UpSampling1D(size=pool_factor, name='up_sampling_naive')(branch)

    # Only the centre frame of the conv output is used from here on.
    conv_frames = Lambda(split_frames('unstack'))(conv_out)
    merged = Multiply(name='phase_unpool_multiplication')(
        [conv_frames[kContext], branch])

    dense_stack = (
        (filters, 'tanh', 'dense_in'),
        (filters // 2, 'tanh', 'dense_h1'),
        (filters // 2, 'tanh', 'dense_h2'),
        (filters, 'linear', 'dense_out'),
    )
    shaped = merged
    for units, activation, layer_name in dense_stack:
        shaped = Dense(units, activation=activation, name=layer_name)(shaped)

    shaped = SAAF(break_points=25,
                  break_range=0.2,
                  magnitude=100,
                  order=2,
                  tied_feamap=True,
                  kernel_initializer='random_normal',
                  name='saaf_out')(shaped)
    shaped = se_block(shaped,
                      filters,
                      weight_decay=0.,
                      amplifying_ratio=16,
                      idx=1)

    merged = Add(name='addition')([merged, shaped])
    output = tied_deconv(merged)

    model = Model(inputs=[frames_in], outputs=[output])
    model.compile(loss={'deconv': 'mae'},
                  loss_weights={'deconv': 1.0},
                  optimizer=Adam(lr=learning_rate))
    return model
def model_2(win_length, filters, kernel_size_1, learning_rate):
    """Build and compile the multi-frame, bidirectional-LSTM ``model_2``.

    The input holds ``2 * kContext + 1`` frames of shape ``(win_length, 1)``
    (``kContext`` is fixed to 4 inside this function). ``conv``, the absolute
    value, ``conv_smoothing``, softplus and max pooling are applied per frame
    through ``TimeDistributed``. The pooled frames are unstacked along axis 1,
    concatenated (default axis) and passed through three bidirectional LSTMs
    and a ``SAAF`` layer. After upsampling, the result is multiplied with the
    centre frame (index ``kContext``) of the ``conv`` output. A dense stack
    followed by ``SAAF`` and ``se_block`` is added back to that product
    before ``deconv``. The ``deconv`` output goes through a ``Window`` Lambda
    named ``waveform`` and additionally through a ``Spectrogram`` layer named
    ``spec``.

    Args:
        win_length: Number of samples per frame; pooling and upsampling use
            ``win_length // 64``; also used as ``n_dft`` and ``n_hop`` of the
            ``Spectrogram`` layer.
        filters: Number of filters of ``conv`` and ``conv_smoothing``; the
            LSTM sizes are ``filters * 2``, ``filters`` and ``filters // 2``.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Model`` with outputs ``[spec, waveform]``. The losses
        are ``'mse'`` on ``spec`` (weight 0.0001) and ``MAE_preEmphasis`` on
        ``waveform`` (weight 1.0).
    """
    kContext = 4  # past and subsequent frames
    num_frames = kContext * 2 + 1
    pool_factor = win_length // 64

    def bi_lstm(units, activation, layer_name):
        """Create one concat-merged bidirectional LSTM layer."""
        return Bidirectional(LSTM(units,
                                  activation=activation,
                                  stateful=False,
                                  return_sequences=True,
                                  dropout=0.1,
                                  recurrent_dropout=0.1),
                             merge_mode='concat',
                             name=layer_name)

    def saaf(layer_name):
        """Create a SAAF layer with the settings shared across this model."""
        return SAAF(break_points=25,
                    break_range=0.2,
                    magnitude=100,
                    order=2,
                    tied_feamap=True,
                    kernel_initializer='random_normal',
                    name=layer_name)

    def split_frames(op_name):
        """Return a function that unstacks its input along axis 1."""
        return lambda inputs: tf.unstack(
            inputs, num=num_frames, axis=1, name=op_name)

    frames_in = Input(shape=(num_frames, win_length, 1), name='input')

    # Per-frame layers; they are wrapped in TimeDistributed when applied.
    conv_layer = Conv1D(filters,
                        kernel_size_1,
                        strides=1,
                        padding='same',
                        kernel_initializer='lecun_uniform',
                        input_shape=(win_length, 1))
    abs_layer = Activation(K.abs)
    softplus_layer = Activation('softplus')
    pool_layer = MaxPooling1D(pool_size=pool_factor)
    smoothing_layer = Conv1D_local(filters,
                                   kernel_size_1 * 2,
                                   strides=1,
                                   padding='same',
                                   kernel_initializer='lecun_uniform')

    rnn_in = bi_lstm(filters * 2, 'tanh', 'birnn_in')
    rnn_mid = bi_lstm(filters, 'tanh', 'birnn_1')
    rnn_out = bi_lstm(filters // 2, 'linear', 'birnn_2')

    tied_deconv = Conv1D_tied(1,
                              kernel_size_1,
                              conv_layer,
                              padding='same',
                              name='deconv')

    conv_out = TimeDistributed(conv_layer, name='conv')(frames_in)

    branch = TimeDistributed(abs_layer, name='conv_activation')(conv_out)
    branch = TimeDistributed(smoothing_layer, name='conv_smoothing')(branch)
    branch = TimeDistributed(softplus_layer,
                             name='conv_smoothing_activation')(branch)
    branch = TimeDistributed(pool_layer, name='max_pooling')(branch)

    # Turn the frame axis into a list of tensors and join them.
    branch = Lambda(split_frames('unstack2'))(branch)
    branch = Concatenate(name='concatenate')(branch)

    for recurrent_layer in (rnn_in, rnn_mid, rnn_out):
        branch = recurrent_layer(branch)
    branch = saaf('saaf_1')(branch)

    branch = UpSampling1D(size=pool_factor, name='up_sampling_naive')(branch)

    # Only the centre frame of the conv output is used from here on.
    conv_frames = Lambda(split_frames('unstack'))(conv_out)
    merged = Multiply(name='phase_unpool_multiplication')(
        [conv_frames[kContext], branch])

    dense_stack = (
        (filters, 'tanh', 'dense_in'),
        (filters // 2, 'tanh', 'dense_h1'),
        (filters // 2, 'tanh', 'dense_h2'),
        (filters, 'linear', 'dense_out'),
    )
    shaped = merged
    for units, activation, layer_name in dense_stack:
        shaped = Dense(units, activation=activation, name=layer_name)(shaped)
    shaped = saaf('saaf_out')(shaped)
    shaped = se_block(shaped,
                      filters,
                      weight_decay=0.,
                      amplifying_ratio=16,
                      idx=1)

    merged = Add(name='addition')([merged, shaped])
    waveform = tied_deconv(merged)
    waveform = Lambda(Window, name='waveform')(waveform)

    # Second output: spectrogram of the (permuted) waveform output.
    spectrogram_layer = Spectrogram(n_dft=win_length,
                                    n_hop=win_length,
                                    input_shape=(1, win_length),
                                    return_decibel_spectrogram=True,
                                    power_spectrogram=2.0,
                                    trainable_kernel=False,
                                    name='spec')
    spec = Lambda(toPermuteDimensions, name='perm_spec')(waveform)
    spec = spectrogram_layer(spec)

    model = Model(inputs=[frames_in], outputs=[spec, waveform])
    model.compile(loss={'spec': 'mse', 'waveform': MAE_preEmphasis},
                  loss_weights={'spec': 0.0001, 'waveform': 1.0},
                  optimizer=Adam(lr=learning_rate))
    return model
def model_1(win_length, filters, kernel_size_1, learning_rate, batch):
    """Build and compile the fixed-batch, multi-frame ``model_1``.

    The input holds ``kContext * 2 + 1`` frames of shape ``(win_length, 1)``
    with a fixed batch size. ``conv``, the absolute value, ``conv_smoothing``,
    softplus and max pooling are applied per frame through
    ``TimeDistributed``; the pooled frames are unstacked along axis 1,
    concatenated and fed to ``birnn_in``. Its output feeds two paths:

    * ``birnn_1`` -> ``birnn_2`` -> ``saaf_1`` -> ``dense_l_sgn`` and
      ``dense_l_idx`` (between ``toPermuteDimensions`` Lambdas), whose
      outputs are concatenated and given to the ``velvet`` layer;
    * ``birnn_3`` -> ``saaf_2`` -> upsampling.

    The centre frame (index ``kContext``) of the ``conv`` output is
    concatenated with the ``velvet`` output, passed through ``convTensors``
    and ``saaf_out_conv``, and multiplied with the upsampled second path.
    That product and a dense-stack/``SAAF`` version of it each go through
    ``se_block_lstm`` and are summed before ``deconv``. The ``deconv``
    output goes through a ``Window`` Lambda named ``waveform`` and
    additionally through a ``Spectrogram`` layer named ``spec``.

    ``kSR`` and ``kContext`` are not defined in this function; they are read
    from the enclosing scope.

    Args:
        win_length: Number of samples per frame; pooling and upsampling use
            ``win_length // 64``; also used as ``n_dft`` and ``n_hop`` of the
            ``Spectrogram`` layer and as the second argument of
            ``Conv1D_localTensor``.
        filters: Number of filters of ``conv`` and ``conv_smoothing``; the
            LSTM sizes are derived from it.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.
        batch: Fixed batch size; used in the input ``batch_shape`` and passed
            to ``Conv1D_localTensor`` and ``VelvetNoise``.

    Returns:
        The compiled ``Model`` with outputs ``[spec, waveform]``. The losses
        are ``'mse'`` on ``spec`` (weight 0.0001) and ``MAE_preEmphasis`` on
        ``waveform`` (weight 1.0).
    """
    # Width of the two dense heads and first argument of VelvetNoise.
    kPs = int((win_length * 2000 / kSR))

    ini1 = tf.initializers.random_uniform(minval=-1, maxval=1)

    x = Input(shape=(kContext * 2 + 1, win_length, 1),
              name='input',
              batch_shape=(batch, kContext * 2 + 1, win_length, 1))

    conv = Conv1D(filters,
                  kernel_size_1,
                  strides=1,
                  padding='same',
                  kernel_initializer='lecun_uniform',
                  input_shape=(win_length, 1))

    activation_abs = Activation(K.abs)
    activation_sp = Activation('softplus')
    max_pooling = MaxPooling1D(pool_size=win_length // 64)

    conv_smoothing = Conv1D_local(filters,
                                  kernel_size_1 * 2,
                                  strides=1,
                                  padding='same',
                                  kernel_initializer='lecun_uniform')

    dense_sgn = Dense(kPs,
                      activation='tanh',
                      kernel_initializer=ini1,
                      name='dense_l_sgn')

    dense_idx = Dense(kPs, activation='sigmoid', name='dense_l_idx')

    bi_rnn = Bidirectional(LSTM(filters * 2,
                                activation='tanh',
                                stateful=False,
                                return_sequences=True,
                                dropout=0.1,
                                recurrent_dropout=0.1),
                           merge_mode='concat',
                           name='birnn_in')
    bi_rnn1 = Bidirectional(LSTM(filters,
                                 activation='tanh',
                                 stateful=False,
                                 return_sequences=True,
                                 dropout=0.1,
                                 recurrent_dropout=0.1),
                            merge_mode='concat',
                            name='birnn_1')
    bi_rnn2 = Bidirectional(LSTM(filters // 2,
                                 activation='linear',
                                 stateful=False,
                                 return_sequences=True,
                                 dropout=0.1,
                                 recurrent_dropout=0.1),
                            merge_mode='concat',
                            name='birnn_2')

    bi_rnn3 = Bidirectional(LSTM(filters // 2,
                                 activation='linear',
                                 stateful=False,
                                 return_sequences=True,
                                 dropout=0.1,
                                 recurrent_dropout=0.1),
                            merge_mode='concat',
                            name='birnn_3')

    convTensors = Conv1D_localTensor(filters,
                                     win_length,
                                     batch,
                                     strides=1,
                                     padding='same',
                                     name='convTensors')

    deconv = Conv1D_tied(1, kernel_size_1, conv, padding='same', name='deconv')

    velvet = VelvetNoise(kPs,
                         batch,
                         input_dim=filters,
                         input_length=win_length,
                         name='velvet')

    # Per-frame front end.
    X = TimeDistributed(conv, name='conv')(x)
    X_abs = TimeDistributed(activation_abs, name='conv_activation')(X)
    M = TimeDistributed(conv_smoothing, name='conv_smoothing')(X_abs)
    M = TimeDistributed(activation_sp, name='conv_smoothing_activation')(M)
    P = X
    Z = TimeDistributed(max_pooling, name='max_pooling')(M)

    # Turn the frame axis into a list of tensors and join them.
    Z = Lambda(lambda inputs: tf.unstack(
        inputs, num=kContext * 2 + 1, axis=1, name='unstack2'))(Z)
    Z = Concatenate(name='concatenate')(Z)

    Z = bi_rnn(Z)

    # First path from the shared recurrent output.
    Z1 = bi_rnn1(Z)
    Z1 = bi_rnn2(Z1)
    Z1 = SAAF(break_points=25,
              break_range=0.2,
              magnitude=100,
              order=2,
              tied_feamap=True,
              kernel_initializer='random_normal',
              name='saaf_1')(Z1)

    # Second path from the shared recurrent output.
    Z2 = bi_rnn3(Z)
    Z2 = SAAF(break_points=25,
              break_range=0.2,
              magnitude=100,
              order=2,
              tied_feamap=True,
              kernel_initializer='random_normal',
              name='saaf_2')(Z2)

    Z1 = Lambda((toPermuteDimensions), name='perm_1')(Z1)

    sgn = dense_sgn(Z1)
    idx = dense_idx(Z1)

    sgn = Lambda((toPermuteDimensions), name='perm_2')(sgn)
    idx = Lambda((toPermuteDimensions), name='perm_3')(idx)

    P = Lambda(lambda inputs: tf.unstack(
        inputs, num=kContext * 2 + 1, axis=1, name='unstack'))(P)
    V = Concatenate(name='concatenate2', axis=-1)([sgn, idx])
    V = velvet(V)

    # Only the centre frame of the conv output is used from here on.
    Y = Concatenate(name='concatenate3')([P[kContext], V])
    Y = convTensors(Y)
    Y = SAAF(break_points=25,
             break_range=0.2,
             magnitude=100,
             order=2,
             tied_feamap=True,
             kernel_initializer='random_normal',
             name='saaf_out_conv')(Y)

    M_ = UpSampling1D(size=win_length // 64, name='up_sampling_naive')(Z2)
    Y = Multiply(name='phase_unpool_multiplication')([Y, M_])

    Y_ = Dense(filters, activation='tanh', name='dense_in')(Y)
    Y_ = Dense(filters // 2, activation='tanh', name='dense_h1')(Y_)
    Y_ = Dense(filters // 2, activation='tanh', name='dense_h2')(Y_)
    Y_ = Dense(filters, activation='linear', name='dense_out')(Y_)
    Y_ = SAAF(break_points=25,
              break_range=0.2,
              magnitude=100,
              order=2,
              tied_feamap=True,
              kernel_initializer='random_normal',
              name='saaf_out')(Y_)

    Y = se_block_lstm(Y, filters, weight_decay=0., amplifying_ratio=16, idx=1)
    Y_ = se_block_lstm(Y_,
                       filters,
                       weight_decay=0.,
                       amplifying_ratio=16,
                       idx=2)

    Y = Add(name='addition')([Y, Y_])
    Y = deconv(Y)

    Y = Lambda((Window), name='waveform')(Y)

    # Second output: spectrogram of the (permuted) waveform output.
    loss_output = Spectrogram(n_dft=win_length,
                              n_hop=win_length,
                              input_shape=(1, win_length),
                              return_decibel_spectrogram=True,
                              power_spectrogram=2.0,
                              trainable_kernel=False,
                              name='spec')

    spec = Lambda((toPermuteDimensions), name='perm_spec')(Y)
    spec = loss_output(spec)

    model = Model(inputs=[x], outputs=[spec, Y])

    model.compile(loss={
        'spec': 'mse',
        'waveform': MAE_preEmphasis
    },
                  loss_weights={
                      'spec': 0.0001,
                      'waveform': 1.0
                  },
                  optimizer=Adam(lr=learning_rate))

    return model
def CAFxR(win_length, filters, kernel_size_1, learning_rate):
    """Build and compile the regularised single-frame ``CAFxR`` model.

    Per the original author's note, this is the same architecture as the
    CAFx model but with a regulariser applied to each layer with trainable
    weights. In this function ``regularizers.l2(0.01)`` is passed to
    ``conv``, ``conv_smoothing``, ``dense_local_in``, the five
    ``dense_saaf_*`` layers and ``saaf_out``; the ``TimeDistributed`` dense
    layer ``dense_out`` and ``deconv`` are created without one.

    The graph applies ``conv`` to a ``(win_length, 1)`` input and derives a
    second branch from the absolute value of that output
    (``conv_smoothing`` -> softplus -> max pooling -> two dense layers
    between ``toPermuteDimensions`` Lambdas -> upsampling). The two are
    multiplied, passed through a dense stack and ``SAAF``, and then through
    ``deconv``, a ``Conv1D_tied`` layer constructed from the ``conv`` layer.

    Args:
        win_length: Number of samples per input frame. The pooling factor,
            the upsampling factor and the widths of ``dense_local_in`` and
            ``dense_out`` are all ``win_length // 64``.
        filters: Number of filters of ``conv`` and ``conv_smoothing``; also
            the width of the outer layers of the dense stack.
        kernel_size_1: Kernel size of ``conv`` and ``deconv``;
            ``conv_smoothing`` uses twice this value.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Model`` (MAE loss on the ``deconv`` output).
    """
    l2_weight = 0.01
    pool_factor = win_length // 64
    frame_shape = (win_length, 1)

    frame_in = Input(shape=frame_shape, name='input')

    conv_layer = Conv1D(filters,
                        kernel_size_1,
                        strides=1,
                        padding='same',
                        kernel_initializer='lecun_uniform',
                        input_shape=frame_shape,
                        name='conv',
                        kernel_regularizer=regularizers.l2(l2_weight))
    smoothing_layer = Conv1D_local(
        filters,
        kernel_size_1 * 2,
        strides=1,
        padding='same',
        name='conv_smoothing',
        kernel_initializer='lecun_uniform',
        kernel_regularizer=regularizers.l2(l2_weight))
    local_dense = Dense_local(pool_factor,
                              activation='softplus',
                              name='dense_local_in',
                              kernel_regularizer=regularizers.l2(l2_weight))
    tied_deconv = Conv1D_tied(1,
                              kernel_size_1,
                              conv_layer,
                              padding='same',
                              name='deconv')

    conv_out = conv_layer(frame_in)

    # Branch computed from |conv_out|.
    branch = Activation(K.abs, name='conv_activation')(conv_out)
    branch = smoothing_layer(branch)
    branch = Activation('softplus',
                        name='conv_smoothing_activation')(branch)
    branch = MaxPooling1D(pool_size=pool_factor, name='max_pooling')(branch)

    # Dense layers operate between two toPermuteDimensions Lambdas.
    branch = Lambda(toPermuteDimensions,
                    name='permute_dimensions_dnn_in')(branch)
    branch = local_dense(branch)
    branch = TimeDistributed(Dense(pool_factor, activation='softplus'),
                             name='dense_out')(branch)
    branch = Lambda(toPermuteDimensions,
                    name='permute_dimensions_dnn_out')(branch)

    branch = UpSampling1D(size=pool_factor, name='up_sampling_naive')(branch)
    shaped = Multiply(name='phase_unpool_multiplication')([conv_out, branch])

    # Each dense layer gets its own freshly created L2 regulariser.
    dense_stack = (
        (filters, 'relu', 'dense_saaf_in'),
        (filters // 2, 'relu', 'dense_saaf_h1'),
        (filters // 2, 'relu', 'dense_saaf_h2'),
        (filters // 2, 'relu', 'dense_saaf_h3'),
        (filters, 'linear', 'dense_saaf_out'),
    )
    for units, activation, layer_name in dense_stack:
        shaped = Dense(units,
                       activation=activation,
                       name=layer_name,
                       kernel_regularizer=regularizers.l2(l2_weight))(shaped)

    shaped = SAAF(break_points=25,
                  break_range=0.2,
                  magnitude=100,
                  order=2,
                  tied_feamap=True,
                  kernel_initializer='random_normal',
                  name='saaf_out',
                  kernel_regularizer=regularizers.l2(l2_weight))(shaped)

    output = tied_deconv(shaped)

    model = Model(inputs=[frame_in], outputs=[output])
    model.compile(loss={'deconv': 'mae'},
                  loss_weights={'deconv': 1.0},
                  optimizer=Adam(lr=learning_rate))
    return model