def baseline_model(seq_dim=3):
    """Build a siamese binary classifier over two variable-length sequences.

    Both branches share one encoder; pooled branch embeddings are combined
    via element-wise product and squared difference before a small MLP head.

    Parameters
    ----------
    seq_dim : int
        Feature dimension of each timestep (default 3).

    Returns
    -------
    Compiled Keras Model taking [input_1, input_2], emitting a sigmoid
    probability.
    """
    # Bug fix: the input shapes and the encoder were hard-coded to 3,
    # silently ignoring seq_dim; use the parameter so the model works for
    # any feature dimension (default behavior is unchanged).
    input_1 = Input(shape=(None, seq_dim))
    input_2 = Input(shape=(None, seq_dim))

    base_model = encoder(seq_dim=seq_dim)

    # Shared encoder applied to both branches (siamese weights).
    x1 = base_model(input_1)
    x2 = base_model(input_2)

    # Each branch: concatenate max- and average-pooled encodings.
    x1 = Concatenate(axis=-1)([GlobalMaxPool1D()(x1), GlobalAvgPool1D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool1D()(x2), GlobalAvgPool1D()(x2)])

    # Squared difference captures distance between the branches...
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])

    # ...while the product captures agreement.
    x = Multiply()([x1, x2])

    x = Concatenate(axis=-1)([x, x3])
    x = Dropout(0.1)(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(0.1)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2], out)

    model.compile(loss="binary_crossentropy", metrics=[acc], optimizer=Adam(0.0001))

    model.summary()

    return model
# Example 2
def build_encoder(n_input_dim,
                  n_encoding_dim,
                  n_conv_block,
                  n_conv_layers,
                  n_conv_filters,
                  conv_filter_size,
                  n_dense_layers,
                  n_dense_units,
                  activation,
                  batch_norm=False,
                  l2_lambda=0,
                  dropout_prob=0):
    """Assemble the encoder model: stacked conv blocks with 2x pooling,
    global average pooling, a dense block, and a final linear projection
    down to the encoding dimension."""
    input_op = Input([FEATURE_VEC_LEN, n_input_dim])
    hidden = input_op
    # Convolutional front-end: each block is followed by 2x downsampling.
    for block_idx in range(n_conv_block):
        hidden = conv_block(n_conv_filters[block_idx],
                            conv_filter_size[block_idx],
                            n_conv_layers[block_idx], activation, batch_norm,
                            l2_lambda, dropout_prob)(hidden)
        hidden = MaxPool1D((2, ))(hidden)
    # Collapse the time axis before the dense head.
    hidden = GlobalAvgPool1D()(hidden)
    hidden = dense_block(n_dense_units, n_dense_layers, activation,
                         batch_norm, l2_lambda, dropout_prob)(hidden)
    # Linear (no activation) projection produces the encoding.
    encoding = Dense(n_encoding_dim)(hidden)
    return Model(input_op, encoding)
# Example 3
def basic_cnn(num_frame, num_artist):
    """Plain 1-D CNN classifier over (num_frame, 128) feature frames.

    Four Conv1D(128, k=3) stages (conv -> BN -> LeakyReLU -> maxpool, with
    dropout after the last two), a 1x1 Conv1D(256) bottleneck, global
    average pooling, and a softmax over artists.

    Parameters
    ----------
    num_frame : int
        Number of time frames in the input.
    num_artist : int
        Number of output classes.

    Returns
    -------
    An uncompiled Keras Model.
    """

    def _conv_bn_act(t, filters, kernel_size):
        # Shared conv -> batch-norm -> LeakyReLU stage used by every block.
        t = Conv1D(filters,
                   kernel_size=kernel_size,
                   padding='same',
                   use_bias=True,
                   kernel_regularizer=l2(1e-5),
                   kernel_initializer='he_normal')(t)
        t = BatchNormalization(axis=2)(t)
        return LeakyReLU(0.2)(t)

    x_input = Input(shape=(num_frame, 128))

    out = MaxPool1D(pool_size=3)(_conv_bn_act(x_input, 128, 3))
    out = MaxPool1D(pool_size=3)(_conv_bn_act(out, 128, 3))

    # Dropout only on the deeper stages to regularize the classifier head.
    out = MaxPool1D(pool_size=3)(_conv_bn_act(out, 128, 3))
    out = Dropout(0.5)(out)

    out = MaxPool1D(pool_size=3)(_conv_bn_act(out, 128, 3))
    out = Dropout(0.5)(out)

    # 1x1 bottleneck expands channels without touching the time axis.
    out = _conv_bn_act(out, 256, 1)
    out = Dropout(0.5)(out)

    out = GlobalAvgPool1D()(out)

    out = Dense(num_artist, activation='softmax')(out)
    model = Model(inputs=x_input, outputs=out)
    return model
# Example 4
def finetuning_siamese_cnn(mymodel_tmp, num_frame, num_neg_singers,
                           num_pos_tracks):
    """Wrap a pretrained embedding model into a siamese ranking network.

    The anchor embedding is compared (cosine similarity) against every
    positive-track and negative-singer embedding; all similarities are
    concatenated into a single output vector for a ranking-style loss.

    Parameters
    ----------
    mymodel_tmp : Keras model mapping a (num_frame, n_mels) input to an
        embedding vector; shared across all branches.
    num_frame : int
        Frames per spectrogram input.
    num_neg_singers : int
        Number of negative branches.
    num_pos_tracks : int
        Number of positive branches.

    Returns
    -------
    Keras Model with inputs [anchor] + positives + negatives.
    """
    anchor = Input(shape=(num_frame, config.n_mels))
    pos_items = [
        Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_tracks)
    ]
    neg_items = [
        Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_singers)
    ]

    # NOTE(review): the original also instantiated Dense(256) and
    # GlobalAvgPool1D layers here but never connected them to the graph;
    # they are removed as dead code (no behavioral change).

    # Shared pretrained encoder applied to every branch.
    anchor_out = mymodel_tmp(anchor)
    pos_outs = [mymodel_tmp(pos_item) for pos_item in pos_items]
    neg_outs = [mymodel_tmp(neg_item) for neg_item in neg_items]

    ### cosine similarity between the anchor and each branch
    pos_dists = [
        dot([anchor_out, pos_out], axes=1, normalize=True)
        for pos_out in pos_outs
    ]
    neg_dists = [
        dot([anchor_out, neg_out], axes=1, normalize=True)
        for neg_out in neg_outs
    ]

    all_dists = concatenate(pos_dists + neg_dists)

    # Identity activation: keeps raw similarities as the model output.
    outputs = Activation('linear')(all_dists)

    model = Model(inputs=[anchor] + pos_items + neg_items, outputs=outputs)

    return model
# Example 5
def buildModel():
    """Build and compile the BiLSTM+CNN multi-label text classifier.

    Frozen pretrained embeddings -> spatial dropout -> BiLSTM(100) ->
    Conv1D(64, k=3) -> (max + avg) global pooling -> dense head with six
    sigmoid outputs.

    Returns
    -------
    The compiled Keras Model.
    """
    inp = Input((MAXIMUM_SEQ_LEN, ))
    # Frozen pretrained embeddings.
    emb = Embedding(VOCAB_LENGTH,
                    EMBEDDING_DIM,
                    weights=[embedding_matrix],
                    trainable=False)(inp)
    # Drop whole embedding vectors instead of individual cells.
    emb = SpatialDropout1D(0.2)(emb)
    # 100 forward + 100 backward hidden states per timestep.
    hidden_states = Bidirectional(
        LSTM(100, return_sequences=True, dropout=0.1,
             recurrent_dropout=0.1))(emb)
    # Convolve over the hidden-state sequence with 64 size-3 kernels.
    conv = Conv1D(64,
                  kernel_size=3,
                  padding="valid",
                  kernel_initializer="glorot_uniform")(hidden_states)
    # Combine max and average pooling over time.
    x1 = GlobalMaxPool1D()(conv)
    x2 = GlobalAvgPool1D()(conv)
    x = Concatenate()([x1, x2])
    x = Dropout(0.2)(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(0.1)(x)
    out = Dense(6, activation='sigmoid')(x)
    model = Model(inp, out)

    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=[AUC(name="auc")])

    # Bug fix: the model was built and compiled but never returned, so
    # every caller received None.
    return model
# Example 6
def rnn_classifier(
    d_model=128,
    n_layers=2,
    n_classes=16,
):
    """Bidirectional-GRU sequence classifier.

    Stacks bidirectional GRU layers, average-pools over time, and emits
    n_classes sigmoid outputs; compiled with the project's custom binary
    loss and accuracy.
    """
    inp = Input((None, d_model))

    # First recurrent layer is always present.
    hidden = Bidirectional(GRU(d_model, return_sequences=True))(inp)

    # Remaining recurrent layers are stacked only for multi-class setups.
    if n_classes > 1:
        for _ in range(n_layers - 1):
            hidden = Bidirectional(GRU(d_model, return_sequences=True))(hidden)

    hidden = Dropout(0.2)(hidden)

    # Collapse the time axis to a fixed-size vector.
    hidden = GlobalAvgPool1D()(hidden)

    hidden = Dense(4 * n_classes, activation="selu")(hidden)

    out = Dense(n_classes, activation="sigmoid")(hidden)

    model = Model(inputs=inp, outputs=out)

    model.compile(optimizer=Adam(0.00001),
                  loss=custom_binary_crossentropy,
                  metrics=[custom_binary_accuracy])

    model.summary()

    return model
# Example 7
def baseline_model(seq_dim=3):
    """Single-branch encoder classifier.

    Encodes one variable-length sequence, fuses max- and average-pooled
    views, and classifies with a small dropout-regularized MLP head.
    """
    input_1 = Input(shape=(None, seq_dim))

    base_model = encoder(seq_dim=seq_dim)

    encoded = base_model(input_1)

    encoded = Dropout(0.5)(encoded)

    # Fuse max- and average-pooled views of the encoded sequence.
    pooled = Concatenate(axis=-1)(
        [GlobalMaxPool1D()(encoded), GlobalAvgPool1D()(encoded)])

    hidden = Dropout(0.5)(pooled)
    hidden = Dense(100, activation="relu")(hidden)
    hidden = Dropout(0.5)(hidden)
    out = Dense(1, activation="sigmoid")(hidden)

    model = Model(input_1, out)

    model.compile(loss="binary_crossentropy",
                  metrics=[acc],
                  optimizer=Adam(0.0001))

    model.summary()

    return model
def squeeze_exciation(x, amplifying_ratio, name):
    """Squeeze-and-excitation gate: global context -> bottleneck MLP ->
    per-channel sigmoid weights of shape (1, num_features)."""
    # NOTE(review): .value implies TF1-style Dimension objects — confirm
    # the surrounding code still runs under TF1/older Keras.
    num_features = x.shape[-1].value
    # Squeeze: one global descriptor per channel.
    squeezed = GlobalAvgPool1D(name=f'squeeze_{name}')(x)
    squeezed = Reshape((1, num_features), name=f'reshape_{name}')(squeezed)
    # Excite: bottleneck then per-channel sigmoid gate.
    excited = Dense(int(num_features * amplifying_ratio),
                    activation='relu',
                    name=f'ex0_{name}')(squeezed)
    return Dense(num_features, activation='sigmoid', name=f'ex1_{name}')(excited)
# Example 9
def skeleton_cnn(num_frame, weights):
    """Rebuild the five-stage conv backbone and load pretrained weights.

    Layer instantiation order is preserved exactly as in the training
    script: Keras auto-names layers by creation order, and
    ``model.load_weights`` depends on that architecture matching.

    Parameters
    ----------
    num_frame : int
        Number of time frames in the (num_frame, 128) input.
    weights : str
        Path to the pretrained weight file.

    Returns
    -------
    Keras Model ending in a cosine self-similarity output.
    """
    x_input = Input(shape=(num_frame, 128))
    
    # audio model 
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    # activ1 = Activation('relu')
    mp1 = MaxPool1D(pool_size=3)

    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    # activ2 = Activation('relu')
    mp2 = MaxPool1D(pool_size=3)
    
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    # activ3 = Activation('relu')
    mp3 = MaxPool1D(pool_size=3)
    do3 = Dropout(0.5)
    
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    mp4 = MaxPool1D(pool_size=3)

    # 1x1 bottleneck stage; no pooling, dropout instead.
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    # activ5 = Activation('relu')
    do5 = Dropout(0.5)

    ap = GlobalAvgPool1D()
    
    # Anchor 
    out = mp1(activ1(bn1(conv1(x_input))))
    out = mp2(activ2(bn2(conv2(out))))
    out = mp3(activ3(bn3(conv3(out))))
    out = do3(out)
    out = mp4(activ4(bn4(conv4(out))))
    out = activ5(bn5(conv5(out)))
    out = do5(out)
    out = ap(out)
   
    # out = Dense(num_artist, activation='softmax')(out)
    # NOTE(review): cosine similarity of a vector with itself is
    # constant 1 — this output likely exists only so the graph shape
    # matches the checkpoint being loaded; confirm intent.
    out = dot([out, out], axes=1, normalize=True)
    out = Activation('linear')(out)
    model = Model(inputs=x_input, outputs = out)

    model.load_weights(weights)
    return model
# Example 10
def se_fn(x, amplifying_ratio):
    """Squeeze-and-excitation block returning (1, num_features) channel
    gates for the input feature map."""
    num_features = x.shape[-1]
    # Squeeze: collapse the time axis to one global descriptor per channel.
    squeezed = Reshape((1, num_features))(GlobalAvgPool1D()(x))
    # Excite: bottleneck MLP ending in a per-channel sigmoid gate.
    hidden = Dense(
        num_features * amplifying_ratio,
        activation="relu",
        kernel_initializer="glorot_uniform",
    )(squeezed)
    gates = Dense(num_features,
                  activation="sigmoid",
                  kernel_initializer="glorot_uniform")(hidden)
    return gates
# Example 11
def squeeze_excitation(x, amplifying_ratio, name):
    """Named squeeze-and-excitation block producing (1, num_features)
    channel gates; layer names are derived from *name*."""
    # NOTE(review): .value implies TF1-style Dimension objects — confirm
    # the surrounding code still targets TF1/older Keras.
    num_features = x.shape[-1].value
    # Squeeze: global average over the time axis.
    squeezed = GlobalAvgPool1D(name=f'{name}_squeeze')(x)
    squeezed = Reshape((1, num_features), name=f'{name}_reshape')(squeezed)
    # Excite: bottleneck then per-channel sigmoid gate.
    hidden = Dense(num_features * amplifying_ratio,
                   activation='relu',
                   kernel_initializer='glorot_uniform',
                   name=f'{name}_ex0')(squeezed)
    gates = Dense(num_features,
                  activation='sigmoid',
                  kernel_initializer='glorot_uniform',
                  name=f'{name}_ex1')(hidden)
    return gates
# Example 12
def train_and_evaluate_model(hp):
    """Trains and evaluates a model."""
    tokenizer = Tokenizer(oov_token=UNK)
    tokenizer.fit_on_texts(INPUTS)
    # Tokenizer indices start at 1, so prepending PAD aligns id2word[i]
    # with tokenizer index i.
    id2word = [PAD] + list(tokenizer.index_word.values())
    word2id = {word: idx for idx, word in enumerate(id2word)}
    padded_inputs = pad_sequences(
        tokenizer.texts_to_sequences(INPUTS),
        maxlen=hp["pad_len"],
        padding="post",
        truncating="post",
        value=word2id[PAD],
    )

    num_classes = len(set(LABELS))
    # Binary targets by default; one-hot targets for categorical mode.
    if hp["categorical"]:
        labels = to_categorical(LABELS, num_classes)
        loss = "categorical_crossentropy"
    else:
        labels = np.array(LABELS)
        loss = "binary_crossentropy"

    model = Sequential()

    model.add(
        _rand_embedding(
            id2word, hp["emb_output_dim"], mask_zero=True, input_length=hp["pad_len"]
        )
    )

    # Reduce the (pad_len, emb_dim) sequence to a fixed-size vector.
    arch = hp["arch"]
    if arch == "flatten":
        model.add(Flatten())
    elif arch == "avg_pool":
        model.add(GlobalAvgPool1D())
    elif arch == "max_pool":
        model.add(GlobalMaxPool1D())
    else:
        assert False, hp["arch"]

    if hp["categorical"]:
        model.add(Dense(num_classes, activation="softmax"))
    else:
        model.add(Dense(1, activation="sigmoid"))

    model.compile(loss=loss, optimizer="adam", metrics=["acc"])
    print(model.summary())
    model.fit(padded_inputs, labels, epochs=50, verbose=0)
    loss, acc = model.evaluate(padded_inputs, labels, verbose=0)
    print(f"loss={loss:.4f} accuracy={acc:.4f}")
def naive_attention(x):
    '''
        Squeeze-and-excitation style channel attention (no reduction,
        since reduction_ratio is 1).
    '''
    reduction_ratio = 1
    num_filter = int(x.shape[-1])
    num_neurons = num_filter // reduction_ratio

    # Squeeze: one global descriptor per channel.
    descriptor = GlobalAvgPool1D()(x)

    # Predict the per-channel attention map.
    hidden = Dense(num_neurons, activation='relu', use_bias=False)(descriptor)
    attention_map = Dense(num_filter, activation='sigmoid', name='att_map',
                          use_bias=False)(hidden)

    # Recalibrate the feature maps by the predicted weights.
    return Multiply()([x, attention_map])
# Example 14
def transformer_classifier(
    num_layers=4,
    d_model=128,
    num_heads=8,
    dff=256,
    maximum_position_encoding=2048,
    n_classes=16,
):
    """Transformer-encoder classifier.

    Encoder stack -> dropout -> average pooling over time -> selu dense ->
    n_classes sigmoid outputs; compiled with the project's custom binary
    loss and accuracy.
    """
    inp = Input((None, d_model))

    hidden = Encoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        maximum_position_encoding=maximum_position_encoding,
        rate=0.3,
    )(inp)

    hidden = Dropout(0.2)(hidden)

    # Collapse the time axis to a fixed-size vector.
    hidden = GlobalAvgPool1D()(hidden)

    hidden = Dense(4 * n_classes, activation="selu")(hidden)

    out = Dense(n_classes, activation="sigmoid")(hidden)

    model = Model(inputs=inp, outputs=out)

    model.compile(optimizer=Adam(0.00001),
                  loss=custom_binary_crossentropy,
                  metrics=[custom_binary_accuracy])

    model.summary()

    return model
def feature_attention(x, original_input, i):
    '''
        Squeeze-and-excitation attention whose gate prediction is
        augmented with features extracted from the original network input.
    '''
    reduction_ratio = 1
    num_filter = int(x.shape[-1])
    num_neurons = num_filter // reduction_ratio

    # Squeeze: global average over time.
    squeezed = GlobalAvgPool1D()(x)

    # Small conv net distills the raw input into a 32-d feature vector.
    features = Conv1D(filters=16, kernel_size=7, padding='same')(original_input)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = Conv1D(filters=4, kernel_size=5, padding='same')(features)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = Flatten()(features)
    features = Dense(32, activation='linear')(features)

    # Predict the attention map from pooled channels + input features.
    combined = Concatenate()([squeezed, features])
    hidden = Dense(num_neurons, activation='relu',
                   name='att_input_%d' % i)(combined)
    attention_map = Dense(num_filter, activation='sigmoid',
                          name='att_map_%d' % i)(hidden)

    # Recalibrate the feature maps channel-wise.
    return Multiply()([x, attention_map])
# Example 16
def siamese_cnn_track_level(num_frame, num_neg_artist, num_vocal_segments):
    """Track-level siamese CNN: every track is a set of vocal segments.

    One shared conv backbone embeds each segment; segment embeddings are
    averaged per track (``track_avg``), then the anchor track is compared
    by cosine similarity against one positive track and num_neg_artist
    negative tracks.  All layers are instantiated once and reused so the
    anchor/positive/negative branches share weights.

    Parameters
    ----------
    num_frame : int
        Frames per spectrogram segment.
    num_neg_artist : int
        Number of negative tracks (one per negative artist).
    num_vocal_segments : int
        Segments per track.

    Returns
    -------
    Keras Model mapping all segment inputs to the concatenated
    [positive, negatives...] similarity vector.
    """
    anchor_items = [Input(shape=(num_frame,config.n_mels)) for i in range(num_vocal_segments)]
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_vocal_segments)]
    neg_items_of_items= [[Input(shape=(num_frame, config.n_mels)) for i in range(num_vocal_segments)] for j in range(num_neg_artist)]

    # audio model 
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization(axis=2)
    activ1 = LeakyReLU(0.2)
    mp1 = MaxPool1D(pool_size=3)

    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization(axis=2)
    activ2 = LeakyReLU(0.2)
    mp2 = MaxPool1D(pool_size=3)
    
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization(axis=2)
    activ3 = LeakyReLU(0.2)
    mp3 = MaxPool1D(pool_size=3)
    do3 = Dropout(0.5)
    
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization(axis=2)
    activ4 = LeakyReLU(0.2)
    mp4 = MaxPool1D(pool_size=3)

    # 1x1 bottleneck stage; dropout instead of pooling.
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization(axis=2)
    activ5 = LeakyReLU(0.2)
    do5 = Dropout(0.5)

    ap = GlobalAvgPool1D()

    # Averages a list of segment embeddings into one track embedding.
    track_avg = Lambda(track_average, track_average_output_shape)


    # Anchor 
    anchor_outs = [mp1(activ1(bn1(conv1(anchor)))) for anchor in anchor_items]
    anchor_outs = [mp2(activ2(bn2(conv2(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [mp3(activ3(bn3(conv3(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [do3(anchor_out) for anchor_out in anchor_outs]
    anchor_outs = [mp4(activ4(bn4(conv4(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [activ5(bn5(conv5(anchor_out))) for anchor_out in anchor_outs]
    anchor_outs = [do5(anchor_out) for anchor_out in anchor_outs]
    anchor_outs = [ap(anchor_out) for anchor_out in anchor_outs]
    print ('anchor out', len(anchor_outs), np.array(anchor_outs).shape, anchor_outs[0].shape)

    # Pos 
    pos_outs = [mp1(activ1(bn1(conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [mp2(activ2(bn2(conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [mp3(activ3(bn3(conv3(pos_out)))) for pos_out in pos_outs]
    pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [mp4(activ4(bn4(conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [activ5(bn5(conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [do5(pos_out) for pos_out in pos_outs]
    pos_outs = [ap(pos_out) for pos_out in pos_outs]


    # Negs
    neg_outs_of_outs = [[mp1(activ1(bn1(conv1(neg_item)))) for neg_item in neg_items] for neg_items in neg_items_of_items]
    neg_outs_of_outs = [[mp2(activ2(bn2(conv2(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[mp3(activ3(bn3(conv3(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[do3(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[mp4(activ4(bn4(conv4(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[activ5(bn5(conv5(neg_out))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[do5(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[ap(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]

    
    # track level averaging 


    anchor_mean = track_avg(anchor_outs)
    pos_mean = track_avg(pos_outs)
    neg_means = [track_avg(neg_outs)for neg_outs in neg_outs_of_outs]

    print ('mean', anchor_mean.shape)


    # Cosine similarity of the anchor track against positive and negatives.
    pos_dist = dot([anchor_mean, pos_mean], axes=1, normalize=True)
    neg_dists = [dot([anchor_mean, neg_mean], axes=1, normalize=True) for neg_mean in neg_means]

    all_dists = concatenate([pos_dist] +  neg_dists)

    outputs = Activation('linear')(all_dists)

    
    # Flatten the nested negative inputs; final order is
    # anchors, positives, then all negative segments.
    inputs = [] 
    for track_specs in neg_items_of_items:
        for ts in track_specs:
            inputs.append(ts)
    inputs = anchor_items + pos_items + inputs 
    print ('inputs', len(inputs))
    model = Model(inputs=inputs, outputs=outputs)
    return model 
# Example 17
def siamese_cnn_mono2mix(num_frame, num_neg_artist, num_pos_track):
    """Cross-domain siamese CNN: a vocal-audio anchor vs. mix-audio items.

    Two separate conv backbones are built — one for the anchor (vocal
    audio, ``conv1..conv5``) and one for positives/negatives (mix audio,
    ``m_conv1..m_conv5``).  The anchor embedding is compared by cosine
    similarity against each positive and negative embedding; the
    concatenated similarities form the 'siamese' output.

    Parameters
    ----------
    num_frame : int
        Frames per spectrogram input.
    num_neg_artist : int
        Number of negative (mix) branches.
    num_pos_track : int
        Number of positive (mix) branches.

    Returns
    -------
    Keras Model with inputs [anchor] + positives + negatives.
    """
    anchor = Input(shape=(num_frame,config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_track)]
    neg_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_artist)]

    # vocal audio model 
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    mp1 = MaxPool1D(pool_size=3)

    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    mp2 = MaxPool1D(pool_size=3)
    
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    mp3 = MaxPool1D(pool_size=3)
    # do3 = Dropout(0.2)
    
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    mp4 = MaxPool1D(pool_size=3)

    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    do5 = Dropout(0.3)

    ap = GlobalAvgPool1D()
    

    # mix audio model 
    m_conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn1 = BatchNormalization()
    m_activ1 = LeakyReLU(0.2)
    m_mp1 = MaxPool1D(pool_size=3)

    m_conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn2 = BatchNormalization()
    m_activ2 = LeakyReLU(0.2)
    m_mp2 = MaxPool1D(pool_size=3)
    
    m_conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn3 = BatchNormalization()
    m_activ3 = LeakyReLU(0.2)
    m_mp3 = MaxPool1D(pool_size=3)
    # NOTE(review): do3 is defined here (mix section) but is used by BOTH
    # the vocal anchor path and the mix paths below; the vocal model's own
    # do3 above is commented out.  Dropout is stateless so behavior is
    # unaffected, but confirm the sharing is intended.
    do3 = Dropout(0.5)
    
    m_conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn4 = BatchNormalization()
    m_activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    m_mp4 = MaxPool1D(pool_size=3)

    m_conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn5 = BatchNormalization()
    m_activ5 = LeakyReLU(0.2)
    m_do5 = Dropout(0.3)

    m_ap = GlobalAvgPool1D()
    
    
    # Anchor 
    anchor_out = mp1(activ1(bn1(conv1(anchor))))
    anchor_out = mp2(activ2(bn2(conv2(anchor_out))))
    anchor_out = mp3(activ3(bn3(conv3(anchor_out))))
    anchor_out = do3(anchor_out)
    anchor_out = mp4(activ4(bn4(conv4(anchor_out))))
    anchor_out = activ5(bn5(conv5(anchor_out)))
    anchor_out = do5(anchor_out)
    anchor_out = ap(anchor_out)

    # Pos 
    pos_outs = [m_mp1(m_activ1(m_bn1(m_conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [m_mp2(m_activ2(m_bn2(m_conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [m_mp3(m_activ3(m_bn3(m_conv3(pos_out)))) for pos_out in pos_outs]
    pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [m_mp4(m_activ4(m_bn4(m_conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [m_activ5(m_bn5(m_conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [m_do5(pos_out) for pos_out in pos_outs]
    pos_outs = [m_ap(pos_out) for pos_out in pos_outs]
    
    # Negs
    neg_outs = [m_mp1(m_activ1(m_bn1(m_conv1(neg_item)))) for neg_item in neg_items]
    neg_outs = [m_mp2(m_activ2(m_bn2(m_conv2(neg_out)))) for neg_out in neg_outs]
    neg_outs = [m_mp3(m_activ3(m_bn3(m_conv3(neg_out)))) for neg_out in neg_outs]
    neg_outs = [do3(neg_out) for neg_out in neg_outs]
    neg_outs = [m_mp4(m_activ4(m_bn4(m_conv4(neg_out)))) for neg_out in neg_outs]
    neg_outs = [m_activ5(m_bn5(m_conv5(neg_out))) for neg_out in neg_outs]
    neg_outs = [m_do5(neg_out) for neg_out in neg_outs]
    neg_outs = [m_ap(neg_out) for neg_out in neg_outs]


    #### cosine similarity between the anchor and each mix branch
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True) for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True) for neg_out in neg_outs]
    
    all_dists = concatenate(pos_dists + neg_dists)
    outputs = Activation('linear', name='siamese')(all_dists)

    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=outputs)
    return model 
# Example 18
def siamese_cnn(num_frame, num_neg_artist, num_pos_track):
    """Siamese CNN: a single shared conv backbone embeds the anchor plus
    positive and negative spectrograms; the output is the vector of cosine
    similarities [positives..., negatives...] against the anchor.

    All layers are instantiated once and reused, so every branch shares
    weights.  Commented-out lines preserve alternative configurations
    (ReLU activations, extra dropout, euclidean distance) explored during
    development.

    Parameters
    ----------
    num_frame : int
        Frames per spectrogram input.
    num_neg_artist : int
        Number of negative branches.
    num_pos_track : int
        Number of positive branches.

    Returns
    -------
    Keras Model with inputs [anchor] + positives + negatives.
    """
    anchor = Input(shape=(num_frame,config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_track)]
    neg_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_artist)]

    # audio model 
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    # activ1 = Activation('relu')
    mp1 = MaxPool1D(pool_size=3)

    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    # activ2 = Activation('relu')
    mp2 = MaxPool1D(pool_size=3)
    
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    # activ3 = Activation('relu')
    mp3 = MaxPool1D(pool_size=3)
    # do3 = Dropout(0.2)
    
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    mp4 = MaxPool1D(pool_size=3)

    # 1x1 bottleneck stage; dropout instead of pooling.
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    # activ5 = Activation('relu')
    do5 = Dropout(0.3)

    ap = GlobalAvgPool1D()
    
    # euc_dist = Lambda(euclidean_dist, euclidean_dist_output_shape)
    # negative_sampling = Lambda(neg_sample, neg_sample_output_shape)

    # l2_dist = Lambda(lambda  x: K.l2_normalize(x[0] - x[1],axis=1))

    # Anchor 
    anchor_out = mp1(activ1(bn1(conv1(anchor))))
    anchor_out = mp2(activ2(bn2(conv2(anchor_out))))
    anchor_out = mp3(activ3(bn3(conv3(anchor_out))))
    # anchor_out = do3(anchor_out)
    anchor_out = mp4(activ4(bn4(conv4(anchor_out))))
    anchor_out = activ5(bn5(conv5(anchor_out)))
    anchor_out = do5(anchor_out)
    anchor_out = ap(anchor_out)

    # Pos 
    pos_outs = [mp1(activ1(bn1(conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [mp2(activ2(bn2(conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [mp3(activ3(bn3(conv3(pos_out)))) for pos_out in pos_outs]
    # pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [mp4(activ4(bn4(conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [activ5(bn5(conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [do5(pos_out) for pos_out in pos_outs]
    pos_outs = [ap(pos_out) for pos_out in pos_outs]


    # Negs
    neg_outs = [mp1(activ1(bn1(conv1(neg_item)))) for neg_item in neg_items]
    neg_outs = [mp2(activ2(bn2(conv2(neg_out)))) for neg_out in neg_outs]
    neg_outs = [mp3(activ3(bn3(conv3(neg_out)))) for neg_out in neg_outs]
    # neg_outs = [do3(neg_out) for neg_out in neg_outs]
    neg_outs = [mp4(activ4(bn4(conv4(neg_out)))) for neg_out in neg_outs]
    neg_outs = [activ5(bn5(conv5(neg_out))) for neg_out in neg_outs]
    neg_outs = [do5(neg_out) for neg_out in neg_outs]
    neg_outs = [ap(neg_out) for neg_out in neg_outs]


    #### cosine similarity of the anchor against each branch
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True) for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True) for neg_out in neg_outs]
    # pos_dists = [l2_dist([anchor_out, pos_out]) for pos_out in pos_outs]
    # neg_dists = [l2_dist([anchor_out, neg_out]) for neg_out in neg_outs]
    
    all_dists = concatenate(pos_dists + neg_dists)
    # all_dists = negative_sampling(all_dists)
    outputs = Activation('linear')(all_dists)

    

    ### euclidean 
    '''
    distance  = Lambda(euclidean_dist, output_shape=euclidean_dist_output_shape)
    pos_dists = [distance([anchor_out, pos_out]) for pos_out in pos_outs]
    neg_dists = [distance([anchor_out, neg_out]) for neg_out in neg_outs]
    all_dists = concatenate(pos_dists + neg_dists)
    outputs = all_dists 
    '''

    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=outputs)
    return model 
    def create_model(
            self,
            min_filters_number,
            max_kernel_size,
            network_depth=3,
            learning_rate=0.01,
            regularization_rate=0.01):
        """
        Generate a ResNet model (see also https://arxiv.org/pdf/1611.06455.pdf).

        The compiled Keras model is returned.

        Parameters
        ----------
        min_filters_number : int
            Number of filters for first convolutional layer
        max_kernel_size: int,
            Maximum kernel size for convolutions within a residual module
        network_depth : int
            Depth of network, i.e. number of residual modules to stack.
            Default is 3.
        learning_rate : float
            Set learning rate. Default is 0.01.
        regularization_rate : float
            Set regularization rate. Default is 0.01.

        Returns
        -------
        model : Keras model
            The compiled Keras model
        """
        dim_length = self.x_shape[1]  # number of samples in a time series
        dim_channels = self.x_shape[2]  # number of channels
        weightinit = 'lecun_uniform'
        regularization = 0  # ignore input on purpose

        def conv_bn_relu_3_sandwich(x, filters, kernel_size):
            # Residual block: three conv/BN/ReLU layers plus a 1x1
            # shortcut projection of the block input.
            first_x = x
            for _ in range(3):
                x = Convolution1D(filters, kernel_size, padding='same',
                                  kernel_initializer=weightinit,
                                  kernel_regularizer=l2(regularization))(x)
                x = BatchNormalization()(x)
                x = ReLU()(x)

            # Bug fix: the 1x1 shortcut convolution must be applied to the
            # block INPUT (first_x), not to the transformed output x —
            # otherwise the skip connection is lost and the add becomes
            # x + conv1x1(x).
            first_x = Convolution1D(filters, kernel_size=1, padding='same',
                                    kernel_initializer=weightinit,
                                    kernel_regularizer=l2(regularization))(first_x)
            x = Add()([x, first_x])
            return x

        x = Input((dim_length, dim_channels))
        inputs = x

        x = BatchNormalization()(inputs)  # Added batchnorm (not in original paper)

        # Define/guess filter sizes and kernel sizes
        # Logic here is that kernels become smaller while the number of filters increases
        kernel_sizes = [max(3, int(max_kernel_size // (1.41 ** i))) for i in range(network_depth)]
        filter_numbers = [int(min_filters_number * (1.41 ** i)) for i in range(network_depth)]

        for i in range(network_depth):
            x = conv_bn_relu_3_sandwich(x, filter_numbers[i], kernel_sizes[i])

        x = GlobalAvgPool1D()(x)
        # NOTE(review): 'relu' on the output with MAE loss looks like a
        # non-negative regression head — confirm this is intended.
        output_layer = Dense(self.number_of_classes, activation='relu')(x)

        # Create model and compile
        model = Model(inputs=inputs, outputs=output_layer)

        model.compile(loss='mean_absolute_error',
                      optimizer=Adam(lr=learning_rate),
                      metrics=self.metrics)

        return model
# Example 20
def build_model(d, word2vec, hp):
    """Builds and compiles a word-level (optionally word+char) Keras model.

    Args:
        d: data container exposing ``id2word``, ``id2char``, ``tag2id`` and
            ``intent2id`` vocabularies.
        word2vec: pretrained word vectors used to seed the embedding matrix
            (via ``embedding_utils.make_embedding_matrix``).
        hp: hyper-parameter object; fields read here: ``max_len_words``,
            ``max_len_chars``, ``word_emb``, ``char_emb``, ``char_enc_dim``,
            ``seq_arch`` ("none" | "lstm" | "bilstm"), ``hidden_dim``,
            ``dropout_rate``, ``mode`` ("seq" | "cls"), ``cls_arch``
            ("avg_pool" | "max_pool"), ``optimizer``.

    Returns:
        A compiled ``Model``. Inputs: the word-id tensor, plus a char-id
        tensor when ``hp.char_emb`` is set. Output: per-token tag softmax
        in "seq" mode, or a single intent softmax in "cls" mode.
    """
    word_x = Input(shape=[hp.max_len_words])
    word_emb_mat = embedding_utils.make_embedding_matrix(
        d.id2word, word2vec, hp.word_emb.initializer)
    # mask_zero=True: padding id 0 is masked through downstream layers.
    word_emb = Embedding(
        len(d.id2word),
        hp.word_emb.dim,
        weights=[word_emb_mat],
        mask_zero=True,
        input_length=hp.max_len_words,
        trainable=hp.word_emb.trainable,
    )(word_x)
    x = [word_x]
    y = word_emb

    if hp.char_emb is not None:
        # Optional character branch: per-word BiLSTM over char embeddings,
        # concatenated onto the word embedding.
        char_x = Input(shape=[hp.max_len_words, hp.max_len_chars])
        x = [word_x, char_x]
        char_emb = TimeDistributed(
            Embedding(
                len(d.id2char),
                hp.char_emb.dim,
                mask_zero=True,
                input_length=hp.max_len_chars,
                trainable=hp.char_emb.trainable,
            ))(char_x)
        char_enc = TimeDistributed(Bidirectional(LSTM(
            hp.char_enc_dim)))(char_emb)
        y = concatenate([word_emb, char_enc])

    if hp.seq_arch == "none":
        pass
    elif hp.seq_arch.endswith("lstm"):
        layer = LSTM(hp.hidden_dim, return_sequences=True)
        if hp.seq_arch == "bilstm":
            layer = Bidirectional(layer)
        else:
            assert hp.seq_arch == "lstm"
        y = layer(y)
    else:
        # BUG FIX: was `assert False, hp.arch` — `hp` has no `arch`
        # attribute, so an unknown seq_arch raised AttributeError instead
        # of the intended diagnostic message.
        assert False, hp.seq_arch

    if hp.dropout_rate > 0:
        y = Dropout(hp.dropout_rate)(y)

    if hp.mode == "seq":
        # TODO: Add CRF layer.
        y = TimeDistributed(Dense(len(d.tag2id), activation="softmax"))(y)
    elif hp.mode == "cls":
        layer = None
        if hp.cls_arch == "avg_pool":
            layer = GlobalAvgPool1D()
        elif hp.cls_arch == "max_pool":
            layer = GlobalMaxPool1D()
        else:
            assert False, hp.cls_arch
        # Pooling must respect the mask introduced by mask_zero=True:
        # https://github.com/tensorflow/tensorflow/issues/33260
        assert layer.supports_masking
        y = layer(y)
        y = Dense(len(d.intent2id), activation="softmax")(y)
    else:
        assert False, hp.mode

    model = Model(x, y)
    model.compile(loss="categorical_crossentropy",
                  optimizer=hp.optimizer,
                  metrics=["acc"])
    return model
예제 #21
0
               padding='same',
               activation='relu',
               kernel_initializer='he_uniform',
               dilation_rate=1,
               name='conv_3')(maxpool_1)
Conv4 = Conv1D(64,
               3,
               strides=1,
               padding='same',
               activation='relu',
               kernel_initializer='he_uniform',
               dilation_rate=1,
               name='conv_4')(Conv3)
maxpool_2 = MaxPool1D(pool_size=2, strides=2, padding='same')(Conv4)

GAP_1 = GlobalAvgPool1D()(maxpool_2)

Conv5 = Conv1D(64,
               3,
               strides=1,
               padding='same',
               activation='relu',
               kernel_initializer='he_uniform',
               dilation_rate=1,
               name='conv_5')(Reshape_Input_2)
Conv6 = Conv1D(64,
               3,
               strides=1,
               padding='same',
               activation='relu',
               kernel_initializer='he_uniform',
예제 #22
0
# Frozen pretrained embedding shared by both branches of the siamese network.
# (left_input/right_input and embedding_matrix are defined earlier in the file.)
embedding_layer = Embedding(len(embedding_matrix), EMBEDDING_DIM, 
                            weights=[embedding_matrix],
                            trainable=False)

# Embedded version of the inputs
encoded_left = embedding_layer(left_input)
encoded_right = embedding_layer(right_input)

# Since this is a siamese network, both sides share the same LSTM
shared_bilstm = Bidirectional( LSTM(100, return_sequences=True, dropout=0.1, recurrent_dropout=0.1) )
left_output = shared_bilstm(encoded_left)
right_output = shared_bilstm(encoded_right)

# Shared pooling/concat/dropout layer instances applied to both branches:
# each branch is summarized as [max-pool ; avg-pool] over the time axis.
maxpool = GlobalMaxPool1D()
avgpool = GlobalAvgPool1D()
concatenate1 = Concatenate()
dropout1 = Dropout(0.1)
x1 = concatenate1([ maxpool(left_output), avgpool(left_output)  ])
x2 = concatenate1([ maxpool(right_output), avgpool(right_output) ])
x1 = dropout1(x1)
x2 = dropout1(x2)

# NOTE(review): "Squared_diff" computes sqrt(x1^2 - x2^2), which is NaN
# wherever x2^2 > x1^2 — confirm whether sqrt((x1 - x2)^2) (i.e. |x1 - x2|)
# or (x1 - x2)^2 was intended.
sqr_diff = Lambda( lambda tensors: K.pow((K.square(tensors[0])-K.square(tensors[1])), 0.5), name="Squared_diff" )
abs_diff = Lambda( lambda tensors : K.abs(tensors[0]-tensors[1]), name="Absolute_diff" )
concatenate2 = Concatenate()
diff = concatenate2([ sqr_diff([x1,x2]), abs_diff([x1,x2]) ])
diff = Dropout(0.1)(diff)

# Classifier head over the combined difference features.
diff = Dense(100, activation="relu")(diff)
diff = Dropout(0.1)(diff)
# Load AWS Transcribe output as a flat list of word tokens.
with open('aws_transcribe.txt') as f:
    aws_text = [word for line in f for word in line.split()]

# Fit a word-level tokenizer on the training text (df is loaded earlier).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df.text)

# BUG FIX: was `pad_squences` (NameError); the prediction call at the
# bottom of this script already spells `pad_sequences` correctly.
x = pad_sequences(tokenizer.texts_to_sequences(df.text), 50)
y = df.sentiment

# Build RNN: embedding -> spatial dropout -> BiGRU -> max+avg pool -> softmax.
# (renamed `input` -> `text_input` to stop shadowing the builtin)
text_input = Input((50, ), name='input')
# BUG FIX: +1 because Keras Tokenizer word indices start at 1, so the
# largest index equals len(word_index); Embedding's input_dim must exceed
# every index it receives.
embed = Embedding(len(tokenizer.word_index) + 1, 100)(text_input)
embed_dropout = SpatialDropout1D(0.5)(embed)
# BUG FIX: was `GRI` (NameError) — the intended recurrent layer is GRU.
rnn = Bidirectional(GRU(50, return_sequences=True,
                        recurrent_dropout=0.2))(embed_dropout)
max_pool = GlobalMaxPool1D()(rnn)
avg_pool = GlobalAvgPool1D()(rnn)
concat = Concatenate()([max_pool, avg_pool])
dense = Dense(3, activation='softmax')(concat)

# Train RNN (sparse loss: y holds integer class labels, not one-hot).
model = Model(text_input, dense)
model.compile('adam', 'sparse_categorical_crossentropy',
              ['sparse_categorical_accuracy'])
model.fit(x, y, batch_size=512, validation_split=0.2, epochs=25)

# Predict sentiment for the transcribed words.
model.predict(pad_sequences(tokenizer.texts_to_sequences(aws_text), 50))
예제 #24
0
# 1-D CNN stack appended to a `model` constructed earlier in the file
# (the Sequential() creation and input layer are above this chunk).
model.add(BatchNormalization())
model.add(Conv1D(filters=8, kernel_size=9, activation='relu',
                 kernel_regularizer = l2(0.1)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=16, kernel_size=9, activation='relu'))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv1D(filters=64, kernel_size=4, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Conv1D(filters=32, kernel_size=1, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.7))
# Collapse the time axis, then classify into the 3 classes listed below.
model.add(GlobalAvgPool1D())
model.add(Dense(3, activation='softmax'))

# Restore pretrained weights (inference/fine-tuning rather than training
# from scratch). Trailing semicolon is a harmless no-op in Python.
model.load_weights("modelWeights.h5");


# Dataset constants for the heart-sound classification task.
INPUT_LIB = './input/'
SAMPLE_RATE = 44100
CLASSES = ['artifact', 'normal', 'murmur']
# Maps class name -> integer label index (order matches CLASSES).
CODE_BOOK = {x:i for i,x in enumerate(CLASSES)}   
NB_CLASSES = len(CLASSES)

def repeat_to_length(arr, length):
    """Repeats the numpy 1D array to given length, and makes datatype float"""
    result = np.empty((length, ), dtype = 'float32')
    l = len(arr)