def baseline_model(seq_dim=3):
    """Siamese binary classifier over two variable-length sequences.

    Both inputs are embedded with one shared encoder, pooled (max+avg),
    and compared via element-wise product and squared difference before a
    small sigmoid head.

    Parameters
    ----------
    seq_dim : int
        Per-timestep feature dimension of both inputs. Default 3.

    Returns
    -------
    Compiled Keras Model taking [input_1, input_2].
    """
    # BUG FIX: the original hard-coded 3 for input_1's shape and for
    # encoder(seq_dim=3) while input_2 used seq_dim; use seq_dim
    # consistently (matches the single-input variant later in this file).
    # The default seq_dim=3 preserves the original behavior.
    input_1 = Input(shape=(None, seq_dim))
    input_2 = Input(shape=(None, seq_dim))
    base_model = encoder(seq_dim=seq_dim)
    x1 = base_model(input_1)
    x2 = base_model(input_2)
    # Pool each branch over time: concat of global max and average pooling.
    x1 = Concatenate(axis=-1)([GlobalMaxPool1D()(x1), GlobalAvgPool1D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool1D()(x2), GlobalAvgPool1D()(x2)])
    # Squared difference and element-wise product as comparison features.
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])
    x = Multiply()([x1, x2])
    x = Concatenate(axis=-1)([x, x3])
    x = Dropout(0.1)(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(0.1)(x)
    out = Dense(1, activation="sigmoid")(x)
    model = Model([input_1, input_2], out)
    model.compile(loss="binary_crossentropy", metrics=[acc],
                  optimizer=Adam(0.0001))
    model.summary()
    # NOTE(review): this file later re-defines baseline_model (single-input
    # variant); at import time the later definition wins — confirm intent.
    return model
def build_encoder(n_input_dim, n_encoding_dim, n_conv_block, n_conv_layers,
                  n_conv_filters, conv_filter_size, n_dense_layers,
                  n_dense_units, activation, batch_norm=False, l2_lambda=0,
                  dropout_prob=0):
    """Assemble the encoder network.

    Stacks `n_conv_block` conv blocks (each followed by 2x max pooling),
    global-average-pools over time, applies a dense block, and projects
    linearly to `n_encoding_dim`.

    Returns a Model mapping (FEATURE_VEC_LEN, n_input_dim) sequences to an
    n_encoding_dim encoding.
    """
    seq_input = Input([FEATURE_VEC_LEN, n_input_dim])

    # Convolutional front end; per-block hyperparameters come from the
    # parallel lists indexed by block position.
    features = seq_input
    for block_idx in range(n_conv_block):
        features = conv_block(n_conv_filters[block_idx],
                              conv_filter_size[block_idx],
                              n_conv_layers[block_idx],
                              activation, batch_norm, l2_lambda,
                              dropout_prob)(features)
        features = MaxPool1D((2, ))(features)
    features = GlobalAvgPool1D()(features)

    # Fully-connected tail.
    features = dense_block(n_dense_units, n_dense_layers, activation,
                           batch_norm, l2_lambda, dropout_prob)(features)

    # Final linear projection produces the encoding.
    encoding = Dense(n_encoding_dim)(features)
    return Model(seq_input, encoding)
def basic_cnn(num_frame, num_artist):
    """Plain 1D-CNN artist classifier over (num_frame, 128) features.

    Four conv(128, k=3) stages each followed by 3x max pooling (dropout
    after the last two), a 1x1 conv to 256 channels, global average
    pooling, and a softmax over `num_artist` classes.
    """
    def conv_unit(t, filters, ksize):
        # Conv -> BatchNorm (channel axis) -> LeakyReLU, as in the original.
        t = Conv1D(filters, kernel_size=ksize, padding='same', use_bias=True,
                   kernel_regularizer=l2(1e-5),
                   kernel_initializer='he_normal')(t)
        t = BatchNormalization(axis=2)(t)
        return LeakyReLU(0.2)(t)

    x_input = Input(shape=(num_frame, 128))
    out = MaxPool1D(pool_size=3)(conv_unit(x_input, 128, 3))
    out = MaxPool1D(pool_size=3)(conv_unit(out, 128, 3))
    out = MaxPool1D(pool_size=3)(conv_unit(out, 128, 3))
    out = Dropout(0.5)(out)
    out = MaxPool1D(pool_size=3)(conv_unit(out, 128, 3))
    out = Dropout(0.5)(out)
    out = conv_unit(out, 256, 1)
    out = Dropout(0.5)(out)
    out = GlobalAvgPool1D()(out)
    out = Dense(num_artist, activation='softmax')(out)
    return Model(inputs=x_input, outputs=out)
def finetuning_siamese_cnn(mymodel_tmp, num_frame, num_neg_singers,
                           num_pos_tracks):
    """Wrap a pretrained embedding model in a siamese fine-tuning graph.

    Parameters
    ----------
    mymodel_tmp : Keras Model
        Pretrained model mapping a (num_frame, config.n_mels) spectrogram
        to an embedding; shared by every branch.
    num_frame, num_neg_singers, num_pos_tracks : int
        Input length and the number of negative / positive branches.

    Returns
    -------
    Model whose output concatenates anchor-vs-positive cosine similarities
    followed by anchor-vs-negative ones.
    """
    anchor = Input(shape=(num_frame, config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels))
                 for _ in range(num_pos_tracks)]
    neg_items = [Input(shape=(num_frame, config.n_mels))
                 for _ in range(num_neg_singers)]

    # NOTE(review): removed the original's unused locals
    # `dense = Dense(256)` and `ap = GlobalAvgPool1D()` — neither layer was
    # ever applied, so they had no effect on the graph.
    anchor_out = mymodel_tmp(anchor)
    pos_outs = [mymodel_tmp(pos_item) for pos_item in pos_items]
    neg_outs = [mymodel_tmp(neg_item) for neg_item in neg_items]

    ### cosine similarity of L2-normalized embeddings
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True)
                 for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True)
                 for neg_out in neg_outs]
    all_dists = concatenate(pos_dists + neg_dists)
    outputs = Activation('linear')(all_dists)

    model = Model(inputs=[anchor] + pos_items + neg_items, outputs=outputs)
    return model
def buildModel():
    """Bi-LSTM + Conv1D multi-label text classifier (6 sigmoid outputs).

    Uses frozen pretrained embeddings (module-level `embedding_matrix`),
    a bidirectional LSTM, a width-3 convolution over its states, and
    concatenated global max/avg pooling.

    Returns
    -------
    The compiled Keras Model.
    """
    inp = Input((MAXIMUM_SEQ_LEN, ))
    # use embeddings (frozen, pretrained)
    emb = Embedding(VOCAB_LENGTH, EMBEDDING_DIM, weights=[embedding_matrix],
                    trainable=False)(inp)
    # to drop some embedding channels instead of particular cells
    emb = SpatialDropout1D(0.2)(emb)
    # generate 100(fwd) + 100(bwd) hidden states
    hidden_states = Bidirectional(
        LSTM(100, return_sequences=True, dropout=0.1,
             recurrent_dropout=0.1))(emb)
    # on each hidden state use 64 kernels of size 3
    conv = Conv1D(64, kernel_size=3, padding="valid",
                  kernel_initializer="glorot_uniform")(hidden_states)
    # take maximum / average over time for each channel
    x1 = GlobalMaxPool1D()(conv)
    x2 = GlobalAvgPool1D()(conv)
    # concatenate both poolings
    x = Concatenate()([x1, x2])
    x = Dropout(0.2)(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(0.1)(x)
    out = Dense(6, activation='sigmoid')(x)
    model = Model(inp, out)
    model.compile(loss="binary_crossentropy", optimizer="adam",
                  metrics=[AUC(name="auc")])
    # BUG FIX: the original compiled the model but never returned it, so
    # every caller received None.
    return model
def rnn_classifier(d_model=128, n_layers=2, n_classes=16):
    """Stacked bidirectional-GRU classifier.

    One Bi-GRU layer is always applied; when n_classes > 1, the remaining
    n_layers - 1 Bi-GRU layers are stacked on top. Features are then
    dropout-regularized, average-pooled over time, passed through a selu
    dense layer, and projected to n_classes sigmoid outputs.
    """
    seq_in = Input((None, d_model))
    h = Bidirectional(GRU(d_model, return_sequences=True))(seq_in)
    if n_classes > 1:
        for _ in range(n_layers - 1):
            h = Bidirectional(GRU(d_model, return_sequences=True))(h)
    h = Dropout(0.2)(h)
    h = GlobalAvgPool1D()(h)
    h = Dense(4 * n_classes, activation="selu")(h)
    out = Dense(n_classes, activation="sigmoid")(h)

    model = Model(inputs=seq_in, outputs=out)
    model.compile(optimizer=Adam(0.00001),
                  loss=custom_binary_crossentropy,
                  metrics=[custom_binary_accuracy])
    model.summary()
    return model
def baseline_model(seq_dim=3):
    """Single-input binary classifier over the shared sequence encoder.

    Encodes one variable-length (None, seq_dim) sequence, pools it
    (max + avg concatenated), and classifies with a small sigmoid head.
    Heavy 0.5 dropout throughout.
    """
    seq_input = Input(shape=(None, seq_dim))
    encoded = encoder(seq_dim=seq_dim)(seq_input)
    encoded = Dropout(0.5)(encoded)
    pooled = Concatenate(axis=-1)([GlobalMaxPool1D()(encoded),
                                   GlobalAvgPool1D()(encoded)])
    hidden = Dropout(0.5)(pooled)
    hidden = Dense(100, activation="relu")(hidden)
    hidden = Dropout(0.5)(hidden)
    prob = Dense(1, activation="sigmoid")(hidden)

    model = Model(seq_input, prob)
    model.compile(loss="binary_crossentropy", metrics=[acc],
                  optimizer=Adam(0.0001))
    model.summary()
    return model
def squeeze_exciation(x, amplifying_ratio, name):
    """Squeeze-and-excitation gate for a 1D feature map.

    Globally averages `x` over time, runs a two-layer bottleneck MLP, and
    returns per-channel sigmoid weights of shape (1, num_features).
    (Function name keeps the original spelling — it is public API.)
    """
    # TF1-style static dimension access (.value); consistent with the
    # sibling squeeze_excitation below.
    n_channels = x.shape[-1].value
    squeezed = GlobalAvgPool1D(name=f'squeeze_{name}')(x)
    squeezed = Reshape((1, n_channels), name=f'reshape_{name}')(squeezed)
    excited = Dense(int(n_channels * amplifying_ratio), activation='relu',
                    name=f'ex0_{name}')(squeezed)
    return Dense(n_channels, activation='sigmoid',
                 name=f'ex1_{name}')(excited)
def skeleton_cnn(num_frame, weights):
    """Rebuild the artist-CNN backbone and restore pretrained weights.

    Layer creation order is identical to the training-time model so that
    `load_weights` matches weights topologically.
    """
    x_input = Input(shape=(num_frame, 128))

    def block(t, filters, ksize):
        # Conv -> BatchNorm -> LeakyReLU(0.2), shared structure of the stack.
        t = Conv1D(filters, kernel_size=ksize, padding='same', use_bias=True,
                   kernel_regularizer=l2(1e-5),
                   kernel_initializer='he_normal')(t)
        t = BatchNormalization()(t)
        return LeakyReLU(0.2)(t)

    out = MaxPool1D(pool_size=3)(block(x_input, 128, 3))
    out = MaxPool1D(pool_size=3)(block(out, 128, 3))
    out = MaxPool1D(pool_size=3)(block(out, 128, 3))
    out = Dropout(0.5)(out)
    out = MaxPool1D(pool_size=3)(block(out, 128, 3))
    out = block(out, 256, 1)
    out = Dropout(0.5)(out)
    out = GlobalAvgPool1D()(out)
    # NOTE(review): cosine of the embedding with itself is constant 1 —
    # presumably a placeholder output for weight loading; confirm intent.
    out = dot([out, out], axes=1, normalize=True)
    out = Activation('linear')(out)

    model = Model(inputs=x_input, outputs=out)
    model.load_weights(weights)
    return model
def se_fn(x, amplifying_ratio):
    """Squeeze-and-excitation: return per-channel gates in [0, 1].

    Average-pools `x` over time, then a relu bottleneck of width
    num_features * amplifying_ratio followed by a sigmoid projection back
    to num_features, shaped (1, num_features).
    """
    n_feats = x.shape[-1]
    gate = GlobalAvgPool1D()(x)  # squeeze over the temporal axis
    gate = Reshape((1, n_feats))(gate)
    gate = Dense(n_feats * amplifying_ratio, activation="relu",
                 kernel_initializer="glorot_uniform")(gate)
    gate = Dense(n_feats, activation="sigmoid",
                 kernel_initializer="glorot_uniform")(gate)
    return gate
def squeeze_excitation(x, amplifying_ratio, name):
    """Named squeeze-and-excitation gate (TF1-style shapes).

    Same computation as `se_fn` but with deterministic layer names
    prefixed by `name`, and static dims read via `.value`.
    """
    n_channels = x.shape[-1].value
    gate = GlobalAvgPool1D(name=f'{name}_squeeze')(x)
    gate = Reshape((1, n_channels), name=f'{name}_reshape')(gate)
    gate = Dense(n_channels * amplifying_ratio, activation='relu',
                 kernel_initializer='glorot_uniform',
                 name=f'{name}_ex0')(gate)
    gate = Dense(n_channels, activation='sigmoid',
                 kernel_initializer='glorot_uniform',
                 name=f'{name}_ex1')(gate)
    return gate
def train_and_evaluate_model(hp):
    """Trains and evaluates a model."""
    tok = Tokenizer(oov_token=UNK)
    tok.fit_on_texts(INPUTS)
    id2word = [PAD] + list(tok.index_word.values())
    word2id = {w: i for i, w in enumerate(id2word)}

    padded_inputs = pad_sequences(
        tok.texts_to_sequences(INPUTS),
        maxlen=hp["pad_len"],
        padding="post",
        truncating="post",
        value=word2id[PAD],
    )

    num_classes = len(set(LABELS))
    if hp["categorical"]:
        labels = to_categorical(LABELS, num_classes)
        loss = "categorical_crossentropy"
    else:
        labels = np.array(LABELS)
        loss = "binary_crossentropy"

    model = Sequential()
    model.add(_rand_embedding(id2word, hp["emb_output_dim"], mask_zero=True,
                              input_length=hp["pad_len"]))
    # Dispatch table replaces the original if/elif chain; unknown arch
    # still fails an assert carrying the offending value.
    arch_to_layer = {
        "flatten": Flatten,
        "avg_pool": GlobalAvgPool1D,
        "max_pool": GlobalMaxPool1D,
    }
    assert hp["arch"] in arch_to_layer, hp["arch"]
    model.add(arch_to_layer[hp["arch"]]())
    if hp["categorical"]:
        model.add(Dense(num_classes, activation="softmax"))
    else:
        model.add(Dense(1, activation="sigmoid"))

    model.compile(loss=loss, optimizer="adam", metrics=["acc"])
    print(model.summary())
    model.fit(padded_inputs, labels, epochs=50, verbose=0)
    loss, acc = model.evaluate(padded_inputs, labels, verbose=0)
    print(f"loss={loss:.4f} accuracy={acc:.4f}")
def naive_attention(x):
    '''squeeze and excitation: recalibrate channels of x with a learned gate'''
    reduction_ratio = 1
    n_channels = int(x.shape[-1])
    n_hidden = n_channels // reduction_ratio  # no bottleneck at ratio 1

    # squeeze: global average over the temporal axis
    pooled = GlobalAvgPool1D()(x)

    # excitation: bias-free MLP predicting a per-channel sigmoid gate
    hidden = Dense(n_hidden, activation='relu', use_bias=False)(pooled)
    gate = Dense(n_channels, activation='sigmoid', name='att_map',
                 use_bias=False)(hidden)

    # feature recalibration
    return Multiply()([x, gate])
def transformer_classifier(num_layers=4, d_model=128, num_heads=8, dff=256,
                           maximum_position_encoding=2048, n_classes=16):
    """Transformer-encoder classifier over (None, d_model) sequences.

    Encoder output is dropout-regularized, average-pooled over time,
    passed through a selu dense layer of width 4*n_classes, and projected
    to n_classes sigmoid outputs.
    """
    seq_in = Input((None, d_model))
    enc = Encoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        maximum_position_encoding=maximum_position_encoding,
        rate=0.3,
    )
    h = enc(seq_in)
    h = Dropout(0.2)(h)
    h = GlobalAvgPool1D()(h)
    h = Dense(4 * n_classes, activation="selu")(h)
    probs = Dense(n_classes, activation="sigmoid")(h)

    model = Model(inputs=seq_in, outputs=probs)
    model.compile(optimizer=Adam(0.00001),
                  loss=custom_binary_crossentropy,
                  metrics=[custom_binary_accuracy])
    model.summary()
    return model
def feature_attention(x, original_input, i):
    '''squeeze and excitation, conditioned on features of the raw input'''
    reduction_ratio = 1
    n_channels = int(x.shape[-1])
    n_hidden = n_channels // reduction_ratio  # no bottleneck at ratio 1

    # squeeze the current feature map over time
    squeezed = GlobalAvgPool1D()(x)

    # small conv net summarizing the original input
    feats = Conv1D(filters=16, kernel_size=7, padding='same')(original_input)
    feats = BatchNormalization()(feats)
    feats = Activation('relu')(feats)
    feats = Conv1D(filters=4, kernel_size=5, padding='same')(feats)
    feats = BatchNormalization()(feats)
    feats = Activation('relu')(feats)
    feats = Flatten()(feats)
    feats = Dense(32, activation='linear')(feats)

    # predict the attention map from [squeezed channels, input summary]
    joined = Concatenate()([squeezed, feats])
    hidden = Dense(n_hidden, activation='relu',
                   name='att_input_%d' % i)(joined)
    attention_map = Dense(n_channels, activation='sigmoid',
                          name='att_map_%d' % i)(hidden)

    # feature recalibration
    return Multiply()([x, attention_map])
def siamese_cnn_track_level(num_frame, num_neg_artist, num_vocal_segments):
    """Track-level siamese CNN over mel-spectrogram segments.

    Each track (anchor, positive, and every negative artist) is given as
    num_vocal_segments inputs of shape (num_frame, config.n_mels). All
    segments go through ONE shared CNN stack; the per-track segment
    embeddings are averaged (`track_avg`), and the model outputs the
    concatenation of the anchor/positive cosine similarity followed by
    one anchor/negative similarity per negative artist.
    """
    anchor_items = [Input(shape=(num_frame,config.n_mels)) for i in range(num_vocal_segments)]
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_vocal_segments)]
    # One inner list of segment inputs per negative artist.
    neg_items_of_items= [[Input(shape=(num_frame, config.n_mels)) for i in range(num_vocal_segments)] for j in range(num_neg_artist)]
    # audio model — layers are instantiated once and shared by all branches
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization(axis=2)
    activ1 = LeakyReLU(0.2)
    mp1 = MaxPool1D(pool_size=3)
    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization(axis=2)
    activ2 = LeakyReLU(0.2)
    mp2 = MaxPool1D(pool_size=3)
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization(axis=2)
    activ3 = LeakyReLU(0.2)
    mp3 = MaxPool1D(pool_size=3)
    do3 = Dropout(0.5)
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization(axis=2)
    activ4 = LeakyReLU(0.2)
    mp4 = MaxPool1D(pool_size=3)
    # final 1x1 conv widens to 256 channels; no pooling afterwards
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization(axis=2)
    activ5 = LeakyReLU(0.2)
    do5 = Dropout(0.5)
    ap = GlobalAvgPool1D()
    # averages a list of segment embeddings into a single track embedding
    track_avg = Lambda(track_average, track_average_output_shape)
    # Anchor: run every anchor segment through the shared stack
    anchor_outs = [mp1(activ1(bn1(conv1(anchor)))) for anchor in anchor_items]
    anchor_outs = [mp2(activ2(bn2(conv2(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [mp3(activ3(bn3(conv3(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [do3(anchor_out) for anchor_out in anchor_outs]
    anchor_outs = [mp4(activ4(bn4(conv4(anchor_out)))) for anchor_out in anchor_outs]
    anchor_outs = [activ5(bn5(conv5(anchor_out))) for anchor_out in anchor_outs]
    anchor_outs = [do5(anchor_out) for anchor_out in anchor_outs]
    anchor_outs = [ap(anchor_out) for anchor_out in anchor_outs]
    print ('anchor out', len(anchor_outs), np.array(anchor_outs).shape, anchor_outs[0].shape)
    # Pos: same shared stack over the positive track's segments
    pos_outs = [mp1(activ1(bn1(conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [mp2(activ2(bn2(conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [mp3(activ3(bn3(conv3(pos_out)))) for pos_out in pos_outs]
    pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [mp4(activ4(bn4(conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [activ5(bn5(conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [do5(pos_out) for pos_out in pos_outs]
    pos_outs = [ap(pos_out) for pos_out in pos_outs]
    # Negs: nested lists — outer over negative artists, inner over segments
    neg_outs_of_outs = [[mp1(activ1(bn1(conv1(neg_item)))) for neg_item in neg_items] for neg_items in neg_items_of_items]
    neg_outs_of_outs = [[mp2(activ2(bn2(conv2(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[mp3(activ3(bn3(conv3(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[do3(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[mp4(activ4(bn4(conv4(neg_out)))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[activ5(bn5(conv5(neg_out))) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[do5(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    neg_outs_of_outs = [[ap(neg_out) for neg_out in neg_outs] for neg_outs in neg_outs_of_outs]
    # track level averaging: one embedding per track
    anchor_mean = track_avg(anchor_outs)
    pos_mean = track_avg(pos_outs)
    neg_means = [track_avg(neg_outs) for neg_outs in neg_outs_of_outs]
    print ('mean', anchor_mean.shape)
    # cosine similarity (normalize=True => dot of L2-normalized vectors)
    pos_dist = dot([anchor_mean, pos_mean], axes=1, normalize=True)
    neg_dists = [dot([anchor_mean, neg_mean], axes=1, normalize=True) for neg_mean in neg_means]
    all_dists = concatenate([pos_dist] + neg_dists)
    outputs = Activation('linear')(all_dists)
    # Flatten the nested negative inputs; the final input order
    # (anchors, positives, negatives) must match the data feed.
    inputs = []
    for track_specs in neg_items_of_items:
        for ts in track_specs:
            inputs.append(ts)
    inputs = anchor_items + pos_items + inputs
    print ('inputs', len(inputs))
    model = Model(inputs=inputs, outputs=outputs)
    return model
def siamese_cnn_mono2mix(num_frame, num_neg_artist, num_pos_track):
    """Siamese CNN matching a clean-vocal anchor against mixed-audio tracks.

    The anchor goes through its own "vocal" CNN; the positive and negative
    inputs share a second "mix" CNN (the m_* layers). The output is the
    concatenation of anchor/positive cosine similarities followed by
    anchor/negative ones, under the layer name 'siamese'.
    """
    anchor = Input(shape=(num_frame,config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_track)]
    neg_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_artist)]
    # vocal audio model (anchor branch)
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    mp1 = MaxPool1D(pool_size=3)
    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    mp2 = MaxPool1D(pool_size=3)
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    mp3 = MaxPool1D(pool_size=3)
    # do3 = Dropout(0.2)
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    mp4 = MaxPool1D(pool_size=3)
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    do5 = Dropout(0.3)
    ap = GlobalAvgPool1D()
    # mix audio model (shared by positive and negative branches)
    m_conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn1 = BatchNormalization()
    m_activ1 = LeakyReLU(0.2)
    m_mp1 = MaxPool1D(pool_size=3)
    m_conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn2 = BatchNormalization()
    m_activ2 = LeakyReLU(0.2)
    m_mp2 = MaxPool1D(pool_size=3)
    m_conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn3 = BatchNormalization()
    m_activ3 = LeakyReLU(0.2)
    m_mp3 = MaxPool1D(pool_size=3)
    # NOTE(review): this re-binds do3 (the vocal-branch `do3 = Dropout(0.2)`
    # above is commented out), so the SAME Dropout(0.5) layer is used in the
    # anchor path AND the mix paths below — presumably intentional; confirm.
    do3 = Dropout(0.5)
    m_conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn4 = BatchNormalization()
    m_activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    m_mp4 = MaxPool1D(pool_size=3)
    m_conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    m_bn5 = BatchNormalization()
    m_activ5 = LeakyReLU(0.2)
    m_do5 = Dropout(0.3)
    m_ap = GlobalAvgPool1D()
    # Anchor — vocal model
    anchor_out = mp1(activ1(bn1(conv1(anchor))))
    anchor_out = mp2(activ2(bn2(conv2(anchor_out))))
    anchor_out = mp3(activ3(bn3(conv3(anchor_out))))
    anchor_out = do3(anchor_out)
    anchor_out = mp4(activ4(bn4(conv4(anchor_out))))
    anchor_out = activ5(bn5(conv5(anchor_out)))
    anchor_out = do5(anchor_out)
    anchor_out = ap(anchor_out)
    # Pos — mix model
    pos_outs = [m_mp1(m_activ1(m_bn1(m_conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [m_mp2(m_activ2(m_bn2(m_conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [m_mp3(m_activ3(m_bn3(m_conv3(pos_out)))) for pos_out in pos_outs]
    pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [m_mp4(m_activ4(m_bn4(m_conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [m_activ5(m_bn5(m_conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [m_do5(pos_out) for pos_out in pos_outs]
    pos_outs = [m_ap(pos_out) for pos_out in pos_outs]
    # Negs — mix model
    neg_outs = [m_mp1(m_activ1(m_bn1(m_conv1(neg_item)))) for neg_item in neg_items]
    neg_outs = [m_mp2(m_activ2(m_bn2(m_conv2(neg_out)))) for neg_out in neg_outs]
    neg_outs = [m_mp3(m_activ3(m_bn3(m_conv3(neg_out)))) for neg_out in neg_outs]
    neg_outs = [do3(neg_out) for neg_out in neg_outs]
    neg_outs = [m_mp4(m_activ4(m_bn4(m_conv4(neg_out)))) for neg_out in neg_outs]
    neg_outs = [m_activ5(m_bn5(m_conv5(neg_out))) for neg_out in neg_outs]
    neg_outs = [m_do5(neg_out) for neg_out in neg_outs]
    neg_outs = [m_ap(neg_out) for neg_out in neg_outs]
    #### cosine similarity of L2-normalized embeddings
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True) for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True) for neg_out in neg_outs]
    all_dists = concatenate(pos_dists + neg_dists)
    outputs = Activation('linear', name='siamese')(all_dists)
    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=outputs)
    return model
def siamese_cnn(num_frame, num_neg_artist, num_pos_track):
    """Siamese CNN with one shared backbone for anchor/positive/negative.

    All inputs are (num_frame, config.n_mels) spectrograms; the output is
    the concatenation of anchor/positive cosine similarities followed by
    anchor/negative ones. Commented-out lines preserve alternative
    configurations (relu activations, extra dropout, euclidean distance).
    """
    anchor = Input(shape=(num_frame,config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_track)]
    neg_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_artist)]
    # audio model — one set of layers shared by every branch
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    # activ1 = Activation('relu')
    mp1 = MaxPool1D(pool_size=3)
    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    # activ2 = Activation('relu')
    mp2 = MaxPool1D(pool_size=3)
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    # activ3 = Activation('relu')
    mp3 = MaxPool1D(pool_size=3)
    # do3 = Dropout(0.2)
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    mp4 = MaxPool1D(pool_size=3)
    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    # activ5 = Activation('relu')
    do5 = Dropout(0.3)
    ap = GlobalAvgPool1D()
    # euc_dist = Lambda(euclidean_dist, euclidean_dist_output_shape)
    # negative_sampling = Lambda(neg_sample, neg_sample_output_shape)
    # l2_dist = Lambda(lambda x: K.l2_normalize(x[0] - x[1],axis=1))
    # Anchor
    anchor_out = mp1(activ1(bn1(conv1(anchor))))
    anchor_out = mp2(activ2(bn2(conv2(anchor_out))))
    anchor_out = mp3(activ3(bn3(conv3(anchor_out))))
    # anchor_out = do3(anchor_out)
    anchor_out = mp4(activ4(bn4(conv4(anchor_out))))
    anchor_out = activ5(bn5(conv5(anchor_out)))
    anchor_out = do5(anchor_out)
    anchor_out = ap(anchor_out)
    # Pos — same shared layers as the anchor
    pos_outs = [mp1(activ1(bn1(conv1(pos_item)))) for pos_item in pos_items]
    pos_outs = [mp2(activ2(bn2(conv2(pos_out)))) for pos_out in pos_outs]
    pos_outs = [mp3(activ3(bn3(conv3(pos_out)))) for pos_out in pos_outs]
    # pos_outs = [do3(pos_out) for pos_out in pos_outs]
    pos_outs = [mp4(activ4(bn4(conv4(pos_out)))) for pos_out in pos_outs]
    pos_outs = [activ5(bn5(conv5(pos_out))) for pos_out in pos_outs]
    pos_outs = [do5(pos_out) for pos_out in pos_outs]
    pos_outs = [ap(pos_out) for pos_out in pos_outs]
    # Negs — same shared layers
    neg_outs = [mp1(activ1(bn1(conv1(neg_item)))) for neg_item in neg_items]
    neg_outs = [mp2(activ2(bn2(conv2(neg_out)))) for neg_out in neg_outs]
    neg_outs = [mp3(activ3(bn3(conv3(neg_out)))) for neg_out in neg_outs]
    # neg_outs = [do3(neg_out) for neg_out in neg_outs]
    neg_outs = [mp4(activ4(bn4(conv4(neg_out)))) for neg_out in neg_outs]
    neg_outs = [activ5(bn5(conv5(neg_out))) for neg_out in neg_outs]
    neg_outs = [do5(neg_out) for neg_out in neg_outs]
    neg_outs = [ap(neg_out) for neg_out in neg_outs]
    #### cosine similarity (normalize=True => L2-normalized dot)
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True) for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True) for neg_out in neg_outs]
    # pos_dists = [l2_dist([anchor_out, pos_out]) for pos_out in pos_outs]
    # neg_dists = [l2_dist([anchor_out, neg_out]) for neg_out in neg_outs]
    all_dists = concatenate(pos_dists + neg_dists)
    # all_dists = negative_sampling(all_dists)
    outputs = Activation('linear')(all_dists)
    ### euclidean (disabled alternative)
    '''
    distance = Lambda(euclidean_dist, output_shape=euclidean_dist_output_shape)
    pos_dists = [distance([anchor_out, pos_out]) for pos_out in pos_outs]
    neg_dists = [distance([anchor_out, neg_out]) for neg_out in neg_outs]
    all_dists = concatenate(pos_dists + neg_dists)
    outputs = all_dists
    '''
    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=outputs)
    return model
def create_model(
        self,
        min_filters_number,
        max_kernel_size,
        network_depth=3,
        learning_rate=0.01,
        regularization_rate=0.01):
    """
    Generate a ResNet model (see also https://arxiv.org/pdf/1611.06455.pdf).

    The compiled Keras model is returned.

    Parameters
    ----------
    min_filters_number : int
        Number of filters for first convolutional layer
    max_kernel_size: int,
        Maximum kernel size for convolutions within Inception module
    network_depth : int
        Depth of network, i.e. number of Inception modules to stack.
        Default is 3.
    learning_rate : float
        Set learning rate. Default is 0.01.
    regularization_rate : float
        Set regularization rate. Default is 0.01.

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """
    dim_length = self.x_shape[1]  # number of samples in a time series
    dim_channels = self.x_shape[2]  # number of channels
    weightinit = 'lecun_uniform'
    regularization = 0  # ignore input on purpose

    def conv_bn_relu_3_sandwich(x, filters, kernel_size):
        # Residual unit: three Conv-BN-ReLU layers plus a 1x1-conv shortcut.
        first_x = x
        for _ in range(3):
            x = Convolution1D(filters, kernel_size, padding='same',
                              kernel_initializer=weightinit,
                              kernel_regularizer=l2(regularization))(x)
            x = BatchNormalization()(x)
            x = ReLU()(x)
        # BUG FIX: the shortcut must project the block INPUT (first_x);
        # the original applied this 1x1 conv to the branch output x, so
        # first_x was never used and there was no real skip connection.
        first_x = Convolution1D(filters, kernel_size=1, padding='same',
                                kernel_initializer=weightinit,
                                kernel_regularizer=l2(regularization))(first_x)
        x = Add()([x, first_x])
        return x

    x = Input((dim_length, dim_channels))
    inputs = x
    x = BatchNormalization()(inputs)  # Added batchnorm (not in original paper)

    # Define/guess filter sizes and kernel sizes
    # Logic here is that kernels become smaller while the number of
    # filters increases with depth.
    kernel_sizes = [max(3, int(max_kernel_size // (1.41 ** i)))
                    for i in range(network_depth)]
    filter_numbers = [int(min_filters_number * (1.41 ** i))
                      for i in range(network_depth)]

    for i in range(network_depth):
        x = conv_bn_relu_3_sandwich(x, filter_numbers[i], kernel_sizes[i])

    x = GlobalAvgPool1D()(x)
    # NOTE(review): relu output + mean_absolute_error loss is unusual for
    # the classification setup the docstring describes — confirm intent.
    output_layer = Dense(self.number_of_classes, activation='relu')(x)

    # Create model and compile
    model = Model(inputs=inputs, outputs=output_layer)
    model.compile(loss='mean_absolute_error',
                  optimizer=Adam(lr=learning_rate),
                  metrics=self.metrics)
    return model
def build_model(d, word2vec, hp):
    """Builds a model.

    Word embeddings (optionally concatenated with a char-BiLSTM encoding)
    feed an optional (bi)LSTM sequence encoder. The head is either a
    per-token tag softmax (hp.mode == "seq") or a pooled intent softmax
    (hp.mode == "cls").
    """
    word_x = Input(shape=[hp.max_len_words])
    word_emb_mat = embedding_utils.make_embedding_matrix(
        d.id2word, word2vec, hp.word_emb.initializer)
    word_emb = Embedding(
        len(d.id2word),
        hp.word_emb.dim,
        weights=[word_emb_mat],
        mask_zero=True,
        input_length=hp.max_len_words,
        trainable=hp.word_emb.trainable,
    )(word_x)
    x = [word_x]
    y = word_emb
    if hp.char_emb is not None:
        # Character-level path: per-word char embeddings encoded by a BiLSTM.
        char_x = Input(shape=[hp.max_len_words, hp.max_len_chars])
        x = [word_x, char_x]
        char_emb = TimeDistributed(
            Embedding(
                len(d.id2char),
                hp.char_emb.dim,
                mask_zero=True,
                input_length=hp.max_len_chars,
                trainable=hp.char_emb.trainable,
            ))(char_x)
        char_enc = TimeDistributed(Bidirectional(LSTM(
            hp.char_enc_dim)))(char_emb)
        y = concatenate([word_emb, char_enc])
    if hp.seq_arch == "none":
        pass
    elif hp.seq_arch.endswith("lstm"):
        layer = LSTM(hp.hidden_dim, return_sequences=True)
        if hp.seq_arch == "bilstm":
            layer = Bidirectional(layer)
        else:
            assert hp.seq_arch == "lstm"
        y = layer(y)
    else:
        # BUG FIX: the original said `assert False, hp.arch`, referencing a
        # field that is not the one dispatched on (hp.seq_arch); it would
        # raise AttributeError instead of reporting the bad value.
        assert False, hp.seq_arch
    if hp.dropout_rate > 0:
        y = Dropout(hp.dropout_rate)(y)
    if hp.mode == "seq":
        # TODO: Add CRF layer.
        y = TimeDistributed(Dense(len(d.tag2id), activation="softmax"))(y)
    elif hp.mode == "cls":
        layer = None
        if hp.cls_arch == "avg_pool":
            layer = GlobalAvgPool1D()
        elif hp.cls_arch == "max_pool":
            layer = GlobalMaxPool1D()
        else:
            assert False, hp.cls_arch
        # https://github.com/tensorflow/tensorflow/issues/33260
        assert layer.supports_masking
        y = layer(y)
        y = Dense(len(d.intent2id), activation="softmax")(y)
    else:
        assert False, hp.mode
    model = Model(x, y)
    model.compile(loss="categorical_crossentropy", optimizer=hp.optimizer,
                  metrics=["acc"])
    return model
padding='same', activation='relu', kernel_initializer='he_uniform', dilation_rate=1, name='conv_3')(maxpool_1) Conv4 = Conv1D(64, 3, strides=1, padding='same', activation='relu', kernel_initializer='he_uniform', dilation_rate=1, name='conv_4')(Conv3) maxpool_2 = MaxPool1D(pool_size=2, strides=2, padding='same')(Conv4) GAP_1 = GlobalAvgPool1D()(maxpool_2) Conv5 = Conv1D(64, 3, strides=1, padding='same', activation='relu', kernel_initializer='he_uniform', dilation_rate=1, name='conv_5')(Reshape_Input_2) Conv6 = Conv1D(64, 3, strides=1, padding='same', activation='relu', kernel_initializer='he_uniform',
embedding_layer = Embedding(len(embedding_matrix), EMBEDDING_DIM, weights=[embedding_matrix], trainable=False) # Embedded version of the inputs encoded_left = embedding_layer(left_input) encoded_right = embedding_layer(right_input) # Since this is a siamese network, both sides share the same LSTM shared_bilstm = Bidirectional( LSTM(100, return_sequences=True, dropout=0.1, recurrent_dropout=0.1) ) left_output = shared_bilstm(encoded_left) right_output = shared_bilstm(encoded_right) maxpool = GlobalMaxPool1D() avgpool = GlobalAvgPool1D() concatenate1 = Concatenate() dropout1 = Dropout(0.1) x1 = concatenate1([ maxpool(left_output), avgpool(left_output) ]) x2 = concatenate1([ maxpool(right_output), avgpool(right_output) ]) x1 = dropout1(x1) x2 = dropout1(x2) sqr_diff = Lambda( lambda tensors: K.pow((K.square(tensors[0])-K.square(tensors[1])), 0.5), name="Squared_diff" ) abs_diff = Lambda( lambda tensors : K.abs(tensors[0]-tensors[1]), name="Absolute_diff" ) concatenate2 = Concatenate() diff = concatenate2([ sqr_diff([x1,x2]), abs_diff([x1,x2]) ]) diff = Dropout(0.1)(diff) diff = Dense(100, activation="relu")(diff) diff = Dropout(0.1)(diff)
# Load AWS-transcribed text file.
with open('aws_transcribe.txt') as f:
    aws_text = [word for line in f for word in line.split()]

# Initialize the tokenizer on the training text.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df.text)
# BUG FIX: the original called pad_squences (typo) — a NameError at
# runtime; pad_sequences is the function used correctly further below.
x = pad_sequences(tokenizer.texts_to_sequences(df.text), 50)
y = df.sentiment

# Build the RNN (renamed `input` -> `text_input`: don't shadow the builtin).
text_input = Input((50, ), name='input')
# BUG FIX: Keras Tokenizer indices start at 1, so the Embedding input_dim
# must be vocab size + 1; len(word_index) alone is one too small for the
# largest index and triggers an out-of-range lookup.
embed = Embedding(len(tokenizer.word_index) + 1, 100)(text_input)
embed_dropout = SpatialDropout1D(0.5)(embed)
# BUG FIX: the original used GRI, a typo for GRU — NameError at runtime.
rnn = Bidirectional(GRU(50, return_sequences=True,
                        recurrent_dropout=0.2))(embed_dropout)
max_pool = GlobalMaxPool1D()(rnn)
avg_pool = GlobalAvgPool1D()(rnn)
concat = Concatenate()([max_pool, avg_pool])
dense = Dense(3, activation='softmax')(concat)

# Train the RNN.
model = Model(text_input, dense)
model.compile('adam', 'sparse_categorical_crossentropy',
              ['sparse_categorical_accuracy'])
model.fit(x, y, batch_size=512, validation_split=0.2, epochs=25)

# Predict sentiment on the transcript.
model.predict(pad_sequences(tokenizer.texts_to_sequences(aws_text), 50))
model.add(BatchNormalization()) model.add(Conv1D(filters=8, kernel_size=9, activation='relu', kernel_regularizer = l2(0.1))) model.add(MaxPool1D(strides=4)) model.add(BatchNormalization()) model.add(Conv1D(filters=16, kernel_size=9, activation='relu')) model.add(MaxPool1D(strides=4)) model.add(BatchNormalization()) model.add(Dropout(0.25)) model.add(Conv1D(filters=64, kernel_size=4, activation='relu')) model.add(BatchNormalization()) model.add(Dropout(0.5)) model.add(Conv1D(filters=32, kernel_size=1, activation='relu')) model.add(BatchNormalization()) model.add(Dropout(0.7)) model.add(GlobalAvgPool1D()) model.add(Dense(3, activation='softmax')) model.load_weights("modelWeights.h5"); INPUT_LIB = './input/' SAMPLE_RATE = 44100 CLASSES = ['artifact', 'normal', 'murmur'] CODE_BOOK = {x:i for i,x in enumerate(CLASSES)} NB_CLASSES = len(CLASSES) def repeat_to_length(arr, length): """Repeats the numpy 1D array to given length, and makes datatype float""" result = np.empty((length, ), dtype = 'float32') l = len(arr)