Ejemplo n.º 1
0
    def test_music_tagger_crnn(self):
        """Forward a random batch through MusicTaggerCRNN in both Keras and
        BigDL and check the outputs agree element-wise."""
        # TODO: For the first BatchNormalization layer in the model, we don't support `axis=3`
        keras.backend.set_image_dim_ordering("th")
        keras_model = MusicTaggerCRNN(include_top=False, weights=None)
        # Batch of 2 single-channel 96x1366 mel-spectrogram-shaped inputs.
        sample = np.random.random([2, 1, 96, 1366])

        # Convert the Keras definition to BigDL and copy the weights across.
        bigdl_model = DefinitionLoader.from_kmodel(keras_model)
        WeightLoader.load_weights_from_kmodel(bigdl_model, keras_model)

        expected = keras_model.predict(sample)
        # Inference mode so BatchNorm/Dropout behave like Keras predict().
        bigdl_model.training(is_training=False)
        actual = bigdl_model.forward(sample)

        self.assert_allclose(expected, actual, rtol=1e-6, atol=1e-6)
Ejemplo n.º 2
0
    def test_music_tagger_crnn(self):
        """Verify the BigDL conversion of MusicTaggerCRNN reproduces the
        Keras model's predictions on random input."""
        # TODO: For the first BatchNormalization layer in the model, we don't support `axis=3`
        keras.backend.set_image_dim_ordering("th")
        kmodel = MusicTaggerCRNN(include_top=False, weights=None)

        # Build the BigDL counterpart and transfer the (random) weights.
        bmodel = DefinitionLoader.from_kmodel(kmodel)
        WeightLoader.load_weights_from_kmodel(bmodel, kmodel)
        bmodel.training(is_training=False)  # inference mode, to match predict()

        input_data = np.random.random([2, 1, 96, 1366])
        keras_output = kmodel.predict(input_data)
        bigdl_output = bmodel.forward(input_data)

        self.assert_allclose(keras_output, bigdl_output, rtol=1e-6, atol=1e-6)
Ejemplo n.º 3
0
from keras.applications.music_tagger_crnn import MusicTaggerCRNN
from keras.applications.music_tagger_crnn import preprocess_input, decode_predictions
import numpy as np
 
# 1. Tagging: predict genre tags for an audio file with pretrained MSD weights.
model = MusicTaggerCRNN(weights='msd')

audio_path = 'audio_file.mp3'
mel_spectrogram = preprocess_input(audio_path)
# Add a leading batch dimension: (96, 1366) -> (1, 96, 1366).
batch = np.expand_dims(mel_spectrogram, axis=0)

preds = model.predict(batch)
print('Predicted:')
print(decode_predictions(preds))
# print: ('Predicted:', [[('rock', 0.097071797), ('pop', 0.042456303), ('alternative', 0.032439161), ('indie', 0.024491295), ('female vocalists', 0.016455274)]])

# 2. Feature extraction: drop the classifier head to get an embedding vector.
model = MusicTaggerCRNN(weights='msd', include_top=False)

audio_path = 'audio_file.mp3'
mel_spectrogram = preprocess_input(audio_path)
batch = np.expand_dims(mel_spectrogram, axis=0)

feats = model.predict(batch)
print('Features:')
print(feats[0, :10])
# print: ('Features:', [-0.19160545 0.94259131 -0.9991011 0.47644514 -0.19089699 0.99033844 0.1103896 -0.00340496 0.14823607 0.59856361])