# ExtractData.py — audio feature extraction using librosa
import librosa
import numpy as np
# extract features/data from a wav file and return as a flat list of scalars
def getData(filename):
    """Extract a 14-element feature vector from the audio file *filename*.

    Features, in order: mean spectral contrast, mean mel spectrogram,
    mean harmonic signal, mean percussive signal, mean MFCC, mean MFCC
    delta, mean beat-synchronous MFCC delta, mean chromagram, mean
    beat-synchronous chroma, mean stacked beat features, mean RMS energy,
    tuning offset, total zero-crossing count, and tempo (BPM).

    Returns a plain Python list (not an np.array, despite the old header
    comment) so existing callers are unaffected.
    """
    print("Getting data for {}".format(filename))  # was misspelled "Gretting"
    hop_length = 256  # analysis hop for the MFCC frames

    # Load the clip (librosa resamples to its default rate)
    y, sr = librosa.load(filename)

    # Magnitude of the short-time Fourier transform (STFT)
    S = np.abs(librosa.stft(y))

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)

    # Compute MFCC features from the raw signal ...
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    # ... and their first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Stack and synchronize between beat events using the mean (default).
    # NOTE(review): librosa.feature.sync moved to librosa.util.sync in
    # newer librosa releases — update if the pinned version is bumped.
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

    # Chroma features from the harmonic signal, aggregated per beat (median)
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)

    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    # Average RMS energy.
    # NOTE(review): librosa.feature.rmse was renamed librosa.feature.rms
    # in librosa 0.7 — update if the pinned version is bumped.
    avgEnergy = np.mean(librosa.feature.rmse(y=y))

    # Estimated tuning offset (in fractions of a bin)
    tuning = librosa.estimate_tuning(y=y, sr=sr)

    # Total number of zero crossings in the raw signal
    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))

    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))

    # FIX: the old code also built a min-max "norm" list (its comment
    # claimed a -1..1 range but the formula yields 0..1, and it divided by
    # zero when all features were equal) and then returned the raw values
    # anyway — that dead computation is removed.
    return [avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic),
            np.mean(y_percussive), np.mean(mfcc), np.mean(mfcc_delta),
            np.mean(beat_mfcc_delta), np.mean(chromagram),
            np.mean(beat_chroma), np.mean(beat_features), avgEnergy,
            tuning, zeroCrossings, tempo]
# Old extraction with only 8 features
# # extract features/data from wav file and return as np.array
# def getData(filename):
# # why this hop_length?
# hop_length = 256;
#
# # Load the example clip
# y, sr = librosa.load(filename)
#
# # Short-time Fourier transform (STFT)
# S = np.abs(librosa.stft(y))
#
# # Separate harmonics and percussives into two waveforms
# y_harmonic, y_percussive = librosa.effects.hpss(y)
#
# # Beat track on the percussive signal
# tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)
#
# # Compute MFCC features from the raw signal
# mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
#
# # And the first-order differences (delta features)
# mfcc_delta = librosa.feature.delta(mfcc)
#
# # Stack and synchronize between beat events
# # This time, we'll use the mean value (default) instead of median
# beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)
#
# # Compute chroma features from the harmonic signal
# chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
#
# # Aggregate chroma features between beat events
# # We'll use the median value of each feature between beat frames
# beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)
#
# # Finally, stack all beat-synchronous features together
# beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
#
# # Average the energy
# avgEnergy = np.mean(librosa.feature.rmse(y=y))
#
# # Estimate tuning
# tuning = librosa.estimate_tuning(y=y, sr=sr)
#
# # zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))
# avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
#
# avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))
#
# # raw = np.array([[avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic), np.mean(y_percussive), np.mean(mfcc),
# # np.mean(mfcc_delta), np.mean(beat_mfcc_delta), np.mean(chromagram), np.mean(beat_chroma),
# # np.mean(beat_features), avgEnergy, tuning]])
#
# raw = {avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic), np.mean(y_percussive), np.mean(mfcc),
# np.mean(mfcc_delta), np.mean(beat_mfcc_delta), np.mean(chromagram), np.mean(beat_chroma),
# np.mean(beat_features), avgEnergy, tuning}
#
# norm = [float(i) / sum(raw) for i in raw] # normalise numbers between -1 and 1
#
# data = {"avgSpectralContrast": norm[0], "avgMelSpectro": norm[1], "y_harmonic": norm[2], "y_percussive": norm[3],
# "mfcc": norm[4], "mfcc_delta": norm[5], "beat_mfcc_delta": norm[6], "chromagram": norm[7],
# "beat_chroma": norm[8], "beat_features": norm[9], "avgEnergy": norm[10], "tuning": norm[11]}
#
# return data